From 88d3f61f41f7b6299e0d0f9e0047dd955cb67571 Mon Sep 17 00:00:00 2001 From: krasimir Date: Thu, 29 May 2008 17:55:05 +0000 Subject: change the library root namespace from GF.GFCC to PGF --- GF.cabal | 64 ++- src-3.0/GF/Command/Commands.hs | 12 +- src-3.0/GF/Command/Importing.hs | 7 +- src-3.0/GF/Command/Interpreter.hs | 6 +- src-3.0/GF/Command/PPrTree.hs | 6 +- src-3.0/GF/Compile.hs | 6 +- src-3.0/GF/Compile/Export.hs | 22 + src-3.0/GF/Compile/GFCCtoHaskell.hs | 212 ++++++++ src-3.0/GF/Compile/GFCCtoJS.hs | 117 +++++ src-3.0/GF/Compile/GenerateFCFG.hs | 12 +- src-3.0/GF/Compile/GrammarToGFCC.hs | 14 +- src-3.0/GF/Compile/OptimizeGFCC.hs | 124 +++++ src-3.0/GF/GFCC/API.hs | 184 ------- src-3.0/GF/GFCC/BuildParser.hs | 64 --- src-3.0/GF/GFCC/CId.hs | 14 - src-3.0/GF/GFCC/CheckGFCC.hs | 186 ------- src-3.0/GF/GFCC/DataGFCC.hs | 178 ------- src-3.0/GF/GFCC/GFCC.cf | 81 --- src-3.0/GF/GFCC/GFCCtoHaskell.hs | 212 -------- src-3.0/GF/GFCC/GFCCtoJS.hs | 117 ----- src-3.0/GF/GFCC/Generate.hs | 70 --- src-3.0/GF/GFCC/Linearize.hs | 87 ---- src-3.0/GF/GFCC/Macros.hs | 116 ----- src-3.0/GF/GFCC/OptimizeGFCC.hs | 124 ----- src-3.0/GF/GFCC/Parsing/FCFG.hs | 79 --- src-3.0/GF/GFCC/Parsing/FCFG/Active.hs | 186 ------- src-3.0/GF/GFCC/Parsing/FCFG/Utilities.hs | 271 ---------- src-3.0/GF/GFCC/PrintGFCC.hs | 22 - src-3.0/GF/GFCC/Raw/AbsGFCCRaw.hs | 14 - src-3.0/GF/GFCC/Raw/ConvertGFCC.hs | 250 --------- src-3.0/GF/GFCC/Raw/GFCCRaw.cf | 12 - src-3.0/GF/GFCC/Raw/ParGFCCRaw.hs | 101 ---- src-3.0/GF/GFCC/Raw/PrintGFCCRaw.hs | 35 -- src-3.0/GF/GFCC/ShowLinearize.hs | 87 ---- src-3.0/GF/GFCC/doc/Eng.gf | 13 - src-3.0/GF/GFCC/doc/Ex.gf | 8 - src-3.0/GF/GFCC/doc/Swe.gf | 13 - src-3.0/GF/GFCC/doc/Test.gf | 64 --- src-3.0/GF/GFCC/doc/gfcc.html | 809 ------------------------------ src-3.0/GF/GFCC/doc/gfcc.txt | 712 -------------------------- src-3.0/GF/GFCC/doc/old-GFCC.cf | 50 -- src-3.0/GF/GFCC/doc/old-gfcc.txt | 656 ------------------------ src-3.0/GF/GFCC/doc/syntax.txt | 180 ------- src-3.0/GFC.hs | 12 +- src-3.0/GFI.hs | 3 +- src-3.0/PGF.hs | 181 +++++++ src-3.0/PGF/BuildParser.hs | 64 +++ src-3.0/PGF/CId.hs | 14 + src-3.0/PGF/Check.hs | 186 +++++++ src-3.0/PGF/Data.hs | 178 +++++++ src-3.0/PGF/Generate.hs | 70 +++ src-3.0/PGF/Linearize.hs | 87 ++++ src-3.0/PGF/Macros.hs | 116 +++++ src-3.0/PGF/Parsing/FCFG.hs | 78 +++ src-3.0/PGF/Parsing/FCFG/Active.hs | 186 +++++++ src-3.0/PGF/Parsing/FCFG/Utilities.hs | 271 ++++++++++ src-3.0/PGF/Raw/Abstract.hs | 14 + src-3.0/PGF/Raw/Convert.hs | 250 +++++++++ src-3.0/PGF/Raw/Parse.hs | 101 ++++ src-3.0/PGF/Raw/Print.hs | 35 ++ src-3.0/PGF/ShowLinearize.hs | 86 ++++ src-3.0/PGF/doc/Eng.gf | 13 + src-3.0/PGF/doc/Ex.gf | 8 + src-3.0/PGF/doc/Swe.gf | 13 + src-3.0/PGF/doc/Test.gf | 64 +++ src-3.0/PGF/doc/gfcc.html | 809 ++++++++++++++++++++++++++++++ src-3.0/PGF/doc/gfcc.txt | 712 ++++++++++++++++++++++++++ src-3.0/PGF/doc/old-GFCC.cf | 50 ++ src-3.0/PGF/doc/old-gfcc.txt | 656 ++++++++++++++++++++++++ src-3.0/PGF/doc/syntax.txt | 180 +++++++ 70 files changed, 4964 insertions(+), 5070 deletions(-) create mode 100644 src-3.0/GF/Compile/Export.hs create mode 100644 src-3.0/GF/Compile/GFCCtoHaskell.hs create mode 100644 src-3.0/GF/Compile/GFCCtoJS.hs create mode 100644 src-3.0/GF/Compile/OptimizeGFCC.hs delete mode 100644 src-3.0/GF/GFCC/API.hs delete mode 100644 src-3.0/GF/GFCC/BuildParser.hs delete mode 100644 src-3.0/GF/GFCC/CId.hs delete mode 100644 src-3.0/GF/GFCC/CheckGFCC.hs delete mode 100644 src-3.0/GF/GFCC/DataGFCC.hs delete mode 100644 src-3.0/GF/GFCC/GFCC.cf delete mode 100644 src-3.0/GF/GFCC/GFCCtoHaskell.hs delete mode 100644 src-3.0/GF/GFCC/GFCCtoJS.hs delete mode 100644 src-3.0/GF/GFCC/Generate.hs delete mode 100644 src-3.0/GF/GFCC/Linearize.hs delete mode 100644 src-3.0/GF/GFCC/Macros.hs delete mode 100644 src-3.0/GF/GFCC/OptimizeGFCC.hs delete mode 100644 src-3.0/GF/GFCC/Parsing/FCFG.hs delete mode 100644 src-3.0/GF/GFCC/Parsing/FCFG/Active.hs delete mode 100644 src-3.0/GF/GFCC/Parsing/FCFG/Utilities.hs delete mode 100644 src-3.0/GF/GFCC/PrintGFCC.hs delete mode 100644 src-3.0/GF/GFCC/Raw/AbsGFCCRaw.hs delete mode 100644 src-3.0/GF/GFCC/Raw/ConvertGFCC.hs delete mode 100644 src-3.0/GF/GFCC/Raw/GFCCRaw.cf delete mode 100644 src-3.0/GF/GFCC/Raw/ParGFCCRaw.hs delete mode 100644 src-3.0/GF/GFCC/Raw/PrintGFCCRaw.hs delete mode 100644 src-3.0/GF/GFCC/ShowLinearize.hs delete mode 100644 src-3.0/GF/GFCC/doc/Eng.gf delete mode 100644 src-3.0/GF/GFCC/doc/Ex.gf delete mode 100644 src-3.0/GF/GFCC/doc/Swe.gf delete mode 100644 src-3.0/GF/GFCC/doc/Test.gf delete mode 100644 src-3.0/GF/GFCC/doc/gfcc.html delete mode 100644 src-3.0/GF/GFCC/doc/gfcc.txt delete mode 100644 src-3.0/GF/GFCC/doc/old-GFCC.cf delete mode 100644 src-3.0/GF/GFCC/doc/old-gfcc.txt delete mode 100644 src-3.0/GF/GFCC/doc/syntax.txt create mode 100644 src-3.0/PGF.hs create mode 100644 src-3.0/PGF/BuildParser.hs create mode 100644 src-3.0/PGF/CId.hs create mode 100644 src-3.0/PGF/Check.hs create mode 100644 src-3.0/PGF/Data.hs create mode 100644 src-3.0/PGF/Generate.hs create mode 100644 src-3.0/PGF/Linearize.hs create mode 100644 src-3.0/PGF/Macros.hs create mode 100644 src-3.0/PGF/Parsing/FCFG.hs create mode 100644 src-3.0/PGF/Parsing/FCFG/Active.hs create mode 100644 src-3.0/PGF/Parsing/FCFG/Utilities.hs create mode 100644 src-3.0/PGF/Raw/Abstract.hs create mode 100644 src-3.0/PGF/Raw/Convert.hs create mode 100644 src-3.0/PGF/Raw/Parse.hs create mode 100644 src-3.0/PGF/Raw/Print.hs create mode 100644 src-3.0/PGF/ShowLinearize.hs create mode 100644 src-3.0/PGF/doc/Eng.gf create mode 100644 src-3.0/PGF/doc/Ex.gf create mode 100644 src-3.0/PGF/doc/Swe.gf create mode 100644 src-3.0/PGF/doc/Test.gf create mode 100644 src-3.0/PGF/doc/gfcc.html create mode 100644 src-3.0/PGF/doc/gfcc.txt create mode 100644 src-3.0/PGF/doc/old-GFCC.cf create mode 100644 src-3.0/PGF/doc/old-gfcc.txt create mode 100644 src-3.0/PGF/doc/syntax.txt diff --git a/GF.cabal b/GF.cabal index 533fff9ee..aa7a3479a 100644 --- a/GF.cabal +++ b/GF.cabal @@ -25,20 +25,21 @@ library hs-source-dirs: src-3.0 extensions: exposed-modules: - GF.GFCC.API + PGF other-modules: - GF.GFCC.Raw.AbsGFCCRaw - GF.GFCC.DataGFCC - GF.GFCC.CId - GF.GFCC.Raw.ParGFCCRaw - GF.GFCC.Macros - GF.GFCC.Generate - GF.GFCC.Linearize - GF.GFCC.BuildParser - GF.GFCC.Parsing.FCFG.Utilities - GF.GFCC.Parsing.FCFG.Active - GF.GFCC.Parsing.FCFG - GF.GFCC.Raw.ConvertGFCC + PGF.CId + PGF.Data + PGF.Macros + PGF.Generate + PGF.Linearize + PGF.BuildParser + PGF.Parsing.FCFG.Utilities + PGF.Parsing.FCFG.Active + PGF.Parsing.FCFG + PGF.Raw.Parse + PGF.Raw.Print + PGF.Raw.Convert + PGF.Raw.Abstract GF.Data.RedBlackSet GF.Data.GeneralDeduction GF.Data.Utilities @@ -77,7 +78,6 @@ executable gf3 GF.Source.PrintGF GF.JavaScript.AbsJS GF.JavaScript.PrintJS - GF.GFCC.Raw.AbsGFCCRaw GF.Command.LexGFShell GF.Command.AbsGFShell GF.Command.PrintGFShell @@ -89,29 +89,12 @@ executable gf3 GF.Data.SortedList GF.Data.Assoc GF.Infra.PrintClass - GF.GFCC.CId - GF.GFCC.Raw.ParGFCCRaw - GF.GFCC.Raw.PrintGFCCRaw - GF.GFCC.BuildParser - GF.GFCC.DataGFCC - GF.GFCC.Parsing.FCFG.Utilities - GF.GFCC.Parsing.FCFG.Active - GF.GFCC.Parsing.FCFG - GF.GFCC.Raw.ConvertGFCC - GF.GFCC.Macros - GF.GFCC.Generate - GF.GFCC.Linearize GF.Compile.GenerateFCFG GF.Data.ErrM GF.Command.ParGFShell GF.Command.PPrTree - GF.GFCC.API - GF.GFCC.CheckGFCC GF.Source.ParGF GF.Data.Operations - GF.GFCC.GFCCtoHaskell - GF.GFCC.ShowLinearize - GF.GFCC.OptimizeGFCC GF.Infra.Ident GF.Grammar.Predef GF.Data.Str @@ -119,8 +102,6 @@ executable gf3 GF.Infra.GetOpt GF.Infra.Option GF.Infra.UseIO - GF.GFCC.GFCCtoJS - GF.GFCC.PrintGFCC GF.Command.Commands GF.Command.Interpreter GF.Infra.Modules @@ -140,7 +121,6 @@ executable gf3 GF.Grammar.Unify GF.Compile.TypeCheck GF.Compile.Update - GF.Compile.OptimizeGF GF.Infra.CheckM GF.Grammar.AppPredefined GF.Grammar.PatternMatch @@ -152,6 +132,8 @@ executable gf3 GF.Compile.GrammarToGFCC GF.Compile.Compute GF.Compile.Optimize + GF.Compile.OptimizeGF + GF.Compile.OptimizeGFCC GF.Compile.ModDeps GF.Compile.Rebuild GF.Source.SourceToGrammar @@ -159,6 +141,20 @@ executable gf3 GF.Compile GF.Command.Importing GF.System.Readline + PGF + PGF.CId + PGF.Data + PGF.Macros + PGF.Generate + PGF.Linearize + PGF.BuildParser + PGF.Parsing.FCFG.Utilities + PGF.Parsing.FCFG.Active + PGF.Parsing.FCFG + PGF.Raw.Parse + PGF.Raw.Print + PGF.Raw.Convert + PGF.Raw.Abstract GFC GFI diff --git a/src-3.0/GF/Command/Commands.hs b/src-3.0/GF/Command/Commands.hs index 8068d6c0e..759f9a8bf 100644 --- a/src-3.0/GF/Command/Commands.hs +++ b/src-3.0/GF/Command/Commands.hs @@ -12,12 +12,12 @@ module GF.Command.Commands ( import GF.Command.AbsGFShell hiding (Tree) import GF.Command.PPrTree import GF.Command.ParGFShell -import GF.GFCC.CId -import GF.GFCC.ShowLinearize -import GF.GFCC.API -import GF.GFCC.Macros -import GF.GFCC.PrintGFCC -import GF.GFCC.DataGFCC ---- +import PGF +import PGF.CId +import PGF.ShowLinearize +import PGF.Macros +import PGF.Data ---- +import GF.Compile.Export import GF.Data.ErrM ---- diff --git a/src-3.0/GF/Command/Importing.hs b/src-3.0/GF/Command/Importing.hs index f200ba7c4..91bcdcb73 100644 --- a/src-3.0/GF/Command/Importing.hs +++ b/src-3.0/GF/Command/Importing.hs @@ -1,11 +1,10 @@ module GF.Command.Importing (importGrammar, importSource) where -import GF.Compile -import GF.GFCC.DataGFCC -import GF.GFCC.API +import PGF +import PGF.Data +import GF.Compile import GF.Grammar.Grammar (SourceGrammar) -- for cc command - import GF.Infra.UseIO import GF.Infra.Option import GF.Data.ErrM diff --git a/src-3.0/GF/Command/Interpreter.hs b/src-3.0/GF/Command/Interpreter.hs index 24f16ea1d..825c2862b 100644 --- a/src-3.0/GF/Command/Interpreter.hs +++ b/src-3.0/GF/Command/Interpreter.hs @@ -7,9 +7,9 @@ import GF.Command.Commands import GF.Command.AbsGFShell hiding (Tree) import GF.Command.PPrTree import GF.Command.ParGFShell -import GF.GFCC.API -import GF.GFCC.Macros -import GF.GFCC.DataGFCC +import PGF +import PGF.Data +import PGF.Macros import GF.System.Signal import GF.Data.ErrM ---- diff --git a/src-3.0/GF/Command/PPrTree.hs b/src-3.0/GF/Command/PPrTree.hs index f80484799..dcc057cb7 100644 --- a/src-3.0/GF/Command/PPrTree.hs +++ b/src-3.0/GF/Command/PPrTree.hs @@ -1,8 +1,8 @@ module GF.Command.PPrTree (pTree, prExp, tree2exp) where -import GF.GFCC.DataGFCC -import GF.GFCC.CId -import GF.GFCC.Macros +import PGF.CId +import PGF.Data +import PGF.Macros import qualified GF.Command.ParGFShell as P import GF.Command.PrintGFShell import GF.Command.AbsGFShell diff --git a/src-3.0/GF/Compile.hs b/src-3.0/GF/Compile.hs index b7898a3ef..fb1f8ba0b 100644 --- a/src-3.0/GF/Compile.hs +++ b/src-3.0/GF/Compile.hs @@ -8,6 +8,7 @@ import GF.Compile.Rename import GF.Compile.CheckGrammar import GF.Compile.Optimize import GF.Compile.OptimizeGF +import GF.Compile.OptimizeGFCC import GF.Compile.GrammarToGFCC import GF.Compile.ReadFiles import GF.Compile.Update @@ -34,9 +35,8 @@ import System.FilePath import System.Time import qualified Data.Map as Map -import GF.GFCC.OptimizeGFCC -import GF.GFCC.CheckGFCC -import GF.GFCC.DataGFCC +import PGF.Check +import PGF.Data -- | Compiles a number of source files and builds a 'GFCC' structure for them. diff --git a/src-3.0/GF/Compile/Export.hs b/src-3.0/GF/Compile/Export.hs new file mode 100644 index 000000000..2b36b10a9 --- /dev/null +++ b/src-3.0/GF/Compile/Export.hs @@ -0,0 +1,22 @@ +module GF.Compile.Export where + +import PGF.Data (GFCC) +import PGF.Raw.Print (printTree) +import PGF.Raw.Convert (fromGFCC) +import GF.Compile.GFCCtoHaskell +import GF.Compile.GFCCtoJS +import GF.Infra.Option +import GF.Text.UTF8 + +-- top-level access to code generation + +prGFCC :: OutputFormat -> GFCC -> String +prGFCC fmt gr = case fmt of + FmtGFCC -> printGFCC gr + FmtJavaScript -> gfcc2js gr + FmtHaskell -> grammar2haskell gr + FmtHaskellGADT -> grammar2haskellGADT gr + +printGFCC :: GFCC -> String +printGFCC = encodeUTF8 . printTree . fromGFCC + diff --git a/src-3.0/GF/Compile/GFCCtoHaskell.hs b/src-3.0/GF/Compile/GFCCtoHaskell.hs new file mode 100644 index 000000000..9a5fb7ca2 --- /dev/null +++ b/src-3.0/GF/Compile/GFCCtoHaskell.hs @@ -0,0 +1,212 @@ +---------------------------------------------------------------------- +-- | +-- Module : GFCCtoHaskell +-- Maintainer : Aarne Ranta +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/06/17 12:39:07 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.8 $ +-- +-- to write a GF abstract grammar into a Haskell module with translations from +-- data objects into GF trees. Example: GSyntax for Agda. +-- AR 11/11/1999 -- 7/12/2000 -- 18/5/2004 +----------------------------------------------------------------------------- + +module GF.Compile.GFCCtoHaskell (grammar2haskell, grammar2haskellGADT) where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import GF.Data.Operations +import GF.Text.UTF8 + +import Data.List --(isPrefixOf, find, intersperse) +import qualified Data.Map as Map + +-- | the main function +grammar2haskell :: GFCC -> String +grammar2haskell gr = encodeUTF8 $ foldr (++++) [] $ + haskPreamble ++ [datatypes gr', gfinstances gr'] + where gr' = hSkeleton gr + +grammar2haskellGADT :: GFCC -> String +grammar2haskellGADT gr = encodeUTF8 $ foldr (++++) [] $ + ["{-# OPTIONS_GHC -fglasgow-exts #-}"] ++ + haskPreamble ++ [datatypesGADT gr', gfinstances gr'] + where gr' = hSkeleton gr + +-- | by this you can prefix all identifiers with stg; the default is 'G' +gId :: OIdent -> OIdent +gId i = 'G':i + +haskPreamble = + [ + "module GSyntax where", + "", + "import PGF.CId", + "import PGF.Data", + "----------------------------------------------------", + "-- automatic translation from GF to Haskell", + "----------------------------------------------------", + "", + "class Gf a where", + " gf :: a -> Exp", + " fg :: Exp -> a", + "", + predefInst "GString" "String" "DTr [] (AS s) []", + "", + predefInst "GInt" "Integer" "DTr [] (AI s) []", + "", + predefInst "GFloat" "Double" "DTr [] (AF s) []", + "", + "----------------------------------------------------", + "-- below this line machine-generated", + "----------------------------------------------------", + "" + ] + +predefInst gtyp typ patt = + "newtype" +++ gtyp +++ "=" +++ gtyp +++ typ +++ " deriving Show" +++++ + "instance Gf" +++ gtyp +++ "where" ++++ + " gf (" ++ gtyp +++ "s) =" +++ patt ++++ + " fg t =" ++++ + " case t of" ++++ + " " +++ patt +++ " ->" +++ gtyp +++ "s" ++++ + " _ -> error (\"no" +++ gtyp +++ "\" ++ show t)" + +type OIdent = String + +type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] + +datatypes, gfinstances :: (String,HSkeleton) -> String +datatypes = (foldr (+++++) "") . (filter (/="")) . (map hDatatype) . snd +gfinstances (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (gfInstance m)) g + +hDatatype :: (OIdent, [(OIdent, [OIdent])]) -> String +gfInstance :: String -> (OIdent, [(OIdent, [OIdent])]) -> String + +hDatatype ("Cn",_) = "" --- +hDatatype (cat,[]) = "" +hDatatype (cat,rules) | isListCat (cat,rules) = + "newtype" +++ gId cat +++ "=" +++ gId cat +++ "[" ++ gId (elemCat cat) ++ "]" + +++ "deriving Show" +hDatatype (cat,rules) = + "data" +++ gId cat +++ "=" ++ + (if length rules == 1 then "" else "\n ") +++ + foldr1 (\x y -> x ++ "\n |" +++ y) + [gId f +++ foldr (+++) "" (map gId xx) | (f,xx) <- rules] ++++ + " deriving Show" + +-- GADT version of data types +datatypesGADT :: (String,HSkeleton) -> String +datatypesGADT (_,skel) = + unlines (concatMap hCatTypeGADT skel) + +++++ + "data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . hDatatypeGADT) skel) + +hCatTypeGADT :: (OIdent, [(OIdent, [OIdent])]) -> [String] +hCatTypeGADT (cat,rules) + = ["type"+++gId cat+++"="+++"Tree"+++gId cat++"_", + "data"+++gId cat++"_"] + +hDatatypeGADT :: (OIdent, [(OIdent, [OIdent])]) -> [String] +hDatatypeGADT (cat, rules) + | isListCat (cat,rules) = [gId cat+++"::"+++"["++gId (elemCat cat)++"]" +++ "->" +++ t] + | otherwise = + [ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t | (f,args) <- rules ] + where t = "Tree" +++ gId cat ++ "_" + +gfInstance m crs = hInstance m crs ++++ fInstance m crs + +----hInstance m ("Cn",_) = "" --- seems to belong to an old applic. AR 18/5/2004 +hInstance m (cat,[]) = "" +hInstance m (cat,rules) + | isListCat (cat,rules) = + "instance Gf" +++ gId cat +++ "where" ++++ + " gf (" ++ gId cat +++ "[" ++ concat (intersperse "," baseVars) ++ "])" + +++ "=" +++ mkRHS ("Base"++ec) baseVars ++++ + " gf (" ++ gId cat +++ "(x:xs)) = " + ++ mkRHS ("Cons"++ec) ["x",prParenth (gId cat+++"xs")] +-- no show for GADTs +-- ++++ " gf (" ++ gId cat +++ "xs) = error (\"Bad " ++ cat ++ " value: \" ++ show xs)" + | otherwise = + "instance Gf" +++ gId cat +++ "where\n" ++ + unlines [mkInst f xx | (f,xx) <- rules] + where + ec = elemCat cat + baseVars = mkVars (baseSize (cat,rules)) + mkInst f xx = let xx' = mkVars (length xx) in " gf " ++ + (if length xx == 0 then gId f else prParenth (gId f +++ foldr1 (+++) xx')) +++ + "=" +++ mkRHS f xx' + mkVars n = ["x" ++ show i | i <- [1..n]] + mkRHS f vars = "DTr [] (AC (CId \"" ++ f ++ "\"))" +++ + "[" ++ prTList ", " ["gf" +++ x | x <- vars] ++ "]" + + +----fInstance m ("Cn",_) = "" --- +fInstance m (cat,[]) = "" +fInstance m (cat,rules) = + " fg t =" ++++ + " case t of" ++++ + unlines [mkInst f xx | (f,xx) <- rules] ++++ + " _ -> error (\"no" +++ cat ++ " \" ++ show t)" + where + mkInst f xx = + " DTr [] (AC (CId \"" ++ f ++ "\")) " ++ + "[" ++ prTList "," xx' ++ "]" +++ + "->" +++ mkRHS f xx' + where xx' = ["x" ++ show i | (_,i) <- zip xx [1..]] + mkRHS f vars + | isListCat (cat,rules) = + if "Base" `isPrefixOf` f then + gId cat +++ "[" ++ prTList ", " [ "fg" +++ x | x <- vars ] ++ "]" + else + let (i,t) = (init vars,last vars) + in "let" +++ gId cat +++ "xs = fg " ++ t +++ "in" +++ + gId cat +++ prParenth (prTList ":" (["fg"+++v | v <- i] ++ ["xs"])) + | otherwise = + gId f +++ + prTList " " [prParenth ("fg" +++ x) | x <- vars] + + +--type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] +hSkeleton :: GFCC -> (String,HSkeleton) +hSkeleton gr = + (prCId (absname gr), + [(prCId c, [(prCId f, map prCId cs) | (f, (cs,_)) <- fs]) | + fs@((_, (_,c)):_) <- fns] + ) + where + fns = groupBy valtypg (sortBy valtyps (map jty (Map.assocs (funs (abstract gr))))) + valtyps (_, (_,x)) (_, (_,y)) = compare x y + valtypg (_, (_,x)) (_, (_,y)) = x == y + jty (f,(ty,_)) = (f,catSkeleton ty) + +updateSkeleton :: OIdent -> HSkeleton -> (OIdent, [OIdent]) -> HSkeleton +updateSkeleton cat skel rule = + case skel of + (cat0,rules):rr | cat0 == cat -> (cat0, rule:rules) : rr + (cat0,rules):rr -> (cat0, rules) : updateSkeleton cat rr rule + +isListCat :: (OIdent, [(OIdent, [OIdent])]) -> Bool +isListCat (cat,rules) = "List" `isPrefixOf` cat && length rules == 2 + && ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs + where c = elemCat cat + fs = map fst rules + +-- | Gets the element category of a list category. +elemCat :: OIdent -> OIdent +elemCat = drop 4 + +isBaseFun :: OIdent -> Bool +isBaseFun f = "Base" `isPrefixOf` f + +isConsFun :: OIdent -> Bool +isConsFun f = "Cons" `isPrefixOf` f + +baseSize :: (OIdent, [(OIdent, [OIdent])]) -> Int +baseSize (_,rules) = length bs + where Just (_,bs) = find (("Base" `isPrefixOf`) . fst) rules diff --git a/src-3.0/GF/Compile/GFCCtoJS.hs b/src-3.0/GF/Compile/GFCCtoJS.hs new file mode 100644 index 000000000..1c24627a3 --- /dev/null +++ b/src-3.0/GF/Compile/GFCCtoJS.hs @@ -0,0 +1,117 @@ +module GF.Compile.GFCCtoJS (gfcc2js) where + +import PGF.CId +import PGF.Data +import qualified PGF.Macros as M +import qualified GF.JavaScript.AbsJS as JS +import qualified GF.JavaScript.PrintJS as JS + +import GF.Text.UTF8 +import GF.Data.ErrM +import GF.Infra.Option + +import Control.Monad (mplus) +import Data.Array (Array) +import qualified Data.Array as Array +import Data.Maybe (fromMaybe) +import qualified Data.Map as Map + +gfcc2js :: GFCC -> String +gfcc2js gfcc = + encodeUTF8 $ JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]] + where + n = prCId $ absname gfcc + as = abstract gfcc + cs = Map.assocs (concretes gfcc) + start = M.lookStartCat gfcc + grammar = new "GFGrammar" [js_abstract, js_concrete] + js_abstract = abstract2js start as + js_concrete = JS.EObj $ map (concrete2js start n) cs + +abstract2js :: String -> Abstr -> JS.Expr +abstract2js start ds = new "GFAbstract" [JS.EStr start, JS.EObj $ map absdef2js (Map.assocs (funs ds))] + +absdef2js :: (CId,(Type,Exp)) -> JS.Property +absdef2js (f,(typ,_)) = + let (args,cat) = M.catSkeleton typ in + JS.Prop (JS.IdentPropName (JS.Ident (prCId f))) (new "Type" [JS.EArray [JS.EStr (prCId x) | x <- args], JS.EStr (prCId cat)]) + +concrete2js :: String -> String -> (CId,Concr) -> JS.Property +concrete2js start n (c, cnc) = + JS.Prop l (new "GFConcrete" ([(JS.EObj $ ((map (cncdef2js n (prCId c)) ds) ++ litslins))] ++ + maybe [] (parser2js start) (parser cnc))) + where + l = JS.IdentPropName (JS.Ident (prCId c)) + ds = concatMap Map.assocs [lins cnc, opers cnc, lindefs cnc] + litslins = [JS.Prop (JS.StringPropName "Int") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), + JS.Prop (JS.StringPropName "Float") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), + JS.Prop (JS.StringPropName "String") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]])] + + +cncdef2js :: String -> String -> (CId,Term) -> JS.Property +cncdef2js n l (f, t) = JS.Prop (JS.IdentPropName (JS.Ident (prCId f))) (JS.EFun [children] [JS.SReturn (term2js n l t)]) + +term2js :: String -> String -> Term -> JS.Expr +term2js n l t = f t + where + f t = + case t of + R xs -> new "Arr" (map f xs) + P x y -> JS.ECall (JS.EMember (f x) (JS.Ident "sel")) [f y] + S xs -> mkSeq (map f xs) + K t -> tokn2js t + V i -> JS.EIndex (JS.EVar children) (JS.EInt i) + C i -> new "Int" [JS.EInt i] + F f -> JS.ECall (JS.EMember (JS.EIndex (JS.EMember (JS.EVar $ JS.Ident n) (JS.Ident "concretes")) (JS.EStr l)) (JS.Ident "rule")) [JS.EStr (prCId f), JS.EVar children] + FV xs -> new "Variants" (map f xs) + W str x -> new "Suffix" [JS.EStr str, f x] + TM _ -> new "Meta" [] + +tokn2js :: Tokn -> JS.Expr +tokn2js (KS s) = mkStr s +tokn2js (KP ss vs) = mkSeq (map mkStr ss) -- FIXME + +mkStr :: String -> JS.Expr +mkStr s = new "Str" [JS.EStr s] + +mkSeq :: [JS.Expr] -> JS.Expr +mkSeq [x] = x +mkSeq xs = new "Seq" xs + +argIdent :: Integer -> JS.Ident +argIdent n = JS.Ident ("x" ++ show n) + +children :: JS.Ident +children = JS.Ident "cs" + +-- Parser +parser2js :: String -> ParserInfo -> [JS.Expr] +parser2js start p = [new "Parser" [JS.EStr start, + JS.EArray $ map frule2js (Array.elems (allRules p)), + JS.EObj $ map cats (Map.assocs (startupCats p))]] + where + cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (prCId c))) (JS.EArray (map JS.EInt is)) + +frule2js :: FRule -> JS.Expr +frule2js (FRule f ps args res lins) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js lins] + +name2js :: (CId,[Profile]) -> JS.Expr +name2js (f,ps) | f == wildCId = fromProfile (head ps) + | otherwise = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)] + where + fromProfile :: Profile -> JS.Expr + fromProfile [] = new "MetaVar" [] + fromProfile [x] = daughter x + fromProfile args = new "Unify" [JS.EArray (map daughter args)] + + daughter i = new "Arg" [JS.EInt i] + +lins2js :: Array FIndex (Array FPointPos FSymbol) -> JS.Expr +lins2js ls = JS.EArray [ JS.EArray [ sym2js s | s <- Array.elems l] | l <- Array.elems ls] + +sym2js :: FSymbol -> JS.Expr +sym2js (FSymCat l n) = new "ArgProj" [JS.EInt n, JS.EInt l] +sym2js (FSymTok t) = new "Terminal" [JS.EStr t] + +new :: String -> [JS.Expr] -> JS.Expr +new f xs = JS.ENew (JS.Ident f) xs diff --git a/src-3.0/GF/Compile/GenerateFCFG.hs b/src-3.0/GF/Compile/GenerateFCFG.hs index 3bea5ab36..64f824acf 100644 --- a/src-3.0/GF/Compile/GenerateFCFG.hs +++ b/src-3.0/GF/Compile/GenerateFCFG.hs @@ -15,13 +15,10 @@ module GF.Compile.GenerateFCFG (convertConcrete) where -import Control.Monad - -import GF.GFCC.Parsing.FCFG.Utilities - -import GF.GFCC.Macros --hiding (prt) -import GF.GFCC.DataGFCC -import GF.GFCC.CId +import PGF.CId +import PGF.Data +import PGF.Macros --hiding (prt) +import PGF.Parsing.FCFG.Utilities import GF.Data.BacktrackM import GF.Data.SortedList @@ -33,6 +30,7 @@ import qualified Data.List as List import qualified Data.ByteString.Char8 as BS import Data.Array import Data.Maybe +import Control.Monad ---------------------------------------------------------------------- -- main conversion function diff --git a/src-3.0/GF/Compile/GrammarToGFCC.hs b/src-3.0/GF/Compile/GrammarToGFCC.hs index d29c20e17..49ab4db70 100644 --- a/src-3.0/GF/Compile/GrammarToGFCC.hs +++ b/src-3.0/GF/Compile/GrammarToGFCC.hs @@ -1,14 +1,15 @@ {-# LANGUAGE PatternGuards #-} module GF.Compile.GrammarToGFCC (prGrammar2gfcc,mkCanon2gfcc,addParsers) where +import GF.Compile.Export import GF.Compile.OptimizeGF (unshareModule) +import GF.Compile.GenerateFCFG (convertConcrete) -import qualified GF.GFCC.Macros as CM -import qualified GF.GFCC.DataGFCC as C -import qualified GF.GFCC.DataGFCC as D -import GF.GFCC.CId -import GF.GFCC.PrintGFCC -import GF.GFCC.BuildParser (buildParserInfo) +import PGF.CId +import PGF.BuildParser (buildParserInfo) +import qualified PGF.Macros as CM +import qualified PGF.Data as C +import qualified PGF.Data as D import GF.Grammar.Predef import GF.Grammar.PrGrammar import GF.Grammar.Grammar @@ -19,7 +20,6 @@ import qualified GF.Compile.Compute as Compute ---- import qualified GF.Infra.Modules as M import qualified GF.Infra.Option as O -import GF.Compile.GenerateFCFG (convertConcrete) import GF.Infra.Ident import GF.Infra.Option import GF.Data.Operations diff --git a/src-3.0/GF/Compile/OptimizeGFCC.hs b/src-3.0/GF/Compile/OptimizeGFCC.hs new file mode 100644 index 000000000..16cdf9ac3 --- /dev/null +++ b/src-3.0/GF/Compile/OptimizeGFCC.hs @@ -0,0 +1,124 @@ +module GF.Compile.OptimizeGFCC where + +import PGF.CId +import PGF.Data + +import GF.Data.Operations + +import Data.List +import qualified Data.Map as Map + + +-- back-end optimization: +-- suffix analysis followed by common subexpression elimination + +optGFCC :: GFCC -> GFCC +optGFCC = cseOptimize . suffixOptimize + +suffixOptimize :: GFCC -> GFCC +suffixOptimize gfcc = gfcc { + concretes = Map.map opt (concretes gfcc) + } + where + opt cnc = cnc { + lins = Map.map optTerm (lins cnc), + lindefs = Map.map optTerm (lindefs cnc), + printnames = Map.map optTerm (printnames cnc) + } + +cseOptimize :: GFCC -> GFCC +cseOptimize gfcc = gfcc { + concretes = Map.map subex (concretes gfcc) + } + +-- analyse word form lists into prefix + suffixes +-- suffix sets can later be shared by subex elim + +optTerm :: Term -> Term +optTerm tr = case tr of + R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | K (KS s) <- ts] + R ts -> R $ map optTerm ts + P t v -> P (optTerm t) v + _ -> tr + where + optToks ss = prf : suffs where + prf = pref (head ss) (tail ss) + suffs = map (drop (length prf)) ss + pref cand ss = case ss of + s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss + _ -> cand + isK t = case t of + K (KS _) -> True + _ -> False + mkSuff ("":ws) = R (map (K . KS) ws) + mkSuff (p:ws) = W p (R (map (K . KS) ws)) + + +-- common subexpression elimination + +---subex :: [(CId,Term)] -> [(CId,Term)] +subex :: Concr -> Concr +subex cnc = err error id $ do + (tree,_) <- appSTM (getSubtermsMod cnc) (Map.empty,0) + return $ addSubexpConsts tree cnc + +type TermList = Map.Map Term (Int,Int) -- number of occs, id +type TermM a = STM (TermList,Int) a + +addSubexpConsts :: TermList -> Concr -> Concr +addSubexpConsts tree cnc = cnc { + opers = Map.fromList [(f,recomp f trm) | (f,trm) <- ops], + lins = rec lins, + lindefs = rec lindefs, + printnames = rec printnames + } + where + ops = [(fid id, trm) | (trm,(_,id)) <- Map.assocs tree] + mkOne (f,trm) = (f, recomp f trm) + recomp f t = case Map.lookup t tree of + Just (_,id) | fid id /= f -> F $ fid id -- not to replace oper itself + _ -> case t of + R ts -> R $ map (recomp f) ts + S ts -> S $ map (recomp f) ts + W s t -> W s (recomp f t) + P t p -> P (recomp f t) (recomp f p) + _ -> t + fid n = mkCId $ "_" ++ show n + rec field = Map.fromAscList [(f,recomp f trm) | (f,trm) <- Map.assocs (field cnc)] + + +getSubtermsMod :: Concr -> TermM TermList +getSubtermsMod cnc = do + mapM getSubterms (Map.assocs (lins cnc)) + mapM getSubterms (Map.assocs (lindefs cnc)) + mapM getSubterms (Map.assocs (printnames cnc)) + (tree0,_) <- readSTM + return $ Map.filter (\ (nu,_) -> nu > 1) tree0 + where + getSubterms (f,trm) = collectSubterms trm >> return () + +collectSubterms :: Term -> TermM () +collectSubterms t = case t of + R ts -> do + mapM collectSubterms ts + add t + S ts -> do + mapM collectSubterms ts + add t + W s u -> do + collectSubterms u + add t + P p u -> do + collectSubterms p + collectSubterms u + add t + _ -> return () + where + add t = do + (ts,i) <- readSTM + let + ((count,id),next) = case Map.lookup t ts of + Just (nu,id) -> ((nu+1,id), i) + _ -> ((1, i ), i+1) + writeSTM (Map.insert t (count,id) ts, next) + diff --git a/src-3.0/GF/GFCC/API.hs b/src-3.0/GF/GFCC/API.hs deleted file mode 100644 index 0eb9d15da..000000000 --- a/src-3.0/GF/GFCC/API.hs +++ /dev/null @@ -1,184 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : GFCCAPI --- Maintainer : Aarne Ranta --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: --- > CVS $Author: --- > CVS $Revision: --- --- Reduced Application Programmer's Interface to GF, meant for --- embedded GF systems. AR 19/9/2007 ------------------------------------------------------------------------------ - -module GF.GFCC.API where - -import GF.GFCC.Linearize -import GF.GFCC.Generate -import GF.GFCC.Macros -import GF.GFCC.DataGFCC -import GF.GFCC.CId -import GF.GFCC.Raw.ConvertGFCC -import GF.GFCC.Raw.ParGFCCRaw - -import GF.Data.ErrM - -import GF.GFCC.Parsing.FCFG - -import Data.Char -import qualified Data.Map as Map -import Control.Monad -import System.Random (newStdGen) -import System.Directory (doesFileExist) -import qualified Text.PrettyPrint as PP -import qualified Text.ParserCombinators.ReadP as RP - - --- This API is meant to be used when embedding GF grammars in Haskell --- programs. The embedded system is supposed to use the --- .gfcc grammar format, which is first produced by the gf program. - ---------------------------------------------------- --- Interface ---------------------------------------------------- - -data MultiGrammar = MultiGrammar {gfcc :: GFCC} -type Language = String -type Category = String -type Tree = Exp - -file2grammar :: FilePath -> IO MultiGrammar - -linearize :: MultiGrammar -> Language -> Tree -> String -parse :: MultiGrammar -> Language -> Category -> String -> [Tree] - -linearizeAll :: MultiGrammar -> Tree -> [String] -linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)] - -parseAll :: MultiGrammar -> Category -> String -> [[Tree]] -parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])] - -generateAll :: MultiGrammar -> Category -> [Tree] -generateRandom :: MultiGrammar -> Category -> IO [Tree] -generateAllDepth :: MultiGrammar -> Category -> Maybe Int -> [Tree] - -readTree :: String -> Tree -showTree :: Tree -> String - -languages :: MultiGrammar -> [Language] -categories :: MultiGrammar -> [Category] - -startCat :: MultiGrammar -> Category - ---------------------------------------------------- --- Implementation ---------------------------------------------------- - -file2grammar f = do - gfcc <- file2gfcc f - return (MultiGrammar gfcc) - -file2gfcc f = do - s <- readFileIf f - g <- parseGrammar s - return $ toGFCC g - -linearize mgr lang = GF.GFCC.Linearize.linearize (gfcc mgr) (mkCId lang) - -parse mgr lang cat s = - case lookParser (gfcc mgr) (mkCId lang) of - Nothing -> error "no parser" - Just pinfo -> case parseFCF "bottomup" pinfo (mkCId cat) (words s) of - Ok x -> x - Bad s -> error s - -linearizeAll mgr = map snd . linearizeAllLang mgr -linearizeAllLang mgr t = - [(lang,linearThis mgr lang t) | lang <- languages mgr] - -parseAll mgr cat = map snd . parseAllLang mgr cat - -parseAllLang mgr cat s = - [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)] - -generateRandom mgr cat = do - gen <- newStdGen - return $ genRandom gen (gfcc mgr) (mkCId cat) - -generateAll mgr cat = generate (gfcc mgr) (mkCId cat) Nothing -generateAllDepth mgr cat = generate (gfcc mgr) (mkCId cat) - -readTree s = case RP.readP_to_S (pExp 0) s of - [(x,"")] -> x - _ -> error "no parse" - -pExps :: RP.ReadP [Exp] -pExps = liftM2 (:) (pExp 1) pExps RP.<++ (RP.skipSpaces >> return []) - -pExp :: Int -> RP.ReadP Exp -pExp n = RP.skipSpaces >> (pParen RP.<++ pApp RP.<++ pNum RP.<++ pStr RP.<++ pMeta) - where - pParen = RP.between (RP.char '(') (RP.char ')') (pExp 0) - pApp = do xs <- RP.option [] (RP.between (RP.char '\\') (RP.string "->") (RP.sepBy1 pIdent (RP.char ','))) - f <- pIdent - ts <- (if n == 0 then pExps else return []) - return (DTr xs (AC f) ts) - pStr = RP.char '"' >> liftM (\s -> DTr [] (AS s) []) (RP.manyTill (pEsc RP.<++ RP.get) (RP.char '"')) - pEsc = RP.char '\\' >> RP.get - pNum = do x <- RP.munch1 isDigit - ((RP.char '.' >> RP.munch1 isDigit >>= \y -> return (DTr [] (AF (read (x++"."++y))) [])) - RP.<++ - (return (DTr [] (AI (read x)) []))) - pMeta = do RP.char '?' - x <- RP.munch1 isDigit - return (DTr [] (AM (read x)) []) - - pIdent = fmap mkCId (liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest)) - isIdentFirst c = c == '_' || isLetter c - isIdentRest c = c == '_' || c == '\'' || isAlphaNum c - - -showTree = PP.render . ppExp False - -ppExp isNested (DTr [] at []) = ppAtom at -ppExp isNested (DTr xs at ts) = ppParens isNested (ppLambdas xs PP.<+> ppAtom at PP.<+> PP.hsep (map (ppExp True) ts)) - where - ppLambdas [] = PP.empty - ppLambdas xs = PP.char '\\' PP.<> - PP.hsep (PP.punctuate PP.comma (map (PP.text . prCId) xs)) PP.<+> - PP.text "->" - - ppParens True = PP.parens - ppParens False = id - -ppAtom (AC id) = PP.text (prCId id) -ppAtom (AS s) = PP.text (show s) -ppAtom (AI n) = PP.integer n -ppAtom (AF d) = PP.double d -ppAtom (AM n) = PP.char '?' PP.<> PP.integer n -ppAtom (AV id) = PP.text (prCId id) - -abstractName mgr = prCId (absname (gfcc mgr)) - -languages mgr = [prCId l | l <- cncnames (gfcc mgr)] - -categories mgr = [prCId c | c <- Map.keys (cats (abstract (gfcc mgr)))] - -startCat mgr = lookStartCat (gfcc mgr) - -emptyMultiGrammar = MultiGrammar emptyGFCC - ------------- for internal use only - -linearThis = GF.GFCC.API.linearize - -err f g ex = case ex of - Ok x -> g x - Bad s -> f s - -readFileIf f = do - b <- doesFileExist f - if b then readFile f - else putStrLn ("file " ++ f ++ " not found") >> return "" diff --git a/src-3.0/GF/GFCC/BuildParser.hs b/src-3.0/GF/GFCC/BuildParser.hs deleted file mode 100644 index 1f9e6ab5f..000000000 --- a/src-3.0/GF/GFCC/BuildParser.hs +++ /dev/null @@ -1,64 +0,0 @@ ---------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- FCFG parsing, parser information ------------------------------------------------------------------------------ - -module GF.GFCC.BuildParser where - -import GF.GFCC.Parsing.FCFG.Utilities -import GF.Data.SortedList -import GF.Data.Assoc -import GF.GFCC.CId -import GF.GFCC.DataGFCC - -import Data.Array -import Data.Maybe -import qualified Data.Map as Map -import qualified Data.Set as Set -import Debug.Trace - - ------------------------------------------------------------- --- parser information - -getLeftCornerTok (FRule _ _ _ _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymTok tok -> [tok] - _ -> [] - | otherwise = [] - where - syms = lins ! 0 - -getLeftCornerCat (FRule _ _ args _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymCat _ d -> [args !! d] - _ -> [] - | otherwise = [] - where - syms = lins ! 0 - -buildParserInfo :: FGrammar -> ParserInfo -buildParserInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $ - ParserInfo { allRules = allrules - , topdownRules = topdownrules - -- , emptyRules = emptyrules - , epsilonRules = epsilonrules - , leftcornerCats = leftcorncats - , leftcornerTokens = leftcorntoks - , grammarCats = grammarcats - , grammarToks = grammartoks - , startupCats = startup - } - - where allrules = listArray (0,length grammar-1) grammar - topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ _ cat _) <- assocs allrules] - epsilonrules = [ ruleid | (ruleid, FRule _ _ _ _ lins) <- assocs allrules, - not (inRange (bounds (lins ! 0)) 0) ] - leftcorncats = accumAssoc id [ (cat, ruleid) | (ruleid, rule) <- assocs allrules, cat <- getLeftCornerCat rule ] - leftcorntoks = accumAssoc id [ (tok, ruleid) | (ruleid, rule) <- assocs allrules, tok <- getLeftCornerTok rule ] - grammarcats = aElems topdownrules - grammartoks = nubsort [t | (FRule _ _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin] diff --git a/src-3.0/GF/GFCC/CId.hs b/src-3.0/GF/GFCC/CId.hs deleted file mode 100644 index 656778de4..000000000 --- a/src-3.0/GF/GFCC/CId.hs +++ /dev/null @@ -1,14 +0,0 @@ -module GF.GFCC.CId (CId(..), wildCId, mkCId, prCId) where - -import Data.ByteString.Char8 as BS - -newtype CId = CId BS.ByteString deriving (Eq,Ord,Show) - -wildCId :: CId -wildCId = CId (BS.singleton '_') - -mkCId :: String -> CId -mkCId s = CId (BS.pack s) - -prCId :: CId -> String -prCId (CId x) = BS.unpack x diff --git a/src-3.0/GF/GFCC/CheckGFCC.hs b/src-3.0/GF/GFCC/CheckGFCC.hs deleted file mode 100644 index 33143c9ad..000000000 --- a/src-3.0/GF/GFCC/CheckGFCC.hs +++ /dev/null @@ -1,186 +0,0 @@ -module GF.GFCC.CheckGFCC (checkGFCC, checkGFCCio, checkGFCCmaybe) where - -import GF.GFCC.CId -import GF.GFCC.Macros -import GF.GFCC.DataGFCC -import GF.Data.ErrM - -import qualified Data.Map as Map -import Control.Monad -import Debug.Trace - -checkGFCCio :: GFCC -> IO GFCC -checkGFCCio gfcc = case checkGFCC gfcc of - Ok (gc,b) -> do - putStrLn $ if b then "OK" else "Corrupted GFCC" - return gc - Bad s -> do - putStrLn s - error "building GFCC failed" - ----- needed in old Custom -checkGFCCmaybe :: GFCC -> Maybe GFCC -checkGFCCmaybe gfcc = case checkGFCC gfcc of - Ok (gc,b) -> return gc - Bad s -> Nothing - -checkGFCC :: GFCC -> Err (GFCC,Bool) -checkGFCC gfcc = do - (cs,bs) <- mapM (checkConcrete gfcc) - (Map.assocs (concretes gfcc)) >>= return . unzip - return (gfcc {concretes = Map.fromAscList cs}, and bs) - - --- errors are non-fatal; replace with 'fail' to change this -msg s = trace s (return ()) - -andMapM :: Monad m => (a -> m Bool) -> [a] -> m Bool -andMapM f xs = mapM f xs >>= return . and - -labelBoolErr :: String -> Err (x,Bool) -> Err (x,Bool) -labelBoolErr ms iob = do - (x,b) <- iob - if b then return (x,b) else (msg ms >> return (x,b)) - - -checkConcrete :: GFCC -> (CId,Concr) -> Err ((CId,Concr),Bool) -checkConcrete gfcc (lang,cnc) = - labelBoolErr ("happened in language " ++ prCId lang) $ do - (rs,bs) <- mapM checkl (Map.assocs (lins cnc)) >>= return . unzip - return ((lang,cnc{lins = Map.fromAscList rs}),and bs) - where - checkl = checkLin gfcc lang - -checkLin :: GFCC -> CId -> (CId,Term) -> Err ((CId,Term),Bool) -checkLin gfcc lang (f,t) = - labelBoolErr ("happened in function " ++ prCId f) $ do - (t',b) <- checkTerm (lintype gfcc lang f) t --- $ inline gfcc lang t - return ((f,t'),b) - -inferTerm :: [CType] -> Term -> Err (Term,CType) -inferTerm args trm = case trm of - K _ -> returnt str - C i -> returnt $ ints i - V i -> do - testErr (i < length args) ("too large index " ++ show i) - returnt $ args !! i - S ts -> do - (ts',tys) <- mapM infer ts >>= return . unzip - let tys' = filter (/=str) tys - testErr (null tys') - ("expected Str in " ++ show trm ++ " not " ++ unwords (map show tys')) - return (S ts',str) - R ts -> do - (ts',tys) <- mapM infer ts >>= return . unzip - return $ (R ts',tuple tys) - P t u -> do - (t',tt) <- infer t - (u',tu) <- infer u - case tt of - R tys -> case tu of - R vs -> infer $ foldl P t' [P u' (C i) | i <- [0 .. length vs - 1]] - --- R [v] -> infer $ P t v - --- R (v:vs) -> infer $ P (head tys) (R vs) - - C i -> do - testErr (i < length tys) - ("required more than " ++ show i ++ " fields in " ++ show (R tys)) - return (P t' u', tys !! i) -- record: index must be known - _ -> do - let typ = head tys - testErr (all (==typ) tys) ("different types in table " ++ show trm) - return (P t' u', typ) -- table: types must be same - _ -> Bad $ "projection from " ++ show t ++ " : " ++ show tt - FV [] -> returnt tm0 ---- - FV (t:ts) -> do - (t',ty) <- infer t - (ts',tys) <- mapM infer ts >>= return . unzip - testErr (all (eqType ty) tys) ("different types in variants " ++ show trm) - return (FV (t':ts'),ty) - W s r -> infer r - _ -> Bad ("no type inference for " ++ show trm) - where - returnt ty = return (trm,ty) - infer = inferTerm args - -checkTerm :: LinType -> Term -> Err (Term,Bool) -checkTerm (args,val) trm = case inferTerm args trm of - Ok (t,ty) -> if eqType ty val - then return (t,True) - else do - msg ("term: " ++ show trm ++ - "\nexpected type: " ++ show val ++ - "\ninferred type: " ++ show ty) - return (t,False) - Bad s -> do - msg s - return (trm,False) - -eqType :: CType -> CType -> Bool -eqType inf exp = case (inf,exp) of - (C k, C n) -> k <= n -- only run-time corr. - (R rs,R ts) -> length rs == length ts && and [eqType r t | (r,t) <- zip rs ts] - (TM _, _) -> True ---- for variants [] ; not safe - _ -> inf == exp - --- should be in a generic module, but not in the run-time DataGFCC - -type CType = Term -type LinType = ([CType],CType) - -tuple :: [CType] -> CType -tuple = R - -ints :: Int -> CType -ints = C - -str :: CType -str = S [] - -lintype :: GFCC -> CId -> CId -> LinType -lintype gfcc lang fun = case typeSkeleton (lookType gfcc fun) of - (cs,c) -> (map vlinc cs, linc c) ---- HOAS - where - linc = lookLincat gfcc lang - vlinc (0,c) = linc c - vlinc (i,c) = case linc c of - R ts -> R (ts ++ replicate i str) - -inline :: GFCC -> CId -> Term -> Term -inline gfcc lang t = case t of - F c -> inl $ look c - _ -> composSafeOp inl t - where - inl = inline gfcc lang - look = lookLin gfcc lang - -composOp :: Monad m => (Term -> m Term) -> Term -> m Term -composOp f trm = case trm of - R ts -> liftM R $ mapM f ts - S ts -> liftM S $ mapM f ts - FV ts -> liftM FV $ mapM f ts - P t u -> liftM2 P (f t) (f u) - W s t -> liftM (W s) $ f t - _ -> return trm - -composSafeOp :: (Term -> Term) -> Term -> Term -composSafeOp f = maybe undefined id . composOp (return . f) - --- from GF.Data.Oper - -maybeErr :: String -> Maybe a -> Err a -maybeErr s = maybe (Bad s) Ok - -testErr :: Bool -> String -> Err () -testErr cond msg = if cond then return () else Bad msg - -errVal :: a -> Err a -> a -errVal a = err (const a) id - -errIn :: String -> Err a -> Err a -errIn msg = err (\s -> Bad (s ++ "\nOCCURRED IN\n" ++ msg)) return - -err :: (String -> b) -> (a -> b) -> Err a -> b -err d f e = case e of - Ok a -> f a - Bad s -> d s diff --git a/src-3.0/GF/GFCC/DataGFCC.hs b/src-3.0/GF/GFCC/DataGFCC.hs deleted file mode 100644 index bba0801eb..000000000 --- a/src-3.0/GF/GFCC/DataGFCC.hs +++ /dev/null @@ -1,178 +0,0 @@ -module GF.GFCC.DataGFCC where - -import GF.GFCC.CId -import GF.Text.UTF8 -import GF.Data.Assoc - -import qualified Data.Map as Map -import Data.List -import Data.Array - --- internal datatypes for GFCC - -data GFCC = GFCC { - absname :: CId , - cncnames :: [CId] , - gflags :: Map.Map CId String, -- value of a global flag - abstract :: Abstr , - concretes :: Map.Map CId Concr - } - -data Abstr = Abstr { - aflags :: Map.Map CId String, -- value of a flag - funs :: Map.Map CId (Type,Exp), -- type and def of a fun - cats :: Map.Map CId [Hypo], -- context of a cat - catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup) - } - -data Concr = Concr { - cflags :: Map.Map CId String, -- value of a flag - lins :: Map.Map CId Term, -- lin of a fun - opers :: Map.Map CId Term, -- oper generated by subex elim - lincats :: Map.Map CId Term, -- lin type of a cat - lindefs :: Map.Map CId Term, -- lin default of a cat - printnames :: Map.Map CId Term, -- printname of a cat or a fun - paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names - parser :: Maybe ParserInfo -- parser - } - -data Type = - DTyp [Hypo] CId [Exp] - deriving (Eq,Ord,Show) - -data Exp = - DTr [CId] Atom [Exp] - | EEq [Equation] - deriving (Eq,Ord,Show) - -data Atom = - AC CId - | AS String - | AI Integer - | AF Double - | AM Integer - | AV CId - deriving (Eq,Ord,Show) - -data Term = - R [Term] - | P Term Term - | S [Term] - | K Tokn - | V Int - | C Int - | F CId - | FV [Term] - | W String Term - | TM String - deriving (Eq,Ord,Show) - -data Tokn = - KS String - | KP [String] [Variant] - deriving (Eq,Ord,Show) - -data Variant = - Var [String] [String] - deriving (Eq,Ord,Show) - -data Hypo = - Hyp CId Type - deriving (Eq,Ord,Show) - -data Equation = - Equ [Exp] Exp - deriving (Eq,Ord,Show) - - -type FToken = String -type FCat = Int -type FIndex = Int -data FSymbol - = FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int - | FSymTok FToken -type Profile = [Int] -type FPointPos = Int -type FGrammar = ([FRule], Map.Map CId [FCat]) -data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol)) - -type RuleId = Int - -data ParserInfo - = ParserInfo { allRules :: Array RuleId FRule - , topdownRules :: Assoc FCat [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): - -- , emptyRules :: [RuleId] - , epsilonRules :: [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , leftcornerCats :: Assoc FCat [RuleId] - , leftcornerTokens :: Assoc FToken [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , grammarCats :: [FCat] - , grammarToks :: [FToken] - , startupCats :: Map.Map CId [FCat] - } - - -fcatString, fcatInt, fcatFloat, fcatVar :: Int -fcatString = (-1) -fcatInt = (-2) -fcatFloat = (-3) -fcatVar = (-4) - - --- print statistics - -statGFCC :: GFCC -> String -statGFCC gfcc = unlines [ - "Abstract\t" ++ prCId (absname gfcc), - "Concretes\t" ++ unwords (map prCId (cncnames gfcc)), - "Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract gfcc)))) - ] - --- merge two GFCCs; fails is differens absnames; priority to second arg - -unionGFCC :: GFCC -> GFCC -> GFCC -unionGFCC one two = case absname one of - n | n == wildCId -> two -- extending empty grammar - | n == absname two -> one { -- extending grammar with same abstract - concretes = Map.union (concretes two) (concretes one), - cncnames = union (cncnames two) (cncnames one) - } - _ -> one -- abstracts don't match ---- print error msg - -emptyGFCC :: GFCC -emptyGFCC = GFCC { - absname = wildCId, - cncnames = [] , - gflags = Map.empty, - abstract = error "empty grammar, no abstract", - concretes = Map.empty - } - --- encode idenfifiers and strings in UTF8 - -utf8GFCC :: GFCC -> GFCC -utf8GFCC gfcc = gfcc { - concretes = Map.map u8concr (concretes gfcc) - } - where - u8concr cnc = cnc { - lins = Map.map u8term (lins cnc), - opers = Map.map u8term (opers cnc) - } - u8term = convertStringsInTerm encodeUTF8 - ----- TODO: convert identifiers and flags - -convertStringsInTerm conv t = case t of - K (KS s) -> K (KS (conv s)) - W s r -> W (conv s) (convs r) - R ts -> R $ map convs ts - S ts -> S $ map convs ts - FV ts -> FV $ map convs ts - P u v -> P (convs u) (convs v) - _ -> t - where - convs = convertStringsInTerm conv - diff --git a/src-3.0/GF/GFCC/GFCC.cf b/src-3.0/GF/GFCC/GFCC.cf deleted file mode 100644 index 96d68649b..000000000 --- a/src-3.0/GF/GFCC/GFCC.cf +++ /dev/null @@ -1,81 +0,0 @@ -Grm. Grammar ::= - "grammar" CId "(" [CId] ")" "(" [Flag] ")" ";" - Abstract ";" - [Concrete] ; - -Abs. Abstract ::= - "abstract" "{" - "flags" [Flag] - "fun" [FunDef] - "cat" [CatDef] - "}" ; - -Cnc. Concrete ::= - "concrete" CId "{" - "flags" [Flag] - "lin" [LinDef] - "oper" [LinDef] - "lincat" [LinDef] - "lindef" [LinDef] - "printname" [LinDef] - "param" [LinDef] -- lincats with param value names - "}" ; - -Flg. Flag ::= CId "=" String ; -Cat. CatDef ::= CId "[" [Hypo] "]" ; - -Fun. FunDef ::= CId ":" Type "=" Exp ; -Lin. LinDef ::= CId "=" Term ; - -DTyp. Type ::= "[" [Hypo] "]" CId [Exp] ; -- dependent type -DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ; -- term with bindings - -AC. Atom ::= CId ; -AS. Atom ::= String ; -AI. Atom ::= Integer ; -AF. Atom ::= Double ; -AM. Atom ::= "?" Integer ; - -R. Term ::= "[" [Term] "]" ; -- record/table -P. Term ::= "(" Term "!" Term ")" ; -- projection/selection -S. Term ::= "(" [Term] ")" ; -- concatenated sequence -K. Term ::= Tokn ; -- token -V. Term ::= "$" Integer ; -- argument -C. Term ::= Integer ; -- parameter value/label -F. Term ::= CId ; -- global constant -FV. Term ::= "[|" [Term] "|]" ; -- free variation -W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table -TM. Term ::= "?" ; -- lin of metavariable - -KS. Tokn ::= String ; -KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; -Var. Variant ::= [String] "/" [String] ; - - -RP. Term ::= "(" Term "@" Term ")"; -- DEPRECATED: record parameter alias - -terminator Concrete ";" ; -terminator Flag ";" ; -terminator CatDef ";" ; -terminator FunDef ";" ; -terminator LinDef ";" ; -separator CId "," ; -separator Term "," ; -terminator Exp "" ; -terminator String "" ; -separator Variant "," ; - -token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; - - --- the following are needed if dependent types or HOAS or defs are present - -Hyp. Hypo ::= CId ":" Type ; -AV. Atom ::= "$" CId ; - -EEq. Exp ::= "{" [Equation] "}" ; -- list of pattern eqs; primitive: [] -Equ. Equation ::= [Exp] "->" Exp ; -- patterns are encoded as exps - -separator Hypo "," ; -terminator Equation ";" ; - diff --git a/src-3.0/GF/GFCC/GFCCtoHaskell.hs b/src-3.0/GF/GFCC/GFCCtoHaskell.hs deleted file mode 100644 index c29d88b1a..000000000 --- a/src-3.0/GF/GFCC/GFCCtoHaskell.hs +++ /dev/null @@ -1,212 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : GFCCtoHaskell --- Maintainer : Aarne Ranta --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/06/17 12:39:07 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.8 $ --- --- to write a GF abstract grammar into a Haskell module with translations from --- data objects into GF trees. Example: GSyntax for Agda. --- AR 11/11/1999 -- 7/12/2000 -- 18/5/2004 ------------------------------------------------------------------------------ - -module GF.GFCC.GFCCtoHaskell (grammar2haskell, grammar2haskellGADT) where - -import GF.GFCC.Macros -import GF.GFCC.DataGFCC -import GF.GFCC.CId - -import GF.Data.Operations -import GF.Text.UTF8 - -import Data.List --(isPrefixOf, find, intersperse) -import qualified Data.Map as Map - --- | the main function -grammar2haskell :: GFCC -> String -grammar2haskell gr = encodeUTF8 $ foldr (++++) [] $ - haskPreamble ++ [datatypes gr', gfinstances gr'] - where gr' = hSkeleton gr - -grammar2haskellGADT :: GFCC -> String -grammar2haskellGADT gr = encodeUTF8 $ foldr (++++) [] $ - ["{-# OPTIONS_GHC -fglasgow-exts #-}"] ++ - haskPreamble ++ [datatypesGADT gr', gfinstances gr'] - where gr' = hSkeleton gr - --- | by this you can prefix all identifiers with stg; the default is 'G' -gId :: OIdent -> OIdent -gId i = 'G':i - -haskPreamble = - [ - "module GSyntax where", - "", - "import GF.GFCC.DataGFCC", - "import GF.GFCC.CId", - "----------------------------------------------------", - "-- automatic translation from GF to Haskell", - "----------------------------------------------------", - "", - "class Gf a where", - " gf :: a -> Exp", - " fg :: Exp -> a", - "", - predefInst "GString" "String" "DTr [] (AS s) []", - "", - predefInst "GInt" "Integer" "DTr [] (AI s) []", - "", - predefInst "GFloat" "Double" "DTr [] (AF s) []", - "", - "----------------------------------------------------", - "-- below this line machine-generated", - "----------------------------------------------------", - "" - ] - -predefInst gtyp typ patt = - "newtype" +++ gtyp +++ "=" +++ gtyp +++ typ +++ " deriving Show" +++++ - "instance Gf" +++ gtyp +++ "where" ++++ - " gf (" ++ gtyp +++ "s) =" +++ patt ++++ - " fg t =" ++++ - " case t of" ++++ - " " +++ patt +++ " ->" +++ gtyp +++ "s" ++++ - " _ -> error (\"no" +++ gtyp +++ "\" ++ show t)" - -type OIdent = String - -type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] - -datatypes, gfinstances :: (String,HSkeleton) -> String -datatypes = (foldr (+++++) "") . (filter (/="")) . (map hDatatype) . snd -gfinstances (m,g) = (foldr (+++++) "") $ (filter (/="")) $ (map (gfInstance m)) g - -hDatatype :: (OIdent, [(OIdent, [OIdent])]) -> String -gfInstance :: String -> (OIdent, [(OIdent, [OIdent])]) -> String - -hDatatype ("Cn",_) = "" --- -hDatatype (cat,[]) = "" -hDatatype (cat,rules) | isListCat (cat,rules) = - "newtype" +++ gId cat +++ "=" +++ gId cat +++ "[" ++ gId (elemCat cat) ++ "]" - +++ "deriving Show" -hDatatype (cat,rules) = - "data" +++ gId cat +++ "=" ++ - (if length rules == 1 then "" else "\n ") +++ - foldr1 (\x y -> x ++ "\n |" +++ y) - [gId f +++ foldr (+++) "" (map gId xx) | (f,xx) <- rules] ++++ - " deriving Show" - --- GADT version of data types -datatypesGADT :: (String,HSkeleton) -> String -datatypesGADT (_,skel) = - unlines (concatMap hCatTypeGADT skel) - +++++ - "data Tree :: * -> * where" ++++ unlines (concatMap (map (" "++) . hDatatypeGADT) skel) - -hCatTypeGADT :: (OIdent, [(OIdent, [OIdent])]) -> [String] -hCatTypeGADT (cat,rules) - = ["type"+++gId cat+++"="+++"Tree"+++gId cat++"_", - "data"+++gId cat++"_"] - -hDatatypeGADT :: (OIdent, [(OIdent, [OIdent])]) -> [String] -hDatatypeGADT (cat, rules) - | isListCat (cat,rules) = [gId cat+++"::"+++"["++gId (elemCat cat)++"]" +++ "->" +++ t] - | otherwise = - [ gId f +++ "::" +++ concatMap (\a -> gId a +++ "-> ") args ++ t | (f,args) <- rules ] - where t = "Tree" +++ gId cat ++ "_" - -gfInstance m crs = hInstance m crs ++++ fInstance m crs - -----hInstance m ("Cn",_) = "" --- seems to belong to an old applic. AR 18/5/2004 -hInstance m (cat,[]) = "" -hInstance m (cat,rules) - | isListCat (cat,rules) = - "instance Gf" +++ gId cat +++ "where" ++++ - " gf (" ++ gId cat +++ "[" ++ concat (intersperse "," baseVars) ++ "])" - +++ "=" +++ mkRHS ("Base"++ec) baseVars ++++ - " gf (" ++ gId cat +++ "(x:xs)) = " - ++ mkRHS ("Cons"++ec) ["x",prParenth (gId cat+++"xs")] --- no show for GADTs --- ++++ " gf (" ++ gId cat +++ "xs) = error (\"Bad " ++ cat ++ " value: \" ++ show xs)" - | otherwise = - "instance Gf" +++ gId cat +++ "where\n" ++ - unlines [mkInst f xx | (f,xx) <- rules] - where - ec = elemCat cat - baseVars = mkVars (baseSize (cat,rules)) - mkInst f xx = let xx' = mkVars (length xx) in " gf " ++ - (if length xx == 0 then gId f else prParenth (gId f +++ foldr1 (+++) xx')) +++ - "=" +++ mkRHS f xx' - mkVars n = ["x" ++ show i | i <- [1..n]] - mkRHS f vars = "DTr [] (AC (CId \"" ++ f ++ "\"))" +++ - "[" ++ prTList ", " ["gf" +++ x | x <- vars] ++ "]" - - -----fInstance m ("Cn",_) = "" --- -fInstance m (cat,[]) = "" -fInstance m (cat,rules) = - " fg t =" ++++ - " case t of" ++++ - unlines [mkInst f xx | (f,xx) <- rules] ++++ - " _ -> error (\"no" +++ cat ++ " \" ++ show t)" - where - mkInst f xx = - " DTr [] (AC (CId \"" ++ f ++ "\")) " ++ - "[" ++ prTList "," xx' ++ "]" +++ - "->" +++ mkRHS f xx' - where xx' = ["x" ++ show i | (_,i) <- zip xx [1..]] - mkRHS f vars - | isListCat (cat,rules) = - if "Base" `isPrefixOf` f then - gId cat +++ "[" ++ prTList ", " [ "fg" +++ x | x <- vars ] ++ "]" - else - let (i,t) = (init vars,last vars) - in "let" +++ gId cat +++ "xs = fg " ++ t +++ "in" +++ - gId cat +++ prParenth (prTList ":" (["fg"+++v | v <- i] ++ ["xs"])) - | otherwise = - gId f +++ - prTList " " [prParenth ("fg" +++ x) | x <- vars] - - ---type HSkeleton = [(OIdent, [(OIdent, [OIdent])])] -hSkeleton :: GFCC -> (String,HSkeleton) -hSkeleton gr = - (prCId (absname gr), - [(prCId c, [(prCId f, map prCId cs) | (f, (cs,_)) <- fs]) | - fs@((_, (_,c)):_) <- fns] - ) - where - fns = groupBy valtypg (sortBy valtyps (map jty (Map.assocs (funs (abstract gr))))) - valtyps (_, (_,x)) (_, (_,y)) = compare x y - valtypg (_, (_,x)) (_, (_,y)) = x == y - jty (f,(ty,_)) = (f,catSkeleton ty) - -updateSkeleton :: OIdent -> HSkeleton -> (OIdent, [OIdent]) -> HSkeleton -updateSkeleton cat skel rule = - case skel of - (cat0,rules):rr | cat0 == cat -> (cat0, rule:rules) : rr - (cat0,rules):rr -> (cat0, rules) : updateSkeleton cat rr rule - -isListCat :: (OIdent, [(OIdent, [OIdent])]) -> Bool -isListCat (cat,rules) = "List" `isPrefixOf` cat && length rules == 2 - && ("Base"++c) `elem` fs && ("Cons"++c) `elem` fs - where c = elemCat cat - fs = map fst rules - --- | Gets the element category of a list category. -elemCat :: OIdent -> OIdent -elemCat = drop 4 - -isBaseFun :: OIdent -> Bool -isBaseFun f = "Base" `isPrefixOf` f - -isConsFun :: OIdent -> Bool -isConsFun f = "Cons" `isPrefixOf` f - -baseSize :: (OIdent, [(OIdent, [OIdent])]) -> Int -baseSize (_,rules) = length bs - where Just (_,bs) = find (("Base" `isPrefixOf`) . fst) rules diff --git a/src-3.0/GF/GFCC/GFCCtoJS.hs b/src-3.0/GF/GFCC/GFCCtoJS.hs deleted file mode 100644 index d2d12a776..000000000 --- a/src-3.0/GF/GFCC/GFCCtoJS.hs +++ /dev/null @@ -1,117 +0,0 @@ -module GF.GFCC.GFCCtoJS (gfcc2js) where - -import qualified GF.GFCC.Macros as M -import GF.GFCC.CId -import GF.GFCC.DataGFCC -import qualified GF.JavaScript.AbsJS as JS -import qualified GF.JavaScript.PrintJS as JS - -import GF.Text.UTF8 -import GF.Data.ErrM -import GF.Infra.Option - -import Control.Monad (mplus) -import Data.Array (Array) -import qualified Data.Array as Array -import Data.Maybe (fromMaybe) -import qualified Data.Map as Map - -gfcc2js :: GFCC -> String -gfcc2js gfcc = - encodeUTF8 $ JS.printTree $ JS.Program [JS.ElStmt $ JS.SDeclOrExpr $ JS.Decl [JS.DInit (JS.Ident n) grammar]] - where - n = prCId $ absname gfcc - as = abstract gfcc - cs = Map.assocs (concretes gfcc) - start = M.lookStartCat gfcc - grammar = new "GFGrammar" [js_abstract, js_concrete] - js_abstract = abstract2js start as - js_concrete = JS.EObj $ map (concrete2js start n) cs - -abstract2js :: String -> Abstr -> JS.Expr -abstract2js start ds = new "GFAbstract" [JS.EStr start, JS.EObj $ map absdef2js (Map.assocs (funs ds))] - -absdef2js :: (CId,(Type,Exp)) -> JS.Property -absdef2js (f,(typ,_)) = - let (args,cat) = M.catSkeleton typ in - JS.Prop (JS.IdentPropName (JS.Ident (prCId f))) (new "Type" [JS.EArray [JS.EStr (prCId x) | x <- args], JS.EStr (prCId cat)]) - -concrete2js :: String -> String -> (CId,Concr) -> JS.Property -concrete2js start n (c, cnc) = - JS.Prop l (new "GFConcrete" ([(JS.EObj $ ((map (cncdef2js n (prCId c)) ds) ++ litslins))] ++ - maybe [] (parser2js start) (parser cnc))) - where - l = JS.IdentPropName (JS.Ident (prCId c)) - ds = concatMap Map.assocs [lins cnc, opers cnc, lindefs cnc] - litslins = [JS.Prop (JS.StringPropName "Int") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), - JS.Prop (JS.StringPropName "Float") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]]), - JS.Prop (JS.StringPropName "String") (JS.EFun [children] [JS.SReturn $ new "Arr" [JS.EIndex (JS.EVar children) (JS.EInt 0)]])] - - -cncdef2js :: String -> String -> (CId,Term) -> JS.Property -cncdef2js n l (f, t) = JS.Prop (JS.IdentPropName (JS.Ident (prCId f))) (JS.EFun [children] [JS.SReturn (term2js n l t)]) - -term2js :: String -> String -> Term -> JS.Expr -term2js n l t = f t - where - f t = - case t of - R xs -> new "Arr" (map f xs) - P x y -> JS.ECall (JS.EMember (f x) (JS.Ident "sel")) [f y] - S xs -> mkSeq (map f xs) - K t -> tokn2js t - V i -> JS.EIndex (JS.EVar children) (JS.EInt i) - C i -> new "Int" [JS.EInt i] - F f -> JS.ECall (JS.EMember (JS.EIndex (JS.EMember (JS.EVar $ JS.Ident n) (JS.Ident "concretes")) (JS.EStr l)) (JS.Ident "rule")) [JS.EStr (prCId f), JS.EVar children] - FV xs -> new "Variants" (map f xs) - W str x -> new "Suffix" [JS.EStr str, f x] - TM _ -> new "Meta" [] - -tokn2js :: Tokn -> JS.Expr -tokn2js (KS s) = mkStr s -tokn2js (KP ss vs) = mkSeq (map mkStr ss) -- FIXME - -mkStr :: String -> JS.Expr -mkStr s = new "Str" [JS.EStr s] - -mkSeq :: [JS.Expr] -> JS.Expr -mkSeq [x] = x -mkSeq xs = new "Seq" xs - -argIdent :: Integer -> JS.Ident -argIdent n = JS.Ident ("x" ++ show n) - -children :: JS.Ident -children = JS.Ident "cs" - --- Parser -parser2js :: String -> ParserInfo -> [JS.Expr] -parser2js start p = [new "Parser" [JS.EStr start, - JS.EArray $ map frule2js (Array.elems (allRules p)), - JS.EObj $ map cats (Map.assocs (startupCats p))]] - where - cats (c,is) = JS.Prop (JS.IdentPropName (JS.Ident (prCId c))) (JS.EArray (map JS.EInt is)) - -frule2js :: FRule -> JS.Expr -frule2js (FRule f ps args res lins) = new "Rule" [JS.EInt res, name2js (f,ps), JS.EArray (map JS.EInt args), lins2js lins] - -name2js :: (CId,[Profile]) -> JS.Expr -name2js (f,ps) | f == wildCId = fromProfile (head ps) - | otherwise = new "FunApp" $ [JS.EStr $ prCId f, JS.EArray (map fromProfile ps)] - where - fromProfile :: Profile -> JS.Expr - fromProfile [] = new "MetaVar" [] - fromProfile [x] = daughter x - fromProfile args = new "Unify" [JS.EArray (map daughter args)] - - daughter i = new "Arg" [JS.EInt i] - -lins2js :: Array FIndex (Array FPointPos FSymbol) -> JS.Expr -lins2js ls = JS.EArray [ JS.EArray [ sym2js s | s <- Array.elems l] | l <- Array.elems ls] - -sym2js :: FSymbol -> JS.Expr -sym2js (FSymCat l n) = new "ArgProj" [JS.EInt n, JS.EInt l] -sym2js (FSymTok t) = new "Terminal" [JS.EStr t] - -new :: String -> [JS.Expr] -> JS.Expr -new f xs = JS.ENew (JS.Ident f) xs diff --git a/src-3.0/GF/GFCC/Generate.hs b/src-3.0/GF/GFCC/Generate.hs deleted file mode 100644 index 0c02f2034..000000000 --- a/src-3.0/GF/GFCC/Generate.hs +++ /dev/null @@ -1,70 +0,0 @@ -module GF.GFCC.Generate where - -import GF.GFCC.Macros -import GF.GFCC.DataGFCC -import GF.GFCC.CId - -import qualified Data.Map as M -import System.Random - --- generate an infinite list of trees exhaustively -generate :: GFCC -> CId -> Maybe Int -> [Exp] -generate gfcc cat dp = concatMap (\i -> gener i cat) depths - where - gener 0 c = [tree (AC f) [] | (f, ([],_)) <- fns c] - gener i c = [ - tr | - (f, (cs,_)) <- fns c, - let alts = map (gener (i-1)) cs, - ts <- combinations alts, - let tr = tree (AC f) ts, - depth tr >= i - ] - fns c = [(f,catSkeleton ty) | (f,ty) <- functionsToCat gfcc c] - depths = maybe [0 ..] (\d -> [0..d]) dp - --- generate an infinite list of trees randomly -genRandom :: StdGen -> GFCC -> CId -> [Exp] -genRandom gen gfcc cat = genTrees (randomRs (0.0, 1.0 :: Double) gen) cat where - - timeout = 47 -- give up - - genTrees ds0 cat = - let (ds,ds2) = splitAt (timeout+1) ds0 -- for time out, else ds - (t,k) = genTree ds cat - in (if k>timeout then id else (t:)) - (genTrees ds2 cat) -- else (drop k ds) - - genTree rs = gett rs where - gett ds cid | cid == mkCId "String" = (tree (AS "foo") [], 1) - gett ds cid | cid == mkCId "Int" = (tree (AI 12345) [], 1) - gett [] _ = (tree (AS "TIMEOUT") [], 1) ---- - gett ds cat = case fns cat of - [] -> (tree (AM 0) [],1) - fs -> let - d:ds2 = ds - (f,args) = getf d fs - (ts,k) = getts ds2 args - in (tree (AC f) ts, k+1) - getf d fs = let lg = (length fs) in - fs !! (floor (d * fromIntegral lg)) - getts ds cats = case cats of - c:cs -> let - (t, k) = gett ds c - (ts,ks) = getts (drop k ds) cs - in (t:ts, k + ks) - _ -> ([],0) - - fns cat = [(f,(fst (catSkeleton ty))) | (f,ty) <- functionsToCat gfcc cat] - - -{- --- brute-force parsing method; only returns the first result --- note: you cannot throw away rules with unknown words from the grammar --- because it is not known which field in each rule may match the input - -searchParse :: Int -> GFCC -> CId -> [String] -> [Exp] -searchParse i gfcc cat ws = [t | t <- gen, s <- lins t, words s == ws] where - gen = take i $ generate gfcc cat - lins t = [linearize gfcc lang t | lang <- cncnames gfcc] --} diff --git a/src-3.0/GF/GFCC/Linearize.hs b/src-3.0/GF/GFCC/Linearize.hs deleted file mode 100644 index 9db7040f8..000000000 --- a/src-3.0/GF/GFCC/Linearize.hs +++ /dev/null @@ -1,87 +0,0 @@ -module GF.GFCC.Linearize where - -import GF.GFCC.Macros -import GF.GFCC.DataGFCC -import GF.GFCC.CId -import qualified Data.Map as Map -import Data.List - -import Debug.Trace - --- linearization and computation of concrete GFCC Terms - -linearize :: GFCC -> CId -> Exp -> String -linearize mcfg lang = realize . linExp mcfg lang - -realize :: Term -> String -realize trm = case trm of - R ts -> realize (ts !! 0) - S ss -> unwords $ map realize ss - K t -> case t of - KS s -> s - KP s _ -> unwords s ---- prefix choice TODO - W s t -> s ++ realize t - FV ts -> realize (ts !! 0) ---- other variants TODO - TM s -> s - _ -> "ERROR " ++ show trm ---- debug - -linExp :: GFCC -> CId -> Exp -> Term -linExp mcfg lang tree@(DTr xs at trees) = - addB $ case at of - AC fun -> comp (map lin trees) $ look fun - AS s -> R [kks (show s)] -- quoted - AI i -> R [kks (show i)] - --- [C lst, kks (show i), C size] where - --- lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1 - AF d -> R [kks (show d)] - AV x -> TM (prCId x) - AM i -> TM (show i) - where - lin = linExp mcfg lang - comp = compute mcfg lang - look = lookLin mcfg lang - addB t - | Data.List.null xs = t - | otherwise = case t of - R ts -> R $ ts ++ (Data.List.map (kks . prCId) xs) - TM s -> R $ t : (Data.List.map (kks . prCId) xs) - -compute :: GFCC -> CId -> [Term] -> Term -> Term -compute mcfg lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ map comp ts - V i -> idx args i -- already computed - F c -> comp $ look c -- not computed (if contains argvar) - FV ts -> FV $ map comp ts - S ts -> S $ filter (/= S []) $ map comp ts - _ -> trm - - look = lookOper mcfg lang - - idx xs i = if i > length xs - 1 - then trace - ("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0 - else xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ map (proj r) ts - (FV ts, _ ) -> FV $ map (\t -> proj t p) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> i - TM _ -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666 - - getField t i = case t of - R rs -> idx rs i - TM s -> TM s - _ -> error ("ERROR in grammar compiler: field from " ++ show t) t - diff --git a/src-3.0/GF/GFCC/Macros.hs b/src-3.0/GF/GFCC/Macros.hs deleted file mode 100644 index 1e3ebda17..000000000 --- a/src-3.0/GF/GFCC/Macros.hs +++ /dev/null @@ -1,116 +0,0 @@ -module GF.GFCC.Macros where - -import GF.GFCC.CId -import GF.GFCC.DataGFCC -import Control.Monad -import qualified Data.Map as Map -import qualified Data.Array as Array -import Data.Maybe -import Data.List - --- operations for manipulating GFCC grammars and objects - -lookLin :: GFCC -> CId -> CId -> Term -lookLin gfcc lang fun = - lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes gfcc - -lookOper :: GFCC -> CId -> CId -> Term -lookOper gfcc lang fun = - lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes gfcc - -lookLincat :: GFCC -> CId -> CId -> Term -lookLincat gfcc lang fun = - lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes gfcc - -lookParamLincat :: GFCC -> CId -> CId -> Term -lookParamLincat gfcc lang fun = - lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes gfcc - -lookType :: GFCC -> CId -> Type -lookType gfcc f = - fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc)) - -lookParser :: GFCC -> CId -> Maybe ParserInfo -lookParser gfcc lang = parser $ lookMap (error "no lang") lang $ concretes gfcc - -lookFCFG :: GFCC -> CId -> Maybe FGrammar -lookFCFG gfcc lang = fmap toFGrammar $ lookParser gfcc lang - where - toFGrammar :: ParserInfo -> FGrammar - toFGrammar pinfo = (Array.elems (allRules pinfo), startupCats pinfo) - -lookStartCat :: GFCC -> String -lookStartCat gfcc = fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat")) - [gflags gfcc, aflags (abstract gfcc)] - -lookGlobalFlag :: GFCC -> CId -> String -lookGlobalFlag gfcc f = - lookMap "?" f (gflags gfcc) - -lookAbsFlag :: GFCC -> CId -> String -lookAbsFlag gfcc f = - lookMap "?" f (aflags (abstract gfcc)) - -lookCncFlag :: GFCC -> CId -> CId -> String -lookCncFlag gfcc lang f = - lookMap "?" f $ cflags $ lookMap (error "no lang") lang $ concretes gfcc - -functionsToCat :: GFCC -> CId -> [(CId,Type)] -functionsToCat gfcc cat = - [(f,ty) | f <- fs, Just (ty,_) <- [Map.lookup f $ funs $ abstract gfcc]] - where - fs = lookMap [] cat $ catfuns $ abstract gfcc - -depth :: Exp -> Int -depth tr = case tr of - DTr _ _ [] -> 1 - DTr _ _ ts -> maximum (map depth ts) + 1 - -tree :: Atom -> [Exp] -> Exp -tree = DTr [] - -cftype :: [CId] -> CId -> Type -cftype args val = DTyp [Hyp wildCId (cftype [] arg) | arg <- args] val [] - -catSkeleton :: Type -> ([CId],CId) -catSkeleton ty = case ty of - DTyp hyps val _ -> ([valCat ty | Hyp _ ty <- hyps],val) - -typeSkeleton :: Type -> ([(Int,CId)],CId) -typeSkeleton ty = case ty of - DTyp hyps val _ -> ([(contextLength ty, valCat ty) | Hyp _ ty <- hyps],val) - -valCat :: Type -> CId -valCat ty = case ty of - DTyp _ val _ -> val - -contextLength :: Type -> Int -contextLength ty = case ty of - DTyp hyps _ _ -> length hyps - -exp0 :: Exp -exp0 = tree (AM 0) [] - -primNotion :: Exp -primNotion = EEq [] - -term0 :: CId -> Term -term0 = TM . prCId - -tm0 :: Term -tm0 = TM "?" - -kks :: String -> Term -kks = K . KS - --- lookup with default value -lookMap :: (Show i, Ord i) => a -> i -> Map.Map i a -> a -lookMap d c m = maybe d id $ Map.lookup c m - ---- from Operations -combinations :: [[a]] -> [[a]] -combinations t = case t of - [] -> [[]] - aa:uu -> [a:u | a <- aa, u <- combinations uu] - - diff --git a/src-3.0/GF/GFCC/OptimizeGFCC.hs b/src-3.0/GF/GFCC/OptimizeGFCC.hs deleted file mode 100644 index 94db12a2f..000000000 --- a/src-3.0/GF/GFCC/OptimizeGFCC.hs +++ /dev/null @@ -1,124 +0,0 @@ -module GF.GFCC.OptimizeGFCC where - -import GF.GFCC.CId -import GF.GFCC.DataGFCC - -import GF.Data.Operations - -import Data.List -import qualified Data.Map as Map - - --- back-end optimization: --- suffix analysis followed by common subexpression elimination - -optGFCC :: GFCC -> GFCC -optGFCC = cseOptimize . suffixOptimize - -suffixOptimize :: GFCC -> GFCC -suffixOptimize gfcc = gfcc { - concretes = Map.map opt (concretes gfcc) - } - where - opt cnc = cnc { - lins = Map.map optTerm (lins cnc), - lindefs = Map.map optTerm (lindefs cnc), - printnames = Map.map optTerm (printnames cnc) - } - -cseOptimize :: GFCC -> GFCC -cseOptimize gfcc = gfcc { - concretes = Map.map subex (concretes gfcc) - } - --- analyse word form lists into prefix + suffixes --- suffix sets can later be shared by subex elim - -optTerm :: Term -> Term -optTerm tr = case tr of - R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | K (KS s) <- ts] - R ts -> R $ map optTerm ts - P t v -> P (optTerm t) v - _ -> tr - where - optToks ss = prf : suffs where - prf = pref (head ss) (tail ss) - suffs = map (drop (length prf)) ss - pref cand ss = case ss of - s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss - _ -> cand - isK t = case t of - K (KS _) -> True - _ -> False - mkSuff ("":ws) = R (map (K . KS) ws) - mkSuff (p:ws) = W p (R (map (K . KS) ws)) - - --- common subexpression elimination - ----subex :: [(CId,Term)] -> [(CId,Term)] -subex :: Concr -> Concr -subex cnc = err error id $ do - (tree,_) <- appSTM (getSubtermsMod cnc) (Map.empty,0) - return $ addSubexpConsts tree cnc - -type TermList = Map.Map Term (Int,Int) -- number of occs, id -type TermM a = STM (TermList,Int) a - -addSubexpConsts :: TermList -> Concr -> Concr -addSubexpConsts tree cnc = cnc { - opers = Map.fromList [(f,recomp f trm) | (f,trm) <- ops], - lins = rec lins, - lindefs = rec lindefs, - printnames = rec printnames - } - where - ops = [(fid id, trm) | (trm,(_,id)) <- Map.assocs tree] - mkOne (f,trm) = (f, recomp f trm) - recomp f t = case Map.lookup t tree of - Just (_,id) | fid id /= f -> F $ fid id -- not to replace oper itself - _ -> case t of - R ts -> R $ map (recomp f) ts - S ts -> S $ map (recomp f) ts - W s t -> W s (recomp f t) - P t p -> P (recomp f t) (recomp f p) - _ -> t - fid n = mkCId $ "_" ++ show n - rec field = Map.fromAscList [(f,recomp f trm) | (f,trm) <- Map.assocs (field cnc)] - - -getSubtermsMod :: Concr -> TermM TermList -getSubtermsMod cnc = do - mapM getSubterms (Map.assocs (lins cnc)) - mapM getSubterms (Map.assocs (lindefs cnc)) - mapM getSubterms (Map.assocs (printnames cnc)) - (tree0,_) <- readSTM - return $ Map.filter (\ (nu,_) -> nu > 1) tree0 - where - getSubterms (f,trm) = collectSubterms trm >> return () - -collectSubterms :: Term -> TermM () -collectSubterms t = case t of - R ts -> do - mapM collectSubterms ts - add t - S ts -> do - mapM collectSubterms ts - add t - W s u -> do - collectSubterms u - add t - P p u -> do - collectSubterms p - collectSubterms u - add t - _ -> return () - where - add t = do - (ts,i) <- readSTM - let - ((count,id),next) = case Map.lookup t ts of - Just (nu,id) -> ((nu+1,id), i) - _ -> ((1, i ), i+1) - writeSTM (Map.insert t (count,id) ts, next) - diff --git a/src-3.0/GF/GFCC/Parsing/FCFG.hs b/src-3.0/GF/GFCC/Parsing/FCFG.hs deleted file mode 100644 index 38f237a7a..000000000 --- a/src-3.0/GF/GFCC/Parsing/FCFG.hs +++ /dev/null @@ -1,79 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- FCFG parsing ------------------------------------------------------------------------------ - -module GF.GFCC.Parsing.FCFG - (parseFCF,buildParserInfo,ParserInfo(..),makeFinalEdge) where - -import GF.Data.SortedList -import GF.Data.Assoc - -import GF.GFCC.Parsing.FCFG.Utilities -import GF.GFCC.Parsing.FCFG.Active - -import GF.GFCC.CId -import GF.GFCC.DataGFCC -import GF.GFCC.BuildParser -import GF.GFCC.Macros -import GF.Data.ErrM - -import qualified Data.Map as Map - ----------------------------------------------------------------------- --- parsing - --- main parsing function - -parseFCF :: - String -> -- ^ parsing strategy - ParserInfo -> -- ^ compiled grammar (fcfg) - CId -> -- ^ starting category - [String] -> -- ^ input tokens - Err [Exp] -- ^ resulting GF terms -parseFCF strategy pinfo startCat inString = - do let inTokens = input inString - startCats <- Map.lookup startCat (startupCats pinfo) - fcfParser <- {- trace lctree $ -} parseFCF strategy - let chart = fcfParser pinfo startCats inTokens - (i,j) = inputBounds inTokens - finalEdges = [makeFinalEdge cat i j | cat <- startCats] - forests = chart2forests chart (const False) finalEdges - filteredForests = forests >>= applyProfileToForest - trees = nubsort $ filteredForests >>= forest2trees - return $ map tree2term trees - where - parseFCF :: String -> Err (FCFParser) - parseFCF "bottomup" = Ok $ parse "b" - parseFCF "topdown" = Ok $ parse "t" - parseFCF strat = Bad $ "FCFG parsing strategy not defined: " ++ strat - ----------------------------------------------------------------------- --- parse trees to GFCC terms - -tree2term :: SyntaxTree CId -> Exp -tree2term (TNode f ts) = tree (AC f) (map tree2term ts) -tree2term (TString s) = tree (AS s) [] -tree2term (TInt n) = tree (AI n) [] -tree2term (TFloat f) = tree (AF f) [] -tree2term (TMeta) = exp0 - ----------------------------------------------------------------------- --- conversion and unification of forests - --- simplest implementation -applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId] -applyProfileToForest (FNode (fun,profiles) children) - | fun == wildCId = concat chForests - | otherwise = [ FNode fun chForests | not (null chForests) ] - where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles | - forests0 <- children, - forests <- mapM applyProfileToForest forests0 ] -applyProfileToForest (FString s) = [FString s] -applyProfileToForest (FInt n) = [FInt n] -applyProfileToForest (FFloat f) = [FFloat f] -applyProfileToForest (FMeta) = [FMeta] diff --git a/src-3.0/GF/GFCC/Parsing/FCFG/Active.hs b/src-3.0/GF/GFCC/Parsing/FCFG/Active.hs deleted file mode 100644 index 55114249d..000000000 --- a/src-3.0/GF/GFCC/Parsing/FCFG/Active.hs +++ /dev/null @@ -1,186 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- MCFG parsing, the active algorithm ------------------------------------------------------------------------------ - -module GF.GFCC.Parsing.FCFG.Active (FCFParser, parse, makeFinalEdge) where - -import GF.Data.GeneralDeduction -import GF.Data.Assoc -import GF.Data.SortedList -import GF.Data.Utilities - -import GF.GFCC.CId -import GF.GFCC.DataGFCC -import GF.GFCC.Parsing.FCFG.Utilities - -import Control.Monad (guard) - -import qualified Data.List as List -import qualified Data.Map as Map -import qualified Data.Set as Set -import Data.Array - ----------------------------------------------------------------------- --- * parsing - -makeFinalEdge cat 0 0 = (cat, [EmptyRange]) -makeFinalEdge cat i j = (cat, [makeRange i j]) - --- | the list of categories = possible starting categories -type FCFParser = ParserInfo - -> [FCat] - -> Input FToken - -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) - - -parse :: String -> FCFParser -parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo - where chart = process strategy pinfo toks axioms emptyXChart - axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks - | isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks - -isBU s = s=="b" -isTD s = s=="t" - --- used in prediction -emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec -emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) []) - where - FRule _ _ rhs _ _ = allRules pinfo ! ruleid - -process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat -process strategy pinfo toks [] chart = chart -process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart - where - univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart - | inRange (bounds lin) ppos = - case lin ! ppos of - FSymCat r d -> let c = args !! d - in case recs !! d of - [] -> case insertXChart chart item c of - Nothing -> chart - Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c - rng <- concatRange rng (found' !! r) - return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs))) - ++ - do guard (isTD strategy) - ruleid <- topdownRules pinfo ? c - return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) - in process strategy pinfo toks items chart - found' -> let items = do rng <- concatRange rng (found' !! r) - return (c, Active found rng lbl (ppos+1) node) - in process strategy pinfo toks items chart - FSymTok tok -> let items = do t_rng <- inputToken toks ? tok - rng' <- concatRange rng t_rng - return (cat, Active found rng' lbl (ppos+1) node) - in process strategy pinfo toks items chart - | otherwise = - if inRange (bounds lins) (lbl+1) - then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart - else univRule cat (Final (reverse (rng:found)) node) chart - where - (FRule _ _ args cat lins) = allRules pinfo ! ruleid - lin = lins ! lbl - univRule cat item@(Final found' node) chart = - case insertXChart chart item cat of - Nothing -> chart - Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat - let FRule _ _ args _ lins = allRules pinfo ! ruleid - FSymCat r d = lins ! l ! ppos - rng <- concatRange rng (found' !! r) - return (args !! d, Active found rng l (ppos+1) (updateChildren node d found')) - ++ - do guard (isBU strategy) - ruleid <- leftcornerCats pinfo ? cat - let FRule _ _ args _ lins = allRules pinfo ! ruleid - FSymCat r d = lins ! 0 ! 0 - return (args !! d, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found')) - - updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec - updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs - in process strategy pinfo toks items chart - ----------------------------------------------------------------------- --- * XChart - -data Item - = Active RangeRec - Range - {-# UNPACK #-} !FIndex - {-# UNPACK #-} !FPointPos - (SyntaxNode RuleId RangeRec) - | Final RangeRec (SyntaxNode RuleId RangeRec) - deriving (Eq, Ord) - -data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c) - -emptyXChart :: Ord c => XChart c -emptyXChart = XChart emptyChart emptyChart - -insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c = - case chartInsert actives item c of - Nothing -> Nothing - Just actives -> Just (XChart actives finals) - -insertXChart (XChart actives finals) item@(Final _ _) c = - case chartInsert finals item c of - Nothing -> Nothing - Just finals -> Just (XChart actives finals) - -lookupXChartAct (XChart actives finals) c = chartLookup actives c -lookupXChartFinal (XChart actives finals) c = chartLookup finals c - -xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) -xchart2syntaxchart (XChart actives finals) pinfo = - accumAssoc groupSyntaxNodes $ - [ case node of - SNode ruleid rrecs -> let FRule fun prof rhs cat _ = allRules pinfo ! ruleid - in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) - SString s -> ((cat,found), SString s) - SInt n -> ((cat,found), SInt n) - SFloat f -> ((cat,found), SFloat f) - | (cat, Final found node) <- chartAssocs finals - ] - -literals :: ParserInfo -> Input FToken -> [(FCat,Item)] -literals pinfo toks = - [let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)] - where - lexer t = - case reads t of - [(n,"")] -> (fcatInt, SInt (n::Integer)) - _ -> case reads t of - [(f,"")] -> (fcatFloat, SFloat (f::Double)) - _ -> (fcatString,SString t) - - ----------------------------------------------------------------------- --- Earley -- - --- called with all starting categories -initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)] -initialTD pinfo starts toks = - do cat <- starts - ruleid <- topdownRules pinfo ? cat - return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo)) - - ----------------------------------------------------------------------- --- Kilbury -- - -initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)] -initialBU pinfo toks = - do (tok,rngs) <- aAssocs (inputToken toks) - ruleid <- leftcornerTokens pinfo ? tok - let FRule _ _ _ cat _ = allRules pinfo ! ruleid - rng <- rngs - return (cat,Active [] rng 0 1 (emptyChildren ruleid pinfo)) - ++ - do ruleid <- epsilonRules pinfo - let FRule _ _ _ cat _ = allRules pinfo ! ruleid - return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) diff --git a/src-3.0/GF/GFCC/Parsing/FCFG/Utilities.hs b/src-3.0/GF/GFCC/Parsing/FCFG/Utilities.hs deleted file mode 100644 index 5f91ccb2f..000000000 --- a/src-3.0/GF/GFCC/Parsing/FCFG/Utilities.hs +++ /dev/null @@ -1,271 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : PL --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/05/13 12:40:19 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.6 $ --- --- Basic type declarations and functions for grammar formalisms ------------------------------------------------------------------------------ - - -module GF.GFCC.Parsing.FCFG.Utilities where - -import Control.Monad -import Data.Array -import Data.List (groupBy) - -import GF.Data.SortedList -import GF.Data.Assoc -import GF.Data.Utilities (sameLength, foldMerge, splitBy) - - ------------------------------------------------------------- --- ranges as single pairs - -type RangeRec = [Range] - -data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int - | EmptyRange - deriving (Eq, Ord) - -makeRange :: Int -> Int -> Range -makeRange = Range - -concatRange :: Range -> Range -> [Range] -concatRange EmptyRange rng = return rng -concatRange rng EmptyRange = return rng -concatRange (Range i j) (Range j' k) = [Range i k | j==j'] - -minRange :: Range -> Int -minRange (Range i j) = i - -maxRange :: Range -> Int -maxRange (Range i j) = j - - ------------------------------------------------------------- --- * representaions of input tokens - -data Input t = MkInput { inputBounds :: (Int, Int), - inputToken :: Assoc t [Range] - } - -input :: Ord t => [t] -> Input t -input toks = MkInput inBounds inToken - where - inBounds = (0, length toks) - inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ] - -inputMany :: Ord t => [[t]] -> Input t -inputMany toks = MkInput inBounds inToken - where - inBounds = (0, length toks) - inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ] - - ------------------------------------------------------------- --- * representations of syntactical analyses - --- ** charts as finite maps over edges - --- | The values of the chart, a list of key-daughters pairs, --- has unique keys. In essence, it is a map from 'n' to daughters. --- The daughters should be a set (not necessarily sorted) of rhs's. -type SyntaxChart n e = Assoc e [SyntaxNode n [e]] - -data SyntaxNode n e = SMeta - | SNode n [e] - | SString String - | SInt Integer - | SFloat Double - deriving (Eq,Ord) - -groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] -groupSyntaxNodes [] = [] -groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs' - where - (ess,xs') = span xs - - span [] = ([],[]) - span xs@(SNode n es:xs') - | n0 == n = let (ess,xs) = span xs' in (es:ess,xs) - | otherwise = ([],xs) -groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs -groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs -groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs - --- better(?) representation of forests: --- data Forest n = F (SMap n (SList [Forest n])) Bool --- == --- type Forest n = GeneralTrie n (SList [Forest n]) Bool --- (the Bool == isMeta) - --- ** syntax forests - -data SyntaxForest n = FMeta - | FNode n [[SyntaxForest n]] - -- ^ The outer list should be a set (not necessarily sorted) - -- of possible alternatives. Ie. the outer list - -- is a disjunctive node, and the inner lists - -- are (conjunctive) concatenative nodes - | FString String - | FInt Integer - | FFloat Double - deriving (Eq, Ord, Show) - -instance Functor SyntaxForest where - fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests - fmap _ (FString s) = FString s - fmap _ (FInt n) = FInt n - fmap _ (FFloat f) = FFloat f - fmap _ (FMeta) = FMeta - -forestName :: SyntaxForest n -> Maybe n -forestName (FNode n _) = Just n -forestName _ = Nothing - -unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) -unifyManyForests = foldM unifyForests FMeta - --- | two forests can be unified, if either is 'FMeta', or both have the same parent, --- and all children can be unified -unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) -unifyForests FMeta forest = return forest -unifyForests forest FMeta = return forest -unifyForests (FNode name1 children1) (FNode name2 children2) - | name1 == name2 && not (null children) = return $ FNode name1 children - where children = [ forests | forests1 <- children1, forests2 <- children2, - sameLength forests1 forests2, - forests <- zipWithM unifyForests forests1 forests2 ] -unifyForests (FString s1) (FString s2) - | s1 == s2 = return $ FString s1 -unifyForests (FInt n1) (FInt n2) - | n1 == n2 = return $ FInt n1 -unifyForests (FFloat f1) (FFloat f2) - | f1 == f2 = return $ FFloat f1 -unifyForests _ _ = fail "forest unification failure" - -{- måste tänka mer på detta: -compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n) -compactForests = map joinForests . groupBy eqNames . sortForests - where eqNames f g = forestName f == forestName g - sortForests = foldMerge mergeForests [] . map return - mergeForests [] gs = gs - mergeForests fs [] = fs - mergeForests fs@(f:fs') gs@(g:gs') - = case forestName f `compare` forestName g of - LT -> f : mergeForests fs' gs - GT -> g : mergeForests fs gs' - EQ -> f : g : mergeForests fs' gs' - joinForests fs = case forestName (head fs) of - Nothing -> FMeta - Just name -> FNode name $ - compactDaughters $ - concat [ fss | FNode _ fss <- fs ] - compactDaughters fss = case head fss of - [] -> [[]] - [_] -> map return $ compactForests $ concat fss - _ -> nubsort fss --} - --- ** syntax trees - -data SyntaxTree n = TMeta - | TNode n [SyntaxTree n] - | TString String - | TInt Integer - | TFloat Double - deriving (Eq, Ord, Show) - -instance Functor SyntaxTree where - fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees - fmap _ (TString s) = TString s - fmap _ (TInt n) = TInt n - fmap _ (TFloat f) = TFloat f - fmap _ (TMeta) = TMeta - -treeName :: SyntaxTree n -> Maybe n -treeName (TNode n _) = Just n -treeName (TMeta) = Nothing - -unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n) -unifyManyTrees = foldM unifyTrees TMeta - --- | two trees can be unified, if either is 'TMeta', --- or both have the same parent, and their children can be unified -unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n) -unifyTrees TMeta tree = return tree -unifyTrees tree TMeta = return tree -unifyTrees (TNode name1 children1) (TNode name2 children2) - | name1 == name2 && sameLength children1 children2 - = liftM (TNode name1) $ zipWithM unifyTrees children1 children2 -unifyTrees (TString s1) (TString s2) - | s1 == s2 = return (TString s1) -unifyTrees (TInt n1) (TInt n2) - | n1 == n2 = return (TInt n1) -unifyTrees (TFloat f1) (TFloat f2) - | f1 == f2 = return (TFloat f1) -unifyTrees _ _ = fail "tree unification failure" - --- ** conversions between representations - -chart2forests :: (Ord n, Ord e) => - SyntaxChart n e -- ^ The complete chart - -> (e -> Bool) -- ^ When is an edge 'FMeta'? - -> [e] -- ^ The starting edges - -> SList (SyntaxForest n) -- ^ The result has unique keys, ie. all 'n' are joined together. - -- In essence, the result is a map from 'n' to forest daughters - --- simplest implementation - -chart2forests chart isMeta = concatMap (edge2forests []) - where edge2forests edges edge - | isMeta edge = [FMeta] - | edge `elem` edges = [] - | otherwise = map (item2forest (edge:edges)) $ chart ? edge - item2forest edges (SMeta) = FMeta - item2forest edges (SNode name children) = - FNode name $ children >>= mapM (edge2forests edges) - item2forest edges (SString s) = FString s - item2forest edges (SInt n) = FInt n - item2forest edges (SFloat f) = FFloat f - -{- -before AR inserted peb's patch 8/7/2007, this was: - -chart2forests chart isMeta = concatMap edge2forests - where edge2forests edge = if isMeta edge then [FMeta] - else map item2forest $ chart ? edge - item2forest (SMeta) = FMeta - item2forest (SNode name children) = FNode name $ children >>= mapM edge2forests - item2forest (SString s) = FString s - item2forest (SInt n) = FInt n - item2forest (SFloat f) = FFloat f - --} - -{- --- more intelligent(?) implementation, --- requiring that charts and forests are sorted maps and sorted sets -chart2forests chart isMeta = es2fs - where e2fs e = if isMeta e then [FMeta] else map i2f $ chart ? e - es2fs es = if null metas then fs else FMeta : fs - where (metas, nonMetas) = splitBy isMeta es - fs = map i2f $ unionMap (<++>) $ map (chart ?) nonMetas - i2f (name, children) = FNode name $ - case head children of - [] -> [[]] - [_] -> map return $ es2fs $ concat children - _ -> children >>= mapM e2fs --} - - -forest2trees :: SyntaxForest n -> SList (SyntaxTree n) -forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees -forest2trees (FString s) = [TString s] -forest2trees (FInt n) = [TInt n] -forest2trees (FFloat f) = [TFloat f] -forest2trees (FMeta) = [TMeta] diff --git a/src-3.0/GF/GFCC/PrintGFCC.hs b/src-3.0/GF/GFCC/PrintGFCC.hs deleted file mode 100644 index 6eee6f112..000000000 --- a/src-3.0/GF/GFCC/PrintGFCC.hs +++ /dev/null @@ -1,22 +0,0 @@ -module GF.GFCC.PrintGFCC where - -import GF.GFCC.DataGFCC (GFCC) -import GF.GFCC.Raw.ConvertGFCC (fromGFCC) -import GF.GFCC.Raw.PrintGFCCRaw (printTree) -import GF.GFCC.GFCCtoHaskell -import GF.GFCC.GFCCtoJS -import GF.Infra.Option -import GF.Text.UTF8 - --- top-level access to code generation - -prGFCC :: OutputFormat -> GFCC -> String -prGFCC fmt gr = case fmt of - FmtGFCC -> printGFCC gr - FmtJavaScript -> gfcc2js gr - FmtHaskell -> grammar2haskell gr - FmtHaskellGADT -> grammar2haskellGADT gr - -printGFCC :: GFCC -> String -printGFCC = encodeUTF8 . printTree . fromGFCC - diff --git a/src-3.0/GF/GFCC/Raw/AbsGFCCRaw.hs b/src-3.0/GF/GFCC/Raw/AbsGFCCRaw.hs deleted file mode 100644 index 2be8537eb..000000000 --- a/src-3.0/GF/GFCC/Raw/AbsGFCCRaw.hs +++ /dev/null @@ -1,14 +0,0 @@ -module GF.GFCC.Raw.AbsGFCCRaw where - -data Grammar = - Grm [RExp] - deriving (Eq,Ord,Show) - -data RExp = - App String [RExp] - | AInt Integer - | AStr String - | AFlt Double - | AMet - deriving (Eq,Ord,Show) - diff --git a/src-3.0/GF/GFCC/Raw/ConvertGFCC.hs b/src-3.0/GF/GFCC/Raw/ConvertGFCC.hs deleted file mode 100644 index fb805b4cd..000000000 --- a/src-3.0/GF/GFCC/Raw/ConvertGFCC.hs +++ /dev/null @@ -1,250 +0,0 @@ -module GF.GFCC.Raw.ConvertGFCC (toGFCC,fromGFCC) where - -import GF.GFCC.CId -import GF.GFCC.DataGFCC -import GF.GFCC.Raw.AbsGFCCRaw -import GF.GFCC.BuildParser (buildParserInfo) -import GF.GFCC.Parsing.FCFG.Utilities - -import qualified Data.Array as Array -import qualified Data.Map as Map - -pgfMajorVersion, pgfMinorVersion :: Integer -(pgfMajorVersion, pgfMinorVersion) = (1,0) - --- convert parsed grammar to internal GFCC - -toGFCC :: Grammar -> GFCC -toGFCC (Grm [ - App "pgf" (AInt v1 : AInt v2 : App a []:cs), - App "flags" gfs, - ab@( - App "abstract" [ - App "fun" fs, - App "cat" cts - ]), - App "concrete" ccs - ]) = GFCC { - absname = mkCId a, - cncnames = [mkCId c | App c [] <- cs], - gflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs], - abstract = - let - aflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs] - lfuns = [(mkCId f,(toType typ,toExp def)) | App f [typ, def] <- fs] - funs = Map.fromAscList lfuns - lcats = [(mkCId c, Prelude.map toHypo hyps) | App c hyps <- cts] - cats = Map.fromAscList lcats - catfuns = Map.fromAscList - [(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats] - in Abstr aflags funs cats catfuns, - concretes = Map.fromAscList [(mkCId lang, toConcr ts) | App lang ts <- ccs] - } - where - -toConcr :: [RExp] -> Concr -toConcr = foldl add (Concr { - cflags = Map.empty, - lins = Map.empty, - opers = Map.empty, - lincats = Map.empty, - lindefs = Map.empty, - printnames = Map.empty, - paramlincats = Map.empty, - parser = Nothing - }) - where - add :: Concr -> RExp -> Concr - add cnc (App "flags" ts) = cnc { cflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- ts] } - add cnc (App "lin" ts) = cnc { lins = mkTermMap ts } - add cnc (App "oper" ts) = cnc { opers = mkTermMap ts } - add cnc (App "lincat" ts) = cnc { lincats = mkTermMap ts } - add cnc (App "lindef" ts) = cnc { lindefs = mkTermMap ts } - add cnc (App "printname" ts) = cnc { printnames = mkTermMap ts } - add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts } - add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) } - -toPInfo :: [RExp] -> ParserInfo -toPInfo [App "rules" rs, App "startupcats" cs] = buildParserInfo (rules, cats) - where - rules = map toFRule rs - cats = Map.fromList [(mkCId c, map expToInt fs) | App c fs <- cs] - - toFRule :: RExp -> FRule - toFRule (App "rule" - [n, - App "cats" (rt:at), - App "R" ls]) = FRule fun prof args res lins - where - (fun,prof) = toFName n - args = map expToInt at - res = expToInt rt - lins = mkArray [mkArray [toSymbol s | s <- l] | App "S" l <- ls] - -toFName :: RExp -> (CId,[Profile]) -toFName (App "_A" [x]) = (wildCId, [[expToInt x]]) -toFName (App f ts) = (mkCId f, map toProfile ts) - where - toProfile :: RExp -> Profile - toProfile AMet = [] - toProfile (App "_A" [t]) = [expToInt t] - toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts] - -toSymbol :: RExp -> FSymbol -toSymbol (App "P" [n,l]) = FSymCat (expToInt l) (expToInt n) -toSymbol (AStr t) = FSymTok t - -toType :: RExp -> Type -toType e = case e of - App cat [App "H" hypos, App "X" exps] -> - DTyp (map toHypo hypos) (mkCId cat) (map toExp exps) - _ -> error $ "type " ++ show e - -toHypo :: RExp -> Hypo -toHypo e = case e of - App x [typ] -> Hyp (mkCId x) (toType typ) - _ -> error $ "hypo " ++ show e - -toExp :: RExp -> Exp -toExp e = case e of - App "App" [App fun [], App "B" xs, App "X" exps] -> - DTr [mkCId x | App x [] <- xs] (AC (mkCId fun)) (map toExp exps) - App "Eq" eqs -> - EEq [Equ (map toExp ps) (toExp v) | App "E" (v:ps) <- eqs] - App "Var" [App i []] -> DTr [] (AV (mkCId i)) [] - AMet -> DTr [] (AM 0) [] - AInt i -> DTr [] (AI i) [] - AFlt i -> DTr [] (AF i) [] - AStr i -> DTr [] (AS i) [] - _ -> error $ "exp " ++ show e - -toTerm :: RExp -> Term -toTerm e = case e of - App "R" es -> R (map toTerm es) - App "S" es -> S (map toTerm es) - App "FV" es -> FV (map toTerm es) - App "P" [e,v] -> P (toTerm e) (toTerm v) - App "W" [AStr s,v] -> W s (toTerm v) - App "A" [AInt i] -> V (fromInteger i) - App f [] -> F (mkCId f) - AInt i -> C (fromInteger i) - AMet -> TM "?" - AStr s -> K (KS s) ---- - _ -> error $ "term " ++ show e - ------------------------------- ---- from internal to parser -- ------------------------------- - -fromGFCC :: GFCC -> Grammar -fromGFCC gfcc0 = Grm [ - App "pgf" (AInt pgfMajorVersion:AInt pgfMinorVersion - : App (prCId (absname gfcc)) [] : map (flip App [] . prCId) (cncnames gfcc)), - App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (gflags gfcc `Map.union` aflags agfcc)], - App "abstract" [ - App "fun" [App (prCId f) [fromType t,fromExp d] | (f,(t,d)) <- Map.toList (funs agfcc)], - App "cat" [App (prCId f) (map fromHypo hs) | (f,hs) <- Map.toList (cats agfcc)] - ], - App "concrete" [App (prCId lang) (fromConcrete c) | (lang,c) <- Map.toList (concretes gfcc)] - ] - where - gfcc = utf8GFCC gfcc0 - agfcc = abstract gfcc - fromConcrete cnc = [ - App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (cflags cnc)], - App "lin" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lins cnc)], - App "oper" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (opers cnc)], - App "lincat" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lincats cnc)], - App "lindef" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lindefs cnc)], - App "printname" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (printnames cnc)], - App "param" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (paramlincats cnc)] - ] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc) - -fromType :: Type -> RExp -fromType e = case e of - DTyp hypos cat exps -> - App (prCId cat) [ - App "H" (map fromHypo hypos), - App "X" (map fromExp exps)] - -fromHypo :: Hypo -> RExp -fromHypo e = case e of - Hyp x typ -> App (prCId x) [fromType typ] - -fromExp :: Exp -> RExp -fromExp e = case e of - DTr xs (AC fun) exps -> - App "App" [App (prCId fun) [], App "B" (map (flip App [] . prCId) xs), App "X" (map fromExp exps)] - DTr [] (AV x) [] -> App "Var" [App (prCId x) []] - DTr [] (AS s) [] -> AStr s - DTr [] (AF d) [] -> AFlt d - DTr [] (AI i) [] -> AInt (toInteger i) - DTr [] (AM _) [] -> AMet ---- - EEq eqs -> - App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs] - _ -> error $ "exp " ++ show e - -fromTerm :: Term -> RExp -fromTerm e = case e of - R es -> App "R" (map fromTerm es) - S es -> App "S" (map fromTerm es) - FV es -> App "FV" (map fromTerm es) - P e v -> App "P" [fromTerm e, fromTerm v] - W s v -> App "W" [AStr s, fromTerm v] - C i -> AInt (toInteger i) - TM _ -> AMet - F f -> App (prCId f) [] - V i -> App "A" [AInt (toInteger i)] - K (KS s) -> AStr s ---- - K (KP d vs) -> App "FV" (str d : [str v | Var v _ <- vs]) ---- - where - str v = App "S" (map AStr v) - --- ** Parsing info - -fromPInfo :: ParserInfo -> RExp -fromPInfo p = App "parser" [ - App "rules" [fromFRule rule | rule <- Array.elems (allRules p)], - App "startupcats" [App (prCId f) (map intToExp cs) | (f,cs) <- Map.toList (startupCats p)] - ] - -fromFRule :: FRule -> RExp -fromFRule (FRule fun prof args res lins) = - App "rule" [fromFName (fun,prof), - App "cats" (intToExp res:map intToExp args), - App "R" [App "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins] - ] - -fromFName :: (CId,[Profile]) -> RExp -fromFName (f,ps) | f == wildCId = fromProfile (head ps) - | otherwise = App (prCId f) (map fromProfile ps) - where - fromProfile :: Profile -> RExp - fromProfile [] = AMet - fromProfile [x] = daughter x - fromProfile args = App "_U" (map daughter args) - - daughter n = App "_A" [intToExp n] - -fromSymbol :: FSymbol -> RExp -fromSymbol (FSymCat l n) = App "P" [intToExp n, intToExp l] -fromSymbol (FSymTok t) = AStr t - --- ** Utilities - -mkTermMap :: [RExp] -> Map.Map CId Term -mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts] - -mkArray :: [a] -> Array.Array Int a -mkArray xs = Array.listArray (0, length xs - 1) xs - -expToInt :: Integral a => RExp -> a -expToInt (App "neg" [AInt i]) = fromIntegral (negate i) -expToInt (AInt i) = fromIntegral i - -expToStr :: RExp -> String -expToStr (AStr s) = s - -intToExp :: Integral a => a -> RExp -intToExp x | x < 0 = App "neg" [AInt (fromIntegral (negate x))] - | otherwise = AInt (fromIntegral x) diff --git a/src-3.0/GF/GFCC/Raw/GFCCRaw.cf b/src-3.0/GF/GFCC/Raw/GFCCRaw.cf deleted file mode 100644 index bedaef685..000000000 --- a/src-3.0/GF/GFCC/Raw/GFCCRaw.cf +++ /dev/null @@ -1,12 +0,0 @@ -Grm. Grammar ::= [RExp] ; - -App. RExp ::= "(" CId [RExp] ")" ; -AId. RExp ::= CId ; -AInt. RExp ::= Integer ; -AStr. RExp ::= String ; -AFlt. RExp ::= Double ; -AMet. RExp ::= "?" ; - -terminator RExp "" ; - -token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; diff --git a/src-3.0/GF/GFCC/Raw/ParGFCCRaw.hs b/src-3.0/GF/GFCC/Raw/ParGFCCRaw.hs deleted file mode 100644 index 159eea5fb..000000000 --- a/src-3.0/GF/GFCC/Raw/ParGFCCRaw.hs +++ /dev/null @@ -1,101 +0,0 @@ -module GF.GFCC.Raw.ParGFCCRaw (parseGrammar) where - -import GF.GFCC.CId -import GF.GFCC.Raw.AbsGFCCRaw - -import Control.Monad -import Data.Char -import qualified Data.ByteString.Char8 as BS - -parseGrammar :: String -> IO Grammar -parseGrammar s = case runP pGrammar s of - Just (x,"") -> return x - _ -> fail "Parse error" - -pGrammar :: P Grammar -pGrammar = liftM Grm pTerms - -pTerms :: P [RExp] -pTerms = liftM2 (:) (pTerm 1) pTerms <++ (skipSpaces >> return []) - -pTerm :: Int -> P RExp -pTerm n = skipSpaces >> (pParen <++ pApp <++ pNum <++ pStr <++ pMeta) - where pParen = between (char '(') (char ')') (pTerm 0) - pApp = liftM2 App pIdent (if n == 0 then pTerms else return []) - pStr = char '"' >> liftM AStr (manyTill (pEsc <++ get) (char '"')) - pEsc = char '\\' >> get - pNum = do x <- munch1 isDigit - ((char '.' >> munch1 isDigit >>= \y -> return (AFlt (read (x++"."++y)))) - <++ - return (AInt (read x))) - pMeta = char '?' >> return AMet - pIdent = liftM2 (:) (satisfy isIdentFirst) (munch isIdentRest) - isIdentFirst c = c == '_' || isAlpha c - isIdentRest c = c == '_' || c == '\'' || isAlphaNum c - --- Parser combinators with only left-biased choice - -newtype P a = P { runP :: String -> Maybe (a,String) } - -instance Monad P where - return x = P (\ts -> Just (x,ts)) - P p >>= f = P (\ts -> p ts >>= \ (x,ts') -> runP (f x) ts') - fail _ = pfail - -instance MonadPlus P where - mzero = pfail - mplus = (<++) - - -get :: P Char -get = P (\ts -> case ts of - [] -> Nothing - c:cs -> Just (c,cs)) - -look :: P String -look = P (\ts -> Just (ts,ts)) - -(<++) :: P a -> P a -> P a -P p <++ P q = P (\ts -> p ts `mplus` q ts) - -pfail :: P a -pfail = P (\ts -> Nothing) - -satisfy :: (Char -> Bool) -> P Char -satisfy p = do c <- get - if p c then return c else pfail - -char :: Char -> P Char -char c = satisfy (c==) - -string :: String -> P String -string this = look >>= scan this - where - scan [] _ = return this - scan (x:xs) (y:ys) | x == y = get >> scan xs ys - scan _ _ = pfail - -skipSpaces :: P () -skipSpaces = look >>= skip - where - skip (c:s) | isSpace c = get >> skip s - skip _ = return () - -manyTill :: P a -> P end -> P [a] -manyTill p end = scan - where scan = (end >> return []) <++ liftM2 (:) p scan - -munch :: (Char -> Bool) -> P String -munch p = munch1 p <++ return [] - -munch1 :: (Char -> Bool) -> P String -munch1 p = liftM2 (:) (satisfy p) (munch p) - -choice :: [P a] -> P a -choice = msum - -between :: P open -> P close -> P a -> P a -between open close p = do open - x <- p - close - return x diff --git a/src-3.0/GF/GFCC/Raw/PrintGFCCRaw.hs b/src-3.0/GF/GFCC/Raw/PrintGFCCRaw.hs deleted file mode 100644 index 23bb8a542..000000000 --- a/src-3.0/GF/GFCC/Raw/PrintGFCCRaw.hs +++ /dev/null @@ -1,35 +0,0 @@ -module GF.GFCC.Raw.PrintGFCCRaw (printTree) where - -import GF.GFCC.CId -import GF.GFCC.Raw.AbsGFCCRaw - -import Data.List (intersperse) -import Numeric (showFFloat) -import qualified Data.ByteString.Char8 as BS - -printTree :: Grammar -> String -printTree g = prGrammar g "" - -prGrammar :: Grammar -> ShowS -prGrammar (Grm xs) = prRExpList xs - -prRExp :: Int -> RExp -> ShowS -prRExp _ (App x []) = showString x -prRExp n (App x xs) = p (showString x . showChar ' ' . prRExpList xs) - where p s = if n == 0 then s else showChar '(' . s . showChar ')' -prRExp _ (AInt x) = shows x -prRExp _ (AStr x) = showChar '"' . concatS (map mkEsc x) . showChar '"' -prRExp _ (AFlt x) = showFFloat Nothing x -prRExp _ AMet = showChar '?' - -mkEsc :: Char -> ShowS -mkEsc s = case s of - '"' -> showString "\\\"" - '\\' -> showString "\\\\" - _ -> showChar s - -prRExpList :: [RExp] -> ShowS -prRExpList = concatS . intersperse (showChar ' ') . map (prRExp 1) - -concatS :: [ShowS] -> ShowS -concatS = foldr (.) id diff --git a/src-3.0/GF/GFCC/ShowLinearize.hs b/src-3.0/GF/GFCC/ShowLinearize.hs deleted file mode 100644 index f627dfd28..000000000 --- a/src-3.0/GF/GFCC/ShowLinearize.hs +++ /dev/null @@ -1,87 +0,0 @@ -module GF.GFCC.ShowLinearize ( - tableLinearize, - recordLinearize, - termLinearize, - allLinearize - ) where - -import GF.GFCC.Linearize -import GF.GFCC.Macros -import GF.GFCC.DataGFCC -import GF.GFCC.CId ---import GF.GFCC.PrintGFCC ---- - -import GF.Data.Operations -import Data.List - --- printing linearizations in different ways with source parameters - --- internal representation, only used internally in this module -data Record = - RR [(String,Record)] - | RT [(String,Record)] - | RFV [Record] - | RS String - | RCon String - -prRecord :: Record -> String -prRecord = prr where - prr t = case t of - RR fs -> concat $ - "{" : - (intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"] - RT fs -> concat $ - "table {" : - (intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"] - RFV ts -> concat $ - "variants {" : (intersperse ";" (map prr ts)) ++ ["}"] - RS s -> prQuotedString s - RCon s -> s - --- uses the encoding of record types in GFCC.paramlincat -mkRecord :: Term -> Term -> Record -mkRecord typ trm = case (typ,trm) of - (R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts] - (S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts] - (_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts]) - (FV ps, C i) -> RCon $ str $ ps !! i - (S [], _) -> RS $ realize trm - _ -> RS $ show trm ---- printTree trm - where - str = realize - --- show all branches, without labels and params -allLinearize :: GFCC -> CId -> Exp -> String -allLinearize gfcc lang = concat . map pr . tabularLinearize gfcc lang where - pr (p,vs) = unlines vs - --- show all branches, with labels and params -tableLinearize :: GFCC -> CId -> Exp -> String -tableLinearize gfcc lang = unlines . map pr . tabularLinearize gfcc lang where - pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" vs) - --- create a table from labels+params to variants -tabularLinearize :: GFCC -> CId -> Exp -> [(String,[String])] -tabularLinearize gfcc lang = branches . recLinearize gfcc lang where - branches r = case r of - RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] - RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] - RFV rs -> [([], ss) | (_,ss) <- concatMap branches rs] - RS s -> [([], [s])] - RCon _ -> [] - --- show record in GF-source-like syntax -recordLinearize :: GFCC -> CId -> Exp -> String -recordLinearize gfcc lang = prRecord . recLinearize gfcc lang - --- create a GF-like record, forming the basis of all functions above -recLinearize :: GFCC -> CId -> Exp -> Record -recLinearize gfcc lang exp = mkRecord typ $ linExp gfcc lang exp where - typ = case exp of - DTr _ (AC f) _ -> lookParamLincat gfcc lang $ valCat $ lookType gfcc f - --- show GFCC term -termLinearize :: GFCC -> CId -> Exp -> String -termLinearize gfcc lang = show . linExp gfcc lang - - diff --git a/src-3.0/GF/GFCC/doc/Eng.gf b/src-3.0/GF/GFCC/doc/Eng.gf deleted file mode 100644 index c64f46313..000000000 --- a/src-3.0/GF/GFCC/doc/Eng.gf +++ /dev/null @@ -1,13 +0,0 @@ -concrete Eng of Ex = { - lincat - S = {s : Str} ; - NP = {s : Str ; n : Num} ; - VP = {s : Num => Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = {s = np.s ++ vp.s ! np.n} ; - She = {s = "she" ; n = Sg} ; - They = {s = "they" ; n = Pl} ; - Sleep = {s = table {Sg => "sleeps" ; Pl => "sleep"}} ; -} diff --git a/src-3.0/GF/GFCC/doc/Ex.gf b/src-3.0/GF/GFCC/doc/Ex.gf deleted file mode 100644 index bd0b03483..000000000 --- a/src-3.0/GF/GFCC/doc/Ex.gf +++ /dev/null @@ -1,8 +0,0 @@ -abstract Ex = { - cat - S ; NP ; VP ; - fun - Pred : NP -> VP -> S ; - She, They : NP ; - Sleep : VP ; -} diff --git a/src-3.0/GF/GFCC/doc/Swe.gf b/src-3.0/GF/GFCC/doc/Swe.gf deleted file mode 100644 index 1d6672371..000000000 --- a/src-3.0/GF/GFCC/doc/Swe.gf +++ /dev/null @@ -1,13 +0,0 @@ -concrete Swe of Ex = { - lincat - S = {s : Str} ; - NP = {s : Str} ; - VP = {s : Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = {s = np.s ++ vp.s} ; - She = {s = "hon"} ; - They = {s = "de"} ; - Sleep = {s = "sover"} ; -} diff --git a/src-3.0/GF/GFCC/doc/Test.gf b/src-3.0/GF/GFCC/doc/Test.gf deleted file mode 100644 index 5cd4c5474..000000000 --- a/src-3.0/GF/GFCC/doc/Test.gf +++ /dev/null @@ -1,64 +0,0 @@ --- to test GFCC compilation - -flags coding=utf8 ; - -cat S ; NP ; N ; VP ; - -fun Pred : NP -> VP -> S ; -fun Pred2 : NP -> VP -> NP -> S ; -fun Det, Dets : N -> NP ; -fun Mina, Sina, Me, Te : NP ; -fun Raha, Paska, Pallo : N ; -fun Puhua, Munia, Sanoa : VP ; - -param Person = P1 | P2 | P3 ; -param Number = Sg | Pl ; -param Case = Nom | Part ; - -param NForm = NF Number Case ; -param VForm = VF Number Person ; - -lincat N = Noun ; -lincat VP = Verb ; - -oper Noun = {s : NForm => Str} ; -oper Verb = {s : VForm => Str} ; - -lincat NP = {s : Case => Str ; a : {n : Number ; p : Person}} ; - -lin Pred np vp = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p} ; -lin Pred2 np vp ob = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p ++ ob.s ! Part} ; -lin Det no = {s = \\c => no.s ! NF Sg c ; a = {n = Sg ; p = P3}} ; -lin Dets no = {s = \\c => no.s ! NF Pl c ; a = {n = Pl ; p = P3}} ; -lin Mina = {s = table Case ["minä" ; "minua"] ; a = {n = Sg ; p = P1}} ; -lin Te = {s = table Case ["te" ; "teitä"] ; a = {n = Pl ; p = P2}} ; -lin Sina = {s = table Case ["sinä" ; "sinua"] ; a = {n = Sg ; p = P2}} ; -lin Me = {s = table Case ["me" ; "meitä"] ; a = {n = Pl ; p = P1}} ; - -lin Raha = mkN "raha" ; -lin Paska = mkN "paska" ; -lin Pallo = mkN "pallo" ; -lin Puhua = mkV "puhu" ; -lin Munia = mkV "muni" ; -lin Sanoa = mkV "sano" ; - -oper mkN : Str -> Noun = \raha -> { - s = table { - NF Sg Nom => raha ; - NF Sg Part => raha + "a" ; - NF Pl Nom => raha + "t" ; - NF Pl Part => Predef.tk 1 raha + "oja" - } - } ; - -oper mkV : Str -> Verb = \puhu -> { - s = table { - VF Sg P1 => puhu + "n" ; - VF Sg P2 => puhu + "t" ; - VF Sg P3 => puhu + Predef.dp 1 puhu ; - VF Pl P1 => puhu + "mme" ; - VF Pl P2 => puhu + "tte" ; - VF Pl P3 => puhu + "vat" - } - } ; - diff --git a/src-3.0/GF/GFCC/doc/gfcc.html b/src-3.0/GF/GFCC/doc/gfcc.html deleted file mode 100644 index 8f8c478c0..000000000 --- a/src-3.0/GF/GFCC/doc/gfcc.html +++ /dev/null @@ -1,809 +0,0 @@ - - - - -The GFCC Grammar Format - -

The GFCC Grammar Format

- -Aarne Ranta
-October 5, 2007 -
- -

-Author's address: -http://www.cs.chalmers.se/~aarne -

-

-History: -

- - -

What is GFCC

-

-GFCC is a low-level format for GF grammars. Its aim is to contain the minimum -that is needed to process GF grammars at runtime. This minimality has three -advantages: -

- - -

-Thus we also want to call GFCC the portable grammar format. -

-

-The idea is that all embedded GF applications use GFCC. -The GF system would be primarily used as a compiler and as a grammar -development tool. -

-

-Since GFCC is implemented in BNFC, a parser of the format is readily -available for C, C++, C#, Haskell, Java, and OCaml. Also an XML -representation can be generated in BNFC. A -reference implementation -of linearization and some other functions has been written in Haskell. -

-

GFCC vs. GFC

-

-GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed -to be a run-time format, but also to -support separate compilation of grammars, i.e. -to store the results of compiling -individual GF modules. But this means that GFC has to contain extra information, -such as type annotations, which is only needed in compilation and not at -run-time. In particular, the pattern matching syntax and semantics of GFC is -complex and therefore difficult to implement in new platforms. -

-

-Actually, GFC is planned to be omitted also as the target format of -separate compilation, where plain GF (type annotated and partially evaluated) -will be used instead. GFC provides only marginal advantages as a target format -compared with GF, and it is therefore just extra weight to carry around this -format. -

-

-The main differences of GFCC compared with GFC (and GF) can be summarized as follows: -

- - -

-Here is an example of a GF grammar, consisting of three modules, -as translated to GFCC. The representations are aligned; thus they do not completely -reflect the order of judgements in GFCC files, which have different orders of -blocks of judgements, and alphabetical sorting. -

-
-                                      grammar Ex(Eng,Swe);
-  
-  abstract Ex = {                     abstract {
-    cat                                 cat
-      S ; NP ; VP ;                      NP[]; S[]; VP[];
-    fun                                 fun
-      Pred : NP -> VP -> S ;             Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
-      She, They : NP ;                   She=[0,"she"];
-      Sleep : VP ;                       They=[1,"they"];
-                                         Sleep=[["sleeps","sleep"]];
-  }                                     } ;
-                                      
-  concrete Eng of Ex = {              concrete Eng {
-    lincat                             lincat
-      S  = {s : Str} ;                  S=[()];
-      NP = {s : Str ; n : Num} ;        NP=[1,()];
-      VP = {s : Num => Str} ;           VP=[[(),()]];
-    param
-      Num = Sg | Pl ;
-    lin                                lin
-      Pred np vp = {                    Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
-        s = np.s ++ vp.s ! np.n} ;      
-      She = {s = "she" ; n = Sg} ;      She=[0,"she"];
-      They = {s = "they" ; n = Pl} ;    They = [1, "they"];
-      Sleep = {s = table {              Sleep=[["sleeps","sleep"]];
-        Sg => "sleeps" ; 
-        Pl => "sleep"                   
-        }                               
-      } ;
-  }                                   } ;
-  
-  concrete Swe of Ex = {              concrete Swe {
-    lincat                             lincat
-      S  = {s : Str} ;                  S=[()];
-      NP = {s : Str} ;                  NP=[()];
-      VP = {s : Str} ;                  VP=[()];
-    param
-      Num = Sg | Pl ;
-    lin                                lin
-      Pred np vp = {                    Pred = [(($0!0),($1!0))];
-        s = np.s ++ vp.s} ;
-      She = {s = "hon"} ;               She = ["hon"];
-      They = {s = "de"} ;               They = ["de"];
-      Sleep = {s = "sover"} ;           Sleep = ["sover"];
-  }                                     } ;                                   
-
-

-

The syntax of GFCC files

-

-The complete BNFC grammar, from which -the rules in this section are taken, is in the file -GF/GFCC/GFCC.cf. -

-

Top level

-

-A grammar has a header telling the name of the abstract syntax -(often specifying an application domain), and the names of -the concrete languages. The abstract syntax and the concrete -syntaxes themselves follow. -

-
-    Grm. Grammar  ::= 
-      "grammar" CId "(" [CId] ")" ";" 
-      Abstract ";" 
-      [Concrete] ;
-  
-    Abs. Abstract ::= 
-      "abstract" "{" 
-        "flags" [Flag] 
-        "fun"   [FunDef] 
-        "cat"   [CatDef] 
-      "}" ;
-  
-    Cnc. Concrete ::= 
-      "concrete" CId "{" 
-        "flags"  [Flag] 
-        "lin"    [LinDef] 
-        "oper"   [LinDef] 
-        "lincat" [LinDef] 
-        "lindef" [LinDef] 
-        "printname" [LinDef]
-      "}" ;
-
-

-This syntax organizes each module to a sequence of fields, such -as flags, linearizations, operations, linearization types, etc. -It is envisaged that particular applications can ignore some -of the fields, typically so that earlier fields are more -important than later ones. -

-

-The judgement forms have the following syntax. -

-
-    Flg. Flag     ::= CId "=" String ;
-    Cat. CatDef   ::= CId "[" [Hypo] "]" ;
-    Fun. FunDef   ::= CId ":" Type "=" Exp ;
-    Lin. LinDef   ::= CId "=" Term ;
-
-

-For the run-time system, the reference implementation in Haskell -uses a structure that gives efficient look-up: -

-
-    data GFCC = GFCC {
-      absname   :: CId ,
-      cncnames  :: [CId] ,
-      abstract  :: Abstr ,
-      concretes :: Map CId Concr
-      }
-  
-    data Abstr = Abstr {
-      aflags  :: Map CId String,     -- value of a flag
-      funs    :: Map CId (Type,Exp), -- type and def of a fun
-      cats    :: Map CId [Hypo],     -- context of a cat
-      catfuns :: Map CId [CId]       -- funs yielding a cat (redundant, for fast lookup)
-      }
-  
-    data Concr = Concr {
-      flags   :: Map CId String, -- value of a flag
-      lins    :: Map CId Term,   -- lin of a fun
-      opers   :: Map CId Term,   -- oper generated by subex elim
-      lincats :: Map CId Term,   -- lin type of a cat
-      lindefs :: Map CId Term,   -- lin default of a cat
-      printnames :: Map CId Term -- printname of a cat or a fun
-      }
-
-

-These definitions are from GF/GFCC/DataGFCC.hs. -

-

-Identifiers (CId) are like Ident in GF, except that -the compiler produces constants prefixed with _ in -the common subterm elimination optimization. -

-
-    token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
-
-

-

Abstract syntax

-

-Types are first-order function types built from argument type -contexts and value types. -category symbols. Syntax trees (Exp) are -rose trees with nodes consisting of a head (Atom) and -bound variables (CId). -

-
-    DTyp. Type  ::= "[" [Hypo] "]" CId [Exp] ;        
-    DTr.  Exp   ::= "[" "(" [CId] ")" Atom [Exp] "]" ;
-    Hyp.  Hypo  ::= CId ":" Type ;
-
-

-The head Atom is either a function -constant, a bound variable, or a metavariable, or a string, integer, or float -literal. -

-
-    AC.   Atom  ::= CId ;
-    AS.   Atom  ::= String ;
-    AI.   Atom  ::= Integer ;
-    AF.   Atom  ::= Double ;
-    AM.   Atom  ::= "?" Integer ;
-
-

-The context-free types and trees of the "old GFCC" are special -cases, which can be defined as follows: -

-
-    Typ.  Type  ::= [CId] "->" CId
-    Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val
-  
-    Tr.   Exp   ::= "(" CId [Exp] ")"
-    Tr fun exps  = DTr [] fun exps
-
-

-To store semantic (def) definitions by cases, the following expression -form is provided, but it is only meaningful in the last field of a function -declaration in an abstract syntax: -

-
-    EEq. Exp      ::= "{" [Equation] "}" ;
-    Equ. Equation ::= [Exp] "->" Exp ;
-
-

-Notice that expressions are used to encode patterns. Primitive notions -(the default semantics in GF) are encoded as empty sets of equations -([]). For a constructor (canonical form) of a category C, we -aim to use the encoding as the application (_constr C). -

-

Concrete syntax

-

-Linearization terms (Term) are built as follows. -Constructor names are shown to make the later code -examples readable. -

-
-    R.  Term ::= "[" [Term] "]" ;        -- array (record/table)
-    P.  Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection)
-    S.  Term ::= "(" [Term] ")" ;        -- concatenated sequence
-    K.  Term ::= Tokn ;                  -- token
-    V.  Term ::= "$" Integer ;           -- argument (subtree)
-    C.  Term ::= Integer ;               -- array index (label/parameter value)
-    FV. Term ::= "[|" [Term] "|]" ;      -- free variation
-    TM. Term ::= "?" ;                   -- linearization of metavariable
-
-

-Tokens are strings or (maybe obsolescent) prefix-dependent -variant lists. -

-
-    KS.  Tokn     ::= String ;
-    KP.  Tokn     ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
-    Var. Variant  ::= [String] "/" [String] ;
-
-

-Two special forms of terms are introduced by the compiler -as optimizations. They can in principle be eliminated, but -their presence makes grammars much more compact. Their semantics -will be explained in a later section. -

-
-    F.  Term ::= CId ;                     -- global constant
-    W.  Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
-
-

-There is also a deprecated form of "record parameter alias", -

-
-    RP. Term ::= "(" Term "@" Term ")";    -- DEPRECATED
-
-

-which will be removed when the migration to new GFCC is complete. -

-

The semantics of concrete syntax terms

-

-The code in this section is from GF/GFCC/Linearize.hs. -

-

Linearization and realization

-

-The linearization algorithm is essentially the same as in -GFC: a tree is linearized by evaluating its linearization term -in the environment of the linearizations of the subtrees. -Literal atoms are linearized in the obvious way. -The function also needs to know the language (i.e. concrete syntax) -in which linearization is performed. -

-
-    linExp :: GFCC -> CId -> Exp -> Term
-    linExp gfcc lang tree@(DTr _ at trees) = case at of
-      AC fun -> comp (Prelude.map lin trees) $ look fun
-      AS s   -> R [kks (show s)] -- quoted
-      AI i   -> R [kks (show i)]
-      AF d   -> R [kks (show d)]
-      AM     -> TM
-     where
-       lin  = linExp gfcc lang
-       comp = compute gfcc lang
-       look = lookLin gfcc lang
-
-

-TODO: bindings must be supported. -

-

-The result of linearization is usually a record, which is realized as -a string using the following algorithm. -

-
-    realize :: Term -> String
-    realize trm = case trm of
-      R (t:_)  -> realize t
-      S ss     -> unwords $ Prelude.map realize ss
-      K (KS s) -> s
-      K (KP s _) -> unwords s ---- prefix choice TODO
-      W s t    -> s ++ realize t
-      FV (t:_) -> realize t
-      TM       -> "?"
-
-

-Notice that realization always picks the first field of a record. -If a linearization type has more than one field, the first field -does not necessarily contain the desired string. -Also notice that the order of record fields in GFCC is not necessarily -the same as in GF source. -

-

Term evaluation

-

-Evaluation follows call-by-value order, with two environments -needed: -

- - -

-The code is presented in one-level pattern matching, to -enable reimplementations in languages that do not permit -deep patterns (such as Java and C++). -

-
-  compute :: GFCC -> CId -> [Term] -> Term -> Term
-  compute gfcc lang args = comp where
-    comp trm = case trm of
-      P r p  -> proj (comp r) (comp p)
-      W s t  -> W s (comp t)
-      R ts   -> R $ Prelude.map comp ts
-      V i    -> idx args (fromInteger i)  -- already computed
-      F c    -> comp $ look c             -- not computed (if contains V)
-      FV ts  -> FV $ Prelude.map comp ts
-      S ts   -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
-      _ -> trm
-  
-    look = lookOper gfcc lang
-  
-    idx xs i = xs !! i
-  
-    proj r p = case (r,p) of
-      (_,     FV ts) -> FV $ Prelude.map (proj r) ts
-      (W s t, _)     -> kks (s ++ getString (proj t p))
-      _              -> comp $ getField r (getIndex p)
-  
-    getString t = case t of
-      K (KS s) -> s
-      _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
-  
-    getIndex t =  case t of
-      C i    -> fromInteger i
-      RP p _ -> getIndex p
-      TM     -> 0  -- default value for parameter
-      _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
-  
-    getField t i = case t of
-      R rs   -> idx rs i
-      RP _ r -> getField r i
-      TM     -> TM
-      _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
-
-

-

The special term constructors

-

-The three forms introduced by the compiler may a need special -explanation. -

-

-Global constants -

-
-    Term ::= CId ;
-
-

-are shorthands for complex terms. They are produced by the -compiler by (iterated) common subexpression elimination. -They are often more powerful than hand-devised code sharing in the source -code. They could be computed off-line by replacing each identifier by -its definition. -

-

-Prefix-suffix tables -

-
-    Term ::= "(" String "+" Term ")" ; 
-
-

-represent tables of word forms divided to the longest common prefix -and its array of suffixes. In the example grammar above, we have -

-
-    Sleep = [("sleep" + ["s",""])]
-
-

-which in fact is equal to the array of full forms -

-
-    ["sleeps", "sleep"]
-
-

-The power of this construction comes from the fact that suffix sets -tend to be repeated in a language, and can therefore be collected -by common subexpression elimination. It is this technique that -explains the used syntax rather than the more accurate -

-
-    "(" String "+" [String] ")"
-
-

-since we want the suffix part to be a Term for the optimization to -take effect. -

-

Compiling to GFCC

-

-Compilation to GFCC is performed by the GF grammar compiler, and -GFCC interpreters need not know what it does. For grammar writers, -however, it might be interesting to know what happens to the grammars -in the process. -

-

-The compilation phases are the following -

-
    -
  1. type check and partially evaluate GF source -
  2. create a symbol table mapping the GF parameter and record types to - fixed-size arrays, and parameter values and record labels to integers -
  3. traverse the linearization rules replacing parameters and labels by integers -
  4. reorganize the created GF grammar so that it has just one abstract syntax - and one concrete syntax per language -
  5. TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the - coding flag) -
  6. translate the GF grammar object to a GFCC grammar object, using a simple - compositional mapping -
  7. perform the word-suffix optimization on GFCC linearization terms -
  8. perform subexpression elimination on each concrete syntax module -
  9. print out the GFCC code -
- -

Problems in GFCC compilation

-

-Two major problems had to be solved in compiling GF to GFCC: -

- - -

-The current implementation is still experimental and may fail -to generate correct code. Any errors remaining are likely to be -related to the two problems just mentioned. -

-

-The order problem is solved in slightly different ways for tables and records. -In both cases, eta expansion is used to establish a -canonical order. Tables are ordered by applying the preorder induced -by param definitions. Records are ordered by sorting them by labels. -This means that -e.g. the s field will in general no longer appear as the first -field, even if it does so in the GF source code. But relying on the -order of fields in a labelled record would be misplaced anyway. -

-

-The canonical form of records is further complicated by lock fields, -i.e. dummy fields of form lock_C = <>, which are added to grammar -libraries to force intensionality of linearization types. The problem -is that the absence of a lock field only generates a warning, not -an error. Therefore a GF grammar can contain objects of the same -type with and without a lock field. This problem was solved in GFCC -generation by just removing all lock fields (defined as fields whose -type is the empty record type). This has the further advantage of -(slightly) reducing the grammar size. More importantly, it is safe -to remove lock fields, because they are never used in computation, -and because intensional types are only needed in grammars reused -as libraries, not in grammars used at runtime. -

-

-While the order problem is rather bureaucratic in nature, run-time -variables are an interesting problem. They arise in the presence -of complex parameter values, created by argument-taking constructors -and parameter records. To give an example, consider the GF parameter -type system -

-
-    Number = Sg | Pl ;
-    Person = P1 | P2 | P3 ;
-    Agr = Ag Number Person ;
-
-

-The values can be translated to integers in the expected way, -

-
-    Sg = 0, Pl = 1
-    P1 = 0, P2 = 1, P3 = 2
-    Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
-    Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
-
-

-However, an argument of Agr can be a run-time variable, as in -

-
-    Ag np.n P3
-
-

-This expression must first be translated to a case expression, -

-
-    case np.n of {
-      0 => 2 ;
-      1 => 5
-      }
-
-

-which can then be translated to the GFCC term -

-
-    ([2,5] ! ($0 ! $1))  
-
-

-assuming that the variable np is the first argument and that its -Number field is the second in the record. -

-

-This transformation of course has to be performed recursively, since -there can be several run-time variables in a parameter value: -

-
-    Ag np.n np.p
-
-

-A similar transformation would be possible to deal with the double -role of parameter records discussed above. Thus the type -

-
-    RNP = {n : Number ; p : Person}
-
-

-could be uniformly translated into the set {0,1,2,3,4,5} -as Agr above. Selections would be simple instances of indexing. -But any projection from the record should be translated into -a case expression, -

-
-    rnp.n  ===> 
-    case rnp of {
-      0 => 0 ;
-      1 => 0 ;
-      2 => 0 ;
-      3 => 1 ;
-      4 => 1 ;
-      5 => 1
-      }
-
-

-To avoid the code bloat resulting from this, we have chosen to -deal with records by a currying transformation: -

-
-    table {n : Number ; p : Person} {... ...}
-     ===>
-    table Number {Sg => table Person {...} ; table Person {...}}
-
-

-This is performed when GFCC is generated. Selections with -records have to be treated likewise, -

-
-    t ! r   ===> t ! r.n ! r.p
-
-

-

The representation of linearization types

-

-Linearization types (lincat) are not needed when generating with -GFCC, but they have been added to enable parser generation directly from -GFCC. The linearization type definitions are shown as a part of the -concrete syntax, by using terms to represent types. Here is the table -showing how different linearization types are encoded. -

-
-    P*                         = max(P)         -- parameter type
-    {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*]  -- record
-    (P => T)*                  = [T* ,...,T*]   -- table, size(P) cases
-    Str*                       = ()
-
-

-For example, the linearization type present/CatEng.NP is -translated as follows: -

-
-    NP = {
-      a : {                     -- 6 = 2*3 values
-        n : {ParamX.Number} ;   -- 2 values
-        p : {ParamX.Person}     -- 3 values
-      } ;
-      s : {ResEng.Case} => Str  -- 3 values
-    }
-  
-    __NP = [[1,2],[(),(),()]]
-
-

-

Running the compiler and the GFCC interpreter

-

-GFCC generation is a part of the -developers' version -of GF since September 2006. To invoke the compiler, the flag --printer=gfcc to the command -pm = print_multi is used. It is wise to recompile the grammar from -source, since previously compiled libraries may not obey the canonical -order of records. -Here is an example, performed in -example/bronzeage. -

-
-    i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
-    i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
-    strip
-    pm -printer=gfcc | wf bronze.gfcc
-
-

-There is also an experimental batch compiler, which does not use the GFC -format or the record aliases. It can be produced by -

-
-    make gfc
-
-

-in GF/src, and invoked by -

-
-    gfc --make FILES
-
-

-

The reference interpreter

-

-The reference interpreter written in Haskell consists of the following files: -

-
-    -- source file for BNFC
-    GFCC.cf       -- labelled BNF grammar of gfcc
-  
-    -- files generated by BNFC
-    AbsGFCC.hs    -- abstrac syntax datatypes
-    ErrM.hs       -- error monad used internally
-    LexGFCC.hs    -- lexer of gfcc files
-    ParGFCC.hs    -- parser of gfcc files and syntax trees
-    PrintGFCC.hs  -- printer of gfcc files and syntax trees
-  
-    -- hand-written files
-    DataGFCC.hs   -- grammar datatype, post-parser grammar creation
-    Linearize.hs  -- linearization and evaluation
-    Macros.hs     -- utilities abstracting away from GFCC datatypes
-    Generate.hs   -- random and exhaustive generation, generate-and-test parsing
-    API.hs        -- functionalities accessible in embedded GF applications
-    Generate.hs   -- random and exhaustive generation
-    Shell.hs      -- main function - a simple command interpreter
-
-

-It is included in the -developers' version -of GF, in the subdirectories GF/src/GF/GFCC and -GF/src/GF/Devel. -

-

-As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir -Angelov). The interpreter uses the relevant modules -

-
-    GF/Conversions/SimpleToFCFG.hs  -- generate parser from GFCC
-    GF/Parsing/FCFG.hs              -- run the parser
-
-

-

-To compile the interpreter, type -

-
-    make gfcc
-
-

-in GF/src. To run it, type -

-
-    ./gfcc <GFCC-file>
-
-

-The available commands are -

- - -

Embedded formats

- - -

Some things to do

-

-Support for dependent types, higher-order abstract syntax, and -semantic definition in GFCC generation and interpreters. -

-

-Replacing the entire GF shell by one based on GFCC. -

-

-Interpreter in Java. -

-

-Hand-written parsers for GFCC grammars to reduce code size -(and efficiency?) of interpreters. -

-

-Binary format and/or file compression of GFCC output. -

-

-Syntax editor based on GFCC. -

-

-Rewriting of resource libraries in order to exploit the -word-suffix sharing better (depth-one tables, as in FM). -

- - - - diff --git a/src-3.0/GF/GFCC/doc/gfcc.txt b/src-3.0/GF/GFCC/doc/gfcc.txt deleted file mode 100644 index 5dcf2fbdc..000000000 --- a/src-3.0/GF/GFCC/doc/gfcc.txt +++ /dev/null @@ -1,712 +0,0 @@ -The GFCC Grammar Format -Aarne Ranta -December 14, 2007 - -Author's address: -[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne] - -% to compile: txt2tags -thtml --toc gfcc.txt - -History: -- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC -- 5 Oct 2007: new, better structured GFCC with full expressive power -- 19 Oct: translation of lincats, new figures on C++ -- 3 Oct 2006: first version - - -==What is GFCC== - -GFCC is a low-level format for GF grammars. Its aim is to contain the minimum -that is needed to process GF grammars at runtime. This minimality has three -advantages: -- compact grammar files and run-time objects -- time and space efficient processing -- simple definition of interpreters - - -Thus we also want to call GFCC the **portable grammar format**. - -The idea is that all embedded GF applications use GFCC. -The GF system would be primarily used as a compiler and as a grammar -development tool. - -Since GFCC is implemented in BNFC, a parser of the format is readily -available for C, C++, C#, Haskell, Java, and OCaml. Also an XML -representation can be generated in BNFC. A -[reference implementation ../] -of linearization and some other functions has been written in Haskell. - - -==GFCC vs. GFC== - -GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed -to be a run-time format, but also to -support separate compilation of grammars, i.e. -to store the results of compiling -individual GF modules. But this means that GFC has to contain extra information, -such as type annotations, which is only needed in compilation and not at -run-time. In particular, the pattern matching syntax and semantics of GFC is -complex and therefore difficult to implement in new platforms. - -Actually, GFC is planned to be omitted also as the target format of -separate compilation, where plain GF (type annotated and partially evaluated) -will be used instead. GFC provides only marginal advantages as a target format -compared with GF, and it is therefore just extra weight to carry around this -format. - -The main differences of GFCC compared with GFC (and GF) can be -summarized as follows: -- there are no modules, and therefore no qualified names -- a GFCC grammar is multilingual, and consists of a common abstract syntax - together with one concrete syntax per language -- records and tables are replaced by arrays -- record labels and parameter values are replaced by integers -- record projection and table selection are replaced by array indexing -- even though the format does support dependent types and higher-order abstract - syntax, there is no interpreted yet that does this - - - -Here is an example of a GF grammar, consisting of three modules, -as translated to GFCC. The representations are aligned; -thus they do not completely -reflect the order of judgements in GFCC files, which have different orders of -blocks of judgements, and alphabetical sorting. -``` - grammar Ex(Eng,Swe); - -abstract Ex = { abstract { - cat cat - S ; NP ; VP ; NP[]; S[]; VP[]; - fun fun - Pred : NP -> VP -> S ; Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))]; - She, They : NP ; She=[0,"she"]; - Sleep : VP ; They=[1,"they"]; - Sleep=[["sleeps","sleep"]]; -} } ; - -concrete Eng of Ex = { concrete Eng { - lincat lincat - S = {s : Str} ; S=[()]; - NP = {s : Str ; n : Num} ; NP=[1,()]; - VP = {s : Num => Str} ; VP=[[(),()]]; - param - Num = Sg | Pl ; - lin lin - Pred np vp = { Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))]; - s = np.s ++ vp.s ! np.n} ; - She = {s = "she" ; n = Sg} ; She=[0,"she"]; - They = {s = "they" ; n = Pl} ; They = [1, "they"]; - Sleep = {s = table { Sleep=[["sleeps","sleep"]]; - Sg => "sleeps" ; - Pl => "sleep" - } - } ; -} } ; - -concrete Swe of Ex = { concrete Swe { - lincat lincat - S = {s : Str} ; S=[()]; - NP = {s : Str} ; NP=[()]; - VP = {s : Str} ; VP=[()]; - param - Num = Sg | Pl ; - lin lin - Pred np vp = { Pred = [(($0!0),($1!0))]; - s = np.s ++ vp.s} ; - She = {s = "hon"} ; She = ["hon"]; - They = {s = "de"} ; They = ["de"]; - Sleep = {s = "sover"} ; Sleep = ["sover"]; -} } ; -``` - -==The syntax of GFCC files== - -The complete BNFC grammar, from which -the rules in this section are taken, is in the file -[``GF/GFCC/GFCC.cf`` ../DataGFCC.cf]. - - -===Top level=== - -A grammar has a header telling the name of the abstract syntax -(often specifying an application domain), and the names of -the concrete languages. The abstract syntax and the concrete -syntaxes themselves follow. -``` - Grm. Grammar ::= - "grammar" CId "(" [CId] ")" ";" - Abstract ";" - [Concrete] ; - - Abs. Abstract ::= - "abstract" "{" - "flags" [Flag] - "fun" [FunDef] - "cat" [CatDef] - "}" ; - - Cnc. Concrete ::= - "concrete" CId "{" - "flags" [Flag] - "lin" [LinDef] - "oper" [LinDef] - "lincat" [LinDef] - "lindef" [LinDef] - "printname" [LinDef] - "}" ; -``` -This syntax organizes each module to a sequence of **fields**, such -as flags, linearizations, operations, linearization types, etc. -It is envisaged that particular applications can ignore some -of the fields, typically so that earlier fields are more -important than later ones. - -The judgement forms have the following syntax. -``` - Flg. Flag ::= CId "=" String ; - Cat. CatDef ::= CId "[" [Hypo] "]" ; - Fun. FunDef ::= CId ":" Type "=" Exp ; - Lin. LinDef ::= CId "=" Term ; -``` -For the run-time system, the reference implementation in Haskell -uses a structure that gives efficient look-up: -``` - data GFCC = GFCC { - absname :: CId , - cncnames :: [CId] , - abstract :: Abstr , - concretes :: Map CId Concr - } - - data Abstr = Abstr { - aflags :: Map CId String, -- value of a flag - funs :: Map CId (Type,Exp), -- type and def of a fun - cats :: Map CId [Hypo], -- context of a cat - catfuns :: Map CId [CId] -- funs yielding a cat (redundant, for fast lookup) - } - - data Concr = Concr { - flags :: Map CId String, -- value of a flag - lins :: Map CId Term, -- lin of a fun - opers :: Map CId Term, -- oper generated by subex elim - lincats :: Map CId Term, -- lin type of a cat - lindefs :: Map CId Term, -- lin default of a cat - printnames :: Map CId Term -- printname of a cat or a fun - } -``` -These definitions are from [``GF/GFCC/DataGFCC.hs`` ../DataGFCC.hs]. - -Identifiers (``CId``) are like ``Ident`` in GF, except that -the compiler produces constants prefixed with ``_`` in -the common subterm elimination optimization. -``` - token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; -``` - - -===Abstract syntax=== - -Types are first-order function types built from argument type -contexts and value types. -category symbols. Syntax trees (``Exp``) are -rose trees with nodes consisting of a head (``Atom``) and -bound variables (``CId``). -``` - DTyp. Type ::= "[" [Hypo] "]" CId [Exp] ; - DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ; - Hyp. Hypo ::= CId ":" Type ; -``` -The head Atom is either a function -constant, a bound variable, or a metavariable, or a string, integer, or float -literal. -``` - AC. Atom ::= CId ; - AS. Atom ::= String ; - AI. Atom ::= Integer ; - AF. Atom ::= Double ; - AM. Atom ::= "?" Integer ; -``` -The context-free types and trees of the "old GFCC" are special -cases, which can be defined as follows: -``` - Typ. Type ::= [CId] "->" CId - Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val - - Tr. Exp ::= "(" CId [Exp] ")" - Tr fun exps = DTr [] fun exps -``` -To store semantic (``def``) definitions by cases, the following expression -form is provided, but it is only meaningful in the last field of a function -declaration in an abstract syntax: -``` - EEq. Exp ::= "{" [Equation] "}" ; - Equ. Equation ::= [Exp] "->" Exp ; -``` -Notice that expressions are used to encode patterns. Primitive notions -(the default semantics in GF) are encoded as empty sets of equations -(``[]``). For a constructor (canonical form) of a category ``C``, we -aim to use the encoding as the application ``(_constr C)``. - - - -===Concrete syntax=== - -Linearization terms (``Term``) are built as follows. -Constructor names are shown to make the later code -examples readable. -``` - R. Term ::= "[" [Term] "]" ; -- array (record/table) - P. Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection) - S. Term ::= "(" [Term] ")" ; -- concatenated sequence - K. Term ::= Tokn ; -- token - V. Term ::= "$" Integer ; -- argument (subtree) - C. Term ::= Integer ; -- array index (label/parameter value) - FV. Term ::= "[|" [Term] "|]" ; -- free variation - TM. Term ::= "?" ; -- linearization of metavariable -``` -Tokens are strings or (maybe obsolescent) prefix-dependent -variant lists. -``` - KS. Tokn ::= String ; - KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; - Var. Variant ::= [String] "/" [String] ; -``` -Two special forms of terms are introduced by the compiler -as optimizations. They can in principle be eliminated, but -their presence makes grammars much more compact. Their semantics -will be explained in a later section. -``` - F. Term ::= CId ; -- global constant - W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table -``` -There is also a deprecated form of "record parameter alias", -``` - RP. Term ::= "(" Term "@" Term ")"; -- DEPRECATED -``` -which will be removed when the migration to new GFCC is complete. - - - -==The semantics of concrete syntax terms== - -The code in this section is from [``GF/GFCC/Linearize.hs`` ../Linearize.hs]. - - -===Linearization and realization=== - -The linearization algorithm is essentially the same as in -GFC: a tree is linearized by evaluating its linearization term -in the environment of the linearizations of the subtrees. -Literal atoms are linearized in the obvious way. -The function also needs to know the language (i.e. concrete syntax) -in which linearization is performed. -``` - linExp :: GFCC -> CId -> Exp -> Term - linExp gfcc lang tree@(DTr _ at trees) = case at of - AC fun -> comp (Prelude.map lin trees) $ look fun - AS s -> R [kks (show s)] -- quoted - AI i -> R [kks (show i)] - AF d -> R [kks (show d)] - AM -> TM - where - lin = linExp gfcc lang - comp = compute gfcc lang - look = lookLin gfcc lang -``` -TODO: bindings must be supported. - -The result of linearization is usually a record, which is realized as -a string using the following algorithm. -``` - realize :: Term -> String - realize trm = case trm of - R (t:_) -> realize t - S ss -> unwords $ Prelude.map realize ss - K (KS s) -> s - K (KP s _) -> unwords s ---- prefix choice TODO - W s t -> s ++ realize t - FV (t:_) -> realize t - TM -> "?" -``` -Notice that realization always picks the first field of a record. -If a linearization type has more than one field, the first field -does not necessarily contain the desired string. -Also notice that the order of record fields in GFCC is not necessarily -the same as in GF source. - - -===Term evaluation=== - -Evaluation follows call-by-value order, with two environments -needed: -- the grammar (a concrete syntax) to give the global constants -- an array of terms to give the subtree linearizations - - -The code is presented in one-level pattern matching, to -enable reimplementations in languages that do not permit -deep patterns (such as Java and C++). -``` -compute :: GFCC -> CId -> [Term] -> Term -> Term -compute gfcc lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ Prelude.map comp ts - V i -> idx args (fromInteger i) -- already computed - F c -> comp $ look c -- not computed (if contains V) - FV ts -> FV $ Prelude.map comp ts - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - _ -> trm - - look = lookOper gfcc lang - - idx xs i = xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ Prelude.map (proj r) ts - (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p - TM -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - - getField t i = case t of - R rs -> idx rs i - RP _ r -> getField r i - TM -> TM - _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t -``` - -===The special term constructors=== - -The three forms introduced by the compiler may a need special -explanation. - -Global constants -``` - Term ::= CId ; -``` -are shorthands for complex terms. They are produced by the -compiler by (iterated) **common subexpression elimination**. -They are often more powerful than hand-devised code sharing in the source -code. They could be computed off-line by replacing each identifier by -its definition. - -**Prefix-suffix tables** -``` - Term ::= "(" String "+" Term ")" ; -``` -represent tables of word forms divided to the longest common prefix -and its array of suffixes. In the example grammar above, we have -``` - Sleep = [("sleep" + ["s",""])] -``` -which in fact is equal to the array of full forms -``` - ["sleeps", "sleep"] -``` -The power of this construction comes from the fact that suffix sets -tend to be repeated in a language, and can therefore be collected -by common subexpression elimination. It is this technique that -explains the used syntax rather than the more accurate -``` - "(" String "+" [String] ")" -``` -since we want the suffix part to be a ``Term`` for the optimization to -take effect. - - - -==Compiling to GFCC== - -Compilation to GFCC is performed by the GF grammar compiler, and -GFCC interpreters need not know what it does. For grammar writers, -however, it might be interesting to know what happens to the grammars -in the process. - -The compilation phases are the following -+ type check and partially evaluate GF source -+ create a symbol table mapping the GF parameter and record types to - fixed-size arrays, and parameter values and record labels to integers -+ traverse the linearization rules replacing parameters and labels by integers -+ reorganize the created GF grammar so that it has just one abstract syntax - and one concrete syntax per language -+ TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the - ``coding`` flag) -+ translate the GF grammar object to a GFCC grammar object, using a simple - compositional mapping -+ perform the word-suffix optimization on GFCC linearization terms -+ perform subexpression elimination on each concrete syntax module -+ print out the GFCC code - - - - -===Problems in GFCC compilation=== - -Two major problems had to be solved in compiling GF to GFCC: -- consistent order of tables and records, to permit the array translation -- run-time variables in complex parameter values. - - -The current implementation is still experimental and may fail -to generate correct code. Any errors remaining are likely to be -related to the two problems just mentioned. - -The order problem is solved in slightly different ways for tables and records. -In both cases, **eta expansion** is used to establish a -canonical order. Tables are ordered by applying the preorder induced -by ``param`` definitions. Records are ordered by sorting them by labels. -This means that -e.g. the ``s`` field will in general no longer appear as the first -field, even if it does so in the GF source code. But relying on the -order of fields in a labelled record would be misplaced anyway. - -The canonical form of records is further complicated by lock fields, -i.e. dummy fields of form ``lock_C = <>``, which are added to grammar -libraries to force intensionality of linearization types. The problem -is that the absence of a lock field only generates a warning, not -an error. Therefore a GF grammar can contain objects of the same -type with and without a lock field. This problem was solved in GFCC -generation by just removing all lock fields (defined as fields whose -type is the empty record type). This has the further advantage of -(slightly) reducing the grammar size. More importantly, it is safe -to remove lock fields, because they are never used in computation, -and because intensional types are only needed in grammars reused -as libraries, not in grammars used at runtime. - -While the order problem is rather bureaucratic in nature, run-time -variables are an interesting problem. They arise in the presence -of complex parameter values, created by argument-taking constructors -and parameter records. To give an example, consider the GF parameter -type system -``` - Number = Sg | Pl ; - Person = P1 | P2 | P3 ; - Agr = Ag Number Person ; -``` -The values can be translated to integers in the expected way, -``` - Sg = 0, Pl = 1 - P1 = 0, P2 = 1, P3 = 2 - Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2, - Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5 -``` -However, an argument of ``Agr`` can be a run-time variable, as in -``` - Ag np.n P3 -``` -This expression must first be translated to a case expression, -``` - case np.n of { - 0 => 2 ; - 1 => 5 - } -``` -which can then be translated to the GFCC term -``` - ([2,5] ! ($0 ! $1)) -``` -assuming that the variable ``np`` is the first argument and that its -``Number`` field is the second in the record. - -This transformation of course has to be performed recursively, since -there can be several run-time variables in a parameter value: -``` - Ag np.n np.p -``` -A similar transformation would be possible to deal with the double -role of parameter records discussed above. Thus the type -``` - RNP = {n : Number ; p : Person} -``` -could be uniformly translated into the set ``{0,1,2,3,4,5}`` -as ``Agr`` above. Selections would be simple instances of indexing. -But any projection from the record should be translated into -a case expression, -``` - rnp.n ===> - case rnp of { - 0 => 0 ; - 1 => 0 ; - 2 => 0 ; - 3 => 1 ; - 4 => 1 ; - 5 => 1 - } -``` -To avoid the code bloat resulting from this, we have chosen to -deal with records by a **currying** transformation: -``` - table {n : Number ; p : Person} {... ...} - ===> - table Number {Sg => table Person {...} ; table Person {...}} -``` -This is performed when GFCC is generated. Selections with -records have to be treated likewise, -``` - t ! r ===> t ! r.n ! r.p -``` - - -===The representation of linearization types=== - -Linearization types (``lincat``) are not needed when generating with -GFCC, but they have been added to enable parser generation directly from -GFCC. The linearization type definitions are shown as a part of the -concrete syntax, by using terms to represent types. Here is the table -showing how different linearization types are encoded. -``` - P* = max(P) -- parameter type - {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- record - (P => T)* = [T* ,...,T*] -- table, size(P) cases - Str* = () -``` -For example, the linearization type ``present/CatEng.NP`` is -translated as follows: -``` - NP = { - a : { -- 6 = 2*3 values - n : {ParamX.Number} ; -- 2 values - p : {ParamX.Person} -- 3 values - } ; - s : {ResEng.Case} => Str -- 3 values - } - - __NP = [[1,2],[(),(),()]] -``` - - - - -===Running the compiler and the GFCC interpreter=== - -GFCC generation is a part of the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF since September 2006. To invoke the compiler, the flag -``-printer=gfcc`` to the command -``pm = print_multi`` is used. It is wise to recompile the grammar from -source, since previously compiled libraries may not obey the canonical -order of records. -Here is an example, performed in -[example/bronzeage ../../../../../examples/bronzeage]. -``` - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf - strip - pm -printer=gfcc | wf bronze.gfcc -``` -There is also an experimental batch compiler, which does not use the GFC -format or the record aliases. It can be produced by -``` - make gfc -``` -in ``GF/src``, and invoked by -``` - gfc --make FILES -``` - - - - -==The reference interpreter== - -The reference interpreter written in Haskell consists of the following files: -``` - -- source file for BNFC - GFCC.cf -- labelled BNF grammar of gfcc - - -- files generated by BNFC - AbsGFCC.hs -- abstrac syntax datatypes - ErrM.hs -- error monad used internally - LexGFCC.hs -- lexer of gfcc files - ParGFCC.hs -- parser of gfcc files and syntax trees - PrintGFCC.hs -- printer of gfcc files and syntax trees - - -- hand-written files - DataGFCC.hs -- grammar datatype, post-parser grammar creation - Linearize.hs -- linearization and evaluation - Macros.hs -- utilities abstracting away from GFCC datatypes - Generate.hs -- random and exhaustive generation, generate-and-test parsing - API.hs -- functionalities accessible in embedded GF applications - Generate.hs -- random and exhaustive generation - Shell.hs -- main function - a simple command interpreter -``` -It is included in the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF, in the subdirectories [``GF/src/GF/GFCC`` ../] and -[``GF/src/GF/Devel`` ../../Devel]. - -As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir -Angelov). The interpreter uses the relevant modules -``` - GF/Conversions/SimpleToFCFG.hs -- generate parser from GFCC - GF/Parsing/FCFG.hs -- run the parser -``` - - -To compile the interpreter, type -``` - make gfcc -``` -in ``GF/src``. To run it, type -``` - ./gfcc -``` -The available commands are -- ``gr ``: generate a number of random trees in category. - and show their linearizations in all languages -- ``grt ``: generate a number of random trees in category. - and show the trees and their linearizations in all languages -- ``gt ``: generate a number of trees in category from smallest, - and show their linearizations in all languages -- ``gtt ``: generate a number of trees in category from smallest, - and show the trees and their linearizations in all languages -- ``p ``: parse a string into a set of trees -- ``lin ``: linearize tree in all languages, also showing full records -- ``q``: terminate the system cleanly - - - -==Embedded formats== - -- JavaScript: compiler of linearization and abstract syntax - -- Haskell: compiler of abstract syntax and interpreter with parsing, - linearization, and generation - -- C: compiler of linearization (old GFCC) - -- C++: embedded interpreter supporting linearization (old GFCC) - - - -==Some things to do== - -Support for dependent types, higher-order abstract syntax, and -semantic definition in GFCC generation and interpreters. - -Replacing the entire GF shell by one based on GFCC. - -Interpreter in Java. - -Hand-written parsers for GFCC grammars to reduce code size -(and efficiency?) of interpreters. - -Binary format and/or file compression of GFCC output. - -Syntax editor based on GFCC. - -Rewriting of resource libraries in order to exploit the -word-suffix sharing better (depth-one tables, as in FM). - diff --git a/src-3.0/GF/GFCC/doc/old-GFCC.cf b/src-3.0/GF/GFCC/doc/old-GFCC.cf deleted file mode 100644 index 65657a259..000000000 --- a/src-3.0/GF/GFCC/doc/old-GFCC.cf +++ /dev/null @@ -1,50 +0,0 @@ -Grm. Grammar ::= Header ";" Abstract ";" [Concrete] ; -Hdr. Header ::= "grammar" CId "(" [CId] ")" ; -Abs. Abstract ::= "abstract" "{" [AbsDef] "}" ; -Cnc. Concrete ::= "concrete" CId "{" [CncDef] "}" ; - -Fun. AbsDef ::= CId ":" Type "=" Exp ; ---AFl. AbsDef ::= "%" CId "=" String ; -- flag -Lin. CncDef ::= CId "=" Term ; ---CFl. CncDef ::= "%" CId "=" String ; -- flag - -Typ. Type ::= [CId] "->" CId ; -Tr. Exp ::= "(" Atom [Exp] ")" ; -AC. Atom ::= CId ; -AS. Atom ::= String ; -AI. Atom ::= Integer ; -AF. Atom ::= Double ; -AM. Atom ::= "?" ; -trA. Exp ::= Atom ; -define trA a = Tr a [] ; - -R. Term ::= "[" [Term] "]" ; -- record/table -P. Term ::= "(" Term "!" Term ")" ; -- projection/selection -S. Term ::= "(" [Term] ")" ; -- sequence with ++ -K. Term ::= Tokn ; -- token -V. Term ::= "$" Integer ; -- argument -C. Term ::= Integer ; -- parameter value/label -F. Term ::= CId ; -- global constant -FV. Term ::= "[|" [Term] "|]" ; -- free variation -W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table -RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias -TM. Term ::= "?" ; -- lin of metavariable - -L. Term ::= "(" CId "->" Term ")" ; -- lambda abstracted table -BV. Term ::= "#" CId ; -- lambda-bound variable - -KS. Tokn ::= String ; -KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; -Var. Variant ::= [String] "/" [String] ; - - -terminator Concrete ";" ; -terminator AbsDef ";" ; -terminator CncDef ";" ; -separator CId "," ; -separator Term "," ; -terminator Exp "" ; -terminator String "" ; -separator Variant "," ; - -token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; diff --git a/src-3.0/GF/GFCC/doc/old-gfcc.txt b/src-3.0/GF/GFCC/doc/old-gfcc.txt deleted file mode 100644 index 6ffd9bd64..000000000 --- a/src-3.0/GF/GFCC/doc/old-gfcc.txt +++ /dev/null @@ -1,656 +0,0 @@ -The GFCC Grammar Format -Aarne Ranta -October 19, 2006 - -Author's address: -[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne] - -% to compile: txt2tags -thtml --toc gfcc.txt - -History: -- 19 Oct: translation of lincats, new figures on C++ -- 3 Oct 2006: first version - - -==What is GFCC== - -GFCC is a low-level format for GF grammars. Its aim is to contain the minimum -that is needed to process GF grammars at runtime. This minimality has three -advantages: -- compact grammar files and run-time objects -- time and space efficient processing -- simple definition of interpreters - - -The idea is that all embedded GF applications are compiled to GFCC. -The GF system would be primarily used as a compiler and as a grammar -development tool. - -Since GFCC is implemented in BNFC, a parser of the format is readily -available for C, C++, Haskell, Java, and OCaml. Also an XML -representation is generated in BNFC. A -[reference implementation ../] -of linearization and some other functions has been written in Haskell. - - -==GFCC vs. GFC== - -GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed -to be a run-time format, but also to -support separate compilation of grammars, i.e. -to store the results of compiling -individual GF modules. But this means that GFC has to contain extra information, -such as type annotations, which is only needed in compilation and not at -run-time. In particular, the pattern matching syntax and semantics of GFC is -complex and therefore difficult to implement in new platforms. - -The main differences of GFCC compared with GFC can be summarized as follows: -- there are no modules, and therefore no qualified names -- a GFCC grammar is multilingual, and consists of a common abstract syntax - together with one concrete syntax per language -- records and tables are replaced by arrays -- record labels and parameter values are replaced by integers -- record projection and table selection are replaced by array indexing -- there is (so far) no support for dependent types or higher-order abstract - syntax (which would be easy to add, but make interpreters much more difficult - to write) - - -Here is an example of a GF grammar, consisting of three modules, -as translated to GFCC. The representations are aligned, with the exceptions -due to the alphabetical sorting of GFCC grammars. -``` - grammar Ex(Eng,Swe); - -abstract Ex = { abstract { - cat - S ; NP ; VP ; - fun - Pred : NP -> VP -> S ; Pred : NP,VP -> S = (Pred); - She, They : NP ; She : -> NP = (She); - Sleep : VP ; Sleep : -> VP = (Sleep); - They : -> NP = (They); -} } ; - -concrete Eng of Ex = { concrete Eng { - lincat - S = {s : Str} ; - NP = {s : Str ; n : Num} ; - VP = {s : Num => Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))]; - s = np.s ++ vp.s ! np.n} ; - She = {s = "she" ; n = Sg} ; She = [0, "she"]; - They = {s = "they" ; n = Pl} ; - Sleep = {s = table { Sleep = [("sleep" + ["s",""])]; - Sg => "sleeps" ; - Pl => "sleep" They = [1, "they"]; - } } ; - } ; -} - -concrete Swe of Ex = { concrete Swe { - lincat - S = {s : Str} ; - NP = {s : Str} ; - VP = {s : Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = { Pred = [(($0!0),($1!0))]; - s = np.s ++ vp.s} ; - She = {s = "hon"} ; She = ["hon"]; - They = {s = "de"} ; They = ["de"]; - Sleep = {s = "sover"} ; Sleep = ["sover"]; -} } ; -``` - -==The syntax of GFCC files== - -===Top level=== - -A grammar has a header telling the name of the abstract syntax -(often specifying an application domain), and the names of -the concrete languages. The abstract syntax and the concrete -syntaxes themselves follow. -``` - Grammar ::= Header ";" Abstract ";" [Concrete] ; - Header ::= "grammar" CId "(" [CId] ")" ; - Abstract ::= "abstract" "{" [AbsDef] "}" ; - Concrete ::= "concrete" CId "{" [CncDef] "}" ; -``` -Abstract syntax judgements give typings and semantic definitions. -Concrete syntax judgements give linearizations. -``` - AbsDef ::= CId ":" Type "=" Exp ; - CncDef ::= CId "=" Term ; -``` -Also flags are possible, local to each "module" (i.e. abstract and concretes). -``` - AbsDef ::= "%" CId "=" String ; - CncDef ::= "%" CId "=" String ; -``` -For the run-time system, the reference implementation in Haskell -uses a structure that gives efficient look-up: -``` - data GFCC = GFCC { - absname :: CId , - cncnames :: [CId] , - abstract :: Abstr , - concretes :: Map CId Concr - } - - data Abstr = Abstr { - funs :: Map CId Type, -- find the type of a fun - cats :: Map CId [CId] -- find the funs giving a cat - } - - type Concr = Map CId Term -``` - - -===Abstract syntax=== - -Types are first-order function types built from -category symbols. Syntax trees (``Exp``) are -rose trees with the head (``Atom``) either a function -constant, a metavariable, or a string, integer, or float -literal. -``` - Type ::= [CId] "->" CId ; - Exp ::= "(" Atom [Exp] ")" ; - Atom ::= CId ; -- function constant - Atom ::= "?" ; -- metavariable - Atom ::= String ; -- string literal - Atom ::= Integer ; -- integer literal - Atom ::= Double ; -- float literal -``` - - -===Concrete syntax=== - -Linearization terms (``Term``) are built as follows. -Constructor names are shown to make the later code -examples readable. -``` - R. Term ::= "[" [Term] "]" ; -- array - P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field - S. Term ::= "(" [Term] ")" ; -- sequence with ++ - K. Term ::= Tokn ; -- token - V. Term ::= "$" Integer ; -- argument - C. Term ::= Integer ; -- array index - FV. Term ::= "[|" [Term] "|]" ; -- free variation - TM. Term ::= "?" ; -- linearization of metavariable -``` -Tokens are strings or (maybe obsolescent) prefix-dependent -variant lists. -``` - KS. Tokn ::= String ; - KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; - Var. Variant ::= [String] "/" [String] ; -``` -Three special forms of terms are introduced by the compiler -as optimizations. They can in principle be eliminated, but -their presence makes grammars much more compact. Their semantics -will be explained in a later section. -``` - F. Term ::= CId ; -- global constant - W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table - RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias -``` -Identifiers are like ``Ident`` in GF and GFC, except that -the compiler produces constants prefixed with ``_`` in -the common subterm elimination optimization. -``` - token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; -``` - - -==The semantics of concrete syntax terms== - -===Linearization and realization=== - -The linearization algorithm is essentially the same as in -GFC: a tree is linearized by evaluating its linearization term -in the environment of the linearizations of the subtrees. -Literal atoms are linearized in the obvious way. -The function also needs to know the language (i.e. concrete syntax) -in which linearization is performed. -``` - linExp :: GFCC -> CId -> Exp -> Term - linExp mcfg lang tree@(Tr at trees) = case at of - AC fun -> comp (Prelude.map lin trees) $ look fun - AS s -> R [kks (show s)] -- quoted - AI i -> R [kks (show i)] - AF d -> R [kks (show d)] - AM -> TM - where - lin = linExp mcfg lang - comp = compute mcfg lang - look = lookLin mcfg lang -``` -The result of linearization is usually a record, which is realized as -a string using the following algorithm. -``` - realize :: Term -> String - realize trm = case trm of - R (t:_) -> realize t - S ss -> unwords $ Prelude.map realize ss - K (KS s) -> s - K (KP s _) -> unwords s ---- prefix choice TODO - W s t -> s ++ realize t - FV (t:_) -> realize t - TM -> "?" -``` -Since the order of record fields is not necessarily -the same as in GF source, -this realization does not work securely for -categories whose lincats more than one field. - - -===Term evaluation=== - -Evaluation follows call-by-value order, with two environments -needed: -- the grammar (a concrete syntax) to give the global constants -- an array of terms to give the subtree linearizations - - -The code is presented in one-level pattern matching, to -enable reimplementations in languages that do not permit -deep patterns (such as Java and C++). -``` -compute :: GFCC -> CId -> [Term] -> Term -> Term -compute mcfg lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - RP i t -> RP (comp i) (comp t) - W s t -> W s (comp t) - R ts -> R $ Prelude.map comp ts - V i -> idx args (fromInteger i) -- already computed - F c -> comp $ look c -- not computed (if contains V) - FV ts -> FV $ Prelude.map comp ts - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - _ -> trm - - look = lookLin mcfg lang - - idx xs i = xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ Prelude.map (proj r) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p - TM -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - - getField t i = case t of - R rs -> idx rs i - RP _ r -> getField r i - TM -> TM - _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t -``` - -===The special term constructors=== - -The three forms introduced by the compiler may a need special -explanation. - -Global constants -``` - Term ::= CId ; -``` -are shorthands for complex terms. They are produced by the -compiler by (iterated) common subexpression elimination. -They are often more powerful than hand-devised code sharing in the source -code. They could be computed off-line by replacing each identifier by -its definition. - -Prefix-suffix tables -``` - Term ::= "(" String "+" Term ")" ; -``` -represent tables of word forms divided to the longest common prefix -and its array of suffixes. In the example grammar above, we have -``` - Sleep = [("sleep" + ["s",""])] -``` -which in fact is equal to the array of full forms -``` - ["sleeps", "sleep"] -``` -The power of this construction comes from the fact that suffix sets -tend to be repeated in a language, and can therefore be collected -by common subexpression elimination. It is this technique that -explains the used syntax rather than the more accurate -``` - "(" String "+" [String] ")" -``` -since we want the suffix part to be a ``Term`` for the optimization to -take effect. - -The most curious construct of GFCC is the parameter array alias, -``` - Term ::= "(" Term "@" Term ")"; -``` -This form is used as the value of parameter records, such as the type -``` - {n : Number ; p : Person} -``` -The problem with parameter records is their double role. -They can be used like parameter values, as indices in selection, -``` - VP.s ! {n = Sg ; p = P3} -``` -but also as records, from which parameters can be projected: -``` - {n = Sg ; p = P3}.n -``` -Whichever use is selected as primary, a prohibitively complex -case expression must be generated at compilation to GFCC to get the -other use. The adopted -solution is to generate a pair containing both a parameter value index -and an array of indices of record fields. For instance, if we have -``` - param Number = Sg | Pl ; Person = P1 | P2 | P3 ; -``` -we get the encoding -``` - {n = Sg ; p = P3} ---> (2 @ [0,2]) -``` -The GFCC computation rules are essentially -``` - (t ! (i @ _)) = (t ! i) - ((_ @ r) ! j) =(r ! j) -``` - - -==Compiling to GFCC== - -Compilation to GFCC is performed by the GF grammar compiler, and -GFCC interpreters need not know what it does. For grammar writers, -however, it might be interesting to know what happens to the grammars -in the process. - -The compilation phases are the following -+ translate GF source to GFC, as always in GF -+ undo GFC back-end optimizations -+ perform the ``values`` optimization to normalize tables -+ create a symbol table mapping the GFC parameter and record types to - fixed-size arrays, and parameter values and record labels to integers -+ traverse the linearization rules replacing parameters and labels by integers -+ reorganize the created GFC grammar so that it has just one abstract syntax - and one concrete syntax per language -+ apply UTF8 encoding to the grammar, if not yet applied (this is told by the - ``coding`` flag) -+ translate the GFC syntax tree to a GFCC syntax tree, using a simple - compositional mapping -+ perform the word-suffix optimization on GFCC linearization terms -+ perform subexpression elimination on each concrete syntax module -+ print out the GFCC code - - -Notice that a major part of the compilation is done within GFC, so that -GFC-related tasks (such as parser generation) could be performed by -using the old algorithms. - - -===Problems in GFCC compilation=== - -Two major problems had to be solved in compiling GFC to GFCC: -- consistent order of tables and records, to permit the array translation -- run-time variables in complex parameter values. - - -The current implementation is still experimental and may fail -to generate correct code. Any errors remaining are likely to be -related to the two problems just mentioned. - -The order problem is solved in different ways for tables and records. -For tables, the ``values`` optimization of GFC already manages to -maintain a canonical order. But this order can be destroyed by the -``share`` optimization. To make sure that GFCC compilation works properly, -it is safest to recompile the GF grammar by using the ``values`` -optimization flag. - -Records can be canonically ordered by sorting them by labels. -In fact, this was done in connection of the GFCC work as a part -of the GFC generation, to guarantee consistency. This means that -e.g. the ``s`` field will in general no longer appear as the first -field, even if it does so in the GF source code. But relying on the -order of fields in a labelled record would be misplaced anyway. - -The canonical form of records is further complicated by lock fields, -i.e. dummy fields of form ``lock_C = <>``, which are added to grammar -libraries to force intensionality of linearization types. The problem -is that the absence of a lock field only generates a warning, not -an error. Therefore a GFC grammar can contain objects of the same -type with and without a lock field. This problem was solved in GFCC -generation by just removing all lock fields (defined as fields whose -type is the empty record type). This has the further advantage of -(slightly) reducing the grammar size. More importantly, it is safe -to remove lock fields, because they are never used in computation, -and because intensional types are only needed in grammars reused -as libraries, not in grammars used at runtime. - -While the order problem is rather bureaucratic in nature, run-time -variables are an interesting problem. They arise in the presence -of complex parameter values, created by argument-taking constructors -and parameter records. To give an example, consider the GF parameter -type system -``` - Number = Sg | Pl ; - Person = P1 | P2 | P3 ; - Agr = Ag Number Person ; -``` -The values can be translated to integers in the expected way, -``` - Sg = 0, Pl = 1 - P1 = 0, P2 = 1, P3 = 2 - Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2, - Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5 -``` -However, an argument of ``Agr`` can be a run-time variable, as in -``` - Ag np.n P3 -``` -This expression must first be translated to a case expression, -``` - case np.n of { - 0 => 2 ; - 1 => 5 - } -``` -which can then be translated to the GFCC term -``` - ([2,5] ! ($0 ! $1)) -``` -assuming that the variable ``np`` is the first argument and that its -``Number`` field is the second in the record. - -This transformation of course has to be performed recursively, since -there can be several run-time variables in a parameter value: -``` - Ag np.n np.p -``` -A similar transformation would be possible to deal with the double -role of parameter records discussed above. Thus the type -``` - RNP = {n : Number ; p : Person} -``` -could be uniformly translated into the set ``{0,1,2,3,4,5}`` -as ``Agr`` above. Selections would be simple instances of indexing. -But any projection from the record should be translated into -a case expression, -``` - rnp.n ===> - case rnp of { - 0 => 0 ; - 1 => 0 ; - 2 => 0 ; - 3 => 1 ; - 4 => 1 ; - 5 => 1 - } -``` -To avoid the code bloat resulting from this, we chose the alias representation -which is easy enough to deal with in interpreters. - - -===The representation of linearization types=== - -Linearization types (``lincat``) are not needed when generating with -GFCC, but they have been added to enable parser generation directly from -GFCC. The linearization type definitions are shown as a part of the -concrete syntax, by using terms to represent types. Here is the table -showing how different linearization types are encoded. -``` - P* = size(P) -- parameter type - {_ : I ; __ : R}* = (I* @ R*) -- record of parameters - {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- other record - (P => T)* = [T* ,...,T*] -- size(P) times - Str* = () -``` -The category symbols are prefixed with two underscores (``__``). -For example, the linearization type ``present/CatEng.NP`` is -translated as follows: -``` - NP = { - a : { -- 6 = 2*3 values - n : {ParamX.Number} ; -- 2 values - p : {ParamX.Person} -- 3 values - } ; - s : {ResEng.Case} => Str -- 3 values - } - - __NP = [(6@[2,3]),[(),(),()]] -``` - - - - -===Running the compiler and the GFCC interpreter=== - -GFCC generation is a part of the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF since September 2006. To invoke the compiler, the flag -``-printer=gfcc`` to the command -``pm = print_multi`` is used. It is wise to recompile the grammar from -source, since previously compiled libraries may not obey the canonical -order of records. To ``strip`` the grammar before -GFCC translation removes unnecessary interface references. -Here is an example, performed in -[example/bronzeage ../../../../../examples/bronzeage]. -``` - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf - strip - pm -printer=gfcc | wf bronze.gfcc -``` - - - -==The reference interpreter== - -The reference interpreter written in Haskell consists of the following files: -``` - -- source file for BNFC - GFCC.cf -- labelled BNF grammar of gfcc - - -- files generated by BNFC - AbsGFCC.hs -- abstrac syntax of gfcc - ErrM.hs -- error monad used internally - LexGFCC.hs -- lexer of gfcc files - ParGFCC.hs -- parser of gfcc files and syntax trees - PrintGFCC.hs -- printer of gfcc files and syntax trees - - -- hand-written files - DataGFCC.hs -- post-parser grammar creation, linearization and evaluation - GenGFCC.hs -- random and exhaustive generation, generate-and-test parsing - RunGFCC.hs -- main function - a simple command interpreter -``` -It is included in the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF, in the subdirectory [``GF/src/GF/Canon/GFCC`` ../]. - -To compile the interpreter, type -``` - make gfcc -``` -in ``GF/src``. To run it, type -``` - ./gfcc -``` -The available commands are -- ``gr ``: generate a number of random trees in category. - and show their linearizations in all languages -- ``grt ``: generate a number of random trees in category. - and show the trees and their linearizations in all languages -- ``gt ``: generate a number of trees in category from smallest, - and show their linearizations in all languages -- ``gtt ``: generate a number of trees in category from smallest, - and show the trees and their linearizations in all languages -- ``p ``: "parse", i.e. generate trees until match or - until the given number have been generated -- ````: linearize tree in all languages, also showing full records -- ``quit``: terminate the system cleanly - - -==Interpreter in C++== - -A base-line interpreter in C++ has been started. -Its main functionality is random generation of trees and linearization of them. - -Here are some results from running the different interpreters, compared -to running the same grammar in GF, saved in ``.gfcm`` format. -The grammar contains the English, German, and Norwegian -versions of Bronzeage. The experiment was carried out on -Ubuntu Linux laptop with 1.5 GHz Intel centrino processor. - -|| | GF | gfcc(hs) | gfcc++ | -| program size | 7249k | 803k | 113k -| grammar size | 336k | 119k | 119k -| read grammar | 1150ms | 510ms | 100ms -| generate 222 | 9500ms | 450ms | 800ms -| memory | 21M | 10M | 20M - - - -To summarize: -- going from GF to gfcc is a major win in both code size and efficiency -- going from Haskell to C++ interpreter is not a win yet, because of a space - leak in the C++ version - - - -==Some things to do== - -Interpreter in Java. - -Parsing via MCFG -- the FCFG format can possibly be simplified -- parser grammars should be saved in files to make interpreters easier - - -Hand-written parsers for GFCC grammars to reduce code size -(and efficiency?) of interpreters. - -Binary format and/or file compression of GFCC output. - -Syntax editor based on GFCC. - -Rewriting of resource libraries in order to exploit the -word-suffix sharing better (depth-one tables, as in FM). - - - diff --git a/src-3.0/GF/GFCC/doc/syntax.txt b/src-3.0/GF/GFCC/doc/syntax.txt deleted file mode 100644 index db8f7c149..000000000 --- a/src-3.0/GF/GFCC/doc/syntax.txt +++ /dev/null @@ -1,180 +0,0 @@ -GFCC Syntax - - -==Syntax of GFCC files== - -The parser syntax is very simple, as defined in BNF: -``` - Grm. Grammar ::= [RExp] ; - - App. RExp ::= "(" CId [RExp] ")" ; - AId. RExp ::= CId ; - AInt. RExp ::= Integer ; - AStr. RExp ::= String ; - AFlt. RExp ::= Double ; - AMet. RExp ::= "?" ; - - terminator RExp "" ; - - token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; -``` -While a parser and a printer can be generated for many languages -from this grammar by using the BNF Converter, a parser is also -easy to write by hand using recursive descent. - - -==Syntax of well-formed GFCC code== - -Here is a summary of well-formed syntax, -with a comment on the semantics of each construction. -``` - Grammar ::= - ("grammar" CId CId*) -- abstract syntax name and concrete syntax names - "(" "flags" Flag* ")" -- global and abstract flags - "(" "abstract" Abstract ")" -- abstract syntax - "(" "concrete" Concrete* ")" -- concrete syntaxes - - Abstract ::= - "(" "fun" FunDef* ")" -- function definitions - "(" "cat" CatDef* ")" -- category definitions - - Concrete ::= - "(" CId -- language name - "flags" Flag* -- concrete flags - "lin" LinDef* -- linearization rules - "oper" LinDef* -- operations (macros) - "lincat" LinDef* -- linearization type definitions - "lindef" LinDef* -- linearization default definitions - "printname" LinDef* -- printname definitions - "param" LinDef* -- lincats with labels and parameter value names - ")" - - Flag ::= "(" CId String ")" -- flag and value - FunDef ::= "(" CId Type Exp ")" -- function, type, and definition - CatDef ::= "(" CId Hypo* ")" -- category and context - LinDef ::= "(" CId Term ")" -- function and definition - - Type ::= - "(" CId -- value category - "(" "H" Hypo* ")" -- argument context - "(" "X" Exp* ")" ")" -- arguments (of dependent value type) - - Exp ::= - "(" CId -- function - "(" "B" CId* ")" -- bindings - "(" "X" Exp* ")" ")" -- arguments - | CId -- variable - | "?" -- metavariable - | "(" "Eq" Equation* ")" -- group of pattern equations - | Integer -- integer literal (non-negative) - | Float -- floating-point literal (non-negative) - | String -- string literal (in double quotes) - - Hypo ::= "(" CId Type ")" -- variable and type - - Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list - - Term ::= - "(" "R" Term* ")" -- array (record or table) - | "(" "S" Term* ")" -- concatenated sequence - | "(" "FV" Term* ")" -- free variant list - | "(" "P" Term Term ")" -- access to index (projection or selection) - | "(" "W" String Term ")" -- token prefix with suffix list - | "(" "A" Integer ")" -- pointer to subtree - | String -- token (in double quotes) - | Integer -- index in array - | CId -- macro constant - | "?" -- metavariable -``` - - -==GFCC interpreter== - -The first phase in interpreting GFCC is to parse a GFCC file and -build an internal abstract syntax representation, as specified -in the previous section. - -With this representation, linearization can be performed by -a straightforward function from expressions (``Exp``) to terms -(``Term``). All expressions except groups of pattern equations -can be linearized. - -Here is a reference Haskell implementation of linearization: -``` - linExp :: GFCC -> CId -> Exp -> Term - linExp gfcc lang tree@(DTr _ at trees) = case at of - AC fun -> comp (map lin trees) $ look fun - AS s -> R [K (show s)] -- quoted - AI i -> R [K (show i)] - AF d -> R [K (show d)] - AM -> TM - where - lin = linExp gfcc lang - comp = compute gfcc lang - look = lookLin gfcc lang -``` -TODO: bindings must be supported. - -Terms resulting from linearization are evaluated in -call-by-value order, with two environments needed: -- the grammar (a concrete syntax) to give the global constants -- an array of terms to give the subtree linearizations - - -The Haskell implementation works as follows: -``` -compute :: GFCC -> CId -> [Term] -> Term -> Term -compute gfcc lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ map comp ts - V i -> idx args (fromInteger i) -- already computed - F c -> comp $ look c -- not computed (if contains V) - FV ts -> FV $ Prelude.map comp ts - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - _ -> trm - - look = lookOper gfcc lang - - idx xs i = xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ Prelude.map (proj r) ts - (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p - TM -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - - getField t i = case t of - R rs -> idx rs i - RP _ r -> getField r i - TM -> TM - _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t -``` -The result of linearization is usually a record, which is realized as -a string using the following algorithm. -``` - realize :: Term -> String - realize trm = case trm of - R (t:_) -> realize t - S ss -> unwords $ map realize ss - K s -> s - W s t -> s ++ realize t - FV (t:_) -> realize t -- TODO: all variants - TM -> "?" -``` -Notice that realization always picks the first field of a record. -If a linearization type has more than one field, the first field -does not necessarily contain the desired string. -Also notice that the order of record fields in GFCC is not necessarily -the same as in GF source. diff --git a/src-3.0/GFC.hs b/src-3.0/GFC.hs index 09d01f615..72381b6ab 100644 --- a/src-3.0/GFC.hs +++ b/src-3.0/GFC.hs @@ -1,15 +1,15 @@ module GFC (mainGFC) where -- module Main where +import PGF +import PGF.CId +import PGF.Data +import PGF.Raw.Parse +import PGF.Raw.Convert import GF.Compile -import GF.GFCC.PrintGFCC -import GF.GFCC.CId -import GF.GFCC.DataGFCC -import GF.GFCC.Raw.ParGFCCRaw -import GF.GFCC.Raw.ConvertGFCC +import GF.Compile.Export import GF.Infra.UseIO import GF.Infra.Option -import GF.GFCC.API import GF.Data.ErrM import Data.Maybe diff --git a/src-3.0/GFI.hs b/src-3.0/GFI.hs index 97af0b3a4..49d612978 100644 --- a/src-3.0/GFI.hs +++ b/src-3.0/GFI.hs @@ -4,12 +4,11 @@ import GF.Command.Interpreter import GF.Command.Importing import GF.Command.Commands import GF.Data.ErrM -import GF.GFCC.API import GF.Grammar.API -- for cc command - import GF.Infra.UseIO import GF.Infra.Option import GF.System.Readline (fetchCommand) +import PGF import System.CPUTime diff --git a/src-3.0/PGF.hs b/src-3.0/PGF.hs new file mode 100644 index 000000000..4a44ac586 --- /dev/null +++ b/src-3.0/PGF.hs @@ -0,0 +1,181 @@ +---------------------------------------------------------------------- +-- | +-- Module : GFCCAPI +-- Maintainer : Aarne Ranta +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: +-- > CVS $Author: +-- > CVS $Revision: +-- +-- Reduced Application Programmer's Interface to GF, meant for +-- embedded GF systems. AR 19/9/2007 +----------------------------------------------------------------------------- + +module PGF where + +import PGF.CId +import PGF.Linearize +import PGF.Generate +import PGF.Macros +import PGF.Data +import PGF.Raw.Convert +import PGF.Raw.Parse +import PGF.Parsing.FCFG + +import GF.Data.ErrM + +import Data.Char +import qualified Data.Map as Map +import Control.Monad +import System.Random (newStdGen) +import System.Directory (doesFileExist) +import qualified Text.PrettyPrint as PP +import qualified Text.ParserCombinators.ReadP as RP + + +-- This API is meant to be used when embedding GF grammars in Haskell +-- programs. The embedded system is supposed to use the +-- .gfcc grammar format, which is first produced by the gf program. + +--------------------------------------------------- +-- Interface +--------------------------------------------------- + +data MultiGrammar = MultiGrammar {gfcc :: GFCC} +type Language = String +type Category = String +type Tree = Exp + +file2grammar :: FilePath -> IO MultiGrammar + +linearize :: MultiGrammar -> Language -> Tree -> String +parse :: MultiGrammar -> Language -> Category -> String -> [Tree] + +linearizeAll :: MultiGrammar -> Tree -> [String] +linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)] + +parseAll :: MultiGrammar -> Category -> String -> [[Tree]] +parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])] + +generateAll :: MultiGrammar -> Category -> [Tree] +generateRandom :: MultiGrammar -> Category -> IO [Tree] +generateAllDepth :: MultiGrammar -> Category -> Maybe Int -> [Tree] + +readTree :: String -> Tree +showTree :: Tree -> String + +languages :: MultiGrammar -> [Language] +categories :: MultiGrammar -> [Category] + +startCat :: MultiGrammar -> Category + +--------------------------------------------------- +-- Implementation +--------------------------------------------------- + +file2grammar f = do + gfcc <- file2gfcc f + return (MultiGrammar gfcc) + +file2gfcc f = do + s <- readFileIf f + g <- parseGrammar s + return $ toGFCC g + +linearize mgr lang = PGF.Linearize.linearize (gfcc mgr) (mkCId lang) + +parse mgr lang cat s = + case lookParser (gfcc mgr) (mkCId lang) of + Nothing -> error "no parser" + Just pinfo -> case parseFCF "bottomup" pinfo (mkCId cat) (words s) of + Ok x -> x + Bad s -> error s + +linearizeAll mgr = map snd . linearizeAllLang mgr +linearizeAllLang mgr t = + [(lang,PGF.linearize mgr lang t) | lang <- languages mgr] + +parseAll mgr cat = map snd . parseAllLang mgr cat + +parseAllLang mgr cat s = + [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)] + +generateRandom mgr cat = do + gen <- newStdGen + return $ genRandom gen (gfcc mgr) (mkCId cat) + +generateAll mgr cat = generate (gfcc mgr) (mkCId cat) Nothing +generateAllDepth mgr cat = generate (gfcc mgr) (mkCId cat) + +readTree s = case RP.readP_to_S (pExp 0) s of + [(x,"")] -> x + _ -> error "no parse" + +pExps :: RP.ReadP [Exp] +pExps = liftM2 (:) (pExp 1) pExps RP.<++ (RP.skipSpaces >> return []) + +pExp :: Int -> RP.ReadP Exp +pExp n = RP.skipSpaces >> (pParen RP.<++ pApp RP.<++ pNum RP.<++ pStr RP.<++ pMeta) + where + pParen = RP.between (RP.char '(') (RP.char ')') (pExp 0) + pApp = do xs <- RP.option [] (RP.between (RP.char '\\') (RP.string "->") (RP.sepBy1 pIdent (RP.char ','))) + f <- pIdent + ts <- (if n == 0 then pExps else return []) + return (DTr xs (AC f) ts) + pStr = RP.char '"' >> liftM (\s -> DTr [] (AS s) []) (RP.manyTill (pEsc RP.<++ RP.get) (RP.char '"')) + pEsc = RP.char '\\' >> RP.get + pNum = do x <- RP.munch1 isDigit + ((RP.char '.' >> RP.munch1 isDigit >>= \y -> return (DTr [] (AF (read (x++"."++y))) [])) + RP.<++ + (return (DTr [] (AI (read x)) []))) + pMeta = do RP.char '?' + x <- RP.munch1 isDigit + return (DTr [] (AM (read x)) []) + + pIdent = fmap mkCId (liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest)) + isIdentFirst c = c == '_' || isLetter c + isIdentRest c = c == '_' || c == '\'' || isAlphaNum c + + +showTree = PP.render . ppExp False + +ppExp isNested (DTr [] at []) = ppAtom at +ppExp isNested (DTr xs at ts) = ppParens isNested (ppLambdas xs PP.<+> ppAtom at PP.<+> PP.hsep (map (ppExp True) ts)) + where + ppLambdas [] = PP.empty + ppLambdas xs = PP.char '\\' PP.<> + PP.hsep (PP.punctuate PP.comma (map (PP.text . prCId) xs)) PP.<+> + PP.text "->" + + ppParens True = PP.parens + ppParens False = id + +ppAtom (AC id) = PP.text (prCId id) +ppAtom (AS s) = PP.text (show s) +ppAtom (AI n) = PP.integer n +ppAtom (AF d) = PP.double d +ppAtom (AM n) = PP.char '?' PP.<> PP.integer n +ppAtom (AV id) = PP.text (prCId id) + +abstractName mgr = prCId (absname (gfcc mgr)) + +languages mgr = [prCId l | l <- cncnames (gfcc mgr)] + +categories mgr = [prCId c | c <- Map.keys (cats (abstract (gfcc mgr)))] + +startCat mgr = lookStartCat (gfcc mgr) + +emptyMultiGrammar = MultiGrammar emptyGFCC + +------------ for internal use only + +err f g ex = case ex of + Ok x -> g x + Bad s -> f s + +readFileIf f = do + b <- doesFileExist f + if b then readFile f + else putStrLn ("file " ++ f ++ " not found") >> return "" diff --git a/src-3.0/PGF/BuildParser.hs b/src-3.0/PGF/BuildParser.hs new file mode 100644 index 000000000..9dfab3130 --- /dev/null +++ b/src-3.0/PGF/BuildParser.hs @@ -0,0 +1,64 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- FCFG parsing, parser information +----------------------------------------------------------------------------- + +module PGF.BuildParser where + +import GF.Data.SortedList +import GF.Data.Assoc +import PGF.CId +import PGF.Data +import PGF.Parsing.FCFG.Utilities + +import Data.Array +import Data.Maybe +import qualified Data.Map as Map +import qualified Data.Set as Set +import Debug.Trace + + +------------------------------------------------------------ +-- parser information + +getLeftCornerTok (FRule _ _ _ _ lins) + | inRange (bounds syms) 0 = case syms ! 0 of + FSymTok tok -> [tok] + _ -> [] + | otherwise = [] + where + syms = lins ! 0 + +getLeftCornerCat (FRule _ _ args _ lins) + | inRange (bounds syms) 0 = case syms ! 0 of + FSymCat _ d -> [args !! d] + _ -> [] + | otherwise = [] + where + syms = lins ! 0 + +buildParserInfo :: FGrammar -> ParserInfo +buildParserInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $ + ParserInfo { allRules = allrules + , topdownRules = topdownrules + -- , emptyRules = emptyrules + , epsilonRules = epsilonrules + , leftcornerCats = leftcorncats + , leftcornerTokens = leftcorntoks + , grammarCats = grammarcats + , grammarToks = grammartoks + , startupCats = startup + } + + where allrules = listArray (0,length grammar-1) grammar + topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ _ cat _) <- assocs allrules] + epsilonrules = [ ruleid | (ruleid, FRule _ _ _ _ lins) <- assocs allrules, + not (inRange (bounds (lins ! 0)) 0) ] + leftcorncats = accumAssoc id [ (cat, ruleid) | (ruleid, rule) <- assocs allrules, cat <- getLeftCornerCat rule ] + leftcorntoks = accumAssoc id [ (tok, ruleid) | (ruleid, rule) <- assocs allrules, tok <- getLeftCornerTok rule ] + grammarcats = aElems topdownrules + grammartoks = nubsort [t | (FRule _ _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin] diff --git a/src-3.0/PGF/CId.hs b/src-3.0/PGF/CId.hs new file mode 100644 index 000000000..8853d3d5b --- /dev/null +++ b/src-3.0/PGF/CId.hs @@ -0,0 +1,14 @@ +module PGF.CId (CId(..), wildCId, mkCId, prCId) where + +import Data.ByteString.Char8 as BS + +newtype CId = CId BS.ByteString deriving (Eq,Ord,Show) + +wildCId :: CId +wildCId = CId (BS.singleton '_') + +mkCId :: String -> CId +mkCId s = CId (BS.pack s) + +prCId :: CId -> String +prCId (CId x) = BS.unpack x diff --git a/src-3.0/PGF/Check.hs b/src-3.0/PGF/Check.hs new file mode 100644 index 000000000..9d5dd21ec --- /dev/null +++ b/src-3.0/PGF/Check.hs @@ -0,0 +1,186 @@ +module PGF.Check (checkGFCC, checkGFCCio, checkGFCCmaybe) where + +import PGF.CId +import PGF.Data +import PGF.Macros +import GF.Data.ErrM + +import qualified Data.Map as Map +import Control.Monad +import Debug.Trace + +checkGFCCio :: GFCC -> IO GFCC +checkGFCCio gfcc = case checkGFCC gfcc of + Ok (gc,b) -> do + putStrLn $ if b then "OK" else "Corrupted GFCC" + return gc + Bad s -> do + putStrLn s + error "building GFCC failed" + +---- needed in old Custom +checkGFCCmaybe :: GFCC -> Maybe GFCC +checkGFCCmaybe gfcc = case checkGFCC gfcc of + Ok (gc,b) -> return gc + Bad s -> Nothing + +checkGFCC :: GFCC -> Err (GFCC,Bool) +checkGFCC gfcc = do + (cs,bs) <- mapM (checkConcrete gfcc) + (Map.assocs (concretes gfcc)) >>= return . unzip + return (gfcc {concretes = Map.fromAscList cs}, and bs) + + +-- errors are non-fatal; replace with 'fail' to change this +msg s = trace s (return ()) + +andMapM :: Monad m => (a -> m Bool) -> [a] -> m Bool +andMapM f xs = mapM f xs >>= return . and + +labelBoolErr :: String -> Err (x,Bool) -> Err (x,Bool) +labelBoolErr ms iob = do + (x,b) <- iob + if b then return (x,b) else (msg ms >> return (x,b)) + + +checkConcrete :: GFCC -> (CId,Concr) -> Err ((CId,Concr),Bool) +checkConcrete gfcc (lang,cnc) = + labelBoolErr ("happened in language " ++ prCId lang) $ do + (rs,bs) <- mapM checkl (Map.assocs (lins cnc)) >>= return . unzip + return ((lang,cnc{lins = Map.fromAscList rs}),and bs) + where + checkl = checkLin gfcc lang + +checkLin :: GFCC -> CId -> (CId,Term) -> Err ((CId,Term),Bool) +checkLin gfcc lang (f,t) = + labelBoolErr ("happened in function " ++ prCId f) $ do + (t',b) <- checkTerm (lintype gfcc lang f) t --- $ inline gfcc lang t + return ((f,t'),b) + +inferTerm :: [CType] -> Term -> Err (Term,CType) +inferTerm args trm = case trm of + K _ -> returnt str + C i -> returnt $ ints i + V i -> do + testErr (i < length args) ("too large index " ++ show i) + returnt $ args !! i + S ts -> do + (ts',tys) <- mapM infer ts >>= return . unzip + let tys' = filter (/=str) tys + testErr (null tys') + ("expected Str in " ++ show trm ++ " not " ++ unwords (map show tys')) + return (S ts',str) + R ts -> do + (ts',tys) <- mapM infer ts >>= return . unzip + return $ (R ts',tuple tys) + P t u -> do + (t',tt) <- infer t + (u',tu) <- infer u + case tt of + R tys -> case tu of + R vs -> infer $ foldl P t' [P u' (C i) | i <- [0 .. length vs - 1]] + --- R [v] -> infer $ P t v + --- R (v:vs) -> infer $ P (head tys) (R vs) + + C i -> do + testErr (i < length tys) + ("required more than " ++ show i ++ " fields in " ++ show (R tys)) + return (P t' u', tys !! i) -- record: index must be known + _ -> do + let typ = head tys + testErr (all (==typ) tys) ("different types in table " ++ show trm) + return (P t' u', typ) -- table: types must be same + _ -> Bad $ "projection from " ++ show t ++ " : " ++ show tt + FV [] -> returnt tm0 ---- + FV (t:ts) -> do + (t',ty) <- infer t + (ts',tys) <- mapM infer ts >>= return . unzip + testErr (all (eqType ty) tys) ("different types in variants " ++ show trm) + return (FV (t':ts'),ty) + W s r -> infer r + _ -> Bad ("no type inference for " ++ show trm) + where + returnt ty = return (trm,ty) + infer = inferTerm args + +checkTerm :: LinType -> Term -> Err (Term,Bool) +checkTerm (args,val) trm = case inferTerm args trm of + Ok (t,ty) -> if eqType ty val + then return (t,True) + else do + msg ("term: " ++ show trm ++ + "\nexpected type: " ++ show val ++ + "\ninferred type: " ++ show ty) + return (t,False) + Bad s -> do + msg s + return (trm,False) + +eqType :: CType -> CType -> Bool +eqType inf exp = case (inf,exp) of + (C k, C n) -> k <= n -- only run-time corr. + (R rs,R ts) -> length rs == length ts && and [eqType r t | (r,t) <- zip rs ts] + (TM _, _) -> True ---- for variants [] ; not safe + _ -> inf == exp + +-- should be in a generic module, but not in the run-time DataGFCC + +type CType = Term +type LinType = ([CType],CType) + +tuple :: [CType] -> CType +tuple = R + +ints :: Int -> CType +ints = C + +str :: CType +str = S [] + +lintype :: GFCC -> CId -> CId -> LinType +lintype gfcc lang fun = case typeSkeleton (lookType gfcc fun) of + (cs,c) -> (map vlinc cs, linc c) ---- HOAS + where + linc = lookLincat gfcc lang + vlinc (0,c) = linc c + vlinc (i,c) = case linc c of + R ts -> R (ts ++ replicate i str) + +inline :: GFCC -> CId -> Term -> Term +inline gfcc lang t = case t of + F c -> inl $ look c + _ -> composSafeOp inl t + where + inl = inline gfcc lang + look = lookLin gfcc lang + +composOp :: Monad m => (Term -> m Term) -> Term -> m Term +composOp f trm = case trm of + R ts -> liftM R $ mapM f ts + S ts -> liftM S $ mapM f ts + FV ts -> liftM FV $ mapM f ts + P t u -> liftM2 P (f t) (f u) + W s t -> liftM (W s) $ f t + _ -> return trm + +composSafeOp :: (Term -> Term) -> Term -> Term +composSafeOp f = maybe undefined id . composOp (return . f) + +-- from GF.Data.Oper + +maybeErr :: String -> Maybe a -> Err a +maybeErr s = maybe (Bad s) Ok + +testErr :: Bool -> String -> Err () +testErr cond msg = if cond then return () else Bad msg + +errVal :: a -> Err a -> a +errVal a = err (const a) id + +errIn :: String -> Err a -> Err a +errIn msg = err (\s -> Bad (s ++ "\nOCCURRED IN\n" ++ msg)) return + +err :: (String -> b) -> (a -> b) -> Err a -> b +err d f e = case e of + Ok a -> f a + Bad s -> d s diff --git a/src-3.0/PGF/Data.hs b/src-3.0/PGF/Data.hs new file mode 100644 index 000000000..34c58e5d6 --- /dev/null +++ b/src-3.0/PGF/Data.hs @@ -0,0 +1,178 @@ +module PGF.Data where + +import PGF.CId +import GF.Text.UTF8 +import GF.Data.Assoc + +import qualified Data.Map as Map +import Data.List +import Data.Array + +-- internal datatypes for GFCC + +data GFCC = GFCC { + absname :: CId , + cncnames :: [CId] , + gflags :: Map.Map CId String, -- value of a global flag + abstract :: Abstr , + concretes :: Map.Map CId Concr + } + +data Abstr = Abstr { + aflags :: Map.Map CId String, -- value of a flag + funs :: Map.Map CId (Type,Exp), -- type and def of a fun + cats :: Map.Map CId [Hypo], -- context of a cat + catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup) + } + +data Concr = Concr { + cflags :: Map.Map CId String, -- value of a flag + lins :: Map.Map CId Term, -- lin of a fun + opers :: Map.Map CId Term, -- oper generated by subex elim + lincats :: Map.Map CId Term, -- lin type of a cat + lindefs :: Map.Map CId Term, -- lin default of a cat + printnames :: Map.Map CId Term, -- printname of a cat or a fun + paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names + parser :: Maybe ParserInfo -- parser + } + +data Type = + DTyp [Hypo] CId [Exp] + deriving (Eq,Ord,Show) + +data Exp = + DTr [CId] Atom [Exp] + | EEq [Equation] + deriving (Eq,Ord,Show) + +data Atom = + AC CId + | AS String + | AI Integer + | AF Double + | AM Integer + | AV CId + deriving (Eq,Ord,Show) + +data Term = + R [Term] + | P Term Term + | S [Term] + | K Tokn + | V Int + | C Int + | F CId + | FV [Term] + | W String Term + | TM String + deriving (Eq,Ord,Show) + +data Tokn = + KS String + | KP [String] [Variant] + deriving (Eq,Ord,Show) + +data Variant = + Var [String] [String] + deriving (Eq,Ord,Show) + +data Hypo = + Hyp CId Type + deriving (Eq,Ord,Show) + +data Equation = + Equ [Exp] Exp + deriving (Eq,Ord,Show) + + +type FToken = String +type FCat = Int +type FIndex = Int +data FSymbol + = FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int + | FSymTok FToken +type Profile = [Int] +type FPointPos = Int +type FGrammar = ([FRule], Map.Map CId [FCat]) +data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol)) + +type RuleId = Int + +data ParserInfo + = ParserInfo { allRules :: Array RuleId FRule + , topdownRules :: Assoc FCat [RuleId] + -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): + -- , emptyRules :: [RuleId] + , epsilonRules :: [RuleId] + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , leftcornerCats :: Assoc FCat [RuleId] + , leftcornerTokens :: Assoc FToken [RuleId] + -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): + , grammarCats :: [FCat] + , grammarToks :: [FToken] + , startupCats :: Map.Map CId [FCat] + } + + +fcatString, fcatInt, fcatFloat, fcatVar :: Int +fcatString = (-1) +fcatInt = (-2) +fcatFloat = (-3) +fcatVar = (-4) + + +-- print statistics + +statGFCC :: GFCC -> String +statGFCC gfcc = unlines [ + "Abstract\t" ++ prCId (absname gfcc), + "Concretes\t" ++ unwords (map prCId (cncnames gfcc)), + "Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract gfcc)))) + ] + +-- merge two GFCCs; fails is differens absnames; priority to second arg + +unionGFCC :: GFCC -> GFCC -> GFCC +unionGFCC one two = case absname one of + n | n == wildCId -> two -- extending empty grammar + | n == absname two -> one { -- extending grammar with same abstract + concretes = Map.union (concretes two) (concretes one), + cncnames = union (cncnames two) (cncnames one) + } + _ -> one -- abstracts don't match ---- print error msg + +emptyGFCC :: GFCC +emptyGFCC = GFCC { + absname = wildCId, + cncnames = [] , + gflags = Map.empty, + abstract = error "empty grammar, no abstract", + concretes = Map.empty + } + +-- encode idenfifiers and strings in UTF8 + +utf8GFCC :: GFCC -> GFCC +utf8GFCC gfcc = gfcc { + concretes = Map.map u8concr (concretes gfcc) + } + where + u8concr cnc = cnc { + lins = Map.map u8term (lins cnc), + opers = Map.map u8term (opers cnc) + } + u8term = convertStringsInTerm encodeUTF8 + +---- TODO: convert identifiers and flags + +convertStringsInTerm conv t = case t of + K (KS s) -> K (KS (conv s)) + W s r -> W (conv s) (convs r) + R ts -> R $ map convs ts + S ts -> S $ map convs ts + FV ts -> FV $ map convs ts + P u v -> P (convs u) (convs v) + _ -> t + where + convs = convertStringsInTerm conv + diff --git a/src-3.0/PGF/Generate.hs b/src-3.0/PGF/Generate.hs new file mode 100644 index 000000000..72340ffa3 --- /dev/null +++ b/src-3.0/PGF/Generate.hs @@ -0,0 +1,70 @@ +module PGF.Generate where + +import PGF.CId +import PGF.Data +import PGF.Macros + +import qualified Data.Map as M +import System.Random + +-- generate an infinite list of trees exhaustively +generate :: GFCC -> CId -> Maybe Int -> [Exp] +generate gfcc cat dp = concatMap (\i -> gener i cat) depths + where + gener 0 c = [tree (AC f) [] | (f, ([],_)) <- fns c] + gener i c = [ + tr | + (f, (cs,_)) <- fns c, + let alts = map (gener (i-1)) cs, + ts <- combinations alts, + let tr = tree (AC f) ts, + depth tr >= i + ] + fns c = [(f,catSkeleton ty) | (f,ty) <- functionsToCat gfcc c] + depths = maybe [0 ..] (\d -> [0..d]) dp + +-- generate an infinite list of trees randomly +genRandom :: StdGen -> GFCC -> CId -> [Exp] +genRandom gen gfcc cat = genTrees (randomRs (0.0, 1.0 :: Double) gen) cat where + + timeout = 47 -- give up + + genTrees ds0 cat = + let (ds,ds2) = splitAt (timeout+1) ds0 -- for time out, else ds + (t,k) = genTree ds cat + in (if k>timeout then id else (t:)) + (genTrees ds2 cat) -- else (drop k ds) + + genTree rs = gett rs where + gett ds cid | cid == mkCId "String" = (tree (AS "foo") [], 1) + gett ds cid | cid == mkCId "Int" = (tree (AI 12345) [], 1) + gett [] _ = (tree (AS "TIMEOUT") [], 1) ---- + gett ds cat = case fns cat of + [] -> (tree (AM 0) [],1) + fs -> let + d:ds2 = ds + (f,args) = getf d fs + (ts,k) = getts ds2 args + in (tree (AC f) ts, k+1) + getf d fs = let lg = (length fs) in + fs !! (floor (d * fromIntegral lg)) + getts ds cats = case cats of + c:cs -> let + (t, k) = gett ds c + (ts,ks) = getts (drop k ds) cs + in (t:ts, k + ks) + _ -> ([],0) + + fns cat = [(f,(fst (catSkeleton ty))) | (f,ty) <- functionsToCat gfcc cat] + + +{- +-- brute-force parsing method; only returns the first result +-- note: you cannot throw away rules with unknown words from the grammar +-- because it is not known which field in each rule may match the input + +searchParse :: Int -> GFCC -> CId -> [String] -> [Exp] +searchParse i gfcc cat ws = [t | t <- gen, s <- lins t, words s == ws] where + gen = take i $ generate gfcc cat + lins t = [linearize gfcc lang t | lang <- cncnames gfcc] +-} diff --git a/src-3.0/PGF/Linearize.hs b/src-3.0/PGF/Linearize.hs new file mode 100644 index 000000000..94d8aa216 --- /dev/null +++ b/src-3.0/PGF/Linearize.hs @@ -0,0 +1,87 @@ +module PGF.Linearize where + +import PGF.CId +import PGF.Data +import PGF.Macros +import qualified Data.Map as Map +import Data.List + +import Debug.Trace + +-- linearization and computation of concrete GFCC Terms + +linearize :: GFCC -> CId -> Exp -> String +linearize mcfg lang = realize . linExp mcfg lang + +realize :: Term -> String +realize trm = case trm of + R ts -> realize (ts !! 0) + S ss -> unwords $ map realize ss + K t -> case t of + KS s -> s + KP s _ -> unwords s ---- prefix choice TODO + W s t -> s ++ realize t + FV ts -> realize (ts !! 0) ---- other variants TODO + TM s -> s + _ -> "ERROR " ++ show trm ---- debug + +linExp :: GFCC -> CId -> Exp -> Term +linExp mcfg lang tree@(DTr xs at trees) = + addB $ case at of + AC fun -> comp (map lin trees) $ look fun + AS s -> R [kks (show s)] -- quoted + AI i -> R [kks (show i)] + --- [C lst, kks (show i), C size] where + --- lst = mod (fromInteger i) 10 ; size = if i < 10 then 0 else 1 + AF d -> R [kks (show d)] + AV x -> TM (prCId x) + AM i -> TM (show i) + where + lin = linExp mcfg lang + comp = compute mcfg lang + look = lookLin mcfg lang + addB t + | Data.List.null xs = t + | otherwise = case t of + R ts -> R $ ts ++ (Data.List.map (kks . prCId) xs) + TM s -> R $ t : (Data.List.map (kks . prCId) xs) + +compute :: GFCC -> CId -> [Term] -> Term -> Term +compute mcfg lang args = comp where + comp trm = case trm of + P r p -> proj (comp r) (comp p) + W s t -> W s (comp t) + R ts -> R $ map comp ts + V i -> idx args i -- already computed + F c -> comp $ look c -- not computed (if contains argvar) + FV ts -> FV $ map comp ts + S ts -> S $ filter (/= S []) $ map comp ts + _ -> trm + + look = lookOper mcfg lang + + idx xs i = if i > length xs - 1 + then trace + ("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0 + else xs !! i + + proj r p = case (r,p) of + (_, FV ts) -> FV $ map (proj r) ts + (FV ts, _ ) -> FV $ map (\t -> proj t p) ts + (W s t, _) -> kks (s ++ getString (proj t p)) + _ -> comp $ getField r (getIndex p) + + getString t = case t of + K (KS s) -> s + _ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR" + + getIndex t = case t of + C i -> i + TM _ -> 0 -- default value for parameter + _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666 + + getField t i = case t of + R rs -> idx rs i + TM s -> TM s + _ -> error ("ERROR in grammar compiler: field from " ++ show t) t + diff --git a/src-3.0/PGF/Macros.hs b/src-3.0/PGF/Macros.hs new file mode 100644 index 000000000..64ddd24e4 --- /dev/null +++ b/src-3.0/PGF/Macros.hs @@ -0,0 +1,116 @@ +module PGF.Macros where + +import PGF.CId +import PGF.Data +import Control.Monad +import qualified Data.Map as Map +import qualified Data.Array as Array +import Data.Maybe +import Data.List + +-- operations for manipulating GFCC grammars and objects + +lookLin :: GFCC -> CId -> CId -> Term +lookLin gfcc lang fun = + lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes gfcc + +lookOper :: GFCC -> CId -> CId -> Term +lookOper gfcc lang fun = + lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes gfcc + +lookLincat :: GFCC -> CId -> CId -> Term +lookLincat gfcc lang fun = + lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes gfcc + +lookParamLincat :: GFCC -> CId -> CId -> Term +lookParamLincat gfcc lang fun = + lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes gfcc + +lookType :: GFCC -> CId -> Type +lookType gfcc f = + fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc)) + +lookParser :: GFCC -> CId -> Maybe ParserInfo +lookParser gfcc lang = parser $ lookMap (error "no lang") lang $ concretes gfcc + +lookFCFG :: GFCC -> CId -> Maybe FGrammar +lookFCFG gfcc lang = fmap toFGrammar $ lookParser gfcc lang + where + toFGrammar :: ParserInfo -> FGrammar + toFGrammar pinfo = (Array.elems (allRules pinfo), startupCats pinfo) + +lookStartCat :: GFCC -> String +lookStartCat gfcc = fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat")) + [gflags gfcc, aflags (abstract gfcc)] + +lookGlobalFlag :: GFCC -> CId -> String +lookGlobalFlag gfcc f = + lookMap "?" f (gflags gfcc) + +lookAbsFlag :: GFCC -> CId -> String +lookAbsFlag gfcc f = + lookMap "?" f (aflags (abstract gfcc)) + +lookCncFlag :: GFCC -> CId -> CId -> String +lookCncFlag gfcc lang f = + lookMap "?" f $ cflags $ lookMap (error "no lang") lang $ concretes gfcc + +functionsToCat :: GFCC -> CId -> [(CId,Type)] +functionsToCat gfcc cat = + [(f,ty) | f <- fs, Just (ty,_) <- [Map.lookup f $ funs $ abstract gfcc]] + where + fs = lookMap [] cat $ catfuns $ abstract gfcc + +depth :: Exp -> Int +depth tr = case tr of + DTr _ _ [] -> 1 + DTr _ _ ts -> maximum (map depth ts) + 1 + +tree :: Atom -> [Exp] -> Exp +tree = DTr [] + +cftype :: [CId] -> CId -> Type +cftype args val = DTyp [Hyp wildCId (cftype [] arg) | arg <- args] val [] + +catSkeleton :: Type -> ([CId],CId) +catSkeleton ty = case ty of + DTyp hyps val _ -> ([valCat ty | Hyp _ ty <- hyps],val) + +typeSkeleton :: Type -> ([(Int,CId)],CId) +typeSkeleton ty = case ty of + DTyp hyps val _ -> ([(contextLength ty, valCat ty) | Hyp _ ty <- hyps],val) + +valCat :: Type -> CId +valCat ty = case ty of + DTyp _ val _ -> val + +contextLength :: Type -> Int +contextLength ty = case ty of + DTyp hyps _ _ -> length hyps + +exp0 :: Exp +exp0 = tree (AM 0) [] + +primNotion :: Exp +primNotion = EEq [] + +term0 :: CId -> Term +term0 = TM . prCId + +tm0 :: Term +tm0 = TM "?" + +kks :: String -> Term +kks = K . KS + +-- lookup with default value +lookMap :: (Show i, Ord i) => a -> i -> Map.Map i a -> a +lookMap d c m = maybe d id $ Map.lookup c m + +--- from Operations +combinations :: [[a]] -> [[a]] +combinations t = case t of + [] -> [[]] + aa:uu -> [a:u | a <- aa, u <- combinations uu] + + diff --git a/src-3.0/PGF/Parsing/FCFG.hs b/src-3.0/PGF/Parsing/FCFG.hs new file mode 100644 index 000000000..64421a0c4 --- /dev/null +++ b/src-3.0/PGF/Parsing/FCFG.hs @@ -0,0 +1,78 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- FCFG parsing +----------------------------------------------------------------------------- + +module PGF.Parsing.FCFG + (parseFCF,buildParserInfo,ParserInfo(..),makeFinalEdge) where + +import GF.Data.ErrM +import GF.Data.Assoc +import GF.Data.SortedList + +import PGF.CId +import PGF.Data +import PGF.Macros +import PGF.BuildParser +import PGF.Parsing.FCFG.Utilities +import PGF.Parsing.FCFG.Active + +import qualified Data.Map as Map + +---------------------------------------------------------------------- +-- parsing + +-- main parsing function + +parseFCF :: + String -> -- ^ parsing strategy + ParserInfo -> -- ^ compiled grammar (fcfg) + CId -> -- ^ starting category + [String] -> -- ^ input tokens + Err [Exp] -- ^ resulting GF terms +parseFCF strategy pinfo startCat inString = + do let inTokens = input inString + startCats <- Map.lookup startCat (startupCats pinfo) + fcfParser <- {- trace lctree $ -} parseFCF strategy + let chart = fcfParser pinfo startCats inTokens + (i,j) = inputBounds inTokens + finalEdges = [makeFinalEdge cat i j | cat <- startCats] + forests = chart2forests chart (const False) finalEdges + filteredForests = forests >>= applyProfileToForest + trees = nubsort $ filteredForests >>= forest2trees + return $ map tree2term trees + where + parseFCF :: String -> Err (FCFParser) + parseFCF "bottomup" = Ok $ parse "b" + parseFCF "topdown" = Ok $ parse "t" + parseFCF strat = Bad $ "FCFG parsing strategy not defined: " ++ strat + +---------------------------------------------------------------------- +-- parse trees to GFCC terms + +tree2term :: SyntaxTree CId -> Exp +tree2term (TNode f ts) = tree (AC f) (map tree2term ts) +tree2term (TString s) = tree (AS s) [] +tree2term (TInt n) = tree (AI n) [] +tree2term (TFloat f) = tree (AF f) [] +tree2term (TMeta) = exp0 + +---------------------------------------------------------------------- +-- conversion and unification of forests + +-- simplest implementation +applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId] +applyProfileToForest (FNode (fun,profiles) children) + | fun == wildCId = concat chForests + | otherwise = [ FNode fun chForests | not (null chForests) ] + where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles | + forests0 <- children, + forests <- mapM applyProfileToForest forests0 ] +applyProfileToForest (FString s) = [FString s] +applyProfileToForest (FInt n) = [FInt n] +applyProfileToForest (FFloat f) = [FFloat f] +applyProfileToForest (FMeta) = [FMeta] diff --git a/src-3.0/PGF/Parsing/FCFG/Active.hs b/src-3.0/PGF/Parsing/FCFG/Active.hs new file mode 100644 index 000000000..4572062f1 --- /dev/null +++ b/src-3.0/PGF/Parsing/FCFG/Active.hs @@ -0,0 +1,186 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- MCFG parsing, the active algorithm +----------------------------------------------------------------------------- + +module PGF.Parsing.FCFG.Active (FCFParser, parse, makeFinalEdge) where + +import GF.Data.GeneralDeduction +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Data.Utilities + +import PGF.CId +import PGF.Data +import PGF.Parsing.FCFG.Utilities + +import Control.Monad (guard) + +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.Set as Set +import Data.Array + +---------------------------------------------------------------------- +-- * parsing + +makeFinalEdge cat 0 0 = (cat, [EmptyRange]) +makeFinalEdge cat i j = (cat, [makeRange i j]) + +-- | the list of categories = possible starting categories +type FCFParser = ParserInfo + -> [FCat] + -> Input FToken + -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) + + +parse :: String -> FCFParser +parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo + where chart = process strategy pinfo toks axioms emptyXChart + axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks + | isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks + +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec +emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) []) + where + FRule _ _ rhs _ _ = allRules pinfo ! ruleid + +process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat +process strategy pinfo toks [] chart = chart +process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart + where + univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart + | inRange (bounds lin) ppos = + case lin ! ppos of + FSymCat r d -> let c = args !! d + in case recs !! d of + [] -> case insertXChart chart item c of + Nothing -> chart + Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c + rng <- concatRange rng (found' !! r) + return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs))) + ++ + do guard (isTD strategy) + ruleid <- topdownRules pinfo ? c + return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) + in process strategy pinfo toks items chart + found' -> let items = do rng <- concatRange rng (found' !! r) + return (c, Active found rng lbl (ppos+1) node) + in process strategy pinfo toks items chart + FSymTok tok -> let items = do t_rng <- inputToken toks ? tok + rng' <- concatRange rng t_rng + return (cat, Active found rng' lbl (ppos+1) node) + in process strategy pinfo toks items chart + | otherwise = + if inRange (bounds lins) (lbl+1) + then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart + else univRule cat (Final (reverse (rng:found)) node) chart + where + (FRule _ _ args cat lins) = allRules pinfo ! ruleid + lin = lins ! lbl + univRule cat item@(Final found' node) chart = + case insertXChart chart item cat of + Nothing -> chart + Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat + let FRule _ _ args _ lins = allRules pinfo ! ruleid + FSymCat r d = lins ! l ! ppos + rng <- concatRange rng (found' !! r) + return (args !! d, Active found rng l (ppos+1) (updateChildren node d found')) + ++ + do guard (isBU strategy) + ruleid <- leftcornerCats pinfo ? cat + let FRule _ _ args _ lins = allRules pinfo ! ruleid + FSymCat r d = lins ! 0 ! 0 + return (args !! d, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found')) + + updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec + updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs + in process strategy pinfo toks items chart + +---------------------------------------------------------------------- +-- * XChart + +data Item + = Active RangeRec + Range + {-# UNPACK #-} !FIndex + {-# UNPACK #-} !FPointPos + (SyntaxNode RuleId RangeRec) + | Final RangeRec (SyntaxNode RuleId RangeRec) + deriving (Eq, Ord) + +data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c) + +emptyXChart :: Ord c => XChart c +emptyXChart = XChart emptyChart emptyChart + +insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c = + case chartInsert actives item c of + Nothing -> Nothing + Just actives -> Just (XChart actives finals) + +insertXChart (XChart actives finals) item@(Final _ _) c = + case chartInsert finals item c of + Nothing -> Nothing + Just finals -> Just (XChart actives finals) + +lookupXChartAct (XChart actives finals) c = chartLookup actives c +lookupXChartFinal (XChart actives finals) c = chartLookup finals c + +xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) +xchart2syntaxchart (XChart actives finals) pinfo = + accumAssoc groupSyntaxNodes $ + [ case node of + SNode ruleid rrecs -> let FRule fun prof rhs cat _ = allRules pinfo ! ruleid + in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) + SString s -> ((cat,found), SString s) + SInt n -> ((cat,found), SInt n) + SFloat f -> ((cat,found), SFloat f) + | (cat, Final found node) <- chartAssocs finals + ] + +literals :: ParserInfo -> Input FToken -> [(FCat,Item)] +literals pinfo toks = + [let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)] + where + lexer t = + case reads t of + [(n,"")] -> (fcatInt, SInt (n::Integer)) + _ -> case reads t of + [(f,"")] -> (fcatFloat, SFloat (f::Double)) + _ -> (fcatString,SString t) + + +---------------------------------------------------------------------- +-- Earley -- + +-- called with all starting categories +initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)] +initialTD pinfo starts toks = + do cat <- starts + ruleid <- topdownRules pinfo ? cat + return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo)) + + +---------------------------------------------------------------------- +-- Kilbury -- + +initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)] +initialBU pinfo toks = + do (tok,rngs) <- aAssocs (inputToken toks) + ruleid <- leftcornerTokens pinfo ? tok + let FRule _ _ _ cat _ = allRules pinfo ! ruleid + rng <- rngs + return (cat,Active [] rng 0 1 (emptyChildren ruleid pinfo)) + ++ + do ruleid <- epsilonRules pinfo + let FRule _ _ _ cat _ = allRules pinfo ! ruleid + return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo)) diff --git a/src-3.0/PGF/Parsing/FCFG/Utilities.hs b/src-3.0/PGF/Parsing/FCFG/Utilities.hs new file mode 100644 index 000000000..f28311bdd --- /dev/null +++ b/src-3.0/PGF/Parsing/FCFG/Utilities.hs @@ -0,0 +1,271 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/13 12:40:19 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.6 $ +-- +-- Basic type declarations and functions for grammar formalisms +----------------------------------------------------------------------------- + + +module PGF.Parsing.FCFG.Utilities where + +import Control.Monad +import Data.Array +import Data.List (groupBy) + +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Data.Utilities (sameLength, foldMerge, splitBy) + + +------------------------------------------------------------ +-- ranges as single pairs + +type RangeRec = [Range] + +data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int + | EmptyRange + deriving (Eq, Ord) + +makeRange :: Int -> Int -> Range +makeRange = Range + +concatRange :: Range -> Range -> [Range] +concatRange EmptyRange rng = return rng +concatRange rng EmptyRange = return rng +concatRange (Range i j) (Range j' k) = [Range i k | j==j'] + +minRange :: Range -> Int +minRange (Range i j) = i + +maxRange :: Range -> Int +maxRange (Range i j) = j + + +------------------------------------------------------------ +-- * representaions of input tokens + +data Input t = MkInput { inputBounds :: (Int, Int), + inputToken :: Assoc t [Range] + } + +input :: Ord t => [t] -> Input t +input toks = MkInput inBounds inToken + where + inBounds = (0, length toks) + inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ] + +inputMany :: Ord t => [[t]] -> Input t +inputMany toks = MkInput inBounds inToken + where + inBounds = (0, length toks) + inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ] + + +------------------------------------------------------------ +-- * representations of syntactical analyses + +-- ** charts as finite maps over edges + +-- | The values of the chart, a list of key-daughters pairs, +-- has unique keys. In essence, it is a map from 'n' to daughters. +-- The daughters should be a set (not necessarily sorted) of rhs's. +type SyntaxChart n e = Assoc e [SyntaxNode n [e]] + +data SyntaxNode n e = SMeta + | SNode n [e] + | SString String + | SInt Integer + | SFloat Double + deriving (Eq,Ord) + +groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] +groupSyntaxNodes [] = [] +groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs' + where + (ess,xs') = span xs + + span [] = ([],[]) + span xs@(SNode n es:xs') + | n0 == n = let (ess,xs) = span xs' in (es:ess,xs) + | otherwise = ([],xs) +groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs +groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs +groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs + +-- better(?) representation of forests: +-- data Forest n = F (SMap n (SList [Forest n])) Bool +-- == +-- type Forest n = GeneralTrie n (SList [Forest n]) Bool +-- (the Bool == isMeta) + +-- ** syntax forests + +data SyntaxForest n = FMeta + | FNode n [[SyntaxForest n]] + -- ^ The outer list should be a set (not necessarily sorted) + -- of possible alternatives. Ie. the outer list + -- is a disjunctive node, and the inner lists + -- are (conjunctive) concatenative nodes + | FString String + | FInt Integer + | FFloat Double + deriving (Eq, Ord, Show) + +instance Functor SyntaxForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap _ (FString s) = FString s + fmap _ (FInt n) = FInt n + fmap _ (FFloat f) = FFloat f + fmap _ (FMeta) = FMeta + +forestName :: SyntaxForest n -> Maybe n +forestName (FNode n _) = Just n +forestName _ = Nothing + +unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) +unifyManyForests = foldM unifyForests FMeta + +-- | two forests can be unified, if either is 'FMeta', or both have the same parent, +-- and all children can be unified +unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) +unifyForests FMeta forest = return forest +unifyForests forest FMeta = return forest +unifyForests (FNode name1 children1) (FNode name2 children2) + | name1 == name2 && not (null children) = return $ FNode name1 children + where children = [ forests | forests1 <- children1, forests2 <- children2, + sameLength forests1 forests2, + forests <- zipWithM unifyForests forests1 forests2 ] +unifyForests (FString s1) (FString s2) + | s1 == s2 = return $ FString s1 +unifyForests (FInt n1) (FInt n2) + | n1 == n2 = return $ FInt n1 +unifyForests (FFloat f1) (FFloat f2) + | f1 == f2 = return $ FFloat f1 +unifyForests _ _ = fail "forest unification failure" + +{- måste tänka mer på detta: +compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n) +compactForests = map joinForests . groupBy eqNames . sortForests + where eqNames f g = forestName f == forestName g + sortForests = foldMerge mergeForests [] . map return + mergeForests [] gs = gs + mergeForests fs [] = fs + mergeForests fs@(f:fs') gs@(g:gs') + = case forestName f `compare` forestName g of + LT -> f : mergeForests fs' gs + GT -> g : mergeForests fs gs' + EQ -> f : g : mergeForests fs' gs' + joinForests fs = case forestName (head fs) of + Nothing -> FMeta + Just name -> FNode name $ + compactDaughters $ + concat [ fss | FNode _ fss <- fs ] + compactDaughters fss = case head fss of + [] -> [[]] + [_] -> map return $ compactForests $ concat fss + _ -> nubsort fss +-} + +-- ** syntax trees + +data SyntaxTree n = TMeta + | TNode n [SyntaxTree n] + | TString String + | TInt Integer + | TFloat Double + deriving (Eq, Ord, Show) + +instance Functor SyntaxTree where + fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees + fmap _ (TString s) = TString s + fmap _ (TInt n) = TInt n + fmap _ (TFloat f) = TFloat f + fmap _ (TMeta) = TMeta + +treeName :: SyntaxTree n -> Maybe n +treeName (TNode n _) = Just n +treeName (TMeta) = Nothing + +unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n) +unifyManyTrees = foldM unifyTrees TMeta + +-- | two trees can be unified, if either is 'TMeta', +-- or both have the same parent, and their children can be unified +unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n) +unifyTrees TMeta tree = return tree +unifyTrees tree TMeta = return tree +unifyTrees (TNode name1 children1) (TNode name2 children2) + | name1 == name2 && sameLength children1 children2 + = liftM (TNode name1) $ zipWithM unifyTrees children1 children2 +unifyTrees (TString s1) (TString s2) + | s1 == s2 = return (TString s1) +unifyTrees (TInt n1) (TInt n2) + | n1 == n2 = return (TInt n1) +unifyTrees (TFloat f1) (TFloat f2) + | f1 == f2 = return (TFloat f1) +unifyTrees _ _ = fail "tree unification failure" + +-- ** conversions between representations + +chart2forests :: (Ord n, Ord e) => + SyntaxChart n e -- ^ The complete chart + -> (e -> Bool) -- ^ When is an edge 'FMeta'? + -> [e] -- ^ The starting edges + -> SList (SyntaxForest n) -- ^ The result has unique keys, ie. all 'n' are joined together. + -- In essence, the result is a map from 'n' to forest daughters + +-- simplest implementation + +chart2forests chart isMeta = concatMap (edge2forests []) + where edge2forests edges edge + | isMeta edge = [FMeta] + | edge `elem` edges = [] + | otherwise = map (item2forest (edge:edges)) $ chart ? edge + item2forest edges (SMeta) = FMeta + item2forest edges (SNode name children) = + FNode name $ children >>= mapM (edge2forests edges) + item2forest edges (SString s) = FString s + item2forest edges (SInt n) = FInt n + item2forest edges (SFloat f) = FFloat f + +{- -before AR inserted peb's patch 8/7/2007, this was: + +chart2forests chart isMeta = concatMap edge2forests + where edge2forests edge = if isMeta edge then [FMeta] + else map item2forest $ chart ? edge + item2forest (SMeta) = FMeta + item2forest (SNode name children) = FNode name $ children >>= mapM edge2forests + item2forest (SString s) = FString s + item2forest (SInt n) = FInt n + item2forest (SFloat f) = FFloat f + +-} + +{- +-- more intelligent(?) implementation, +-- requiring that charts and forests are sorted maps and sorted sets +chart2forests chart isMeta = es2fs + where e2fs e = if isMeta e then [FMeta] else map i2f $ chart ? e + es2fs es = if null metas then fs else FMeta : fs + where (metas, nonMetas) = splitBy isMeta es + fs = map i2f $ unionMap (<++>) $ map (chart ?) nonMetas + i2f (name, children) = FNode name $ + case head children of + [] -> [[]] + [_] -> map return $ es2fs $ concat children + _ -> children >>= mapM e2fs +-} + + +forest2trees :: SyntaxForest n -> SList (SyntaxTree n) +forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees +forest2trees (FString s) = [TString s] +forest2trees (FInt n) = [TInt n] +forest2trees (FFloat f) = [TFloat f] +forest2trees (FMeta) = [TMeta] diff --git a/src-3.0/PGF/Raw/Abstract.hs b/src-3.0/PGF/Raw/Abstract.hs new file mode 100644 index 000000000..77d919a2d --- /dev/null +++ b/src-3.0/PGF/Raw/Abstract.hs @@ -0,0 +1,14 @@ +module PGF.Raw.Abstract where + +data Grammar = + Grm [RExp] + deriving (Eq,Ord,Show) + +data RExp = + App String [RExp] + | AInt Integer + | AStr String + | AFlt Double + | AMet + deriving (Eq,Ord,Show) + diff --git a/src-3.0/PGF/Raw/Convert.hs b/src-3.0/PGF/Raw/Convert.hs new file mode 100644 index 000000000..3e077cc8d --- /dev/null +++ b/src-3.0/PGF/Raw/Convert.hs @@ -0,0 +1,250 @@ +module PGF.Raw.Convert (toGFCC,fromGFCC) where + +import PGF.CId +import PGF.Data +import PGF.Raw.Abstract +import PGF.BuildParser (buildParserInfo) +import PGF.Parsing.FCFG.Utilities + +import qualified Data.Array as Array +import qualified Data.Map as Map + +pgfMajorVersion, pgfMinorVersion :: Integer +(pgfMajorVersion, pgfMinorVersion) = (1,0) + +-- convert parsed grammar to internal GFCC + +toGFCC :: Grammar -> GFCC +toGFCC (Grm [ + App "pgf" (AInt v1 : AInt v2 : App a []:cs), + App "flags" gfs, + ab@( + App "abstract" [ + App "fun" fs, + App "cat" cts + ]), + App "concrete" ccs + ]) = GFCC { + absname = mkCId a, + cncnames = [mkCId c | App c [] <- cs], + gflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs], + abstract = + let + aflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- gfs] + lfuns = [(mkCId f,(toType typ,toExp def)) | App f [typ, def] <- fs] + funs = Map.fromAscList lfuns + lcats = [(mkCId c, Prelude.map toHypo hyps) | App c hyps <- cts] + cats = Map.fromAscList lcats + catfuns = Map.fromAscList + [(cat,[f | (f, (DTyp _ c _,_)) <- lfuns, c==cat]) | (cat,_) <- lcats] + in Abstr aflags funs cats catfuns, + concretes = Map.fromAscList [(mkCId lang, toConcr ts) | App lang ts <- ccs] + } + where + +toConcr :: [RExp] -> Concr +toConcr = foldl add (Concr { + cflags = Map.empty, + lins = Map.empty, + opers = Map.empty, + lincats = Map.empty, + lindefs = Map.empty, + printnames = Map.empty, + paramlincats = Map.empty, + parser = Nothing + }) + where + add :: Concr -> RExp -> Concr + add cnc (App "flags" ts) = cnc { cflags = Map.fromAscList [(mkCId f,v) | App f [AStr v] <- ts] } + add cnc (App "lin" ts) = cnc { lins = mkTermMap ts } + add cnc (App "oper" ts) = cnc { opers = mkTermMap ts } + add cnc (App "lincat" ts) = cnc { lincats = mkTermMap ts } + add cnc (App "lindef" ts) = cnc { lindefs = mkTermMap ts } + add cnc (App "printname" ts) = cnc { printnames = mkTermMap ts } + add cnc (App "param" ts) = cnc { paramlincats = mkTermMap ts } + add cnc (App "parser" ts) = cnc { parser = Just (toPInfo ts) } + +toPInfo :: [RExp] -> ParserInfo +toPInfo [App "rules" rs, App "startupcats" cs] = buildParserInfo (rules, cats) + where + rules = map toFRule rs + cats = Map.fromList [(mkCId c, map expToInt fs) | App c fs <- cs] + + toFRule :: RExp -> FRule + toFRule (App "rule" + [n, + App "cats" (rt:at), + App "R" ls]) = FRule fun prof args res lins + where + (fun,prof) = toFName n + args = map expToInt at + res = expToInt rt + lins = mkArray [mkArray [toSymbol s | s <- l] | App "S" l <- ls] + +toFName :: RExp -> (CId,[Profile]) +toFName (App "_A" [x]) = (wildCId, [[expToInt x]]) +toFName (App f ts) = (mkCId f, map toProfile ts) + where + toProfile :: RExp -> Profile + toProfile AMet = [] + toProfile (App "_A" [t]) = [expToInt t] + toProfile (App "_U" ts) = [expToInt t | App "_A" [t] <- ts] + +toSymbol :: RExp -> FSymbol +toSymbol (App "P" [n,l]) = FSymCat (expToInt l) (expToInt n) +toSymbol (AStr t) = FSymTok t + +toType :: RExp -> Type +toType e = case e of + App cat [App "H" hypos, App "X" exps] -> + DTyp (map toHypo hypos) (mkCId cat) (map toExp exps) + _ -> error $ "type " ++ show e + +toHypo :: RExp -> Hypo +toHypo e = case e of + App x [typ] -> Hyp (mkCId x) (toType typ) + _ -> error $ "hypo " ++ show e + +toExp :: RExp -> Exp +toExp e = case e of + App "App" [App fun [], App "B" xs, App "X" exps] -> + DTr [mkCId x | App x [] <- xs] (AC (mkCId fun)) (map toExp exps) + App "Eq" eqs -> + EEq [Equ (map toExp ps) (toExp v) | App "E" (v:ps) <- eqs] + App "Var" [App i []] -> DTr [] (AV (mkCId i)) [] + AMet -> DTr [] (AM 0) [] + AInt i -> DTr [] (AI i) [] + AFlt i -> DTr [] (AF i) [] + AStr i -> DTr [] (AS i) [] + _ -> error $ "exp " ++ show e + +toTerm :: RExp -> Term +toTerm e = case e of + App "R" es -> R (map toTerm es) + App "S" es -> S (map toTerm es) + App "FV" es -> FV (map toTerm es) + App "P" [e,v] -> P (toTerm e) (toTerm v) + App "W" [AStr s,v] -> W s (toTerm v) + App "A" [AInt i] -> V (fromInteger i) + App f [] -> F (mkCId f) + AInt i -> C (fromInteger i) + AMet -> TM "?" + AStr s -> K (KS s) ---- + _ -> error $ "term " ++ show e + +------------------------------ +--- from internal to parser -- +------------------------------ + +fromGFCC :: GFCC -> Grammar +fromGFCC gfcc0 = Grm [ + App "pgf" (AInt pgfMajorVersion:AInt pgfMinorVersion + : App (prCId (absname gfcc)) [] : map (flip App [] . prCId) (cncnames gfcc)), + App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (gflags gfcc `Map.union` aflags agfcc)], + App "abstract" [ + App "fun" [App (prCId f) [fromType t,fromExp d] | (f,(t,d)) <- Map.toList (funs agfcc)], + App "cat" [App (prCId f) (map fromHypo hs) | (f,hs) <- Map.toList (cats agfcc)] + ], + App "concrete" [App (prCId lang) (fromConcrete c) | (lang,c) <- Map.toList (concretes gfcc)] + ] + where + gfcc = utf8GFCC gfcc0 + agfcc = abstract gfcc + fromConcrete cnc = [ + App "flags" [App (prCId f) [AStr v] | (f,v) <- Map.toList (cflags cnc)], + App "lin" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lins cnc)], + App "oper" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (opers cnc)], + App "lincat" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lincats cnc)], + App "lindef" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (lindefs cnc)], + App "printname" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (printnames cnc)], + App "param" [App (prCId f) [fromTerm v] | (f,v) <- Map.toList (paramlincats cnc)] + ] ++ maybe [] (\p -> [fromPInfo p]) (parser cnc) + +fromType :: Type -> RExp +fromType e = case e of + DTyp hypos cat exps -> + App (prCId cat) [ + App "H" (map fromHypo hypos), + App "X" (map fromExp exps)] + +fromHypo :: Hypo -> RExp +fromHypo e = case e of + Hyp x typ -> App (prCId x) [fromType typ] + +fromExp :: Exp -> RExp +fromExp e = case e of + DTr xs (AC fun) exps -> + App "App" [App (prCId fun) [], App "B" (map (flip App [] . prCId) xs), App "X" (map fromExp exps)] + DTr [] (AV x) [] -> App "Var" [App (prCId x) []] + DTr [] (AS s) [] -> AStr s + DTr [] (AF d) [] -> AFlt d + DTr [] (AI i) [] -> AInt (toInteger i) + DTr [] (AM _) [] -> AMet ---- + EEq eqs -> + App "Eq" [App "E" (map fromExp (v:ps)) | Equ ps v <- eqs] + _ -> error $ "exp " ++ show e + +fromTerm :: Term -> RExp +fromTerm e = case e of + R es -> App "R" (map fromTerm es) + S es -> App "S" (map fromTerm es) + FV es -> App "FV" (map fromTerm es) + P e v -> App "P" [fromTerm e, fromTerm v] + W s v -> App "W" [AStr s, fromTerm v] + C i -> AInt (toInteger i) + TM _ -> AMet + F f -> App (prCId f) [] + V i -> App "A" [AInt (toInteger i)] + K (KS s) -> AStr s ---- + K (KP d vs) -> App "FV" (str d : [str v | Var v _ <- vs]) ---- + where + str v = App "S" (map AStr v) + +-- ** Parsing info + +fromPInfo :: ParserInfo -> RExp +fromPInfo p = App "parser" [ + App "rules" [fromFRule rule | rule <- Array.elems (allRules p)], + App "startupcats" [App (prCId f) (map intToExp cs) | (f,cs) <- Map.toList (startupCats p)] + ] + +fromFRule :: FRule -> RExp +fromFRule (FRule fun prof args res lins) = + App "rule" [fromFName (fun,prof), + App "cats" (intToExp res:map intToExp args), + App "R" [App "S" [fromSymbol s | s <- Array.elems l] | l <- Array.elems lins] + ] + +fromFName :: (CId,[Profile]) -> RExp +fromFName (f,ps) | f == wildCId = fromProfile (head ps) + | otherwise = App (prCId f) (map fromProfile ps) + where + fromProfile :: Profile -> RExp + fromProfile [] = AMet + fromProfile [x] = daughter x + fromProfile args = App "_U" (map daughter args) + + daughter n = App "_A" [intToExp n] + +fromSymbol :: FSymbol -> RExp +fromSymbol (FSymCat l n) = App "P" [intToExp n, intToExp l] +fromSymbol (FSymTok t) = AStr t + +-- ** Utilities + +mkTermMap :: [RExp] -> Map.Map CId Term +mkTermMap ts = Map.fromAscList [(mkCId f,toTerm v) | App f [v] <- ts] + +mkArray :: [a] -> Array.Array Int a +mkArray xs = Array.listArray (0, length xs - 1) xs + +expToInt :: Integral a => RExp -> a +expToInt (App "neg" [AInt i]) = fromIntegral (negate i) +expToInt (AInt i) = fromIntegral i + +expToStr :: RExp -> String +expToStr (AStr s) = s + +intToExp :: Integral a => a -> RExp +intToExp x | x < 0 = App "neg" [AInt (fromIntegral (negate x))] + | otherwise = AInt (fromIntegral x) diff --git a/src-3.0/PGF/Raw/Parse.hs b/src-3.0/PGF/Raw/Parse.hs new file mode 100644 index 000000000..671183ba4 --- /dev/null +++ b/src-3.0/PGF/Raw/Parse.hs @@ -0,0 +1,101 @@ +module PGF.Raw.Parse (parseGrammar) where + +import PGF.CId +import PGF.Raw.Abstract + +import Control.Monad +import Data.Char +import qualified Data.ByteString.Char8 as BS + +parseGrammar :: String -> IO Grammar +parseGrammar s = case runP pGrammar s of + Just (x,"") -> return x + _ -> fail "Parse error" + +pGrammar :: P Grammar +pGrammar = liftM Grm pTerms + +pTerms :: P [RExp] +pTerms = liftM2 (:) (pTerm 1) pTerms <++ (skipSpaces >> return []) + +pTerm :: Int -> P RExp +pTerm n = skipSpaces >> (pParen <++ pApp <++ pNum <++ pStr <++ pMeta) + where pParen = between (char '(') (char ')') (pTerm 0) + pApp = liftM2 App pIdent (if n == 0 then pTerms else return []) + pStr = char '"' >> liftM AStr (manyTill (pEsc <++ get) (char '"')) + pEsc = char '\\' >> get + pNum = do x <- munch1 isDigit + ((char '.' >> munch1 isDigit >>= \y -> return (AFlt (read (x++"."++y)))) + <++ + return (AInt (read x))) + pMeta = char '?' >> return AMet + pIdent = liftM2 (:) (satisfy isIdentFirst) (munch isIdentRest) + isIdentFirst c = c == '_' || isAlpha c + isIdentRest c = c == '_' || c == '\'' || isAlphaNum c + +-- Parser combinators with only left-biased choice + +newtype P a = P { runP :: String -> Maybe (a,String) } + +instance Monad P where + return x = P (\ts -> Just (x,ts)) + P p >>= f = P (\ts -> p ts >>= \ (x,ts') -> runP (f x) ts') + fail _ = pfail + +instance MonadPlus P where + mzero = pfail + mplus = (<++) + + +get :: P Char +get = P (\ts -> case ts of + [] -> Nothing + c:cs -> Just (c,cs)) + +look :: P String +look = P (\ts -> Just (ts,ts)) + +(<++) :: P a -> P a -> P a +P p <++ P q = P (\ts -> p ts `mplus` q ts) + +pfail :: P a +pfail = P (\ts -> Nothing) + +satisfy :: (Char -> Bool) -> P Char +satisfy p = do c <- get + if p c then return c else pfail + +char :: Char -> P Char +char c = satisfy (c==) + +string :: String -> P String +string this = look >>= scan this + where + scan [] _ = return this + scan (x:xs) (y:ys) | x == y = get >> scan xs ys + scan _ _ = pfail + +skipSpaces :: P () +skipSpaces = look >>= skip + where + skip (c:s) | isSpace c = get >> skip s + skip _ = return () + +manyTill :: P a -> P end -> P [a] +manyTill p end = scan + where scan = (end >> return []) <++ liftM2 (:) p scan + +munch :: (Char -> Bool) -> P String +munch p = munch1 p <++ return [] + +munch1 :: (Char -> Bool) -> P String +munch1 p = liftM2 (:) (satisfy p) (munch p) + +choice :: [P a] -> P a +choice = msum + +between :: P open -> P close -> P a -> P a +between open close p = do open + x <- p + close + return x diff --git a/src-3.0/PGF/Raw/Print.hs b/src-3.0/PGF/Raw/Print.hs new file mode 100644 index 000000000..d34adbc2b --- /dev/null +++ b/src-3.0/PGF/Raw/Print.hs @@ -0,0 +1,35 @@ +module PGF.Raw.Print (printTree) where + +import PGF.CId +import PGF.Raw.Abstract + +import Data.List (intersperse) +import Numeric (showFFloat) +import qualified Data.ByteString.Char8 as BS + +printTree :: Grammar -> String +printTree g = prGrammar g "" + +prGrammar :: Grammar -> ShowS +prGrammar (Grm xs) = prRExpList xs + +prRExp :: Int -> RExp -> ShowS +prRExp _ (App x []) = showString x +prRExp n (App x xs) = p (showString x . showChar ' ' . prRExpList xs) + where p s = if n == 0 then s else showChar '(' . s . showChar ')' +prRExp _ (AInt x) = shows x +prRExp _ (AStr x) = showChar '"' . concatS (map mkEsc x) . showChar '"' +prRExp _ (AFlt x) = showFFloat Nothing x +prRExp _ AMet = showChar '?' + +mkEsc :: Char -> ShowS +mkEsc s = case s of + '"' -> showString "\\\"" + '\\' -> showString "\\\\" + _ -> showChar s + +prRExpList :: [RExp] -> ShowS +prRExpList = concatS . intersperse (showChar ' ') . map (prRExp 1) + +concatS :: [ShowS] -> ShowS +concatS = foldr (.) id diff --git a/src-3.0/PGF/ShowLinearize.hs b/src-3.0/PGF/ShowLinearize.hs new file mode 100644 index 000000000..9aa316ba9 --- /dev/null +++ b/src-3.0/PGF/ShowLinearize.hs @@ -0,0 +1,86 @@ +module PGF.ShowLinearize ( + tableLinearize, + recordLinearize, + termLinearize, + allLinearize + ) where + +import PGF.CId +import PGF.Data +import PGF.Macros +import PGF.Linearize + +import GF.Data.Operations +import Data.List + +-- printing linearizations in different ways with source parameters + +-- internal representation, only used internally in this module +data Record = + RR [(String,Record)] + | RT [(String,Record)] + | RFV [Record] + | RS String + | RCon String + +prRecord :: Record -> String +prRecord = prr where + prr t = case t of + RR fs -> concat $ + "{" : + (intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"] + RT fs -> concat $ + "table {" : + (intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"] + RFV ts -> concat $ + "variants {" : (intersperse ";" (map prr ts)) ++ ["}"] + RS s -> prQuotedString s + RCon s -> s + +-- uses the encoding of record types in GFCC.paramlincat +mkRecord :: Term -> Term -> Record +mkRecord typ trm = case (typ,trm) of + (R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts] + (S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts] + (_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts]) + (FV ps, C i) -> RCon $ str $ ps !! i + (S [], _) -> RS $ realize trm + _ -> RS $ show trm ---- printTree trm + where + str = realize + +-- show all branches, without labels and params +allLinearize :: GFCC -> CId -> Exp -> String +allLinearize gfcc lang = concat . map pr . tabularLinearize gfcc lang where + pr (p,vs) = unlines vs + +-- show all branches, with labels and params +tableLinearize :: GFCC -> CId -> Exp -> String +tableLinearize gfcc lang = unlines . map pr . tabularLinearize gfcc lang where + pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" vs) + +-- create a table from labels+params to variants +tabularLinearize :: GFCC -> CId -> Exp -> [(String,[String])] +tabularLinearize gfcc lang = branches . recLinearize gfcc lang where + branches r = case r of + RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] + RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] + RFV rs -> [([], ss) | (_,ss) <- concatMap branches rs] + RS s -> [([], [s])] + RCon _ -> [] + +-- show record in GF-source-like syntax +recordLinearize :: GFCC -> CId -> Exp -> String +recordLinearize gfcc lang = prRecord . recLinearize gfcc lang + +-- create a GF-like record, forming the basis of all functions above +recLinearize :: GFCC -> CId -> Exp -> Record +recLinearize gfcc lang exp = mkRecord typ $ linExp gfcc lang exp where + typ = case exp of + DTr _ (AC f) _ -> lookParamLincat gfcc lang $ valCat $ lookType gfcc f + +-- show GFCC term +termLinearize :: GFCC -> CId -> Exp -> String +termLinearize gfcc lang = show . linExp gfcc lang + + diff --git a/src-3.0/PGF/doc/Eng.gf b/src-3.0/PGF/doc/Eng.gf new file mode 100644 index 000000000..c64f46313 --- /dev/null +++ b/src-3.0/PGF/doc/Eng.gf @@ -0,0 +1,13 @@ +concrete Eng of Ex = { + lincat + S = {s : Str} ; + NP = {s : Str ; n : Num} ; + VP = {s : Num => Str} ; + param + Num = Sg | Pl ; + lin + Pred np vp = {s = np.s ++ vp.s ! np.n} ; + She = {s = "she" ; n = Sg} ; + They = {s = "they" ; n = Pl} ; + Sleep = {s = table {Sg => "sleeps" ; Pl => "sleep"}} ; +} diff --git a/src-3.0/PGF/doc/Ex.gf b/src-3.0/PGF/doc/Ex.gf new file mode 100644 index 000000000..bd0b03483 --- /dev/null +++ b/src-3.0/PGF/doc/Ex.gf @@ -0,0 +1,8 @@ +abstract Ex = { + cat + S ; NP ; VP ; + fun + Pred : NP -> VP -> S ; + She, They : NP ; + Sleep : VP ; +} diff --git a/src-3.0/PGF/doc/Swe.gf b/src-3.0/PGF/doc/Swe.gf new file mode 100644 index 000000000..1d6672371 --- /dev/null +++ b/src-3.0/PGF/doc/Swe.gf @@ -0,0 +1,13 @@ +concrete Swe of Ex = { + lincat + S = {s : Str} ; + NP = {s : Str} ; + VP = {s : Str} ; + param + Num = Sg | Pl ; + lin + Pred np vp = {s = np.s ++ vp.s} ; + She = {s = "hon"} ; + They = {s = "de"} ; + Sleep = {s = "sover"} ; +} diff --git a/src-3.0/PGF/doc/Test.gf b/src-3.0/PGF/doc/Test.gf new file mode 100644 index 000000000..5cd4c5474 --- /dev/null +++ b/src-3.0/PGF/doc/Test.gf @@ -0,0 +1,64 @@ +-- to test GFCC compilation + +flags coding=utf8 ; + +cat S ; NP ; N ; VP ; + +fun Pred : NP -> VP -> S ; +fun Pred2 : NP -> VP -> NP -> S ; +fun Det, Dets : N -> NP ; +fun Mina, Sina, Me, Te : NP ; +fun Raha, Paska, Pallo : N ; +fun Puhua, Munia, Sanoa : VP ; + +param Person = P1 | P2 | P3 ; +param Number = Sg | Pl ; +param Case = Nom | Part ; + +param NForm = NF Number Case ; +param VForm = VF Number Person ; + +lincat N = Noun ; +lincat VP = Verb ; + +oper Noun = {s : NForm => Str} ; +oper Verb = {s : VForm => Str} ; + +lincat NP = {s : Case => Str ; a : {n : Number ; p : Person}} ; + +lin Pred np vp = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p} ; +lin Pred2 np vp ob = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p ++ ob.s ! Part} ; +lin Det no = {s = \\c => no.s ! NF Sg c ; a = {n = Sg ; p = P3}} ; +lin Dets no = {s = \\c => no.s ! NF Pl c ; a = {n = Pl ; p = P3}} ; +lin Mina = {s = table Case ["minä" ; "minua"] ; a = {n = Sg ; p = P1}} ; +lin Te = {s = table Case ["te" ; "teitä"] ; a = {n = Pl ; p = P2}} ; +lin Sina = {s = table Case ["sinä" ; "sinua"] ; a = {n = Sg ; p = P2}} ; +lin Me = {s = table Case ["me" ; "meitä"] ; a = {n = Pl ; p = P1}} ; + +lin Raha = mkN "raha" ; +lin Paska = mkN "paska" ; +lin Pallo = mkN "pallo" ; +lin Puhua = mkV "puhu" ; +lin Munia = mkV "muni" ; +lin Sanoa = mkV "sano" ; + +oper mkN : Str -> Noun = \raha -> { + s = table { + NF Sg Nom => raha ; + NF Sg Part => raha + "a" ; + NF Pl Nom => raha + "t" ; + NF Pl Part => Predef.tk 1 raha + "oja" + } + } ; + +oper mkV : Str -> Verb = \puhu -> { + s = table { + VF Sg P1 => puhu + "n" ; + VF Sg P2 => puhu + "t" ; + VF Sg P3 => puhu + Predef.dp 1 puhu ; + VF Pl P1 => puhu + "mme" ; + VF Pl P2 => puhu + "tte" ; + VF Pl P3 => puhu + "vat" + } + } ; + diff --git a/src-3.0/PGF/doc/gfcc.html b/src-3.0/PGF/doc/gfcc.html new file mode 100644 index 000000000..8f8c478c0 --- /dev/null +++ b/src-3.0/PGF/doc/gfcc.html @@ -0,0 +1,809 @@ + + + + +The GFCC Grammar Format + +

The GFCC Grammar Format

+ +Aarne Ranta
+October 5, 2007 +
+ +

+Author's address: +http://www.cs.chalmers.se/~aarne +

+

+History: +

+
    +
  • 5 Oct 2007: new, better structured GFCC with full expressive power +
  • 19 Oct: translation of lincats, new figures on C++ +
  • 3 Oct 2006: first version +
+ +

What is GFCC

+

+GFCC is a low-level format for GF grammars. Its aim is to contain the minimum +that is needed to process GF grammars at runtime. This minimality has three +advantages: +

+
    +
  • compact grammar files and run-time objects +
  • time and space efficient processing +
  • simple definition of interpreters +
+ +

+Thus we also want to call GFCC the portable grammar format. +

+

+The idea is that all embedded GF applications use GFCC. +The GF system would be primarily used as a compiler and as a grammar +development tool. +

+

+Since GFCC is implemented in BNFC, a parser of the format is readily +available for C, C++, C#, Haskell, Java, and OCaml. Also an XML +representation can be generated in BNFC. A +reference implementation +of linearization and some other functions has been written in Haskell. +

+

GFCC vs. GFC

+

+GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed +to be a run-time format, but also to +support separate compilation of grammars, i.e. +to store the results of compiling +individual GF modules. But this means that GFC has to contain extra information, +such as type annotations, which is only needed in compilation and not at +run-time. In particular, the pattern matching syntax and semantics of GFC is +complex and therefore difficult to implement in new platforms. +

+

+Actually, GFC is planned to be omitted also as the target format of +separate compilation, where plain GF (type annotated and partially evaluated) +will be used instead. GFC provides only marginal advantages as a target format +compared with GF, and it is therefore just extra weight to carry around this +format. +

+

+The main differences of GFCC compared with GFC (and GF) can be summarized as follows: +

+
    +
  • there are no modules, and therefore no qualified names +
  • a GFCC grammar is multilingual, and consists of a common abstract syntax + together with one concrete syntax per language +
  • records and tables are replaced by arrays +
  • record labels and parameter values are replaced by integers +
  • record projection and table selection are replaced by array indexing +
  • even though the format does support dependent types and higher-order abstract + syntax, there is no interpreted yet that does this +
+ +

+Here is an example of a GF grammar, consisting of three modules, +as translated to GFCC. The representations are aligned; thus they do not completely +reflect the order of judgements in GFCC files, which have different orders of +blocks of judgements, and alphabetical sorting. +

+
+                                      grammar Ex(Eng,Swe);
+  
+  abstract Ex = {                     abstract {
+    cat                                 cat
+      S ; NP ; VP ;                      NP[]; S[]; VP[];
+    fun                                 fun
+      Pred : NP -> VP -> S ;             Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
+      She, They : NP ;                   She=[0,"she"];
+      Sleep : VP ;                       They=[1,"they"];
+                                         Sleep=[["sleeps","sleep"]];
+  }                                     } ;
+                                      
+  concrete Eng of Ex = {              concrete Eng {
+    lincat                             lincat
+      S  = {s : Str} ;                  S=[()];
+      NP = {s : Str ; n : Num} ;        NP=[1,()];
+      VP = {s : Num => Str} ;           VP=[[(),()]];
+    param
+      Num = Sg | Pl ;
+    lin                                lin
+      Pred np vp = {                    Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
+        s = np.s ++ vp.s ! np.n} ;      
+      She = {s = "she" ; n = Sg} ;      She=[0,"she"];
+      They = {s = "they" ; n = Pl} ;    They = [1, "they"];
+      Sleep = {s = table {              Sleep=[["sleeps","sleep"]];
+        Sg => "sleeps" ; 
+        Pl => "sleep"                   
+        }                               
+      } ;
+  }                                   } ;
+  
+  concrete Swe of Ex = {              concrete Swe {
+    lincat                             lincat
+      S  = {s : Str} ;                  S=[()];
+      NP = {s : Str} ;                  NP=[()];
+      VP = {s : Str} ;                  VP=[()];
+    param
+      Num = Sg | Pl ;
+    lin                                lin
+      Pred np vp = {                    Pred = [(($0!0),($1!0))];
+        s = np.s ++ vp.s} ;
+      She = {s = "hon"} ;               She = ["hon"];
+      They = {s = "de"} ;               They = ["de"];
+      Sleep = {s = "sover"} ;           Sleep = ["sover"];
+  }                                     } ;                                   
+
+

+

The syntax of GFCC files

+

+The complete BNFC grammar, from which +the rules in this section are taken, is in the file +GF/GFCC/GFCC.cf. +

+

Top level

+

+A grammar has a header telling the name of the abstract syntax +(often specifying an application domain), and the names of +the concrete languages. The abstract syntax and the concrete +syntaxes themselves follow. +

+
+    Grm. Grammar  ::= 
+      "grammar" CId "(" [CId] ")" ";" 
+      Abstract ";" 
+      [Concrete] ;
+  
+    Abs. Abstract ::= 
+      "abstract" "{" 
+        "flags" [Flag] 
+        "fun"   [FunDef] 
+        "cat"   [CatDef] 
+      "}" ;
+  
+    Cnc. Concrete ::= 
+      "concrete" CId "{" 
+        "flags"  [Flag] 
+        "lin"    [LinDef] 
+        "oper"   [LinDef] 
+        "lincat" [LinDef] 
+        "lindef" [LinDef] 
+        "printname" [LinDef]
+      "}" ;
+
+

+This syntax organizes each module to a sequence of fields, such +as flags, linearizations, operations, linearization types, etc. +It is envisaged that particular applications can ignore some +of the fields, typically so that earlier fields are more +important than later ones. +

+

+The judgement forms have the following syntax. +

+
+    Flg. Flag     ::= CId "=" String ;
+    Cat. CatDef   ::= CId "[" [Hypo] "]" ;
+    Fun. FunDef   ::= CId ":" Type "=" Exp ;
+    Lin. LinDef   ::= CId "=" Term ;
+
+

+For the run-time system, the reference implementation in Haskell +uses a structure that gives efficient look-up: +

+
+    data GFCC = GFCC {
+      absname   :: CId ,
+      cncnames  :: [CId] ,
+      abstract  :: Abstr ,
+      concretes :: Map CId Concr
+      }
+  
+    data Abstr = Abstr {
+      aflags  :: Map CId String,     -- value of a flag
+      funs    :: Map CId (Type,Exp), -- type and def of a fun
+      cats    :: Map CId [Hypo],     -- context of a cat
+      catfuns :: Map CId [CId]       -- funs yielding a cat (redundant, for fast lookup)
+      }
+  
+    data Concr = Concr {
+      flags   :: Map CId String, -- value of a flag
+      lins    :: Map CId Term,   -- lin of a fun
+      opers   :: Map CId Term,   -- oper generated by subex elim
+      lincats :: Map CId Term,   -- lin type of a cat
+      lindefs :: Map CId Term,   -- lin default of a cat
+      printnames :: Map CId Term -- printname of a cat or a fun
+      }
+
+

+These definitions are from GF/GFCC/DataGFCC.hs. +

+

+Identifiers (CId) are like Ident in GF, except that +the compiler produces constants prefixed with _ in +the common subterm elimination optimization. +

+
+    token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
+
+

+

Abstract syntax

+

+Types are first-order function types built from argument type +contexts and value types. +category symbols. Syntax trees (Exp) are +rose trees with nodes consisting of a head (Atom) and +bound variables (CId). +

+
+    DTyp. Type  ::= "[" [Hypo] "]" CId [Exp] ;        
+    DTr.  Exp   ::= "[" "(" [CId] ")" Atom [Exp] "]" ;
+    Hyp.  Hypo  ::= CId ":" Type ;
+
+

+The head Atom is either a function +constant, a bound variable, or a metavariable, or a string, integer, or float +literal. +

+
+    AC.   Atom  ::= CId ;
+    AS.   Atom  ::= String ;
+    AI.   Atom  ::= Integer ;
+    AF.   Atom  ::= Double ;
+    AM.   Atom  ::= "?" Integer ;
+
+

+The context-free types and trees of the "old GFCC" are special +cases, which can be defined as follows: +

+
+    Typ.  Type  ::= [CId] "->" CId
+    Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val
+  
+    Tr.   Exp   ::= "(" CId [Exp] ")"
+    Tr fun exps  = DTr [] fun exps
+
+

+To store semantic (def) definitions by cases, the following expression +form is provided, but it is only meaningful in the last field of a function +declaration in an abstract syntax: +

+
+    EEq. Exp      ::= "{" [Equation] "}" ;
+    Equ. Equation ::= [Exp] "->" Exp ;
+
+

+Notice that expressions are used to encode patterns. Primitive notions +(the default semantics in GF) are encoded as empty sets of equations +([]). For a constructor (canonical form) of a category C, we +aim to use the encoding as the application (_constr C). +

+

Concrete syntax

+

+Linearization terms (Term) are built as follows. +Constructor names are shown to make the later code +examples readable. +

+
+    R.  Term ::= "[" [Term] "]" ;        -- array (record/table)
+    P.  Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection)
+    S.  Term ::= "(" [Term] ")" ;        -- concatenated sequence
+    K.  Term ::= Tokn ;                  -- token
+    V.  Term ::= "$" Integer ;           -- argument (subtree)
+    C.  Term ::= Integer ;               -- array index (label/parameter value)
+    FV. Term ::= "[|" [Term] "|]" ;      -- free variation
+    TM. Term ::= "?" ;                   -- linearization of metavariable
+
+

+Tokens are strings or (maybe obsolescent) prefix-dependent +variant lists. +

+
+    KS.  Tokn     ::= String ;
+    KP.  Tokn     ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
+    Var. Variant  ::= [String] "/" [String] ;
+
+

+Two special forms of terms are introduced by the compiler +as optimizations. They can in principle be eliminated, but +their presence makes grammars much more compact. Their semantics +will be explained in a later section. +

+
+    F.  Term ::= CId ;                     -- global constant
+    W.  Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
+
+

+There is also a deprecated form of "record parameter alias", +

+
+    RP. Term ::= "(" Term "@" Term ")";    -- DEPRECATED
+
+

+which will be removed when the migration to new GFCC is complete. +

+

The semantics of concrete syntax terms

+

+The code in this section is from GF/GFCC/Linearize.hs. +

+

Linearization and realization

+

+The linearization algorithm is essentially the same as in +GFC: a tree is linearized by evaluating its linearization term +in the environment of the linearizations of the subtrees. +Literal atoms are linearized in the obvious way. +The function also needs to know the language (i.e. concrete syntax) +in which linearization is performed. +

+
+    linExp :: GFCC -> CId -> Exp -> Term
+    linExp gfcc lang tree@(DTr _ at trees) = case at of
+      AC fun -> comp (Prelude.map lin trees) $ look fun
+      AS s   -> R [kks (show s)] -- quoted
+      AI i   -> R [kks (show i)]
+      AF d   -> R [kks (show d)]
+      AM     -> TM
+     where
+       lin  = linExp gfcc lang
+       comp = compute gfcc lang
+       look = lookLin gfcc lang
+
+

+TODO: bindings must be supported. +

+

+The result of linearization is usually a record, which is realized as +a string using the following algorithm. +

+
+    realize :: Term -> String
+    realize trm = case trm of
+      R (t:_)  -> realize t
+      S ss     -> unwords $ Prelude.map realize ss
+      K (KS s) -> s
+      K (KP s _) -> unwords s ---- prefix choice TODO
+      W s t    -> s ++ realize t
+      FV (t:_) -> realize t
+      TM       -> "?"
+
+

+Notice that realization always picks the first field of a record. +If a linearization type has more than one field, the first field +does not necessarily contain the desired string. +Also notice that the order of record fields in GFCC is not necessarily +the same as in GF source. +

+

Term evaluation

+

+Evaluation follows call-by-value order, with two environments +needed: +

+
    +
  • the grammar (a concrete syntax) to give the global constants +
  • an array of terms to give the subtree linearizations +
+ +

+The code is presented in one-level pattern matching, to +enable reimplementations in languages that do not permit +deep patterns (such as Java and C++). +

+
+  compute :: GFCC -> CId -> [Term] -> Term -> Term
+  compute gfcc lang args = comp where
+    comp trm = case trm of
+      P r p  -> proj (comp r) (comp p)
+      W s t  -> W s (comp t)
+      R ts   -> R $ Prelude.map comp ts
+      V i    -> idx args (fromInteger i)  -- already computed
+      F c    -> comp $ look c             -- not computed (if contains V)
+      FV ts  -> FV $ Prelude.map comp ts
+      S ts   -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
+      _ -> trm
+  
+    look = lookOper gfcc lang
+  
+    idx xs i = xs !! i
+  
+    proj r p = case (r,p) of
+      (_,     FV ts) -> FV $ Prelude.map (proj r) ts
+      (W s t, _)     -> kks (s ++ getString (proj t p))
+      _              -> comp $ getField r (getIndex p)
+  
+    getString t = case t of
+      K (KS s) -> s
+      _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
+  
+    getIndex t =  case t of
+      C i    -> fromInteger i
+      RP p _ -> getIndex p
+      TM     -> 0  -- default value for parameter
+      _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
+  
+    getField t i = case t of
+      R rs   -> idx rs i
+      RP _ r -> getField r i
+      TM     -> TM
+      _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
+
+

+

The special term constructors

+

+The three forms introduced by the compiler may a need special +explanation. +

+

+Global constants +

+
+    Term ::= CId ;
+
+

+are shorthands for complex terms. They are produced by the +compiler by (iterated) common subexpression elimination. +They are often more powerful than hand-devised code sharing in the source +code. They could be computed off-line by replacing each identifier by +its definition. +

+

+Prefix-suffix tables +

+
+    Term ::= "(" String "+" Term ")" ; 
+
+

+represent tables of word forms divided to the longest common prefix +and its array of suffixes. In the example grammar above, we have +

+
+    Sleep = [("sleep" + ["s",""])]
+
+

+which in fact is equal to the array of full forms +

+
+    ["sleeps", "sleep"]
+
+

+The power of this construction comes from the fact that suffix sets +tend to be repeated in a language, and can therefore be collected +by common subexpression elimination. It is this technique that +explains the used syntax rather than the more accurate +

+
+    "(" String "+" [String] ")"
+
+

+since we want the suffix part to be a Term for the optimization to +take effect. +

+

Compiling to GFCC

+

+Compilation to GFCC is performed by the GF grammar compiler, and +GFCC interpreters need not know what it does. For grammar writers, +however, it might be interesting to know what happens to the grammars +in the process. +

+

+The compilation phases are the following +

+
    +
  1. type check and partially evaluate GF source +
  2. create a symbol table mapping the GF parameter and record types to + fixed-size arrays, and parameter values and record labels to integers +
  3. traverse the linearization rules replacing parameters and labels by integers +
  4. reorganize the created GF grammar so that it has just one abstract syntax + and one concrete syntax per language +
  5. TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the + coding flag) +
  6. translate the GF grammar object to a GFCC grammar object, using a simple + compositional mapping +
  7. perform the word-suffix optimization on GFCC linearization terms +
  8. perform subexpression elimination on each concrete syntax module +
  9. print out the GFCC code +
+ +

Problems in GFCC compilation

+

+Two major problems had to be solved in compiling GF to GFCC: +

+
    +
  • consistent order of tables and records, to permit the array translation +
  • run-time variables in complex parameter values. +
+ +

+The current implementation is still experimental and may fail +to generate correct code. Any errors remaining are likely to be +related to the two problems just mentioned. +

+

+The order problem is solved in slightly different ways for tables and records. +In both cases, eta expansion is used to establish a +canonical order. Tables are ordered by applying the preorder induced +by param definitions. Records are ordered by sorting them by labels. +This means that +e.g. the s field will in general no longer appear as the first +field, even if it does so in the GF source code. But relying on the +order of fields in a labelled record would be misplaced anyway. +

+

+The canonical form of records is further complicated by lock fields, +i.e. dummy fields of form lock_C = <>, which are added to grammar +libraries to force intensionality of linearization types. The problem +is that the absence of a lock field only generates a warning, not +an error. Therefore a GF grammar can contain objects of the same +type with and without a lock field. This problem was solved in GFCC +generation by just removing all lock fields (defined as fields whose +type is the empty record type). This has the further advantage of +(slightly) reducing the grammar size. More importantly, it is safe +to remove lock fields, because they are never used in computation, +and because intensional types are only needed in grammars reused +as libraries, not in grammars used at runtime. +

+

+While the order problem is rather bureaucratic in nature, run-time +variables are an interesting problem. They arise in the presence +of complex parameter values, created by argument-taking constructors +and parameter records. To give an example, consider the GF parameter +type system +

+
+    Number = Sg | Pl ;
+    Person = P1 | P2 | P3 ;
+    Agr = Ag Number Person ;
+
+

+The values can be translated to integers in the expected way, +

+
+    Sg = 0, Pl = 1
+    P1 = 0, P2 = 1, P3 = 2
+    Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
+    Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
+
+

+However, an argument of Agr can be a run-time variable, as in +

+
+    Ag np.n P3
+
+

+This expression must first be translated to a case expression, +

+
+    case np.n of {
+      0 => 2 ;
+      1 => 5
+      }
+
+

+which can then be translated to the GFCC term +

+
+    ([2,5] ! ($0 ! $1))  
+
+

+assuming that the variable np is the first argument and that its +Number field is the second in the record. +

+

+This transformation of course has to be performed recursively, since +there can be several run-time variables in a parameter value: +

+
+    Ag np.n np.p
+
+

+A similar transformation would be possible to deal with the double +role of parameter records discussed above. Thus the type +

+
+    RNP = {n : Number ; p : Person}
+
+

+could be uniformly translated into the set {0,1,2,3,4,5} +as Agr above. Selections would be simple instances of indexing. +But any projection from the record should be translated into +a case expression, +

+
+    rnp.n  ===> 
+    case rnp of {
+      0 => 0 ;
+      1 => 0 ;
+      2 => 0 ;
+      3 => 1 ;
+      4 => 1 ;
+      5 => 1
+      }
+
+

+To avoid the code bloat resulting from this, we have chosen to +deal with records by a currying transformation: +

+
+    table {n : Number ; p : Person} {... ...}
+     ===>
+    table Number {Sg => table Person {...} ; table Person {...}}
+
+

+This is performed when GFCC is generated. Selections with +records have to be treated likewise, +

+
+    t ! r   ===> t ! r.n ! r.p
+
+

+

The representation of linearization types

+

+Linearization types (lincat) are not needed when generating with +GFCC, but they have been added to enable parser generation directly from +GFCC. The linearization type definitions are shown as a part of the +concrete syntax, by using terms to represent types. Here is the table +showing how different linearization types are encoded. +

+
+    P*                         = max(P)         -- parameter type
+    {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*]  -- record
+    (P => T)*                  = [T* ,...,T*]   -- table, size(P) cases
+    Str*                       = ()
+
+

+For example, the linearization type present/CatEng.NP is +translated as follows: +

+
+    NP = {
+      a : {                     -- 6 = 2*3 values
+        n : {ParamX.Number} ;   -- 2 values
+        p : {ParamX.Person}     -- 3 values
+      } ;
+      s : {ResEng.Case} => Str  -- 3 values
+    }
+  
+    __NP = [[1,2],[(),(),()]]
+
+

+

Running the compiler and the GFCC interpreter

+

+GFCC generation is a part of the +developers' version +of GF since September 2006. To invoke the compiler, the flag +-printer=gfcc to the command +pm = print_multi is used. It is wise to recompile the grammar from +source, since previously compiled libraries may not obey the canonical +order of records. +Here is an example, performed in +example/bronzeage. +

+
+    i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
+    i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
+    strip
+    pm -printer=gfcc | wf bronze.gfcc
+
+

+There is also an experimental batch compiler, which does not use the GFC +format or the record aliases. It can be produced by +

+
+    make gfc
+
+

+in GF/src, and invoked by +

+
+    gfc --make FILES
+
+

+

The reference interpreter

+

+The reference interpreter written in Haskell consists of the following files: +

+
+    -- source file for BNFC
+    GFCC.cf       -- labelled BNF grammar of gfcc
+  
+    -- files generated by BNFC
+    AbsGFCC.hs    -- abstrac syntax datatypes
+    ErrM.hs       -- error monad used internally
+    LexGFCC.hs    -- lexer of gfcc files
+    ParGFCC.hs    -- parser of gfcc files and syntax trees
+    PrintGFCC.hs  -- printer of gfcc files and syntax trees
+  
+    -- hand-written files
+    DataGFCC.hs   -- grammar datatype, post-parser grammar creation
+    Linearize.hs  -- linearization and evaluation
+    Macros.hs     -- utilities abstracting away from GFCC datatypes
+    Generate.hs   -- random and exhaustive generation, generate-and-test parsing
+    API.hs        -- functionalities accessible in embedded GF applications
+    Generate.hs   -- random and exhaustive generation
+    Shell.hs      -- main function - a simple command interpreter
+
+

+It is included in the +developers' version +of GF, in the subdirectories GF/src/GF/GFCC and +GF/src/GF/Devel. +

+

+As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir +Angelov). The interpreter uses the relevant modules +

+
+    GF/Conversions/SimpleToFCFG.hs  -- generate parser from GFCC
+    GF/Parsing/FCFG.hs              -- run the parser
+
+

+

+To compile the interpreter, type +

+
+    make gfcc
+
+

+in GF/src. To run it, type +

+
+    ./gfcc <GFCC-file>
+
+

+The available commands are +

+
    +
  • gr <Cat> <Int>: generate a number of random trees in category. + and show their linearizations in all languages +
  • grt <Cat> <Int>: generate a number of random trees in category. + and show the trees and their linearizations in all languages +
  • gt <Cat> <Int>: generate a number of trees in category from smallest, + and show their linearizations in all languages +
  • gtt <Cat> <Int>: generate a number of trees in category from smallest, + and show the trees and their linearizations in all languages +
  • p <Lang> <Cat> <String>: parse a string into a set of trees +
  • lin <Tree>: linearize tree in all languages, also showing full records +
  • q: terminate the system cleanly +
+ +

Embedded formats

+
    +
  • JavaScript: compiler of linearization and abstract syntax +

    +
  • Haskell: compiler of abstract syntax and interpreter with parsing, + linearization, and generation +

    +
  • C: compiler of linearization (old GFCC) +

    +
  • C++: embedded interpreter supporting linearization (old GFCC) +
+ +

Some things to do

+

+Support for dependent types, higher-order abstract syntax, and +semantic definition in GFCC generation and interpreters. +

+

+Replacing the entire GF shell by one based on GFCC. +

+

+Interpreter in Java. +

+

+Hand-written parsers for GFCC grammars to reduce code size +(and efficiency?) of interpreters. +

+

+Binary format and/or file compression of GFCC output. +

+

+Syntax editor based on GFCC. +

+

+Rewriting of resource libraries in order to exploit the +word-suffix sharing better (depth-one tables, as in FM). +

+ + + + diff --git a/src-3.0/PGF/doc/gfcc.txt b/src-3.0/PGF/doc/gfcc.txt new file mode 100644 index 000000000..5dcf2fbdc --- /dev/null +++ b/src-3.0/PGF/doc/gfcc.txt @@ -0,0 +1,712 @@ +The GFCC Grammar Format +Aarne Ranta +December 14, 2007 + +Author's address: +[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne] + +% to compile: txt2tags -thtml --toc gfcc.txt + +History: +- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC +- 5 Oct 2007: new, better structured GFCC with full expressive power +- 19 Oct: translation of lincats, new figures on C++ +- 3 Oct 2006: first version + + +==What is GFCC== + +GFCC is a low-level format for GF grammars. Its aim is to contain the minimum +that is needed to process GF grammars at runtime. This minimality has three +advantages: +- compact grammar files and run-time objects +- time and space efficient processing +- simple definition of interpreters + + +Thus we also want to call GFCC the **portable grammar format**. + +The idea is that all embedded GF applications use GFCC. +The GF system would be primarily used as a compiler and as a grammar +development tool. + +Since GFCC is implemented in BNFC, a parser of the format is readily +available for C, C++, C#, Haskell, Java, and OCaml. Also an XML +representation can be generated in BNFC. A +[reference implementation ../] +of linearization and some other functions has been written in Haskell. + + +==GFCC vs. GFC== + +GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed +to be a run-time format, but also to +support separate compilation of grammars, i.e. +to store the results of compiling +individual GF modules. But this means that GFC has to contain extra information, +such as type annotations, which is only needed in compilation and not at +run-time. In particular, the pattern matching syntax and semantics of GFC is +complex and therefore difficult to implement in new platforms. + +Actually, GFC is planned to be omitted also as the target format of +separate compilation, where plain GF (type annotated and partially evaluated) +will be used instead. GFC provides only marginal advantages as a target format +compared with GF, and it is therefore just extra weight to carry around this +format. + +The main differences of GFCC compared with GFC (and GF) can be +summarized as follows: +- there are no modules, and therefore no qualified names +- a GFCC grammar is multilingual, and consists of a common abstract syntax + together with one concrete syntax per language +- records and tables are replaced by arrays +- record labels and parameter values are replaced by integers +- record projection and table selection are replaced by array indexing +- even though the format does support dependent types and higher-order abstract + syntax, there is no interpreted yet that does this + + + +Here is an example of a GF grammar, consisting of three modules, +as translated to GFCC. The representations are aligned; +thus they do not completely +reflect the order of judgements in GFCC files, which have different orders of +blocks of judgements, and alphabetical sorting. +``` + grammar Ex(Eng,Swe); + +abstract Ex = { abstract { + cat cat + S ; NP ; VP ; NP[]; S[]; VP[]; + fun fun + Pred : NP -> VP -> S ; Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))]; + She, They : NP ; She=[0,"she"]; + Sleep : VP ; They=[1,"they"]; + Sleep=[["sleeps","sleep"]]; +} } ; + +concrete Eng of Ex = { concrete Eng { + lincat lincat + S = {s : Str} ; S=[()]; + NP = {s : Str ; n : Num} ; NP=[1,()]; + VP = {s : Num => Str} ; VP=[[(),()]]; + param + Num = Sg | Pl ; + lin lin + Pred np vp = { Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))]; + s = np.s ++ vp.s ! np.n} ; + She = {s = "she" ; n = Sg} ; She=[0,"she"]; + They = {s = "they" ; n = Pl} ; They = [1, "they"]; + Sleep = {s = table { Sleep=[["sleeps","sleep"]]; + Sg => "sleeps" ; + Pl => "sleep" + } + } ; +} } ; + +concrete Swe of Ex = { concrete Swe { + lincat lincat + S = {s : Str} ; S=[()]; + NP = {s : Str} ; NP=[()]; + VP = {s : Str} ; VP=[()]; + param + Num = Sg | Pl ; + lin lin + Pred np vp = { Pred = [(($0!0),($1!0))]; + s = np.s ++ vp.s} ; + She = {s = "hon"} ; She = ["hon"]; + They = {s = "de"} ; They = ["de"]; + Sleep = {s = "sover"} ; Sleep = ["sover"]; +} } ; +``` + +==The syntax of GFCC files== + +The complete BNFC grammar, from which +the rules in this section are taken, is in the file +[``GF/GFCC/GFCC.cf`` ../DataGFCC.cf]. + + +===Top level=== + +A grammar has a header telling the name of the abstract syntax +(often specifying an application domain), and the names of +the concrete languages. The abstract syntax and the concrete +syntaxes themselves follow. +``` + Grm. Grammar ::= + "grammar" CId "(" [CId] ")" ";" + Abstract ";" + [Concrete] ; + + Abs. Abstract ::= + "abstract" "{" + "flags" [Flag] + "fun" [FunDef] + "cat" [CatDef] + "}" ; + + Cnc. Concrete ::= + "concrete" CId "{" + "flags" [Flag] + "lin" [LinDef] + "oper" [LinDef] + "lincat" [LinDef] + "lindef" [LinDef] + "printname" [LinDef] + "}" ; +``` +This syntax organizes each module to a sequence of **fields**, such +as flags, linearizations, operations, linearization types, etc. +It is envisaged that particular applications can ignore some +of the fields, typically so that earlier fields are more +important than later ones. + +The judgement forms have the following syntax. +``` + Flg. Flag ::= CId "=" String ; + Cat. CatDef ::= CId "[" [Hypo] "]" ; + Fun. FunDef ::= CId ":" Type "=" Exp ; + Lin. LinDef ::= CId "=" Term ; +``` +For the run-time system, the reference implementation in Haskell +uses a structure that gives efficient look-up: +``` + data GFCC = GFCC { + absname :: CId , + cncnames :: [CId] , + abstract :: Abstr , + concretes :: Map CId Concr + } + + data Abstr = Abstr { + aflags :: Map CId String, -- value of a flag + funs :: Map CId (Type,Exp), -- type and def of a fun + cats :: Map CId [Hypo], -- context of a cat + catfuns :: Map CId [CId] -- funs yielding a cat (redundant, for fast lookup) + } + + data Concr = Concr { + flags :: Map CId String, -- value of a flag + lins :: Map CId Term, -- lin of a fun + opers :: Map CId Term, -- oper generated by subex elim + lincats :: Map CId Term, -- lin type of a cat + lindefs :: Map CId Term, -- lin default of a cat + printnames :: Map CId Term -- printname of a cat or a fun + } +``` +These definitions are from [``GF/GFCC/DataGFCC.hs`` ../DataGFCC.hs]. + +Identifiers (``CId``) are like ``Ident`` in GF, except that +the compiler produces constants prefixed with ``_`` in +the common subterm elimination optimization. +``` + token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; +``` + + +===Abstract syntax=== + +Types are first-order function types built from argument type +contexts and value types. +category symbols. Syntax trees (``Exp``) are +rose trees with nodes consisting of a head (``Atom``) and +bound variables (``CId``). +``` + DTyp. Type ::= "[" [Hypo] "]" CId [Exp] ; + DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ; + Hyp. Hypo ::= CId ":" Type ; +``` +The head Atom is either a function +constant, a bound variable, or a metavariable, or a string, integer, or float +literal. +``` + AC. Atom ::= CId ; + AS. Atom ::= String ; + AI. Atom ::= Integer ; + AF. Atom ::= Double ; + AM. Atom ::= "?" Integer ; +``` +The context-free types and trees of the "old GFCC" are special +cases, which can be defined as follows: +``` + Typ. Type ::= [CId] "->" CId + Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val + + Tr. Exp ::= "(" CId [Exp] ")" + Tr fun exps = DTr [] fun exps +``` +To store semantic (``def``) definitions by cases, the following expression +form is provided, but it is only meaningful in the last field of a function +declaration in an abstract syntax: +``` + EEq. Exp ::= "{" [Equation] "}" ; + Equ. Equation ::= [Exp] "->" Exp ; +``` +Notice that expressions are used to encode patterns. Primitive notions +(the default semantics in GF) are encoded as empty sets of equations +(``[]``). For a constructor (canonical form) of a category ``C``, we +aim to use the encoding as the application ``(_constr C)``. + + + +===Concrete syntax=== + +Linearization terms (``Term``) are built as follows. +Constructor names are shown to make the later code +examples readable. +``` + R. Term ::= "[" [Term] "]" ; -- array (record/table) + P. Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection) + S. Term ::= "(" [Term] ")" ; -- concatenated sequence + K. Term ::= Tokn ; -- token + V. Term ::= "$" Integer ; -- argument (subtree) + C. Term ::= Integer ; -- array index (label/parameter value) + FV. Term ::= "[|" [Term] "|]" ; -- free variation + TM. Term ::= "?" ; -- linearization of metavariable +``` +Tokens are strings or (maybe obsolescent) prefix-dependent +variant lists. +``` + KS. Tokn ::= String ; + KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; + Var. Variant ::= [String] "/" [String] ; +``` +Two special forms of terms are introduced by the compiler +as optimizations. They can in principle be eliminated, but +their presence makes grammars much more compact. Their semantics +will be explained in a later section. +``` + F. Term ::= CId ; -- global constant + W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table +``` +There is also a deprecated form of "record parameter alias", +``` + RP. Term ::= "(" Term "@" Term ")"; -- DEPRECATED +``` +which will be removed when the migration to new GFCC is complete. + + + +==The semantics of concrete syntax terms== + +The code in this section is from [``GF/GFCC/Linearize.hs`` ../Linearize.hs]. + + +===Linearization and realization=== + +The linearization algorithm is essentially the same as in +GFC: a tree is linearized by evaluating its linearization term +in the environment of the linearizations of the subtrees. +Literal atoms are linearized in the obvious way. +The function also needs to know the language (i.e. concrete syntax) +in which linearization is performed. +``` + linExp :: GFCC -> CId -> Exp -> Term + linExp gfcc lang tree@(DTr _ at trees) = case at of + AC fun -> comp (Prelude.map lin trees) $ look fun + AS s -> R [kks (show s)] -- quoted + AI i -> R [kks (show i)] + AF d -> R [kks (show d)] + AM -> TM + where + lin = linExp gfcc lang + comp = compute gfcc lang + look = lookLin gfcc lang +``` +TODO: bindings must be supported. + +The result of linearization is usually a record, which is realized as +a string using the following algorithm. +``` + realize :: Term -> String + realize trm = case trm of + R (t:_) -> realize t + S ss -> unwords $ Prelude.map realize ss + K (KS s) -> s + K (KP s _) -> unwords s ---- prefix choice TODO + W s t -> s ++ realize t + FV (t:_) -> realize t + TM -> "?" +``` +Notice that realization always picks the first field of a record. +If a linearization type has more than one field, the first field +does not necessarily contain the desired string. +Also notice that the order of record fields in GFCC is not necessarily +the same as in GF source. + + +===Term evaluation=== + +Evaluation follows call-by-value order, with two environments +needed: +- the grammar (a concrete syntax) to give the global constants +- an array of terms to give the subtree linearizations + + +The code is presented in one-level pattern matching, to +enable reimplementations in languages that do not permit +deep patterns (such as Java and C++). +``` +compute :: GFCC -> CId -> [Term] -> Term -> Term +compute gfcc lang args = comp where + comp trm = case trm of + P r p -> proj (comp r) (comp p) + W s t -> W s (comp t) + R ts -> R $ Prelude.map comp ts + V i -> idx args (fromInteger i) -- already computed + F c -> comp $ look c -- not computed (if contains V) + FV ts -> FV $ Prelude.map comp ts + S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts + _ -> trm + + look = lookOper gfcc lang + + idx xs i = xs !! i + + proj r p = case (r,p) of + (_, FV ts) -> FV $ Prelude.map (proj r) ts + (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts + (W s t, _) -> kks (s ++ getString (proj t p)) + _ -> comp $ getField r (getIndex p) + + getString t = case t of + K (KS s) -> s + _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" + + getIndex t = case t of + C i -> fromInteger i + RP p _ -> getIndex p + TM -> 0 -- default value for parameter + _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 + + getField t i = case t of + R rs -> idx rs i + RP _ r -> getField r i + TM -> TM + _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t +``` + +===The special term constructors=== + +The three forms introduced by the compiler may a need special +explanation. + +Global constants +``` + Term ::= CId ; +``` +are shorthands for complex terms. They are produced by the +compiler by (iterated) **common subexpression elimination**. +They are often more powerful than hand-devised code sharing in the source +code. They could be computed off-line by replacing each identifier by +its definition. + +**Prefix-suffix tables** +``` + Term ::= "(" String "+" Term ")" ; +``` +represent tables of word forms divided to the longest common prefix +and its array of suffixes. In the example grammar above, we have +``` + Sleep = [("sleep" + ["s",""])] +``` +which in fact is equal to the array of full forms +``` + ["sleeps", "sleep"] +``` +The power of this construction comes from the fact that suffix sets +tend to be repeated in a language, and can therefore be collected +by common subexpression elimination. It is this technique that +explains the used syntax rather than the more accurate +``` + "(" String "+" [String] ")" +``` +since we want the suffix part to be a ``Term`` for the optimization to +take effect. + + + +==Compiling to GFCC== + +Compilation to GFCC is performed by the GF grammar compiler, and +GFCC interpreters need not know what it does. For grammar writers, +however, it might be interesting to know what happens to the grammars +in the process. + +The compilation phases are the following ++ type check and partially evaluate GF source ++ create a symbol table mapping the GF parameter and record types to + fixed-size arrays, and parameter values and record labels to integers ++ traverse the linearization rules replacing parameters and labels by integers ++ reorganize the created GF grammar so that it has just one abstract syntax + and one concrete syntax per language ++ TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the + ``coding`` flag) ++ translate the GF grammar object to a GFCC grammar object, using a simple + compositional mapping ++ perform the word-suffix optimization on GFCC linearization terms ++ perform subexpression elimination on each concrete syntax module ++ print out the GFCC code + + + + +===Problems in GFCC compilation=== + +Two major problems had to be solved in compiling GF to GFCC: +- consistent order of tables and records, to permit the array translation +- run-time variables in complex parameter values. + + +The current implementation is still experimental and may fail +to generate correct code. Any errors remaining are likely to be +related to the two problems just mentioned. + +The order problem is solved in slightly different ways for tables and records. +In both cases, **eta expansion** is used to establish a +canonical order. Tables are ordered by applying the preorder induced +by ``param`` definitions. Records are ordered by sorting them by labels. +This means that +e.g. the ``s`` field will in general no longer appear as the first +field, even if it does so in the GF source code. But relying on the +order of fields in a labelled record would be misplaced anyway. + +The canonical form of records is further complicated by lock fields, +i.e. dummy fields of form ``lock_C = <>``, which are added to grammar +libraries to force intensionality of linearization types. The problem +is that the absence of a lock field only generates a warning, not +an error. Therefore a GF grammar can contain objects of the same +type with and without a lock field. This problem was solved in GFCC +generation by just removing all lock fields (defined as fields whose +type is the empty record type). This has the further advantage of +(slightly) reducing the grammar size. More importantly, it is safe +to remove lock fields, because they are never used in computation, +and because intensional types are only needed in grammars reused +as libraries, not in grammars used at runtime. + +While the order problem is rather bureaucratic in nature, run-time +variables are an interesting problem. They arise in the presence +of complex parameter values, created by argument-taking constructors +and parameter records. To give an example, consider the GF parameter +type system +``` + Number = Sg | Pl ; + Person = P1 | P2 | P3 ; + Agr = Ag Number Person ; +``` +The values can be translated to integers in the expected way, +``` + Sg = 0, Pl = 1 + P1 = 0, P2 = 1, P3 = 2 + Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2, + Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5 +``` +However, an argument of ``Agr`` can be a run-time variable, as in +``` + Ag np.n P3 +``` +This expression must first be translated to a case expression, +``` + case np.n of { + 0 => 2 ; + 1 => 5 + } +``` +which can then be translated to the GFCC term +``` + ([2,5] ! ($0 ! $1)) +``` +assuming that the variable ``np`` is the first argument and that its +``Number`` field is the second in the record. + +This transformation of course has to be performed recursively, since +there can be several run-time variables in a parameter value: +``` + Ag np.n np.p +``` +A similar transformation would be possible to deal with the double +role of parameter records discussed above. Thus the type +``` + RNP = {n : Number ; p : Person} +``` +could be uniformly translated into the set ``{0,1,2,3,4,5}`` +as ``Agr`` above. Selections would be simple instances of indexing. +But any projection from the record should be translated into +a case expression, +``` + rnp.n ===> + case rnp of { + 0 => 0 ; + 1 => 0 ; + 2 => 0 ; + 3 => 1 ; + 4 => 1 ; + 5 => 1 + } +``` +To avoid the code bloat resulting from this, we have chosen to +deal with records by a **currying** transformation: +``` + table {n : Number ; p : Person} {... ...} + ===> + table Number {Sg => table Person {...} ; table Person {...}} +``` +This is performed when GFCC is generated. Selections with +records have to be treated likewise, +``` + t ! r ===> t ! r.n ! r.p +``` + + +===The representation of linearization types=== + +Linearization types (``lincat``) are not needed when generating with +GFCC, but they have been added to enable parser generation directly from +GFCC. The linearization type definitions are shown as a part of the +concrete syntax, by using terms to represent types. Here is the table +showing how different linearization types are encoded. +``` + P* = max(P) -- parameter type + {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- record + (P => T)* = [T* ,...,T*] -- table, size(P) cases + Str* = () +``` +For example, the linearization type ``present/CatEng.NP`` is +translated as follows: +``` + NP = { + a : { -- 6 = 2*3 values + n : {ParamX.Number} ; -- 2 values + p : {ParamX.Person} -- 3 values + } ; + s : {ResEng.Case} => Str -- 3 values + } + + __NP = [[1,2],[(),(),()]] +``` + + + + +===Running the compiler and the GFCC interpreter=== + +GFCC generation is a part of the +[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] +of GF since September 2006. To invoke the compiler, the flag +``-printer=gfcc`` to the command +``pm = print_multi`` is used. It is wise to recompile the grammar from +source, since previously compiled libraries may not obey the canonical +order of records. +Here is an example, performed in +[example/bronzeage ../../../../../examples/bronzeage]. +``` + i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf + i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf + strip + pm -printer=gfcc | wf bronze.gfcc +``` +There is also an experimental batch compiler, which does not use the GFC +format or the record aliases. It can be produced by +``` + make gfc +``` +in ``GF/src``, and invoked by +``` + gfc --make FILES +``` + + + + +==The reference interpreter== + +The reference interpreter written in Haskell consists of the following files: +``` + -- source file for BNFC + GFCC.cf -- labelled BNF grammar of gfcc + + -- files generated by BNFC + AbsGFCC.hs -- abstrac syntax datatypes + ErrM.hs -- error monad used internally + LexGFCC.hs -- lexer of gfcc files + ParGFCC.hs -- parser of gfcc files and syntax trees + PrintGFCC.hs -- printer of gfcc files and syntax trees + + -- hand-written files + DataGFCC.hs -- grammar datatype, post-parser grammar creation + Linearize.hs -- linearization and evaluation + Macros.hs -- utilities abstracting away from GFCC datatypes + Generate.hs -- random and exhaustive generation, generate-and-test parsing + API.hs -- functionalities accessible in embedded GF applications + Generate.hs -- random and exhaustive generation + Shell.hs -- main function - a simple command interpreter +``` +It is included in the +[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] +of GF, in the subdirectories [``GF/src/GF/GFCC`` ../] and +[``GF/src/GF/Devel`` ../../Devel]. + +As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir +Angelov). The interpreter uses the relevant modules +``` + GF/Conversions/SimpleToFCFG.hs -- generate parser from GFCC + GF/Parsing/FCFG.hs -- run the parser +``` + + +To compile the interpreter, type +``` + make gfcc +``` +in ``GF/src``. To run it, type +``` + ./gfcc +``` +The available commands are +- ``gr ``: generate a number of random trees in category. + and show their linearizations in all languages +- ``grt ``: generate a number of random trees in category. + and show the trees and their linearizations in all languages +- ``gt ``: generate a number of trees in category from smallest, + and show their linearizations in all languages +- ``gtt ``: generate a number of trees in category from smallest, + and show the trees and their linearizations in all languages +- ``p ``: parse a string into a set of trees +- ``lin ``: linearize tree in all languages, also showing full records +- ``q``: terminate the system cleanly + + + +==Embedded formats== + +- JavaScript: compiler of linearization and abstract syntax + +- Haskell: compiler of abstract syntax and interpreter with parsing, + linearization, and generation + +- C: compiler of linearization (old GFCC) + +- C++: embedded interpreter supporting linearization (old GFCC) + + + +==Some things to do== + +Support for dependent types, higher-order abstract syntax, and +semantic definition in GFCC generation and interpreters. + +Replacing the entire GF shell by one based on GFCC. + +Interpreter in Java. + +Hand-written parsers for GFCC grammars to reduce code size +(and efficiency?) of interpreters. + +Binary format and/or file compression of GFCC output. + +Syntax editor based on GFCC. + +Rewriting of resource libraries in order to exploit the +word-suffix sharing better (depth-one tables, as in FM). + diff --git a/src-3.0/PGF/doc/old-GFCC.cf b/src-3.0/PGF/doc/old-GFCC.cf new file mode 100644 index 000000000..65657a259 --- /dev/null +++ b/src-3.0/PGF/doc/old-GFCC.cf @@ -0,0 +1,50 @@ +Grm. Grammar ::= Header ";" Abstract ";" [Concrete] ; +Hdr. Header ::= "grammar" CId "(" [CId] ")" ; +Abs. Abstract ::= "abstract" "{" [AbsDef] "}" ; +Cnc. Concrete ::= "concrete" CId "{" [CncDef] "}" ; + +Fun. AbsDef ::= CId ":" Type "=" Exp ; +--AFl. AbsDef ::= "%" CId "=" String ; -- flag +Lin. CncDef ::= CId "=" Term ; +--CFl. CncDef ::= "%" CId "=" String ; -- flag + +Typ. Type ::= [CId] "->" CId ; +Tr. Exp ::= "(" Atom [Exp] ")" ; +AC. Atom ::= CId ; +AS. Atom ::= String ; +AI. Atom ::= Integer ; +AF. Atom ::= Double ; +AM. Atom ::= "?" ; +trA. Exp ::= Atom ; +define trA a = Tr a [] ; + +R. Term ::= "[" [Term] "]" ; -- record/table +P. Term ::= "(" Term "!" Term ")" ; -- projection/selection +S. Term ::= "(" [Term] ")" ; -- sequence with ++ +K. Term ::= Tokn ; -- token +V. Term ::= "$" Integer ; -- argument +C. Term ::= Integer ; -- parameter value/label +F. Term ::= CId ; -- global constant +FV. Term ::= "[|" [Term] "|]" ; -- free variation +W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table +RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias +TM. Term ::= "?" ; -- lin of metavariable + +L. Term ::= "(" CId "->" Term ")" ; -- lambda abstracted table +BV. Term ::= "#" CId ; -- lambda-bound variable + +KS. Tokn ::= String ; +KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; +Var. Variant ::= [String] "/" [String] ; + + +terminator Concrete ";" ; +terminator AbsDef ";" ; +terminator CncDef ";" ; +separator CId "," ; +separator Term "," ; +terminator Exp "" ; +terminator String "" ; +separator Variant "," ; + +token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; diff --git a/src-3.0/PGF/doc/old-gfcc.txt b/src-3.0/PGF/doc/old-gfcc.txt new file mode 100644 index 000000000..6ffd9bd64 --- /dev/null +++ b/src-3.0/PGF/doc/old-gfcc.txt @@ -0,0 +1,656 @@ +The GFCC Grammar Format +Aarne Ranta +October 19, 2006 + +Author's address: +[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne] + +% to compile: txt2tags -thtml --toc gfcc.txt + +History: +- 19 Oct: translation of lincats, new figures on C++ +- 3 Oct 2006: first version + + +==What is GFCC== + +GFCC is a low-level format for GF grammars. Its aim is to contain the minimum +that is needed to process GF grammars at runtime. This minimality has three +advantages: +- compact grammar files and run-time objects +- time and space efficient processing +- simple definition of interpreters + + +The idea is that all embedded GF applications are compiled to GFCC. +The GF system would be primarily used as a compiler and as a grammar +development tool. + +Since GFCC is implemented in BNFC, a parser of the format is readily +available for C, C++, Haskell, Java, and OCaml. Also an XML +representation is generated in BNFC. A +[reference implementation ../] +of linearization and some other functions has been written in Haskell. + + +==GFCC vs. GFC== + +GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed +to be a run-time format, but also to +support separate compilation of grammars, i.e. +to store the results of compiling +individual GF modules. But this means that GFC has to contain extra information, +such as type annotations, which is only needed in compilation and not at +run-time. In particular, the pattern matching syntax and semantics of GFC is +complex and therefore difficult to implement in new platforms. + +The main differences of GFCC compared with GFC can be summarized as follows: +- there are no modules, and therefore no qualified names +- a GFCC grammar is multilingual, and consists of a common abstract syntax + together with one concrete syntax per language +- records and tables are replaced by arrays +- record labels and parameter values are replaced by integers +- record projection and table selection are replaced by array indexing +- there is (so far) no support for dependent types or higher-order abstract + syntax (which would be easy to add, but make interpreters much more difficult + to write) + + +Here is an example of a GF grammar, consisting of three modules, +as translated to GFCC. The representations are aligned, with the exceptions +due to the alphabetical sorting of GFCC grammars. +``` + grammar Ex(Eng,Swe); + +abstract Ex = { abstract { + cat + S ; NP ; VP ; + fun + Pred : NP -> VP -> S ; Pred : NP,VP -> S = (Pred); + She, They : NP ; She : -> NP = (She); + Sleep : VP ; Sleep : -> VP = (Sleep); + They : -> NP = (They); +} } ; + +concrete Eng of Ex = { concrete Eng { + lincat + S = {s : Str} ; + NP = {s : Str ; n : Num} ; + VP = {s : Num => Str} ; + param + Num = Sg | Pl ; + lin + Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))]; + s = np.s ++ vp.s ! np.n} ; + She = {s = "she" ; n = Sg} ; She = [0, "she"]; + They = {s = "they" ; n = Pl} ; + Sleep = {s = table { Sleep = [("sleep" + ["s",""])]; + Sg => "sleeps" ; + Pl => "sleep" They = [1, "they"]; + } } ; + } ; +} + +concrete Swe of Ex = { concrete Swe { + lincat + S = {s : Str} ; + NP = {s : Str} ; + VP = {s : Str} ; + param + Num = Sg | Pl ; + lin + Pred np vp = { Pred = [(($0!0),($1!0))]; + s = np.s ++ vp.s} ; + She = {s = "hon"} ; She = ["hon"]; + They = {s = "de"} ; They = ["de"]; + Sleep = {s = "sover"} ; Sleep = ["sover"]; +} } ; +``` + +==The syntax of GFCC files== + +===Top level=== + +A grammar has a header telling the name of the abstract syntax +(often specifying an application domain), and the names of +the concrete languages. The abstract syntax and the concrete +syntaxes themselves follow. +``` + Grammar ::= Header ";" Abstract ";" [Concrete] ; + Header ::= "grammar" CId "(" [CId] ")" ; + Abstract ::= "abstract" "{" [AbsDef] "}" ; + Concrete ::= "concrete" CId "{" [CncDef] "}" ; +``` +Abstract syntax judgements give typings and semantic definitions. +Concrete syntax judgements give linearizations. +``` + AbsDef ::= CId ":" Type "=" Exp ; + CncDef ::= CId "=" Term ; +``` +Also flags are possible, local to each "module" (i.e. abstract and concretes). +``` + AbsDef ::= "%" CId "=" String ; + CncDef ::= "%" CId "=" String ; +``` +For the run-time system, the reference implementation in Haskell +uses a structure that gives efficient look-up: +``` + data GFCC = GFCC { + absname :: CId , + cncnames :: [CId] , + abstract :: Abstr , + concretes :: Map CId Concr + } + + data Abstr = Abstr { + funs :: Map CId Type, -- find the type of a fun + cats :: Map CId [CId] -- find the funs giving a cat + } + + type Concr = Map CId Term +``` + + +===Abstract syntax=== + +Types are first-order function types built from +category symbols. Syntax trees (``Exp``) are +rose trees with the head (``Atom``) either a function +constant, a metavariable, or a string, integer, or float +literal. +``` + Type ::= [CId] "->" CId ; + Exp ::= "(" Atom [Exp] ")" ; + Atom ::= CId ; -- function constant + Atom ::= "?" ; -- metavariable + Atom ::= String ; -- string literal + Atom ::= Integer ; -- integer literal + Atom ::= Double ; -- float literal +``` + + +===Concrete syntax=== + +Linearization terms (``Term``) are built as follows. +Constructor names are shown to make the later code +examples readable. +``` + R. Term ::= "[" [Term] "]" ; -- array + P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field + S. Term ::= "(" [Term] ")" ; -- sequence with ++ + K. Term ::= Tokn ; -- token + V. Term ::= "$" Integer ; -- argument + C. Term ::= Integer ; -- array index + FV. Term ::= "[|" [Term] "|]" ; -- free variation + TM. Term ::= "?" ; -- linearization of metavariable +``` +Tokens are strings or (maybe obsolescent) prefix-dependent +variant lists. +``` + KS. Tokn ::= String ; + KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; + Var. Variant ::= [String] "/" [String] ; +``` +Three special forms of terms are introduced by the compiler +as optimizations. They can in principle be eliminated, but +their presence makes grammars much more compact. Their semantics +will be explained in a later section. +``` + F. Term ::= CId ; -- global constant + W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table + RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias +``` +Identifiers are like ``Ident`` in GF and GFC, except that +the compiler produces constants prefixed with ``_`` in +the common subterm elimination optimization. +``` + token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; +``` + + +==The semantics of concrete syntax terms== + +===Linearization and realization=== + +The linearization algorithm is essentially the same as in +GFC: a tree is linearized by evaluating its linearization term +in the environment of the linearizations of the subtrees. +Literal atoms are linearized in the obvious way. +The function also needs to know the language (i.e. concrete syntax) +in which linearization is performed. +``` + linExp :: GFCC -> CId -> Exp -> Term + linExp mcfg lang tree@(Tr at trees) = case at of + AC fun -> comp (Prelude.map lin trees) $ look fun + AS s -> R [kks (show s)] -- quoted + AI i -> R [kks (show i)] + AF d -> R [kks (show d)] + AM -> TM + where + lin = linExp mcfg lang + comp = compute mcfg lang + look = lookLin mcfg lang +``` +The result of linearization is usually a record, which is realized as +a string using the following algorithm. +``` + realize :: Term -> String + realize trm = case trm of + R (t:_) -> realize t + S ss -> unwords $ Prelude.map realize ss + K (KS s) -> s + K (KP s _) -> unwords s ---- prefix choice TODO + W s t -> s ++ realize t + FV (t:_) -> realize t + TM -> "?" +``` +Since the order of record fields is not necessarily +the same as in GF source, +this realization does not work securely for +categories whose lincats more than one field. + + +===Term evaluation=== + +Evaluation follows call-by-value order, with two environments +needed: +- the grammar (a concrete syntax) to give the global constants +- an array of terms to give the subtree linearizations + + +The code is presented in one-level pattern matching, to +enable reimplementations in languages that do not permit +deep patterns (such as Java and C++). +``` +compute :: GFCC -> CId -> [Term] -> Term -> Term +compute mcfg lang args = comp where + comp trm = case trm of + P r p -> proj (comp r) (comp p) + RP i t -> RP (comp i) (comp t) + W s t -> W s (comp t) + R ts -> R $ Prelude.map comp ts + V i -> idx args (fromInteger i) -- already computed + F c -> comp $ look c -- not computed (if contains V) + FV ts -> FV $ Prelude.map comp ts + S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts + _ -> trm + + look = lookLin mcfg lang + + idx xs i = xs !! i + + proj r p = case (r,p) of + (_, FV ts) -> FV $ Prelude.map (proj r) ts + (W s t, _) -> kks (s ++ getString (proj t p)) + _ -> comp $ getField r (getIndex p) + + getString t = case t of + K (KS s) -> s + _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" + + getIndex t = case t of + C i -> fromInteger i + RP p _ -> getIndex p + TM -> 0 -- default value for parameter + _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 + + getField t i = case t of + R rs -> idx rs i + RP _ r -> getField r i + TM -> TM + _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t +``` + +===The special term constructors=== + +The three forms introduced by the compiler may a need special +explanation. + +Global constants +``` + Term ::= CId ; +``` +are shorthands for complex terms. They are produced by the +compiler by (iterated) common subexpression elimination. +They are often more powerful than hand-devised code sharing in the source +code. They could be computed off-line by replacing each identifier by +its definition. + +Prefix-suffix tables +``` + Term ::= "(" String "+" Term ")" ; +``` +represent tables of word forms divided to the longest common prefix +and its array of suffixes. In the example grammar above, we have +``` + Sleep = [("sleep" + ["s",""])] +``` +which in fact is equal to the array of full forms +``` + ["sleeps", "sleep"] +``` +The power of this construction comes from the fact that suffix sets +tend to be repeated in a language, and can therefore be collected +by common subexpression elimination. It is this technique that +explains the used syntax rather than the more accurate +``` + "(" String "+" [String] ")" +``` +since we want the suffix part to be a ``Term`` for the optimization to +take effect. + +The most curious construct of GFCC is the parameter array alias, +``` + Term ::= "(" Term "@" Term ")"; +``` +This form is used as the value of parameter records, such as the type +``` + {n : Number ; p : Person} +``` +The problem with parameter records is their double role. +They can be used like parameter values, as indices in selection, +``` + VP.s ! {n = Sg ; p = P3} +``` +but also as records, from which parameters can be projected: +``` + {n = Sg ; p = P3}.n +``` +Whichever use is selected as primary, a prohibitively complex +case expression must be generated at compilation to GFCC to get the +other use. The adopted +solution is to generate a pair containing both a parameter value index +and an array of indices of record fields. For instance, if we have +``` + param Number = Sg | Pl ; Person = P1 | P2 | P3 ; +``` +we get the encoding +``` + {n = Sg ; p = P3} ---> (2 @ [0,2]) +``` +The GFCC computation rules are essentially +``` + (t ! (i @ _)) = (t ! i) + ((_ @ r) ! j) =(r ! j) +``` + + +==Compiling to GFCC== + +Compilation to GFCC is performed by the GF grammar compiler, and +GFCC interpreters need not know what it does. For grammar writers, +however, it might be interesting to know what happens to the grammars +in the process. + +The compilation phases are the following ++ translate GF source to GFC, as always in GF ++ undo GFC back-end optimizations ++ perform the ``values`` optimization to normalize tables ++ create a symbol table mapping the GFC parameter and record types to + fixed-size arrays, and parameter values and record labels to integers ++ traverse the linearization rules replacing parameters and labels by integers ++ reorganize the created GFC grammar so that it has just one abstract syntax + and one concrete syntax per language ++ apply UTF8 encoding to the grammar, if not yet applied (this is told by the + ``coding`` flag) ++ translate the GFC syntax tree to a GFCC syntax tree, using a simple + compositional mapping ++ perform the word-suffix optimization on GFCC linearization terms ++ perform subexpression elimination on each concrete syntax module ++ print out the GFCC code + + +Notice that a major part of the compilation is done within GFC, so that +GFC-related tasks (such as parser generation) could be performed by +using the old algorithms. + + +===Problems in GFCC compilation=== + +Two major problems had to be solved in compiling GFC to GFCC: +- consistent order of tables and records, to permit the array translation +- run-time variables in complex parameter values. + + +The current implementation is still experimental and may fail +to generate correct code. Any errors remaining are likely to be +related to the two problems just mentioned. + +The order problem is solved in different ways for tables and records. +For tables, the ``values`` optimization of GFC already manages to +maintain a canonical order. But this order can be destroyed by the +``share`` optimization. To make sure that GFCC compilation works properly, +it is safest to recompile the GF grammar by using the ``values`` +optimization flag. + +Records can be canonically ordered by sorting them by labels. +In fact, this was done in connection of the GFCC work as a part +of the GFC generation, to guarantee consistency. This means that +e.g. the ``s`` field will in general no longer appear as the first +field, even if it does so in the GF source code. But relying on the +order of fields in a labelled record would be misplaced anyway. + +The canonical form of records is further complicated by lock fields, +i.e. dummy fields of form ``lock_C = <>``, which are added to grammar +libraries to force intensionality of linearization types. The problem +is that the absence of a lock field only generates a warning, not +an error. Therefore a GFC grammar can contain objects of the same +type with and without a lock field. This problem was solved in GFCC +generation by just removing all lock fields (defined as fields whose +type is the empty record type). This has the further advantage of +(slightly) reducing the grammar size. More importantly, it is safe +to remove lock fields, because they are never used in computation, +and because intensional types are only needed in grammars reused +as libraries, not in grammars used at runtime. + +While the order problem is rather bureaucratic in nature, run-time +variables are an interesting problem. They arise in the presence +of complex parameter values, created by argument-taking constructors +and parameter records. To give an example, consider the GF parameter +type system +``` + Number = Sg | Pl ; + Person = P1 | P2 | P3 ; + Agr = Ag Number Person ; +``` +The values can be translated to integers in the expected way, +``` + Sg = 0, Pl = 1 + P1 = 0, P2 = 1, P3 = 2 + Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2, + Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5 +``` +However, an argument of ``Agr`` can be a run-time variable, as in +``` + Ag np.n P3 +``` +This expression must first be translated to a case expression, +``` + case np.n of { + 0 => 2 ; + 1 => 5 + } +``` +which can then be translated to the GFCC term +``` + ([2,5] ! ($0 ! $1)) +``` +assuming that the variable ``np`` is the first argument and that its +``Number`` field is the second in the record. + +This transformation of course has to be performed recursively, since +there can be several run-time variables in a parameter value: +``` + Ag np.n np.p +``` +A similar transformation would be possible to deal with the double +role of parameter records discussed above. Thus the type +``` + RNP = {n : Number ; p : Person} +``` +could be uniformly translated into the set ``{0,1,2,3,4,5}`` +as ``Agr`` above. Selections would be simple instances of indexing. +But any projection from the record should be translated into +a case expression, +``` + rnp.n ===> + case rnp of { + 0 => 0 ; + 1 => 0 ; + 2 => 0 ; + 3 => 1 ; + 4 => 1 ; + 5 => 1 + } +``` +To avoid the code bloat resulting from this, we chose the alias representation +which is easy enough to deal with in interpreters. + + +===The representation of linearization types=== + +Linearization types (``lincat``) are not needed when generating with +GFCC, but they have been added to enable parser generation directly from +GFCC. The linearization type definitions are shown as a part of the +concrete syntax, by using terms to represent types. Here is the table +showing how different linearization types are encoded. +``` + P* = size(P) -- parameter type + {_ : I ; __ : R}* = (I* @ R*) -- record of parameters + {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- other record + (P => T)* = [T* ,...,T*] -- size(P) times + Str* = () +``` +The category symbols are prefixed with two underscores (``__``). +For example, the linearization type ``present/CatEng.NP`` is +translated as follows: +``` + NP = { + a : { -- 6 = 2*3 values + n : {ParamX.Number} ; -- 2 values + p : {ParamX.Person} -- 3 values + } ; + s : {ResEng.Case} => Str -- 3 values + } + + __NP = [(6@[2,3]),[(),(),()]] +``` + + + + +===Running the compiler and the GFCC interpreter=== + +GFCC generation is a part of the +[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] +of GF since September 2006. To invoke the compiler, the flag +``-printer=gfcc`` to the command +``pm = print_multi`` is used. It is wise to recompile the grammar from +source, since previously compiled libraries may not obey the canonical +order of records. To ``strip`` the grammar before +GFCC translation removes unnecessary interface references. +Here is an example, performed in +[example/bronzeage ../../../../../examples/bronzeage]. +``` + i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf + i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf + strip + pm -printer=gfcc | wf bronze.gfcc +``` + + + +==The reference interpreter== + +The reference interpreter written in Haskell consists of the following files: +``` + -- source file for BNFC + GFCC.cf -- labelled BNF grammar of gfcc + + -- files generated by BNFC + AbsGFCC.hs -- abstrac syntax of gfcc + ErrM.hs -- error monad used internally + LexGFCC.hs -- lexer of gfcc files + ParGFCC.hs -- parser of gfcc files and syntax trees + PrintGFCC.hs -- printer of gfcc files and syntax trees + + -- hand-written files + DataGFCC.hs -- post-parser grammar creation, linearization and evaluation + GenGFCC.hs -- random and exhaustive generation, generate-and-test parsing + RunGFCC.hs -- main function - a simple command interpreter +``` +It is included in the +[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] +of GF, in the subdirectory [``GF/src/GF/Canon/GFCC`` ../]. + +To compile the interpreter, type +``` + make gfcc +``` +in ``GF/src``. To run it, type +``` + ./gfcc +``` +The available commands are +- ``gr ``: generate a number of random trees in category. + and show their linearizations in all languages +- ``grt ``: generate a number of random trees in category. + and show the trees and their linearizations in all languages +- ``gt ``: generate a number of trees in category from smallest, + and show their linearizations in all languages +- ``gtt ``: generate a number of trees in category from smallest, + and show the trees and their linearizations in all languages +- ``p ``: "parse", i.e. generate trees until match or + until the given number have been generated +- ````: linearize tree in all languages, also showing full records +- ``quit``: terminate the system cleanly + + +==Interpreter in C++== + +A base-line interpreter in C++ has been started. +Its main functionality is random generation of trees and linearization of them. + +Here are some results from running the different interpreters, compared +to running the same grammar in GF, saved in ``.gfcm`` format. +The grammar contains the English, German, and Norwegian +versions of Bronzeage. The experiment was carried out on +Ubuntu Linux laptop with 1.5 GHz Intel centrino processor. + +|| | GF | gfcc(hs) | gfcc++ | +| program size | 7249k | 803k | 113k +| grammar size | 336k | 119k | 119k +| read grammar | 1150ms | 510ms | 100ms +| generate 222 | 9500ms | 450ms | 800ms +| memory | 21M | 10M | 20M + + + +To summarize: +- going from GF to gfcc is a major win in both code size and efficiency +- going from Haskell to C++ interpreter is not a win yet, because of a space + leak in the C++ version + + + +==Some things to do== + +Interpreter in Java. + +Parsing via MCFG +- the FCFG format can possibly be simplified +- parser grammars should be saved in files to make interpreters easier + + +Hand-written parsers for GFCC grammars to reduce code size +(and efficiency?) of interpreters. + +Binary format and/or file compression of GFCC output. + +Syntax editor based on GFCC. + +Rewriting of resource libraries in order to exploit the +word-suffix sharing better (depth-one tables, as in FM). + + + diff --git a/src-3.0/PGF/doc/syntax.txt b/src-3.0/PGF/doc/syntax.txt new file mode 100644 index 000000000..db8f7c149 --- /dev/null +++ b/src-3.0/PGF/doc/syntax.txt @@ -0,0 +1,180 @@ +GFCC Syntax + + +==Syntax of GFCC files== + +The parser syntax is very simple, as defined in BNF: +``` + Grm. Grammar ::= [RExp] ; + + App. RExp ::= "(" CId [RExp] ")" ; + AId. RExp ::= CId ; + AInt. RExp ::= Integer ; + AStr. RExp ::= String ; + AFlt. RExp ::= Double ; + AMet. RExp ::= "?" ; + + terminator RExp "" ; + + token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; +``` +While a parser and a printer can be generated for many languages +from this grammar by using the BNF Converter, a parser is also +easy to write by hand using recursive descent. + + +==Syntax of well-formed GFCC code== + +Here is a summary of well-formed syntax, +with a comment on the semantics of each construction. +``` + Grammar ::= + ("grammar" CId CId*) -- abstract syntax name and concrete syntax names + "(" "flags" Flag* ")" -- global and abstract flags + "(" "abstract" Abstract ")" -- abstract syntax + "(" "concrete" Concrete* ")" -- concrete syntaxes + + Abstract ::= + "(" "fun" FunDef* ")" -- function definitions + "(" "cat" CatDef* ")" -- category definitions + + Concrete ::= + "(" CId -- language name + "flags" Flag* -- concrete flags + "lin" LinDef* -- linearization rules + "oper" LinDef* -- operations (macros) + "lincat" LinDef* -- linearization type definitions + "lindef" LinDef* -- linearization default definitions + "printname" LinDef* -- printname definitions + "param" LinDef* -- lincats with labels and parameter value names + ")" + + Flag ::= "(" CId String ")" -- flag and value + FunDef ::= "(" CId Type Exp ")" -- function, type, and definition + CatDef ::= "(" CId Hypo* ")" -- category and context + LinDef ::= "(" CId Term ")" -- function and definition + + Type ::= + "(" CId -- value category + "(" "H" Hypo* ")" -- argument context + "(" "X" Exp* ")" ")" -- arguments (of dependent value type) + + Exp ::= + "(" CId -- function + "(" "B" CId* ")" -- bindings + "(" "X" Exp* ")" ")" -- arguments + | CId -- variable + | "?" -- metavariable + | "(" "Eq" Equation* ")" -- group of pattern equations + | Integer -- integer literal (non-negative) + | Float -- floating-point literal (non-negative) + | String -- string literal (in double quotes) + + Hypo ::= "(" CId Type ")" -- variable and type + + Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list + + Term ::= + "(" "R" Term* ")" -- array (record or table) + | "(" "S" Term* ")" -- concatenated sequence + | "(" "FV" Term* ")" -- free variant list + | "(" "P" Term Term ")" -- access to index (projection or selection) + | "(" "W" String Term ")" -- token prefix with suffix list + | "(" "A" Integer ")" -- pointer to subtree + | String -- token (in double quotes) + | Integer -- index in array + | CId -- macro constant + | "?" -- metavariable +``` + + +==GFCC interpreter== + +The first phase in interpreting GFCC is to parse a GFCC file and +build an internal abstract syntax representation, as specified +in the previous section. + +With this representation, linearization can be performed by +a straightforward function from expressions (``Exp``) to terms +(``Term``). All expressions except groups of pattern equations +can be linearized. + +Here is a reference Haskell implementation of linearization: +``` + linExp :: GFCC -> CId -> Exp -> Term + linExp gfcc lang tree@(DTr _ at trees) = case at of + AC fun -> comp (map lin trees) $ look fun + AS s -> R [K (show s)] -- quoted + AI i -> R [K (show i)] + AF d -> R [K (show d)] + AM -> TM + where + lin = linExp gfcc lang + comp = compute gfcc lang + look = lookLin gfcc lang +``` +TODO: bindings must be supported. + +Terms resulting from linearization are evaluated in +call-by-value order, with two environments needed: +- the grammar (a concrete syntax) to give the global constants +- an array of terms to give the subtree linearizations + + +The Haskell implementation works as follows: +``` +compute :: GFCC -> CId -> [Term] -> Term -> Term +compute gfcc lang args = comp where + comp trm = case trm of + P r p -> proj (comp r) (comp p) + W s t -> W s (comp t) + R ts -> R $ map comp ts + V i -> idx args (fromInteger i) -- already computed + F c -> comp $ look c -- not computed (if contains V) + FV ts -> FV $ Prelude.map comp ts + S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts + _ -> trm + + look = lookOper gfcc lang + + idx xs i = xs !! i + + proj r p = case (r,p) of + (_, FV ts) -> FV $ Prelude.map (proj r) ts + (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts + (W s t, _) -> kks (s ++ getString (proj t p)) + _ -> comp $ getField r (getIndex p) + + getString t = case t of + K (KS s) -> s + _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" + + getIndex t = case t of + C i -> fromInteger i + RP p _ -> getIndex p + TM -> 0 -- default value for parameter + _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 + + getField t i = case t of + R rs -> idx rs i + RP _ r -> getField r i + TM -> TM + _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t +``` +The result of linearization is usually a record, which is realized as +a string using the following algorithm. +``` + realize :: Term -> String + realize trm = case trm of + R (t:_) -> realize t + S ss -> unwords $ map realize ss + K s -> s + W s t -> s ++ realize t + FV (t:_) -> realize t -- TODO: all variants + TM -> "?" +``` +Notice that realization always picks the first field of a record. +If a linearization type has more than one field, the first field +does not necessarily contain the desired string. +Also notice that the order of record fields in GFCC is not necessarily +the same as in GF source. -- cgit v1.2.3