diff options
Diffstat (limited to 'src/PGF.hs')
| -rw-r--r-- | src/PGF.hs | 224 |
1 files changed, 224 insertions, 0 deletions
-------------------------------------------------
-- |
-- Module      : PGF
-- Maintainer  : Aarne Ranta
-- Stability   : stable
-- Portability : portable
--
-- This module is an Application Programming Interface to
-- load and interpret grammars compiled in Portable Grammar Format (PGF).
-- The PGF format is produced as a final output from the GF compiler.
-- The API is meant to be used for embedding GF grammars in Haskell
-- programs.
-------------------------------------------------

module PGF(
           -- * PGF
           PGF,
           readPGF,

           -- * Identifiers
           -- ** CId
           CId, mkCId, prCId,

           -- ** Language
           Language, languages, abstractName,

           -- ** Category
           Category, categories, startCat,

           -- * Expressions
           -- ** Tree
           Tree(..), Literal(..),
           showTree, readTree,

           -- ** Expr
           Expr(..), Equation(..),
           showExpr, readExpr,

           -- * Operations
           -- ** Linearization
           linearize, linearizeAllLang, linearizeAll,

           -- ** Parsing
           parse, parseAllLang, parseAll,

           -- ** Evaluation
           tree2expr, expr2tree,

           -- ** Word Completion (Incremental Parsing)
           Incremental.ParseState,
           initState, Incremental.nextState, Incremental.getCompletions, extractExps,

           -- ** Generation
           generateRandom, generateAll, generateAllDepth
          ) where

import PGF.CId
import PGF.Linearize
import PGF.Generate
import PGF.Macros
import PGF.Data
import PGF.Expr
import PGF.Raw.Convert
import PGF.Raw.Parse
import PGF.Raw.Print (printTree)
import PGF.Parsing.FCFG
import qualified PGF.Parsing.FCFG.Incremental as Incremental
import GF.Text.UTF8

import GF.Data.ErrM

import qualified Data.Map as Map
import System.Random (newStdGen)

---------------------------------------------------
-- Interface
---------------------------------------------------

-- | This is just a string with the language name.
-- A language name is the identifier that you write in the
-- top concrete or abstract module in GF after the
-- concrete/abstract keyword. Example:
--
-- > abstract Lang = ...
-- > concrete LangEng of Lang = ...
type Language = String

-- | This is just a string with the category name.
-- The categories are defined in the abstract syntax
-- with the \'cat\' keyword.
type Category = String

-- | Reads a file in Portable Grammar Format and produces
-- a 'PGF' structure. The file is usually produced with:
--
-- > $ gfc --make <grammar file name>
readPGF :: FilePath -> IO PGF

-- | Linearizes the given expression as a string in the given language.
-- Only the first available linearization is used; the result is the
-- empty string when there is none.
linearize :: PGF -> Language -> Tree -> String

-- | Tries to parse the given string in the specified language
-- and to produce abstract syntax expressions. The input is split
-- into tokens on whitespace. An empty list is returned if the
-- parsing is not successful. The list may also contain more than
-- one element if the grammar is ambiguous.
--
-- Calls 'error' if the language is not part of the grammar, if no
-- parser was built for the language, or if the parser itself
-- reports a failure.
parse :: PGF -> Language -> Category -> String -> [Tree]

-- | The same as 'linearizeAllLang' but does not return
-- the language.
linearizeAll :: PGF -> Tree -> [String]

-- | Linearizes the given expression as a string in all languages
-- available in the grammar.
linearizeAllLang :: PGF -> Tree -> [(Language,String)]

-- | The same as 'parseAllLang' but does not return
-- the language.
parseAll :: PGF -> Category -> String -> [[Tree]]

-- | Tries to parse the given string with every language
-- available in the grammar and to produce abstract syntax
-- expressions. The returned list contains pairs of a language
-- and a list of possible expressions. Only those languages
-- for which at least one parse is possible are listed.
-- More than one abstract syntax expression is possible
-- if the grammar is ambiguous.
parseAllLang :: PGF -> Category -> String -> [(Language,[Tree])]

-- | Creates an initial parsing state for a given language and
-- startup category.
--
-- Calls 'error' if no parser can be found for the language.
initState :: PGF -> Language -> Category -> Incremental.ParseState

-- | This function extracts the list of all completed parse trees
-- that span the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
-- the same as the startup category.
extractExps :: Incremental.ParseState -> Category -> [Tree]

-- | The same as 'generateAllDepth' but does not limit
-- the depth in the generation.
generateAll :: PGF -> Category -> [Tree]

-- | Generates an infinite list of random abstract syntax expressions.
-- This is useful for tree bank generation, which can then be used
-- for grammar testing.
generateRandom :: PGF -> Category -> IO [Tree]

-- | Generates an exhaustive, possibly infinite list of
-- abstract syntax expressions. A depth can be specified
-- to limit the search space.
generateAllDepth :: PGF -> Category -> Maybe Int -> [Tree]

-- | List of all languages available in the given grammar.
languages :: PGF -> [Language]

-- | The abstract language name is the name of the top-level
-- abstract module.
abstractName :: PGF -> Language

-- | List of all categories defined in the given grammar.
categories :: PGF -> [Category]

-- | The start category is defined in the grammar with
-- the \'startcat\' flag. This is usually the sentence category,
-- but it does not have to be. Even though a start category is
-- defined, you can parse with any category; the start category
-- definition is just a convenience.
startCat :: PGF -> Category

---------------------------------------------------
-- Implementation
---------------------------------------------------

readPGF f = do
  s <- readFile f
  g <- parseGrammar s
  return $! toPGF g

-- Keep at most the first linearization; 'concat' turns the resulting
-- zero- or one-element list into a plain string.
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf (mkCId lang)

parse pgf lang cat s =
  case Map.lookup (mkCId lang) (concretes pgf) of
    Nothing  -> error ("Unknown language: " ++ lang)
    Just cnc ->
      case parser cnc of
        Nothing    -> error ("No parser built for language: " ++ lang)
        Just pinfo
          -- Concrete syntaxes carrying the flag erasing=on go through
          -- the incremental parser, all others through the bottom-up
          -- FCFG parser.
          | Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
                      -> Incremental.parse pinfo catCId tokens
          | otherwise ->
              case parseFCFG "bottomup" pinfo catCId tokens of
                Ok trees -> trees
                Bad msg  -> error msg
  where
    catCId = mkCId cat
    tokens = words s

linearizeAll mgr = map snd . linearizeAllLang mgr

linearizeAllLang mgr t =
  [(lang,PGF.linearize mgr lang t) | lang <- languages mgr]

parseAll mgr cat = map snd . parseAllLang mgr cat

parseAllLang mgr cat s =
  [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]

initState pgf lang cat =
  case lookParser pgf langCId of
    Just pinfo -> Incremental.initState pinfo catCId
    Nothing
      -- The language is part of the grammar, so the lookup failed for
      -- another reason; report it the same way 'parse' does.
      | Map.member langCId (concretes pgf)
                  -> error ("No parser built for language: " ++ lang)
      | otherwise -> error ("Unknown language: " ++ lang)
  where
    langCId = mkCId lang
    catCId  = mkCId cat

extractExps state cat = Incremental.extractExps state (mkCId cat)

generateRandom pgf cat = do
  gen <- newStdGen
  return $ genRandom gen pgf (mkCId cat)

generateAll pgf cat = generate pgf (mkCId cat) Nothing

generateAllDepth pgf cat = generate pgf (mkCId cat)

abstractName pgf = prCId (absname pgf)

languages pgf = [prCId l | l <- cncnames pgf]

categories pgf = [prCId c | c <- Map.keys (cats (abstract pgf))]

startCat pgf = lookStartCat pgf
