diff options
| author | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
| commit | f85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch) | |
| tree | 667b886a5e3a4b026a63d4e3597f32497d824761 /src/runtime/haskell/PGF.hs | |
| parent | d88a865faff59c98fc91556ff8700b10ee5f2df8 (diff) | |
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
Diffstat (limited to 'src/runtime/haskell/PGF.hs')
| -rw-r--r-- | src/runtime/haskell/PGF.hs | 352 |
1 files changed, 352 insertions, 0 deletions
diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs new file mode 100644 index 000000000..6c192095d --- /dev/null +++ b/src/runtime/haskell/PGF.hs @@ -0,0 +1,352 @@ +------------------------------------------------- +-- | +-- Module : PGF +-- Maintainer : Aarne Ranta +-- Stability : stable +-- Portability : portable +-- +-- This module is an Application Programming Interface to +-- load and interpret grammars compiled in Portable Grammar Format (PGF). +-- The PGF format is produced as a final output from the GF compiler. +-- The API is meant to be used for embedding GF grammars in Haskell +-- programs +------------------------------------------------- + +module PGF( + -- * PGF + PGF, + readPGF, + + -- * Identifiers + CId, mkCId, wildCId, + showCId, readCId, + + -- * Languages + Language, + showLanguage, readLanguage, + languages, abstractName, languageCode, + + -- * Types + Type, Hypo, + showType, readType, + mkType, mkHypo, mkDepHypo, mkImplHypo, + categories, startCat, + + -- * Functions + functions, functionType, + + -- * Expressions & Trees + -- ** Tree + Tree, + + -- ** Expr + Expr, + showExpr, readExpr, + mkApp, unApp, + mkStr, unStr, + mkInt, unInt, + mkDouble, unDouble, + mkMeta, isMeta, + + -- * Operations + -- ** Linearization + linearize, linearizeAllLang, linearizeAll, + showPrintName, + + -- ** Parsing + parse, parseWithRecovery, canParse, parseAllLang, parseAll, + + -- ** Evaluation + PGF.compute, paraphrase, + + -- ** Type Checking + -- | The type checker in PGF does both type checking and renaming + -- i.e. it verifies that all identifiers are declared and it + -- distinguishes between global function or type indentifiers and + -- variable names. The type checker should always be applied on + -- expressions entered by the user i.e. those produced via functions + -- like 'readType' and 'readExpr' because otherwise unexpected results + -- could appear. All typechecking functions returns updated versions + -- of the input types or expressions because the typechecking could + -- also lead to metavariables instantiations. + checkType, checkExpr, inferExpr, + TcError(..), ppTcError, + + -- ** Word Completion (Incremental Parsing) + complete, + Incremental.ParseState, + Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees, + + -- ** Generation + generateRandom, generateAll, generateAllDepth, + + -- ** Morphological Analysis + Lemma, Analysis, Morpho, + lookupMorpho, buildMorpho, + + -- ** Visualizations + graphvizAbstractTree, + graphvizParseTree, + graphvizDependencyTree, + graphvizAlignment, + + -- * Browsing + browse + ) where + +import PGF.CId +import PGF.Linearize +import PGF.Generate +import PGF.TypeCheck +import PGF.Paraphrase +import PGF.VisualizeTree +import PGF.Macros +import PGF.Expr (Tree) +import PGF.Morphology +import PGF.Data hiding (functions) +import PGF.Binary +import qualified PGF.Parsing.FCFG.Active as Active +import qualified PGF.Parsing.FCFG.Incremental as Incremental +import qualified GF.Compile.GeneratePMCFG as PMCFG + +import GF.Infra.Option +import GF.Data.Utilities (replace) + +import Data.Char +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap +import Data.Maybe +import Data.Binary +import Data.List(mapAccumL) +import System.Random (newStdGen) +import Control.Monad +import Text.PrettyPrint + +--------------------------------------------------- +-- Interface +--------------------------------------------------- + +-- | Reads file in Portable Grammar Format and produces +-- 'PGF' structure. The file is usually produced with: +-- +-- > $ gf -make <grammar file name> +readPGF :: FilePath -> IO PGF + +-- | Linearizes given expression as string in the language +linearize :: PGF -> Language -> Tree -> String + +-- | Tries to parse the given string in the specified language +-- and to produce abstract syntax expression. An empty +-- list is returned if the parsing is not successful. The list may also +-- contain more than one element if the grammar is ambiguous. +-- Throws an exception if the given language cannot be used +-- for parsing, see 'canParse'. +parse :: PGF -> Language -> Type -> String -> [Tree] + +parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree] + +-- | Checks whether the given language can be used for parsing. +canParse :: PGF -> Language -> Bool + +-- | The same as 'linearizeAllLang' but does not return +-- the language. +linearizeAll :: PGF -> Tree -> [String] + +-- | Linearizes given expression as string in all languages +-- available in the grammar. +linearizeAllLang :: PGF -> Tree -> [(Language,String)] + +-- | Show the printname of a type +showPrintName :: PGF -> Language -> Type -> String + +-- | The same as 'parseAllLang' but does not return +-- the language. +parseAll :: PGF -> Type -> String -> [[Tree]] + +-- | Tries to parse the given string with all available languages. +-- Languages which cannot be used for parsing (see 'canParse') +-- are ignored. +-- The returned list contains pairs of language +-- and list of abstract syntax expressions +-- (this is a list, since grammars can be ambiguous). +-- Only those languages +-- for which at least one parsing is possible are listed. +parseAllLang :: PGF -> Type -> String -> [(Language,[Tree])] + +-- | The same as 'generateAllDepth' but does not limit +-- the depth in the generation. +generateAll :: PGF -> Type -> [Expr] + +-- | Generates an infinite list of random abstract syntax expressions. +-- This is usefull for tree bank generation which after that can be used +-- for grammar testing. +generateRandom :: PGF -> Type -> IO [Expr] + +-- | Generates an exhaustive possibly infinite list of +-- abstract syntax expressions. A depth can be specified +-- to limit the search space. +generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr] + +-- | List of all languages available in the given grammar. +languages :: PGF -> [Language] + +-- | Gets the RFC 4646 language tag +-- of the language which the given concrete syntax implements, +-- if this is listed in the source grammar. +-- Example language tags include @\"en\"@ for English, +-- and @\"en-UK\"@ for British English. +languageCode :: PGF -> Language -> Maybe String + +-- | The abstract language name is the name of the top-level +-- abstract module +abstractName :: PGF -> Language + +-- | List of all categories defined in the given grammar. +-- The categories are defined in the abstract syntax +-- with the \'cat\' keyword. +categories :: PGF -> [CId] + +-- | The start category is defined in the grammar with +-- the \'startcat\' flag. This is usually the sentence category +-- but it is not necessary. Despite that there is a start category +-- defined you can parse with any category. The start category +-- definition is just for convenience. +startCat :: PGF -> Type + +-- | List of all functions defined in the abstract syntax +functions :: PGF -> [CId] + +-- | The type of a given function +functionType :: PGF -> CId -> Maybe Type + +-- | Complete the last word in the given string. If the input +-- is empty or ends in whitespace, the last word is considred +-- to be the empty string. This means that the completions +-- will be all possible next words. +complete :: PGF -> Language -> Type -> String + -> [String] -- ^ Possible completions, + -- including the given input. + + +--------------------------------------------------- +-- Implementation +--------------------------------------------------- + +readPGF f = decodeFile f >>= addParsers + +-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand. +addParsers :: PGF -> IO PGF +addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc) + | (lang,cnc) <- Map.toList (concretes pgf)] + return pgf { concretes = Map.fromList cncs } + where + wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand" + addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc + return (lang,cnc { parser = Just pinfo }) + +linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang + +parse pgf lang typ s = + case Map.lookup lang (concretes pgf) of + Just cnc -> case parser cnc of + Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on" + then Incremental.parse pgf lang typ (words s) + else Active.parse "t" pinfo typ (words s) + Nothing -> error ("No parser built for language: " ++ showCId lang) + Nothing -> error ("Unknown language: " ++ showCId lang) + +parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s) + +canParse pgf cnc = isJust (lookParser pgf cnc) + +linearizeAll mgr = map snd . linearizeAllLang mgr +linearizeAllLang mgr t = + [(lang,PGF.linearize mgr lang t) | lang <- languages mgr] + +showPrintName pgf lang (DTyp _ c _) = realize $ lookPrintName pgf lang c + +parseAll mgr typ = map snd . parseAllLang mgr typ + +parseAllLang mgr typ s = + [(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)] + +generateRandom pgf cat = do + gen <- newStdGen + return $ genRandom gen pgf cat + +generateAll pgf cat = generate pgf cat Nothing +generateAllDepth pgf cat = generate pgf cat + +abstractName pgf = absname pgf + +languages pgf = cncnames pgf + +languageCode pgf lang = + fmap (replace '_' '-') $ lookConcrFlag pgf lang (mkCId "language") + +categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))] + +startCat pgf = DTyp [] (lookStartCat pgf) [] + +functions pgf = Map.keys (funs (abstract pgf)) + +functionType pgf fun = + case Map.lookup fun (funs (abstract pgf)) of + Just (ty,_,_) -> Just ty + Nothing -> Nothing + +complete pgf from typ input = + let (ws,prefix) = tokensAndPrefix input + state0 = Incremental.initState pgf from typ + in case loop state0 ws of + Nothing -> [] + Just state -> + (if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else []) + ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)] + where + tokensAndPrefix :: String -> ([String],String) + tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "") + | null ws = ([],"") + | otherwise = (init ws, last ws) + where ws = words s + + loop ps [] = Just ps + loop ps (t:ts) = case Incremental.nextState ps t of + Left es -> Nothing + Right ps -> loop ps ts + +-- | Converts an expression to normal form +compute :: PGF -> Expr -> Expr +compute pgf = PGF.Data.normalForm (funs (abstract pgf)) 0 [] + +browse :: PGF -> CId -> Maybe (String,[CId],[CId]) +browse pgf id = fmap (\def -> (def,producers,consumers)) definition + where + definition = case Map.lookup id (funs (abstract pgf)) of + Just (ty,_,eqs) -> Just $ render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$ + if null eqs + then empty + else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts + in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs]) + Nothing -> case Map.lookup id (cats (abstract pgf)) of + Just hyps -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL ppHypo [] hyps))) + Nothing -> Nothing + + (producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf)) + where + accum f (ty,_,_) (plist,clist) = + let !plist' = if id `elem` ps then f : plist else plist + !clist' = if id `elem` cs then f : clist else clist + in (plist',clist') + where + (ps,cs) = tyIds ty + + tyIds (DTyp hyps cat es) = (foldr expIds (cat:concat css) es,concat pss) + where + (pss,css) = unzip [tyIds ty | (_,_,ty) <- hyps] + + expIds (EAbs _ _ e) ids = expIds e ids + expIds (EApp e1 e2) ids = expIds e1 (expIds e2 ids) + expIds (EFun id) ids = id : ids + expIds (ETyped e _) ids = expIds e ids + expIds _ ids = ids |
