summaryrefslogtreecommitdiff
path: root/src/runtime/haskell/PGF.hs
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2009-12-13 18:50:29 +0000
committerkrasimir <krasimir@chalmers.se>2009-12-13 18:50:29 +0000
commitf85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch)
tree667b886a5e3a4b026a63d4e3597f32497d824761 /src/runtime/haskell/PGF.hs
parentd88a865faff59c98fc91556ff8700b10ee5f2df8 (diff)
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
Diffstat (limited to 'src/runtime/haskell/PGF.hs')
-rw-r--r--src/runtime/haskell/PGF.hs352
1 files changed, 352 insertions, 0 deletions
diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs
new file mode 100644
index 000000000..6c192095d
--- /dev/null
+++ b/src/runtime/haskell/PGF.hs
@@ -0,0 +1,352 @@
+-------------------------------------------------
+-- |
+-- Module : PGF
+-- Maintainer : Aarne Ranta
+-- Stability : stable
+-- Portability : portable
+--
+-- This module is an Application Programming Interface to
+-- load and interpret grammars compiled in Portable Grammar Format (PGF).
+-- The PGF format is produced as a final output from the GF compiler.
+-- The API is meant to be used for embedding GF grammars in Haskell
+-- programs
+-------------------------------------------------
+
+module PGF(
+ -- * PGF
+ PGF,
+ readPGF,
+
+ -- * Identifiers
+ CId, mkCId, wildCId,
+ showCId, readCId,
+
+ -- * Languages
+ Language,
+ showLanguage, readLanguage,
+ languages, abstractName, languageCode,
+
+ -- * Types
+ Type, Hypo,
+ showType, readType,
+ mkType, mkHypo, mkDepHypo, mkImplHypo,
+ categories, startCat,
+
+ -- * Functions
+ functions, functionType,
+
+ -- * Expressions & Trees
+ -- ** Tree
+ Tree,
+
+ -- ** Expr
+ Expr,
+ showExpr, readExpr,
+ mkApp, unApp,
+ mkStr, unStr,
+ mkInt, unInt,
+ mkDouble, unDouble,
+ mkMeta, isMeta,
+
+ -- * Operations
+ -- ** Linearization
+ linearize, linearizeAllLang, linearizeAll,
+ showPrintName,
+
+ -- ** Parsing
+ parse, parseWithRecovery, canParse, parseAllLang, parseAll,
+
+ -- ** Evaluation
+ PGF.compute, paraphrase,
+
+ -- ** Type Checking
+ -- | The type checker in PGF does both type checking and renaming
+ -- i.e. it verifies that all identifiers are declared and it
+ -- distinguishes between global function or type indentifiers and
+ -- variable names. The type checker should always be applied on
+ -- expressions entered by the user i.e. those produced via functions
+ -- like 'readType' and 'readExpr' because otherwise unexpected results
+ -- could appear. All typechecking functions returns updated versions
+ -- of the input types or expressions because the typechecking could
+ -- also lead to metavariables instantiations.
+ checkType, checkExpr, inferExpr,
+ TcError(..), ppTcError,
+
+ -- ** Word Completion (Incremental Parsing)
+ complete,
+ Incremental.ParseState,
+ Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees,
+
+ -- ** Generation
+ generateRandom, generateAll, generateAllDepth,
+
+ -- ** Morphological Analysis
+ Lemma, Analysis, Morpho,
+ lookupMorpho, buildMorpho,
+
+ -- ** Visualizations
+ graphvizAbstractTree,
+ graphvizParseTree,
+ graphvizDependencyTree,
+ graphvizAlignment,
+
+ -- * Browsing
+ browse
+ ) where
+
+import PGF.CId
+import PGF.Linearize
+import PGF.Generate
+import PGF.TypeCheck
+import PGF.Paraphrase
+import PGF.VisualizeTree
+import PGF.Macros
+import PGF.Expr (Tree)
+import PGF.Morphology
+import PGF.Data hiding (functions)
+import PGF.Binary
+import qualified PGF.Parsing.FCFG.Active as Active
+import qualified PGF.Parsing.FCFG.Incremental as Incremental
+import qualified GF.Compile.GeneratePMCFG as PMCFG
+
+import GF.Infra.Option
+import GF.Data.Utilities (replace)
+
+import Data.Char
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import Data.Maybe
+import Data.Binary
+import Data.List(mapAccumL)
+import System.Random (newStdGen)
+import Control.Monad
+import Text.PrettyPrint
+
+---------------------------------------------------
+-- Interface
+---------------------------------------------------
+
+-- | Reads file in Portable Grammar Format and produces
+-- 'PGF' structure. The file is usually produced with:
+--
+-- > $ gf -make <grammar file name>
+readPGF :: FilePath -> IO PGF
+
+-- | Linearizes given expression as string in the language
+linearize :: PGF -> Language -> Tree -> String
+
+-- | Tries to parse the given string in the specified language
+-- and to produce abstract syntax expression. An empty
+-- list is returned if the parsing is not successful. The list may also
+-- contain more than one element if the grammar is ambiguous.
+-- Throws an exception if the given language cannot be used
+-- for parsing, see 'canParse'.
+parse :: PGF -> Language -> Type -> String -> [Tree]
+
+parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
+
+-- | Checks whether the given language can be used for parsing.
+canParse :: PGF -> Language -> Bool
+
+-- | The same as 'linearizeAllLang' but does not return
+-- the language.
+linearizeAll :: PGF -> Tree -> [String]
+
+-- | Linearizes given expression as string in all languages
+-- available in the grammar.
+linearizeAllLang :: PGF -> Tree -> [(Language,String)]
+
+-- | Show the printname of a type
+showPrintName :: PGF -> Language -> Type -> String
+
+-- | The same as 'parseAllLang' but does not return
+-- the language.
+parseAll :: PGF -> Type -> String -> [[Tree]]
+
+-- | Tries to parse the given string with all available languages.
+-- Languages which cannot be used for parsing (see 'canParse')
+-- are ignored.
+-- The returned list contains pairs of language
+-- and list of abstract syntax expressions
+-- (this is a list, since grammars can be ambiguous).
+-- Only those languages
+-- for which at least one parsing is possible are listed.
+parseAllLang :: PGF -> Type -> String -> [(Language,[Tree])]
+
+-- | The same as 'generateAllDepth' but does not limit
+-- the depth in the generation.
+generateAll :: PGF -> Type -> [Expr]
+
+-- | Generates an infinite list of random abstract syntax expressions.
+-- This is usefull for tree bank generation which after that can be used
+-- for grammar testing.
+generateRandom :: PGF -> Type -> IO [Expr]
+
+-- | Generates an exhaustive possibly infinite list of
+-- abstract syntax expressions. A depth can be specified
+-- to limit the search space.
+generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr]
+
+-- | List of all languages available in the given grammar.
+languages :: PGF -> [Language]
+
+-- | Gets the RFC 4646 language tag
+-- of the language which the given concrete syntax implements,
+-- if this is listed in the source grammar.
+-- Example language tags include @\"en\"@ for English,
+-- and @\"en-UK\"@ for British English.
+languageCode :: PGF -> Language -> Maybe String
+
+-- | The abstract language name is the name of the top-level
+-- abstract module
+abstractName :: PGF -> Language
+
+-- | List of all categories defined in the given grammar.
+-- The categories are defined in the abstract syntax
+-- with the \'cat\' keyword.
+categories :: PGF -> [CId]
+
+-- | The start category is defined in the grammar with
+-- the \'startcat\' flag. This is usually the sentence category
+-- but it is not necessary. Despite that there is a start category
+-- defined you can parse with any category. The start category
+-- definition is just for convenience.
+startCat :: PGF -> Type
+
+-- | List of all functions defined in the abstract syntax
+functions :: PGF -> [CId]
+
+-- | The type of a given function
+functionType :: PGF -> CId -> Maybe Type
+
+-- | Complete the last word in the given string. If the input
+-- is empty or ends in whitespace, the last word is considred
+-- to be the empty string. This means that the completions
+-- will be all possible next words.
+complete :: PGF -> Language -> Type -> String
+ -> [String] -- ^ Possible completions,
+ -- including the given input.
+
+
+---------------------------------------------------
+-- Implementation
+---------------------------------------------------
+
+readPGF f = decodeFile f >>= addParsers
+
+-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
+addParsers :: PGF -> IO PGF
+addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
+ | (lang,cnc) <- Map.toList (concretes pgf)]
+ return pgf { concretes = Map.fromList cncs }
+ where
+ wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
+ addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
+ return (lang,cnc { parser = Just pinfo })
+
+linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
+
+parse pgf lang typ s =
+ case Map.lookup lang (concretes pgf) of
+ Just cnc -> case parser cnc of
+ Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
+ then Incremental.parse pgf lang typ (words s)
+ else Active.parse "t" pinfo typ (words s)
+ Nothing -> error ("No parser built for language: " ++ showCId lang)
+ Nothing -> error ("Unknown language: " ++ showCId lang)
+
+parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s)
+
+canParse pgf cnc = isJust (lookParser pgf cnc)
+
+linearizeAll mgr = map snd . linearizeAllLang mgr
+linearizeAllLang mgr t =
+ [(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
+
+showPrintName pgf lang (DTyp _ c _) = realize $ lookPrintName pgf lang c
+
+parseAll mgr typ = map snd . parseAllLang mgr typ
+
+parseAllLang mgr typ s =
+ [(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)]
+
+generateRandom pgf cat = do
+ gen <- newStdGen
+ return $ genRandom gen pgf cat
+
+generateAll pgf cat = generate pgf cat Nothing
+generateAllDepth pgf cat = generate pgf cat
+
+abstractName pgf = absname pgf
+
+languages pgf = cncnames pgf
+
+languageCode pgf lang =
+ fmap (replace '_' '-') $ lookConcrFlag pgf lang (mkCId "language")
+
+categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))]
+
+startCat pgf = DTyp [] (lookStartCat pgf) []
+
+functions pgf = Map.keys (funs (abstract pgf))
+
+functionType pgf fun =
+ case Map.lookup fun (funs (abstract pgf)) of
+ Just (ty,_,_) -> Just ty
+ Nothing -> Nothing
+
+complete pgf from typ input =
+ let (ws,prefix) = tokensAndPrefix input
+ state0 = Incremental.initState pgf from typ
+ in case loop state0 ws of
+ Nothing -> []
+ Just state ->
+ (if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else [])
+ ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)]
+ where
+ tokensAndPrefix :: String -> ([String],String)
+ tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
+ | null ws = ([],"")
+ | otherwise = (init ws, last ws)
+ where ws = words s
+
+ loop ps [] = Just ps
+ loop ps (t:ts) = case Incremental.nextState ps t of
+ Left es -> Nothing
+ Right ps -> loop ps ts
+
+-- | Converts an expression to normal form
+compute :: PGF -> Expr -> Expr
+compute pgf = PGF.Data.normalForm (funs (abstract pgf)) 0 []
+
+browse :: PGF -> CId -> Maybe (String,[CId],[CId])
+browse pgf id = fmap (\def -> (def,producers,consumers)) definition
+ where
+ definition = case Map.lookup id (funs (abstract pgf)) of
+ Just (ty,_,eqs) -> Just $ render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$
+ if null eqs
+ then empty
+ else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
+ in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs])
+ Nothing -> case Map.lookup id (cats (abstract pgf)) of
+ Just hyps -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL ppHypo [] hyps)))
+ Nothing -> Nothing
+
+ (producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf))
+ where
+ accum f (ty,_,_) (plist,clist) =
+ let !plist' = if id `elem` ps then f : plist else plist
+ !clist' = if id `elem` cs then f : clist else clist
+ in (plist',clist')
+ where
+ (ps,cs) = tyIds ty
+
+ tyIds (DTyp hyps cat es) = (foldr expIds (cat:concat css) es,concat pss)
+ where
+ (pss,css) = unzip [tyIds ty | (_,_,ty) <- hyps]
+
+ expIds (EAbs _ _ e) ids = expIds e ids
+ expIds (EApp e1 e2) ids = expIds e1 (expIds e2 ids)
+ expIds (EFun id) ids = id : ids
+ expIds (ETyped e _) ids = expIds e ids
+ expIds _ ids = ids