reorganize the directories under src, and rescue the JavaScript interpreter from deprecated

author: krasimir <krasimir@chalmers.se> 2009-12-13 18:50:29 +0000
committer: krasimir <krasimir@chalmers.se> 2009-12-13 18:50:29 +0000
commit: f85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch)
tree: 667b886a5e3a4b026a63d4e3597f32497d824761 /src/runtime/haskell/PGF.hs
parent: d88a865faff59c98fc91556ff8700b10ee5f2df8 (diff)
1 files changed, 352 insertions, 0 deletions
diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs
new file mode 100644
index 000000000..6c192095d
--- /dev/null
+++ b/src/runtime/haskell/PGF.hs
@@ -0,0 +1,352 @@
+-------------------------------------------------
+-- |
+-- Module      : PGF
+-- Maintainer  : Aarne Ranta
+-- Stability   : stable
+-- Portability : portable
+--
+-- This module is an Application Programming Interface to 
+-- load and interpret grammars compiled in Portable Grammar Format (PGF).
+-- The PGF format is produced as a final output from the GF compiler.
+-- The API is meant to be used for embedding GF grammars in Haskell 
+-- programs
+-------------------------------------------------
+
+module PGF(
+           -- * PGF
+           PGF,
+           readPGF,
+
+           -- * Identifiers
+           CId, mkCId, wildCId,
+           showCId, readCId,
+
+           -- * Languages
+           Language, 
+           showLanguage, readLanguage,
+           languages, abstractName, languageCode,
+
+           -- * Types
+           Type, Hypo,
+           showType, readType,
+           mkType, mkHypo, mkDepHypo, mkImplHypo,
+           categories, startCat,
+
+           -- * Functions
+           functions, functionType,
+
+           -- * Expressions & Trees
+           -- ** Tree
+           Tree,
+
+           -- ** Expr
+           Expr,
+           showExpr, readExpr,
+           mkApp,    unApp,
+           mkStr,    unStr,
+           mkInt,    unInt,
+           mkDouble, unDouble,
+           mkMeta,   isMeta,
+
+           -- * Operations
+           -- ** Linearization
+           linearize, linearizeAllLang, linearizeAll,
+           showPrintName,
+
+           -- ** Parsing
+           parse, parseWithRecovery, canParse, parseAllLang, parseAll,
+
+           -- ** Evaluation
+           PGF.compute, paraphrase,
+
+           -- ** Type Checking
+           -- | The type checker in PGF does both type checking and renaming
+           -- i.e. it verifies that all identifiers are declared and it
+           -- distinguishes between global function or type indentifiers and
+           -- variable names. The type checker should always be applied on
+           -- expressions entered by the user i.e. those produced via functions
+           -- like 'readType' and 'readExpr' because otherwise unexpected results
+           -- could appear. All typechecking functions returns updated versions
+           -- of the input types or expressions because the typechecking could
+           -- also lead to metavariables instantiations.
+           checkType, checkExpr, inferExpr,
+           TcError(..), ppTcError,
+
+           -- ** Word Completion (Incremental Parsing)
+           complete,
+           Incremental.ParseState,
+           Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees,
+
+           -- ** Generation
+           generateRandom, generateAll, generateAllDepth,
+
+           -- ** Morphological Analysis
+           Lemma, Analysis, Morpho,
+           lookupMorpho, buildMorpho,
+
+           -- ** Visualizations
+           graphvizAbstractTree,
+           graphvizParseTree,
+           graphvizDependencyTree,
+           graphvizAlignment,
+
+           -- * Browsing
+           browse
+          ) where
+
+import PGF.CId
+import PGF.Linearize
+import PGF.Generate
+import PGF.TypeCheck
+import PGF.Paraphrase
+import PGF.VisualizeTree
+import PGF.Macros
+import PGF.Expr (Tree)
+import PGF.Morphology
+import PGF.Data hiding (functions)
+import PGF.Binary
+import qualified PGF.Parsing.FCFG.Active as Active
+import qualified PGF.Parsing.FCFG.Incremental as Incremental
+import qualified GF.Compile.GeneratePMCFG as PMCFG
+
+import GF.Infra.Option
+import GF.Data.Utilities (replace)
+
+import Data.Char
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import Data.Maybe
+import Data.Binary
+import Data.List(mapAccumL)
+import System.Random (newStdGen)
+import Control.Monad
+import Text.PrettyPrint
+
+---------------------------------------------------
+-- Interface
+---------------------------------------------------
+
+-- | Reads file in Portable Grammar Format and produces
+-- 'PGF' structure. The file is usually produced with:
+--
+-- > $ gf -make <grammar file name>
+readPGF  :: FilePath -> IO PGF
+
+-- | Linearizes given expression as string in the language
+linearize    :: PGF -> Language -> Tree -> String
+
+-- | Tries to parse the given string in the specified language
+-- and to produce abstract syntax expression. An empty
+-- list is returned if the parsing is not successful. The list may also
+-- contain more than one element if the grammar is ambiguous.
+-- Throws an exception if the given language cannot be used
+-- for parsing, see 'canParse'.
+parse        :: PGF -> Language -> Type -> String -> [Tree]
+
+parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
+
+-- | Checks whether the given language can be used for parsing.
+canParse     :: PGF -> Language -> Bool
+
+-- | The same as 'linearizeAllLang' but does not return
+-- the language.
+linearizeAll     :: PGF -> Tree -> [String]
+
+-- | Linearizes given expression as string in all languages
+-- available in the grammar.
+linearizeAllLang :: PGF -> Tree -> [(Language,String)]
+
+-- | Show the printname of a type
+showPrintName :: PGF -> Language -> Type -> String
+
+-- | The same as 'parseAllLang' but does not return
+-- the language.
+parseAll     :: PGF -> Type -> String -> [[Tree]]
+
+-- | Tries to parse the given string with all available languages.
+-- Languages which cannot be used for parsing (see 'canParse')
+-- are ignored.
+-- The returned list contains pairs of language
+-- and list of abstract syntax expressions 
+-- (this is a list, since grammars can be ambiguous). 
+-- Only those languages
+-- for which at least one parsing is possible are listed.
+parseAllLang :: PGF -> Type -> String -> [(Language,[Tree])]
+
+-- | The same as 'generateAllDepth' but does not limit
+-- the depth in the generation.
+generateAll      :: PGF -> Type -> [Expr]
+
+-- | Generates an infinite list of random abstract syntax expressions.
+-- This is usefull for tree bank generation which after that can be used
+-- for grammar testing.
+generateRandom   :: PGF -> Type -> IO [Expr]
+
+-- | Generates an exhaustive possibly infinite list of
+-- abstract syntax expressions. A depth can be specified
+-- to limit the search space.
+generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr]
+
+-- | List of all languages available in the given grammar.
+languages    :: PGF -> [Language]
+
+-- | Gets the RFC 4646 language tag 
+-- of the language which the given concrete syntax implements,
+-- if this is listed in the source grammar.
+-- Example language tags include @\"en\"@ for English,
+-- and @\"en-UK\"@ for British English.
+languageCode :: PGF -> Language -> Maybe String
+
+-- | The abstract language name is the name of the top-level
+-- abstract module
+abstractName :: PGF -> Language
+
+-- | List of all categories defined in the given grammar.
+-- The categories are defined in the abstract syntax
+-- with the \'cat\' keyword.
+categories :: PGF -> [CId]
+
+-- | The start category is defined in the grammar with
+-- the \'startcat\' flag. This is usually the sentence category
+-- but it is not necessary. Despite that there is a start category
+-- defined you can parse with any category. The start category
+-- definition is just for convenience.
+startCat   :: PGF -> Type
+
+-- | List of all functions defined in the abstract syntax
+functions :: PGF -> [CId]
+
+-- | The type of a given function
+functionType :: PGF -> CId -> Maybe Type
+
+-- | Complete the last word in the given string. If the input
+-- is empty or ends in whitespace, the last word is considred
+-- to be the empty string. This means that the completions
+-- will be all possible next words.
+complete :: PGF -> Language -> Type -> String 
+         -> [String] -- ^ Possible completions, 
+                     -- including the given input.
+
+
+---------------------------------------------------
+-- Implementation
+---------------------------------------------------
+
+readPGF f = decodeFile f >>= addParsers
+
+-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
+addParsers :: PGF -> IO PGF
+addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
+                                           | (lang,cnc) <- Map.toList (concretes pgf)]
+                    return pgf { concretes = Map.fromList cncs }
+    where
+      wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
+      addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
+                              return (lang,cnc { parser = Just pinfo })
+
+linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
+
+parse pgf lang typ s = 
+  case Map.lookup lang (concretes pgf) of
+    Just cnc -> case parser cnc of
+                  Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
+                                  then Incremental.parse pgf lang typ (words s)
+                                  else Active.parse "t"  pinfo typ (words s)
+                  Nothing    -> error ("No parser built for language: " ++ showCId lang)
+    Nothing  -> error ("Unknown language: " ++ showCId lang)
+
+parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s)
+
+canParse pgf cnc = isJust (lookParser pgf cnc)
+
+linearizeAll mgr = map snd . linearizeAllLang mgr
+linearizeAllLang mgr t = 
+  [(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
+
+showPrintName pgf lang (DTyp _ c _) = realize $ lookPrintName pgf lang c
+
+parseAll mgr typ = map snd . parseAllLang mgr typ
+
+parseAllLang mgr typ s = 
+  [(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)]
+
+generateRandom pgf cat = do
+  gen <- newStdGen
+  return $ genRandom gen pgf cat
+
+generateAll pgf cat = generate pgf cat Nothing
+generateAllDepth pgf cat = generate pgf cat
+
+abstractName pgf = absname pgf
+
+languages pgf = cncnames pgf
+
+languageCode pgf lang = 
+    fmap (replace '_' '-') $ lookConcrFlag pgf lang (mkCId "language")
+
+categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))]
+
+startCat pgf = DTyp [] (lookStartCat pgf) []
+
+functions pgf = Map.keys (funs (abstract pgf))
+
+functionType pgf fun =
+  case Map.lookup fun (funs (abstract pgf)) of
+    Just (ty,_,_) -> Just ty
+    Nothing       -> Nothing
+
+complete pgf from typ input = 
+         let (ws,prefix) = tokensAndPrefix input
+             state0 = Incremental.initState pgf from typ
+         in case loop state0 ws of
+              Nothing -> []
+              Just state -> 
+                (if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else [])
+                ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)]
+  where
+    tokensAndPrefix :: String -> ([String],String)
+    tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
+                      | null ws = ([],"")
+                      | otherwise = (init ws, last ws)
+        where ws = words s
+
+    loop ps []     = Just ps
+    loop ps (t:ts) = case Incremental.nextState ps t of
+                       Left  es -> Nothing
+                       Right ps -> loop ps ts
+
+-- | Converts an expression to normal form
+compute :: PGF -> Expr -> Expr
+compute pgf = PGF.Data.normalForm (funs (abstract pgf)) 0 []
+
+browse :: PGF -> CId -> Maybe (String,[CId],[CId])
+browse pgf id = fmap (\def -> (def,producers,consumers)) definition
+  where
+    definition = case Map.lookup id (funs (abstract pgf)) of
+                   Just (ty,_,eqs) -> Just $ render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$
+                                                     if null eqs
+                                                       then empty
+                                                       else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
+                                                                                 in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs])
+                   Nothing   -> case Map.lookup id (cats (abstract pgf)) of
+                                  Just hyps -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL ppHypo [] hyps)))
+                                  Nothing   -> Nothing
+
+    (producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf))
+      where
+        accum f (ty,_,_) (plist,clist) = 
+          let !plist' = if id `elem` ps then f : plist else plist
+              !clist' = if id `elem` cs then f : clist else clist
+          in (plist',clist')
+          where
+            (ps,cs) = tyIds ty
+
+    tyIds (DTyp hyps cat es) = (foldr expIds (cat:concat css) es,concat pss)
+      where
+        (pss,css) = unzip [tyIds ty | (_,_,ty) <- hyps]
+
+    expIds (EAbs _ _ e) ids = expIds e ids
+    expIds (EApp e1 e2) ids = expIds e1 (expIds e2 ids)
+    expIds (EFun id)    ids = id : ids
+    expIds (ETyped e _) ids = expIds e ids
+    expIds _            ids = ids
author	krasimir <krasimir@chalmers.se>	2009-12-13 18:50:29 +0000
committer	krasimir <krasimir@chalmers.se>	2009-12-13 18:50:29 +0000
commit	f85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch)
tree	667b886a5e3a4b026a63d4e3597f32497d824761 /src/runtime/haskell/PGF.hs
parent	d88a865faff59c98fc91556ff8700b10ee5f2df8 (diff)