diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Embed | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Embed')
| -rw-r--r-- | src-3.0/GF/Embed/EmbedAPI.hs | 114 | ||||
| -rw-r--r-- | src-3.0/GF/Embed/EmbedCustom.hs | 113 | ||||
| -rw-r--r-- | src-3.0/GF/Embed/EmbedParsing.hs | 65 | ||||
| -rw-r--r-- | src-3.0/GF/Embed/TemplateApp.hs | 44 |
4 files changed, 336 insertions, 0 deletions
diff --git a/src-3.0/GF/Embed/EmbedAPI.hs b/src-3.0/GF/Embed/EmbedAPI.hs new file mode 100644 index 000000000..43e4f2546 --- /dev/null +++ b/src-3.0/GF/Embed/EmbedAPI.hs @@ -0,0 +1,114 @@ +---------------------------------------------------------------------- +-- | +-- Module : EmbedAPI +-- Maintainer : Aarne Ranta +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: +-- > CVS $Author: +-- > CVS $Revision: +-- +-- Reduced Application Programmer's Interface to GF, meant for +-- embedded GF systems. AR 10/5/2005 +----------------------------------------------------------------------------- + +module GF.Embed.EmbedAPI where + +import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories,stateOptions,firstAbsCat) +import GF.UseGrammar.Linear (linTree2string) +import GF.UseGrammar.GetTree (string2tree) +import GF.Embed.EmbedParsing (parseString) +import GF.Canon.CMacros (noMark) +import GF.Grammar.Grammar (Trm) +import GF.Grammar.MMacros (exp2tree) +import GF.Grammar.Macros (zIdent) +import GF.Grammar.PrGrammar (prt_) +import GF.Grammar.Values (tree2exp) +import GF.Grammar.TypeCheck (annotate) +import GF.Canon.GetGFC (getCanonGrammar) +import GF.Infra.Modules (emptyMGrammar) +import GF.CF.CFIdent (string2CFCat) +import GF.Infra.UseIO +import GF.Data.Operations +import GF.Infra.Option (noOptions,useUntokenizer,options,iOpt) +import GF.Infra.Ident (prIdent) +import GF.Embed.EmbedCustom + +-- This API is meant to be used when embedding GF grammars in Haskell +-- programs. The embedded system is supposed to use the +-- .gfcm grammar format, which is first produced by the gf program. + +--------------------------------------------------- +-- Interface +--------------------------------------------------- + +type MultiGrammar = ShellState +type Language = String +type Category = String +type Tree = Trm + +file2grammar :: FilePath -> IO MultiGrammar + +linearize :: MultiGrammar -> Language -> Tree -> String +parse :: MultiGrammar -> Language -> Category -> String -> [Tree] + +linearizeAll :: MultiGrammar -> Tree -> [String] +linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)] + +parseAll :: MultiGrammar -> Category -> String -> [[Tree]] +parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])] + +readTree :: MultiGrammar -> String -> Tree +showTree :: Tree -> String + +languages :: MultiGrammar -> [Language] +categories :: MultiGrammar -> [Category] + +startCat :: MultiGrammar -> Category + +--------------------------------------------------- +-- Implementation +--------------------------------------------------- + +file2grammar file = do + can <- useIOE (error "cannot parse grammar file") $ getCanonGrammar file + return $ errVal (error "cannot build multigrammar") $ + grammar2shellState (options [iOpt "docf"]) (can,emptyMGrammar) + +linearize mgr lang = + untok . + linTree2string noMark (canModules mgr) (zIdent lang) . + errVal (error "illegal tree") . + annotate gr + where + gr = grammar sgr + sgr = stateGrammarOfLang mgr (zIdent lang) + untok = customOrDefault (stateOptions sgr) useUntokenizer customUntokenizer sgr + +parse mgr lang cat = + map tree2exp . + errVal [] . + parseString (stateOptions sgr) sgr cfcat + where + sgr = stateGrammarOfLang mgr (zIdent lang) + cfcat = string2CFCat abs cat + abs = maybe (error "no abstract syntax") prIdent $ abstract mgr + +linearizeAll mgr = map snd . linearizeAllLang mgr +linearizeAllLang mgr t = [(lang,linearize mgr lang t) | lang <- languages mgr] + +parseAll mgr cat = map snd . parseAllLang mgr cat + +parseAllLang mgr cat s = + [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)] + +readTree mgr s = tree2exp $ string2tree (firstStateGrammar mgr) s + +showTree t = prt_ t + +languages mgr = [prt_ l | l <- allLanguages mgr] + +categories mgr = [prt_ c | (_,c) <- allCategories mgr] + +startCat = prt_ . snd . firstAbsCat noOptions . firstStateGrammar diff --git a/src-3.0/GF/Embed/EmbedCustom.hs b/src-3.0/GF/Embed/EmbedCustom.hs new file mode 100644 index 000000000..f315441c5 --- /dev/null +++ b/src-3.0/GF/Embed/EmbedCustom.hs @@ -0,0 +1,113 @@ +---------------------------------------------------------------------- +-- | +-- Module : EmbedCustom +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: +-- > CVS $Author: +-- > CVS $Revision: +-- +-- A database for customizable lexers and unlexers. Reduced version of +-- GF.API, intended for embedded GF grammars. + +----------------------------------------------------------------------------- + +module GF.Embed.EmbedCustom where + +import GF.Data.Operations +import GF.Text.Text +import GF.UseGrammar.Tokenize +import GF.UseGrammar.Morphology +import GF.Infra.Option +import GF.CF.CFIdent +import GF.Compile.ShellState +import Data.Char + +-- | useTokenizer, \"-lexer=x\" +customTokenizer :: CustomData (StateGrammar -> String -> [CFTok]) + +-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string +customUntokenizer :: CustomData (StateGrammar -> String -> String) + +-- | this is the way of selecting an item +customOrDefault :: Options -> OptFun -> CustomData a -> a +customOrDefault opts optfun db = maybe (defaultCustomVal db) id $ + customAsOptVal opts optfun db + +-- | to produce menus of custom operations +customInfo :: CustomData a -> (String, [String]) +customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c)) + +type CommandId = String + +strCI :: String -> CommandId +strCI = id + +ciStr :: CommandId -> String +ciStr = id + +ciOpt :: CommandId -> Option +ciOpt = iOpt + +newtype CustomData a = CustomData (String, [(CommandId,a)]) + +customData :: String -> [(CommandId, a)] -> CustomData a +customData title db = CustomData (title,db) + +dbCustomData :: CustomData a -> [(CommandId, a)] +dbCustomData (CustomData (_,db)) = db + +titleCustomData :: CustomData a -> String +titleCustomData (CustomData (t,_)) = t + +lookupCustom :: CustomData a -> CommandId -> Maybe a +lookupCustom = flip lookup . dbCustomData + +customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a +customAsOptVal opts optfun db = do + arg <- getOptVal opts optfun + lookupCustom db (strCI arg) + +-- | take the first entry from the database +defaultCustomVal :: CustomData a -> a +defaultCustomVal (CustomData (s,db)) = + ifNull (error ("empty database:" +++ s)) (snd . head) db + +customTokenizer = + customData "Tokenizers, selected by option -lexer=x" $ + [ + (strCI "words", const $ tokWords) + ,(strCI "literals", const $ tokLits) + ,(strCI "vars", const $ tokVars) + ,(strCI "chars", const $ map (tS . singleton)) + ,(strCI "code", const $ lexHaskell) + ,(strCI "codevars", lexHaskellVar . stateIsWord) + ,(strCI "text", const $ lexText) + ,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr)) + ,(strCI "codelit", lexHaskellLiteral . stateIsWord) + ,(strCI "textlit", lexTextLiteral . stateIsWord) + ,(strCI "codeC", const $ lexC2M) + ,(strCI "codeCHigh", const $ lexC2M' True) +-- add your own tokenizers here + ] + +customUntokenizer = + customData "Untokenizers, selected by option -unlexer=x" $ + [ + (strCI "unwords", const $ id) -- DEFAULT + ,(strCI "text", const $ formatAsText) + ,(strCI "html", const $ formatAsHTML) + ,(strCI "latex", const $ formatAsLatex) + ,(strCI "code", const $ formatAsCode) + ,(strCI "concat", const $ filter (not . isSpace)) + ,(strCI "textlit", const $ formatAsTextLit) + ,(strCI "codelit", const $ formatAsCodeLit) + ,(strCI "concat", const $ concatRemSpace) + ,(strCI "glue", const $ performBinds) + ,(strCI "reverse", const $ reverse) + ,(strCI "bind", const $ performBinds) -- backward compat +-- add your own untokenizers here + ] + diff --git a/src-3.0/GF/Embed/EmbedParsing.hs b/src-3.0/GF/Embed/EmbedParsing.hs new file mode 100644 index 000000000..43909f355 --- /dev/null +++ b/src-3.0/GF/Embed/EmbedParsing.hs @@ -0,0 +1,65 @@ +---------------------------------------------------------------------- +-- | +-- Module : EmbedParsing +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: +-- > CVS $Author: +-- > CVS $Revision: +-- +-- just one parse method, for use in embedded GF systems +----------------------------------------------------------------------------- + +module GF.Embed.EmbedParsing where + +import GF.Infra.CheckM +import qualified GF.Canon.AbsGFC as C +import GF.Canon.GFC +import GF.Canon.MkGFC (trExp) ---- +import GF.Canon.CMacros +import GF.Grammar.MMacros (refreshMetas) +import GF.UseGrammar.Linear +import GF.Data.Str +import GF.CF.CF +import GF.CF.CFIdent +import GF.Infra.Ident +import GF.Grammar.TypeCheck +import GF.Grammar.Values +import GF.UseGrammar.Tokenize +import GF.CF.Profile +import GF.Infra.Option +import GF.Compile.ShellState +import GF.Embed.EmbedCustom +import GF.CF.PPrCF (prCFTree) +import qualified GF.Parsing.GFC as New + + +-- import qualified GF.Parsing.GFC as New + +import GF.Data.Operations + +import Data.List (nub) +import Control.Monad (liftM) + +-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002 + +parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree] +parseString os sg cat = liftM fst . parseStringMsg os sg cat + +parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String) +parseStringMsg os sg cat s = do + (ts,(_,ss)) <- checkStart $ parseStringC os sg cat s + return (ts,unlines ss) + +parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree] +parseStringC opts0 sg cat s = do + let opts = unionOptions opts0 $ stateOptions sg + algorithm = "f" -- default algorithm: FCFG + strategy = "bottomup" + tokenizer = customOrDefault opts useTokenizer customTokenizer sg + toks = tokenizer s + ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks + checkErr $ allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts + diff --git a/src-3.0/GF/Embed/TemplateApp.hs b/src-3.0/GF/Embed/TemplateApp.hs new file mode 100644 index 000000000..f8722691f --- /dev/null +++ b/src-3.0/GF/Embed/TemplateApp.hs @@ -0,0 +1,44 @@ +module Main where + +import GF.Embed.EmbedAPI +import System + +-- Simple translation application built on EmbedAPI. AR 7/10/2005 + +main :: IO () +main = do + file:_ <- getArgs + grammar <- file2grammar file + translate grammar + +translate :: MultiGrammar -> IO () +translate grammar = do + s <- getLine + if s == "quit" then return () else do + treat grammar s + translate grammar + +treat :: MultiGrammar -> String -> IO () +treat grammar s = putStrLn $ case comm of + ["lin"] -> unlines $ linearizeAll grammar $ readTree grammar rest + ["lin",lang] -> linearize grammar lang $ readTree grammar rest + ["parse",cat] -> unlines $ map showTree $ concat $ parseAll grammar cat rest + ["parse",lang,cat] -> unlines $ map showTree $ parse grammar lang cat rest + ["langs"] -> unwords $ languages grammar + ["cats"] -> unwords $ categories grammar + ["help"] -> helpMsg + _ -> "command not interpreted: " ++ s + where + (comm,rest) = (words c,drop 1 r) where + (c,r) = span (/=':') s + +helpMsg = unlines [ + "lin : <Tree>", + "lin <Lang> : <Tree>", + "parse <Cat> : <String>", + "parse <Lang> <Cat> : <String>", + "langs", + "cats", + "help", + "quit" + ] |
