summaryrefslogtreecommitdiff
path: root/src-3.0/GF/Embed
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Embed
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Embed')
-rw-r--r--src-3.0/GF/Embed/EmbedAPI.hs114
-rw-r--r--src-3.0/GF/Embed/EmbedCustom.hs113
-rw-r--r--src-3.0/GF/Embed/EmbedParsing.hs65
-rw-r--r--src-3.0/GF/Embed/TemplateApp.hs44
4 files changed, 336 insertions, 0 deletions
diff --git a/src-3.0/GF/Embed/EmbedAPI.hs b/src-3.0/GF/Embed/EmbedAPI.hs
new file mode 100644
index 000000000..43e4f2546
--- /dev/null
+++ b/src-3.0/GF/Embed/EmbedAPI.hs
@@ -0,0 +1,114 @@
+----------------------------------------------------------------------
+-- |
+-- Module : EmbedAPI
+-- Maintainer : Aarne Ranta
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date:
+-- > CVS $Author:
+-- > CVS $Revision:
+--
+-- Reduced Application Programmer's Interface to GF, meant for
+-- embedded GF systems. AR 10/5/2005
+-----------------------------------------------------------------------------
+
+module GF.Embed.EmbedAPI where
+
+import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories,stateOptions,firstAbsCat)
+import GF.UseGrammar.Linear (linTree2string)
+import GF.UseGrammar.GetTree (string2tree)
+import GF.Embed.EmbedParsing (parseString)
+import GF.Canon.CMacros (noMark)
+import GF.Grammar.Grammar (Trm)
+import GF.Grammar.MMacros (exp2tree)
+import GF.Grammar.Macros (zIdent)
+import GF.Grammar.PrGrammar (prt_)
+import GF.Grammar.Values (tree2exp)
+import GF.Grammar.TypeCheck (annotate)
+import GF.Canon.GetGFC (getCanonGrammar)
+import GF.Infra.Modules (emptyMGrammar)
+import GF.CF.CFIdent (string2CFCat)
+import GF.Infra.UseIO
+import GF.Data.Operations
+import GF.Infra.Option (noOptions,useUntokenizer,options,iOpt)
+import GF.Infra.Ident (prIdent)
+import GF.Embed.EmbedCustom
+
+-- This API is meant to be used when embedding GF grammars in Haskell
+-- programs. The embedded system is supposed to use the
+-- .gfcm grammar format, which is first produced by the gf program.
+
+---------------------------------------------------
+-- Interface
+---------------------------------------------------
+
+type MultiGrammar = ShellState
+type Language = String
+type Category = String
+type Tree = Trm
+
+file2grammar :: FilePath -> IO MultiGrammar
+
+linearize :: MultiGrammar -> Language -> Tree -> String
+parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
+
+linearizeAll :: MultiGrammar -> Tree -> [String]
+linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
+
+parseAll :: MultiGrammar -> Category -> String -> [[Tree]]
+parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
+
+readTree :: MultiGrammar -> String -> Tree
+showTree :: Tree -> String
+
+languages :: MultiGrammar -> [Language]
+categories :: MultiGrammar -> [Category]
+
+startCat :: MultiGrammar -> Category
+
+---------------------------------------------------
+-- Implementation
+---------------------------------------------------
+
+file2grammar file = do
+ can <- useIOE (error "cannot parse grammar file") $ getCanonGrammar file
+ return $ errVal (error "cannot build multigrammar") $
+ grammar2shellState (options [iOpt "docf"]) (can,emptyMGrammar)
+
+linearize mgr lang =
+ untok .
+ linTree2string noMark (canModules mgr) (zIdent lang) .
+ errVal (error "illegal tree") .
+ annotate gr
+ where
+ gr = grammar sgr
+ sgr = stateGrammarOfLang mgr (zIdent lang)
+ untok = customOrDefault (stateOptions sgr) useUntokenizer customUntokenizer sgr
+
+parse mgr lang cat =
+ map tree2exp .
+ errVal [] .
+ parseString (stateOptions sgr) sgr cfcat
+ where
+ sgr = stateGrammarOfLang mgr (zIdent lang)
+ cfcat = string2CFCat abs cat
+ abs = maybe (error "no abstract syntax") prIdent $ abstract mgr
+
+linearizeAll mgr = map snd . linearizeAllLang mgr
+linearizeAllLang mgr t = [(lang,linearize mgr lang t) | lang <- languages mgr]
+
+parseAll mgr cat = map snd . parseAllLang mgr cat
+
+parseAllLang mgr cat s =
+ [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
+
+readTree mgr s = tree2exp $ string2tree (firstStateGrammar mgr) s
+
+showTree t = prt_ t
+
+languages mgr = [prt_ l | l <- allLanguages mgr]
+
+categories mgr = [prt_ c | (_,c) <- allCategories mgr]
+
+startCat = prt_ . snd . firstAbsCat noOptions . firstStateGrammar
diff --git a/src-3.0/GF/Embed/EmbedCustom.hs b/src-3.0/GF/Embed/EmbedCustom.hs
new file mode 100644
index 000000000..f315441c5
--- /dev/null
+++ b/src-3.0/GF/Embed/EmbedCustom.hs
@@ -0,0 +1,113 @@
+----------------------------------------------------------------------
+-- |
+-- Module : EmbedCustom
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date:
+-- > CVS $Author:
+-- > CVS $Revision:
+--
+-- A database for customizable lexers and unlexers. Reduced version of
+-- GF.API, intended for embedded GF grammars.
+
+-----------------------------------------------------------------------------
+
+module GF.Embed.EmbedCustom where
+
+import GF.Data.Operations
+import GF.Text.Text
+import GF.UseGrammar.Tokenize
+import GF.UseGrammar.Morphology
+import GF.Infra.Option
+import GF.CF.CFIdent
+import GF.Compile.ShellState
+import Data.Char
+
+-- | useTokenizer, \"-lexer=x\"
+customTokenizer :: CustomData (StateGrammar -> String -> [CFTok])
+
+-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
+customUntokenizer :: CustomData (StateGrammar -> String -> String)
+
+-- | this is the way of selecting an item
+customOrDefault :: Options -> OptFun -> CustomData a -> a
+customOrDefault opts optfun db = maybe (defaultCustomVal db) id $
+ customAsOptVal opts optfun db
+
+-- | to produce menus of custom operations
+customInfo :: CustomData a -> (String, [String])
+customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c))
+
+type CommandId = String
+
+strCI :: String -> CommandId
+strCI = id
+
+ciStr :: CommandId -> String
+ciStr = id
+
+ciOpt :: CommandId -> Option
+ciOpt = iOpt
+
+newtype CustomData a = CustomData (String, [(CommandId,a)])
+
+customData :: String -> [(CommandId, a)] -> CustomData a
+customData title db = CustomData (title,db)
+
+dbCustomData :: CustomData a -> [(CommandId, a)]
+dbCustomData (CustomData (_,db)) = db
+
+titleCustomData :: CustomData a -> String
+titleCustomData (CustomData (t,_)) = t
+
+lookupCustom :: CustomData a -> CommandId -> Maybe a
+lookupCustom = flip lookup . dbCustomData
+
+customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a
+customAsOptVal opts optfun db = do
+ arg <- getOptVal opts optfun
+ lookupCustom db (strCI arg)
+
+-- | take the first entry from the database
+defaultCustomVal :: CustomData a -> a
+defaultCustomVal (CustomData (s,db)) =
+ ifNull (error ("empty database:" +++ s)) (snd . head) db
+
+customTokenizer =
+ customData "Tokenizers, selected by option -lexer=x" $
+ [
+ (strCI "words", const $ tokWords)
+ ,(strCI "literals", const $ tokLits)
+ ,(strCI "vars", const $ tokVars)
+ ,(strCI "chars", const $ map (tS . singleton))
+ ,(strCI "code", const $ lexHaskell)
+ ,(strCI "codevars", lexHaskellVar . stateIsWord)
+ ,(strCI "text", const $ lexText)
+ ,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr))
+ ,(strCI "codelit", lexHaskellLiteral . stateIsWord)
+ ,(strCI "textlit", lexTextLiteral . stateIsWord)
+ ,(strCI "codeC", const $ lexC2M)
+ ,(strCI "codeCHigh", const $ lexC2M' True)
+-- add your own tokenizers here
+ ]
+
+customUntokenizer =
+ customData "Untokenizers, selected by option -unlexer=x" $
+ [
+ (strCI "unwords", const $ id) -- DEFAULT
+ ,(strCI "text", const $ formatAsText)
+ ,(strCI "html", const $ formatAsHTML)
+ ,(strCI "latex", const $ formatAsLatex)
+ ,(strCI "code", const $ formatAsCode)
+ ,(strCI "concat", const $ filter (not . isSpace))
+ ,(strCI "textlit", const $ formatAsTextLit)
+ ,(strCI "codelit", const $ formatAsCodeLit)
+ ,(strCI "concat", const $ concatRemSpace)
+ ,(strCI "glue", const $ performBinds)
+ ,(strCI "reverse", const $ reverse)
+ ,(strCI "bind", const $ performBinds) -- backward compat
+-- add your own untokenizers here
+ ]
+
diff --git a/src-3.0/GF/Embed/EmbedParsing.hs b/src-3.0/GF/Embed/EmbedParsing.hs
new file mode 100644
index 000000000..43909f355
--- /dev/null
+++ b/src-3.0/GF/Embed/EmbedParsing.hs
@@ -0,0 +1,65 @@
+----------------------------------------------------------------------
+-- |
+-- Module : EmbedParsing
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date:
+-- > CVS $Author:
+-- > CVS $Revision:
+--
+-- just one parse method, for use in embedded GF systems
+-----------------------------------------------------------------------------
+
+module GF.Embed.EmbedParsing where
+
+import GF.Infra.CheckM
+import qualified GF.Canon.AbsGFC as C
+import GF.Canon.GFC
+import GF.Canon.MkGFC (trExp) ----
+import GF.Canon.CMacros
+import GF.Grammar.MMacros (refreshMetas)
+import GF.UseGrammar.Linear
+import GF.Data.Str
+import GF.CF.CF
+import GF.CF.CFIdent
+import GF.Infra.Ident
+import GF.Grammar.TypeCheck
+import GF.Grammar.Values
+import GF.UseGrammar.Tokenize
+import GF.CF.Profile
+import GF.Infra.Option
+import GF.Compile.ShellState
+import GF.Embed.EmbedCustom
+import GF.CF.PPrCF (prCFTree)
+import qualified GF.Parsing.GFC as New
+
+
+-- import qualified GF.Parsing.GFC as New
+
+import GF.Data.Operations
+
+import Data.List (nub)
+import Control.Monad (liftM)
+
+-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
+
+parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
+parseString os sg cat = liftM fst . parseStringMsg os sg cat
+
+parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
+parseStringMsg os sg cat s = do
+ (ts,(_,ss)) <- checkStart $ parseStringC os sg cat s
+ return (ts,unlines ss)
+
+parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
+parseStringC opts0 sg cat s = do
+ let opts = unionOptions opts0 $ stateOptions sg
+ algorithm = "f" -- default algorithm: FCFG
+ strategy = "bottomup"
+ tokenizer = customOrDefault opts useTokenizer customTokenizer sg
+ toks = tokenizer s
+ ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
+ checkErr $ allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts
+
diff --git a/src-3.0/GF/Embed/TemplateApp.hs b/src-3.0/GF/Embed/TemplateApp.hs
new file mode 100644
index 000000000..f8722691f
--- /dev/null
+++ b/src-3.0/GF/Embed/TemplateApp.hs
@@ -0,0 +1,44 @@
+module Main where
+
+import GF.Embed.EmbedAPI
+import System
+
+-- Simple translation application built on EmbedAPI. AR 7/10/2005
+
+main :: IO ()
+main = do
+ file:_ <- getArgs
+ grammar <- file2grammar file
+ translate grammar
+
+translate :: MultiGrammar -> IO ()
+translate grammar = do
+ s <- getLine
+ if s == "quit" then return () else do
+ treat grammar s
+ translate grammar
+
+treat :: MultiGrammar -> String -> IO ()
+treat grammar s = putStrLn $ case comm of
+ ["lin"] -> unlines $ linearizeAll grammar $ readTree grammar rest
+ ["lin",lang] -> linearize grammar lang $ readTree grammar rest
+ ["parse",cat] -> unlines $ map showTree $ concat $ parseAll grammar cat rest
+ ["parse",lang,cat] -> unlines $ map showTree $ parse grammar lang cat rest
+ ["langs"] -> unwords $ languages grammar
+ ["cats"] -> unwords $ categories grammar
+ ["help"] -> helpMsg
+ _ -> "command not interpreted: " ++ s
+ where
+ (comm,rest) = (words c,drop 1 r) where
+ (c,r) = span (/=':') s
+
+helpMsg = unlines [
+ "lin : <Tree>",
+ "lin <Lang> : <Tree>",
+ "parse <Cat> : <String>",
+ "parse <Lang> <Cat> : <String>",
+ "langs",
+ "cats",
+ "help",
+ "quit"
+ ]