GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3

author: aarne <aarne@cs.chalmers.se> 2008-05-21 09:26:44 +0000
committer: aarne <aarne@cs.chalmers.se> 2008-05-21 09:26:44 +0000
commit: 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree: 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Embed
parent: 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
4 files changed, 336 insertions, 0 deletions
diff --git a/src-3.0/GF/Embed/EmbedAPI.hs b/src-3.0/GF/Embed/EmbedAPI.hs
new file mode 100644
index 000000000..43e4f2546
--- /dev/null
+++ b/src-3.0/GF/Embed/EmbedAPI.hs
@@ -0,0 +1,114 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : EmbedAPI
+-- Maintainer  : Aarne Ranta
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 
+-- > CVS $Author: 
+-- > CVS $Revision: 
+--
+-- Reduced Application Programmer's Interface to GF, meant for
+-- embedded GF systems. AR 10/5/2005
+-----------------------------------------------------------------------------
+
+module GF.Embed.EmbedAPI where
+
+import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories,stateOptions,firstAbsCat)
+import GF.UseGrammar.Linear (linTree2string)
+import GF.UseGrammar.GetTree (string2tree)
+import GF.Embed.EmbedParsing (parseString)
+import GF.Canon.CMacros (noMark)
+import GF.Grammar.Grammar (Trm)
+import GF.Grammar.MMacros (exp2tree)
+import GF.Grammar.Macros (zIdent)
+import GF.Grammar.PrGrammar (prt_)
+import GF.Grammar.Values (tree2exp)
+import GF.Grammar.TypeCheck (annotate)
+import GF.Canon.GetGFC (getCanonGrammar)
+import GF.Infra.Modules (emptyMGrammar)
+import GF.CF.CFIdent (string2CFCat)
+import GF.Infra.UseIO
+import GF.Data.Operations
+import GF.Infra.Option (noOptions,useUntokenizer,options,iOpt)
+import GF.Infra.Ident (prIdent)
+import GF.Embed.EmbedCustom
+
+-- This API is meant to be used when embedding GF grammars in Haskell 
+-- programs. The embedded system is supposed to use the
+-- .gfcm grammar format, which is first produced by the gf program.
+
+---------------------------------------------------
+-- Interface
+---------------------------------------------------
+
+type MultiGrammar = ShellState
+type Language     = String
+type Category     = String
+type Tree         = Trm
+
+file2grammar :: FilePath -> IO MultiGrammar
+
+linearize    :: MultiGrammar -> Language -> Tree -> String
+parse        :: MultiGrammar -> Language -> Category -> String -> [Tree]
+
+linearizeAll     :: MultiGrammar -> Tree -> [String]
+linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
+
+parseAll     :: MultiGrammar -> Category -> String -> [[Tree]]
+parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
+
+readTree     :: MultiGrammar -> String -> Tree
+showTree     ::                 Tree -> String
+
+languages    :: MultiGrammar -> [Language]
+categories   :: MultiGrammar -> [Category]
+
+startCat :: MultiGrammar -> Category
+
+---------------------------------------------------
+-- Implementation
+---------------------------------------------------
+
+file2grammar file = do
+  can <- useIOE (error "cannot parse grammar file") $ getCanonGrammar file
+  return $ errVal (error "cannot build multigrammar") $ 
+    grammar2shellState (options [iOpt "docf"]) (can,emptyMGrammar) 
+
+linearize mgr lang = 
+  untok .
+  linTree2string noMark (canModules mgr) (zIdent lang) . 
+  errVal (error "illegal tree") . 
+  annotate gr
+ where
+   gr    = grammar sgr
+   sgr   = stateGrammarOfLang mgr (zIdent lang)
+   untok = customOrDefault (stateOptions sgr) useUntokenizer customUntokenizer sgr
+
+parse mgr lang cat = 
+  map tree2exp . 
+  errVal [] . 
+  parseString (stateOptions sgr) sgr cfcat
+ where
+   sgr   = stateGrammarOfLang mgr (zIdent lang)
+   cfcat = string2CFCat abs cat
+   abs   = maybe (error "no abstract syntax") prIdent $ abstract mgr
+
+linearizeAll mgr = map snd . linearizeAllLang mgr
+linearizeAllLang mgr t = [(lang,linearize mgr lang t) | lang <- languages mgr]
+
+parseAll mgr cat = map snd . parseAllLang mgr cat
+
+parseAllLang mgr cat s = 
+  [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
+
+readTree mgr s = tree2exp $ string2tree (firstStateGrammar mgr) s
+
+showTree t  = prt_ t
+
+languages mgr = [prt_ l | l <- allLanguages mgr]
+
+categories mgr = [prt_ c | (_,c) <- allCategories mgr]
+
+startCat = prt_ . snd . firstAbsCat noOptions . firstStateGrammar
diff --git a/src-3.0/GF/Embed/EmbedCustom.hs b/src-3.0/GF/Embed/EmbedCustom.hs
new file mode 100644
index 000000000..f315441c5
--- /dev/null
+++ b/src-3.0/GF/Embed/EmbedCustom.hs
@@ -0,0 +1,113 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : EmbedCustom
+-- Maintainer  : AR
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 
+-- > CVS $Author: 
+-- > CVS $Revision: 
+--
+-- A database for customizable lexers and unlexers. Reduced version of
+-- GF.API, intended for embedded GF grammars.
+
+-----------------------------------------------------------------------------
+
+module GF.Embed.EmbedCustom where
+
+import GF.Data.Operations
+import GF.Text.Text
+import GF.UseGrammar.Tokenize
+import GF.UseGrammar.Morphology
+import GF.Infra.Option
+import GF.CF.CFIdent
+import GF.Compile.ShellState
+import Data.Char
+
+-- | useTokenizer, \"-lexer=x\"
+customTokenizer      :: CustomData (StateGrammar -> String -> [CFTok])  
+
+-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
+customUntokenizer    :: CustomData (StateGrammar -> String -> String)  
+
+-- | this is the way of selecting an item
+customOrDefault :: Options -> OptFun -> CustomData a -> a
+customOrDefault opts optfun db = maybe (defaultCustomVal db) id $ 
+                                   customAsOptVal opts optfun db
+
+-- | to produce menus of custom operations
+customInfo :: CustomData a -> (String, [String])
+customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c))
+
+type CommandId = String
+
+strCI :: String -> CommandId
+strCI = id
+
+ciStr :: CommandId -> String
+ciStr = id
+
+ciOpt :: CommandId -> Option
+ciOpt = iOpt
+
+newtype CustomData a = CustomData (String, [(CommandId,a)])
+
+customData :: String -> [(CommandId, a)] -> CustomData a
+customData title db = CustomData (title,db)
+
+dbCustomData :: CustomData a -> [(CommandId, a)]
+dbCustomData (CustomData (_,db)) = db
+
+titleCustomData :: CustomData a -> String
+titleCustomData (CustomData (t,_)) = t
+
+lookupCustom :: CustomData a -> CommandId -> Maybe a
+lookupCustom = flip lookup . dbCustomData
+
+customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a
+customAsOptVal opts optfun db = do
+  arg <- getOptVal opts optfun
+  lookupCustom db (strCI arg)
+
+-- | take the first entry from the database
+defaultCustomVal :: CustomData a -> a
+defaultCustomVal (CustomData (s,db)) = 
+  ifNull (error ("empty database:" +++ s)) (snd . head) db
+
+customTokenizer = 
+  customData "Tokenizers, selected by option -lexer=x" $
+  [
+   (strCI "words",     const $ tokWords)
+  ,(strCI "literals",  const $ tokLits)
+  ,(strCI "vars",      const $ tokVars)
+  ,(strCI "chars",     const $ map (tS . singleton))
+  ,(strCI "code",      const $ lexHaskell)
+  ,(strCI "codevars",  lexHaskellVar . stateIsWord)
+  ,(strCI "text",      const $ lexText)
+  ,(strCI "unglue",    \gr -> map tS . decomposeWords (stateMorpho gr))
+  ,(strCI "codelit",   lexHaskellLiteral . stateIsWord)
+  ,(strCI "textlit",   lexTextLiteral . stateIsWord)
+  ,(strCI "codeC",     const $ lexC2M)
+  ,(strCI "codeCHigh", const $ lexC2M' True)
+-- add your own tokenizers here
+  ]
+
+customUntokenizer = 
+  customData "Untokenizers, selected by option -unlexer=x" $
+  [
+   (strCI "unwords",   const $ id)   -- DEFAULT
+  ,(strCI "text",      const $ formatAsText)
+  ,(strCI "html",      const $ formatAsHTML)
+  ,(strCI "latex",     const $ formatAsLatex)
+  ,(strCI "code",      const $ formatAsCode)
+  ,(strCI "concat",    const $ filter (not . isSpace))
+  ,(strCI "textlit",   const $ formatAsTextLit)
+  ,(strCI "codelit",   const $ formatAsCodeLit)
+  ,(strCI "concat",    const $ concatRemSpace)
+  ,(strCI "glue",      const $ performBinds)
+  ,(strCI "reverse",   const $ reverse)
+  ,(strCI "bind",      const $ performBinds) -- backward compat
+-- add your own untokenizers here
+  ]
+
diff --git a/src-3.0/GF/Embed/EmbedParsing.hs b/src-3.0/GF/Embed/EmbedParsing.hs
new file mode 100644
index 000000000..43909f355
--- /dev/null
+++ b/src-3.0/GF/Embed/EmbedParsing.hs
@@ -0,0 +1,65 @@
+----------------------------------------------------------------------
+-- |
+-- Module      : EmbedParsing
+-- Maintainer  : AR
+-- Stability   : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 
+-- > CVS $Author: 
+-- > CVS $Revision: 
+--
+-- just one parse method, for use in embedded GF systems 
+-----------------------------------------------------------------------------
+
+module GF.Embed.EmbedParsing where
+
+import GF.Infra.CheckM
+import qualified GF.Canon.AbsGFC as C
+import GF.Canon.GFC
+import GF.Canon.MkGFC (trExp) ----
+import GF.Canon.CMacros
+import GF.Grammar.MMacros (refreshMetas)
+import GF.UseGrammar.Linear
+import GF.Data.Str
+import GF.CF.CF
+import GF.CF.CFIdent
+import GF.Infra.Ident
+import GF.Grammar.TypeCheck
+import GF.Grammar.Values
+import GF.UseGrammar.Tokenize
+import GF.CF.Profile
+import GF.Infra.Option
+import GF.Compile.ShellState
+import GF.Embed.EmbedCustom
+import GF.CF.PPrCF (prCFTree)
+import qualified GF.Parsing.GFC as New
+
+
+-- import qualified GF.Parsing.GFC as New
+
+import GF.Data.Operations
+
+import Data.List (nub)
+import Control.Monad (liftM)
+
+-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
+
+parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
+parseString os sg cat = liftM fst . parseStringMsg os sg cat
+
+parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
+parseStringMsg os sg cat s = do
+  (ts,(_,ss)) <- checkStart $ parseStringC os sg cat s
+  return (ts,unlines ss)
+
+parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
+parseStringC opts0 sg cat s = do
+  let opts      = unionOptions opts0 $ stateOptions sg
+      algorithm = "f" -- default algorithm: FCFG
+      strategy  = "bottomup" 
+      tokenizer = customOrDefault opts useTokenizer customTokenizer sg
+      toks = tokenizer s 
+  ts  <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
+  checkErr $ allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts
+
diff --git a/src-3.0/GF/Embed/TemplateApp.hs b/src-3.0/GF/Embed/TemplateApp.hs
new file mode 100644
index 000000000..f8722691f
--- /dev/null
+++ b/src-3.0/GF/Embed/TemplateApp.hs
@@ -0,0 +1,44 @@
+module Main where
+
+import GF.Embed.EmbedAPI
+import System
+
+-- Simple translation application built on EmbedAPI. AR 7/10/2005
+
+main :: IO ()
+main = do
+  file:_  <- getArgs
+  grammar <- file2grammar file
+  translate grammar
+
+translate :: MultiGrammar -> IO ()
+translate grammar = do
+  s <- getLine
+  if s == "quit" then return () else do
+    treat grammar s
+    translate grammar
+
+treat :: MultiGrammar -> String -> IO ()
+treat grammar s = putStrLn $ case comm of
+  ["lin"]        -> unlines $ linearizeAll grammar $ readTree grammar rest 
+  ["lin",lang]   -> linearize grammar lang $ readTree grammar rest 
+  ["parse",cat]  -> unlines $ map showTree $ concat $ parseAll grammar cat rest
+  ["parse",lang,cat] -> unlines $ map showTree $ parse grammar lang cat rest
+  ["langs"]      -> unwords $ languages grammar
+  ["cats"]       -> unwords $ categories grammar
+  ["help"]       -> helpMsg
+  _ -> "command not interpreted: " ++ s
+ where
+   (comm,rest) = (words c,drop 1 r) where
+      (c,r) = span (/=':') s
+
+helpMsg = unlines [
+  "lin : <Tree>",
+  "lin <Lang> : <Tree>",
+  "parse <Cat> : <String>",
+  "parse <Lang> <Cat> : <String>",
+  "langs",
+  "cats",
+  "help",
+  "quit"
+  ]
author	aarne <aarne@cs.chalmers.se>	2008-05-21 09:26:44 +0000
committer	aarne <aarne@cs.chalmers.se>	2008-05-21 09:26:44 +0000
commit	055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree	0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Embed
parent	915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)