summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2007-09-05 14:05:42 +0000
committeraarne <aarne@cs.chalmers.se>2007-09-05 14:05:42 +0000
commitd2a5148708a3f270b058403e573106ed1d309957 (patch)
treed1676e0578644e96ba644250c5190f5e294bac06
parente2b9657f79d913dddb064037e79a9dd1a63bdbf9 (diff)
modernized parser in EmbedAPI
-rw-r--r--examples/tutorial/embedded/LexMath.gf8
-rw-r--r--examples/tutorial/embedded/LexMathEng.gf8
-rw-r--r--examples/tutorial/embedded/LexMathFre.gf8
-rw-r--r--examples/tutorial/embedded/Makefile18
-rw-r--r--examples/tutorial/embedded/Math.gf14
-rw-r--r--examples/tutorial/embedded/MathEng.gf6
-rw-r--r--examples/tutorial/embedded/MathFre.gf6
-rw-r--r--examples/tutorial/embedded/MathI.gf23
-rw-r--r--examples/tutorial/embedded/TransferLoop.hs41
-rw-r--r--examples/tutorial/embedded/Translator.hs16
-rw-r--r--examples/tutorial/embedded/TranslatorLoop.hs23
-rw-r--r--examples/tutorial/embedded/haskell/GSyntax.hs100
-rw-r--r--examples/tutorial/embedded/haskell/Run.hs38
-rw-r--r--src/GF/Embed/EmbedAPI.hs23
-rw-r--r--src/GF/Embed/EmbedParsing.hs90
15 files changed, 335 insertions, 87 deletions
diff --git a/examples/tutorial/embedded/LexMath.gf b/examples/tutorial/embedded/LexMath.gf
new file mode 100644
index 000000000..25bfe8846
--- /dev/null
+++ b/examples/tutorial/embedded/LexMath.gf
@@ -0,0 +1,8 @@
+-- Lexicon interface: the three adjectives every language instance must define.
+interface LexMath = open Syntax in {
+
+ oper
+ even_A : A ;
+ odd_A : A ;
+ prime_A : A ;
+
+}
diff --git a/examples/tutorial/embedded/LexMathEng.gf b/examples/tutorial/embedded/LexMathEng.gf
new file mode 100644
index 000000000..183fa520f
--- /dev/null
+++ b/examples/tutorial/embedded/LexMathEng.gf
@@ -0,0 +1,8 @@
+-- English instance of LexMath: each adjective is built with mkA from its string.
+instance LexMathEng of LexMath = open SyntaxEng, ParadigmsEng in {
+
+ oper
+ even_A = mkA "even" ;
+ odd_A = mkA "odd" ;
+ prime_A = mkA "prime" ;
+
+}
diff --git a/examples/tutorial/embedded/LexMathFre.gf b/examples/tutorial/embedded/LexMathFre.gf
new file mode 100644
index 000000000..7407b410f
--- /dev/null
+++ b/examples/tutorial/embedded/LexMathFre.gf
@@ -0,0 +1,8 @@
+-- French instance of LexMath: each adjective is built with mkA from its string.
+instance LexMathFre of LexMath = open SyntaxFre, ParadigmsFre in {
+
+ oper
+ even_A = mkA "pair" ;
+ odd_A = mkA "impair" ;
+ prime_A = mkA "premier" ;
+
+}
diff --git a/examples/tutorial/embedded/Makefile b/examples/tutorial/embedded/Makefile
new file mode 100644
index 000000000..203795b6a
--- /dev/null
+++ b/examples/tutorial/embedded/Makefile
@@ -0,0 +1,18 @@
+# Builds the embedded-grammar example: the grammar file math.gfcm, the
+# Haskell module generated from it, and the 'math' executable.
+
+# None of these targets names a file that the rule creates.
+.PHONY: all gf hs run clean distclean
+
+all: gf hs run
+
+# Compile both concrete syntaxes and write the result to math.gfcm.
+gf:
+	echo "pm | wf math.gfcm" | gf MathEng.gf MathFre.gf
+
+# Print the grammar with the Haskell printer into haskell/GSyntax.hs.
+hs: gf
+	echo "pg -printer=haskell | wf haskell/GSyntax.hs" | gf math.gfcm
+
+run: hs
+	ghc --make -o ./math -ihaskell haskell/Run.hs
+	strip math
+
+clean:
+	rm -f *.gfc *.gfr haskell/*.o haskell/*.hi
+
+# The generated module is written to haskell/ by the 'hs' rule, so that is
+# where it has to be removed from.
+distclean: clean
+	rm -f haskell/GSyntax.hs math math.gfcm
+
diff --git a/examples/tutorial/embedded/Math.gf b/examples/tutorial/embedded/Math.gf
new file mode 100644
index 000000000..95f5d5d1c
--- /dev/null
+++ b/examples/tutorial/embedded/Math.gf
@@ -0,0 +1,14 @@
+-- Abstract syntax: questions about integer objects, and yes/no answers.
+abstract Math = {
+
+ cat Answer ; Question ; Object ;
+
+ fun
+ Even : Object -> Question ;
+ Odd : Object -> Question ;
+ Prime : Object -> Question ;
+ Number : Int -> Object ;
+
+ Yes : Answer ;
+ No : Answer ;
+
+}
diff --git a/examples/tutorial/embedded/MathEng.gf b/examples/tutorial/embedded/MathEng.gf
new file mode 100644
index 000000000..ac332fef7
--- /dev/null
+++ b/examples/tutorial/embedded/MathEng.gf
@@ -0,0 +1,6 @@
+--# -path=.:present:prelude:mathematical
+
+-- English concrete syntax: MathI with its three interfaces bound to the
+-- English modules.
+concrete MathEng of Math = MathI with
+ (Syntax = SyntaxEng),
+ (Symbol = SymbolEng),
+ (LexMath = LexMathEng) ;
diff --git a/examples/tutorial/embedded/MathFre.gf b/examples/tutorial/embedded/MathFre.gf
new file mode 100644
index 000000000..456db8084
--- /dev/null
+++ b/examples/tutorial/embedded/MathFre.gf
@@ -0,0 +1,6 @@
+--# -path=.:present:prelude:mathematical
+
+-- French concrete syntax: MathI with its three interfaces bound to the
+-- French modules.
+concrete MathFre of Math = MathI with
+ (Syntax = SyntaxFre),
+ (Symbol = SymbolFre),
+ (LexMath = LexMathFre) ;
diff --git a/examples/tutorial/embedded/MathI.gf b/examples/tutorial/embedded/MathI.gf
new file mode 100644
index 000000000..aaac7b98f
--- /dev/null
+++ b/examples/tutorial/embedded/MathI.gf
@@ -0,0 +1,23 @@
+-- Language-independent concrete syntax for Math, written against the
+-- Syntax, Symbol and LexMath interfaces.
+incomplete concrete MathI of Math =
+ open Syntax, Symbol, LexMath in {
+
+ flags startcat = Question ; lexer = textlit ; unlexer = text ;
+
+ lincat
+ Answer = Text ;
+ Question = Text ;
+ Object = NP ;
+
+ lin
+ -- The three question functions share questAdj and differ only in the adjective.
+ Even = questAdj even_A ;
+ Odd = questAdj odd_A ;
+ Prime = questAdj prime_A ;
+ Number n = mkNP (IntPN n) ;
+
+ Yes = mkText yes_Phr ;
+ No = mkText no_Phr ;
+
+ oper
+ -- Builds a question text from an adjective and a noun phrase.
+ questAdj : A -> NP -> Text = \adj,x -> mkText (mkQS (mkCl x adj)) ;
+
+}
diff --git a/examples/tutorial/embedded/TransferLoop.hs b/examples/tutorial/embedded/TransferLoop.hs
new file mode 100644
index 000000000..5663a1eb5
--- /dev/null
+++ b/examples/tutorial/embedded/TransferLoop.hs
@@ -0,0 +1,41 @@
+module Main where
+
+import GF.Embed.EmbedAPI
+import GSyntax
+
+-- | Load the grammar from @math.gfcm@ and answer questions read from stdin.
+main :: IO ()
+main = file2grammar "math.gfcm" >>= loop . translate answerTree
+
+-- | Read lines from stdin and print @trans@ of each one; the line @quit@
+-- prints @bye@ and ends the loop.
+loop :: (String -> String) -> IO ()
+loop trans = getLine >>= step
+  where
+    step "quit" = putStrLn "bye"
+    step line   = putStrLn (trans line) >> loop trans
+
+-- | Process the input line by line: parse in the grammar's start category,
+-- apply @tr@ to the first tree of the first result, and linearize it in the
+-- language that produced the parse. Lines without a parse give @NO PARSE@.
+translate :: (Tree -> Tree) -> MultiGrammar -> String -> String
+translate tr gr = unlines . map (render . parseAllLang gr (startCat gr)) . lines
+  where
+    render ((lg, t : _) : _) = linearize gr lg (tr t)
+    render _                 = "NO PARSE"
+
+-- | Tree-level wrapper around 'answer': decode with 'fg', answer, encode with 'gf'.
+answerTree :: Tree -> Tree
+answerTree t = gf (answer (fg t))
+
+-- | Answer a question by applying the matching predicate to its object.
+answer :: GQuestion -> GAnswer
+answer (GOdd x)   = test odd x
+answer (GEven x)  = test even x
+answer (GPrime x) = test prime x
+
+-- | The integer carried by an object, converted from 'Integer' to 'Int'.
+value :: GObject -> Int
+value (GNumber (GInt i)) = fromInteger i
+
+-- | 'GYes' when the predicate holds for the object's value, 'GNo' otherwise.
+test :: (Int -> Bool) -> GObject -> GAnswer
+test f x
+  | f (value x) = GYes
+  | otherwise   = GNo
+
+-- | Primality test by trial division.
+--
+-- Numbers below 2 are not prime. Candidate divisors run from 2 up to the
+-- square root of @n@, which is enough because any larger factor pairs with
+-- a smaller one.
+prime :: Int -> Bool
+prime n = n > 1 && all (\d -> n `mod` d /= 0) (takeWhile (\d -> d * d <= n) [2 ..])
diff --git a/examples/tutorial/embedded/Translator.hs b/examples/tutorial/embedded/Translator.hs
new file mode 100644
index 000000000..c227420f6
--- /dev/null
+++ b/examples/tutorial/embedded/Translator.hs
@@ -0,0 +1,16 @@
+module Main where
+
+import GF.Embed.EmbedAPI
+import System (getArgs)
+
+-- | Load the grammar named by the first command-line argument and translate
+-- stdin to stdout.
+main :: IO ()
+main = do
+  file : _ <- getArgs
+  file2grammar file >>= interact . translate
+
+-- | Translate the input line by line.
+--
+-- Each line is parsed in the grammar's start category. The first tree of the
+-- first result is linearized in every language other than the one it was
+-- parsed in. A line with no parse yields @NO PARSE@ rather than a
+-- pattern-match failure.
+translate :: MultiGrammar -> String -> String
+translate gr = unlines . map transLine . lines
+  where
+    transLine s = case parseAllLang gr (startCat gr) s of
+      (lang, tree : _) : _ ->
+        unlines [linearize gr lg tree | lg <- languages gr, lg /= lang]
+      _ -> "NO PARSE"
diff --git a/examples/tutorial/embedded/TranslatorLoop.hs b/examples/tutorial/embedded/TranslatorLoop.hs
new file mode 100644
index 000000000..18b20146e
--- /dev/null
+++ b/examples/tutorial/embedded/TranslatorLoop.hs
@@ -0,0 +1,23 @@
+module Main where
+
+import GF.Embed.EmbedAPI
+import System (getArgs)
+
+-- | Load the grammar named by the first command-line argument and start the
+-- interactive translation loop.
+main :: IO ()
+main = do
+  file : _ <- getArgs
+  file2grammar file >>= loop . translate
+
+-- | Print @trans@ of every line read from stdin until the line @quit@,
+-- which prints @bye@ and stops.
+loop :: (String -> String) -> IO ()
+loop trans = do
+  line <- getLine
+  case line of
+    "quit" -> putStrLn "bye"
+    _      -> putStrLn (trans line) >> loop trans
+
+-- | Translate the input line by line: the first tree of the first parse
+-- result is linearized in every language except the one it was parsed in.
+-- Lines without a parse give @NO PARSE@.
+translate :: MultiGrammar -> String -> String
+translate gr = unlines . map (render . parseAllLang gr (startCat gr)) . lines
+  where
+    render ((src, t : _) : _) = unlines [linearize gr l t | l <- languages gr, l /= src]
+    render _                  = "NO PARSE"
diff --git a/examples/tutorial/embedded/haskell/GSyntax.hs b/examples/tutorial/embedded/haskell/GSyntax.hs
new file mode 100644
index 000000000..28469e7da
--- /dev/null
+++ b/examples/tutorial/embedded/haskell/GSyntax.hs
@@ -0,0 +1,100 @@
+module GSyntax where
+
+import GF.Infra.Ident
+import GF.Grammar.Grammar
+import GF.Grammar.PrGrammar
+import GF.Grammar.Macros
+import GF.Data.Operations
+----------------------------------------------------
+-- automatic translation from GF to Haskell
+----------------------------------------------------
+
+-- | Types that can be converted to a GF term.
+class Gf a where gf :: a -> Trm
+-- | Types that can be read back from a GF term.
+class Fg a where fg :: Trm -> a
+
+-- | Wrapper for string literals; encoded as the term @K s@.
+newtype GString = GString String deriving Show
+
+instance Gf GString where
+ gf (GString s) = K s
+
+-- Decoding calls 'error' when the term form is not @Ok ([], K s, [])@.
+instance Fg GString where
+ fg t =
+ case termForm t of
+ Ok ([], K s ,[]) -> GString s
+ _ -> error ("no GString " ++ prt t)
+
+-- | Wrapper for integer literals; encoded as the term @EInt s@.
+newtype GInt = GInt Integer deriving Show
+
+instance Gf GInt where
+ gf (GInt s) = EInt s
+
+-- Decoding calls 'error' when the term form is not @Ok ([], EInt s, [])@.
+instance Fg GInt where
+ fg t =
+ case termForm t of
+ Ok ([], EInt s ,[]) -> GInt s
+ _ -> error ("no GInt " ++ prt t)
+
+-- | Wrapper for floating-point literals; encoded as the term @EFloat s@.
+newtype GFloat = GFloat Double deriving Show
+
+instance Gf GFloat where
+ gf (GFloat s) = EFloat s
+
+-- Decoding calls 'error' when the term form is not @Ok ([], EFloat s, [])@.
+instance Fg GFloat where
+ fg t =
+ case termForm t of
+ Ok ([], EFloat s ,[]) -> GFloat s
+ _ -> error ("no GFloat " ++ prt t)
+
+----------------------------------------------------
+-- below this line machine-generated
+----------------------------------------------------
+
+-- Machine-generated datatypes. The Gf instances in this module map each
+-- constructor to the "Math" function of the same name without the G prefix.
+data GAnswer =
+ GYes
+ | GNo
+ deriving Show
+
+data GObject = GNumber GInt
+ deriving Show
+
+data GQuestion =
+ GPrime GObject
+ | GOdd GObject
+ | GEven GObject
+ deriving Show
+
+
+-- Encoding: each constructor becomes an application, built with appqc, of
+-- the named function in module "Math" to its encoded arguments.
+instance Gf GAnswer where
+ gf GYes = appqc "Math" "Yes" []
+ gf GNo = appqc "Math" "No" []
+
+instance Gf GObject where gf (GNumber x1) = appqc "Math" "Number" [gf x1]
+
+instance Gf GQuestion where
+ gf (GPrime x1) = appqc "Math" "Prime" [gf x1]
+ gf (GOdd x1) = appqc "Math" "Odd" [gf x1]
+ gf (GEven x1) = appqc "Math" "Even" [gf x1]
+
+
+-- Decoding: the term form is matched against the qualified "Math" function
+-- names. Any other term calls 'error' with the printed term in the message,
+-- so these functions are partial.
+instance Fg GAnswer where
+ fg t =
+ case termForm t of
+ Ok ([], Q (IC "Math") (IC "Yes"),[]) -> GYes
+ Ok ([], Q (IC "Math") (IC "No"),[]) -> GNo
+ _ -> error ("no Answer " ++ prt t)
+
+instance Fg GObject where
+ fg t =
+ case termForm t of
+ Ok ([], Q (IC "Math") (IC "Number"),[x1]) -> GNumber (fg x1)
+ _ -> error ("no Object " ++ prt t)
+
+instance Fg GQuestion where
+ fg t =
+ case termForm t of
+ Ok ([], Q (IC "Math") (IC "Prime"),[x1]) -> GPrime (fg x1)
+ Ok ([], Q (IC "Math") (IC "Odd"),[x1]) -> GOdd (fg x1)
+ Ok ([], Q (IC "Math") (IC "Even"),[x1]) -> GEven (fg x1)
+ _ -> error ("no Question " ++ prt t)
+
+
diff --git a/examples/tutorial/embedded/haskell/Run.hs b/examples/tutorial/embedded/haskell/Run.hs
new file mode 100644
index 000000000..c3fd87466
--- /dev/null
+++ b/examples/tutorial/embedded/haskell/Run.hs
@@ -0,0 +1,38 @@
+module Main where
+
+import GSyntax
+import GF.Embed.EmbedAPI
+
+-- | Load the grammar from @math.gfcm@ and start the question loop.
+main :: IO ()
+main = file2grammar "math.gfcm" >>= loop
+
+-- | Repeatedly read a line from stdin and interpret it; there is no exit command.
+loop :: MultiGrammar -> IO ()
+loop gr = getLine >>= interpret gr >> loop gr
+
+-- | Parse the line as a @Question@, compute the answer, and print it in the
+-- language the question was parsed in.
+--
+-- Prints @no parse@ whenever the first result does not carry a tree. The
+-- match is total, so an unexpected result shape cannot raise a
+-- pattern-match failure.
+interpret :: MultiGrammar -> String -> IO ()
+interpret gr s = case parseAllLang gr "Question" s of
+  (l, t : _) : _ -> putStrLn $ linearize gr l $ gf $ answer $ fg t
+  _              -> putStrLn "no parse"
+
+-- | Answer a question by applying the matching predicate to its object.
+answer :: GQuestion -> GAnswer
+answer (GOdd x)   = test odd x
+answer (GEven x)  = test even x
+answer (GPrime x) = test prime x
+
+-- | The integer carried by an object, converted from 'Integer' to 'Int'.
+value :: GObject -> Int
+value (GNumber (GInt i)) = fromInteger i
+
+-- | 'GYes' when the predicate holds for the object's value, 'GNo' otherwise.
+test :: (Int -> Bool) -> GObject -> GAnswer
+test f x
+  | f (value x) = GYes
+  | otherwise   = GNo
+
+-- | Primality test by trial division.
+--
+-- Numbers below 2 are not prime. Candidate divisors run from 2 up to the
+-- square root of @n@, which is enough because any larger factor pairs with
+-- a smaller one.
+prime :: Int -> Bool
+prime n = n > 1 && all (\d -> n `mod` d /= 0) (takeWhile (\d -> d * d <= n) [2 ..])
diff --git a/src/GF/Embed/EmbedAPI.hs b/src/GF/Embed/EmbedAPI.hs
index a3a130a36..43e4f2546 100644
--- a/src/GF/Embed/EmbedAPI.hs
+++ b/src/GF/Embed/EmbedAPI.hs
@@ -15,7 +15,7 @@
module GF.Embed.EmbedAPI where
-import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories,stateOptions)
+import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories,stateOptions,firstAbsCat)
import GF.UseGrammar.Linear (linTree2string)
import GF.UseGrammar.GetTree (string2tree)
import GF.Embed.EmbedParsing (parseString)
@@ -53,8 +53,11 @@ file2grammar :: FilePath -> IO MultiGrammar
linearize :: MultiGrammar -> Language -> Tree -> String
parse :: MultiGrammar -> Language -> Category -> String -> [Tree]
-linearizeAll :: MultiGrammar -> Tree -> [String]
-parseAll :: MultiGrammar -> Category -> String -> [[Tree]]
+linearizeAll :: MultiGrammar -> Tree -> [String]
+linearizeAllLang :: MultiGrammar -> Tree -> [(Language,String)]
+
+parseAll :: MultiGrammar -> Category -> String -> [[Tree]]
+parseAllLang :: MultiGrammar -> Category -> String -> [(Language,[Tree])]
readTree :: MultiGrammar -> String -> Tree
showTree :: Tree -> String
@@ -62,6 +65,8 @@ showTree :: Tree -> String
languages :: MultiGrammar -> [Language]
categories :: MultiGrammar -> [Category]
+startCat :: MultiGrammar -> Category
+
---------------------------------------------------
-- Implementation
---------------------------------------------------
@@ -84,15 +89,19 @@ linearize mgr lang =
parse mgr lang cat =
map tree2exp .
errVal [] .
- parseString noOptions sgr cfcat
+ parseString (stateOptions sgr) sgr cfcat
where
sgr = stateGrammarOfLang mgr (zIdent lang)
cfcat = string2CFCat abs cat
abs = maybe (error "no abstract syntax") prIdent $ abstract mgr
-linearizeAll mgr t = [linearize mgr lang t | lang <- languages mgr]
+-- The linearizations alone, in the order given by linearizeAllLang.
+linearizeAll mgr = map snd . linearizeAllLang mgr
+-- One (language, linearization) pair for every language of the grammar.
+linearizeAllLang mgr t = [(lang,linearize mgr lang t) | lang <- languages mgr]
-parseAll mgr cat s = [parse mgr lang cat s | lang <- languages mgr]
+-- The tree lists alone, taken from parseAllLang.
+-- NOTE(review): languages without a parse are dropped, so the result no
+-- longer has one entry per language; confirm no caller zips it with 'languages'.
+parseAll mgr cat = map snd . parseAllLang mgr cat
+
+-- Parses the string in every language and keeps only the languages that
+-- yield at least one tree.
+parseAllLang mgr cat s =
+ [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang cat s, not (null ts)]
readTree mgr s = tree2exp $ string2tree (firstStateGrammar mgr) s
@@ -101,3 +110,5 @@ showTree t = prt_ t
languages mgr = [prt_ l | l <- allLanguages mgr]
categories mgr = [prt_ c | (_,c) <- allCategories mgr]
+
+-- Printed name of the category that firstAbsCat returns for the first state
+-- grammar; firstAbsCat is called with noOptions.
+startCat = prt_ . snd . firstAbsCat noOptions . firstStateGrammar
diff --git a/src/GF/Embed/EmbedParsing.hs b/src/GF/Embed/EmbedParsing.hs
index 40378c491..43909f355 100644
--- a/src/GF/Embed/EmbedParsing.hs
+++ b/src/GF/Embed/EmbedParsing.hs
@@ -33,8 +33,7 @@ import GF.Infra.Option
import GF.Compile.ShellState
import GF.Embed.EmbedCustom
import GF.CF.PPrCF (prCFTree)
-
-import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE
+import qualified GF.Parsing.GFC as New
-- import qualified GF.Parsing.GFC as New
@@ -55,83 +54,12 @@ parseStringMsg os sg cat s = do
return (ts,unlines ss)
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
-parseStringC opts0 sg cat s
-
- | otherwise = do
- let opts = unionOptions opts0 $ stateOptions sg
- cf = stateCF sg
- gr = stateGrammarST sg
- cn = cncId sg
- tok = customOrDefault opts useTokenizer customTokenizer sg
- parser = PCFOld.parse "ibn" (stateCF sg) cat -- customOrDefault opts useParser customParser sg cat
- tokens2trms opts sg cn parser (tok s)
-
-tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
-tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info
- where result = parser toks
- info = snd result
- trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2))
-
-trees2trms :: Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree]
-trees2trms opts sg cn as ts0 info = do
- ts <- case () of
- _ | null ts0 -> checkWarn "No success in cf parsing" >> return []
- _ | raw -> do
- ts1 <- return (map cf2trm0 ts0) ----- should not need annot
- checks [
- mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails
- ,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return []
- ]
- _ -> do
- let num = optIntOrN opts flagRawtrees 999999
- let (ts01,rest) = splitAt num ts0
- if null rest then return ()
- else checkWarn ("Warning: only" +++ show num +++ "raw parses out of" +++
- show (length ts0) +++
- "considered; use -rawtrees=<Int> to see more"
- )
- (ts1,ss) <- checkErr $ mapErrN 1 postParse ts01
- if null ts1 then raise ss else return ()
- ts2 <- mapM (checkErr . annotate gr . refreshMetas [] . trExp) ts1 ----
- if forgive then return ts2 else do
- let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
- ps = [t | (t,ss) <- tsss,
- any (compatToks as) (map str2cftoks ss)]
- if null ps
- then raise $ "Failure in morphology." ++
- if verb
- then "\nPossible corrections: " +++++
- unlines (nub (map sstr (concatMap snd tsss)))
- else ""
- else return ps
-
- if verb
- then checkWarn $ " the token list" +++ show as ++++ unknown as +++++ info
- else return ()
-
- return $ optIntOrAll opts flagNumber $ nub ts
- where
- gr = stateGrammarST sg
-
- raw = oElem rawParse opts
- verb = oElem beVerbose opts
- forgive = oElem forgiveParse opts
-
- unknown ts = case filter noMatch [t | t@(TS _) <- ts] of
- [] -> "where all words are known"
- us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
- terminals = map TS $ stateGrammarWords sg
- noMatch t = all (not . compatTok t) terminals
-
-
---- too much type checking in building term info? return FullTerm to save work?
-
--- | raw parsing: so simple it is for a context-free CF grammar
-cf2trm0 :: CFTree -> C.Exp
-cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
- where
- cffun2trm (CFFun (fun,_)) = fun
- mkApp = foldl C.EApp
- mkAppAtom a = mkApp (C.EAtom a)
-
+-- Tokenizes the input, parses it with GF.Parsing.GFC (imported as New), and
+-- annotates every resulting tree against the state grammar.
+-- The algorithm and strategy are fixed here; the merged options are used
+-- only to choose the tokenizer.
+parseStringC opts0 sg cat s = do
+ let opts = unionOptions opts0 $ stateOptions sg
+ algorithm = "f" -- default algorithm: FCFG
+ strategy = "bottomup"
+ tokenizer = customOrDefault opts useTokenizer customTokenizer sg
+ toks = tokenizer s
+ ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
+ checkErr $ allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts