summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2006-03-04 22:14:33 +0000
committeraarne <aarne@cs.chalmers.se>2006-03-04 22:14:33 +0000
commit2657c51e040964d9704f5c69945685ebd546eb6a (patch)
treebab26adf8f85c253c50b90ac9cde36d58efa42bd /src
parent84e01c303dc161c3a811a045cc0c45f3f13e33d6 (diff)
example based also with treebank, with real term parser
Diffstat (limited to 'src')
-rw-r--r--src/GF.hs4
-rw-r--r--src/GF/Compile/MkConcrete.hs147
-rw-r--r--src/GF/Grammar/Grammar.hs1
-rw-r--r--src/GF/Source/GrammarToSource.hs5
-rw-r--r--src/GF/Source/SourceToGrammar.hs1
-rw-r--r--src/GF/UseGrammar/Treebank.hs5
6 files changed, 81 insertions, 82 deletions
diff --git a/src/GF.hs b/src/GF.hs
index 054ff6e89..a4bf22c59 100644
--- a/src/GF.hs
+++ b/src/GF.hs
@@ -63,7 +63,7 @@ main = do
return ()
_ | opt makeConcrete -> do
- mkConcretes fs
+ mkConcretes os fs
_ | opt openEditor -> do
system $ "jgf" +++ unwords xs
@@ -88,7 +88,7 @@ main = do
if opt fromExamples
then do
es <- liftM (nub . concat) $ mapM (getGFEFiles os) fs
- mkConcretes es
+ mkConcretes os es
doGF (removeOption fromExamples os) fs
else doGF os fs
diff --git a/src/GF/Compile/MkConcrete.hs b/src/GF/Compile/MkConcrete.hs
index a0af24007..aafa56242 100644
--- a/src/GF/Compile/MkConcrete.hs
+++ b/src/GF/Compile/MkConcrete.hs
@@ -12,15 +12,16 @@
-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
-----------------------------------------------------------------------------
-module GF.Compile.MkConcrete (mkConcretes,mkCncLine) where
+module GF.Compile.MkConcrete (mkConcretes) where
import GF.Grammar.Values (Tree,tree2exp)
-import GF.Grammar.PrGrammar (prt_)
-import GF.Grammar.Grammar (Term(Q,QC)) ---
-import GF.Grammar.Macros (composSafeOp, record2subst)
+import GF.Grammar.PrGrammar (prt_,prModule)
+import GF.Grammar.Grammar --- (Term(..),SourceModule)
+import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent)
import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords)
-import GF.Compile.PGrammar (pTerm)
+import GF.Compile.PGrammar (pTerm,pTrm)
import GF.Compile.Compile
+import GF.Compile.GetGrammar
import GF.API
import GF.API.IOGrammar
import qualified GF.Embed.EmbedAPI as EA
@@ -28,8 +29,10 @@ import qualified GF.Embed.EmbedAPI as EA
import GF.Data.Operations
import GF.Infra.UseIO
import GF.Infra.Option
+import GF.Infra.Modules
import GF.Infra.ReadFiles
import GF.System.Arch
+import GF.UseGrammar.Treebank
import System.Directory
import Data.Char
@@ -50,38 +53,40 @@ import Data.List
-- notice: we use a hand-crafted lexer and parser in order to preserve
-- the layout and comments in the rest of the file.
-mkConcretes :: [FilePath] -> IO ()
-mkConcretes files = do
+mkConcretes :: Options -> [FilePath] -> IO ()
+mkConcretes opts files = do
ress <- mapM getResPath files
let grps = groupBy (\a b -> fst a == fst b) $
sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files
- mapM_ mkCncGroups [(rp,map snd gs) | gs@((rp,_):_) <- grps]
+ mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps]
-mkCncGroups ((res,path),files) = do
+mkCncGroups opts0 ((res,path),files) = do
putStrLnFlush $ "Going to preprocess examples in " ++ unwords files
putStrLn $ "Compiling resource " ++ res
- let opts = options [beSilent,pathList path]
+ let opts = addOptions (options [beSilent,pathList path]) opts0
+ let treebank = oElem (iOpt "treebank") opts
egr <- appIOE $ shellStateFromFiles opts emptyShellState res
- gr <- err (\s -> putStrLn s >> error "resource grammar rejected")
- (return . firstStateGrammar) egr
- let parser cat =
- errVal ([],"No parse") .
- optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr
- let morpho = isKnownWord gr
+ (parser,morpho) <- if treebank then do
+ tb <- err (\_ -> error "no treebank")
+ return
+ (egr >>= flip findTreebank (zIdent (unsuffixFile res)))
+ return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb,
+ isWordInTreebank tb)
+ else do
+ gr <- err (\s -> putStrLn s >> error "resource grammar rejected")
+ (return . firstStateGrammar) egr
+ return
+ (\cat s ->
+ errVal ([],"No parse") $
+ optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr s >>=
+ (\ (ts,e) -> return (map tree2exp ts, e)) ,
+ isKnownWord gr)
putStrLn "Building parser"
mapM_ (mkConcrete parser morpho) files
-type Parser = String -> String -> ([Tree],String)
+type Parser = String -> String -> ([Term],String)
type Morpho = String -> Bool
-mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
-mkConcrete parser morpho file = do
- cont <- liftM getExLines $ readFileIf file
- let out = suffixFile "gf" $ justModuleName file
- writeFile out $ "-- File generated by GF from " ++ file
- appendFile out "\n"
- mapM_ (mkCnc out parser morpho) cont
-
getResPath :: FilePath -> IO (String,String)
getResPath file = do
s <- liftM lines $ readFileIf file
@@ -95,62 +100,46 @@ getResPath file = do
"--#":w:_ -> isPrefixOf ('-':tag) w
_ -> False
-getExLines :: String -> [Either String String]
-getExLines = getl . lines where
- getl ls = case ls of
- s:ss | begEx (words s) -> case break endEx ls of
- (x,y:z) -> Left (unwords (x ++ [y])) : getl z
- _ -> Left s : getl ss
- s:ss -> Right s : getl ss
- [] -> []
- begEx s = case s of
- "=":"in":_ -> True
- _:ws -> begEx ws
- _ -> False
- endEx s = case dropWhile isSpace (reverse s) of
- ';':_ -> True
- _ -> False
-mkCnc :: FilePath -> Parser -> Morpho -> Either String String -> IO ()
-mkCnc out parser morpho line = do
- let (res,msg) = mkCncLine parser morpho line
- appendFile out res
+mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
+mkConcrete parser morpho file = do
+ src <- appIOE (getSourceModule noOptions file) >>= err error return
+ let (src',msgs) = mkModule parser morpho src
+ let out = suffixFile "gf" $ justModuleName file
+ writeFile out $ "-- File generated by GF from " ++ file
appendFile out "\n"
- ifNull (return ()) putStrLnFlush msg
-
-mkCncLine :: (String -> String -> ([Tree],String)) -> (String -> Bool) ->
- Either String String -> (String,String)
-mkCncLine parser morpho (Right line) = (line,[])
-mkCncLine parser morpho (Left line) = mkLinRule (words line) where
- mkLinRule s =
- let
- (pre,str) = span (/= "in") s
- ([mcat],rest) = splitAt 1 $ tail str
- (lin,subst) = span (/= '"') $ tail $ unwords rest
- cat = reverse $ takeWhile (/= '.') $ reverse mcat
- substs = doSubst (init (tail subst))
- def
- | last pre /= "=" = line -- ordinary lin rule
- | otherwise = case parser cat lin of
- (t:ts,_) -> ind ++ unwords pre +++
- substs (tree2exp t) +++ ";" ++
- if null ts then [] else (" -- AMBIGUOUS:" ++++
- unlines ["-- " ++ substs (tree2exp s) +++ ";" | s <- ts])
- ([],msg) -> "{-" ++ line ++++ morph lin ++++ "-}"
- in
- (def,def)
+ appendFile out (prModule src')
+ appendFile out "{-\n"
+ appendFile out $ unlines $ filter (not . null) msgs
+ appendFile out "-}\n"
+
+mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String])
+mkModule parser morpho (name,src) = case src of
+ ModMod m@(Module mt st fs me ops js) ->
+
+ let js1 = jments m
+ (js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) []
+ mod2 = ModMod $ Module mt st fs me ops $ js2
+ in ((name,mod2), msgs)
+ where
+ mkInfo ni@(name,info) = case info of
+ CncFun mt (Yes trm) ppr -> do
+ trm' <- mkTrm trm
+ return (name, CncFun mt (Yes trm') ppr)
+ _ -> return ni
+ where
+ mkTrm t = case t of
+ Example (P _ cat) s -> parse cat s t
+ Example (Vr cat) s -> parse cat s t
+ _ -> composOp mkTrm t
+ parse cat s t = case parser (prt_ cat) s of
+ (tr:[], _) -> return tr
+ (tr:trs,_) -> do
+ updateSTM ((("AMBIGUOUS" +++ prt_ name) : s : map prt_ trs) ++)
+ return tr
+ ([],ms) -> do
+ updateSTM ((("NO PARSE" +++ prt_ name) : s : ms : [morph s]) ++)
+ return t
morph s = case [w | w <- words s, not (morpho w)] of
[] -> ""
ws -> "unknown words: " ++ unwords ws
- ind = takeWhile isSpace line
-
-doSubst :: String -> Term -> String
-doSubst subst0 trm = prt_ $ subt subst trm where
- subst
- | all isSpace subst0 = []
- | otherwise = err error id $ pTerm subst0 >>= record2subst
- subt g t = case t of
- Q _ c -> maybe t id $ lookup c g
- QC _ c -> maybe t id $ lookup c g
- _ -> composSafeOp (subt g) t
-
diff --git a/src/GF/Grammar/Grammar.hs b/src/GF/Grammar/Grammar.hs
index 47970c882..1c963ac66 100644
--- a/src/GF/Grammar/Grammar.hs
+++ b/src/GF/Grammar/Grammar.hs
@@ -128,6 +128,7 @@ data Term =
| Typed Term Term -- ^ type-annotated term
--
-- /below this, the constructors are only for concrete syntax/
+ | Example Term String -- ^ example-based term: @in M.C "foo"
| RecType [Labelling] -- ^ record type: @{ p : A ; ...}@
| R [Assign] -- ^ record: @{ p = a ; ...}@
| P Term Label -- ^ projection: @r.p@
diff --git a/src/GF/Source/GrammarToSource.hs b/src/GF/Source/GrammarToSource.hs
index 526a29f4c..38c658dc4 100644
--- a/src/GF/Source/GrammarToSource.hs
+++ b/src/GF/Source/GrammarToSource.hs
@@ -35,7 +35,9 @@ trGrammar (MGrammar ms) = P.Gr (map trModule ms) -- no includes
trModule :: (Ident,SourceModInfo) -> P.ModDef
trModule (i,mo) = case mo of
ModMod m -> P.MModule compl typ body where
- compl = P.CMCompl -- always complete module
+ compl = case mstatus m of
+ MSIncomplete -> P.CMIncompl
+ _ -> P.CMCompl
i' = tri i
typ = case typeOfModule mo of
MTResource -> P.MTResource i'
@@ -140,6 +142,7 @@ trt trm = case trm of
Prod x a b | isWildIdent x -> P.EProd (P.DExp (trt a)) (trt b)
Prod x a b -> P.EProd (P.DDec [trb x] (trt a)) (trt b)
+ Example t s -> P.EExample (trt t) s
R [] -> P.ETuple [] --- to get correct parsing when read back
R r -> P.ERecord $ map trAssign r
RecType r -> P.ERecord $ map trLabelling r
diff --git a/src/GF/Source/SourceToGrammar.hs b/src/GF/Source/SourceToGrammar.hs
index ffba51d6e..4aa5b55a6 100644
--- a/src/GF/Source/SourceToGrammar.hs
+++ b/src/GF/Source/SourceToGrammar.hs
@@ -437,6 +437,7 @@ transExp x = case x of
EExtend exp0 exp -> liftM2 G.ExtR (transExp exp0) (transExp exp)
EAbstr binds exp -> liftM2 M.mkAbs (mapM transBind binds) (transExp exp)
ETyped exp0 exp -> liftM2 G.Typed (transExp exp0) (transExp exp)
+ EExample exp str -> liftM2 G.Example (transExp exp) (return str)
EProd decl exp -> liftM2 M.mkProdSimple (transDecl decl) (transExp exp)
ETType exp0 exp -> liftM2 G.Table (transExp exp0) (transExp exp)
diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs
index f1dd5b75b..54ac8fb04 100644
--- a/src/GF/UseGrammar/Treebank.hs
+++ b/src/GF/UseGrammar/Treebank.hs
@@ -25,6 +25,7 @@ module GF.UseGrammar.Treebank (
readMultiTreebank,
lookupTreebank,
assocsTreebank,
+ isWordInTreebank,
printAssoc
) where
@@ -45,6 +46,7 @@ import GF.Infra.Ident (Ident)
import GF.Infra.UseIO
import qualified GF.Grammar.Abstract as A
import qualified Data.Map as M
+import qualified Data.Set as S
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL
@@ -142,6 +144,9 @@ ret = [] -- return ()
assocsTreebank :: UniTreebank -> [(String,[String])]
assocsTreebank = M.assocs
+isWordInTreebank :: UniTreebank -> String -> Bool
+isWordInTreebank tb w = S.member w (S.fromList (concatMap words (M.keys tb)))
+
printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts]
getTreebanks :: [String] -> [(String,String,String)]