diff options
| author | aarne <aarne@cs.chalmers.se> | 2006-03-04 22:14:33 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2006-03-04 22:14:33 +0000 |
| commit | 2657c51e040964d9704f5c69945685ebd546eb6a (patch) | |
| tree | bab26adf8f85c253c50b90ac9cde36d58efa42bd /src/GF | |
| parent | 84e01c303dc161c3a811a045cc0c45f3f13e33d6 (diff) | |
example based also with treebank, with real term parser
Diffstat (limited to 'src/GF')
| -rw-r--r-- | src/GF/Compile/MkConcrete.hs | 147 | ||||
| -rw-r--r-- | src/GF/Grammar/Grammar.hs | 1 | ||||
| -rw-r--r-- | src/GF/Source/GrammarToSource.hs | 5 | ||||
| -rw-r--r-- | src/GF/Source/SourceToGrammar.hs | 1 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Treebank.hs | 5 |
5 files changed, 79 insertions, 80 deletions
diff --git a/src/GF/Compile/MkConcrete.hs b/src/GF/Compile/MkConcrete.hs index a0af24007..aafa56242 100644 --- a/src/GF/Compile/MkConcrete.hs +++ b/src/GF/Compile/MkConcrete.hs @@ -12,15 +12,16 @@ -- Compile a gfe file into a concrete syntax by using the parser on a resource grammar. ----------------------------------------------------------------------------- -module GF.Compile.MkConcrete (mkConcretes,mkCncLine) where +module GF.Compile.MkConcrete (mkConcretes) where import GF.Grammar.Values (Tree,tree2exp) -import GF.Grammar.PrGrammar (prt_) -import GF.Grammar.Grammar (Term(Q,QC)) --- -import GF.Grammar.Macros (composSafeOp, record2subst) +import GF.Grammar.PrGrammar (prt_,prModule) +import GF.Grammar.Grammar --- (Term(..),SourceModule) +import GF.Grammar.Macros (composSafeOp, composOp, record2subst, zIdent) import GF.Compile.ShellState --(firstStateGrammar,stateGrammarWords) -import GF.Compile.PGrammar (pTerm) +import GF.Compile.PGrammar (pTerm,pTrm) import GF.Compile.Compile +import GF.Compile.GetGrammar import GF.API import GF.API.IOGrammar import qualified GF.Embed.EmbedAPI as EA @@ -28,8 +29,10 @@ import qualified GF.Embed.EmbedAPI as EA import GF.Data.Operations import GF.Infra.UseIO import GF.Infra.Option +import GF.Infra.Modules import GF.Infra.ReadFiles import GF.System.Arch +import GF.UseGrammar.Treebank import System.Directory import Data.Char @@ -50,38 +53,40 @@ import Data.List -- notice: we use a hand-crafted lexer and parser in order to preserve -- the layout and comments in the rest of the file. -mkConcretes :: [FilePath] -> IO () -mkConcretes files = do +mkConcretes :: Options -> [FilePath] -> IO () +mkConcretes opts files = do ress <- mapM getResPath files let grps = groupBy (\a b -> fst a == fst b) $ sortBy (\a b -> compare (fst a) (fst b)) $ zip ress files - mapM_ mkCncGroups [(rp,map snd gs) | gs@((rp,_):_) <- grps] + mapM_ (mkCncGroups opts) [(rp,map snd gs) | gs@((rp,_):_) <- grps] -mkCncGroups ((res,path),files) = do +mkCncGroups opts0 ((res,path),files) = do putStrLnFlush $ "Going to preprocess examples in " ++ unwords files putStrLn $ "Compiling resource " ++ res - let opts = options [beSilent,pathList path] + let opts = addOptions (options [beSilent,pathList path]) opts0 + let treebank = oElem (iOpt "treebank") opts egr <- appIOE $ shellStateFromFiles opts emptyShellState res - gr <- err (\s -> putStrLn s >> error "resource grammar rejected") - (return . firstStateGrammar) egr - let parser cat = - errVal ([],"No parse") . - optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr - let morpho = isKnownWord gr + (parser,morpho) <- if treebank then do + tb <- err (\_ -> error "no treebank") + return + (egr >>= flip findTreebank (zIdent (unsuffixFile res))) + return (\_ -> flip (,) "Not in treebank" . map pTrm . lookupTreebank tb, + isWordInTreebank tb) + else do + gr <- err (\s -> putStrLn s >> error "resource grammar rejected") + (return . firstStateGrammar) egr + return + (\cat s -> + errVal ([],"No parse") $ + optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr s >>= + (\ (ts,e) -> return (map tree2exp ts, e)) , + isKnownWord gr) putStrLn "Building parser" mapM_ (mkConcrete parser morpho) files -type Parser = String -> String -> ([Tree],String) +type Parser = String -> String -> ([Term],String) type Morpho = String -> Bool -mkConcrete :: Parser -> Morpho -> FilePath -> IO () -mkConcrete parser morpho file = do - cont <- liftM getExLines $ readFileIf file - let out = suffixFile "gf" $ justModuleName file - writeFile out $ "-- File generated by GF from " ++ file - appendFile out "\n" - mapM_ (mkCnc out parser morpho) cont - getResPath :: FilePath -> IO (String,String) getResPath file = do s <- liftM lines $ readFileIf file @@ -95,62 +100,46 @@ getResPath file = do "--#":w:_ -> isPrefixOf ('-':tag) w _ -> False -getExLines :: String -> [Either String String] -getExLines = getl . lines where - getl ls = case ls of - s:ss | begEx (words s) -> case break endEx ls of - (x,y:z) -> Left (unwords (x ++ [y])) : getl z - _ -> Left s : getl ss - s:ss -> Right s : getl ss - [] -> [] - begEx s = case s of - "=":"in":_ -> True - _:ws -> begEx ws - _ -> False - endEx s = case dropWhile isSpace (reverse s) of - ';':_ -> True - _ -> False -mkCnc :: FilePath -> Parser -> Morpho -> Either String String -> IO () -mkCnc out parser morpho line = do - let (res,msg) = mkCncLine parser morpho line - appendFile out res +mkConcrete :: Parser -> Morpho -> FilePath -> IO () +mkConcrete parser morpho file = do + src <- appIOE (getSourceModule noOptions file) >>= err error return + let (src',msgs) = mkModule parser morpho src + let out = suffixFile "gf" $ justModuleName file + writeFile out $ "-- File generated by GF from " ++ file appendFile out "\n" - ifNull (return ()) putStrLnFlush msg - -mkCncLine :: (String -> String -> ([Tree],String)) -> (String -> Bool) -> - Either String String -> (String,String) -mkCncLine parser morpho (Right line) = (line,[]) -mkCncLine parser morpho (Left line) = mkLinRule (words line) where - mkLinRule s = - let - (pre,str) = span (/= "in") s - ([mcat],rest) = splitAt 1 $ tail str - (lin,subst) = span (/= '"') $ tail $ unwords rest - cat = reverse $ takeWhile (/= '.') $ reverse mcat - substs = doSubst (init (tail subst)) - def - | last pre /= "=" = line -- ordinary lin rule - | otherwise = case parser cat lin of - (t:ts,_) -> ind ++ unwords pre +++ - substs (tree2exp t) +++ ";" ++ - if null ts then [] else (" -- AMBIGUOUS:" ++++ - unlines ["-- " ++ substs (tree2exp s) +++ ";" | s <- ts]) - ([],msg) -> "{-" ++ line ++++ morph lin ++++ "-}" - in - (def,def) + appendFile out (prModule src') + appendFile out "{-\n" + appendFile out $ unlines $ filter (not . null) msgs + appendFile out "-}\n" + +mkModule :: Parser -> Morpho -> SourceModule -> (SourceModule,[String]) +mkModule parser morpho (name,src) = case src of + ModMod m@(Module mt st fs me ops js) -> + + let js1 = jments m + (js2,msgs) = err error id $ appSTM (mapMTree mkInfo js1) [] + mod2 = ModMod $ Module mt st fs me ops $ js2 + in ((name,mod2), msgs) + where + mkInfo ni@(name,info) = case info of + CncFun mt (Yes trm) ppr -> do + trm' <- mkTrm trm + return (name, CncFun mt (Yes trm') ppr) + _ -> return ni + where + mkTrm t = case t of + Example (P _ cat) s -> parse cat s t + Example (Vr cat) s -> parse cat s t + _ -> composOp mkTrm t + parse cat s t = case parser (prt_ cat) s of + (tr:[], _) -> return tr + (tr:trs,_) -> do + updateSTM ((("AMBIGUOUS" +++ prt_ name) : s : map prt_ trs) ++) + return tr + ([],ms) -> do + updateSTM ((("NO PARSE" +++ prt_ name) : s : ms : [morph s]) ++) + return t morph s = case [w | w <- words s, not (morpho w)] of [] -> "" ws -> "unknown words: " ++ unwords ws - ind = takeWhile isSpace line - -doSubst :: String -> Term -> String -doSubst subst0 trm = prt_ $ subt subst trm where - subst - | all isSpace subst0 = [] - | otherwise = err error id $ pTerm subst0 >>= record2subst - subt g t = case t of - Q _ c -> maybe t id $ lookup c g - QC _ c -> maybe t id $ lookup c g - _ -> composSafeOp (subt g) t - diff --git a/src/GF/Grammar/Grammar.hs b/src/GF/Grammar/Grammar.hs index 47970c882..1c963ac66 100644 --- a/src/GF/Grammar/Grammar.hs +++ b/src/GF/Grammar/Grammar.hs @@ -128,6 +128,7 @@ data Term = | Typed Term Term -- ^ type-annotated term -- -- /below this, the constructors are only for concrete syntax/ + | Example Term String -- ^ example-based term: @in M.C "foo" | RecType [Labelling] -- ^ record type: @{ p : A ; ...}@ | R [Assign] -- ^ record: @{ p = a ; ...}@ | P Term Label -- ^ projection: @r.p@ diff --git a/src/GF/Source/GrammarToSource.hs b/src/GF/Source/GrammarToSource.hs index 526a29f4c..38c658dc4 100644 --- a/src/GF/Source/GrammarToSource.hs +++ b/src/GF/Source/GrammarToSource.hs @@ -35,7 +35,9 @@ trGrammar (MGrammar ms) = P.Gr (map trModule ms) -- no includes trModule :: (Ident,SourceModInfo) -> P.ModDef trModule (i,mo) = case mo of ModMod m -> P.MModule compl typ body where - compl = P.CMCompl -- always complete module + compl = case mstatus m of + MSIncomplete -> P.CMIncompl + _ -> P.CMCompl i' = tri i typ = case typeOfModule mo of MTResource -> P.MTResource i' @@ -140,6 +142,7 @@ trt trm = case trm of Prod x a b | isWildIdent x -> P.EProd (P.DExp (trt a)) (trt b) Prod x a b -> P.EProd (P.DDec [trb x] (trt a)) (trt b) + Example t s -> P.EExample (trt t) s R [] -> P.ETuple [] --- to get correct parsing when read back R r -> P.ERecord $ map trAssign r RecType r -> P.ERecord $ map trLabelling r diff --git a/src/GF/Source/SourceToGrammar.hs b/src/GF/Source/SourceToGrammar.hs index ffba51d6e..4aa5b55a6 100644 --- a/src/GF/Source/SourceToGrammar.hs +++ b/src/GF/Source/SourceToGrammar.hs @@ -437,6 +437,7 @@ transExp x = case x of EExtend exp0 exp -> liftM2 G.ExtR (transExp exp0) (transExp exp) EAbstr binds exp -> liftM2 M.mkAbs (mapM transBind binds) (transExp exp) ETyped exp0 exp -> liftM2 G.Typed (transExp exp0) (transExp exp) + EExample exp str -> liftM2 G.Example (transExp exp) (return str) EProd decl exp -> liftM2 M.mkProdSimple (transDecl decl) (transExp exp) ETType exp0 exp -> liftM2 G.Table (transExp exp0) (transExp exp) diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs index f1dd5b75b..54ac8fb04 100644 --- a/src/GF/UseGrammar/Treebank.hs +++ b/src/GF/UseGrammar/Treebank.hs @@ -25,6 +25,7 @@ module GF.UseGrammar.Treebank ( readMultiTreebank, lookupTreebank, assocsTreebank, + isWordInTreebank, printAssoc ) where @@ -45,6 +46,7 @@ import GF.Infra.Ident (Ident) import GF.Infra.UseIO import qualified GF.Grammar.Abstract as A import qualified Data.Map as M +import qualified Data.Set as S -- Generate a treebank with a multilingual grammar. AR 8/2/2006 -- (c) Aarne Ranta 2006 under GNU GPL @@ -142,6 +144,9 @@ ret = [] -- return () assocsTreebank :: UniTreebank -> [(String,[String])] assocsTreebank = M.assocs +isWordInTreebank :: UniTreebank -> String -> Bool +isWordInTreebank tb w = S.member w (S.fromList (concatMap words (M.keys tb))) + printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts] getTreebanks :: [String] -> [(String,String,String)] |
