summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/GF/Shell.hs5
-rw-r--r--src/GF/Shell/HelpFile.hs11
-rw-r--r--src/GF/Shell/ShellCommands.hs2
-rw-r--r--src/GF/UseGrammar/Treebank.hs74
-rw-r--r--src/HelpFile11
5 files changed, 78 insertions, 25 deletions
diff --git a/src/GF/Shell.hs b/src/GF/Shell.hs
index 150b756c1..1c7f4527e 100644
--- a/src/GF/Shell.hs
+++ b/src/GF/Shell.hs
@@ -290,10 +290,13 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
_ -> Nothing
returnArg (ATrms $ generateTrees opts gro mt) sa
+ CTreeBank | oElem doCompute opts -> do -- -c
+ let bank = prCommandArg a
+ returnArg (AString $ unlines $ testTreebank opts st bank) sa
CTreeBank -> do
let ts = strees $ s2t $ snd sa
comm = "command" ----
- justOutput opts (mkTreebank opts st comm ts) sa
+ returnArg (AString $ unlines $ mkTreebank opts st comm ts) sa
CShowTreeGraph | oElem emitCode opts -> do -- -o
returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa
diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs
index 14c87e621..5764e9f14 100644
--- a/src/GF/Shell/HelpFile.hs
+++ b/src/GF/Shell/HelpFile.hs
@@ -230,11 +230,14 @@ txtHelpFile =
"\n p -lang=Cncdecimal \"123\" | at num2bin | l -- convert dec to bin" ++
"\n" ++
"\ntb, tree_bank: tb" ++
- "\n Generate a multilingual treebank from a list of trees." ++
- "\n flags:" ++
- "\n -xml wrap the treebank with XML tags" ++
+ "\n Generate a multilingual treebank from a list of trees (default) or compare" ++
+ "\n to an existing treebank." ++
+ "\n options:" ++
+ "\n -c compare to existing xml-formatted treebank" ++
+ "\n -xml wrap the treebank (or comparison results) with XML tags" ++
"\n examples:" ++
- "\n gr -cat=S -number=100 | tb" ++
+ "\n gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file" ++
+ "\n rf tb.txt | tb -c -- read comparison treebank from file" ++
"\n" ++
"\ntt, test_tokenizer: tt String" ++
"\n Show the token list sent to the parser when String is parsed." ++
diff --git a/src/GF/Shell/ShellCommands.hs b/src/GF/Shell/ShellCommands.hs
index 935b0be09..7d10ef882 100644
--- a/src/GF/Shell/ShellCommands.hs
+++ b/src/GF/Shell/ShellCommands.hs
@@ -183,7 +183,7 @@ optionsOfCommand co = case co of
CGenerateRandom -> both "cf prob" "cat lang number depth"
CGenerateTrees -> both "metas" "atoms depth alts cat lang number"
CPutTerm -> flags "transform number"
- CTreeBank -> opts "xml"
+ CTreeBank -> opts "c xml"
CWrapTerm _ -> opts "c"
CApplyTransfer _ -> flags "lang transfer"
CMorphoAnalyse -> both "short" "lang"
diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs
index 838e23597..99f0da281 100644
--- a/src/GF/UseGrammar/Treebank.hs
+++ b/src/GF/UseGrammar/Treebank.hs
@@ -12,12 +12,13 @@
-- Purpose: to generate treebanks.
-----------------------------------------------------------------------------
-module GF.UseGrammar.Treebank (mkTreebank) where
+module GF.UseGrammar.Treebank (mkTreebank,testTreebank) where
-
-import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories)
+import GF.Compile.ShellState
import GF.UseGrammar.Linear (linTree2string)
import GF.UseGrammar.Custom
+import GF.UseGrammar.GetTree (string2tree)
+import GF.Grammar.TypeCheck (annotate)
import GF.Canon.CMacros (noMark)
import GF.Grammar.Grammar (Trm)
import GF.Grammar.MMacros (exp2tree)
@@ -31,13 +32,14 @@ import qualified GF.Grammar.Abstract as A
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL
--- | the main function
-mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> IO ()
-mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem tris)
+-- | the main functions
+mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
+mkTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem tris)
where
- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
- mkTree t = putInXML opts "tree" [] (putStrLn $ showTree t)
- mkLin t lg = putInXML opts "lin" (lang lg) (putStrLn $ linearize sh lg t)
+ mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs)
+-- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
+ mkTree t = putInXML opts "tree" [] (puts $ showTree t)
+ mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize sh lg t)
langs = [prt_ l | l <- allLanguages sh]
comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
@@ -46,14 +48,56 @@ mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem tris)
lang lg = " lang=" ++ show (prt_ (zIdent lg))
tris = zip trees [1..]
+testTreebank :: Options -> ShellState -> String -> Res
+testTreebank opts sh = putInXML opts "diff" [] . concatMap testOne . getTreebank . lines
+ where
+ testOne (e,lang,str) = do
+ let tr = annot gr e
+ let str0 = linearize sh lang tr
+ if str == str0 then ret else putInXML opts "diff" [] $ do
+ putInXML opts "tree" [] (puts $ showTree tr)
+ putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0
+ putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str
+ gr = firstStateGrammar sh
+
+-- string vs. IO
+type Res = [String] -- IO ()
+puts :: String -> Res
+puts = return -- putStrLn
+ret = [] -- return ()
+--
-putInXML :: Options -> String -> String -> IO () -> IO ()
-putInXML opts tag attrs io = do
- ifXML $ putStrLn $ tagXML $ tag ++ attrs
- io
- ifXML $ putStrLn $ tagXML $ '/':tag
+getTreebank :: [String] -> [(String,String,String)]
+getTreebank ll = case ll of
+ [] -> []
+ l:ls ->
+ let (l1,l2) = getItem ls
+ (tr,lins) = getTree l1
+ lglins = getLins lins
+ in [(tr,lang,str) | (lang,str) <- lglins] ++ getTreebank l2
where
- ifXML c = if oElem showXML opts then c else return ()
+ getItem = span ((/="</item") . take 6)
+
+ getTree (_:ss) = let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2)
+
+ getLins (beg:str:end:ss) = (getLang beg, str):getLins ss
+ getLins _ = []
+
+ getLang = takeWhile (/='"') . tail . dropWhile (/='"')
+
+annot :: StateGrammar -> String -> A.Tree
+annot gr s = errVal (error "illegal tree") $ do
+ let t = tree2exp $ string2tree gr s
+ annotate (grammar gr) t
+
+putInXML :: Options -> String -> String -> Res -> Res
+putInXML opts tag attrs io =
+ (ifXML $ puts $ tagXML $ tag ++ attrs) ++
+ io ++
+ (ifXML $ puts $ tagXML $ '/':tag)
+ where
+ ifXML c = if oElem showXML opts then c else []
+
tagXML :: String -> String
tagXML s = "<" ++ s ++ ">"
diff --git a/src/HelpFile b/src/HelpFile
index 8b54e5585..0f6a12d8b 100644
--- a/src/HelpFile
+++ b/src/HelpFile
@@ -201,11 +201,14 @@ at, apply_transfer: at (Module.Fun | Fun)
p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin
tb, tree_bank: tb
- Generate a multilingual treebank from a list of trees.
- flags:
- -xml wrap the treebank with XML tags
+ Generate a multilingual treebank from a list of trees (default) or compare
+ to an existing treebank.
+ options:
+ -c compare to existing xml-formatted treebank
+ -xml wrap the treebank (or comparison results) with XML tags
examples:
- gr -cat=S -number=100 | tb
+ gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
+ rf tb.txt | tb -c -- read comparison treebank from file
tt, test_tokenizer: tt String
Show the token list sent to the parser when String is parsed.