summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2006-02-08 10:18:00 +0000
committeraarne <aarne@cs.chalmers.se>2006-02-08 10:18:00 +0000
commit98b24d0e33e2cb90244773f8ad7d3c4f3f73ecd0 (patch)
tree362fc849521607bb1bb5761b50f3f17ec317ae26 /src
parent85910ef7766d70e9bb0674eb9f29ae4b05f98267 (diff)
command tb for creating treebanks
Diffstat (limited to 'src')
-rw-r--r--src/GF/Compile/ShellState.hs2
-rw-r--r--src/GF/Shell.hs6
-rw-r--r--src/GF/Shell/HelpFile.hs7
-rw-r--r--src/GF/Shell/PShell.hs1
-rw-r--r--src/GF/Shell/ShellCommands.hs2
-rw-r--r--src/GF/UseGrammar/Treebank.hs69
-rw-r--r--src/HelpFile7
7 files changed, 93 insertions, 1 deletions
diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs
index 1c1d8556e..6c281a926 100644
--- a/src/GF/Compile/ShellState.hs
+++ b/src/GF/Compile/ShellState.hs
@@ -405,7 +405,7 @@ allActiveGrammars :: ShellState -> [StateGrammar]
globalOptions = gloptions
--allLanguages = map (fst . fst) . concretes
-allLanguages = M.allConcreteModules . canModules
+allLanguages = map (snd . fst) . actualConcretes
allTransfers = map fst . transfers
allCategories = map fst . allCatsOf . canModules
diff --git a/src/GF/Shell.hs b/src/GF/Shell.hs
index 36dfc5b14..150b756c1 100644
--- a/src/GF/Shell.hs
+++ b/src/GF/Shell.hs
@@ -27,6 +27,7 @@ import qualified GF.Canon.CMacros as CMacros
import qualified GF.Compile.GrammarToCanon as GrammarToCanon
import GF.Grammar.Values
import GF.UseGrammar.GetTree
+import GF.UseGrammar.Treebank
import GF.Shell.ShellCommands
@@ -289,6 +290,11 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
_ -> Nothing
returnArg (ATrms $ generateTrees opts gro mt) sa
+ CTreeBank -> do
+ let ts = strees $ s2t $ snd sa
+ comm = "command" ----
+ justOutput opts (mkTreebank opts st comm ts) sa
+
CShowTreeGraph | oElem emitCode opts -> do -- -o
returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa
CShowTreeGraph -> do
diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs
index dd03d2515..14c87e621 100644
--- a/src/GF/Shell/HelpFile.hs
+++ b/src/GF/Shell/HelpFile.hs
@@ -229,6 +229,13 @@ txtHelpFile =
"\n examples:" ++
"\n p -lang=Cncdecimal \"123\" | at num2bin | l -- convert dec to bin" ++
"\n" ++
+ "\ntb, tree_bank: tb" ++
+ "\n Generate a multilingual treebank from a list of trees." ++
+ "\n flags:" ++
+ "\n -xml wrap the treebank with XML tags" ++
+ "\n examples:" ++
+ "\n gr -cat=S -number=100 | tb" ++
+ "\n" ++
"\ntt, test_tokenizer: tt String" ++
"\n Show the token list sent to the parser when String is parsed." ++
"\n HINT: can be useful when debugging the parser." ++
diff --git a/src/GF/Shell/PShell.hs b/src/GF/Shell/PShell.hs
index aba743503..676e54c46 100644
--- a/src/GF/Shell/PShell.hs
+++ b/src/GF/Shell/PShell.hs
@@ -113,6 +113,7 @@ pCommand ws = case ws of
"tt" : s -> aString CTestTokenizer s
"cc" : s -> aUnit $ CComputeConcrete $ unwords s
"so" : s -> aUnit $ CShowOpers $ unwords s
+ "tb" : [] -> aUnit CTreeBank
"tq" : i:o:[] -> aUnit (CTranslationQuiz (language i) (language o))
"tl":i:o:[] -> aUnit (CTranslationList (language i) (language o))
diff --git a/src/GF/Shell/ShellCommands.hs b/src/GF/Shell/ShellCommands.hs
index 48eac25a5..935b0be09 100644
--- a/src/GF/Shell/ShellCommands.hs
+++ b/src/GF/Shell/ShellCommands.hs
@@ -48,6 +48,7 @@ data Command =
| CTranslate Language Language
| CGenerateRandom
| CGenerateTrees
+ | CTreeBank
| CPutTerm
| CWrapTerm I.Ident
| CApplyTransfer (Maybe I.Ident, I.Ident)
@@ -182,6 +183,7 @@ optionsOfCommand co = case co of
CGenerateRandom -> both "cf prob" "cat lang number depth"
CGenerateTrees -> both "metas" "atoms depth alts cat lang number"
CPutTerm -> flags "transform number"
+ CTreeBank -> opts "xml"
CWrapTerm _ -> opts "c"
CApplyTransfer _ -> flags "lang transfer"
CMorphoAnalyse -> both "short" "lang"
diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs
new file mode 100644
index 000000000..667c323f6
--- /dev/null
+++ b/src/GF/UseGrammar/Treebank.hs
@@ -0,0 +1,69 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Treebank
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- Generate multilingual treebanks. AR 8\/2\/2006
+--
+-- (c) Aarne Ranta 2006 under GNU GPL
+--
+-- Purpose: to generate treebanks.
+-----------------------------------------------------------------------------
+
+module GF.UseGrammar.Treebank (mkTreebank) where
+
+
+import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories)
+import GF.UseGrammar.Linear (linTree2string)
+import GF.UseGrammar.Custom
+import GF.Canon.CMacros (noMark)
+import GF.Grammar.Grammar (Trm)
+import GF.Grammar.MMacros (exp2tree)
+import GF.Grammar.Macros (zIdent)
+import GF.Grammar.PrGrammar (prt_)
+import GF.Grammar.Values (tree2exp)
+import GF.Data.Operations
+import GF.Infra.Option
+import qualified GF.Grammar.Abstract as A
+
+-- Generate a treebank with a multilingual grammar. AR 8/2/2006
+-- (c) Aarne Ranta 2006 under GNU GPL
+
+-- | the main function
+mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> IO ()
+mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem trees)
+ where
+ mkItem t = putInXML opts "item" cat (mkTree t >>mapM_ (mkLin t) langs)
+ mkTree t = putInXML opts "tree" [] (putStrLn $ showTree t)
+ mkLin t lg = putInXML opts "lin" (lang lg) (putStrLn $ linearize sh lg t)
+
+ langs = [prt_ l | l <- allLanguages sh]
+ comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
+ abstr = "" --- "Abs" ----
+ cat = "" --- " cat=" ++ show "S" ----
+ lang lg = " lang=" ++ show (prt_ (zIdent lg))
+
+
+putInXML :: Options -> String -> String -> IO () -> IO ()
+putInXML opts tag attrs io = do
+ ifXML $ putStrLn $ tagXML $ tag ++ attrs
+ io
+ ifXML $ putStrLn $ tagXML $ '/':tag
+ where
+ ifXML c = if oElem showXML opts then c else return ()
+
+tagXML :: String -> String
+tagXML s = "<" ++ s ++ ">"
+
+--- these handy functions are borrowed from EmbedAPI
+
+linearize mgr lang =
+ untok .
+ linTree2string noMark (canModules mgr) (zIdent lang)
+ where
+ sgr = stateGrammarOfLang mgr (zIdent lang)
+ untok = customOrDefault noOptions useUntokenizer customUntokenizer sgr
+
+showTree t = prt_ $ tree2exp t
diff --git a/src/HelpFile b/src/HelpFile
index 0ff04b25b..8b54e5585 100644
--- a/src/HelpFile
+++ b/src/HelpFile
@@ -200,6 +200,13 @@ at, apply_transfer: at (Module.Fun | Fun)
examples:
p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin
+tb, tree_bank: tb
+ Generate a multilingual treebank from a list of trees.
+ flags:
+ -xml wrap the treebank with XML tags
+ examples:
+ gr -cat=S -number=100 | tb
+
tt, test_tokenizer: tt String
Show the token list sent to the parser when String is parsed.
HINT: can be useful when debugging the parser.