1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
----------------------------------------------------------------------
-- |
-- Module : Treebank
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- Generate multilingual treebanks. AR 8\/2\/2006
--
-- (c) Aarne Ranta 2006 under GNU GPL
--
-- Purpose: to generate treebanks.
-----------------------------------------------------------------------------
module GF.UseGrammar.Treebank (mkTreebank,testTreebank) where
import GF.Compile.ShellState
import GF.UseGrammar.Linear (linTree2string)
import GF.UseGrammar.Custom
import GF.UseGrammar.GetTree (string2tree)
import GF.Grammar.TypeCheck (annotate)
import GF.Canon.CMacros (noMark)
import GF.Grammar.Grammar (Trm)
import GF.Grammar.MMacros (exp2tree)
import GF.Grammar.Macros (zIdent)
import GF.Grammar.PrGrammar (prt_)
import GF.Grammar.Values (tree2exp)
import GF.Data.Operations
import GF.Infra.Option
import qualified GF.Grammar.Abstract as A
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL
-- | the main functions
mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
mkTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem tris)
where
mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs)
-- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
mkTree t = putInXML opts "tree" [] (puts $ showTree t)
mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize sh lg t)
langs = [prt_ l | l <- allLanguages sh]
comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
abstr = "" --- "Abs" ----
cat i = " number=" ++ show (show i) --- " cat=" ++ show "S" ----
lang lg = " lang=" ++ show (prt_ (zIdent lg))
tris = zip trees [1..]
testTreebank :: Options -> ShellState -> String -> Res
testTreebank opts sh = putInXML opts "testtreebank" [] . concatMap testOne . getTreebank . lines
where
testOne (e,lang,str0) = do
let tr = annot gr e
let str = linearize sh lang tr
if str == str0 then ret else putInXML opts "diff" [] $ concat [
putInXML opts "tree" [] (puts $ showTree tr),
putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0,
putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str
]
gr = firstStateGrammar sh
-- string vs. IO
type Res = [String] -- IO ()
puts :: String -> Res
puts = return -- putStrLn
ret = [] -- return ()
--
getTreebank :: [String] -> [(String,String,String)]
getTreebank ll = case ll of
[] -> []
l:ls ->
let (l1,l2) = getItem ls
(tr,lins) = getTree l1
lglins = getLins lins
in [(tr,lang,str) | (lang,str) <- lglins] ++ getTreebank l2
where
getItem = span ((/="</item") . take 6)
getTree (_:ss) = let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2)
getLins (beg:str:end:ss) = (getLang beg, str):getLins ss
getLins _ = []
getLang = takeWhile (/='"') . tail . dropWhile (/='"')
annot :: StateGrammar -> String -> A.Tree
annot gr s = errVal (error "illegal tree") $ do
let t = tree2exp $ string2tree gr s
annotate (grammar gr) t
putInXML :: Options -> String -> String -> Res -> Res
putInXML opts tag attrs io =
(ifXML $ puts $ tagXML $ tag ++ attrs) ++
io ++
(ifXML $ puts $ tagXML $ '/':tag)
where
ifXML c = if oElem showXML opts then c else []
tagXML :: String -> String
tagXML s = "<" ++ s ++ ">"
--- these handy functions are borrowed from EmbedAPI
linearize mgr lang =
untok .
linTree2string noMark (canModules mgr) (zIdent lang)
where
sgr = stateGrammarOfLangOpt False mgr (zIdent lang)
untok = customOrDefault (stateOptions sgr) useUntokenizer customUntokenizer sgr
showTree t = prt_ $ tree2exp t
|