1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
----------------------------------------------------------------------
-- |
-- Module : Treebank
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- Generate multilingual treebanks. AR 8\/2\/2006
--
-- (c) Aarne Ranta 2006 under GNU GPL
--
-- Purpose: to generate treebanks.
-----------------------------------------------------------------------------
module GF.UseGrammar.Treebank (
mkTreebank,
testTreebank,
treesTreebank,
getTreebank,
lookupTreebank,
pre2treebank
) where
import GF.Compile.ShellState
import GF.UseGrammar.Linear (linTree2string)
import GF.UseGrammar.Custom
import GF.UseGrammar.GetTree (string2tree)
import GF.Grammar.TypeCheck (annotate)
import GF.Canon.CMacros (noMark)
import GF.Grammar.Grammar (Trm)
import GF.Grammar.MMacros (exp2tree)
import GF.Grammar.Macros (zIdent)
import GF.Grammar.PrGrammar (prt_)
import GF.Grammar.Values (tree2exp)
import GF.Data.Operations
import GF.Infra.Option
import qualified GF.Grammar.Abstract as A
import qualified Data.Map as M
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL
-- | the main functions
mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
mkTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem tris)
where
mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs)
-- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
mkTree t = putInXML opts "tree" [] (puts $ showTree t)
mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize sh lg t)
langs = [prt_ l | l <- allLanguages sh]
comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
abstr = "" --- "Abs" ----
cat i = " number=" ++ show (show i) --- " cat=" ++ show "S" ----
lang lg = " lang=" ++ show (prt_ (zIdent lg))
tris = zip trees [1..]
testTreebank :: Options -> ShellState -> String -> Res
testTreebank opts sh = putInXML opts "testtreebank" [] .
concatMap testOne .
getTreebanks . lines
where
testOne (e,lang,str0) = do
let tr = annot gr e
let str = linearize sh lang tr
if str == str0 then ret else putInXML opts "diff" [] $ concat [
putInXML opts "tree" [] (puts $ showTree tr),
putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0,
putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str
]
gr = firstStateGrammar sh
treesTreebank :: Options -> String -> [String]
treesTreebank _ = terms . getTreebank . lines where
terms ts = [t | (t,_) <- ts]
-- string vs. IO
type Res = [String] -- IO ()
puts :: String -> Res
puts = return -- putStrLn
ret = [] -- return ()
--
type PreTreebank = [(String,[(String,String)])]
getTreebanks :: [String] -> [(String,String,String)]
getTreebanks = concatMap grps . getTreebank where
grps (t,lls) = [(t,x,y) | (x,y) <- lls]
getTreebank :: [String] -> PreTreebank
getTreebank ll = case ll of
l:ls@(_:_:_) ->
let (l1,l2) = getItem ls
(tr,lins) = getTree l1
lglins = getLins lins
in (tr,lglins) : getTreebank l2
_ -> []
where
getItem = span ((/="</item") . take 6)
getTree (_:ss) =
let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2)
getLins (beg:str:end:ss) = (getLang beg, str):getLins ss
getLins _ = []
getLang = takeWhile (/='"') . tail . dropWhile (/='"')
lookupTreebank :: Treebank -> String -> [(String,String)]
lookupTreebank tb s = maybe [] id $ M.lookup s tb
pre2treebank :: PreTreebank -> Treebank
pre2treebank tb = M.fromListWith (++) [(s,[(l,t)]) | (t,ls) <- tb, (l,s) <- ls]
annot :: StateGrammar -> String -> A.Tree
annot gr s = errVal (error "illegal tree") $ do
let t = tree2exp $ string2tree gr s
annotate (grammar gr) t
putInXML :: Options -> String -> String -> Res -> Res
putInXML opts tag attrs io =
(ifXML $ puts $ tagXML $ tag ++ attrs) ++
io ++
(ifXML $ puts $ tagXML $ '/':tag)
where
ifXML c = if oElem showXML opts then c else []
tagXML :: String -> String
tagXML s = "<" ++ s ++ ">"
--- these handy functions are borrowed from EmbedAPI
linearize mgr lang =
untok .
linTree2string noMark (canModules mgr) (zIdent lang)
where
sgr = stateGrammarOfLangOpt False mgr (zIdent lang)
untok = customOrDefault (stateOptions sgr) useUntokenizer customUntokenizer sgr
showTree t = prt_ $ tree2exp t
|