1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
|
----------------------------------------------------------------------
-- |
-- Module : VisualizeTree
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date:
-- > CVS $Author:
-- > CVS $Revision:
--
-- Print a graph of an abstract syntax tree in Graphviz DOT format
-- Based on BB's VisualizeGrammar
-- FIXME: change this to use GF.Visualization.Graphviz,
-- instead of rolling its own.
-----------------------------------------------------------------------------
module PGF.VisualizeTree
( graphvizAbstractTree
, graphvizParseTree
, graphvizDependencyTree
, graphvizBracketedString
, graphvizAlignment
, getDepLabels
) where
import PGF.CId (CId,showCId,ppCId,pCId,mkCId)
import PGF.Data
import PGF.Expr (showExpr, Tree)
import PGF.Linearize
import PGF.Macros (lookValCat, lookMap,
BracketedString(..), BracketedTokn(..), flattenBracketedString)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import Text.PrettyPrint
import Data.Array.IArray
import Control.Monad
import qualified Data.Set as Set
import qualified Text.ParserCombinators.ReadP as RP
-- | Renders abstract syntax tree in Graphviz format
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String
graphvizAbstractTree pgf (funs,cats) = render . tree2graph
where
tree2graph t =
text "graph {" $$
ppGraph [] [] 0 t $$
text "}"
getAbs xs (EAbs _ x e) = getAbs (x:xs) e
getAbs xs (ETyped e _) = getAbs xs e
getAbs xs e = (xs,e)
getApp (EApp x y) es = getApp x (y:es)
getApp (ETyped e _) es = getApp e es
getApp e es = (e,es)
getLbl scope (EFun f) = let fun = if funs then ppCId f else empty
cat = if cats then ppCId (lookValCat pgf f) else empty
sep = if funs && cats then colon else empty
in fun <+> sep <+> cat
getLbl scope (ELit l) = text (escapeStr (render (ppLit l)))
getLbl scope (EMeta i) = ppMeta i
getLbl scope (EVar i) = ppCId (scope !! i)
getLbl scope (ETyped e _) = getLbl scope e
getLbl scope (EImplArg e) = getLbl scope e
ppGraph scope ps i e0 =
let (xs, e1) = getAbs [] e0
(e2,args) = getApp e1 []
binds = if null xs
then empty
else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->"
(lbl,eargs) = case e2 of
EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node
_ -> (getLbl scope' e2, args)
scope' = xs ++ scope
in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$
(if null ps
then empty
else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$
vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs)
ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps))
escapeStr [] = []
escapeStr ('\\':cs) = '\\':'\\':escapeStr cs
escapeStr ('"' :cs) = '\\':'"' :escapeStr cs
escapeStr (c :cs) = c :escapeStr cs
type Labels = Map.Map CId [String]
{- This is an attempt to build the dependency tree from the bracketed string.
Unfortunately it doesn't quite work. See the actual implementation at
the end of this module.
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
graphvizDependencyTree format debug mlab ms pgf lang t = render $
case format of
"malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
"malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
_ -> text "digraph {" $$
space $$
nest 2 (text "rankdir=LR ;" $$
text "node [shape = plaintext] ;" $$
vcat nodes $$
vcat links) $$
text "}"
where
nodes = map mkNode leaves
links = map mkLink [(fid, fromMaybe nil (lookup fid deps)) | (fid,_,w) <- tail leaves]
wnodes = undefined
nil = -1
bs = bracketedLinearize pgf lang t
leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs
deps = getDeps nil [bs]
groupAndIndexIt id [] = []
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
where
collect pws@((p1,w):pws1)
| p == p1 = let (ws,pws2) = collect pws1
in (w:ws,pws2)
collect pws = ([],pws)
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
getDeps out_head bss =
case selectHead (children bss) of
Just ((head, bss'), deps) -> concat (descend out_head head bss' : [descend (headOf head bss') fid bss | (fid,bss) <- IntMap.toList deps])
Nothing -> []
where
descend head fid bss = (fid,head) : getDeps head bss
headOf head bss
| null [() | Leaf _ <- bss] =
case selectHead (children bss) of
Just ((head, bss), deps) -> headOf head bss
Nothing -> head
| otherwise = head
children bss = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss]
selectHead children = IntMap.maxViewWithKey children
mkNode (p,i,w) =
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
mkLink (x,y) = tag y <+> text "->" <+> tag x -- ++ " [label = \"" ++ l ++ "\"] ;"
-}
getDepLabels :: [String] -> Labels
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
graphvizParseTree :: PGF -> Language -> Tree -> String
graphvizParseTree pgf lang = graphvizBracketedString . bracketedLinearize pgf lang
graphvizBracketedString :: BracketedString -> String
graphvizBracketedString = render . lin2tree
where
lin2tree bs =
text "graph {" $$
space $$
nest 2 (text "rankdir=BU ;" $$
text "node [shape = record, color = white] ;" $$
space $$
vcat (nodes bs)) $$
text "}"
where
nodes bs = zipWith mkStruct [0..] (interns ++ [zipWith (\i (l,p,w) -> (l,p,i,w)) [99990..] leaves])
nil = -1
leaves = getLeaves 0 nil bs
interns = getInterns 0 [(nil,bs)]
getLeaves level parent bs =
case bs of
Leaf w -> [(level-1,parent,w)]
Bracket _ fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
getInterns level [] = []
getInterns level nodes =
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _) <- nodes] :
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ children) <- nodes, child <- children]
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
vcat [link pl pid l id | (pl,pid,id,_) <- cs]
link pl pid l id
| pl < 0 = empty
| otherwise = struct pl <> colon <> tag pid <> colon <> char 's' <+>
text "--" <+>
struct l <> colon <> tag id <> colon <> char 'n' <+> semi
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs])
graphvizAlignment :: PGF -> [Language] -> Expr -> String
graphvizAlignment pgf langs = render . lin2graph . linsBracketed
where
linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs]
lin2graph :: [BracketedString] -> Doc
lin2graph bss =
text "digraph {" $$
space $$
nest 2 (text "rankdir=LR ;" $$
text "node [shape = record] ;" $$
space $$
mkLayers 0 leaves) $$
text "}"
where
nil = -1
leaves = map (groupAndIndexIt 0 . getLeaves nil) bss
groupAndIndexIt id [] = []
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
where
collect pws@((p1,w):pws1)
| p == p1 = let (ws,pws2) = collect pws1
in (w:ws,pws2)
collect pws = ([],pws)
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
mkLayers l [] = empty
mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
(case css of
(ncs:_) -> vcat (map (mkLinks l ncs) cs)
_ -> empty) $$
mkLayers (l+1) css
mkLinks l cs (p0,id0,_) =
vcat (map (\id1 -> struct l <> colon <> tag id0 <> colon <> char 'e' <+>
text "->" <+>
struct (l+1) <> colon <> tag id1 <> colon <> char 'w' <+> semi) indices)
where
indices = [id1 | (p1,id1,_) <- cs, p1 == p0]
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (_,id,w) <- cs])
-- auxiliaries for graphviz syntax
struct l = text ("struct" ++ show l)
tbrackets d = char '<' <> d <> char '>'
tag i
| i < 0 = char 'r' <> int (negate i)
| otherwise = char 'n' <> int i
--------------------------------------------------------------------
-- The linearization code bellow is needed just in order to
-- produce the dependency tree. Unfortunately the bracketed string
-- doesn't give us an easy way to find which part of the string
-- corresponds to which argument of the parent function.
--
-- Uuuuugly!!! I hope that this code will be removed one day.
type LinTable = Array LIndex [BracketedTokn]
linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
linTree pgf lang mark e = lin0 [] [] [] Nothing e
where
cnc = lookMap (error "no lang") lang (concretes pgf)
lp = lproductions cnc
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
lin0 path xs ys mb_fid e = lin path ys mb_fid e []
lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
lin path xs mb_fid (ELit l) [] = case l of
LStr s -> return (mark Nothing path (ss s))
LInt n -> return (mark Nothing path (ss (show n)))
LFlt f -> return (mark Nothing path (ss (show f)))
lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
ss s = listArray (0,0) [[LeafKS [s]]]
apply path xs mb_fid f es =
case Map.lookup f lp of
Just prods -> case lookupProds mb_fid prods of
Just set -> do prod <- Set.toList set
case prod of
PApply funid fids -> do guard (length fids == length es)
args <- sequence (zipWith3 (\i (PArg _ fid) e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
let (CncFun _ lins) = cncfuns cnc ! funid
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
PCoerce fid -> apply path xs (Just fid) f es
Nothing -> mzero
where
lookupProds (Just fid) prods = IntMap.lookup fid prods
lookupProds Nothing prods = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
sub i path = i:path
isApp (PApply _ _) = True
isApp _ = False
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences cnc ! seqid
compute (SymCat d r) = (args !! d) ! r
compute (SymLit d r) = (args !! d) ! r
compute (SymKS ts) = [LeafKS ts]
compute (SymKP ts alts) = [LeafKP ts alts]
untokn :: [BracketedTokn] -> [String]
untokn ts = case ts of
LeafKP d _ : [] -> d
LeafKP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
LeafKS s : ws -> s ++ untokn ws
[] -> []
where
sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
v:_ -> v
_ -> d
-- show bracketed markup with references to tree structure
markLinearizes :: PGF -> CId -> Expr -> [String]
markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
where
mark mb_f path lint = amap (bracket mb_f path) lint
bracket Nothing path ts = [LeafKS ["("++show (reverse path)]] ++ ts ++ [LeafKS [")"]]
bracket (Just f) path ts = [LeafKS ["(("++showCId f++","++show (reverse path)++")"]] ++ ts ++ [LeafKS [")"]]
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
graphvizDependencyTree format debug mlab ms pgf lang tr = case format of
"malt" -> unlines (lin2dep format)
"malt_input" -> unlines (lin2dep format)
_ -> concat $ map (++"\n") $ ["digraph {\n"] ++ lin2dep format ++ ["}"]
where
lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $
case format of
"malt" -> map (concat . intersperse "\t") wnodes
"malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
_ -> prelude ++ nodes ++ links
ifd s = if debug then s else []
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang tr
---- use Just str if you have str to match against
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
nodes = map mkNode nodeWords
mkNode (i,((_,p),ss)) =
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
((Just f,p),w) <- wlins pot]
links = map mkLink thelinks
thelinks = [(word y, x, label tr y x) |
(_,((f,x),_)) <- tail nodeWords,
let y = dominant x]
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
node = show . show
dominant x = case x of
[] -> x
_ | not (x == hx) -> hx
_ -> dominant (init x)
where
hx = headArg (init x) tr x
headArg x0 tr x = case (unApp tr,x) of
(Just (f,[]),[_]) -> x0 ---- ??
(Just (f,ts),[_]) -> x0 ++ [getHead (length ts - 1) f]
(Just (f,ts),i:y) -> headArg x0 (ts !! i) y
_ -> x0 ----
label tr y x = case span (uncurry (==)) (zip y x) of
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
_ -> "" ----
funAt tr x = case (unApp tr,x) of
(Just (f,_) ,[]) -> f
(Just (f,ts),i:y) -> funAt (ts !! i) y
_ -> mkCId (render (ppExpr 0 [] tr)) ----
word x = if elem x sortedNodes then x else
let x' = headArg x tr (x ++[0]) in
if x' == x then [] else word x'
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
labels = maybe Map.empty id mlab
getHead i f = case Map.lookup f labels of
Just ls -> length $ takeWhile (/= "head") ls
_ -> i
getLabel i f = case Map.lookup f labels of
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
_ -> showCId f ++ "#" ++ show i
-- to generate CoNLL format for MaltParser
nodeMap :: Map.Map [Int] Int
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
arcMap :: Map.Map [Int] ([Int],String)
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
lookDomLab p = case Map.lookup p arcMap of
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
_ -> (0,rootlabel)
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
(i, ((fun,p),ws)) <- tail nodeWords,
let pos = showCId $ lookValCat pgf fun,
let morph = unspec,
let (dom,lab) = lookDomLab p
]
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
unspec = "_"
rootlabel = "ROOT"
wlins :: PosText -> [((Maybe CId,[Int]),[String])]
wlins pt = case pt of
T p pts -> concatMap (lins p) pts
M ws -> if null ws then [] else [((Nothing,[]),ws)]
where
lins p pt = case pt of
T q pts -> concatMap (lins q) pts
M ws -> if null ws then [] else [(p,ws)]
data PosText =
T (Maybe CId,[Int]) [PosText]
| M [String]
deriving Show
readPosText :: String -> PosText
readPosText = fst . head . (RP.readP_to_S pPosText) where
pPosText = do
RP.char '(' >> RP.skipSpaces
p <- pPos
RP.skipSpaces
ts <- RP.many pPosText
RP.char ')' >> RP.skipSpaces
return (T p ts)
RP.<++ do
ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
return (M ws)
pPos = do
fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
RP.<++ (return Nothing)
RP.char '[' >> RP.skipSpaces
is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
RP.char ']' >> RP.skipSpaces
RP.char ')' RP.<++ return ' '
return (fun,map read is)
|