summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <aarne@chalmers.se>2009-10-20 13:04:33 +0000
committeraarne <aarne@chalmers.se>2009-10-20 13:04:33 +0000
commite82b6e02e991e482de04619a5694e8d765d2730b (patch)
treeb86651b6c95cb83aaee510594d3ac59c198fed1a
parent499ac428d32ada9979c2ff41bd23582245fb4715 (diff)
Malt parser .conll format by vd -output=malt
-rw-r--r--src/GF/Command/Commands.hs14
-rw-r--r--src/PGF/VisualizeTree.hs37
2 files changed, 39 insertions, 12 deletions
diff --git a/src/GF/Command/Commands.hs b/src/GF/Command/Commands.hs
index e297bb6b8..1b12d82cc 100644
--- a/src/GF/Command/Commands.hs
+++ b/src/GF/Command/Commands.hs
@@ -569,7 +569,8 @@ allCommands cod env@(pgf, mos) = Map.fromList [
longname = "visualize_dependency",
synopsis = "show word dependency tree graphically",
explanation = unlines [
- "Prints a dependency tree the .dot format (the graphviz format).",
+ "Prints a dependency tree in the .dot format (the graphviz format, default)",
+ "or the MaltParser/CoNLL format (flag -output=malt)",
"By default, the last argument is the head of every abstract syntax",
"function; moreover, the head depends on the head of the function above.",
"The graph can be saved in a file by the wf command as usual.",
@@ -581,21 +582,21 @@ allCommands cod env@(pgf, mos) = Map.fromList [
exec = \opts es -> do
let debug = isOpt "v" opts
let file = valStrOpts "file" "" opts
+ let outp = valStrOpts "output" "dot" opts
mlab <- case file of
"" -> return Nothing
_ -> readFile file >>= return . Just . getDepLabels . lines
let lang = optLang opts
- let grph = if null es then [] else
- dependencyTree debug mlab Nothing pgf lang (head es)
+ let grphs = unlines $ map (dependencyTree outp debug mlab Nothing pgf lang) es
if isFlag "view" opts || isFlag "format" opts then do
- let file s = "_grph." ++ s
+ let file s = "_grphd." ++ s
let view = optViewGraph opts ++ " "
let format = optViewFormat opts
- writeFile (file "dot") (enc grph)
+ writeFile (file "dot") (enc grphs)
system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++
" ; " ++ view ++ file format
return void
- else return $ fromString grph,
+ else return $ fromString grphs,
examples = [
"gr | aw -- generate a tree and show word alignment as graph script",
"gr | vt -view=\"open\" -- generate a tree and display alignment on a Mac"
@@ -606,6 +607,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
flags = [
("file","configuration file for labels per fun, format 'fun l1 ... label ... l2'"),
("format","format of the visualization file (default \"png\")"),
+ ("output","output format of graph source (default \"dot\")"),
("view","program to open the resulting file (default \"open\")")
]
}),
diff --git a/src/PGF/VisualizeTree.hs b/src/PGF/VisualizeTree.hs
index 165e96d8f..8db2cb3e4 100644
--- a/src/PGF/VisualizeTree.hs
+++ b/src/PGF/VisualizeTree.hs
@@ -60,10 +60,16 @@ prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where
-- dependency trees from Linearize.linearizeMark
-dependencyTree :: Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
-dependencyTree debug mlab ms pgf lang exp = prGraph True lin2dep where
+dependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
+dependencyTree format debug mlab ms pgf lang exp = case format of
+ "malt" -> unlines (lin2dep format)
+ _ -> prGraph True (lin2dep format)
- lin2dep = trace (ifd (show sortedNodes ++ show nodeWords)) $ prelude ++ nodes ++ links
+ where
+
+ lin2dep format = trace (ifd (show sortedNodes ++ show nodeWords)) $ case format of
+ "malt" -> map (concat . intersperse "\t") wnodes
+ _ -> prelude ++ nodes ++ links
ifd s = if debug then s else []
@@ -78,8 +84,8 @@ dependencyTree debug mlab ms pgf lang exp = prGraph True lin2dep where
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
((Just f,p),w) <- wlins pot]
- links = map mkLink
- [(word y, x, label tr y x) |
+ links = map mkLink thelinks
+ thelinks = [(word y, x, label tr y x) |
(_,((f,x),_)) <- tail nodeWords,
let y = dominant x]
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
@@ -120,13 +126,32 @@ dependencyTree debug mlab ms pgf lang exp = prGraph True lin2dep where
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
_ -> showCId f ++ "#" ++ show i
+-- to generate CoNLL format for MaltParser
+ nodeMap :: Map.Map [Int] Int
+ nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
+
+ arcMap :: Map.Map [Int] ([Int],String)
+ arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
+
+ lookDomLab p = case Map.lookup p arcMap of
+ Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then "_" else l)
+ _ -> (0,unspec)
+
+ wnodes = [[show i, unwords ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
+ (i, ((fun,p),ws)) <- tail nodeWords,
+ let pos = showCId $ lookValCat pgf fun,
+ let morph = unspec,
+ let (dom,lab) = lookDomLab p
+ ]
+
+ unspec = "_"
+
type Labels = Map.Map CId [String]
getDepLabels :: [String] -> Labels
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
-
-- parse trees from Linearize.linearizeMark
---- nubrec and domins are quadratic, but could be (n log n)