summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2010-05-29 12:49:54 +0000
committerkrasimir <krasimir@chalmers.se>2010-05-29 12:49:54 +0000
commitd3effb1c7d19b3031e9ed32137d8c632dbcf40fd (patch)
tree3c112c71cd6b72d5dc55304b9a962aab41350b04 /src
parent8b3dcb53a9d0af614ba1adc19ae03a3fdc6fc10d (diff)
I switched back to the old algorithm for generating dependency trees. This required an ugly hack but there is no easy and quick other way :-(
Diffstat (limited to 'src')
-rw-r--r--src/runtime/haskell/PGF/VisualizeTree.hs248
1 files changed, 238 insertions, 10 deletions
diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs
index 9c41fdfa2..68392422f 100644
--- a/src/runtime/haskell/PGF/VisualizeTree.hs
+++ b/src/runtime/haskell/PGF/VisualizeTree.hs
@@ -24,11 +24,11 @@ module PGF.VisualizeTree
, getDepLabels
) where
-import PGF.CId (CId,showCId,ppCId,mkCId)
+import PGF.CId (CId,showCId,ppCId,pCId,mkCId)
import PGF.Data
import PGF.Expr (showExpr, Tree)
import PGF.Linearize
-import PGF.Macros (lookValCat, BracketedString(..), flattenBracketedString)
+import PGF.Macros (lookValCat, lookMap, _B, _V, BracketedString(..), flattenBracketedString)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
@@ -37,6 +37,12 @@ import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import Text.PrettyPrint
+import Data.Array.IArray
+import Control.Monad
+import qualified Data.Set as Set
+import qualified Text.ParserCombinators.ReadP as RP
+
+
-- | Renders abstract syntax tree in Graphviz format
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String
graphvizAbstractTree pgf (funs,cats) = render . tree2graph
@@ -90,6 +96,10 @@ graphvizAbstractTree pgf (funs,cats) = render . tree2graph
type Labels = Map.Map CId [String]
+{- This is an attempt to build the dependency tree from the bracketed string.
+ Unfortunately it doesn't quite work. See the actual implementation at
+ the end of this module.
+
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
graphvizDependencyTree format debug mlab ms pgf lang t = render $
case format of
@@ -129,27 +139,28 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
getDeps out_head bss =
- case IntMap.maxViewWithKey children of
+ case selectHead (children bss) of
Just ((head, bss'), deps) -> concat (descend out_head head bss' : [descend (headOf head bss') fid bss | (fid,bss) <- IntMap.toList deps])
- Nothing -> []
+ Nothing -> []
where
- children = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss]
-
descend head fid bss = (fid,head) : getDeps head bss
headOf head bss
| null [() | Leaf _ <- bss] =
- case IntMap.maxViewWithKey children of
+ case selectHead (children bss) of
Just ((head, bss), deps) -> headOf head bss
Nothing -> head
| otherwise = head
- where
- children = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss]
+
+ children bss = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss]
+
+ selectHead children = IntMap.maxViewWithKey children
mkNode (p,i,w) =
- tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;"
+ tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
mkLink (x,y) = tag y <+> text "->" <+> tag x -- ++ " [label = \"" ++ l ++ "\"] ;"
+-}
getDepLabels :: [String] -> Labels
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
@@ -253,3 +264,220 @@ tbrackets d = char '<' <> d <> char '>'
tag i
| i < 0 = char 'r' <> int (negate i)
| otherwise = char 'n' <> int i
+
+
+--------------------------------------------------------------------
+-- The linearization code bellow is needed just in order to
+-- produce the dependency tree. Unfortunately the bracketed string
+-- doesn't give us an easy way to find which part of the string
+-- corresponds to which argument of the parent function.
+--
+-- Uuuuugly!!! I hope that this code will be removed one day.
+
+type LinTable = Array LIndex [Tokn]
+
+
+linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
+linTree pgf lang mark e = lin0 [] [] [] Nothing e
+ where
+ cnc = lookMap (error "no lang") lang (concretes pgf)
+ lp = lproductions cnc
+
+ lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
+ lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
+ lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e []
+ | otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs])
+
+ lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
+ lin path xs mb_fid (ELit l) [] = case l of
+ LStr s -> return (mark Nothing path (ss s))
+ LInt n -> return (mark Nothing path (ss (show n)))
+ LFlt f -> return (mark Nothing path (ss (show f)))
+ lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es)
+ lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
+ lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es)
+ lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
+ lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
+
+ ss s = listArray (0,0) [[KS s]]
+
+ apply path xs mb_fid f es =
+ case Map.lookup f lp of
+ Just prods -> case lookupProds mb_fid prods of
+ Just set -> do prod <- Set.toList set
+ case prod of
+ PApply funid fids -> do guard (length fids == length es)
+ args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
+ let (CncFun _ lins) = cncfuns cnc ! funid
+ return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
+ PCoerce fid -> apply path xs (Just fid) f es
+ Nothing -> mzero
+ Nothing -> apply path xs mb_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
+ where
+ lookupProds (Just fid) prods = IntMap.lookup fid prods
+ lookupProds Nothing prods
+ | f == _B || f == _V = Nothing
+ | otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
+
+ sub i path
+ | f == _B || f == _V = path
+ | otherwise = i:path
+
+ isApp (PApply _ _) = True
+ isApp _ = False
+
+ computeSeq seqid args = concatMap compute (elems seq)
+ where
+ seq = sequences cnc ! seqid
+
+ compute (SymCat d r) = (args !! d) ! r
+ compute (SymLit d r) = (args !! d) ! r
+ compute (SymKS ts) = map KS ts
+ compute (SymKP ts alts) = [KP ts alts]
+
+untokn :: [Tokn] -> [String]
+untokn ts = case ts of
+ KP d _ : [] -> d
+ KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
+ KS s : ws -> s : untokn ws
+ [] -> []
+ where
+ sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
+ v:_ -> v
+ _ -> d
+
+
+-- show bracketed markup with references to tree structure
+markLinearizes :: PGF -> CId -> Expr -> [String]
+markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
+ where
+ mark mb_f path lint = amap (bracket mb_f path) lint
+
+ bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"]
+ bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"]
+
+
+graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
+graphvizDependencyTree format debug mlab ms pgf lang tr = case format of
+ "malt" -> unlines (lin2dep format)
+ "malt_input" -> unlines (lin2dep format)
+ _ -> concat $ map (++"\n") $ ["digraph {\n"] ++ lin2dep format ++ ["}"]
+ where
+
+ lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $
+ case format of
+ "malt" -> map (concat . intersperse "\t") wnodes
+ "malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
+ _ -> prelude ++ nodes ++ links
+
+ ifd s = if debug then s else []
+
+ pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang tr
+ ---- use Just str if you have str to match against
+
+ prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
+
+ nodes = map mkNode nodeWords
+ mkNode (i,((_,p),ss)) =
+ node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
+ nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
+ ((Just f,p),w) <- wlins pot]
+
+ links = map mkLink thelinks
+ thelinks = [(word y, x, label tr y x) |
+ (_,((f,x),_)) <- tail nodeWords,
+ let y = dominant x]
+ mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
+ node = show . show
+
+ dominant x = case x of
+ [] -> x
+ _ | not (x == hx) -> hx
+ _ -> dominant (init x)
+ where
+ hx = headArg (init x) tr x
+
+ headArg x0 tr x = case (unApp tr,x) of
+ (Just (f,[]),[_]) -> x0 ---- ??
+ (Just (f,ts),[_]) -> x0 ++ [getHead (length ts - 1) f]
+ (Just (f,ts),i:y) -> headArg x0 (ts !! i) y
+ _ -> x0 ----
+
+ label tr y x = case span (uncurry (==)) (zip y x) of
+ (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
+ _ -> "" ----
+
+ funAt tr x = case (unApp tr,x) of
+ (Just (f,_) ,[]) -> f
+ (Just (f,ts),i:y) -> funAt (ts !! i) y
+ _ -> mkCId (render (ppExpr 0 [] tr)) ----
+
+ word x = if elem x sortedNodes then x else
+ let x' = headArg x tr (x ++[0]) in
+ if x' == x then [] else word x'
+
+ sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
+
+ labels = maybe Map.empty id mlab
+ getHead i f = case Map.lookup f labels of
+ Just ls -> length $ takeWhile (/= "head") ls
+ _ -> i
+ getLabel i f = case Map.lookup f labels of
+ Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
+ _ -> showCId f ++ "#" ++ show i
+
+-- to generate CoNLL format for MaltParser
+ nodeMap :: Map.Map [Int] Int
+ nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
+
+ arcMap :: Map.Map [Int] ([Int],String)
+ arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
+
+ lookDomLab p = case Map.lookup p arcMap of
+ Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
+ _ -> (0,rootlabel)
+
+ wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
+ (i, ((fun,p),ws)) <- tail nodeWords,
+ let pos = showCId $ lookValCat pgf fun,
+ let morph = unspec,
+ let (dom,lab) = lookDomLab p
+ ]
+ maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
+ unspec = "_"
+ rootlabel = "ROOT"
+
+wlins :: PosText -> [((Maybe CId,[Int]),[String])]
+wlins pt = case pt of
+ T p pts -> concatMap (lins p) pts
+ M ws -> if null ws then [] else [((Nothing,[]),ws)]
+ where
+ lins p pt = case pt of
+ T q pts -> concatMap (lins q) pts
+ M ws -> if null ws then [] else [(p,ws)]
+
+data PosText =
+ T (Maybe CId,[Int]) [PosText]
+ | M [String]
+ deriving Show
+
+readPosText :: String -> PosText
+readPosText = fst . head . (RP.readP_to_S pPosText) where
+ pPosText = do
+ RP.char '(' >> RP.skipSpaces
+ p <- pPos
+ RP.skipSpaces
+ ts <- RP.many pPosText
+ RP.char ')' >> RP.skipSpaces
+ return (T p ts)
+ RP.<++ do
+ ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
+ return (M ws)
+ pPos = do
+ fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
+ RP.<++ (return Nothing)
+ RP.char '[' >> RP.skipSpaces
+ is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
+ RP.char ']' >> RP.skipSpaces
+ RP.char ')' RP.<++ return ' '
+ return (fun,map read is)