summaryrefslogtreecommitdiff
path: root/src/runtime/haskell
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/haskell')
-rw-r--r--src/runtime/haskell/PGF.hs64
-rw-r--r--src/runtime/haskell/PGF/Data.hs3
-rw-r--r--src/runtime/haskell/PGF/Forest.hs101
-rw-r--r--src/runtime/haskell/PGF/Linearize.hs174
-rw-r--r--src/runtime/haskell/PGF/Macros.hs54
-rw-r--r--src/runtime/haskell/PGF/Parse.hs88
-rw-r--r--src/runtime/haskell/PGF/Printer.hs2
-rw-r--r--src/runtime/haskell/PGF/VisualizeTree.hs583
8 files changed, 603 insertions, 466 deletions
diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs
index 9ab24b100..f2235cf37 100644
--- a/src/runtime/haskell/PGF.hs
+++ b/src/runtime/haskell/PGF.hs
@@ -1,7 +1,7 @@
-------------------------------------------------
-- |
-- Module : PGF
--- Maintainer : Aarne Ranta
+-- Maintainer : Krasimir Angelov
-- Stability : stable
-- Portability : portable
--
@@ -50,9 +50,12 @@ module PGF(
-- * Operations
-- ** Linearization
- linearize, linearizeAllLang, linearizeAll,
+ linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes,
groupResults, -- lins of trees by language, removing duplicates
showPrintName,
+
+ BracketedString(..), FId, LIndex,
+ Forest.showBracketedString,
-- ** Parsing
parse, parseWithRecovery, parseAllLang, parseAll,
@@ -73,10 +76,11 @@ module PGF(
checkType, checkExpr, inferExpr,
TcError(..), ppTcError,
- -- ** Word Completion (Incremental Parsing)
+ -- ** Low level parsing API
complete,
Parse.ParseState,
- Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, Parse.extractTrees,
+ Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates,
+ Parse.ParseResult(..), Parse.getParseResult,
-- ** Generation
generateRandom, generateAll, generateAllDepth,
@@ -90,6 +94,7 @@ module PGF(
graphvizAbstractTree,
graphvizParseTree,
graphvizDependencyTree,
+ graphvizBracketedString,
graphvizAlignment,
-- * Browsing
@@ -107,6 +112,7 @@ import PGF.Expr (Tree)
import PGF.Morphology
import PGF.Data
import PGF.Binary
+import qualified PGF.Forest as Forest
import qualified PGF.Parse as Parse
import GF.Data.Utilities (replace)
@@ -131,34 +137,18 @@ import Text.PrettyPrint
-- > $ gf -make <grammar file name>
readPGF :: FilePath -> IO PGF
--- | Linearizes given expression as string in the language
-linearize :: PGF -> Language -> Tree -> String
-
-- | Tries to parse the given string in the specified language
--- and to produce abstract syntax expression. An empty
--- list is returned if the parsing is not successful. The list may also
--- contain more than one element if the grammar is ambiguous.
--- Throws an exception if the given language cannot be used
--- for parsing, see 'canParse'.
-parse :: PGF -> Language -> Type -> String -> [Tree]
-
-parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
-
--- | The same as 'linearizeAllLang' but does not return
--- the language.
-linearizeAll :: PGF -> Tree -> [String]
+-- and to produce abstract syntax expression.
+parse :: PGF -> Language -> Type -> String -> (Parse.ParseResult,Maybe BracketedString)
--- | Linearizes given expression as string in all languages
--- available in the grammar.
-linearizeAllLang :: PGF -> Tree -> [(Language,String)]
+-- | This is an experimental function. Use it on your own risk
+parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> (Parse.ParseResult,Maybe BracketedString)
-- | The same as 'parseAllLang' but does not return
-- the language.
parseAll :: PGF -> Type -> String -> [[Tree]]
-- | Tries to parse the given string with all available languages.
--- Languages which cannot be used for parsing (see 'canParse')
--- are ignored.
-- The returned list contains pairs of language
-- and list of abstract syntax expressions
-- (this is a list, since grammars can be ambiguous).
@@ -227,8 +217,6 @@ complete :: PGF -> Language -> Type -> String
readPGF f = decodeFile f
-linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
-
parse pgf lang typ s =
case Map.lookup lang (concretes pgf) of
Just cnc -> Parse.parse pgf lang typ (words s)
@@ -236,10 +224,6 @@ parse pgf lang typ s =
parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s)
-linearizeAll mgr = map snd . linearizeAllLang mgr
-linearizeAllLang mgr t =
- [(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
-
groupResults :: [[(Language,String)]] -> [(Language,[String])]
groupResults = Map.toList . foldr more Map.empty . start . concat
where
@@ -250,7 +234,7 @@ groupResults = Map.toList . foldr more Map.empty . start . concat
parseAll mgr typ = map snd . parseAllLang mgr typ
parseAllLang mgr typ s =
- [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang typ s, not (null ts)]
+ [(lang,ts) | lang <- languages mgr, (Parse.ParseResult ts,_) <- [parse mgr lang typ s], not (null ts)]
generateRandom pgf cat = do
gen <- newStdGen
@@ -280,14 +264,18 @@ functionType pgf fun =
Nothing -> Nothing
complete pgf from typ input =
- let (ws,prefix) = tokensAndPrefix input
- state0 = Parse.initState pgf from typ
- in case loop state0 ws of
- Nothing -> []
- Just state ->
- (if null prefix && not (null (Parse.extractTrees state typ)) then [unwords ws ++ " "] else [])
- ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)]
+ let (ws,prefix) = tokensAndPrefix input
+ state0 = Parse.initState pgf from typ
+ in case loop state0 ws of
+ Nothing -> []
+ Just state -> (if null prefix && isSuccessful state then [unwords ws ++ " "] else [])
+ ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)]
where
+ isSuccessful state =
+ case Parse.getParseResult state typ of
+ (Parse.ParseResult ts, _) -> not (null ts)
+ _ -> False
+
tokensAndPrefix :: String -> ([String],String)
tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
| null ws = ([],"")
diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs
index b604317fc..7623a05f3 100644
--- a/src/runtime/haskell/PGF/Data.hs
+++ b/src/runtime/haskell/PGF/Data.hs
@@ -56,7 +56,7 @@ data Symbol
data Production
= PApply {-# UNPACK #-} !FunId [FId]
| PCoerce {-# UNPACK #-} !FId
- | PConst Expr [String]
+ | PConst CId Expr [String]
deriving (Eq,Ord,Show)
data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String)
data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show)
@@ -86,7 +86,6 @@ data Tokn =
| KP [String] [Alternative]
deriving (Eq,Ord,Show)
-
-- merge two PGFs; fails is differens absnames; priority to second arg
unionPGF :: PGF -> PGF -> PGF
diff --git a/src/runtime/haskell/PGF/Forest.hs b/src/runtime/haskell/PGF/Forest.hs
new file mode 100644
index 000000000..3dd996aa6
--- /dev/null
+++ b/src/runtime/haskell/PGF/Forest.hs
@@ -0,0 +1,101 @@
+-------------------------------------------------
+-- |
+-- Module : PGF
+-- Maintainer : Krasimir Angelov
+-- Stability : stable
+-- Portability : portable
+--
+-- Forest is a compact representation of a set
+-- of parse trees. This let us to efficiently
+-- represent local ambiguities
+--
+-------------------------------------------------
+
+module PGF.Forest( Forest(..)
+ , BracketedString, showBracketedString
+ , linearizeWithBrackets
+ ) where
+
+import PGF.CId
+import PGF.Data
+import PGF.Macros
+import Data.List
+import Data.Array.IArray
+import qualified Data.Set as Set
+import qualified Data.Map as Map
+import qualified Data.IntSet as IntSet
+import qualified Data.IntMap as IntMap
+import Control.Monad
+
+data Forest
+ = Forest
+ { abstr :: Abstr
+ , concr :: Concr
+ , forest :: IntMap.IntMap (Set.Set Production)
+ , root :: {-# UNPACK #-} !FId
+ , label :: {-# UNPACK #-} !LIndex
+ }
+
+--------------------------------------------------------------------
+-- Rendering of bracketed strings
+--------------------------------------------------------------------
+
+linearizeWithBrackets :: Forest -> BracketedString
+linearizeWithBrackets = head . snd . untokn "" . bracketedTokn
+
+---------------------------------------------------------------
+-- Internally we have to do everything with Tokn first because
+-- we must handle the pre {...} construction.
+--
+
+bracketedTokn :: Forest -> BracketedTokn
+bracketedTokn (Forest abs cnc forest root label) =
+ let (fid,cat,lin) = render IntMap.empty root
+ in Bracket_ fid label cat (lin ! label)
+ where
+ trusted = trustedSpots IntSet.empty root
+
+ render parents fid =
+ case (IntMap.lookup fid parents) `mplus` (fmap Set.toList $ IntMap.lookup fid forest) of
+ Just (p:ps) -> descend (IntMap.insert fid ps parents) p
+ Nothing -> error ("wrong forest id " ++ show fid)
+ where
+ descend parents (PApply funid args) = let (CncFun fun lins) = cncfuns cnc ! funid
+ Just (DTyp _ cat _,_,_) = Map.lookup fun (funs abs)
+ largs = map (render parents) args
+ in (fid,cat,listArray (bounds lins) [computeSeq seqid largs | seqid <- elems lins])
+ descend parents (PCoerce fid) = render parents fid
+ descend parents (PConst cat _ ts) = (fid,cat,listArray (0,0) [[LeafKS ts]])
+
+ trustedSpots parents fid
+ | IntSet.member fid parents
+ = IntSet.empty
+ | otherwise = IntSet.insert fid $
+ case IntMap.lookup fid forest of
+ Just prods -> foldl1 IntSet.intersection [descend prod | prod <- Set.toList prods]
+ Nothing -> IntSet.empty
+ where
+ parents' = IntSet.insert fid parents
+
+ descend (PApply funid args) = IntSet.unions (map (trustedSpots parents') args)
+ descend (PCoerce fid) = trustedSpots parents' fid
+ descend (PConst c e _) = IntSet.empty
+
+ computeSeq :: SeqId -> [(FId,CId,LinTable)] -> [BracketedTokn]
+ computeSeq seqid args = concatMap compute (elems seq)
+ where
+ seq = sequences cnc ! seqid
+
+ compute (SymCat d r) = getArg d r
+ compute (SymLit d r) = getArg d r
+ compute (SymKS ts) = [LeafKS ts]
+ compute (SymKP ts alts) = [LeafKP ts alts]
+
+ getArg d r
+ | not (null arg_lin) &&
+ IntSet.member fid trusted
+ = [Bracket_ fid r cat arg_lin]
+ | otherwise = arg_lin
+ where
+ arg_lin = lin ! r
+ (fid,cat,lin) = args !! d
diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs
index 058d8281f..4a399f5e9 100644
--- a/src/runtime/haskell/PGF/Linearize.hs
+++ b/src/runtime/haskell/PGF/Linearize.hs
@@ -1,8 +1,15 @@
-module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where
+module PGF.Linearize
+ ( linearize
+ , linearizeAll
+ , linearizeAllLang
+ , bracketedLinearize
+ , tabularLinearizes
+ ) where
import PGF.CId
import PGF.Data
import PGF.Macros
+import PGF.Expr
import Data.Array.IArray
import Data.List
import Control.Monad
@@ -10,99 +17,112 @@ import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
--- linearization and computation of concrete PGF Terms
+--------------------------------------------------------------------
+-- The API
+--------------------------------------------------------------------
-type LinTable = Array LIndex [Tokn]
+-- | Linearizes given expression as string in the language
+linearize :: PGF -> Language -> Tree -> String
+linearize pgf lang = concat . take 1 . map (unwords . concatMap flattenBracketedString . snd . untokn "" . (!0)) . linTree pgf lang
-linearizes :: PGF -> CId -> Expr -> [String]
-linearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang (\_ _ lint -> lint)
+-- | The same as 'linearizeAllLang' but does not return
+-- the language.
+linearizeAll :: PGF -> Tree -> [String]
+linearizeAll pgf = map snd . linearizeAllLang pgf
-linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
-linTree pgf lang mark e = lin0 [] [] [] Nothing e
+-- | Linearizes given expression as string in all languages
+-- available in the grammar.
+linearizeAllLang :: PGF -> Tree -> [(Language,String)]
+linearizeAllLang pgf t = [(lang,linearize pgf lang t) | lang <- Map.keys (concretes pgf)]
+
+-- | Linearizes given expression as a bracketed string in the language
+bracketedLinearize :: PGF -> Language -> Tree -> BracketedString
+bracketedLinearize pgf lang = head . concat . map (snd . untokn "" . (!0)) . linTree pgf lang
+
+-- | Creates a table from feature name to linearization.
+-- The outher list encodes the variations
+tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]]
+tabularLinearizes pgf lang e = map (zip lbls . map (unwords . concatMap flattenBracketedString . snd . untokn "") . elems)
+ (linTree pgf lang e)
+ where
+ lbls = case unApp e of
+ Just (f,_) -> let cat = valCat (lookType pgf f)
+ in case Map.lookup cat (cnccats (lookConcr pgf lang)) of
+ Just (CncCat _ _ lbls) -> elems lbls
+ Nothing -> error "No labels"
+ Nothing -> error "Not function application"
+
+--------------------------------------------------------------------
+-- Implementation
+--------------------------------------------------------------------
+
+linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn]
+linTree pgf lang e =
+ [amapWithIndex (\label -> Bracket_ fid label cat) lin | (_,(fid,cat,lin)) <- lin0 [] [] Nothing 0 e]
where
cnc = lookMap (error "no lang") lang (concretes pgf)
lp = lproductions cnc
-
- lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
- lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
- lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e []
- | otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs])
-
- lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
- lin path xs mb_fid (ELit l) [] = case l of
- LStr s -> return (mark Nothing path (ss s))
- LInt n -> return (mark Nothing path (ss (show n)))
- LFlt f -> return (mark Nothing path (ss (show f)))
- lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es)
- lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
- lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es)
- lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
- lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
-
- ss s = listArray (0,0) [[KS s]]
-
- apply path xs mb_fid f es =
+
+ lin0 xs ys mb_fid n_fid (EAbs _ x e) = lin0 (showCId x:xs) ys mb_fid n_fid e
+ lin0 xs ys mb_fid n_fid (ETyped e _) = lin0 xs ys mb_fid n_fid e
+ lin0 xs ys mb_fid n_fid e | null xs = lin ys mb_fid n_fid e []
+ | otherwise = apply (xs ++ ys) mb_fid n_fid _B (e:[ELit (LStr x) | x <- xs])
+
+ lin xs mb_fid n_fid (EApp e1 e2) es = lin xs mb_fid n_fid e1 (e2:es)
+ lin xs mb_fid n_fid (ELit l) [] = case l of
+ LStr s -> return (n_fid+1,(n_fid,cidString,ss s))
+ LInt n -> return (n_fid+1,(n_fid,cidInt ,ss (show n)))
+ LFlt f -> return (n_fid+1,(n_fid,cidFloat ,ss (show f)))
+ lin xs mb_fid n_fid (EMeta i) es = apply xs mb_fid n_fid _V (ELit (LStr ('?':show i)):es)
+ lin xs mb_fid n_fid (EFun f) es = apply xs mb_fid n_fid f es
+ lin xs mb_fid n_fid (EVar i) es = apply xs mb_fid n_fid _V (ELit (LStr (xs !! i)) :es)
+ lin xs mb_fid n_fid (ETyped e _) es = lin xs mb_fid n_fid e es
+ lin xs mb_fid n_fid (EImplArg e) es = lin xs mb_fid n_fid e es
+
+ ss s = listArray (0,0) [[LeafKS [s]]]
+
+ apply :: [String] -> Maybe FId -> FId -> CId -> [Expr] -> [(FId,(FId, CId, LinTable))]
+ apply xs mb_fid n_fid f es =
case Map.lookup f lp of
- Just prods -> case lookupProds mb_fid prods of
- Just set -> do prod <- Set.toList set
- case prod of
- PApply funid fids -> do guard (length fids == length es)
- args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
- let (CncFun _ lins) = cncfuns cnc ! funid
- return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
- PCoerce fid -> apply path xs (Just fid) f es
- Nothing -> mzero
- Nothing -> apply path xs mb_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
+ Just prods -> do prod <- lookupProds mb_fid prods
+ case prod of
+ PApply funid fids -> do guard (length fids == length es)
+ (n_fid,args) <- descend n_fid (zip fids es)
+ let (CncFun fun lins) = cncfuns cnc ! funid
+ Just (DTyp _ cat _,_,_) = Map.lookup fun (funs (abstract pgf))
+ return (n_fid+1,(n_fid,cat,listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]))
+ PCoerce fid -> apply xs (Just fid) n_fid f es
+ Nothing -> apply xs mb_fid n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
where
- lookupProds (Just fid) prods = IntMap.lookup fid prods
+ lookupProds (Just fid) prods = maybe [] Set.toList (IntMap.lookup fid prods)
lookupProds Nothing prods
- | f == _B || f == _V = Nothing
- | otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
+ | f == _B || f == _V = []
+ | otherwise = [prod | (fid,set) <- IntMap.toList prods, prod <- Set.toList set]
- sub i path
- | f == _B || f == _V = path
- | otherwise = i:path
+ descend n_fid [] = return (n_fid,[])
+ descend n_fid ((fid,e):fes) = do (n_fid,xx) <- lin0 [] xs (Just fid) n_fid e
+ (n_fid,xxs) <- descend n_fid fes
+ return (n_fid,xx:xxs)
isApp (PApply _ _) = True
isApp _ = False
+ computeSeq :: SeqId -> [(FId,CId,LinTable)] -> [BracketedTokn]
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences cnc ! seqid
- compute (SymCat d r) = (args !! d) ! r
- compute (SymLit d r) = (args !! d) ! r
- compute (SymKS ts) = map KS ts
- compute (SymKP ts alts) = [KP ts alts]
-
-untokn :: [Tokn] -> [String]
-untokn ts = case ts of
- KP d _ : [] -> d
- KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
- KS s : ws -> s : untokn ws
- [] -> []
- where
- sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
- v:_ -> v
- _ -> d
-
--- create a table from labels+params to variants
-tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]]
-tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (linTree pgf lang (\_ _ lint -> lint) e)
- where
- lbls = case unApp e of
- Just (f,_) -> let cat = valCat (lookType pgf f)
- in case Map.lookup cat (cnccats (lookConcr pgf lang)) of
- Just (CncCat _ _ lbls) -> elems lbls
- Nothing -> error "No labels"
- Nothing -> error "Not function application"
+ compute (SymCat d r) = getArg d r
+ compute (SymLit d r) = getArg d r
+ compute (SymKS ts) = [LeafKS ts]
+ compute (SymKP ts alts) = [LeafKP ts alts]
+ getArg d r
+ | not (null arg_lin) = [Bracket_ fid r cat arg_lin]
+ | otherwise = arg_lin
+ where
+ arg_lin = lin ! r
+ (fid,cat,lin) = args !! d
--- show bracketed markup with references to tree structure
-markLinearizes :: PGF -> CId -> Expr -> [String]
-markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
- where
- mark mb_f path lint = amap (bracket mb_f path) lint
-
- bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"]
- bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"]
+amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2
+amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr))
diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs
index 8886bc696..1b563fc48 100644
--- a/src/runtime/haskell/PGF/Macros.hs
+++ b/src/runtime/haskell/PGF/Macros.hs
@@ -11,6 +11,7 @@ import qualified Data.Array as Array
import Data.Maybe
import Data.List
import GF.Data.Utilities(sortNub)
+import Text.PrettyPrint
-- operations for manipulating PGF grammars and objects
@@ -202,3 +203,56 @@ updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pg
getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
Nothing -> []
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]
+
+
+-- Utilities for doing linearization
+
+-- | BracketedString represents a sentence that is linearized
+-- as usual but we also want to retain the ''brackets'' that
+-- mark the beginning and the end of each constituent.
+data BracketedString
+ = Leaf String -- ^ this is the leaf i.e. a single token
+ | Bracket {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString] -- ^ this is a bracket. The 'CId' is the category of
+ -- the phrase. The 'FId' is an unique identifier for
+ -- every phrase in the sentence. For context-free grammars
+ -- i.e. without discontinuous constituents this identifier
+ -- is also unique for every bracket. When there are discontinuous
+ -- phrases then the identifiers are unique for every phrase but
+ -- not for every bracket since the bracket represents a constituent.
+ -- The different constituents could still be distinguished by using
+ -- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
+ -- then the constituent indices will be the same for all brackets
+ -- that represents the same constituent.
+
+data BracketedTokn
+ = LeafKS [String]
+ | LeafKP [String] [Alternative]
+ | Bracket_ {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedTokn] -- Invariant: the list is not empty
+
+type LinTable = Array.Array LIndex [BracketedTokn]
+
+-- | Renders the bracketed string as string where
+-- the brackets are shown as @(S ...)@ where
+-- @S@ is the category.
+showBracketedString :: BracketedString -> String
+showBracketedString = render . ppBracketedString
+
+ppBracketedString (Leaf t) = text t
+ppBracketedString (Bracket fcat index cat bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss))
+
+untokn :: String -> BracketedTokn -> (String,[BracketedString])
+untokn nw (LeafKS ts) = (head ts,map Leaf ts)
+untokn nw (LeafKP d vs) = let ts = sel d vs nw
+ in (head ts,map Leaf ts)
+ where
+ sel d vs nw =
+ case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of
+ v:_ -> v
+ _ -> d
+untokn nw (Bracket_ fid index cat bss) =
+ let (nw',bss') = mapAccumR untokn nw bss
+ in (nw',[Bracket fid index cat (concat bss')])
+
+flattenBracketedString :: BracketedString -> [String]
+flattenBracketedString (Leaf w) = [w]
+flattenBracketedString (Bracket _ _ _ bss) = concatMap flattenBracketedString bss
diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs
index ab5c91f02..85fc026aa 100644
--- a/src/runtime/haskell/PGF/Parse.hs
+++ b/src/runtime/haskell/PGF/Parse.hs
@@ -6,7 +6,7 @@ module PGF.Parse
, nextState
, getCompletions
, recoveryStates
- , extractTrees
+ , ParseResult(..), getParseResult
, parse
, parseWithRecovery
) where
@@ -14,7 +14,7 @@ module PGF.Parse
import Data.Array.IArray
import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl')
-import Data.Maybe (fromMaybe, maybe)
+import Data.Maybe (fromMaybe, maybe, maybeToList)
import qualified Data.Map as Map
import qualified GF.Data.TrieMap as TMap
import qualified Data.IntMap as IntMap
@@ -27,26 +27,35 @@ import PGF.Data
import PGF.Expr(Tree)
import PGF.Macros
import PGF.TypeCheck
-import Debug.Trace
-
-parse :: PGF -> Language -> Type -> [String] -> [Tree]
-parse pgf lang typ toks = loop (initState pgf lang typ) toks
+import PGF.Forest(Forest(Forest), linearizeWithBrackets)
+
+-- | This data type encodes the different outcomes which you could get from the parser.
+data ParseResult
+ = ParseFailed Int -- ^ The integer is the position in number of tokens where the parser failed.
+ | TypeError FId [TcError] -- ^ The parsing was successful but none of the trees is type correct.
+ -- The forest id ('FId') points to the bracketed string from the parser
+ -- where the type checking failed. More than one error is returned
+ -- if there are many analizes for some phrase but they all are not type correct.
+ | ParseResult [Tree] -- ^ If the parsing was successful we get a list of abstract syntax trees. The list should be non-empty.
+
+parse :: PGF -> Language -> Type -> [String] -> (ParseResult,Maybe BracketedString)
+parse pgf lang typ toks = loop 0 (initState pgf lang typ) toks
where
- loop ps [] = extractTrees ps typ
- loop ps (t:ts) = case nextState ps t of
- Left es -> []
- Right ps -> loop ps ts
+ loop i ps [] = getParseResult ps typ
+ loop i ps (t:ts) = case nextState ps t of
+ Left es -> (ParseFailed i,Nothing)
+ Right ps -> loop (i+1) ps ts
-parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree]
+parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> (ParseResult,Maybe BracketedString)
parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks
where
- accept ps [] = extractTrees ps typ
+ accept ps [] = getParseResult ps typ
accept ps (t:ts) =
case nextState ps t of
Right ps -> accept ps ts
Left es -> skip (recoveryStates open_typs es) ts
- skip ps_map [] = extractTrees (fst ps_map) typ
+ skip ps_map [] = getParseResult (fst ps_map) typ
skip ps_map (t:ts) =
case Map.lookup t (snd ps_map) of
Just ps -> accept ps ts
@@ -145,23 +154,31 @@ recoveryStates open_types (EState pgf cnc chart) =
-- that spans the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
-- the same as the startup category.
-extractTrees :: ParseState -> Type -> [Tree]
-extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
- nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
+getParseResult :: ParseState -> Type -> (ParseResult,Maybe BracketedString)
+getParseResult (PState pgf cnc chart items) ty@(DTyp _ start _) =
+ let mb_bs = case roots of
+ ((root,lbl):_) -> Just $ linearizeWithBrackets $ Forest (abstract pgf) cnc (forest st) root lbl
+ _ -> Nothing
+
+ exps = nubsort $ do
+ (fid,lbl) <- roots
+ (fvs,e) <- go Set.empty 0 (0,fid)
+ guard (Set.null fvs)
+ Right e1 <- [checkExpr pgf e ty]
+ return e1
+
+ in (ParseResult exps,mb_bs)
where
(mb_agenda,acc) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
(_,st) = process Nothing (\_ _ -> id) (sequences cnc) (cncfuns cnc) agenda () chart
- exps =
- case Map.lookup start (cnccats cnc) of
- Just (CncCat s e lbls) -> do cat <- range (s,e)
- lbl <- indices lbls
- Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
- (fvs,tree) <- go Set.empty 0 (0,fid)
- guard (Set.null fvs)
- return tree
- Nothing -> mzero
+ roots = case Map.lookup start (cnccats cnc) of
+ Just (CncCat s e lbls) -> do cat <- range (s,e)
+ lbl <- indices lbls
+ fid <- maybeToList (lookupPC (PK cat lbl 0) (passive st))
+ return (fid,lbl)
+ Nothing -> mzero
go rec fcat' (d,fcat)
| fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
@@ -189,6 +206,7 @@ extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
freeVar (EFun v) = Set.singleton v
freeVar _ = Set.empty
+
process mbt fn !seqs !funs [] acc chart = (acc,chart)
process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
| inRange (bounds lin) ppos =
@@ -218,15 +236,15 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac
Nothing -> fid
Just fid -> fid
- in case [ts | PConst _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of
+ in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of
(toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
in process mbt fn seqs funs items acc' chart
[] -> case litCatMatch fid mbt of
- Just (toks,lit)
+ Just (cat,lit,toks)
-> let fid' = nextId chart
!acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
in process mbt fn seqs funs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart)
- ,forest =IntMap.insert fid' (Set.singleton (PConst lit toks)) (forest chart)
+ ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart)
,nextId =nextId chart+1
}
Nothing -> process mbt fn seqs funs items acc chart
@@ -260,12 +278,12 @@ updateAt :: Int -> a -> [a] -> [a]
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
litCatMatch fcat (Just t)
- | fcat == fcatString = Just ([t],ELit (LStr t))
- | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n));
+ | fcat == fcatString = Just (cidString,ELit (LStr t),[t])
+ | fcat == fcatInt = case reads t of {[(n,"")] -> Just (cidInt,ELit (LInt n),[t]);
_ -> Nothing }
- | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d));
+ | fcat == fcatFloat = case reads t of {[(d,"")] -> Just (cidFloat,ELit (LFlt d),[t]);
_ -> Nothing }
- | fcat == fcatVar = Just ([t],EFun (mkCId t))
+ | fcat == fcatVar = Just (cidVar,EFun (mkCId t),[t])
litCatMatch _ _ = Nothing
@@ -341,9 +359,9 @@ foldForest f g b fcat forest =
Nothing -> b
Just set -> Set.fold foldProd b set
where
- foldProd (PCoerce fcat) b = foldForest f g b fcat forest
- foldProd (PApply funid args) b = f funid args b
- foldProd (PConst const toks) b = g const toks b
+ foldProd (PCoerce fcat) b = foldForest f g b fcat forest
+ foldProd (PApply funid args) b = f funid args b
+ foldProd (PConst _ const toks) b = g const toks b
----------------------------------------------------------------
diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs
index 1d9928304..fa6c5e549 100644
--- a/src/runtime/haskell/PGF/Printer.hs
+++ b/src/runtime/haskell/PGF/Printer.hs
@@ -58,7 +58,7 @@ ppProduction (fcat,PApply funid args) =
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,PCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
-ppProduction (fcat,PConst _ ss) =
+ppProduction (fcat,PConst _ _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppCncFun (funid,CncFun fun arr) =
diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs
index e58791d4d..3075e7a86 100644
--- a/src/runtime/haskell/PGF/VisualizeTree.hs
+++ b/src/runtime/haskell/PGF/VisualizeTree.hs
@@ -15,339 +15,296 @@
-- instead of rolling its own.
-----------------------------------------------------------------------------
-module PGF.VisualizeTree ( graphvizAbstractTree
- , graphvizParseTree
- , graphvizDependencyTree
- , graphvizAlignment
- , tree2mk
- , getDepLabels
- , PosText(..), readPosText
+module PGF.VisualizeTree
+ ( graphvizAbstractTree
+ , graphvizParseTree
+ , graphvizDependencyTree
+ , graphvizBracketedString
+ , graphvizAlignment
+ , getDepLabels
) where
-import PGF.CId (CId,showCId,pCId,mkCId)
+import PGF.CId (CId,showCId,ppCId,mkCId)
import PGF.Data
-import PGF.Tree
-import PGF.Expr (showExpr)
+import PGF.Expr (showExpr, Tree)
import PGF.Linearize
-import PGF.Macros (lookValCat)
+import PGF.Macros (lookValCat, BracketedString(..), flattenBracketedString)
import qualified Data.Map as Map
import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
import Data.Char (isDigit)
-import qualified Text.ParserCombinators.ReadP as RP
-
-import Debug.Trace
-
-graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String
-graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree
-
-tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String]
-tree2graph pgf (funs,cats) = prf [] where
- prf ps t = let (nod,lab) = prn ps t in
- (nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") :
- case t of
- Fun cid trees ->
- [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
- concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
- Abs xs (Fun cid trees) ->
- [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
- concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
- _ -> []
- prn ps t = case t of
- Fun cid _ ->
- let
- fun = if funs then showCId cid else ""
- cat = if cats then prCat cid else ""
- colon = if funs && cats then " : " else ""
- lab = "\"" ++ fun ++ colon ++ cat ++ "\""
- in (show(show (ps :: [Int])),lab)
- Abs bs tree ->
- let fun = case tree of
- Fun cid _ -> Fun cid []
- _ -> tree
- in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"")
- _ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"")
- pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];"
- arr = " -- " -- if digr then " -> " else " -- "
- prCat = showCId . lookValCat pgf
- esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts
-
-prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where
- graph = if digr then "digraph" else "graph"
-
-
--- replace each non-atomic constructor with mkC, where C is the val cat
-tree2mk :: PGF -> Expr -> String
-tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where
- t2m t = case t of
- Fun cid [] -> t
- Fun cid ts -> Fun (mk cid) (map t2m ts)
- _ -> t
- mk = mkCId . ("mk" ++) . showCId . lookValCat pgf
-
--- dependency trees from Linearize.linearizeMark
-
-graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
-graphvizDependencyTree format debug mlab ms pgf lang exp = case format of
- "malt" -> unlines (lin2dep format)
- "malt_input" -> unlines (lin2dep format)
- _ -> prGraph True (lin2dep format)
-
- where
-
- lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $
- case format of
- "malt" -> map (concat . intersperse "\t") wnodes
- "malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
- _ -> prelude ++ nodes ++ links
-
- ifd s = if debug then s else []
-
- pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp
- ---- use Just str if you have str to match against
-
- prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
-
- nodes = map mkNode nodeWords
- mkNode (i,((_,p),ss)) =
- node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
- nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
- ((Just f,p),w) <- wlins pot]
-
- links = map mkLink thelinks
- thelinks = [(word y, x, label tr y x) |
- (_,((f,x),_)) <- tail nodeWords,
- let y = dominant x]
- mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
- node = show . show
-
- dominant x = case x of
- [] -> x
- _ | not (x == hx) -> hx
- _ -> dominant (init x)
- where
- hx = headArg (init x) tr x
-
- headArg x0 tr x = case (tr,x) of
- (Fun f [],[_]) -> x0 ---- ??
- (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f]
- (Fun f ts,i:y) -> headArg x0 (ts !! i) y
- _ -> x0 ----
-
- label tr y x = case span (uncurry (==)) (zip y x) of
- (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
- _ -> "" ----
-
- funAt tr x = case (tr,x) of
- (Fun f _ ,[]) -> f
- (Fun f ts,i:y) -> funAt (ts !! i) y
- _ -> mkCId (prTree tr) ----
-
- word x = if elem x sortedNodes then x else
- let x' = headArg x tr (x ++[0]) in
- if x' == x then [] else word x'
-
- tr = expr2tree exp
- sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
-
- labels = maybe Map.empty id mlab
- getHead i f = case Map.lookup f labels of
- Just ls -> length $ takeWhile (/= "head") ls
- _ -> i
- getLabel i f = case Map.lookup f labels of
- Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
- _ -> showCId f ++ "#" ++ show i
-
--- to generate CoNLL format for MaltParser
- nodeMap :: Map.Map [Int] Int
- nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
-
- arcMap :: Map.Map [Int] ([Int],String)
- arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
-
- lookDomLab p = case Map.lookup p arcMap of
- Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
- _ -> (0,rootlabel)
-
- wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
- (i, ((fun,p),ws)) <- tail nodeWords,
- let pos = showCId $ lookValCat pgf fun,
- let morph = unspec,
- let (dom,lab) = lookDomLab p
- ]
- maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
- unspec = "_"
- rootlabel = "ROOT"
-
-type Labels = Map.Map CId [String]
-
-getDepLabels :: [String] -> Labels
-getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
+import Text.PrettyPrint
+
+-- | Renders abstract syntax tree in Graphviz format
+graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String
+graphvizAbstractTree pgf (funs,cats) = render . tree2graph
+ where
+ tree2graph t =
+ text "graph {" $$
+ ppGraph [] [] 0 t $$
+ text "}"
+
+ getAbs xs (EAbs _ x e) = getAbs (x:xs) e
+ getAbs xs (ETyped e _) = getAbs xs e
+ getAbs xs e = (xs,e)
+
+ getApp (EApp x y) es = getApp x (y:es)
+ getApp (ETyped e _) es = getApp e es
+ getApp e es = (e,es)
+
+ getLbl scope (EFun f) = let fun = if funs then ppCId f else empty
+ cat = if cats then ppCId (lookValCat pgf f) else empty
+ sep = if funs && cats then colon else empty
+ in fun <+> sep <+> cat
+ getLbl scope (ELit l) = text (escapeStr (render (ppLit l)))
+ getLbl scope (EMeta i) = ppMeta i
+ getLbl scope (EVar i) = ppCId (scope !! i)
+ getLbl scope (ETyped e _) = getLbl scope e
+ getLbl scope (EImplArg e) = getLbl scope e
+
+ ppGraph scope ps i e0 =
+ let (xs, e1) = getAbs [] e0
+ (e2,args) = getApp e1 []
+ binds = if null xs
+ then empty
+ else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->"
+ (lbl,eargs) = case e2 of
+ EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node
+ _ -> (getLbl scope' e2, args)
+ scope' = xs ++ scope
+ in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$
+ (if null ps
+ then empty
+ else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$
+ vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs)
+
+ ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps))
+
+ escapeStr [] = []
+ escapeStr ('\\':cs) = '\\':'\\':escapeStr cs
+ escapeStr ('"' :cs) = '\\':'"' :escapeStr cs
+ escapeStr (c :cs) = c :escapeStr cs
--- parse trees from Linearize.linearizeMark
----- nubrec and domins are quadratic, but could be (n log n)
-
-graphvizParseTree :: PGF -> CId -> Expr -> String
-graphvizParseTree pgf lang = prGraph False . lin2tree pgf . concat . take 1 . markLinearizes pgf lang where
-
-lin2tree pgf s = prelude ++ nodes ++ links where
+type Labels = Map.Map CId [String]
- prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"]
+graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
+graphvizDependencyTree format debug mlab ms pgf lang t = render $
+ case format of
+ "malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
+ "malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
+ _ -> text "digraph {" $$
+ space $$
+ nest 2 (text "rankdir=LR ;" $$
+ text "node [shape = plaintext] ;" $$
+ vcat nodes $$
+ links) $$
+ text "}"
+ where
+ nodes = map mkNode leaves
+ links = empty
+ wnodes = undefined
+
+ nil = -1
+
+ bs = bracketedLinearize pgf lang t
+
+ leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs
+
+ groupAndIndexIt id [] = []
+ groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
+ in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
+ where
+ collect pws@((p1,w):pws1)
+ | p == p1 = let (ws,pws2) = collect pws1
+ in (w:ws,pws2)
+ collect pws = ([],pws)
+
+ getLeaves parent bs =
+ case bs of
+ Leaf w -> [(parent,w)]
+ Bracket fid _ _ bss -> concatMap (getLeaves fid) bss
+
+ mkNode (p,i,w) =
+ tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;"
- nodeRecs = zip [0..]
- (nub (filter (not . null) (nlins [postext] ++ [leaves postext])))
- nlins pts =
- nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] :
- concatMap nlins [ts | T _ ts <- pts]
- leaves pt = [(p++[j],s) | (j,(p,s)) <-
- zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]]
+{-
+ ifd s = if debug then s else []
+
+ pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp
+
+ nodes = map mkNode nodeWords
+ mkNode (i,((_,p),ss)) =
+ node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
+ nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
+ ((Just f,p),w) <- wlins pot]
+
+ links = map mkLink thelinks
+ thelinks = [(word y, x, label tr y x) |
+ (_,((f,x),_)) <- tail nodeWords,
+ let y = dominant x]
+ mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
+ node = show . show
+
+ dominant x = case x of
+ [] -> x
+ _ | not (x == hx) -> hx
+ _ -> dominant (init x)
+ where
+ hx = headArg (init x) tr x
+
+ headArg x0 tr x = case (tr,x) of
+ (Fun f [],[_]) -> x0 ---- ??
+ (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f]
+ (Fun f ts,i:y) -> headArg x0 (ts !! i) y
+ _ -> x0 ----
+
+ label tr y x = case span (uncurry (==)) (zip y x) of
+ (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
+ _ -> "" ----
+
+ funAt tr x = case (tr,x) of
+ (Fun f _ ,[]) -> f
+ (Fun f ts,i:y) -> funAt (ts !! i) y
+ _ -> mkCId (prTree tr) ----
+
+ word x = if elem x sortedNodes then x else
+ let x' = headArg x tr (x ++[0]) in
+ if x' == x then [] else word x'
+
+ tr = expr2tree exp
+ sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
+
+ labels = maybe Map.empty id mlab
+ getHead i f = case Map.lookup f labels of
+ Just ls -> length $ takeWhile (/= "head") ls
+ _ -> i
+ getLabel i f = case Map.lookup f labels of
+ Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
+ _ -> showCId f ++ "#" ++ show i
+
+ -- to generate CoNLL format for MaltParser
+ nodeMap :: Map.Map [Int] Int
+ nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
+
+ arcMap :: Map.Map [Int] ([Int],String)
+ arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
+
+ lookDomLab p = case Map.lookup p arcMap of
+ Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
+ _ -> (0,rootlabel)
+
+ wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
+ (i, ((fun,p),ws)) <- tail nodeWords,
+ let pos = showCId $ lookValCat pgf fun,
+ let morph = unspec,
+ let (dom,lab) = lookDomLab p
+ ]
+ maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
+ unspec = "_"
+ rootlabel = "ROOT"
+-}
- nubrec es rs = case rs of
- r:rr -> let r' = filter (not . flip elem es) (nub r)
- in r' : nubrec (r' ++ es) rr
- _ -> rs
- nodes = map mkStruct nodeRecs
+getDepLabels :: [String] -> Labels
+getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
- mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;"
- cat = showCId . lookValCat pgf
- fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs])
- struct i = "struct" ++ show i
- links = map mkEdge domins
- domins = nub [((i,x),(j,y)) |
- (i,xs) <- nodeRecs, (j,ys) <- nodeRecs,
- x <- xs, y <- ys, dominates x y]
- dominates (p,x) (q,y) = not (null q) && p == init q
- mkEdge ((i,x),(j,y)) =
- struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++
- struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;"
+graphvizParseTree :: PGF -> Language -> Tree -> String
+graphvizParseTree pgf lang = graphvizBracketedString . bracketedLinearize pgf lang
+
+graphvizBracketedString :: BracketedString -> String
+graphvizBracketedString = render . lin2tree
+ where
+ lin2tree bs =
+ text "graph {" $$
+ space $$
+ nest 2 (text "rankdir=BU ;" $$
+ text "node [shape = record, color = white] ;" $$
+ space $$
+ vcat (nodes bs)) $$
+ text "}"
+ where
+ nodes bs = zipWith mkStruct [0..] (interns ++ [zipWith (\i (l,p,w) -> (l,p,i,w)) [99990..] leaves])
+
+ nil = -1
+
+ leaves = getLeaves 0 nil bs
+ interns = getInterns 0 [(nil,bs)]
+
+ getLeaves level parent bs =
+ case bs of
+ Leaf w -> [(level-1,parent,w)]
+ Bracket fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
+
+ getInterns level [] = []
+ getInterns level nodes =
+ nub [(level-1,parent,fid,showCId cat) | (parent,Bracket fid _ cat _) <- nodes] :
+ getInterns (level+1) [(fid,child) | (_,Bracket fid _ _ children) <- nodes, child <- children]
+
+ mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
+ vcat [link pl pid l id | (pl,pid,id,_) <- cs]
+ link pl pid l id
+ | pl < 0 = empty
+ | otherwise = struct pl <> colon <> tag pid <> colon <> char 's' <+>
+ text "--" <+>
+ struct l <> colon <> tag id <> colon <> char 'n' <+> semi
+ fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs])
- postext = readPosText s
-- auxiliaries for graphviz syntax
-struct i = "struct" ++ show i
-mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n
-uncommas = map (\c -> if c==',' then 'c' else c)
-tag s = "<" ++ s ++ ">"
-showp = init . tail . show
-mtag = tag . ('n':) . uncommas
+struct l = text ("struct" ++ show l)
+tbrackets d = char '<' <> d <> char '>'
+tag i = char 'n' <> int i
-- word alignments from Linearize.markLinearize
-- words are chunks like {[0,1,1,0] old}
-graphvizAlignment :: PGF -> Expr -> String
-graphvizAlignment pgf = prGraph True . lin2graph . linsMark where
- linsMark t = [concat (take 1 (markLinearizes pgf la t)) | la <- Map.keys (concretes pgf)]
-
-lin2graph :: [String] -> [String]
-lin2graph ss = -- trace (show ss) $
- prelude ++ nodes ++ links
-
- where
-
- prelude = ["rankdir=LR ;", "node [shape = record] ;"]
-
- nlins :: [(Int,[((Int,String),String)])]
- nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) |
- (i,ws) <- zip [0..] (map (wlins . readPosText) ss)]
-
- unw = concat . intersperse "\\ " -- space escape in graphviz
-
- nodes = map mkStruct nlins
-
- mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
-
- fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
-
- links = nub $ concatMap mkEdge (init nlins)
-
- mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
- [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
-
- edge i v w =
- struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
-{-
-alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double)
-alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where
- linsMark t =
- [s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)]
-
- mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double)
- mkStat =
-
- mkAlign :: [String] -> [(String,String)]
- mkAlign ss =
-
- nlins :: [(Int,[((Int,String),String)])]
- nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) |
- (i,vs) <- zip [0..] (map (wlins . readPosText) ss)]
-
- nodes = map mkStruct nlins
-
- mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
-
- fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
-
- links = nub $ concatMap mkEdge (init nlins)
-
- mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
- [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
-
- edge i v w =
- struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
--}
-
-wlins :: PosText -> [((Maybe CId,[Int]),[String])]
-wlins pt = case pt of
- T p pts -> concatMap (lins p) pts
- M ws -> if null ws then [] else [((Nothing,[]),ws)]
- where
- lins p pt = case pt of
- T q pts -> concatMap (lins q) pts
- M ws -> if null ws then [] else [(p,ws)]
-
-data PosText =
- T (Maybe CId,[Int]) [PosText]
- | M [String]
- deriving Show
-
-readPosText :: String -> PosText
-readPosText = fst . head . (RP.readP_to_S pPosText) where
- pPosText = do
- RP.char '(' >> RP.skipSpaces
- p <- pPos
- RP.skipSpaces
- ts <- RP.many pPosText
- RP.char ')' >> RP.skipSpaces
- return (T p ts)
- RP.<++ do
- ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
- return (M ws)
- pPos = do
- fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
- RP.<++ (return Nothing)
- RP.char '[' >> RP.skipSpaces
- is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
- RP.char ']' >> RP.skipSpaces
- RP.char ')' RP.<++ return ' '
- return (fun,map read is)
-
-
-{-
-digraph{
-rankdir ="LR" ;
-node [shape = record] ;
-
-struct1 [label = "<f0> this|<f1> very|<f2> intelligent|<f3> man"] ;
-struct2 [label = "<f0> cet|<f1> homme|<f2> tres|<f3> intelligent|<f4> ci"] ;
-
-struct1:f0 -> struct2:f0 ;
-struct1:f1 -> struct2:f2 ;
-struct1:f2 -> struct2:f3 ;
-struct1:f3 -> struct2:f1 ;
-struct1:f0 -> struct2:f4 ;
-}
--}
-
+graphvizAlignment :: PGF -> [Language] -> Expr -> String
+graphvizAlignment pgf langs = render . lin2graph . linsBracketed
+ where
+ linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs]
+
+ lin2graph :: [BracketedString] -> Doc
+ lin2graph bss =
+ text "digraph {" $$
+ space $$
+ nest 2 (text "rankdir=LR ;" $$
+ text "node [shape = record] ;" $$
+ space $$
+ mkLayers 0 leaves) $$
+ text "}"
+ where
+ nil = -1
+
+ leaves = map (groupAndIndexIt 0 . getLeaves nil) bss
+
+ groupAndIndexIt id [] = []
+ groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
+ in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
+ where
+ collect pws@((p1,w):pws1)
+ | p == p1 = let (ws,pws2) = collect pws1
+ in (w:ws,pws2)
+ collect pws = ([],pws)
+
+ getLeaves parent bs =
+ case bs of
+ Leaf w -> [(parent,w)]
+ Bracket fid _ _ bss -> concatMap (getLeaves fid) bss
+
+ mkLayers l [] = empty
+ mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
+ (case css of
+ (ncs:_) -> vcat (map (mkLinks l ncs) cs)
+ _ -> empty) $$
+ mkLayers (l+1) css
+
+ mkLinks l cs (p0,id0,_) =
+ vcat (map (\id1 -> struct l <> colon <> tag id0 <> colon <> char 'e' <+>
+ text "->" <+>
+ struct (l+1) <> colon <> tag id1 <> colon <> char 'w' <+> semi) indices)
+ where
+ indices = [id1 | (p1,id1,_) <- cs, p1 == p0]
+
+ fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (_,id,w) <- cs])