diff options
| author | krasimir <krasimir@chalmers.se> | 2010-04-30 14:36:06 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2010-04-30 14:36:06 +0000 |
| commit | 8460598801b644f323db0b7d7ca879e3acb9215b (patch) | |
| tree | 02aaf44ec76bf9738f996bfc1688a94f308cde27 /src | |
| parent | 7a4cb3c2715c5dd61309b9bc0309142a44393c29 (diff) | |
first incarnation of the bracketed string API
Diffstat (limited to 'src')
| -rw-r--r-- | src/compiler/GF/Command/Commands.hs | 40 | ||||
| -rw-r--r-- | src/compiler/GF/Compile/ExampleBased.hs | 15 | ||||
| -rw-r--r-- | src/compiler/GF/Quiz.hs | 5 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF.hs | 64 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/Data.hs | 3 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/Forest.hs | 101 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/Linearize.hs | 174 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/Macros.hs | 54 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/Parse.hs | 88 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/Printer.hs | 2 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/VisualizeTree.hs | 583 |
11 files changed, 646 insertions, 483 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 0273b82eb..f89e497ad 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -22,6 +22,7 @@ import PGF.Morphology import PGF.Printer import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities) import PGF.Generate (generateRandomFrom) ---- +import PGF.Tree (Tree(Fun), expr2tree, tree2expr) import GF.Compile.Export import GF.Compile.ExampleBased import GF.Infra.Option (noOptions, readOutputFormat) @@ -150,7 +151,7 @@ allCommands env@(pgf, mos) = Map.fromList [ "flag -format." ], exec = \opts es -> do - let grph = if null es then [] else graphvizAlignment pgf (head es) + let grph = if null es then [] else graphvizAlignment pgf (languages pgf) (head es) if isFlag "view" opts || isFlag "format" opts then do let file s = "_grph." ++ s let view = optViewGraph opts @@ -481,11 +482,14 @@ allCommands env@(pgf, mos) = Map.fromList [ "will accept unknown adjectives, nouns and verbs with the resource grammar." ], exec = \opts ts -> - returnFromExprsPar opts ts $ concatMap (par opts) $ toStrings ts, + return $ fromParse opts ts $ concatMap (par opts) $ toStrings ts, flags = [ ("cat","target category of parsing"), ("lang","the languages of parsing (comma-separated, no spaces)"), ("openclass","list of open-class categories for robust parsing") + ], + options = [ + ("bracket","prints the bracketed string from the parser") ] }), ("pg", emptyCommandInfo { ----- @@ -893,8 +897,8 @@ allCommands env@(pgf, mos) = Map.fromList [ ] where par opts s = case optOpenTypes opts of - [] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts] - open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts] + [] -> [parse pgf lang (optType opts) s | lang <- optLangs opts] + open_typs -> [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts] void = ([],[]) @@ -918,9 +922,17 @@ allCommands env@(pgf, mos) = Map.fromList [ map (map (unl . snd)) . tabularLinearizes pgf lang _ | isOpt "table" opts -> unlines . concat . intersperse [[]] . map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang - _ | isOpt "bracket" opts -> unlines . markLinearizes pgf lang + _ | isOpt "bracket" opts -> showBracketedString . bracketedLinearize pgf lang _ -> unl . linearize pgf lang + -- replace each non-atomic constructor with mkC, where C is the val cat + tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where + t2m t = case t of + Fun cid [] -> t + Fun cid ts -> Fun (mk cid) (map t2m ts) + _ -> t + mk = mkCId . ("mk" ++) . showCId . lookValCat pgf + unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ---- getUnlex opts lang = case words (valStrOpts "unlexer" "" opts) of @@ -991,14 +1003,22 @@ allCommands env@(pgf, mos) = Map.fromList [ toStrings = map showAsString toString = unwords . toStrings + fromParse opts ts parses + | isOpt "bracket" opts = case catMaybes bss of + [] -> ([], "no brackets found") + bss -> ([], unlines $ map showBracketedString bss) + | otherwise = case ts of + [] -> ([], "no trees found" ++ + missingWordMsg (optMorpho opts) (concatMap words (toStrings ts)) + ) + _ -> fromExprs ts + where + (prs,bss) = unzip parses + ts = [t | ParseResult ts <- prs, t <- ts] + returnFromExprs es = return $ case es of [] -> ([], "no trees found") _ -> fromExprs es - returnFromExprsPar opts ts es = return $ case es of - [] -> ([], "no trees found" ++ - missingWordMsg (optMorpho opts) (concatMap words (toStrings ts)) - ) - _ -> fromExprs es prGrammar opts | isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf diff --git a/src/compiler/GF/Compile/ExampleBased.hs b/src/compiler/GF/Compile/ExampleBased.hs index 983f38869..f197722ba 100644 --- a/src/compiler/GF/Compile/ExampleBased.hs +++ b/src/compiler/GF/Compile/ExampleBased.hs @@ -41,17 +41,20 @@ convertFile conf src file = do convEx (cat,ex) = do appn "(" let typ = maybe (error "no valid cat") id $ readType cat - let ts = rank $ parse pgf lang typ ex - ws <- case ts of - [] -> do + ws <- case fst (parse pgf lang typ ex) of + ParseFailed _ -> do let ws = morphoMissing morpho (words ex) appv ("WARNING: cannot parse example " ++ ex) case ws of [] -> return () _ -> appv (" missing words: " ++ unwords ws) - return ws - t:tt -> appv ("WARNING: ambiguous example " ++ ex) >> - appn t >> mapM_ (appn . (" --- " ++)) tt >> return [] + return ws + TypeError _ _ -> + return [] + ParseResult ts -> + case rank ts of + (t:tt) -> appv ("WARNING: ambiguous example " ++ ex) >> + appn t >> mapM_ (appn . (" --- " ++)) tt >> return [] appn ")" return ws rank ts = case probs conf of diff --git a/src/compiler/GF/Quiz.hs b/src/compiler/GF/Quiz.hs index 4a4caafc8..2a9b28ccb 100644 --- a/src/compiler/GF/Quiz.hs +++ b/src/compiler/GF/Quiz.hs @@ -46,7 +46,10 @@ translationList mex mprobs pgf ig og typ number = do return $ map mkOne $ ts where mkOne t = (norml (linearize pgf ig t), map (norml . linearize pgf og) (homonyms t)) - homonyms = nub . parse pgf ig typ . linearize pgf ig + homonyms t = + case (fst . parse pgf ig typ . linearize pgf ig) t of + ParseResult ts -> ts + _ -> [] morphologyList :: Maybe Expr -> Maybe Probabilities -> diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index 9ab24b100..f2235cf37 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -1,7 +1,7 @@ ------------------------------------------------- -- | -- Module : PGF --- Maintainer : Aarne Ranta +-- Maintainer : Krasimir Angelov -- Stability : stable -- Portability : portable -- @@ -50,9 +50,12 @@ module PGF( -- * Operations -- ** Linearization - linearize, linearizeAllLang, linearizeAll, + linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes, groupResults, -- lins of trees by language, removing duplicates showPrintName, + + BracketedString(..), FId, LIndex, + Forest.showBracketedString, -- ** Parsing parse, parseWithRecovery, parseAllLang, parseAll, @@ -73,10 +76,11 @@ module PGF( checkType, checkExpr, inferExpr, TcError(..), ppTcError, - -- ** Word Completion (Incremental Parsing) + -- ** Low level parsing API complete, Parse.ParseState, - Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, Parse.extractTrees, + Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, + Parse.ParseResult(..), Parse.getParseResult, -- ** Generation generateRandom, generateAll, generateAllDepth, @@ -90,6 +94,7 @@ module PGF( graphvizAbstractTree, graphvizParseTree, graphvizDependencyTree, + graphvizBracketedString, graphvizAlignment, -- * Browsing @@ -107,6 +112,7 @@ import PGF.Expr (Tree) import PGF.Morphology import PGF.Data import PGF.Binary +import qualified PGF.Forest as Forest import qualified PGF.Parse as Parse import GF.Data.Utilities (replace) @@ -131,34 +137,18 @@ import Text.PrettyPrint -- > $ gf -make <grammar file name> readPGF :: FilePath -> IO PGF --- | Linearizes given expression as string in the language -linearize :: PGF -> Language -> Tree -> String - -- | Tries to parse the given string in the specified language --- and to produce abstract syntax expression. An empty --- list is returned if the parsing is not successful. The list may also --- contain more than one element if the grammar is ambiguous. --- Throws an exception if the given language cannot be used --- for parsing, see 'canParse'. -parse :: PGF -> Language -> Type -> String -> [Tree] - -parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree] - --- | The same as 'linearizeAllLang' but does not return --- the language. -linearizeAll :: PGF -> Tree -> [String] +-- and to produce abstract syntax expression. +parse :: PGF -> Language -> Type -> String -> (Parse.ParseResult,Maybe BracketedString) --- | Linearizes given expression as string in all languages --- available in the grammar. -linearizeAllLang :: PGF -> Tree -> [(Language,String)] +-- | This is an experimental function. Use it on your own risk +parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> (Parse.ParseResult,Maybe BracketedString) -- | The same as 'parseAllLang' but does not return -- the language. parseAll :: PGF -> Type -> String -> [[Tree]] -- | Tries to parse the given string with all available languages. --- Languages which cannot be used for parsing (see 'canParse') --- are ignored. -- The returned list contains pairs of language -- and list of abstract syntax expressions -- (this is a list, since grammars can be ambiguous). @@ -227,8 +217,6 @@ complete :: PGF -> Language -> Type -> String readPGF f = decodeFile f -linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang - parse pgf lang typ s = case Map.lookup lang (concretes pgf) of Just cnc -> Parse.parse pgf lang typ (words s) @@ -236,10 +224,6 @@ parse pgf lang typ s = parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s) -linearizeAll mgr = map snd . linearizeAllLang mgr -linearizeAllLang mgr t = - [(lang,PGF.linearize mgr lang t) | lang <- languages mgr] - groupResults :: [[(Language,String)]] -> [(Language,[String])] groupResults = Map.toList . foldr more Map.empty . start . concat where @@ -250,7 +234,7 @@ groupResults = Map.toList . foldr more Map.empty . start . concat parseAll mgr typ = map snd . parseAllLang mgr typ parseAllLang mgr typ s = - [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang typ s, not (null ts)] + [(lang,ts) | lang <- languages mgr, (Parse.ParseResult ts,_) <- [parse mgr lang typ s], not (null ts)] generateRandom pgf cat = do gen <- newStdGen @@ -280,14 +264,18 @@ functionType pgf fun = Nothing -> Nothing complete pgf from typ input = - let (ws,prefix) = tokensAndPrefix input - state0 = Parse.initState pgf from typ - in case loop state0 ws of - Nothing -> [] - Just state -> - (if null prefix && not (null (Parse.extractTrees state typ)) then [unwords ws ++ " "] else []) - ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)] + let (ws,prefix) = tokensAndPrefix input + state0 = Parse.initState pgf from typ + in case loop state0 ws of + Nothing -> [] + Just state -> (if null prefix && isSuccessful state then [unwords ws ++ " "] else []) + ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)] where + isSuccessful state = + case Parse.getParseResult state typ of + (Parse.ParseResult ts, _) -> not (null ts) + _ -> False + tokensAndPrefix :: String -> ([String],String) tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "") | null ws = ([],"") diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index b604317fc..7623a05f3 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -56,7 +56,7 @@ data Symbol data Production = PApply {-# UNPACK #-} !FunId [FId] | PCoerce {-# UNPACK #-} !FId - | PConst Expr [String] + | PConst CId Expr [String] deriving (Eq,Ord,Show) data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String) data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show) @@ -86,7 +86,6 @@ data Tokn = | KP [String] [Alternative] deriving (Eq,Ord,Show) - -- merge two PGFs; fails is differens absnames; priority to second arg unionPGF :: PGF -> PGF -> PGF diff --git a/src/runtime/haskell/PGF/Forest.hs b/src/runtime/haskell/PGF/Forest.hs new file mode 100644 index 000000000..3dd996aa6 --- /dev/null +++ b/src/runtime/haskell/PGF/Forest.hs @@ -0,0 +1,101 @@ +-------------------------------------------------
+-- |
+-- Module : PGF
+-- Maintainer : Krasimir Angelov
+-- Stability : stable
+-- Portability : portable
+--
+-- Forest is a compact representation of a set
+-- of parse trees. This let us to efficiently
+-- represent local ambiguities
+--
+-------------------------------------------------
+
+module PGF.Forest( Forest(..)
+ , BracketedString, showBracketedString
+ , linearizeWithBrackets
+ ) where
+
+import PGF.CId
+import PGF.Data
+import PGF.Macros
+import Data.List
+import Data.Array.IArray
+import qualified Data.Set as Set
+import qualified Data.Map as Map
+import qualified Data.IntSet as IntSet
+import qualified Data.IntMap as IntMap
+import Control.Monad
+
+data Forest
+ = Forest
+ { abstr :: Abstr
+ , concr :: Concr
+ , forest :: IntMap.IntMap (Set.Set Production)
+ , root :: {-# UNPACK #-} !FId
+ , label :: {-# UNPACK #-} !LIndex
+ }
+
+--------------------------------------------------------------------
+-- Rendering of bracketed strings
+--------------------------------------------------------------------
+
+linearizeWithBrackets :: Forest -> BracketedString
+linearizeWithBrackets = head . snd . untokn "" . bracketedTokn
+
+---------------------------------------------------------------
+-- Internally we have to do everything with Tokn first because
+-- we must handle the pre {...} construction.
+--
+
+bracketedTokn :: Forest -> BracketedTokn
+bracketedTokn (Forest abs cnc forest root label) =
+ let (fid,cat,lin) = render IntMap.empty root
+ in Bracket_ fid label cat (lin ! label)
+ where
+ trusted = trustedSpots IntSet.empty root
+
+ render parents fid =
+ case (IntMap.lookup fid parents) `mplus` (fmap Set.toList $ IntMap.lookup fid forest) of
+ Just (p:ps) -> descend (IntMap.insert fid ps parents) p
+ Nothing -> error ("wrong forest id " ++ show fid)
+ where
+ descend parents (PApply funid args) = let (CncFun fun lins) = cncfuns cnc ! funid
+ Just (DTyp _ cat _,_,_) = Map.lookup fun (funs abs)
+ largs = map (render parents) args
+ in (fid,cat,listArray (bounds lins) [computeSeq seqid largs | seqid <- elems lins])
+ descend parents (PCoerce fid) = render parents fid
+ descend parents (PConst cat _ ts) = (fid,cat,listArray (0,0) [[LeafKS ts]])
+
+ trustedSpots parents fid
+ | IntSet.member fid parents
+ = IntSet.empty
+ | otherwise = IntSet.insert fid $
+ case IntMap.lookup fid forest of
+ Just prods -> foldl1 IntSet.intersection [descend prod | prod <- Set.toList prods]
+ Nothing -> IntSet.empty
+ where
+ parents' = IntSet.insert fid parents
+
+ descend (PApply funid args) = IntSet.unions (map (trustedSpots parents') args)
+ descend (PCoerce fid) = trustedSpots parents' fid
+ descend (PConst c e _) = IntSet.empty
+
+ computeSeq :: SeqId -> [(FId,CId,LinTable)] -> [BracketedTokn]
+ computeSeq seqid args = concatMap compute (elems seq)
+ where
+ seq = sequences cnc ! seqid
+
+ compute (SymCat d r) = getArg d r
+ compute (SymLit d r) = getArg d r
+ compute (SymKS ts) = [LeafKS ts]
+ compute (SymKP ts alts) = [LeafKP ts alts]
+
+ getArg d r
+ | not (null arg_lin) &&
+ IntSet.member fid trusted
+ = [Bracket_ fid r cat arg_lin]
+ | otherwise = arg_lin
+ where
+ arg_lin = lin ! r
+ (fid,cat,lin) = args !! d
diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs index 058d8281f..4a399f5e9 100644 --- a/src/runtime/haskell/PGF/Linearize.hs +++ b/src/runtime/haskell/PGF/Linearize.hs @@ -1,8 +1,15 @@ -module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where +module PGF.Linearize + ( linearize + , linearizeAll + , linearizeAllLang + , bracketedLinearize + , tabularLinearizes + ) where import PGF.CId import PGF.Data import PGF.Macros +import PGF.Expr import Data.Array.IArray import Data.List import Control.Monad @@ -10,99 +17,112 @@ import qualified Data.Map as Map import qualified Data.IntMap as IntMap import qualified Data.Set as Set --- linearization and computation of concrete PGF Terms +-------------------------------------------------------------------- +-- The API +-------------------------------------------------------------------- -type LinTable = Array LIndex [Tokn] +-- | Linearizes given expression as string in the language +linearize :: PGF -> Language -> Tree -> String +linearize pgf lang = concat . take 1 . map (unwords . concatMap flattenBracketedString . snd . untokn "" . (!0)) . linTree pgf lang -linearizes :: PGF -> CId -> Expr -> [String] -linearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang (\_ _ lint -> lint) +-- | The same as 'linearizeAllLang' but does not return +-- the language. +linearizeAll :: PGF -> Tree -> [String] +linearizeAll pgf = map snd . linearizeAllLang pgf -linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable] -linTree pgf lang mark e = lin0 [] [] [] Nothing e +-- | Linearizes given expression as string in all languages +-- available in the grammar. +linearizeAllLang :: PGF -> Tree -> [(Language,String)] +linearizeAllLang pgf t = [(lang,linearize pgf lang t) | lang <- Map.keys (concretes pgf)] + +-- | Linearizes given expression as a bracketed string in the language +bracketedLinearize :: PGF -> Language -> Tree -> BracketedString +bracketedLinearize pgf lang = head . concat . map (snd . untokn "" . (!0)) . linTree pgf lang + +-- | Creates a table from feature name to linearization. +-- The outher list encodes the variations +tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]] +tabularLinearizes pgf lang e = map (zip lbls . map (unwords . concatMap flattenBracketedString . snd . untokn "") . elems) + (linTree pgf lang e) + where + lbls = case unApp e of + Just (f,_) -> let cat = valCat (lookType pgf f) + in case Map.lookup cat (cnccats (lookConcr pgf lang)) of + Just (CncCat _ _ lbls) -> elems lbls + Nothing -> error "No labels" + Nothing -> error "Not function application" + +-------------------------------------------------------------------- +-- Implementation +-------------------------------------------------------------------- + +linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn] +linTree pgf lang e = + [amapWithIndex (\label -> Bracket_ fid label cat) lin | (_,(fid,cat,lin)) <- lin0 [] [] Nothing 0 e] where cnc = lookMap (error "no lang") lang (concretes pgf) lp = lproductions cnc - - lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e - lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e - lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e [] - | otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs]) - - lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es) - lin path xs mb_fid (ELit l) [] = case l of - LStr s -> return (mark Nothing path (ss s)) - LInt n -> return (mark Nothing path (ss (show n))) - LFlt f -> return (mark Nothing path (ss (show f))) - lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es) - lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es) - lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es) - lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es - lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es - - ss s = listArray (0,0) [[KS s]] - - apply path xs mb_fid f es = + + lin0 xs ys mb_fid n_fid (EAbs _ x e) = lin0 (showCId x:xs) ys mb_fid n_fid e + lin0 xs ys mb_fid n_fid (ETyped e _) = lin0 xs ys mb_fid n_fid e + lin0 xs ys mb_fid n_fid e | null xs = lin ys mb_fid n_fid e [] + | otherwise = apply (xs ++ ys) mb_fid n_fid _B (e:[ELit (LStr x) | x <- xs]) + + lin xs mb_fid n_fid (EApp e1 e2) es = lin xs mb_fid n_fid e1 (e2:es) + lin xs mb_fid n_fid (ELit l) [] = case l of + LStr s -> return (n_fid+1,(n_fid,cidString,ss s)) + LInt n -> return (n_fid+1,(n_fid,cidInt ,ss (show n))) + LFlt f -> return (n_fid+1,(n_fid,cidFloat ,ss (show f))) + lin xs mb_fid n_fid (EMeta i) es = apply xs mb_fid n_fid _V (ELit (LStr ('?':show i)):es) + lin xs mb_fid n_fid (EFun f) es = apply xs mb_fid n_fid f es + lin xs mb_fid n_fid (EVar i) es = apply xs mb_fid n_fid _V (ELit (LStr (xs !! i)) :es) + lin xs mb_fid n_fid (ETyped e _) es = lin xs mb_fid n_fid e es + lin xs mb_fid n_fid (EImplArg e) es = lin xs mb_fid n_fid e es + + ss s = listArray (0,0) [[LeafKS [s]]] + + apply :: [String] -> Maybe FId -> FId -> CId -> [Expr] -> [(FId,(FId, CId, LinTable))] + apply xs mb_fid n_fid f es = case Map.lookup f lp of - Just prods -> case lookupProds mb_fid prods of - Just set -> do prod <- Set.toList set - case prod of - PApply funid fids -> do guard (length fids == length es) - args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es) - let (CncFun _ lins) = cncfuns cnc ! funid - return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]) - PCoerce fid -> apply path xs (Just fid) f es - Nothing -> mzero - Nothing -> apply path xs mb_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin + Just prods -> do prod <- lookupProds mb_fid prods + case prod of + PApply funid fids -> do guard (length fids == length es) + (n_fid,args) <- descend n_fid (zip fids es) + let (CncFun fun lins) = cncfuns cnc ! funid + Just (DTyp _ cat _,_,_) = Map.lookup fun (funs (abstract pgf)) + return (n_fid+1,(n_fid,cat,listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])) + PCoerce fid -> apply xs (Just fid) n_fid f es + Nothing -> apply xs mb_fid n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin where - lookupProds (Just fid) prods = IntMap.lookup fid prods + lookupProds (Just fid) prods = maybe [] Set.toList (IntMap.lookup fid prods) lookupProds Nothing prods - | f == _B || f == _V = Nothing - | otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods))) + | f == _B || f == _V = [] + | otherwise = [prod | (fid,set) <- IntMap.toList prods, prod <- Set.toList set] - sub i path - | f == _B || f == _V = path - | otherwise = i:path + descend n_fid [] = return (n_fid,[]) + descend n_fid ((fid,e):fes) = do (n_fid,xx) <- lin0 [] xs (Just fid) n_fid e + (n_fid,xxs) <- descend n_fid fes + return (n_fid,xx:xxs) isApp (PApply _ _) = True isApp _ = False + computeSeq :: SeqId -> [(FId,CId,LinTable)] -> [BracketedTokn] computeSeq seqid args = concatMap compute (elems seq) where seq = sequences cnc ! seqid - compute (SymCat d r) = (args !! d) ! r - compute (SymLit d r) = (args !! d) ! r - compute (SymKS ts) = map KS ts - compute (SymKP ts alts) = [KP ts alts] - -untokn :: [Tokn] -> [String] -untokn ts = case ts of - KP d _ : [] -> d - KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss - KS s : ws -> s : untokn ws - [] -> [] - where - sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of - v:_ -> v - _ -> d - --- create a table from labels+params to variants -tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]] -tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (linTree pgf lang (\_ _ lint -> lint) e) - where - lbls = case unApp e of - Just (f,_) -> let cat = valCat (lookType pgf f) - in case Map.lookup cat (cnccats (lookConcr pgf lang)) of - Just (CncCat _ _ lbls) -> elems lbls - Nothing -> error "No labels" - Nothing -> error "Not function application" + compute (SymCat d r) = getArg d r + compute (SymLit d r) = getArg d r + compute (SymKS ts) = [LeafKS ts] + compute (SymKP ts alts) = [LeafKP ts alts] + getArg d r + | not (null arg_lin) = [Bracket_ fid r cat arg_lin] + | otherwise = arg_lin + where + arg_lin = lin ! r + (fid,cat,lin) = args !! d --- show bracketed markup with references to tree structure -markLinearizes :: PGF -> CId -> Expr -> [String] -markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark - where - mark mb_f path lint = amap (bracket mb_f path) lint - - bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"] - bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"] +amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2 +amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr)) diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index 8886bc696..1b563fc48 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -11,6 +11,7 @@ import qualified Data.Array as Array import Data.Maybe import Data.List import GF.Data.Utilities(sortNub) +import Text.PrettyPrint -- operations for manipulating PGF grammars and objects @@ -202,3 +203,56 @@ updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pg getFunctions (PCoerce fid) = case IntMap.lookup fid productions of Nothing -> [] Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod] + + +-- Utilities for doing linearization + +-- | BracketedString represents a sentence that is linearized +-- as usual but we also want to retain the ''brackets'' that +-- mark the beginning and the end of each constituent. +data BracketedString + = Leaf String -- ^ this is the leaf i.e. a single token + | Bracket {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString] -- ^ this is a bracket. The 'CId' is the category of + -- the phrase. The 'FId' is an unique identifier for + -- every phrase in the sentence. For context-free grammars + -- i.e. without discontinuous constituents this identifier + -- is also unique for every bracket. When there are discontinuous + -- phrases then the identifiers are unique for every phrase but + -- not for every bracket since the bracket represents a constituent. + -- The different constituents could still be distinguished by using + -- the constituent index i.e. 'LIndex'. If the grammar is reduplicating + -- then the constituent indices will be the same for all brackets + -- that represents the same constituent. + +data BracketedTokn + = LeafKS [String] + | LeafKP [String] [Alternative] + | Bracket_ {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedTokn] -- Invariant: the list is not empty + +type LinTable = Array.Array LIndex [BracketedTokn] + +-- | Renders the bracketed string as string where +-- the brackets are shown as @(S ...)@ where +-- @S@ is the category. +showBracketedString :: BracketedString -> String +showBracketedString = render . ppBracketedString + +ppBracketedString (Leaf t) = text t +ppBracketedString (Bracket fcat index cat bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss)) + +untokn :: String -> BracketedTokn -> (String,[BracketedString]) +untokn nw (LeafKS ts) = (head ts,map Leaf ts) +untokn nw (LeafKP d vs) = let ts = sel d vs nw + in (head ts,map Leaf ts) + where + sel d vs nw = + case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of + v:_ -> v + _ -> d +untokn nw (Bracket_ fid index cat bss) = + let (nw',bss') = mapAccumR untokn nw bss + in (nw',[Bracket fid index cat (concat bss')]) + +flattenBracketedString :: BracketedString -> [String] +flattenBracketedString (Leaf w) = [w] +flattenBracketedString (Bracket _ _ _ bss) = concatMap flattenBracketedString bss diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index ab5c91f02..85fc026aa 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -6,7 +6,7 @@ module PGF.Parse , nextState
, getCompletions
, recoveryStates
- , extractTrees
+ , ParseResult(..), getParseResult
, parse
, parseWithRecovery
) where
@@ -14,7 +14,7 @@ module PGF.Parse import Data.Array.IArray
import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl')
-import Data.Maybe (fromMaybe, maybe)
+import Data.Maybe (fromMaybe, maybe, maybeToList)
import qualified Data.Map as Map
import qualified GF.Data.TrieMap as TMap
import qualified Data.IntMap as IntMap
@@ -27,26 +27,35 @@ import PGF.Data import PGF.Expr(Tree)
import PGF.Macros
import PGF.TypeCheck
-import Debug.Trace
-
-parse :: PGF -> Language -> Type -> [String] -> [Tree]
-parse pgf lang typ toks = loop (initState pgf lang typ) toks
+import PGF.Forest(Forest(Forest), linearizeWithBrackets)
+
+-- | This data type encodes the different outcomes which you could get from the parser.
+data ParseResult
+ = ParseFailed Int -- ^ The integer is the position in number of tokens where the parser failed.
+ | TypeError FId [TcError] -- ^ The parsing was successful but none of the trees is type correct.
+ -- The forest id ('FId') points to the bracketed string from the parser
+ -- where the type checking failed. More than one error is returned
+ -- if there are many analizes for some phrase but they all are not type correct.
+ | ParseResult [Tree] -- ^ If the parsing was successful we get a list of abstract syntax trees. The list should be non-empty.
+
+parse :: PGF -> Language -> Type -> [String] -> (ParseResult,Maybe BracketedString)
+parse pgf lang typ toks = loop 0 (initState pgf lang typ) toks
where
- loop ps [] = extractTrees ps typ
- loop ps (t:ts) = case nextState ps t of
- Left es -> []
- Right ps -> loop ps ts
+ loop i ps [] = getParseResult ps typ
+ loop i ps (t:ts) = case nextState ps t of
+ Left es -> (ParseFailed i,Nothing)
+ Right ps -> loop (i+1) ps ts
-parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree]
+parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> (ParseResult,Maybe BracketedString)
parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks
where
- accept ps [] = extractTrees ps typ
+ accept ps [] = getParseResult ps typ
accept ps (t:ts) =
case nextState ps t of
Right ps -> accept ps ts
Left es -> skip (recoveryStates open_typs es) ts
- skip ps_map [] = extractTrees (fst ps_map) typ
+ skip ps_map [] = getParseResult (fst ps_map) typ
skip ps_map (t:ts) =
case Map.lookup t (snd ps_map) of
Just ps -> accept ps ts
@@ -145,23 +154,31 @@ recoveryStates open_types (EState pgf cnc chart) = -- that spans the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
-- the same as the startup category.
-extractTrees :: ParseState -> Type -> [Tree]
-extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
- nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
+getParseResult :: ParseState -> Type -> (ParseResult,Maybe BracketedString)
+getParseResult (PState pgf cnc chart items) ty@(DTyp _ start _) =
+ let mb_bs = case roots of
+ ((root,lbl):_) -> Just $ linearizeWithBrackets $ Forest (abstract pgf) cnc (forest st) root lbl
+ _ -> Nothing
+
+ exps = nubsort $ do
+ (fid,lbl) <- roots
+ (fvs,e) <- go Set.empty 0 (0,fid)
+ guard (Set.null fvs)
+ Right e1 <- [checkExpr pgf e ty]
+ return e1
+
+ in (ParseResult exps,mb_bs)
where
(mb_agenda,acc) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
(_,st) = process Nothing (\_ _ -> id) (sequences cnc) (cncfuns cnc) agenda () chart
- exps =
- case Map.lookup start (cnccats cnc) of
- Just (CncCat s e lbls) -> do cat <- range (s,e)
- lbl <- indices lbls
- Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
- (fvs,tree) <- go Set.empty 0 (0,fid)
- guard (Set.null fvs)
- return tree
- Nothing -> mzero
+ roots = case Map.lookup start (cnccats cnc) of
+ Just (CncCat s e lbls) -> do cat <- range (s,e)
+ lbl <- indices lbls
+ fid <- maybeToList (lookupPC (PK cat lbl 0) (passive st))
+ return (fid,lbl)
+ Nothing -> mzero
go rec fcat' (d,fcat)
| fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
@@ -189,6 +206,7 @@ extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) = freeVar (EFun v) = Set.singleton v
freeVar _ = Set.empty
+
process mbt fn !seqs !funs [] acc chart = (acc,chart)
process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
| inRange (bounds lin) ppos =
@@ -218,15 +236,15 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac Nothing -> fid
Just fid -> fid
- in case [ts | PConst _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of
+ in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of
(toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
in process mbt fn seqs funs items acc' chart
[] -> case litCatMatch fid mbt of
- Just (toks,lit)
+ Just (cat,lit,toks)
-> let fid' = nextId chart
!acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
in process mbt fn seqs funs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart)
- ,forest =IntMap.insert fid' (Set.singleton (PConst lit toks)) (forest chart)
+ ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart)
,nextId =nextId chart+1
}
Nothing -> process mbt fn seqs funs items acc chart
@@ -260,12 +278,12 @@ updateAt :: Int -> a -> [a] -> [a] updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
litCatMatch fcat (Just t)
- | fcat == fcatString = Just ([t],ELit (LStr t))
- | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n));
+ | fcat == fcatString = Just (cidString,ELit (LStr t),[t])
+ | fcat == fcatInt = case reads t of {[(n,"")] -> Just (cidInt,ELit (LInt n),[t]);
_ -> Nothing }
- | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d));
+ | fcat == fcatFloat = case reads t of {[(d,"")] -> Just (cidFloat,ELit (LFlt d),[t]);
_ -> Nothing }
- | fcat == fcatVar = Just ([t],EFun (mkCId t))
+ | fcat == fcatVar = Just (cidVar,EFun (mkCId t),[t])
litCatMatch _ _ = Nothing
@@ -341,9 +359,9 @@ foldForest f g b fcat forest = Nothing -> b
Just set -> Set.fold foldProd b set
where
- foldProd (PCoerce fcat) b = foldForest f g b fcat forest
- foldProd (PApply funid args) b = f funid args b
- foldProd (PConst const toks) b = g const toks b
+ foldProd (PCoerce fcat) b = foldForest f g b fcat forest
+ foldProd (PApply funid args) b = f funid args b
+ foldProd (PConst _ const toks) b = g const toks b
----------------------------------------------------------------
diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index 1d9928304..fa6c5e549 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -58,7 +58,7 @@ ppProduction (fcat,PApply funid args) = ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args))) ppProduction (fcat,PCoerce arg) = ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg) -ppProduction (fcat,PConst _ ss) = +ppProduction (fcat,PConst _ _ ss) = ppFCat fcat <+> text "->" <+> ppStrs ss ppCncFun (funid,CncFun fun arr) = diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs index e58791d4d..3075e7a86 100644 --- a/src/runtime/haskell/PGF/VisualizeTree.hs +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -15,339 +15,296 @@ -- instead of rolling its own. ----------------------------------------------------------------------------- -module PGF.VisualizeTree ( graphvizAbstractTree - , graphvizParseTree - , graphvizDependencyTree - , graphvizAlignment - , tree2mk - , getDepLabels - , PosText(..), readPosText +module PGF.VisualizeTree + ( graphvizAbstractTree + , graphvizParseTree + , graphvizDependencyTree + , graphvizBracketedString + , graphvizAlignment + , getDepLabels ) where -import PGF.CId (CId,showCId,pCId,mkCId) +import PGF.CId (CId,showCId,ppCId,mkCId) import PGF.Data -import PGF.Tree -import PGF.Expr (showExpr) +import PGF.Expr (showExpr, Tree) import PGF.Linearize -import PGF.Macros (lookValCat) +import PGF.Macros (lookValCat, BracketedString(..), flattenBracketedString) import qualified Data.Map as Map import Data.List (intersperse,nub,isPrefixOf,sort,sortBy) import Data.Char (isDigit) -import qualified Text.ParserCombinators.ReadP as RP - -import Debug.Trace - -graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String -graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree - -tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String] -tree2graph pgf (funs,cats) = prf [] where - prf ps t = let (nod,lab) = prn ps t in - (nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") : - case t of - Fun cid trees -> - [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++ - concat [prf (j:ps) t | (j,t) <- zip [0..] trees] - Abs xs (Fun cid trees) -> - [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++ - concat [prf (j:ps) t | (j,t) <- zip [0..] trees] - _ -> [] - prn ps t = case t of - Fun cid _ -> - let - fun = if funs then showCId cid else "" - cat = if cats then prCat cid else "" - colon = if funs && cats then " : " else "" - lab = "\"" ++ fun ++ colon ++ cat ++ "\"" - in (show(show (ps :: [Int])),lab) - Abs bs tree -> - let fun = case tree of - Fun cid _ -> Fun cid [] - _ -> tree - in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"") - _ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"") - pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];" - arr = " -- " -- if digr then " -> " else " -- " - prCat = showCId . lookValCat pgf - esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts - -prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where - graph = if digr then "digraph" else "graph" - - --- replace each non-atomic constructor with mkC, where C is the val cat -tree2mk :: PGF -> Expr -> String -tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where - t2m t = case t of - Fun cid [] -> t - Fun cid ts -> Fun (mk cid) (map t2m ts) - _ -> t - mk = mkCId . ("mk" ++) . showCId . lookValCat pgf - --- dependency trees from Linearize.linearizeMark - -graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String -graphvizDependencyTree format debug mlab ms pgf lang exp = case format of - "malt" -> unlines (lin2dep format) - "malt_input" -> unlines (lin2dep format) - _ -> prGraph True (lin2dep format) - - where - - lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $ - case format of - "malt" -> map (concat . intersperse "\t") wnodes - "malt_input" -> map (concat . intersperse "\t" . take 6) wnodes - _ -> prelude ++ nodes ++ links - - ifd s = if debug then s else [] - - pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp - ---- use Just str if you have str to match against - - prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"] - - nodes = map mkNode nodeWords - mkNode (i,((_,p),ss)) = - node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;" - nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)| - ((Just f,p),w) <- wlins pot] - - links = map mkLink thelinks - thelinks = [(word y, x, label tr y x) | - (_,((f,x),_)) <- tail nodeWords, - let y = dominant x] - mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;" - node = show . show - - dominant x = case x of - [] -> x - _ | not (x == hx) -> hx - _ -> dominant (init x) - where - hx = headArg (init x) tr x - - headArg x0 tr x = case (tr,x) of - (Fun f [],[_]) -> x0 ---- ?? - (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f] - (Fun f ts,i:y) -> headArg x0 (ts !! i) y - _ -> x0 ---- - - label tr y x = case span (uncurry (==)) (zip y x) of - (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys)) - _ -> "" ---- - - funAt tr x = case (tr,x) of - (Fun f _ ,[]) -> f - (Fun f ts,i:y) -> funAt (ts !! i) y - _ -> mkCId (prTree tr) ---- - - word x = if elem x sortedNodes then x else - let x' = headArg x tr (x ++[0]) in - if x' == x then [] else word x' - - tr = expr2tree exp - sortedNodes = [p | (_,((_,p),_)) <- nodeWords] - - labels = maybe Map.empty id mlab - getHead i f = case Map.lookup f labels of - Just ls -> length $ takeWhile (/= "head") ls - _ -> i - getLabel i f = case Map.lookup f labels of - Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i - _ -> showCId f ++ "#" ++ show i - --- to generate CoNLL format for MaltParser - nodeMap :: Map.Map [Int] Int - nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords] - - arcMap :: Map.Map [Int] ([Int],String) - arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks] - - lookDomLab p = case Map.lookup p arcMap of - Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l) - _ -> (0,rootlabel) - - wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] | - (i, ((fun,p),ws)) <- tail nodeWords, - let pos = showCId $ lookValCat pgf fun, - let morph = unspec, - let (dom,lab) = lookDomLab p - ] - maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2 - unspec = "_" - rootlabel = "ROOT" - -type Labels = Map.Map CId [String] - -getDepLabels :: [String] -> Labels -getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss] +import Text.PrettyPrint + +-- | Renders abstract syntax tree in Graphviz format +graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String +graphvizAbstractTree pgf (funs,cats) = render . tree2graph + where + tree2graph t = + text "graph {" $$ + ppGraph [] [] 0 t $$ + text "}" + + getAbs xs (EAbs _ x e) = getAbs (x:xs) e + getAbs xs (ETyped e _) = getAbs xs e + getAbs xs e = (xs,e) + + getApp (EApp x y) es = getApp x (y:es) + getApp (ETyped e _) es = getApp e es + getApp e es = (e,es) + + getLbl scope (EFun f) = let fun = if funs then ppCId f else empty + cat = if cats then ppCId (lookValCat pgf f) else empty + sep = if funs && cats then colon else empty + in fun <+> sep <+> cat + getLbl scope (ELit l) = text (escapeStr (render (ppLit l))) + getLbl scope (EMeta i) = ppMeta i + getLbl scope (EVar i) = ppCId (scope !! i) + getLbl scope (ETyped e _) = getLbl scope e + getLbl scope (EImplArg e) = getLbl scope e + + ppGraph scope ps i e0 = + let (xs, e1) = getAbs [] e0 + (e2,args) = getApp e1 [] + binds = if null xs + then empty + else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->" + (lbl,eargs) = case e2 of + EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node + _ -> (getLbl scope' e2, args) + scope' = xs ++ scope + in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$ + (if null ps + then empty + else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$ + vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs) + + ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps)) + + escapeStr [] = [] + escapeStr ('\\':cs) = '\\':'\\':escapeStr cs + escapeStr ('"' :cs) = '\\':'"' :escapeStr cs + escapeStr (c :cs) = c :escapeStr cs --- parse trees from Linearize.linearizeMark ----- nubrec and domins are quadratic, but could be (n log n) - -graphvizParseTree :: PGF -> CId -> Expr -> String -graphvizParseTree pgf lang = prGraph False . lin2tree pgf . concat . take 1 . markLinearizes pgf lang where - -lin2tree pgf s = prelude ++ nodes ++ links where +type Labels = Map.Map CId [String] - prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"] +graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String +graphvizDependencyTree format debug mlab ms pgf lang t = render $ + case format of + "malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes) + "malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes) + _ -> text "digraph {" $$ + space $$ + nest 2 (text "rankdir=LR ;" $$ + text "node [shape = plaintext] ;" $$ + vcat nodes $$ + links) $$ + text "}" + where + nodes = map mkNode leaves + links = empty + wnodes = undefined + + nil = -1 + + bs = bracketedLinearize pgf lang t + + leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs + + groupAndIndexIt id [] = [] + groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws + in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 + where + collect pws@((p1,w):pws1) + | p == p1 = let (ws,pws2) = collect pws1 + in (w:ws,pws2) + collect pws = ([],pws) + + getLeaves parent bs = + case bs of + Leaf w -> [(parent,w)] + Bracket fid _ _ bss -> concatMap (getLeaves fid) bss + + mkNode (p,i,w) = + tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;" - nodeRecs = zip [0..] - (nub (filter (not . null) (nlins [postext] ++ [leaves postext]))) - nlins pts = - nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] : - concatMap nlins [ts | T _ ts <- pts] - leaves pt = [(p++[j],s) | (j,(p,s)) <- - zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]] +{- + ifd s = if debug then s else [] + + pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp + + nodes = map mkNode nodeWords + mkNode (i,((_,p),ss)) = + node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;" + nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)| + ((Just f,p),w) <- wlins pot] + + links = map mkLink thelinks + thelinks = [(word y, x, label tr y x) | + (_,((f,x),_)) <- tail nodeWords, + let y = dominant x] + mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;" + node = show . show + + dominant x = case x of + [] -> x + _ | not (x == hx) -> hx + _ -> dominant (init x) + where + hx = headArg (init x) tr x + + headArg x0 tr x = case (tr,x) of + (Fun f [],[_]) -> x0 ---- ?? + (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f] + (Fun f ts,i:y) -> headArg x0 (ts !! i) y + _ -> x0 ---- + + label tr y x = case span (uncurry (==)) (zip y x) of + (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys)) + _ -> "" ---- + + funAt tr x = case (tr,x) of + (Fun f _ ,[]) -> f + (Fun f ts,i:y) -> funAt (ts !! i) y + _ -> mkCId (prTree tr) ---- + + word x = if elem x sortedNodes then x else + let x' = headArg x tr (x ++[0]) in + if x' == x then [] else word x' + + tr = expr2tree exp + sortedNodes = [p | (_,((_,p),_)) <- nodeWords] + + labels = maybe Map.empty id mlab + getHead i f = case Map.lookup f labels of + Just ls -> length $ takeWhile (/= "head") ls + _ -> i + getLabel i f = case Map.lookup f labels of + Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i + _ -> showCId f ++ "#" ++ show i + + -- to generate CoNLL format for MaltParser + nodeMap :: Map.Map [Int] Int + nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords] + + arcMap :: Map.Map [Int] ([Int],String) + arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks] + + lookDomLab p = case Map.lookup p arcMap of + Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l) + _ -> (0,rootlabel) + + wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] | + (i, ((fun,p),ws)) <- tail nodeWords, + let pos = showCId $ lookValCat pgf fun, + let morph = unspec, + let (dom,lab) = lookDomLab p + ] + maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2 + unspec = "_" + rootlabel = "ROOT" +-} - nubrec es rs = case rs of - r:rr -> let r' = filter (not . flip elem es) (nub r) - in r' : nubrec (r' ++ es) rr - _ -> rs - nodes = map mkStruct nodeRecs +getDepLabels :: [String] -> Labels +getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss] - mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;" - cat = showCId . lookValCat pgf - fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs]) - struct i = "struct" ++ show i - links = map mkEdge domins - domins = nub [((i,x),(j,y)) | - (i,xs) <- nodeRecs, (j,ys) <- nodeRecs, - x <- xs, y <- ys, dominates x y] - dominates (p,x) (q,y) = not (null q) && p == init q - mkEdge ((i,x),(j,y)) = - struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++ - struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;" +graphvizParseTree :: PGF -> Language -> Tree -> String +graphvizParseTree pgf lang = graphvizBracketedString . bracketedLinearize pgf lang + +graphvizBracketedString :: BracketedString -> String +graphvizBracketedString = render . lin2tree + where + lin2tree bs = + text "graph {" $$ + space $$ + nest 2 (text "rankdir=BU ;" $$ + text "node [shape = record, color = white] ;" $$ + space $$ + vcat (nodes bs)) $$ + text "}" + where + nodes bs = zipWith mkStruct [0..] (interns ++ [zipWith (\i (l,p,w) -> (l,p,i,w)) [99990..] leaves]) + + nil = -1 + + leaves = getLeaves 0 nil bs + interns = getInterns 0 [(nil,bs)] + + getLeaves level parent bs = + case bs of + Leaf w -> [(level-1,parent,w)] + Bracket fid i _ bss -> concatMap (getLeaves (level+1) fid) bss + + getInterns level [] = [] + getInterns level nodes = + nub [(level-1,parent,fid,showCId cat) | (parent,Bracket fid _ cat _) <- nodes] : + getInterns (level+1) [(fid,child) | (_,Bracket fid _ _ children) <- nodes, child <- children] + + mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$ + vcat [link pl pid l id | (pl,pid,id,_) <- cs] + link pl pid l id + | pl < 0 = empty + | otherwise = struct pl <> colon <> tag pid <> colon <> char 's' <+> + text "--" <+> + struct l <> colon <> tag id <> colon <> char 'n' <+> semi + fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs]) - postext = readPosText s -- auxiliaries for graphviz syntax -struct i = "struct" ++ show i -mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n -uncommas = map (\c -> if c==',' then 'c' else c) -tag s = "<" ++ s ++ ">" -showp = init . tail . show -mtag = tag . ('n':) . uncommas +struct l = text ("struct" ++ show l) +tbrackets d = char '<' <> d <> char '>' +tag i = char 'n' <> int i -- word alignments from Linearize.markLinearize -- words are chunks like {[0,1,1,0] old} -graphvizAlignment :: PGF -> Expr -> String -graphvizAlignment pgf = prGraph True . lin2graph . linsMark where - linsMark t = [concat (take 1 (markLinearizes pgf la t)) | la <- Map.keys (concretes pgf)] - -lin2graph :: [String] -> [String] -lin2graph ss = -- trace (show ss) $ - prelude ++ nodes ++ links - - where - - prelude = ["rankdir=LR ;", "node [shape = record] ;"] - - nlins :: [(Int,[((Int,String),String)])] - nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) | - (i,ws) <- zip [0..] (map (wlins . readPosText) ss)] - - unw = concat . intersperse "\\ " -- space escape in graphviz - - nodes = map mkStruct nlins - - mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" - - fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) - - links = nub $ concatMap mkEdge (init nlins) - - mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list - [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] - - edge i v w = - struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" -{- -alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double) -alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where - linsMark t = - [s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)] - - mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double) - mkStat = - - mkAlign :: [String] -> [(String,String)] - mkAlign ss = - - nlins :: [(Int,[((Int,String),String)])] - nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) | - (i,vs) <- zip [0..] (map (wlins . readPosText) ss)] - - nodes = map mkStruct nlins - - mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" - - fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) - - links = nub $ concatMap mkEdge (init nlins) - - mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list - [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] - - edge i v w = - struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" --} - -wlins :: PosText -> [((Maybe CId,[Int]),[String])] -wlins pt = case pt of - T p pts -> concatMap (lins p) pts - M ws -> if null ws then [] else [((Nothing,[]),ws)] - where - lins p pt = case pt of - T q pts -> concatMap (lins q) pts - M ws -> if null ws then [] else [(p,ws)] - -data PosText = - T (Maybe CId,[Int]) [PosText] - | M [String] - deriving Show - -readPosText :: String -> PosText -readPosText = fst . head . (RP.readP_to_S pPosText) where - pPosText = do - RP.char '(' >> RP.skipSpaces - p <- pPos - RP.skipSpaces - ts <- RP.many pPosText - RP.char ')' >> RP.skipSpaces - return (T p ts) - RP.<++ do - ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ') - return (M ws) - pPos = do - fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f)) - RP.<++ (return Nothing) - RP.char '[' >> RP.skipSpaces - is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',') - RP.char ']' >> RP.skipSpaces - RP.char ')' RP.<++ return ' ' - return (fun,map read is) - - -{- -digraph{ -rankdir ="LR" ; -node [shape = record] ; - -struct1 [label = "<f0> this|<f1> very|<f2> intelligent|<f3> man"] ; -struct2 [label = "<f0> cet|<f1> homme|<f2> tres|<f3> intelligent|<f4> ci"] ; - -struct1:f0 -> struct2:f0 ; -struct1:f1 -> struct2:f2 ; -struct1:f2 -> struct2:f3 ; -struct1:f3 -> struct2:f1 ; -struct1:f0 -> struct2:f4 ; -} --} - +graphvizAlignment :: PGF -> [Language] -> Expr -> String +graphvizAlignment pgf langs = render . lin2graph . linsBracketed + where + linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs] + + lin2graph :: [BracketedString] -> Doc + lin2graph bss = + text "digraph {" $$ + space $$ + nest 2 (text "rankdir=LR ;" $$ + text "node [shape = record] ;" $$ + space $$ + mkLayers 0 leaves) $$ + text "}" + where + nil = -1 + + leaves = map (groupAndIndexIt 0 . getLeaves nil) bss + + groupAndIndexIt id [] = [] + groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws + in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 + where + collect pws@((p1,w):pws1) + | p == p1 = let (ws,pws2) = collect pws1 + in (w:ws,pws2) + collect pws = ([],pws) + + getLeaves parent bs = + case bs of + Leaf w -> [(parent,w)] + Bracket fid _ _ bss -> concatMap (getLeaves fid) bss + + mkLayers l [] = empty + mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$ + (case css of + (ncs:_) -> vcat (map (mkLinks l ncs) cs) + _ -> empty) $$ + mkLayers (l+1) css + + mkLinks l cs (p0,id0,_) = + vcat (map (\id1 -> struct l <> colon <> tag id0 <> colon <> char 'e' <+> + text "->" <+> + struct (l+1) <> colon <> tag id1 <> colon <> char 'w' <+> semi) indices) + where + indices = [id1 | (p1,id1,_) <- cs, p1 == p0] + + fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (_,id,w) <- cs]) |
