summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2010-05-19 13:32:39 +0000
committerkrasimir <krasimir@chalmers.se>2010-05-19 13:32:39 +0000
commite0dc9c80a6cbb45254f7e20d50894267aa2a3532 (patch)
tree07f02914c96664da1e57fcb2558f84ced6cc05ff /src
parent1743e88192d3395221d8a023aee319182055191d (diff)
now every BracketedString also has reference to the source expression(s)
Diffstat (limited to 'src')
-rw-r--r--src/runtime/haskell/PGF/Forest.hs60
-rw-r--r--src/runtime/haskell/PGF/Linearize.hs26
-rw-r--r--src/runtime/haskell/PGF/Macros.hs19
-rw-r--r--src/runtime/haskell/PGF/Parse.hs17
-rw-r--r--src/runtime/haskell/PGF/VisualizeTree.hs18
5 files changed, 88 insertions, 52 deletions
diff --git a/src/runtime/haskell/PGF/Forest.hs b/src/runtime/haskell/PGF/Forest.hs
index 4cc3dd908..ece6a8000 100644
--- a/src/runtime/haskell/PGF/Forest.hs
+++ b/src/runtime/haskell/PGF/Forest.hs
@@ -14,6 +14,7 @@
module PGF.Forest( Forest(..)
, BracketedString, showBracketedString, lengthBracketedString
, linearizeWithBrackets
+ , foldForest
) where
import PGF.CId
@@ -26,6 +27,7 @@ import qualified Data.Map as Map
import qualified Data.IntSet as IntSet
import qualified Data.IntMap as IntMap
import Control.Monad
+import GF.Data.SortedList
data Forest
= Forest
@@ -48,11 +50,11 @@ linearizeWithBrackets = head . snd . untokn "" . bracketedTokn
--
bracketedTokn :: Forest -> BracketedTokn
-bracketedTokn (Forest abs cnc forest root) =
+bracketedTokn f@(Forest abs cnc forest root) =
case [computeSeq seq (map (render IntMap.empty) args) | (seq,args) <- root] of
- ([bs@(Bracket_ cat fid label lin)]:_) -> bs
- (bss:_) -> Bracket_ wildCId 0 0 bss
- [] -> Bracket_ wildCId 0 0 []
+ ([bs@(Bracket_ _ _ _ _ _)]:_) -> bs
+ (bss:_) -> Bracket_ wildCId 0 0 [] bss
+ [] -> Bracket_ wildCId 0 0 [] []
where
trusted = foldl1 IntSet.intersection [IntSet.unions (map (trustedSpots IntSet.empty) args) | (_,args) <- root]
@@ -97,8 +99,56 @@ bracketedTokn (Forest abs cnc forest root) =
getArg d r
| not (null arg_lin) &&
IntSet.member fid trusted
- = [Bracket_ cat fid r arg_lin]
+ = [Bracket_ cat fid r es arg_lin]
| otherwise = arg_lin
where
arg_lin = lin ! r
(fid,cat,lin) = args !! d
+ es = getAbsTrees f fid
+
+-- | This function extracts the list of all completed parse trees
+-- that spans the whole input consumed so far. The trees are also
+-- limited by the category specified, which is usually
+-- the same as the startup category.
+getAbsTrees :: Forest -> FId -> [Expr]
+getAbsTrees (Forest abs cnc forest root) fid =
+ nubsort $ do (fvs,e) <- go Set.empty 0 (0,fid)
+ guard (Set.null fvs)
+ return e
+ where
+ go rec fcat' (d,fcat)
+ | fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
+ | Set.member fcat rec = mzero
+ | otherwise = foldForest (\funid args trees ->
+ do let CncFun fn lins = cncfuns cnc ! funid
+ args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
+ check_ho_fun fn args
+ `mplus`
+ trees)
+ (\const _ trees ->
+ return (freeVar const,const)
+ `mplus`
+ trees)
+ [] fcat forest
+
+ check_ho_fun fun args
+ | fun == _V = return (head args)
+ | fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args))
+ | otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args)
+
+ mkVar (EFun v) = v
+ mkVar (EMeta _) = wildCId
+
+ freeVar (EFun v) = Set.singleton v
+ freeVar _ = Set.empty
+
+
+foldForest :: (FunId -> [FId] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FId -> IntMap.IntMap (Set.Set Production) -> b
+foldForest f g b fcat forest =
+ case IntMap.lookup fcat forest of
+ Nothing -> b
+ Just set -> Set.fold foldProd b set
+ where
+ foldProd (PCoerce fcat) b = foldForest f g b fcat forest
+ foldProd (PApply funid args) b = f funid args b
+ foldProd (PConst _ const toks) b = g const toks b
diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs
index 503b98d7b..cf70c1efb 100644
--- a/src/runtime/haskell/PGF/Linearize.hs
+++ b/src/runtime/haskell/PGF/Linearize.hs
@@ -63,7 +63,7 @@ type CncType = (CId, FId) -- concrete type is the abstract type (the category
linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn]
linTree pgf lang e =
- [amapWithIndex (\label -> Bracket_ cat fid label) lin | (_,((cat,fid),lin)) <- lin0 [] [] Nothing 0 e]
+ [amapWithIndex (\label -> Bracket_ cat fid label [e]) lin | (_,((cat,fid),e,lin)) <- lin0 [] [] Nothing 0 e]
where
cnc = lookMap (error "no lang") lang (concretes pgf)
lp = lproductions cnc
@@ -74,26 +74,26 @@ linTree pgf lang e =
| otherwise = apply (xs ++ ys) mb_cty n_fid _B (e:[ELit (LStr x) | x <- xs])
lin xs mb_cty n_fid (EApp e1 e2) es = lin xs mb_cty n_fid e1 (e2:es)
- lin xs mb_cty n_fid (ELit l) [] = case l of
- LStr s -> return (n_fid+1,((cidString,n_fid),ss s))
- LInt n -> return (n_fid+1,((cidInt, n_fid),ss (show n)))
- LFlt f -> return (n_fid+1,((cidFloat, n_fid),ss (show f)))
+ lin xs mb_cty n_fid e@(ELit l) [] = case l of
+ LStr s -> return (n_fid+1,((cidString,n_fid),e,ss s))
+ LInt n -> return (n_fid+1,((cidInt, n_fid),e,ss (show n)))
+ LFlt f -> return (n_fid+1,((cidFloat, n_fid),e,ss (show f)))
lin xs mb_cty n_fid (EMeta i) es = apply xs mb_cty n_fid _V (ELit (LStr ('?':show i)):es)
lin xs mb_cty n_fid (EFun f) es = apply xs mb_cty n_fid f es
lin xs mb_cty n_fid (EVar i) es = apply xs mb_cty n_fid _V (ELit (LStr (xs !! i)) :es)
- lin xs mb_cty n_fid (ETyped e _) es = lin xs mb_cty n_fid e es
- lin xs mb_cty n_fid (EImplArg e) es = lin xs mb_cty n_fid e es
+ lin xs mb_cty n_fid (ETyped e _) es = lin xs mb_cty n_fid e es
+ lin xs mb_cty n_fid (EImplArg e) es = lin xs mb_cty n_fid e es
ss s = listArray (0,0) [[LeafKS [s]]]
- apply :: [String] -> Maybe CncType -> FId -> CId -> [Expr] -> [(FId,(CncType, LinTable))]
+ apply :: [String] -> Maybe CncType -> FId -> CId -> [Expr] -> [(FId,(CncType, Expr, LinTable))]
apply xs mb_cty n_fid f es =
case Map.lookup f lp of
Just prods -> do (funid,(cat,fid),ctys) <- getApps prods
guard (length ctys == length es)
(n_fid,args) <- descend n_fid (zip ctys es)
let (CncFun _ lins) = cncfuns cnc ! funid
- return (n_fid+1,((cat,n_fid),listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]))
+ return (n_fid+1,((cat,n_fid),undefined,listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]))
Nothing -> apply xs mb_cty n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
where
getApps prods =
@@ -116,7 +116,7 @@ linTree pgf lang e =
(n_fid,args) <- descend n_fid fes
return (n_fid,arg:args)
- computeSeq :: SeqId -> [(CncType,LinTable)] -> [BracketedTokn]
+ computeSeq :: SeqId -> [(CncType,Expr,LinTable)] -> [BracketedTokn]
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences cnc ! seqid
@@ -127,11 +127,11 @@ linTree pgf lang e =
compute (SymKP ts alts) = [LeafKP ts alts]
getArg d r
- | not (null arg_lin) = [Bracket_ cat fid r arg_lin]
+ | not (null arg_lin) = [Bracket_ cat fid r [e] arg_lin]
| otherwise = arg_lin
where
- arg_lin = lin ! r
- ((cat,fid),lin) = args !! d
+ arg_lin = lin ! r
+ ((cat,fid),e,lin) = args !! d
amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2
amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr))
diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs
index f4bfae646..328bf369d 100644
--- a/src/runtime/haskell/PGF/Macros.hs
+++ b/src/runtime/haskell/PGF/Macros.hs
@@ -212,7 +212,8 @@ updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pg
-- mark the beginning and the end of each constituent.
data BracketedString
= Leaf String -- ^ this is the leaf i.e. a single token
- | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [BracketedString] -- ^ this is a bracket. The 'CId' is the category of
+ | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedString]
+ -- ^ this is a bracket. The 'CId' is the category of
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
-- i.e. without discontinuous constituents this identifier
@@ -227,7 +228,7 @@ data BracketedString
data BracketedTokn
= LeafKS [String]
| LeafKP [String] [Alternative]
- | Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [BracketedTokn] -- Invariant: the list is not empty
+ | Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedTokn] -- Invariant: the list is not empty
type LinTable = Array.Array LIndex [BracketedTokn]
@@ -238,12 +239,12 @@ showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
-ppBracketedString (Bracket cat fcat index bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss))
+ppBracketedString (Bracket cat fcat index _ bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss))
-- | The length of the bracketed string in number of tokens.
lengthBracketedString :: BracketedString -> Int
-lengthBracketedString (Leaf _) = 1
-lengthBracketedString (Bracket _ _ _ bss) = sum (map lengthBracketedString bss)
+lengthBracketedString (Leaf _) = 1
+lengthBracketedString (Bracket _ _ _ _ bss) = sum (map lengthBracketedString bss)
untokn :: String -> BracketedTokn -> (String,[BracketedString])
untokn nw (LeafKS ts) = (head ts,map Leaf ts)
@@ -254,10 +255,10 @@ untokn nw (LeafKP d vs) = let ts = sel d vs nw
case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of
v:_ -> v
_ -> d
-untokn nw (Bracket_ cat fid index bss) =
+untokn nw (Bracket_ cat fid index es bss) =
let (nw',bss') = mapAccumR untokn nw bss
- in (nw',[Bracket cat fid index (concat bss')])
+ in (nw',[Bracket cat fid index es (concat bss')])
flattenBracketedString :: BracketedString -> [String]
-flattenBracketedString (Leaf w) = [w]
-flattenBracketedString (Bracket _ _ _ bss) = concatMap flattenBracketedString bss
+flattenBracketedString (Leaf w) = [w]
+flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs
index ce195f752..f48fab097 100644
--- a/src/runtime/haskell/PGF/Parse.hs
+++ b/src/runtime/haskell/PGF/Parse.hs
@@ -27,7 +27,7 @@ import PGF.Data
import PGF.Expr(Tree)
import PGF.Macros
import PGF.TypeCheck
-import PGF.Forest(Forest(Forest), linearizeWithBrackets)
+import PGF.Forest(Forest(Forest), linearizeWithBrackets, foldForest)
-- | This data type encodes the different outcomes which you could get from the parser.
data ParseResult
@@ -380,21 +380,6 @@ insertPC key fcat chart = Map.insert key fcat chart
----------------------------------------------------------------
--- Forest
-----------------------------------------------------------------
-
-foldForest :: (FunId -> [FId] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FId -> IntMap.IntMap (Set.Set Production) -> b
-foldForest f g b fcat forest =
- case IntMap.lookup fcat forest of
- Nothing -> b
- Just set -> Set.fold foldProd b set
- where
- foldProd (PCoerce fcat) b = foldForest f g b fcat forest
- foldProd (PApply funid args) b = f funid args b
- foldProd (PConst _ const toks) b = g const toks b
-
-
-----------------------------------------------------------------
-- Parse State
----------------------------------------------------------------
diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs
index 098d6a07f..542044b2d 100644
--- a/src/runtime/haskell/PGF/VisualizeTree.hs
+++ b/src/runtime/haskell/PGF/VisualizeTree.hs
@@ -22,7 +22,7 @@ module PGF.VisualizeTree
, graphvizBracketedString
, graphvizAlignment
, getDepLabels
- ) where
+ ) where
import PGF.CId (CId,showCId,ppCId,mkCId)
import PGF.Data
@@ -122,8 +122,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t = render $
getLeaves parent bs =
case bs of
- Leaf w -> [(parent,w)]
- Bracket _ fid _ bss -> concatMap (getLeaves fid) bss
+ Leaf w -> [(parent,w)]
+ Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
mkNode (p,i,w) =
tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;"
@@ -234,13 +234,13 @@ graphvizBracketedString = render . lin2tree
getLeaves level parent bs =
case bs of
- Leaf w -> [(level-1,parent,w)]
- Bracket _ fid i bss -> concatMap (getLeaves (level+1) fid) bss
+ Leaf w -> [(level-1,parent,w)]
+ Bracket _ fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
getInterns level [] = []
getInterns level nodes =
- nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _) <- nodes] :
- getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ children) <- nodes, child <- children]
+ nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _) <- nodes] :
+ getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ children) <- nodes, child <- children]
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
vcat [link pl pid l id | (pl,pid,id,_) <- cs]
@@ -290,8 +290,8 @@ graphvizAlignment pgf langs = render . lin2graph . linsBracketed
getLeaves parent bs =
case bs of
- Leaf w -> [(parent,w)]
- Bracket _ fid _ bss -> concatMap (getLeaves fid) bss
+ Leaf w -> [(parent,w)]
+ Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
mkLayers l [] = empty
mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$