1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
|
----------------------------------------------------------------------
-- |
-- Module : ChartParser
-- Maintainer : Peter Ljunglöf
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:21:12 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.10 $
--
-- Bottom-up Kilbury chart parser from "Pure Functional Parsing", chapter 5.
-- OBSOLETE -- should use new MCFG parsers instead
-----------------------------------------------------------------------------
module GF.CF.ChartParser (chartParser) where
-- import Tracing
-- import PrintParser
-- import PrintSimplifiedTerm
import GF.Data.Operations
import GF.CF.CF
import GF.CF.CFIdent
import GF.CF.PPrCF (prCFItem)
import GF.Data.OrdSet
import GF.Data.OrdMap2
import Data.List (groupBy)
type Token = CFTok
type Name = CFFun
type Category = CFItem
type Grammar = ([Production], Terminal)
type Production = (Name, Category, [Category])
type Terminal = Token -> [(Category, Maybe Name)]
type GParser = Grammar -> Category -> [Token] -> ([ParseTree],String)
data ParseTree = Node Name Category [ParseTree] | Leaf Token
maxTake :: Int
-- maxTake = 1000
maxTake = maxBound
--------------------------------------------------
-- converting between GF parsing and CFG parsing
buildParser :: GParser -> CF -> CFCat -> CFParser
buildParser gparser cf = parse
where
parse = \start input ->
let parse2 = parse' (CFNonterm start) input in
(take maxTake [(parse2tree t, []) | t <- fst parse2], snd parse2)
parse' = gparser (cf2grammar cf)
cf2grammar :: CF -> Grammar
cf2grammar cf = (productions, terminal)
where
productions = [ (name, CFNonterm cat, rhs) |
(name, (cat, rhs)) <- cfRules ]
terminal tok = [ (CFNonterm cat, Just name) |
(cat, name) <- cfPredef tok ]
++
[ (item, Nothing) |
item <- elems rhsItems,
matchCFTerm item tok ]
cfRules = rulesOfCF cf
cfPredef = predefOfCF cf
rhsItems :: Set Category
rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ]
parse2tree :: ParseTree -> CFTree
parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees'))
where
trees' = [ parse2tree t | t@(Node _ _ _) <- trees ] -- ignore leafs
maybeNode :: Maybe Name -> Category -> Token -> ParseTree
maybeNode (Just name) cat tok = Node name cat [Leaf tok]
maybeNode Nothing _ tok = Leaf tok
--------------------------------------------------
-- chart parsing (bottom up kilbury-like)
type Chart = [CState]
type CState = Set Edge
type Edge = (Int, Category, [Category])
type Passive = (Int, Int, Category)
chartParser :: CF -> CFCat -> CFParser
chartParser = buildParser chartParser0
chartParser0 :: GParser
chartParser0 (productions, terminal) = cparse
where
emptyCats :: Set Category
emptyCats = empties emptySet
where
empties cats | cats==cats' = cats
| otherwise = empties cats'
where cats' = makeSet [ cat | (_, cat, rhs) <- productions,
all (`elemSet` cats) rhs ]
grammarMap :: Map Category [(Name, [Category])]
grammarMap = makeMapWith (++)
[ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ]
leftCornerMap :: Map Category (Set (Category,[Category]))
leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) |
(_, b, abs) <- productions,
(a : bs) <- removeNullable abs ]
removeNullable :: [Category] -> [[Category]]
removeNullable [] = []
removeNullable cats@(cat:cats')
| cat `elemSet` emptyCats = cats : removeNullable cats'
| otherwise = [cats]
cparse :: Category -> [Token] -> ([ParseTree], String)
cparse start input = -- trace "ChartParser" $
case lookup (0, length input, start) $
-- tracePrt "#edgeTrees" (prt . map (length.snd)) $
edgeTrees of
Just trees -> -- tracePrt "#trees" (prt . length . fst) $
(trees, "Chart:" ++++ prChart passiveEdges)
Nothing -> ([], "Chart:" ++++ prChart passiveEdges)
where
finalChart :: Chart
finalChart = map buildState initialChart
finalChartMap :: [Map Category (Set Edge)]
finalChartMap = map stateMap finalChart
stateMap :: CState -> Map Category (Set Edge)
stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) |
(i, b, a:bs) <- elems state ]
initialChart :: Chart
initialChart = -- tracePrt "#initialChart" (prt . map (length.elems)) $
emptySet : map initialState (zip [0..] input)
where initialState (j, sym) = makeSet [ (j, cat, []) |
(cat, _) <- terminal sym ]
buildState :: CState -> CState
buildState = limit more
where more (j, a, []) = ordSet [ (j, b, bs) |
(b, bs) <- elems (lookupWith emptySet leftCornerMap a) ]
<++>
lookupWith emptySet (finalChartMap !! j) a
more (j, b, a:bs) = ordSet [ (j, b, bs) |
a `elemSet` emptyCats ]
passiveEdges :: [Passive]
passiveEdges = -- tracePrt "#passiveEdges" (prt . length) $
[ (i, j, cat) |
(j, state) <- zip [0..] $
-- tracePrt "#passiveChart"
-- (prt . map (length.filter (\(_,_,x)->null x).elems)) $
-- tracePrt "#activeChart" (prt . map (length.elems)) $
finalChart,
(i, cat, []) <- elems state ]
++
[ (i, i, cat) |
i <- [0 .. length input],
cat <- elems emptyCats ]
edgeTrees :: [ (Passive, [ParseTree]) ]
edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ]
edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])]
edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) |
((i,j,c), trees) <- edgeTrees ]
treesFor :: Passive -> [ParseTree]
treesFor (i, j, cat) = [ Node name cat trees |
(name, rhs) <- lookupWith [] grammarMap cat,
trees <- children rhs i j ]
++
[ maybeNode name cat tok |
i == j-1,
let tok = input !! i,
Just name <- [lookup cat (terminal tok)] ]
children :: [Category] -> Int -> Int -> [[ParseTree]]
children [] i k = [ [] | i == k ]
children (c:cs) i k = [ tree : rest |
i <= k,
(j, trees) <- lookupWith [] edgeTreesMap (i,c),
rest <- children cs j k,
tree <- trees ]
{-
instance Print ParseTree where
prt (Node name cat trees) = prt name++"."++prt cat++"^{"++prtSep "," trees++"}"
prt (Leaf token) = prt token
-}
-- AR 10/12/2002
prChart :: [Passive] -> String
prChart = unlines . map (unwords . map prOne) . positions where
prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it
positions = groupBy (\ (i,_,_) (j,_,_) -> i == j)
|