diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/CF/ChartParser.hs | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/CF/ChartParser.hs')
| -rw-r--r-- | src-3.0/GF/CF/ChartParser.hs | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/src-3.0/GF/CF/ChartParser.hs b/src-3.0/GF/CF/ChartParser.hs new file mode 100644 index 000000000..740c4d787 --- /dev/null +++ b/src-3.0/GF/CF/ChartParser.hs @@ -0,0 +1,206 @@ +---------------------------------------------------------------------- +-- | +-- Module : ChartParser +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:21:12 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.10 $ +-- +-- Bottom-up Kilbury chart parser from "Pure Functional Parsing", chapter 5. +-- OBSOLETE -- should use new MCFG parsers instead +----------------------------------------------------------------------------- + +module GF.CF.ChartParser (chartParser) where + +-- import Tracing +-- import PrintParser +-- import PrintSimplifiedTerm + +import GF.Data.Operations +import GF.CF.CF +import GF.CF.CFIdent +import GF.CF.PPrCF (prCFItem) + +import GF.Data.OrdSet +import GF.Data.OrdMap2 + +import Data.List (groupBy) + +type Token = CFTok +type Name = CFFun +type Category = CFItem +type Grammar = ([Production], Terminal) +type Production = (Name, Category, [Category]) +type Terminal = Token -> [(Category, Maybe Name)] +type GParser = Grammar -> Category -> [Token] -> ([ParseTree],String) +data ParseTree = Node Name Category [ParseTree] | Leaf Token + +maxTake :: Int +-- maxTake = 1000 +maxTake = maxBound + +-------------------------------------------------- +-- converting between GF parsing and CFG parsing + +buildParser :: GParser -> CF -> CFCat -> CFParser +buildParser gparser cf = parse + where + parse = \start input -> + let parse2 = parse' (CFNonterm start) input in + (take maxTake [(parse2tree t, []) | t <- fst parse2], snd parse2) + parse' = gparser (cf2grammar cf) + +cf2grammar :: CF -> Grammar +cf2grammar cf = (productions, terminal) + where + productions = [ (name, CFNonterm cat, rhs) | + (name, (cat, rhs)) <- cfRules ] + terminal tok = [ (CFNonterm cat, Just name) | + (cat, name) <- cfPredef tok ] + ++ + [ (item, Nothing) | + item <- elems rhsItems, + matchCFTerm item tok ] + cfRules = rulesOfCF cf + cfPredef = predefOfCF cf + rhsItems :: Set Category + rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ] + +parse2tree :: ParseTree -> CFTree +parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees')) + where + trees' = [ parse2tree t | t@(Node _ _ _) <- trees ] -- ignore leafs + +maybeNode :: Maybe Name -> Category -> Token -> ParseTree +maybeNode (Just name) cat tok = Node name cat [Leaf tok] +maybeNode Nothing _ tok = Leaf tok + + +-------------------------------------------------- +-- chart parsing (bottom up kilbury-like) + +type Chart = [CState] +type CState = Set Edge +type Edge = (Int, Category, [Category]) +type Passive = (Int, Int, Category) + +chartParser :: CF -> CFCat -> CFParser +chartParser = buildParser chartParser0 + +chartParser0 :: GParser +chartParser0 (productions, terminal) = cparse + where + emptyCats :: Set Category + emptyCats = empties emptySet + where + empties cats | cats==cats' = cats + | otherwise = empties cats' + where cats' = makeSet [ cat | (_, cat, rhs) <- productions, + all (`elemSet` cats) rhs ] + + grammarMap :: Map Category [(Name, [Category])] + grammarMap = makeMapWith (++) + [ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ] + + leftCornerMap :: Map Category (Set (Category,[Category])) + leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) | + (_, b, abs) <- productions, + (a : bs) <- removeNullable abs ] + + removeNullable :: [Category] -> [[Category]] + removeNullable [] = [] + removeNullable cats@(cat:cats') + | cat `elemSet` emptyCats = cats : removeNullable cats' + | otherwise = [cats] + + cparse :: Category -> [Token] -> ([ParseTree], String) + cparse start input = -- trace "ChartParser" $ + case lookup (0, length input, start) $ + -- tracePrt "#edgeTrees" (prt . map (length.snd)) $ + edgeTrees of + Just trees -> -- tracePrt "#trees" (prt . length . fst) $ + (trees, "Chart:" ++++ prChart passiveEdges) + Nothing -> ([], "Chart:" ++++ prChart passiveEdges) + where + finalChart :: Chart + finalChart = map buildState initialChart + + finalChartMap :: [Map Category (Set Edge)] + finalChartMap = map stateMap finalChart + + stateMap :: CState -> Map Category (Set Edge) + stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) | + (i, b, a:bs) <- elems state ] + + initialChart :: Chart + initialChart = -- tracePrt "#initialChart" (prt . map (length.elems)) $ + emptySet : map initialState (zip [0..] input) + where initialState (j, sym) = makeSet [ (j, cat, []) | + (cat, _) <- terminal sym ] + + buildState :: CState -> CState + buildState = limit more + where more (j, a, []) = ordSet [ (j, b, bs) | + (b, bs) <- elems (lookupWith emptySet leftCornerMap a) ] + <++> + lookupWith emptySet (finalChartMap !! j) a + more (j, b, a:bs) = ordSet [ (j, b, bs) | + a `elemSet` emptyCats ] + + passiveEdges :: [Passive] + passiveEdges = -- tracePrt "#passiveEdges" (prt . length) $ + [ (i, j, cat) | + (j, state) <- zip [0..] $ + -- tracePrt "#passiveChart" + -- (prt . map (length.filter (\(_,_,x)->null x).elems)) $ + -- tracePrt "#activeChart" (prt . map (length.elems)) $ + finalChart, + (i, cat, []) <- elems state ] + ++ + [ (i, i, cat) | + i <- [0 .. length input], + cat <- elems emptyCats ] + + edgeTrees :: [ (Passive, [ParseTree]) ] + edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ] + + edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])] + edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) | + ((i,j,c), trees) <- edgeTrees ] + + treesFor :: Passive -> [ParseTree] + treesFor (i, j, cat) = [ Node name cat trees | + (name, rhs) <- lookupWith [] grammarMap cat, + trees <- children rhs i j ] + ++ + [ maybeNode name cat tok | + i == j-1, + let tok = input !! i, + Just name <- [lookup cat (terminal tok)] ] + + children :: [Category] -> Int -> Int -> [[ParseTree]] + children [] i k = [ [] | i == k ] + children (c:cs) i k = [ tree : rest | + i <= k, + (j, trees) <- lookupWith [] edgeTreesMap (i,c), + rest <- children cs j k, + tree <- trees ] + + +{- +instance Print ParseTree where + prt (Node name cat trees) = prt name++"."++prt cat++"^{"++prtSep "," trees++"}" + prt (Leaf token) = prt token +-} + +-- AR 10/12/2002 + +prChart :: [Passive] -> String +prChart = unlines . map (unwords . map prOne) . positions where + prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it + positions = groupBy (\ (i,_,_) (j,_,_) -> i == j) + + |
