summaryrefslogtreecommitdiff
path: root/src/GF/CF
diff options
context:
space:
mode:
authoraarne <unknown>2003-09-22 13:16:55 +0000
committeraarne <unknown>2003-09-22 13:16:55 +0000
commitb1402e8bd6a68a891b00a214d6cf184d66defe19 (patch)
tree90372ac4e53dce91cf949dbf8e93be06f1d9e8bd /src/GF/CF
Founding the newly structured GF2.0 cvs archive.
Diffstat (limited to 'src/GF/CF')
-rw-r--r--src/GF/CF/CF.hs180
-rw-r--r--src/GF/CF/CFIdent.hs151
-rw-r--r--src/GF/CF/CanonToCF.hs157
-rw-r--r--src/GF/CF/ChartParser.hs166
-rw-r--r--src/GF/CF/PPrCF.hs59
-rw-r--r--src/GF/CF/Profile.hs95
6 files changed, 808 insertions, 0 deletions
diff --git a/src/GF/CF/CF.hs b/src/GF/CF/CF.hs
new file mode 100644
index 000000000..0cff68b97
--- /dev/null
+++ b/src/GF/CF/CF.hs
@@ -0,0 +1,180 @@
+module CF where
+
+import Operations
+import Str
+import AbsGFC
+import GFC
+import CFIdent
+import List (nub,nubBy)
+import Char (isUpper, isLower, toUpper, toLower)
+
+-- context-free grammars. AR 15/12/1999 -- 30/3/2000 -- 2/6/2001 -- 3/12/2001
+
+-- CF grammar data types
+
+-- abstract type CF.
+-- Invariant: each category has all its rules grouped with it
+-- also: the list is never empty (the category is just missing then)
+newtype CF = CF ([(CFCat,[CFRule])], CFPredef)
+type CFRule = (CFFun, (CFCat, [CFItem]))
+
+-- CFPredef is a hack for variable symbols and literals; normally = const []
+data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show)
+
+newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show)
+
+type CFPredef = CFTok -> [(CFCat, CFFun)] -- recognize literals, variables, etc
+
+-- Wadler style + return information
+type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String)
+
+cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree]
+cfParseResults rs = [b | (b,[]) <- fst rs]
+
+-- terminals are regular expressions on words; to be completed to full regexp
+data RegExp =
+ RegAlts [CFWord] -- list of alternative words
+ | RegSpec CFTok -- special token
+ deriving (Eq, Ord, Show)
+
+type CFWord = String
+
+-- the above types should be kept abstract, and the following functions used
+
+-- to construct CF grammars
+
+emptyCF :: CF
+emptyCF = CF ([], emptyCFPredef)
+
+emptyCFPredef :: CFPredef
+emptyCFPredef = const []
+
+rules2CF :: [CFRule] -> CF
+rules2CF rs = CF (groupCFRules rs, emptyCFPredef)
+
+groupCFRules :: [CFRule] -> [(CFCat,[CFRule])]
+groupCFRules = foldr ins [] where
+ ins rule crs = case crs of
+ (c,r) : rs | compatCF c cat -> (c,rule:r) : rs
+ cr : rs -> cr : ins rule rs
+ _ -> [(cat,[rule])]
+ where
+ cat = valCatCF rule
+
+-- to construct rules
+
+-- make a rule from a single token without constituents
+atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule
+atomCFRule c f s = (f, (c, [atomCFTerm s]))
+
+-- usual terminal
+atomCFTerm :: CFTok -> CFItem
+atomCFTerm = CFTerm . atomRegExp
+
+atomRegExp :: CFTok -> RegExp
+atomRegExp t = case t of
+ TS s -> RegAlts [s]
+ _ -> RegSpec t
+
+-- terminal consisting of alternatives
+altsCFTerm :: [String] -> CFItem
+altsCFTerm = CFTerm . RegAlts
+
+
+-- to construct trees
+
+-- make a tree without constituents
+atomCFTree :: CFCat -> CFFun -> CFTree
+atomCFTree c f = buildCFTree c f []
+
+-- make a tree with constituents.
+buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree
+buildCFTree c f trees = CFTree (f,(c,trees))
+
+{- ----
+cfMeta0 :: CFTree
+cfMeta0 = atomCFTree uCFCat metaCFFun
+
+-- used in happy
+litCFTree :: String -> CFTree --- Maybe CFTree
+litCFTree s = maybe cfMeta0 id $ do
+ (c,f) <- getCFLiteral s
+ return $ buildCFTree c f []
+-}
+
+-- to decide whether a token matches a terminal item
+
+matchCFTerm :: CFItem -> CFTok -> Bool
+matchCFTerm (CFTerm t) s = satRegExp t s
+matchCFTerm _ _ = False
+
+satRegExp :: RegExp -> CFTok -> Bool
+satRegExp r t = case (r,t) of
+ (RegAlts tt, TS s) -> elem s tt
+ (RegAlts tt, TC s) -> or [elem s' tt | s' <- caseUpperOrLower s]
+ (RegSpec x, _) -> t == x ---
+ _ -> False
+ where
+ caseUpperOrLower s = case s of
+ c:cs | isUpper c -> [s, toLower c : cs]
+ c:cs | isLower c -> [s, toUpper c : cs]
+ _ -> [s]
+
+-- to analyse a CF grammar
+
+catsOfCF :: CF -> [CFCat]
+catsOfCF (CF (rr,_)) = map fst rr
+
+rulesOfCF :: CF -> [CFRule]
+rulesOfCF (CF (rr,_)) = concatMap snd rr
+
+ruleGroupsOfCF :: CF -> [(CFCat,[CFRule])]
+ruleGroupsOfCF (CF (rr,_)) = rr
+
+rulesForCFCat :: CF -> CFCat -> [CFRule]
+rulesForCFCat (CF (rr,_)) cat = maybe [] id $ lookup cat rr
+
+valCatCF :: CFRule -> CFCat
+valCatCF (_,(c,_)) = c
+
+valItemsCF :: CFRule -> [CFItem]
+valItemsCF (_,(_,i)) = i
+
+valFunCF :: CFRule -> CFFun
+valFunCF (f,(_,_)) = f
+
+startCat :: CF -> CFCat
+startCat (CF (rr,_)) = fst (head rr) --- hardly useful
+
+predefOfCF :: CF -> CFPredef
+predefOfCF (CF (_,f)) = f
+
+appCFPredef :: CF -> CFTok -> [(CFCat, CFFun)]
+appCFPredef = ($) . predefOfCF
+
+valCFItem :: CFItem -> Either RegExp CFCat
+valCFItem (CFTerm r) = Left r
+valCFItem (CFNonterm nt) = Right nt
+
+cfTokens :: CF -> [CFWord]
+cfTokens cf = nub $ concat $ [ wordsOfRegExp i | r <- rulesOfCF cf,
+ CFTerm i <- valItemsCF r]
+
+wordsOfRegExp :: RegExp -> [CFWord]
+wordsOfRegExp (RegAlts tt) = tt
+wordsOfRegExp _ = []
+
+forCFItem :: CFTok -> CFRule -> Bool
+forCFItem a (_,(_, CFTerm r : _)) = satRegExp r a
+forCFItem _ _ = False
+
+isCircularCF :: CFRule -> Bool
+isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c
+isCircularCF _ = False
+--- we should make a test of circular chains, too
+
+-- coercion to the older predef cf type
+
+predefRules :: CFPredef -> CFTok -> [CFRule]
+predefRules pre s = [atomCFRule c f s | (c,f) <- pre s]
+
diff --git a/src/GF/CF/CFIdent.hs b/src/GF/CF/CFIdent.hs
new file mode 100644
index 000000000..d9c451adb
--- /dev/null
+++ b/src/GF/CF/CFIdent.hs
@@ -0,0 +1,151 @@
+module CFIdent where
+
+import Operations
+import GFC
+import Ident
+import AbsGFC
+import PrGrammar
+import Str
+import Char (toLower, toUpper)
+
+-- symbols (categories, functions) for context-free grammars.
+
+-- these types should be abstract
+
+data CFTok =
+ TS String -- normal strings
+ | TC String -- strings that are ambiguous between upper or lower case
+ | TL String -- string literals
+ | TI Int -- integer literals
+ | TV Ident -- variables
+ | TM Int String -- metavariables; the integer identifies it
+ deriving (Eq, Ord, Show)
+
+newtype CFCat = CFCat (CIdent,Label) deriving (Eq, Ord, Show)
+
+tS, tC, tL, tI, tV, tM :: String -> CFTok
+tS = TS
+tC = TC
+tL = TL
+tI = TI . read
+tV = TV . identC
+tM = TM 0
+
+tInt :: Int -> CFTok
+tInt = TI
+
+prCFTok :: CFTok -> String
+prCFTok t = case t of
+ TS s -> s
+ TC s -> s
+ TL s -> s
+ TI i -> show i
+ TV x -> prt x
+ TM i _ -> "?" ---
+
+-- to build trees: the Atom contains a GF function, Cn | Meta | Vr | Literal
+newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Show)
+
+type Profile = [([[Int]],[Int])]
+
+
+-- the following functions should be used instead of constructors
+
+-- to construct CF functions
+
+mkCFFun :: Atom -> CFFun
+mkCFFun t = CFFun (t,[])
+
+{- ----
+getCFLiteral :: String -> Maybe (CFCat, CFFun)
+getCFLiteral s = case lookupLiteral' s of
+ Ok (c, lit) -> Just (cat2CFCat c, mkCFFun lit)
+ _ -> Nothing
+-}
+
+varCFFun :: Ident -> CFFun
+varCFFun = mkCFFun . AV
+
+consCFFun :: CIdent -> CFFun
+consCFFun = mkCFFun . AC
+
+{- ----
+string2CFFun :: String -> CFFun
+string2CFFun = consCFFun . Ident
+-}
+
+cfFun2String :: CFFun -> String
+cfFun2String (CFFun (f,_)) = prt f
+
+cfFun2Profile :: CFFun -> Profile
+cfFun2Profile (CFFun (_,p)) = p
+
+{- ----
+strPro2cfFun :: String -> Profile -> CFFun
+strPro2cfFun str p = (CFFun (AC (Ident str), p))
+-}
+
+metaCFFun :: CFFun
+metaCFFun = mkCFFun $ AM 0
+
+-- to construct CF categories
+
+-- belongs elsewhere
+mkCIdent :: String -> String -> CIdent
+mkCIdent m c = CIQ (identC m) (identC c)
+
+ident2CFCat :: CIdent -> Ident -> CFCat
+ident2CFCat mc d = CFCat (mc, L d)
+
+-- standard way of making cf cat: label s
+string2CFCat :: String -> String -> CFCat
+string2CFCat m c = ident2CFCat (mkCIdent m c) (identC "s")
+
+idents2CFCat :: Ident -> Ident -> CFCat
+idents2CFCat m c = ident2CFCat (CIQ m c) (identC "s")
+
+catVarCF :: CFCat
+catVarCF = ident2CFCat (mkCIdent "_" "#Var") (identC "_") ----
+
+{- ----
+uCFCat :: CFCat
+uCFCat = cat2CFCat uCat
+-}
+
+moduleOfCFCat :: CFCat -> Ident
+moduleOfCFCat (CFCat (CIQ m _, _)) = m
+
+-- the opposite direction
+cfCat2Cat :: CFCat -> CIdent
+cfCat2Cat (CFCat (s,_)) = s
+
+
+-- to construct CF tokens
+
+string2CFTok :: String -> CFTok
+string2CFTok = tS
+
+str2cftoks :: Str -> [CFTok]
+str2cftoks = map tS . words . sstr
+
+-- decide if two token lists look the same (in parser postprocessing)
+
+compatToks :: [CFTok] -> [CFTok] -> Bool
+compatToks ts us = and [compatTok t u | (t,u) <- zip ts us]
+
+compatTok t u = any (`elem` (alts t)) (alts u) where
+ alts u = case u of
+ TC (c:s) -> [toLower c : s, toUpper c : s]
+ _ -> [prCFTok u]
+
+-- decide if two CFFuns have the same function head (profiles may differ)
+
+compatCFFun :: CFFun -> CFFun -> Bool
+compatCFFun (CFFun (f,_)) (CFFun (g,_)) = f == g
+
+-- decide whether two categories match
+-- the modifiers can be from different modules, but on the same extension
+-- path, so there is no clash, and they can be safely ignored ---
+compatCF :: CFCat -> CFCat -> Bool
+----compatCF = (==)
+compatCF (CFCat (CIQ _ c, l)) (CFCat (CIQ _ c', l')) = c==c' && l==l'
diff --git a/src/GF/CF/CanonToCF.hs b/src/GF/CF/CanonToCF.hs
new file mode 100644
index 000000000..6f7dc6d6b
--- /dev/null
+++ b/src/GF/CF/CanonToCF.hs
@@ -0,0 +1,157 @@
+module CanonToCF where
+
+import Operations
+import Option
+import Ident
+import AbsGFC
+import GFC
+import PrGrammar
+import CMacros
+import qualified Modules as M
+import CF
+import CFIdent
+import List (nub)
+import Monad
+
+-- AR 27/1/2000 -- 3/12/2001 -- 8/6/2003
+
+-- The main function: for a given cnc module m, build the CF grammar with all the
+-- rules coming from modules that m extends. The categories are qualified by
+-- the abstract module name a that m is of.
+
+canon2cf :: Options -> CanonGrammar -> Ident -> Err CF
+canon2cf opts gr c = do
+ let ms = M.allExtends gr c
+ a <- M.abstractOfConcrete gr c
+ let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms]
+ let mms = [(a, tree2list (M.jments m)) | m <- cncs]
+ rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts)) mms
+ let rules = filter (not . isCircularCF) rules0 ---- temporarily here
+ let predef = const [] ---- mkCFPredef cfcats
+ return $ CF (groupCFRules rules, predef)
+
+cnc2cfCond :: Options -> Ident -> [(Ident,Info)] -> Err [CFRule]
+cnc2cfCond opts m gr =
+ liftM concat $
+ mapM lin2cf [(m,fun,cat,args,lin) | (fun, CncFun cat args lin _) <- gr]
+
+type IFun = Ident
+type ICat = CIdent
+
+-- all CF rules corresponding to a linearization rule
+lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule]
+lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do
+ rhss0 <- allLinValues lin -- :: [(Label, [([Patt],Term)])]
+ rhss1 <- mapM (mkCFItems m) (concat rhss0) -- :: [(Label, [[PreCFItem]])]
+ mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat
+
+-- making sequences of CF items from every branch in a linearization
+mkCFItems :: Ident -> (Label, [([Patt],Term)]) -> Err (Label, [[PreCFItem]])
+mkCFItems m (lab,pts) = do
+ itemss <- mapM (term2CFItems m) (map snd pts)
+ return (lab, concat itemss) ---- combinations? (test!)
+
+-- making CF rules from sequences of CF items
+mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> (Label, [[PreCFItem]]) -> Err [CFRule]
+mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss
+ where
+ mkOneRule its = do
+ let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its]
+ profile = mkProfile nonterms
+ cfcat = CFCat (redirectIdent m cat,lab)
+ cffun = CFFun (AC (CIQ m fun), profile)
+ cfits = map precf2cf its
+ return (cffun,(cfcat,cfits))
+ mkProfile nonterms = map mkOne args
+ where
+ mkOne (A c i) = mkOne (AB c 0 i)
+ mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i])
+ where
+ mkB j = [p | (p,(k, LV l,False)) <- nonterms, k == i, l == j]
+
+-- intermediate data structure of CFItems with information for profiles
+data PreCFItem =
+ PTerm RegExp -- like ordinary Terminal
+ | PNonterm CIdent Integer Label Bool -- cat, position, part/bind, whether arg
+ deriving Eq
+
+precf2cf :: PreCFItem -> CFItem
+precf2cf (PTerm r) = CFTerm r
+precf2cf (PNonterm cm _ (L c) True) = CFNonterm (ident2CFCat cm c)
+precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF
+
+
+-- the main job in translating linearization rules into sequences of cf items
+term2CFItems :: Ident -> Term -> Err [[PreCFItem]]
+term2CFItems m t = errIn "forming cf items" $ case t of
+ S c _ -> t2c c
+
+ T _ cc -> do
+ its <- mapM t2c [t | Cas _ t <- cc]
+ tryMkCFTerm (concat its)
+
+ C t1 t2 -> do
+ its1 <- t2c t1
+ its2 <- t2c t2
+ return [x ++ y | x <- its1, y <- its2]
+
+ FV ts -> do
+ its <- mapM t2c ts
+ tryMkCFTerm (concat its)
+
+ P arg s -> extrR arg s
+
+ K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]]
+
+ E -> return [[]]
+
+ K (KP d vs) -> do
+ let its = [PTerm (RegAlts [s]) | s <- d]
+ let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs]
+ tryMkCFTerm (its : itss)
+
+ _ -> prtBad "no cf for" t ----
+
+ where
+
+ t2c = term2CFItems m
+
+ -- optimize the number of rules by a factorization
+ tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]]
+ tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss =
+ case mapM mkOne (counterparts ii) of
+ Ok tt -> return [tt]
+ _ -> return ii
+ where
+ mkOne cfits = case mapM mkOneTerm cfits of
+ Ok tt -> return $ PTerm (RegAlts (concat (nub tt)))
+ _ -> mkOneNonTerm cfits
+ mkOneTerm (PTerm (RegAlts t)) = return t
+ mkOneTerm _ = Bad ""
+ mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) =
+ if all (== n) cc
+ then return n
+ else Bad ""
+ mkOneNonTerm _ = Bad ""
+ counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]]
+ tryMkCFTerm itss = return itss
+
+ extrR arg lab = case (arg,lab) of
+ (Arg (A cat pos), l@(L _)) -> return [[PNonterm (CIQ m cat) pos l True]]
+ (Arg (A cat pos), l@(LV _)) -> return [[PNonterm (CIQ m cat) pos l False]]
+ (Arg (AB cat pos b), l@(L _)) -> return [[PNonterm (CIQ m cat) pos l True]]
+ (Arg (AB cat pos b), l@(LV _)) -> return [[PNonterm (CIQ m cat) pos l False]]
+ ---- ??
+ _ -> prtBad "cannot extract record field from" arg
+
+{- Proof + 1 @ 4 catVarCF :: CFCat
+PNonterm CIdent Integer Label Bool -- cat, position, part/bind, whether arg
+
+
+mkCFPredef :: [CFCat] -> CFPredef
+mkCFPredef cats s =
+ [(cat, metaCFFun) | TM _ _ <- [s], cat <- cats] ++
+ [(cat, varCFFun x) | TV x <- [s], cat <- cats] ++
+ [(cat, lit) | TL t <- [s], Just (cat,lit) <- [getCFLiteral t]] ++
+ [(cat, lit) | TI i <- [s], Just (cat,lit) <- [getCFLiteral (show i)]] ---
+-}
diff --git a/src/GF/CF/ChartParser.hs b/src/GF/CF/ChartParser.hs
new file mode 100644
index 000000000..09d538244
--- /dev/null
+++ b/src/GF/CF/ChartParser.hs
@@ -0,0 +1,166 @@
+
+module ChartParser (chartParser) where
+
+import Operations
+import CF
+import CFIdent
+import PPrCF (prCFItem)
+
+import OrdSet
+import OrdMap2
+
+import List (groupBy)
+
+type Token = CFTok
+type Name = CFFun
+type Category = CFItem
+type Grammar = ([Production], Terminal)
+type Production = (Name, Category, [Category])
+type Terminal = Token -> [(Category, Maybe Name)]
+type GParser = Grammar -> Category -> [Token] -> ([ParseTree],String)
+data ParseTree = Node Name Category [ParseTree] | Leaf Token
+
+--------------------------------------------------
+-- converting between GF parsing and CFG parsing
+
+buildParser :: GParser -> CF -> CFCat -> CFParser
+buildParser gparser cf = parse
+ where
+ parse = \start input ->
+ let parse2 = parse' (CFNonterm start) input in
+ ([(parse2tree t, []) | t <- fst parse2], snd parse2)
+ parse' = gparser (cf2grammar cf)
+
+cf2grammar :: CF -> Grammar
+cf2grammar cf = (productions, terminal)
+ where
+ productions = [ (name, CFNonterm cat, rhs) |
+ (name, (cat, rhs)) <- cfRules ]
+ terminal tok = [ (CFNonterm cat, Just name) |
+ (cat, name) <- cfPredef tok ]
+ ++
+ [ (item, Nothing) |
+ item <- elems rhsItems,
+ matchCFTerm item tok ]
+ cfRules = rulesOfCF cf
+ cfPredef = predefOfCF cf
+ rhsItems :: Set Category
+ rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ]
+
+parse2tree :: ParseTree -> CFTree
+parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees'))
+ where
+ trees' = [ parse2tree t | t@(Node _ _ _) <- trees ] -- ignore leafs
+
+maybeNode :: Maybe Name -> Category -> Token -> ParseTree
+maybeNode (Just name) cat tok = Node name cat [Leaf tok]
+maybeNode Nothing _ tok = Leaf tok
+
+
+--------------------------------------------------
+-- chart parsing (bottom up kilbury-like)
+
+type Chart = [CState]
+type CState = Set Edge
+type Edge = (Int, Category, [Category])
+type Passive = (Int, Int, Category)
+
+chartParser :: CF -> CFCat -> CFParser
+chartParser = buildParser chartParser0
+
+chartParser0 :: GParser
+chartParser0 (productions, terminal) = cparse
+ where
+ emptyCats :: Set Category
+ emptyCats = empties emptySet
+ where
+ empties cats | cats==cats' = cats
+ | otherwise = empties cats'
+ where cats' = makeSet [ cat | (_, cat, rhs) <- productions,
+ all (`elemSet` cats) rhs ]
+
+ grammarMap :: Map Category [(Name, [Category])]
+ grammarMap = makeMapWith (++)
+ [ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ]
+
+ leftCornerMap :: Map Category (Set (Category,[Category]))
+ leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) |
+ (_, b, abs) <- productions,
+ (a : bs) <- removeNullable abs ]
+
+ removeNullable :: [Category] -> [[Category]]
+ removeNullable [] = []
+ removeNullable cats@(cat:cats')
+ | cat `elemSet` emptyCats = cats : removeNullable cats'
+ | otherwise = [cats]
+
+ cparse :: Category -> [Token] -> ([ParseTree], String)
+ cparse start input = case lookup (0, length input, start) edgeTrees of
+ Just trees -> (trees, "Chart:" ++++ prChart passiveEdges)
+ Nothing -> ([], "Chart:" ++++ prChart passiveEdges)
+ where
+ finalChart :: Chart
+ finalChart = map buildState initialChart
+
+ finalChartMap :: [Map Category (Set Edge)]
+ finalChartMap = map stateMap finalChart
+
+ stateMap :: CState -> Map Category (Set Edge)
+ stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) |
+ (i, b, a:bs) <- elems state ]
+
+ initialChart :: Chart
+ initialChart = emptySet : map initialState (zip [0..] input)
+ where initialState (j, sym) = makeSet [ (j, cat, []) |
+ (cat, _) <- terminal sym ]
+
+ buildState :: CState -> CState
+ buildState = limit more
+ where more (j, a, []) = ordSet [ (j, b, bs) |
+ (b, bs) <- elems (lookupWith emptySet leftCornerMap a) ]
+ <++>
+ lookupWith emptySet (finalChartMap !! j) a
+ more (j, b, a:bs) = ordSet [ (j, b, bs) |
+ a `elemSet` emptyCats ]
+
+ passiveEdges :: [Passive]
+ passiveEdges = [ (i, j, cat) |
+ (j, state) <- zip [0..] finalChart,
+ (i, cat, []) <- elems state ]
+ ++
+ [ (i, i, cat) |
+ i <- [0 .. length input],
+ cat <- elems emptyCats ]
+
+ edgeTrees :: [ (Passive, [ParseTree]) ]
+ edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ]
+
+ edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])]
+ edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) |
+ ((i,j,c), trees) <- edgeTrees ]
+
+ treesFor :: Passive -> [ParseTree]
+ treesFor (i, j, cat) = [ Node name cat trees |
+ (name, rhs) <- lookupWith [] grammarMap cat,
+ trees <- children rhs i j ]
+ ++
+ [ maybeNode name cat tok |
+ i == j-1,
+ let tok = input !! i,
+ Just name <- [lookup cat (terminal tok)] ]
+
+ children :: [Category] -> Int -> Int -> [[ParseTree]]
+ children [] i k = [ [] | i == k ]
+ children (c:cs) i k = [ tree : rest |
+ i <= k,
+ (j, trees) <- lookupWith [] edgeTreesMap (i,c),
+ rest <- children cs j k,
+ tree <- trees ]
+
+
+-- AR 10/12/2002
+
+prChart :: [Passive] -> String
+prChart = unlines . map (unwords . map prOne) . positions where
+ prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it
+ positions = groupBy (\ (i,_,_) (j,_,_) -> i == j)
diff --git a/src/GF/CF/PPrCF.hs b/src/GF/CF/PPrCF.hs
new file mode 100644
index 000000000..ff4b64e66
--- /dev/null
+++ b/src/GF/CF/PPrCF.hs
@@ -0,0 +1,59 @@
+module PPrCF where
+
+import Operations
+import CF
+import CFIdent
+import AbsGFC
+import PrGrammar
+
+-- printing and parsing CF grammars, rules, and trees AR 26/1/2000 -- 9/6/2003
+---- use the Print class instead!
+
+prCF :: CF -> String
+prCF = unlines . (map prCFRule) . rulesOfCF -- hiding the literal recogn function
+
+prCFTree :: CFTree -> String
+prCFTree (CFTree (fun, (_,trees))) = prCFFun fun ++ prs trees where
+ prs [] = ""
+ prs ts = " " ++ unwords (map ps ts)
+ ps t@(CFTree (_,(_,[]))) = prCFTree t
+ ps t = prParenth (prCFTree t)
+
+prCFRule :: CFRule -> String
+prCFRule (fun,(cat,its)) =
+ prCFFun fun ++ "." +++ prCFCat cat +++ "::=" +++
+ unwords (map prCFItem its) +++ ";"
+
+prCFFun :: CFFun -> String
+prCFFun = prCFFun' True ---- False -- print profiles for debug
+
+prCFFun' :: Bool -> CFFun -> String
+prCFFun' profs (CFFun (t, p)) = prt t ++ pp p where
+ pp p = if (not profs || normal p) then "" else "_" ++ concat (map show p)
+ normal p = and [x==y && null b | ((b,x),y) <- zip p (map (:[]) [0..])]
+
+prCFCat :: CFCat -> String
+prCFCat (CFCat (c,l)) = prt c ++ "-" ++ prt l ----
+
+prCFItem (CFNonterm c) = prCFCat c
+prCFItem (CFTerm a) = prRegExp a
+
+prRegExp (RegAlts tt) = case tt of
+ [t] -> prQuotedString t
+ _ -> prParenth (prTList " | " (map prQuotedString tt))
+
+{- ----
+-- rules have an amazingly easy parser, if we use the format
+-- fun. C -> item1 item2 ... where unquoted items are treated as cats
+-- Actually would be nice to add profiles to this.
+
+getCFRule :: String -> Maybe CFRule
+getCFRule s = getcf (wrds s) where
+ getcf ww | length ww > 2 && ww !! 2 `elem` ["->", "::="] =
+ Just (string2CFFun (init fun), (string2CFCat cat, map mkIt its)) where
+ fun : cat : _ : its = words s
+ mkIt ('"':w@(_:_)) = atomCFTerm (string2CFTok (init w))
+ mkIt w = CFNonterm (string2CFCat w)
+ getcf _ = Nothing
+ wrds = takeWhile (/= ";") . words -- to permit semicolon in the end
+-} \ No newline at end of file
diff --git a/src/GF/CF/Profile.hs b/src/GF/CF/Profile.hs
new file mode 100644
index 000000000..6dbb5f85a
--- /dev/null
+++ b/src/GF/CF/Profile.hs
@@ -0,0 +1,95 @@
+module Profile (postParse) where
+
+import AbsGFC
+import GFC
+import qualified Ident as I
+import CMacros
+---import MMacros
+import CF
+import CFIdent
+import PPrCF -- for error msg
+import PrGrammar
+
+import Operations
+
+import Monad
+import List (nub)
+
+
+-- restoring parse trees for discontinuous constituents, bindings, etc. AR 25/1/2001
+-- revised 8/4/2002 for the new profile structure
+
+postParse :: CFTree -> Err Exp
+postParse tree = do
+ iterm <- errIn "postprocessing initial parse tree" $ tree2term tree
+ return $ term2trm iterm
+
+-- an intermediate data structure
+data ITerm = ITerm (Atom, BindVs) [ITerm] | IMeta deriving (Eq,Show)
+type BindVs = [[I.Ident]]
+
+-- the job is done in two passes:
+-- (1) tree2term: restore constituent order from Profile
+-- (2) term2trm: restore Bindings from Binds
+
+tree2term :: CFTree -> Err ITerm
+tree2term (CFTree (cff@(CFFun (fun,pro)), (_,trees))) = case fun of
+ AM _ -> return IMeta
+ _ -> do
+ args <- mapM mkArg pro
+ binds <- mapM mkBinds pro
+ return $ ITerm (fun, binds) args
+ where
+ mkArg (_,arg) = case arg of
+ [x] -> do -- one occurrence
+ trx <- trees !? x
+ tree2term trx
+ [] -> return IMeta -- suppression
+ _ -> do -- reduplication
+ trees' <- mapM (trees !?) arg
+ xs1 <- mapM tree2term trees'
+ xs2 <- checkArity xs1
+ unif xs2
+
+ checkArity xs = if length (nub [length xx | ITerm _ xx <- xs']) > 1
+ then Bad "arity error"
+ else return xs'
+ where xs' = [t | t@(ITerm _ _) <- xs]
+ unif [] = return $ IMeta
+ unif xs@(ITerm fp@(f,_) xx : ts) = do
+ let hs = [h | ITerm (h,_) _ <- ts]
+ testErr (all (==f) hs) -- if fails, hs must be nonempty
+ ("unification expects" +++ prt f +++ "but found" +++ prt (head hs))
+ xx' <- mapM unifArg [0 .. length xx - 1]
+ return $ ITerm fp xx'
+ where
+ unifArg i = tryUnif [zz !! i | ITerm _ zz <- xs]
+ tryUnif xx = case [t | t@(ITerm _ _) <- xx] of
+ [] -> return IMeta
+ x:xs -> if all (==x) xs
+ then return x
+ else Bad "failed to unify"
+
+ mkBinds (xss,_) = mapM mkBind xss
+ mkBind xs = do
+ ts <- mapM (trees !?) xs
+ let vs = [x | CFTree (CFFun (AV x,_),(_,[])) <- ts]
+ testErr (length ts == length vs) "non-variable in bound position"
+ case vs of
+ [x] -> return x
+ [] -> return $ I.identC "h_" ---- uBoundVar
+ y:ys -> do
+ testErr (all (==y) ys) ("fail to unify bindings of" +++ prt y)
+ return y
+
+term2trm :: ITerm -> Exp
+term2trm IMeta = EAtom (AM 0) ---- mExp0
+term2trm (ITerm (fun, binds) terms) =
+ let bterms = zip binds terms
+ in mkAppAtom fun [mkAbsR xs (term2trm t) | (xs,t) <- bterms]
+
+ --- these are deprecated
+ where
+ mkAbsR c e = foldr EAbs e c
+ mkAppAtom a = mkApp (EAtom a)
+ mkApp = foldl EApp \ No newline at end of file