diff options
Diffstat (limited to 'src/GF/CF')
| -rw-r--r-- | src/GF/CF/CF.hs | 180 | ||||
| -rw-r--r-- | src/GF/CF/CFIdent.hs | 151 | ||||
| -rw-r--r-- | src/GF/CF/CanonToCF.hs | 157 | ||||
| -rw-r--r-- | src/GF/CF/ChartParser.hs | 166 | ||||
| -rw-r--r-- | src/GF/CF/PPrCF.hs | 59 | ||||
| -rw-r--r-- | src/GF/CF/Profile.hs | 95 |
6 files changed, 808 insertions, 0 deletions
diff --git a/src/GF/CF/CF.hs b/src/GF/CF/CF.hs new file mode 100644 index 000000000..0cff68b97 --- /dev/null +++ b/src/GF/CF/CF.hs @@ -0,0 +1,180 @@ +module CF where + +import Operations +import Str +import AbsGFC +import GFC +import CFIdent +import List (nub,nubBy) +import Char (isUpper, isLower, toUpper, toLower) + +-- context-free grammars. AR 15/12/1999 -- 30/3/2000 -- 2/6/2001 -- 3/12/2001 + +-- CF grammar data types + +-- abstract type CF. +-- Invariant: each category has all its rules grouped with it +-- also: the list is never empty (the category is just missing then) +newtype CF = CF ([(CFCat,[CFRule])], CFPredef) +type CFRule = (CFFun, (CFCat, [CFItem])) + +-- CFPredef is a hack for variable symbols and literals; normally = const [] +data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show) + +newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show) + +type CFPredef = CFTok -> [(CFCat, CFFun)] -- recognize literals, variables, etc + +-- Wadler style + return information +type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String) + +cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree] +cfParseResults rs = [b | (b,[]) <- fst rs] + +-- terminals are regular expressions on words; to be completed to full regexp +data RegExp = + RegAlts [CFWord] -- list of alternative words + | RegSpec CFTok -- special token + deriving (Eq, Ord, Show) + +type CFWord = String + +-- the above types should be kept abstract, and the following functions used + +-- to construct CF grammars + +emptyCF :: CF +emptyCF = CF ([], emptyCFPredef) + +emptyCFPredef :: CFPredef +emptyCFPredef = const [] + +rules2CF :: [CFRule] -> CF +rules2CF rs = CF (groupCFRules rs, emptyCFPredef) + +groupCFRules :: [CFRule] -> [(CFCat,[CFRule])] +groupCFRules = foldr ins [] where + ins rule crs = case crs of + (c,r) : rs | compatCF c cat -> (c,rule:r) : rs + cr : rs -> cr : ins rule rs + _ -> [(cat,[rule])] + where + cat = valCatCF rule + +-- to construct rules + +-- make a rule from a single token without constituents +atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule +atomCFRule c f s = (f, (c, [atomCFTerm s])) + +-- usual terminal +atomCFTerm :: CFTok -> CFItem +atomCFTerm = CFTerm . atomRegExp + +atomRegExp :: CFTok -> RegExp +atomRegExp t = case t of + TS s -> RegAlts [s] + _ -> RegSpec t + +-- terminal consisting of alternatives +altsCFTerm :: [String] -> CFItem +altsCFTerm = CFTerm . RegAlts + + +-- to construct trees + +-- make a tree without constituents +atomCFTree :: CFCat -> CFFun -> CFTree +atomCFTree c f = buildCFTree c f [] + +-- make a tree with constituents. +buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree +buildCFTree c f trees = CFTree (f,(c,trees)) + +{- ---- +cfMeta0 :: CFTree +cfMeta0 = atomCFTree uCFCat metaCFFun + +-- used in happy +litCFTree :: String -> CFTree --- Maybe CFTree +litCFTree s = maybe cfMeta0 id $ do + (c,f) <- getCFLiteral s + return $ buildCFTree c f [] +-} + +-- to decide whether a token matches a terminal item + +matchCFTerm :: CFItem -> CFTok -> Bool +matchCFTerm (CFTerm t) s = satRegExp t s +matchCFTerm _ _ = False + +satRegExp :: RegExp -> CFTok -> Bool +satRegExp r t = case (r,t) of + (RegAlts tt, TS s) -> elem s tt + (RegAlts tt, TC s) -> or [elem s' tt | s' <- caseUpperOrLower s] + (RegSpec x, _) -> t == x --- + _ -> False + where + caseUpperOrLower s = case s of + c:cs | isUpper c -> [s, toLower c : cs] + c:cs | isLower c -> [s, toUpper c : cs] + _ -> [s] + +-- to analyse a CF grammar + +catsOfCF :: CF -> [CFCat] +catsOfCF (CF (rr,_)) = map fst rr + +rulesOfCF :: CF -> [CFRule] +rulesOfCF (CF (rr,_)) = concatMap snd rr + +ruleGroupsOfCF :: CF -> [(CFCat,[CFRule])] +ruleGroupsOfCF (CF (rr,_)) = rr + +rulesForCFCat :: CF -> CFCat -> [CFRule] +rulesForCFCat (CF (rr,_)) cat = maybe [] id $ lookup cat rr + +valCatCF :: CFRule -> CFCat +valCatCF (_,(c,_)) = c + +valItemsCF :: CFRule -> [CFItem] +valItemsCF (_,(_,i)) = i + +valFunCF :: CFRule -> CFFun +valFunCF (f,(_,_)) = f + +startCat :: CF -> CFCat +startCat (CF (rr,_)) = fst (head rr) --- hardly useful + +predefOfCF :: CF -> CFPredef +predefOfCF (CF (_,f)) = f + +appCFPredef :: CF -> CFTok -> [(CFCat, CFFun)] +appCFPredef = ($) . predefOfCF + +valCFItem :: CFItem -> Either RegExp CFCat +valCFItem (CFTerm r) = Left r +valCFItem (CFNonterm nt) = Right nt + +cfTokens :: CF -> [CFWord] +cfTokens cf = nub $ concat $ [ wordsOfRegExp i | r <- rulesOfCF cf, + CFTerm i <- valItemsCF r] + +wordsOfRegExp :: RegExp -> [CFWord] +wordsOfRegExp (RegAlts tt) = tt +wordsOfRegExp _ = [] + +forCFItem :: CFTok -> CFRule -> Bool +forCFItem a (_,(_, CFTerm r : _)) = satRegExp r a +forCFItem _ _ = False + +isCircularCF :: CFRule -> Bool +isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c +isCircularCF _ = False +--- we should make a test of circular chains, too + +-- coercion to the older predef cf type + +predefRules :: CFPredef -> CFTok -> [CFRule] +predefRules pre s = [atomCFRule c f s | (c,f) <- pre s] + diff --git a/src/GF/CF/CFIdent.hs b/src/GF/CF/CFIdent.hs new file mode 100644 index 000000000..d9c451adb --- /dev/null +++ b/src/GF/CF/CFIdent.hs @@ -0,0 +1,151 @@ +module CFIdent where + +import Operations +import GFC +import Ident +import AbsGFC +import PrGrammar +import Str +import Char (toLower, toUpper) + +-- symbols (categories, functions) for context-free grammars. + +-- these types should be abstract + +data CFTok = + TS String -- normal strings + | TC String -- strings that are ambiguous between upper or lower case + | TL String -- string literals + | TI Int -- integer literals + | TV Ident -- variables + | TM Int String -- metavariables; the integer identifies it + deriving (Eq, Ord, Show) + +newtype CFCat = CFCat (CIdent,Label) deriving (Eq, Ord, Show) + +tS, tC, tL, tI, tV, tM :: String -> CFTok +tS = TS +tC = TC +tL = TL +tI = TI . read +tV = TV . identC +tM = TM 0 + +tInt :: Int -> CFTok +tInt = TI + +prCFTok :: CFTok -> String +prCFTok t = case t of + TS s -> s + TC s -> s + TL s -> s + TI i -> show i + TV x -> prt x + TM i _ -> "?" --- + +-- to build trees: the Atom contains a GF function, Cn | Meta | Vr | Literal +newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Show) + +type Profile = [([[Int]],[Int])] + + +-- the following functions should be used instead of constructors + +-- to construct CF functions + +mkCFFun :: Atom -> CFFun +mkCFFun t = CFFun (t,[]) + +{- ---- +getCFLiteral :: String -> Maybe (CFCat, CFFun) +getCFLiteral s = case lookupLiteral' s of + Ok (c, lit) -> Just (cat2CFCat c, mkCFFun lit) + _ -> Nothing +-} + +varCFFun :: Ident -> CFFun +varCFFun = mkCFFun . AV + +consCFFun :: CIdent -> CFFun +consCFFun = mkCFFun . AC + +{- ---- +string2CFFun :: String -> CFFun +string2CFFun = consCFFun . Ident +-} + +cfFun2String :: CFFun -> String +cfFun2String (CFFun (f,_)) = prt f + +cfFun2Profile :: CFFun -> Profile +cfFun2Profile (CFFun (_,p)) = p + +{- ---- +strPro2cfFun :: String -> Profile -> CFFun +strPro2cfFun str p = (CFFun (AC (Ident str), p)) +-} + +metaCFFun :: CFFun +metaCFFun = mkCFFun $ AM 0 + +-- to construct CF categories + +-- belongs elsewhere +mkCIdent :: String -> String -> CIdent +mkCIdent m c = CIQ (identC m) (identC c) + +ident2CFCat :: CIdent -> Ident -> CFCat +ident2CFCat mc d = CFCat (mc, L d) + +-- standard way of making cf cat: label s +string2CFCat :: String -> String -> CFCat +string2CFCat m c = ident2CFCat (mkCIdent m c) (identC "s") + +idents2CFCat :: Ident -> Ident -> CFCat +idents2CFCat m c = ident2CFCat (CIQ m c) (identC "s") + +catVarCF :: CFCat +catVarCF = ident2CFCat (mkCIdent "_" "#Var") (identC "_") ---- + +{- ---- +uCFCat :: CFCat +uCFCat = cat2CFCat uCat +-} + +moduleOfCFCat :: CFCat -> Ident +moduleOfCFCat (CFCat (CIQ m _, _)) = m + +-- the opposite direction +cfCat2Cat :: CFCat -> CIdent +cfCat2Cat (CFCat (s,_)) = s + + +-- to construct CF tokens + +string2CFTok :: String -> CFTok +string2CFTok = tS + +str2cftoks :: Str -> [CFTok] +str2cftoks = map tS . words . sstr + +-- decide if two token lists look the same (in parser postprocessing) + +compatToks :: [CFTok] -> [CFTok] -> Bool +compatToks ts us = and [compatTok t u | (t,u) <- zip ts us] + +compatTok t u = any (`elem` (alts t)) (alts u) where + alts u = case u of + TC (c:s) -> [toLower c : s, toUpper c : s] + _ -> [prCFTok u] + +-- decide if two CFFuns have the same function head (profiles may differ) + +compatCFFun :: CFFun -> CFFun -> Bool +compatCFFun (CFFun (f,_)) (CFFun (g,_)) = f == g + +-- decide whether two categories match +-- the modifiers can be from different modules, but on the same extension +-- path, so there is no clash, and they can be safely ignored --- +compatCF :: CFCat -> CFCat -> Bool +----compatCF = (==) +compatCF (CFCat (CIQ _ c, l)) (CFCat (CIQ _ c', l')) = c==c' && l==l' diff --git a/src/GF/CF/CanonToCF.hs b/src/GF/CF/CanonToCF.hs new file mode 100644 index 000000000..6f7dc6d6b --- /dev/null +++ b/src/GF/CF/CanonToCF.hs @@ -0,0 +1,157 @@ +module CanonToCF where + +import Operations +import Option +import Ident +import AbsGFC +import GFC +import PrGrammar +import CMacros +import qualified Modules as M +import CF +import CFIdent +import List (nub) +import Monad + +-- AR 27/1/2000 -- 3/12/2001 -- 8/6/2003 + +-- The main function: for a given cnc module m, build the CF grammar with all the +-- rules coming from modules that m extends. The categories are qualified by +-- the abstract module name a that m is of. + +canon2cf :: Options -> CanonGrammar -> Ident -> Err CF +canon2cf opts gr c = do + let ms = M.allExtends gr c + a <- M.abstractOfConcrete gr c + let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms] + let mms = [(a, tree2list (M.jments m)) | m <- cncs] + rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts)) mms + let rules = filter (not . isCircularCF) rules0 ---- temporarily here + let predef = const [] ---- mkCFPredef cfcats + return $ CF (groupCFRules rules, predef) + +cnc2cfCond :: Options -> Ident -> [(Ident,Info)] -> Err [CFRule] +cnc2cfCond opts m gr = + liftM concat $ + mapM lin2cf [(m,fun,cat,args,lin) | (fun, CncFun cat args lin _) <- gr] + +type IFun = Ident +type ICat = CIdent + +-- all CF rules corresponding to a linearization rule +lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule] +lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do + rhss0 <- allLinValues lin -- :: [(Label, [([Patt],Term)])] + rhss1 <- mapM (mkCFItems m) (concat rhss0) -- :: [(Label, [[PreCFItem]])] + mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat + +-- making sequences of CF items from every branch in a linearization +mkCFItems :: Ident -> (Label, [([Patt],Term)]) -> Err (Label, [[PreCFItem]]) +mkCFItems m (lab,pts) = do + itemss <- mapM (term2CFItems m) (map snd pts) + return (lab, concat itemss) ---- combinations? (test!) + +-- making CF rules from sequences of CF items +mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> (Label, [[PreCFItem]]) -> Err [CFRule] +mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss + where + mkOneRule its = do + let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its] + profile = mkProfile nonterms + cfcat = CFCat (redirectIdent m cat,lab) + cffun = CFFun (AC (CIQ m fun), profile) + cfits = map precf2cf its + return (cffun,(cfcat,cfits)) + mkProfile nonterms = map mkOne args + where + mkOne (A c i) = mkOne (AB c 0 i) + mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i]) + where + mkB j = [p | (p,(k, LV l,False)) <- nonterms, k == i, l == j] + +-- intermediate data structure of CFItems with information for profiles +data PreCFItem = + PTerm RegExp -- like ordinary Terminal + | PNonterm CIdent Integer Label Bool -- cat, position, part/bind, whether arg + deriving Eq + +precf2cf :: PreCFItem -> CFItem +precf2cf (PTerm r) = CFTerm r +precf2cf (PNonterm cm _ (L c) True) = CFNonterm (ident2CFCat cm c) +precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF + + +-- the main job in translating linearization rules into sequences of cf items +term2CFItems :: Ident -> Term -> Err [[PreCFItem]] +term2CFItems m t = errIn "forming cf items" $ case t of + S c _ -> t2c c + + T _ cc -> do + its <- mapM t2c [t | Cas _ t <- cc] + tryMkCFTerm (concat its) + + C t1 t2 -> do + its1 <- t2c t1 + its2 <- t2c t2 + return [x ++ y | x <- its1, y <- its2] + + FV ts -> do + its <- mapM t2c ts + tryMkCFTerm (concat its) + + P arg s -> extrR arg s + + K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]] + + E -> return [[]] + + K (KP d vs) -> do + let its = [PTerm (RegAlts [s]) | s <- d] + let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs] + tryMkCFTerm (its : itss) + + _ -> prtBad "no cf for" t ---- + + where + + t2c = term2CFItems m + + -- optimize the number of rules by a factorization + tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]] + tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss = + case mapM mkOne (counterparts ii) of + Ok tt -> return [tt] + _ -> return ii + where + mkOne cfits = case mapM mkOneTerm cfits of + Ok tt -> return $ PTerm (RegAlts (concat (nub tt))) + _ -> mkOneNonTerm cfits + mkOneTerm (PTerm (RegAlts t)) = return t + mkOneTerm _ = Bad "" + mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) = + if all (== n) cc + then return n + else Bad "" + mkOneNonTerm _ = Bad "" + counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]] + tryMkCFTerm itss = return itss + + extrR arg lab = case (arg,lab) of + (Arg (A cat pos), l@(L _)) -> return [[PNonterm (CIQ m cat) pos l True]] + (Arg (A cat pos), l@(LV _)) -> return [[PNonterm (CIQ m cat) pos l False]] + (Arg (AB cat pos b), l@(L _)) -> return [[PNonterm (CIQ m cat) pos l True]] + (Arg (AB cat pos b), l@(LV _)) -> return [[PNonterm (CIQ m cat) pos l False]] + ---- ?? + _ -> prtBad "cannot extract record field from" arg + +{- Proof + 1 @ 4 catVarCF :: CFCat +PNonterm CIdent Integer Label Bool -- cat, position, part/bind, whether arg + + +mkCFPredef :: [CFCat] -> CFPredef +mkCFPredef cats s = + [(cat, metaCFFun) | TM _ _ <- [s], cat <- cats] ++ + [(cat, varCFFun x) | TV x <- [s], cat <- cats] ++ + [(cat, lit) | TL t <- [s], Just (cat,lit) <- [getCFLiteral t]] ++ + [(cat, lit) | TI i <- [s], Just (cat,lit) <- [getCFLiteral (show i)]] --- +-} diff --git a/src/GF/CF/ChartParser.hs b/src/GF/CF/ChartParser.hs new file mode 100644 index 000000000..09d538244 --- /dev/null +++ b/src/GF/CF/ChartParser.hs @@ -0,0 +1,166 @@ + +module ChartParser (chartParser) where + +import Operations +import CF +import CFIdent +import PPrCF (prCFItem) + +import OrdSet +import OrdMap2 + +import List (groupBy) + +type Token = CFTok +type Name = CFFun +type Category = CFItem +type Grammar = ([Production], Terminal) +type Production = (Name, Category, [Category]) +type Terminal = Token -> [(Category, Maybe Name)] +type GParser = Grammar -> Category -> [Token] -> ([ParseTree],String) +data ParseTree = Node Name Category [ParseTree] | Leaf Token + +-------------------------------------------------- +-- converting between GF parsing and CFG parsing + +buildParser :: GParser -> CF -> CFCat -> CFParser +buildParser gparser cf = parse + where + parse = \start input -> + let parse2 = parse' (CFNonterm start) input in + ([(parse2tree t, []) | t <- fst parse2], snd parse2) + parse' = gparser (cf2grammar cf) + +cf2grammar :: CF -> Grammar +cf2grammar cf = (productions, terminal) + where + productions = [ (name, CFNonterm cat, rhs) | + (name, (cat, rhs)) <- cfRules ] + terminal tok = [ (CFNonterm cat, Just name) | + (cat, name) <- cfPredef tok ] + ++ + [ (item, Nothing) | + item <- elems rhsItems, + matchCFTerm item tok ] + cfRules = rulesOfCF cf + cfPredef = predefOfCF cf + rhsItems :: Set Category + rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ] + +parse2tree :: ParseTree -> CFTree +parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees')) + where + trees' = [ parse2tree t | t@(Node _ _ _) <- trees ] -- ignore leafs + +maybeNode :: Maybe Name -> Category -> Token -> ParseTree +maybeNode (Just name) cat tok = Node name cat [Leaf tok] +maybeNode Nothing _ tok = Leaf tok + + +-------------------------------------------------- +-- chart parsing (bottom up kilbury-like) + +type Chart = [CState] +type CState = Set Edge +type Edge = (Int, Category, [Category]) +type Passive = (Int, Int, Category) + +chartParser :: CF -> CFCat -> CFParser +chartParser = buildParser chartParser0 + +chartParser0 :: GParser +chartParser0 (productions, terminal) = cparse + where + emptyCats :: Set Category + emptyCats = empties emptySet + where + empties cats | cats==cats' = cats + | otherwise = empties cats' + where cats' = makeSet [ cat | (_, cat, rhs) <- productions, + all (`elemSet` cats) rhs ] + + grammarMap :: Map Category [(Name, [Category])] + grammarMap = makeMapWith (++) + [ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ] + + leftCornerMap :: Map Category (Set (Category,[Category])) + leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) | + (_, b, abs) <- productions, + (a : bs) <- removeNullable abs ] + + removeNullable :: [Category] -> [[Category]] + removeNullable [] = [] + removeNullable cats@(cat:cats') + | cat `elemSet` emptyCats = cats : removeNullable cats' + | otherwise = [cats] + + cparse :: Category -> [Token] -> ([ParseTree], String) + cparse start input = case lookup (0, length input, start) edgeTrees of + Just trees -> (trees, "Chart:" ++++ prChart passiveEdges) + Nothing -> ([], "Chart:" ++++ prChart passiveEdges) + where + finalChart :: Chart + finalChart = map buildState initialChart + + finalChartMap :: [Map Category (Set Edge)] + finalChartMap = map stateMap finalChart + + stateMap :: CState -> Map Category (Set Edge) + stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) | + (i, b, a:bs) <- elems state ] + + initialChart :: Chart + initialChart = emptySet : map initialState (zip [0..] input) + where initialState (j, sym) = makeSet [ (j, cat, []) | + (cat, _) <- terminal sym ] + + buildState :: CState -> CState + buildState = limit more + where more (j, a, []) = ordSet [ (j, b, bs) | + (b, bs) <- elems (lookupWith emptySet leftCornerMap a) ] + <++> + lookupWith emptySet (finalChartMap !! j) a + more (j, b, a:bs) = ordSet [ (j, b, bs) | + a `elemSet` emptyCats ] + + passiveEdges :: [Passive] + passiveEdges = [ (i, j, cat) | + (j, state) <- zip [0..] finalChart, + (i, cat, []) <- elems state ] + ++ + [ (i, i, cat) | + i <- [0 .. length input], + cat <- elems emptyCats ] + + edgeTrees :: [ (Passive, [ParseTree]) ] + edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ] + + edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])] + edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) | + ((i,j,c), trees) <- edgeTrees ] + + treesFor :: Passive -> [ParseTree] + treesFor (i, j, cat) = [ Node name cat trees | + (name, rhs) <- lookupWith [] grammarMap cat, + trees <- children rhs i j ] + ++ + [ maybeNode name cat tok | + i == j-1, + let tok = input !! i, + Just name <- [lookup cat (terminal tok)] ] + + children :: [Category] -> Int -> Int -> [[ParseTree]] + children [] i k = [ [] | i == k ] + children (c:cs) i k = [ tree : rest | + i <= k, + (j, trees) <- lookupWith [] edgeTreesMap (i,c), + rest <- children cs j k, + tree <- trees ] + + +-- AR 10/12/2002 + +prChart :: [Passive] -> String +prChart = unlines . map (unwords . map prOne) . positions where + prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it + positions = groupBy (\ (i,_,_) (j,_,_) -> i == j) diff --git a/src/GF/CF/PPrCF.hs b/src/GF/CF/PPrCF.hs new file mode 100644 index 000000000..ff4b64e66 --- /dev/null +++ b/src/GF/CF/PPrCF.hs @@ -0,0 +1,59 @@ +module PPrCF where + +import Operations +import CF +import CFIdent +import AbsGFC +import PrGrammar + +-- printing and parsing CF grammars, rules, and trees AR 26/1/2000 -- 9/6/2003 +---- use the Print class instead! + +prCF :: CF -> String +prCF = unlines . (map prCFRule) . rulesOfCF -- hiding the literal recogn function + +prCFTree :: CFTree -> String +prCFTree (CFTree (fun, (_,trees))) = prCFFun fun ++ prs trees where + prs [] = "" + prs ts = " " ++ unwords (map ps ts) + ps t@(CFTree (_,(_,[]))) = prCFTree t + ps t = prParenth (prCFTree t) + +prCFRule :: CFRule -> String +prCFRule (fun,(cat,its)) = + prCFFun fun ++ "." +++ prCFCat cat +++ "::=" +++ + unwords (map prCFItem its) +++ ";" + +prCFFun :: CFFun -> String +prCFFun = prCFFun' True ---- False -- print profiles for debug + +prCFFun' :: Bool -> CFFun -> String +prCFFun' profs (CFFun (t, p)) = prt t ++ pp p where + pp p = if (not profs || normal p) then "" else "_" ++ concat (map show p) + normal p = and [x==y && null b | ((b,x),y) <- zip p (map (:[]) [0..])] + +prCFCat :: CFCat -> String +prCFCat (CFCat (c,l)) = prt c ++ "-" ++ prt l ---- + +prCFItem (CFNonterm c) = prCFCat c +prCFItem (CFTerm a) = prRegExp a + +prRegExp (RegAlts tt) = case tt of + [t] -> prQuotedString t + _ -> prParenth (prTList " | " (map prQuotedString tt)) + +{- ---- +-- rules have an amazingly easy parser, if we use the format +-- fun. C -> item1 item2 ... where unquoted items are treated as cats +-- Actually would be nice to add profiles to this. + +getCFRule :: String -> Maybe CFRule +getCFRule s = getcf (wrds s) where + getcf ww | length ww > 2 && ww !! 2 `elem` ["->", "::="] = + Just (string2CFFun (init fun), (string2CFCat cat, map mkIt its)) where + fun : cat : _ : its = words s + mkIt ('"':w@(_:_)) = atomCFTerm (string2CFTok (init w)) + mkIt w = CFNonterm (string2CFCat w) + getcf _ = Nothing + wrds = takeWhile (/= ";") . words -- to permit semicolon in the end +-}
\ No newline at end of file diff --git a/src/GF/CF/Profile.hs b/src/GF/CF/Profile.hs new file mode 100644 index 000000000..6dbb5f85a --- /dev/null +++ b/src/GF/CF/Profile.hs @@ -0,0 +1,95 @@ +module Profile (postParse) where + +import AbsGFC +import GFC +import qualified Ident as I +import CMacros +---import MMacros +import CF +import CFIdent +import PPrCF -- for error msg +import PrGrammar + +import Operations + +import Monad +import List (nub) + + +-- restoring parse trees for discontinuous constituents, bindings, etc. AR 25/1/2001 +-- revised 8/4/2002 for the new profile structure + +postParse :: CFTree -> Err Exp +postParse tree = do + iterm <- errIn "postprocessing initial parse tree" $ tree2term tree + return $ term2trm iterm + +-- an intermediate data structure +data ITerm = ITerm (Atom, BindVs) [ITerm] | IMeta deriving (Eq,Show) +type BindVs = [[I.Ident]] + +-- the job is done in two passes: +-- (1) tree2term: restore constituent order from Profile +-- (2) term2trm: restore Bindings from Binds + +tree2term :: CFTree -> Err ITerm +tree2term (CFTree (cff@(CFFun (fun,pro)), (_,trees))) = case fun of + AM _ -> return IMeta + _ -> do + args <- mapM mkArg pro + binds <- mapM mkBinds pro + return $ ITerm (fun, binds) args + where + mkArg (_,arg) = case arg of + [x] -> do -- one occurrence + trx <- trees !? x + tree2term trx + [] -> return IMeta -- suppression + _ -> do -- reduplication + trees' <- mapM (trees !?) arg + xs1 <- mapM tree2term trees' + xs2 <- checkArity xs1 + unif xs2 + + checkArity xs = if length (nub [length xx | ITerm _ xx <- xs']) > 1 + then Bad "arity error" + else return xs' + where xs' = [t | t@(ITerm _ _) <- xs] + unif [] = return $ IMeta + unif xs@(ITerm fp@(f,_) xx : ts) = do + let hs = [h | ITerm (h,_) _ <- ts] + testErr (all (==f) hs) -- if fails, hs must be nonempty + ("unification expects" +++ prt f +++ "but found" +++ prt (head hs)) + xx' <- mapM unifArg [0 .. length xx - 1] + return $ ITerm fp xx' + where + unifArg i = tryUnif [zz !! i | ITerm _ zz <- xs] + tryUnif xx = case [t | t@(ITerm _ _) <- xx] of + [] -> return IMeta + x:xs -> if all (==x) xs + then return x + else Bad "failed to unify" + + mkBinds (xss,_) = mapM mkBind xss + mkBind xs = do + ts <- mapM (trees !?) xs + let vs = [x | CFTree (CFFun (AV x,_),(_,[])) <- ts] + testErr (length ts == length vs) "non-variable in bound position" + case vs of + [x] -> return x + [] -> return $ I.identC "h_" ---- uBoundVar + y:ys -> do + testErr (all (==y) ys) ("fail to unify bindings of" +++ prt y) + return y + +term2trm :: ITerm -> Exp +term2trm IMeta = EAtom (AM 0) ---- mExp0 +term2trm (ITerm (fun, binds) terms) = + let bterms = zip binds terms + in mkAppAtom fun [mkAbsR xs (term2trm t) | (xs,t) <- bterms] + + --- these are deprecated + where + mkAbsR c e = foldr EAbs e c + mkAppAtom a = mkApp (EAtom a) + mkApp = foldl EApp
\ No newline at end of file |
