diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/CF/CF.hs | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/CF/CF.hs')
| -rw-r--r-- | src-3.0/GF/CF/CF.hs | 213 |
1 files changed, 213 insertions, 0 deletions
diff --git a/src-3.0/GF/CF/CF.hs b/src-3.0/GF/CF/CF.hs new file mode 100644 index 000000000..9233e905a --- /dev/null +++ b/src-3.0/GF/CF/CF.hs @@ -0,0 +1,213 @@ +---------------------------------------------------------------------- +-- | +-- Module : CF +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:21:07 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.6 $ +-- +-- context-free grammars. AR 15\/12\/1999 -- 30\/3\/2000 -- 2\/6\/2001 -- 3\/12\/2001 +----------------------------------------------------------------------------- + +module GF.CF.CF (-- * Types + CF(..), CFRule, CFRuleGroup, + CFItem(..), CFTree(..), CFPredef, CFParser, + RegExp(..), CFWord, + -- * Functions + cfParseResults, + -- ** to construct CF grammars + emptyCF, emptyCFPredef, rules2CF, groupCFRules, + -- ** to construct rules + atomCFRule, atomCFTerm, atomRegExp, altsCFTerm, + -- ** to construct trees + atomCFTree, buildCFTree, + -- ** to decide whether a token matches a terminal item + matchCFTerm, satRegExp, + -- ** to analyse a CF grammar + catsOfCF, rulesOfCF, ruleGroupsOfCF, rulesForCFCat, + valCatCF, valItemsCF, valFunCF, + startCat, predefOfCF, appCFPredef, valCFItem, + cfTokens, wordsOfRegExp, forCFItem, + isCircularCF, predefRules + ) where + +import GF.Data.Operations +import GF.Data.Str +import GF.Canon.AbsGFC +import GF.Canon.GFC +import GF.CF.CFIdent +import Data.List (nub,nubBy) +import Data.Char (isUpper, isLower, toUpper, toLower) + +-- CF grammar data types + +-- | abstract type CF. +-- Invariant: each category has all its rules grouped with it +-- also: the list is never empty (the category is just missing then) +newtype CF = CF ([CFRuleGroup], CFPredef) +type CFRule = (CFFun, (CFCat, [CFItem])) +type CFRuleGroup = (CFCat,[CFRule]) + +-- | CFPredef is a hack for variable symbols and literals; normally = @const []@ +data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show) + +newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show) + +-- | recognize literals, variables, etc +type CFPredef = CFTok -> [(CFCat, CFFun)] + +-- | Wadler style + return information +type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String) + +cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree] +cfParseResults rs = [b | (b,[]) <- fst rs] + +-- | terminals are regular expressions on words; to be completed to full regexp +data RegExp = + RegAlts [CFWord] -- ^ list of alternative words + | RegSpec CFTok -- ^ special token + deriving (Eq, Ord, Show) + +type CFWord = String + +-- the above types should be kept abstract, and the following functions used + +-- to construct CF grammars + +emptyCF :: CF +emptyCF = CF ([], emptyCFPredef) + +emptyCFPredef :: CFPredef +emptyCFPredef = const [] + +rules2CF :: [CFRule] -> CF +rules2CF rs = CF (groupCFRules rs, emptyCFPredef) + +groupCFRules :: [CFRule] -> [(CFCat,[CFRule])] +groupCFRules = foldr ins [] where + ins rule crs = case crs of + (c,r) : rs | compatCF c cat -> (c,rule:r) : rs + cr : rs -> cr : ins rule rs + _ -> [(cat,[rule])] + where + cat = valCatCF rule + +-- to construct rules + +-- | make a rule from a single token without constituents +atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule +atomCFRule c f s = (f, (c, [atomCFTerm s])) + +-- | usual terminal +atomCFTerm :: CFTok -> CFItem +atomCFTerm = CFTerm . atomRegExp + +atomRegExp :: CFTok -> RegExp +atomRegExp t = case t of + TS s -> RegAlts [s] + _ -> RegSpec t + +-- | terminal consisting of alternatives +altsCFTerm :: [String] -> CFItem +altsCFTerm = CFTerm . RegAlts + + +-- to construct trees + +-- | make a tree without constituents +atomCFTree :: CFCat -> CFFun -> CFTree +atomCFTree c f = buildCFTree c f [] + +-- | make a tree with constituents. +buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree +buildCFTree c f trees = CFTree (f,(c,trees)) + +{- ---- +cfMeta0 :: CFTree +cfMeta0 = atomCFTree uCFCat metaCFFun + +-- used in happy +litCFTree :: String -> CFTree --- Maybe CFTree +litCFTree s = maybe cfMeta0 id $ do + (c,f) <- getCFLiteral s + return $ buildCFTree c f [] +-} + +-- to decide whether a token matches a terminal item + +matchCFTerm :: CFItem -> CFTok -> Bool +matchCFTerm (CFTerm t) s = satRegExp t s +matchCFTerm _ _ = False + +satRegExp :: RegExp -> CFTok -> Bool +satRegExp r t = case (r,t) of + (RegAlts tt, TS s) -> elem s tt + (RegAlts tt, TC s) -> or [elem s' tt | s' <- caseUpperOrLower s] + (RegSpec x, _) -> t == x --- + _ -> False + where + caseUpperOrLower s = case s of + c:cs | isUpper c -> [s, toLower c : cs] + c:cs | isLower c -> [s, toUpper c : cs] + _ -> [s] + +-- to analyse a CF grammar + +catsOfCF :: CF -> [CFCat] +catsOfCF (CF (rr,_)) = map fst rr + +rulesOfCF :: CF -> [CFRule] +rulesOfCF (CF (rr,_)) = concatMap snd rr + +ruleGroupsOfCF :: CF -> [(CFCat,[CFRule])] +ruleGroupsOfCF (CF (rr,_)) = rr + +rulesForCFCat :: CF -> CFCat -> [CFRule] +rulesForCFCat (CF (rr,_)) cat = maybe [] id $ lookup cat rr + +valCatCF :: CFRule -> CFCat +valCatCF (_,(c,_)) = c + +valItemsCF :: CFRule -> [CFItem] +valItemsCF (_,(_,i)) = i + +valFunCF :: CFRule -> CFFun +valFunCF (f,(_,_)) = f + +startCat :: CF -> CFCat +startCat (CF (rr,_)) = fst (head rr) --- hardly useful + +predefOfCF :: CF -> CFPredef +predefOfCF (CF (_,f)) = f + +appCFPredef :: CF -> CFTok -> [(CFCat, CFFun)] +appCFPredef = ($) . predefOfCF + +valCFItem :: CFItem -> Either RegExp CFCat +valCFItem (CFTerm r) = Left r +valCFItem (CFNonterm nt) = Right nt + +cfTokens :: CF -> [CFWord] +cfTokens cf = nub $ concat $ [ wordsOfRegExp i | r <- rulesOfCF cf, + CFTerm i <- valItemsCF r] + +wordsOfRegExp :: RegExp -> [CFWord] +wordsOfRegExp (RegAlts tt) = tt +wordsOfRegExp _ = [] + +forCFItem :: CFTok -> CFRule -> Bool +forCFItem a (_,(_, CFTerm r : _)) = satRegExp r a +forCFItem _ _ = False + +-- | we should make a test of circular chains, too +isCircularCF :: CFRule -> Bool +isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c +isCircularCF _ = False + +-- | coercion to the older predef cf type +predefRules :: CFPredef -> CFTok -> [CFRule] +predefRules pre s = [atomCFRule c f s | (c,f) <- pre s] + |
