summaryrefslogtreecommitdiff
path: root/src-3.0/GF/CF/CF.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/CF/CF.hs
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/CF/CF.hs')
-rw-r--r--src-3.0/GF/CF/CF.hs213
1 files changed, 213 insertions, 0 deletions
diff --git a/src-3.0/GF/CF/CF.hs b/src-3.0/GF/CF/CF.hs
new file mode 100644
index 000000000..9233e905a
--- /dev/null
+++ b/src-3.0/GF/CF/CF.hs
@@ -0,0 +1,213 @@
+----------------------------------------------------------------------
+-- |
+-- Module : CF
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/04/21 16:21:07 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.6 $
+--
+-- context-free grammars. AR 15\/12\/1999 -- 30\/3\/2000 -- 2\/6\/2001 -- 3\/12\/2001
+-----------------------------------------------------------------------------
+
+module GF.CF.CF (-- * Types
+ CF(..), CFRule, CFRuleGroup,
+ CFItem(..), CFTree(..), CFPredef, CFParser,
+ RegExp(..), CFWord,
+ -- * Functions
+ cfParseResults,
+ -- ** to construct CF grammars
+ emptyCF, emptyCFPredef, rules2CF, groupCFRules,
+ -- ** to construct rules
+ atomCFRule, atomCFTerm, atomRegExp, altsCFTerm,
+ -- ** to construct trees
+ atomCFTree, buildCFTree,
+ -- ** to decide whether a token matches a terminal item
+ matchCFTerm, satRegExp,
+ -- ** to analyse a CF grammar
+ catsOfCF, rulesOfCF, ruleGroupsOfCF, rulesForCFCat,
+ valCatCF, valItemsCF, valFunCF,
+ startCat, predefOfCF, appCFPredef, valCFItem,
+ cfTokens, wordsOfRegExp, forCFItem,
+ isCircularCF, predefRules
+ ) where
+
+import GF.Data.Operations
+import GF.Data.Str
+import GF.Canon.AbsGFC
+import GF.Canon.GFC
+import GF.CF.CFIdent
+import Data.List (nub,nubBy)
+import Data.Char (isUpper, isLower, toUpper, toLower)
+
+-- CF grammar data types
+
+-- | abstract type CF.
+-- Invariant: each category has all its rules grouped with it
+-- also: the list is never empty (the category is just missing then)
+newtype CF = CF ([CFRuleGroup], CFPredef)
+type CFRule = (CFFun, (CFCat, [CFItem]))
+type CFRuleGroup = (CFCat,[CFRule])
+
+-- | CFPredef is a hack for variable symbols and literals; normally = @const []@
+data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show)
+
+newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show)
+
+-- | recognize literals, variables, etc
+type CFPredef = CFTok -> [(CFCat, CFFun)]
+
+-- | Wadler style + return information
+type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String)
+
+cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree]
+cfParseResults rs = [b | (b,[]) <- fst rs]
+
+-- | terminals are regular expressions on words; to be completed to full regexp
+data RegExp =
+ RegAlts [CFWord] -- ^ list of alternative words
+ | RegSpec CFTok -- ^ special token
+ deriving (Eq, Ord, Show)
+
+type CFWord = String
+
+-- the above types should be kept abstract, and the following functions used
+
+-- to construct CF grammars
+
+emptyCF :: CF
+emptyCF = CF ([], emptyCFPredef)
+
+emptyCFPredef :: CFPredef
+emptyCFPredef = const []
+
+rules2CF :: [CFRule] -> CF
+rules2CF rs = CF (groupCFRules rs, emptyCFPredef)
+
+groupCFRules :: [CFRule] -> [(CFCat,[CFRule])]
+groupCFRules = foldr ins [] where
+ ins rule crs = case crs of
+ (c,r) : rs | compatCF c cat -> (c,rule:r) : rs
+ cr : rs -> cr : ins rule rs
+ _ -> [(cat,[rule])]
+ where
+ cat = valCatCF rule
+
+-- to construct rules
+
+-- | make a rule from a single token without constituents
+atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule
+atomCFRule c f s = (f, (c, [atomCFTerm s]))
+
+-- | usual terminal
+atomCFTerm :: CFTok -> CFItem
+atomCFTerm = CFTerm . atomRegExp
+
+atomRegExp :: CFTok -> RegExp
+atomRegExp t = case t of
+ TS s -> RegAlts [s]
+ _ -> RegSpec t
+
+-- | terminal consisting of alternatives
+altsCFTerm :: [String] -> CFItem
+altsCFTerm = CFTerm . RegAlts
+
+
+-- to construct trees
+
+-- | make a tree without constituents
+atomCFTree :: CFCat -> CFFun -> CFTree
+atomCFTree c f = buildCFTree c f []
+
+-- | make a tree with constituents.
+buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree
+buildCFTree c f trees = CFTree (f,(c,trees))
+
+{- ----
+cfMeta0 :: CFTree
+cfMeta0 = atomCFTree uCFCat metaCFFun
+
+-- used in happy
+litCFTree :: String -> CFTree --- Maybe CFTree
+litCFTree s = maybe cfMeta0 id $ do
+ (c,f) <- getCFLiteral s
+ return $ buildCFTree c f []
+-}
+
+-- to decide whether a token matches a terminal item
+
+matchCFTerm :: CFItem -> CFTok -> Bool
+matchCFTerm (CFTerm t) s = satRegExp t s
+matchCFTerm _ _ = False
+
+satRegExp :: RegExp -> CFTok -> Bool
+satRegExp r t = case (r,t) of
+ (RegAlts tt, TS s) -> elem s tt
+ (RegAlts tt, TC s) -> or [elem s' tt | s' <- caseUpperOrLower s]
+ (RegSpec x, _) -> t == x ---
+ _ -> False
+ where
+ caseUpperOrLower s = case s of
+ c:cs | isUpper c -> [s, toLower c : cs]
+ c:cs | isLower c -> [s, toUpper c : cs]
+ _ -> [s]
+
+-- to analyse a CF grammar
+
+catsOfCF :: CF -> [CFCat]
+catsOfCF (CF (rr,_)) = map fst rr
+
+rulesOfCF :: CF -> [CFRule]
+rulesOfCF (CF (rr,_)) = concatMap snd rr
+
+ruleGroupsOfCF :: CF -> [(CFCat,[CFRule])]
+ruleGroupsOfCF (CF (rr,_)) = rr
+
+rulesForCFCat :: CF -> CFCat -> [CFRule]
+rulesForCFCat (CF (rr,_)) cat = maybe [] id $ lookup cat rr
+
+valCatCF :: CFRule -> CFCat
+valCatCF (_,(c,_)) = c
+
+valItemsCF :: CFRule -> [CFItem]
+valItemsCF (_,(_,i)) = i
+
+valFunCF :: CFRule -> CFFun
+valFunCF (f,(_,_)) = f
+
+startCat :: CF -> CFCat
+startCat (CF (rr,_)) = fst (head rr) --- hardly useful
+
+predefOfCF :: CF -> CFPredef
+predefOfCF (CF (_,f)) = f
+
+appCFPredef :: CF -> CFTok -> [(CFCat, CFFun)]
+appCFPredef = ($) . predefOfCF
+
+valCFItem :: CFItem -> Either RegExp CFCat
+valCFItem (CFTerm r) = Left r
+valCFItem (CFNonterm nt) = Right nt
+
+cfTokens :: CF -> [CFWord]
+cfTokens cf = nub $ concat $ [ wordsOfRegExp i | r <- rulesOfCF cf,
+ CFTerm i <- valItemsCF r]
+
+wordsOfRegExp :: RegExp -> [CFWord]
+wordsOfRegExp (RegAlts tt) = tt
+wordsOfRegExp _ = []
+
+forCFItem :: CFTok -> CFRule -> Bool
+forCFItem a (_,(_, CFTerm r : _)) = satRegExp r a
+forCFItem _ _ = False
+
+-- | we should make a test of circular chains, too
+isCircularCF :: CFRule -> Bool
+isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c
+isCircularCF _ = False
+
+-- | coercion to the older predef cf type
+predefRules :: CFPredef -> CFTok -> [CFRule]
+predefRules pre s = [atomCFRule c f s | (c,f) <- pre s]
+