summaryrefslogtreecommitdiff
path: root/src-3.0/GF/CF/CFIdent.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
committeraarne <aarne@cs.chalmers.se>2008-05-21 09:26:44 +0000
commit055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch)
tree0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/CF/CFIdent.hs
parent915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff)
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/CF/CFIdent.hs')
-rw-r--r--src-3.0/GF/CF/CFIdent.hs253
1 files changed, 253 insertions, 0 deletions
diff --git a/src-3.0/GF/CF/CFIdent.hs b/src-3.0/GF/CF/CFIdent.hs
new file mode 100644
index 000000000..02ee482c0
--- /dev/null
+++ b/src-3.0/GF/CF/CFIdent.hs
@@ -0,0 +1,253 @@
+----------------------------------------------------------------------
+-- |
+-- Module : CFIdent
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/11/14 16:03:40 $
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.13 $
+--
+-- symbols (categories, functions) for context-free grammars.
+-----------------------------------------------------------------------------
+
+module GF.CF.CFIdent (-- * Tokens and categories
+ CFTok(..), CFCat(..),
+ tS, tC, tL, tI, tF, tV, tM, tInt,
+ prCFTok,
+ -- * Function names and profiles
+ CFFun(..), Profile,
+ wordsCFTok,
+ -- * CF Functions
+ mkCFFun, varCFFun, consCFFun, string2CFFun, stringCFFun,
+ intCFFun, floatCFFun, dummyCFFun,
+ cfFun2String, cfFun2Ident, cfFun2Profile, metaCFFun,
+ -- * CF Categories
+ mkCIdent, ident2CFCat, labels2CFCat, string2CFCat,
+ catVarCF, cat2CFCat, cfCatString, cfCatInt,cfCatFloat,
+ moduleOfCFCat, cfCat2Cat, cfCat2Ident, lexCFCat,
+ -- * CF Tokens
+ string2CFTok, str2cftoks,
+ -- * Comparisons
+ compatToks, compatTok, compatCFFun, compatCF,
+ wordsLits
+ ) where
+
+import GF.Data.Operations
+import GF.Canon.GFC
+import GF.Infra.Ident
+import GF.Grammar.Values (cPredefAbs)
+import GF.Canon.AbsGFC
+import GF.Grammar.Macros (ident2label)
+import GF.Grammar.PrGrammar
+import GF.Data.Str
+import Data.Char (toLower, toUpper, isSpace)
+import Data.List (intersperse)
+
+-- | this type should be abstract
+data CFTok =
+ TS String -- ^ normal strings
+ | TC String -- ^ strings that are ambiguous between upper or lower case
+ | TL String -- ^ string literals
+ | TI Integer -- ^ integer literals
+ | TF Double -- ^ float literals
+ | TV Ident -- ^ variables
+ | TM Int String -- ^ metavariables; the integer identifies it
+ deriving (Eq, Ord, Show)
+
+-- | this type should be abstract
+newtype CFCat = CFCat (CIdent,Label) deriving (Eq, Ord, Show)
+
+tS :: String -> CFTok
+tC :: String -> CFTok
+tL :: String -> CFTok
+tI :: String -> CFTok
+tF :: String -> CFTok
+tV :: String -> CFTok
+tM :: String -> CFTok
+
+tS = TS
+tC = TC
+tL = TL
+tI = TI . read
+tF = TF . read
+tV = TV . identC
+tM = TM 0
+
+tInt :: Integer -> CFTok
+tInt = TI
+
+prCFTok :: CFTok -> String
+prCFTok t = case t of
+ TS s -> s
+ TC s -> s
+ TL s -> s
+ TI i -> show i
+ TF i -> show i
+ TV x -> prt x
+ TM i m -> m --- "?" --- m
+
+-- | to build trees: the Atom contains a GF function, @Cn | Meta | Vr | Literal@
+newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Ord,Show)
+-- - - - - - - - - - - - - - - - - - - - - ^^^ added by peb, 21/5-04
+
+type Profile = [([[Int]],[Int])]
+
+wordsCFTok :: CFTok -> [String]
+wordsCFTok t = case t of
+ TC (c:cs) -> [c':cs | c' <- [toUpper c, toLower c]]
+ _ -> [prCFTok t]
+
+-- the following functions should be used instead of constructors
+
+-- to construct CF functions
+
+mkCFFun :: Atom -> CFFun
+mkCFFun t = CFFun (t,[])
+
+varCFFun :: Ident -> CFFun
+varCFFun = mkCFFun . AV
+
+consCFFun :: CIdent -> CFFun
+consCFFun = mkCFFun . AC
+
+-- | standard way of making cf fun
+string2CFFun :: String -> String -> CFFun
+string2CFFun m c = consCFFun $ mkCIdent m c
+
+stringCFFun :: String -> CFFun
+stringCFFun = mkCFFun . AS
+
+intCFFun :: Integer -> CFFun
+intCFFun = mkCFFun . AI
+
+floatCFFun :: Double -> CFFun
+floatCFFun = mkCFFun . AF
+
+-- | used in lexer-by-need rules
+dummyCFFun :: CFFun
+dummyCFFun = varCFFun $ identC "_"
+
+cfFun2String :: CFFun -> String
+cfFun2String (CFFun (f,_)) = prt f
+
+cfFun2Ident :: CFFun -> Ident
+cfFun2Ident (CFFun (f,_)) = identC $ prt_ f ---
+
+cfFun2Profile :: CFFun -> Profile
+cfFun2Profile (CFFun (_,p)) = p
+
+{- ----
+strPro2cfFun :: String -> Profile -> CFFun
+strPro2cfFun str p = (CFFun (AC (Ident str), p))
+-}
+
+metaCFFun :: CFFun
+metaCFFun = mkCFFun $ AM 0
+
+-- to construct CF categories
+
+-- | belongs elsewhere
+mkCIdent :: String -> String -> CIdent
+mkCIdent m c = CIQ (identC m) (identC c)
+
+ident2CFCat :: CIdent -> Ident -> CFCat
+ident2CFCat mc d = CFCat (mc, L d)
+
+labels2CFCat :: CIdent -> [Label] -> CFCat
+labels2CFCat mc d = CFCat (mc, L (identC (concat (intersperse "." (map prt d))))) ----
+
+-- | standard way of making cf cat: label s
+string2CFCat :: String -> String -> CFCat
+string2CFCat m c = ident2CFCat (mkCIdent m c) (identC "s")
+
+idents2CFCat :: Ident -> Ident -> CFCat
+idents2CFCat m c = ident2CFCat (CIQ m c) (identC "s")
+
+catVarCF :: CFCat
+catVarCF = ident2CFCat (mkCIdent "_" "#Var") (identC "_") ----
+
+cat2CFCat :: (Ident,Ident) -> CFCat
+cat2CFCat = uncurry idents2CFCat
+
+-- | literals
+cfCatString :: CFCat
+cfCatString = string2CFCat (prt cPredefAbs) "String"
+
+cfCatInt, cfCatFloat :: CFCat
+cfCatInt = string2CFCat (prt cPredefAbs) "Int"
+cfCatFloat = string2CFCat (prt cPredefAbs) "Float"
+
+
+
+{- ----
+uCFCat :: CFCat
+uCFCat = cat2CFCat uCat
+-}
+
+moduleOfCFCat :: CFCat -> Ident
+moduleOfCFCat (CFCat (CIQ m _, _)) = m
+
+-- | the opposite direction
+cfCat2Cat :: CFCat -> (Ident,Ident)
+cfCat2Cat (CFCat (CIQ m c,_)) = (m,c)
+
+cfCat2Ident :: CFCat -> Ident
+cfCat2Ident = snd . cfCat2Cat
+
+lexCFCat :: CFCat -> CFCat
+lexCFCat cat = ident2CFCat (uncurry CIQ (cfCat2Cat cat)) (identC "*")
+
+-- to construct CF tokens
+
+string2CFTok :: String -> CFTok
+string2CFTok = tS
+
+str2cftoks :: Str -> [CFTok]
+str2cftoks = map tS . wordsLits . sstr
+
+-- decide if two token lists look the same (in parser postprocessing)
+
+compatToks :: [CFTok] -> [CFTok] -> Bool
+compatToks ts us = and [compatTok t u | (t,u) <- zip ts us]
+
+compatTok :: CFTok -> CFTok -> Bool
+compatTok (TM _ _) _ = True --- hack because metas are renamed
+compatTok _ (TM _ _) = True
+compatTok t u = any (`elem` (alts t)) (alts u) where
+ alts u = case u of
+ TC (c:s) -> [toLower c : s, toUpper c : s]
+ TL s -> [s, prQuotedString s]
+ _ -> [prCFTok u]
+
+-- | decide if two CFFuns have the same function head (profiles may differ)
+compatCFFun :: CFFun -> CFFun -> Bool
+compatCFFun (CFFun (f,_)) (CFFun (g,_)) = f == g
+
+-- | decide whether two categories match
+-- the modifiers can be from different modules, but on the same extension
+-- path, so there is no clash, and they can be safely ignored ---
+compatCF :: CFCat -> CFCat -> Bool
+----compatCF = (==)
+compatCF (CFCat (CIQ _ c, l)) (CFCat (CIQ _ c', l')) = c==c' && l==l'
+
+-- | Like 'words', but does not split on whitespace inside
+-- double quotes.wordsLits :: String -> [String]
+-- Also treats escaped quotes in quotes (AR 21/12/2005) by breaks
+-- instead of break
+wordsLits [] = []
+wordsLits (c:cs) | isSpace c = wordsLits (dropWhile isSpace cs)
+ | isQuote c
+ = let (l,rs) = breaks (==c) cs
+ rs' = drop 1 rs
+ in ([c]++l++[c]):wordsLits rs'
+ | otherwise = let (w,rs) = break isSpaceQ cs
+ in (c:w):wordsLits rs
+ where
+ breaks c cs = case break c cs of
+ (l@(_:_),d:rs) | last l == '\\' ->
+ let (r,ts) = breaks c rs in (l++[d]++r, ts)
+ v -> v
+ isQuote c = elem c "\"'"
+ isSpaceQ c = isSpace c ---- || isQuote c