1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
|
----------------------------------------------------------------------
-- |
-- Module : CFIdent
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/14 16:03:40 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.13 $
--
-- symbols (categories, functions) for context-free grammars.
-----------------------------------------------------------------------------
module GF.CF.CFIdent (-- * Tokens and categories
CFTok(..), CFCat(..),
tS, tC, tL, tI, tF, tV, tM, tInt,
prCFTok,
-- * Function names and profiles
CFFun(..), Profile,
wordsCFTok,
-- * CF Functions
mkCFFun, varCFFun, consCFFun, string2CFFun, stringCFFun,
intCFFun, floatCFFun, dummyCFFun,
cfFun2String, cfFun2Ident, cfFun2Profile, metaCFFun,
-- * CF Categories
mkCIdent, ident2CFCat, labels2CFCat, string2CFCat,
catVarCF, cat2CFCat, cfCatString, cfCatInt,cfCatFloat,
moduleOfCFCat, cfCat2Cat, cfCat2Ident, lexCFCat,
-- * CF Tokens
string2CFTok, str2cftoks,
-- * Comparisons
compatToks, compatTok, compatCFFun, compatCF,
wordsLits
) where
import GF.Data.Operations
import GF.Canon.GFC
import GF.Infra.Ident
import GF.Grammar.Values (cPredefAbs)
import GF.Canon.AbsGFC
import GF.Grammar.Macros (ident2label)
import GF.Grammar.PrGrammar
import GF.Data.Str
import Data.Char (toLower, toUpper, isSpace)
import Data.List (intersperse)
-- | this type should be abstract
data CFTok =
TS String -- ^ normal strings
| TC String -- ^ strings that are ambiguous between upper or lower case
| TL String -- ^ string literals
| TI Integer -- ^ integer literals
| TF Double -- ^ float literals
| TV Ident -- ^ variables
| TM Int String -- ^ metavariables; the integer identifies it
deriving (Eq, Ord, Show)
-- | this type should be abstract
newtype CFCat = CFCat (CIdent,Label) deriving (Eq, Ord, Show)
tS :: String -> CFTok
tC :: String -> CFTok
tL :: String -> CFTok
tI :: String -> CFTok
tF :: String -> CFTok
tV :: String -> CFTok
tM :: String -> CFTok
tS = TS
tC = TC
tL = TL
tI = TI . read
tF = TF . read
tV = TV . identC
tM = TM 0
tInt :: Integer -> CFTok
tInt = TI
prCFTok :: CFTok -> String
prCFTok t = case t of
TS s -> s
TC s -> s
TL s -> s
TI i -> show i
TF i -> show i
TV x -> prt x
TM i m -> m --- "?" --- m
-- | to build trees: the Atom contains a GF function, @Cn | Meta | Vr | Literal@
newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Ord,Show)
-- - - - - - - - - - - - - - - - - - - - - ^^^ added by peb, 21/5-04
type Profile = [([[Int]],[Int])]
wordsCFTok :: CFTok -> [String]
wordsCFTok t = case t of
TC (c:cs) -> [c':cs | c' <- [toUpper c, toLower c]]
_ -> [prCFTok t]
-- the following functions should be used instead of constructors
-- to construct CF functions
mkCFFun :: Atom -> CFFun
mkCFFun t = CFFun (t,[])
varCFFun :: Ident -> CFFun
varCFFun = mkCFFun . AV
consCFFun :: CIdent -> CFFun
consCFFun = mkCFFun . AC
-- | standard way of making cf fun
string2CFFun :: String -> String -> CFFun
string2CFFun m c = consCFFun $ mkCIdent m c
stringCFFun :: String -> CFFun
stringCFFun = mkCFFun . AS
intCFFun :: Integer -> CFFun
intCFFun = mkCFFun . AI
floatCFFun :: Double -> CFFun
floatCFFun = mkCFFun . AF
-- | used in lexer-by-need rules
dummyCFFun :: CFFun
dummyCFFun = varCFFun $ identC "_"
cfFun2String :: CFFun -> String
cfFun2String (CFFun (f,_)) = prt f
cfFun2Ident :: CFFun -> Ident
cfFun2Ident (CFFun (f,_)) = identC $ prt_ f ---
cfFun2Profile :: CFFun -> Profile
cfFun2Profile (CFFun (_,p)) = p
{- ----
strPro2cfFun :: String -> Profile -> CFFun
strPro2cfFun str p = (CFFun (AC (Ident str), p))
-}
metaCFFun :: CFFun
metaCFFun = mkCFFun $ AM 0
-- to construct CF categories
-- | belongs elsewhere
mkCIdent :: String -> String -> CIdent
mkCIdent m c = CIQ (identC m) (identC c)
ident2CFCat :: CIdent -> Ident -> CFCat
ident2CFCat mc d = CFCat (mc, L d)
labels2CFCat :: CIdent -> [Label] -> CFCat
labels2CFCat mc d = CFCat (mc, L (identC (concat (intersperse "." (map prt d))))) ----
-- | standard way of making cf cat: label s
string2CFCat :: String -> String -> CFCat
string2CFCat m c = ident2CFCat (mkCIdent m c) (identC "s")
idents2CFCat :: Ident -> Ident -> CFCat
idents2CFCat m c = ident2CFCat (CIQ m c) (identC "s")
catVarCF :: CFCat
catVarCF = ident2CFCat (mkCIdent "_" "#Var") (identC "_") ----
cat2CFCat :: (Ident,Ident) -> CFCat
cat2CFCat = uncurry idents2CFCat
-- | literals
cfCatString :: CFCat
cfCatString = string2CFCat (prt cPredefAbs) "String"
cfCatInt, cfCatFloat :: CFCat
cfCatInt = string2CFCat (prt cPredefAbs) "Int"
cfCatFloat = string2CFCat (prt cPredefAbs) "Float"
{- ----
uCFCat :: CFCat
uCFCat = cat2CFCat uCat
-}
moduleOfCFCat :: CFCat -> Ident
moduleOfCFCat (CFCat (CIQ m _, _)) = m
-- | the opposite direction
cfCat2Cat :: CFCat -> (Ident,Ident)
cfCat2Cat (CFCat (CIQ m c,_)) = (m,c)
cfCat2Ident :: CFCat -> Ident
cfCat2Ident = snd . cfCat2Cat
lexCFCat :: CFCat -> CFCat
lexCFCat cat = ident2CFCat (uncurry CIQ (cfCat2Cat cat)) (identC "*")
-- to construct CF tokens
string2CFTok :: String -> CFTok
string2CFTok = tS
str2cftoks :: Str -> [CFTok]
str2cftoks = map tS . wordsLits . sstr
-- decide if two token lists look the same (in parser postprocessing)
compatToks :: [CFTok] -> [CFTok] -> Bool
compatToks ts us = and [compatTok t u | (t,u) <- zip ts us]
compatTok :: CFTok -> CFTok -> Bool
compatTok (TM _ _) _ = True --- hack because metas are renamed
compatTok _ (TM _ _) = True
compatTok t u = any (`elem` (alts t)) (alts u) where
alts u = case u of
TC (c:s) -> [toLower c : s, toUpper c : s]
TL s -> [s, prQuotedString s]
_ -> [prCFTok u]
-- | decide if two CFFuns have the same function head (profiles may differ)
compatCFFun :: CFFun -> CFFun -> Bool
compatCFFun (CFFun (f,_)) (CFFun (g,_)) = f == g
-- | decide whether two categories match
-- the modifiers can be from different modules, but on the same extension
-- path, so there is no clash, and they can be safely ignored ---
compatCF :: CFCat -> CFCat -> Bool
----compatCF = (==)
compatCF (CFCat (CIQ _ c, l)) (CFCat (CIQ _ c', l')) = c==c' && l==l'
-- | Like 'words', but does not split on whitespace inside
-- double quotes.wordsLits :: String -> [String]
-- Also treats escaped quotes in quotes (AR 21/12/2005) by breaks
-- instead of break
wordsLits [] = []
wordsLits (c:cs) | isSpace c = wordsLits (dropWhile isSpace cs)
| c == '\'' || c == '"'
= let (l,rs) = breaks (==c) cs
rs' = drop 1 rs
in ([c]++l++[c]):wordsLits rs'
| otherwise = let (w,rs) = break isSpace cs
in (c:w):wordsLits rs
where
breaks c cs = case break c cs of
(l@(_:_),d:rs) | last l == '\\' ->
let (r,ts) = breaks c rs in (l++[d]++r, ts)
v -> v
|