summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2007-10-08 14:20:37 +0000
committeraarne <aarne@cs.chalmers.se>2007-10-08 14:20:37 +0000
commit38a517aafa380a58e5398b71904b054ddefa4555 (patch)
tree8696014a9fa0e4160dba7b309d5d4e765cc85b5e /src
parent5a881a90dde265720cf2d537787acfc842d0475d (diff)
UTF8 encoding of strings in terms in GFCC in gfc
Diffstat (limited to 'src')
-rw-r--r--src/GF/Devel/GrammarToGFCC.hs26
-rw-r--r--src/GF/GFCC/DataGFCC.hs31
-rw-r--r--src/GF/GFCC/Macros.hs8
3 files changed, 47 insertions, 18 deletions
diff --git a/src/GF/Devel/GrammarToGFCC.hs b/src/GF/Devel/GrammarToGFCC.hs
index d939f06ab..4fe2e6e0d 100644
--- a/src/GF/Devel/GrammarToGFCC.hs
+++ b/src/GF/Devel/GrammarToGFCC.hs
@@ -32,7 +32,7 @@ prGrammar2gfcc opts cnc gr = (abs, D.printGFCC gc) where
mkCanon2gfcc :: Options -> String -> SourceGrammar -> (String,D.GFCC)
mkCanon2gfcc opts cnc gr =
- (prIdent abs, (canon2gfcc opts . reorder abs . utf8Conv . canon2canon abs) gr)
+ (prIdent abs, (canon2gfcc opts . reorder abs . canon2canon abs) gr)
where
abs = err error id $ M.abstractOfConcrete gr (identC cnc)
@@ -69,12 +69,14 @@ canon2gfcc opts cgr@(M.MGrammar ((a,M.ModMod abm):cms)) =
js = tree2list (M.jments mo)
flags = Map.fromList [(C.CId f,x) | Opt (f,[x]) <- M.flags mo]
opers = Map.fromAscList [] -- opers will be created as optimization
+ utf = if elem (Opt ("coding",["utf8"])) (M.flags mo)
+ then D.convertStringsInTerm decodeUTF8 else id
lins = Map.fromAscList
- [(i2i f, mkTerm tr) | (f,CncFun _ (Yes tr) _) <- js]
+ [(i2i f, utf (mkTerm tr)) | (f,CncFun _ (Yes tr) _) <- js]
lincats = Map.fromAscList
[(i2i c, mkCType ty) | (c,CncCat (Yes ty) _ _) <- js]
lindefs = Map.fromAscList
- [(i2i c, mkTerm tr) | (c,CncCat _ (Yes tr) _) <- js]
+ [(i2i c, mkTerm tr) | (c,CncCat _ (Yes tr) _) <- js]
printnames = Map.union
(Map.fromAscList [(i2i f, mkTerm tr) | (f,CncFun _ _ (Yes tr)) <- js])
(Map.fromAscList [(i2i f, mkTerm tr) | (f,CncCat _ _ (Yes tr)) <- js])
@@ -95,7 +97,11 @@ mkExp t = case t of
mkAt c = case c of
Q _ c -> C.AC $ i2i c
QC _ c -> C.AC $ i2i c
+ Vr x -> C.AV $ i2i x
EInt i -> C.AI i
+ EFloat f -> C.AF f
+ K s -> C.AS s
+ Meta (MetaSymb i) -> C.AM $ toInteger i
_ -> C.AM 0
mkPatt p = uncurry CM.tree $ case p of
A.PP _ c ps -> (C.AC (i2i c), map mkPatt ps)
@@ -182,20 +188,6 @@ repartition abs cg = [M.partOfGrammar cg (lang,mo) |
let mo = errVal
(error ("no module found for " ++ A.prt lang)) $ M.lookupModule cg lang
]
-
--- convert to UTF8 if not yet converted
-utf8Conv :: SourceGrammar -> SourceGrammar
-utf8Conv = M.MGrammar . map toUTF8 . M.modules where
- toUTF8 mo = case mo of
- (i, M.ModMod m)
- ----- | hasFlagCanon (flagCanon "coding" "utf8") mo -> mo
- | otherwise -> (i, M.ModMod $
- m{ M.jments = M.jments m -----
------ mapTree (onSnd (mapInfoTerms (onTokens encodeUTF8))) (M.jments m),
- ----- M.flags = setFlag "coding" "utf8" (M.flags m)
- }
- )
- _ -> mo
-- translate tables and records to arrays, parameters and labels to indices
diff --git a/src/GF/GFCC/DataGFCC.hs b/src/GF/GFCC/DataGFCC.hs
index e2e5a4efe..3d6cca3cc 100644
--- a/src/GF/GFCC/DataGFCC.hs
+++ b/src/GF/GFCC/DataGFCC.hs
@@ -3,6 +3,8 @@ module GF.GFCC.DataGFCC where
import GF.GFCC.AbsGFCC
import GF.GFCC.PrintGFCC
import GF.Infra.CompactPrint
+import GF.Text.UTF8
+
import Data.Map
import Data.List
@@ -71,7 +73,7 @@ mkGFCC (Grm a cs ab@(Abs afls fs cts) ccs) = GFCC {
-- convert internal GFCC and pretty-print it
printGFCC :: GFCC -> String
-printGFCC gfcc = compactPrintGFCC $ printTree $ Grm
+printGFCC gfcc0 = compactPrintGFCC $ printTree $ Grm
(absname gfcc)
(cncnames gfcc)
(Abs
@@ -88,9 +90,36 @@ printGFCC gfcc = compactPrintGFCC $ printTree $ Grm
[Lin f v | (f,v) <- assocs (lincats cnc)]
[Lin f v | (f,v) <- assocs (lindefs cnc)]
[Lin f v | (f,v) <- assocs (printnames cnc)]
+ gfcc = utf8GFCC gfcc0
-- default map and filter are for Map here
lmap = Prelude.map
lfilter = Prelude.filter
+mmap = Data.Map.map
+
+-- encode idenfifiers and strings in UTF8
+
+utf8GFCC :: GFCC -> GFCC
+utf8GFCC gfcc = gfcc {
+ concretes = mmap u8concr (concretes gfcc)
+ }
+ where
+ u8concr cnc = cnc {
+ lins = mmap u8term (lins cnc),
+ opers = mmap u8term (opers cnc)
+ }
+ u8term = convertStringsInTerm encodeUTF8
+
+---- TODO: convert identifiers and flags
+convertStringsInTerm conv t = case t of
+ K (KS s) -> K (KS (conv s))
+ W s r -> W (conv s) (convs r)
+ R ts -> R $ lmap convs ts
+ S ts -> S $ lmap convs ts
+ FV ts -> FV $ lmap convs ts
+ P u v -> P (convs u) (convs v)
+ _ -> t
+ where
+ convs = convertStringsInTerm conv
diff --git a/src/GF/GFCC/Macros.hs b/src/GF/GFCC/Macros.hs
index 2fe6770f1..a44250e98 100644
--- a/src/GF/GFCC/Macros.hs
+++ b/src/GF/GFCC/Macros.hs
@@ -24,6 +24,14 @@ lookType :: GFCC -> CId -> Type
lookType gfcc f =
fst $ lookMap (error $ "lookType " ++ show f) f (funs (abstract gfcc))
+lookAbsFlag :: GFCC -> CId -> String
+lookAbsFlag gfcc f =
+ lookMap (error $ "lookAbsFlag " ++ show f) f (aflags (abstract gfcc))
+
+lookFlag :: GFCC -> CId -> CId -> String
+lookFlag gfcc lang fun =
+ lookMap "?" fun $ flags $ lookMap (error "no lang") lang $ concretes gfcc
+
functionsToCat :: GFCC -> CId -> [(CId,Type)]
functionsToCat gfcc cat =
[(f,ty) | f <- fs, Just (ty,_) <- [Data.Map.lookup f $ funs $ abstract gfcc]]