summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorbringert <unknown>2005-03-08 14:31:22 +0000
committerbringert <unknown>2005-03-08 14:31:22 +0000
commit7194efcea85e6a1d16b35fd2ad268dfc675a1b24 (patch)
tree4394b0668240b8e4d92d62d2d4a80313015033b8 /src
parenta4b8921544c5e809f4f664d2fa19bb8257fa77bf (diff)
Finished UTF8 identifier conversion. Will probably redo it at string level, i.e. after pretty-printing instead
Diffstat (limited to 'src')
-rw-r--r--src/GF/API.hs36
-rw-r--r--src/GF/Canon/CMacros.hs42
-rw-r--r--src/GF/Canon/GFC.hs155
-rw-r--r--src/GF/Infra/Option.hs9
-rw-r--r--src/GF/Shell/ShellCommands.hs8
-rw-r--r--src/HelpFile3
6 files changed, 194 insertions, 59 deletions
diff --git a/src/GF/API.hs b/src/GF/API.hs
index 4ab40b180..2fc562c1c 100644
--- a/src/GF/API.hs
+++ b/src/GF/API.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/02/18 19:21:06 $
--- > CVS $Author: peb $
--- > CVS $Revision: 1.27 $
+-- > CVS $Date: 2005/03/08 15:31:22 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.28 $
--
-- Application Programmer's Interface to GF; also used by Shell. AR 10/11/2001
-----------------------------------------------------------------------------
@@ -298,17 +298,20 @@ optPrintGrammar :: Options -> StateGrammar -> String
optPrintGrammar opts = customOrDefault opts grammarPrinter customGrammarPrinter
optPrintMultiGrammar :: Options -> CanonGrammar -> String
-optPrintMultiGrammar opts = pmg . encode
+optPrintMultiGrammar opts = pmg . encodeId . encode
where
pmg = customOrDefault opts grammarPrinter customMultiGrammarPrinter
-- if -utf8 was given, convert from language specific codings
encode = if oElem useUTF8 opts then mapModules moduleToUTF8 else id
+ -- if -utf8id was given, convert identifiers to UTF8
+ encodeId = if oElem useUTF8id opts then grammarIdentsToUTF8 else id
moduleToUTF8 m =
m{ jments = mapTree (onSnd (mapInfoTerms code)) (jments m),
flags = setFlag "coding" "utf8" (flags m) }
where code = onTokens (anyCodingToUTF8 (moduleOpts m))
moduleOpts = Opts . okError . mapM CG.redFlag . flags
-
+ grammarIdentsToUTF8 mgr
+ = MGrammar [ (identToUTF8 i, mapIdents identToUTF8 mi) | (i,mi) <- modules mgr]
optPrintSyntax :: Options -> GF.Grammar -> String
optPrintSyntax opts = customOrDefault opts grammarPrinter customSyntaxPrinter
@@ -366,3 +369,26 @@ optDecodeUTF8 gr = case getOptVal (stateOptions gr) uniCoding of
anyCodingToUTF8 :: Options -> String -> String
anyCodingToUTF8 opts =
encodeUTF8 . customOrDefault opts uniCoding customUniCoding
+
+{-
+-- | Convert all text not inside double quotes to UTF8
+nonLiteralsToUTF8 :: String -> String
+nonLiteralsToUTF8 "" = ""
+nonLiteralsToUTF8 ('"':cs) = '"' : l ++ nonLiteralsToUTF8 rs
+ where (l,rs) = takeStringLit cs
+nonLiteralsToUTF8 (c:cs) = encodeUTF8 [c] : nonLiteralsToUTF8 cs
+ where
+ -- | Split off an initial string ended by double quotes
+ takeStringLit :: String -> (String,String)
+ takeStringLit "" = ("","")
+ takeStringLit
+-}
+
+-- | Convert an identifier in unicode to UTF8 encoding
+identToUTF8 :: I.Ident -> I.Ident
+identToUTF8 i = case i of
+ I.IC s -> I.IC (encodeUTF8 s)
+ I.IW -> I.IW
+ I.IV (i,s) -> I.IV (i, encodeUTF8 s)
+ I.IA (s,i) -> I.IA (encodeUTF8 s, i)
+ I.IAV (s,i1,i2) -> I.IAV (encodeUTF8 s, i2, i2)
diff --git a/src/GF/Canon/CMacros.hs b/src/GF/Canon/CMacros.hs
index ea4513a02..a097c4926 100644
--- a/src/GF/Canon/CMacros.hs
+++ b/src/GF/Canon/CMacros.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/03/07 17:50:00 $
+-- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.22 $
+-- > CVS $Revision: 1.23 $
--
-- Macros for building and analysing terms in GFC concrete syntax.
--
@@ -246,44 +246,6 @@ onTokens f t = case t of
K (KP ss vs) -> K (KP (map f ss) [Var (map f x) (map f y) | Var x y <- vs])
_ -> composSafeOp (onTokens f) t
--- | Apply some function to all identifiers in a GFC term
-onTermIdents :: (Ident -> Ident) -> Term -> Term
-onTermIdents f t = case t of
- Arg av -> Arg $ case av of
- A i x -> A (f i) x
- AB i x y -> AB (f i) x y
- I ci -> I (fc ci)
- Con ci ts -> Con (fc ci) (map (onTermIdents f) ts)
- LI i -> LI (f i)
- R as -> R [Ass (fl l) (onTermIdents f t) | Ass l t <- as]
- P t l -> P (onTermIdents f t) (fl l)
- T ct cs -> T (fct ct) [Cas (map fp ps) (onTermIdents f t) | Cas ps t <- cs]
- V ct ts -> V (fct ct) (map (onTermIdents f) ts)
- S t1 t2 -> S (onTermIdents f t1) (onTermIdents f t2)
- C t1 t2 -> C (onTermIdents f t1) (onTermIdents f t2)
- FV ts -> FV (map (onTermIdents f) ts)
- _ -> t
- where
- fc :: CIdent -> CIdent
- fc (CIQ i1 i2) = CIQ (f i1) (f i2)
- fl :: Label -> Label
- fl l = case l of
- L i -> L (f i)
- _ -> l
- fct :: CType -> CType
- fct ct = case ct of
- RecType ls -> RecType [ Lbg (fl l) (fct ct) | Lbg l ct <- ls ]
- Table t1 t2 -> Table (fct t1) (fct t2)
- Cn ci -> Cn (fc ci)
- _ -> ct
- fp :: Patt -> Patt
- fp p = case p of
- PC ci ps -> PC (fc ci) (map fp ps)
- PV i -> PV (f i)
- PR ps -> PR [PAss (fl l) (fp p) | PAss l p <- ps]
- _ -> p
-
-
-- | to define compositional term functions
composSafeOp :: (Term -> Term) -> Term -> Term
composSafeOp op trm = case composOp (mkMonadic op) trm of
diff --git a/src/GF/Canon/GFC.hs b/src/GF/Canon/GFC.hs
index a777f4b76..c29e77c73 100644
--- a/src/GF/Canon/GFC.hs
+++ b/src/GF/Canon/GFC.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/03/04 14:08:36 $
+-- > CVS $Date: 2005/03/08 15:31:22 $
-- > CVS $Author: bringert $
--- > CVS $Revision: 1.7 $
+-- > CVS $Revision: 1.8 $
--
-- canonical GF. AR 10\/9\/2002 -- 9\/5\/2003 -- 21\/9
-----------------------------------------------------------------------------
@@ -20,7 +20,8 @@ module GFC (Context,
Info(..),
Printname,
mapInfoTerms,
- setFlag
+ setFlag,
+ mapIdents
) where
import AbsGFC
@@ -34,6 +35,7 @@ import Operations
import qualified Modules as M
import Char
+import Control.Arrow (first)
type Context = [(Ident,Exp)]
@@ -45,7 +47,7 @@ type CanonModule = (Ident, CanonModInfo)
type CanonAbs = M.Module Ident Option Info
-data Info =
+data Info =
AbsCat A.Context [A.Fun]
| AbsFun A.Type A.Term
| AbsTrans A.Term
@@ -67,4 +69,147 @@ mapInfoTerms f i = case i of
_ -> i
setFlag :: String -> String -> [Flag] -> [Flag]
-setFlag n v fs = Flg (IC n) (IC v):[f | f@(Flg (IC n') _) <- fs, n' /= n] \ No newline at end of file
+setFlag n v fs = Flg (IC n) (IC v):[f | f@(Flg (IC n') _) <- fs, n' /= n]
+
+-- | Apply a function to all identifiers in a module
+mapIdents :: (Ident -> Ident) -> M.ModInfo Ident Flag Info -> M.ModInfo Ident Flag Info
+mapIdents f mi = case mi of
+ M.ModMainGrammar mg -> M.ModMainGrammar (fmg mg)
+ M.ModMod m -> M.ModMod (fm m)
+ M.ModWith mt s i is oss -> M.ModWith (fmt mt) s (f i) (map f is) (map fos oss)
+ where
+ fmg :: M.MainGrammar Ident -> M.MainGrammar Ident
+ fmg (M.MainGrammar i mcs) = M.MainGrammar (f i) (map fmc mcs)
+ fmc :: M.MainConcreteSpec Ident -> M.MainConcreteSpec Ident
+ fmc (M.MainConcreteSpec i1 i2 mos1 mos2)
+ = M.MainConcreteSpec (f i1) (f i2) (fmap fos mos1) (fmap fos mos2)
+ fos :: M.OpenSpec Ident -> M.OpenSpec Ident
+ fos os = case os of
+ M.OSimple q i -> M.OSimple q (f i)
+ M.OQualif q i1 i2 -> M.OQualif q (f i1) (f i2)
+ fm :: M.Module Ident Flag Info -> M.Module Ident Flag Info
+ fm m@(M.Module{ M.mtype = mt, M.flags = fl, M.extends = ex,
+ M.opens = os, M.jments = js}) =
+ m { M.mtype = fmt mt, M.flags = map ffl fl, M.extends = map f ex,
+ M.opens = map fos os,
+ M.jments = mapTree (\(i,t) -> (f i, fi t)) js }
+ fmt :: M.ModuleType Ident -> M.ModuleType Ident
+ fmt t = case t of
+ M.MTTransfer os1 os2 -> M.MTTransfer (fos os1) (fos os2)
+ M.MTConcrete i -> M.MTConcrete (f i)
+ M.MTInstance i -> M.MTInstance (f i)
+ M.MTReuse rt -> M.MTReuse (frt rt)
+ M.MTUnion mt ms -> M.MTUnion (fmt mt) [(f i, map f is) | (i,is) <- ms]
+ _ -> t
+ frt :: M.MReuseType Ident -> M.MReuseType Ident
+ frt rt = case rt of
+ M.MRInterface i -> M.MRInterface (f i)
+ M.MRInstance i1 i2 -> M.MRInstance (f i1) (f i2)
+ M.MRResource i -> M.MRResource (f i)
+ ffl :: Flag -> Flag
+ ffl (Flg i1 i2) = Flg (f i1) (f i2)
+ fi :: Info -> Info
+ fi info = case info of
+ AbsCat ds fs -> AbsCat ds fs -- FIXME: convert idents here too
+ AbsFun ty te -> AbsFun ty te -- FIXME: convert idents here too
+ AbsTrans te -> AbsTrans te -- FIXME: convert idents here too
+ ResPar ps -> ResPar [ParD (f i) (map fct cts) | ParD i cts <- ps]
+ ResOper ct t -> ResOper (fct ct) (ft t)
+ CncCat ct t pn -> CncCat (fct ct) (ft t) (ft pn)
+ CncFun ci avs t pn -> CncFun (fc ci) (map fav avs) (ft t) (ft pn)
+ AnyInd b i -> AnyInd b (f i)
+ fqi :: A.QIdent -> A.QIdent
+ fqi (i1,i2) = (f i1, f i2)
+ fc :: CIdent -> CIdent
+ fc (CIQ i1 i2) = CIQ (f i1) (f i2)
+ fl :: Label -> Label
+ fl l = case l of
+ L i -> L (f i)
+ _ -> l
+ fct :: CType -> CType
+ fct ct = case ct of
+ RecType ls -> RecType [ Lbg (fl l) (fct ct) | Lbg l ct <- ls ]
+ Table t1 t2 -> Table (fct t1) (fct t2)
+ Cn ci -> Cn (fc ci)
+ _ -> ct
+ fp :: Patt -> Patt
+ fp p = case p of
+ PC ci ps -> PC (fc ci) (map fp ps)
+ PV i -> PV (f i)
+ PR ps -> PR [PAss (fl l) (fp p) | PAss l p <- ps]
+ _ -> p
+ ft :: Term -> Term
+ ft t = case t of
+ Arg av -> Arg (fav av)
+ I ci -> I (fc ci)
+ Con ci ts -> Con (fc ci) (map ft ts)
+ LI i -> LI (f i)
+ R as -> R [Ass (fl l) (ft t) | Ass l t <- as]
+ P t l -> P (ft t) (fl l)
+ T ct cs -> T (fct ct) [Cas (map fp ps) (ft t) | Cas ps t <- cs]
+ V ct ts -> V (fct ct) (map ft ts)
+ S t1 t2 -> S (ft t1) (ft t2)
+ C t1 t2 -> C (ft t1) (ft t2)
+ FV ts -> FV (map ft ts)
+ _ -> t
+ fav :: ArgVar -> ArgVar
+ fav av = case av of
+ A i x -> A (f i) x
+ AB i x y -> AB (f i) x y
+{-
+ fat :: A.Term -> A.Term
+ fat t = case t of
+ A.Vr i -> A.Vr (f i)
+ A.Cn i -> A.Cn (f i)
+ A.Con i -> A.Con (f i)
+ A.App t1 t2 -> A.App (fat t1) (fat t2)
+ A.Abs i t' -> A.Abs (f i) (fat t')
+ A.Prod i t1 t2 -> A.Prod (f i) (fat t1) (fat t2)
+ A.Eqs eqs -> A.Eqs [(, fat t) | (ps,t) <- eqs ]
+ | Eqs [([Patt],Term)]
+
+ -- only used in internal representation
+ | Typed Term Term -- ^ type-annotated term
+--
+-- /below this, the constructors are only for concrete syntax/
+ | RecType [Labelling] -- ^ record type: @{ p : A ; ...}@
+ | R [Assign] -- ^ record: @{ p = a ; ...}@
+ | P Term Label -- ^ projection: @r.p@
+ | ExtR Term Term -- ^ extension: @R ** {x : A}@ (both types and terms)
+
+ | Table Term Term -- ^ table type: @P => A@
+ | T TInfo [Case] -- ^ table: @table {p => c ; ...}@
+ | TSh TInfo [Cases] -- ^ table with discjunctive patters (only back end opt)
+ | V Type [Term] -- ^ table given as course of values: @table T [c1 ; ... ; cn]@
+ | S Term Term -- ^ selection: @t ! p@
+
+ | Let LocalDef Term -- ^ local definition: @let {t : T = a} in b@
+
+ | Alias Ident Type Term -- ^ constant and its definition, used in inlining
+
+ | Q Ident Ident -- ^ qualified constant from a package
+ | QC Ident Ident -- ^ qualified constructor from a package
+
+ | C Term Term -- ^ concatenation: @s ++ t@
+ | Glue Term Term -- ^ agglutination: @s + t@
+
+ | FV [Term] -- ^ alternatives in free variation: @variants { s ; ... }@
+
+ | Alts (Term, [(Term, Term)]) -- ^ alternatives by prefix: @pre {t ; s\/c ; ...}@
+ | Strs [Term] -- ^ conditioning prefix strings: @strs {s ; ...}@
+--
+-- /below this, the last three constructors are obsolete/
+ | LiT Ident -- ^ linearization type
+ | Ready Str -- ^ result of compiling; not to be parsed ...
+ | Computed Term -- ^ result of computing: not to be reopened nor parsed
+
+ _ -> t
+
+ fp :: A.Patt -> A.Patt
+ fp p = case p of
+ A.PC Ident [Patt]
+ A.PP Ident Ident [Patt]
+ A.PV Ident
+ A.PR [(Label,Patt)]
+ A.PT Type Patt
+-} \ No newline at end of file
diff --git a/src/GF/Infra/Option.hs b/src/GF/Infra/Option.hs
index 8ee49b68d..a1a4e3468 100644
--- a/src/GF/Infra/Option.hs
+++ b/src/GF/Infra/Option.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/02/25 15:35:48 $
--- > CVS $Author: aarne $
--- > CVS $Revision: 1.21 $
+-- > CVS $Date: 2005/03/08 15:31:22 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.22 $
--
-- Options and flags used in GF shell commands and files.
--
@@ -151,7 +151,7 @@ dontParse = iOpt "read"
showAbstr, showXML, showOld, showLatex, showFullForm,
showEBNF, showCF, showWords, showOpts,
isCompiled, isHaskell, noCompOpers, retainOpers,
- newParser, noCF, checkCirc, noCheckCirc, lexerByNeed :: Option
+ newParser, noCF, checkCirc, noCheckCirc, lexerByNeed, useUTF8id :: Option
defaultGrOpts :: [Option]
showAbstr = iOpt "abs"
@@ -174,6 +174,7 @@ noCF = iOpt "nocf"
checkCirc = iOpt "nocirc"
noCheckCirc = iOpt "nocheckcirc"
lexerByNeed = iOpt "cflexer"
+useUTF8id = iOpt "utf8id"
-- ** linearization
diff --git a/src/GF/Shell/ShellCommands.hs b/src/GF/Shell/ShellCommands.hs
index 89aae2d06..d60849e90 100644
--- a/src/GF/Shell/ShellCommands.hs
+++ b/src/GF/Shell/ShellCommands.hs
@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
--- > CVS $Date: 2005/02/25 15:35:48 $
--- > CVS $Author: aarne $
--- > CVS $Revision: 1.24 $
+-- > CVS $Date: 2005/03/08 15:31:22 $
+-- > CVS $Author: bringert $
+-- > CVS $Revision: 1.25 $
--
-- The datatype of shell commands and the list of their options.
-----------------------------------------------------------------------------
@@ -183,7 +183,7 @@ optionsOfCommand co = case co of
CSystemCommand _ -> none
CPrintGrammar -> both "utf8" "printer lang"
- CPrintMultiGrammar -> both "utf8" "printer"
+ CPrintMultiGrammar -> both "utf8 utf8id" "printer"
CPrintSourceGrammar -> both "utf8" "printer"
CHelp _ -> opts "all filter length lexer unlexer printer transform depth number"
diff --git a/src/HelpFile b/src/HelpFile
index 4283164df..22e697da6 100644
--- a/src/HelpFile
+++ b/src/HelpFile
@@ -81,7 +81,8 @@ pm, print_multigrammar: pm
Prints the current multilingual grammar in .gfcm form.
(Automatically executes the strip command (s) before doing this.)
options:
- -utf8 apply UTF8-encoding to the grammar
+ -utf8 apply UTF8 encoding to the tokens in the grammar
+ -utf8id apply UTF8 encoding to the identifiers in the grammar
-graph print module dependency graph in 'dot' format
examples:
pm | wf Letter.gfcm -- print the grammar into the file Letter.gfcm