summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2007-09-19 13:49:12 +0000
committeraarne <aarne@cs.chalmers.se>2007-09-19 13:49:12 +0000
commit7825a1a1c54d6922c709bed7de3f23f049e0046e (patch)
tree9eedd42bf67ae46b8f233234064c1edce133587c /src
parenta886f7042b2ad16fa3ef120548df9773ffafbb45 (diff)
adapted GFCC2FCFG to other uses of GFCC, made it to default parser
Diffstat (limited to 'src')
-rw-r--r--src/GF/Canon/CanonToGFCC.hs69
-rw-r--r--src/GF/Canon/CanonToJS.hs7
-rw-r--r--src/GF/Canon/GFCC/AbsGFCC.hs11
-rw-r--r--src/GF/Canon/GFCC/DataGFCC.hs36
-rw-r--r--src/GF/Canon/GFCC/PrintGFCC.hs13
-rw-r--r--src/GF/Compile/GrammarToGFCC.hs10
-rw-r--r--src/GF/Compile/ShellState.hs2
-rw-r--r--src/GF/Conversion/SimpleToFCFG.hs19
8 files changed, 97 insertions, 70 deletions
diff --git a/src/GF/Canon/CanonToGFCC.hs b/src/GF/Canon/CanonToGFCC.hs
index 65b591acd..3898180c1 100644
--- a/src/GF/Canon/CanonToGFCC.hs
+++ b/src/GF/Canon/CanonToGFCC.hs
@@ -12,7 +12,8 @@
-- GFC to GFCC compiler. AR Aug-Oct 2006
-----------------------------------------------------------------------------
-module GF.Canon.CanonToGFCC (prCanon2gfcc, mkCanon2gfcc) where
+module GF.Canon.CanonToGFCC (
+ prCanon2gfcc, mkCanon2gfcc, mkCanon2gfccNoUTF8) where
import GF.Canon.AbsGFC
import qualified GF.Canon.GFC as GFC
@@ -43,9 +44,14 @@ import Debug.Trace ----
prCanon2gfcc :: CanonGrammar -> String
prCanon2gfcc = Pr.printTree . mkCanon2gfcc
+-- this variant makes utf8 conversion; used in back ends
mkCanon2gfcc :: CanonGrammar -> C.Grammar
mkCanon2gfcc = canon2gfcc . reorder . utf8Conv . canon2canon . normalize
+-- this variant makes no utf8 conversion; used in ShellState
+mkCanon2gfccNoUTF8 :: CanonGrammar -> C.Grammar
+mkCanon2gfccNoUTF8 = canon2gfcc . reorder . canon2canon . normalize
+
-- This is needed to reorganize the grammar. GFCC has its own back-end optimization.
-- But we need to have the canonical order in tables, created by valOpt
normalize :: CanonGrammar -> CanonGrammar
@@ -82,7 +88,7 @@ mkType t = case GM.catSkeleton t of
mkCType :: CType -> C.Term
mkCType t = case t of
- TInts i -> C.C (fromInteger i)
+ TInts i -> C.C $ fromInteger i
-- record parameter alias - created in gfc preprocessing
RecType [Lbg (L (IC "_")) i, Lbg (L (IC "__")) t] -> C.RP (mkCType i) (mkCType t)
RecType rs -> C.R [mkCType t | Lbg _ t <- rs]
@@ -90,13 +96,13 @@ mkCType t = case t of
TStr -> C.S []
where
getI pt = case pt of
- C.C i -> i
+ C.C i -> i
C.RP i _ -> getI i
mkTerm :: Term -> C.Term
mkTerm tr = case tr of
- Arg (A _ i) -> C.V (fromInteger i)
- EInt i -> C.C (fromInteger i)
+ Arg (A _ i) -> C.V $ fromInteger i
+ EInt i -> C.C $ fromInteger i
-- record parameter alias - created in gfc preprocessing
R [Ass (L (IC "_")) i, Ass (L (IC "__")) t] -> C.RP (mkTerm i) (mkTerm t)
-- ordinary record
@@ -111,11 +117,11 @@ mkTerm tr = case tr of
S t p -> C.P (mkTerm t) (mkTerm p)
C s t -> C.S [mkTerm x | x <- [s,t]]
FV ts -> C.FV [mkTerm t | t <- ts]
- K (KS s) -> C.KS s
- K (KP ss _) -> C.KP ss [] ---- TODO: prefix variants
+ K (KS s) -> C.K (C.KS s)
+ K (KP ss _) -> C.K (C.KP ss []) ---- TODO: prefix variants
E -> C.S []
Par _ _ -> prtTrace tr $ C.C 66661 ---- for debugging
- _ -> C.S [C.KS (A.prt tr +++ "66662")] ---- for debugging
+ _ -> C.S [C.K (C.KS (A.prt tr +++ "66662"))] ---- for debugging
where
mkLab (L (IC l)) = case l of
'_':ds -> (read ds) :: Int
@@ -175,7 +181,7 @@ canon2canon :: CanonGrammar -> CanonGrammar
canon2canon = recollect . map cl2cl . repartition where
recollect =
M.MGrammar . nubBy (\ (i,_) (j,_) -> i==j) . concatMap M.modules
- cl2cl cg = tr $ M.MGrammar $ map c2c $ M.modules cg where
+ cl2cl cg = {-tr $-} M.MGrammar $ map c2c $ M.modules cg where
c2c (c,m) = case m of
M.ModMod mo@(M.Module _ _ _ _ _ js) ->
(c, M.ModMod $ M.replaceJudgements mo $ mapTree j2j js)
@@ -406,30 +412,24 @@ optConcrete defs = subex
-- suffix sets can later be shared by subex elim
optTerm :: C.Term -> C.Term
-optTerm tr =
- case tr of
- C.R ts -> mkSuff ts
- C.P t v -> C.P (optTerm t) v
+optTerm tr = case tr of
+ C.R ts@(_:_:_) | all isK ts -> mkSuff $ optToks [s | C.K (C.KS s) <- ts]
+ C.R ts -> C.R $ map optTerm ts
+ C.P t v -> C.P (optTerm t) v
C.L x t -> C.L x (optTerm t)
- tr -> tr
- where
- mkSuff ts@(C.KS s : ts1@(_:_)) =
- case pref s ts1 of
- Nothing -> C.R (map optTerm ts)
- Just "" -> C.R ts
- Just prf -> let len = length prf
- in C.W prf [drop len s | C.KS s <- ts]
- where
- pref cand [] = Just cand
- pref cand (t:ts) =
- case t of
- C.KS s -> pref (getPrefix cand s) ts
- _ -> Nothing
- where
- getPrefix cand s
- | isPrefixOf cand s = cand
- | otherwise = getPrefix (init cand) s
- mkSuff ts = C.R (map optTerm ts)
+ _ -> tr
+ where
+ optToks ss = prf : suffs where
+ prf = pref (head ss) (tail ss)
+ suffs = map (drop (length prf)) ss
+ pref cand ss = case ss of
+ s1:ss2 -> if isPrefixOf cand s1 then pref cand ss2 else pref (init cand) ss
+ _ -> cand
+ isK t = case t of
+ C.K (C.KS _) -> True
+ _ -> False
+ mkSuff ("":ws) = C.R (map (C.K . C.KS) ws)
+ mkSuff (p:ws) = C.W p (C.R (map (C.K . C.KS) ws))
-- common subexpression elimination; see ./Subexpression.hs for the idea
@@ -454,7 +454,7 @@ addSubexpConsts tree lins =
_ -> case t of
C.R ts -> C.R $ map (recomp f) ts
C.S ts -> C.S $ map (recomp f) ts
- C.W s ss -> C.W s ss
+ C.W s t -> C.W s (recomp f t)
C.P t p -> C.P (recomp f t) (recomp f p)
C.RP t p -> C.RP (recomp f t) (recomp f p)
C.L x t -> C.L x (recomp f t)
@@ -483,7 +483,8 @@ collectSubterms t = case t of
C.S ts -> do
mapM collectSubterms ts
add t
- C.W s ts -> do
+ C.W s u -> do
+ collectSubterms u
add t
C.P p u -> do
collectSubterms p
diff --git a/src/GF/Canon/CanonToJS.hs b/src/GF/Canon/CanonToJS.hs
index 7e88a5ef2..beefefa04 100644
--- a/src/GF/Canon/CanonToJS.hs
+++ b/src/GF/Canon/CanonToJS.hs
@@ -53,13 +53,14 @@ term2js l t = f t
C.R xs -> new "Arr" (map f xs)
C.P x y -> JS.ECall (JS.EMember (f x) (JS.Ident "sel")) [f y]
C.S xs -> new "Seq" (map f xs)
- C.KS s -> new "Str" [JS.EStr s]
- C.KP ss vs -> new "Seq" (map JS.EStr ss) -- FIXME
+ C.K (C.KS s) -> new "Str" [JS.EStr s]
+ C.K (C.KP ss vs) -> new "Seq" (map JS.EStr ss) -- FIXME
C.V i -> JS.EIndex (JS.EVar children) (JS.EInt i)
C.C i -> new "Int" [JS.EInt i]
C.F (C.CId f) -> JS.ECall (JS.EMember (JS.EVar l) (JS.Ident "rule")) [JS.EStr f, JS.EVar children]
C.FV xs -> new "Variants" (map f xs)
- C.W str ss -> new "Suffix" (JS.EStr str : map JS.EStr ss)
+ C.W str (C.R r) ->
+ new "Suffix" (JS.EStr str : [JS.EStr s | C.K (C.KS s) <- r])
C.RP x y -> new "Rp" [f x, f y]
C.TM -> new "Meta" []
diff --git a/src/GF/Canon/GFCC/AbsGFCC.hs b/src/GF/Canon/GFCC/AbsGFCC.hs
index ccb964689..aab74f7fb 100644
--- a/src/GF/Canon/GFCC/AbsGFCC.hs
+++ b/src/GF/Canon/GFCC/AbsGFCC.hs
@@ -47,19 +47,24 @@ data Term =
R [Term]
| P Term Term
| S [Term]
- | KS String
- | KP [String] [Variant]
+ | K Tokn
| V Int
| C Int
| F CId
| FV [Term]
- | W String [String]
+ | W String Term
| RP Term Term
| TM
| L CId Term
| BV CId
deriving (Eq,Ord,Show)
+data Tokn =
+ KS String
+ | KP [String] [Variant]
+ deriving (Eq,Ord,Show)
+
data Variant =
Var [String] [String]
deriving (Eq,Ord,Show)
+
diff --git a/src/GF/Canon/GFCC/DataGFCC.hs b/src/GF/Canon/GFCC/DataGFCC.hs
index 746175e29..eabd8b3a3 100644
--- a/src/GF/Canon/GFCC/DataGFCC.hs
+++ b/src/GF/Canon/GFCC/DataGFCC.hs
@@ -46,9 +46,10 @@ realize :: Term -> String
realize trm = case trm of
R ts -> realize (ts !! 0)
S ss -> unwords $ Prelude.map realize ss
- KS s -> s
- KP s _ -> unwords s ---- prefix choice TODO
- W s ss -> s ++ (ss !! 0)
+ K t -> case t of
+ KS s -> s
+ KP s _ -> unwords s ---- prefix choice TODO
+ W s t -> s ++ realize t
FV ts -> realize (ts !! 0) ---- other variants TODO
RP _ r -> realize r
TM -> "?"
@@ -58,9 +59,9 @@ linExp :: GFCC -> CId -> Exp -> Term
linExp mcfg lang tree@(Tr at trees) =
case at of
AC fun -> comp (Prelude.map lin trees) $ look fun
- AS s -> R [KS (show s)] -- quoted
- AI i -> R [KS (show i)]
- AF d -> R [KS (show d)]
+ AS s -> R [kks (show s)] -- quoted
+ AI i -> R [kks (show i)]
+ AF d -> R [kks (show d)]
AM -> TM
where
lin = linExp mcfg lang
@@ -71,17 +72,20 @@ exp0 :: Exp
exp0 = Tr (AS "NO_PARSE") []
term0 :: CId -> Term
-term0 (CId s) = R [KS ("#" ++ s ++ "#")]
+term0 (CId s) = R [kks ("#" ++ s ++ "#")]
+
+kks :: String -> Term
+kks = K . KS
compute :: GFCC -> CId -> [Term] -> Term -> Term
compute mcfg lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
RP i t -> RP (comp i) (comp t)
- W s ss -> W s ss
+ W s t -> W s (comp t)
R ts -> R $ Prelude.map comp ts
- V i -> idx args i -- already computed
- F c -> comp $ look c -- not computed (if contains argvar)
+ V i -> idx args i -- already computed
+ F c -> comp $ look c -- not computed (if contains argvar)
FV ts -> FV $ Prelude.map comp ts
S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
_ -> trm
@@ -90,9 +94,14 @@ compute mcfg lang args = comp where
idx xs i = if i > length xs - 1 then trace "overrun !!\n" (last xs) else xs !! i
- proj r p = case p of
- FV ts -> FV $ Prelude.map (proj r) ts
- _ -> comp $ getField r (getIndex p)
+ proj r p = case (r,p) of
+ (_, FV ts) -> FV $ Prelude.map (proj r) ts
+ (W s t, _) -> kks (s ++ getString (proj t p))
+ _ -> comp $ getField r (getIndex p)
+
+ getString t = case t of
+ K (KS s) -> s
+ _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> i
@@ -102,7 +111,6 @@ compute mcfg lang args = comp where
getField t i = case t of
R rs -> idx rs i
- W s ss -> KS (s ++ idx ss i)
RP _ r -> getField r i
TM -> TM
_ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
diff --git a/src/GF/Canon/GFCC/PrintGFCC.hs b/src/GF/Canon/GFCC/PrintGFCC.hs
index 1ef7cfbe3..05a9246cd 100644
--- a/src/GF/Canon/GFCC/PrintGFCC.hs
+++ b/src/GF/Canon/GFCC/PrintGFCC.hs
@@ -69,11 +69,10 @@ prPrec :: Int -> Int -> Doc -> Doc
prPrec i j = if j<i then parenth else id
-instance Print Int where
+instance Print Integer where
prt _ x = doc (shows x)
-
-instance Print Integer where
+instance Print Int where
prt _ x = doc (shows x)
@@ -157,8 +156,7 @@ instance Print Term where
R terms -> prPrec i 0 (concatD [doc (showString "[") , prt 0 terms , doc (showString "]")])
P term0 term -> prPrec i 0 (concatD [doc (showString "(") , prt 0 term0 , doc (showString "!") , prt 0 term , doc (showString ")")])
S terms -> prPrec i 0 (concatD [doc (showString "(") , prt 0 terms , doc (showString ")")])
- KS str -> prPrec i 0 (concatD [prt 0 str])
- KP strs variants -> prPrec i 0 (concatD [doc (showString "[") , doc (showString "pre") , prt 0 strs , doc (showString "[") , prt 0 variants , doc (showString "]") , doc (showString "]")])
+ K tokn -> prPrec i 0 (concatD [prt 0 tokn])
V n -> prPrec i 0 (concatD [doc (showString "$") , prt 0 n])
C n -> prPrec i 0 (concatD [prt 0 n])
F cid -> prPrec i 0 (concatD [prt 0 cid])
@@ -174,6 +172,11 @@ instance Print Term where
[x] -> (concatD [prt 0 x])
x:xs -> (concatD [prt 0 x , doc (showString ",") , prt 0 xs])
+instance Print Tokn where
+ prt i e = case e of
+ KS str -> prPrec i 0 (concatD [prt 0 str])
+ KP strs variants -> prPrec i 0 (concatD [doc (showString "[") , doc (showString "pre") , prt 0 strs , doc (showString "[") , prt 0 variants , doc (showString "]") , doc (showString "]")])
+
instance Print Variant where
prt i e = case e of
diff --git a/src/GF/Compile/GrammarToGFCC.hs b/src/GF/Compile/GrammarToGFCC.hs
index 6d4401e4f..e1374bad1 100644
--- a/src/GF/Compile/GrammarToGFCC.hs
+++ b/src/GF/Compile/GrammarToGFCC.hs
@@ -74,7 +74,7 @@ mkType t = case GM.catSkeleton t of
mkCType :: Type -> C.Term
mkCType t = case t of
- EInt i -> C.C i
+ EInt i -> C.C $ fromInteger i
-- record parameter alias - created in gfc preprocessing
RecType [(LIdent "_", i), (LIdent "__", t)] -> C.RP (mkCType i) (mkCType t)
RecType rs -> C.R [mkCType t | (_, t) <- rs]
@@ -82,14 +82,14 @@ mkCType t = case t of
_ -> C.S [] ----- TStr
where
getI pt = case pt of
- C.C i -> fromInteger i
+ C.C i -> i
C.RP i _ -> getI i
_ -> 1 -----
mkTerm :: Term -> C.Term
mkTerm tr = case tr of
- Vr (IA (_,i)) -> C.V $ toInteger i
- EInt i -> C.C i
+ Vr (IA (_,i)) -> C.V i
+ EInt i -> C.C $ fromInteger i
-- record parameter alias - created in gfc preprocessing
R [(LIdent "_", (_,i)), (LIdent "__", (_,t))] -> C.RP (mkTerm i) (mkTerm t)
-- ordinary record
@@ -112,7 +112,7 @@ mkTerm tr = case tr of
_ -> C.S [C.K (C.KS (A.prt tr +++ "66662"))] ---- for debugging
where
mkLab (LIdent l) = case l of
- '_':ds -> (read ds) :: Integer
+ '_':ds -> (read ds) :: Int
_ -> prtTrace tr $ 66663
-- return just one module per language
diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs
index 80956d8ff..57c0d1e88 100644
--- a/src/GF/Compile/ShellState.hs
+++ b/src/GF/Compile/ShellState.hs
@@ -260,7 +260,7 @@ updateShellState opts ign mcnc sh ((_,sgr,gr,eenv),rts) = do
let fromGFC = snd . snd . Cnv.convertGFC opts
(mcfgs, cfgs) = unzip $ map (curry fromGFC cgr) concrs
- fcfgs = FCnv.convertGrammar (C2GFCC.mkCanon2gfcc cgr)
+ fcfgs = FCnv.convertGrammar (C2GFCC.mkCanon2gfccNoUTF8 cgr)
pInfos = zipWith3 Prs.buildPInfo mcfgs (map snd fcfgs) cfgs
diff --git a/src/GF/Conversion/SimpleToFCFG.hs b/src/GF/Conversion/SimpleToFCFG.hs
index 402729341..e0e639800 100644
--- a/src/GF/Conversion/SimpleToFCFG.hs
+++ b/src/GF/Conversion/SimpleToFCFG.hs
@@ -133,14 +133,23 @@ convertTerm cnc_defs selector (FV vars) lins = do term <-
convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (S ts) ((lbl_path,lin) : lins) = do projectHead lbl_path
foldM (\lins t -> convertTerm cnc_defs selector t lins) ((lbl_path,lin) : lins) (reverse ts)
-convertTerm cnc_defs selector (KS str) ((lbl_path,lin) : lins) = do projectHead lbl_path
- return ((lbl_path,Tok str : lin) : lins)
-convertTerm cnc_defs selector (KP (str:_)_)((lbl_path,lin) : lins) = do projectHead lbl_path
- return ((lbl_path,Tok str : lin) : lins)
+convertTerm cnc_defs selector (K (KS str)) ((lbl_path,lin) : lins) =
+ do projectHead lbl_path
+ return ((lbl_path,Tok str : lin) : lins)
+convertTerm cnc_defs selector (K (KP (str:_)_))((lbl_path,lin) : lins) =
+ do projectHead lbl_path
+ return ((lbl_path,Tok str : lin) : lins)
convertTerm cnc_defs selector (RP _ term) lins = convertTerm cnc_defs selector term lins
convertTerm cnc_defs selector (F id) lins = do term <- Map.lookup id cnc_defs
convertTerm cnc_defs selector term lins
-convertTerm cnc_defs selector (W s ss) ((lbl_path,lin) : lins) = convertRec cnc_defs selector 0 [KS (s ++ s1) | s1 <- ss] lbl_path lin lins
+convertTerm cnc_defs selector (W s t) ((lbl_path,lin) : lins) = do
+ ss <- case t of
+ R ss -> return ss
+ F f -> do
+ t <- Map.lookup f cnc_defs
+ case t of
+ R ss -> return ss
+ convertRec cnc_defs selector 0 [K (KS (s ++ s1)) | K (KS s1) <- ss] lbl_path lin lins
convertTerm cnc_defs selector x lins = error ("convertTerm ("++show x++")")