From f85232947e74ee7ef8c7b0ad2338212e7e68f1be Mon Sep 17 00:00:00 2001 From: krasimir Date: Sun, 13 Dec 2009 18:50:29 +0000 Subject: reorganize the directories under src, and rescue the JavaScript interpreter from deprecated --- src/PGF/Binary.hs | 199 --------- src/PGF/BuildParser.hs | 76 ---- src/PGF/CId.hs | 55 --- src/PGF/Check.hs | 173 -------- src/PGF/Data.hs | 95 ----- src/PGF/Editor.hs | 241 ----------- src/PGF/Expr.hs | 355 ---------------- src/PGF/Expr.hs-boot | 28 -- src/PGF/Generate.hs | 66 --- src/PGF/Linearize.hs | 166 -------- src/PGF/Macros.hs | 154 ------- src/PGF/Morphology.hs | 26 -- src/PGF/PMCFG.hs | 119 ------ src/PGF/Paraphrase.hs | 112 ----- src/PGF/Parsing/FCFG/Active.hs | 205 --------- src/PGF/Parsing/FCFG/Incremental.hs | 371 ----------------- src/PGF/Parsing/FCFG/Utilities.hs | 188 --------- src/PGF/ShowLinearize.hs | 113 ----- src/PGF/Tree.hs | 71 ---- src/PGF/Type.hs | 103 ----- src/PGF/TypeCheck.hs | 524 ----------------------- src/PGF/VisualizeTree.hs | 353 ---------------- src/PGF/doc/Eng.gf | 13 - src/PGF/doc/Ex.gf | 8 - src/PGF/doc/Swe.gf | 13 - src/PGF/doc/Test.gf | 64 --- src/PGF/doc/gfcc.html | 809 ------------------------------------ src/PGF/doc/gfcc.txt | 712 ------------------------------- src/PGF/doc/old-GFCC.cf | 50 --- src/PGF/doc/old-gfcc.txt | 656 ----------------------------- src/PGF/doc/syntax.txt | 180 -------- 31 files changed, 6298 deletions(-) delete mode 100644 src/PGF/Binary.hs delete mode 100644 src/PGF/BuildParser.hs delete mode 100644 src/PGF/CId.hs delete mode 100644 src/PGF/Check.hs delete mode 100644 src/PGF/Data.hs delete mode 100644 src/PGF/Editor.hs delete mode 100644 src/PGF/Expr.hs delete mode 100644 src/PGF/Expr.hs-boot delete mode 100644 src/PGF/Generate.hs delete mode 100644 src/PGF/Linearize.hs delete mode 100644 src/PGF/Macros.hs delete mode 100644 src/PGF/Morphology.hs delete mode 100644 src/PGF/PMCFG.hs delete mode 100644 src/PGF/Paraphrase.hs delete mode 100644 src/PGF/Parsing/FCFG/Active.hs delete mode 100644 src/PGF/Parsing/FCFG/Incremental.hs delete mode 100644 src/PGF/Parsing/FCFG/Utilities.hs delete mode 100644 src/PGF/ShowLinearize.hs delete mode 100644 src/PGF/Tree.hs delete mode 100644 src/PGF/Type.hs delete mode 100644 src/PGF/TypeCheck.hs delete mode 100644 src/PGF/VisualizeTree.hs delete mode 100644 src/PGF/doc/Eng.gf delete mode 100644 src/PGF/doc/Ex.gf delete mode 100644 src/PGF/doc/Swe.gf delete mode 100644 src/PGF/doc/Test.gf delete mode 100644 src/PGF/doc/gfcc.html delete mode 100644 src/PGF/doc/gfcc.txt delete mode 100644 src/PGF/doc/old-GFCC.cf delete mode 100644 src/PGF/doc/old-gfcc.txt delete mode 100644 src/PGF/doc/syntax.txt (limited to 'src/PGF') diff --git a/src/PGF/Binary.hs b/src/PGF/Binary.hs deleted file mode 100644 index e4ed98424..000000000 --- a/src/PGF/Binary.hs +++ /dev/null @@ -1,199 +0,0 @@ -module PGF.Binary where - -import PGF.CId -import PGF.Data -import Data.Binary -import Data.Binary.Put -import Data.Binary.Get -import qualified Data.ByteString as BS -import qualified Data.Map as Map -import qualified Data.IntMap as IntMap -import qualified Data.Set as Set -import Control.Monad - -pgfMajorVersion, pgfMinorVersion :: Word16 -(pgfMajorVersion, pgfMinorVersion) = (1,0) - -instance Binary PGF where - put pgf = putWord16be pgfMajorVersion >> - putWord16be pgfMinorVersion >> - put ( absname pgf, cncnames pgf - , gflags pgf - , abstract pgf, concretes pgf - ) - get = do v1 <- getWord16be - v2 <- getWord16be - absname <- get - cncnames <- get - gflags <- get - abstract <- get - concretes <- get - return (PGF{ absname=absname, cncnames=cncnames - , gflags=gflags - , abstract=abstract, concretes=concretes - }) - -instance Binary CId where - put (CId bs) = put bs - get = liftM CId get - -instance Binary Abstr where - put abs = put (aflags abs, funs abs, cats abs) - get = do aflags <- get - funs <- get - cats <- get - let catfuns = Map.mapWithKey (\cat _ -> [f | (f, (DTyp _ c _,_,_)) <- Map.toList funs, c==cat]) cats - return (Abstr{ aflags=aflags - , funs=funs, cats=cats - , catfuns=catfuns - }) - -instance Binary Concr where - put cnc = put ( cflags cnc, lins cnc, opers cnc - , lincats cnc, lindefs cnc - , printnames cnc, paramlincats cnc - , parser cnc - ) - get = do cflags <- get - lins <- get - opers <- get - lincats <- get - lindefs <- get - printnames <- get - paramlincats <- get - parser <- get - return (Concr{ cflags=cflags, lins=lins, opers=opers - , lincats=lincats, lindefs=lindefs - , printnames=printnames - , paramlincats=paramlincats - , parser=parser - }) - -instance Binary Alternative where - put (Alt v x) = put v >> put x - get = liftM2 Alt get get - -instance Binary Term where - put (R es) = putWord8 0 >> put es - put (S es) = putWord8 1 >> put es - put (FV es) = putWord8 2 >> put es - put (P e v) = putWord8 3 >> put (e,v) - put (W e v) = putWord8 4 >> put (e,v) - put (C i ) = putWord8 5 >> put i - put (TM i ) = putWord8 6 >> put i - put (F f) = putWord8 7 >> put f - put (V i) = putWord8 8 >> put i - put (K (KS s)) = putWord8 9 >> put s - put (K (KP d vs)) = putWord8 10 >> put (d,vs) - - get = do tag <- getWord8 - case tag of - 0 -> liftM R get - 1 -> liftM S get - 2 -> liftM FV get - 3 -> liftM2 P get get - 4 -> liftM2 W get get - 5 -> liftM C get - 6 -> liftM TM get - 7 -> liftM F get - 8 -> liftM V get - 9 -> liftM (K . KS) get - 10 -> liftM2 (\d vs -> K (KP d vs)) get get - _ -> decodingError - -instance Binary Expr where - put (EAbs b x exp) = putWord8 0 >> put (b,x,exp) - put (EApp e1 e2) = putWord8 1 >> put (e1,e2) - put (ELit (LStr s)) = putWord8 2 >> put s - put (ELit (LFlt d)) = putWord8 3 >> put d - put (ELit (LInt i)) = putWord8 4 >> put i - put (EMeta i) = putWord8 5 >> put i - put (EFun f) = putWord8 6 >> put f - put (EVar i) = putWord8 7 >> put i - put (ETyped e ty) = putWord8 8 >> put (e,ty) - get = do tag <- getWord8 - case tag of - 0 -> liftM3 EAbs get get get - 1 -> liftM2 EApp get get - 2 -> liftM (ELit . LStr) get - 3 -> liftM (ELit . LFlt) get - 4 -> liftM (ELit . LInt) get - 5 -> liftM EMeta get - 6 -> liftM EFun get - 7 -> liftM EVar get - 8 -> liftM2 ETyped get get - _ -> decodingError - -instance Binary Patt where - put (PApp f ps) = putWord8 0 >> put (f,ps) - put (PVar x) = putWord8 1 >> put x - put PWild = putWord8 2 - put (PLit (LStr s)) = putWord8 3 >> put s - put (PLit (LFlt d)) = putWord8 4 >> put d - put (PLit (LInt i)) = putWord8 5 >> put i - get = do tag <- getWord8 - case tag of - 0 -> liftM2 PApp get get - 1 -> liftM PVar get - 2 -> return PWild - 3 -> liftM (PLit . LStr) get - 4 -> liftM (PLit . LFlt) get - 5 -> liftM (PLit . LInt) get - _ -> decodingError - -instance Binary Equation where - put (Equ ps e) = put (ps,e) - get = liftM2 Equ get get - -instance Binary Type where - put (DTyp hypos cat exps) = put (hypos,cat,exps) - get = liftM3 DTyp get get get - -instance Binary BindType where - put Explicit = putWord8 0 - put Implicit = putWord8 1 - get = do tag <- getWord8 - case tag of - 0 -> return Explicit - 1 -> return Implicit - _ -> decodingError - -instance Binary FFun where - put (FFun fun prof lins) = put (fun,prof,lins) - get = liftM3 FFun get get get - -instance Binary FSymbol where - put (FSymCat n l) = putWord8 0 >> put (n,l) - put (FSymLit n l) = putWord8 1 >> put (n,l) - put (FSymKS ts) = putWord8 2 >> put ts - put (FSymKP d vs) = putWord8 3 >> put (d,vs) - get = do tag <- getWord8 - case tag of - 0 -> liftM2 FSymCat get get - 1 -> liftM2 FSymLit get get - 2 -> liftM FSymKS get - 3 -> liftM2 (\d vs -> FSymKP d vs) get get - _ -> decodingError - -instance Binary Production where - put (FApply ruleid args) = putWord8 0 >> put (ruleid,args) - put (FCoerce fcat) = putWord8 1 >> put fcat - get = do tag <- getWord8 - case tag of - 0 -> liftM2 FApply get get - 1 -> liftM FCoerce get - _ -> decodingError - -instance Binary ParserInfo where - put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p) - get = do functions <- get - sequences <- get - productions0<- get - totalCats <- get - startCats <- get - return (ParserInfo{functions=functions,sequences=sequences - ,productions0=productions0 - ,productions =filterProductions productions0 - ,totalCats=totalCats,startCats=startCats}) - -decodingError = fail "This PGF file was compiled with different version of GF" diff --git a/src/PGF/BuildParser.hs b/src/PGF/BuildParser.hs deleted file mode 100644 index 23e0725c6..000000000 --- a/src/PGF/BuildParser.hs +++ /dev/null @@ -1,76 +0,0 @@ ---------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- FCFG parsing, parser information ------------------------------------------------------------------------------ - -module PGF.BuildParser where - -import GF.Data.SortedList -import GF.Data.Assoc -import PGF.CId -import PGF.Data -import PGF.Parsing.FCFG.Utilities - -import Data.Array.IArray -import Data.Maybe -import qualified Data.IntMap as IntMap -import qualified Data.Map as Map -import qualified Data.Set as Set -import Debug.Trace - - -data ParserInfoEx - = ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)] - , leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)] - , leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)] - , grammarToks :: [String] - } - ------------------------------------------------------------- --- parser information - -getLeftCornerTok pinfo (FFun _ _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymKS [tok] -> [tok] - _ -> [] - | otherwise = [] - where - syms = (sequences pinfo) ! (lins ! 0) - -getLeftCornerCat pinfo args (FFun _ _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymCat d _ -> let cat = args !! d - in case IntMap.lookup cat (productions pinfo) of - Just set -> cat : [cat' | FCoerce cat' <- Set.toList set] - Nothing -> [cat] - _ -> [] - | otherwise = [] - where - syms = (sequences pinfo) ! (lins ! 0) - -buildParserInfo :: ParserInfo -> ParserInfoEx -buildParserInfo pinfo = - ParserInfoEx { epsilonRules = epsilonrules - , leftcornerCats = leftcorncats - , leftcornerTokens = leftcorntoks - , grammarToks = grammartoks - } - - where epsilonrules = [ (ruleid,args,cat) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , let (FFun _ _ lins) = (functions pinfo) ! ruleid - , not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ] - leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat)) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ] - leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat)) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ] - grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin] diff --git a/src/PGF/CId.hs b/src/PGF/CId.hs deleted file mode 100644 index fea304d9d..000000000 --- a/src/PGF/CId.hs +++ /dev/null @@ -1,55 +0,0 @@ -module PGF.CId (CId(..), - mkCId, wildCId, - readCId, showCId, - - -- utils - pCId, pIdent, ppCId) where - -import Control.Monad -import qualified Data.ByteString.Char8 as BS -import Data.Char -import qualified Text.ParserCombinators.ReadP as RP -import qualified Text.PrettyPrint as PP - - --- | An abstract data type that represents --- identifiers for functions and categories in PGF. -newtype CId = CId BS.ByteString deriving (Eq,Ord) - -wildCId :: CId -wildCId = CId (BS.singleton '_') - --- | Creates a new identifier from 'String' -mkCId :: String -> CId -mkCId s = CId (BS.pack s) - --- | Reads an identifier from 'String'. The function returns 'Nothing' if the string is not valid identifier. -readCId :: String -> Maybe CId -readCId s = case [x | (x,cs) <- RP.readP_to_S pCId s, all isSpace cs] of - [x] -> Just x - _ -> Nothing - --- | Renders the identifier as 'String' -showCId :: CId -> String -showCId (CId x) = BS.unpack x - -instance Show CId where - showsPrec _ = showString . showCId - -instance Read CId where - readsPrec _ = RP.readP_to_S pCId - -pCId :: RP.ReadP CId -pCId = do s <- pIdent - if s == "_" - then RP.pfail - else return (mkCId s) - -pIdent :: RP.ReadP String -pIdent = liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest) - where - isIdentFirst c = c == '_' || isLetter c - isIdentRest c = c == '_' || c == '\'' || isAlphaNum c - -ppCId :: CId -> PP.Doc -ppCId = PP.text . showCId diff --git a/src/PGF/Check.hs b/src/PGF/Check.hs deleted file mode 100644 index 58b66cfe4..000000000 --- a/src/PGF/Check.hs +++ /dev/null @@ -1,173 +0,0 @@ -module PGF.Check (checkPGF) where - -import PGF.CId -import PGF.Data -import PGF.Macros -import GF.Data.ErrM - -import qualified Data.Map as Map -import Control.Monad -import Debug.Trace - -checkPGF :: PGF -> Err (PGF,Bool) -checkPGF pgf = do - (cs,bs) <- mapM (checkConcrete pgf) - (Map.assocs (concretes pgf)) >>= return . unzip - return (pgf {concretes = Map.fromAscList cs}, and bs) - - --- errors are non-fatal; replace with 'fail' to change this -msg s = trace s (return ()) - -andMapM :: Monad m => (a -> m Bool) -> [a] -> m Bool -andMapM f xs = mapM f xs >>= return . and - -labelBoolErr :: String -> Err (x,Bool) -> Err (x,Bool) -labelBoolErr ms iob = do - (x,b) <- iob - if b then return (x,b) else (msg ms >> return (x,b)) - - -checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool) -checkConcrete pgf (lang,cnc) = - labelBoolErr ("happened in language " ++ showCId lang) $ do - (rs,bs) <- mapM checkl (Map.assocs (lins cnc)) >>= return . unzip - return ((lang,cnc{lins = Map.fromAscList rs}),and bs) - where - checkl = checkLin pgf lang - -checkLin :: PGF -> CId -> (CId,Term) -> Err ((CId,Term),Bool) -checkLin pgf lang (f,t) = - labelBoolErr ("happened in function " ++ showCId f) $ do - (t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t - return ((f,t'),b) - -inferTerm :: [CType] -> Term -> Err (Term,CType) -inferTerm args trm = case trm of - K _ -> returnt str - C i -> returnt $ ints i - V i -> do - testErr (i < length args) ("too large index " ++ show i) - returnt $ args !! i - S ts -> do - (ts',tys) <- mapM infer ts >>= return . unzip - let tys' = filter (/=str) tys - testErr (null tys') - ("expected Str in " ++ show trm ++ " not " ++ unwords (map show tys')) - return (S ts',str) - R ts -> do - (ts',tys) <- mapM infer ts >>= return . unzip - return $ (R ts',tuple tys) - P t u -> do - (t',tt) <- infer t - (u',tu) <- infer u - case tt of - R tys -> case tu of - R vs -> infer $ foldl P t' [P u' (C i) | i <- [0 .. length vs - 1]] - --- R [v] -> infer $ P t v - --- R (v:vs) -> infer $ P (head tys) (R vs) - - C i -> do - testErr (i < length tys) - ("required more than " ++ show i ++ " fields in " ++ show (R tys)) - return (P t' u', tys !! i) -- record: index must be known - _ -> do - let typ = head tys - testErr (all (==typ) tys) ("different types in table " ++ show trm) - return (P t' u', typ) -- table: types must be same - _ -> Bad $ "projection from " ++ show t ++ " : " ++ show tt - FV [] -> returnt tm0 ---- - FV (t:ts) -> do - (t',ty) <- infer t - (ts',tys) <- mapM infer ts >>= return . unzip - testErr (all (eqType True ty) tys) ("different types in variants " ++ show trm) - return (FV (t':ts'),ty) - W s r -> infer r - _ -> Bad ("no type inference for " ++ show trm) - where - returnt ty = return (trm,ty) - infer = inferTerm args - -checkTerm :: LinType -> Term -> Err (Term,Bool) -checkTerm (args,val) trm = case inferTerm args trm of - Ok (t,ty) -> if eqType False ty val - then return (t,True) - else do - msg ("term: " ++ show trm ++ - "\nexpected type: " ++ show val ++ - "\ninferred type: " ++ show ty) - return (t,False) - Bad s -> do - msg s - return (trm,False) - --- symmetry in (Ints m == Ints n) is all we can use in variants - -eqType :: Bool -> CType -> CType -> Bool -eqType symm inf exp = case (inf,exp) of - (C k, C n) -> if symm then True else k <= n -- only run-time corr. - (R rs,R ts) -> length rs == length ts && and [eqType symm r t | (r,t) <- zip rs ts] - (TM _, _) -> True ---- for variants [] ; not safe - _ -> inf == exp - --- should be in a generic module, but not in the run-time DataGFCC - -type CType = Term -type LinType = ([CType],CType) - -tuple :: [CType] -> CType -tuple = R - -ints :: Int -> CType -ints = C - -str :: CType -str = S [] - -lintype :: PGF -> CId -> CId -> LinType -lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of - (cs,c) -> (map vlinc cs, linc c) ---- HOAS - where - linc = lookLincat pgf lang - vlinc (0,c) = linc c - vlinc (i,c) = case linc c of - R ts -> R (ts ++ replicate i str) - -inline :: PGF -> CId -> Term -> Term -inline pgf lang t = case t of - F c -> inl $ look c - _ -> composSafeOp inl t - where - inl = inline pgf lang - look = lookLin pgf lang - -composOp :: Monad m => (Term -> m Term) -> Term -> m Term -composOp f trm = case trm of - R ts -> liftM R $ mapM f ts - S ts -> liftM S $ mapM f ts - FV ts -> liftM FV $ mapM f ts - P t u -> liftM2 P (f t) (f u) - W s t -> liftM (W s) $ f t - _ -> return trm - -composSafeOp :: (Term -> Term) -> Term -> Term -composSafeOp f = maybe undefined id . composOp (return . f) - --- from GF.Data.Oper - -maybeErr :: String -> Maybe a -> Err a -maybeErr s = maybe (Bad s) Ok - -testErr :: Bool -> String -> Err () -testErr cond msg = if cond then return () else Bad msg - -errVal :: a -> Err a -> a -errVal a = err (const a) id - -errIn :: String -> Err a -> Err a -errIn msg = err (\s -> Bad (s ++ "\nOCCURRED IN\n" ++ msg)) return - -err :: (String -> b) -> (a -> b) -> Err a -> b -err d f e = case e of - Ok a -> f a - Bad s -> d s diff --git a/src/PGF/Data.hs b/src/PGF/Data.hs deleted file mode 100644 index 38027e96e..000000000 --- a/src/PGF/Data.hs +++ /dev/null @@ -1,95 +0,0 @@ -module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type, module PGF.PMCFG) where - -import PGF.CId -import PGF.Expr hiding (Value, Env, Tree) -import PGF.Type -import PGF.PMCFG - -import qualified Data.Map as Map -import qualified Data.Set as Set -import qualified Data.IntMap as IntMap -import Data.List - --- internal datatypes for PGF - --- | An abstract data type representing multilingual grammar --- in Portable Grammar Format. -data PGF = PGF { - absname :: CId , - cncnames :: [CId] , - gflags :: Map.Map CId String, -- value of a global flag - abstract :: Abstr , - concretes :: Map.Map CId Concr - } - -data Abstr = Abstr { - aflags :: Map.Map CId String, -- value of a flag - funs :: Map.Map CId (Type,Int,[Equation]), -- type, arrity and definition of function - cats :: Map.Map CId [Hypo], -- context of a cat - catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup) - } - -data Concr = Concr { - cflags :: Map.Map CId String, -- value of a flag - lins :: Map.Map CId Term, -- lin of a fun - opers :: Map.Map CId Term, -- oper generated by subex elim - lincats :: Map.Map CId Term, -- lin type of a cat - lindefs :: Map.Map CId Term, -- lin default of a cat - printnames :: Map.Map CId Term, -- printname of a cat or a fun - paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names - parser :: Maybe ParserInfo -- parser - } - -data Term = - R [Term] - | P Term Term - | S [Term] - | K Tokn - | V Int - | C Int - | F CId - | FV [Term] - | W String Term - | TM String - deriving (Eq,Ord,Show) - -data Tokn = - KS String - | KP [String] [Alternative] - deriving (Eq,Ord,Show) - - --- merge two GFCCs; fails is differens absnames; priority to second arg - -unionPGF :: PGF -> PGF -> PGF -unionPGF one two = case absname one of - n | n == wildCId -> two -- extending empty grammar - | n == absname two -> one { -- extending grammar with same abstract - concretes = Map.union (concretes two) (concretes one), - cncnames = union (cncnames one) (cncnames two) - } - _ -> one -- abstracts don't match ---- print error msg - -emptyPGF :: PGF -emptyPGF = PGF { - absname = wildCId, - cncnames = [] , - gflags = Map.empty, - abstract = error "empty grammar, no abstract", - concretes = Map.empty - } - --- | This is just a 'CId' with the language name. --- A language name is the identifier that you write in the --- top concrete or abstract module in GF after the --- concrete/abstract keyword. Example: --- --- > abstract Lang = ... --- > concrete LangEng of Lang = ... -type Language = CId - -readLanguage :: String -> Maybe Language -readLanguage = readCId - -showLanguage :: Language -> String -showLanguage = showCId diff --git a/src/PGF/Editor.hs b/src/PGF/Editor.hs deleted file mode 100644 index 3f69da170..000000000 --- a/src/PGF/Editor.hs +++ /dev/null @@ -1,241 +0,0 @@ -module PGF.Editor ( - State, -- datatype -- type-annotated possibly open tree with a focus - Dict, -- datatype -- abstract syntax information optimized for editing - Position, -- datatype -- path from top to focus - new, -- :: Type -> State -- create new State - refine, -- :: Dict -> CId -> State -> State -- refine focus with CId - replace, -- :: Dict -> Tree -> State -> State -- replace focus with Tree - delete, -- :: State -> State -- replace focus with ? - goNextMeta, -- :: State -> State -- move focus to next ? node - goNext, -- :: State -> State -- move to next node - goTop, -- :: State -> State -- move focus to the top (=root) - goPosition, -- :: Position -> State -> State -- move focus to given position - mkPosition, -- :: [Int] -> Position -- list of choices (top = []) - showPosition,-- :: Position -> [Int] -- readable position - focusType, -- :: State -> Type -- get the type of focus - stateTree, -- :: State -> Tree -- get the current tree - isMetaFocus, -- :: State -> Bool -- whether focus is ? - allMetas, -- :: State -> [(Position,Type)] -- all ?s and their positions - prState, -- :: State -> String -- print state, focus marked * - refineMenu, -- :: Dict -> State -> [CId] -- get refinement menu - pgf2dict -- :: PGF -> Dict -- create editing Dict from PGF - ) where - -import PGF.Data -import PGF.CId -import qualified Data.Map as M -import Debug.Trace ---- - --- API - -new :: Type -> State -new (DTyp _ t _) = etree2state (uETree t) - -refine :: Dict -> CId -> State -> State -refine dict f = replaceInState (mkRefinement dict f) - -replace :: Dict -> Tree -> State -> State -replace dict t = replaceInState (tree2etree dict t) - -delete :: State -> State -delete s = replaceInState (uETree (typ (tree s))) s - -goNextMeta :: State -> State -goNextMeta s = - if isComplete s then s - else let s1 = goNext s in if isMetaFocus s1 - then s1 else goNextMeta s1 - -isComplete :: State -> Bool -isComplete s = isc (tree s) where - isc t = case atom t of - AMeta _ -> False - ACon _ -> all isc (children t) - -goTop :: State -> State -goTop = navigate (const top) - -goPosition :: [Int] -> State -> State -goPosition p s = s{position = p} - -mkPosition :: [Int] -> Position -mkPosition = id - -refineMenu :: Dict -> State -> [CId] -refineMenu dict s = maybe [] (map fst) $ M.lookup (focusBType s) (refines dict) - -focusType :: State -> Type -focusType s = btype2type (focusBType s) - -stateTree :: State -> Tree -stateTree = etree2tree . tree - -pgf2dict :: PGF -> Dict -pgf2dict pgf = Dict (M.fromAscList fus) refs where - fus = [(f,mkFType ty) | (f,(ty,_)) <- M.toList (funs abs)] - refs = M.fromAscList [(c, fusTo c) | (c,_) <- M.toList (cats abs)] - fusTo c = [(f,ty) | (f,ty@(_,k)) <- fus, k==c] ---- quadratic - mkFType (DTyp hyps c _) = ([k | Hyp _ (DTyp _ k _) <- hyps],c) ----dep types - abs = abstract pgf - -etree2tree :: ETree -> Tree -etree2tree t = case atom t of - ACon f -> Fun f (map etree2tree (children t)) - AMeta i -> Meta i - -tree2etree :: Dict -> Tree -> ETree -tree2etree dict t = case t of - Fun f _ -> annot (look f) t - where - annot (tys,ty) tr = case tr of - Fun f trs -> ETree (ACon f) ty [annt t tr | (t,tr) <- zip tys trs] - Meta i -> ETree (AMeta i) ty [] - annt ty tr = case tr of - Fun _ _ -> tree2etree dict tr - Meta _ -> annot ([],ty) tr - look f = maybe undefined id $ M.lookup f (functs dict) - -prState :: State -> String -prState s = unlines [replicate i ' ' ++ f | (i,f) <- pr [] (tree s)] where - pr i t = - (ind i,prAtom i (atom t)) : concat [pr (sub j i) c | (j,c) <- zip [0..] (children t)] - prAtom i a = prFocus i ++ case a of - ACon f -> prCId f - AMeta i -> "?" ++ show i - prFocus i = if i == position s then "*" else "" - ind i = 2 * length i - sub j i = i ++ [j] - -showPosition :: Position -> [Int] -showPosition = id - -allMetas :: State -> [(Position,Type)] -allMetas s = [(reverse p, btype2type ty) | (p,ty) <- metas [] (tree s)] where - metas p t = - (if isMetaAtom (atom t) then [(p,typ t)] else []) ++ - concat [metas (i:p) u | (i,u) <- zip [0..] (children t)] - ----- Trees and navigation - -data ETree = ETree { - atom :: Atom, - typ :: BType, - children :: [ETree] - } - deriving Show - -data Atom = - ACon CId - | AMeta Int - deriving Show - -btype2type :: BType -> Type -btype2type t = DTyp [] t [] - -uETree :: BType -> ETree -uETree ty = ETree (AMeta 0) ty [] - -data State = State { - position :: Position, - tree :: ETree - } - deriving Show - -type Position = [Int] - -top :: Position -top = [] - -up :: Position -> Position -up p = case p of - _:_ -> init p - _ -> p - -down :: Position -> Position -down = (++[0]) - -left :: Position -> Position -left p = case p of - _:_ | last p > 0 -> init p ++ [last p - 1] - _ -> top - -right :: Position -> Position -right p = case p of - _:_ -> init p ++ [last p + 1] - _ -> top - -etree2state :: ETree -> State -etree2state = State top - -doInState :: (ETree -> ETree) -> State -> State -doInState f s = s{tree = change (position s) (tree s)} where - change p t = case p of - [] -> f t - n:ns -> let (ts1,t0:ts2) = splitAt n (children t) in - t{children = ts1 ++ [change ns t0] ++ ts2} - -subtree :: Position -> ETree -> ETree -subtree p t = case p of - [] -> t - n:ns -> subtree ns (children t !! n) - -focus :: State -> ETree -focus s = subtree (position s) (tree s) - -focusBType :: State -> BType -focusBType s = typ (focus s) - -navigate :: (Position -> Position) -> State -> State -navigate p s = s{position = p (position s)} - --- p is a fix-point aspect of state change -untilFix :: Eq a => (State -> a) -> (State -> Bool) -> (State -> State) -> State -> State -untilFix p b f s = - if b s - then s - else let fs = f s in if p fs == p s - then s - else untilFix p b f fs - -untilPosition :: (State -> Bool) -> (State -> State) -> State -> State -untilPosition = untilFix position - -goNext :: State -> State -goNext s = case focus s of - st | not (null (children st)) -> navigate down s - _ -> findSister s - where - findSister s = case s of - s' | null (position s') -> s' - s' | hasYoungerSisters s' -> navigate right s' - s' -> findSister (navigate up s') - hasYoungerSisters s = case position s of - p@(_:_) -> length (children (focus (navigate up s))) > last p + 1 - _ -> False - -isMetaFocus :: State -> Bool -isMetaFocus s = isMetaAtom (atom (focus s)) - -isMetaAtom :: Atom -> Bool -isMetaAtom a = case a of - AMeta _ -> True - _ -> False - -replaceInState :: ETree -> State -> State -replaceInState t = doInState (const t) - - -------- - -type BType = CId ----dep types -type FType = ([BType],BType) ----dep types - -data Dict = Dict { - functs :: M.Map CId FType, - refines :: M.Map BType [(CId,FType)] - } - -mkRefinement :: Dict -> CId -> ETree -mkRefinement dict f = ETree (ACon f) val (map uETree args) where - (args,val) = maybe undefined id $ M.lookup f (functs dict) - diff --git a/src/PGF/Expr.hs b/src/PGF/Expr.hs deleted file mode 100644 index cf0cb79aa..000000000 --- a/src/PGF/Expr.hs +++ /dev/null @@ -1,355 +0,0 @@ -module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(..), - readExpr, showExpr, pExpr, pBinds, ppExpr, ppPatt, - - mkApp, unApp, - mkStr, unStr, - mkInt, unInt, - mkDouble, unDouble, - mkMeta, isMeta, - - normalForm, - - -- needed in the typechecker - Value(..), Env, Funs, eval, apply, - - MetaId, - - -- helpers - pMeta,pStr,pArg,pLit,freshName,ppMeta,ppLit,ppParens - ) where - -import PGF.CId -import PGF.Type - -import Data.Char -import Data.Maybe -import Data.List as List -import Data.Map as Map hiding (showTree) -import Control.Monad -import qualified Text.PrettyPrint as PP -import qualified Text.ParserCombinators.ReadP as RP - -data Literal = - LStr String -- ^ string constant - | LInt Integer -- ^ integer constant - | LFlt Double -- ^ floating point constant - deriving (Eq,Ord,Show) - -type MetaId = Int - -data BindType = - Explicit - | Implicit - deriving (Eq,Ord,Show) - --- | Tree is the abstract syntax representation of a given sentence --- in some concrete syntax. Technically 'Tree' is a type synonym --- of 'Expr'. -type Tree = Expr - --- | An expression in the abstract syntax of the grammar. It could be --- both parameter of a dependent type or an abstract syntax tree for --- for some sentence. -data Expr = - EAbs BindType CId Expr -- ^ lambda abstraction - | EApp Expr Expr -- ^ application - | ELit Literal -- ^ literal - | EMeta {-# UNPACK #-} !MetaId -- ^ meta variable - | EFun CId -- ^ function or data constructor - | EVar {-# UNPACK #-} !Int -- ^ variable with de Bruijn index - | ETyped Expr Type -- ^ local type signature - | EImplArg Expr -- ^ implicit argument in expression - deriving (Eq,Ord,Show) - --- | The pattern is used to define equations in the abstract syntax of the grammar. -data Patt = - PApp CId [Patt] -- ^ application. The identifier should be constructor i.e. defined with 'data' - | PLit Literal -- ^ literal - | PVar CId -- ^ variable - | PWild -- ^ wildcard - | PImplArg Patt -- ^ implicit argument in pattern - deriving (Eq,Ord) - --- | The equation is used to define lambda function as a sequence --- of equations with pattern matching. The list of 'Expr' represents --- the patterns and the second 'Expr' is the function body for this --- equation. -data Equation = - Equ [Patt] Expr - deriving (Eq,Ord) - --- | parses 'String' as an expression -readExpr :: String -> Maybe Expr -readExpr s = case [x | (x,cs) <- RP.readP_to_S pExpr s, all isSpace cs] of - [x] -> Just x - _ -> Nothing - --- | renders expression as 'String'. The list --- of identifiers is the list of all free variables --- in the expression in order reverse to the order --- of binding. -showExpr :: [CId] -> Expr -> String -showExpr vars = PP.render . ppExpr 0 vars - -instance Read Expr where - readsPrec _ = RP.readP_to_S pExpr - --- | Constructs an expression by applying a function to a list of expressions -mkApp :: CId -> [Expr] -> Expr -mkApp f es = foldl EApp (EFun f) es - --- | Decomposes an expression into application of function -unApp :: Expr -> Maybe (CId,[Expr]) -unApp = extract [] - where - extract es (EFun f) = Just (f,es) - extract es (EApp e1 e2) = extract (e2:es) e1 - extract es _ = Nothing - --- | Constructs an expression from string literal -mkStr :: String -> Expr -mkStr s = ELit (LStr s) - --- | Decomposes an expression into string literal -unStr :: Expr -> Maybe String -unStr (ELit (LStr s)) = Just s -unStr _ = Nothing - --- | Constructs an expression from integer literal -mkInt :: Integer -> Expr -mkInt i = ELit (LInt i) - --- | Decomposes an expression into integer literal -unInt :: Expr -> Maybe Integer -unInt (ELit (LInt i)) = Just i -unInt _ = Nothing - --- | Constructs an expression from real number literal -mkDouble :: Double -> Expr -mkDouble f = ELit (LFlt f) - --- | Decomposes an expression into real number literal -unDouble :: Expr -> Maybe Double -unDouble (ELit (LFlt f)) = Just f -unDouble _ = Nothing - --- | Constructs an expression which is meta variable -mkMeta :: Expr -mkMeta = EMeta 0 - --- | Checks whether an expression is a meta variable -isMeta :: Expr -> Bool -isMeta (EMeta _) = True -isMeta _ = False - ------------------------------------------------------ --- Parsing ------------------------------------------------------ - -pExpr :: RP.ReadP Expr -pExpr = RP.skipSpaces >> (pAbs RP.<++ pTerm) - where - pTerm = do f <- pFactor - RP.skipSpaces - as <- RP.sepBy pArg RP.skipSpaces - return (foldl EApp f as) - - pAbs = do xs <- RP.between (RP.char '\\') (RP.skipSpaces >> RP.string "->") pBinds - e <- pExpr - return (foldr (\(b,x) e -> EAbs b x e) e xs) - -pBinds :: RP.ReadP [(BindType,CId)] -pBinds = do xss <- RP.sepBy1 (RP.skipSpaces >> pBind) (RP.skipSpaces >> RP.char ',') - return (concat xss) - where - pCIdOrWild = pCId `mplus` (RP.char '_' >> return wildCId) - - pBind = - do x <- pCIdOrWild - return [(Explicit,x)] - `mplus` - RP.between (RP.char '{') - (RP.skipSpaces >> RP.char '}') - (RP.sepBy1 (RP.skipSpaces >> pCIdOrWild >>= \id -> return (Implicit,id)) (RP.skipSpaces >> RP.char ',')) - -pArg = fmap EImplArg (RP.between (RP.char '{') (RP.char '}') pExpr) - RP.<++ - pFactor - -pFactor = fmap EFun pCId - RP.<++ fmap ELit pLit - RP.<++ fmap EMeta pMeta - RP.<++ RP.between (RP.char '(') (RP.char ')') pExpr - RP.<++ RP.between (RP.char '<') (RP.char '>') pTyped - -pTyped = do RP.skipSpaces - e <- pExpr - RP.skipSpaces - RP.char ':' - RP.skipSpaces - ty <- pType - return (ETyped e ty) - -pMeta = do RP.char '?' - return 0 - -pLit :: RP.ReadP Literal -pLit = pNum RP.<++ liftM LStr pStr - -pNum = do x <- RP.munch1 isDigit - ((RP.char '.' >> RP.munch1 isDigit >>= \y -> return (LFlt (read (x++"."++y)))) - RP.<++ - (return (LInt (read x)))) - -pStr = RP.char '"' >> (RP.manyTill (pEsc RP.<++ RP.get) (RP.char '"')) - where - pEsc = RP.char '\\' >> RP.get - - ------------------------------------------------------ --- Printing ------------------------------------------------------ - -ppExpr :: Int -> [CId] -> Expr -> PP.Doc -ppExpr d scope (EAbs b x e) = let (bs,xs,e1) = getVars [] [] (EAbs b x e) - in ppParens (d > 1) (PP.char '\\' PP.<> - PP.hsep (PP.punctuate PP.comma (reverse (List.zipWith ppBind bs xs))) PP.<+> - PP.text "->" PP.<+> - ppExpr 1 (xs++scope) e1) - where - getVars bs xs (EAbs b x e) = getVars (b:bs) ((freshName x xs):xs) e - getVars bs xs e = (bs,xs,e) -ppExpr d scope (EApp e1 e2) = ppParens (d > 3) ((ppExpr 3 scope e1) PP.<+> (ppExpr 4 scope e2)) -ppExpr d scope (ELit l) = ppLit l -ppExpr d scope (EMeta n) = ppMeta n -ppExpr d scope (EFun f) = ppCId f -ppExpr d scope (EVar i) = ppCId (scope !! i) -ppExpr d scope (ETyped e ty)= PP.char '<' PP.<> ppExpr 0 scope e PP.<+> PP.colon PP.<+> ppType 0 scope ty PP.<> PP.char '>' -ppExpr d scope (EImplArg e) = PP.braces (ppExpr 0 scope e) - -ppPatt :: Int -> [CId] -> Patt -> ([CId],PP.Doc) -ppPatt d scope (PApp f ps) = let (scope',ds) = mapAccumL (ppPatt 2) scope ps - in (scope',ppParens (not (List.null ps) && d > 1) (ppCId f PP.<+> PP.hsep ds)) -ppPatt d scope (PLit l) = (scope,ppLit l) -ppPatt d scope (PVar f) = (f:scope,ppCId f) -ppPatt d scope PWild = (scope,PP.char '_') -ppPatt d scope (PImplArg p) = let (scope',d) = ppPatt 0 scope p - in (scope',PP.braces d) - -ppBind Explicit x = ppCId x -ppBind Implicit x = PP.braces (ppCId x) - -ppLit (LStr s) = PP.text (show s) -ppLit (LInt n) = PP.integer n -ppLit (LFlt d) = PP.double d - -ppMeta :: MetaId -> PP.Doc -ppMeta n - | n == 0 = PP.char '?' - | otherwise = PP.char '?' PP.<> PP.int n - -ppParens True = PP.parens -ppParens False = id - -freshName :: CId -> [CId] -> CId -freshName x xs0 = loop 1 x - where - xs = wildCId : xs0 - - loop i y - | elem y xs = loop (i+1) (mkCId (show x++show i)) - | otherwise = y - - ------------------------------------------------------ --- Computation ------------------------------------------------------ - --- | Compute an expression to normal form -normalForm :: Funs -> Int -> Env -> Expr -> Expr -normalForm funs k env e = value2expr k (eval funs env e) - where - value2expr i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr i) vs) - value2expr i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr i) vs) - value2expr i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr i) vs) - value2expr i (VSusp j env vs k) = value2expr i (k (VGen j vs)) - value2expr i (VLit l) = ELit l - value2expr i (VClosure env (EAbs b x e)) = EAbs b x (value2expr (i+1) (eval funs ((VGen i []):env) e)) - value2expr i (VImplArg v) = EImplArg (value2expr i v) - -data Value - = VApp CId [Value] - | VLit Literal - | VMeta {-# UNPACK #-} !MetaId Env [Value] - | VSusp {-# UNPACK #-} !MetaId Env [Value] (Value -> Value) - | VGen {-# UNPACK #-} !Int [Value] - | VClosure Env Expr - | VImplArg Value - -type Funs = Map.Map CId (Type,Int,[Equation]) -- type and def of a fun -type Env = [Value] - -eval :: Funs -> Env -> Expr -> Value -eval funs env (EVar i) = env !! i -eval funs env (EFun f) = case Map.lookup f funs of - Just (_,a,eqs) -> if a == 0 - then case eqs of - Equ [] e : _ -> eval funs [] e - _ -> VApp f [] - else VApp f [] - Nothing -> error ("unknown function "++showCId f) -eval funs env (EApp e1 e2) = apply funs env e1 [eval funs env e2] -eval funs env (EAbs b x e) = VClosure env (EAbs b x e) -eval funs env (EMeta i) = VMeta i env [] -eval funs env (ELit l) = VLit l -eval funs env (ETyped e _) = eval funs env e -eval funs env (EImplArg e) = VImplArg (eval funs env e) - -apply :: Funs -> Env -> Expr -> [Value] -> Value -apply funs env e [] = eval funs env e -apply funs env (EVar i) vs = applyValue funs (env !! i) vs -apply funs env (EFun f) vs = case Map.lookup f funs of - Just (_,a,eqs) -> if a <= length vs - then let (as,vs') = splitAt a vs - in match funs f eqs as vs' - else VApp f vs - Nothing -> error ("unknown function "++showCId f) -apply funs env (EApp e1 e2) vs = apply funs env e1 (eval funs env e2 : vs) -apply funs env (EAbs _ x e) (v:vs) = apply funs (v:env) e vs -apply funs env (EMeta i) vs = VMeta i env vs -apply funs env (ELit l) vs = error "literal of function type" -apply funs env (ETyped e _) vs = apply funs env e vs -apply funs env (EImplArg _) vs = error "implicit argument in function position" - -applyValue funs v [] = v -applyValue funs (VApp f vs0) vs = apply funs [] (EFun f) (vs0++vs) -applyValue funs (VLit _) vs = error "literal of function type" -applyValue funs (VMeta i env vs0) vs = VMeta i env (vs0++vs) -applyValue funs (VGen i vs0) vs = VGen i (vs0++vs) -applyValue funs (VSusp i env vs0 k) vs = VSusp i env vs0 (\v -> applyValue funs (k v) vs) -applyValue funs (VClosure env (EAbs b x e)) (v:vs) = apply funs (v:env) e vs -applyValue funs (VImplArg _) vs = error "implicit argument in function position" - ------------------------------------------------------ --- Pattern matching ------------------------------------------------------ - -match :: Funs -> CId -> [Equation] -> [Value] -> [Value] -> Value -match funs f eqs as0 vs0 = - case eqs of - [] -> VApp f (as0++vs0) - (Equ ps res):eqs -> tryMatches eqs ps as0 res [] - where - tryMatches eqs [] [] res env = apply funs env res vs0 - tryMatches eqs (p:ps) (a:as) res env = tryMatch p a env - where - tryMatch (PVar x ) (v ) env = tryMatches eqs ps as res (v:env) - tryMatch (PWild ) (_ ) env = tryMatches eqs ps as res env - tryMatch (p ) (VMeta i envi vs ) env = VSusp i envi vs (\v -> tryMatch p v env) - tryMatch (p ) (VGen i vs ) env = VApp f (as0++vs0) - tryMatch (p ) (VSusp i envi vs k) env = VSusp i envi vs (\v -> tryMatch p (k v) env) - tryMatch (PApp f1 ps1) (VApp f2 vs2 ) env | f1 == f2 = tryMatches eqs (ps1++ps) (vs2++as) res env - tryMatch (PLit l1 ) (VLit l2 ) env | l1 == l2 = tryMatches eqs ps as res env - tryMatch (PImplArg p ) (VImplArg v ) env = tryMatch p v env - tryMatch _ _ env = match funs f eqs as0 vs0 - diff --git a/src/PGF/Expr.hs-boot b/src/PGF/Expr.hs-boot deleted file mode 100644 index 34a62a410..000000000 --- a/src/PGF/Expr.hs-boot +++ /dev/null @@ -1,28 +0,0 @@ -module PGF.Expr where - -import PGF.CId -import qualified Text.PrettyPrint as PP -import qualified Text.ParserCombinators.ReadP as RP - -data Expr - -instance Eq Expr -instance Ord Expr -instance Show Expr - - -data BindType = Explicit | Implicit - -instance Eq BindType -instance Ord BindType -instance Show BindType - - -pArg :: RP.ReadP Expr -pBinds :: RP.ReadP [(BindType,CId)] - -ppExpr :: Int -> [CId] -> Expr -> PP.Doc - -freshName :: CId -> [CId] -> CId - -ppParens :: Bool -> PP.Doc -> PP.Doc diff --git a/src/PGF/Generate.hs b/src/PGF/Generate.hs deleted file mode 100644 index 5add00a78..000000000 --- a/src/PGF/Generate.hs +++ /dev/null @@ -1,66 +0,0 @@ -module PGF.Generate where - -import PGF.CId -import PGF.Data -import PGF.Macros -import PGF.TypeCheck - -import qualified Data.Map as M -import System.Random - --- generate an infinite list of trees exhaustively -generate :: PGF -> Type -> Maybe Int -> [Expr] -generate pgf ty@(DTyp _ cat _) dp = filter (\e -> case checkExpr pgf e ty of - Left _ -> False - Right _ -> True ) - (concatMap (\i -> gener i cat) depths) - where - gener 0 c = [EFun f | (f, ([],_)) <- fns c] - gener i c = [ - tr | - (f, (cs,_)) <- fns c, - let alts = map (gener (i-1)) cs, - ts <- combinations alts, - let tr = foldl EApp (EFun f) ts, - depth tr >= i - ] - fns c = [(f,catSkeleton ty) | (f,ty) <- functionsToCat pgf c] - depths = maybe [0 ..] (\d -> [0..d]) dp - --- generate an infinite list of trees randomly -genRandom :: StdGen -> PGF -> Type -> [Expr] -genRandom gen pgf ty@(DTyp _ cat _) = filter (\e -> case checkExpr pgf e ty of - Left _ -> False - Right _ -> True ) - (genTrees (randomRs (0.0, 1.0 :: Double) gen) cat) - where - timeout = 47 -- give up - - genTrees ds0 cat = - let (ds,ds2) = splitAt (timeout+1) ds0 -- for time out, else ds - (t,k) = genTree ds cat - in (if k>timeout then id else (t:)) - (genTrees ds2 cat) -- else (drop k ds) - - genTree rs = gett rs where - gett ds cid | cid == cidString = (ELit (LStr "foo"), 1) - gett ds cid | cid == cidInt = (ELit (LInt 12345), 1) - gett ds cid | cid == cidFloat = (ELit (LFlt 12345), 1) - gett [] _ = (ELit (LStr "TIMEOUT"), 1) ---- - gett ds cat = case fns cat of - [] -> (EMeta 0,1) - fs -> let - d:ds2 = ds - (f,args) = getf d fs - (ts,k) = getts ds2 args - in (foldl EApp (EFun f) ts, k+1) - getf d fs = let lg = (length fs) in - fs !! (floor (d * fromIntegral lg)) - getts ds cats = case cats of - c:cs -> let - (t, k) = gett ds c - (ts,ks) = getts (drop k ds) cs - in (t:ts, k + ks) - _ -> ([],0) - - fns cat = [(f,(fst (catSkeleton ty))) | (f,ty) <- functionsToCat pgf cat] diff --git a/src/PGF/Linearize.hs b/src/PGF/Linearize.hs deleted file mode 100644 index fdd4cecb5..000000000 --- a/src/PGF/Linearize.hs +++ /dev/null @@ -1,166 +0,0 @@ -{-# LANGUAGE ParallelListComp #-} -module PGF.Linearize - (linearizes,realize,realizes,linTree, linTreeMark,linearizesMark) where - -import PGF.CId -import PGF.Data -import PGF.Macros -import PGF.Tree - -import Control.Monad -import qualified Data.Map as Map -import Data.List - -import Debug.Trace - --- linearization and computation of concrete PGF Terms - -linearizes :: PGF -> CId -> Expr -> [String] -linearizes pgf lang = realizes . linTree pgf lang - -realize :: Term -> String -realize = concat . take 1 . realizes - -realizes :: Term -> [String] -realizes = map (unwords . untokn) . realizest - -realizest :: Term -> [[Tokn]] -realizest trm = case trm of - R ts -> realizest (ts !! 0) - S ss -> map concat $ combinations $ map realizest ss - K t -> [[t]] - W s t -> [[KS (s ++ r)] | [KS r] <- realizest t] - FV ts -> concatMap realizest ts - TM s -> [[KS s]] - _ -> [[KS $ "REALIZE_ERROR " ++ show trm]] ---- debug - -untokn :: [Tokn] -> [String] -untokn ts = case ts of - KP d _ : [] -> d - KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss - KS s : ws -> s : untokn ws - [] -> [] - where - sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of - v:_ -> v - _ -> d - --- Lifts all variants to the top level (except those in macros). -liftVariants :: Term -> [Term] -liftVariants = f - where - f (R ts) = liftM R $ mapM f ts - f (P t1 t2) = liftM2 P (f t1) (f t2) - f (S ts) = liftM S $ mapM f ts - f (FV ts) = ts >>= f - f (W s t) = liftM (W s) $ f t - f t = return t - -linTree :: PGF -> CId -> Expr -> Term -linTree pgf lang e = lin (expr2tree e) Nothing - where - cnc = lookMap (error "no lang") lang (concretes pgf) - - lin (Abs xs e ) mty = case lin e Nothing of - R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs) - TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs) - lin (Fun fun es) mty = case Map.lookup fun (funs (abstract pgf)) of - Just (DTyp hyps _ _,_,_) -> let argVariants = sequence [liftVariants (lin e (Just ty)) | e <- es | (_,_,ty) <- hyps] - in variants [compute pgf lang args $ lookMap tm0 fun (lins cnc) | args <- argVariants] - Nothing -> tm0 - lin (Lit (LStr s)) mty = R [kks (show s)] -- quoted - lin (Lit (LInt i)) mty = R [kks (show i)] - lin (Lit (LFlt d)) mty = R [kks (show d)] - lin (Var x) mty = case mty of - Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc)) - Nothing -> TM (showCId x) - lin (Meta i) mty = case mty of - Just (DTyp _ cat _) -> compute pgf lang [K (KS (show i))] (lookMap tm0 cat (lindefs cnc)) - Nothing -> TM (show i) - -variants :: [Term] -> Term -variants ts = case ts of - [t] -> t - _ -> FV ts - -unvariants :: Term -> [Term] -unvariants t = case t of - FV ts -> ts - _ -> [t] - -compute :: PGF -> CId -> [Term] -> Term -> Term -compute pgf lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ map comp ts - V i -> idx args i -- already computed - F c -> comp $ look c -- not computed (if contains argvar) - FV ts -> FV $ map comp ts - S ts -> S $ filter (/= S []) $ map comp ts - _ -> trm - - look = lookOper pgf lang - - idx xs i = if i > length xs - 1 - then trace - ("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0 - else xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ map (proj r) ts - (FV ts, _ ) -> FV $ map (\t -> proj t p) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> i - TM _ -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666 - - getField t i = case t of - R rs -> idx rs i - TM s -> TM s - _ -> error ("ERROR in grammar compiler: field from " ++ show t) t - ---------- --- markup with tree positions - -linearizesMark :: PGF -> CId -> Expr -> [String] -linearizesMark pgf lang = realizes . linTreeMark pgf lang - -linTreeMark :: PGF -> CId -> Expr -> Term -linTreeMark pgf lang = lin [] . expr2tree - where - lin p (Abs xs e ) = case lin p e of - R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs) - TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs) - lin p (Fun fun es) = - let argVariants = - mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es) - in variants [mark (fun,p) $ compute pgf lang args $ look fun | - args <- argVariants] - lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted - lin p (Lit (LInt i)) = mark p $ R [kks (show i)] - lin p (Lit (LFlt d)) = mark p $ R [kks (show d)] - lin p (Var x) = mark p $ TM (showCId x) - lin p (Meta i) = mark p $ TM (show i) - - look = lookLin pgf lang - - mark :: Show a => a -> Term -> Term - mark p t = case t of - R ts -> R $ map (mark p) ts - FV ts -> R $ map (mark p) ts - S ts -> S $ bracket p ts - K s -> S $ bracket p [t] - W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts] - _ -> t - -- otherwise in normal form - - bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"] - sub p i = p ++ [i] diff --git a/src/PGF/Macros.hs b/src/PGF/Macros.hs deleted file mode 100644 index af25de025..000000000 --- a/src/PGF/Macros.hs +++ /dev/null @@ -1,154 +0,0 @@ -module PGF.Macros where - -import PGF.CId -import PGF.Data -import Control.Monad -import qualified Data.Map as Map -import qualified Data.Array as Array -import Data.Maybe -import Data.List - --- operations for manipulating PGF grammars and objects - -mapConcretes :: (Concr -> Concr) -> PGF -> PGF -mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) } - -lookLin :: PGF -> CId -> CId -> Term -lookLin pgf lang fun = - lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes pgf - -lookOper :: PGF -> CId -> CId -> Term -lookOper pgf lang fun = - lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes pgf - -lookLincat :: PGF -> CId -> CId -> Term -lookLincat pgf lang fun = - lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes pgf - -lookParamLincat :: PGF -> CId -> CId -> Term -lookParamLincat pgf lang fun = - lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes pgf - -lookPrintName :: PGF -> CId -> CId -> Term -lookPrintName pgf lang fun = - lookMap tm0 fun $ printnames $ lookMap (error "no lang") lang $ concretes pgf - -lookType :: PGF -> CId -> Type -lookType pgf f = - case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of - (ty,_,_) -> ty - -lookDef :: PGF -> CId -> [Equation] -lookDef pgf f = - case lookMap (error $ "lookDef " ++ show f) f (funs (abstract pgf)) of - (_,a,eqs) -> eqs - -isData :: PGF -> CId -> Bool -isData pgf f = - case Map.lookup f (funs (abstract pgf)) of - Just (_,_,[]) -> True -- the encoding of data constrs - _ -> False - -lookValCat :: PGF -> CId -> CId -lookValCat pgf = valCat . lookType pgf - -lookParser :: PGF -> CId -> Maybe ParserInfo -lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser - -lookStartCat :: PGF -> CId -lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat")) - [gflags pgf, aflags (abstract pgf)] - -lookGlobalFlag :: PGF -> CId -> String -lookGlobalFlag pgf f = - lookMap "?" f (gflags pgf) - -lookAbsFlag :: PGF -> CId -> String -lookAbsFlag pgf f = - lookMap "?" f (aflags (abstract pgf)) - -lookConcr :: PGF -> CId -> Concr -lookConcr pgf cnc = - lookMap (error $ "Missing concrete syntax: " ++ showCId cnc) cnc $ concretes pgf - -lookConcrFlag :: PGF -> CId -> CId -> Maybe String -lookConcrFlag pgf lang f = Map.lookup f $ cflags $ lookConcr pgf lang - -functionsToCat :: PGF -> CId -> [(CId,Type)] -functionsToCat pgf cat = - [(f,ty) | f <- fs, Just (ty,_,_) <- [Map.lookup f $ funs $ abstract pgf]] - where - fs = lookMap [] cat $ catfuns $ abstract pgf - -missingLins :: PGF -> CId -> [CId] -missingLins pgf lang = [c | c <- fs, not (hasl c)] where - fs = Map.keys $ funs $ abstract pgf - hasl = hasLin pgf lang - -hasLin :: PGF -> CId -> CId -> Bool -hasLin pgf lang f = Map.member f $ lins $ lookConcr pgf lang - -restrictPGF :: (CId -> Bool) -> PGF -> PGF -restrictPGF cond pgf = pgf { - abstract = abstr { - funs = restrict $ funs $ abstr, - cats = restrict $ cats $ abstr - } - } ---- restrict concrs also, might be needed - where - restrict = Map.filterWithKey (\c _ -> cond c) - abstr = abstract pgf - -depth :: Expr -> Int -depth (EAbs _ _ t) = depth t -depth (EApp e1 e2) = max (depth e1) (depth e2) + 1 -depth _ = 1 - -cftype :: [CId] -> CId -> Type -cftype args val = DTyp [(Explicit,wildCId,cftype [] arg) | arg <- args] val [] - -typeOfHypo :: Hypo -> Type -typeOfHypo (_,_,ty) = ty - -catSkeleton :: Type -> ([CId],CId) -catSkeleton ty = case ty of - DTyp hyps val _ -> ([valCat (typeOfHypo h) | h <- hyps],val) - -typeSkeleton :: Type -> ([(Int,CId)],CId) -typeSkeleton ty = case ty of - DTyp hyps val _ -> ([(contextLength ty, valCat ty) | h <- hyps, let ty = typeOfHypo h],val) - -valCat :: Type -> CId -valCat ty = case ty of - DTyp _ val _ -> val - -contextLength :: Type -> Int -contextLength ty = case ty of - DTyp hyps _ _ -> length hyps - -term0 :: CId -> Term -term0 = TM . showCId - -tm0 :: Term -tm0 = TM "?" - -kks :: String -> Term -kks = K . KS - --- lookup with default value -lookMap :: (Show i, Ord i) => a -> i -> Map.Map i a -> a -lookMap d c m = Map.findWithDefault d c m - ---- from Operations -combinations :: [[a]] -> [[a]] -combinations t = case t of - [] -> [[]] - aa:uu -> [a:u | a <- aa, u <- combinations uu] - -isLiteralCat :: CId -> Bool -isLiteralCat = (`elem` [cidString, cidFloat, cidInt, cidVar]) - -cidString = mkCId "String" -cidInt = mkCId "Int" -cidFloat = mkCId "Float" -cidVar = mkCId "__gfVar" diff --git a/src/PGF/Morphology.hs b/src/PGF/Morphology.hs deleted file mode 100644 index 9eee71a97..000000000 --- a/src/PGF/Morphology.hs +++ /dev/null @@ -1,26 +0,0 @@ -module PGF.Morphology(Lemma,Analysis,Morpho, - buildMorpho, - lookupMorpho,fullFormLexicon) where - -import PGF.ShowLinearize (collectWords) -import PGF.Data -import PGF.CId - -import qualified Data.Map as Map -import Data.List (intersperse) - --- these 4 definitions depend on the datastructure used - -type Lemma = CId -type Analysis = String - -newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)]) - -buildMorpho :: PGF -> Language -> Morpho -buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang)) - -lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)] -lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo - -fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])] -fullFormLexicon (Morpho mo) = Map.toList mo diff --git a/src/PGF/PMCFG.hs b/src/PGF/PMCFG.hs deleted file mode 100644 index c657e3d17..000000000 --- a/src/PGF/PMCFG.hs +++ /dev/null @@ -1,119 +0,0 @@ -module PGF.PMCFG where - -import PGF.CId -import PGF.Expr - -import qualified Data.Map as Map -import qualified Data.Set as Set -import qualified Data.IntMap as IntMap -import Data.Array.IArray -import Data.Array.Unboxed -import Text.PrettyPrint - -type FCat = Int -type FIndex = Int -type FPointPos = Int -data FSymbol - = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex - | FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex - | FSymKS [String] - | FSymKP [String] [Alternative] - deriving (Eq,Ord,Show) -type Profile = [Int] -data Production - = FApply {-# UNPACK #-} !FunId [FCat] - | FCoerce {-# UNPACK #-} !FCat - | FConst Expr [String] - deriving (Eq,Ord,Show) -data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show) -type FSeq = Array FPointPos FSymbol -type FunId = Int -type SeqId = Int - -data Alternative = - Alt [String] [String] - deriving (Eq,Ord,Show) - -data ParserInfo - = ParserInfo { functions :: Array FunId FFun - , sequences :: Array SeqId FSeq - , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file - , productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions - , startCats :: Map.Map CId [FCat] - , totalCats :: {-# UNPACK #-} !FCat - } - - -fcatString, fcatInt, fcatFloat, fcatVar :: Int -fcatString = (-1) -fcatInt = (-2) -fcatFloat = (-3) -fcatVar = (-4) - -isLiteralFCat :: FCat -> Bool -isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar]) - -ppPMCFG :: ParserInfo -> Doc -ppPMCFG pinfo = - text "productions" $$ - nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$ - text "functions" $$ - nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$ - text "sequences" $$ - nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$ - text "startcats" $$ - nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo)))) - -ppProduction (fcat,FApply funid args) = - ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args))) -ppProduction (fcat,FCoerce arg) = - ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg) -ppProduction (fcat,FConst _ ss) = - ppFCat fcat <+> text "->" <+> ppStrs ss - -ppFun (funid,FFun fun _ arr) = - ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun) - -ppSeq (seqid,seq) = - ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq)) - -ppStartCat (id,fcats) = - ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats))) - -ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>' -ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>' -ppSymbol (FSymKS ts) = ppStrs ts -ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts))) - -ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps) - -ppStrs ss = doubleQuotes (hsep (map text ss)) - -ppFCat fcat - | fcat == fcatString = text "CString" - | fcat == fcatInt = text "CInt" - | fcat == fcatFloat = text "CFloat" - | fcat == fcatVar = text "CVar" - | otherwise = char 'C' <> int fcat - -ppFunId funid = char 'F' <> int funid -ppSeqId seqid = char 'S' <> int seqid - - -filterProductions = closure - where - closure prods0 - | IntMap.size prods == IntMap.size prods0 = prods - | otherwise = closure prods - where - prods = IntMap.mapMaybe (filterProdSet prods0) prods0 - - filterProdSet prods set0 - | Set.null set = Nothing - | otherwise = Just set - where - set = Set.filter (filterRule prods) set0 - - filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args - filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods - filterRule prods _ = True diff --git a/src/PGF/Paraphrase.hs b/src/PGF/Paraphrase.hs deleted file mode 100644 index 58d15b2e8..000000000 --- a/src/PGF/Paraphrase.hs +++ /dev/null @@ -1,112 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Paraphrase --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- Generate parapharases with def definitions. ------------------------------------------------------------------------------ - -module PGF.Paraphrase ( - paraphrase, - paraphraseN - ) where - -import PGF.Data -import PGF.Tree -import PGF.Macros (lookDef,isData) -import PGF.CId - -import Data.List (nub,sort,group) -import qualified Data.Map as Map - -import Debug.Trace ---- - -paraphrase :: PGF -> Expr -> [Expr] -paraphrase pgf = nub . paraphraseN 2 pgf - -paraphraseN :: Int -> PGF -> Expr -> [Expr] -paraphraseN i pgf = map tree2expr . paraphraseN' i pgf . expr2tree - -paraphraseN' :: Int -> PGF -> Tree -> [Tree] -paraphraseN' 0 _ t = [t] -paraphraseN' i pgf t = - step i t ++ [Fun g ts' | Fun g ts <- step (i-1) t, ts' <- sequence (map par ts)] - where - par = paraphraseN' (i-1) pgf - step 0 t = [t] - step i t = let stept = step (i-1) t in stept ++ concat [def u | u <- stept] - def = fromDef pgf - -fromDef :: PGF -> Tree -> [Tree] -fromDef pgf t@(Fun f ts) = defDown t ++ defUp t where - defDown t = [subst g u | let equ = equsFrom f, (u,g) <- match equ ts, trequ "U" f equ] - defUp t = [subst g u | equ <- equsTo f, (u,g) <- match [equ] ts, trequ "D" f equ] - - equsFrom f = [(ps,d) | Just equs <- [lookup f equss], (Fun _ ps,d) <- equs] - - equsTo f = [c | (_,equs) <- equss, c <- casesTo f equs] - - casesTo f equs = - [(ps,p) | (p,d@(Fun g ps)) <- equs, g==f, - isClosed d || (length equs == 1 && isLinear d)] - - equss = [(f,[(Fun f (map patt2tree ps), expr2tree d) | (Equ ps d) <- eqs]) | - (f,(_,_,eqs)) <- Map.assocs (funs (abstract pgf)), not (null eqs)] - - trequ s f e = True ----trace (s ++ ": " ++ show f ++ " " ++ show e) True - -subst :: Subst -> Tree -> Tree -subst g e = case e of - Fun f ts -> Fun f (map substg ts) - Var x -> maybe e id $ lookup x g - _ -> e - where - substg = subst g - -type Subst = [(CId,Tree)] - --- this applies to pattern, hence don't need to consider abstractions -isClosed :: Tree -> Bool -isClosed t = case t of - Fun _ ts -> all isClosed ts - Var _ -> False - _ -> True - --- this applies to pattern, hence don't need to consider abstractions -isLinear :: Tree -> Bool -isLinear = nodup . vars where - vars t = case t of - Fun _ ts -> concatMap vars ts - Var x -> [x] - _ -> [] - nodup = all ((<2) . length) . group . sort - - -match :: [([Tree],Tree)] -> [Tree] -> [(Tree, Subst)] -match cases terms = case cases of - [] -> [] - (patts,_):_ | length patts /= length terms -> [] - (patts,val):cc -> case mapM tryMatch (zip patts terms) of - Just substs -> return (val, concat substs) - _ -> match cc terms - where - tryMatch (p,t) = case (p, t) of - (Var x, _) | notMeta t -> return [(x,t)] - (Fun p pp, Fun f tt) | p == f && length pp == length tt -> do - matches <- mapM tryMatch (zip pp tt) - return (concat matches) - _ -> if p==t then return [] else Nothing - - notMeta e = case e of - Meta _ -> False - Fun f ts -> all notMeta ts - _ -> True - --- | Converts a pattern to tree. -patt2tree :: Patt -> Tree -patt2tree (PApp f ps) = Fun f (map patt2tree ps) -patt2tree (PLit l) = Lit l -patt2tree (PVar x) = Var x -patt2tree PWild = Meta 0 diff --git a/src/PGF/Parsing/FCFG/Active.hs b/src/PGF/Parsing/FCFG/Active.hs deleted file mode 100644 index e88926f6e..000000000 --- a/src/PGF/Parsing/FCFG/Active.hs +++ /dev/null @@ -1,205 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- MCFG parsing, the active algorithm ------------------------------------------------------------------------------ - -module PGF.Parsing.FCFG.Active (parse) where - -import GF.Data.Assoc -import GF.Data.SortedList -import GF.Data.Utilities -import qualified GF.Data.MultiMap as MM - -import PGF.CId -import PGF.Data -import PGF.Tree -import PGF.Parsing.FCFG.Utilities -import PGF.BuildParser - -import Control.Monad (guard) - -import qualified Data.List as List -import qualified Data.Map as Map -import qualified Data.IntMap as IntMap -import qualified Data.Set as Set -import Data.Array.IArray -import Debug.Trace - ----------------------------------------------------------------------- --- * parsing - -type FToken = String - -makeFinalEdge cat 0 0 = (cat, [EmptyRange]) -makeFinalEdge cat i j = (cat, [makeRange i j]) - --- | the list of categories = possible starting categories -parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr] -parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees - where - inTokens = input toks - starts = Map.findWithDefault [] start (startCats pinfo) - schart = xchart2syntaxchart chart pinfo - (i,j) = inputBounds inTokens - finalEdges = [makeFinalEdge cat i j | cat <- starts] - forests = chart2forests schart (const False) finalEdges - filteredForests = forests >>= applyProfileToForest - - pinfoex = buildParserInfo pinfo - - chart = process strategy pinfo pinfoex inTokens axioms emptyXChart - axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens - | isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens - -isBU s = s=="b" -isTD s = s=="t" - --- used in prediction -emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec -emptyChildren ruleid args = SNode ruleid (replicate (length args) []) - - -process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat -process strategy pinfo pinfoex toks [] chart = chart -process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart - where - univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart - | inRange (bounds lin) ppos = - case lin ! ppos of - FSymCat d r -> let c = args !! d - in case recs !! d of - [] -> case insertXChart chart item c of - Nothing -> chart - Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c - rng <- concatRange rng (found' !! r) - return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat) - ++ - do guard (isTD strategy) - (ruleid,args) <- topdownRules pinfo c - return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c) - in process strategy pinfo pinfoex toks items chart - found' -> let items = do rng <- concatRange rng (found' !! r) - return (Active found rng lbl (ppos+1) node args cat) - in process strategy pinfo pinfoex toks items chart - FSymKS [tok] - -> let items = do t_rng <- inputToken toks ? tok - rng' <- concatRange rng t_rng - return (Active found rng' lbl (ppos+1) node args cat) - in process strategy pinfo pinfoex toks items chart - | otherwise = - if inRange (bounds lins) (lbl+1) - then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart - else univRule (Final (reverse (rng:found)) node args cat) chart - where - (FFun _ _ lins) = functions pinfo ! ruleid - lin = sequences pinfo ! (lins ! lbl) - univRule item@(Final found' node args cat) chart = - case insertXChart chart item cat of - Nothing -> chart - Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat - let FFun _ _ lins = functions pinfo ! ruleid - FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos - rng <- concatRange rng (found' !! r) - return (Active found rng l (ppos+1) (updateChildren node d found') args c) - ++ - do guard (isBU strategy) - (ruleid,args,c) <- leftcornerCats pinfoex ? cat - let FFun _ _ lins = functions pinfo ! ruleid - FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0 - return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c) - - updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec - updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs - in process strategy pinfo pinfoex toks items chart - ----------------------------------------------------------------------- --- * XChart - -data Item - = Active RangeRec - Range - {-# UNPACK #-} !FIndex - {-# UNPACK #-} !FPointPos - (SyntaxNode FunId RangeRec) - [FCat] - FCat - | Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat - deriving (Eq, Ord, Show) - -data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item) - -emptyXChart :: Ord c => XChart c -emptyXChart = XChart MM.empty MM.empty - -insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c = - case MM.insert' c item actives of - Nothing -> Nothing - Just actives -> Just (XChart actives finals) - -insertXChart (XChart actives finals) item@(Final _ _ _ _) c = - case MM.insert' c item finals of - Nothing -> Nothing - Just finals -> Just (XChart actives finals) - -lookupXChartAct (XChart actives finals) c = actives MM.! c -lookupXChartFinal (XChart actives finals) c = finals MM.! c - -xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) -xchart2syntaxchart (XChart actives finals) pinfo = - accumAssoc groupSyntaxNodes $ - [ case node of - SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid - in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) - SString s -> ((cat,found), SString s) - SInt n -> ((cat,found), SInt n) - SFloat f -> ((cat,found), SFloat f) - | (Final found node rhs cat) <- MM.elems finals - ] - -literals :: ParserInfoEx -> Input FToken -> [Item] -literals pinfoex toks = - [let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)] - where - lexer t = - case reads t of - [(n,"")] -> (fcatInt, SInt (n::Integer)) - _ -> case reads t of - [(f,"")] -> (fcatFloat, SFloat (f::Double)) - _ -> (fcatString,SString t) - - ----------------------------------------------------------------------- --- Earley -- - --- called with all starting categories -initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item] -initialTD pinfo starts toks = - do cat <- starts - (ruleid,args) <- topdownRules pinfo cat - return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat) - -topdownRules pinfo cat = f cat [] - where - f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) - - g (FApply ruleid args) rules = (ruleid,args) : rules - g (FCoerce cat) rules = f cat rules - - ----------------------------------------------------------------------- --- Kilbury -- - -initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -initialBU pinfo pinfoex toks = - do (tok,rngs) <- aAssocs (inputToken toks) - (ruleid,args,cat) <- leftcornerTokens pinfoex ? tok - rng <- rngs - return (Active [] rng 0 1 (emptyChildren ruleid args) args cat) - ++ - do (ruleid,args,cat) <- epsilonRules pinfoex - let FFun _ _ _ = functions pinfo ! ruleid - return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat) diff --git a/src/PGF/Parsing/FCFG/Incremental.hs b/src/PGF/Parsing/FCFG/Incremental.hs deleted file mode 100644 index 296a0d33b..000000000 --- a/src/PGF/Parsing/FCFG/Incremental.hs +++ /dev/null @@ -1,371 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -module PGF.Parsing.FCFG.Incremental - ( ParseState - , ErrorState - , initState - , nextState - , getCompletions - , recoveryStates - , extractTrees - , parse - , parseWithRecovery - ) where - -import Data.Array.IArray -import Data.Array.Base (unsafeAt) -import Data.List (isPrefixOf, foldl') -import Data.Maybe (fromMaybe, maybe) -import qualified Data.Map as Map -import qualified GF.Data.TrieMap as TMap -import qualified Data.IntMap as IntMap -import qualified Data.Set as Set -import Control.Monad - -import GF.Data.SortedList -import PGF.CId -import PGF.Data -import PGF.Expr(Tree) -import PGF.Macros -import PGF.TypeCheck -import Debug.Trace - -parse :: PGF -> Language -> Type -> [String] -> [Tree] -parse pgf lang typ toks = loop (initState pgf lang typ) toks - where - loop ps [] = extractTrees ps typ - loop ps (t:ts) = case nextState ps t of - Left es -> [] - Right ps -> loop ps ts - -parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree] -parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks - where - accept ps [] = extractTrees ps typ - accept ps (t:ts) = - case nextState ps t of - Right ps -> accept ps ts - Left es -> skip (recoveryStates open_typs es) ts - - skip ps_map [] = extractTrees (fst ps_map) typ - skip ps_map (t:ts) = - case Map.lookup t (snd ps_map) of - Just ps -> accept ps ts - Nothing -> skip ps_map ts - --- | Creates an initial parsing state for a given language and --- startup category. -initState :: PGF -> Language -> Type -> ParseState -initState pgf lang (DTyp _ start _) = - let items = do - cat <- fromMaybe [] (Map.lookup start (startCats pinfo)) - (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) - [] cat (productions pinfo) - let FFun fn _ lins = functions pinfo ! funid - (lbl,seqid) <- assocs lins - return (Active 0 0 funid seqid args (AK cat lbl)) - - pinfo = - case lookParser pgf lang of - Just pinfo -> pinfo - _ -> error ("Unknown language: " ++ showCId lang) - - in PState pgf - pinfo - (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0) - (TMap.singleton [] (Set.fromList items)) - --- | From the current state and the next token --- 'nextState' computes a new state, where the token --- is consumed and the current position is shifted by one. --- If the new token cannot be accepted then an error state --- is returned. -nextState :: ParseState -> String -> Either ErrorState ParseState -nextState (PState pgf pinfo chart items) t = - let (mb_agenda,map_items) = TMap.decompose items - agenda = maybe [] Set.toList mb_agenda - acc = fromMaybe TMap.empty (Map.lookup t map_items) - (acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart - chart2 = chart1{ active =emptyAC - , actives=active chart1 : actives chart1 - , passive=emptyPC - , offset =offset chart1+1 - } - in if TMap.null acc1 - then Left (EState pgf pinfo chart2) - else Right (PState pgf pinfo chart2 acc1) - where - add (tok:toks) item acc - | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc - add _ item acc = acc - --- | If the next token is not known but only its prefix (possible empty prefix) --- then the 'getCompletions' function can be used to calculate the possible --- next words and the consequent states. This is used for word completions in --- the GF interpreter. -getCompletions :: ParseState -> String -> Map.Map String ParseState -getCompletions (PState pgf pinfo chart items) w = - let (mb_agenda,map_items) = TMap.decompose items - agenda = maybe [] Set.toList mb_agenda - acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items - (acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart - chart2 = chart1{ active =emptyAC - , actives=active chart1 : actives chart1 - , passive=emptyPC - , offset =offset chart1+1 - } - in fmap (PState pgf pinfo chart2) acc' - where - add (tok:toks) item acc - | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc - add _ item acc = acc - -recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState) -recoveryStates open_types (EState pgf pinfo chart) = - let open_fcats = concatMap type2fcats open_types - agenda = foldl (complete open_fcats) [] (actives chart) - (acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart - chart2 = chart1{ active =emptyAC - , actives=active chart1 : actives chart1 - , passive=emptyPC - , offset =offset chart1+1 - } - in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc) - where - type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo)) - - complete open_fcats items ac = - foldl (Set.fold (\(Active j' ppos funid seqid args keyc) -> - (:) (Active j' (ppos+1) funid seqid args keyc))) - items - [set | fcat <- open_fcats, set <- lookupACByFCat fcat ac] - - add (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc - --- | This function extracts the list of all completed parse trees --- that spans the whole input consumed so far. The trees are also --- limited by the category specified, which is usually --- the same as the startup category. -extractTrees :: ParseState -> Type -> [Tree] -extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) = - nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]] - where - (mb_agenda,acc) = TMap.decompose items - agenda = maybe [] Set.toList mb_agenda - (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart - - exps = do - cat <- fromMaybe [] (Map.lookup start (startCats pinfo)) - (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) - [] cat (productions pinfo) - let FFun fn _ lins = functions pinfo ! funid - lbl <- indices lins - Just fid <- [lookupPC (PK cat lbl 0) (passive st)] - (fvs,tree) <- go Set.empty 0 (0,fid) - guard (Set.null fvs) - return tree - - go rec fcat' (d,fcat) - | fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments - | Set.member fcat rec = mzero - | otherwise = foldForest (\funid args trees -> - do let FFun fn _ lins = functions pinfo ! funid - args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args) - check_ho_fun fn args - `mplus` - trees) - (\const _ trees -> - return (freeVar const,const) - `mplus` - trees) - [] fcat (forest st) - - check_ho_fun fun args - | fun == _V = return (head args) - | fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args)) - | otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args) - - mkVar (EFun v) = v - mkVar (EMeta _) = wildCId - - freeVar (EFun v) = Set.singleton v - freeVar _ = Set.empty - -_B = mkCId "_B" -_V = mkCId "_V" - -process mbt fn !seqs !funs [] acc chart = (acc,chart) -process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart - | inRange (bounds lin) ppos = - case unsafeAt lin ppos of - FSymCat d r -> let !fid = args !! d - key = AK fid r - - items2 = case lookupPC (mkPK key k) (passive chart) of - Nothing -> items - Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items - items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items) - (\_ _ items -> items) - items2 fid (forest chart) - in case lookupAC key (active chart) of - Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)} - Just set | Set.member item set -> process mbt fn seqs funs items acc chart - | otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)} - FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc - in process mbt fn seqs funs items acc' chart - FSymKP strs vars - -> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc - (strs:[strs' | Alt strs' _ <- vars]) - in process mbt fn seqs funs items acc' chart - FSymLit d r -> let !fid = args !! d - in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of - (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc - in process mbt fn seqs funs items acc' chart - [] -> case litCatMatch fid mbt of - Just (toks,lit) -> let fid' = nextId chart - !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc - in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart) - ,nextId=nextId chart+1 - } - Nothing -> process mbt fn seqs funs items acc chart - | otherwise = - case lookupPC (mkPK key0 j) (passive chart) of - Nothing -> let fid = nextId chart - - items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of - Nothing -> items - Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) -> - let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos - in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set - in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart) - ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart) - ,nextId =nextId chart+1 - } - Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items - in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)} - where - !lin = unsafeAt seqs seqid - !k = offset chart - - mkPK (AK fid lbl) j = PK fid lbl j - - rhs funid lbl = unsafeAt lins lbl - where - FFun _ _ lins = unsafeAt funs funid - - -updateAt :: Int -> a -> [a] -> [a] -updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] - -litCatMatch fcat (Just t) - | fcat == fcatString = Just ([t],ELit (LStr t)) - | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n)); - _ -> Nothing } - | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d)); - _ -> Nothing } - | fcat == fcatVar = Just ([t],EFun (mkCId t)) -litCatMatch _ _ = Nothing - - ----------------------------------------------------------------- --- Active Chart ----------------------------------------------------------------- - -data Active - = Active {-# UNPACK #-} !Int - {-# UNPACK #-} !FPointPos - {-# UNPACK #-} !FunId - {-# UNPACK #-} !SeqId - [FCat] - {-# UNPACK #-} !ActiveKey - deriving (Eq,Show,Ord) -data ActiveKey - = AK {-# UNPACK #-} !FCat - {-# UNPACK #-} !FIndex - deriving (Eq,Ord,Show) -type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active)) - -emptyAC :: ActiveChart -emptyAC = IntMap.empty - -lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active) -lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l - -lookupACByFCat :: FCat -> ActiveChart -> [Set.Set Active] -lookupACByFCat fcat chart = - case IntMap.lookup fcat chart of - Nothing -> [] - Just map -> IntMap.elems map - -labelsAC :: FCat -> ActiveChart -> [FIndex] -labelsAC fcat chart = - case IntMap.lookup fcat chart of - Nothing -> [] - Just map -> IntMap.keys map - -insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart -insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart - - ----------------------------------------------------------------- --- Passive Chart ----------------------------------------------------------------- - -data PassiveKey - = PK {-# UNPACK #-} !FCat - {-# UNPACK #-} !FIndex - {-# UNPACK #-} !Int - deriving (Eq,Ord,Show) - -type PassiveChart = Map.Map PassiveKey FCat - -emptyPC :: PassiveChart -emptyPC = Map.empty - -lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat -lookupPC key chart = Map.lookup key chart - -insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart -insertPC key fcat chart = Map.insert key fcat chart - - ----------------------------------------------------------------- --- Forest ----------------------------------------------------------------- - -foldForest :: (FunId -> [FCat] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b -foldForest f g b fcat forest = - case IntMap.lookup fcat forest of - Nothing -> b - Just set -> Set.fold foldProd b set - where - foldProd (FCoerce fcat) b = foldForest f g b fcat forest - foldProd (FApply funid args) b = f funid args b - foldProd (FConst const toks) b = g const toks b - - ----------------------------------------------------------------- --- Parse State ----------------------------------------------------------------- - --- | An abstract data type whose values represent --- the current state in an incremental parser. -data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active)) - -data Chart - = Chart - { active :: ActiveChart - , actives :: [ActiveChart] - , passive :: PassiveChart - , forest :: IntMap.IntMap (Set.Set Production) - , nextId :: {-# UNPACK #-} !FCat - , offset :: {-# UNPACK #-} !Int - } - deriving Show - ----------------------------------------------------------------- --- Error State ----------------------------------------------------------------- - --- | An abstract data type whose values represent --- the state in an incremental parser after an error. -data ErrorState = EState PGF ParserInfo Chart diff --git a/src/PGF/Parsing/FCFG/Utilities.hs b/src/PGF/Parsing/FCFG/Utilities.hs deleted file mode 100644 index dc0b2dc4a..000000000 --- a/src/PGF/Parsing/FCFG/Utilities.hs +++ /dev/null @@ -1,188 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : PL --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/05/13 12:40:19 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.6 $ --- --- Basic type declarations and functions for grammar formalisms ------------------------------------------------------------------------------ - - -module PGF.Parsing.FCFG.Utilities where - -import Control.Monad -import Data.Array -import Data.List (groupBy) - -import PGF.CId -import PGF.Data -import PGF.Tree -import GF.Data.Assoc -import GF.Data.Utilities (sameLength, foldMerge, splitBy) - - ------------------------------------------------------------- --- ranges as single pairs - -type RangeRec = [Range] - -data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int - | EmptyRange - deriving (Eq, Ord, Show) - -makeRange :: Int -> Int -> Range -makeRange = Range - -concatRange :: Range -> Range -> [Range] -concatRange EmptyRange rng = return rng -concatRange rng EmptyRange = return rng -concatRange (Range i j) (Range j' k) = [Range i k | j==j'] - -minRange :: Range -> Int -minRange (Range i j) = i - -maxRange :: Range -> Int -maxRange (Range i j) = j - - ------------------------------------------------------------- --- * representaions of input tokens - -data Input t = MkInput { inputBounds :: (Int, Int), - inputToken :: Assoc t [Range] - } - -input :: Ord t => [t] -> Input t -input toks = MkInput inBounds inToken - where - inBounds = (0, length toks) - inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ] - -inputMany :: Ord t => [[t]] -> Input t -inputMany toks = MkInput inBounds inToken - where - inBounds = (0, length toks) - inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ] - - ------------------------------------------------------------- --- * representations of syntactical analyses - --- ** charts as finite maps over edges - --- | The values of the chart, a list of key-daughters pairs, --- has unique keys. In essence, it is a map from 'n' to daughters. --- The daughters should be a set (not necessarily sorted) of rhs's. -type SyntaxChart n e = Assoc e [SyntaxNode n [e]] - -data SyntaxNode n e = SMeta - | SNode n [e] - | SString String - | SInt Integer - | SFloat Double - deriving (Eq,Ord,Show) - -groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] -groupSyntaxNodes [] = [] -groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs' - where - (ess,xs') = span xs - - span [] = ([],[]) - span xs@(SNode n es:xs') - | n0 == n = let (ess,xs) = span xs' in (es:ess,xs) - | otherwise = ([],xs) -groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs -groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs -groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs - --- ** syntax forests - -data SyntaxForest n = FMeta - | FNode n [[SyntaxForest n]] - -- ^ The outer list should be a set (not necessarily sorted) - -- of possible alternatives. Ie. the outer list - -- is a disjunctive node, and the inner lists - -- are (conjunctive) concatenative nodes - | FString String - | FInt Integer - | FFloat Double - deriving (Eq, Ord, Show) - -instance Functor SyntaxForest where - fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests - fmap _ (FString s) = FString s - fmap _ (FInt n) = FInt n - fmap _ (FFloat f) = FFloat f - fmap _ (FMeta) = FMeta - -forestName :: SyntaxForest n -> Maybe n -forestName (FNode n _) = Just n -forestName _ = Nothing - -unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) -unifyManyForests = foldM unifyForests FMeta - --- | two forests can be unified, if either is 'FMeta', or both have the same parent, --- and all children can be unified -unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) -unifyForests FMeta forest = return forest -unifyForests forest FMeta = return forest -unifyForests (FNode name1 children1) (FNode name2 children2) - | name1 == name2 && not (null children) = return $ FNode name1 children - where children = [ forests | forests1 <- children1, forests2 <- children2, - sameLength forests1 forests2, - forests <- zipWithM unifyForests forests1 forests2 ] -unifyForests (FString s1) (FString s2) - | s1 == s2 = return $ FString s1 -unifyForests (FInt n1) (FInt n2) - | n1 == n2 = return $ FInt n1 -unifyForests (FFloat f1) (FFloat f2) - | f1 == f2 = return $ FFloat f1 -unifyForests _ _ = fail "forest unification failure" - - --- ** conversions between representations - -chart2forests :: (Ord n, Ord e) => - SyntaxChart n e -- ^ The complete chart - -> (e -> Bool) -- ^ When is an edge 'FMeta'? - -> [e] -- ^ The starting edges - -> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together. - -- In essence, the result is a map from 'n' to forest daughters -chart2forests chart isMeta = concatMap (edge2forests []) - where edge2forests edges edge - | isMeta edge = [FMeta] - | edge `elem` edges = [] - | otherwise = map (item2forest (edge:edges)) $ chart ? edge - item2forest edges (SMeta) = FMeta - item2forest edges (SNode name children) = - FNode name $ children >>= mapM (edge2forests edges) - item2forest edges (SString s) = FString s - item2forest edges (SInt n) = FInt n - item2forest edges (SFloat f) = FFloat f - - -applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId] -applyProfileToForest (FNode (fun,profiles) children) - | fun == wildCId = concat chForests - | otherwise = [ FNode fun chForests | not (null chForests) ] - where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles | - forests0 <- children, - forests <- mapM applyProfileToForest forests0 ] -applyProfileToForest (FString s) = [FString s] -applyProfileToForest (FInt n) = [FInt n] -applyProfileToForest (FFloat f) = [FFloat f] -applyProfileToForest (FMeta) = [FMeta] - - -forest2trees :: SyntaxForest CId -> [Tree] -forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees -forest2trees (FString s) = [Lit (LStr s)] -forest2trees (FInt n) = [Lit (LInt n)] -forest2trees (FFloat f) = [Lit (LFlt f)] -forest2trees (FMeta) = [Meta 0] diff --git a/src/PGF/ShowLinearize.hs b/src/PGF/ShowLinearize.hs deleted file mode 100644 index dd3b997a6..000000000 --- a/src/PGF/ShowLinearize.hs +++ /dev/null @@ -1,113 +0,0 @@ -module PGF.ShowLinearize ( - collectWords, - tableLinearize, - recordLinearize, - termLinearize, - tabularLinearize, - allLinearize, - markLinearize - ) where - -import PGF.CId -import PGF.Data -import PGF.Tree -import PGF.Macros -import PGF.Linearize - -import GF.Data.Operations -import Data.List -import qualified Data.Map as Map - --- printing linearizations in different ways with source parameters - --- internal representation, only used internally in this module -data Record = - RR [(String,Record)] - | RT [(String,Record)] - | RFV [Record] - | RS String - | RCon String - -prRecord :: Record -> String -prRecord = prr where - prr t = case t of - RR fs -> concat $ - "{" : - (intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"] - RT fs -> concat $ - "table {" : - (intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"] - RFV ts -> concat $ - "variants {" : (intersperse ";" (map prr ts)) ++ ["}"] - RS s -> prQuotedString s - RCon s -> s - --- uses the encoding of record types in PGF.paramlincat -mkRecord :: Term -> Term -> Record -mkRecord typ trm = case (typ,trm) of - (_, FV ts) -> RFV $ map (mkRecord typ) ts - (R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts] - (S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts] - (_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts]) - (FV ps, C i) -> RCon $ str $ ps !! i - (S [], _) -> case realizes trm of - [s] -> RS s - ss -> RFV $ map RS ss - _ -> RS $ show trm ---- printTree trm - where - str = realize - --- show all branches, without labels and params -allLinearize :: (String -> String) -> PGF -> CId -> Expr -> String -allLinearize unlex pgf lang = concat . map (unlex . pr) . tabularLinearize pgf lang where - pr (p,vs) = unlines vs - --- show all branches, with labels and params -tableLinearize :: (String -> String) -> PGF -> CId -> Expr -> String -tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where - pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs)) - --- create a table from labels+params to variants -tabularLinearize :: PGF -> CId -> Expr -> [(String,[String])] -tabularLinearize pgf lang = branches . recLinearize pgf lang where - branches r = case r of - RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] - RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] - RFV rs -> concatMap branches rs - RS s -> [([], [s])] - RCon _ -> [] - --- show record in GF-source-like syntax -recordLinearize :: PGF -> CId -> Expr -> String -recordLinearize pgf lang = prRecord . recLinearize pgf lang - --- create a GF-like record, forming the basis of all functions above -recLinearize :: PGF -> CId -> Expr -> Record -recLinearize pgf lang tree = mkRecord typ $ linTree pgf lang tree where - typ = case expr2tree tree of - Fun f _ -> lookParamLincat pgf lang $ valCat $ lookType pgf f - --- show PGF term -termLinearize :: PGF -> CId -> Expr -> String -termLinearize pgf lang = show . linTree pgf lang - --- show bracketed markup with references to tree structure -markLinearize :: PGF -> CId -> Expr -> String -markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang - - --- for Morphology: word, lemma, tags -collectWords :: PGF -> Language -> [(String, [(CId,String)])] -collectWords pgf lang = - concatMap collOne - [(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf] - where - collOne (f,c,i) = - fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888)))) - fromRec f v r = case r of - RR rs -> concat [fromRec f v t | (_,t) <- rs] - RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs] - RFV rs -> concatMap (fromRec f v) rs - RS s -> [(s,[(f,unwords (reverse v))])] - RCon c -> [] ---- inherent - diff --git a/src/PGF/Tree.hs b/src/PGF/Tree.hs deleted file mode 100644 index cb2052cd7..000000000 --- a/src/PGF/Tree.hs +++ /dev/null @@ -1,71 +0,0 @@ -module PGF.Tree - ( Tree(..), - tree2expr, expr2tree, - prTree - ) where - -import PGF.CId -import PGF.Expr hiding (Tree) - -import Data.Char -import Data.List as List -import Control.Monad -import qualified Text.PrettyPrint as PP -import qualified Text.ParserCombinators.ReadP as RP - --- | The tree is an evaluated expression in the abstract syntax --- of the grammar. The type is especially restricted to not --- allow unapplied lambda abstractions. The tree is used directly --- from the linearizer and is produced directly from the parser. -data Tree = - Abs [(BindType,CId)] Tree -- ^ lambda abstraction. The list of variables is non-empty - | Var CId -- ^ variable - | Fun CId [Tree] -- ^ function application - | Lit Literal -- ^ literal - | Meta {-# UNPACK #-} !MetaId -- ^ meta variable - deriving (Eq, Ord) - ------------------------------------------------------ --- Conversion Expr <-> Tree ------------------------------------------------------ - --- | Converts a tree to expression. The conversion --- is always total, every tree is a valid expression. -tree2expr :: Tree -> Expr -tree2expr = tree2expr [] - where - tree2expr ys (Fun x ts) = foldl EApp (EFun x) (List.map (tree2expr ys) ts) - tree2expr ys (Lit l) = ELit l - tree2expr ys (Meta n) = EMeta n - tree2expr ys (Abs xs t) = foldr (\(b,x) e -> EAbs b x e) (tree2expr (List.map snd (reverse xs)++ys) t) xs - tree2expr ys (Var x) = case List.lookup x (zip ys [0..]) of - Just i -> EVar i - Nothing -> error "unknown variable" - --- | Converts an expression to tree. The conversion is only partial. --- Variables and meta variables of function type and beta redexes are not allowed. -expr2tree :: Expr -> Tree -expr2tree e = abs [] [] e - where - abs ys xs (EAbs b x e) = abs ys ((b,x):xs) e - abs ys xs (ETyped e _) = abs ys xs e - abs ys xs e = case xs of - [] -> app ys [] e - xs -> Abs (reverse xs) (app (map snd xs++ys) [] e) - - app xs as (EApp e1 e2) = app xs ((abs xs [] e2) : as) e1 - app xs as (ELit l) - | List.null as = Lit l - | otherwise = error "literal of function type encountered" - app xs as (EMeta n) - | List.null as = Meta n - | otherwise = error "meta variables of function type are not allowed in trees" - app xs as (EAbs _ x e) = error "beta redexes are not allowed in trees" - app xs as (EVar i) = Var (xs !! i) - app xs as (EFun f) = Fun f as - app xs as (ETyped e _) = app xs as e - - -prTree :: Tree -> String -prTree = showExpr [] . tree2expr - diff --git a/src/PGF/Type.hs b/src/PGF/Type.hs deleted file mode 100644 index 013754a45..000000000 --- a/src/PGF/Type.hs +++ /dev/null @@ -1,103 +0,0 @@ -module PGF.Type ( Type(..), Hypo, - readType, showType, - mkType, mkHypo, mkDepHypo, mkImplHypo, - pType, ppType, ppHypo ) where - -import PGF.CId -import {-# SOURCE #-} PGF.Expr -import Data.Char -import Data.List -import qualified Text.PrettyPrint as PP -import qualified Text.ParserCombinators.ReadP as RP -import Control.Monad - --- | To read a type from a 'String', use 'readType'. -data Type = - DTyp [Hypo] CId [Expr] - deriving (Eq,Ord,Show) - --- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis -type Hypo = (BindType,CId,Type) - --- | Reads a 'Type' from a 'String'. -readType :: String -> Maybe Type -readType s = case [x | (x,cs) <- RP.readP_to_S pType s, all isSpace cs] of - [x] -> Just x - _ -> Nothing - --- | renders type as 'String'. The list --- of identifiers is the list of all free variables --- in the expression in order reverse to the order --- of binding. -showType :: [CId] -> Type -> String -showType vars = PP.render . ppType 0 vars - --- | creates a type from list of hypothesises, category and --- list of arguments for the category. The operation --- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create --- @h_1 -> ... -> h_n -> C e_1 ... e_m@ -mkType :: [Hypo] -> CId -> [Expr] -> Type -mkType hyps cat args = DTyp hyps cat args - --- | creates hypothesis for non-dependent type i.e. A -mkHypo :: Type -> Hypo -mkHypo ty = (Explicit,wildCId,ty) - --- | creates hypothesis for dependent type i.e. (x : A) -mkDepHypo :: CId -> Type -> Hypo -mkDepHypo x ty = (Explicit,x,ty) - --- | creates hypothesis for dependent type with implicit argument i.e. ({x} : A) -mkImplHypo :: CId -> Type -> Hypo -mkImplHypo x ty = (Implicit,x,ty) - -pType :: RP.ReadP Type -pType = do - RP.skipSpaces - hyps <- RP.sepBy (pHypo >>= \h -> RP.skipSpaces >> RP.string "->" >> return h) RP.skipSpaces - RP.skipSpaces - (cat,args) <- pAtom - return (DTyp (concat hyps) cat args) - where - pHypo = - do (cat,args) <- pAtom - return [(Explicit,wildCId,DTyp [] cat args)] - RP.<++ - (RP.between (RP.char '(') (RP.char ')') $ do - xs <- RP.option [(Explicit,wildCId)] $ do - xs <- pBinds - RP.skipSpaces - RP.char ':' - return xs - ty <- pType - return [(b,v,ty) | (b,v) <- xs]) - RP.<++ - (RP.between (RP.char '{') (RP.char '}') $ do - vs <- RP.sepBy1 (RP.skipSpaces >> pCId) (RP.skipSpaces >> RP.char ',') - RP.skipSpaces - RP.char ':' - ty <- pType - return [(Implicit,v,ty) | v <- vs]) - - pAtom = do - cat <- pCId - RP.skipSpaces - args <- RP.sepBy pArg RP.skipSpaces - return (cat, args) - -ppType :: Int -> [CId] -> Type -> PP.Doc -ppType d scope (DTyp hyps cat args) - | null hyps = ppRes scope cat args - | otherwise = let (scope',hdocs) = mapAccumL ppHypo scope hyps - in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes scope' cat args) hdocs) - where - ppRes scope cat es = ppCId cat PP.<+> PP.hsep (map (ppExpr 4 scope) es) - -ppHypo scope (Explicit,x,typ) = if x == wildCId - then (scope,ppType 1 scope typ) - else let y = freshName x scope - in (y:scope,PP.parens (ppCId y PP.<+> PP.char ':' PP.<+> ppType 0 scope typ)) -ppHypo scope (Implicit,x,typ) = if x == wildCId - then (scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ)) - else let y = freshName x scope - in (y:scope,PP.parens (PP.braces (ppCId y) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ)) diff --git a/src/PGF/TypeCheck.hs b/src/PGF/TypeCheck.hs deleted file mode 100644 index 937c21786..000000000 --- a/src/PGF/TypeCheck.hs +++ /dev/null @@ -1,524 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : PGF.TypeCheck --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- Type checking in abstract syntax with dependent types. --- The type checker also performs renaming and checking for unknown --- functions. The variable references are replaced by de Bruijn indices. --- ------------------------------------------------------------------------------ - -module PGF.TypeCheck (checkType, checkExpr, inferExpr, - - ppTcError, TcError(..) - ) where - -import PGF.Data -import PGF.Expr -import PGF.Macros (typeOfHypo) -import PGF.CId - -import Data.Map as Map -import Data.IntMap as IntMap -import Data.Maybe as Maybe -import Data.List as List -import Control.Monad -import Text.PrettyPrint - ------------------------------------------------------ --- The Scope ------------------------------------------------------ - -data TType = TTyp Env Type -newtype Scope = Scope [(CId,TType)] - -emptyScope = Scope [] - -addScopedVar :: CId -> TType -> Scope -> Scope -addScopedVar x tty (Scope gamma) = Scope ((x,tty):gamma) - --- | returns the type and the De Bruijn index of a local variable -lookupVar :: CId -> Scope -> Maybe (Int,TType) -lookupVar x (Scope gamma) = listToMaybe [(i,tty) | ((y,tty),i) <- zip gamma [0..], x == y] - --- | returns the type and the name of a local variable -getVar :: Int -> Scope -> (CId,TType) -getVar i (Scope gamma) = gamma !! i - -scopeEnv :: Scope -> Env -scopeEnv (Scope gamma) = let n = length gamma - in [VGen (n-i-1) [] | i <- [0..n-1]] - -scopeVars :: Scope -> [CId] -scopeVars (Scope gamma) = List.map fst gamma - -scopeSize :: Scope -> Int -scopeSize (Scope gamma) = length gamma - ------------------------------------------------------ --- The Monad ------------------------------------------------------ - -type MetaStore = IntMap MetaValue -data MetaValue - = MUnbound Scope [Expr -> TcM ()] - | MBound Expr - | MGuarded Expr [Expr -> TcM ()] {-# UNPACK #-} !Int -- the Int is the number of constraints that have to be solved - -- to unlock this meta variable - -newtype TcM a = TcM {unTcM :: Abstr -> MetaId -> MetaStore -> TcResult a} -data TcResult a - = Ok {-# UNPACK #-} !MetaId MetaStore a - | Fail TcError - -instance Monad TcM where - return x = TcM (\abstr metaid ms -> Ok metaid ms x) - f >>= g = TcM (\abstr metaid ms -> case unTcM f abstr metaid ms of - Ok metaid ms x -> unTcM (g x) abstr metaid ms - Fail e -> Fail e) - -instance Functor TcM where - fmap f x = TcM (\abstr metaid ms -> case unTcM x abstr metaid ms of - Ok metaid ms x -> Ok metaid ms (f x) - Fail e -> Fail e) - -lookupCatHyps :: CId -> TcM [Hypo] -lookupCatHyps cat = TcM (\abstr metaid ms -> case Map.lookup cat (cats abstr) of - Just hyps -> Ok metaid ms hyps - Nothing -> Fail (UnknownCat cat)) - -lookupFunType :: CId -> TcM TType -lookupFunType fun = TcM (\abstr metaid ms -> case Map.lookup fun (funs abstr) of - Just (ty,_,_) -> Ok metaid ms (TTyp [] ty) - Nothing -> Fail (UnknownFun fun)) - -newMeta :: Scope -> TcM MetaId -newMeta scope = TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MUnbound scope []) ms) metaid) - -newGuardedMeta :: Scope -> Expr -> TcM MetaId -newGuardedMeta scope e = getFuns >>= \funs -> TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MGuarded e [] 0) ms) metaid) - -getMeta :: MetaId -> TcM MetaValue -getMeta i = TcM (\abstr metaid ms -> Ok metaid ms $! case IntMap.lookup i ms of - Just mv -> mv) -setMeta :: MetaId -> MetaValue -> TcM () -setMeta i mv = TcM (\abstr metaid ms -> Ok metaid (IntMap.insert i mv ms) ()) - -tcError :: TcError -> TcM a -tcError e = TcM (\abstr metaid ms -> Fail e) - -getFuns :: TcM Funs -getFuns = TcM (\abstr metaid ms -> Ok metaid ms (funs abstr)) - -addConstraint :: MetaId -> MetaId -> Env -> [Value] -> (Value -> TcM ()) -> TcM () -addConstraint i j env vs c = do - funs <- getFuns - mv <- getMeta j - case mv of - MUnbound scope cs -> addRef >> setMeta j (MUnbound scope ((\e -> release >> c (apply funs env e vs)) : cs)) - MBound e -> c (apply funs env e vs) - MGuarded e cs x | x == 0 -> c (apply funs env e vs) - | otherwise -> addRef >> setMeta j (MGuarded e ((\e -> release >> c (apply funs env e vs)) : cs) x) - where - addRef = TcM (\abstr metaid ms -> case IntMap.lookup i ms of - Just (MGuarded e cs x) -> Ok metaid (IntMap.insert i (MGuarded e cs (x+1)) ms) ()) - - release = TcM (\abstr metaid ms -> case IntMap.lookup i ms of - Just (MGuarded e cs x) -> if x == 1 - then unTcM (sequence_ [c e | c <- cs]) abstr metaid (IntMap.insert i (MGuarded e [] 0) ms) - else Ok metaid (IntMap.insert i (MGuarded e cs (x-1)) ms) ()) - ------------------------------------------------------ --- Type errors ------------------------------------------------------ - --- | If an error occurs in the typechecking phase --- the type checker returns not a plain text error message --- but a 'TcError' structure which describes the error. -data TcError - = UnknownCat CId -- ^ Unknown category name was found. - | UnknownFun CId -- ^ Unknown function name was found. - | WrongCatArgs [CId] Type CId Int Int -- ^ A category was applied to wrong number of arguments. - -- The first integer is the number of expected arguments and - -- the second the number of given arguments. - -- The @[CId]@ argument is the list of free variables - -- in the type. It should be used for the 'showType' function. - | TypeMismatch [CId] Expr Type Type -- ^ The expression is not of the expected type. - -- The first type is the expected type, while - -- the second is the inferred. The @[CId]@ argument is the list - -- of free variables in both the expression and the type. - -- It should be used for the 'showType' and 'showExpr' functions. - | NotFunType [CId] Expr Type -- ^ Something that is not of function type was applied to an argument. - | CannotInferType [CId] Expr -- ^ It is not possible to infer the type of an expression. - | UnresolvedMetaVars [CId] Expr [MetaId] -- ^ Some metavariables have to be instantiated in order to complete the typechecking. - | UnexpectedImplArg [CId] Expr -- ^ Implicit argument was passed where the type doesn't allow it - --- | Renders the type checking error to a document. See 'Text.PrettyPrint'. -ppTcError :: TcError -> Doc -ppTcError (UnknownCat cat) = text "Category" <+> ppCId cat <+> text "is not in scope" -ppTcError (UnknownFun fun) = text "Function" <+> ppCId fun <+> text "is not in scope" -ppTcError (WrongCatArgs xs ty cat m n) = text "Category" <+> ppCId cat <+> text "should have" <+> int m <+> text "argument(s), but has been given" <+> int n $$ - text "In the type:" <+> ppType 0 xs ty -ppTcError (TypeMismatch xs e ty1 ty2) = text "Couldn't match expected type" <+> ppType 0 xs ty1 $$ - text " against inferred type" <+> ppType 0 xs ty2 $$ - text "In the expression:" <+> ppExpr 0 xs e -ppTcError (NotFunType xs e ty) = text "A function type is expected for the expression" <+> ppExpr 0 xs e <+> text "instead of type" <+> ppType 0 xs ty -ppTcError (CannotInferType xs e) = text "Cannot infer the type of expression" <+> ppExpr 0 xs e -ppTcError (UnresolvedMetaVars xs e ms) = text "Meta variable(s)" <+> fsep (List.map ppMeta ms) <+> text "should be resolved" $$ - text "in the expression:" <+> ppExpr 0 xs e -ppTcError (UnexpectedImplArg xs e) = braces (ppExpr 0 xs e) <+> text "is implicit argument but not implicit argument is expected here" - ------------------------------------------------------ --- checkType ------------------------------------------------------ - --- | Check whether a given type is consistent with the abstract --- syntax of the grammar. -checkType :: PGF -> Type -> Either TcError Type -checkType pgf ty = - case unTcM (tcType emptyScope ty >>= refineType) (abstract pgf) 0 IntMap.empty of - Ok _ ms ty -> Right ty - Fail err -> Left err - -tcType :: Scope -> Type -> TcM Type -tcType scope ty@(DTyp hyps cat es) = do - (scope,hyps) <- tcHypos scope hyps - c_hyps <- lookupCatHyps cat - let m = length es - n = length [ty | (Explicit,x,ty) <- c_hyps] - (delta,es) <- tcCatArgs scope es [] c_hyps ty n m - return (DTyp hyps cat es) - -tcHypos :: Scope -> [Hypo] -> TcM (Scope,[Hypo]) -tcHypos scope [] = return (scope,[]) -tcHypos scope (h:hs) = do - (scope,h ) <- tcHypo scope h - (scope,hs) <- tcHypos scope hs - return (scope,h:hs) - -tcHypo :: Scope -> Hypo -> TcM (Scope,Hypo) -tcHypo scope (b,x,ty) = do - ty <- tcType scope ty - if x == wildCId - then return (scope,(b,x,ty)) - else return (addScopedVar x (TTyp (scopeEnv scope) ty) scope,(b,x,ty)) - -tcCatArgs scope [] delta [] ty0 n m = return (delta,[]) -tcCatArgs scope (EImplArg e:es) delta ((Explicit,x,ty):hs) ty0 n m = tcError (UnexpectedImplArg (scopeVars scope) e) -tcCatArgs scope (EImplArg e:es) delta ((Implicit,x,ty):hs) ty0 n m = do - e <- tcExpr scope e (TTyp delta ty) - funs <- getFuns - (delta,es) <- if x == wildCId - then tcCatArgs scope es delta hs ty0 n m - else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m - return (delta,EImplArg e:es) -tcCatArgs scope es delta ((Implicit,x,ty):hs) ty0 n m = do - i <- newMeta scope - (delta,es) <- if x == wildCId - then tcCatArgs scope es delta hs ty0 n m - else tcCatArgs scope es (VMeta i (scopeEnv scope) [] : delta) hs ty0 n m - return (delta,EImplArg (EMeta i) : es) -tcCatArgs scope (e:es) delta ((Explicit,x,ty):hs) ty0 n m = do - e <- tcExpr scope e (TTyp delta ty) - funs <- getFuns - (delta,es) <- if x == wildCId - then tcCatArgs scope es delta hs ty0 n m - else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m - return (delta,e:es) -tcCatArgs scope _ delta _ ty0@(DTyp _ cat _) n m = do - tcError (WrongCatArgs (scopeVars scope) ty0 cat n m) - ------------------------------------------------------ --- checkExpr ------------------------------------------------------ - --- | Checks an expression against a specified type. -checkExpr :: PGF -> Expr -> Type -> Either TcError Expr -checkExpr pgf e ty = - case unTcM (do e <- tcExpr emptyScope e (TTyp [] ty) - e <- refineExpr e - checkResolvedMetaStore emptyScope e - return e) (abstract pgf) 0 IntMap.empty of - Ok _ ms e -> Right e - Fail err -> Left err - -tcExpr :: Scope -> Expr -> TType -> TcM Expr -tcExpr scope e0@(EAbs Implicit x e) tty = - case tty of - TTyp delta (DTyp ((Implicit,y,ty):hs) c es) -> do e <- if y == wildCId - then tcExpr (addScopedVar x (TTyp delta ty) scope) - e (TTyp delta (DTyp hs c es)) - else tcExpr (addScopedVar x (TTyp delta ty) scope) - e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es)) - return (EAbs Implicit x e) - _ -> do ty <- evalType (scopeSize scope) tty - tcError (NotFunType (scopeVars scope) e0 ty) -tcExpr scope e0 (TTyp delta (DTyp ((Implicit,y,ty):hs) c es)) = do - e0 <- if y == wildCId - then tcExpr (addScopedVar wildCId (TTyp delta ty) scope) - e0 (TTyp delta (DTyp hs c es)) - else tcExpr (addScopedVar wildCId (TTyp delta ty) scope) - e0 (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es)) - return (EAbs Implicit wildCId e0) -tcExpr scope e0@(EAbs Explicit x e) tty = - case tty of - TTyp delta (DTyp ((Explicit,y,ty):hs) c es) -> do e <- if y == wildCId - then tcExpr (addScopedVar x (TTyp delta ty) scope) - e (TTyp delta (DTyp hs c es)) - else tcExpr (addScopedVar x (TTyp delta ty) scope) - e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es)) - return (EAbs Explicit x e) - _ -> do ty <- evalType (scopeSize scope) tty - tcError (NotFunType (scopeVars scope) e0 ty) -tcExpr scope (EMeta _) tty = do - i <- newMeta scope - return (EMeta i) -tcExpr scope e0 tty = do - (e0,tty0) <- infExpr scope e0 - i <- newGuardedMeta scope e0 - eqType scope (scopeSize scope) i tty tty0 - return (EMeta i) - - ------------------------------------------------------ --- inferExpr ------------------------------------------------------ - --- | Tries to infer the type of a given expression. Note that --- even if the expression is type correct it is not always --- possible to infer its type in the GF type system. --- In this case the function returns the 'CannotInferType' error. -inferExpr :: PGF -> Expr -> Either TcError (Expr,Type) -inferExpr pgf e = - case unTcM (do (e,tty) <- infExpr emptyScope e - e <- refineExpr e - checkResolvedMetaStore emptyScope e - ty <- evalType 0 tty - return (e,ty)) (abstract pgf) 1 IntMap.empty of - Ok _ ms (e,ty) -> Right (e,ty) - Fail err -> Left err - -infExpr :: Scope -> Expr -> TcM (Expr,TType) -infExpr scope e0@(EApp e1 e2) = do - (e1,TTyp delta ty) <- infExpr scope e1 - (e0,delta,ty) <- tcArg scope e1 e2 delta ty - return (e0,TTyp delta ty) -infExpr scope e0@(EFun x) = do - case lookupVar x scope of - Just (i,tty) -> return (EVar i,tty) - Nothing -> do tty <- lookupFunType x - return (e0,tty) -infExpr scope e0@(EVar i) = do - return (e0,snd (getVar i scope)) -infExpr scope e0@(ELit l) = do - let cat = case l of - LStr _ -> mkCId "String" - LInt _ -> mkCId "Int" - LFlt _ -> mkCId "Float" - return (e0,TTyp [] (DTyp [] cat [])) -infExpr scope (ETyped e ty) = do - ty <- tcType scope ty - e <- tcExpr scope e (TTyp (scopeEnv scope) ty) - return (ETyped e ty,TTyp (scopeEnv scope) ty) -infExpr scope (EImplArg e) = do - (e,tty) <- infExpr scope e - return (EImplArg e,tty) -infExpr scope e = tcError (CannotInferType (scopeVars scope) e) - -tcArg scope e1 e2 delta ty0@(DTyp [] c es) = do - ty1 <- evalType (scopeSize scope) (TTyp delta ty0) - tcError (NotFunType (scopeVars scope) e1 ty1) -tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = tcError (UnexpectedImplArg (scopeVars scope) e2) -tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do - e2 <- tcExpr scope e2 (TTyp delta ty) - funs <- getFuns - if x == wildCId - then return (EApp e1 (EImplArg e2), delta,DTyp hs c es) - else return (EApp e1 (EImplArg e2),eval funs (scopeEnv scope) e2:delta,DTyp hs c es) -tcArg scope e1 e2 delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = do - e2 <- tcExpr scope e2 (TTyp delta ty) - funs <- getFuns - if x == wildCId - then return (EApp e1 e2, delta,DTyp hs c es) - else return (EApp e1 e2,eval funs (scopeEnv scope) e2:delta,DTyp hs c es) -tcArg scope e1 e2 delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do - i <- newMeta scope - if x == wildCId - then tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 delta (DTyp hs c es) - else tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 (VMeta i (scopeEnv scope) [] : delta) (DTyp hs c es) - ------------------------------------------------------ --- eqType ------------------------------------------------------ - -eqType :: Scope -> Int -> MetaId -> TType -> TType -> TcM () -eqType scope k i0 tty1@(TTyp delta1 ty1@(DTyp hyps1 cat1 es1)) tty2@(TTyp delta2 ty2@(DTyp hyps2 cat2 es2)) - | cat1 == cat2 = do (k,delta1,delta2) <- eqHyps k delta1 hyps1 delta2 hyps2 - sequence_ [eqExpr k delta1 e1 delta2 e2 | (e1,e2) <- zip es1 es2] - | otherwise = raiseTypeMatchError - where - raiseTypeMatchError = do ty1 <- evalType k tty1 - ty2 <- evalType k tty2 - e <- refineExpr (EMeta i0) - tcError (TypeMismatch (scopeVars scope) e ty1 ty2) - - eqHyps :: Int -> Env -> [Hypo] -> Env -> [Hypo] -> TcM (Int,Env,Env) - eqHyps k delta1 [] delta2 [] = - return (k,delta1,delta2) - eqHyps k delta1 ((_,x,ty1) : h1s) delta2 ((_,y,ty2) : h2s) = do - eqType scope k i0 (TTyp delta1 ty1) (TTyp delta2 ty2) - if x == wildCId && y == wildCId - then eqHyps k delta1 h1s delta2 h2s - else if x /= wildCId && y /= wildCId - then eqHyps (k+1) ((VGen k []):delta1) h1s ((VGen k []):delta2) h2s - else raiseTypeMatchError - eqHyps k delta1 h1s delta2 h2s = raiseTypeMatchError - - eqExpr :: Int -> Env -> Expr -> Env -> Expr -> TcM () - eqExpr k env1 e1 env2 e2 = do - funs <- getFuns - eqValue k (eval funs env1 e1) (eval funs env2 e2) - - eqValue :: Int -> Value -> Value -> TcM () - eqValue k v1 v2 = do - v1 <- deRef v1 - v2 <- deRef v2 - eqValue' k v1 v2 - - deRef v@(VMeta i env vs) = do - mv <- getMeta i - funs <- getFuns - case mv of - MBound e -> deRef (apply funs env e vs) - MGuarded e _ x | x == 0 -> deRef (apply funs env e vs) - | otherwise -> return v - MUnbound _ _ -> return v - deRef v = return v - - eqValue' k (VSusp i env vs1 c) v2 = addConstraint i0 i env vs1 (\v1 -> eqValue k (c v1) v2) - eqValue' k v1 (VSusp i env vs2 c) = addConstraint i0 i env vs2 (\v2 -> eqValue k v1 (c v2)) - eqValue' k (VMeta i env1 vs1) (VMeta j env2 vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2 - eqValue' k (VMeta i env1 vs1) v2 = do (MUnbound scopei cs) <- getMeta i - e2 <- mkLam i scopei env1 vs1 v2 - setMeta i (MBound e2) - sequence_ [c e2 | c <- cs] - eqValue' k v1 (VMeta i env2 vs2) = do (MUnbound scopei cs) <- getMeta i - e1 <- mkLam i scopei env2 vs2 v1 - setMeta i (MBound e1) - sequence_ [c e1 | c <- cs] - eqValue' k (VApp f1 vs1) (VApp f2 vs2) | f1 == f2 = zipWithM_ (eqValue k) vs1 vs2 - eqValue' k (VLit l1) (VLit l2 ) | l1 == l2 = return () - eqValue' k (VGen i vs1) (VGen j vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2 - eqValue' k (VClosure env1 (EAbs _ x1 e1)) (VClosure env2 (EAbs _ x2 e2)) = let v = VGen k [] - in eqExpr (k+1) (v:env1) e1 (v:env2) e2 - eqValue' k v1 v2 = raiseTypeMatchError - - mkLam i scope env vs0 v = do - let k = scopeSize scope - vs = reverse (take k env) ++ vs0 - xs = nub [i | VGen i [] <- vs] - if length vs == length xs - then return () - else raiseTypeMatchError - v <- occurCheck i k xs v - funs <- getFuns - return (addLam vs0 (value2expr funs (length xs) v)) - where - addLam [] e = e - addLam (v:vs) e = EAbs Explicit var (addLam vs e) - - var = mkCId "v" - - occurCheck i0 k xs (VApp f vs) = do vs <- mapM (occurCheck i0 k xs) vs - return (VApp f vs) - occurCheck i0 k xs (VLit l) = return (VLit l) - occurCheck i0 k xs (VMeta i env vs) = do if i == i0 - then raiseTypeMatchError - else return () - mv <- getMeta i - funs <- getFuns - case mv of - MBound e -> occurCheck i0 k xs (apply funs env e vs) - MGuarded e _ _ -> occurCheck i0 k xs (apply funs env e vs) - MUnbound scopei _ | scopeSize scopei > k -> raiseTypeMatchError - | otherwise -> do vs <- mapM (occurCheck i0 k xs) vs - return (VMeta i env vs) - occurCheck i0 k xs (VSusp i env vs cnt) = do addConstraint i0 i env vs (\v -> occurCheck i0 k xs (cnt v) >> return ()) - return (VSusp i env vs cnt) - occurCheck i0 k xs (VGen i vs) = case List.findIndex (==i) xs of - Just i -> do vs <- mapM (occurCheck i0 k xs) vs - return (VGen i vs) - Nothing -> raiseTypeMatchError - occurCheck i0 k xs (VClosure env e) = do env <- mapM (occurCheck i0 k xs) env - return (VClosure env e) - - ------------------------------------------------------------ --- check for meta variables that still have to be resolved ------------------------------------------------------------ - -checkResolvedMetaStore :: Scope -> Expr -> TcM () -checkResolvedMetaStore scope e = TcM (\abstr metaid ms -> - let xs = [i | (i,mv) <- IntMap.toList ms, not (isResolved mv)] - in if List.null xs - then Ok metaid ms () - else Fail (UnresolvedMetaVars (scopeVars scope) e xs)) - where - isResolved (MUnbound _ []) = True - isResolved (MGuarded _ _ _) = True - isResolved (MBound _) = True - isResolved _ = False - ------------------------------------------------------ --- evalType ------------------------------------------------------ - -evalType :: Int -> TType -> TcM Type -evalType k (TTyp delta ty) = do funs <- getFuns - refineType (evalTy funs k delta ty) - where - evalTy sig k delta (DTyp hyps cat es) = - let ((k1,delta1),hyps1) = mapAccumL (evalHypo sig) (k,delta) hyps - in DTyp hyps1 cat (List.map (normalForm sig k1 delta1) es) - - evalHypo sig (k,delta) (b,x,ty) = - if x == wildCId - then ((k, delta),(b,x,evalTy sig k delta ty)) - else ((k+1,(VGen k []):delta),(b,x,evalTy sig k delta ty)) - - ------------------------------------------------------ --- refinement ------------------------------------------------------ - -refineExpr :: Expr -> TcM Expr -refineExpr e = TcM (\abstr metaid ms -> Ok metaid ms (refineExpr_ ms e)) - -refineExpr_ ms e = refine e - where - refine (EAbs b x e) = EAbs b x (refine e) - refine (EApp e1 e2) = EApp (refine e1) (refine e2) - refine (ELit l) = ELit l - refine (EMeta i) = case IntMap.lookup i ms of - Just (MBound e ) -> refine e - Just (MGuarded e _ _) -> refine e - _ -> EMeta i - refine (EFun f) = EFun f - refine (EVar i) = EVar i - refine (ETyped e ty) = ETyped (refine e) (refineType_ ms ty) - refine (EImplArg e) = EImplArg (refine e) - -refineType :: Type -> TcM Type -refineType ty = TcM (\abstr metaid ms -> Ok metaid ms (refineType_ ms ty)) - -refineType_ ms (DTyp hyps cat es) = DTyp [(b,x,refineType_ ms ty) | (b,x,ty) <- hyps] cat (List.map (refineExpr_ ms) es) - -value2expr sig i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr sig i) vs) -value2expr sig i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr sig i) vs) -value2expr sig i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr sig i) vs) -value2expr sig i (VSusp j env vs k) = value2expr sig i (k (VGen j vs)) -value2expr sig i (VLit l) = ELit l -value2expr sig i (VClosure env (EAbs b x e)) = EAbs b x (value2expr sig (i+1) (eval sig ((VGen i []):env) e)) diff --git a/src/PGF/VisualizeTree.hs b/src/PGF/VisualizeTree.hs deleted file mode 100644 index 429551f54..000000000 --- a/src/PGF/VisualizeTree.hs +++ /dev/null @@ -1,353 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : VisualizeTree --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: --- > CVS $Author: --- > CVS $Revision: --- --- Print a graph of an abstract syntax tree in Graphviz DOT format --- Based on BB's VisualizeGrammar --- FIXME: change this to use GF.Visualization.Graphviz, --- instead of rolling its own. ------------------------------------------------------------------------------ - -module PGF.VisualizeTree ( graphvizAbstractTree - , graphvizParseTree - , graphvizDependencyTree - , graphvizAlignment - , tree2mk - , getDepLabels - , PosText(..), readPosText - ) where - -import PGF.CId (CId,showCId,pCId,mkCId) -import PGF.Data -import PGF.Tree -import PGF.Expr (showExpr) -import PGF.Linearize -import PGF.Macros (lookValCat) - -import qualified Data.Map as Map -import Data.List (intersperse,nub,isPrefixOf,sort,sortBy) -import Data.Char (isDigit) -import qualified Text.ParserCombinators.ReadP as RP - -import Debug.Trace - -graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String -graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree - -tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String] -tree2graph pgf (funs,cats) = prf [] where - prf ps t = let (nod,lab) = prn ps t in - (nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") : - case t of - Fun cid trees -> - [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++ - concat [prf (j:ps) t | (j,t) <- zip [0..] trees] - Abs xs (Fun cid trees) -> - [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++ - concat [prf (j:ps) t | (j,t) <- zip [0..] trees] - _ -> [] - prn ps t = case t of - Fun cid _ -> - let - fun = if funs then showCId cid else "" - cat = if cats then prCat cid else "" - colon = if funs && cats then " : " else "" - lab = "\"" ++ fun ++ colon ++ cat ++ "\"" - in (show(show (ps :: [Int])),lab) - Abs bs tree -> - let fun = case tree of - Fun cid _ -> Fun cid [] - _ -> tree - in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"") - _ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"") - pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];" - arr = " -- " -- if digr then " -> " else " -- " - prCat = showCId . lookValCat pgf - esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts - -prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where - graph = if digr then "digraph" else "graph" - - --- replace each non-atomic constructor with mkC, where C is the val cat -tree2mk :: PGF -> Expr -> String -tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where - t2m t = case t of - Fun cid [] -> t - Fun cid ts -> Fun (mk cid) (map t2m ts) - _ -> t - mk = mkCId . ("mk" ++) . showCId . lookValCat pgf - --- dependency trees from Linearize.linearizeMark - -graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String -graphvizDependencyTree format debug mlab ms pgf lang exp = case format of - "malt" -> unlines (lin2dep format) - "malt_input" -> unlines (lin2dep format) - _ -> prGraph True (lin2dep format) - - where - - lin2dep format = trace (ifd (show sortedNodes ++ show nodeWords)) $ case format of - "malt" -> map (concat . intersperse "\t") wnodes - "malt_input" -> map (concat . intersperse "\t" . take 6) wnodes - _ -> prelude ++ nodes ++ links - - ifd s = if debug then s else [] - - pot = readPosText $ head $ linearizesMark pgf lang exp - ---- use Just str if you have str to match against - - prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"] - - nodes = map mkNode nodeWords - mkNode (i,((_,p),ss)) = - node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;" - nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)| - ((Just f,p),w) <- wlins pot] - - links = map mkLink thelinks - thelinks = [(word y, x, label tr y x) | - (_,((f,x),_)) <- tail nodeWords, - let y = dominant x] - mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;" - node = show . show - - dominant x = case x of - [] -> x - _ | not (x == hx) -> hx - _ -> dominant (init x) - where - hx = headArg (init x) tr x - - headArg x0 tr x = case (tr,x) of - (Fun f [],[_]) -> x0 ---- ?? - (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f] - (Fun f ts,i:y) -> headArg x0 (ts !! i) y - _ -> x0 ---- - - label tr y x = case span (uncurry (==)) (zip y x) of - (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys)) - _ -> "" ---- - - funAt tr x = case (tr,x) of - (Fun f _ ,[]) -> f - (Fun f ts,i:y) -> funAt (ts !! i) y - _ -> mkCId (prTree tr) ---- - - word x = if elem x sortedNodes then x else - let x' = headArg x tr (x ++[0]) in - if x' == x then [] else word x' - - tr = expr2tree exp - sortedNodes = [p | (_,((_,p),_)) <- nodeWords] - - labels = maybe Map.empty id mlab - getHead i f = case Map.lookup f labels of - Just ls -> length $ takeWhile (/= "head") ls - _ -> i - getLabel i f = case Map.lookup f labels of - Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i - _ -> showCId f ++ "#" ++ show i - --- to generate CoNLL format for MaltParser - nodeMap :: Map.Map [Int] Int - nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords] - - arcMap :: Map.Map [Int] ([Int],String) - arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks] - - lookDomLab p = case Map.lookup p arcMap of - Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l) - _ -> (0,rootlabel) - - wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] | - (i, ((fun,p),ws)) <- tail nodeWords, - let pos = showCId $ lookValCat pgf fun, - let morph = unspec, - let (dom,lab) = lookDomLab p - ] - maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2 - unspec = "_" - rootlabel = "ROOT" - -type Labels = Map.Map CId [String] - -getDepLabels :: [String] -> Labels -getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss] - - --- parse trees from Linearize.linearizeMark ----- nubrec and domins are quadratic, but could be (n log n) - -graphvizParseTree :: PGF -> CId -> Expr -> String -graphvizParseTree pgf lang = prGraph False . lin2tree pgf . linMark where - linMark = head . linearizesMark pgf lang - ---- use Just str if you have str to match against - -lin2tree pgf s = trace s $ prelude ++ nodes ++ links where - - prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"] - - nodeRecs = zip [0..] - (nub (filter (not . null) (nlins [postext] ++ [leaves postext]))) - nlins pts = - nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] : - concatMap nlins [ts | T _ ts <- pts] - leaves pt = [(p++[j],s) | (j,(p,s)) <- - zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]] - - nubrec es rs = case rs of - r:rr -> let r' = filter (not . flip elem es) (nub r) - in r' : nubrec (r' ++ es) rr - _ -> rs - - nodes = map mkStruct nodeRecs - - mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;" - cat = showCId . lookValCat pgf - fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs]) - struct i = "struct" ++ show i - - links = map mkEdge domins - domins = nub [((i,x),(j,y)) | - (i,xs) <- nodeRecs, (j,ys) <- nodeRecs, - x <- xs, y <- ys, dominates x y] - dominates (p,x) (q,y) = not (null q) && p == init q - mkEdge ((i,x),(j,y)) = - struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++ - struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;" - - postext = readPosText s - --- auxiliaries for graphviz syntax -struct i = "struct" ++ show i -mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n -uncommas = map (\c -> if c==',' then 'c' else c) -tag s = "<" ++ s ++ ">" -showp = init . tail . show -mtag = tag . ('n':) . uncommas - --- word alignments from Linearize.linearizesMark --- words are chunks like {[0,1,1,0] old} - -graphvizAlignment :: PGF -> Expr -> String -graphvizAlignment pgf = prGraph True . lin2graph . linsMark where - linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)] - -lin2graph :: [String] -> [String] -lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links - - where - - prelude = ["rankdir=LR ;", "node [shape = record] ;"] - - nlins :: [(Int,[((Int,String),String)])] - nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) | - (i,ws) <- zip [0..] (map (wlins . readPosText) ss)] - - unw = concat . intersperse "\\ " -- space escape in graphviz - - nodes = map mkStruct nlins - - mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" - - fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) - - links = nub $ concatMap mkEdge (init nlins) - - mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list - [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] - - edge i v w = - struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" -{- -alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double) -alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where - linsMark t = - [s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)] - - mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double) - mkStat = - - mkAlign :: [String] -> [(String,String)] - mkAlign ss = - - nlins :: [(Int,[((Int,String),String)])] - nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) | - (i,vs) <- zip [0..] (map (wlins . readPosText) ss)] - - nodes = map mkStruct nlins - - mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" - - fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) - - links = nub $ concatMap mkEdge (init nlins) - - mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list - [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] - - edge i v w = - struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" --} - -wlins :: PosText -> [((Maybe CId,[Int]),[String])] -wlins pt = case pt of - T p pts -> concatMap (lins p) pts - M ws -> if null ws then [] else [((Nothing,[]),ws)] - where - lins p pt = case pt of - T q pts -> concatMap (lins q) pts - M ws -> if null ws then [] else [(p,ws)] - -data PosText = - T (Maybe CId,[Int]) [PosText] - | M [String] - deriving Show - -readPosText :: String -> PosText -readPosText = fst . head . (RP.readP_to_S pPosText) where - pPosText = do - RP.char '(' >> RP.skipSpaces - p <- pPos - RP.skipSpaces - ts <- RP.many pPosText - RP.char ')' >> RP.skipSpaces - return (T p ts) - RP.<++ do - ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ') - return (M ws) - pPos = do - fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f)) - RP.<++ (return Nothing) - RP.char '[' >> RP.skipSpaces - is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',') - RP.char ']' >> RP.skipSpaces - RP.char ')' RP.<++ return ' ' - return (fun,map read is) - - -{- -digraph{ -rankdir ="LR" ; -node [shape = record] ; - -struct1 [label = " this| very| intelligent| man"] ; -struct2 [label = " cet| homme| tres| intelligent| ci"] ; - -struct1:f0 -> struct2:f0 ; -struct1:f1 -> struct2:f2 ; -struct1:f2 -> struct2:f3 ; -struct1:f3 -> struct2:f1 ; -struct1:f0 -> struct2:f4 ; -} --} - diff --git a/src/PGF/doc/Eng.gf b/src/PGF/doc/Eng.gf deleted file mode 100644 index c64f46313..000000000 --- a/src/PGF/doc/Eng.gf +++ /dev/null @@ -1,13 +0,0 @@ -concrete Eng of Ex = { - lincat - S = {s : Str} ; - NP = {s : Str ; n : Num} ; - VP = {s : Num => Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = {s = np.s ++ vp.s ! np.n} ; - She = {s = "she" ; n = Sg} ; - They = {s = "they" ; n = Pl} ; - Sleep = {s = table {Sg => "sleeps" ; Pl => "sleep"}} ; -} diff --git a/src/PGF/doc/Ex.gf b/src/PGF/doc/Ex.gf deleted file mode 100644 index bd0b03483..000000000 --- a/src/PGF/doc/Ex.gf +++ /dev/null @@ -1,8 +0,0 @@ -abstract Ex = { - cat - S ; NP ; VP ; - fun - Pred : NP -> VP -> S ; - She, They : NP ; - Sleep : VP ; -} diff --git a/src/PGF/doc/Swe.gf b/src/PGF/doc/Swe.gf deleted file mode 100644 index 1d6672371..000000000 --- a/src/PGF/doc/Swe.gf +++ /dev/null @@ -1,13 +0,0 @@ -concrete Swe of Ex = { - lincat - S = {s : Str} ; - NP = {s : Str} ; - VP = {s : Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = {s = np.s ++ vp.s} ; - She = {s = "hon"} ; - They = {s = "de"} ; - Sleep = {s = "sover"} ; -} diff --git a/src/PGF/doc/Test.gf b/src/PGF/doc/Test.gf deleted file mode 100644 index 5cd4c5474..000000000 --- a/src/PGF/doc/Test.gf +++ /dev/null @@ -1,64 +0,0 @@ --- to test GFCC compilation - -flags coding=utf8 ; - -cat S ; NP ; N ; VP ; - -fun Pred : NP -> VP -> S ; -fun Pred2 : NP -> VP -> NP -> S ; -fun Det, Dets : N -> NP ; -fun Mina, Sina, Me, Te : NP ; -fun Raha, Paska, Pallo : N ; -fun Puhua, Munia, Sanoa : VP ; - -param Person = P1 | P2 | P3 ; -param Number = Sg | Pl ; -param Case = Nom | Part ; - -param NForm = NF Number Case ; -param VForm = VF Number Person ; - -lincat N = Noun ; -lincat VP = Verb ; - -oper Noun = {s : NForm => Str} ; -oper Verb = {s : VForm => Str} ; - -lincat NP = {s : Case => Str ; a : {n : Number ; p : Person}} ; - -lin Pred np vp = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p} ; -lin Pred2 np vp ob = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p ++ ob.s ! Part} ; -lin Det no = {s = \\c => no.s ! NF Sg c ; a = {n = Sg ; p = P3}} ; -lin Dets no = {s = \\c => no.s ! NF Pl c ; a = {n = Pl ; p = P3}} ; -lin Mina = {s = table Case ["minä" ; "minua"] ; a = {n = Sg ; p = P1}} ; -lin Te = {s = table Case ["te" ; "teitä"] ; a = {n = Pl ; p = P2}} ; -lin Sina = {s = table Case ["sinä" ; "sinua"] ; a = {n = Sg ; p = P2}} ; -lin Me = {s = table Case ["me" ; "meitä"] ; a = {n = Pl ; p = P1}} ; - -lin Raha = mkN "raha" ; -lin Paska = mkN "paska" ; -lin Pallo = mkN "pallo" ; -lin Puhua = mkV "puhu" ; -lin Munia = mkV "muni" ; -lin Sanoa = mkV "sano" ; - -oper mkN : Str -> Noun = \raha -> { - s = table { - NF Sg Nom => raha ; - NF Sg Part => raha + "a" ; - NF Pl Nom => raha + "t" ; - NF Pl Part => Predef.tk 1 raha + "oja" - } - } ; - -oper mkV : Str -> Verb = \puhu -> { - s = table { - VF Sg P1 => puhu + "n" ; - VF Sg P2 => puhu + "t" ; - VF Sg P3 => puhu + Predef.dp 1 puhu ; - VF Pl P1 => puhu + "mme" ; - VF Pl P2 => puhu + "tte" ; - VF Pl P3 => puhu + "vat" - } - } ; - diff --git a/src/PGF/doc/gfcc.html b/src/PGF/doc/gfcc.html deleted file mode 100644 index 8f8c478c0..000000000 --- a/src/PGF/doc/gfcc.html +++ /dev/null @@ -1,809 +0,0 @@ - - - - -The GFCC Grammar Format - -

The GFCC Grammar Format

- -Aarne Ranta
-October 5, 2007 -
- -

-Author's address: -http://www.cs.chalmers.se/~aarne -

-

-History: -

-
    -
  • 5 Oct 2007: new, better structured GFCC with full expressive power -
  • 19 Oct: translation of lincats, new figures on C++ -
  • 3 Oct 2006: first version -
- -

What is GFCC

-

-GFCC is a low-level format for GF grammars. Its aim is to contain the minimum -that is needed to process GF grammars at runtime. This minimality has three -advantages: -

-
    -
  • compact grammar files and run-time objects -
  • time and space efficient processing -
  • simple definition of interpreters -
- -

-Thus we also want to call GFCC the portable grammar format. -

-

-The idea is that all embedded GF applications use GFCC. -The GF system would be primarily used as a compiler and as a grammar -development tool. -

-

-Since GFCC is implemented in BNFC, a parser of the format is readily -available for C, C++, C#, Haskell, Java, and OCaml. Also an XML -representation can be generated in BNFC. A -reference implementation -of linearization and some other functions has been written in Haskell. -

-

GFCC vs. GFC

-

-GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed -to be a run-time format, but also to -support separate compilation of grammars, i.e. -to store the results of compiling -individual GF modules. But this means that GFC has to contain extra information, -such as type annotations, which is only needed in compilation and not at -run-time. In particular, the pattern matching syntax and semantics of GFC is -complex and therefore difficult to implement in new platforms. -

-

-Actually, GFC is planned to be omitted also as the target format of -separate compilation, where plain GF (type annotated and partially evaluated) -will be used instead. GFC provides only marginal advantages as a target format -compared with GF, and it is therefore just extra weight to carry around this -format. -

-

-The main differences of GFCC compared with GFC (and GF) can be summarized as follows: -

-
    -
  • there are no modules, and therefore no qualified names -
  • a GFCC grammar is multilingual, and consists of a common abstract syntax - together with one concrete syntax per language -
  • records and tables are replaced by arrays -
  • record labels and parameter values are replaced by integers -
  • record projection and table selection are replaced by array indexing -
  • even though the format does support dependent types and higher-order abstract - syntax, there is no interpreted yet that does this -
- -

-Here is an example of a GF grammar, consisting of three modules, -as translated to GFCC. The representations are aligned; thus they do not completely -reflect the order of judgements in GFCC files, which have different orders of -blocks of judgements, and alphabetical sorting. -

-
-                                      grammar Ex(Eng,Swe);
-  
-  abstract Ex = {                     abstract {
-    cat                                 cat
-      S ; NP ; VP ;                      NP[]; S[]; VP[];
-    fun                                 fun
-      Pred : NP -> VP -> S ;             Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
-      She, They : NP ;                   She=[0,"she"];
-      Sleep : VP ;                       They=[1,"they"];
-                                         Sleep=[["sleeps","sleep"]];
-  }                                     } ;
-                                      
-  concrete Eng of Ex = {              concrete Eng {
-    lincat                             lincat
-      S  = {s : Str} ;                  S=[()];
-      NP = {s : Str ; n : Num} ;        NP=[1,()];
-      VP = {s : Num => Str} ;           VP=[[(),()]];
-    param
-      Num = Sg | Pl ;
-    lin                                lin
-      Pred np vp = {                    Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))];
-        s = np.s ++ vp.s ! np.n} ;      
-      She = {s = "she" ; n = Sg} ;      She=[0,"she"];
-      They = {s = "they" ; n = Pl} ;    They = [1, "they"];
-      Sleep = {s = table {              Sleep=[["sleeps","sleep"]];
-        Sg => "sleeps" ; 
-        Pl => "sleep"                   
-        }                               
-      } ;
-  }                                   } ;
-  
-  concrete Swe of Ex = {              concrete Swe {
-    lincat                             lincat
-      S  = {s : Str} ;                  S=[()];
-      NP = {s : Str} ;                  NP=[()];
-      VP = {s : Str} ;                  VP=[()];
-    param
-      Num = Sg | Pl ;
-    lin                                lin
-      Pred np vp = {                    Pred = [(($0!0),($1!0))];
-        s = np.s ++ vp.s} ;
-      She = {s = "hon"} ;               She = ["hon"];
-      They = {s = "de"} ;               They = ["de"];
-      Sleep = {s = "sover"} ;           Sleep = ["sover"];
-  }                                     } ;                                   
-
-

-

The syntax of GFCC files

-

-The complete BNFC grammar, from which -the rules in this section are taken, is in the file -GF/GFCC/GFCC.cf. -

-

Top level

-

-A grammar has a header telling the name of the abstract syntax -(often specifying an application domain), and the names of -the concrete languages. The abstract syntax and the concrete -syntaxes themselves follow. -

-
-    Grm. Grammar  ::= 
-      "grammar" CId "(" [CId] ")" ";" 
-      Abstract ";" 
-      [Concrete] ;
-  
-    Abs. Abstract ::= 
-      "abstract" "{" 
-        "flags" [Flag] 
-        "fun"   [FunDef] 
-        "cat"   [CatDef] 
-      "}" ;
-  
-    Cnc. Concrete ::= 
-      "concrete" CId "{" 
-        "flags"  [Flag] 
-        "lin"    [LinDef] 
-        "oper"   [LinDef] 
-        "lincat" [LinDef] 
-        "lindef" [LinDef] 
-        "printname" [LinDef]
-      "}" ;
-
-

-This syntax organizes each module to a sequence of fields, such -as flags, linearizations, operations, linearization types, etc. -It is envisaged that particular applications can ignore some -of the fields, typically so that earlier fields are more -important than later ones. -

-

-The judgement forms have the following syntax. -

-
-    Flg. Flag     ::= CId "=" String ;
-    Cat. CatDef   ::= CId "[" [Hypo] "]" ;
-    Fun. FunDef   ::= CId ":" Type "=" Exp ;
-    Lin. LinDef   ::= CId "=" Term ;
-
-

-For the run-time system, the reference implementation in Haskell -uses a structure that gives efficient look-up: -

-
-    data GFCC = GFCC {
-      absname   :: CId ,
-      cncnames  :: [CId] ,
-      abstract  :: Abstr ,
-      concretes :: Map CId Concr
-      }
-  
-    data Abstr = Abstr {
-      aflags  :: Map CId String,     -- value of a flag
-      funs    :: Map CId (Type,Exp), -- type and def of a fun
-      cats    :: Map CId [Hypo],     -- context of a cat
-      catfuns :: Map CId [CId]       -- funs yielding a cat (redundant, for fast lookup)
-      }
-  
-    data Concr = Concr {
-      flags   :: Map CId String, -- value of a flag
-      lins    :: Map CId Term,   -- lin of a fun
-      opers   :: Map CId Term,   -- oper generated by subex elim
-      lincats :: Map CId Term,   -- lin type of a cat
-      lindefs :: Map CId Term,   -- lin default of a cat
-      printnames :: Map CId Term -- printname of a cat or a fun
-      }
-
-

-These definitions are from GF/GFCC/DataGFCC.hs. -

-

-Identifiers (CId) are like Ident in GF, except that -the compiler produces constants prefixed with _ in -the common subterm elimination optimization. -

-
-    token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
-
-

-

Abstract syntax

-

-Types are first-order function types built from argument type -contexts and value types. -category symbols. Syntax trees (Exp) are -rose trees with nodes consisting of a head (Atom) and -bound variables (CId). -

-
-    DTyp. Type  ::= "[" [Hypo] "]" CId [Exp] ;        
-    DTr.  Exp   ::= "[" "(" [CId] ")" Atom [Exp] "]" ;
-    Hyp.  Hypo  ::= CId ":" Type ;
-
-

-The head Atom is either a function -constant, a bound variable, or a metavariable, or a string, integer, or float -literal. -

-
-    AC.   Atom  ::= CId ;
-    AS.   Atom  ::= String ;
-    AI.   Atom  ::= Integer ;
-    AF.   Atom  ::= Double ;
-    AM.   Atom  ::= "?" Integer ;
-
-

-The context-free types and trees of the "old GFCC" are special -cases, which can be defined as follows: -

-
-    Typ.  Type  ::= [CId] "->" CId
-    Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val
-  
-    Tr.   Exp   ::= "(" CId [Exp] ")"
-    Tr fun exps  = DTr [] fun exps
-
-

-To store semantic (def) definitions by cases, the following expression -form is provided, but it is only meaningful in the last field of a function -declaration in an abstract syntax: -

-
-    EEq. Exp      ::= "{" [Equation] "}" ;
-    Equ. Equation ::= [Exp] "->" Exp ;
-
-

-Notice that expressions are used to encode patterns. Primitive notions -(the default semantics in GF) are encoded as empty sets of equations -([]). For a constructor (canonical form) of a category C, we -aim to use the encoding as the application (_constr C). -

-

Concrete syntax

-

-Linearization terms (Term) are built as follows. -Constructor names are shown to make the later code -examples readable. -

-
-    R.  Term ::= "[" [Term] "]" ;        -- array (record/table)
-    P.  Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection)
-    S.  Term ::= "(" [Term] ")" ;        -- concatenated sequence
-    K.  Term ::= Tokn ;                  -- token
-    V.  Term ::= "$" Integer ;           -- argument (subtree)
-    C.  Term ::= Integer ;               -- array index (label/parameter value)
-    FV. Term ::= "[|" [Term] "|]" ;      -- free variation
-    TM. Term ::= "?" ;                   -- linearization of metavariable
-
-

-Tokens are strings or (maybe obsolescent) prefix-dependent -variant lists. -

-
-    KS.  Tokn     ::= String ;
-    KP.  Tokn     ::= "[" "pre" [String] "[" [Variant] "]" "]" ;
-    Var. Variant  ::= [String] "/" [String] ;
-
-

-Two special forms of terms are introduced by the compiler -as optimizations. They can in principle be eliminated, but -their presence makes grammars much more compact. Their semantics -will be explained in a later section. -

-
-    F.  Term ::= CId ;                     -- global constant
-    W.  Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
-
-

-There is also a deprecated form of "record parameter alias", -

-
-    RP. Term ::= "(" Term "@" Term ")";    -- DEPRECATED
-
-

-which will be removed when the migration to new GFCC is complete. -

-

The semantics of concrete syntax terms

-

-The code in this section is from GF/GFCC/Linearize.hs. -

-

Linearization and realization

-

-The linearization algorithm is essentially the same as in -GFC: a tree is linearized by evaluating its linearization term -in the environment of the linearizations of the subtrees. -Literal atoms are linearized in the obvious way. -The function also needs to know the language (i.e. concrete syntax) -in which linearization is performed. -

-
-    linExp :: GFCC -> CId -> Exp -> Term
-    linExp gfcc lang tree@(DTr _ at trees) = case at of
-      AC fun -> comp (Prelude.map lin trees) $ look fun
-      AS s   -> R [kks (show s)] -- quoted
-      AI i   -> R [kks (show i)]
-      AF d   -> R [kks (show d)]
-      AM     -> TM
-     where
-       lin  = linExp gfcc lang
-       comp = compute gfcc lang
-       look = lookLin gfcc lang
-
-

-TODO: bindings must be supported. -

-

-The result of linearization is usually a record, which is realized as -a string using the following algorithm. -

-
-    realize :: Term -> String
-    realize trm = case trm of
-      R (t:_)  -> realize t
-      S ss     -> unwords $ Prelude.map realize ss
-      K (KS s) -> s
-      K (KP s _) -> unwords s ---- prefix choice TODO
-      W s t    -> s ++ realize t
-      FV (t:_) -> realize t
-      TM       -> "?"
-
-

-Notice that realization always picks the first field of a record. -If a linearization type has more than one field, the first field -does not necessarily contain the desired string. -Also notice that the order of record fields in GFCC is not necessarily -the same as in GF source. -

-

Term evaluation

-

-Evaluation follows call-by-value order, with two environments -needed: -

-
    -
  • the grammar (a concrete syntax) to give the global constants -
  • an array of terms to give the subtree linearizations -
- -

-The code is presented in one-level pattern matching, to -enable reimplementations in languages that do not permit -deep patterns (such as Java and C++). -

-
-  compute :: GFCC -> CId -> [Term] -> Term -> Term
-  compute gfcc lang args = comp where
-    comp trm = case trm of
-      P r p  -> proj (comp r) (comp p)
-      W s t  -> W s (comp t)
-      R ts   -> R $ Prelude.map comp ts
-      V i    -> idx args (fromInteger i)  -- already computed
-      F c    -> comp $ look c             -- not computed (if contains V)
-      FV ts  -> FV $ Prelude.map comp ts
-      S ts   -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
-      _ -> trm
-  
-    look = lookOper gfcc lang
-  
-    idx xs i = xs !! i
-  
-    proj r p = case (r,p) of
-      (_,     FV ts) -> FV $ Prelude.map (proj r) ts
-      (W s t, _)     -> kks (s ++ getString (proj t p))
-      _              -> comp $ getField r (getIndex p)
-  
-    getString t = case t of
-      K (KS s) -> s
-      _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
-  
-    getIndex t =  case t of
-      C i    -> fromInteger i
-      RP p _ -> getIndex p
-      TM     -> 0  -- default value for parameter
-      _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
-  
-    getField t i = case t of
-      R rs   -> idx rs i
-      RP _ r -> getField r i
-      TM     -> TM
-      _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
-
-

-

The special term constructors

-

-The three forms introduced by the compiler may a need special -explanation. -

-

-Global constants -

-
-    Term ::= CId ;
-
-

-are shorthands for complex terms. They are produced by the -compiler by (iterated) common subexpression elimination. -They are often more powerful than hand-devised code sharing in the source -code. They could be computed off-line by replacing each identifier by -its definition. -

-

-Prefix-suffix tables -

-
-    Term ::= "(" String "+" Term ")" ; 
-
-

-represent tables of word forms divided to the longest common prefix -and its array of suffixes. In the example grammar above, we have -

-
-    Sleep = [("sleep" + ["s",""])]
-
-

-which in fact is equal to the array of full forms -

-
-    ["sleeps", "sleep"]
-
-

-The power of this construction comes from the fact that suffix sets -tend to be repeated in a language, and can therefore be collected -by common subexpression elimination. It is this technique that -explains the used syntax rather than the more accurate -

-
-    "(" String "+" [String] ")"
-
-

-since we want the suffix part to be a Term for the optimization to -take effect. -

-

Compiling to GFCC

-

-Compilation to GFCC is performed by the GF grammar compiler, and -GFCC interpreters need not know what it does. For grammar writers, -however, it might be interesting to know what happens to the grammars -in the process. -

-

-The compilation phases are the following -

-
    -
  1. type check and partially evaluate GF source -
  2. create a symbol table mapping the GF parameter and record types to - fixed-size arrays, and parameter values and record labels to integers -
  3. traverse the linearization rules replacing parameters and labels by integers -
  4. reorganize the created GF grammar so that it has just one abstract syntax - and one concrete syntax per language -
  5. TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the - coding flag) -
  6. translate the GF grammar object to a GFCC grammar object, using a simple - compositional mapping -
  7. perform the word-suffix optimization on GFCC linearization terms -
  8. perform subexpression elimination on each concrete syntax module -
  9. print out the GFCC code -
- -

Problems in GFCC compilation

-

-Two major problems had to be solved in compiling GF to GFCC: -

-
    -
  • consistent order of tables and records, to permit the array translation -
  • run-time variables in complex parameter values. -
- -

-The current implementation is still experimental and may fail -to generate correct code. Any errors remaining are likely to be -related to the two problems just mentioned. -

-

-The order problem is solved in slightly different ways for tables and records. -In both cases, eta expansion is used to establish a -canonical order. Tables are ordered by applying the preorder induced -by param definitions. Records are ordered by sorting them by labels. -This means that -e.g. the s field will in general no longer appear as the first -field, even if it does so in the GF source code. But relying on the -order of fields in a labelled record would be misplaced anyway. -

-

-The canonical form of records is further complicated by lock fields, -i.e. dummy fields of form lock_C = <>, which are added to grammar -libraries to force intensionality of linearization types. The problem -is that the absence of a lock field only generates a warning, not -an error. Therefore a GF grammar can contain objects of the same -type with and without a lock field. This problem was solved in GFCC -generation by just removing all lock fields (defined as fields whose -type is the empty record type). This has the further advantage of -(slightly) reducing the grammar size. More importantly, it is safe -to remove lock fields, because they are never used in computation, -and because intensional types are only needed in grammars reused -as libraries, not in grammars used at runtime. -

-

-While the order problem is rather bureaucratic in nature, run-time -variables are an interesting problem. They arise in the presence -of complex parameter values, created by argument-taking constructors -and parameter records. To give an example, consider the GF parameter -type system -

-
-    Number = Sg | Pl ;
-    Person = P1 | P2 | P3 ;
-    Agr = Ag Number Person ;
-
-

-The values can be translated to integers in the expected way, -

-
-    Sg = 0, Pl = 1
-    P1 = 0, P2 = 1, P3 = 2
-    Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2,
-    Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5
-
-

-However, an argument of Agr can be a run-time variable, as in -

-
-    Ag np.n P3
-
-

-This expression must first be translated to a case expression, -

-
-    case np.n of {
-      0 => 2 ;
-      1 => 5
-      }
-
-

-which can then be translated to the GFCC term -

-
-    ([2,5] ! ($0 ! $1))  
-
-

-assuming that the variable np is the first argument and that its -Number field is the second in the record. -

-

-This transformation of course has to be performed recursively, since -there can be several run-time variables in a parameter value: -

-
-    Ag np.n np.p
-
-

-A similar transformation would be possible to deal with the double -role of parameter records discussed above. Thus the type -

-
-    RNP = {n : Number ; p : Person}
-
-

-could be uniformly translated into the set {0,1,2,3,4,5} -as Agr above. Selections would be simple instances of indexing. -But any projection from the record should be translated into -a case expression, -

-
-    rnp.n  ===> 
-    case rnp of {
-      0 => 0 ;
-      1 => 0 ;
-      2 => 0 ;
-      3 => 1 ;
-      4 => 1 ;
-      5 => 1
-      }
-
-

-To avoid the code bloat resulting from this, we have chosen to -deal with records by a currying transformation: -

-
-    table {n : Number ; p : Person} {... ...}
-     ===>
-    table Number {Sg => table Person {...} ; table Person {...}}
-
-

-This is performed when GFCC is generated. Selections with -records have to be treated likewise, -

-
-    t ! r   ===> t ! r.n ! r.p
-
-

-

The representation of linearization types

-

-Linearization types (lincat) are not needed when generating with -GFCC, but they have been added to enable parser generation directly from -GFCC. The linearization type definitions are shown as a part of the -concrete syntax, by using terms to represent types. Here is the table -showing how different linearization types are encoded. -

-
-    P*                         = max(P)         -- parameter type
-    {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*]  -- record
-    (P => T)*                  = [T* ,...,T*]   -- table, size(P) cases
-    Str*                       = ()
-
-

-For example, the linearization type present/CatEng.NP is -translated as follows: -

-
-    NP = {
-      a : {                     -- 6 = 2*3 values
-        n : {ParamX.Number} ;   -- 2 values
-        p : {ParamX.Person}     -- 3 values
-      } ;
-      s : {ResEng.Case} => Str  -- 3 values
-    }
-  
-    __NP = [[1,2],[(),(),()]]
-
-

-

Running the compiler and the GFCC interpreter

-

-GFCC generation is a part of the -developers' version -of GF since September 2006. To invoke the compiler, the flag --printer=gfcc to the command -pm = print_multi is used. It is wise to recompile the grammar from -source, since previously compiled libraries may not obey the canonical -order of records. -Here is an example, performed in -example/bronzeage. -

-
-    i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf
-    i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf
-    strip
-    pm -printer=gfcc | wf bronze.gfcc
-
-

-There is also an experimental batch compiler, which does not use the GFC -format or the record aliases. It can be produced by -

-
-    make gfc
-
-

-in GF/src, and invoked by -

-
-    gfc --make FILES
-
-

-

The reference interpreter

-

-The reference interpreter written in Haskell consists of the following files: -

-
-    -- source file for BNFC
-    GFCC.cf       -- labelled BNF grammar of gfcc
-  
-    -- files generated by BNFC
-    AbsGFCC.hs    -- abstrac syntax datatypes
-    ErrM.hs       -- error monad used internally
-    LexGFCC.hs    -- lexer of gfcc files
-    ParGFCC.hs    -- parser of gfcc files and syntax trees
-    PrintGFCC.hs  -- printer of gfcc files and syntax trees
-  
-    -- hand-written files
-    DataGFCC.hs   -- grammar datatype, post-parser grammar creation
-    Linearize.hs  -- linearization and evaluation
-    Macros.hs     -- utilities abstracting away from GFCC datatypes
-    Generate.hs   -- random and exhaustive generation, generate-and-test parsing
-    API.hs        -- functionalities accessible in embedded GF applications
-    Generate.hs   -- random and exhaustive generation
-    Shell.hs      -- main function - a simple command interpreter
-
-

-It is included in the -developers' version -of GF, in the subdirectories GF/src/GF/GFCC and -GF/src/GF/Devel. -

-

-As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir -Angelov). The interpreter uses the relevant modules -

-
-    GF/Conversions/SimpleToFCFG.hs  -- generate parser from GFCC
-    GF/Parsing/FCFG.hs              -- run the parser
-
-

-

-To compile the interpreter, type -

-
-    make gfcc
-
-

-in GF/src. To run it, type -

-
-    ./gfcc <GFCC-file>
-
-

-The available commands are -

-
    -
  • gr <Cat> <Int>: generate a number of random trees in category. - and show their linearizations in all languages -
  • grt <Cat> <Int>: generate a number of random trees in category. - and show the trees and their linearizations in all languages -
  • gt <Cat> <Int>: generate a number of trees in category from smallest, - and show their linearizations in all languages -
  • gtt <Cat> <Int>: generate a number of trees in category from smallest, - and show the trees and their linearizations in all languages -
  • p <Lang> <Cat> <String>: parse a string into a set of trees -
  • lin <Tree>: linearize tree in all languages, also showing full records -
  • q: terminate the system cleanly -
- -

Embedded formats

-
    -
  • JavaScript: compiler of linearization and abstract syntax -

    -
  • Haskell: compiler of abstract syntax and interpreter with parsing, - linearization, and generation -

    -
  • C: compiler of linearization (old GFCC) -

    -
  • C++: embedded interpreter supporting linearization (old GFCC) -
- -

Some things to do

-

-Support for dependent types, higher-order abstract syntax, and -semantic definition in GFCC generation and interpreters. -

-

-Replacing the entire GF shell by one based on GFCC. -

-

-Interpreter in Java. -

-

-Hand-written parsers for GFCC grammars to reduce code size -(and efficiency?) of interpreters. -

-

-Binary format and/or file compression of GFCC output. -

-

-Syntax editor based on GFCC. -

-

-Rewriting of resource libraries in order to exploit the -word-suffix sharing better (depth-one tables, as in FM). -

- - - - diff --git a/src/PGF/doc/gfcc.txt b/src/PGF/doc/gfcc.txt deleted file mode 100644 index 5dcf2fbdc..000000000 --- a/src/PGF/doc/gfcc.txt +++ /dev/null @@ -1,712 +0,0 @@ -The GFCC Grammar Format -Aarne Ranta -December 14, 2007 - -Author's address: -[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne] - -% to compile: txt2tags -thtml --toc gfcc.txt - -History: -- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC -- 5 Oct 2007: new, better structured GFCC with full expressive power -- 19 Oct: translation of lincats, new figures on C++ -- 3 Oct 2006: first version - - -==What is GFCC== - -GFCC is a low-level format for GF grammars. Its aim is to contain the minimum -that is needed to process GF grammars at runtime. This minimality has three -advantages: -- compact grammar files and run-time objects -- time and space efficient processing -- simple definition of interpreters - - -Thus we also want to call GFCC the **portable grammar format**. - -The idea is that all embedded GF applications use GFCC. -The GF system would be primarily used as a compiler and as a grammar -development tool. - -Since GFCC is implemented in BNFC, a parser of the format is readily -available for C, C++, C#, Haskell, Java, and OCaml. Also an XML -representation can be generated in BNFC. A -[reference implementation ../] -of linearization and some other functions has been written in Haskell. - - -==GFCC vs. GFC== - -GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed -to be a run-time format, but also to -support separate compilation of grammars, i.e. -to store the results of compiling -individual GF modules. But this means that GFC has to contain extra information, -such as type annotations, which is only needed in compilation and not at -run-time. In particular, the pattern matching syntax and semantics of GFC is -complex and therefore difficult to implement in new platforms. - -Actually, GFC is planned to be omitted also as the target format of -separate compilation, where plain GF (type annotated and partially evaluated) -will be used instead. GFC provides only marginal advantages as a target format -compared with GF, and it is therefore just extra weight to carry around this -format. - -The main differences of GFCC compared with GFC (and GF) can be -summarized as follows: -- there are no modules, and therefore no qualified names -- a GFCC grammar is multilingual, and consists of a common abstract syntax - together with one concrete syntax per language -- records and tables are replaced by arrays -- record labels and parameter values are replaced by integers -- record projection and table selection are replaced by array indexing -- even though the format does support dependent types and higher-order abstract - syntax, there is no interpreted yet that does this - - - -Here is an example of a GF grammar, consisting of three modules, -as translated to GFCC. The representations are aligned; -thus they do not completely -reflect the order of judgements in GFCC files, which have different orders of -blocks of judgements, and alphabetical sorting. -``` - grammar Ex(Eng,Swe); - -abstract Ex = { abstract { - cat cat - S ; NP ; VP ; NP[]; S[]; VP[]; - fun fun - Pred : NP -> VP -> S ; Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))]; - She, They : NP ; She=[0,"she"]; - Sleep : VP ; They=[1,"they"]; - Sleep=[["sleeps","sleep"]]; -} } ; - -concrete Eng of Ex = { concrete Eng { - lincat lincat - S = {s : Str} ; S=[()]; - NP = {s : Str ; n : Num} ; NP=[1,()]; - VP = {s : Num => Str} ; VP=[[(),()]]; - param - Num = Sg | Pl ; - lin lin - Pred np vp = { Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))]; - s = np.s ++ vp.s ! np.n} ; - She = {s = "she" ; n = Sg} ; She=[0,"she"]; - They = {s = "they" ; n = Pl} ; They = [1, "they"]; - Sleep = {s = table { Sleep=[["sleeps","sleep"]]; - Sg => "sleeps" ; - Pl => "sleep" - } - } ; -} } ; - -concrete Swe of Ex = { concrete Swe { - lincat lincat - S = {s : Str} ; S=[()]; - NP = {s : Str} ; NP=[()]; - VP = {s : Str} ; VP=[()]; - param - Num = Sg | Pl ; - lin lin - Pred np vp = { Pred = [(($0!0),($1!0))]; - s = np.s ++ vp.s} ; - She = {s = "hon"} ; She = ["hon"]; - They = {s = "de"} ; They = ["de"]; - Sleep = {s = "sover"} ; Sleep = ["sover"]; -} } ; -``` - -==The syntax of GFCC files== - -The complete BNFC grammar, from which -the rules in this section are taken, is in the file -[``GF/GFCC/GFCC.cf`` ../DataGFCC.cf]. - - -===Top level=== - -A grammar has a header telling the name of the abstract syntax -(often specifying an application domain), and the names of -the concrete languages. The abstract syntax and the concrete -syntaxes themselves follow. -``` - Grm. Grammar ::= - "grammar" CId "(" [CId] ")" ";" - Abstract ";" - [Concrete] ; - - Abs. Abstract ::= - "abstract" "{" - "flags" [Flag] - "fun" [FunDef] - "cat" [CatDef] - "}" ; - - Cnc. Concrete ::= - "concrete" CId "{" - "flags" [Flag] - "lin" [LinDef] - "oper" [LinDef] - "lincat" [LinDef] - "lindef" [LinDef] - "printname" [LinDef] - "}" ; -``` -This syntax organizes each module to a sequence of **fields**, such -as flags, linearizations, operations, linearization types, etc. -It is envisaged that particular applications can ignore some -of the fields, typically so that earlier fields are more -important than later ones. - -The judgement forms have the following syntax. -``` - Flg. Flag ::= CId "=" String ; - Cat. CatDef ::= CId "[" [Hypo] "]" ; - Fun. FunDef ::= CId ":" Type "=" Exp ; - Lin. LinDef ::= CId "=" Term ; -``` -For the run-time system, the reference implementation in Haskell -uses a structure that gives efficient look-up: -``` - data GFCC = GFCC { - absname :: CId , - cncnames :: [CId] , - abstract :: Abstr , - concretes :: Map CId Concr - } - - data Abstr = Abstr { - aflags :: Map CId String, -- value of a flag - funs :: Map CId (Type,Exp), -- type and def of a fun - cats :: Map CId [Hypo], -- context of a cat - catfuns :: Map CId [CId] -- funs yielding a cat (redundant, for fast lookup) - } - - data Concr = Concr { - flags :: Map CId String, -- value of a flag - lins :: Map CId Term, -- lin of a fun - opers :: Map CId Term, -- oper generated by subex elim - lincats :: Map CId Term, -- lin type of a cat - lindefs :: Map CId Term, -- lin default of a cat - printnames :: Map CId Term -- printname of a cat or a fun - } -``` -These definitions are from [``GF/GFCC/DataGFCC.hs`` ../DataGFCC.hs]. - -Identifiers (``CId``) are like ``Ident`` in GF, except that -the compiler produces constants prefixed with ``_`` in -the common subterm elimination optimization. -``` - token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; -``` - - -===Abstract syntax=== - -Types are first-order function types built from argument type -contexts and value types. -category symbols. Syntax trees (``Exp``) are -rose trees with nodes consisting of a head (``Atom``) and -bound variables (``CId``). -``` - DTyp. Type ::= "[" [Hypo] "]" CId [Exp] ; - DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ; - Hyp. Hypo ::= CId ":" Type ; -``` -The head Atom is either a function -constant, a bound variable, or a metavariable, or a string, integer, or float -literal. -``` - AC. Atom ::= CId ; - AS. Atom ::= String ; - AI. Atom ::= Integer ; - AF. Atom ::= Double ; - AM. Atom ::= "?" Integer ; -``` -The context-free types and trees of the "old GFCC" are special -cases, which can be defined as follows: -``` - Typ. Type ::= [CId] "->" CId - Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val - - Tr. Exp ::= "(" CId [Exp] ")" - Tr fun exps = DTr [] fun exps -``` -To store semantic (``def``) definitions by cases, the following expression -form is provided, but it is only meaningful in the last field of a function -declaration in an abstract syntax: -``` - EEq. Exp ::= "{" [Equation] "}" ; - Equ. Equation ::= [Exp] "->" Exp ; -``` -Notice that expressions are used to encode patterns. Primitive notions -(the default semantics in GF) are encoded as empty sets of equations -(``[]``). For a constructor (canonical form) of a category ``C``, we -aim to use the encoding as the application ``(_constr C)``. - - - -===Concrete syntax=== - -Linearization terms (``Term``) are built as follows. -Constructor names are shown to make the later code -examples readable. -``` - R. Term ::= "[" [Term] "]" ; -- array (record/table) - P. Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection) - S. Term ::= "(" [Term] ")" ; -- concatenated sequence - K. Term ::= Tokn ; -- token - V. Term ::= "$" Integer ; -- argument (subtree) - C. Term ::= Integer ; -- array index (label/parameter value) - FV. Term ::= "[|" [Term] "|]" ; -- free variation - TM. Term ::= "?" ; -- linearization of metavariable -``` -Tokens are strings or (maybe obsolescent) prefix-dependent -variant lists. -``` - KS. Tokn ::= String ; - KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; - Var. Variant ::= [String] "/" [String] ; -``` -Two special forms of terms are introduced by the compiler -as optimizations. They can in principle be eliminated, but -their presence makes grammars much more compact. Their semantics -will be explained in a later section. -``` - F. Term ::= CId ; -- global constant - W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table -``` -There is also a deprecated form of "record parameter alias", -``` - RP. Term ::= "(" Term "@" Term ")"; -- DEPRECATED -``` -which will be removed when the migration to new GFCC is complete. - - - -==The semantics of concrete syntax terms== - -The code in this section is from [``GF/GFCC/Linearize.hs`` ../Linearize.hs]. - - -===Linearization and realization=== - -The linearization algorithm is essentially the same as in -GFC: a tree is linearized by evaluating its linearization term -in the environment of the linearizations of the subtrees. -Literal atoms are linearized in the obvious way. -The function also needs to know the language (i.e. concrete syntax) -in which linearization is performed. -``` - linExp :: GFCC -> CId -> Exp -> Term - linExp gfcc lang tree@(DTr _ at trees) = case at of - AC fun -> comp (Prelude.map lin trees) $ look fun - AS s -> R [kks (show s)] -- quoted - AI i -> R [kks (show i)] - AF d -> R [kks (show d)] - AM -> TM - where - lin = linExp gfcc lang - comp = compute gfcc lang - look = lookLin gfcc lang -``` -TODO: bindings must be supported. - -The result of linearization is usually a record, which is realized as -a string using the following algorithm. -``` - realize :: Term -> String - realize trm = case trm of - R (t:_) -> realize t - S ss -> unwords $ Prelude.map realize ss - K (KS s) -> s - K (KP s _) -> unwords s ---- prefix choice TODO - W s t -> s ++ realize t - FV (t:_) -> realize t - TM -> "?" -``` -Notice that realization always picks the first field of a record. -If a linearization type has more than one field, the first field -does not necessarily contain the desired string. -Also notice that the order of record fields in GFCC is not necessarily -the same as in GF source. - - -===Term evaluation=== - -Evaluation follows call-by-value order, with two environments -needed: -- the grammar (a concrete syntax) to give the global constants -- an array of terms to give the subtree linearizations - - -The code is presented in one-level pattern matching, to -enable reimplementations in languages that do not permit -deep patterns (such as Java and C++). -``` -compute :: GFCC -> CId -> [Term] -> Term -> Term -compute gfcc lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ Prelude.map comp ts - V i -> idx args (fromInteger i) -- already computed - F c -> comp $ look c -- not computed (if contains V) - FV ts -> FV $ Prelude.map comp ts - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - _ -> trm - - look = lookOper gfcc lang - - idx xs i = xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ Prelude.map (proj r) ts - (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p - TM -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - - getField t i = case t of - R rs -> idx rs i - RP _ r -> getField r i - TM -> TM - _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t -``` - -===The special term constructors=== - -The three forms introduced by the compiler may a need special -explanation. - -Global constants -``` - Term ::= CId ; -``` -are shorthands for complex terms. They are produced by the -compiler by (iterated) **common subexpression elimination**. -They are often more powerful than hand-devised code sharing in the source -code. They could be computed off-line by replacing each identifier by -its definition. - -**Prefix-suffix tables** -``` - Term ::= "(" String "+" Term ")" ; -``` -represent tables of word forms divided to the longest common prefix -and its array of suffixes. In the example grammar above, we have -``` - Sleep = [("sleep" + ["s",""])] -``` -which in fact is equal to the array of full forms -``` - ["sleeps", "sleep"] -``` -The power of this construction comes from the fact that suffix sets -tend to be repeated in a language, and can therefore be collected -by common subexpression elimination. It is this technique that -explains the used syntax rather than the more accurate -``` - "(" String "+" [String] ")" -``` -since we want the suffix part to be a ``Term`` for the optimization to -take effect. - - - -==Compiling to GFCC== - -Compilation to GFCC is performed by the GF grammar compiler, and -GFCC interpreters need not know what it does. For grammar writers, -however, it might be interesting to know what happens to the grammars -in the process. - -The compilation phases are the following -+ type check and partially evaluate GF source -+ create a symbol table mapping the GF parameter and record types to - fixed-size arrays, and parameter values and record labels to integers -+ traverse the linearization rules replacing parameters and labels by integers -+ reorganize the created GF grammar so that it has just one abstract syntax - and one concrete syntax per language -+ TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the - ``coding`` flag) -+ translate the GF grammar object to a GFCC grammar object, using a simple - compositional mapping -+ perform the word-suffix optimization on GFCC linearization terms -+ perform subexpression elimination on each concrete syntax module -+ print out the GFCC code - - - - -===Problems in GFCC compilation=== - -Two major problems had to be solved in compiling GF to GFCC: -- consistent order of tables and records, to permit the array translation -- run-time variables in complex parameter values. - - -The current implementation is still experimental and may fail -to generate correct code. Any errors remaining are likely to be -related to the two problems just mentioned. - -The order problem is solved in slightly different ways for tables and records. -In both cases, **eta expansion** is used to establish a -canonical order. Tables are ordered by applying the preorder induced -by ``param`` definitions. Records are ordered by sorting them by labels. -This means that -e.g. the ``s`` field will in general no longer appear as the first -field, even if it does so in the GF source code. But relying on the -order of fields in a labelled record would be misplaced anyway. - -The canonical form of records is further complicated by lock fields, -i.e. dummy fields of form ``lock_C = <>``, which are added to grammar -libraries to force intensionality of linearization types. The problem -is that the absence of a lock field only generates a warning, not -an error. Therefore a GF grammar can contain objects of the same -type with and without a lock field. This problem was solved in GFCC -generation by just removing all lock fields (defined as fields whose -type is the empty record type). This has the further advantage of -(slightly) reducing the grammar size. More importantly, it is safe -to remove lock fields, because they are never used in computation, -and because intensional types are only needed in grammars reused -as libraries, not in grammars used at runtime. - -While the order problem is rather bureaucratic in nature, run-time -variables are an interesting problem. They arise in the presence -of complex parameter values, created by argument-taking constructors -and parameter records. To give an example, consider the GF parameter -type system -``` - Number = Sg | Pl ; - Person = P1 | P2 | P3 ; - Agr = Ag Number Person ; -``` -The values can be translated to integers in the expected way, -``` - Sg = 0, Pl = 1 - P1 = 0, P2 = 1, P3 = 2 - Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2, - Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5 -``` -However, an argument of ``Agr`` can be a run-time variable, as in -``` - Ag np.n P3 -``` -This expression must first be translated to a case expression, -``` - case np.n of { - 0 => 2 ; - 1 => 5 - } -``` -which can then be translated to the GFCC term -``` - ([2,5] ! ($0 ! $1)) -``` -assuming that the variable ``np`` is the first argument and that its -``Number`` field is the second in the record. - -This transformation of course has to be performed recursively, since -there can be several run-time variables in a parameter value: -``` - Ag np.n np.p -``` -A similar transformation would be possible to deal with the double -role of parameter records discussed above. Thus the type -``` - RNP = {n : Number ; p : Person} -``` -could be uniformly translated into the set ``{0,1,2,3,4,5}`` -as ``Agr`` above. Selections would be simple instances of indexing. -But any projection from the record should be translated into -a case expression, -``` - rnp.n ===> - case rnp of { - 0 => 0 ; - 1 => 0 ; - 2 => 0 ; - 3 => 1 ; - 4 => 1 ; - 5 => 1 - } -``` -To avoid the code bloat resulting from this, we have chosen to -deal with records by a **currying** transformation: -``` - table {n : Number ; p : Person} {... ...} - ===> - table Number {Sg => table Person {...} ; table Person {...}} -``` -This is performed when GFCC is generated. Selections with -records have to be treated likewise, -``` - t ! r ===> t ! r.n ! r.p -``` - - -===The representation of linearization types=== - -Linearization types (``lincat``) are not needed when generating with -GFCC, but they have been added to enable parser generation directly from -GFCC. The linearization type definitions are shown as a part of the -concrete syntax, by using terms to represent types. Here is the table -showing how different linearization types are encoded. -``` - P* = max(P) -- parameter type - {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- record - (P => T)* = [T* ,...,T*] -- table, size(P) cases - Str* = () -``` -For example, the linearization type ``present/CatEng.NP`` is -translated as follows: -``` - NP = { - a : { -- 6 = 2*3 values - n : {ParamX.Number} ; -- 2 values - p : {ParamX.Person} -- 3 values - } ; - s : {ResEng.Case} => Str -- 3 values - } - - __NP = [[1,2],[(),(),()]] -``` - - - - -===Running the compiler and the GFCC interpreter=== - -GFCC generation is a part of the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF since September 2006. To invoke the compiler, the flag -``-printer=gfcc`` to the command -``pm = print_multi`` is used. It is wise to recompile the grammar from -source, since previously compiled libraries may not obey the canonical -order of records. -Here is an example, performed in -[example/bronzeage ../../../../../examples/bronzeage]. -``` - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf - strip - pm -printer=gfcc | wf bronze.gfcc -``` -There is also an experimental batch compiler, which does not use the GFC -format or the record aliases. It can be produced by -``` - make gfc -``` -in ``GF/src``, and invoked by -``` - gfc --make FILES -``` - - - - -==The reference interpreter== - -The reference interpreter written in Haskell consists of the following files: -``` - -- source file for BNFC - GFCC.cf -- labelled BNF grammar of gfcc - - -- files generated by BNFC - AbsGFCC.hs -- abstrac syntax datatypes - ErrM.hs -- error monad used internally - LexGFCC.hs -- lexer of gfcc files - ParGFCC.hs -- parser of gfcc files and syntax trees - PrintGFCC.hs -- printer of gfcc files and syntax trees - - -- hand-written files - DataGFCC.hs -- grammar datatype, post-parser grammar creation - Linearize.hs -- linearization and evaluation - Macros.hs -- utilities abstracting away from GFCC datatypes - Generate.hs -- random and exhaustive generation, generate-and-test parsing - API.hs -- functionalities accessible in embedded GF applications - Generate.hs -- random and exhaustive generation - Shell.hs -- main function - a simple command interpreter -``` -It is included in the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF, in the subdirectories [``GF/src/GF/GFCC`` ../] and -[``GF/src/GF/Devel`` ../../Devel]. - -As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir -Angelov). The interpreter uses the relevant modules -``` - GF/Conversions/SimpleToFCFG.hs -- generate parser from GFCC - GF/Parsing/FCFG.hs -- run the parser -``` - - -To compile the interpreter, type -``` - make gfcc -``` -in ``GF/src``. To run it, type -``` - ./gfcc -``` -The available commands are -- ``gr ``: generate a number of random trees in category. - and show their linearizations in all languages -- ``grt ``: generate a number of random trees in category. - and show the trees and their linearizations in all languages -- ``gt ``: generate a number of trees in category from smallest, - and show their linearizations in all languages -- ``gtt ``: generate a number of trees in category from smallest, - and show the trees and their linearizations in all languages -- ``p ``: parse a string into a set of trees -- ``lin ``: linearize tree in all languages, also showing full records -- ``q``: terminate the system cleanly - - - -==Embedded formats== - -- JavaScript: compiler of linearization and abstract syntax - -- Haskell: compiler of abstract syntax and interpreter with parsing, - linearization, and generation - -- C: compiler of linearization (old GFCC) - -- C++: embedded interpreter supporting linearization (old GFCC) - - - -==Some things to do== - -Support for dependent types, higher-order abstract syntax, and -semantic definition in GFCC generation and interpreters. - -Replacing the entire GF shell by one based on GFCC. - -Interpreter in Java. - -Hand-written parsers for GFCC grammars to reduce code size -(and efficiency?) of interpreters. - -Binary format and/or file compression of GFCC output. - -Syntax editor based on GFCC. - -Rewriting of resource libraries in order to exploit the -word-suffix sharing better (depth-one tables, as in FM). - diff --git a/src/PGF/doc/old-GFCC.cf b/src/PGF/doc/old-GFCC.cf deleted file mode 100644 index 65657a259..000000000 --- a/src/PGF/doc/old-GFCC.cf +++ /dev/null @@ -1,50 +0,0 @@ -Grm. Grammar ::= Header ";" Abstract ";" [Concrete] ; -Hdr. Header ::= "grammar" CId "(" [CId] ")" ; -Abs. Abstract ::= "abstract" "{" [AbsDef] "}" ; -Cnc. Concrete ::= "concrete" CId "{" [CncDef] "}" ; - -Fun. AbsDef ::= CId ":" Type "=" Exp ; ---AFl. AbsDef ::= "%" CId "=" String ; -- flag -Lin. CncDef ::= CId "=" Term ; ---CFl. CncDef ::= "%" CId "=" String ; -- flag - -Typ. Type ::= [CId] "->" CId ; -Tr. Exp ::= "(" Atom [Exp] ")" ; -AC. Atom ::= CId ; -AS. Atom ::= String ; -AI. Atom ::= Integer ; -AF. Atom ::= Double ; -AM. Atom ::= "?" ; -trA. Exp ::= Atom ; -define trA a = Tr a [] ; - -R. Term ::= "[" [Term] "]" ; -- record/table -P. Term ::= "(" Term "!" Term ")" ; -- projection/selection -S. Term ::= "(" [Term] ")" ; -- sequence with ++ -K. Term ::= Tokn ; -- token -V. Term ::= "$" Integer ; -- argument -C. Term ::= Integer ; -- parameter value/label -F. Term ::= CId ; -- global constant -FV. Term ::= "[|" [Term] "|]" ; -- free variation -W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table -RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias -TM. Term ::= "?" ; -- lin of metavariable - -L. Term ::= "(" CId "->" Term ")" ; -- lambda abstracted table -BV. Term ::= "#" CId ; -- lambda-bound variable - -KS. Tokn ::= String ; -KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; -Var. Variant ::= [String] "/" [String] ; - - -terminator Concrete ";" ; -terminator AbsDef ";" ; -terminator CncDef ";" ; -separator CId "," ; -separator Term "," ; -terminator Exp "" ; -terminator String "" ; -separator Variant "," ; - -token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; diff --git a/src/PGF/doc/old-gfcc.txt b/src/PGF/doc/old-gfcc.txt deleted file mode 100644 index 6ffd9bd64..000000000 --- a/src/PGF/doc/old-gfcc.txt +++ /dev/null @@ -1,656 +0,0 @@ -The GFCC Grammar Format -Aarne Ranta -October 19, 2006 - -Author's address: -[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne] - -% to compile: txt2tags -thtml --toc gfcc.txt - -History: -- 19 Oct: translation of lincats, new figures on C++ -- 3 Oct 2006: first version - - -==What is GFCC== - -GFCC is a low-level format for GF grammars. Its aim is to contain the minimum -that is needed to process GF grammars at runtime. This minimality has three -advantages: -- compact grammar files and run-time objects -- time and space efficient processing -- simple definition of interpreters - - -The idea is that all embedded GF applications are compiled to GFCC. -The GF system would be primarily used as a compiler and as a grammar -development tool. - -Since GFCC is implemented in BNFC, a parser of the format is readily -available for C, C++, Haskell, Java, and OCaml. Also an XML -representation is generated in BNFC. A -[reference implementation ../] -of linearization and some other functions has been written in Haskell. - - -==GFCC vs. GFC== - -GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed -to be a run-time format, but also to -support separate compilation of grammars, i.e. -to store the results of compiling -individual GF modules. But this means that GFC has to contain extra information, -such as type annotations, which is only needed in compilation and not at -run-time. In particular, the pattern matching syntax and semantics of GFC is -complex and therefore difficult to implement in new platforms. - -The main differences of GFCC compared with GFC can be summarized as follows: -- there are no modules, and therefore no qualified names -- a GFCC grammar is multilingual, and consists of a common abstract syntax - together with one concrete syntax per language -- records and tables are replaced by arrays -- record labels and parameter values are replaced by integers -- record projection and table selection are replaced by array indexing -- there is (so far) no support for dependent types or higher-order abstract - syntax (which would be easy to add, but make interpreters much more difficult - to write) - - -Here is an example of a GF grammar, consisting of three modules, -as translated to GFCC. The representations are aligned, with the exceptions -due to the alphabetical sorting of GFCC grammars. -``` - grammar Ex(Eng,Swe); - -abstract Ex = { abstract { - cat - S ; NP ; VP ; - fun - Pred : NP -> VP -> S ; Pred : NP,VP -> S = (Pred); - She, They : NP ; She : -> NP = (She); - Sleep : VP ; Sleep : -> VP = (Sleep); - They : -> NP = (They); -} } ; - -concrete Eng of Ex = { concrete Eng { - lincat - S = {s : Str} ; - NP = {s : Str ; n : Num} ; - VP = {s : Num => Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))]; - s = np.s ++ vp.s ! np.n} ; - She = {s = "she" ; n = Sg} ; She = [0, "she"]; - They = {s = "they" ; n = Pl} ; - Sleep = {s = table { Sleep = [("sleep" + ["s",""])]; - Sg => "sleeps" ; - Pl => "sleep" They = [1, "they"]; - } } ; - } ; -} - -concrete Swe of Ex = { concrete Swe { - lincat - S = {s : Str} ; - NP = {s : Str} ; - VP = {s : Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = { Pred = [(($0!0),($1!0))]; - s = np.s ++ vp.s} ; - She = {s = "hon"} ; She = ["hon"]; - They = {s = "de"} ; They = ["de"]; - Sleep = {s = "sover"} ; Sleep = ["sover"]; -} } ; -``` - -==The syntax of GFCC files== - -===Top level=== - -A grammar has a header telling the name of the abstract syntax -(often specifying an application domain), and the names of -the concrete languages. The abstract syntax and the concrete -syntaxes themselves follow. -``` - Grammar ::= Header ";" Abstract ";" [Concrete] ; - Header ::= "grammar" CId "(" [CId] ")" ; - Abstract ::= "abstract" "{" [AbsDef] "}" ; - Concrete ::= "concrete" CId "{" [CncDef] "}" ; -``` -Abstract syntax judgements give typings and semantic definitions. -Concrete syntax judgements give linearizations. -``` - AbsDef ::= CId ":" Type "=" Exp ; - CncDef ::= CId "=" Term ; -``` -Also flags are possible, local to each "module" (i.e. abstract and concretes). -``` - AbsDef ::= "%" CId "=" String ; - CncDef ::= "%" CId "=" String ; -``` -For the run-time system, the reference implementation in Haskell -uses a structure that gives efficient look-up: -``` - data GFCC = GFCC { - absname :: CId , - cncnames :: [CId] , - abstract :: Abstr , - concretes :: Map CId Concr - } - - data Abstr = Abstr { - funs :: Map CId Type, -- find the type of a fun - cats :: Map CId [CId] -- find the funs giving a cat - } - - type Concr = Map CId Term -``` - - -===Abstract syntax=== - -Types are first-order function types built from -category symbols. Syntax trees (``Exp``) are -rose trees with the head (``Atom``) either a function -constant, a metavariable, or a string, integer, or float -literal. -``` - Type ::= [CId] "->" CId ; - Exp ::= "(" Atom [Exp] ")" ; - Atom ::= CId ; -- function constant - Atom ::= "?" ; -- metavariable - Atom ::= String ; -- string literal - Atom ::= Integer ; -- integer literal - Atom ::= Double ; -- float literal -``` - - -===Concrete syntax=== - -Linearization terms (``Term``) are built as follows. -Constructor names are shown to make the later code -examples readable. -``` - R. Term ::= "[" [Term] "]" ; -- array - P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field - S. Term ::= "(" [Term] ")" ; -- sequence with ++ - K. Term ::= Tokn ; -- token - V. Term ::= "$" Integer ; -- argument - C. Term ::= Integer ; -- array index - FV. Term ::= "[|" [Term] "|]" ; -- free variation - TM. Term ::= "?" ; -- linearization of metavariable -``` -Tokens are strings or (maybe obsolescent) prefix-dependent -variant lists. -``` - KS. Tokn ::= String ; - KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; - Var. Variant ::= [String] "/" [String] ; -``` -Three special forms of terms are introduced by the compiler -as optimizations. They can in principle be eliminated, but -their presence makes grammars much more compact. Their semantics -will be explained in a later section. -``` - F. Term ::= CId ; -- global constant - W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table - RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias -``` -Identifiers are like ``Ident`` in GF and GFC, except that -the compiler produces constants prefixed with ``_`` in -the common subterm elimination optimization. -``` - token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; -``` - - -==The semantics of concrete syntax terms== - -===Linearization and realization=== - -The linearization algorithm is essentially the same as in -GFC: a tree is linearized by evaluating its linearization term -in the environment of the linearizations of the subtrees. -Literal atoms are linearized in the obvious way. -The function also needs to know the language (i.e. concrete syntax) -in which linearization is performed. -``` - linExp :: GFCC -> CId -> Exp -> Term - linExp mcfg lang tree@(Tr at trees) = case at of - AC fun -> comp (Prelude.map lin trees) $ look fun - AS s -> R [kks (show s)] -- quoted - AI i -> R [kks (show i)] - AF d -> R [kks (show d)] - AM -> TM - where - lin = linExp mcfg lang - comp = compute mcfg lang - look = lookLin mcfg lang -``` -The result of linearization is usually a record, which is realized as -a string using the following algorithm. -``` - realize :: Term -> String - realize trm = case trm of - R (t:_) -> realize t - S ss -> unwords $ Prelude.map realize ss - K (KS s) -> s - K (KP s _) -> unwords s ---- prefix choice TODO - W s t -> s ++ realize t - FV (t:_) -> realize t - TM -> "?" -``` -Since the order of record fields is not necessarily -the same as in GF source, -this realization does not work securely for -categories whose lincats more than one field. - - -===Term evaluation=== - -Evaluation follows call-by-value order, with two environments -needed: -- the grammar (a concrete syntax) to give the global constants -- an array of terms to give the subtree linearizations - - -The code is presented in one-level pattern matching, to -enable reimplementations in languages that do not permit -deep patterns (such as Java and C++). -``` -compute :: GFCC -> CId -> [Term] -> Term -> Term -compute mcfg lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - RP i t -> RP (comp i) (comp t) - W s t -> W s (comp t) - R ts -> R $ Prelude.map comp ts - V i -> idx args (fromInteger i) -- already computed - F c -> comp $ look c -- not computed (if contains V) - FV ts -> FV $ Prelude.map comp ts - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - _ -> trm - - look = lookLin mcfg lang - - idx xs i = xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ Prelude.map (proj r) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p - TM -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - - getField t i = case t of - R rs -> idx rs i - RP _ r -> getField r i - TM -> TM - _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t -``` - -===The special term constructors=== - -The three forms introduced by the compiler may a need special -explanation. - -Global constants -``` - Term ::= CId ; -``` -are shorthands for complex terms. They are produced by the -compiler by (iterated) common subexpression elimination. -They are often more powerful than hand-devised code sharing in the source -code. They could be computed off-line by replacing each identifier by -its definition. - -Prefix-suffix tables -``` - Term ::= "(" String "+" Term ")" ; -``` -represent tables of word forms divided to the longest common prefix -and its array of suffixes. In the example grammar above, we have -``` - Sleep = [("sleep" + ["s",""])] -``` -which in fact is equal to the array of full forms -``` - ["sleeps", "sleep"] -``` -The power of this construction comes from the fact that suffix sets -tend to be repeated in a language, and can therefore be collected -by common subexpression elimination. It is this technique that -explains the used syntax rather than the more accurate -``` - "(" String "+" [String] ")" -``` -since we want the suffix part to be a ``Term`` for the optimization to -take effect. - -The most curious construct of GFCC is the parameter array alias, -``` - Term ::= "(" Term "@" Term ")"; -``` -This form is used as the value of parameter records, such as the type -``` - {n : Number ; p : Person} -``` -The problem with parameter records is their double role. -They can be used like parameter values, as indices in selection, -``` - VP.s ! {n = Sg ; p = P3} -``` -but also as records, from which parameters can be projected: -``` - {n = Sg ; p = P3}.n -``` -Whichever use is selected as primary, a prohibitively complex -case expression must be generated at compilation to GFCC to get the -other use. The adopted -solution is to generate a pair containing both a parameter value index -and an array of indices of record fields. For instance, if we have -``` - param Number = Sg | Pl ; Person = P1 | P2 | P3 ; -``` -we get the encoding -``` - {n = Sg ; p = P3} ---> (2 @ [0,2]) -``` -The GFCC computation rules are essentially -``` - (t ! (i @ _)) = (t ! i) - ((_ @ r) ! j) =(r ! j) -``` - - -==Compiling to GFCC== - -Compilation to GFCC is performed by the GF grammar compiler, and -GFCC interpreters need not know what it does. For grammar writers, -however, it might be interesting to know what happens to the grammars -in the process. - -The compilation phases are the following -+ translate GF source to GFC, as always in GF -+ undo GFC back-end optimizations -+ perform the ``values`` optimization to normalize tables -+ create a symbol table mapping the GFC parameter and record types to - fixed-size arrays, and parameter values and record labels to integers -+ traverse the linearization rules replacing parameters and labels by integers -+ reorganize the created GFC grammar so that it has just one abstract syntax - and one concrete syntax per language -+ apply UTF8 encoding to the grammar, if not yet applied (this is told by the - ``coding`` flag) -+ translate the GFC syntax tree to a GFCC syntax tree, using a simple - compositional mapping -+ perform the word-suffix optimization on GFCC linearization terms -+ perform subexpression elimination on each concrete syntax module -+ print out the GFCC code - - -Notice that a major part of the compilation is done within GFC, so that -GFC-related tasks (such as parser generation) could be performed by -using the old algorithms. - - -===Problems in GFCC compilation=== - -Two major problems had to be solved in compiling GFC to GFCC: -- consistent order of tables and records, to permit the array translation -- run-time variables in complex parameter values. - - -The current implementation is still experimental and may fail -to generate correct code. Any errors remaining are likely to be -related to the two problems just mentioned. - -The order problem is solved in different ways for tables and records. -For tables, the ``values`` optimization of GFC already manages to -maintain a canonical order. But this order can be destroyed by the -``share`` optimization. To make sure that GFCC compilation works properly, -it is safest to recompile the GF grammar by using the ``values`` -optimization flag. - -Records can be canonically ordered by sorting them by labels. -In fact, this was done in connection of the GFCC work as a part -of the GFC generation, to guarantee consistency. This means that -e.g. the ``s`` field will in general no longer appear as the first -field, even if it does so in the GF source code. But relying on the -order of fields in a labelled record would be misplaced anyway. - -The canonical form of records is further complicated by lock fields, -i.e. dummy fields of form ``lock_C = <>``, which are added to grammar -libraries to force intensionality of linearization types. The problem -is that the absence of a lock field only generates a warning, not -an error. Therefore a GFC grammar can contain objects of the same -type with and without a lock field. This problem was solved in GFCC -generation by just removing all lock fields (defined as fields whose -type is the empty record type). This has the further advantage of -(slightly) reducing the grammar size. More importantly, it is safe -to remove lock fields, because they are never used in computation, -and because intensional types are only needed in grammars reused -as libraries, not in grammars used at runtime. - -While the order problem is rather bureaucratic in nature, run-time -variables are an interesting problem. They arise in the presence -of complex parameter values, created by argument-taking constructors -and parameter records. To give an example, consider the GF parameter -type system -``` - Number = Sg | Pl ; - Person = P1 | P2 | P3 ; - Agr = Ag Number Person ; -``` -The values can be translated to integers in the expected way, -``` - Sg = 0, Pl = 1 - P1 = 0, P2 = 1, P3 = 2 - Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2, - Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5 -``` -However, an argument of ``Agr`` can be a run-time variable, as in -``` - Ag np.n P3 -``` -This expression must first be translated to a case expression, -``` - case np.n of { - 0 => 2 ; - 1 => 5 - } -``` -which can then be translated to the GFCC term -``` - ([2,5] ! ($0 ! $1)) -``` -assuming that the variable ``np`` is the first argument and that its -``Number`` field is the second in the record. - -This transformation of course has to be performed recursively, since -there can be several run-time variables in a parameter value: -``` - Ag np.n np.p -``` -A similar transformation would be possible to deal with the double -role of parameter records discussed above. Thus the type -``` - RNP = {n : Number ; p : Person} -``` -could be uniformly translated into the set ``{0,1,2,3,4,5}`` -as ``Agr`` above. Selections would be simple instances of indexing. -But any projection from the record should be translated into -a case expression, -``` - rnp.n ===> - case rnp of { - 0 => 0 ; - 1 => 0 ; - 2 => 0 ; - 3 => 1 ; - 4 => 1 ; - 5 => 1 - } -``` -To avoid the code bloat resulting from this, we chose the alias representation -which is easy enough to deal with in interpreters. - - -===The representation of linearization types=== - -Linearization types (``lincat``) are not needed when generating with -GFCC, but they have been added to enable parser generation directly from -GFCC. The linearization type definitions are shown as a part of the -concrete syntax, by using terms to represent types. Here is the table -showing how different linearization types are encoded. -``` - P* = size(P) -- parameter type - {_ : I ; __ : R}* = (I* @ R*) -- record of parameters - {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- other record - (P => T)* = [T* ,...,T*] -- size(P) times - Str* = () -``` -The category symbols are prefixed with two underscores (``__``). -For example, the linearization type ``present/CatEng.NP`` is -translated as follows: -``` - NP = { - a : { -- 6 = 2*3 values - n : {ParamX.Number} ; -- 2 values - p : {ParamX.Person} -- 3 values - } ; - s : {ResEng.Case} => Str -- 3 values - } - - __NP = [(6@[2,3]),[(),(),()]] -``` - - - - -===Running the compiler and the GFCC interpreter=== - -GFCC generation is a part of the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF since September 2006. To invoke the compiler, the flag -``-printer=gfcc`` to the command -``pm = print_multi`` is used. It is wise to recompile the grammar from -source, since previously compiled libraries may not obey the canonical -order of records. To ``strip`` the grammar before -GFCC translation removes unnecessary interface references. -Here is an example, performed in -[example/bronzeage ../../../../../examples/bronzeage]. -``` - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf - strip - pm -printer=gfcc | wf bronze.gfcc -``` - - - -==The reference interpreter== - -The reference interpreter written in Haskell consists of the following files: -``` - -- source file for BNFC - GFCC.cf -- labelled BNF grammar of gfcc - - -- files generated by BNFC - AbsGFCC.hs -- abstrac syntax of gfcc - ErrM.hs -- error monad used internally - LexGFCC.hs -- lexer of gfcc files - ParGFCC.hs -- parser of gfcc files and syntax trees - PrintGFCC.hs -- printer of gfcc files and syntax trees - - -- hand-written files - DataGFCC.hs -- post-parser grammar creation, linearization and evaluation - GenGFCC.hs -- random and exhaustive generation, generate-and-test parsing - RunGFCC.hs -- main function - a simple command interpreter -``` -It is included in the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF, in the subdirectory [``GF/src/GF/Canon/GFCC`` ../]. - -To compile the interpreter, type -``` - make gfcc -``` -in ``GF/src``. To run it, type -``` - ./gfcc -``` -The available commands are -- ``gr ``: generate a number of random trees in category. - and show their linearizations in all languages -- ``grt ``: generate a number of random trees in category. - and show the trees and their linearizations in all languages -- ``gt ``: generate a number of trees in category from smallest, - and show their linearizations in all languages -- ``gtt ``: generate a number of trees in category from smallest, - and show the trees and their linearizations in all languages -- ``p ``: "parse", i.e. generate trees until match or - until the given number have been generated -- ````: linearize tree in all languages, also showing full records -- ``quit``: terminate the system cleanly - - -==Interpreter in C++== - -A base-line interpreter in C++ has been started. -Its main functionality is random generation of trees and linearization of them. - -Here are some results from running the different interpreters, compared -to running the same grammar in GF, saved in ``.gfcm`` format. -The grammar contains the English, German, and Norwegian -versions of Bronzeage. The experiment was carried out on -Ubuntu Linux laptop with 1.5 GHz Intel centrino processor. - -|| | GF | gfcc(hs) | gfcc++ | -| program size | 7249k | 803k | 113k -| grammar size | 336k | 119k | 119k -| read grammar | 1150ms | 510ms | 100ms -| generate 222 | 9500ms | 450ms | 800ms -| memory | 21M | 10M | 20M - - - -To summarize: -- going from GF to gfcc is a major win in both code size and efficiency -- going from Haskell to C++ interpreter is not a win yet, because of a space - leak in the C++ version - - - -==Some things to do== - -Interpreter in Java. - -Parsing via MCFG -- the FCFG format can possibly be simplified -- parser grammars should be saved in files to make interpreters easier - - -Hand-written parsers for GFCC grammars to reduce code size -(and efficiency?) of interpreters. - -Binary format and/or file compression of GFCC output. - -Syntax editor based on GFCC. - -Rewriting of resource libraries in order to exploit the -word-suffix sharing better (depth-one tables, as in FM). - - - diff --git a/src/PGF/doc/syntax.txt b/src/PGF/doc/syntax.txt deleted file mode 100644 index db8f7c149..000000000 --- a/src/PGF/doc/syntax.txt +++ /dev/null @@ -1,180 +0,0 @@ -GFCC Syntax - - -==Syntax of GFCC files== - -The parser syntax is very simple, as defined in BNF: -``` - Grm. Grammar ::= [RExp] ; - - App. RExp ::= "(" CId [RExp] ")" ; - AId. RExp ::= CId ; - AInt. RExp ::= Integer ; - AStr. RExp ::= String ; - AFlt. RExp ::= Double ; - AMet. RExp ::= "?" ; - - terminator RExp "" ; - - token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; -``` -While a parser and a printer can be generated for many languages -from this grammar by using the BNF Converter, a parser is also -easy to write by hand using recursive descent. - - -==Syntax of well-formed GFCC code== - -Here is a summary of well-formed syntax, -with a comment on the semantics of each construction. -``` - Grammar ::= - ("grammar" CId CId*) -- abstract syntax name and concrete syntax names - "(" "flags" Flag* ")" -- global and abstract flags - "(" "abstract" Abstract ")" -- abstract syntax - "(" "concrete" Concrete* ")" -- concrete syntaxes - - Abstract ::= - "(" "fun" FunDef* ")" -- function definitions - "(" "cat" CatDef* ")" -- category definitions - - Concrete ::= - "(" CId -- language name - "flags" Flag* -- concrete flags - "lin" LinDef* -- linearization rules - "oper" LinDef* -- operations (macros) - "lincat" LinDef* -- linearization type definitions - "lindef" LinDef* -- linearization default definitions - "printname" LinDef* -- printname definitions - "param" LinDef* -- lincats with labels and parameter value names - ")" - - Flag ::= "(" CId String ")" -- flag and value - FunDef ::= "(" CId Type Exp ")" -- function, type, and definition - CatDef ::= "(" CId Hypo* ")" -- category and context - LinDef ::= "(" CId Term ")" -- function and definition - - Type ::= - "(" CId -- value category - "(" "H" Hypo* ")" -- argument context - "(" "X" Exp* ")" ")" -- arguments (of dependent value type) - - Exp ::= - "(" CId -- function - "(" "B" CId* ")" -- bindings - "(" "X" Exp* ")" ")" -- arguments - | CId -- variable - | "?" -- metavariable - | "(" "Eq" Equation* ")" -- group of pattern equations - | Integer -- integer literal (non-negative) - | Float -- floating-point literal (non-negative) - | String -- string literal (in double quotes) - - Hypo ::= "(" CId Type ")" -- variable and type - - Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list - - Term ::= - "(" "R" Term* ")" -- array (record or table) - | "(" "S" Term* ")" -- concatenated sequence - | "(" "FV" Term* ")" -- free variant list - | "(" "P" Term Term ")" -- access to index (projection or selection) - | "(" "W" String Term ")" -- token prefix with suffix list - | "(" "A" Integer ")" -- pointer to subtree - | String -- token (in double quotes) - | Integer -- index in array - | CId -- macro constant - | "?" -- metavariable -``` - - -==GFCC interpreter== - -The first phase in interpreting GFCC is to parse a GFCC file and -build an internal abstract syntax representation, as specified -in the previous section. - -With this representation, linearization can be performed by -a straightforward function from expressions (``Exp``) to terms -(``Term``). All expressions except groups of pattern equations -can be linearized. - -Here is a reference Haskell implementation of linearization: -``` - linExp :: GFCC -> CId -> Exp -> Term - linExp gfcc lang tree@(DTr _ at trees) = case at of - AC fun -> comp (map lin trees) $ look fun - AS s -> R [K (show s)] -- quoted - AI i -> R [K (show i)] - AF d -> R [K (show d)] - AM -> TM - where - lin = linExp gfcc lang - comp = compute gfcc lang - look = lookLin gfcc lang -``` -TODO: bindings must be supported. - -Terms resulting from linearization are evaluated in -call-by-value order, with two environments needed: -- the grammar (a concrete syntax) to give the global constants -- an array of terms to give the subtree linearizations - - -The Haskell implementation works as follows: -``` -compute :: GFCC -> CId -> [Term] -> Term -> Term -compute gfcc lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ map comp ts - V i -> idx args (fromInteger i) -- already computed - F c -> comp $ look c -- not computed (if contains V) - FV ts -> FV $ Prelude.map comp ts - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - _ -> trm - - look = lookOper gfcc lang - - idx xs i = xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ Prelude.map (proj r) ts - (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p - TM -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - - getField t i = case t of - R rs -> idx rs i - RP _ r -> getField r i - TM -> TM - _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t -``` -The result of linearization is usually a record, which is realized as -a string using the following algorithm. -``` - realize :: Term -> String - realize trm = case trm of - R (t:_) -> realize t - S ss -> unwords $ map realize ss - K s -> s - W s t -> s ++ realize t - FV (t:_) -> realize t -- TODO: all variants - TM -> "?" -``` -Notice that realization always picks the first field of a record. -If a linearization type has more than one field, the first field -does not necessarily contain the desired string. -Also notice that the order of record fields in GFCC is not necessarily -the same as in GF source. -- cgit v1.2.3