diff options
| author | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
| commit | f85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch) | |
| tree | 667b886a5e3a4b026a63d4e3597f32497d824761 /src/PGF | |
| parent | d88a865faff59c98fc91556ff8700b10ee5f2df8 (diff) | |
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
Diffstat (limited to 'src/PGF')
31 files changed, 0 insertions, 6298 deletions
diff --git a/src/PGF/Binary.hs b/src/PGF/Binary.hs deleted file mode 100644 index e4ed98424..000000000 --- a/src/PGF/Binary.hs +++ /dev/null @@ -1,199 +0,0 @@ -module PGF.Binary where
-
-import PGF.CId
-import PGF.Data
-import Data.Binary
-import Data.Binary.Put
-import Data.Binary.Get
-import qualified Data.ByteString as BS
-import qualified Data.Map as Map
-import qualified Data.IntMap as IntMap
-import qualified Data.Set as Set
-import Control.Monad
-
-pgfMajorVersion, pgfMinorVersion :: Word16
-(pgfMajorVersion, pgfMinorVersion) = (1,0)
-
-instance Binary PGF where
- put pgf = putWord16be pgfMajorVersion >>
- putWord16be pgfMinorVersion >>
- put ( absname pgf, cncnames pgf
- , gflags pgf
- , abstract pgf, concretes pgf
- )
- get = do v1 <- getWord16be
- v2 <- getWord16be
- absname <- get
- cncnames <- get
- gflags <- get
- abstract <- get
- concretes <- get
- return (PGF{ absname=absname, cncnames=cncnames
- , gflags=gflags
- , abstract=abstract, concretes=concretes
- })
-
-instance Binary CId where
- put (CId bs) = put bs
- get = liftM CId get
-
-instance Binary Abstr where
- put abs = put (aflags abs, funs abs, cats abs)
- get = do aflags <- get
- funs <- get
- cats <- get
- let catfuns = Map.mapWithKey (\cat _ -> [f | (f, (DTyp _ c _,_,_)) <- Map.toList funs, c==cat]) cats
- return (Abstr{ aflags=aflags
- , funs=funs, cats=cats
- , catfuns=catfuns
- })
-
-instance Binary Concr where
- put cnc = put ( cflags cnc, lins cnc, opers cnc
- , lincats cnc, lindefs cnc
- , printnames cnc, paramlincats cnc
- , parser cnc
- )
- get = do cflags <- get
- lins <- get
- opers <- get
- lincats <- get
- lindefs <- get
- printnames <- get
- paramlincats <- get
- parser <- get
- return (Concr{ cflags=cflags, lins=lins, opers=opers
- , lincats=lincats, lindefs=lindefs
- , printnames=printnames
- , paramlincats=paramlincats
- , parser=parser
- })
-
-instance Binary Alternative where
- put (Alt v x) = put v >> put x
- get = liftM2 Alt get get
-
-instance Binary Term where
- put (R es) = putWord8 0 >> put es
- put (S es) = putWord8 1 >> put es
- put (FV es) = putWord8 2 >> put es
- put (P e v) = putWord8 3 >> put (e,v)
- put (W e v) = putWord8 4 >> put (e,v)
- put (C i ) = putWord8 5 >> put i
- put (TM i ) = putWord8 6 >> put i
- put (F f) = putWord8 7 >> put f
- put (V i) = putWord8 8 >> put i
- put (K (KS s)) = putWord8 9 >> put s
- put (K (KP d vs)) = putWord8 10 >> put (d,vs)
-
- get = do tag <- getWord8
- case tag of
- 0 -> liftM R get
- 1 -> liftM S get
- 2 -> liftM FV get
- 3 -> liftM2 P get get
- 4 -> liftM2 W get get
- 5 -> liftM C get
- 6 -> liftM TM get
- 7 -> liftM F get
- 8 -> liftM V get
- 9 -> liftM (K . KS) get
- 10 -> liftM2 (\d vs -> K (KP d vs)) get get
- _ -> decodingError
-
-instance Binary Expr where
- put (EAbs b x exp) = putWord8 0 >> put (b,x,exp)
- put (EApp e1 e2) = putWord8 1 >> put (e1,e2)
- put (ELit (LStr s)) = putWord8 2 >> put s
- put (ELit (LFlt d)) = putWord8 3 >> put d
- put (ELit (LInt i)) = putWord8 4 >> put i
- put (EMeta i) = putWord8 5 >> put i
- put (EFun f) = putWord8 6 >> put f
- put (EVar i) = putWord8 7 >> put i
- put (ETyped e ty) = putWord8 8 >> put (e,ty)
- get = do tag <- getWord8
- case tag of
- 0 -> liftM3 EAbs get get get
- 1 -> liftM2 EApp get get
- 2 -> liftM (ELit . LStr) get
- 3 -> liftM (ELit . LFlt) get
- 4 -> liftM (ELit . LInt) get
- 5 -> liftM EMeta get
- 6 -> liftM EFun get
- 7 -> liftM EVar get
- 8 -> liftM2 ETyped get get
- _ -> decodingError
-
-instance Binary Patt where
- put (PApp f ps) = putWord8 0 >> put (f,ps)
- put (PVar x) = putWord8 1 >> put x
- put PWild = putWord8 2
- put (PLit (LStr s)) = putWord8 3 >> put s
- put (PLit (LFlt d)) = putWord8 4 >> put d
- put (PLit (LInt i)) = putWord8 5 >> put i
- get = do tag <- getWord8
- case tag of
- 0 -> liftM2 PApp get get
- 1 -> liftM PVar get
- 2 -> return PWild
- 3 -> liftM (PLit . LStr) get
- 4 -> liftM (PLit . LFlt) get
- 5 -> liftM (PLit . LInt) get
- _ -> decodingError
-
-instance Binary Equation where
- put (Equ ps e) = put (ps,e)
- get = liftM2 Equ get get
-
-instance Binary Type where
- put (DTyp hypos cat exps) = put (hypos,cat,exps)
- get = liftM3 DTyp get get get
-
-instance Binary BindType where
- put Explicit = putWord8 0
- put Implicit = putWord8 1
- get = do tag <- getWord8
- case tag of
- 0 -> return Explicit
- 1 -> return Implicit
- _ -> decodingError
-
-instance Binary FFun where
- put (FFun fun prof lins) = put (fun,prof,lins)
- get = liftM3 FFun get get get
-
-instance Binary FSymbol where
- put (FSymCat n l) = putWord8 0 >> put (n,l)
- put (FSymLit n l) = putWord8 1 >> put (n,l)
- put (FSymKS ts) = putWord8 2 >> put ts
- put (FSymKP d vs) = putWord8 3 >> put (d,vs)
- get = do tag <- getWord8
- case tag of
- 0 -> liftM2 FSymCat get get
- 1 -> liftM2 FSymLit get get
- 2 -> liftM FSymKS get
- 3 -> liftM2 (\d vs -> FSymKP d vs) get get
- _ -> decodingError
-
-instance Binary Production where
- put (FApply ruleid args) = putWord8 0 >> put (ruleid,args)
- put (FCoerce fcat) = putWord8 1 >> put fcat
- get = do tag <- getWord8
- case tag of
- 0 -> liftM2 FApply get get
- 1 -> liftM FCoerce get
- _ -> decodingError
-
-instance Binary ParserInfo where
- put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p)
- get = do functions <- get
- sequences <- get
- productions0<- get
- totalCats <- get
- startCats <- get
- return (ParserInfo{functions=functions,sequences=sequences
- ,productions0=productions0
- ,productions =filterProductions productions0
- ,totalCats=totalCats,startCats=startCats})
-
-decodingError = fail "This PGF file was compiled with different version of GF"
diff --git a/src/PGF/BuildParser.hs b/src/PGF/BuildParser.hs deleted file mode 100644 index 23e0725c6..000000000 --- a/src/PGF/BuildParser.hs +++ /dev/null @@ -1,76 +0,0 @@ ---------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- FCFG parsing, parser information ------------------------------------------------------------------------------ - -module PGF.BuildParser where - -import GF.Data.SortedList -import GF.Data.Assoc -import PGF.CId -import PGF.Data -import PGF.Parsing.FCFG.Utilities - -import Data.Array.IArray -import Data.Maybe -import qualified Data.IntMap as IntMap -import qualified Data.Map as Map -import qualified Data.Set as Set -import Debug.Trace - - -data ParserInfoEx - = ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)] - , leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)] - , leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)] - , grammarToks :: [String] - } - ------------------------------------------------------------- --- parser information - -getLeftCornerTok pinfo (FFun _ _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymKS [tok] -> [tok] - _ -> [] - | otherwise = [] - where - syms = (sequences pinfo) ! (lins ! 0) - -getLeftCornerCat pinfo args (FFun _ _ lins) - | inRange (bounds syms) 0 = case syms ! 0 of - FSymCat d _ -> let cat = args !! d - in case IntMap.lookup cat (productions pinfo) of - Just set -> cat : [cat' | FCoerce cat' <- Set.toList set] - Nothing -> [cat] - _ -> [] - | otherwise = [] - where - syms = (sequences pinfo) ! (lins ! 0) - -buildParserInfo :: ParserInfo -> ParserInfoEx -buildParserInfo pinfo = - ParserInfoEx { epsilonRules = epsilonrules - , leftcornerCats = leftcorncats - , leftcornerTokens = leftcorntoks - , grammarToks = grammartoks - } - - where epsilonrules = [ (ruleid,args,cat) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , let (FFun _ _ lins) = (functions pinfo) ! ruleid - , not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ] - leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat)) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ] - leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat)) - | (cat,set) <- IntMap.toList (productions pinfo) - , (FApply ruleid args) <- Set.toList set - , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ] - grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin] diff --git a/src/PGF/CId.hs b/src/PGF/CId.hs deleted file mode 100644 index fea304d9d..000000000 --- a/src/PGF/CId.hs +++ /dev/null @@ -1,55 +0,0 @@ -module PGF.CId (CId(..), - mkCId, wildCId, - readCId, showCId, - - -- utils - pCId, pIdent, ppCId) where - -import Control.Monad -import qualified Data.ByteString.Char8 as BS -import Data.Char -import qualified Text.ParserCombinators.ReadP as RP -import qualified Text.PrettyPrint as PP - - --- | An abstract data type that represents --- identifiers for functions and categories in PGF. -newtype CId = CId BS.ByteString deriving (Eq,Ord) - -wildCId :: CId -wildCId = CId (BS.singleton '_') - --- | Creates a new identifier from 'String' -mkCId :: String -> CId -mkCId s = CId (BS.pack s) - --- | Reads an identifier from 'String'. The function returns 'Nothing' if the string is not valid identifier. -readCId :: String -> Maybe CId -readCId s = case [x | (x,cs) <- RP.readP_to_S pCId s, all isSpace cs] of - [x] -> Just x - _ -> Nothing - --- | Renders the identifier as 'String' -showCId :: CId -> String -showCId (CId x) = BS.unpack x - -instance Show CId where - showsPrec _ = showString . showCId - -instance Read CId where - readsPrec _ = RP.readP_to_S pCId - -pCId :: RP.ReadP CId -pCId = do s <- pIdent - if s == "_" - then RP.pfail - else return (mkCId s) - -pIdent :: RP.ReadP String -pIdent = liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest) - where - isIdentFirst c = c == '_' || isLetter c - isIdentRest c = c == '_' || c == '\'' || isAlphaNum c - -ppCId :: CId -> PP.Doc -ppCId = PP.text . showCId diff --git a/src/PGF/Check.hs b/src/PGF/Check.hs deleted file mode 100644 index 58b66cfe4..000000000 --- a/src/PGF/Check.hs +++ /dev/null @@ -1,173 +0,0 @@ -module PGF.Check (checkPGF) where - -import PGF.CId -import PGF.Data -import PGF.Macros -import GF.Data.ErrM - -import qualified Data.Map as Map -import Control.Monad -import Debug.Trace - -checkPGF :: PGF -> Err (PGF,Bool) -checkPGF pgf = do - (cs,bs) <- mapM (checkConcrete pgf) - (Map.assocs (concretes pgf)) >>= return . unzip - return (pgf {concretes = Map.fromAscList cs}, and bs) - - --- errors are non-fatal; replace with 'fail' to change this -msg s = trace s (return ()) - -andMapM :: Monad m => (a -> m Bool) -> [a] -> m Bool -andMapM f xs = mapM f xs >>= return . and - -labelBoolErr :: String -> Err (x,Bool) -> Err (x,Bool) -labelBoolErr ms iob = do - (x,b) <- iob - if b then return (x,b) else (msg ms >> return (x,b)) - - -checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool) -checkConcrete pgf (lang,cnc) = - labelBoolErr ("happened in language " ++ showCId lang) $ do - (rs,bs) <- mapM checkl (Map.assocs (lins cnc)) >>= return . unzip - return ((lang,cnc{lins = Map.fromAscList rs}),and bs) - where - checkl = checkLin pgf lang - -checkLin :: PGF -> CId -> (CId,Term) -> Err ((CId,Term),Bool) -checkLin pgf lang (f,t) = - labelBoolErr ("happened in function " ++ showCId f) $ do - (t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t - return ((f,t'),b) - -inferTerm :: [CType] -> Term -> Err (Term,CType) -inferTerm args trm = case trm of - K _ -> returnt str - C i -> returnt $ ints i - V i -> do - testErr (i < length args) ("too large index " ++ show i) - returnt $ args !! i - S ts -> do - (ts',tys) <- mapM infer ts >>= return . unzip - let tys' = filter (/=str) tys - testErr (null tys') - ("expected Str in " ++ show trm ++ " not " ++ unwords (map show tys')) - return (S ts',str) - R ts -> do - (ts',tys) <- mapM infer ts >>= return . unzip - return $ (R ts',tuple tys) - P t u -> do - (t',tt) <- infer t - (u',tu) <- infer u - case tt of - R tys -> case tu of - R vs -> infer $ foldl P t' [P u' (C i) | i <- [0 .. length vs - 1]] - --- R [v] -> infer $ P t v - --- R (v:vs) -> infer $ P (head tys) (R vs) - - C i -> do - testErr (i < length tys) - ("required more than " ++ show i ++ " fields in " ++ show (R tys)) - return (P t' u', tys !! i) -- record: index must be known - _ -> do - let typ = head tys - testErr (all (==typ) tys) ("different types in table " ++ show trm) - return (P t' u', typ) -- table: types must be same - _ -> Bad $ "projection from " ++ show t ++ " : " ++ show tt - FV [] -> returnt tm0 ---- - FV (t:ts) -> do - (t',ty) <- infer t - (ts',tys) <- mapM infer ts >>= return . unzip - testErr (all (eqType True ty) tys) ("different types in variants " ++ show trm) - return (FV (t':ts'),ty) - W s r -> infer r - _ -> Bad ("no type inference for " ++ show trm) - where - returnt ty = return (trm,ty) - infer = inferTerm args - -checkTerm :: LinType -> Term -> Err (Term,Bool) -checkTerm (args,val) trm = case inferTerm args trm of - Ok (t,ty) -> if eqType False ty val - then return (t,True) - else do - msg ("term: " ++ show trm ++ - "\nexpected type: " ++ show val ++ - "\ninferred type: " ++ show ty) - return (t,False) - Bad s -> do - msg s - return (trm,False) - --- symmetry in (Ints m == Ints n) is all we can use in variants - -eqType :: Bool -> CType -> CType -> Bool -eqType symm inf exp = case (inf,exp) of - (C k, C n) -> if symm then True else k <= n -- only run-time corr. - (R rs,R ts) -> length rs == length ts && and [eqType symm r t | (r,t) <- zip rs ts] - (TM _, _) -> True ---- for variants [] ; not safe - _ -> inf == exp - --- should be in a generic module, but not in the run-time DataGFCC - -type CType = Term -type LinType = ([CType],CType) - -tuple :: [CType] -> CType -tuple = R - -ints :: Int -> CType -ints = C - -str :: CType -str = S [] - -lintype :: PGF -> CId -> CId -> LinType -lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of - (cs,c) -> (map vlinc cs, linc c) ---- HOAS - where - linc = lookLincat pgf lang - vlinc (0,c) = linc c - vlinc (i,c) = case linc c of - R ts -> R (ts ++ replicate i str) - -inline :: PGF -> CId -> Term -> Term -inline pgf lang t = case t of - F c -> inl $ look c - _ -> composSafeOp inl t - where - inl = inline pgf lang - look = lookLin pgf lang - -composOp :: Monad m => (Term -> m Term) -> Term -> m Term -composOp f trm = case trm of - R ts -> liftM R $ mapM f ts - S ts -> liftM S $ mapM f ts - FV ts -> liftM FV $ mapM f ts - P t u -> liftM2 P (f t) (f u) - W s t -> liftM (W s) $ f t - _ -> return trm - -composSafeOp :: (Term -> Term) -> Term -> Term -composSafeOp f = maybe undefined id . composOp (return . f) - --- from GF.Data.Oper - -maybeErr :: String -> Maybe a -> Err a -maybeErr s = maybe (Bad s) Ok - -testErr :: Bool -> String -> Err () -testErr cond msg = if cond then return () else Bad msg - -errVal :: a -> Err a -> a -errVal a = err (const a) id - -errIn :: String -> Err a -> Err a -errIn msg = err (\s -> Bad (s ++ "\nOCCURRED IN\n" ++ msg)) return - -err :: (String -> b) -> (a -> b) -> Err a -> b -err d f e = case e of - Ok a -> f a - Bad s -> d s diff --git a/src/PGF/Data.hs b/src/PGF/Data.hs deleted file mode 100644 index 38027e96e..000000000 --- a/src/PGF/Data.hs +++ /dev/null @@ -1,95 +0,0 @@ -module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type, module PGF.PMCFG) where - -import PGF.CId -import PGF.Expr hiding (Value, Env, Tree) -import PGF.Type -import PGF.PMCFG - -import qualified Data.Map as Map -import qualified Data.Set as Set -import qualified Data.IntMap as IntMap -import Data.List - --- internal datatypes for PGF - --- | An abstract data type representing multilingual grammar --- in Portable Grammar Format. -data PGF = PGF { - absname :: CId , - cncnames :: [CId] , - gflags :: Map.Map CId String, -- value of a global flag - abstract :: Abstr , - concretes :: Map.Map CId Concr - } - -data Abstr = Abstr { - aflags :: Map.Map CId String, -- value of a flag - funs :: Map.Map CId (Type,Int,[Equation]), -- type, arrity and definition of function - cats :: Map.Map CId [Hypo], -- context of a cat - catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup) - } - -data Concr = Concr { - cflags :: Map.Map CId String, -- value of a flag - lins :: Map.Map CId Term, -- lin of a fun - opers :: Map.Map CId Term, -- oper generated by subex elim - lincats :: Map.Map CId Term, -- lin type of a cat - lindefs :: Map.Map CId Term, -- lin default of a cat - printnames :: Map.Map CId Term, -- printname of a cat or a fun - paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names - parser :: Maybe ParserInfo -- parser - } - -data Term = - R [Term] - | P Term Term - | S [Term] - | K Tokn - | V Int - | C Int - | F CId - | FV [Term] - | W String Term - | TM String - deriving (Eq,Ord,Show) - -data Tokn = - KS String - | KP [String] [Alternative] - deriving (Eq,Ord,Show) - - --- merge two GFCCs; fails is differens absnames; priority to second arg - -unionPGF :: PGF -> PGF -> PGF -unionPGF one two = case absname one of - n | n == wildCId -> two -- extending empty grammar - | n == absname two -> one { -- extending grammar with same abstract - concretes = Map.union (concretes two) (concretes one), - cncnames = union (cncnames one) (cncnames two) - } - _ -> one -- abstracts don't match ---- print error msg - -emptyPGF :: PGF -emptyPGF = PGF { - absname = wildCId, - cncnames = [] , - gflags = Map.empty, - abstract = error "empty grammar, no abstract", - concretes = Map.empty - } - --- | This is just a 'CId' with the language name. --- A language name is the identifier that you write in the --- top concrete or abstract module in GF after the --- concrete/abstract keyword. Example: --- --- > abstract Lang = ... --- > concrete LangEng of Lang = ... -type Language = CId - -readLanguage :: String -> Maybe Language -readLanguage = readCId - -showLanguage :: Language -> String -showLanguage = showCId diff --git a/src/PGF/Editor.hs b/src/PGF/Editor.hs deleted file mode 100644 index 3f69da170..000000000 --- a/src/PGF/Editor.hs +++ /dev/null @@ -1,241 +0,0 @@ -module PGF.Editor ( - State, -- datatype -- type-annotated possibly open tree with a focus - Dict, -- datatype -- abstract syntax information optimized for editing - Position, -- datatype -- path from top to focus - new, -- :: Type -> State -- create new State - refine, -- :: Dict -> CId -> State -> State -- refine focus with CId - replace, -- :: Dict -> Tree -> State -> State -- replace focus with Tree - delete, -- :: State -> State -- replace focus with ? - goNextMeta, -- :: State -> State -- move focus to next ? node - goNext, -- :: State -> State -- move to next node - goTop, -- :: State -> State -- move focus to the top (=root) - goPosition, -- :: Position -> State -> State -- move focus to given position - mkPosition, -- :: [Int] -> Position -- list of choices (top = []) - showPosition,-- :: Position -> [Int] -- readable position - focusType, -- :: State -> Type -- get the type of focus - stateTree, -- :: State -> Tree -- get the current tree - isMetaFocus, -- :: State -> Bool -- whether focus is ? - allMetas, -- :: State -> [(Position,Type)] -- all ?s and their positions - prState, -- :: State -> String -- print state, focus marked * - refineMenu, -- :: Dict -> State -> [CId] -- get refinement menu - pgf2dict -- :: PGF -> Dict -- create editing Dict from PGF - ) where - -import PGF.Data -import PGF.CId -import qualified Data.Map as M -import Debug.Trace ---- - --- API - -new :: Type -> State -new (DTyp _ t _) = etree2state (uETree t) - -refine :: Dict -> CId -> State -> State -refine dict f = replaceInState (mkRefinement dict f) - -replace :: Dict -> Tree -> State -> State -replace dict t = replaceInState (tree2etree dict t) - -delete :: State -> State -delete s = replaceInState (uETree (typ (tree s))) s - -goNextMeta :: State -> State -goNextMeta s = - if isComplete s then s - else let s1 = goNext s in if isMetaFocus s1 - then s1 else goNextMeta s1 - -isComplete :: State -> Bool -isComplete s = isc (tree s) where - isc t = case atom t of - AMeta _ -> False - ACon _ -> all isc (children t) - -goTop :: State -> State -goTop = navigate (const top) - -goPosition :: [Int] -> State -> State -goPosition p s = s{position = p} - -mkPosition :: [Int] -> Position -mkPosition = id - -refineMenu :: Dict -> State -> [CId] -refineMenu dict s = maybe [] (map fst) $ M.lookup (focusBType s) (refines dict) - -focusType :: State -> Type -focusType s = btype2type (focusBType s) - -stateTree :: State -> Tree -stateTree = etree2tree . tree - -pgf2dict :: PGF -> Dict -pgf2dict pgf = Dict (M.fromAscList fus) refs where - fus = [(f,mkFType ty) | (f,(ty,_)) <- M.toList (funs abs)] - refs = M.fromAscList [(c, fusTo c) | (c,_) <- M.toList (cats abs)] - fusTo c = [(f,ty) | (f,ty@(_,k)) <- fus, k==c] ---- quadratic - mkFType (DTyp hyps c _) = ([k | Hyp _ (DTyp _ k _) <- hyps],c) ----dep types - abs = abstract pgf - -etree2tree :: ETree -> Tree -etree2tree t = case atom t of - ACon f -> Fun f (map etree2tree (children t)) - AMeta i -> Meta i - -tree2etree :: Dict -> Tree -> ETree -tree2etree dict t = case t of - Fun f _ -> annot (look f) t - where - annot (tys,ty) tr = case tr of - Fun f trs -> ETree (ACon f) ty [annt t tr | (t,tr) <- zip tys trs] - Meta i -> ETree (AMeta i) ty [] - annt ty tr = case tr of - Fun _ _ -> tree2etree dict tr - Meta _ -> annot ([],ty) tr - look f = maybe undefined id $ M.lookup f (functs dict) - -prState :: State -> String -prState s = unlines [replicate i ' ' ++ f | (i,f) <- pr [] (tree s)] where - pr i t = - (ind i,prAtom i (atom t)) : concat [pr (sub j i) c | (j,c) <- zip [0..] (children t)] - prAtom i a = prFocus i ++ case a of - ACon f -> prCId f - AMeta i -> "?" ++ show i - prFocus i = if i == position s then "*" else "" - ind i = 2 * length i - sub j i = i ++ [j] - -showPosition :: Position -> [Int] -showPosition = id - -allMetas :: State -> [(Position,Type)] -allMetas s = [(reverse p, btype2type ty) | (p,ty) <- metas [] (tree s)] where - metas p t = - (if isMetaAtom (atom t) then [(p,typ t)] else []) ++ - concat [metas (i:p) u | (i,u) <- zip [0..] (children t)] - ----- Trees and navigation - -data ETree = ETree { - atom :: Atom, - typ :: BType, - children :: [ETree] - } - deriving Show - -data Atom = - ACon CId - | AMeta Int - deriving Show - -btype2type :: BType -> Type -btype2type t = DTyp [] t [] - -uETree :: BType -> ETree -uETree ty = ETree (AMeta 0) ty [] - -data State = State { - position :: Position, - tree :: ETree - } - deriving Show - -type Position = [Int] - -top :: Position -top = [] - -up :: Position -> Position -up p = case p of - _:_ -> init p - _ -> p - -down :: Position -> Position -down = (++[0]) - -left :: Position -> Position -left p = case p of - _:_ | last p > 0 -> init p ++ [last p - 1] - _ -> top - -right :: Position -> Position -right p = case p of - _:_ -> init p ++ [last p + 1] - _ -> top - -etree2state :: ETree -> State -etree2state = State top - -doInState :: (ETree -> ETree) -> State -> State -doInState f s = s{tree = change (position s) (tree s)} where - change p t = case p of - [] -> f t - n:ns -> let (ts1,t0:ts2) = splitAt n (children t) in - t{children = ts1 ++ [change ns t0] ++ ts2} - -subtree :: Position -> ETree -> ETree -subtree p t = case p of - [] -> t - n:ns -> subtree ns (children t !! n) - -focus :: State -> ETree -focus s = subtree (position s) (tree s) - -focusBType :: State -> BType -focusBType s = typ (focus s) - -navigate :: (Position -> Position) -> State -> State -navigate p s = s{position = p (position s)} - --- p is a fix-point aspect of state change -untilFix :: Eq a => (State -> a) -> (State -> Bool) -> (State -> State) -> State -> State -untilFix p b f s = - if b s - then s - else let fs = f s in if p fs == p s - then s - else untilFix p b f fs - -untilPosition :: (State -> Bool) -> (State -> State) -> State -> State -untilPosition = untilFix position - -goNext :: State -> State -goNext s = case focus s of - st | not (null (children st)) -> navigate down s - _ -> findSister s - where - findSister s = case s of - s' | null (position s') -> s' - s' | hasYoungerSisters s' -> navigate right s' - s' -> findSister (navigate up s') - hasYoungerSisters s = case position s of - p@(_:_) -> length (children (focus (navigate up s))) > last p + 1 - _ -> False - -isMetaFocus :: State -> Bool -isMetaFocus s = isMetaAtom (atom (focus s)) - -isMetaAtom :: Atom -> Bool -isMetaAtom a = case a of - AMeta _ -> True - _ -> False - -replaceInState :: ETree -> State -> State -replaceInState t = doInState (const t) - - -------- - -type BType = CId ----dep types -type FType = ([BType],BType) ----dep types - -data Dict = Dict { - functs :: M.Map CId FType, - refines :: M.Map BType [(CId,FType)] - } - -mkRefinement :: Dict -> CId -> ETree -mkRefinement dict f = ETree (ACon f) val (map uETree args) where - (args,val) = maybe undefined id $ M.lookup f (functs dict) - diff --git a/src/PGF/Expr.hs b/src/PGF/Expr.hs deleted file mode 100644 index cf0cb79aa..000000000 --- a/src/PGF/Expr.hs +++ /dev/null @@ -1,355 +0,0 @@ -module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(..),
- readExpr, showExpr, pExpr, pBinds, ppExpr, ppPatt,
-
- mkApp, unApp,
- mkStr, unStr,
- mkInt, unInt,
- mkDouble, unDouble,
- mkMeta, isMeta,
-
- normalForm,
-
- -- needed in the typechecker
- Value(..), Env, Funs, eval, apply,
-
- MetaId,
-
- -- helpers
- pMeta,pStr,pArg,pLit,freshName,ppMeta,ppLit,ppParens
- ) where
-
-import PGF.CId
-import PGF.Type
-
-import Data.Char
-import Data.Maybe
-import Data.List as List
-import Data.Map as Map hiding (showTree)
-import Control.Monad
-import qualified Text.PrettyPrint as PP
-import qualified Text.ParserCombinators.ReadP as RP
-
-data Literal =
- LStr String -- ^ string constant
- | LInt Integer -- ^ integer constant
- | LFlt Double -- ^ floating point constant
- deriving (Eq,Ord,Show)
-
-type MetaId = Int
-
-data BindType =
- Explicit
- | Implicit
- deriving (Eq,Ord,Show)
-
--- | Tree is the abstract syntax representation of a given sentence
--- in some concrete syntax. Technically 'Tree' is a type synonym
--- of 'Expr'.
-type Tree = Expr
-
--- | An expression in the abstract syntax of the grammar. It could be
--- both parameter of a dependent type or an abstract syntax tree for
--- for some sentence.
-data Expr =
- EAbs BindType CId Expr -- ^ lambda abstraction
- | EApp Expr Expr -- ^ application
- | ELit Literal -- ^ literal
- | EMeta {-# UNPACK #-} !MetaId -- ^ meta variable
- | EFun CId -- ^ function or data constructor
- | EVar {-# UNPACK #-} !Int -- ^ variable with de Bruijn index
- | ETyped Expr Type -- ^ local type signature
- | EImplArg Expr -- ^ implicit argument in expression
- deriving (Eq,Ord,Show)
-
--- | The pattern is used to define equations in the abstract syntax of the grammar.
-data Patt =
- PApp CId [Patt] -- ^ application. The identifier should be constructor i.e. defined with 'data'
- | PLit Literal -- ^ literal
- | PVar CId -- ^ variable
- | PWild -- ^ wildcard
- | PImplArg Patt -- ^ implicit argument in pattern
- deriving (Eq,Ord)
-
--- | The equation is used to define lambda function as a sequence
--- of equations with pattern matching. The list of 'Expr' represents
--- the patterns and the second 'Expr' is the function body for this
--- equation.
-data Equation =
- Equ [Patt] Expr
- deriving (Eq,Ord)
-
--- | parses 'String' as an expression
-readExpr :: String -> Maybe Expr
-readExpr s = case [x | (x,cs) <- RP.readP_to_S pExpr s, all isSpace cs] of
- [x] -> Just x
- _ -> Nothing
-
--- | renders expression as 'String'. The list
--- of identifiers is the list of all free variables
--- in the expression in order reverse to the order
--- of binding.
-showExpr :: [CId] -> Expr -> String
-showExpr vars = PP.render . ppExpr 0 vars
-
-instance Read Expr where
- readsPrec _ = RP.readP_to_S pExpr
-
--- | Constructs an expression by applying a function to a list of expressions
-mkApp :: CId -> [Expr] -> Expr
-mkApp f es = foldl EApp (EFun f) es
-
--- | Decomposes an expression into application of function
-unApp :: Expr -> Maybe (CId,[Expr])
-unApp = extract []
- where
- extract es (EFun f) = Just (f,es)
- extract es (EApp e1 e2) = extract (e2:es) e1
- extract es _ = Nothing
-
--- | Constructs an expression from string literal
-mkStr :: String -> Expr
-mkStr s = ELit (LStr s)
-
--- | Decomposes an expression into string literal
-unStr :: Expr -> Maybe String
-unStr (ELit (LStr s)) = Just s
-unStr _ = Nothing
-
--- | Constructs an expression from integer literal
-mkInt :: Integer -> Expr
-mkInt i = ELit (LInt i)
-
--- | Decomposes an expression into integer literal
-unInt :: Expr -> Maybe Integer
-unInt (ELit (LInt i)) = Just i
-unInt _ = Nothing
-
--- | Constructs an expression from real number literal
-mkDouble :: Double -> Expr
-mkDouble f = ELit (LFlt f)
-
--- | Decomposes an expression into real number literal
-unDouble :: Expr -> Maybe Double
-unDouble (ELit (LFlt f)) = Just f
-unDouble _ = Nothing
-
--- | Constructs an expression which is meta variable
-mkMeta :: Expr
-mkMeta = EMeta 0
-
--- | Checks whether an expression is a meta variable
-isMeta :: Expr -> Bool
-isMeta (EMeta _) = True
-isMeta _ = False
-
------------------------------------------------------
--- Parsing
------------------------------------------------------
-
-pExpr :: RP.ReadP Expr
-pExpr = RP.skipSpaces >> (pAbs RP.<++ pTerm)
- where
- pTerm = do f <- pFactor
- RP.skipSpaces
- as <- RP.sepBy pArg RP.skipSpaces
- return (foldl EApp f as)
-
- pAbs = do xs <- RP.between (RP.char '\\') (RP.skipSpaces >> RP.string "->") pBinds
- e <- pExpr
- return (foldr (\(b,x) e -> EAbs b x e) e xs)
-
-pBinds :: RP.ReadP [(BindType,CId)]
-pBinds = do xss <- RP.sepBy1 (RP.skipSpaces >> pBind) (RP.skipSpaces >> RP.char ',')
- return (concat xss)
- where
- pCIdOrWild = pCId `mplus` (RP.char '_' >> return wildCId)
-
- pBind =
- do x <- pCIdOrWild
- return [(Explicit,x)]
- `mplus`
- RP.between (RP.char '{')
- (RP.skipSpaces >> RP.char '}')
- (RP.sepBy1 (RP.skipSpaces >> pCIdOrWild >>= \id -> return (Implicit,id)) (RP.skipSpaces >> RP.char ','))
-
-pArg = fmap EImplArg (RP.between (RP.char '{') (RP.char '}') pExpr)
- RP.<++
- pFactor
-
-pFactor = fmap EFun pCId
- RP.<++ fmap ELit pLit
- RP.<++ fmap EMeta pMeta
- RP.<++ RP.between (RP.char '(') (RP.char ')') pExpr
- RP.<++ RP.between (RP.char '<') (RP.char '>') pTyped
-
-pTyped = do RP.skipSpaces
- e <- pExpr
- RP.skipSpaces
- RP.char ':'
- RP.skipSpaces
- ty <- pType
- return (ETyped e ty)
-
-pMeta = do RP.char '?'
- return 0
-
-pLit :: RP.ReadP Literal
-pLit = pNum RP.<++ liftM LStr pStr
-
-pNum = do x <- RP.munch1 isDigit
- ((RP.char '.' >> RP.munch1 isDigit >>= \y -> return (LFlt (read (x++"."++y))))
- RP.<++
- (return (LInt (read x))))
-
-pStr = RP.char '"' >> (RP.manyTill (pEsc RP.<++ RP.get) (RP.char '"'))
- where
- pEsc = RP.char '\\' >> RP.get
-
-
------------------------------------------------------
--- Printing
------------------------------------------------------
-
-ppExpr :: Int -> [CId] -> Expr -> PP.Doc
-ppExpr d scope (EAbs b x e) = let (bs,xs,e1) = getVars [] [] (EAbs b x e)
- in ppParens (d > 1) (PP.char '\\' PP.<>
- PP.hsep (PP.punctuate PP.comma (reverse (List.zipWith ppBind bs xs))) PP.<+>
- PP.text "->" PP.<+>
- ppExpr 1 (xs++scope) e1)
- where
- getVars bs xs (EAbs b x e) = getVars (b:bs) ((freshName x xs):xs) e
- getVars bs xs e = (bs,xs,e)
-ppExpr d scope (EApp e1 e2) = ppParens (d > 3) ((ppExpr 3 scope e1) PP.<+> (ppExpr 4 scope e2))
-ppExpr d scope (ELit l) = ppLit l
-ppExpr d scope (EMeta n) = ppMeta n
-ppExpr d scope (EFun f) = ppCId f
-ppExpr d scope (EVar i) = ppCId (scope !! i)
-ppExpr d scope (ETyped e ty)= PP.char '<' PP.<> ppExpr 0 scope e PP.<+> PP.colon PP.<+> ppType 0 scope ty PP.<> PP.char '>'
-ppExpr d scope (EImplArg e) = PP.braces (ppExpr 0 scope e)
-
-ppPatt :: Int -> [CId] -> Patt -> ([CId],PP.Doc)
-ppPatt d scope (PApp f ps) = let (scope',ds) = mapAccumL (ppPatt 2) scope ps
- in (scope',ppParens (not (List.null ps) && d > 1) (ppCId f PP.<+> PP.hsep ds))
-ppPatt d scope (PLit l) = (scope,ppLit l)
-ppPatt d scope (PVar f) = (f:scope,ppCId f)
-ppPatt d scope PWild = (scope,PP.char '_')
-ppPatt d scope (PImplArg p) = let (scope',d) = ppPatt 0 scope p
- in (scope',PP.braces d)
-
-ppBind Explicit x = ppCId x
-ppBind Implicit x = PP.braces (ppCId x)
-
-ppLit (LStr s) = PP.text (show s)
-ppLit (LInt n) = PP.integer n
-ppLit (LFlt d) = PP.double d
-
-ppMeta :: MetaId -> PP.Doc
-ppMeta n
- | n == 0 = PP.char '?'
- | otherwise = PP.char '?' PP.<> PP.int n
-
-ppParens True = PP.parens
-ppParens False = id
-
-freshName :: CId -> [CId] -> CId
-freshName x xs0 = loop 1 x
- where
- xs = wildCId : xs0
-
- loop i y
- | elem y xs = loop (i+1) (mkCId (show x++show i))
- | otherwise = y
-
-
------------------------------------------------------
--- Computation
------------------------------------------------------
-
--- | Compute an expression to normal form
-normalForm :: Funs -> Int -> Env -> Expr -> Expr
-normalForm funs k env e = value2expr k (eval funs env e)
- where
- value2expr i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr i) vs)
- value2expr i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr i) vs)
- value2expr i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr i) vs)
- value2expr i (VSusp j env vs k) = value2expr i (k (VGen j vs))
- value2expr i (VLit l) = ELit l
- value2expr i (VClosure env (EAbs b x e)) = EAbs b x (value2expr (i+1) (eval funs ((VGen i []):env) e))
- value2expr i (VImplArg v) = EImplArg (value2expr i v)
-
-data Value
- = VApp CId [Value]
- | VLit Literal
- | VMeta {-# UNPACK #-} !MetaId Env [Value]
- | VSusp {-# UNPACK #-} !MetaId Env [Value] (Value -> Value)
- | VGen {-# UNPACK #-} !Int [Value]
- | VClosure Env Expr
- | VImplArg Value
-
-type Funs = Map.Map CId (Type,Int,[Equation]) -- type and def of a fun
-type Env = [Value]
-
-eval :: Funs -> Env -> Expr -> Value
-eval funs env (EVar i) = env !! i
-eval funs env (EFun f) = case Map.lookup f funs of
- Just (_,a,eqs) -> if a == 0
- then case eqs of
- Equ [] e : _ -> eval funs [] e
- _ -> VApp f []
- else VApp f []
- Nothing -> error ("unknown function "++showCId f)
-eval funs env (EApp e1 e2) = apply funs env e1 [eval funs env e2]
-eval funs env (EAbs b x e) = VClosure env (EAbs b x e)
-eval funs env (EMeta i) = VMeta i env []
-eval funs env (ELit l) = VLit l
-eval funs env (ETyped e _) = eval funs env e
-eval funs env (EImplArg e) = VImplArg (eval funs env e)
-
-apply :: Funs -> Env -> Expr -> [Value] -> Value
-apply funs env e [] = eval funs env e
-apply funs env (EVar i) vs = applyValue funs (env !! i) vs
-apply funs env (EFun f) vs = case Map.lookup f funs of
- Just (_,a,eqs) -> if a <= length vs
- then let (as,vs') = splitAt a vs
- in match funs f eqs as vs'
- else VApp f vs
- Nothing -> error ("unknown function "++showCId f)
-apply funs env (EApp e1 e2) vs = apply funs env e1 (eval funs env e2 : vs)
-apply funs env (EAbs _ x e) (v:vs) = apply funs (v:env) e vs
-apply funs env (EMeta i) vs = VMeta i env vs
-apply funs env (ELit l) vs = error "literal of function type"
-apply funs env (ETyped e _) vs = apply funs env e vs
-apply funs env (EImplArg _) vs = error "implicit argument in function position"
-
-applyValue funs v [] = v
-applyValue funs (VApp f vs0) vs = apply funs [] (EFun f) (vs0++vs)
-applyValue funs (VLit _) vs = error "literal of function type"
-applyValue funs (VMeta i env vs0) vs = VMeta i env (vs0++vs)
-applyValue funs (VGen i vs0) vs = VGen i (vs0++vs)
-applyValue funs (VSusp i env vs0 k) vs = VSusp i env vs0 (\v -> applyValue funs (k v) vs)
-applyValue funs (VClosure env (EAbs b x e)) (v:vs) = apply funs (v:env) e vs
-applyValue funs (VImplArg _) vs = error "implicit argument in function position"
-
------------------------------------------------------
--- Pattern matching
------------------------------------------------------
-
-match :: Funs -> CId -> [Equation] -> [Value] -> [Value] -> Value
-match funs f eqs as0 vs0 =
- case eqs of
- [] -> VApp f (as0++vs0)
- (Equ ps res):eqs -> tryMatches eqs ps as0 res []
- where
- tryMatches eqs [] [] res env = apply funs env res vs0
- tryMatches eqs (p:ps) (a:as) res env = tryMatch p a env
- where
- tryMatch (PVar x ) (v ) env = tryMatches eqs ps as res (v:env)
- tryMatch (PWild ) (_ ) env = tryMatches eqs ps as res env
- tryMatch (p ) (VMeta i envi vs ) env = VSusp i envi vs (\v -> tryMatch p v env)
- tryMatch (p ) (VGen i vs ) env = VApp f (as0++vs0)
- tryMatch (p ) (VSusp i envi vs k) env = VSusp i envi vs (\v -> tryMatch p (k v) env)
- tryMatch (PApp f1 ps1) (VApp f2 vs2 ) env | f1 == f2 = tryMatches eqs (ps1++ps) (vs2++as) res env
- tryMatch (PLit l1 ) (VLit l2 ) env | l1 == l2 = tryMatches eqs ps as res env
- tryMatch (PImplArg p ) (VImplArg v ) env = tryMatch p v env
- tryMatch _ _ env = match funs f eqs as0 vs0
-
diff --git a/src/PGF/Expr.hs-boot b/src/PGF/Expr.hs-boot deleted file mode 100644 index 34a62a410..000000000 --- a/src/PGF/Expr.hs-boot +++ /dev/null @@ -1,28 +0,0 @@ -module PGF.Expr where
-
-import PGF.CId
-import qualified Text.PrettyPrint as PP
-import qualified Text.ParserCombinators.ReadP as RP
-
-data Expr
-
-instance Eq Expr
-instance Ord Expr
-instance Show Expr
-
-
-data BindType = Explicit | Implicit
-
-instance Eq BindType
-instance Ord BindType
-instance Show BindType
-
-
-pArg :: RP.ReadP Expr
-pBinds :: RP.ReadP [(BindType,CId)]
-
-ppExpr :: Int -> [CId] -> Expr -> PP.Doc
-
-freshName :: CId -> [CId] -> CId
-
-ppParens :: Bool -> PP.Doc -> PP.Doc
diff --git a/src/PGF/Generate.hs b/src/PGF/Generate.hs deleted file mode 100644 index 5add00a78..000000000 --- a/src/PGF/Generate.hs +++ /dev/null @@ -1,66 +0,0 @@ -module PGF.Generate where - -import PGF.CId -import PGF.Data -import PGF.Macros -import PGF.TypeCheck - -import qualified Data.Map as M -import System.Random - --- generate an infinite list of trees exhaustively -generate :: PGF -> Type -> Maybe Int -> [Expr] -generate pgf ty@(DTyp _ cat _) dp = filter (\e -> case checkExpr pgf e ty of - Left _ -> False - Right _ -> True ) - (concatMap (\i -> gener i cat) depths) - where - gener 0 c = [EFun f | (f, ([],_)) <- fns c] - gener i c = [ - tr | - (f, (cs,_)) <- fns c, - let alts = map (gener (i-1)) cs, - ts <- combinations alts, - let tr = foldl EApp (EFun f) ts, - depth tr >= i - ] - fns c = [(f,catSkeleton ty) | (f,ty) <- functionsToCat pgf c] - depths = maybe [0 ..] (\d -> [0..d]) dp - --- generate an infinite list of trees randomly -genRandom :: StdGen -> PGF -> Type -> [Expr] -genRandom gen pgf ty@(DTyp _ cat _) = filter (\e -> case checkExpr pgf e ty of - Left _ -> False - Right _ -> True ) - (genTrees (randomRs (0.0, 1.0 :: Double) gen) cat) - where - timeout = 47 -- give up - - genTrees ds0 cat = - let (ds,ds2) = splitAt (timeout+1) ds0 -- for time out, else ds - (t,k) = genTree ds cat - in (if k>timeout then id else (t:)) - (genTrees ds2 cat) -- else (drop k ds) - - genTree rs = gett rs where - gett ds cid | cid == cidString = (ELit (LStr "foo"), 1) - gett ds cid | cid == cidInt = (ELit (LInt 12345), 1) - gett ds cid | cid == cidFloat = (ELit (LFlt 12345), 1) - gett [] _ = (ELit (LStr "TIMEOUT"), 1) ---- - gett ds cat = case fns cat of - [] -> (EMeta 0,1) - fs -> let - d:ds2 = ds - (f,args) = getf d fs - (ts,k) = getts ds2 args - in (foldl EApp (EFun f) ts, k+1) - getf d fs = let lg = (length fs) in - fs !! (floor (d * fromIntegral lg)) - getts ds cats = case cats of - c:cs -> let - (t, k) = gett ds c - (ts,ks) = getts (drop k ds) cs - in (t:ts, k + ks) - _ -> ([],0) - - fns cat = [(f,(fst (catSkeleton ty))) | (f,ty) <- functionsToCat pgf cat] diff --git a/src/PGF/Linearize.hs b/src/PGF/Linearize.hs deleted file mode 100644 index fdd4cecb5..000000000 --- a/src/PGF/Linearize.hs +++ /dev/null @@ -1,166 +0,0 @@ -{-# LANGUAGE ParallelListComp #-} -module PGF.Linearize - (linearizes,realize,realizes,linTree, linTreeMark,linearizesMark) where - -import PGF.CId -import PGF.Data -import PGF.Macros -import PGF.Tree - -import Control.Monad -import qualified Data.Map as Map -import Data.List - -import Debug.Trace - --- linearization and computation of concrete PGF Terms - -linearizes :: PGF -> CId -> Expr -> [String] -linearizes pgf lang = realizes . linTree pgf lang - -realize :: Term -> String -realize = concat . take 1 . realizes - -realizes :: Term -> [String] -realizes = map (unwords . untokn) . realizest - -realizest :: Term -> [[Tokn]] -realizest trm = case trm of - R ts -> realizest (ts !! 0) - S ss -> map concat $ combinations $ map realizest ss - K t -> [[t]] - W s t -> [[KS (s ++ r)] | [KS r] <- realizest t] - FV ts -> concatMap realizest ts - TM s -> [[KS s]] - _ -> [[KS $ "REALIZE_ERROR " ++ show trm]] ---- debug - -untokn :: [Tokn] -> [String] -untokn ts = case ts of - KP d _ : [] -> d - KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss - KS s : ws -> s : untokn ws - [] -> [] - where - sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of - v:_ -> v - _ -> d - --- Lifts all variants to the top level (except those in macros). -liftVariants :: Term -> [Term] -liftVariants = f - where - f (R ts) = liftM R $ mapM f ts - f (P t1 t2) = liftM2 P (f t1) (f t2) - f (S ts) = liftM S $ mapM f ts - f (FV ts) = ts >>= f - f (W s t) = liftM (W s) $ f t - f t = return t - -linTree :: PGF -> CId -> Expr -> Term -linTree pgf lang e = lin (expr2tree e) Nothing - where - cnc = lookMap (error "no lang") lang (concretes pgf) - - lin (Abs xs e ) mty = case lin e Nothing of - R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs) - TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs) - lin (Fun fun es) mty = case Map.lookup fun (funs (abstract pgf)) of - Just (DTyp hyps _ _,_,_) -> let argVariants = sequence [liftVariants (lin e (Just ty)) | e <- es | (_,_,ty) <- hyps] - in variants [compute pgf lang args $ lookMap tm0 fun (lins cnc) | args <- argVariants] - Nothing -> tm0 - lin (Lit (LStr s)) mty = R [kks (show s)] -- quoted - lin (Lit (LInt i)) mty = R [kks (show i)] - lin (Lit (LFlt d)) mty = R [kks (show d)] - lin (Var x) mty = case mty of - Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc)) - Nothing -> TM (showCId x) - lin (Meta i) mty = case mty of - Just (DTyp _ cat _) -> compute pgf lang [K (KS (show i))] (lookMap tm0 cat (lindefs cnc)) - Nothing -> TM (show i) - -variants :: [Term] -> Term -variants ts = case ts of - [t] -> t - _ -> FV ts - -unvariants :: Term -> [Term] -unvariants t = case t of - FV ts -> ts - _ -> [t] - -compute :: PGF -> CId -> [Term] -> Term -> Term -compute pgf lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ map comp ts - V i -> idx args i -- already computed - F c -> comp $ look c -- not computed (if contains argvar) - FV ts -> FV $ map comp ts - S ts -> S $ filter (/= S []) $ map comp ts - _ -> trm - - look = lookOper pgf lang - - idx xs i = if i > length xs - 1 - then trace - ("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0 - else xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ map (proj r) ts - (FV ts, _ ) -> FV $ map (\t -> proj t p) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> i - TM _ -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666 - - getField t i = case t of - R rs -> idx rs i - TM s -> TM s - _ -> error ("ERROR in grammar compiler: field from " ++ show t) t - ---------- --- markup with tree positions - -linearizesMark :: PGF -> CId -> Expr -> [String] -linearizesMark pgf lang = realizes . linTreeMark pgf lang - -linTreeMark :: PGF -> CId -> Expr -> Term -linTreeMark pgf lang = lin [] . expr2tree - where - lin p (Abs xs e ) = case lin p e of - R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs) - TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs) - lin p (Fun fun es) = - let argVariants = - mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es) - in variants [mark (fun,p) $ compute pgf lang args $ look fun | - args <- argVariants] - lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted - lin p (Lit (LInt i)) = mark p $ R [kks (show i)] - lin p (Lit (LFlt d)) = mark p $ R [kks (show d)] - lin p (Var x) = mark p $ TM (showCId x) - lin p (Meta i) = mark p $ TM (show i) - - look = lookLin pgf lang - - mark :: Show a => a -> Term -> Term - mark p t = case t of - R ts -> R $ map (mark p) ts - FV ts -> R $ map (mark p) ts - S ts -> S $ bracket p ts - K s -> S $ bracket p [t] - W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts] - _ -> t - -- otherwise in normal form - - bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"] - sub p i = p ++ [i] diff --git a/src/PGF/Macros.hs b/src/PGF/Macros.hs deleted file mode 100644 index af25de025..000000000 --- a/src/PGF/Macros.hs +++ /dev/null @@ -1,154 +0,0 @@ -module PGF.Macros where - -import PGF.CId -import PGF.Data -import Control.Monad -import qualified Data.Map as Map -import qualified Data.Array as Array -import Data.Maybe -import Data.List - --- operations for manipulating PGF grammars and objects - -mapConcretes :: (Concr -> Concr) -> PGF -> PGF -mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) } - -lookLin :: PGF -> CId -> CId -> Term -lookLin pgf lang fun = - lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes pgf - -lookOper :: PGF -> CId -> CId -> Term -lookOper pgf lang fun = - lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes pgf - -lookLincat :: PGF -> CId -> CId -> Term -lookLincat pgf lang fun = - lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes pgf - -lookParamLincat :: PGF -> CId -> CId -> Term -lookParamLincat pgf lang fun = - lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes pgf - -lookPrintName :: PGF -> CId -> CId -> Term -lookPrintName pgf lang fun = - lookMap tm0 fun $ printnames $ lookMap (error "no lang") lang $ concretes pgf - -lookType :: PGF -> CId -> Type -lookType pgf f = - case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of - (ty,_,_) -> ty - -lookDef :: PGF -> CId -> [Equation] -lookDef pgf f = - case lookMap (error $ "lookDef " ++ show f) f (funs (abstract pgf)) of - (_,a,eqs) -> eqs - -isData :: PGF -> CId -> Bool -isData pgf f = - case Map.lookup f (funs (abstract pgf)) of - Just (_,_,[]) -> True -- the encoding of data constrs - _ -> False - -lookValCat :: PGF -> CId -> CId -lookValCat pgf = valCat . lookType pgf - -lookParser :: PGF -> CId -> Maybe ParserInfo -lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser - -lookStartCat :: PGF -> CId -lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat")) - [gflags pgf, aflags (abstract pgf)] - -lookGlobalFlag :: PGF -> CId -> String -lookGlobalFlag pgf f = - lookMap "?" f (gflags pgf) - -lookAbsFlag :: PGF -> CId -> String -lookAbsFlag pgf f = - lookMap "?" f (aflags (abstract pgf)) - -lookConcr :: PGF -> CId -> Concr -lookConcr pgf cnc = - lookMap (error $ "Missing concrete syntax: " ++ showCId cnc) cnc $ concretes pgf - -lookConcrFlag :: PGF -> CId -> CId -> Maybe String -lookConcrFlag pgf lang f = Map.lookup f $ cflags $ lookConcr pgf lang - -functionsToCat :: PGF -> CId -> [(CId,Type)] -functionsToCat pgf cat = - [(f,ty) | f <- fs, Just (ty,_,_) <- [Map.lookup f $ funs $ abstract pgf]] - where - fs = lookMap [] cat $ catfuns $ abstract pgf - -missingLins :: PGF -> CId -> [CId] -missingLins pgf lang = [c | c <- fs, not (hasl c)] where - fs = Map.keys $ funs $ abstract pgf - hasl = hasLin pgf lang - -hasLin :: PGF -> CId -> CId -> Bool -hasLin pgf lang f = Map.member f $ lins $ lookConcr pgf lang - -restrictPGF :: (CId -> Bool) -> PGF -> PGF -restrictPGF cond pgf = pgf { - abstract = abstr { - funs = restrict $ funs $ abstr, - cats = restrict $ cats $ abstr - } - } ---- restrict concrs also, might be needed - where - restrict = Map.filterWithKey (\c _ -> cond c) - abstr = abstract pgf - -depth :: Expr -> Int -depth (EAbs _ _ t) = depth t -depth (EApp e1 e2) = max (depth e1) (depth e2) + 1 -depth _ = 1 - -cftype :: [CId] -> CId -> Type -cftype args val = DTyp [(Explicit,wildCId,cftype [] arg) | arg <- args] val [] - -typeOfHypo :: Hypo -> Type -typeOfHypo (_,_,ty) = ty - -catSkeleton :: Type -> ([CId],CId) -catSkeleton ty = case ty of - DTyp hyps val _ -> ([valCat (typeOfHypo h) | h <- hyps],val) - -typeSkeleton :: Type -> ([(Int,CId)],CId) -typeSkeleton ty = case ty of - DTyp hyps val _ -> ([(contextLength ty, valCat ty) | h <- hyps, let ty = typeOfHypo h],val) - -valCat :: Type -> CId -valCat ty = case ty of - DTyp _ val _ -> val - -contextLength :: Type -> Int -contextLength ty = case ty of - DTyp hyps _ _ -> length hyps - -term0 :: CId -> Term -term0 = TM . showCId - -tm0 :: Term -tm0 = TM "?" - -kks :: String -> Term -kks = K . KS - --- lookup with default value -lookMap :: (Show i, Ord i) => a -> i -> Map.Map i a -> a -lookMap d c m = Map.findWithDefault d c m - ---- from Operations -combinations :: [[a]] -> [[a]] -combinations t = case t of - [] -> [[]] - aa:uu -> [a:u | a <- aa, u <- combinations uu] - -isLiteralCat :: CId -> Bool -isLiteralCat = (`elem` [cidString, cidFloat, cidInt, cidVar]) - -cidString = mkCId "String" -cidInt = mkCId "Int" -cidFloat = mkCId "Float" -cidVar = mkCId "__gfVar" diff --git a/src/PGF/Morphology.hs b/src/PGF/Morphology.hs deleted file mode 100644 index 9eee71a97..000000000 --- a/src/PGF/Morphology.hs +++ /dev/null @@ -1,26 +0,0 @@ -module PGF.Morphology(Lemma,Analysis,Morpho, - buildMorpho, - lookupMorpho,fullFormLexicon) where - -import PGF.ShowLinearize (collectWords) -import PGF.Data -import PGF.CId - -import qualified Data.Map as Map -import Data.List (intersperse) - --- these 4 definitions depend on the datastructure used - -type Lemma = CId -type Analysis = String - -newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)]) - -buildMorpho :: PGF -> Language -> Morpho -buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang)) - -lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)] -lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo - -fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])] -fullFormLexicon (Morpho mo) = Map.toList mo diff --git a/src/PGF/PMCFG.hs b/src/PGF/PMCFG.hs deleted file mode 100644 index c657e3d17..000000000 --- a/src/PGF/PMCFG.hs +++ /dev/null @@ -1,119 +0,0 @@ -module PGF.PMCFG where
-
-import PGF.CId
-import PGF.Expr
-
-import qualified Data.Map as Map
-import qualified Data.Set as Set
-import qualified Data.IntMap as IntMap
-import Data.Array.IArray
-import Data.Array.Unboxed
-import Text.PrettyPrint
-
-type FCat = Int
-type FIndex = Int
-type FPointPos = Int
-data FSymbol
- = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
- | FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
- | FSymKS [String]
- | FSymKP [String] [Alternative]
- deriving (Eq,Ord,Show)
-type Profile = [Int]
-data Production
- = FApply {-# UNPACK #-} !FunId [FCat]
- | FCoerce {-# UNPACK #-} !FCat
- | FConst Expr [String]
- deriving (Eq,Ord,Show)
-data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
-type FSeq = Array FPointPos FSymbol
-type FunId = Int
-type SeqId = Int
-
-data Alternative =
- Alt [String] [String]
- deriving (Eq,Ord,Show)
-
-data ParserInfo
- = ParserInfo { functions :: Array FunId FFun
- , sequences :: Array SeqId FSeq
- , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
- , productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
- , startCats :: Map.Map CId [FCat]
- , totalCats :: {-# UNPACK #-} !FCat
- }
-
-
-fcatString, fcatInt, fcatFloat, fcatVar :: Int
-fcatString = (-1)
-fcatInt = (-2)
-fcatFloat = (-3)
-fcatVar = (-4)
-
-isLiteralFCat :: FCat -> Bool
-isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])
-
-ppPMCFG :: ParserInfo -> Doc
-ppPMCFG pinfo =
- text "productions" $$
- nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
- text "functions" $$
- nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
- text "sequences" $$
- nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$
- text "startcats" $$
- nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo))))
-
-ppProduction (fcat,FApply funid args) =
- ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
-ppProduction (fcat,FCoerce arg) =
- ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
-ppProduction (fcat,FConst _ ss) =
- ppFCat fcat <+> text "->" <+> ppStrs ss
-
-ppFun (funid,FFun fun _ arr) =
- ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
-
-ppSeq (seqid,seq) =
- ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
-
-ppStartCat (id,fcats) =
- ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats)))
-
-ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
-ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
-ppSymbol (FSymKS ts) = ppStrs ts
-ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
-
-ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
-
-ppStrs ss = doubleQuotes (hsep (map text ss))
-
-ppFCat fcat
- | fcat == fcatString = text "CString"
- | fcat == fcatInt = text "CInt"
- | fcat == fcatFloat = text "CFloat"
- | fcat == fcatVar = text "CVar"
- | otherwise = char 'C' <> int fcat
-
-ppFunId funid = char 'F' <> int funid
-ppSeqId seqid = char 'S' <> int seqid
-
-
-filterProductions = closure
- where
- closure prods0
- | IntMap.size prods == IntMap.size prods0 = prods
- | otherwise = closure prods
- where
- prods = IntMap.mapMaybe (filterProdSet prods0) prods0
-
- filterProdSet prods set0
- | Set.null set = Nothing
- | otherwise = Just set
- where
- set = Set.filter (filterRule prods) set0
-
- filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
- filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
- filterRule prods _ = True
diff --git a/src/PGF/Paraphrase.hs b/src/PGF/Paraphrase.hs deleted file mode 100644 index 58d15b2e8..000000000 --- a/src/PGF/Paraphrase.hs +++ /dev/null @@ -1,112 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : Paraphrase --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- Generate parapharases with def definitions. ------------------------------------------------------------------------------ - -module PGF.Paraphrase ( - paraphrase, - paraphraseN - ) where - -import PGF.Data -import PGF.Tree -import PGF.Macros (lookDef,isData) -import PGF.CId - -import Data.List (nub,sort,group) -import qualified Data.Map as Map - -import Debug.Trace ---- - -paraphrase :: PGF -> Expr -> [Expr] -paraphrase pgf = nub . paraphraseN 2 pgf - -paraphraseN :: Int -> PGF -> Expr -> [Expr] -paraphraseN i pgf = map tree2expr . paraphraseN' i pgf . expr2tree - -paraphraseN' :: Int -> PGF -> Tree -> [Tree] -paraphraseN' 0 _ t = [t] -paraphraseN' i pgf t = - step i t ++ [Fun g ts' | Fun g ts <- step (i-1) t, ts' <- sequence (map par ts)] - where - par = paraphraseN' (i-1) pgf - step 0 t = [t] - step i t = let stept = step (i-1) t in stept ++ concat [def u | u <- stept] - def = fromDef pgf - -fromDef :: PGF -> Tree -> [Tree] -fromDef pgf t@(Fun f ts) = defDown t ++ defUp t where - defDown t = [subst g u | let equ = equsFrom f, (u,g) <- match equ ts, trequ "U" f equ] - defUp t = [subst g u | equ <- equsTo f, (u,g) <- match [equ] ts, trequ "D" f equ] - - equsFrom f = [(ps,d) | Just equs <- [lookup f equss], (Fun _ ps,d) <- equs] - - equsTo f = [c | (_,equs) <- equss, c <- casesTo f equs] - - casesTo f equs = - [(ps,p) | (p,d@(Fun g ps)) <- equs, g==f, - isClosed d || (length equs == 1 && isLinear d)] - - equss = [(f,[(Fun f (map patt2tree ps), expr2tree d) | (Equ ps d) <- eqs]) | - (f,(_,_,eqs)) <- Map.assocs (funs (abstract pgf)), not (null eqs)] - - trequ s f e = True ----trace (s ++ ": " ++ show f ++ " " ++ show e) True - -subst :: Subst -> Tree -> Tree -subst g e = case e of - Fun f ts -> Fun f (map substg ts) - Var x -> maybe e id $ lookup x g - _ -> e - where - substg = subst g - -type Subst = [(CId,Tree)] - --- this applies to pattern, hence don't need to consider abstractions -isClosed :: Tree -> Bool -isClosed t = case t of - Fun _ ts -> all isClosed ts - Var _ -> False - _ -> True - --- this applies to pattern, hence don't need to consider abstractions -isLinear :: Tree -> Bool -isLinear = nodup . vars where - vars t = case t of - Fun _ ts -> concatMap vars ts - Var x -> [x] - _ -> [] - nodup = all ((<2) . length) . group . sort - - -match :: [([Tree],Tree)] -> [Tree] -> [(Tree, Subst)] -match cases terms = case cases of - [] -> [] - (patts,_):_ | length patts /= length terms -> [] - (patts,val):cc -> case mapM tryMatch (zip patts terms) of - Just substs -> return (val, concat substs) - _ -> match cc terms - where - tryMatch (p,t) = case (p, t) of - (Var x, _) | notMeta t -> return [(x,t)] - (Fun p pp, Fun f tt) | p == f && length pp == length tt -> do - matches <- mapM tryMatch (zip pp tt) - return (concat matches) - _ -> if p==t then return [] else Nothing - - notMeta e = case e of - Meta _ -> False - Fun f ts -> all notMeta ts - _ -> True - --- | Converts a pattern to tree. -patt2tree :: Patt -> Tree -patt2tree (PApp f ps) = Fun f (map patt2tree ps) -patt2tree (PLit l) = Lit l -patt2tree (PVar x) = Var x -patt2tree PWild = Meta 0 diff --git a/src/PGF/Parsing/FCFG/Active.hs b/src/PGF/Parsing/FCFG/Active.hs deleted file mode 100644 index e88926f6e..000000000 --- a/src/PGF/Parsing/FCFG/Active.hs +++ /dev/null @@ -1,205 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- MCFG parsing, the active algorithm ------------------------------------------------------------------------------ - -module PGF.Parsing.FCFG.Active (parse) where - -import GF.Data.Assoc -import GF.Data.SortedList -import GF.Data.Utilities -import qualified GF.Data.MultiMap as MM - -import PGF.CId -import PGF.Data -import PGF.Tree -import PGF.Parsing.FCFG.Utilities -import PGF.BuildParser - -import Control.Monad (guard) - -import qualified Data.List as List -import qualified Data.Map as Map -import qualified Data.IntMap as IntMap -import qualified Data.Set as Set -import Data.Array.IArray -import Debug.Trace - ----------------------------------------------------------------------- --- * parsing - -type FToken = String - -makeFinalEdge cat 0 0 = (cat, [EmptyRange]) -makeFinalEdge cat i j = (cat, [makeRange i j]) - --- | the list of categories = possible starting categories -parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr] -parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees - where - inTokens = input toks - starts = Map.findWithDefault [] start (startCats pinfo) - schart = xchart2syntaxchart chart pinfo - (i,j) = inputBounds inTokens - finalEdges = [makeFinalEdge cat i j | cat <- starts] - forests = chart2forests schart (const False) finalEdges - filteredForests = forests >>= applyProfileToForest - - pinfoex = buildParserInfo pinfo - - chart = process strategy pinfo pinfoex inTokens axioms emptyXChart - axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens - | isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens - -isBU s = s=="b" -isTD s = s=="t" - --- used in prediction -emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec -emptyChildren ruleid args = SNode ruleid (replicate (length args) []) - - -process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat -process strategy pinfo pinfoex toks [] chart = chart -process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart - where - univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart - | inRange (bounds lin) ppos = - case lin ! ppos of - FSymCat d r -> let c = args !! d - in case recs !! d of - [] -> case insertXChart chart item c of - Nothing -> chart - Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c - rng <- concatRange rng (found' !! r) - return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat) - ++ - do guard (isTD strategy) - (ruleid,args) <- topdownRules pinfo c - return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c) - in process strategy pinfo pinfoex toks items chart - found' -> let items = do rng <- concatRange rng (found' !! r) - return (Active found rng lbl (ppos+1) node args cat) - in process strategy pinfo pinfoex toks items chart - FSymKS [tok] - -> let items = do t_rng <- inputToken toks ? tok - rng' <- concatRange rng t_rng - return (Active found rng' lbl (ppos+1) node args cat) - in process strategy pinfo pinfoex toks items chart - | otherwise = - if inRange (bounds lins) (lbl+1) - then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart - else univRule (Final (reverse (rng:found)) node args cat) chart - where - (FFun _ _ lins) = functions pinfo ! ruleid - lin = sequences pinfo ! (lins ! lbl) - univRule item@(Final found' node args cat) chart = - case insertXChart chart item cat of - Nothing -> chart - Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat - let FFun _ _ lins = functions pinfo ! ruleid - FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos - rng <- concatRange rng (found' !! r) - return (Active found rng l (ppos+1) (updateChildren node d found') args c) - ++ - do guard (isBU strategy) - (ruleid,args,c) <- leftcornerCats pinfoex ? cat - let FFun _ _ lins = functions pinfo ! ruleid - FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0 - return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c) - - updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec - updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs - in process strategy pinfo pinfoex toks items chart - ----------------------------------------------------------------------- --- * XChart - -data Item - = Active RangeRec - Range - {-# UNPACK #-} !FIndex - {-# UNPACK #-} !FPointPos - (SyntaxNode FunId RangeRec) - [FCat] - FCat - | Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat - deriving (Eq, Ord, Show) - -data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item) - -emptyXChart :: Ord c => XChart c -emptyXChart = XChart MM.empty MM.empty - -insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c = - case MM.insert' c item actives of - Nothing -> Nothing - Just actives -> Just (XChart actives finals) - -insertXChart (XChart actives finals) item@(Final _ _ _ _) c = - case MM.insert' c item finals of - Nothing -> Nothing - Just finals -> Just (XChart actives finals) - -lookupXChartAct (XChart actives finals) c = actives MM.! c -lookupXChartFinal (XChart actives finals) c = finals MM.! c - -xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) -xchart2syntaxchart (XChart actives finals) pinfo = - accumAssoc groupSyntaxNodes $ - [ case node of - SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid - in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) - SString s -> ((cat,found), SString s) - SInt n -> ((cat,found), SInt n) - SFloat f -> ((cat,found), SFloat f) - | (Final found node rhs cat) <- MM.elems finals - ] - -literals :: ParserInfoEx -> Input FToken -> [Item] -literals pinfoex toks = - [let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)] - where - lexer t = - case reads t of - [(n,"")] -> (fcatInt, SInt (n::Integer)) - _ -> case reads t of - [(f,"")] -> (fcatFloat, SFloat (f::Double)) - _ -> (fcatString,SString t) - - ----------------------------------------------------------------------- --- Earley -- - --- called with all starting categories -initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item] -initialTD pinfo starts toks = - do cat <- starts - (ruleid,args) <- topdownRules pinfo cat - return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat) - -topdownRules pinfo cat = f cat [] - where - f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) - - g (FApply ruleid args) rules = (ruleid,args) : rules - g (FCoerce cat) rules = f cat rules - - ----------------------------------------------------------------------- --- Kilbury -- - -initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -initialBU pinfo pinfoex toks = - do (tok,rngs) <- aAssocs (inputToken toks) - (ruleid,args,cat) <- leftcornerTokens pinfoex ? tok - rng <- rngs - return (Active [] rng 0 1 (emptyChildren ruleid args) args cat) - ++ - do (ruleid,args,cat) <- epsilonRules pinfoex - let FFun _ _ _ = functions pinfo ! ruleid - return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat) diff --git a/src/PGF/Parsing/FCFG/Incremental.hs b/src/PGF/Parsing/FCFG/Incremental.hs deleted file mode 100644 index 296a0d33b..000000000 --- a/src/PGF/Parsing/FCFG/Incremental.hs +++ /dev/null @@ -1,371 +0,0 @@ -{-# LANGUAGE BangPatterns #-}
-module PGF.Parsing.FCFG.Incremental
- ( ParseState
- , ErrorState
- , initState
- , nextState
- , getCompletions
- , recoveryStates
- , extractTrees
- , parse
- , parseWithRecovery
- ) where
-
-import Data.Array.IArray
-import Data.Array.Base (unsafeAt)
-import Data.List (isPrefixOf, foldl')
-import Data.Maybe (fromMaybe, maybe)
-import qualified Data.Map as Map
-import qualified GF.Data.TrieMap as TMap
-import qualified Data.IntMap as IntMap
-import qualified Data.Set as Set
-import Control.Monad
-
-import GF.Data.SortedList
-import PGF.CId
-import PGF.Data
-import PGF.Expr(Tree)
-import PGF.Macros
-import PGF.TypeCheck
-import Debug.Trace
-
-parse :: PGF -> Language -> Type -> [String] -> [Tree]
-parse pgf lang typ toks = loop (initState pgf lang typ) toks
- where
- loop ps [] = extractTrees ps typ
- loop ps (t:ts) = case nextState ps t of
- Left es -> []
- Right ps -> loop ps ts
-
-parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree]
-parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks
- where
- accept ps [] = extractTrees ps typ
- accept ps (t:ts) =
- case nextState ps t of
- Right ps -> accept ps ts
- Left es -> skip (recoveryStates open_typs es) ts
-
- skip ps_map [] = extractTrees (fst ps_map) typ
- skip ps_map (t:ts) =
- case Map.lookup t (snd ps_map) of
- Just ps -> accept ps ts
- Nothing -> skip ps_map ts
-
--- | Creates an initial parsing state for a given language and
--- startup category.
-initState :: PGF -> Language -> Type -> ParseState
-initState pgf lang (DTyp _ start _) =
- let items = do
- cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
- (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
- [] cat (productions pinfo)
- let FFun fn _ lins = functions pinfo ! funid
- (lbl,seqid) <- assocs lins
- return (Active 0 0 funid seqid args (AK cat lbl))
-
- pinfo =
- case lookParser pgf lang of
- Just pinfo -> pinfo
- _ -> error ("Unknown language: " ++ showCId lang)
-
- in PState pgf
- pinfo
- (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
- (TMap.singleton [] (Set.fromList items))
-
--- | From the current state and the next token
--- 'nextState' computes a new state, where the token
--- is consumed and the current position is shifted by one.
--- If the new token cannot be accepted then an error state
--- is returned.
-nextState :: ParseState -> String -> Either ErrorState ParseState
-nextState (PState pgf pinfo chart items) t =
- let (mb_agenda,map_items) = TMap.decompose items
- agenda = maybe [] Set.toList mb_agenda
- acc = fromMaybe TMap.empty (Map.lookup t map_items)
- (acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
- chart2 = chart1{ active =emptyAC
- , actives=active chart1 : actives chart1
- , passive=emptyPC
- , offset =offset chart1+1
- }
- in if TMap.null acc1
- then Left (EState pgf pinfo chart2)
- else Right (PState pgf pinfo chart2 acc1)
- where
- add (tok:toks) item acc
- | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
- add _ item acc = acc
-
--- | If the next token is not known but only its prefix (possible empty prefix)
--- then the 'getCompletions' function can be used to calculate the possible
--- next words and the consequent states. This is used for word completions in
--- the GF interpreter.
-getCompletions :: ParseState -> String -> Map.Map String ParseState
-getCompletions (PState pgf pinfo chart items) w =
- let (mb_agenda,map_items) = TMap.decompose items
- agenda = maybe [] Set.toList mb_agenda
- acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
- (acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
- chart2 = chart1{ active =emptyAC
- , actives=active chart1 : actives chart1
- , passive=emptyPC
- , offset =offset chart1+1
- }
- in fmap (PState pgf pinfo chart2) acc'
- where
- add (tok:toks) item acc
- | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
- add _ item acc = acc
-
-recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState)
-recoveryStates open_types (EState pgf pinfo chart) =
- let open_fcats = concatMap type2fcats open_types
- agenda = foldl (complete open_fcats) [] (actives chart)
- (acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart
- chart2 = chart1{ active =emptyAC
- , actives=active chart1 : actives chart1
- , passive=emptyPC
- , offset =offset chart1+1
- }
- in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
- where
- type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo))
-
- complete open_fcats items ac =
- foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
- (:) (Active j' (ppos+1) funid seqid args keyc)))
- items
- [set | fcat <- open_fcats, set <- lookupACByFCat fcat ac]
-
- add (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
-
--- | This function extracts the list of all completed parse trees
--- that spans the whole input consumed so far. The trees are also
--- limited by the category specified, which is usually
--- the same as the startup category.
-extractTrees :: ParseState -> Type -> [Tree]
-extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
- nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
- where
- (mb_agenda,acc) = TMap.decompose items
- agenda = maybe [] Set.toList mb_agenda
- (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
-
- exps = do
- cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
- (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
- [] cat (productions pinfo)
- let FFun fn _ lins = functions pinfo ! funid
- lbl <- indices lins
- Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
- (fvs,tree) <- go Set.empty 0 (0,fid)
- guard (Set.null fvs)
- return tree
-
- go rec fcat' (d,fcat)
- | fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
- | Set.member fcat rec = mzero
- | otherwise = foldForest (\funid args trees ->
- do let FFun fn _ lins = functions pinfo ! funid
- args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
- check_ho_fun fn args
- `mplus`
- trees)
- (\const _ trees ->
- return (freeVar const,const)
- `mplus`
- trees)
- [] fcat (forest st)
-
- check_ho_fun fun args
- | fun == _V = return (head args)
- | fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args))
- | otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args)
-
- mkVar (EFun v) = v
- mkVar (EMeta _) = wildCId
-
- freeVar (EFun v) = Set.singleton v
- freeVar _ = Set.empty
-
-_B = mkCId "_B"
-_V = mkCId "_V"
-
-process mbt fn !seqs !funs [] acc chart = (acc,chart)
-process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
- | inRange (bounds lin) ppos =
- case unsafeAt lin ppos of
- FSymCat d r -> let !fid = args !! d
- key = AK fid r
-
- items2 = case lookupPC (mkPK key k) (passive chart) of
- Nothing -> items
- Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items
- items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items)
- (\_ _ items -> items)
- items2 fid (forest chart)
- in case lookupAC key (active chart) of
- Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
- Just set | Set.member item set -> process mbt fn seqs funs items acc chart
- | otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
- FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
- in process mbt fn seqs funs items acc' chart
- FSymKP strs vars
- -> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc
- (strs:[strs' | Alt strs' _ <- vars])
- in process mbt fn seqs funs items acc' chart
- FSymLit d r -> let !fid = args !! d
- in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
- (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
- in process mbt fn seqs funs items acc' chart
- [] -> case litCatMatch fid mbt of
- Just (toks,lit) -> let fid' = nextId chart
- !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
- in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart)
- ,nextId=nextId chart+1
- }
- Nothing -> process mbt fn seqs funs items acc chart
- | otherwise =
- case lookupPC (mkPK key0 j) (passive chart) of
- Nothing -> let fid = nextId chart
-
- items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
- Nothing -> items
- Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
- let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos
- in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set
- in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
- ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart)
- ,nextId =nextId chart+1
- }
- Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
- in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)}
- where
- !lin = unsafeAt seqs seqid
- !k = offset chart
-
- mkPK (AK fid lbl) j = PK fid lbl j
-
- rhs funid lbl = unsafeAt lins lbl
- where
- FFun _ _ lins = unsafeAt funs funid
-
-
-updateAt :: Int -> a -> [a] -> [a]
-updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
-
-litCatMatch fcat (Just t)
- | fcat == fcatString = Just ([t],ELit (LStr t))
- | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n));
- _ -> Nothing }
- | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d));
- _ -> Nothing }
- | fcat == fcatVar = Just ([t],EFun (mkCId t))
-litCatMatch _ _ = Nothing
-
-
-----------------------------------------------------------------
--- Active Chart
-----------------------------------------------------------------
-
-data Active
- = Active {-# UNPACK #-} !Int
- {-# UNPACK #-} !FPointPos
- {-# UNPACK #-} !FunId
- {-# UNPACK #-} !SeqId
- [FCat]
- {-# UNPACK #-} !ActiveKey
- deriving (Eq,Show,Ord)
-data ActiveKey
- = AK {-# UNPACK #-} !FCat
- {-# UNPACK #-} !FIndex
- deriving (Eq,Ord,Show)
-type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active))
-
-emptyAC :: ActiveChart
-emptyAC = IntMap.empty
-
-lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active)
-lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l
-
-lookupACByFCat :: FCat -> ActiveChart -> [Set.Set Active]
-lookupACByFCat fcat chart =
- case IntMap.lookup fcat chart of
- Nothing -> []
- Just map -> IntMap.elems map
-
-labelsAC :: FCat -> ActiveChart -> [FIndex]
-labelsAC fcat chart =
- case IntMap.lookup fcat chart of
- Nothing -> []
- Just map -> IntMap.keys map
-
-insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart
-insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart
-
-
-----------------------------------------------------------------
--- Passive Chart
-----------------------------------------------------------------
-
-data PassiveKey
- = PK {-# UNPACK #-} !FCat
- {-# UNPACK #-} !FIndex
- {-# UNPACK #-} !Int
- deriving (Eq,Ord,Show)
-
-type PassiveChart = Map.Map PassiveKey FCat
-
-emptyPC :: PassiveChart
-emptyPC = Map.empty
-
-lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat
-lookupPC key chart = Map.lookup key chart
-
-insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart
-insertPC key fcat chart = Map.insert key fcat chart
-
-
-----------------------------------------------------------------
--- Forest
-----------------------------------------------------------------
-
-foldForest :: (FunId -> [FCat] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
-foldForest f g b fcat forest =
- case IntMap.lookup fcat forest of
- Nothing -> b
- Just set -> Set.fold foldProd b set
- where
- foldProd (FCoerce fcat) b = foldForest f g b fcat forest
- foldProd (FApply funid args) b = f funid args b
- foldProd (FConst const toks) b = g const toks b
-
-
-----------------------------------------------------------------
--- Parse State
-----------------------------------------------------------------
-
--- | An abstract data type whose values represent
--- the current state in an incremental parser.
-data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
-
-data Chart
- = Chart
- { active :: ActiveChart
- , actives :: [ActiveChart]
- , passive :: PassiveChart
- , forest :: IntMap.IntMap (Set.Set Production)
- , nextId :: {-# UNPACK #-} !FCat
- , offset :: {-# UNPACK #-} !Int
- }
- deriving Show
-
-----------------------------------------------------------------
--- Error State
-----------------------------------------------------------------
-
--- | An abstract data type whose values represent
--- the state in an incremental parser after an error.
-data ErrorState = EState PGF ParserInfo Chart
diff --git a/src/PGF/Parsing/FCFG/Utilities.hs b/src/PGF/Parsing/FCFG/Utilities.hs deleted file mode 100644 index dc0b2dc4a..000000000 --- a/src/PGF/Parsing/FCFG/Utilities.hs +++ /dev/null @@ -1,188 +0,0 @@ ----------------------------------------------------------------------- --- | --- Maintainer : PL --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: 2005/05/13 12:40:19 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.6 $ --- --- Basic type declarations and functions for grammar formalisms ------------------------------------------------------------------------------ - - -module PGF.Parsing.FCFG.Utilities where - -import Control.Monad -import Data.Array -import Data.List (groupBy) - -import PGF.CId -import PGF.Data -import PGF.Tree -import GF.Data.Assoc -import GF.Data.Utilities (sameLength, foldMerge, splitBy) - - ------------------------------------------------------------- --- ranges as single pairs - -type RangeRec = [Range] - -data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int - | EmptyRange - deriving (Eq, Ord, Show) - -makeRange :: Int -> Int -> Range -makeRange = Range - -concatRange :: Range -> Range -> [Range] -concatRange EmptyRange rng = return rng -concatRange rng EmptyRange = return rng -concatRange (Range i j) (Range j' k) = [Range i k | j==j'] - -minRange :: Range -> Int -minRange (Range i j) = i - -maxRange :: Range -> Int -maxRange (Range i j) = j - - ------------------------------------------------------------- --- * representaions of input tokens - -data Input t = MkInput { inputBounds :: (Int, Int), - inputToken :: Assoc t [Range] - } - -input :: Ord t => [t] -> Input t -input toks = MkInput inBounds inToken - where - inBounds = (0, length toks) - inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ] - -inputMany :: Ord t => [[t]] -> Input t -inputMany toks = MkInput inBounds inToken - where - inBounds = (0, length toks) - inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ] - - ------------------------------------------------------------- --- * representations of syntactical analyses - --- ** charts as finite maps over edges - --- | The values of the chart, a list of key-daughters pairs, --- has unique keys. In essence, it is a map from 'n' to daughters. --- The daughters should be a set (not necessarily sorted) of rhs's. -type SyntaxChart n e = Assoc e [SyntaxNode n [e]] - -data SyntaxNode n e = SMeta - | SNode n [e] - | SString String - | SInt Integer - | SFloat Double - deriving (Eq,Ord,Show) - -groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] -groupSyntaxNodes [] = [] -groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs' - where - (ess,xs') = span xs - - span [] = ([],[]) - span xs@(SNode n es:xs') - | n0 == n = let (ess,xs) = span xs' in (es:ess,xs) - | otherwise = ([],xs) -groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs -groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs -groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs - --- ** syntax forests - -data SyntaxForest n = FMeta - | FNode n [[SyntaxForest n]] - -- ^ The outer list should be a set (not necessarily sorted) - -- of possible alternatives. Ie. the outer list - -- is a disjunctive node, and the inner lists - -- are (conjunctive) concatenative nodes - | FString String - | FInt Integer - | FFloat Double - deriving (Eq, Ord, Show) - -instance Functor SyntaxForest where - fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests - fmap _ (FString s) = FString s - fmap _ (FInt n) = FInt n - fmap _ (FFloat f) = FFloat f - fmap _ (FMeta) = FMeta - -forestName :: SyntaxForest n -> Maybe n -forestName (FNode n _) = Just n -forestName _ = Nothing - -unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) -unifyManyForests = foldM unifyForests FMeta - --- | two forests can be unified, if either is 'FMeta', or both have the same parent, --- and all children can be unified -unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) -unifyForests FMeta forest = return forest -unifyForests forest FMeta = return forest -unifyForests (FNode name1 children1) (FNode name2 children2) - | name1 == name2 && not (null children) = return $ FNode name1 children - where children = [ forests | forests1 <- children1, forests2 <- children2, - sameLength forests1 forests2, - forests <- zipWithM unifyForests forests1 forests2 ] -unifyForests (FString s1) (FString s2) - | s1 == s2 = return $ FString s1 -unifyForests (FInt n1) (FInt n2) - | n1 == n2 = return $ FInt n1 -unifyForests (FFloat f1) (FFloat f2) - | f1 == f2 = return $ FFloat f1 -unifyForests _ _ = fail "forest unification failure" - - --- ** conversions between representations - -chart2forests :: (Ord n, Ord e) => - SyntaxChart n e -- ^ The complete chart - -> (e -> Bool) -- ^ When is an edge 'FMeta'? - -> [e] -- ^ The starting edges - -> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together. - -- In essence, the result is a map from 'n' to forest daughters -chart2forests chart isMeta = concatMap (edge2forests []) - where edge2forests edges edge - | isMeta edge = [FMeta] - | edge `elem` edges = [] - | otherwise = map (item2forest (edge:edges)) $ chart ? edge - item2forest edges (SMeta) = FMeta - item2forest edges (SNode name children) = - FNode name $ children >>= mapM (edge2forests edges) - item2forest edges (SString s) = FString s - item2forest edges (SInt n) = FInt n - item2forest edges (SFloat f) = FFloat f - - -applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId] -applyProfileToForest (FNode (fun,profiles) children) - | fun == wildCId = concat chForests - | otherwise = [ FNode fun chForests | not (null chForests) ] - where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles | - forests0 <- children, - forests <- mapM applyProfileToForest forests0 ] -applyProfileToForest (FString s) = [FString s] -applyProfileToForest (FInt n) = [FInt n] -applyProfileToForest (FFloat f) = [FFloat f] -applyProfileToForest (FMeta) = [FMeta] - - -forest2trees :: SyntaxForest CId -> [Tree] -forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees -forest2trees (FString s) = [Lit (LStr s)] -forest2trees (FInt n) = [Lit (LInt n)] -forest2trees (FFloat f) = [Lit (LFlt f)] -forest2trees (FMeta) = [Meta 0] diff --git a/src/PGF/ShowLinearize.hs b/src/PGF/ShowLinearize.hs deleted file mode 100644 index dd3b997a6..000000000 --- a/src/PGF/ShowLinearize.hs +++ /dev/null @@ -1,113 +0,0 @@ -module PGF.ShowLinearize ( - collectWords, - tableLinearize, - recordLinearize, - termLinearize, - tabularLinearize, - allLinearize, - markLinearize - ) where - -import PGF.CId -import PGF.Data -import PGF.Tree -import PGF.Macros -import PGF.Linearize - -import GF.Data.Operations -import Data.List -import qualified Data.Map as Map - --- printing linearizations in different ways with source parameters - --- internal representation, only used internally in this module -data Record = - RR [(String,Record)] - | RT [(String,Record)] - | RFV [Record] - | RS String - | RCon String - -prRecord :: Record -> String -prRecord = prr where - prr t = case t of - RR fs -> concat $ - "{" : - (intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"] - RT fs -> concat $ - "table {" : - (intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"] - RFV ts -> concat $ - "variants {" : (intersperse ";" (map prr ts)) ++ ["}"] - RS s -> prQuotedString s - RCon s -> s - --- uses the encoding of record types in PGF.paramlincat -mkRecord :: Term -> Term -> Record -mkRecord typ trm = case (typ,trm) of - (_, FV ts) -> RFV $ map (mkRecord typ) ts - (R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts] - (S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts] - (_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts]) - (FV ps, C i) -> RCon $ str $ ps !! i - (S [], _) -> case realizes trm of - [s] -> RS s - ss -> RFV $ map RS ss - _ -> RS $ show trm ---- printTree trm - where - str = realize - --- show all branches, without labels and params -allLinearize :: (String -> String) -> PGF -> CId -> Expr -> String -allLinearize unlex pgf lang = concat . map (unlex . pr) . tabularLinearize pgf lang where - pr (p,vs) = unlines vs - --- show all branches, with labels and params -tableLinearize :: (String -> String) -> PGF -> CId -> Expr -> String -tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where - pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs)) - --- create a table from labels+params to variants -tabularLinearize :: PGF -> CId -> Expr -> [(String,[String])] -tabularLinearize pgf lang = branches . recLinearize pgf lang where - branches r = case r of - RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] - RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] - RFV rs -> concatMap branches rs - RS s -> [([], [s])] - RCon _ -> [] - --- show record in GF-source-like syntax -recordLinearize :: PGF -> CId -> Expr -> String -recordLinearize pgf lang = prRecord . recLinearize pgf lang - --- create a GF-like record, forming the basis of all functions above -recLinearize :: PGF -> CId -> Expr -> Record -recLinearize pgf lang tree = mkRecord typ $ linTree pgf lang tree where - typ = case expr2tree tree of - Fun f _ -> lookParamLincat pgf lang $ valCat $ lookType pgf f - --- show PGF term -termLinearize :: PGF -> CId -> Expr -> String -termLinearize pgf lang = show . linTree pgf lang - --- show bracketed markup with references to tree structure -markLinearize :: PGF -> CId -> Expr -> String -markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang - - --- for Morphology: word, lemma, tags -collectWords :: PGF -> Language -> [(String, [(CId,String)])] -collectWords pgf lang = - concatMap collOne - [(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf] - where - collOne (f,c,i) = - fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888)))) - fromRec f v r = case r of - RR rs -> concat [fromRec f v t | (_,t) <- rs] - RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs] - RFV rs -> concatMap (fromRec f v) rs - RS s -> [(s,[(f,unwords (reverse v))])] - RCon c -> [] ---- inherent - diff --git a/src/PGF/Tree.hs b/src/PGF/Tree.hs deleted file mode 100644 index cb2052cd7..000000000 --- a/src/PGF/Tree.hs +++ /dev/null @@ -1,71 +0,0 @@ -module PGF.Tree - ( Tree(..), - tree2expr, expr2tree, - prTree - ) where - -import PGF.CId -import PGF.Expr hiding (Tree) - -import Data.Char -import Data.List as List -import Control.Monad -import qualified Text.PrettyPrint as PP -import qualified Text.ParserCombinators.ReadP as RP - --- | The tree is an evaluated expression in the abstract syntax --- of the grammar. The type is especially restricted to not --- allow unapplied lambda abstractions. The tree is used directly --- from the linearizer and is produced directly from the parser. -data Tree = - Abs [(BindType,CId)] Tree -- ^ lambda abstraction. The list of variables is non-empty - | Var CId -- ^ variable - | Fun CId [Tree] -- ^ function application - | Lit Literal -- ^ literal - | Meta {-# UNPACK #-} !MetaId -- ^ meta variable - deriving (Eq, Ord) - ------------------------------------------------------ --- Conversion Expr <-> Tree ------------------------------------------------------ - --- | Converts a tree to expression. The conversion --- is always total, every tree is a valid expression. -tree2expr :: Tree -> Expr -tree2expr = tree2expr [] - where - tree2expr ys (Fun x ts) = foldl EApp (EFun x) (List.map (tree2expr ys) ts) - tree2expr ys (Lit l) = ELit l - tree2expr ys (Meta n) = EMeta n - tree2expr ys (Abs xs t) = foldr (\(b,x) e -> EAbs b x e) (tree2expr (List.map snd (reverse xs)++ys) t) xs - tree2expr ys (Var x) = case List.lookup x (zip ys [0..]) of - Just i -> EVar i - Nothing -> error "unknown variable" - --- | Converts an expression to tree. The conversion is only partial. --- Variables and meta variables of function type and beta redexes are not allowed. -expr2tree :: Expr -> Tree -expr2tree e = abs [] [] e - where - abs ys xs (EAbs b x e) = abs ys ((b,x):xs) e - abs ys xs (ETyped e _) = abs ys xs e - abs ys xs e = case xs of - [] -> app ys [] e - xs -> Abs (reverse xs) (app (map snd xs++ys) [] e) - - app xs as (EApp e1 e2) = app xs ((abs xs [] e2) : as) e1 - app xs as (ELit l) - | List.null as = Lit l - | otherwise = error "literal of function type encountered" - app xs as (EMeta n) - | List.null as = Meta n - | otherwise = error "meta variables of function type are not allowed in trees" - app xs as (EAbs _ x e) = error "beta redexes are not allowed in trees" - app xs as (EVar i) = Var (xs !! i) - app xs as (EFun f) = Fun f as - app xs as (ETyped e _) = app xs as e - - -prTree :: Tree -> String -prTree = showExpr [] . tree2expr - diff --git a/src/PGF/Type.hs b/src/PGF/Type.hs deleted file mode 100644 index 013754a45..000000000 --- a/src/PGF/Type.hs +++ /dev/null @@ -1,103 +0,0 @@ -module PGF.Type ( Type(..), Hypo,
- readType, showType,
- mkType, mkHypo, mkDepHypo, mkImplHypo,
- pType, ppType, ppHypo ) where
-
-import PGF.CId
-import {-# SOURCE #-} PGF.Expr
-import Data.Char
-import Data.List
-import qualified Text.PrettyPrint as PP
-import qualified Text.ParserCombinators.ReadP as RP
-import Control.Monad
-
--- | To read a type from a 'String', use 'readType'.
-data Type =
- DTyp [Hypo] CId [Expr]
- deriving (Eq,Ord,Show)
-
--- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis
-type Hypo = (BindType,CId,Type)
-
--- | Reads a 'Type' from a 'String'.
-readType :: String -> Maybe Type
-readType s = case [x | (x,cs) <- RP.readP_to_S pType s, all isSpace cs] of
- [x] -> Just x
- _ -> Nothing
-
--- | renders type as 'String'. The list
--- of identifiers is the list of all free variables
--- in the expression in order reverse to the order
--- of binding.
-showType :: [CId] -> Type -> String
-showType vars = PP.render . ppType 0 vars
-
--- | creates a type from list of hypothesises, category and
--- list of arguments for the category. The operation
--- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create
--- @h_1 -> ... -> h_n -> C e_1 ... e_m@
-mkType :: [Hypo] -> CId -> [Expr] -> Type
-mkType hyps cat args = DTyp hyps cat args
-
--- | creates hypothesis for non-dependent type i.e. A
-mkHypo :: Type -> Hypo
-mkHypo ty = (Explicit,wildCId,ty)
-
--- | creates hypothesis for dependent type i.e. (x : A)
-mkDepHypo :: CId -> Type -> Hypo
-mkDepHypo x ty = (Explicit,x,ty)
-
--- | creates hypothesis for dependent type with implicit argument i.e. ({x} : A)
-mkImplHypo :: CId -> Type -> Hypo
-mkImplHypo x ty = (Implicit,x,ty)
-
-pType :: RP.ReadP Type
-pType = do
- RP.skipSpaces
- hyps <- RP.sepBy (pHypo >>= \h -> RP.skipSpaces >> RP.string "->" >> return h) RP.skipSpaces
- RP.skipSpaces
- (cat,args) <- pAtom
- return (DTyp (concat hyps) cat args)
- where
- pHypo =
- do (cat,args) <- pAtom
- return [(Explicit,wildCId,DTyp [] cat args)]
- RP.<++
- (RP.between (RP.char '(') (RP.char ')') $ do
- xs <- RP.option [(Explicit,wildCId)] $ do
- xs <- pBinds
- RP.skipSpaces
- RP.char ':'
- return xs
- ty <- pType
- return [(b,v,ty) | (b,v) <- xs])
- RP.<++
- (RP.between (RP.char '{') (RP.char '}') $ do
- vs <- RP.sepBy1 (RP.skipSpaces >> pCId) (RP.skipSpaces >> RP.char ',')
- RP.skipSpaces
- RP.char ':'
- ty <- pType
- return [(Implicit,v,ty) | v <- vs])
-
- pAtom = do
- cat <- pCId
- RP.skipSpaces
- args <- RP.sepBy pArg RP.skipSpaces
- return (cat, args)
-
-ppType :: Int -> [CId] -> Type -> PP.Doc
-ppType d scope (DTyp hyps cat args)
- | null hyps = ppRes scope cat args
- | otherwise = let (scope',hdocs) = mapAccumL ppHypo scope hyps
- in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes scope' cat args) hdocs)
- where
- ppRes scope cat es = ppCId cat PP.<+> PP.hsep (map (ppExpr 4 scope) es)
-
-ppHypo scope (Explicit,x,typ) = if x == wildCId
- then (scope,ppType 1 scope typ)
- else let y = freshName x scope
- in (y:scope,PP.parens (ppCId y PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
-ppHypo scope (Implicit,x,typ) = if x == wildCId
- then (scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
- else let y = freshName x scope
- in (y:scope,PP.parens (PP.braces (ppCId y) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
diff --git a/src/PGF/TypeCheck.hs b/src/PGF/TypeCheck.hs deleted file mode 100644 index 937c21786..000000000 --- a/src/PGF/TypeCheck.hs +++ /dev/null @@ -1,524 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : PGF.TypeCheck --- Maintainer : Krasimir Angelov --- Stability : (stable) --- Portability : (portable) --- --- Type checking in abstract syntax with dependent types. --- The type checker also performs renaming and checking for unknown --- functions. The variable references are replaced by de Bruijn indices. --- ------------------------------------------------------------------------------ - -module PGF.TypeCheck (checkType, checkExpr, inferExpr, - - ppTcError, TcError(..) - ) where - -import PGF.Data -import PGF.Expr -import PGF.Macros (typeOfHypo) -import PGF.CId - -import Data.Map as Map -import Data.IntMap as IntMap -import Data.Maybe as Maybe -import Data.List as List -import Control.Monad -import Text.PrettyPrint - ------------------------------------------------------ --- The Scope ------------------------------------------------------ - -data TType = TTyp Env Type -newtype Scope = Scope [(CId,TType)] - -emptyScope = Scope [] - -addScopedVar :: CId -> TType -> Scope -> Scope -addScopedVar x tty (Scope gamma) = Scope ((x,tty):gamma) - --- | returns the type and the De Bruijn index of a local variable -lookupVar :: CId -> Scope -> Maybe (Int,TType) -lookupVar x (Scope gamma) = listToMaybe [(i,tty) | ((y,tty),i) <- zip gamma [0..], x == y] - --- | returns the type and the name of a local variable -getVar :: Int -> Scope -> (CId,TType) -getVar i (Scope gamma) = gamma !! i - -scopeEnv :: Scope -> Env -scopeEnv (Scope gamma) = let n = length gamma - in [VGen (n-i-1) [] | i <- [0..n-1]] - -scopeVars :: Scope -> [CId] -scopeVars (Scope gamma) = List.map fst gamma - -scopeSize :: Scope -> Int -scopeSize (Scope gamma) = length gamma - ------------------------------------------------------ --- The Monad ------------------------------------------------------ - -type MetaStore = IntMap MetaValue -data MetaValue - = MUnbound Scope [Expr -> TcM ()] - | MBound Expr - | MGuarded Expr [Expr -> TcM ()] {-# UNPACK #-} !Int -- the Int is the number of constraints that have to be solved - -- to unlock this meta variable - -newtype TcM a = TcM {unTcM :: Abstr -> MetaId -> MetaStore -> TcResult a} -data TcResult a - = Ok {-# UNPACK #-} !MetaId MetaStore a - | Fail TcError - -instance Monad TcM where - return x = TcM (\abstr metaid ms -> Ok metaid ms x) - f >>= g = TcM (\abstr metaid ms -> case unTcM f abstr metaid ms of - Ok metaid ms x -> unTcM (g x) abstr metaid ms - Fail e -> Fail e) - -instance Functor TcM where - fmap f x = TcM (\abstr metaid ms -> case unTcM x abstr metaid ms of - Ok metaid ms x -> Ok metaid ms (f x) - Fail e -> Fail e) - -lookupCatHyps :: CId -> TcM [Hypo] -lookupCatHyps cat = TcM (\abstr metaid ms -> case Map.lookup cat (cats abstr) of - Just hyps -> Ok metaid ms hyps - Nothing -> Fail (UnknownCat cat)) - -lookupFunType :: CId -> TcM TType -lookupFunType fun = TcM (\abstr metaid ms -> case Map.lookup fun (funs abstr) of - Just (ty,_,_) -> Ok metaid ms (TTyp [] ty) - Nothing -> Fail (UnknownFun fun)) - -newMeta :: Scope -> TcM MetaId -newMeta scope = TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MUnbound scope []) ms) metaid) - -newGuardedMeta :: Scope -> Expr -> TcM MetaId -newGuardedMeta scope e = getFuns >>= \funs -> TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MGuarded e [] 0) ms) metaid) - -getMeta :: MetaId -> TcM MetaValue -getMeta i = TcM (\abstr metaid ms -> Ok metaid ms $! case IntMap.lookup i ms of - Just mv -> mv) -setMeta :: MetaId -> MetaValue -> TcM () -setMeta i mv = TcM (\abstr metaid ms -> Ok metaid (IntMap.insert i mv ms) ()) - -tcError :: TcError -> TcM a -tcError e = TcM (\abstr metaid ms -> Fail e) - -getFuns :: TcM Funs -getFuns = TcM (\abstr metaid ms -> Ok metaid ms (funs abstr)) - -addConstraint :: MetaId -> MetaId -> Env -> [Value] -> (Value -> TcM ()) -> TcM () -addConstraint i j env vs c = do - funs <- getFuns - mv <- getMeta j - case mv of - MUnbound scope cs -> addRef >> setMeta j (MUnbound scope ((\e -> release >> c (apply funs env e vs)) : cs)) - MBound e -> c (apply funs env e vs) - MGuarded e cs x | x == 0 -> c (apply funs env e vs) - | otherwise -> addRef >> setMeta j (MGuarded e ((\e -> release >> c (apply funs env e vs)) : cs) x) - where - addRef = TcM (\abstr metaid ms -> case IntMap.lookup i ms of - Just (MGuarded e cs x) -> Ok metaid (IntMap.insert i (MGuarded e cs (x+1)) ms) ()) - - release = TcM (\abstr metaid ms -> case IntMap.lookup i ms of - Just (MGuarded e cs x) -> if x == 1 - then unTcM (sequence_ [c e | c <- cs]) abstr metaid (IntMap.insert i (MGuarded e [] 0) ms) - else Ok metaid (IntMap.insert i (MGuarded e cs (x-1)) ms) ()) - ------------------------------------------------------ --- Type errors ------------------------------------------------------ - --- | If an error occurs in the typechecking phase --- the type checker returns not a plain text error message --- but a 'TcError' structure which describes the error. -data TcError - = UnknownCat CId -- ^ Unknown category name was found. - | UnknownFun CId -- ^ Unknown function name was found. - | WrongCatArgs [CId] Type CId Int Int -- ^ A category was applied to wrong number of arguments. - -- The first integer is the number of expected arguments and - -- the second the number of given arguments. - -- The @[CId]@ argument is the list of free variables - -- in the type. It should be used for the 'showType' function. - | TypeMismatch [CId] Expr Type Type -- ^ The expression is not of the expected type. - -- The first type is the expected type, while - -- the second is the inferred. The @[CId]@ argument is the list - -- of free variables in both the expression and the type. - -- It should be used for the 'showType' and 'showExpr' functions. - | NotFunType [CId] Expr Type -- ^ Something that is not of function type was applied to an argument. - | CannotInferType [CId] Expr -- ^ It is not possible to infer the type of an expression. - | UnresolvedMetaVars [CId] Expr [MetaId] -- ^ Some metavariables have to be instantiated in order to complete the typechecking. - | UnexpectedImplArg [CId] Expr -- ^ Implicit argument was passed where the type doesn't allow it - --- | Renders the type checking error to a document. See 'Text.PrettyPrint'. -ppTcError :: TcError -> Doc -ppTcError (UnknownCat cat) = text "Category" <+> ppCId cat <+> text "is not in scope" -ppTcError (UnknownFun fun) = text "Function" <+> ppCId fun <+> text "is not in scope" -ppTcError (WrongCatArgs xs ty cat m n) = text "Category" <+> ppCId cat <+> text "should have" <+> int m <+> text "argument(s), but has been given" <+> int n $$ - text "In the type:" <+> ppType 0 xs ty -ppTcError (TypeMismatch xs e ty1 ty2) = text "Couldn't match expected type" <+> ppType 0 xs ty1 $$ - text " against inferred type" <+> ppType 0 xs ty2 $$ - text "In the expression:" <+> ppExpr 0 xs e -ppTcError (NotFunType xs e ty) = text "A function type is expected for the expression" <+> ppExpr 0 xs e <+> text "instead of type" <+> ppType 0 xs ty -ppTcError (CannotInferType xs e) = text "Cannot infer the type of expression" <+> ppExpr 0 xs e -ppTcError (UnresolvedMetaVars xs e ms) = text "Meta variable(s)" <+> fsep (List.map ppMeta ms) <+> text "should be resolved" $$ - text "in the expression:" <+> ppExpr 0 xs e -ppTcError (UnexpectedImplArg xs e) = braces (ppExpr 0 xs e) <+> text "is implicit argument but not implicit argument is expected here" - ------------------------------------------------------ --- checkType ------------------------------------------------------ - --- | Check whether a given type is consistent with the abstract --- syntax of the grammar. -checkType :: PGF -> Type -> Either TcError Type -checkType pgf ty = - case unTcM (tcType emptyScope ty >>= refineType) (abstract pgf) 0 IntMap.empty of - Ok _ ms ty -> Right ty - Fail err -> Left err - -tcType :: Scope -> Type -> TcM Type -tcType scope ty@(DTyp hyps cat es) = do - (scope,hyps) <- tcHypos scope hyps - c_hyps <- lookupCatHyps cat - let m = length es - n = length [ty | (Explicit,x,ty) <- c_hyps] - (delta,es) <- tcCatArgs scope es [] c_hyps ty n m - return (DTyp hyps cat es) - -tcHypos :: Scope -> [Hypo] -> TcM (Scope,[Hypo]) -tcHypos scope [] = return (scope,[]) -tcHypos scope (h:hs) = do - (scope,h ) <- tcHypo scope h - (scope,hs) <- tcHypos scope hs - return (scope,h:hs) - -tcHypo :: Scope -> Hypo -> TcM (Scope,Hypo) -tcHypo scope (b,x,ty) = do - ty <- tcType scope ty - if x == wildCId - then return (scope,(b,x,ty)) - else return (addScopedVar x (TTyp (scopeEnv scope) ty) scope,(b,x,ty)) - -tcCatArgs scope [] delta [] ty0 n m = return (delta,[]) -tcCatArgs scope (EImplArg e:es) delta ((Explicit,x,ty):hs) ty0 n m = tcError (UnexpectedImplArg (scopeVars scope) e) -tcCatArgs scope (EImplArg e:es) delta ((Implicit,x,ty):hs) ty0 n m = do - e <- tcExpr scope e (TTyp delta ty) - funs <- getFuns - (delta,es) <- if x == wildCId - then tcCatArgs scope es delta hs ty0 n m - else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m - return (delta,EImplArg e:es) -tcCatArgs scope es delta ((Implicit,x,ty):hs) ty0 n m = do - i <- newMeta scope - (delta,es) <- if x == wildCId - then tcCatArgs scope es delta hs ty0 n m - else tcCatArgs scope es (VMeta i (scopeEnv scope) [] : delta) hs ty0 n m - return (delta,EImplArg (EMeta i) : es) -tcCatArgs scope (e:es) delta ((Explicit,x,ty):hs) ty0 n m = do - e <- tcExpr scope e (TTyp delta ty) - funs <- getFuns - (delta,es) <- if x == wildCId - then tcCatArgs scope es delta hs ty0 n m - else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m - return (delta,e:es) -tcCatArgs scope _ delta _ ty0@(DTyp _ cat _) n m = do - tcError (WrongCatArgs (scopeVars scope) ty0 cat n m) - ------------------------------------------------------ --- checkExpr ------------------------------------------------------ - --- | Checks an expression against a specified type. -checkExpr :: PGF -> Expr -> Type -> Either TcError Expr -checkExpr pgf e ty = - case unTcM (do e <- tcExpr emptyScope e (TTyp [] ty) - e <- refineExpr e - checkResolvedMetaStore emptyScope e - return e) (abstract pgf) 0 IntMap.empty of - Ok _ ms e -> Right e - Fail err -> Left err - -tcExpr :: Scope -> Expr -> TType -> TcM Expr -tcExpr scope e0@(EAbs Implicit x e) tty = - case tty of - TTyp delta (DTyp ((Implicit,y,ty):hs) c es) -> do e <- if y == wildCId - then tcExpr (addScopedVar x (TTyp delta ty) scope) - e (TTyp delta (DTyp hs c es)) - else tcExpr (addScopedVar x (TTyp delta ty) scope) - e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es)) - return (EAbs Implicit x e) - _ -> do ty <- evalType (scopeSize scope) tty - tcError (NotFunType (scopeVars scope) e0 ty) -tcExpr scope e0 (TTyp delta (DTyp ((Implicit,y,ty):hs) c es)) = do - e0 <- if y == wildCId - then tcExpr (addScopedVar wildCId (TTyp delta ty) scope) - e0 (TTyp delta (DTyp hs c es)) - else tcExpr (addScopedVar wildCId (TTyp delta ty) scope) - e0 (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es)) - return (EAbs Implicit wildCId e0) -tcExpr scope e0@(EAbs Explicit x e) tty = - case tty of - TTyp delta (DTyp ((Explicit,y,ty):hs) c es) -> do e <- if y == wildCId - then tcExpr (addScopedVar x (TTyp delta ty) scope) - e (TTyp delta (DTyp hs c es)) - else tcExpr (addScopedVar x (TTyp delta ty) scope) - e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es)) - return (EAbs Explicit x e) - _ -> do ty <- evalType (scopeSize scope) tty - tcError (NotFunType (scopeVars scope) e0 ty) -tcExpr scope (EMeta _) tty = do - i <- newMeta scope - return (EMeta i) -tcExpr scope e0 tty = do - (e0,tty0) <- infExpr scope e0 - i <- newGuardedMeta scope e0 - eqType scope (scopeSize scope) i tty tty0 - return (EMeta i) - - ------------------------------------------------------ --- inferExpr ------------------------------------------------------ - --- | Tries to infer the type of a given expression. Note that --- even if the expression is type correct it is not always --- possible to infer its type in the GF type system. --- In this case the function returns the 'CannotInferType' error. -inferExpr :: PGF -> Expr -> Either TcError (Expr,Type) -inferExpr pgf e = - case unTcM (do (e,tty) <- infExpr emptyScope e - e <- refineExpr e - checkResolvedMetaStore emptyScope e - ty <- evalType 0 tty - return (e,ty)) (abstract pgf) 1 IntMap.empty of - Ok _ ms (e,ty) -> Right (e,ty) - Fail err -> Left err - -infExpr :: Scope -> Expr -> TcM (Expr,TType) -infExpr scope e0@(EApp e1 e2) = do - (e1,TTyp delta ty) <- infExpr scope e1 - (e0,delta,ty) <- tcArg scope e1 e2 delta ty - return (e0,TTyp delta ty) -infExpr scope e0@(EFun x) = do - case lookupVar x scope of - Just (i,tty) -> return (EVar i,tty) - Nothing -> do tty <- lookupFunType x - return (e0,tty) -infExpr scope e0@(EVar i) = do - return (e0,snd (getVar i scope)) -infExpr scope e0@(ELit l) = do - let cat = case l of - LStr _ -> mkCId "String" - LInt _ -> mkCId "Int" - LFlt _ -> mkCId "Float" - return (e0,TTyp [] (DTyp [] cat [])) -infExpr scope (ETyped e ty) = do - ty <- tcType scope ty - e <- tcExpr scope e (TTyp (scopeEnv scope) ty) - return (ETyped e ty,TTyp (scopeEnv scope) ty) -infExpr scope (EImplArg e) = do - (e,tty) <- infExpr scope e - return (EImplArg e,tty) -infExpr scope e = tcError (CannotInferType (scopeVars scope) e) - -tcArg scope e1 e2 delta ty0@(DTyp [] c es) = do - ty1 <- evalType (scopeSize scope) (TTyp delta ty0) - tcError (NotFunType (scopeVars scope) e1 ty1) -tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = tcError (UnexpectedImplArg (scopeVars scope) e2) -tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do - e2 <- tcExpr scope e2 (TTyp delta ty) - funs <- getFuns - if x == wildCId - then return (EApp e1 (EImplArg e2), delta,DTyp hs c es) - else return (EApp e1 (EImplArg e2),eval funs (scopeEnv scope) e2:delta,DTyp hs c es) -tcArg scope e1 e2 delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = do - e2 <- tcExpr scope e2 (TTyp delta ty) - funs <- getFuns - if x == wildCId - then return (EApp e1 e2, delta,DTyp hs c es) - else return (EApp e1 e2,eval funs (scopeEnv scope) e2:delta,DTyp hs c es) -tcArg scope e1 e2 delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do - i <- newMeta scope - if x == wildCId - then tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 delta (DTyp hs c es) - else tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 (VMeta i (scopeEnv scope) [] : delta) (DTyp hs c es) - ------------------------------------------------------ --- eqType ------------------------------------------------------ - -eqType :: Scope -> Int -> MetaId -> TType -> TType -> TcM () -eqType scope k i0 tty1@(TTyp delta1 ty1@(DTyp hyps1 cat1 es1)) tty2@(TTyp delta2 ty2@(DTyp hyps2 cat2 es2)) - | cat1 == cat2 = do (k,delta1,delta2) <- eqHyps k delta1 hyps1 delta2 hyps2 - sequence_ [eqExpr k delta1 e1 delta2 e2 | (e1,e2) <- zip es1 es2] - | otherwise = raiseTypeMatchError - where - raiseTypeMatchError = do ty1 <- evalType k tty1 - ty2 <- evalType k tty2 - e <- refineExpr (EMeta i0) - tcError (TypeMismatch (scopeVars scope) e ty1 ty2) - - eqHyps :: Int -> Env -> [Hypo] -> Env -> [Hypo] -> TcM (Int,Env,Env) - eqHyps k delta1 [] delta2 [] = - return (k,delta1,delta2) - eqHyps k delta1 ((_,x,ty1) : h1s) delta2 ((_,y,ty2) : h2s) = do - eqType scope k i0 (TTyp delta1 ty1) (TTyp delta2 ty2) - if x == wildCId && y == wildCId - then eqHyps k delta1 h1s delta2 h2s - else if x /= wildCId && y /= wildCId - then eqHyps (k+1) ((VGen k []):delta1) h1s ((VGen k []):delta2) h2s - else raiseTypeMatchError - eqHyps k delta1 h1s delta2 h2s = raiseTypeMatchError - - eqExpr :: Int -> Env -> Expr -> Env -> Expr -> TcM () - eqExpr k env1 e1 env2 e2 = do - funs <- getFuns - eqValue k (eval funs env1 e1) (eval funs env2 e2) - - eqValue :: Int -> Value -> Value -> TcM () - eqValue k v1 v2 = do - v1 <- deRef v1 - v2 <- deRef v2 - eqValue' k v1 v2 - - deRef v@(VMeta i env vs) = do - mv <- getMeta i - funs <- getFuns - case mv of - MBound e -> deRef (apply funs env e vs) - MGuarded e _ x | x == 0 -> deRef (apply funs env e vs) - | otherwise -> return v - MUnbound _ _ -> return v - deRef v = return v - - eqValue' k (VSusp i env vs1 c) v2 = addConstraint i0 i env vs1 (\v1 -> eqValue k (c v1) v2) - eqValue' k v1 (VSusp i env vs2 c) = addConstraint i0 i env vs2 (\v2 -> eqValue k v1 (c v2)) - eqValue' k (VMeta i env1 vs1) (VMeta j env2 vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2 - eqValue' k (VMeta i env1 vs1) v2 = do (MUnbound scopei cs) <- getMeta i - e2 <- mkLam i scopei env1 vs1 v2 - setMeta i (MBound e2) - sequence_ [c e2 | c <- cs] - eqValue' k v1 (VMeta i env2 vs2) = do (MUnbound scopei cs) <- getMeta i - e1 <- mkLam i scopei env2 vs2 v1 - setMeta i (MBound e1) - sequence_ [c e1 | c <- cs] - eqValue' k (VApp f1 vs1) (VApp f2 vs2) | f1 == f2 = zipWithM_ (eqValue k) vs1 vs2 - eqValue' k (VLit l1) (VLit l2 ) | l1 == l2 = return () - eqValue' k (VGen i vs1) (VGen j vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2 - eqValue' k (VClosure env1 (EAbs _ x1 e1)) (VClosure env2 (EAbs _ x2 e2)) = let v = VGen k [] - in eqExpr (k+1) (v:env1) e1 (v:env2) e2 - eqValue' k v1 v2 = raiseTypeMatchError - - mkLam i scope env vs0 v = do - let k = scopeSize scope - vs = reverse (take k env) ++ vs0 - xs = nub [i | VGen i [] <- vs] - if length vs == length xs - then return () - else raiseTypeMatchError - v <- occurCheck i k xs v - funs <- getFuns - return (addLam vs0 (value2expr funs (length xs) v)) - where - addLam [] e = e - addLam (v:vs) e = EAbs Explicit var (addLam vs e) - - var = mkCId "v" - - occurCheck i0 k xs (VApp f vs) = do vs <- mapM (occurCheck i0 k xs) vs - return (VApp f vs) - occurCheck i0 k xs (VLit l) = return (VLit l) - occurCheck i0 k xs (VMeta i env vs) = do if i == i0 - then raiseTypeMatchError - else return () - mv <- getMeta i - funs <- getFuns - case mv of - MBound e -> occurCheck i0 k xs (apply funs env e vs) - MGuarded e _ _ -> occurCheck i0 k xs (apply funs env e vs) - MUnbound scopei _ | scopeSize scopei > k -> raiseTypeMatchError - | otherwise -> do vs <- mapM (occurCheck i0 k xs) vs - return (VMeta i env vs) - occurCheck i0 k xs (VSusp i env vs cnt) = do addConstraint i0 i env vs (\v -> occurCheck i0 k xs (cnt v) >> return ()) - return (VSusp i env vs cnt) - occurCheck i0 k xs (VGen i vs) = case List.findIndex (==i) xs of - Just i -> do vs <- mapM (occurCheck i0 k xs) vs - return (VGen i vs) - Nothing -> raiseTypeMatchError - occurCheck i0 k xs (VClosure env e) = do env <- mapM (occurCheck i0 k xs) env - return (VClosure env e) - - ------------------------------------------------------------ --- check for meta variables that still have to be resolved ------------------------------------------------------------ - -checkResolvedMetaStore :: Scope -> Expr -> TcM () -checkResolvedMetaStore scope e = TcM (\abstr metaid ms -> - let xs = [i | (i,mv) <- IntMap.toList ms, not (isResolved mv)] - in if List.null xs - then Ok metaid ms () - else Fail (UnresolvedMetaVars (scopeVars scope) e xs)) - where - isResolved (MUnbound _ []) = True - isResolved (MGuarded _ _ _) = True - isResolved (MBound _) = True - isResolved _ = False - ------------------------------------------------------ --- evalType ------------------------------------------------------ - -evalType :: Int -> TType -> TcM Type -evalType k (TTyp delta ty) = do funs <- getFuns - refineType (evalTy funs k delta ty) - where - evalTy sig k delta (DTyp hyps cat es) = - let ((k1,delta1),hyps1) = mapAccumL (evalHypo sig) (k,delta) hyps - in DTyp hyps1 cat (List.map (normalForm sig k1 delta1) es) - - evalHypo sig (k,delta) (b,x,ty) = - if x == wildCId - then ((k, delta),(b,x,evalTy sig k delta ty)) - else ((k+1,(VGen k []):delta),(b,x,evalTy sig k delta ty)) - - ------------------------------------------------------ --- refinement ------------------------------------------------------ - -refineExpr :: Expr -> TcM Expr -refineExpr e = TcM (\abstr metaid ms -> Ok metaid ms (refineExpr_ ms e)) - -refineExpr_ ms e = refine e - where - refine (EAbs b x e) = EAbs b x (refine e) - refine (EApp e1 e2) = EApp (refine e1) (refine e2) - refine (ELit l) = ELit l - refine (EMeta i) = case IntMap.lookup i ms of - Just (MBound e ) -> refine e - Just (MGuarded e _ _) -> refine e - _ -> EMeta i - refine (EFun f) = EFun f - refine (EVar i) = EVar i - refine (ETyped e ty) = ETyped (refine e) (refineType_ ms ty) - refine (EImplArg e) = EImplArg (refine e) - -refineType :: Type -> TcM Type -refineType ty = TcM (\abstr metaid ms -> Ok metaid ms (refineType_ ms ty)) - -refineType_ ms (DTyp hyps cat es) = DTyp [(b,x,refineType_ ms ty) | (b,x,ty) <- hyps] cat (List.map (refineExpr_ ms) es) - -value2expr sig i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr sig i) vs) -value2expr sig i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr sig i) vs) -value2expr sig i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr sig i) vs) -value2expr sig i (VSusp j env vs k) = value2expr sig i (k (VGen j vs)) -value2expr sig i (VLit l) = ELit l -value2expr sig i (VClosure env (EAbs b x e)) = EAbs b x (value2expr sig (i+1) (eval sig ((VGen i []):env) e)) diff --git a/src/PGF/VisualizeTree.hs b/src/PGF/VisualizeTree.hs deleted file mode 100644 index 429551f54..000000000 --- a/src/PGF/VisualizeTree.hs +++ /dev/null @@ -1,353 +0,0 @@ ----------------------------------------------------------------------- --- | --- Module : VisualizeTree --- Maintainer : AR --- Stability : (stable) --- Portability : (portable) --- --- > CVS $Date: --- > CVS $Author: --- > CVS $Revision: --- --- Print a graph of an abstract syntax tree in Graphviz DOT format --- Based on BB's VisualizeGrammar --- FIXME: change this to use GF.Visualization.Graphviz, --- instead of rolling its own. ------------------------------------------------------------------------------ - -module PGF.VisualizeTree ( graphvizAbstractTree - , graphvizParseTree - , graphvizDependencyTree - , graphvizAlignment - , tree2mk - , getDepLabels - , PosText(..), readPosText - ) where - -import PGF.CId (CId,showCId,pCId,mkCId) -import PGF.Data -import PGF.Tree -import PGF.Expr (showExpr) -import PGF.Linearize -import PGF.Macros (lookValCat) - -import qualified Data.Map as Map -import Data.List (intersperse,nub,isPrefixOf,sort,sortBy) -import Data.Char (isDigit) -import qualified Text.ParserCombinators.ReadP as RP - -import Debug.Trace - -graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String -graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree - -tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String] -tree2graph pgf (funs,cats) = prf [] where - prf ps t = let (nod,lab) = prn ps t in - (nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") : - case t of - Fun cid trees -> - [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++ - concat [prf (j:ps) t | (j,t) <- zip [0..] trees] - Abs xs (Fun cid trees) -> - [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++ - concat [prf (j:ps) t | (j,t) <- zip [0..] trees] - _ -> [] - prn ps t = case t of - Fun cid _ -> - let - fun = if funs then showCId cid else "" - cat = if cats then prCat cid else "" - colon = if funs && cats then " : " else "" - lab = "\"" ++ fun ++ colon ++ cat ++ "\"" - in (show(show (ps :: [Int])),lab) - Abs bs tree -> - let fun = case tree of - Fun cid _ -> Fun cid [] - _ -> tree - in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"") - _ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"") - pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];" - arr = " -- " -- if digr then " -> " else " -- " - prCat = showCId . lookValCat pgf - esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts - -prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where - graph = if digr then "digraph" else "graph" - - --- replace each non-atomic constructor with mkC, where C is the val cat -tree2mk :: PGF -> Expr -> String -tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where - t2m t = case t of - Fun cid [] -> t - Fun cid ts -> Fun (mk cid) (map t2m ts) - _ -> t - mk = mkCId . ("mk" ++) . showCId . lookValCat pgf - --- dependency trees from Linearize.linearizeMark - -graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String -graphvizDependencyTree format debug mlab ms pgf lang exp = case format of - "malt" -> unlines (lin2dep format) - "malt_input" -> unlines (lin2dep format) - _ -> prGraph True (lin2dep format) - - where - - lin2dep format = trace (ifd (show sortedNodes ++ show nodeWords)) $ case format of - "malt" -> map (concat . intersperse "\t") wnodes - "malt_input" -> map (concat . intersperse "\t" . take 6) wnodes - _ -> prelude ++ nodes ++ links - - ifd s = if debug then s else [] - - pot = readPosText $ head $ linearizesMark pgf lang exp - ---- use Just str if you have str to match against - - prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"] - - nodes = map mkNode nodeWords - mkNode (i,((_,p),ss)) = - node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;" - nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)| - ((Just f,p),w) <- wlins pot] - - links = map mkLink thelinks - thelinks = [(word y, x, label tr y x) | - (_,((f,x),_)) <- tail nodeWords, - let y = dominant x] - mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;" - node = show . show - - dominant x = case x of - [] -> x - _ | not (x == hx) -> hx - _ -> dominant (init x) - where - hx = headArg (init x) tr x - - headArg x0 tr x = case (tr,x) of - (Fun f [],[_]) -> x0 ---- ?? - (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f] - (Fun f ts,i:y) -> headArg x0 (ts !! i) y - _ -> x0 ---- - - label tr y x = case span (uncurry (==)) (zip y x) of - (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys)) - _ -> "" ---- - - funAt tr x = case (tr,x) of - (Fun f _ ,[]) -> f - (Fun f ts,i:y) -> funAt (ts !! i) y - _ -> mkCId (prTree tr) ---- - - word x = if elem x sortedNodes then x else - let x' = headArg x tr (x ++[0]) in - if x' == x then [] else word x' - - tr = expr2tree exp - sortedNodes = [p | (_,((_,p),_)) <- nodeWords] - - labels = maybe Map.empty id mlab - getHead i f = case Map.lookup f labels of - Just ls -> length $ takeWhile (/= "head") ls - _ -> i - getLabel i f = case Map.lookup f labels of - Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i - _ -> showCId f ++ "#" ++ show i - --- to generate CoNLL format for MaltParser - nodeMap :: Map.Map [Int] Int - nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords] - - arcMap :: Map.Map [Int] ([Int],String) - arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks] - - lookDomLab p = case Map.lookup p arcMap of - Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l) - _ -> (0,rootlabel) - - wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] | - (i, ((fun,p),ws)) <- tail nodeWords, - let pos = showCId $ lookValCat pgf fun, - let morph = unspec, - let (dom,lab) = lookDomLab p - ] - maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2 - unspec = "_" - rootlabel = "ROOT" - -type Labels = Map.Map CId [String] - -getDepLabels :: [String] -> Labels -getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss] - - --- parse trees from Linearize.linearizeMark ----- nubrec and domins are quadratic, but could be (n log n) - -graphvizParseTree :: PGF -> CId -> Expr -> String -graphvizParseTree pgf lang = prGraph False . lin2tree pgf . linMark where - linMark = head . linearizesMark pgf lang - ---- use Just str if you have str to match against - -lin2tree pgf s = trace s $ prelude ++ nodes ++ links where - - prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"] - - nodeRecs = zip [0..] - (nub (filter (not . null) (nlins [postext] ++ [leaves postext]))) - nlins pts = - nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] : - concatMap nlins [ts | T _ ts <- pts] - leaves pt = [(p++[j],s) | (j,(p,s)) <- - zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]] - - nubrec es rs = case rs of - r:rr -> let r' = filter (not . flip elem es) (nub r) - in r' : nubrec (r' ++ es) rr - _ -> rs - - nodes = map mkStruct nodeRecs - - mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;" - cat = showCId . lookValCat pgf - fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs]) - struct i = "struct" ++ show i - - links = map mkEdge domins - domins = nub [((i,x),(j,y)) | - (i,xs) <- nodeRecs, (j,ys) <- nodeRecs, - x <- xs, y <- ys, dominates x y] - dominates (p,x) (q,y) = not (null q) && p == init q - mkEdge ((i,x),(j,y)) = - struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++ - struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;" - - postext = readPosText s - --- auxiliaries for graphviz syntax -struct i = "struct" ++ show i -mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n -uncommas = map (\c -> if c==',' then 'c' else c) -tag s = "<" ++ s ++ ">" -showp = init . tail . show -mtag = tag . ('n':) . uncommas - --- word alignments from Linearize.linearizesMark --- words are chunks like {[0,1,1,0] old} - -graphvizAlignment :: PGF -> Expr -> String -graphvizAlignment pgf = prGraph True . lin2graph . linsMark where - linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)] - -lin2graph :: [String] -> [String] -lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links - - where - - prelude = ["rankdir=LR ;", "node [shape = record] ;"] - - nlins :: [(Int,[((Int,String),String)])] - nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) | - (i,ws) <- zip [0..] (map (wlins . readPosText) ss)] - - unw = concat . intersperse "\\ " -- space escape in graphviz - - nodes = map mkStruct nlins - - mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" - - fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) - - links = nub $ concatMap mkEdge (init nlins) - - mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list - [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] - - edge i v w = - struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" -{- -alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double) -alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where - linsMark t = - [s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)] - - mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double) - mkStat = - - mkAlign :: [String] -> [(String,String)] - mkAlign ss = - - nlins :: [(Int,[((Int,String),String)])] - nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) | - (i,vs) <- zip [0..] (map (wlins . readPosText) ss)] - - nodes = map mkStruct nlins - - mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" - - fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) - - links = nub $ concatMap mkEdge (init nlins) - - mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list - [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] - - edge i v w = - struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" --} - -wlins :: PosText -> [((Maybe CId,[Int]),[String])] -wlins pt = case pt of - T p pts -> concatMap (lins p) pts - M ws -> if null ws then [] else [((Nothing,[]),ws)] - where - lins p pt = case pt of - T q pts -> concatMap (lins q) pts - M ws -> if null ws then [] else [(p,ws)] - -data PosText = - T (Maybe CId,[Int]) [PosText] - | M [String] - deriving Show - -readPosText :: String -> PosText -readPosText = fst . head . (RP.readP_to_S pPosText) where - pPosText = do - RP.char '(' >> RP.skipSpaces - p <- pPos - RP.skipSpaces - ts <- RP.many pPosText - RP.char ')' >> RP.skipSpaces - return (T p ts) - RP.<++ do - ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ') - return (M ws) - pPos = do - fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f)) - RP.<++ (return Nothing) - RP.char '[' >> RP.skipSpaces - is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',') - RP.char ']' >> RP.skipSpaces - RP.char ')' RP.<++ return ' ' - return (fun,map read is) - - -{- -digraph{ -rankdir ="LR" ; -node [shape = record] ; - -struct1 [label = "<f0> this|<f1> very|<f2> intelligent|<f3> man"] ; -struct2 [label = "<f0> cet|<f1> homme|<f2> tres|<f3> intelligent|<f4> ci"] ; - -struct1:f0 -> struct2:f0 ; -struct1:f1 -> struct2:f2 ; -struct1:f2 -> struct2:f3 ; -struct1:f3 -> struct2:f1 ; -struct1:f0 -> struct2:f4 ; -} --} - diff --git a/src/PGF/doc/Eng.gf b/src/PGF/doc/Eng.gf deleted file mode 100644 index c64f46313..000000000 --- a/src/PGF/doc/Eng.gf +++ /dev/null @@ -1,13 +0,0 @@ -concrete Eng of Ex = { - lincat - S = {s : Str} ; - NP = {s : Str ; n : Num} ; - VP = {s : Num => Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = {s = np.s ++ vp.s ! np.n} ; - She = {s = "she" ; n = Sg} ; - They = {s = "they" ; n = Pl} ; - Sleep = {s = table {Sg => "sleeps" ; Pl => "sleep"}} ; -} diff --git a/src/PGF/doc/Ex.gf b/src/PGF/doc/Ex.gf deleted file mode 100644 index bd0b03483..000000000 --- a/src/PGF/doc/Ex.gf +++ /dev/null @@ -1,8 +0,0 @@ -abstract Ex = { - cat - S ; NP ; VP ; - fun - Pred : NP -> VP -> S ; - She, They : NP ; - Sleep : VP ; -} diff --git a/src/PGF/doc/Swe.gf b/src/PGF/doc/Swe.gf deleted file mode 100644 index 1d6672371..000000000 --- a/src/PGF/doc/Swe.gf +++ /dev/null @@ -1,13 +0,0 @@ -concrete Swe of Ex = { - lincat - S = {s : Str} ; - NP = {s : Str} ; - VP = {s : Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = {s = np.s ++ vp.s} ; - She = {s = "hon"} ; - They = {s = "de"} ; - Sleep = {s = "sover"} ; -} diff --git a/src/PGF/doc/Test.gf b/src/PGF/doc/Test.gf deleted file mode 100644 index 5cd4c5474..000000000 --- a/src/PGF/doc/Test.gf +++ /dev/null @@ -1,64 +0,0 @@ --- to test GFCC compilation - -flags coding=utf8 ; - -cat S ; NP ; N ; VP ; - -fun Pred : NP -> VP -> S ; -fun Pred2 : NP -> VP -> NP -> S ; -fun Det, Dets : N -> NP ; -fun Mina, Sina, Me, Te : NP ; -fun Raha, Paska, Pallo : N ; -fun Puhua, Munia, Sanoa : VP ; - -param Person = P1 | P2 | P3 ; -param Number = Sg | Pl ; -param Case = Nom | Part ; - -param NForm = NF Number Case ; -param VForm = VF Number Person ; - -lincat N = Noun ; -lincat VP = Verb ; - -oper Noun = {s : NForm => Str} ; -oper Verb = {s : VForm => Str} ; - -lincat NP = {s : Case => Str ; a : {n : Number ; p : Person}} ; - -lin Pred np vp = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p} ; -lin Pred2 np vp ob = {s = np.s ! Nom ++ vp.s ! VF np.a.n np.a.p ++ ob.s ! Part} ; -lin Det no = {s = \\c => no.s ! NF Sg c ; a = {n = Sg ; p = P3}} ; -lin Dets no = {s = \\c => no.s ! NF Pl c ; a = {n = Pl ; p = P3}} ; -lin Mina = {s = table Case ["minä" ; "minua"] ; a = {n = Sg ; p = P1}} ; -lin Te = {s = table Case ["te" ; "teitä"] ; a = {n = Pl ; p = P2}} ; -lin Sina = {s = table Case ["sinä" ; "sinua"] ; a = {n = Sg ; p = P2}} ; -lin Me = {s = table Case ["me" ; "meitä"] ; a = {n = Pl ; p = P1}} ; - -lin Raha = mkN "raha" ; -lin Paska = mkN "paska" ; -lin Pallo = mkN "pallo" ; -lin Puhua = mkV "puhu" ; -lin Munia = mkV "muni" ; -lin Sanoa = mkV "sano" ; - -oper mkN : Str -> Noun = \raha -> { - s = table { - NF Sg Nom => raha ; - NF Sg Part => raha + "a" ; - NF Pl Nom => raha + "t" ; - NF Pl Part => Predef.tk 1 raha + "oja" - } - } ; - -oper mkV : Str -> Verb = \puhu -> { - s = table { - VF Sg P1 => puhu + "n" ; - VF Sg P2 => puhu + "t" ; - VF Sg P3 => puhu + Predef.dp 1 puhu ; - VF Pl P1 => puhu + "mme" ; - VF Pl P2 => puhu + "tte" ; - VF Pl P3 => puhu + "vat" - } - } ; - diff --git a/src/PGF/doc/gfcc.html b/src/PGF/doc/gfcc.html deleted file mode 100644 index 8f8c478c0..000000000 --- a/src/PGF/doc/gfcc.html +++ /dev/null @@ -1,809 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<HTML> -<HEAD> -<META NAME="generator" CONTENT="http://txt2tags.sf.net"> -<TITLE>The GFCC Grammar Format</TITLE> -</HEAD><BODY BGCOLOR="white" TEXT="black"> -<P ALIGN="center"><CENTER><H1>The GFCC Grammar Format</H1> -<FONT SIZE="4"> -<I>Aarne Ranta</I><BR> -October 5, 2007 -</FONT></CENTER> - -<P> -Author's address: -<A HREF="http://www.cs.chalmers.se/~aarne"><CODE>http://www.cs.chalmers.se/~aarne</CODE></A> -</P> -<P> -History: -</P> -<UL> -<LI>5 Oct 2007: new, better structured GFCC with full expressive power -<LI>19 Oct: translation of lincats, new figures on C++ -<LI>3 Oct 2006: first version -</UL> - -<H2>What is GFCC</H2> -<P> -GFCC is a low-level format for GF grammars. Its aim is to contain the minimum -that is needed to process GF grammars at runtime. This minimality has three -advantages: -</P> -<UL> -<LI>compact grammar files and run-time objects -<LI>time and space efficient processing -<LI>simple definition of interpreters -</UL> - -<P> -Thus we also want to call GFCC the <B>portable grammar format</B>. -</P> -<P> -The idea is that all embedded GF applications use GFCC. -The GF system would be primarily used as a compiler and as a grammar -development tool. -</P> -<P> -Since GFCC is implemented in BNFC, a parser of the format is readily -available for C, C++, C#, Haskell, Java, and OCaml. Also an XML -representation can be generated in BNFC. A -<A HREF="../">reference implementation</A> -of linearization and some other functions has been written in Haskell. -</P> -<H2>GFCC vs. GFC</H2> -<P> -GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed -to be a run-time format, but also to -support separate compilation of grammars, i.e. -to store the results of compiling -individual GF modules. But this means that GFC has to contain extra information, -such as type annotations, which is only needed in compilation and not at -run-time. In particular, the pattern matching syntax and semantics of GFC is -complex and therefore difficult to implement in new platforms. -</P> -<P> -Actually, GFC is planned to be omitted also as the target format of -separate compilation, where plain GF (type annotated and partially evaluated) -will be used instead. GFC provides only marginal advantages as a target format -compared with GF, and it is therefore just extra weight to carry around this -format. -</P> -<P> -The main differences of GFCC compared with GFC (and GF) can be summarized as follows: -</P> -<UL> -<LI>there are no modules, and therefore no qualified names -<LI>a GFCC grammar is multilingual, and consists of a common abstract syntax - together with one concrete syntax per language -<LI>records and tables are replaced by arrays -<LI>record labels and parameter values are replaced by integers -<LI>record projection and table selection are replaced by array indexing -<LI>even though the format does support dependent types and higher-order abstract - syntax, there is no interpreted yet that does this -</UL> - -<P> -Here is an example of a GF grammar, consisting of three modules, -as translated to GFCC. The representations are aligned; thus they do not completely -reflect the order of judgements in GFCC files, which have different orders of -blocks of judgements, and alphabetical sorting. -</P> -<PRE> - grammar Ex(Eng,Swe); - - abstract Ex = { abstract { - cat cat - S ; NP ; VP ; NP[]; S[]; VP[]; - fun fun - Pred : NP -> VP -> S ; Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))]; - She, They : NP ; She=[0,"she"]; - Sleep : VP ; They=[1,"they"]; - Sleep=[["sleeps","sleep"]]; - } } ; - - concrete Eng of Ex = { concrete Eng { - lincat lincat - S = {s : Str} ; S=[()]; - NP = {s : Str ; n : Num} ; NP=[1,()]; - VP = {s : Num => Str} ; VP=[[(),()]]; - param - Num = Sg | Pl ; - lin lin - Pred np vp = { Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))]; - s = np.s ++ vp.s ! np.n} ; - She = {s = "she" ; n = Sg} ; She=[0,"she"]; - They = {s = "they" ; n = Pl} ; They = [1, "they"]; - Sleep = {s = table { Sleep=[["sleeps","sleep"]]; - Sg => "sleeps" ; - Pl => "sleep" - } - } ; - } } ; - - concrete Swe of Ex = { concrete Swe { - lincat lincat - S = {s : Str} ; S=[()]; - NP = {s : Str} ; NP=[()]; - VP = {s : Str} ; VP=[()]; - param - Num = Sg | Pl ; - lin lin - Pred np vp = { Pred = [(($0!0),($1!0))]; - s = np.s ++ vp.s} ; - She = {s = "hon"} ; She = ["hon"]; - They = {s = "de"} ; They = ["de"]; - Sleep = {s = "sover"} ; Sleep = ["sover"]; - } } ; -</PRE> -<P></P> -<H2>The syntax of GFCC files</H2> -<P> -The complete BNFC grammar, from which -the rules in this section are taken, is in the file -<A HREF="../DataGFCC.cf"><CODE>GF/GFCC/GFCC.cf</CODE></A>. -</P> -<H3>Top level</H3> -<P> -A grammar has a header telling the name of the abstract syntax -(often specifying an application domain), and the names of -the concrete languages. The abstract syntax and the concrete -syntaxes themselves follow. -</P> -<PRE> - Grm. Grammar ::= - "grammar" CId "(" [CId] ")" ";" - Abstract ";" - [Concrete] ; - - Abs. Abstract ::= - "abstract" "{" - "flags" [Flag] - "fun" [FunDef] - "cat" [CatDef] - "}" ; - - Cnc. Concrete ::= - "concrete" CId "{" - "flags" [Flag] - "lin" [LinDef] - "oper" [LinDef] - "lincat" [LinDef] - "lindef" [LinDef] - "printname" [LinDef] - "}" ; -</PRE> -<P> -This syntax organizes each module to a sequence of <B>fields</B>, such -as flags, linearizations, operations, linearization types, etc. -It is envisaged that particular applications can ignore some -of the fields, typically so that earlier fields are more -important than later ones. -</P> -<P> -The judgement forms have the following syntax. -</P> -<PRE> - Flg. Flag ::= CId "=" String ; - Cat. CatDef ::= CId "[" [Hypo] "]" ; - Fun. FunDef ::= CId ":" Type "=" Exp ; - Lin. LinDef ::= CId "=" Term ; -</PRE> -<P> -For the run-time system, the reference implementation in Haskell -uses a structure that gives efficient look-up: -</P> -<PRE> - data GFCC = GFCC { - absname :: CId , - cncnames :: [CId] , - abstract :: Abstr , - concretes :: Map CId Concr - } - - data Abstr = Abstr { - aflags :: Map CId String, -- value of a flag - funs :: Map CId (Type,Exp), -- type and def of a fun - cats :: Map CId [Hypo], -- context of a cat - catfuns :: Map CId [CId] -- funs yielding a cat (redundant, for fast lookup) - } - - data Concr = Concr { - flags :: Map CId String, -- value of a flag - lins :: Map CId Term, -- lin of a fun - opers :: Map CId Term, -- oper generated by subex elim - lincats :: Map CId Term, -- lin type of a cat - lindefs :: Map CId Term, -- lin default of a cat - printnames :: Map CId Term -- printname of a cat or a fun - } -</PRE> -<P> -These definitions are from <A HREF="../DataGFCC.hs"><CODE>GF/GFCC/DataGFCC.hs</CODE></A>. -</P> -<P> -Identifiers (<CODE>CId</CODE>) are like <CODE>Ident</CODE> in GF, except that -the compiler produces constants prefixed with <CODE>_</CODE> in -the common subterm elimination optimization. -</P> -<PRE> - token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; -</PRE> -<P></P> -<H3>Abstract syntax</H3> -<P> -Types are first-order function types built from argument type -contexts and value types. -category symbols. Syntax trees (<CODE>Exp</CODE>) are -rose trees with nodes consisting of a head (<CODE>Atom</CODE>) and -bound variables (<CODE>CId</CODE>). -</P> -<PRE> - DTyp. Type ::= "[" [Hypo] "]" CId [Exp] ; - DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ; - Hyp. Hypo ::= CId ":" Type ; -</PRE> -<P> -The head Atom is either a function -constant, a bound variable, or a metavariable, or a string, integer, or float -literal. -</P> -<PRE> - AC. Atom ::= CId ; - AS. Atom ::= String ; - AI. Atom ::= Integer ; - AF. Atom ::= Double ; - AM. Atom ::= "?" Integer ; -</PRE> -<P> -The context-free types and trees of the "old GFCC" are special -cases, which can be defined as follows: -</P> -<PRE> - Typ. Type ::= [CId] "->" CId - Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val - - Tr. Exp ::= "(" CId [Exp] ")" - Tr fun exps = DTr [] fun exps -</PRE> -<P> -To store semantic (<CODE>def</CODE>) definitions by cases, the following expression -form is provided, but it is only meaningful in the last field of a function -declaration in an abstract syntax: -</P> -<PRE> - EEq. Exp ::= "{" [Equation] "}" ; - Equ. Equation ::= [Exp] "->" Exp ; -</PRE> -<P> -Notice that expressions are used to encode patterns. Primitive notions -(the default semantics in GF) are encoded as empty sets of equations -(<CODE>[]</CODE>). For a constructor (canonical form) of a category <CODE>C</CODE>, we -aim to use the encoding as the application <CODE>(_constr C)</CODE>. -</P> -<H3>Concrete syntax</H3> -<P> -Linearization terms (<CODE>Term</CODE>) are built as follows. -Constructor names are shown to make the later code -examples readable. -</P> -<PRE> - R. Term ::= "[" [Term] "]" ; -- array (record/table) - P. Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection) - S. Term ::= "(" [Term] ")" ; -- concatenated sequence - K. Term ::= Tokn ; -- token - V. Term ::= "$" Integer ; -- argument (subtree) - C. Term ::= Integer ; -- array index (label/parameter value) - FV. Term ::= "[|" [Term] "|]" ; -- free variation - TM. Term ::= "?" ; -- linearization of metavariable -</PRE> -<P> -Tokens are strings or (maybe obsolescent) prefix-dependent -variant lists. -</P> -<PRE> - KS. Tokn ::= String ; - KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; - Var. Variant ::= [String] "/" [String] ; -</PRE> -<P> -Two special forms of terms are introduced by the compiler -as optimizations. They can in principle be eliminated, but -their presence makes grammars much more compact. Their semantics -will be explained in a later section. -</P> -<PRE> - F. Term ::= CId ; -- global constant - W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table -</PRE> -<P> -There is also a deprecated form of "record parameter alias", -</P> -<PRE> - RP. Term ::= "(" Term "@" Term ")"; -- DEPRECATED -</PRE> -<P> -which will be removed when the migration to new GFCC is complete. -</P> -<H2>The semantics of concrete syntax terms</H2> -<P> -The code in this section is from <A HREF="../Linearize.hs"><CODE>GF/GFCC/Linearize.hs</CODE></A>. -</P> -<H3>Linearization and realization</H3> -<P> -The linearization algorithm is essentially the same as in -GFC: a tree is linearized by evaluating its linearization term -in the environment of the linearizations of the subtrees. -Literal atoms are linearized in the obvious way. -The function also needs to know the language (i.e. concrete syntax) -in which linearization is performed. -</P> -<PRE> - linExp :: GFCC -> CId -> Exp -> Term - linExp gfcc lang tree@(DTr _ at trees) = case at of - AC fun -> comp (Prelude.map lin trees) $ look fun - AS s -> R [kks (show s)] -- quoted - AI i -> R [kks (show i)] - AF d -> R [kks (show d)] - AM -> TM - where - lin = linExp gfcc lang - comp = compute gfcc lang - look = lookLin gfcc lang -</PRE> -<P> -TODO: bindings must be supported. -</P> -<P> -The result of linearization is usually a record, which is realized as -a string using the following algorithm. -</P> -<PRE> - realize :: Term -> String - realize trm = case trm of - R (t:_) -> realize t - S ss -> unwords $ Prelude.map realize ss - K (KS s) -> s - K (KP s _) -> unwords s ---- prefix choice TODO - W s t -> s ++ realize t - FV (t:_) -> realize t - TM -> "?" -</PRE> -<P> -Notice that realization always picks the first field of a record. -If a linearization type has more than one field, the first field -does not necessarily contain the desired string. -Also notice that the order of record fields in GFCC is not necessarily -the same as in GF source. -</P> -<H3>Term evaluation</H3> -<P> -Evaluation follows call-by-value order, with two environments -needed: -</P> -<UL> -<LI>the grammar (a concrete syntax) to give the global constants -<LI>an array of terms to give the subtree linearizations -</UL> - -<P> -The code is presented in one-level pattern matching, to -enable reimplementations in languages that do not permit -deep patterns (such as Java and C++). -</P> -<PRE> - compute :: GFCC -> CId -> [Term] -> Term -> Term - compute gfcc lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ Prelude.map comp ts - V i -> idx args (fromInteger i) -- already computed - F c -> comp $ look c -- not computed (if contains V) - FV ts -> FV $ Prelude.map comp ts - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - _ -> trm - - look = lookOper gfcc lang - - idx xs i = xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ Prelude.map (proj r) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p - TM -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - - getField t i = case t of - R rs -> idx rs i - RP _ r -> getField r i - TM -> TM - _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t -</PRE> -<P></P> -<H3>The special term constructors</H3> -<P> -The three forms introduced by the compiler may a need special -explanation. -</P> -<P> -Global constants -</P> -<PRE> - Term ::= CId ; -</PRE> -<P> -are shorthands for complex terms. They are produced by the -compiler by (iterated) <B>common subexpression elimination</B>. -They are often more powerful than hand-devised code sharing in the source -code. They could be computed off-line by replacing each identifier by -its definition. -</P> -<P> -<B>Prefix-suffix tables</B> -</P> -<PRE> - Term ::= "(" String "+" Term ")" ; -</PRE> -<P> -represent tables of word forms divided to the longest common prefix -and its array of suffixes. In the example grammar above, we have -</P> -<PRE> - Sleep = [("sleep" + ["s",""])] -</PRE> -<P> -which in fact is equal to the array of full forms -</P> -<PRE> - ["sleeps", "sleep"] -</PRE> -<P> -The power of this construction comes from the fact that suffix sets -tend to be repeated in a language, and can therefore be collected -by common subexpression elimination. It is this technique that -explains the used syntax rather than the more accurate -</P> -<PRE> - "(" String "+" [String] ")" -</PRE> -<P> -since we want the suffix part to be a <CODE>Term</CODE> for the optimization to -take effect. -</P> -<H2>Compiling to GFCC</H2> -<P> -Compilation to GFCC is performed by the GF grammar compiler, and -GFCC interpreters need not know what it does. For grammar writers, -however, it might be interesting to know what happens to the grammars -in the process. -</P> -<P> -The compilation phases are the following -</P> -<OL> -<LI>type check and partially evaluate GF source -<LI>create a symbol table mapping the GF parameter and record types to - fixed-size arrays, and parameter values and record labels to integers -<LI>traverse the linearization rules replacing parameters and labels by integers -<LI>reorganize the created GF grammar so that it has just one abstract syntax - and one concrete syntax per language -<LI>TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the - <CODE>coding</CODE> flag) -<LI>translate the GF grammar object to a GFCC grammar object, using a simple - compositional mapping -<LI>perform the word-suffix optimization on GFCC linearization terms -<LI>perform subexpression elimination on each concrete syntax module -<LI>print out the GFCC code -</OL> - -<H3>Problems in GFCC compilation</H3> -<P> -Two major problems had to be solved in compiling GF to GFCC: -</P> -<UL> -<LI>consistent order of tables and records, to permit the array translation -<LI>run-time variables in complex parameter values. -</UL> - -<P> -The current implementation is still experimental and may fail -to generate correct code. Any errors remaining are likely to be -related to the two problems just mentioned. -</P> -<P> -The order problem is solved in slightly different ways for tables and records. -In both cases, <B>eta expansion</B> is used to establish a -canonical order. Tables are ordered by applying the preorder induced -by <CODE>param</CODE> definitions. Records are ordered by sorting them by labels. -This means that -e.g. the <CODE>s</CODE> field will in general no longer appear as the first -field, even if it does so in the GF source code. But relying on the -order of fields in a labelled record would be misplaced anyway. -</P> -<P> -The canonical form of records is further complicated by lock fields, -i.e. dummy fields of form <CODE>lock_C = <></CODE>, which are added to grammar -libraries to force intensionality of linearization types. The problem -is that the absence of a lock field only generates a warning, not -an error. Therefore a GF grammar can contain objects of the same -type with and without a lock field. This problem was solved in GFCC -generation by just removing all lock fields (defined as fields whose -type is the empty record type). This has the further advantage of -(slightly) reducing the grammar size. More importantly, it is safe -to remove lock fields, because they are never used in computation, -and because intensional types are only needed in grammars reused -as libraries, not in grammars used at runtime. -</P> -<P> -While the order problem is rather bureaucratic in nature, run-time -variables are an interesting problem. They arise in the presence -of complex parameter values, created by argument-taking constructors -and parameter records. To give an example, consider the GF parameter -type system -</P> -<PRE> - Number = Sg | Pl ; - Person = P1 | P2 | P3 ; - Agr = Ag Number Person ; -</PRE> -<P> -The values can be translated to integers in the expected way, -</P> -<PRE> - Sg = 0, Pl = 1 - P1 = 0, P2 = 1, P3 = 2 - Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2, - Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5 -</PRE> -<P> -However, an argument of <CODE>Agr</CODE> can be a run-time variable, as in -</P> -<PRE> - Ag np.n P3 -</PRE> -<P> -This expression must first be translated to a case expression, -</P> -<PRE> - case np.n of { - 0 => 2 ; - 1 => 5 - } -</PRE> -<P> -which can then be translated to the GFCC term -</P> -<PRE> - ([2,5] ! ($0 ! $1)) -</PRE> -<P> -assuming that the variable <CODE>np</CODE> is the first argument and that its -<CODE>Number</CODE> field is the second in the record. -</P> -<P> -This transformation of course has to be performed recursively, since -there can be several run-time variables in a parameter value: -</P> -<PRE> - Ag np.n np.p -</PRE> -<P> -A similar transformation would be possible to deal with the double -role of parameter records discussed above. Thus the type -</P> -<PRE> - RNP = {n : Number ; p : Person} -</PRE> -<P> -could be uniformly translated into the set <CODE>{0,1,2,3,4,5}</CODE> -as <CODE>Agr</CODE> above. Selections would be simple instances of indexing. -But any projection from the record should be translated into -a case expression, -</P> -<PRE> - rnp.n ===> - case rnp of { - 0 => 0 ; - 1 => 0 ; - 2 => 0 ; - 3 => 1 ; - 4 => 1 ; - 5 => 1 - } -</PRE> -<P> -To avoid the code bloat resulting from this, we have chosen to -deal with records by a <B>currying</B> transformation: -</P> -<PRE> - table {n : Number ; p : Person} {... ...} - ===> - table Number {Sg => table Person {...} ; table Person {...}} -</PRE> -<P> -This is performed when GFCC is generated. Selections with -records have to be treated likewise, -</P> -<PRE> - t ! r ===> t ! r.n ! r.p -</PRE> -<P></P> -<H3>The representation of linearization types</H3> -<P> -Linearization types (<CODE>lincat</CODE>) are not needed when generating with -GFCC, but they have been added to enable parser generation directly from -GFCC. The linearization type definitions are shown as a part of the -concrete syntax, by using terms to represent types. Here is the table -showing how different linearization types are encoded. -</P> -<PRE> - P* = max(P) -- parameter type - {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- record - (P => T)* = [T* ,...,T*] -- table, size(P) cases - Str* = () -</PRE> -<P> -For example, the linearization type <CODE>present/CatEng.NP</CODE> is -translated as follows: -</P> -<PRE> - NP = { - a : { -- 6 = 2*3 values - n : {ParamX.Number} ; -- 2 values - p : {ParamX.Person} -- 3 values - } ; - s : {ResEng.Case} => Str -- 3 values - } - - __NP = [[1,2],[(),(),()]] -</PRE> -<P></P> -<H3>Running the compiler and the GFCC interpreter</H3> -<P> -GFCC generation is a part of the -<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">developers' version</A> -of GF since September 2006. To invoke the compiler, the flag -<CODE>-printer=gfcc</CODE> to the command -<CODE>pm = print_multi</CODE> is used. It is wise to recompile the grammar from -source, since previously compiled libraries may not obey the canonical -order of records. -Here is an example, performed in -<A HREF="../../../../../examples/bronzeage">example/bronzeage</A>. -</P> -<PRE> - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf - strip - pm -printer=gfcc | wf bronze.gfcc -</PRE> -<P> -There is also an experimental batch compiler, which does not use the GFC -format or the record aliases. It can be produced by -</P> -<PRE> - make gfc -</PRE> -<P> -in <CODE>GF/src</CODE>, and invoked by -</P> -<PRE> - gfc --make FILES -</PRE> -<P></P> -<H2>The reference interpreter</H2> -<P> -The reference interpreter written in Haskell consists of the following files: -</P> -<PRE> - -- source file for BNFC - GFCC.cf -- labelled BNF grammar of gfcc - - -- files generated by BNFC - AbsGFCC.hs -- abstrac syntax datatypes - ErrM.hs -- error monad used internally - LexGFCC.hs -- lexer of gfcc files - ParGFCC.hs -- parser of gfcc files and syntax trees - PrintGFCC.hs -- printer of gfcc files and syntax trees - - -- hand-written files - DataGFCC.hs -- grammar datatype, post-parser grammar creation - Linearize.hs -- linearization and evaluation - Macros.hs -- utilities abstracting away from GFCC datatypes - Generate.hs -- random and exhaustive generation, generate-and-test parsing - API.hs -- functionalities accessible in embedded GF applications - Generate.hs -- random and exhaustive generation - Shell.hs -- main function - a simple command interpreter -</PRE> -<P> -It is included in the -<A HREF="http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html">developers' version</A> -of GF, in the subdirectories <A HREF="../"><CODE>GF/src/GF/GFCC</CODE></A> and -<A HREF="../../Devel"><CODE>GF/src/GF/Devel</CODE></A>. -</P> -<P> -As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir -Angelov). The interpreter uses the relevant modules -</P> -<PRE> - GF/Conversions/SimpleToFCFG.hs -- generate parser from GFCC - GF/Parsing/FCFG.hs -- run the parser -</PRE> -<P></P> -<P> -To compile the interpreter, type -</P> -<PRE> - make gfcc -</PRE> -<P> -in <CODE>GF/src</CODE>. To run it, type -</P> -<PRE> - ./gfcc <GFCC-file> -</PRE> -<P> -The available commands are -</P> -<UL> -<LI><CODE>gr <Cat> <Int></CODE>: generate a number of random trees in category. - and show their linearizations in all languages -<LI><CODE>grt <Cat> <Int></CODE>: generate a number of random trees in category. - and show the trees and their linearizations in all languages -<LI><CODE>gt <Cat> <Int></CODE>: generate a number of trees in category from smallest, - and show their linearizations in all languages -<LI><CODE>gtt <Cat> <Int></CODE>: generate a number of trees in category from smallest, - and show the trees and their linearizations in all languages -<LI><CODE>p <Lang> <Cat> <String></CODE>: parse a string into a set of trees -<LI><CODE>lin <Tree></CODE>: linearize tree in all languages, also showing full records -<LI><CODE>q</CODE>: terminate the system cleanly -</UL> - -<H2>Embedded formats</H2> -<UL> -<LI>JavaScript: compiler of linearization and abstract syntax -<P></P> -<LI>Haskell: compiler of abstract syntax and interpreter with parsing, - linearization, and generation -<P></P> -<LI>C: compiler of linearization (old GFCC) -<P></P> -<LI>C++: embedded interpreter supporting linearization (old GFCC) -</UL> - -<H2>Some things to do</H2> -<P> -Support for dependent types, higher-order abstract syntax, and -semantic definition in GFCC generation and interpreters. -</P> -<P> -Replacing the entire GF shell by one based on GFCC. -</P> -<P> -Interpreter in Java. -</P> -<P> -Hand-written parsers for GFCC grammars to reduce code size -(and efficiency?) of interpreters. -</P> -<P> -Binary format and/or file compression of GFCC output. -</P> -<P> -Syntax editor based on GFCC. -</P> -<P> -Rewriting of resource libraries in order to exploit the -word-suffix sharing better (depth-one tables, as in FM). -</P> - -<!-- html code generated by txt2tags 2.3 (http://txt2tags.sf.net) --> -<!-- cmdline: txt2tags -thtml gfcc.txt --> -</BODY></HTML> diff --git a/src/PGF/doc/gfcc.txt b/src/PGF/doc/gfcc.txt deleted file mode 100644 index 5dcf2fbdc..000000000 --- a/src/PGF/doc/gfcc.txt +++ /dev/null @@ -1,712 +0,0 @@ -The GFCC Grammar Format -Aarne Ranta -December 14, 2007 - -Author's address: -[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne] - -% to compile: txt2tags -thtml --toc gfcc.txt - -History: -- 14 Dec 2007: simpler, Lisp-like concrete syntax of GFCC -- 5 Oct 2007: new, better structured GFCC with full expressive power -- 19 Oct: translation of lincats, new figures on C++ -- 3 Oct 2006: first version - - -==What is GFCC== - -GFCC is a low-level format for GF grammars. Its aim is to contain the minimum -that is needed to process GF grammars at runtime. This minimality has three -advantages: -- compact grammar files and run-time objects -- time and space efficient processing -- simple definition of interpreters - - -Thus we also want to call GFCC the **portable grammar format**. - -The idea is that all embedded GF applications use GFCC. -The GF system would be primarily used as a compiler and as a grammar -development tool. - -Since GFCC is implemented in BNFC, a parser of the format is readily -available for C, C++, C#, Haskell, Java, and OCaml. Also an XML -representation can be generated in BNFC. A -[reference implementation ../] -of linearization and some other functions has been written in Haskell. - - -==GFCC vs. GFC== - -GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed -to be a run-time format, but also to -support separate compilation of grammars, i.e. -to store the results of compiling -individual GF modules. But this means that GFC has to contain extra information, -such as type annotations, which is only needed in compilation and not at -run-time. In particular, the pattern matching syntax and semantics of GFC is -complex and therefore difficult to implement in new platforms. - -Actually, GFC is planned to be omitted also as the target format of -separate compilation, where plain GF (type annotated and partially evaluated) -will be used instead. GFC provides only marginal advantages as a target format -compared with GF, and it is therefore just extra weight to carry around this -format. - -The main differences of GFCC compared with GFC (and GF) can be -summarized as follows: -- there are no modules, and therefore no qualified names -- a GFCC grammar is multilingual, and consists of a common abstract syntax - together with one concrete syntax per language -- records and tables are replaced by arrays -- record labels and parameter values are replaced by integers -- record projection and table selection are replaced by array indexing -- even though the format does support dependent types and higher-order abstract - syntax, there is no interpreted yet that does this - - - -Here is an example of a GF grammar, consisting of three modules, -as translated to GFCC. The representations are aligned; -thus they do not completely -reflect the order of judgements in GFCC files, which have different orders of -blocks of judgements, and alphabetical sorting. -``` - grammar Ex(Eng,Swe); - -abstract Ex = { abstract { - cat cat - S ; NP ; VP ; NP[]; S[]; VP[]; - fun fun - Pred : NP -> VP -> S ; Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))]; - She, They : NP ; She=[0,"she"]; - Sleep : VP ; They=[1,"they"]; - Sleep=[["sleeps","sleep"]]; -} } ; - -concrete Eng of Ex = { concrete Eng { - lincat lincat - S = {s : Str} ; S=[()]; - NP = {s : Str ; n : Num} ; NP=[1,()]; - VP = {s : Num => Str} ; VP=[[(),()]]; - param - Num = Sg | Pl ; - lin lin - Pred np vp = { Pred=[(($ 0! 1),(($ 1! 0)!($ 0! 0)))]; - s = np.s ++ vp.s ! np.n} ; - She = {s = "she" ; n = Sg} ; She=[0,"she"]; - They = {s = "they" ; n = Pl} ; They = [1, "they"]; - Sleep = {s = table { Sleep=[["sleeps","sleep"]]; - Sg => "sleeps" ; - Pl => "sleep" - } - } ; -} } ; - -concrete Swe of Ex = { concrete Swe { - lincat lincat - S = {s : Str} ; S=[()]; - NP = {s : Str} ; NP=[()]; - VP = {s : Str} ; VP=[()]; - param - Num = Sg | Pl ; - lin lin - Pred np vp = { Pred = [(($0!0),($1!0))]; - s = np.s ++ vp.s} ; - She = {s = "hon"} ; She = ["hon"]; - They = {s = "de"} ; They = ["de"]; - Sleep = {s = "sover"} ; Sleep = ["sover"]; -} } ; -``` - -==The syntax of GFCC files== - -The complete BNFC grammar, from which -the rules in this section are taken, is in the file -[``GF/GFCC/GFCC.cf`` ../DataGFCC.cf]. - - -===Top level=== - -A grammar has a header telling the name of the abstract syntax -(often specifying an application domain), and the names of -the concrete languages. The abstract syntax and the concrete -syntaxes themselves follow. -``` - Grm. Grammar ::= - "grammar" CId "(" [CId] ")" ";" - Abstract ";" - [Concrete] ; - - Abs. Abstract ::= - "abstract" "{" - "flags" [Flag] - "fun" [FunDef] - "cat" [CatDef] - "}" ; - - Cnc. Concrete ::= - "concrete" CId "{" - "flags" [Flag] - "lin" [LinDef] - "oper" [LinDef] - "lincat" [LinDef] - "lindef" [LinDef] - "printname" [LinDef] - "}" ; -``` -This syntax organizes each module to a sequence of **fields**, such -as flags, linearizations, operations, linearization types, etc. -It is envisaged that particular applications can ignore some -of the fields, typically so that earlier fields are more -important than later ones. - -The judgement forms have the following syntax. -``` - Flg. Flag ::= CId "=" String ; - Cat. CatDef ::= CId "[" [Hypo] "]" ; - Fun. FunDef ::= CId ":" Type "=" Exp ; - Lin. LinDef ::= CId "=" Term ; -``` -For the run-time system, the reference implementation in Haskell -uses a structure that gives efficient look-up: -``` - data GFCC = GFCC { - absname :: CId , - cncnames :: [CId] , - abstract :: Abstr , - concretes :: Map CId Concr - } - - data Abstr = Abstr { - aflags :: Map CId String, -- value of a flag - funs :: Map CId (Type,Exp), -- type and def of a fun - cats :: Map CId [Hypo], -- context of a cat - catfuns :: Map CId [CId] -- funs yielding a cat (redundant, for fast lookup) - } - - data Concr = Concr { - flags :: Map CId String, -- value of a flag - lins :: Map CId Term, -- lin of a fun - opers :: Map CId Term, -- oper generated by subex elim - lincats :: Map CId Term, -- lin type of a cat - lindefs :: Map CId Term, -- lin default of a cat - printnames :: Map CId Term -- printname of a cat or a fun - } -``` -These definitions are from [``GF/GFCC/DataGFCC.hs`` ../DataGFCC.hs]. - -Identifiers (``CId``) are like ``Ident`` in GF, except that -the compiler produces constants prefixed with ``_`` in -the common subterm elimination optimization. -``` - token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; -``` - - -===Abstract syntax=== - -Types are first-order function types built from argument type -contexts and value types. -category symbols. Syntax trees (``Exp``) are -rose trees with nodes consisting of a head (``Atom``) and -bound variables (``CId``). -``` - DTyp. Type ::= "[" [Hypo] "]" CId [Exp] ; - DTr. Exp ::= "[" "(" [CId] ")" Atom [Exp] "]" ; - Hyp. Hypo ::= CId ":" Type ; -``` -The head Atom is either a function -constant, a bound variable, or a metavariable, or a string, integer, or float -literal. -``` - AC. Atom ::= CId ; - AS. Atom ::= String ; - AI. Atom ::= Integer ; - AF. Atom ::= Double ; - AM. Atom ::= "?" Integer ; -``` -The context-free types and trees of the "old GFCC" are special -cases, which can be defined as follows: -``` - Typ. Type ::= [CId] "->" CId - Typ args val = DTyp [Hyp (CId "_") arg | arg <- args] val - - Tr. Exp ::= "(" CId [Exp] ")" - Tr fun exps = DTr [] fun exps -``` -To store semantic (``def``) definitions by cases, the following expression -form is provided, but it is only meaningful in the last field of a function -declaration in an abstract syntax: -``` - EEq. Exp ::= "{" [Equation] "}" ; - Equ. Equation ::= [Exp] "->" Exp ; -``` -Notice that expressions are used to encode patterns. Primitive notions -(the default semantics in GF) are encoded as empty sets of equations -(``[]``). For a constructor (canonical form) of a category ``C``, we -aim to use the encoding as the application ``(_constr C)``. - - - -===Concrete syntax=== - -Linearization terms (``Term``) are built as follows. -Constructor names are shown to make the later code -examples readable. -``` - R. Term ::= "[" [Term] "]" ; -- array (record/table) - P. Term ::= "(" Term "!" Term ")" ; -- access to field (projection/selection) - S. Term ::= "(" [Term] ")" ; -- concatenated sequence - K. Term ::= Tokn ; -- token - V. Term ::= "$" Integer ; -- argument (subtree) - C. Term ::= Integer ; -- array index (label/parameter value) - FV. Term ::= "[|" [Term] "|]" ; -- free variation - TM. Term ::= "?" ; -- linearization of metavariable -``` -Tokens are strings or (maybe obsolescent) prefix-dependent -variant lists. -``` - KS. Tokn ::= String ; - KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; - Var. Variant ::= [String] "/" [String] ; -``` -Two special forms of terms are introduced by the compiler -as optimizations. They can in principle be eliminated, but -their presence makes grammars much more compact. Their semantics -will be explained in a later section. -``` - F. Term ::= CId ; -- global constant - W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table -``` -There is also a deprecated form of "record parameter alias", -``` - RP. Term ::= "(" Term "@" Term ")"; -- DEPRECATED -``` -which will be removed when the migration to new GFCC is complete. - - - -==The semantics of concrete syntax terms== - -The code in this section is from [``GF/GFCC/Linearize.hs`` ../Linearize.hs]. - - -===Linearization and realization=== - -The linearization algorithm is essentially the same as in -GFC: a tree is linearized by evaluating its linearization term -in the environment of the linearizations of the subtrees. -Literal atoms are linearized in the obvious way. -The function also needs to know the language (i.e. concrete syntax) -in which linearization is performed. -``` - linExp :: GFCC -> CId -> Exp -> Term - linExp gfcc lang tree@(DTr _ at trees) = case at of - AC fun -> comp (Prelude.map lin trees) $ look fun - AS s -> R [kks (show s)] -- quoted - AI i -> R [kks (show i)] - AF d -> R [kks (show d)] - AM -> TM - where - lin = linExp gfcc lang - comp = compute gfcc lang - look = lookLin gfcc lang -``` -TODO: bindings must be supported. - -The result of linearization is usually a record, which is realized as -a string using the following algorithm. -``` - realize :: Term -> String - realize trm = case trm of - R (t:_) -> realize t - S ss -> unwords $ Prelude.map realize ss - K (KS s) -> s - K (KP s _) -> unwords s ---- prefix choice TODO - W s t -> s ++ realize t - FV (t:_) -> realize t - TM -> "?" -``` -Notice that realization always picks the first field of a record. -If a linearization type has more than one field, the first field -does not necessarily contain the desired string. -Also notice that the order of record fields in GFCC is not necessarily -the same as in GF source. - - -===Term evaluation=== - -Evaluation follows call-by-value order, with two environments -needed: -- the grammar (a concrete syntax) to give the global constants -- an array of terms to give the subtree linearizations - - -The code is presented in one-level pattern matching, to -enable reimplementations in languages that do not permit -deep patterns (such as Java and C++). -``` -compute :: GFCC -> CId -> [Term] -> Term -> Term -compute gfcc lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ Prelude.map comp ts - V i -> idx args (fromInteger i) -- already computed - F c -> comp $ look c -- not computed (if contains V) - FV ts -> FV $ Prelude.map comp ts - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - _ -> trm - - look = lookOper gfcc lang - - idx xs i = xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ Prelude.map (proj r) ts - (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p - TM -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - - getField t i = case t of - R rs -> idx rs i - RP _ r -> getField r i - TM -> TM - _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t -``` - -===The special term constructors=== - -The three forms introduced by the compiler may a need special -explanation. - -Global constants -``` - Term ::= CId ; -``` -are shorthands for complex terms. They are produced by the -compiler by (iterated) **common subexpression elimination**. -They are often more powerful than hand-devised code sharing in the source -code. They could be computed off-line by replacing each identifier by -its definition. - -**Prefix-suffix tables** -``` - Term ::= "(" String "+" Term ")" ; -``` -represent tables of word forms divided to the longest common prefix -and its array of suffixes. In the example grammar above, we have -``` - Sleep = [("sleep" + ["s",""])] -``` -which in fact is equal to the array of full forms -``` - ["sleeps", "sleep"] -``` -The power of this construction comes from the fact that suffix sets -tend to be repeated in a language, and can therefore be collected -by common subexpression elimination. It is this technique that -explains the used syntax rather than the more accurate -``` - "(" String "+" [String] ")" -``` -since we want the suffix part to be a ``Term`` for the optimization to -take effect. - - - -==Compiling to GFCC== - -Compilation to GFCC is performed by the GF grammar compiler, and -GFCC interpreters need not know what it does. For grammar writers, -however, it might be interesting to know what happens to the grammars -in the process. - -The compilation phases are the following -+ type check and partially evaluate GF source -+ create a symbol table mapping the GF parameter and record types to - fixed-size arrays, and parameter values and record labels to integers -+ traverse the linearization rules replacing parameters and labels by integers -+ reorganize the created GF grammar so that it has just one abstract syntax - and one concrete syntax per language -+ TODO: apply UTF8 encoding to the grammar, if not yet applied (this is told by the - ``coding`` flag) -+ translate the GF grammar object to a GFCC grammar object, using a simple - compositional mapping -+ perform the word-suffix optimization on GFCC linearization terms -+ perform subexpression elimination on each concrete syntax module -+ print out the GFCC code - - - - -===Problems in GFCC compilation=== - -Two major problems had to be solved in compiling GF to GFCC: -- consistent order of tables and records, to permit the array translation -- run-time variables in complex parameter values. - - -The current implementation is still experimental and may fail -to generate correct code. Any errors remaining are likely to be -related to the two problems just mentioned. - -The order problem is solved in slightly different ways for tables and records. -In both cases, **eta expansion** is used to establish a -canonical order. Tables are ordered by applying the preorder induced -by ``param`` definitions. Records are ordered by sorting them by labels. -This means that -e.g. the ``s`` field will in general no longer appear as the first -field, even if it does so in the GF source code. But relying on the -order of fields in a labelled record would be misplaced anyway. - -The canonical form of records is further complicated by lock fields, -i.e. dummy fields of form ``lock_C = <>``, which are added to grammar -libraries to force intensionality of linearization types. The problem -is that the absence of a lock field only generates a warning, not -an error. Therefore a GF grammar can contain objects of the same -type with and without a lock field. This problem was solved in GFCC -generation by just removing all lock fields (defined as fields whose -type is the empty record type). This has the further advantage of -(slightly) reducing the grammar size. More importantly, it is safe -to remove lock fields, because they are never used in computation, -and because intensional types are only needed in grammars reused -as libraries, not in grammars used at runtime. - -While the order problem is rather bureaucratic in nature, run-time -variables are an interesting problem. They arise in the presence -of complex parameter values, created by argument-taking constructors -and parameter records. To give an example, consider the GF parameter -type system -``` - Number = Sg | Pl ; - Person = P1 | P2 | P3 ; - Agr = Ag Number Person ; -``` -The values can be translated to integers in the expected way, -``` - Sg = 0, Pl = 1 - P1 = 0, P2 = 1, P3 = 2 - Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2, - Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5 -``` -However, an argument of ``Agr`` can be a run-time variable, as in -``` - Ag np.n P3 -``` -This expression must first be translated to a case expression, -``` - case np.n of { - 0 => 2 ; - 1 => 5 - } -``` -which can then be translated to the GFCC term -``` - ([2,5] ! ($0 ! $1)) -``` -assuming that the variable ``np`` is the first argument and that its -``Number`` field is the second in the record. - -This transformation of course has to be performed recursively, since -there can be several run-time variables in a parameter value: -``` - Ag np.n np.p -``` -A similar transformation would be possible to deal with the double -role of parameter records discussed above. Thus the type -``` - RNP = {n : Number ; p : Person} -``` -could be uniformly translated into the set ``{0,1,2,3,4,5}`` -as ``Agr`` above. Selections would be simple instances of indexing. -But any projection from the record should be translated into -a case expression, -``` - rnp.n ===> - case rnp of { - 0 => 0 ; - 1 => 0 ; - 2 => 0 ; - 3 => 1 ; - 4 => 1 ; - 5 => 1 - } -``` -To avoid the code bloat resulting from this, we have chosen to -deal with records by a **currying** transformation: -``` - table {n : Number ; p : Person} {... ...} - ===> - table Number {Sg => table Person {...} ; table Person {...}} -``` -This is performed when GFCC is generated. Selections with -records have to be treated likewise, -``` - t ! r ===> t ! r.n ! r.p -``` - - -===The representation of linearization types=== - -Linearization types (``lincat``) are not needed when generating with -GFCC, but they have been added to enable parser generation directly from -GFCC. The linearization type definitions are shown as a part of the -concrete syntax, by using terms to represent types. Here is the table -showing how different linearization types are encoded. -``` - P* = max(P) -- parameter type - {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- record - (P => T)* = [T* ,...,T*] -- table, size(P) cases - Str* = () -``` -For example, the linearization type ``present/CatEng.NP`` is -translated as follows: -``` - NP = { - a : { -- 6 = 2*3 values - n : {ParamX.Number} ; -- 2 values - p : {ParamX.Person} -- 3 values - } ; - s : {ResEng.Case} => Str -- 3 values - } - - __NP = [[1,2],[(),(),()]] -``` - - - - -===Running the compiler and the GFCC interpreter=== - -GFCC generation is a part of the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF since September 2006. To invoke the compiler, the flag -``-printer=gfcc`` to the command -``pm = print_multi`` is used. It is wise to recompile the grammar from -source, since previously compiled libraries may not obey the canonical -order of records. -Here is an example, performed in -[example/bronzeage ../../../../../examples/bronzeage]. -``` - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf - strip - pm -printer=gfcc | wf bronze.gfcc -``` -There is also an experimental batch compiler, which does not use the GFC -format or the record aliases. It can be produced by -``` - make gfc -``` -in ``GF/src``, and invoked by -``` - gfc --make FILES -``` - - - - -==The reference interpreter== - -The reference interpreter written in Haskell consists of the following files: -``` - -- source file for BNFC - GFCC.cf -- labelled BNF grammar of gfcc - - -- files generated by BNFC - AbsGFCC.hs -- abstrac syntax datatypes - ErrM.hs -- error monad used internally - LexGFCC.hs -- lexer of gfcc files - ParGFCC.hs -- parser of gfcc files and syntax trees - PrintGFCC.hs -- printer of gfcc files and syntax trees - - -- hand-written files - DataGFCC.hs -- grammar datatype, post-parser grammar creation - Linearize.hs -- linearization and evaluation - Macros.hs -- utilities abstracting away from GFCC datatypes - Generate.hs -- random and exhaustive generation, generate-and-test parsing - API.hs -- functionalities accessible in embedded GF applications - Generate.hs -- random and exhaustive generation - Shell.hs -- main function - a simple command interpreter -``` -It is included in the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF, in the subdirectories [``GF/src/GF/GFCC`` ../] and -[``GF/src/GF/Devel`` ../../Devel]. - -As of September 2007, default parsing in main GF uses GFCC (implemented by Krasimir -Angelov). The interpreter uses the relevant modules -``` - GF/Conversions/SimpleToFCFG.hs -- generate parser from GFCC - GF/Parsing/FCFG.hs -- run the parser -``` - - -To compile the interpreter, type -``` - make gfcc -``` -in ``GF/src``. To run it, type -``` - ./gfcc <GFCC-file> -``` -The available commands are -- ``gr <Cat> <Int>``: generate a number of random trees in category. - and show their linearizations in all languages -- ``grt <Cat> <Int>``: generate a number of random trees in category. - and show the trees and their linearizations in all languages -- ``gt <Cat> <Int>``: generate a number of trees in category from smallest, - and show their linearizations in all languages -- ``gtt <Cat> <Int>``: generate a number of trees in category from smallest, - and show the trees and their linearizations in all languages -- ``p <Lang> <Cat> <String>``: parse a string into a set of trees -- ``lin <Tree>``: linearize tree in all languages, also showing full records -- ``q``: terminate the system cleanly - - - -==Embedded formats== - -- JavaScript: compiler of linearization and abstract syntax - -- Haskell: compiler of abstract syntax and interpreter with parsing, - linearization, and generation - -- C: compiler of linearization (old GFCC) - -- C++: embedded interpreter supporting linearization (old GFCC) - - - -==Some things to do== - -Support for dependent types, higher-order abstract syntax, and -semantic definition in GFCC generation and interpreters. - -Replacing the entire GF shell by one based on GFCC. - -Interpreter in Java. - -Hand-written parsers for GFCC grammars to reduce code size -(and efficiency?) of interpreters. - -Binary format and/or file compression of GFCC output. - -Syntax editor based on GFCC. - -Rewriting of resource libraries in order to exploit the -word-suffix sharing better (depth-one tables, as in FM). - diff --git a/src/PGF/doc/old-GFCC.cf b/src/PGF/doc/old-GFCC.cf deleted file mode 100644 index 65657a259..000000000 --- a/src/PGF/doc/old-GFCC.cf +++ /dev/null @@ -1,50 +0,0 @@ -Grm. Grammar ::= Header ";" Abstract ";" [Concrete] ; -Hdr. Header ::= "grammar" CId "(" [CId] ")" ; -Abs. Abstract ::= "abstract" "{" [AbsDef] "}" ; -Cnc. Concrete ::= "concrete" CId "{" [CncDef] "}" ; - -Fun. AbsDef ::= CId ":" Type "=" Exp ; ---AFl. AbsDef ::= "%" CId "=" String ; -- flag -Lin. CncDef ::= CId "=" Term ; ---CFl. CncDef ::= "%" CId "=" String ; -- flag - -Typ. Type ::= [CId] "->" CId ; -Tr. Exp ::= "(" Atom [Exp] ")" ; -AC. Atom ::= CId ; -AS. Atom ::= String ; -AI. Atom ::= Integer ; -AF. Atom ::= Double ; -AM. Atom ::= "?" ; -trA. Exp ::= Atom ; -define trA a = Tr a [] ; - -R. Term ::= "[" [Term] "]" ; -- record/table -P. Term ::= "(" Term "!" Term ")" ; -- projection/selection -S. Term ::= "(" [Term] ")" ; -- sequence with ++ -K. Term ::= Tokn ; -- token -V. Term ::= "$" Integer ; -- argument -C. Term ::= Integer ; -- parameter value/label -F. Term ::= CId ; -- global constant -FV. Term ::= "[|" [Term] "|]" ; -- free variation -W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table -RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias -TM. Term ::= "?" ; -- lin of metavariable - -L. Term ::= "(" CId "->" Term ")" ; -- lambda abstracted table -BV. Term ::= "#" CId ; -- lambda-bound variable - -KS. Tokn ::= String ; -KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; -Var. Variant ::= [String] "/" [String] ; - - -terminator Concrete ";" ; -terminator AbsDef ";" ; -terminator CncDef ";" ; -separator CId "," ; -separator Term "," ; -terminator Exp "" ; -terminator String "" ; -separator Variant "," ; - -token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; diff --git a/src/PGF/doc/old-gfcc.txt b/src/PGF/doc/old-gfcc.txt deleted file mode 100644 index 6ffd9bd64..000000000 --- a/src/PGF/doc/old-gfcc.txt +++ /dev/null @@ -1,656 +0,0 @@ -The GFCC Grammar Format -Aarne Ranta -October 19, 2006 - -Author's address: -[``http://www.cs.chalmers.se/~aarne`` http://www.cs.chalmers.se/~aarne] - -% to compile: txt2tags -thtml --toc gfcc.txt - -History: -- 19 Oct: translation of lincats, new figures on C++ -- 3 Oct 2006: first version - - -==What is GFCC== - -GFCC is a low-level format for GF grammars. Its aim is to contain the minimum -that is needed to process GF grammars at runtime. This minimality has three -advantages: -- compact grammar files and run-time objects -- time and space efficient processing -- simple definition of interpreters - - -The idea is that all embedded GF applications are compiled to GFCC. -The GF system would be primarily used as a compiler and as a grammar -development tool. - -Since GFCC is implemented in BNFC, a parser of the format is readily -available for C, C++, Haskell, Java, and OCaml. Also an XML -representation is generated in BNFC. A -[reference implementation ../] -of linearization and some other functions has been written in Haskell. - - -==GFCC vs. GFC== - -GFCC is aimed to replace GFC as the run-time grammar format. GFC was designed -to be a run-time format, but also to -support separate compilation of grammars, i.e. -to store the results of compiling -individual GF modules. But this means that GFC has to contain extra information, -such as type annotations, which is only needed in compilation and not at -run-time. In particular, the pattern matching syntax and semantics of GFC is -complex and therefore difficult to implement in new platforms. - -The main differences of GFCC compared with GFC can be summarized as follows: -- there are no modules, and therefore no qualified names -- a GFCC grammar is multilingual, and consists of a common abstract syntax - together with one concrete syntax per language -- records and tables are replaced by arrays -- record labels and parameter values are replaced by integers -- record projection and table selection are replaced by array indexing -- there is (so far) no support for dependent types or higher-order abstract - syntax (which would be easy to add, but make interpreters much more difficult - to write) - - -Here is an example of a GF grammar, consisting of three modules, -as translated to GFCC. The representations are aligned, with the exceptions -due to the alphabetical sorting of GFCC grammars. -``` - grammar Ex(Eng,Swe); - -abstract Ex = { abstract { - cat - S ; NP ; VP ; - fun - Pred : NP -> VP -> S ; Pred : NP,VP -> S = (Pred); - She, They : NP ; She : -> NP = (She); - Sleep : VP ; Sleep : -> VP = (Sleep); - They : -> NP = (They); -} } ; - -concrete Eng of Ex = { concrete Eng { - lincat - S = {s : Str} ; - NP = {s : Str ; n : Num} ; - VP = {s : Num => Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))]; - s = np.s ++ vp.s ! np.n} ; - She = {s = "she" ; n = Sg} ; She = [0, "she"]; - They = {s = "they" ; n = Pl} ; - Sleep = {s = table { Sleep = [("sleep" + ["s",""])]; - Sg => "sleeps" ; - Pl => "sleep" They = [1, "they"]; - } } ; - } ; -} - -concrete Swe of Ex = { concrete Swe { - lincat - S = {s : Str} ; - NP = {s : Str} ; - VP = {s : Str} ; - param - Num = Sg | Pl ; - lin - Pred np vp = { Pred = [(($0!0),($1!0))]; - s = np.s ++ vp.s} ; - She = {s = "hon"} ; She = ["hon"]; - They = {s = "de"} ; They = ["de"]; - Sleep = {s = "sover"} ; Sleep = ["sover"]; -} } ; -``` - -==The syntax of GFCC files== - -===Top level=== - -A grammar has a header telling the name of the abstract syntax -(often specifying an application domain), and the names of -the concrete languages. The abstract syntax and the concrete -syntaxes themselves follow. -``` - Grammar ::= Header ";" Abstract ";" [Concrete] ; - Header ::= "grammar" CId "(" [CId] ")" ; - Abstract ::= "abstract" "{" [AbsDef] "}" ; - Concrete ::= "concrete" CId "{" [CncDef] "}" ; -``` -Abstract syntax judgements give typings and semantic definitions. -Concrete syntax judgements give linearizations. -``` - AbsDef ::= CId ":" Type "=" Exp ; - CncDef ::= CId "=" Term ; -``` -Also flags are possible, local to each "module" (i.e. abstract and concretes). -``` - AbsDef ::= "%" CId "=" String ; - CncDef ::= "%" CId "=" String ; -``` -For the run-time system, the reference implementation in Haskell -uses a structure that gives efficient look-up: -``` - data GFCC = GFCC { - absname :: CId , - cncnames :: [CId] , - abstract :: Abstr , - concretes :: Map CId Concr - } - - data Abstr = Abstr { - funs :: Map CId Type, -- find the type of a fun - cats :: Map CId [CId] -- find the funs giving a cat - } - - type Concr = Map CId Term -``` - - -===Abstract syntax=== - -Types are first-order function types built from -category symbols. Syntax trees (``Exp``) are -rose trees with the head (``Atom``) either a function -constant, a metavariable, or a string, integer, or float -literal. -``` - Type ::= [CId] "->" CId ; - Exp ::= "(" Atom [Exp] ")" ; - Atom ::= CId ; -- function constant - Atom ::= "?" ; -- metavariable - Atom ::= String ; -- string literal - Atom ::= Integer ; -- integer literal - Atom ::= Double ; -- float literal -``` - - -===Concrete syntax=== - -Linearization terms (``Term``) are built as follows. -Constructor names are shown to make the later code -examples readable. -``` - R. Term ::= "[" [Term] "]" ; -- array - P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field - S. Term ::= "(" [Term] ")" ; -- sequence with ++ - K. Term ::= Tokn ; -- token - V. Term ::= "$" Integer ; -- argument - C. Term ::= Integer ; -- array index - FV. Term ::= "[|" [Term] "|]" ; -- free variation - TM. Term ::= "?" ; -- linearization of metavariable -``` -Tokens are strings or (maybe obsolescent) prefix-dependent -variant lists. -``` - KS. Tokn ::= String ; - KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; - Var. Variant ::= [String] "/" [String] ; -``` -Three special forms of terms are introduced by the compiler -as optimizations. They can in principle be eliminated, but -their presence makes grammars much more compact. Their semantics -will be explained in a later section. -``` - F. Term ::= CId ; -- global constant - W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table - RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias -``` -Identifiers are like ``Ident`` in GF and GFC, except that -the compiler produces constants prefixed with ``_`` in -the common subterm elimination optimization. -``` - token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; -``` - - -==The semantics of concrete syntax terms== - -===Linearization and realization=== - -The linearization algorithm is essentially the same as in -GFC: a tree is linearized by evaluating its linearization term -in the environment of the linearizations of the subtrees. -Literal atoms are linearized in the obvious way. -The function also needs to know the language (i.e. concrete syntax) -in which linearization is performed. -``` - linExp :: GFCC -> CId -> Exp -> Term - linExp mcfg lang tree@(Tr at trees) = case at of - AC fun -> comp (Prelude.map lin trees) $ look fun - AS s -> R [kks (show s)] -- quoted - AI i -> R [kks (show i)] - AF d -> R [kks (show d)] - AM -> TM - where - lin = linExp mcfg lang - comp = compute mcfg lang - look = lookLin mcfg lang -``` -The result of linearization is usually a record, which is realized as -a string using the following algorithm. -``` - realize :: Term -> String - realize trm = case trm of - R (t:_) -> realize t - S ss -> unwords $ Prelude.map realize ss - K (KS s) -> s - K (KP s _) -> unwords s ---- prefix choice TODO - W s t -> s ++ realize t - FV (t:_) -> realize t - TM -> "?" -``` -Since the order of record fields is not necessarily -the same as in GF source, -this realization does not work securely for -categories whose lincats more than one field. - - -===Term evaluation=== - -Evaluation follows call-by-value order, with two environments -needed: -- the grammar (a concrete syntax) to give the global constants -- an array of terms to give the subtree linearizations - - -The code is presented in one-level pattern matching, to -enable reimplementations in languages that do not permit -deep patterns (such as Java and C++). -``` -compute :: GFCC -> CId -> [Term] -> Term -> Term -compute mcfg lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - RP i t -> RP (comp i) (comp t) - W s t -> W s (comp t) - R ts -> R $ Prelude.map comp ts - V i -> idx args (fromInteger i) -- already computed - F c -> comp $ look c -- not computed (if contains V) - FV ts -> FV $ Prelude.map comp ts - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - _ -> trm - - look = lookLin mcfg lang - - idx xs i = xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ Prelude.map (proj r) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p - TM -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - - getField t i = case t of - R rs -> idx rs i - RP _ r -> getField r i - TM -> TM - _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t -``` - -===The special term constructors=== - -The three forms introduced by the compiler may a need special -explanation. - -Global constants -``` - Term ::= CId ; -``` -are shorthands for complex terms. They are produced by the -compiler by (iterated) common subexpression elimination. -They are often more powerful than hand-devised code sharing in the source -code. They could be computed off-line by replacing each identifier by -its definition. - -Prefix-suffix tables -``` - Term ::= "(" String "+" Term ")" ; -``` -represent tables of word forms divided to the longest common prefix -and its array of suffixes. In the example grammar above, we have -``` - Sleep = [("sleep" + ["s",""])] -``` -which in fact is equal to the array of full forms -``` - ["sleeps", "sleep"] -``` -The power of this construction comes from the fact that suffix sets -tend to be repeated in a language, and can therefore be collected -by common subexpression elimination. It is this technique that -explains the used syntax rather than the more accurate -``` - "(" String "+" [String] ")" -``` -since we want the suffix part to be a ``Term`` for the optimization to -take effect. - -The most curious construct of GFCC is the parameter array alias, -``` - Term ::= "(" Term "@" Term ")"; -``` -This form is used as the value of parameter records, such as the type -``` - {n : Number ; p : Person} -``` -The problem with parameter records is their double role. -They can be used like parameter values, as indices in selection, -``` - VP.s ! {n = Sg ; p = P3} -``` -but also as records, from which parameters can be projected: -``` - {n = Sg ; p = P3}.n -``` -Whichever use is selected as primary, a prohibitively complex -case expression must be generated at compilation to GFCC to get the -other use. The adopted -solution is to generate a pair containing both a parameter value index -and an array of indices of record fields. For instance, if we have -``` - param Number = Sg | Pl ; Person = P1 | P2 | P3 ; -``` -we get the encoding -``` - {n = Sg ; p = P3} ---> (2 @ [0,2]) -``` -The GFCC computation rules are essentially -``` - (t ! (i @ _)) = (t ! i) - ((_ @ r) ! j) =(r ! j) -``` - - -==Compiling to GFCC== - -Compilation to GFCC is performed by the GF grammar compiler, and -GFCC interpreters need not know what it does. For grammar writers, -however, it might be interesting to know what happens to the grammars -in the process. - -The compilation phases are the following -+ translate GF source to GFC, as always in GF -+ undo GFC back-end optimizations -+ perform the ``values`` optimization to normalize tables -+ create a symbol table mapping the GFC parameter and record types to - fixed-size arrays, and parameter values and record labels to integers -+ traverse the linearization rules replacing parameters and labels by integers -+ reorganize the created GFC grammar so that it has just one abstract syntax - and one concrete syntax per language -+ apply UTF8 encoding to the grammar, if not yet applied (this is told by the - ``coding`` flag) -+ translate the GFC syntax tree to a GFCC syntax tree, using a simple - compositional mapping -+ perform the word-suffix optimization on GFCC linearization terms -+ perform subexpression elimination on each concrete syntax module -+ print out the GFCC code - - -Notice that a major part of the compilation is done within GFC, so that -GFC-related tasks (such as parser generation) could be performed by -using the old algorithms. - - -===Problems in GFCC compilation=== - -Two major problems had to be solved in compiling GFC to GFCC: -- consistent order of tables and records, to permit the array translation -- run-time variables in complex parameter values. - - -The current implementation is still experimental and may fail -to generate correct code. Any errors remaining are likely to be -related to the two problems just mentioned. - -The order problem is solved in different ways for tables and records. -For tables, the ``values`` optimization of GFC already manages to -maintain a canonical order. But this order can be destroyed by the -``share`` optimization. To make sure that GFCC compilation works properly, -it is safest to recompile the GF grammar by using the ``values`` -optimization flag. - -Records can be canonically ordered by sorting them by labels. -In fact, this was done in connection of the GFCC work as a part -of the GFC generation, to guarantee consistency. This means that -e.g. the ``s`` field will in general no longer appear as the first -field, even if it does so in the GF source code. But relying on the -order of fields in a labelled record would be misplaced anyway. - -The canonical form of records is further complicated by lock fields, -i.e. dummy fields of form ``lock_C = <>``, which are added to grammar -libraries to force intensionality of linearization types. The problem -is that the absence of a lock field only generates a warning, not -an error. Therefore a GFC grammar can contain objects of the same -type with and without a lock field. This problem was solved in GFCC -generation by just removing all lock fields (defined as fields whose -type is the empty record type). This has the further advantage of -(slightly) reducing the grammar size. More importantly, it is safe -to remove lock fields, because they are never used in computation, -and because intensional types are only needed in grammars reused -as libraries, not in grammars used at runtime. - -While the order problem is rather bureaucratic in nature, run-time -variables are an interesting problem. They arise in the presence -of complex parameter values, created by argument-taking constructors -and parameter records. To give an example, consider the GF parameter -type system -``` - Number = Sg | Pl ; - Person = P1 | P2 | P3 ; - Agr = Ag Number Person ; -``` -The values can be translated to integers in the expected way, -``` - Sg = 0, Pl = 1 - P1 = 0, P2 = 1, P3 = 2 - Ag Sg P1 = 0, Ag Sg P2 = 1, Ag Sg P3 = 2, - Ag Pl P1 = 3, Ag Pl P2 = 4, Ag Pl P3 = 5 -``` -However, an argument of ``Agr`` can be a run-time variable, as in -``` - Ag np.n P3 -``` -This expression must first be translated to a case expression, -``` - case np.n of { - 0 => 2 ; - 1 => 5 - } -``` -which can then be translated to the GFCC term -``` - ([2,5] ! ($0 ! $1)) -``` -assuming that the variable ``np`` is the first argument and that its -``Number`` field is the second in the record. - -This transformation of course has to be performed recursively, since -there can be several run-time variables in a parameter value: -``` - Ag np.n np.p -``` -A similar transformation would be possible to deal with the double -role of parameter records discussed above. Thus the type -``` - RNP = {n : Number ; p : Person} -``` -could be uniformly translated into the set ``{0,1,2,3,4,5}`` -as ``Agr`` above. Selections would be simple instances of indexing. -But any projection from the record should be translated into -a case expression, -``` - rnp.n ===> - case rnp of { - 0 => 0 ; - 1 => 0 ; - 2 => 0 ; - 3 => 1 ; - 4 => 1 ; - 5 => 1 - } -``` -To avoid the code bloat resulting from this, we chose the alias representation -which is easy enough to deal with in interpreters. - - -===The representation of linearization types=== - -Linearization types (``lincat``) are not needed when generating with -GFCC, but they have been added to enable parser generation directly from -GFCC. The linearization type definitions are shown as a part of the -concrete syntax, by using terms to represent types. Here is the table -showing how different linearization types are encoded. -``` - P* = size(P) -- parameter type - {_ : I ; __ : R}* = (I* @ R*) -- record of parameters - {r1 : T1 ; ... ; rn : Tn}* = [T1*,...,Tn*] -- other record - (P => T)* = [T* ,...,T*] -- size(P) times - Str* = () -``` -The category symbols are prefixed with two underscores (``__``). -For example, the linearization type ``present/CatEng.NP`` is -translated as follows: -``` - NP = { - a : { -- 6 = 2*3 values - n : {ParamX.Number} ; -- 2 values - p : {ParamX.Person} -- 3 values - } ; - s : {ResEng.Case} => Str -- 3 values - } - - __NP = [(6@[2,3]),[(),(),()]] -``` - - - - -===Running the compiler and the GFCC interpreter=== - -GFCC generation is a part of the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF since September 2006. To invoke the compiler, the flag -``-printer=gfcc`` to the command -``pm = print_multi`` is used. It is wise to recompile the grammar from -source, since previously compiled libraries may not obey the canonical -order of records. To ``strip`` the grammar before -GFCC translation removes unnecessary interface references. -Here is an example, performed in -[example/bronzeage ../../../../../examples/bronzeage]. -``` - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageEng.gf - i -src -path=.:prelude:resource-1.0/* -optimize=all_subs BronzeageGer.gf - strip - pm -printer=gfcc | wf bronze.gfcc -``` - - - -==The reference interpreter== - -The reference interpreter written in Haskell consists of the following files: -``` - -- source file for BNFC - GFCC.cf -- labelled BNF grammar of gfcc - - -- files generated by BNFC - AbsGFCC.hs -- abstrac syntax of gfcc - ErrM.hs -- error monad used internally - LexGFCC.hs -- lexer of gfcc files - ParGFCC.hs -- parser of gfcc files and syntax trees - PrintGFCC.hs -- printer of gfcc files and syntax trees - - -- hand-written files - DataGFCC.hs -- post-parser grammar creation, linearization and evaluation - GenGFCC.hs -- random and exhaustive generation, generate-and-test parsing - RunGFCC.hs -- main function - a simple command interpreter -``` -It is included in the -[developers' version http://www.cs.chalmers.se/Cs/Research/Language-technology/darcs/GF/doc/darcs.html] -of GF, in the subdirectory [``GF/src/GF/Canon/GFCC`` ../]. - -To compile the interpreter, type -``` - make gfcc -``` -in ``GF/src``. To run it, type -``` - ./gfcc <GFCC-file> -``` -The available commands are -- ``gr <Cat> <Int>``: generate a number of random trees in category. - and show their linearizations in all languages -- ``grt <Cat> <Int>``: generate a number of random trees in category. - and show the trees and their linearizations in all languages -- ``gt <Cat> <Int>``: generate a number of trees in category from smallest, - and show their linearizations in all languages -- ``gtt <Cat> <Int>``: generate a number of trees in category from smallest, - and show the trees and their linearizations in all languages -- ``p <Int> <Cat> <String>``: "parse", i.e. generate trees until match or - until the given number have been generated -- ``<Tree>``: linearize tree in all languages, also showing full records -- ``quit``: terminate the system cleanly - - -==Interpreter in C++== - -A base-line interpreter in C++ has been started. -Its main functionality is random generation of trees and linearization of them. - -Here are some results from running the different interpreters, compared -to running the same grammar in GF, saved in ``.gfcm`` format. -The grammar contains the English, German, and Norwegian -versions of Bronzeage. The experiment was carried out on -Ubuntu Linux laptop with 1.5 GHz Intel centrino processor. - -|| | GF | gfcc(hs) | gfcc++ | -| program size | 7249k | 803k | 113k -| grammar size | 336k | 119k | 119k -| read grammar | 1150ms | 510ms | 100ms -| generate 222 | 9500ms | 450ms | 800ms -| memory | 21M | 10M | 20M - - - -To summarize: -- going from GF to gfcc is a major win in both code size and efficiency -- going from Haskell to C++ interpreter is not a win yet, because of a space - leak in the C++ version - - - -==Some things to do== - -Interpreter in Java. - -Parsing via MCFG -- the FCFG format can possibly be simplified -- parser grammars should be saved in files to make interpreters easier - - -Hand-written parsers for GFCC grammars to reduce code size -(and efficiency?) of interpreters. - -Binary format and/or file compression of GFCC output. - -Syntax editor based on GFCC. - -Rewriting of resource libraries in order to exploit the -word-suffix sharing better (depth-one tables, as in FM). - - - diff --git a/src/PGF/doc/syntax.txt b/src/PGF/doc/syntax.txt deleted file mode 100644 index db8f7c149..000000000 --- a/src/PGF/doc/syntax.txt +++ /dev/null @@ -1,180 +0,0 @@ -GFCC Syntax - - -==Syntax of GFCC files== - -The parser syntax is very simple, as defined in BNF: -``` - Grm. Grammar ::= [RExp] ; - - App. RExp ::= "(" CId [RExp] ")" ; - AId. RExp ::= CId ; - AInt. RExp ::= Integer ; - AStr. RExp ::= String ; - AFlt. RExp ::= Double ; - AMet. RExp ::= "?" ; - - terminator RExp "" ; - - token CId (('_' | letter) (letter | digit | '\'' | '_')*) ; -``` -While a parser and a printer can be generated for many languages -from this grammar by using the BNF Converter, a parser is also -easy to write by hand using recursive descent. - - -==Syntax of well-formed GFCC code== - -Here is a summary of well-formed syntax, -with a comment on the semantics of each construction. -``` - Grammar ::= - ("grammar" CId CId*) -- abstract syntax name and concrete syntax names - "(" "flags" Flag* ")" -- global and abstract flags - "(" "abstract" Abstract ")" -- abstract syntax - "(" "concrete" Concrete* ")" -- concrete syntaxes - - Abstract ::= - "(" "fun" FunDef* ")" -- function definitions - "(" "cat" CatDef* ")" -- category definitions - - Concrete ::= - "(" CId -- language name - "flags" Flag* -- concrete flags - "lin" LinDef* -- linearization rules - "oper" LinDef* -- operations (macros) - "lincat" LinDef* -- linearization type definitions - "lindef" LinDef* -- linearization default definitions - "printname" LinDef* -- printname definitions - "param" LinDef* -- lincats with labels and parameter value names - ")" - - Flag ::= "(" CId String ")" -- flag and value - FunDef ::= "(" CId Type Exp ")" -- function, type, and definition - CatDef ::= "(" CId Hypo* ")" -- category and context - LinDef ::= "(" CId Term ")" -- function and definition - - Type ::= - "(" CId -- value category - "(" "H" Hypo* ")" -- argument context - "(" "X" Exp* ")" ")" -- arguments (of dependent value type) - - Exp ::= - "(" CId -- function - "(" "B" CId* ")" -- bindings - "(" "X" Exp* ")" ")" -- arguments - | CId -- variable - | "?" -- metavariable - | "(" "Eq" Equation* ")" -- group of pattern equations - | Integer -- integer literal (non-negative) - | Float -- floating-point literal (non-negative) - | String -- string literal (in double quotes) - - Hypo ::= "(" CId Type ")" -- variable and type - - Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list - - Term ::= - "(" "R" Term* ")" -- array (record or table) - | "(" "S" Term* ")" -- concatenated sequence - | "(" "FV" Term* ")" -- free variant list - | "(" "P" Term Term ")" -- access to index (projection or selection) - | "(" "W" String Term ")" -- token prefix with suffix list - | "(" "A" Integer ")" -- pointer to subtree - | String -- token (in double quotes) - | Integer -- index in array - | CId -- macro constant - | "?" -- metavariable -``` - - -==GFCC interpreter== - -The first phase in interpreting GFCC is to parse a GFCC file and -build an internal abstract syntax representation, as specified -in the previous section. - -With this representation, linearization can be performed by -a straightforward function from expressions (``Exp``) to terms -(``Term``). All expressions except groups of pattern equations -can be linearized. - -Here is a reference Haskell implementation of linearization: -``` - linExp :: GFCC -> CId -> Exp -> Term - linExp gfcc lang tree@(DTr _ at trees) = case at of - AC fun -> comp (map lin trees) $ look fun - AS s -> R [K (show s)] -- quoted - AI i -> R [K (show i)] - AF d -> R [K (show d)] - AM -> TM - where - lin = linExp gfcc lang - comp = compute gfcc lang - look = lookLin gfcc lang -``` -TODO: bindings must be supported. - -Terms resulting from linearization are evaluated in -call-by-value order, with two environments needed: -- the grammar (a concrete syntax) to give the global constants -- an array of terms to give the subtree linearizations - - -The Haskell implementation works as follows: -``` -compute :: GFCC -> CId -> [Term] -> Term -> Term -compute gfcc lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ map comp ts - V i -> idx args (fromInteger i) -- already computed - F c -> comp $ look c -- not computed (if contains V) - FV ts -> FV $ Prelude.map comp ts - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - _ -> trm - - look = lookOper gfcc lang - - idx xs i = xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ Prelude.map (proj r) ts - (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p - TM -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - - getField t i = case t of - R rs -> idx rs i - RP _ r -> getField r i - TM -> TM - _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t -``` -The result of linearization is usually a record, which is realized as -a string using the following algorithm. -``` - realize :: Term -> String - realize trm = case trm of - R (t:_) -> realize t - S ss -> unwords $ map realize ss - K s -> s - W s t -> s ++ realize t - FV (t:_) -> realize t -- TODO: all variants - TM -> "?" -``` -Notice that realization always picks the first field of a record. -If a linearization type has more than one field, the first field -does not necessarily contain the desired string. -Also notice that the order of record fields in GFCC is not necessarily -the same as in GF source. |
