diff options
| author | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
|---|---|---|
| committer | krasimir <krasimir@chalmers.se> | 2009-12-13 18:50:29 +0000 |
| commit | f85232947e74ee7ef8c7b0ad2338212e7e68f1be (patch) | |
| tree | 667b886a5e3a4b026a63d4e3597f32497d824761 /src/runtime/haskell/PGF | |
| parent | d88a865faff59c98fc91556ff8700b10ee5f2df8 (diff) | |
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
Diffstat (limited to 'src/runtime/haskell/PGF')
22 files changed, 3793 insertions, 0 deletions
diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs new file mode 100644 index 000000000..e4ed98424 --- /dev/null +++ b/src/runtime/haskell/PGF/Binary.hs @@ -0,0 +1,199 @@ +module PGF.Binary where
+
+import PGF.CId
+import PGF.Data
+import Data.Binary
+import Data.Binary.Put
+import Data.Binary.Get
+import qualified Data.ByteString as BS
+import qualified Data.Map as Map
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Control.Monad
+
+pgfMajorVersion, pgfMinorVersion :: Word16
+(pgfMajorVersion, pgfMinorVersion) = (1,0)
+
+instance Binary PGF where
+ put pgf = putWord16be pgfMajorVersion >>
+ putWord16be pgfMinorVersion >>
+ put ( absname pgf, cncnames pgf
+ , gflags pgf
+ , abstract pgf, concretes pgf
+ )
+ get = do v1 <- getWord16be
+ v2 <- getWord16be
+ absname <- get
+ cncnames <- get
+ gflags <- get
+ abstract <- get
+ concretes <- get
+ return (PGF{ absname=absname, cncnames=cncnames
+ , gflags=gflags
+ , abstract=abstract, concretes=concretes
+ })
+
+instance Binary CId where
+ put (CId bs) = put bs
+ get = liftM CId get
+
+instance Binary Abstr where
+ put abs = put (aflags abs, funs abs, cats abs)
+ get = do aflags <- get
+ funs <- get
+ cats <- get
+ let catfuns = Map.mapWithKey (\cat _ -> [f | (f, (DTyp _ c _,_,_)) <- Map.toList funs, c==cat]) cats
+ return (Abstr{ aflags=aflags
+ , funs=funs, cats=cats
+ , catfuns=catfuns
+ })
+
+instance Binary Concr where
+ put cnc = put ( cflags cnc, lins cnc, opers cnc
+ , lincats cnc, lindefs cnc
+ , printnames cnc, paramlincats cnc
+ , parser cnc
+ )
+ get = do cflags <- get
+ lins <- get
+ opers <- get
+ lincats <- get
+ lindefs <- get
+ printnames <- get
+ paramlincats <- get
+ parser <- get
+ return (Concr{ cflags=cflags, lins=lins, opers=opers
+ , lincats=lincats, lindefs=lindefs
+ , printnames=printnames
+ , paramlincats=paramlincats
+ , parser=parser
+ })
+
+instance Binary Alternative where
+ put (Alt v x) = put v >> put x
+ get = liftM2 Alt get get
+
+instance Binary Term where
+ put (R es) = putWord8 0 >> put es
+ put (S es) = putWord8 1 >> put es
+ put (FV es) = putWord8 2 >> put es
+ put (P e v) = putWord8 3 >> put (e,v)
+ put (W e v) = putWord8 4 >> put (e,v)
+ put (C i ) = putWord8 5 >> put i
+ put (TM i ) = putWord8 6 >> put i
+ put (F f) = putWord8 7 >> put f
+ put (V i) = putWord8 8 >> put i
+ put (K (KS s)) = putWord8 9 >> put s
+ put (K (KP d vs)) = putWord8 10 >> put (d,vs)
+
+ get = do tag <- getWord8
+ case tag of
+ 0 -> liftM R get
+ 1 -> liftM S get
+ 2 -> liftM FV get
+ 3 -> liftM2 P get get
+ 4 -> liftM2 W get get
+ 5 -> liftM C get
+ 6 -> liftM TM get
+ 7 -> liftM F get
+ 8 -> liftM V get
+ 9 -> liftM (K . KS) get
+ 10 -> liftM2 (\d vs -> K (KP d vs)) get get
+ _ -> decodingError
+
+instance Binary Expr where
+ put (EAbs b x exp) = putWord8 0 >> put (b,x,exp)
+ put (EApp e1 e2) = putWord8 1 >> put (e1,e2)
+ put (ELit (LStr s)) = putWord8 2 >> put s
+ put (ELit (LFlt d)) = putWord8 3 >> put d
+ put (ELit (LInt i)) = putWord8 4 >> put i
+ put (EMeta i) = putWord8 5 >> put i
+ put (EFun f) = putWord8 6 >> put f
+ put (EVar i) = putWord8 7 >> put i
+ put (ETyped e ty) = putWord8 8 >> put (e,ty)
+ get = do tag <- getWord8
+ case tag of
+ 0 -> liftM3 EAbs get get get
+ 1 -> liftM2 EApp get get
+ 2 -> liftM (ELit . LStr) get
+ 3 -> liftM (ELit . LFlt) get
+ 4 -> liftM (ELit . LInt) get
+ 5 -> liftM EMeta get
+ 6 -> liftM EFun get
+ 7 -> liftM EVar get
+ 8 -> liftM2 ETyped get get
+ _ -> decodingError
+
+instance Binary Patt where
+ put (PApp f ps) = putWord8 0 >> put (f,ps)
+ put (PVar x) = putWord8 1 >> put x
+ put PWild = putWord8 2
+ put (PLit (LStr s)) = putWord8 3 >> put s
+ put (PLit (LFlt d)) = putWord8 4 >> put d
+ put (PLit (LInt i)) = putWord8 5 >> put i
+ get = do tag <- getWord8
+ case tag of
+ 0 -> liftM2 PApp get get
+ 1 -> liftM PVar get
+ 2 -> return PWild
+ 3 -> liftM (PLit . LStr) get
+ 4 -> liftM (PLit . LFlt) get
+ 5 -> liftM (PLit . LInt) get
+ _ -> decodingError
+
+instance Binary Equation where
+ put (Equ ps e) = put (ps,e)
+ get = liftM2 Equ get get
+
+instance Binary Type where
+ put (DTyp hypos cat exps) = put (hypos,cat,exps)
+ get = liftM3 DTyp get get get
+
+instance Binary BindType where
+ put Explicit = putWord8 0
+ put Implicit = putWord8 1
+ get = do tag <- getWord8
+ case tag of
+ 0 -> return Explicit
+ 1 -> return Implicit
+ _ -> decodingError
+
+instance Binary FFun where
+ put (FFun fun prof lins) = put (fun,prof,lins)
+ get = liftM3 FFun get get get
+
+instance Binary FSymbol where
+ put (FSymCat n l) = putWord8 0 >> put (n,l)
+ put (FSymLit n l) = putWord8 1 >> put (n,l)
+ put (FSymKS ts) = putWord8 2 >> put ts
+ put (FSymKP d vs) = putWord8 3 >> put (d,vs)
+ get = do tag <- getWord8
+ case tag of
+ 0 -> liftM2 FSymCat get get
+ 1 -> liftM2 FSymLit get get
+ 2 -> liftM FSymKS get
+ 3 -> liftM2 (\d vs -> FSymKP d vs) get get
+ _ -> decodingError
+
+instance Binary Production where
+ put (FApply ruleid args) = putWord8 0 >> put (ruleid,args)
+ put (FCoerce fcat) = putWord8 1 >> put fcat
+ get = do tag <- getWord8
+ case tag of
+ 0 -> liftM2 FApply get get
+ 1 -> liftM FCoerce get
+ _ -> decodingError
+
+instance Binary ParserInfo where
+ put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p)
+ get = do functions <- get
+ sequences <- get
+ productions0<- get
+ totalCats <- get
+ startCats <- get
+ return (ParserInfo{functions=functions,sequences=sequences
+ ,productions0=productions0
+ ,productions =filterProductions productions0
+ ,totalCats=totalCats,startCats=startCats})
+
+decodingError = fail "This PGF file was compiled with different version of GF"
diff --git a/src/runtime/haskell/PGF/BuildParser.hs b/src/runtime/haskell/PGF/BuildParser.hs new file mode 100644 index 000000000..23e0725c6 --- /dev/null +++ b/src/runtime/haskell/PGF/BuildParser.hs @@ -0,0 +1,76 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- FCFG parsing, parser information +----------------------------------------------------------------------------- + +module PGF.BuildParser where + +import GF.Data.SortedList +import GF.Data.Assoc +import PGF.CId +import PGF.Data +import PGF.Parsing.FCFG.Utilities + +import Data.Array.IArray +import Data.Maybe +import qualified Data.IntMap as IntMap +import qualified Data.Map as Map +import qualified Data.Set as Set +import Debug.Trace + + +data ParserInfoEx + = ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)] + , leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)] + , leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)] + , grammarToks :: [String] + } + +------------------------------------------------------------ +-- parser information + +getLeftCornerTok pinfo (FFun _ _ lins) + | inRange (bounds syms) 0 = case syms ! 0 of + FSymKS [tok] -> [tok] + _ -> [] + | otherwise = [] + where + syms = (sequences pinfo) ! (lins ! 0) + +getLeftCornerCat pinfo args (FFun _ _ lins) + | inRange (bounds syms) 0 = case syms ! 0 of + FSymCat d _ -> let cat = args !! d + in case IntMap.lookup cat (productions pinfo) of + Just set -> cat : [cat' | FCoerce cat' <- Set.toList set] + Nothing -> [cat] + _ -> [] + | otherwise = [] + where + syms = (sequences pinfo) ! (lins ! 0) + +buildParserInfo :: ParserInfo -> ParserInfoEx +buildParserInfo pinfo = + ParserInfoEx { epsilonRules = epsilonrules + , leftcornerCats = leftcorncats + , leftcornerTokens = leftcorntoks + , grammarToks = grammartoks + } + + where epsilonrules = [ (ruleid,args,cat) + | (cat,set) <- IntMap.toList (productions pinfo) + , (FApply ruleid args) <- Set.toList set + , let (FFun _ _ lins) = (functions pinfo) ! ruleid + , not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ] + leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat)) + | (cat,set) <- IntMap.toList (productions pinfo) + , (FApply ruleid args) <- Set.toList set + , cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ] + leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat)) + | (cat,set) <- IntMap.toList (productions pinfo) + , (FApply ruleid args) <- Set.toList set + , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ] + grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin] diff --git a/src/runtime/haskell/PGF/CId.hs b/src/runtime/haskell/PGF/CId.hs new file mode 100644 index 000000000..fea304d9d --- /dev/null +++ b/src/runtime/haskell/PGF/CId.hs @@ -0,0 +1,55 @@ +module PGF.CId (CId(..), + mkCId, wildCId, + readCId, showCId, + + -- utils + pCId, pIdent, ppCId) where + +import Control.Monad +import qualified Data.ByteString.Char8 as BS +import Data.Char +import qualified Text.ParserCombinators.ReadP as RP +import qualified Text.PrettyPrint as PP + + +-- | An abstract data type that represents +-- identifiers for functions and categories in PGF. +newtype CId = CId BS.ByteString deriving (Eq,Ord) + +wildCId :: CId +wildCId = CId (BS.singleton '_') + +-- | Creates a new identifier from 'String' +mkCId :: String -> CId +mkCId s = CId (BS.pack s) + +-- | Reads an identifier from 'String'. The function returns 'Nothing' if the string is not valid identifier. +readCId :: String -> Maybe CId +readCId s = case [x | (x,cs) <- RP.readP_to_S pCId s, all isSpace cs] of + [x] -> Just x + _ -> Nothing + +-- | Renders the identifier as 'String' +showCId :: CId -> String +showCId (CId x) = BS.unpack x + +instance Show CId where + showsPrec _ = showString . showCId + +instance Read CId where + readsPrec _ = RP.readP_to_S pCId + +pCId :: RP.ReadP CId +pCId = do s <- pIdent + if s == "_" + then RP.pfail + else return (mkCId s) + +pIdent :: RP.ReadP String +pIdent = liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest) + where + isIdentFirst c = c == '_' || isLetter c + isIdentRest c = c == '_' || c == '\'' || isAlphaNum c + +ppCId :: CId -> PP.Doc +ppCId = PP.text . showCId diff --git a/src/runtime/haskell/PGF/Check.hs b/src/runtime/haskell/PGF/Check.hs new file mode 100644 index 000000000..58b66cfe4 --- /dev/null +++ b/src/runtime/haskell/PGF/Check.hs @@ -0,0 +1,173 @@ +module PGF.Check (checkPGF) where + +import PGF.CId +import PGF.Data +import PGF.Macros +import GF.Data.ErrM + +import qualified Data.Map as Map +import Control.Monad +import Debug.Trace + +checkPGF :: PGF -> Err (PGF,Bool) +checkPGF pgf = do + (cs,bs) <- mapM (checkConcrete pgf) + (Map.assocs (concretes pgf)) >>= return . unzip + return (pgf {concretes = Map.fromAscList cs}, and bs) + + +-- errors are non-fatal; replace with 'fail' to change this +msg s = trace s (return ()) + +andMapM :: Monad m => (a -> m Bool) -> [a] -> m Bool +andMapM f xs = mapM f xs >>= return . and + +labelBoolErr :: String -> Err (x,Bool) -> Err (x,Bool) +labelBoolErr ms iob = do + (x,b) <- iob + if b then return (x,b) else (msg ms >> return (x,b)) + + +checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool) +checkConcrete pgf (lang,cnc) = + labelBoolErr ("happened in language " ++ showCId lang) $ do + (rs,bs) <- mapM checkl (Map.assocs (lins cnc)) >>= return . unzip + return ((lang,cnc{lins = Map.fromAscList rs}),and bs) + where + checkl = checkLin pgf lang + +checkLin :: PGF -> CId -> (CId,Term) -> Err ((CId,Term),Bool) +checkLin pgf lang (f,t) = + labelBoolErr ("happened in function " ++ showCId f) $ do + (t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t + return ((f,t'),b) + +inferTerm :: [CType] -> Term -> Err (Term,CType) +inferTerm args trm = case trm of + K _ -> returnt str + C i -> returnt $ ints i + V i -> do + testErr (i < length args) ("too large index " ++ show i) + returnt $ args !! i + S ts -> do + (ts',tys) <- mapM infer ts >>= return . unzip + let tys' = filter (/=str) tys + testErr (null tys') + ("expected Str in " ++ show trm ++ " not " ++ unwords (map show tys')) + return (S ts',str) + R ts -> do + (ts',tys) <- mapM infer ts >>= return . unzip + return $ (R ts',tuple tys) + P t u -> do + (t',tt) <- infer t + (u',tu) <- infer u + case tt of + R tys -> case tu of + R vs -> infer $ foldl P t' [P u' (C i) | i <- [0 .. length vs - 1]] + --- R [v] -> infer $ P t v + --- R (v:vs) -> infer $ P (head tys) (R vs) + + C i -> do + testErr (i < length tys) + ("required more than " ++ show i ++ " fields in " ++ show (R tys)) + return (P t' u', tys !! i) -- record: index must be known + _ -> do + let typ = head tys + testErr (all (==typ) tys) ("different types in table " ++ show trm) + return (P t' u', typ) -- table: types must be same + _ -> Bad $ "projection from " ++ show t ++ " : " ++ show tt + FV [] -> returnt tm0 ---- + FV (t:ts) -> do + (t',ty) <- infer t + (ts',tys) <- mapM infer ts >>= return . unzip + testErr (all (eqType True ty) tys) ("different types in variants " ++ show trm) + return (FV (t':ts'),ty) + W s r -> infer r + _ -> Bad ("no type inference for " ++ show trm) + where + returnt ty = return (trm,ty) + infer = inferTerm args + +checkTerm :: LinType -> Term -> Err (Term,Bool) +checkTerm (args,val) trm = case inferTerm args trm of + Ok (t,ty) -> if eqType False ty val + then return (t,True) + else do + msg ("term: " ++ show trm ++ + "\nexpected type: " ++ show val ++ + "\ninferred type: " ++ show ty) + return (t,False) + Bad s -> do + msg s + return (trm,False) + +-- symmetry in (Ints m == Ints n) is all we can use in variants + +eqType :: Bool -> CType -> CType -> Bool +eqType symm inf exp = case (inf,exp) of + (C k, C n) -> if symm then True else k <= n -- only run-time corr. + (R rs,R ts) -> length rs == length ts && and [eqType symm r t | (r,t) <- zip rs ts] + (TM _, _) -> True ---- for variants [] ; not safe + _ -> inf == exp + +-- should be in a generic module, but not in the run-time DataGFCC + +type CType = Term +type LinType = ([CType],CType) + +tuple :: [CType] -> CType +tuple = R + +ints :: Int -> CType +ints = C + +str :: CType +str = S [] + +lintype :: PGF -> CId -> CId -> LinType +lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of + (cs,c) -> (map vlinc cs, linc c) ---- HOAS + where + linc = lookLincat pgf lang + vlinc (0,c) = linc c + vlinc (i,c) = case linc c of + R ts -> R (ts ++ replicate i str) + +inline :: PGF -> CId -> Term -> Term +inline pgf lang t = case t of + F c -> inl $ look c + _ -> composSafeOp inl t + where + inl = inline pgf lang + look = lookLin pgf lang + +composOp :: Monad m => (Term -> m Term) -> Term -> m Term +composOp f trm = case trm of + R ts -> liftM R $ mapM f ts + S ts -> liftM S $ mapM f ts + FV ts -> liftM FV $ mapM f ts + P t u -> liftM2 P (f t) (f u) + W s t -> liftM (W s) $ f t + _ -> return trm + +composSafeOp :: (Term -> Term) -> Term -> Term +composSafeOp f = maybe undefined id . composOp (return . f) + +-- from GF.Data.Oper + +maybeErr :: String -> Maybe a -> Err a +maybeErr s = maybe (Bad s) Ok + +testErr :: Bool -> String -> Err () +testErr cond msg = if cond then return () else Bad msg + +errVal :: a -> Err a -> a +errVal a = err (const a) id + +errIn :: String -> Err a -> Err a +errIn msg = err (\s -> Bad (s ++ "\nOCCURRED IN\n" ++ msg)) return + +err :: (String -> b) -> (a -> b) -> Err a -> b +err d f e = case e of + Ok a -> f a + Bad s -> d s diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs new file mode 100644 index 000000000..38027e96e --- /dev/null +++ b/src/runtime/haskell/PGF/Data.hs @@ -0,0 +1,95 @@ +module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type, module PGF.PMCFG) where + +import PGF.CId +import PGF.Expr hiding (Value, Env, Tree) +import PGF.Type +import PGF.PMCFG + +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.IntMap as IntMap +import Data.List + +-- internal datatypes for PGF + +-- | An abstract data type representing multilingual grammar +-- in Portable Grammar Format. +data PGF = PGF { + absname :: CId , + cncnames :: [CId] , + gflags :: Map.Map CId String, -- value of a global flag + abstract :: Abstr , + concretes :: Map.Map CId Concr + } + +data Abstr = Abstr { + aflags :: Map.Map CId String, -- value of a flag + funs :: Map.Map CId (Type,Int,[Equation]), -- type, arrity and definition of function + cats :: Map.Map CId [Hypo], -- context of a cat + catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup) + } + +data Concr = Concr { + cflags :: Map.Map CId String, -- value of a flag + lins :: Map.Map CId Term, -- lin of a fun + opers :: Map.Map CId Term, -- oper generated by subex elim + lincats :: Map.Map CId Term, -- lin type of a cat + lindefs :: Map.Map CId Term, -- lin default of a cat + printnames :: Map.Map CId Term, -- printname of a cat or a fun + paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names + parser :: Maybe ParserInfo -- parser + } + +data Term = + R [Term] + | P Term Term + | S [Term] + | K Tokn + | V Int + | C Int + | F CId + | FV [Term] + | W String Term + | TM String + deriving (Eq,Ord,Show) + +data Tokn = + KS String + | KP [String] [Alternative] + deriving (Eq,Ord,Show) + + +-- merge two GFCCs; fails is differens absnames; priority to second arg + +unionPGF :: PGF -> PGF -> PGF +unionPGF one two = case absname one of + n | n == wildCId -> two -- extending empty grammar + | n == absname two -> one { -- extending grammar with same abstract + concretes = Map.union (concretes two) (concretes one), + cncnames = union (cncnames one) (cncnames two) + } + _ -> one -- abstracts don't match ---- print error msg + +emptyPGF :: PGF +emptyPGF = PGF { + absname = wildCId, + cncnames = [] , + gflags = Map.empty, + abstract = error "empty grammar, no abstract", + concretes = Map.empty + } + +-- | This is just a 'CId' with the language name. +-- A language name is the identifier that you write in the +-- top concrete or abstract module in GF after the +-- concrete/abstract keyword. Example: +-- +-- > abstract Lang = ... +-- > concrete LangEng of Lang = ... +type Language = CId + +readLanguage :: String -> Maybe Language +readLanguage = readCId + +showLanguage :: Language -> String +showLanguage = showCId diff --git a/src/runtime/haskell/PGF/Editor.hs b/src/runtime/haskell/PGF/Editor.hs new file mode 100644 index 000000000..3f69da170 --- /dev/null +++ b/src/runtime/haskell/PGF/Editor.hs @@ -0,0 +1,241 @@ +module PGF.Editor ( + State, -- datatype -- type-annotated possibly open tree with a focus + Dict, -- datatype -- abstract syntax information optimized for editing + Position, -- datatype -- path from top to focus + new, -- :: Type -> State -- create new State + refine, -- :: Dict -> CId -> State -> State -- refine focus with CId + replace, -- :: Dict -> Tree -> State -> State -- replace focus with Tree + delete, -- :: State -> State -- replace focus with ? + goNextMeta, -- :: State -> State -- move focus to next ? node + goNext, -- :: State -> State -- move to next node + goTop, -- :: State -> State -- move focus to the top (=root) + goPosition, -- :: Position -> State -> State -- move focus to given position + mkPosition, -- :: [Int] -> Position -- list of choices (top = []) + showPosition,-- :: Position -> [Int] -- readable position + focusType, -- :: State -> Type -- get the type of focus + stateTree, -- :: State -> Tree -- get the current tree + isMetaFocus, -- :: State -> Bool -- whether focus is ? + allMetas, -- :: State -> [(Position,Type)] -- all ?s and their positions + prState, -- :: State -> String -- print state, focus marked * + refineMenu, -- :: Dict -> State -> [CId] -- get refinement menu + pgf2dict -- :: PGF -> Dict -- create editing Dict from PGF + ) where + +import PGF.Data +import PGF.CId +import qualified Data.Map as M +import Debug.Trace ---- + +-- API + +new :: Type -> State +new (DTyp _ t _) = etree2state (uETree t) + +refine :: Dict -> CId -> State -> State +refine dict f = replaceInState (mkRefinement dict f) + +replace :: Dict -> Tree -> State -> State +replace dict t = replaceInState (tree2etree dict t) + +delete :: State -> State +delete s = replaceInState (uETree (typ (tree s))) s + +goNextMeta :: State -> State +goNextMeta s = + if isComplete s then s + else let s1 = goNext s in if isMetaFocus s1 + then s1 else goNextMeta s1 + +isComplete :: State -> Bool +isComplete s = isc (tree s) where + isc t = case atom t of + AMeta _ -> False + ACon _ -> all isc (children t) + +goTop :: State -> State +goTop = navigate (const top) + +goPosition :: [Int] -> State -> State +goPosition p s = s{position = p} + +mkPosition :: [Int] -> Position +mkPosition = id + +refineMenu :: Dict -> State -> [CId] +refineMenu dict s = maybe [] (map fst) $ M.lookup (focusBType s) (refines dict) + +focusType :: State -> Type +focusType s = btype2type (focusBType s) + +stateTree :: State -> Tree +stateTree = etree2tree . tree + +pgf2dict :: PGF -> Dict +pgf2dict pgf = Dict (M.fromAscList fus) refs where + fus = [(f,mkFType ty) | (f,(ty,_)) <- M.toList (funs abs)] + refs = M.fromAscList [(c, fusTo c) | (c,_) <- M.toList (cats abs)] + fusTo c = [(f,ty) | (f,ty@(_,k)) <- fus, k==c] ---- quadratic + mkFType (DTyp hyps c _) = ([k | Hyp _ (DTyp _ k _) <- hyps],c) ----dep types + abs = abstract pgf + +etree2tree :: ETree -> Tree +etree2tree t = case atom t of + ACon f -> Fun f (map etree2tree (children t)) + AMeta i -> Meta i + +tree2etree :: Dict -> Tree -> ETree +tree2etree dict t = case t of + Fun f _ -> annot (look f) t + where + annot (tys,ty) tr = case tr of + Fun f trs -> ETree (ACon f) ty [annt t tr | (t,tr) <- zip tys trs] + Meta i -> ETree (AMeta i) ty [] + annt ty tr = case tr of + Fun _ _ -> tree2etree dict tr + Meta _ -> annot ([],ty) tr + look f = maybe undefined id $ M.lookup f (functs dict) + +prState :: State -> String +prState s = unlines [replicate i ' ' ++ f | (i,f) <- pr [] (tree s)] where + pr i t = + (ind i,prAtom i (atom t)) : concat [pr (sub j i) c | (j,c) <- zip [0..] (children t)] + prAtom i a = prFocus i ++ case a of + ACon f -> prCId f + AMeta i -> "?" ++ show i + prFocus i = if i == position s then "*" else "" + ind i = 2 * length i + sub j i = i ++ [j] + +showPosition :: Position -> [Int] +showPosition = id + +allMetas :: State -> [(Position,Type)] +allMetas s = [(reverse p, btype2type ty) | (p,ty) <- metas [] (tree s)] where + metas p t = + (if isMetaAtom (atom t) then [(p,typ t)] else []) ++ + concat [metas (i:p) u | (i,u) <- zip [0..] (children t)] + +---- Trees and navigation + +data ETree = ETree { + atom :: Atom, + typ :: BType, + children :: [ETree] + } + deriving Show + +data Atom = + ACon CId + | AMeta Int + deriving Show + +btype2type :: BType -> Type +btype2type t = DTyp [] t [] + +uETree :: BType -> ETree +uETree ty = ETree (AMeta 0) ty [] + +data State = State { + position :: Position, + tree :: ETree + } + deriving Show + +type Position = [Int] + +top :: Position +top = [] + +up :: Position -> Position +up p = case p of + _:_ -> init p + _ -> p + +down :: Position -> Position +down = (++[0]) + +left :: Position -> Position +left p = case p of + _:_ | last p > 0 -> init p ++ [last p - 1] + _ -> top + +right :: Position -> Position +right p = case p of + _:_ -> init p ++ [last p + 1] + _ -> top + +etree2state :: ETree -> State +etree2state = State top + +doInState :: (ETree -> ETree) -> State -> State +doInState f s = s{tree = change (position s) (tree s)} where + change p t = case p of + [] -> f t + n:ns -> let (ts1,t0:ts2) = splitAt n (children t) in + t{children = ts1 ++ [change ns t0] ++ ts2} + +subtree :: Position -> ETree -> ETree +subtree p t = case p of + [] -> t + n:ns -> subtree ns (children t !! n) + +focus :: State -> ETree +focus s = subtree (position s) (tree s) + +focusBType :: State -> BType +focusBType s = typ (focus s) + +navigate :: (Position -> Position) -> State -> State +navigate p s = s{position = p (position s)} + +-- p is a fix-point aspect of state change +untilFix :: Eq a => (State -> a) -> (State -> Bool) -> (State -> State) -> State -> State +untilFix p b f s = + if b s + then s + else let fs = f s in if p fs == p s + then s + else untilFix p b f fs + +untilPosition :: (State -> Bool) -> (State -> State) -> State -> State +untilPosition = untilFix position + +goNext :: State -> State +goNext s = case focus s of + st | not (null (children st)) -> navigate down s + _ -> findSister s + where + findSister s = case s of + s' | null (position s') -> s' + s' | hasYoungerSisters s' -> navigate right s' + s' -> findSister (navigate up s') + hasYoungerSisters s = case position s of + p@(_:_) -> length (children (focus (navigate up s))) > last p + 1 + _ -> False + +isMetaFocus :: State -> Bool +isMetaFocus s = isMetaAtom (atom (focus s)) + +isMetaAtom :: Atom -> Bool +isMetaAtom a = case a of + AMeta _ -> True + _ -> False + +replaceInState :: ETree -> State -> State +replaceInState t = doInState (const t) + + +------- + +type BType = CId ----dep types +type FType = ([BType],BType) ----dep types + +data Dict = Dict { + functs :: M.Map CId FType, + refines :: M.Map BType [(CId,FType)] + } + +mkRefinement :: Dict -> CId -> ETree +mkRefinement dict f = ETree (ACon f) val (map uETree args) where + (args,val) = maybe undefined id $ M.lookup f (functs dict) + diff --git a/src/runtime/haskell/PGF/Expr.hs b/src/runtime/haskell/PGF/Expr.hs new file mode 100644 index 000000000..cf0cb79aa --- /dev/null +++ b/src/runtime/haskell/PGF/Expr.hs @@ -0,0 +1,355 @@ +module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(..),
+ readExpr, showExpr, pExpr, pBinds, ppExpr, ppPatt,
+
+ mkApp, unApp,
+ mkStr, unStr,
+ mkInt, unInt,
+ mkDouble, unDouble,
+ mkMeta, isMeta,
+
+ normalForm,
+
+ -- needed in the typechecker
+ Value(..), Env, Funs, eval, apply,
+
+ MetaId,
+
+ -- helpers
+ pMeta,pStr,pArg,pLit,freshName,ppMeta,ppLit,ppParens
+ ) where
+
+import PGF.CId
+import PGF.Type
+
+import Data.Char
+import Data.Maybe
+import Data.List as List
+import Data.Map as Map hiding (showTree)
+import Control.Monad
+import qualified Text.PrettyPrint as PP
+import qualified Text.ParserCombinators.ReadP as RP
+
+data Literal =
+ LStr String -- ^ string constant
+ | LInt Integer -- ^ integer constant
+ | LFlt Double -- ^ floating point constant
+ deriving (Eq,Ord,Show)
+
+type MetaId = Int
+
+data BindType =
+ Explicit
+ | Implicit
+ deriving (Eq,Ord,Show)
+
+-- | Tree is the abstract syntax representation of a given sentence
+-- in some concrete syntax. Technically 'Tree' is a type synonym
+-- of 'Expr'.
+type Tree = Expr
+
+-- | An expression in the abstract syntax of the grammar. It could be
+-- both parameter of a dependent type or an abstract syntax tree for
+-- for some sentence.
+data Expr =
+ EAbs BindType CId Expr -- ^ lambda abstraction
+ | EApp Expr Expr -- ^ application
+ | ELit Literal -- ^ literal
+ | EMeta {-# UNPACK #-} !MetaId -- ^ meta variable
+ | EFun CId -- ^ function or data constructor
+ | EVar {-# UNPACK #-} !Int -- ^ variable with de Bruijn index
+ | ETyped Expr Type -- ^ local type signature
+ | EImplArg Expr -- ^ implicit argument in expression
+ deriving (Eq,Ord,Show)
+
+-- | The pattern is used to define equations in the abstract syntax of the grammar.
+data Patt =
+ PApp CId [Patt] -- ^ application. The identifier should be constructor i.e. defined with 'data'
+ | PLit Literal -- ^ literal
+ | PVar CId -- ^ variable
+ | PWild -- ^ wildcard
+ | PImplArg Patt -- ^ implicit argument in pattern
+ deriving (Eq,Ord)
+
+-- | The equation is used to define lambda function as a sequence
+-- of equations with pattern matching. The list of 'Expr' represents
+-- the patterns and the second 'Expr' is the function body for this
+-- equation.
+data Equation =
+ Equ [Patt] Expr
+ deriving (Eq,Ord)
+
+-- | parses 'String' as an expression
+readExpr :: String -> Maybe Expr
+readExpr s = case [x | (x,cs) <- RP.readP_to_S pExpr s, all isSpace cs] of
+ [x] -> Just x
+ _ -> Nothing
+
+-- | renders expression as 'String'. The list
+-- of identifiers is the list of all free variables
+-- in the expression in order reverse to the order
+-- of binding.
+showExpr :: [CId] -> Expr -> String
+showExpr vars = PP.render . ppExpr 0 vars
+
+instance Read Expr where
+ readsPrec _ = RP.readP_to_S pExpr
+
+-- | Constructs an expression by applying a function to a list of expressions
+mkApp :: CId -> [Expr] -> Expr
+mkApp f es = foldl EApp (EFun f) es
+
+-- | Decomposes an expression into application of function
+unApp :: Expr -> Maybe (CId,[Expr])
+unApp = extract []
+ where
+ extract es (EFun f) = Just (f,es)
+ extract es (EApp e1 e2) = extract (e2:es) e1
+ extract es _ = Nothing
+
+-- | Constructs an expression from string literal
+mkStr :: String -> Expr
+mkStr s = ELit (LStr s)
+
+-- | Decomposes an expression into string literal
+unStr :: Expr -> Maybe String
+unStr (ELit (LStr s)) = Just s
+unStr _ = Nothing
+
+-- | Constructs an expression from integer literal
+mkInt :: Integer -> Expr
+mkInt i = ELit (LInt i)
+
+-- | Decomposes an expression into integer literal
+unInt :: Expr -> Maybe Integer
+unInt (ELit (LInt i)) = Just i
+unInt _ = Nothing
+
+-- | Constructs an expression from real number literal
+mkDouble :: Double -> Expr
+mkDouble f = ELit (LFlt f)
+
+-- | Decomposes an expression into real number literal
+unDouble :: Expr -> Maybe Double
+unDouble (ELit (LFlt f)) = Just f
+unDouble _ = Nothing
+
+-- | Constructs an expression which is meta variable
+mkMeta :: Expr
+mkMeta = EMeta 0
+
+-- | Checks whether an expression is a meta variable
+isMeta :: Expr -> Bool
+isMeta (EMeta _) = True
+isMeta _ = False
+
+-----------------------------------------------------
+-- Parsing
+-----------------------------------------------------
+
+pExpr :: RP.ReadP Expr
+pExpr = RP.skipSpaces >> (pAbs RP.<++ pTerm)
+ where
+ pTerm = do f <- pFactor
+ RP.skipSpaces
+ as <- RP.sepBy pArg RP.skipSpaces
+ return (foldl EApp f as)
+
+ pAbs = do xs <- RP.between (RP.char '\\') (RP.skipSpaces >> RP.string "->") pBinds
+ e <- pExpr
+ return (foldr (\(b,x) e -> EAbs b x e) e xs)
+
+pBinds :: RP.ReadP [(BindType,CId)]
+pBinds = do xss <- RP.sepBy1 (RP.skipSpaces >> pBind) (RP.skipSpaces >> RP.char ',')
+ return (concat xss)
+ where
+ pCIdOrWild = pCId `mplus` (RP.char '_' >> return wildCId)
+
+ pBind =
+ do x <- pCIdOrWild
+ return [(Explicit,x)]
+ `mplus`
+ RP.between (RP.char '{')
+ (RP.skipSpaces >> RP.char '}')
+ (RP.sepBy1 (RP.skipSpaces >> pCIdOrWild >>= \id -> return (Implicit,id)) (RP.skipSpaces >> RP.char ','))
+
+pArg = fmap EImplArg (RP.between (RP.char '{') (RP.char '}') pExpr)
+ RP.<++
+ pFactor
+
+pFactor = fmap EFun pCId
+ RP.<++ fmap ELit pLit
+ RP.<++ fmap EMeta pMeta
+ RP.<++ RP.between (RP.char '(') (RP.char ')') pExpr
+ RP.<++ RP.between (RP.char '<') (RP.char '>') pTyped
+
+pTyped = do RP.skipSpaces
+ e <- pExpr
+ RP.skipSpaces
+ RP.char ':'
+ RP.skipSpaces
+ ty <- pType
+ return (ETyped e ty)
+
+pMeta = do RP.char '?'
+ return 0
+
+pLit :: RP.ReadP Literal
+pLit = pNum RP.<++ liftM LStr pStr
+
+pNum = do x <- RP.munch1 isDigit
+ ((RP.char '.' >> RP.munch1 isDigit >>= \y -> return (LFlt (read (x++"."++y))))
+ RP.<++
+ (return (LInt (read x))))
+
+pStr = RP.char '"' >> (RP.manyTill (pEsc RP.<++ RP.get) (RP.char '"'))
+ where
+ pEsc = RP.char '\\' >> RP.get
+
+
+-----------------------------------------------------
+-- Printing
+-----------------------------------------------------
+
+ppExpr :: Int -> [CId] -> Expr -> PP.Doc
+ppExpr d scope (EAbs b x e) = let (bs,xs,e1) = getVars [] [] (EAbs b x e)
+ in ppParens (d > 1) (PP.char '\\' PP.<>
+ PP.hsep (PP.punctuate PP.comma (reverse (List.zipWith ppBind bs xs))) PP.<+>
+ PP.text "->" PP.<+>
+ ppExpr 1 (xs++scope) e1)
+ where
+ getVars bs xs (EAbs b x e) = getVars (b:bs) ((freshName x xs):xs) e
+ getVars bs xs e = (bs,xs,e)
+ppExpr d scope (EApp e1 e2) = ppParens (d > 3) ((ppExpr 3 scope e1) PP.<+> (ppExpr 4 scope e2))
+ppExpr d scope (ELit l) = ppLit l
+ppExpr d scope (EMeta n) = ppMeta n
+ppExpr d scope (EFun f) = ppCId f
+ppExpr d scope (EVar i) = ppCId (scope !! i)
+ppExpr d scope (ETyped e ty)= PP.char '<' PP.<> ppExpr 0 scope e PP.<+> PP.colon PP.<+> ppType 0 scope ty PP.<> PP.char '>'
+ppExpr d scope (EImplArg e) = PP.braces (ppExpr 0 scope e)
+
+ppPatt :: Int -> [CId] -> Patt -> ([CId],PP.Doc)
+ppPatt d scope (PApp f ps) = let (scope',ds) = mapAccumL (ppPatt 2) scope ps
+ in (scope',ppParens (not (List.null ps) && d > 1) (ppCId f PP.<+> PP.hsep ds))
+ppPatt d scope (PLit l) = (scope,ppLit l)
+ppPatt d scope (PVar f) = (f:scope,ppCId f)
+ppPatt d scope PWild = (scope,PP.char '_')
+ppPatt d scope (PImplArg p) = let (scope',d) = ppPatt 0 scope p
+ in (scope',PP.braces d)
+
+ppBind Explicit x = ppCId x
+ppBind Implicit x = PP.braces (ppCId x)
+
+ppLit (LStr s) = PP.text (show s)
+ppLit (LInt n) = PP.integer n
+ppLit (LFlt d) = PP.double d
+
+ppMeta :: MetaId -> PP.Doc
+ppMeta n
+ | n == 0 = PP.char '?'
+ | otherwise = PP.char '?' PP.<> PP.int n
+
+ppParens True = PP.parens
+ppParens False = id
+
+freshName :: CId -> [CId] -> CId
+freshName x xs0 = loop 1 x
+ where
+ xs = wildCId : xs0
+
+ loop i y
+ | elem y xs = loop (i+1) (mkCId (show x++show i))
+ | otherwise = y
+
+
+-----------------------------------------------------
+-- Computation
+-----------------------------------------------------
+
+-- | Compute an expression to normal form
+normalForm :: Funs -> Int -> Env -> Expr -> Expr
+normalForm funs k env e = value2expr k (eval funs env e)
+ where
+ value2expr i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr i) vs)
+ value2expr i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr i) vs)
+ value2expr i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr i) vs)
+ value2expr i (VSusp j env vs k) = value2expr i (k (VGen j vs))
+ value2expr i (VLit l) = ELit l
+ value2expr i (VClosure env (EAbs b x e)) = EAbs b x (value2expr (i+1) (eval funs ((VGen i []):env) e))
+ value2expr i (VImplArg v) = EImplArg (value2expr i v)
+
+data Value
+ = VApp CId [Value]
+ | VLit Literal
+ | VMeta {-# UNPACK #-} !MetaId Env [Value]
+ | VSusp {-# UNPACK #-} !MetaId Env [Value] (Value -> Value)
+ | VGen {-# UNPACK #-} !Int [Value]
+ | VClosure Env Expr
+ | VImplArg Value
+
+type Funs = Map.Map CId (Type,Int,[Equation]) -- type and def of a fun
+type Env = [Value]
+
+eval :: Funs -> Env -> Expr -> Value
+eval funs env (EVar i) = env !! i
+eval funs env (EFun f) = case Map.lookup f funs of
+ Just (_,a,eqs) -> if a == 0
+ then case eqs of
+ Equ [] e : _ -> eval funs [] e
+ _ -> VApp f []
+ else VApp f []
+ Nothing -> error ("unknown function "++showCId f)
+eval funs env (EApp e1 e2) = apply funs env e1 [eval funs env e2]
+eval funs env (EAbs b x e) = VClosure env (EAbs b x e)
+eval funs env (EMeta i) = VMeta i env []
+eval funs env (ELit l) = VLit l
+eval funs env (ETyped e _) = eval funs env e
+eval funs env (EImplArg e) = VImplArg (eval funs env e)
+
+apply :: Funs -> Env -> Expr -> [Value] -> Value
+apply funs env e [] = eval funs env e
+apply funs env (EVar i) vs = applyValue funs (env !! i) vs
+apply funs env (EFun f) vs = case Map.lookup f funs of
+ Just (_,a,eqs) -> if a <= length vs
+ then let (as,vs') = splitAt a vs
+ in match funs f eqs as vs'
+ else VApp f vs
+ Nothing -> error ("unknown function "++showCId f)
+apply funs env (EApp e1 e2) vs = apply funs env e1 (eval funs env e2 : vs)
+apply funs env (EAbs _ x e) (v:vs) = apply funs (v:env) e vs
+apply funs env (EMeta i) vs = VMeta i env vs
+apply funs env (ELit l) vs = error "literal of function type"
+apply funs env (ETyped e _) vs = apply funs env e vs
+apply funs env (EImplArg _) vs = error "implicit argument in function position"
+
+applyValue funs v [] = v
+applyValue funs (VApp f vs0) vs = apply funs [] (EFun f) (vs0++vs)
+applyValue funs (VLit _) vs = error "literal of function type"
+applyValue funs (VMeta i env vs0) vs = VMeta i env (vs0++vs)
+applyValue funs (VGen i vs0) vs = VGen i (vs0++vs)
+applyValue funs (VSusp i env vs0 k) vs = VSusp i env vs0 (\v -> applyValue funs (k v) vs)
+applyValue funs (VClosure env (EAbs b x e)) (v:vs) = apply funs (v:env) e vs
+applyValue funs (VImplArg _) vs = error "implicit argument in function position"
+
+-----------------------------------------------------
+-- Pattern matching
+-----------------------------------------------------
+
+match :: Funs -> CId -> [Equation] -> [Value] -> [Value] -> Value
+match funs f eqs as0 vs0 =
+ case eqs of
+ [] -> VApp f (as0++vs0)
+ (Equ ps res):eqs -> tryMatches eqs ps as0 res []
+ where
+ tryMatches eqs [] [] res env = apply funs env res vs0
+ tryMatches eqs (p:ps) (a:as) res env = tryMatch p a env
+ where
+ tryMatch (PVar x ) (v ) env = tryMatches eqs ps as res (v:env)
+ tryMatch (PWild ) (_ ) env = tryMatches eqs ps as res env
+ tryMatch (p ) (VMeta i envi vs ) env = VSusp i envi vs (\v -> tryMatch p v env)
+ tryMatch (p ) (VGen i vs ) env = VApp f (as0++vs0)
+ tryMatch (p ) (VSusp i envi vs k) env = VSusp i envi vs (\v -> tryMatch p (k v) env)
+ tryMatch (PApp f1 ps1) (VApp f2 vs2 ) env | f1 == f2 = tryMatches eqs (ps1++ps) (vs2++as) res env
+ tryMatch (PLit l1 ) (VLit l2 ) env | l1 == l2 = tryMatches eqs ps as res env
+ tryMatch (PImplArg p ) (VImplArg v ) env = tryMatch p v env
+ tryMatch _ _ env = match funs f eqs as0 vs0
+
diff --git a/src/runtime/haskell/PGF/Expr.hs-boot b/src/runtime/haskell/PGF/Expr.hs-boot new file mode 100644 index 000000000..34a62a410 --- /dev/null +++ b/src/runtime/haskell/PGF/Expr.hs-boot @@ -0,0 +1,28 @@ +module PGF.Expr where
+
+import PGF.CId
+import qualified Text.PrettyPrint as PP
+import qualified Text.ParserCombinators.ReadP as RP
+
+data Expr
+
+instance Eq Expr
+instance Ord Expr
+instance Show Expr
+
+
+data BindType = Explicit | Implicit
+
+instance Eq BindType
+instance Ord BindType
+instance Show BindType
+
+
+pArg :: RP.ReadP Expr
+pBinds :: RP.ReadP [(BindType,CId)]
+
+ppExpr :: Int -> [CId] -> Expr -> PP.Doc
+
+freshName :: CId -> [CId] -> CId
+
+ppParens :: Bool -> PP.Doc -> PP.Doc
diff --git a/src/runtime/haskell/PGF/Generate.hs b/src/runtime/haskell/PGF/Generate.hs new file mode 100644 index 000000000..5add00a78 --- /dev/null +++ b/src/runtime/haskell/PGF/Generate.hs @@ -0,0 +1,66 @@ +module PGF.Generate where + +import PGF.CId +import PGF.Data +import PGF.Macros +import PGF.TypeCheck + +import qualified Data.Map as M +import System.Random + +-- generate an infinite list of trees exhaustively +generate :: PGF -> Type -> Maybe Int -> [Expr] +generate pgf ty@(DTyp _ cat _) dp = filter (\e -> case checkExpr pgf e ty of + Left _ -> False + Right _ -> True ) + (concatMap (\i -> gener i cat) depths) + where + gener 0 c = [EFun f | (f, ([],_)) <- fns c] + gener i c = [ + tr | + (f, (cs,_)) <- fns c, + let alts = map (gener (i-1)) cs, + ts <- combinations alts, + let tr = foldl EApp (EFun f) ts, + depth tr >= i + ] + fns c = [(f,catSkeleton ty) | (f,ty) <- functionsToCat pgf c] + depths = maybe [0 ..] (\d -> [0..d]) dp + +-- generate an infinite list of trees randomly +genRandom :: StdGen -> PGF -> Type -> [Expr] +genRandom gen pgf ty@(DTyp _ cat _) = filter (\e -> case checkExpr pgf e ty of + Left _ -> False + Right _ -> True ) + (genTrees (randomRs (0.0, 1.0 :: Double) gen) cat) + where + timeout = 47 -- give up + + genTrees ds0 cat = + let (ds,ds2) = splitAt (timeout+1) ds0 -- for time out, else ds + (t,k) = genTree ds cat + in (if k>timeout then id else (t:)) + (genTrees ds2 cat) -- else (drop k ds) + + genTree rs = gett rs where + gett ds cid | cid == cidString = (ELit (LStr "foo"), 1) + gett ds cid | cid == cidInt = (ELit (LInt 12345), 1) + gett ds cid | cid == cidFloat = (ELit (LFlt 12345), 1) + gett [] _ = (ELit (LStr "TIMEOUT"), 1) ---- + gett ds cat = case fns cat of + [] -> (EMeta 0,1) + fs -> let + d:ds2 = ds + (f,args) = getf d fs + (ts,k) = getts ds2 args + in (foldl EApp (EFun f) ts, k+1) + getf d fs = let lg = (length fs) in + fs !! (floor (d * fromIntegral lg)) + getts ds cats = case cats of + c:cs -> let + (t, k) = gett ds c + (ts,ks) = getts (drop k ds) cs + in (t:ts, k + ks) + _ -> ([],0) + + fns cat = [(f,(fst (catSkeleton ty))) | (f,ty) <- functionsToCat pgf cat] diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs new file mode 100644 index 000000000..fdd4cecb5 --- /dev/null +++ b/src/runtime/haskell/PGF/Linearize.hs @@ -0,0 +1,166 @@ +{-# LANGUAGE ParallelListComp #-} +module PGF.Linearize + (linearizes,realize,realizes,linTree, linTreeMark,linearizesMark) where + +import PGF.CId +import PGF.Data +import PGF.Macros +import PGF.Tree + +import Control.Monad +import qualified Data.Map as Map +import Data.List + +import Debug.Trace + +-- linearization and computation of concrete PGF Terms + +linearizes :: PGF -> CId -> Expr -> [String] +linearizes pgf lang = realizes . linTree pgf lang + +realize :: Term -> String +realize = concat . take 1 . realizes + +realizes :: Term -> [String] +realizes = map (unwords . untokn) . realizest + +realizest :: Term -> [[Tokn]] +realizest trm = case trm of + R ts -> realizest (ts !! 0) + S ss -> map concat $ combinations $ map realizest ss + K t -> [[t]] + W s t -> [[KS (s ++ r)] | [KS r] <- realizest t] + FV ts -> concatMap realizest ts + TM s -> [[KS s]] + _ -> [[KS $ "REALIZE_ERROR " ++ show trm]] ---- debug + +untokn :: [Tokn] -> [String] +untokn ts = case ts of + KP d _ : [] -> d + KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss + KS s : ws -> s : untokn ws + [] -> [] + where + sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of + v:_ -> v + _ -> d + +-- Lifts all variants to the top level (except those in macros). +liftVariants :: Term -> [Term] +liftVariants = f + where + f (R ts) = liftM R $ mapM f ts + f (P t1 t2) = liftM2 P (f t1) (f t2) + f (S ts) = liftM S $ mapM f ts + f (FV ts) = ts >>= f + f (W s t) = liftM (W s) $ f t + f t = return t + +linTree :: PGF -> CId -> Expr -> Term +linTree pgf lang e = lin (expr2tree e) Nothing + where + cnc = lookMap (error "no lang") lang (concretes pgf) + + lin (Abs xs e ) mty = case lin e Nothing of + R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs) + TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs) + lin (Fun fun es) mty = case Map.lookup fun (funs (abstract pgf)) of + Just (DTyp hyps _ _,_,_) -> let argVariants = sequence [liftVariants (lin e (Just ty)) | e <- es | (_,_,ty) <- hyps] + in variants [compute pgf lang args $ lookMap tm0 fun (lins cnc) | args <- argVariants] + Nothing -> tm0 + lin (Lit (LStr s)) mty = R [kks (show s)] -- quoted + lin (Lit (LInt i)) mty = R [kks (show i)] + lin (Lit (LFlt d)) mty = R [kks (show d)] + lin (Var x) mty = case mty of + Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc)) + Nothing -> TM (showCId x) + lin (Meta i) mty = case mty of + Just (DTyp _ cat _) -> compute pgf lang [K (KS (show i))] (lookMap tm0 cat (lindefs cnc)) + Nothing -> TM (show i) + +variants :: [Term] -> Term +variants ts = case ts of + [t] -> t + _ -> FV ts + +unvariants :: Term -> [Term] +unvariants t = case t of + FV ts -> ts + _ -> [t] + +compute :: PGF -> CId -> [Term] -> Term -> Term +compute pgf lang args = comp where + comp trm = case trm of + P r p -> proj (comp r) (comp p) + W s t -> W s (comp t) + R ts -> R $ map comp ts + V i -> idx args i -- already computed + F c -> comp $ look c -- not computed (if contains argvar) + FV ts -> FV $ map comp ts + S ts -> S $ filter (/= S []) $ map comp ts + _ -> trm + + look = lookOper pgf lang + + idx xs i = if i > length xs - 1 + then trace + ("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0 + else xs !! i + + proj r p = case (r,p) of + (_, FV ts) -> FV $ map (proj r) ts + (FV ts, _ ) -> FV $ map (\t -> proj t p) ts + (W s t, _) -> kks (s ++ getString (proj t p)) + _ -> comp $ getField r (getIndex p) + + getString t = case t of + K (KS s) -> s + _ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR" + + getIndex t = case t of + C i -> i + TM _ -> 0 -- default value for parameter + _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666 + + getField t i = case t of + R rs -> idx rs i + TM s -> TM s + _ -> error ("ERROR in grammar compiler: field from " ++ show t) t + +--------- +-- markup with tree positions + +linearizesMark :: PGF -> CId -> Expr -> [String] +linearizesMark pgf lang = realizes . linTreeMark pgf lang + +linTreeMark :: PGF -> CId -> Expr -> Term +linTreeMark pgf lang = lin [] . expr2tree + where + lin p (Abs xs e ) = case lin p e of + R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs) + TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs) + lin p (Fun fun es) = + let argVariants = + mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es) + in variants [mark (fun,p) $ compute pgf lang args $ look fun | + args <- argVariants] + lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted + lin p (Lit (LInt i)) = mark p $ R [kks (show i)] + lin p (Lit (LFlt d)) = mark p $ R [kks (show d)] + lin p (Var x) = mark p $ TM (showCId x) + lin p (Meta i) = mark p $ TM (show i) + + look = lookLin pgf lang + + mark :: Show a => a -> Term -> Term + mark p t = case t of + R ts -> R $ map (mark p) ts + FV ts -> R $ map (mark p) ts + S ts -> S $ bracket p ts + K s -> S $ bracket p [t] + W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts] + _ -> t + -- otherwise in normal form + + bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"] + sub p i = p ++ [i] diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs new file mode 100644 index 000000000..af25de025 --- /dev/null +++ b/src/runtime/haskell/PGF/Macros.hs @@ -0,0 +1,154 @@ +module PGF.Macros where + +import PGF.CId +import PGF.Data +import Control.Monad +import qualified Data.Map as Map +import qualified Data.Array as Array +import Data.Maybe +import Data.List + +-- operations for manipulating PGF grammars and objects + +mapConcretes :: (Concr -> Concr) -> PGF -> PGF +mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) } + +lookLin :: PGF -> CId -> CId -> Term +lookLin pgf lang fun = + lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes pgf + +lookOper :: PGF -> CId -> CId -> Term +lookOper pgf lang fun = + lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes pgf + +lookLincat :: PGF -> CId -> CId -> Term +lookLincat pgf lang fun = + lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes pgf + +lookParamLincat :: PGF -> CId -> CId -> Term +lookParamLincat pgf lang fun = + lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes pgf + +lookPrintName :: PGF -> CId -> CId -> Term +lookPrintName pgf lang fun = + lookMap tm0 fun $ printnames $ lookMap (error "no lang") lang $ concretes pgf + +lookType :: PGF -> CId -> Type +lookType pgf f = + case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of + (ty,_,_) -> ty + +lookDef :: PGF -> CId -> [Equation] +lookDef pgf f = + case lookMap (error $ "lookDef " ++ show f) f (funs (abstract pgf)) of + (_,a,eqs) -> eqs + +isData :: PGF -> CId -> Bool +isData pgf f = + case Map.lookup f (funs (abstract pgf)) of + Just (_,_,[]) -> True -- the encoding of data constrs + _ -> False + +lookValCat :: PGF -> CId -> CId +lookValCat pgf = valCat . lookType pgf + +lookParser :: PGF -> CId -> Maybe ParserInfo +lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser + +lookStartCat :: PGF -> CId +lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat")) + [gflags pgf, aflags (abstract pgf)] + +lookGlobalFlag :: PGF -> CId -> String +lookGlobalFlag pgf f = + lookMap "?" f (gflags pgf) + +lookAbsFlag :: PGF -> CId -> String +lookAbsFlag pgf f = + lookMap "?" f (aflags (abstract pgf)) + +lookConcr :: PGF -> CId -> Concr +lookConcr pgf cnc = + lookMap (error $ "Missing concrete syntax: " ++ showCId cnc) cnc $ concretes pgf + +lookConcrFlag :: PGF -> CId -> CId -> Maybe String +lookConcrFlag pgf lang f = Map.lookup f $ cflags $ lookConcr pgf lang + +functionsToCat :: PGF -> CId -> [(CId,Type)] +functionsToCat pgf cat = + [(f,ty) | f <- fs, Just (ty,_,_) <- [Map.lookup f $ funs $ abstract pgf]] + where + fs = lookMap [] cat $ catfuns $ abstract pgf + +missingLins :: PGF -> CId -> [CId] +missingLins pgf lang = [c | c <- fs, not (hasl c)] where + fs = Map.keys $ funs $ abstract pgf + hasl = hasLin pgf lang + +hasLin :: PGF -> CId -> CId -> Bool +hasLin pgf lang f = Map.member f $ lins $ lookConcr pgf lang + +restrictPGF :: (CId -> Bool) -> PGF -> PGF +restrictPGF cond pgf = pgf { + abstract = abstr { + funs = restrict $ funs $ abstr, + cats = restrict $ cats $ abstr + } + } ---- restrict concrs also, might be needed + where + restrict = Map.filterWithKey (\c _ -> cond c) + abstr = abstract pgf + +depth :: Expr -> Int +depth (EAbs _ _ t) = depth t +depth (EApp e1 e2) = max (depth e1) (depth e2) + 1 +depth _ = 1 + +cftype :: [CId] -> CId -> Type +cftype args val = DTyp [(Explicit,wildCId,cftype [] arg) | arg <- args] val [] + +typeOfHypo :: Hypo -> Type +typeOfHypo (_,_,ty) = ty + +catSkeleton :: Type -> ([CId],CId) +catSkeleton ty = case ty of + DTyp hyps val _ -> ([valCat (typeOfHypo h) | h <- hyps],val) + +typeSkeleton :: Type -> ([(Int,CId)],CId) +typeSkeleton ty = case ty of + DTyp hyps val _ -> ([(contextLength ty, valCat ty) | h <- hyps, let ty = typeOfHypo h],val) + +valCat :: Type -> CId +valCat ty = case ty of + DTyp _ val _ -> val + +contextLength :: Type -> Int +contextLength ty = case ty of + DTyp hyps _ _ -> length hyps + +term0 :: CId -> Term +term0 = TM . showCId + +tm0 :: Term +tm0 = TM "?" + +kks :: String -> Term +kks = K . KS + +-- lookup with default value +lookMap :: (Show i, Ord i) => a -> i -> Map.Map i a -> a +lookMap d c m = Map.findWithDefault d c m + +--- from Operations +combinations :: [[a]] -> [[a]] +combinations t = case t of + [] -> [[]] + aa:uu -> [a:u | a <- aa, u <- combinations uu] + +isLiteralCat :: CId -> Bool +isLiteralCat = (`elem` [cidString, cidFloat, cidInt, cidVar]) + +cidString = mkCId "String" +cidInt = mkCId "Int" +cidFloat = mkCId "Float" +cidVar = mkCId "__gfVar" diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs new file mode 100644 index 000000000..9eee71a97 --- /dev/null +++ b/src/runtime/haskell/PGF/Morphology.hs @@ -0,0 +1,26 @@ +module PGF.Morphology(Lemma,Analysis,Morpho, + buildMorpho, + lookupMorpho,fullFormLexicon) where + +import PGF.ShowLinearize (collectWords) +import PGF.Data +import PGF.CId + +import qualified Data.Map as Map +import Data.List (intersperse) + +-- these 4 definitions depend on the datastructure used + +type Lemma = CId +type Analysis = String + +newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)]) + +buildMorpho :: PGF -> Language -> Morpho +buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang)) + +lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)] +lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo + +fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])] +fullFormLexicon (Morpho mo) = Map.toList mo diff --git a/src/runtime/haskell/PGF/PMCFG.hs b/src/runtime/haskell/PGF/PMCFG.hs new file mode 100644 index 000000000..c657e3d17 --- /dev/null +++ b/src/runtime/haskell/PGF/PMCFG.hs @@ -0,0 +1,119 @@ +module PGF.PMCFG where
+
+import PGF.CId
+import PGF.Expr
+
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.IntMap as IntMap
+import Data.Array.IArray
+import Data.Array.Unboxed
+import Text.PrettyPrint
+
+type FCat = Int
+type FIndex = Int
+type FPointPos = Int
+data FSymbol
+ = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
+ | FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
+ | FSymKS [String]
+ | FSymKP [String] [Alternative]
+ deriving (Eq,Ord,Show)
+type Profile = [Int]
+data Production
+ = FApply {-# UNPACK #-} !FunId [FCat]
+ | FCoerce {-# UNPACK #-} !FCat
+ | FConst Expr [String]
+ deriving (Eq,Ord,Show)
+data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
+type FSeq = Array FPointPos FSymbol
+type FunId = Int
+type SeqId = Int
+
+data Alternative =
+ Alt [String] [String]
+ deriving (Eq,Ord,Show)
+
+data ParserInfo
+ = ParserInfo { functions :: Array FunId FFun
+ , sequences :: Array SeqId FSeq
+ , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
+ , productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
+ , startCats :: Map.Map CId [FCat]
+ , totalCats :: {-# UNPACK #-} !FCat
+ }
+
+
+fcatString, fcatInt, fcatFloat, fcatVar :: Int
+fcatString = (-1)
+fcatInt = (-2)
+fcatFloat = (-3)
+fcatVar = (-4)
+
+isLiteralFCat :: FCat -> Bool
+isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])
+
+ppPMCFG :: ParserInfo -> Doc
+ppPMCFG pinfo =
+ text "productions" $$
+ nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
+ text "functions" $$
+ nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
+ text "sequences" $$
+ nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$
+ text "startcats" $$
+ nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo))))
+
+ppProduction (fcat,FApply funid args) =
+ ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
+ppProduction (fcat,FCoerce arg) =
+ ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
+ppProduction (fcat,FConst _ ss) =
+ ppFCat fcat <+> text "->" <+> ppStrs ss
+
+ppFun (funid,FFun fun _ arr) =
+ ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
+
+ppSeq (seqid,seq) =
+ ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
+
+ppStartCat (id,fcats) =
+ ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats)))
+
+ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
+ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
+ppSymbol (FSymKS ts) = ppStrs ts
+ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
+
+ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
+
+ppStrs ss = doubleQuotes (hsep (map text ss))
+
+ppFCat fcat
+ | fcat == fcatString = text "CString"
+ | fcat == fcatInt = text "CInt"
+ | fcat == fcatFloat = text "CFloat"
+ | fcat == fcatVar = text "CVar"
+ | otherwise = char 'C' <> int fcat
+
+ppFunId funid = char 'F' <> int funid
+ppSeqId seqid = char 'S' <> int seqid
+
+
+filterProductions = closure
+ where
+ closure prods0
+ | IntMap.size prods == IntMap.size prods0 = prods
+ | otherwise = closure prods
+ where
+ prods = IntMap.mapMaybe (filterProdSet prods0) prods0
+
+ filterProdSet prods set0
+ | Set.null set = Nothing
+ | otherwise = Just set
+ where
+ set = Set.filter (filterRule prods) set0
+
+ filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
+ filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
+ filterRule prods _ = True
diff --git a/src/runtime/haskell/PGF/Paraphrase.hs b/src/runtime/haskell/PGF/Paraphrase.hs new file mode 100644 index 000000000..58d15b2e8 --- /dev/null +++ b/src/runtime/haskell/PGF/Paraphrase.hs @@ -0,0 +1,112 @@ +---------------------------------------------------------------------- +-- | +-- Module : Paraphrase +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- Generate parapharases with def definitions. +----------------------------------------------------------------------------- + +module PGF.Paraphrase ( + paraphrase, + paraphraseN + ) where + +import PGF.Data +import PGF.Tree +import PGF.Macros (lookDef,isData) +import PGF.CId + +import Data.List (nub,sort,group) +import qualified Data.Map as Map + +import Debug.Trace ---- + +paraphrase :: PGF -> Expr -> [Expr] +paraphrase pgf = nub . paraphraseN 2 pgf + +paraphraseN :: Int -> PGF -> Expr -> [Expr] +paraphraseN i pgf = map tree2expr . paraphraseN' i pgf . expr2tree + +paraphraseN' :: Int -> PGF -> Tree -> [Tree] +paraphraseN' 0 _ t = [t] +paraphraseN' i pgf t = + step i t ++ [Fun g ts' | Fun g ts <- step (i-1) t, ts' <- sequence (map par ts)] + where + par = paraphraseN' (i-1) pgf + step 0 t = [t] + step i t = let stept = step (i-1) t in stept ++ concat [def u | u <- stept] + def = fromDef pgf + +fromDef :: PGF -> Tree -> [Tree] +fromDef pgf t@(Fun f ts) = defDown t ++ defUp t where + defDown t = [subst g u | let equ = equsFrom f, (u,g) <- match equ ts, trequ "U" f equ] + defUp t = [subst g u | equ <- equsTo f, (u,g) <- match [equ] ts, trequ "D" f equ] + + equsFrom f = [(ps,d) | Just equs <- [lookup f equss], (Fun _ ps,d) <- equs] + + equsTo f = [c | (_,equs) <- equss, c <- casesTo f equs] + + casesTo f equs = + [(ps,p) | (p,d@(Fun g ps)) <- equs, g==f, + isClosed d || (length equs == 1 && isLinear d)] + + equss = [(f,[(Fun f (map patt2tree ps), expr2tree d) | (Equ ps d) <- eqs]) | + (f,(_,_,eqs)) <- Map.assocs (funs (abstract pgf)), not (null eqs)] + + trequ s f e = True ----trace (s ++ ": " ++ show f ++ " " ++ show e) True + +subst :: Subst -> Tree -> Tree +subst g e = case e of + Fun f ts -> Fun f (map substg ts) + Var x -> maybe e id $ lookup x g + _ -> e + where + substg = subst g + +type Subst = [(CId,Tree)] + +-- this applies to pattern, hence don't need to consider abstractions +isClosed :: Tree -> Bool +isClosed t = case t of + Fun _ ts -> all isClosed ts + Var _ -> False + _ -> True + +-- this applies to pattern, hence don't need to consider abstractions +isLinear :: Tree -> Bool +isLinear = nodup . vars where + vars t = case t of + Fun _ ts -> concatMap vars ts + Var x -> [x] + _ -> [] + nodup = all ((<2) . length) . group . sort + + +match :: [([Tree],Tree)] -> [Tree] -> [(Tree, Subst)] +match cases terms = case cases of + [] -> [] + (patts,_):_ | length patts /= length terms -> [] + (patts,val):cc -> case mapM tryMatch (zip patts terms) of + Just substs -> return (val, concat substs) + _ -> match cc terms + where + tryMatch (p,t) = case (p, t) of + (Var x, _) | notMeta t -> return [(x,t)] + (Fun p pp, Fun f tt) | p == f && length pp == length tt -> do + matches <- mapM tryMatch (zip pp tt) + return (concat matches) + _ -> if p==t then return [] else Nothing + + notMeta e = case e of + Meta _ -> False + Fun f ts -> all notMeta ts + _ -> True + +-- | Converts a pattern to tree. +patt2tree :: Patt -> Tree +patt2tree (PApp f ps) = Fun f (map patt2tree ps) +patt2tree (PLit l) = Lit l +patt2tree (PVar x) = Var x +patt2tree PWild = Meta 0 diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs new file mode 100644 index 000000000..e88926f6e --- /dev/null +++ b/src/runtime/haskell/PGF/Parsing/FCFG/Active.hs @@ -0,0 +1,205 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- MCFG parsing, the active algorithm +----------------------------------------------------------------------------- + +module PGF.Parsing.FCFG.Active (parse) where + +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Data.Utilities +import qualified GF.Data.MultiMap as MM + +import PGF.CId +import PGF.Data +import PGF.Tree +import PGF.Parsing.FCFG.Utilities +import PGF.BuildParser + +import Control.Monad (guard) + +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap +import qualified Data.Set as Set +import Data.Array.IArray +import Debug.Trace + +---------------------------------------------------------------------- +-- * parsing + +type FToken = String + +makeFinalEdge cat 0 0 = (cat, [EmptyRange]) +makeFinalEdge cat i j = (cat, [makeRange i j]) + +-- | the list of categories = possible starting categories +parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr] +parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees + where + inTokens = input toks + starts = Map.findWithDefault [] start (startCats pinfo) + schart = xchart2syntaxchart chart pinfo + (i,j) = inputBounds inTokens + finalEdges = [makeFinalEdge cat i j | cat <- starts] + forests = chart2forests schart (const False) finalEdges + filteredForests = forests >>= applyProfileToForest + + pinfoex = buildParserInfo pinfo + + chart = process strategy pinfo pinfoex inTokens axioms emptyXChart + axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens + | isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens + +isBU s = s=="b" +isTD s = s=="t" + +-- used in prediction +emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec +emptyChildren ruleid args = SNode ruleid (replicate (length args) []) + + +process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat +process strategy pinfo pinfoex toks [] chart = chart +process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart + where + univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart + | inRange (bounds lin) ppos = + case lin ! ppos of + FSymCat d r -> let c = args !! d + in case recs !! d of + [] -> case insertXChart chart item c of + Nothing -> chart + Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c + rng <- concatRange rng (found' !! r) + return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat) + ++ + do guard (isTD strategy) + (ruleid,args) <- topdownRules pinfo c + return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c) + in process strategy pinfo pinfoex toks items chart + found' -> let items = do rng <- concatRange rng (found' !! r) + return (Active found rng lbl (ppos+1) node args cat) + in process strategy pinfo pinfoex toks items chart + FSymKS [tok] + -> let items = do t_rng <- inputToken toks ? tok + rng' <- concatRange rng t_rng + return (Active found rng' lbl (ppos+1) node args cat) + in process strategy pinfo pinfoex toks items chart + | otherwise = + if inRange (bounds lins) (lbl+1) + then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart + else univRule (Final (reverse (rng:found)) node args cat) chart + where + (FFun _ _ lins) = functions pinfo ! ruleid + lin = sequences pinfo ! (lins ! lbl) + univRule item@(Final found' node args cat) chart = + case insertXChart chart item cat of + Nothing -> chart + Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat + let FFun _ _ lins = functions pinfo ! ruleid + FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos + rng <- concatRange rng (found' !! r) + return (Active found rng l (ppos+1) (updateChildren node d found') args c) + ++ + do guard (isBU strategy) + (ruleid,args,c) <- leftcornerCats pinfoex ? cat + let FFun _ _ lins = functions pinfo ! ruleid + FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0 + return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c) + + updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec + updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs + in process strategy pinfo pinfoex toks items chart + +---------------------------------------------------------------------- +-- * XChart + +data Item + = Active RangeRec + Range + {-# UNPACK #-} !FIndex + {-# UNPACK #-} !FPointPos + (SyntaxNode FunId RangeRec) + [FCat] + FCat + | Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat + deriving (Eq, Ord, Show) + +data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item) + +emptyXChart :: Ord c => XChart c +emptyXChart = XChart MM.empty MM.empty + +insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c = + case MM.insert' c item actives of + Nothing -> Nothing + Just actives -> Just (XChart actives finals) + +insertXChart (XChart actives finals) item@(Final _ _ _ _) c = + case MM.insert' c item finals of + Nothing -> Nothing + Just finals -> Just (XChart actives finals) + +lookupXChartAct (XChart actives finals) c = actives MM.! c +lookupXChartFinal (XChart actives finals) c = finals MM.! c + +xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec) +xchart2syntaxchart (XChart actives finals) pinfo = + accumAssoc groupSyntaxNodes $ + [ case node of + SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid + in ((cat,found), SNode (fun,prof) (zip rhs rrecs)) + SString s -> ((cat,found), SString s) + SInt n -> ((cat,found), SInt n) + SFloat f -> ((cat,found), SFloat f) + | (Final found node rhs cat) <- MM.elems finals + ] + +literals :: ParserInfoEx -> Input FToken -> [Item] +literals pinfoex toks = + [let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)] + where + lexer t = + case reads t of + [(n,"")] -> (fcatInt, SInt (n::Integer)) + _ -> case reads t of + [(f,"")] -> (fcatFloat, SFloat (f::Double)) + _ -> (fcatString,SString t) + + +---------------------------------------------------------------------- +-- Earley -- + +-- called with all starting categories +initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item] +initialTD pinfo starts toks = + do cat <- starts + (ruleid,args) <- topdownRules pinfo cat + return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat) + +topdownRules pinfo cat = f cat [] + where + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) + + g (FApply ruleid args) rules = (ruleid,args) : rules + g (FCoerce cat) rules = f cat rules + + +---------------------------------------------------------------------- +-- Kilbury -- + +initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item] +initialBU pinfo pinfoex toks = + do (tok,rngs) <- aAssocs (inputToken toks) + (ruleid,args,cat) <- leftcornerTokens pinfoex ? tok + rng <- rngs + return (Active [] rng 0 1 (emptyChildren ruleid args) args cat) + ++ + do (ruleid,args,cat) <- epsilonRules pinfoex + let FFun _ _ _ = functions pinfo ! ruleid + return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat) diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs new file mode 100644 index 000000000..296a0d33b --- /dev/null +++ b/src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs @@ -0,0 +1,371 @@ +{-# LANGUAGE BangPatterns #-}
+module PGF.Parsing.FCFG.Incremental
+ ( ParseState
+ , ErrorState
+ , initState
+ , nextState
+ , getCompletions
+ , recoveryStates
+ , extractTrees
+ , parse
+ , parseWithRecovery
+ ) where
+
+import Data.Array.IArray
+import Data.Array.Base (unsafeAt)
+import Data.List (isPrefixOf, foldl')
+import Data.Maybe (fromMaybe, maybe)
+import qualified Data.Map as Map
+import qualified GF.Data.TrieMap as TMap
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
+import Control.Monad
+
+import GF.Data.SortedList
+import PGF.CId
+import PGF.Data
+import PGF.Expr(Tree)
+import PGF.Macros
+import PGF.TypeCheck
+import Debug.Trace
+
+parse :: PGF -> Language -> Type -> [String] -> [Tree]
+parse pgf lang typ toks = loop (initState pgf lang typ) toks
+ where
+ loop ps [] = extractTrees ps typ
+ loop ps (t:ts) = case nextState ps t of
+ Left es -> []
+ Right ps -> loop ps ts
+
+parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree]
+parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks
+ where
+ accept ps [] = extractTrees ps typ
+ accept ps (t:ts) =
+ case nextState ps t of
+ Right ps -> accept ps ts
+ Left es -> skip (recoveryStates open_typs es) ts
+
+ skip ps_map [] = extractTrees (fst ps_map) typ
+ skip ps_map (t:ts) =
+ case Map.lookup t (snd ps_map) of
+ Just ps -> accept ps ts
+ Nothing -> skip ps_map ts
+
+-- | Creates an initial parsing state for a given language and
+-- startup category.
+initState :: PGF -> Language -> Type -> ParseState
+initState pgf lang (DTyp _ start _) =
+ let items = do
+ cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
+ (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
+ [] cat (productions pinfo)
+ let FFun fn _ lins = functions pinfo ! funid
+ (lbl,seqid) <- assocs lins
+ return (Active 0 0 funid seqid args (AK cat lbl))
+
+ pinfo =
+ case lookParser pgf lang of
+ Just pinfo -> pinfo
+ _ -> error ("Unknown language: " ++ showCId lang)
+
+ in PState pgf
+ pinfo
+ (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
+ (TMap.singleton [] (Set.fromList items))
+
+-- | From the current state and the next token
+-- 'nextState' computes a new state, where the token
+-- is consumed and the current position is shifted by one.
+-- If the new token cannot be accepted then an error state
+-- is returned.
+nextState :: ParseState -> String -> Either ErrorState ParseState
+nextState (PState pgf pinfo chart items) t =
+ let (mb_agenda,map_items) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ acc = fromMaybe TMap.empty (Map.lookup t map_items)
+ (acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
+ chart2 = chart1{ active =emptyAC
+ , actives=active chart1 : actives chart1
+ , passive=emptyPC
+ , offset =offset chart1+1
+ }
+ in if TMap.null acc1
+ then Left (EState pgf pinfo chart2)
+ else Right (PState pgf pinfo chart2 acc1)
+ where
+ add (tok:toks) item acc
+ | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
+ add _ item acc = acc
+
+-- | If the next token is not known but only its prefix (possible empty prefix)
+-- then the 'getCompletions' function can be used to calculate the possible
+-- next words and the consequent states. This is used for word completions in
+-- the GF interpreter.
+getCompletions :: ParseState -> String -> Map.Map String ParseState
+getCompletions (PState pgf pinfo chart items) w =
+ let (mb_agenda,map_items) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
+ (acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
+ chart2 = chart1{ active =emptyAC
+ , actives=active chart1 : actives chart1
+ , passive=emptyPC
+ , offset =offset chart1+1
+ }
+ in fmap (PState pgf pinfo chart2) acc'
+ where
+ add (tok:toks) item acc
+ | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
+ add _ item acc = acc
+
+recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState)
+recoveryStates open_types (EState pgf pinfo chart) =
+ let open_fcats = concatMap type2fcats open_types
+ agenda = foldl (complete open_fcats) [] (actives chart)
+ (acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart
+ chart2 = chart1{ active =emptyAC
+ , actives=active chart1 : actives chart1
+ , passive=emptyPC
+ , offset =offset chart1+1
+ }
+ in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
+ where
+ type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo))
+
+ complete open_fcats items ac =
+ foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
+ (:) (Active j' (ppos+1) funid seqid args keyc)))
+ items
+ [set | fcat <- open_fcats, set <- lookupACByFCat fcat ac]
+
+ add (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
+
+-- | This function extracts the list of all completed parse trees
+-- that spans the whole input consumed so far. The trees are also
+-- limited by the category specified, which is usually
+-- the same as the startup category.
+extractTrees :: ParseState -> Type -> [Tree]
+extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
+ nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
+ where
+ (mb_agenda,acc) = TMap.decompose items
+ agenda = maybe [] Set.toList mb_agenda
+ (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
+
+ exps = do
+ cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
+ (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
+ [] cat (productions pinfo)
+ let FFun fn _ lins = functions pinfo ! funid
+ lbl <- indices lins
+ Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
+ (fvs,tree) <- go Set.empty 0 (0,fid)
+ guard (Set.null fvs)
+ return tree
+
+ go rec fcat' (d,fcat)
+ | fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
+ | Set.member fcat rec = mzero
+ | otherwise = foldForest (\funid args trees ->
+ do let FFun fn _ lins = functions pinfo ! funid
+ args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
+ check_ho_fun fn args
+ `mplus`
+ trees)
+ (\const _ trees ->
+ return (freeVar const,const)
+ `mplus`
+ trees)
+ [] fcat (forest st)
+
+ check_ho_fun fun args
+ | fun == _V = return (head args)
+ | fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args))
+ | otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args)
+
+ mkVar (EFun v) = v
+ mkVar (EMeta _) = wildCId
+
+ freeVar (EFun v) = Set.singleton v
+ freeVar _ = Set.empty
+
+_B = mkCId "_B"
+_V = mkCId "_V"
+
+process mbt fn !seqs !funs [] acc chart = (acc,chart)
+process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
+ | inRange (bounds lin) ppos =
+ case unsafeAt lin ppos of
+ FSymCat d r -> let !fid = args !! d
+ key = AK fid r
+
+ items2 = case lookupPC (mkPK key k) (passive chart) of
+ Nothing -> items
+ Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items
+ items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items)
+ (\_ _ items -> items)
+ items2 fid (forest chart)
+ in case lookupAC key (active chart) of
+ Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
+ Just set | Set.member item set -> process mbt fn seqs funs items acc chart
+ | otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
+ FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
+ in process mbt fn seqs funs items acc' chart
+ FSymKP strs vars
+ -> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc
+ (strs:[strs' | Alt strs' _ <- vars])
+ in process mbt fn seqs funs items acc' chart
+ FSymLit d r -> let !fid = args !! d
+ in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
+ (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
+ in process mbt fn seqs funs items acc' chart
+ [] -> case litCatMatch fid mbt of
+ Just (toks,lit) -> let fid' = nextId chart
+ !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
+ in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart)
+ ,nextId=nextId chart+1
+ }
+ Nothing -> process mbt fn seqs funs items acc chart
+ | otherwise =
+ case lookupPC (mkPK key0 j) (passive chart) of
+ Nothing -> let fid = nextId chart
+
+ items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
+ Nothing -> items
+ Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
+ let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos
+ in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set
+ in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
+ ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart)
+ ,nextId =nextId chart+1
+ }
+ Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
+ in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)}
+ where
+ !lin = unsafeAt seqs seqid
+ !k = offset chart
+
+ mkPK (AK fid lbl) j = PK fid lbl j
+
+ rhs funid lbl = unsafeAt lins lbl
+ where
+ FFun _ _ lins = unsafeAt funs funid
+
+
+updateAt :: Int -> a -> [a] -> [a]
+updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
+
+litCatMatch fcat (Just t)
+ | fcat == fcatString = Just ([t],ELit (LStr t))
+ | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n));
+ _ -> Nothing }
+ | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d));
+ _ -> Nothing }
+ | fcat == fcatVar = Just ([t],EFun (mkCId t))
+litCatMatch _ _ = Nothing
+
+
+----------------------------------------------------------------
+-- Active Chart
+----------------------------------------------------------------
+
+data Active
+ = Active {-# UNPACK #-} !Int
+ {-# UNPACK #-} !FPointPos
+ {-# UNPACK #-} !FunId
+ {-# UNPACK #-} !SeqId
+ [FCat]
+ {-# UNPACK #-} !ActiveKey
+ deriving (Eq,Show,Ord)
+data ActiveKey
+ = AK {-# UNPACK #-} !FCat
+ {-# UNPACK #-} !FIndex
+ deriving (Eq,Ord,Show)
+type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active))
+
+emptyAC :: ActiveChart
+emptyAC = IntMap.empty
+
+lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active)
+lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l
+
+lookupACByFCat :: FCat -> ActiveChart -> [Set.Set Active]
+lookupACByFCat fcat chart =
+ case IntMap.lookup fcat chart of
+ Nothing -> []
+ Just map -> IntMap.elems map
+
+labelsAC :: FCat -> ActiveChart -> [FIndex]
+labelsAC fcat chart =
+ case IntMap.lookup fcat chart of
+ Nothing -> []
+ Just map -> IntMap.keys map
+
+insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart
+insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart
+
+
+----------------------------------------------------------------
+-- Passive Chart
+----------------------------------------------------------------
+
+data PassiveKey
+ = PK {-# UNPACK #-} !FCat
+ {-# UNPACK #-} !FIndex
+ {-# UNPACK #-} !Int
+ deriving (Eq,Ord,Show)
+
+type PassiveChart = Map.Map PassiveKey FCat
+
+emptyPC :: PassiveChart
+emptyPC = Map.empty
+
+lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat
+lookupPC key chart = Map.lookup key chart
+
+insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart
+insertPC key fcat chart = Map.insert key fcat chart
+
+
+----------------------------------------------------------------
+-- Forest
+----------------------------------------------------------------
+
+foldForest :: (FunId -> [FCat] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
+foldForest f g b fcat forest =
+ case IntMap.lookup fcat forest of
+ Nothing -> b
+ Just set -> Set.fold foldProd b set
+ where
+ foldProd (FCoerce fcat) b = foldForest f g b fcat forest
+ foldProd (FApply funid args) b = f funid args b
+ foldProd (FConst const toks) b = g const toks b
+
+
+----------------------------------------------------------------
+-- Parse State
+----------------------------------------------------------------
+
+-- | An abstract data type whose values represent
+-- the current state in an incremental parser.
+data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
+
+data Chart
+ = Chart
+ { active :: ActiveChart
+ , actives :: [ActiveChart]
+ , passive :: PassiveChart
+ , forest :: IntMap.IntMap (Set.Set Production)
+ , nextId :: {-# UNPACK #-} !FCat
+ , offset :: {-# UNPACK #-} !Int
+ }
+ deriving Show
+
+----------------------------------------------------------------
+-- Error State
+----------------------------------------------------------------
+
+-- | An abstract data type whose values represent
+-- the state in an incremental parser after an error.
+data ErrorState = EState PGF ParserInfo Chart
diff --git a/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs b/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs new file mode 100644 index 000000000..dc0b2dc4a --- /dev/null +++ b/src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs @@ -0,0 +1,188 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/13 12:40:19 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.6 $ +-- +-- Basic type declarations and functions for grammar formalisms +----------------------------------------------------------------------------- + + +module PGF.Parsing.FCFG.Utilities where + +import Control.Monad +import Data.Array +import Data.List (groupBy) + +import PGF.CId +import PGF.Data +import PGF.Tree +import GF.Data.Assoc +import GF.Data.Utilities (sameLength, foldMerge, splitBy) + + +------------------------------------------------------------ +-- ranges as single pairs + +type RangeRec = [Range] + +data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int + | EmptyRange + deriving (Eq, Ord, Show) + +makeRange :: Int -> Int -> Range +makeRange = Range + +concatRange :: Range -> Range -> [Range] +concatRange EmptyRange rng = return rng +concatRange rng EmptyRange = return rng +concatRange (Range i j) (Range j' k) = [Range i k | j==j'] + +minRange :: Range -> Int +minRange (Range i j) = i + +maxRange :: Range -> Int +maxRange (Range i j) = j + + +------------------------------------------------------------ +-- * representaions of input tokens + +data Input t = MkInput { inputBounds :: (Int, Int), + inputToken :: Assoc t [Range] + } + +input :: Ord t => [t] -> Input t +input toks = MkInput inBounds inToken + where + inBounds = (0, length toks) + inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ] + +inputMany :: Ord t => [[t]] -> Input t +inputMany toks = MkInput inBounds inToken + where + inBounds = (0, length toks) + inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ] + + +------------------------------------------------------------ +-- * representations of syntactical analyses + +-- ** charts as finite maps over edges + +-- | The values of the chart, a list of key-daughters pairs, +-- has unique keys. In essence, it is a map from 'n' to daughters. +-- The daughters should be a set (not necessarily sorted) of rhs's. +type SyntaxChart n e = Assoc e [SyntaxNode n [e]] + +data SyntaxNode n e = SMeta + | SNode n [e] + | SString String + | SInt Integer + | SFloat Double + deriving (Eq,Ord,Show) + +groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] +groupSyntaxNodes [] = [] +groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs' + where + (ess,xs') = span xs + + span [] = ([],[]) + span xs@(SNode n es:xs') + | n0 == n = let (ess,xs) = span xs' in (es:ess,xs) + | otherwise = ([],xs) +groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs +groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs +groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs + +-- ** syntax forests + +data SyntaxForest n = FMeta + | FNode n [[SyntaxForest n]] + -- ^ The outer list should be a set (not necessarily sorted) + -- of possible alternatives. Ie. the outer list + -- is a disjunctive node, and the inner lists + -- are (conjunctive) concatenative nodes + | FString String + | FInt Integer + | FFloat Double + deriving (Eq, Ord, Show) + +instance Functor SyntaxForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap _ (FString s) = FString s + fmap _ (FInt n) = FInt n + fmap _ (FFloat f) = FFloat f + fmap _ (FMeta) = FMeta + +forestName :: SyntaxForest n -> Maybe n +forestName (FNode n _) = Just n +forestName _ = Nothing + +unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) +unifyManyForests = foldM unifyForests FMeta + +-- | two forests can be unified, if either is 'FMeta', or both have the same parent, +-- and all children can be unified +unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) +unifyForests FMeta forest = return forest +unifyForests forest FMeta = return forest +unifyForests (FNode name1 children1) (FNode name2 children2) + | name1 == name2 && not (null children) = return $ FNode name1 children + where children = [ forests | forests1 <- children1, forests2 <- children2, + sameLength forests1 forests2, + forests <- zipWithM unifyForests forests1 forests2 ] +unifyForests (FString s1) (FString s2) + | s1 == s2 = return $ FString s1 +unifyForests (FInt n1) (FInt n2) + | n1 == n2 = return $ FInt n1 +unifyForests (FFloat f1) (FFloat f2) + | f1 == f2 = return $ FFloat f1 +unifyForests _ _ = fail "forest unification failure" + + +-- ** conversions between representations + +chart2forests :: (Ord n, Ord e) => + SyntaxChart n e -- ^ The complete chart + -> (e -> Bool) -- ^ When is an edge 'FMeta'? + -> [e] -- ^ The starting edges + -> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together. + -- In essence, the result is a map from 'n' to forest daughters +chart2forests chart isMeta = concatMap (edge2forests []) + where edge2forests edges edge + | isMeta edge = [FMeta] + | edge `elem` edges = [] + | otherwise = map (item2forest (edge:edges)) $ chart ? edge + item2forest edges (SMeta) = FMeta + item2forest edges (SNode name children) = + FNode name $ children >>= mapM (edge2forests edges) + item2forest edges (SString s) = FString s + item2forest edges (SInt n) = FInt n + item2forest edges (SFloat f) = FFloat f + + +applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId] +applyProfileToForest (FNode (fun,profiles) children) + | fun == wildCId = concat chForests + | otherwise = [ FNode fun chForests | not (null chForests) ] + where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles | + forests0 <- children, + forests <- mapM applyProfileToForest forests0 ] +applyProfileToForest (FString s) = [FString s] +applyProfileToForest (FInt n) = [FInt n] +applyProfileToForest (FFloat f) = [FFloat f] +applyProfileToForest (FMeta) = [FMeta] + + +forest2trees :: SyntaxForest CId -> [Tree] +forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees +forest2trees (FString s) = [Lit (LStr s)] +forest2trees (FInt n) = [Lit (LInt n)] +forest2trees (FFloat f) = [Lit (LFlt f)] +forest2trees (FMeta) = [Meta 0] diff --git a/src/runtime/haskell/PGF/ShowLinearize.hs b/src/runtime/haskell/PGF/ShowLinearize.hs new file mode 100644 index 000000000..dd3b997a6 --- /dev/null +++ b/src/runtime/haskell/PGF/ShowLinearize.hs @@ -0,0 +1,113 @@ +module PGF.ShowLinearize ( + collectWords, + tableLinearize, + recordLinearize, + termLinearize, + tabularLinearize, + allLinearize, + markLinearize + ) where + +import PGF.CId +import PGF.Data +import PGF.Tree +import PGF.Macros +import PGF.Linearize + +import GF.Data.Operations +import Data.List +import qualified Data.Map as Map + +-- printing linearizations in different ways with source parameters + +-- internal representation, only used internally in this module +data Record = + RR [(String,Record)] + | RT [(String,Record)] + | RFV [Record] + | RS String + | RCon String + +prRecord :: Record -> String +prRecord = prr where + prr t = case t of + RR fs -> concat $ + "{" : + (intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"] + RT fs -> concat $ + "table {" : + (intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"] + RFV ts -> concat $ + "variants {" : (intersperse ";" (map prr ts)) ++ ["}"] + RS s -> prQuotedString s + RCon s -> s + +-- uses the encoding of record types in PGF.paramlincat +mkRecord :: Term -> Term -> Record +mkRecord typ trm = case (typ,trm) of + (_, FV ts) -> RFV $ map (mkRecord typ) ts + (R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts] + (S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts] + (_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts]) + (FV ps, C i) -> RCon $ str $ ps !! i + (S [], _) -> case realizes trm of + [s] -> RS s + ss -> RFV $ map RS ss + _ -> RS $ show trm ---- printTree trm + where + str = realize + +-- show all branches, without labels and params +allLinearize :: (String -> String) -> PGF -> CId -> Expr -> String +allLinearize unlex pgf lang = concat . map (unlex . pr) . tabularLinearize pgf lang where + pr (p,vs) = unlines vs + +-- show all branches, with labels and params +tableLinearize :: (String -> String) -> PGF -> CId -> Expr -> String +tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where + pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs)) + +-- create a table from labels+params to variants +tabularLinearize :: PGF -> CId -> Expr -> [(String,[String])] +tabularLinearize pgf lang = branches . recLinearize pgf lang where + branches r = case r of + RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] + RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] + RFV rs -> concatMap branches rs + RS s -> [([], [s])] + RCon _ -> [] + +-- show record in GF-source-like syntax +recordLinearize :: PGF -> CId -> Expr -> String +recordLinearize pgf lang = prRecord . recLinearize pgf lang + +-- create a GF-like record, forming the basis of all functions above +recLinearize :: PGF -> CId -> Expr -> Record +recLinearize pgf lang tree = mkRecord typ $ linTree pgf lang tree where + typ = case expr2tree tree of + Fun f _ -> lookParamLincat pgf lang $ valCat $ lookType pgf f + +-- show PGF term +termLinearize :: PGF -> CId -> Expr -> String +termLinearize pgf lang = show . linTree pgf lang + +-- show bracketed markup with references to tree structure +markLinearize :: PGF -> CId -> Expr -> String +markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang + + +-- for Morphology: word, lemma, tags +collectWords :: PGF -> Language -> [(String, [(CId,String)])] +collectWords pgf lang = + concatMap collOne + [(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf] + where + collOne (f,c,i) = + fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888)))) + fromRec f v r = case r of + RR rs -> concat [fromRec f v t | (_,t) <- rs] + RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs] + RFV rs -> concatMap (fromRec f v) rs + RS s -> [(s,[(f,unwords (reverse v))])] + RCon c -> [] ---- inherent + diff --git a/src/runtime/haskell/PGF/Tree.hs b/src/runtime/haskell/PGF/Tree.hs new file mode 100644 index 000000000..cb2052cd7 --- /dev/null +++ b/src/runtime/haskell/PGF/Tree.hs @@ -0,0 +1,71 @@ +module PGF.Tree + ( Tree(..), + tree2expr, expr2tree, + prTree + ) where + +import PGF.CId +import PGF.Expr hiding (Tree) + +import Data.Char +import Data.List as List +import Control.Monad +import qualified Text.PrettyPrint as PP +import qualified Text.ParserCombinators.ReadP as RP + +-- | The tree is an evaluated expression in the abstract syntax +-- of the grammar. The type is especially restricted to not +-- allow unapplied lambda abstractions. The tree is used directly +-- from the linearizer and is produced directly from the parser. +data Tree = + Abs [(BindType,CId)] Tree -- ^ lambda abstraction. The list of variables is non-empty + | Var CId -- ^ variable + | Fun CId [Tree] -- ^ function application + | Lit Literal -- ^ literal + | Meta {-# UNPACK #-} !MetaId -- ^ meta variable + deriving (Eq, Ord) + +----------------------------------------------------- +-- Conversion Expr <-> Tree +----------------------------------------------------- + +-- | Converts a tree to expression. The conversion +-- is always total, every tree is a valid expression. +tree2expr :: Tree -> Expr +tree2expr = tree2expr [] + where + tree2expr ys (Fun x ts) = foldl EApp (EFun x) (List.map (tree2expr ys) ts) + tree2expr ys (Lit l) = ELit l + tree2expr ys (Meta n) = EMeta n + tree2expr ys (Abs xs t) = foldr (\(b,x) e -> EAbs b x e) (tree2expr (List.map snd (reverse xs)++ys) t) xs + tree2expr ys (Var x) = case List.lookup x (zip ys [0..]) of + Just i -> EVar i + Nothing -> error "unknown variable" + +-- | Converts an expression to tree. The conversion is only partial. +-- Variables and meta variables of function type and beta redexes are not allowed. +expr2tree :: Expr -> Tree +expr2tree e = abs [] [] e + where + abs ys xs (EAbs b x e) = abs ys ((b,x):xs) e + abs ys xs (ETyped e _) = abs ys xs e + abs ys xs e = case xs of + [] -> app ys [] e + xs -> Abs (reverse xs) (app (map snd xs++ys) [] e) + + app xs as (EApp e1 e2) = app xs ((abs xs [] e2) : as) e1 + app xs as (ELit l) + | List.null as = Lit l + | otherwise = error "literal of function type encountered" + app xs as (EMeta n) + | List.null as = Meta n + | otherwise = error "meta variables of function type are not allowed in trees" + app xs as (EAbs _ x e) = error "beta redexes are not allowed in trees" + app xs as (EVar i) = Var (xs !! i) + app xs as (EFun f) = Fun f as + app xs as (ETyped e _) = app xs as e + + +prTree :: Tree -> String +prTree = showExpr [] . tree2expr + diff --git a/src/runtime/haskell/PGF/Type.hs b/src/runtime/haskell/PGF/Type.hs new file mode 100644 index 000000000..013754a45 --- /dev/null +++ b/src/runtime/haskell/PGF/Type.hs @@ -0,0 +1,103 @@ +module PGF.Type ( Type(..), Hypo,
+ readType, showType,
+ mkType, mkHypo, mkDepHypo, mkImplHypo,
+ pType, ppType, ppHypo ) where
+
+import PGF.CId
+import {-# SOURCE #-} PGF.Expr
+import Data.Char
+import Data.List
+import qualified Text.PrettyPrint as PP
+import qualified Text.ParserCombinators.ReadP as RP
+import Control.Monad
+
+-- | To read a type from a 'String', use 'readType'.
+data Type =
+ DTyp [Hypo] CId [Expr]
+ deriving (Eq,Ord,Show)
+
+-- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis
+type Hypo = (BindType,CId,Type)
+
+-- | Reads a 'Type' from a 'String'.
+readType :: String -> Maybe Type
+readType s = case [x | (x,cs) <- RP.readP_to_S pType s, all isSpace cs] of
+ [x] -> Just x
+ _ -> Nothing
+
+-- | renders type as 'String'. The list
+-- of identifiers is the list of all free variables
+-- in the expression in order reverse to the order
+-- of binding.
+showType :: [CId] -> Type -> String
+showType vars = PP.render . ppType 0 vars
+
+-- | creates a type from list of hypothesises, category and
+-- list of arguments for the category. The operation
+-- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create
+-- @h_1 -> ... -> h_n -> C e_1 ... e_m@
+mkType :: [Hypo] -> CId -> [Expr] -> Type
+mkType hyps cat args = DTyp hyps cat args
+
+-- | creates hypothesis for non-dependent type i.e. A
+mkHypo :: Type -> Hypo
+mkHypo ty = (Explicit,wildCId,ty)
+
+-- | creates hypothesis for dependent type i.e. (x : A)
+mkDepHypo :: CId -> Type -> Hypo
+mkDepHypo x ty = (Explicit,x,ty)
+
+-- | creates hypothesis for dependent type with implicit argument i.e. ({x} : A)
+mkImplHypo :: CId -> Type -> Hypo
+mkImplHypo x ty = (Implicit,x,ty)
+
+pType :: RP.ReadP Type
+pType = do
+ RP.skipSpaces
+ hyps <- RP.sepBy (pHypo >>= \h -> RP.skipSpaces >> RP.string "->" >> return h) RP.skipSpaces
+ RP.skipSpaces
+ (cat,args) <- pAtom
+ return (DTyp (concat hyps) cat args)
+ where
+ pHypo =
+ do (cat,args) <- pAtom
+ return [(Explicit,wildCId,DTyp [] cat args)]
+ RP.<++
+ (RP.between (RP.char '(') (RP.char ')') $ do
+ xs <- RP.option [(Explicit,wildCId)] $ do
+ xs <- pBinds
+ RP.skipSpaces
+ RP.char ':'
+ return xs
+ ty <- pType
+ return [(b,v,ty) | (b,v) <- xs])
+ RP.<++
+ (RP.between (RP.char '{') (RP.char '}') $ do
+ vs <- RP.sepBy1 (RP.skipSpaces >> pCId) (RP.skipSpaces >> RP.char ',')
+ RP.skipSpaces
+ RP.char ':'
+ ty <- pType
+ return [(Implicit,v,ty) | v <- vs])
+
+ pAtom = do
+ cat <- pCId
+ RP.skipSpaces
+ args <- RP.sepBy pArg RP.skipSpaces
+ return (cat, args)
+
+ppType :: Int -> [CId] -> Type -> PP.Doc
+ppType d scope (DTyp hyps cat args)
+ | null hyps = ppRes scope cat args
+ | otherwise = let (scope',hdocs) = mapAccumL ppHypo scope hyps
+ in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes scope' cat args) hdocs)
+ where
+ ppRes scope cat es = ppCId cat PP.<+> PP.hsep (map (ppExpr 4 scope) es)
+
+ppHypo scope (Explicit,x,typ) = if x == wildCId
+ then (scope,ppType 1 scope typ)
+ else let y = freshName x scope
+ in (y:scope,PP.parens (ppCId y PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
+ppHypo scope (Implicit,x,typ) = if x == wildCId
+ then (scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
+ else let y = freshName x scope
+ in (y:scope,PP.parens (PP.braces (ppCId y) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
diff --git a/src/runtime/haskell/PGF/TypeCheck.hs b/src/runtime/haskell/PGF/TypeCheck.hs new file mode 100644 index 000000000..937c21786 --- /dev/null +++ b/src/runtime/haskell/PGF/TypeCheck.hs @@ -0,0 +1,524 @@ +---------------------------------------------------------------------- +-- | +-- Module : PGF.TypeCheck +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Type checking in abstract syntax with dependent types. +-- The type checker also performs renaming and checking for unknown +-- functions. The variable references are replaced by de Bruijn indices. +-- +----------------------------------------------------------------------------- + +module PGF.TypeCheck (checkType, checkExpr, inferExpr, + + ppTcError, TcError(..) + ) where + +import PGF.Data +import PGF.Expr +import PGF.Macros (typeOfHypo) +import PGF.CId + +import Data.Map as Map +import Data.IntMap as IntMap +import Data.Maybe as Maybe +import Data.List as List +import Control.Monad +import Text.PrettyPrint + +----------------------------------------------------- +-- The Scope +----------------------------------------------------- + +data TType = TTyp Env Type +newtype Scope = Scope [(CId,TType)] + +emptyScope = Scope [] + +addScopedVar :: CId -> TType -> Scope -> Scope +addScopedVar x tty (Scope gamma) = Scope ((x,tty):gamma) + +-- | returns the type and the De Bruijn index of a local variable +lookupVar :: CId -> Scope -> Maybe (Int,TType) +lookupVar x (Scope gamma) = listToMaybe [(i,tty) | ((y,tty),i) <- zip gamma [0..], x == y] + +-- | returns the type and the name of a local variable +getVar :: Int -> Scope -> (CId,TType) +getVar i (Scope gamma) = gamma !! i + +scopeEnv :: Scope -> Env +scopeEnv (Scope gamma) = let n = length gamma + in [VGen (n-i-1) [] | i <- [0..n-1]] + +scopeVars :: Scope -> [CId] +scopeVars (Scope gamma) = List.map fst gamma + +scopeSize :: Scope -> Int +scopeSize (Scope gamma) = length gamma + +----------------------------------------------------- +-- The Monad +----------------------------------------------------- + +type MetaStore = IntMap MetaValue +data MetaValue + = MUnbound Scope [Expr -> TcM ()] + | MBound Expr + | MGuarded Expr [Expr -> TcM ()] {-# UNPACK #-} !Int -- the Int is the number of constraints that have to be solved + -- to unlock this meta variable + +newtype TcM a = TcM {unTcM :: Abstr -> MetaId -> MetaStore -> TcResult a} +data TcResult a + = Ok {-# UNPACK #-} !MetaId MetaStore a + | Fail TcError + +instance Monad TcM where + return x = TcM (\abstr metaid ms -> Ok metaid ms x) + f >>= g = TcM (\abstr metaid ms -> case unTcM f abstr metaid ms of + Ok metaid ms x -> unTcM (g x) abstr metaid ms + Fail e -> Fail e) + +instance Functor TcM where + fmap f x = TcM (\abstr metaid ms -> case unTcM x abstr metaid ms of + Ok metaid ms x -> Ok metaid ms (f x) + Fail e -> Fail e) + +lookupCatHyps :: CId -> TcM [Hypo] +lookupCatHyps cat = TcM (\abstr metaid ms -> case Map.lookup cat (cats abstr) of + Just hyps -> Ok metaid ms hyps + Nothing -> Fail (UnknownCat cat)) + +lookupFunType :: CId -> TcM TType +lookupFunType fun = TcM (\abstr metaid ms -> case Map.lookup fun (funs abstr) of + Just (ty,_,_) -> Ok metaid ms (TTyp [] ty) + Nothing -> Fail (UnknownFun fun)) + +newMeta :: Scope -> TcM MetaId +newMeta scope = TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MUnbound scope []) ms) metaid) + +newGuardedMeta :: Scope -> Expr -> TcM MetaId +newGuardedMeta scope e = getFuns >>= \funs -> TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MGuarded e [] 0) ms) metaid) + +getMeta :: MetaId -> TcM MetaValue +getMeta i = TcM (\abstr metaid ms -> Ok metaid ms $! case IntMap.lookup i ms of + Just mv -> mv) +setMeta :: MetaId -> MetaValue -> TcM () +setMeta i mv = TcM (\abstr metaid ms -> Ok metaid (IntMap.insert i mv ms) ()) + +tcError :: TcError -> TcM a +tcError e = TcM (\abstr metaid ms -> Fail e) + +getFuns :: TcM Funs +getFuns = TcM (\abstr metaid ms -> Ok metaid ms (funs abstr)) + +addConstraint :: MetaId -> MetaId -> Env -> [Value] -> (Value -> TcM ()) -> TcM () +addConstraint i j env vs c = do + funs <- getFuns + mv <- getMeta j + case mv of + MUnbound scope cs -> addRef >> setMeta j (MUnbound scope ((\e -> release >> c (apply funs env e vs)) : cs)) + MBound e -> c (apply funs env e vs) + MGuarded e cs x | x == 0 -> c (apply funs env e vs) + | otherwise -> addRef >> setMeta j (MGuarded e ((\e -> release >> c (apply funs env e vs)) : cs) x) + where + addRef = TcM (\abstr metaid ms -> case IntMap.lookup i ms of + Just (MGuarded e cs x) -> Ok metaid (IntMap.insert i (MGuarded e cs (x+1)) ms) ()) + + release = TcM (\abstr metaid ms -> case IntMap.lookup i ms of + Just (MGuarded e cs x) -> if x == 1 + then unTcM (sequence_ [c e | c <- cs]) abstr metaid (IntMap.insert i (MGuarded e [] 0) ms) + else Ok metaid (IntMap.insert i (MGuarded e cs (x-1)) ms) ()) + +----------------------------------------------------- +-- Type errors +----------------------------------------------------- + +-- | If an error occurs in the typechecking phase +-- the type checker returns not a plain text error message +-- but a 'TcError' structure which describes the error. +data TcError + = UnknownCat CId -- ^ Unknown category name was found. + | UnknownFun CId -- ^ Unknown function name was found. + | WrongCatArgs [CId] Type CId Int Int -- ^ A category was applied to wrong number of arguments. + -- The first integer is the number of expected arguments and + -- the second the number of given arguments. + -- The @[CId]@ argument is the list of free variables + -- in the type. It should be used for the 'showType' function. + | TypeMismatch [CId] Expr Type Type -- ^ The expression is not of the expected type. + -- The first type is the expected type, while + -- the second is the inferred. The @[CId]@ argument is the list + -- of free variables in both the expression and the type. + -- It should be used for the 'showType' and 'showExpr' functions. + | NotFunType [CId] Expr Type -- ^ Something that is not of function type was applied to an argument. + | CannotInferType [CId] Expr -- ^ It is not possible to infer the type of an expression. + | UnresolvedMetaVars [CId] Expr [MetaId] -- ^ Some metavariables have to be instantiated in order to complete the typechecking. + | UnexpectedImplArg [CId] Expr -- ^ Implicit argument was passed where the type doesn't allow it + +-- | Renders the type checking error to a document. See 'Text.PrettyPrint'. +ppTcError :: TcError -> Doc +ppTcError (UnknownCat cat) = text "Category" <+> ppCId cat <+> text "is not in scope" +ppTcError (UnknownFun fun) = text "Function" <+> ppCId fun <+> text "is not in scope" +ppTcError (WrongCatArgs xs ty cat m n) = text "Category" <+> ppCId cat <+> text "should have" <+> int m <+> text "argument(s), but has been given" <+> int n $$ + text "In the type:" <+> ppType 0 xs ty +ppTcError (TypeMismatch xs e ty1 ty2) = text "Couldn't match expected type" <+> ppType 0 xs ty1 $$ + text " against inferred type" <+> ppType 0 xs ty2 $$ + text "In the expression:" <+> ppExpr 0 xs e +ppTcError (NotFunType xs e ty) = text "A function type is expected for the expression" <+> ppExpr 0 xs e <+> text "instead of type" <+> ppType 0 xs ty +ppTcError (CannotInferType xs e) = text "Cannot infer the type of expression" <+> ppExpr 0 xs e +ppTcError (UnresolvedMetaVars xs e ms) = text "Meta variable(s)" <+> fsep (List.map ppMeta ms) <+> text "should be resolved" $$ + text "in the expression:" <+> ppExpr 0 xs e +ppTcError (UnexpectedImplArg xs e) = braces (ppExpr 0 xs e) <+> text "is implicit argument but not implicit argument is expected here" + +----------------------------------------------------- +-- checkType +----------------------------------------------------- + +-- | Check whether a given type is consistent with the abstract +-- syntax of the grammar. +checkType :: PGF -> Type -> Either TcError Type +checkType pgf ty = + case unTcM (tcType emptyScope ty >>= refineType) (abstract pgf) 0 IntMap.empty of + Ok _ ms ty -> Right ty + Fail err -> Left err + +tcType :: Scope -> Type -> TcM Type +tcType scope ty@(DTyp hyps cat es) = do + (scope,hyps) <- tcHypos scope hyps + c_hyps <- lookupCatHyps cat + let m = length es + n = length [ty | (Explicit,x,ty) <- c_hyps] + (delta,es) <- tcCatArgs scope es [] c_hyps ty n m + return (DTyp hyps cat es) + +tcHypos :: Scope -> [Hypo] -> TcM (Scope,[Hypo]) +tcHypos scope [] = return (scope,[]) +tcHypos scope (h:hs) = do + (scope,h ) <- tcHypo scope h + (scope,hs) <- tcHypos scope hs + return (scope,h:hs) + +tcHypo :: Scope -> Hypo -> TcM (Scope,Hypo) +tcHypo scope (b,x,ty) = do + ty <- tcType scope ty + if x == wildCId + then return (scope,(b,x,ty)) + else return (addScopedVar x (TTyp (scopeEnv scope) ty) scope,(b,x,ty)) + +tcCatArgs scope [] delta [] ty0 n m = return (delta,[]) +tcCatArgs scope (EImplArg e:es) delta ((Explicit,x,ty):hs) ty0 n m = tcError (UnexpectedImplArg (scopeVars scope) e) +tcCatArgs scope (EImplArg e:es) delta ((Implicit,x,ty):hs) ty0 n m = do + e <- tcExpr scope e (TTyp delta ty) + funs <- getFuns + (delta,es) <- if x == wildCId + then tcCatArgs scope es delta hs ty0 n m + else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m + return (delta,EImplArg e:es) +tcCatArgs scope es delta ((Implicit,x,ty):hs) ty0 n m = do + i <- newMeta scope + (delta,es) <- if x == wildCId + then tcCatArgs scope es delta hs ty0 n m + else tcCatArgs scope es (VMeta i (scopeEnv scope) [] : delta) hs ty0 n m + return (delta,EImplArg (EMeta i) : es) +tcCatArgs scope (e:es) delta ((Explicit,x,ty):hs) ty0 n m = do + e <- tcExpr scope e (TTyp delta ty) + funs <- getFuns + (delta,es) <- if x == wildCId + then tcCatArgs scope es delta hs ty0 n m + else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m + return (delta,e:es) +tcCatArgs scope _ delta _ ty0@(DTyp _ cat _) n m = do + tcError (WrongCatArgs (scopeVars scope) ty0 cat n m) + +----------------------------------------------------- +-- checkExpr +----------------------------------------------------- + +-- | Checks an expression against a specified type. +checkExpr :: PGF -> Expr -> Type -> Either TcError Expr +checkExpr pgf e ty = + case unTcM (do e <- tcExpr emptyScope e (TTyp [] ty) + e <- refineExpr e + checkResolvedMetaStore emptyScope e + return e) (abstract pgf) 0 IntMap.empty of + Ok _ ms e -> Right e + Fail err -> Left err + +tcExpr :: Scope -> Expr -> TType -> TcM Expr +tcExpr scope e0@(EAbs Implicit x e) tty = + case tty of + TTyp delta (DTyp ((Implicit,y,ty):hs) c es) -> do e <- if y == wildCId + then tcExpr (addScopedVar x (TTyp delta ty) scope) + e (TTyp delta (DTyp hs c es)) + else tcExpr (addScopedVar x (TTyp delta ty) scope) + e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es)) + return (EAbs Implicit x e) + _ -> do ty <- evalType (scopeSize scope) tty + tcError (NotFunType (scopeVars scope) e0 ty) +tcExpr scope e0 (TTyp delta (DTyp ((Implicit,y,ty):hs) c es)) = do + e0 <- if y == wildCId + then tcExpr (addScopedVar wildCId (TTyp delta ty) scope) + e0 (TTyp delta (DTyp hs c es)) + else tcExpr (addScopedVar wildCId (TTyp delta ty) scope) + e0 (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es)) + return (EAbs Implicit wildCId e0) +tcExpr scope e0@(EAbs Explicit x e) tty = + case tty of + TTyp delta (DTyp ((Explicit,y,ty):hs) c es) -> do e <- if y == wildCId + then tcExpr (addScopedVar x (TTyp delta ty) scope) + e (TTyp delta (DTyp hs c es)) + else tcExpr (addScopedVar x (TTyp delta ty) scope) + e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es)) + return (EAbs Explicit x e) + _ -> do ty <- evalType (scopeSize scope) tty + tcError (NotFunType (scopeVars scope) e0 ty) +tcExpr scope (EMeta _) tty = do + i <- newMeta scope + return (EMeta i) +tcExpr scope e0 tty = do + (e0,tty0) <- infExpr scope e0 + i <- newGuardedMeta scope e0 + eqType scope (scopeSize scope) i tty tty0 + return (EMeta i) + + +----------------------------------------------------- +-- inferExpr +----------------------------------------------------- + +-- | Tries to infer the type of a given expression. Note that +-- even if the expression is type correct it is not always +-- possible to infer its type in the GF type system. +-- In this case the function returns the 'CannotInferType' error. +inferExpr :: PGF -> Expr -> Either TcError (Expr,Type) +inferExpr pgf e = + case unTcM (do (e,tty) <- infExpr emptyScope e + e <- refineExpr e + checkResolvedMetaStore emptyScope e + ty <- evalType 0 tty + return (e,ty)) (abstract pgf) 1 IntMap.empty of + Ok _ ms (e,ty) -> Right (e,ty) + Fail err -> Left err + +infExpr :: Scope -> Expr -> TcM (Expr,TType) +infExpr scope e0@(EApp e1 e2) = do + (e1,TTyp delta ty) <- infExpr scope e1 + (e0,delta,ty) <- tcArg scope e1 e2 delta ty + return (e0,TTyp delta ty) +infExpr scope e0@(EFun x) = do + case lookupVar x scope of + Just (i,tty) -> return (EVar i,tty) + Nothing -> do tty <- lookupFunType x + return (e0,tty) +infExpr scope e0@(EVar i) = do + return (e0,snd (getVar i scope)) +infExpr scope e0@(ELit l) = do + let cat = case l of + LStr _ -> mkCId "String" + LInt _ -> mkCId "Int" + LFlt _ -> mkCId "Float" + return (e0,TTyp [] (DTyp [] cat [])) +infExpr scope (ETyped e ty) = do + ty <- tcType scope ty + e <- tcExpr scope e (TTyp (scopeEnv scope) ty) + return (ETyped e ty,TTyp (scopeEnv scope) ty) +infExpr scope (EImplArg e) = do + (e,tty) <- infExpr scope e + return (EImplArg e,tty) +infExpr scope e = tcError (CannotInferType (scopeVars scope) e) + +tcArg scope e1 e2 delta ty0@(DTyp [] c es) = do + ty1 <- evalType (scopeSize scope) (TTyp delta ty0) + tcError (NotFunType (scopeVars scope) e1 ty1) +tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = tcError (UnexpectedImplArg (scopeVars scope) e2) +tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do + e2 <- tcExpr scope e2 (TTyp delta ty) + funs <- getFuns + if x == wildCId + then return (EApp e1 (EImplArg e2), delta,DTyp hs c es) + else return (EApp e1 (EImplArg e2),eval funs (scopeEnv scope) e2:delta,DTyp hs c es) +tcArg scope e1 e2 delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = do + e2 <- tcExpr scope e2 (TTyp delta ty) + funs <- getFuns + if x == wildCId + then return (EApp e1 e2, delta,DTyp hs c es) + else return (EApp e1 e2,eval funs (scopeEnv scope) e2:delta,DTyp hs c es) +tcArg scope e1 e2 delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do + i <- newMeta scope + if x == wildCId + then tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 delta (DTyp hs c es) + else tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 (VMeta i (scopeEnv scope) [] : delta) (DTyp hs c es) + +----------------------------------------------------- +-- eqType +----------------------------------------------------- + +eqType :: Scope -> Int -> MetaId -> TType -> TType -> TcM () +eqType scope k i0 tty1@(TTyp delta1 ty1@(DTyp hyps1 cat1 es1)) tty2@(TTyp delta2 ty2@(DTyp hyps2 cat2 es2)) + | cat1 == cat2 = do (k,delta1,delta2) <- eqHyps k delta1 hyps1 delta2 hyps2 + sequence_ [eqExpr k delta1 e1 delta2 e2 | (e1,e2) <- zip es1 es2] + | otherwise = raiseTypeMatchError + where + raiseTypeMatchError = do ty1 <- evalType k tty1 + ty2 <- evalType k tty2 + e <- refineExpr (EMeta i0) + tcError (TypeMismatch (scopeVars scope) e ty1 ty2) + + eqHyps :: Int -> Env -> [Hypo] -> Env -> [Hypo] -> TcM (Int,Env,Env) + eqHyps k delta1 [] delta2 [] = + return (k,delta1,delta2) + eqHyps k delta1 ((_,x,ty1) : h1s) delta2 ((_,y,ty2) : h2s) = do + eqType scope k i0 (TTyp delta1 ty1) (TTyp delta2 ty2) + if x == wildCId && y == wildCId + then eqHyps k delta1 h1s delta2 h2s + else if x /= wildCId && y /= wildCId + then eqHyps (k+1) ((VGen k []):delta1) h1s ((VGen k []):delta2) h2s + else raiseTypeMatchError + eqHyps k delta1 h1s delta2 h2s = raiseTypeMatchError + + eqExpr :: Int -> Env -> Expr -> Env -> Expr -> TcM () + eqExpr k env1 e1 env2 e2 = do + funs <- getFuns + eqValue k (eval funs env1 e1) (eval funs env2 e2) + + eqValue :: Int -> Value -> Value -> TcM () + eqValue k v1 v2 = do + v1 <- deRef v1 + v2 <- deRef v2 + eqValue' k v1 v2 + + deRef v@(VMeta i env vs) = do + mv <- getMeta i + funs <- getFuns + case mv of + MBound e -> deRef (apply funs env e vs) + MGuarded e _ x | x == 0 -> deRef (apply funs env e vs) + | otherwise -> return v + MUnbound _ _ -> return v + deRef v = return v + + eqValue' k (VSusp i env vs1 c) v2 = addConstraint i0 i env vs1 (\v1 -> eqValue k (c v1) v2) + eqValue' k v1 (VSusp i env vs2 c) = addConstraint i0 i env vs2 (\v2 -> eqValue k v1 (c v2)) + eqValue' k (VMeta i env1 vs1) (VMeta j env2 vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2 + eqValue' k (VMeta i env1 vs1) v2 = do (MUnbound scopei cs) <- getMeta i + e2 <- mkLam i scopei env1 vs1 v2 + setMeta i (MBound e2) + sequence_ [c e2 | c <- cs] + eqValue' k v1 (VMeta i env2 vs2) = do (MUnbound scopei cs) <- getMeta i + e1 <- mkLam i scopei env2 vs2 v1 + setMeta i (MBound e1) + sequence_ [c e1 | c <- cs] + eqValue' k (VApp f1 vs1) (VApp f2 vs2) | f1 == f2 = zipWithM_ (eqValue k) vs1 vs2 + eqValue' k (VLit l1) (VLit l2 ) | l1 == l2 = return () + eqValue' k (VGen i vs1) (VGen j vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2 + eqValue' k (VClosure env1 (EAbs _ x1 e1)) (VClosure env2 (EAbs _ x2 e2)) = let v = VGen k [] + in eqExpr (k+1) (v:env1) e1 (v:env2) e2 + eqValue' k v1 v2 = raiseTypeMatchError + + mkLam i scope env vs0 v = do + let k = scopeSize scope + vs = reverse (take k env) ++ vs0 + xs = nub [i | VGen i [] <- vs] + if length vs == length xs + then return () + else raiseTypeMatchError + v <- occurCheck i k xs v + funs <- getFuns + return (addLam vs0 (value2expr funs (length xs) v)) + where + addLam [] e = e + addLam (v:vs) e = EAbs Explicit var (addLam vs e) + + var = mkCId "v" + + occurCheck i0 k xs (VApp f vs) = do vs <- mapM (occurCheck i0 k xs) vs + return (VApp f vs) + occurCheck i0 k xs (VLit l) = return (VLit l) + occurCheck i0 k xs (VMeta i env vs) = do if i == i0 + then raiseTypeMatchError + else return () + mv <- getMeta i + funs <- getFuns + case mv of + MBound e -> occurCheck i0 k xs (apply funs env e vs) + MGuarded e _ _ -> occurCheck i0 k xs (apply funs env e vs) + MUnbound scopei _ | scopeSize scopei > k -> raiseTypeMatchError + | otherwise -> do vs <- mapM (occurCheck i0 k xs) vs + return (VMeta i env vs) + occurCheck i0 k xs (VSusp i env vs cnt) = do addConstraint i0 i env vs (\v -> occurCheck i0 k xs (cnt v) >> return ()) + return (VSusp i env vs cnt) + occurCheck i0 k xs (VGen i vs) = case List.findIndex (==i) xs of + Just i -> do vs <- mapM (occurCheck i0 k xs) vs + return (VGen i vs) + Nothing -> raiseTypeMatchError + occurCheck i0 k xs (VClosure env e) = do env <- mapM (occurCheck i0 k xs) env + return (VClosure env e) + + +----------------------------------------------------------- +-- check for meta variables that still have to be resolved +----------------------------------------------------------- + +checkResolvedMetaStore :: Scope -> Expr -> TcM () +checkResolvedMetaStore scope e = TcM (\abstr metaid ms -> + let xs = [i | (i,mv) <- IntMap.toList ms, not (isResolved mv)] + in if List.null xs + then Ok metaid ms () + else Fail (UnresolvedMetaVars (scopeVars scope) e xs)) + where + isResolved (MUnbound _ []) = True + isResolved (MGuarded _ _ _) = True + isResolved (MBound _) = True + isResolved _ = False + +----------------------------------------------------- +-- evalType +----------------------------------------------------- + +evalType :: Int -> TType -> TcM Type +evalType k (TTyp delta ty) = do funs <- getFuns + refineType (evalTy funs k delta ty) + where + evalTy sig k delta (DTyp hyps cat es) = + let ((k1,delta1),hyps1) = mapAccumL (evalHypo sig) (k,delta) hyps + in DTyp hyps1 cat (List.map (normalForm sig k1 delta1) es) + + evalHypo sig (k,delta) (b,x,ty) = + if x == wildCId + then ((k, delta),(b,x,evalTy sig k delta ty)) + else ((k+1,(VGen k []):delta),(b,x,evalTy sig k delta ty)) + + +----------------------------------------------------- +-- refinement +----------------------------------------------------- + +refineExpr :: Expr -> TcM Expr +refineExpr e = TcM (\abstr metaid ms -> Ok metaid ms (refineExpr_ ms e)) + +refineExpr_ ms e = refine e + where + refine (EAbs b x e) = EAbs b x (refine e) + refine (EApp e1 e2) = EApp (refine e1) (refine e2) + refine (ELit l) = ELit l + refine (EMeta i) = case IntMap.lookup i ms of + Just (MBound e ) -> refine e + Just (MGuarded e _ _) -> refine e + _ -> EMeta i + refine (EFun f) = EFun f + refine (EVar i) = EVar i + refine (ETyped e ty) = ETyped (refine e) (refineType_ ms ty) + refine (EImplArg e) = EImplArg (refine e) + +refineType :: Type -> TcM Type +refineType ty = TcM (\abstr metaid ms -> Ok metaid ms (refineType_ ms ty)) + +refineType_ ms (DTyp hyps cat es) = DTyp [(b,x,refineType_ ms ty) | (b,x,ty) <- hyps] cat (List.map (refineExpr_ ms) es) + +value2expr sig i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr sig i) vs) +value2expr sig i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr sig i) vs) +value2expr sig i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr sig i) vs) +value2expr sig i (VSusp j env vs k) = value2expr sig i (k (VGen j vs)) +value2expr sig i (VLit l) = ELit l +value2expr sig i (VClosure env (EAbs b x e)) = EAbs b x (value2expr sig (i+1) (eval sig ((VGen i []):env) e)) diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs new file mode 100644 index 000000000..429551f54 --- /dev/null +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -0,0 +1,353 @@ +---------------------------------------------------------------------- +-- | +-- Module : VisualizeTree +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: +-- > CVS $Author: +-- > CVS $Revision: +-- +-- Print a graph of an abstract syntax tree in Graphviz DOT format +-- Based on BB's VisualizeGrammar +-- FIXME: change this to use GF.Visualization.Graphviz, +-- instead of rolling its own. +----------------------------------------------------------------------------- + +module PGF.VisualizeTree ( graphvizAbstractTree + , graphvizParseTree + , graphvizDependencyTree + , graphvizAlignment + , tree2mk + , getDepLabels + , PosText(..), readPosText + ) where + +import PGF.CId (CId,showCId,pCId,mkCId) +import PGF.Data +import PGF.Tree +import PGF.Expr (showExpr) +import PGF.Linearize +import PGF.Macros (lookValCat) + +import qualified Data.Map as Map +import Data.List (intersperse,nub,isPrefixOf,sort,sortBy) +import Data.Char (isDigit) +import qualified Text.ParserCombinators.ReadP as RP + +import Debug.Trace + +graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String +graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree + +tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String] +tree2graph pgf (funs,cats) = prf [] where + prf ps t = let (nod,lab) = prn ps t in + (nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") : + case t of + Fun cid trees -> + [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++ + concat [prf (j:ps) t | (j,t) <- zip [0..] trees] + Abs xs (Fun cid trees) -> + [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++ + concat [prf (j:ps) t | (j,t) <- zip [0..] trees] + _ -> [] + prn ps t = case t of + Fun cid _ -> + let + fun = if funs then showCId cid else "" + cat = if cats then prCat cid else "" + colon = if funs && cats then " : " else "" + lab = "\"" ++ fun ++ colon ++ cat ++ "\"" + in (show(show (ps :: [Int])),lab) + Abs bs tree -> + let fun = case tree of + Fun cid _ -> Fun cid [] + _ -> tree + in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"") + _ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"") + pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];" + arr = " -- " -- if digr then " -> " else " -- " + prCat = showCId . lookValCat pgf + esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts + +prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where + graph = if digr then "digraph" else "graph" + + +-- replace each non-atomic constructor with mkC, where C is the val cat +tree2mk :: PGF -> Expr -> String +tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where + t2m t = case t of + Fun cid [] -> t + Fun cid ts -> Fun (mk cid) (map t2m ts) + _ -> t + mk = mkCId . ("mk" ++) . showCId . lookValCat pgf + +-- dependency trees from Linearize.linearizeMark + +graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String +graphvizDependencyTree format debug mlab ms pgf lang exp = case format of + "malt" -> unlines (lin2dep format) + "malt_input" -> unlines (lin2dep format) + _ -> prGraph True (lin2dep format) + + where + + lin2dep format = trace (ifd (show sortedNodes ++ show nodeWords)) $ case format of + "malt" -> map (concat . intersperse "\t") wnodes + "malt_input" -> map (concat . intersperse "\t" . take 6) wnodes + _ -> prelude ++ nodes ++ links + + ifd s = if debug then s else [] + + pot = readPosText $ head $ linearizesMark pgf lang exp + ---- use Just str if you have str to match against + + prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"] + + nodes = map mkNode nodeWords + mkNode (i,((_,p),ss)) = + node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;" + nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)| + ((Just f,p),w) <- wlins pot] + + links = map mkLink thelinks + thelinks = [(word y, x, label tr y x) | + (_,((f,x),_)) <- tail nodeWords, + let y = dominant x] + mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;" + node = show . show + + dominant x = case x of + [] -> x + _ | not (x == hx) -> hx + _ -> dominant (init x) + where + hx = headArg (init x) tr x + + headArg x0 tr x = case (tr,x) of + (Fun f [],[_]) -> x0 ---- ?? + (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f] + (Fun f ts,i:y) -> headArg x0 (ts !! i) y + _ -> x0 ---- + + label tr y x = case span (uncurry (==)) (zip y x) of + (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys)) + _ -> "" ---- + + funAt tr x = case (tr,x) of + (Fun f _ ,[]) -> f + (Fun f ts,i:y) -> funAt (ts !! i) y + _ -> mkCId (prTree tr) ---- + + word x = if elem x sortedNodes then x else + let x' = headArg x tr (x ++[0]) in + if x' == x then [] else word x' + + tr = expr2tree exp + sortedNodes = [p | (_,((_,p),_)) <- nodeWords] + + labels = maybe Map.empty id mlab + getHead i f = case Map.lookup f labels of + Just ls -> length $ takeWhile (/= "head") ls + _ -> i + getLabel i f = case Map.lookup f labels of + Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i + _ -> showCId f ++ "#" ++ show i + +-- to generate CoNLL format for MaltParser + nodeMap :: Map.Map [Int] Int + nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords] + + arcMap :: Map.Map [Int] ([Int],String) + arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks] + + lookDomLab p = case Map.lookup p arcMap of + Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l) + _ -> (0,rootlabel) + + wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] | + (i, ((fun,p),ws)) <- tail nodeWords, + let pos = showCId $ lookValCat pgf fun, + let morph = unspec, + let (dom,lab) = lookDomLab p + ] + maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2 + unspec = "_" + rootlabel = "ROOT" + +type Labels = Map.Map CId [String] + +getDepLabels :: [String] -> Labels +getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss] + + +-- parse trees from Linearize.linearizeMark +---- nubrec and domins are quadratic, but could be (n log n) + +graphvizParseTree :: PGF -> CId -> Expr -> String +graphvizParseTree pgf lang = prGraph False . lin2tree pgf . linMark where + linMark = head . linearizesMark pgf lang + ---- use Just str if you have str to match against + +lin2tree pgf s = trace s $ prelude ++ nodes ++ links where + + prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"] + + nodeRecs = zip [0..] + (nub (filter (not . null) (nlins [postext] ++ [leaves postext]))) + nlins pts = + nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] : + concatMap nlins [ts | T _ ts <- pts] + leaves pt = [(p++[j],s) | (j,(p,s)) <- + zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]] + + nubrec es rs = case rs of + r:rr -> let r' = filter (not . flip elem es) (nub r) + in r' : nubrec (r' ++ es) rr + _ -> rs + + nodes = map mkStruct nodeRecs + + mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;" + cat = showCId . lookValCat pgf + fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs]) + struct i = "struct" ++ show i + + links = map mkEdge domins + domins = nub [((i,x),(j,y)) | + (i,xs) <- nodeRecs, (j,ys) <- nodeRecs, + x <- xs, y <- ys, dominates x y] + dominates (p,x) (q,y) = not (null q) && p == init q + mkEdge ((i,x),(j,y)) = + struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++ + struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;" + + postext = readPosText s + +-- auxiliaries for graphviz syntax +struct i = "struct" ++ show i +mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n +uncommas = map (\c -> if c==',' then 'c' else c) +tag s = "<" ++ s ++ ">" +showp = init . tail . show +mtag = tag . ('n':) . uncommas + +-- word alignments from Linearize.linearizesMark +-- words are chunks like {[0,1,1,0] old} + +graphvizAlignment :: PGF -> Expr -> String +graphvizAlignment pgf = prGraph True . lin2graph . linsMark where + linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)] + +lin2graph :: [String] -> [String] +lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links + + where + + prelude = ["rankdir=LR ;", "node [shape = record] ;"] + + nlins :: [(Int,[((Int,String),String)])] + nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) | + (i,ws) <- zip [0..] (map (wlins . readPosText) ss)] + + unw = concat . intersperse "\\ " -- space escape in graphviz + + nodes = map mkStruct nlins + + mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" + + fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) + + links = nub $ concatMap mkEdge (init nlins) + + mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list + [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] + + edge i v w = + struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" +{- +alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double) +alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where + linsMark t = + [s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)] + + mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double) + mkStat = + + mkAlign :: [String] -> [(String,String)] + mkAlign ss = + + nlins :: [(Int,[((Int,String),String)])] + nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) | + (i,vs) <- zip [0..] (map (wlins . readPosText) ss)] + + nodes = map mkStruct nlins + + mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" + + fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) + + links = nub $ concatMap mkEdge (init nlins) + + mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list + [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] + + edge i v w = + struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" +-} + +wlins :: PosText -> [((Maybe CId,[Int]),[String])] +wlins pt = case pt of + T p pts -> concatMap (lins p) pts + M ws -> if null ws then [] else [((Nothing,[]),ws)] + where + lins p pt = case pt of + T q pts -> concatMap (lins q) pts + M ws -> if null ws then [] else [(p,ws)] + +data PosText = + T (Maybe CId,[Int]) [PosText] + | M [String] + deriving Show + +readPosText :: String -> PosText +readPosText = fst . head . (RP.readP_to_S pPosText) where + pPosText = do + RP.char '(' >> RP.skipSpaces + p <- pPos + RP.skipSpaces + ts <- RP.many pPosText + RP.char ')' >> RP.skipSpaces + return (T p ts) + RP.<++ do + ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ') + return (M ws) + pPos = do + fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f)) + RP.<++ (return Nothing) + RP.char '[' >> RP.skipSpaces + is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',') + RP.char ']' >> RP.skipSpaces + RP.char ')' RP.<++ return ' ' + return (fun,map read is) + + +{- +digraph{ +rankdir ="LR" ; +node [shape = record] ; + +struct1 [label = "<f0> this|<f1> very|<f2> intelligent|<f3> man"] ; +struct2 [label = "<f0> cet|<f1> homme|<f2> tres|<f3> intelligent|<f4> ci"] ; + +struct1:f0 -> struct2:f0 ; +struct1:f1 -> struct2:f2 ; +struct1:f2 -> struct2:f3 ; +struct1:f3 -> struct2:f1 ; +struct1:f0 -> struct2:f4 ; +} +-} + |
