diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:54:35 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:54:35 +0000 |
| commit | e9e80fc389365e24d4300d7d5390c7d833a96c50 (patch) | |
| tree | f0b58473adaa670bd8fc52ada419d8cad470ee03 /src-3.0/PGF/Data.hs | |
| parent | b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (diff) | |
changed names of resource-1.3; added a note on homepage on release
Diffstat (limited to 'src-3.0/PGF/Data.hs')
| -rw-r--r-- | src-3.0/PGF/Data.hs | 201 |
1 files changed, 0 insertions, 201 deletions
diff --git a/src-3.0/PGF/Data.hs b/src-3.0/PGF/Data.hs deleted file mode 100644 index 3f9aaa6ab..000000000 --- a/src-3.0/PGF/Data.hs +++ /dev/null @@ -1,201 +0,0 @@ -module PGF.Data where - -import PGF.CId -import GF.Text.UTF8 -import GF.Data.Assoc - -import qualified Data.Map as Map -import Data.List -import Data.Array - --- internal datatypes for PGF - --- | An abstract data type representing multilingual grammar --- in Portable Grammar Format. -data PGF = PGF { - absname :: CId , - cncnames :: [CId] , - gflags :: Map.Map CId String, -- value of a global flag - abstract :: Abstr , - concretes :: Map.Map CId Concr - } - -data Abstr = Abstr { - aflags :: Map.Map CId String, -- value of a flag - funs :: Map.Map CId (Type,Expr), -- type and def of a fun - cats :: Map.Map CId [Hypo], -- context of a cat - catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup) - } - -data Concr = Concr { - cflags :: Map.Map CId String, -- value of a flag - lins :: Map.Map CId Term, -- lin of a fun - opers :: Map.Map CId Term, -- oper generated by subex elim - lincats :: Map.Map CId Term, -- lin type of a cat - lindefs :: Map.Map CId Term, -- lin default of a cat - printnames :: Map.Map CId Term, -- printname of a cat or a fun - paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names - parser :: Maybe ParserInfo -- parser - } - -data Type = - DTyp [Hypo] CId [Expr] - deriving (Eq,Ord,Show) - -data Literal = - LStr String -- ^ string constant - | LInt Integer -- ^ integer constant - | LFlt Double -- ^ floating point constant - deriving (Eq,Ord,Show) - --- | The tree is an evaluated expression in the abstract syntax --- of the grammar. The type is especially restricted to not --- allow unapplied lambda abstractions. The tree is used directly --- from the linearizer and is produced directly from the parser. -data Tree = - Abs [CId] Tree -- ^ lambda abstraction. The list of variables is non-empty - | Var CId -- ^ variable - | Fun CId [Tree] -- ^ function application - | Lit Literal -- ^ literal - | Meta Int -- ^ meta variable - deriving (Show, Eq, Ord) - --- | An expression represents a potentially unevaluated expression --- in the abstract syntax of the grammar. It can be evaluated with --- the 'expr2tree' function and then linearized or it can be used --- directly in the dependent types. -data Expr = - EAbs CId Expr -- ^ lambda abstraction - | EApp Expr Expr -- ^ application - | ELit Literal -- ^ literal - | EMeta Int -- ^ meta variable - | EVar CId -- ^ variable or function reference - | EEq [Equation] -- ^ lambda function defined as a set of equations with pattern matching - deriving (Eq,Ord,Show) - -data Term = - R [Term] - | P Term Term - | S [Term] - | K Tokn - | V Int - | C Int - | F CId - | FV [Term] - | W String Term - | TM String - deriving (Eq,Ord,Show) - -data Tokn = - KS String - | KP [String] [Alternative] - deriving (Eq,Ord,Show) - -data Alternative = - Alt [String] [String] - deriving (Eq,Ord,Show) - -data Hypo = - Hyp CId Type - deriving (Eq,Ord,Show) - --- | The equation is used to define lambda function as a sequence --- of equations with pattern matching. The list of 'Expr' represents --- the patterns and the second 'Expr' is the function body for this --- equation. -data Equation = - Equ [Expr] Expr - deriving (Eq,Ord,Show) - - -type FToken = String -type FCat = Int -type FIndex = Int -data FSymbol - = FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int - | FSymTok FToken -type Profile = [Int] -type FPointPos = Int -type FGrammar = ([FRule], Map.Map CId [FCat]) -data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol)) - -type RuleId = Int - -data ParserInfo - = ParserInfo { allRules :: Array RuleId FRule - , topdownRules :: Assoc FCat [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): - -- , emptyRules :: [RuleId] - , epsilonRules :: [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , leftcornerCats :: Assoc FCat [RuleId] - , leftcornerTokens :: Assoc FToken [RuleId] - -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , grammarCats :: [FCat] - , grammarToks :: [FToken] - , startupCats :: Map.Map CId [FCat] - } - - -fcatString, fcatInt, fcatFloat, fcatVar :: Int -fcatString = (-1) -fcatInt = (-2) -fcatFloat = (-3) -fcatVar = (-4) - - --- print statistics - -statGFCC :: PGF -> String -statGFCC pgf = unlines [ - "Abstract\t" ++ prCId (absname pgf), - "Concretes\t" ++ unwords (map prCId (cncnames pgf)), - "Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf)))) - ] - --- merge two GFCCs; fails is differens absnames; priority to second arg - -unionPGF :: PGF -> PGF -> PGF -unionPGF one two = case absname one of - n | n == wildCId -> two -- extending empty grammar - | n == absname two -> one { -- extending grammar with same abstract - concretes = Map.union (concretes two) (concretes one), - cncnames = union (cncnames two) (cncnames one) - } - _ -> one -- abstracts don't match ---- print error msg - -emptyPGF :: PGF -emptyPGF = PGF { - absname = wildCId, - cncnames = [] , - gflags = Map.empty, - abstract = error "empty grammar, no abstract", - concretes = Map.empty - } - --- encode idenfifiers and strings in UTF8 - -utf8GFCC :: PGF -> PGF -utf8GFCC pgf = pgf { - concretes = Map.map u8concr (concretes pgf) - } - where - u8concr cnc = cnc { - lins = Map.map u8term (lins cnc), - opers = Map.map u8term (opers cnc) - } - u8term = convertStringsInTerm encodeUTF8 - ----- TODO: convert identifiers and flags - -convertStringsInTerm conv t = case t of - K (KS s) -> K (KS (conv s)) - W s r -> W (conv s) (convs r) - R ts -> R $ map convs ts - S ts -> S $ map convs ts - FV ts -> FV $ map convs ts - P u v -> P (convs u) (convs v) - _ -> t - where - convs = convertStringsInTerm conv - |
