summaryrefslogtreecommitdiff
path: root/src-3.0/PGF/Data.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-25 16:54:35 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-25 16:54:35 +0000
commite9e80fc389365e24d4300d7d5390c7d833a96c50 (patch)
treef0b58473adaa670bd8fc52ada419d8cad470ee03 /src-3.0/PGF/Data.hs
parentb96b36f43de3e2f8b58d5f539daa6f6d47f25870 (diff)
changed names of resource-1.3; added a note on homepage on release
Diffstat (limited to 'src-3.0/PGF/Data.hs')
-rw-r--r--src-3.0/PGF/Data.hs201
1 files changed, 0 insertions, 201 deletions
diff --git a/src-3.0/PGF/Data.hs b/src-3.0/PGF/Data.hs
deleted file mode 100644
index 3f9aaa6ab..000000000
--- a/src-3.0/PGF/Data.hs
+++ /dev/null
@@ -1,201 +0,0 @@
-module PGF.Data where
-
-import PGF.CId
-import GF.Text.UTF8
-import GF.Data.Assoc
-
-import qualified Data.Map as Map
-import Data.List
-import Data.Array
-
--- internal datatypes for PGF
-
--- | An abstract data type representing multilingual grammar
--- in Portable Grammar Format.
-data PGF = PGF {
- absname :: CId ,
- cncnames :: [CId] ,
- gflags :: Map.Map CId String, -- value of a global flag
- abstract :: Abstr ,
- concretes :: Map.Map CId Concr
- }
-
-data Abstr = Abstr {
- aflags :: Map.Map CId String, -- value of a flag
- funs :: Map.Map CId (Type,Expr), -- type and def of a fun
- cats :: Map.Map CId [Hypo], -- context of a cat
- catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup)
- }
-
-data Concr = Concr {
- cflags :: Map.Map CId String, -- value of a flag
- lins :: Map.Map CId Term, -- lin of a fun
- opers :: Map.Map CId Term, -- oper generated by subex elim
- lincats :: Map.Map CId Term, -- lin type of a cat
- lindefs :: Map.Map CId Term, -- lin default of a cat
- printnames :: Map.Map CId Term, -- printname of a cat or a fun
- paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
- parser :: Maybe ParserInfo -- parser
- }
-
-data Type =
- DTyp [Hypo] CId [Expr]
- deriving (Eq,Ord,Show)
-
-data Literal =
- LStr String -- ^ string constant
- | LInt Integer -- ^ integer constant
- | LFlt Double -- ^ floating point constant
- deriving (Eq,Ord,Show)
-
--- | The tree is an evaluated expression in the abstract syntax
--- of the grammar. The type is especially restricted to not
--- allow unapplied lambda abstractions. The tree is used directly
--- from the linearizer and is produced directly from the parser.
-data Tree =
- Abs [CId] Tree -- ^ lambda abstraction. The list of variables is non-empty
- | Var CId -- ^ variable
- | Fun CId [Tree] -- ^ function application
- | Lit Literal -- ^ literal
- | Meta Int -- ^ meta variable
- deriving (Show, Eq, Ord)
-
--- | An expression represents a potentially unevaluated expression
--- in the abstract syntax of the grammar. It can be evaluated with
--- the 'expr2tree' function and then linearized or it can be used
--- directly in the dependent types.
-data Expr =
- EAbs CId Expr -- ^ lambda abstraction
- | EApp Expr Expr -- ^ application
- | ELit Literal -- ^ literal
- | EMeta Int -- ^ meta variable
- | EVar CId -- ^ variable or function reference
- | EEq [Equation] -- ^ lambda function defined as a set of equations with pattern matching
- deriving (Eq,Ord,Show)
-
-data Term =
- R [Term]
- | P Term Term
- | S [Term]
- | K Tokn
- | V Int
- | C Int
- | F CId
- | FV [Term]
- | W String Term
- | TM String
- deriving (Eq,Ord,Show)
-
-data Tokn =
- KS String
- | KP [String] [Alternative]
- deriving (Eq,Ord,Show)
-
-data Alternative =
- Alt [String] [String]
- deriving (Eq,Ord,Show)
-
-data Hypo =
- Hyp CId Type
- deriving (Eq,Ord,Show)
-
--- | The equation is used to define lambda function as a sequence
--- of equations with pattern matching. The list of 'Expr' represents
--- the patterns and the second 'Expr' is the function body for this
--- equation.
-data Equation =
- Equ [Expr] Expr
- deriving (Eq,Ord,Show)
-
-
-type FToken = String
-type FCat = Int
-type FIndex = Int
-data FSymbol
- = FSymCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int
- | FSymTok FToken
-type Profile = [Int]
-type FPointPos = Int
-type FGrammar = ([FRule], Map.Map CId [FCat])
-data FRule = FRule CId [Profile] [FCat] FCat (Array FIndex (Array FPointPos FSymbol))
-
-type RuleId = Int
-
-data ParserInfo
- = ParserInfo { allRules :: Array RuleId FRule
- , topdownRules :: Assoc FCat [RuleId]
- -- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
- -- , emptyRules :: [RuleId]
- , epsilonRules :: [RuleId]
- -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
- , leftcornerCats :: Assoc FCat [RuleId]
- , leftcornerTokens :: Assoc FToken [RuleId]
- -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
- , grammarCats :: [FCat]
- , grammarToks :: [FToken]
- , startupCats :: Map.Map CId [FCat]
- }
-
-
-fcatString, fcatInt, fcatFloat, fcatVar :: Int
-fcatString = (-1)
-fcatInt = (-2)
-fcatFloat = (-3)
-fcatVar = (-4)
-
-
--- print statistics
-
-statGFCC :: PGF -> String
-statGFCC pgf = unlines [
- "Abstract\t" ++ prCId (absname pgf),
- "Concretes\t" ++ unwords (map prCId (cncnames pgf)),
- "Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf))))
- ]
-
--- merge two GFCCs; fails is differens absnames; priority to second arg
-
-unionPGF :: PGF -> PGF -> PGF
-unionPGF one two = case absname one of
- n | n == wildCId -> two -- extending empty grammar
- | n == absname two -> one { -- extending grammar with same abstract
- concretes = Map.union (concretes two) (concretes one),
- cncnames = union (cncnames two) (cncnames one)
- }
- _ -> one -- abstracts don't match ---- print error msg
-
-emptyPGF :: PGF
-emptyPGF = PGF {
- absname = wildCId,
- cncnames = [] ,
- gflags = Map.empty,
- abstract = error "empty grammar, no abstract",
- concretes = Map.empty
- }
-
--- encode idenfifiers and strings in UTF8
-
-utf8GFCC :: PGF -> PGF
-utf8GFCC pgf = pgf {
- concretes = Map.map u8concr (concretes pgf)
- }
- where
- u8concr cnc = cnc {
- lins = Map.map u8term (lins cnc),
- opers = Map.map u8term (opers cnc)
- }
- u8term = convertStringsInTerm encodeUTF8
-
----- TODO: convert identifiers and flags
-
-convertStringsInTerm conv t = case t of
- K (KS s) -> K (KS (conv s))
- W s r -> W (conv s) (convs r)
- R ts -> R $ map convs ts
- S ts -> S $ map convs ts
- FV ts -> FV $ map convs ts
- P u v -> P (convs u) (convs v)
- _ -> t
- where
- convs = convertStringsInTerm conv
-