summaryrefslogtreecommitdiff
path: root/src/PGF/Data.hs
blob: 047b5ec46aaccb65f3d28b680663ad60fe793e8a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type) where

import PGF.CId
import PGF.Expr hiding (Value, Env)
import PGF.Type

import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.List
import Data.Array
import Data.Array.Unboxed

-- internal datatypes for PGF

-- | An abstract data type representing multilingual grammar
-- in Portable Grammar Format.
data PGF = PGF {
  absname   :: CId ,
  cncnames  :: [CId] ,
  gflags    :: Map.Map CId String,   -- value of a global flag
  abstract  :: Abstr ,
  concretes :: Map.Map CId Concr
  }

data Abstr = Abstr {
  aflags  :: Map.Map CId String,      -- value of a flag
  funs    :: Map.Map CId (Type,Expr), -- type and def of a fun
  cats    :: Map.Map CId [Hypo],      -- context of a cat
  catfuns :: Map.Map CId [CId]        -- funs to a cat (redundant, for fast lookup)
  }

data Concr = Concr {
  cflags  :: Map.Map CId String,    -- value of a flag
  lins    :: Map.Map CId Term,      -- lin of a fun
  opers   :: Map.Map CId Term,      -- oper generated by subex elim
  lincats :: Map.Map CId Term,      -- lin type of a cat
  lindefs :: Map.Map CId Term,      -- lin default of a cat
  printnames :: Map.Map CId Term,   -- printname of a cat or a fun
  paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
  parser  :: Maybe ParserInfo       -- parser
  }

data Term =
   R [Term]
 | P Term Term
 | S [Term]
 | K Tokn
 | V Int
 | C Int
 | F CId
 | FV [Term]
 | W String Term
 | TM String
  deriving (Eq,Ord,Show)

data Tokn =
   KS String
 | KP [String] [Alternative]
  deriving (Eq,Ord,Show)

data Alternative =
   Alt [String] [String]
  deriving (Eq,Ord,Show)


type FCat      = Int
type FIndex    = Int
type FPointPos = Int
data FSymbol
  = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
  | FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
  | FSymTok Tokn
  deriving (Eq,Ord,Show)
type Profile = [Int]
data Production
  = FApply  {-# UNPACK #-} !FunId [FCat]
  | FCoerce {-# UNPACK #-} !FCat
  | FConst  Tree String
  deriving (Eq,Ord,Show)
data FFun  = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq  = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int

data ParserInfo
    = ParserInfo { functions   :: Array FunId FFun
                 , sequences   :: Array SeqId FSeq
	         , productions :: IntMap.IntMap (Set.Set Production)
	         , startCats   :: Map.Map CId [FCat]
	         , totalCats   :: {-# UNPACK #-} !FCat
	         }


fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt    = (-2)
fcatFloat  = (-3)
fcatVar    = (-4)


-- print statistics

statGFCC :: PGF -> String
statGFCC pgf = unlines [
  "Abstract\t" ++ prCId (absname pgf), 
  "Concretes\t" ++ unwords (map prCId (cncnames pgf)), 
  "Categories\t" ++ unwords (map prCId (Map.keys (cats (abstract pgf)))) 
  ]

-- merge two GFCCs; fails is differens absnames; priority to second arg

unionPGF :: PGF -> PGF -> PGF
unionPGF one two = case absname one of
  n | n == wildCId     -> two    -- extending empty grammar
    | n == absname two -> one {  -- extending grammar with same abstract
      concretes = Map.union (concretes two) (concretes one),
      cncnames  = union (cncnames one) (cncnames two)
    }
  _ -> one   -- abstracts don't match ---- print error msg

emptyPGF :: PGF
emptyPGF = PGF {
  absname   = wildCId,
  cncnames  = [] ,
  gflags    = Map.empty,
  abstract  = error "empty grammar, no abstract",
  concretes = Map.empty
  }