summaryrefslogtreecommitdiff
path: root/src/runtime/haskell/PGF/PMCFG.hs
blob: abf7e438032bc547a1b521c3eaf9b6a12c6f5f75 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
module PGF.PMCFG where

import PGF.CId
import PGF.Expr

import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Array.Unboxed
import Text.PrettyPrint

type FCat      = Int
type FIndex    = Int
type FPointPos = Int
data FSymbol
  = FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
  | FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
  | FSymKS [String]
  | FSymKP [String] [Alternative]
  deriving (Eq,Ord,Show)
data Production
  = FApply  {-# UNPACK #-} !FunId [FCat]
  | FCoerce {-# UNPACK #-} !FCat
  | FConst  Expr [String]
  deriving (Eq,Ord,Show)
data FFun  = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq  = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int

data Alternative =
   Alt [String] [String]
  deriving (Eq,Ord,Show)

data ParserInfo
    = ParserInfo { functions   :: Array FunId FFun
                 , sequences   :: Array SeqId FSeq
                 , productions0:: IntMap.IntMap (Set.Set Production)    -- this are the original productions as they are loaded from the PGF file
                 , productions :: IntMap.IntMap (Set.Set Production)    -- this are the productions after the filtering for useless productions
                 , startCats   :: Map.Map CId (FCat,FCat,Array FIndex String)  -- for every category - start/end FCat and a list of label names
                 , totalCats   :: {-# UNPACK #-} !FCat
                 }


fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt    = (-2)
fcatFloat  = (-3)
fcatVar    = (-4)

isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])

ppPMCFG :: ParserInfo -> Doc
ppPMCFG pinfo =
  text "productions" $$
  nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
  text "functions" $$
  nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
  text "sequences" $$
  nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$
  text "startcats" $$
  nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo))))

ppProduction (fcat,FApply funid args) =
  ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,FCoerce arg) =
  ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
ppProduction (fcat,FConst _ ss) =
  ppFCat fcat <+> text "->" <+> ppStrs ss

ppFun (funid,FFun fun arr) =
  ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)

ppSeq (seqid,seq) = 
  ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))

ppStartCat (id,(start,end,labels)) =
  ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$
                              text "labels" <+> brackets (vcat (map (text . show) (elems labels))))

ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymKS ts)   = ppStrs ts
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))

ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)

ppStrs ss = doubleQuotes (hsep (map text ss))

ppFCat  fcat
  | fcat == fcatString = text "CString"
  | fcat == fcatInt    = text "CInt"
  | fcat == fcatFloat  = text "CFloat"
  | fcat == fcatVar    = text "CVar"
  | otherwise          = char 'C' <> int fcat

ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid


filterProductions = closure
  where
    closure prods0
      | IntMap.size prods == IntMap.size prods0 = prods
      | otherwise                               = closure prods
      where
        prods = IntMap.mapMaybe (filterProdSet prods0) prods0

    filterProdSet prods set0
      | Set.null set = Nothing
      | otherwise    = Just set
      where
        set = Set.filter (filterRule prods) set0
                             
    filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
    filterRule prods (FCoerce fcat)      = isLiteralFCat fcat || IntMap.member fcat prods
    filterRule prods _                   = True