summaryrefslogtreecommitdiff
path: root/src/GF/OldParsing/Utilities.hs
blob: 22d16897309bd44c4dcdc82b1307ba9a78e0ce86 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
----------------------------------------------------------------------
-- |
-- Module      : Parsing.Utilities
-- Maintainer  : PL
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/11 13:52:55 $ 
-- > CVS $Author: peb $
-- > CVS $Revision: 1.1 $
--
-- Basic type declarations and functions to be used when parsing
-----------------------------------------------------------------------------


module GF.OldParsing.Utilities 
    ( -- * Symbols
      Symbol(..), symbol, mapSymbol,
      -- * Edges
      Edge(..),
      -- * Parser input
      Input(..), makeInput, input, inputMany,
      -- * charts, parse forests & trees
      ParseChart, ParseForest(..), ParseTree(..),
      chart2forests, forest2trees
    ) where

-- haskell modules:
import Monad
import Array
-- gf modules:
import GF.Data.SortedList
import GF.Data.Assoc
-- parsing modules:
import GF.Printing.PrintParser

------------------------------------------------------------
-- symbols

data Symbol c t = Cat c | Tok t
		  deriving (Eq, Ord, Show)

symbol    :: (c -> a) -> (t -> a) -> Symbol c t -> a
mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u

----------

symbol fc ft (Cat cat) = fc cat
symbol fc ft (Tok tok) = ft tok

mapSymbol fc ft = symbol (Cat . fc) (Tok . ft)


------------------------------------------------------------
-- edges

data Edge s = Edge Int Int s
	      deriving (Eq, Ord, Show)

instance Functor Edge where
    fmap f (Edge i j s) = Edge i j (f s)


------------------------------------------------------------
-- parser input 

data Input t = MkInput { inputEdges  :: [Edge t],
			 inputBounds :: (Int, Int),
			 inputFrom   :: Array Int (Assoc t [Int]),
			 inputTo     :: Array Int (Assoc t [Int]),
			 inputToken  :: Assoc t [(Int, Int)]
		       }

makeInput :: Ord t => [Edge t] -> Input t
input     :: Ord t =>  [t]     -> Input t
inputMany :: Ord t => [[t]]    -> Input t

----------

makeInput inEdges  | null inEdges = input []
		   | otherwise    = MkInput inEdges inBounds inFrom inTo inToken
    where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ]
	      where minmax (a, b) (a', b') = (min a a', max b b')
	  inFrom   = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $
		     [ (i, [(tok, j)]) | Edge i j tok <- inEdges ]
	  inTo     = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds
		     [ (j, [(tok, i)]) | Edge i j tok <- inEdges ]
	  inToken  = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]

input toks         = MkInput inEdges inBounds inFrom inTo inToken
    where inEdges  = zipWith3 Edge [0..] [1..] toks
	  inBounds = (0, length toks)
	  inFrom   = listArray inBounds $
		     [ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ]
	  inTo     = listArray inBounds $
		     [ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ]
	  inToken  = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]

inputMany toks     = MkInput inEdges inBounds inFrom inTo inToken
    where inEdges  = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ]
	  inBounds = (0, length toks)
	  inFrom   = listArray inBounds $
		     [ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ]
		     ++ [ listAssoc [] ]
	  inTo     = listArray inBounds $
		     [ listAssoc [] ] ++ 
		     [ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ]
	  inToken  = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]


------------------------------------------------------------
-- charts, parse forests & trees

type ParseChart n e = Assoc e [(n, [[e]])]

data ParseForest n = FNode n [[ParseForest n]] | FMeta 
		     deriving (Eq, Ord, Show)

data ParseTree n = TNode n [ParseTree n] | TMeta 
		   deriving (Eq, Ord, Show)

chart2forests :: Ord e => ParseChart n e -> (e -> Bool) -> e -> [ParseForest n]

--filterCoercions :: (n -> Bool) -> ParseForest n -> [ParseForest n]

forest2trees :: ParseForest n -> [ParseTree n]

instance Functor ParseTree where
    fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees
    fmap f (TMeta) = TMeta 
	     
instance Functor ParseForest where
    fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
    fmap f (FMeta) = FMeta 

----------

chart2forests chart isMeta = edge2forests
    where item2forest (name, children) = FNode name $
					 do edges <- children
					    mapM edge2forests edges 
          edge2forests edge 
	      | isMeta     edge = [FMeta]
	      | otherwise       = filter checkForest $ map item2forest $ chart ? edge
	  checkForest (FNode _ children) = not (null children)

-- filterCoercions _ (FMeta) = [FMeta]
-- filterCoercions isCoercion (FNode s forests) 
--     | isCoercion s = do [forest] <- forests ; filterCoercions isCoercion forest
--     | otherwise    = FNode s $ do children <- forests ; mapM (filterCoercions isCoercion)

forest2trees (FNode s forests) = map (TNode s) $ forests >>= mapM forest2trees
forest2trees (FMeta) = [TMeta]



------------------------------------------------------------
-- pretty-printing

instance (Print c, Print t) => Print (Symbol c t) where
    prt = symbol prt (simpleShow.prt)
    prtList = prtSep " "

simpleShow :: String -> String
simpleShow s = "\"" ++ concatMap mkEsc s ++ "\""
    where
    mkEsc :: Char -> String
    mkEsc c = case c of
		     _ | elem c "\\\"" -> '\\' : [c]
		     '\n' -> "\\n"
		     '\t' -> "\\t"
		     _ -> [c]

instance (Print s) => Print (Edge s) where
    prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]"
    prtList = prtSep ""

instance (Print s) => Print (ParseTree s) where
    prt (TNode s trees) = prt s ++ "^{" ++ prtSep " " trees ++ "}"
    prt (TMeta) = "?"
    prtList = prtAfter "\n"

instance (Print s) => Print (ParseForest s) where
    prt (FNode s forests) = prt s ++ "^{" ++ prtSep " | " (map (prtSep " ") forests) ++ "}"
    prt (FMeta) = "?"
    prtList = prtAfter "\n"