1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
----------------------------------------------------------------------
-- |
-- Module : Parsing
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/02 10:23:52 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.25 $
--
-- (Description of the module)
-----------------------------------------------------------------------------
module GF.UseGrammar.Parsing where
import GF.Infra.CheckM
import qualified GF.Canon.AbsGFC as C
import GF.Canon.GFC
import GF.Canon.MkGFC (trExp) ----
import GF.Canon.CMacros
import GF.Grammar.MMacros (refreshMetas)
import GF.UseGrammar.Linear
import GF.Data.Str
import GF.CF.CF
import GF.CF.CFIdent
import GF.Infra.Ident
import GF.Grammar.TypeCheck
import GF.Grammar.Values
--import CFMethod
import GF.UseGrammar.Tokenize
import GF.UseGrammar.Morphology (isKnownWord)
import GF.CF.Profile
import GF.Infra.Option
import GF.UseGrammar.Custom
import GF.Compile.ShellState
import GF.CF.PPrCF (prCFTree)
-- import qualified GF.OldParsing.ParseGFC as NewOld -- OBSOLETE
import qualified GF.Parsing.GFC as New
import GF.Data.Operations
import Data.List (nub,sortBy)
import Data.Char (toLower)
import Control.Monad (liftM)
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
parseString os sg cat = liftM fst . parseStringMsg os sg cat
parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
parseStringMsg os sg cat s = do
case checkStart $ parseStringC os sg cat s of
Ok (ts,(_,ss)) -> return (ts, unlines $ reverse ss)
Bad s -> return ([],s)
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
parseStringC opts0 sg cat s
| oElem (iOpt "old") opts0 ||
(not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do
let opts = unionOptions opts0 $ stateOptions sg
cf = stateCF sg
gr = stateGrammarST sg
cn = cncId sg
toks = customOrDefault opts useTokenizer customTokenizer sg s
parser = customOrDefault opts useParser customParser sg cat
if oElem (iOpt "cut") opts
then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks
else mapM (tokens2trms opts sg cn parser) toks >>= return . concat
---- | or [oElem p opts0 |
---- p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do
| otherwise = do
let opts = unionOptions opts0 $ stateOptions sg
algorithm | oElem newCParser opts0 = "c"
| oElem newMParser opts0 = "m"
| oElem newFParser opts0 = "f"
| otherwise = "f" -- default algorithm: FCFG
strategy = maybe "bottomup" id $ getOptVal opts useParser
-- -parser=bottomup/topdown
tokenizer = customOrDefault opts useTokenizer customTokenizer sg
toks = case tokenizer s of
t:_ -> t
_ -> [] ---- no support for undet. tok.
unknowns =
[w | TC w <- toks, unk w && unk (uncap w)] ++ [w | TS w <- toks, unk w]
where
unk w = not $ isKnownWord (morpho sg) w
uncap (c:cs) = toLower c : cs
uncap s = s
case unknowns of
_:_ | oElem (iOpt "trynextlang") opts -> return []
_:_ -> fail $ "Unknown words:" +++ unwords unknowns
_ -> do
ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
ts' <- checkErr $
allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts
return $ optIntOrAll opts flagNumber ts'
tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info
where result = parser toks
info = snd result
trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2))
trees2trms ::
Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree]
trees2trms opts sg cn as ts0 info = do
let s = unwords $ map prCFTok as
ts <- case () of
_ | null ts0 -> checkWarn ("No success in cf parsing" +++ s) >> return []
_ | raw -> do
ts1 <- return (map cf2trm0 ts0) ----- should not need annot
checks [
mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails
,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return []
]
_ -> do
let num = optIntOrN opts flagRawtrees 999999
let (ts01,rest) = splitAt num ts0
if null rest then return ()
else raise ("Warning: only" +++ show num +++ "raw parses out of" +++
show (length ts0) +++
"considered; use -rawtrees=<Int> to see more"
)
(ts1,ss) <- checkErr $ mapErrN 1 postParse ts01
if null ts1 then raise ss else return ()
ts2 <- checkErr $
allChecks $ map (annotate gr . refreshMetas [] . trExp) ts1 ----
if forgive then return ts2 else do
let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
ps = [t | (t,ss) <- tsss,
any (compatToks as) (map str2cftoks ss)]
if null ps
then raise $ "Failure in morphology." ++
if verb
then "\nPossible corrections: " +++++
unlines (nub (map sstr (concatMap snd tsss)))
else ""
else return ps
if verb
then checkWarn $ " the token list" +++ show as ++++ unknownWords sg as +++++ info
else return ()
return $ optIntOrAll opts flagNumber $ nub ts
where
gr = stateGrammarST sg
raw = oElem rawParse opts
verb = oElem beVerbose opts
forgive = oElem forgiveParse opts
---- Operatins.allChecks :: ErrorMonad m => [m a] -> m [a]
unknownWords sg ts = case filter noMatch [t | t@(TS _) <- ts] of
[] -> "where all words are known"
us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
where
terminals = map TS $ stateGrammarWords sg
noMatch t = all (not . compatTok t) terminals
--- too much type checking in building term info? return FullTerm to save work?
-- | raw parsing: so simple it is for a context-free CF grammar
cf2trm0 :: CFTree -> C.Exp
cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
where
cffun2trm (CFFun (fun,_)) = fun
mkApp = foldl C.EApp
mkAppAtom a = mkApp (C.EAtom a)
|