summaryrefslogtreecommitdiff
path: root/src/GF/UseGrammar/Parsing.hs
blob: 5d601bc58a6bd692ff9d874f172d5b7a341ada17 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
module Parsing where

import CheckM
import qualified AbsGFC as C
import GFC
import MkGFC (trExp) ----
import CMacros
import MMacros (refreshMetas)
import Linear
import Str
import CF
import CFIdent
import Ident
import TypeCheck
import Values
--import CFMethod
import Tokenize
import Profile
import Option
import Custom
import ShellState

import qualified ExportParser as N

import Operations

import List (nub)
import Monad (liftM)

-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002

parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
parseString os sg cat = liftM fst . parseStringMsg os sg cat

parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
parseStringMsg os sg cat s = do
  (ts,(_,ss)) <- checkStart $ parseStringC os sg cat s
  return (ts,unlines ss)

parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
parseStringC opts0 sg cat s

---- to test peb's new parser 6/10/2003
 | oElem newParser opts0 = do  
  let pm = maybe "" id $ getOptVal opts0 useParser -- -parser=pm
      gr = grammar sg
      ct = cfCat2Cat cat
  ts <- checkErr $ N.newParser pm gr (cfCat2Cat cat) s
  mapM (checkErr . (annotate gr)) ts

 | otherwise = do
  let opts = unionOptions opts0 $ stateOptions sg
      cf  = stateCF sg
      gr  = stateGrammarST sg
      cn  = cncId sg
      tok = customOrDefault opts useTokenizer customTokenizer sg
      parser = customOrDefault opts useParser customParser sg cat
  tokens2trms opts sg cn parser (tok s)

tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
tokens2trms opts sg cn parser as = do
  let res@(trees,info) = parser as
  ts0 <- return $ nub (cfParseResults res)
  ts  <- case () of
    _ | null ts0 -> checkWarn "No success in cf parsing" >> return []
    _ | raw      -> do
      ts1 <- return (map cf2trm0 ts0) ----- should not need annot
      mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated; often fails
    _ -> do
      (ts1,ss) <- checkErr $ mapErr postParse ts0
      if null ts1 then raise ss else return ()
      ts2 <- mapM (checkErr . annotate gr . refreshMetas [] . trExp) ts1 ---- 
      if forgive then return ts2 else do
        let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
            ps = [t | (t,ss) <- tsss, 
                      any (compatToks as) (map str2cftoks ss)]
        if null ps 
           then raise $ "Failure in morphology." ++
                  if verb 
                     then "\nPossible corrections: " +++++
                          unlines (nub (map sstr (concatMap snd tsss)))
                     else ""
           else return ps

  if verb 
     then checkWarn $ " the token list" +++ show as ++++ unknown as +++++ info
     else return ()

  return $ optIntOrAll opts flagNumber $ nub ts
 where
   gr  = stateGrammarST sg

   raw     = oElem rawParse opts
   verb    = oElem beVerbose opts
   forgive = oElem forgiveParse opts

   unknown ts = case filter noMatch [t | t@(TS _) <- ts] of
     [] -> "where all words are known"
     us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
   terminals = map TS $ stateGrammarWords sg
   noMatch t = all (not . compatTok t) terminals 
     

--- too much type checking in building term info? return FullTerm to save work?

-- raw parsing: so simple it is for a context-free CF grammar
cf2trm0 :: CFTree -> C.Exp
cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
 where
   cffun2trm (CFFun (fun,_)) = fun
   mkApp = foldl C.EApp
   mkAppAtom a = mkApp (C.EAtom a)