1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
|
-- [1416,4467,4623,4871,4561,4303,3763,3137,2501,1857,1353,952,646,483,332,200,116,89,54,41,20,22,7,2,4,5,0,3,2,1,0,0,0,0,0,1]
-- average 5
import Monad
import Idents
import PennFormat
import PGF hiding (Tree,parse)
import Control.Monad
import System.IO
import System.Process
import Data.Maybe
import Data.List
import Data.IORef
import Data.Char
import Data.Tree
test = False
main = do
pgf <- readPGF "ParseEngAbs.pgf"
let Just language = readLanguage "ParseEng"
morpho = buildMorpho pgf language
s <- readFile "wsj.02-21"
ref <- newIORef (0,0,0)
mapM_ (process pgf morpho ref) ((if test then take 40 else id) (parseTreebank s))
where
process pgf morpho ref t = do
(cn,co,l) <- readIORef ref
let e = (flatten . parse penn pgf morpho . prune) t
(cn',co') = count (cn,co) e
l' = l+1
writeIORef ref (cn',co',l')
hPutStrLn stdout (showExpr [] e)
when test $ do
writeFile ("tmp_tree.dot") (graphvizAbstractTree pgf (True,False) e)
rawSystem "dot" ["-Tpdf", "tmp_tree.dot", "-otrees/tree"++showAlign l'++".pdf"]
return ()
hPutStrLn stderr (show ((fromIntegral cn' / fromIntegral co') * 100))
count (cn,co) e = cn `seq` co `seq`
case unApp e of
Just (f,es) -> if f == meta
then foldl' count (cn, co+1) es
else foldl' count (cn+1,co+1) es
Nothing -> (cn+1,co+1)
showAlign n =
replicate (5 - length s) '0' ++ s
where
s = show n
prune (Node tag ts)
| tag == "S"
&& not (null ts)
&& last ts == Node "." [Node "." []] = Node tag (init ts)
| otherwise = Node tag ts
flatten e =
case unApp e of
Just (f,es) | f == meta -> mkApp f (concatMap grab es)
| otherwise -> mkApp f (map flatten es)
Nothing -> e
grab e =
case unApp e of
Just (f,es) | f == meta -> concatMap grab es
| otherwise -> [mkApp f (map flatten es)]
Nothing -> []
penn :: Grammar String Expr
penn =
grammar (mkApp meta)
[ "ADVP":-> do adv <- cat "RB"
case unApp adv of
Just (f,[a]) | f == cidPositAdvAdj -> return (mkApp cidPositAdVAdj [a])
_ -> mzero
`mplus`
do adV <- inside "RB" (lemma "AdV" "s")
return (mkApp adV [])
, "ADJP":-> do adas <- many pAdA
v <- inside "JJ" (lemma "V2" "s VPPart")
pps <- many (cat "PP")
let adj = mkApp cidPastPartAP [mkApp v []]
ap0 = foldr (\ada ap -> mkApp cidAdAP [ada,ap]) adj adas
ap = foldr (\pp ap -> mkApp cidAdvAP [ap,pp]) ap0 pps
return ap
`mplus`
do adas0 <- many pAdA
adjs <- many1 (cat "JJ")
let adj = last adjs
adas = adas0 ++ [mkApp cidPositAdAAdj [adj] | adj <- init adjs]
ap = foldr (\ada ap -> mkApp cidAdAP [ada,ap]) (mkApp cidPositA [adj]) adas
return ap
, "S" :-> do advs <- many $ do pp <- cat "PP"
inside "," word
return pp
`mplus`
do cat "ADVP"
e0 <- do (tmp,pol,sl,e) <- pClSlash
guard (not sl)
return (mkApp cidUseCl [tmp,pol,e])
`mplus`
do s <- cat "S"
inside "," word
np <- cat "NP"
inside "VP" $ do
(t,v) <- pV "VS"
inside "SBAR" $ do
cat "-NONE-"
inside "S" $ do
cat "-NONE-"
return (mkApp cidUseCl [mkApp cidTTAnt [ mkApp (fromMaybe meta (isVTense t)) []
, mkApp cidASimul []
]
,mkApp cidPPos []
,mkApp cidComplPredVP [np,mkApp cidComplVS [mkApp v [],s]]
])
opt (inside "." word) ""
return (foldr (\ad e -> mkApp cidAdvS [ad, e]) e0 advs)
`mplus`
do s1 <- cat "S"
opt (inside "," word) ""
cc <- cat "CC"
s2 <- cat "S"
return (mkApp cidConjS [cc, mkApp cidBaseS [s1,s2]])
, "SBAR" :-> do (do cat "-NONE-" -- missing preposition
return ()
`mplus`
do w <- inside "IN" word
guard (w == "that"))
cat "S"
, "NP" :-> do (m_cc,list_np) <- pBaseNPs
case m_cc of
Just cc -> return (mkApp cidConjNP [cc, mkListNP list_np])
Nothing -> if length list_np > 1
then return (mkApp meta list_np)
else return (head list_np)
`mplus`
do np <- cat "NP"
rs <- inside "SBAR" $
do rp <- cat "WHNP"
inside "S" $
do (tmp,pol,sl,e) <- pClSlash
guard sl
return (mkApp cidUseRCl [tmp,pol,mkApp cidRelSlash [rp,e]])
`mplus`
do inside "NP" (cat "-NONE-")
(tmp,pol,sl,vp) <- inside "VP" pVP
guard (not sl)
return (mkApp cidUseRCl [fromMaybe (mkApp meta []) (isVTense tmp)
,mkApp pol []
,mkApp cidRelVP [rp,vp]])
return (mkApp cidRelNP [np,rs])
`mplus`
do (m_cc,list_np) <- pNPs
case m_cc of
Just cc -> return (mkApp cidConjNP [cc, mkListNP list_np])
Nothing -> if length list_np > 1
then return (mkApp meta list_np)
else return (head list_np)
, "VP" :-> do (_,_,_,e) <- pVP
return e
, "PP" :-> do prep <- do cat "IN"
`mplus`
do inside "TO" word
return (mkApp cidto_Prep [])
`mplus`
do w1 <- inside "JJ" word
w2 <- inside "IN" word
guard (w1 == "such" && w2 == "as")
return (mkApp cidsuch_as_Prep [])
np <- cat "NP"
return (mkApp cidPrepNP [prep,np])
`mplus`
do pp1 <- cat "PP"
inside "," word
conj <- cat "CC"
pp2 <- cat "PP"
opt (inside "," word) ""
return (mkApp cidConjAdv [conj, mkApp cidBaseAdv [pp1,pp2]])
, "CC" :-> do cc <- word
case cc of
"and" -> return (mkApp cidand_Conj [])
"&" -> return (mkApp cidamp_Conj [])
"or" -> return (mkApp cidor_Conj [])
_ -> mzero
, "DT" :-> do (dt,b) <- pDT
return dt
, "IN" :-> do prep <- lemma "Prep" "s"
return (mkApp prep [])
, "NN" :-> do transform (concatMap splitDashN)
(do n <- lemma "N" "s Sg Nom"
(do inside "-" word
n2 <- lemma "N" "s Sg Nom"
return (mkApp cidDashCN [mkApp n [], mkApp n2 []])
`mplus`
do return (mkApp n [])))
`mplus`
do v <- lemma "V" "s VPresPart"
return (mkApp cidGerundN [mkApp v []])
, "NNS" :-> do transform (concatMap splitDashN)
(do n <- lemma "N" "s Pl Nom"
return (mkApp n [])
`mplus`
do n1 <- lemma "N" "s Sg Nom"
inside "-" word
n2 <- lemma "N" "s Pl Nom"
return (mkApp cidDashCN [mkApp n1 [], mkApp n2 []]))
, "PRP" :-> do p <- (lemma "Pron" "s (NCase Nom)"
`mplus`
lemma "Pron" "s NPAcc"
`mplus`
(do w <- word
guard (w == "I") -- upper case word
return cidi_Pron))
return (mkApp p [])
, "PRP$":-> do p <- lemma "Pron" "s (NCase Gen)"
return (mkApp cidPossPron [mkApp p []])
, "RB" :-> do a <- lemma "A" "s AAdv"
return (mkApp cidPositAdvAdj [mkApp a []])
`mplus`
do adv <- lemma "Adv" "s"
return (mkApp adv [])
, "QP" :-> do adn <- inside "IN" (lemma "AdN" "s")
num <- pCD
return (mkApp cidDetQuant [mkApp cidIndefArt [], mkApp cidNumCard [mkApp cidAdNum [mkApp adn [], num]]])
, "WHNP":-> cat "WP"
`mplus`
cat "WDT"
`mplus`
cat "WP$"
`mplus`
do cat "-NONE-"
return (mkApp cidno_RP [])
`mplus`
do w <- inside "IN" word
guard (w == "that")
return (mkApp cidthat_RP [])
, "-NONE-"
:-> return (mkApp meta [])
, "JJ" :-> do a <- lemma "A" "s (AAdj Posit Nom)"
return (mkApp a [])
, "JJR" :-> do a <- lemma "A" "s (AAdj Compar Nom)"
return (mkApp a [])
, "JJS" :-> do a <- lemma "A" "s (AAdj Superl Nom)"
return (mkApp cidOrdSuperl [mkApp a []])
, "VB" :-> do v <- mplus (lemma "V" "s VInf") (lemma "V2" "s VInf")
return (mkApp v [])
, "VBD" :-> do v <- mplus (lemma "V" "s VPast") (lemma "V2" "s VPast")
return (mkApp v [])
, "VBG" :-> do v <- mplus (lemma "V" "s VPresPart") (lemma "V2" "s VPresPart")
return (mkApp v [])
, "VBN" :-> do v <- mplus (lemma "V" "s VPPart") (lemma "V2" "s VPPart")
return (mkApp v [])
, "VBP" :-> do v <- mplus (lemma "V" "s VInf") (lemma "V2" "s VInf")
return (mkApp v [])
, "VBZ" :-> do v <- mplus (lemma "V" "s VPres") (lemma "V2" "s VPres")
return (mkApp v [])
, "PDT" :-> do pdt <- lemma "Predet" "s"
return (mkApp pdt [])
, "WP" :-> do rp <- (lemma "RP" "s (RC Masc (NCase Nom))"
`mplus`
lemma "RP" "s (RC Masc NPAcc)")
return (mkApp rp [])
, "WDT" :-> do rp <- lemma "RP" "s (RC Neutr (NCase Nom))"
return (mkApp rp [])
, "WP$" :-> do rp <- lemma "RP" "s (RC Masc (NCase Gen))"
return (mkApp rp [])
]
data VForm a
= VInf | VPart | VGerund | VTense a
instance Functor VForm where
fmap f VInf = VInf
fmap f VPart = VPart
fmap f VGerund = VGerund
fmap f (VTense t) = VTense (f t)
isVInf VInf = True
isVInf _ = False
isVPart VPart = True
isVPart _ = False
isVGerund VGerund = True
isVGerund _ = False
isVTense (VTense t) = Just t
isVTense _ = Nothing
pVP = do
(t,a,p,sl,e0) <- do t <- pCopula
p <- pPol
inside "VP" $ do
advs <- many (cat "ADVP")
(t',p',sl,e0) <- pVP
guard (isVPart t' && sl && p' == cidPPos)
let e1 = mkApp cidPassVPSlash [e0]
e2 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e1 advs
return (t,cidASimul,p,False,e2)
`mplus`
do t <- pCopula
p <- pPol
advs <- many (cat "ADVP")
e <- do e <- cat "ADJP"
return (mkApp cidCompAP [e])
`mplus`
do e <- cat "NP"
return (mkApp cidCompNP [e])
`mplus`
do e <- cat "NP"
return (mkApp cidCompNP [e])
`mplus`
do e <- cat "PP"
return (mkApp cidCompAdv [e])
`mplus`
do e <- cat "SBAR"
return (mkApp cidCompS [e])
`mplus`
do e <- inside "S" $ do
inside "NP" (cat "-NONE-")
(tmp,pol,sl,e) <- inside "VP" pVP
guard (isVInf tmp && not sl && pol == cidPPos)
return e
return (mkApp cidCompVP [e])
let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) (mkApp cidUseComp [e]) advs
return (t,cidASimul,p,False,e1)
`mplus`
do t <- pCopula
p <- pPol
advs <- many (cat "ADVP")
(tmp,pol,sl,e) <- inside "VP" pVP
guard (isVGerund tmp && not sl && pol == cidPPos)
let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e advs
return (t,cidASimul,p,False,mkApp cidProgrVP [e1])
`mplus`
do t <- pCopula
p <- pPol
adv <- cat "ADVP"
return (t,cidASimul,p,False,mkApp cidUseComp [mkApp cidCompAdv [adv]])
`mplus`
do w <- inside "MD" word
t <- case w of
"will" -> return cidTFut
"would" -> return cidTCond
_ -> mzero
p <- pPol
advs <- many (cat "ADVP")
(tmp,pol,sl,e0) <- inside "VP" pVP
guard (isVInf tmp && pol == cidPPos)
let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (VTense t,cidASimul,p,sl,e1)
`mplus`
do t <- pHave
p <- pPol
advs <- many (cat "ADVP")
(tmp,pol,sl,e0) <- inside "VP" pVP
guard (isVPart tmp && pol == cidPPos)
let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (t,cidAAnter,p,sl,e1)
`mplus`
do t <- pDo
p <- pPol
advs <- many (cat "ADVP")
(tmp,p',sl,e0) <- inside "VP" $ pVP
guard (p' == cidPPos)
let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (t,cidASimul,p,sl,e1)
`mplus`
do advs <- many (cat "ADVP")
inside "TO" word -- infinitives
e0 <- cat "VP"
let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (VInf,cidASimul,cidPPos,False,e1)
`mplus`
do advs1 <- many (cat "ADVP")
(t,v) <- pV "V2"
pps <- many (cat "PP")
let e0 = mkApp cidSlashV2a [mkApp v []]
e1 = foldl (\e pp -> mkApp cidAdvVPSlash [e, pp]) e0 pps
(sl,e2) <- (do (inside "NP" (cat "-NONE-")
`mplus`
inside "SBAR" (cat "-NONE-"))
return (True,e1)
`mplus`
do np <- cat "NP"
return (False,mkApp cidComplSlash [e1, np]))
let e3 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e2 advs1
return (t,cidASimul,cidPPos,sl,e3)
`mplus`
do (t,v) <- inside "MD" $
(do v <- lemma "VV" "s (VVF VPres)"
return (cidTPres,v)
`mplus`
do v <- lemma "VV" "s (VVF VPast)"
return (cidTPast,v))
p <- pPol
advs <- many (cat "ADVP")
vp <- cat "VP"
let e0 = mkApp cidComplVV [mkApp v [], vp]
e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (VTense t,cidASimul,p,False,e1)
`mplus`
do advs <- many (cat "ADVP")
(t,v) <- pVV
vp <- inside "S" $ do
inside "NP" (cat "-NONE-")
(tmp,pol,sl,e) <- inside "VP" pVP
guard ((isVInf tmp || isVGerund tmp) && not sl && pol == cidPPos)
return e
let e0 = mkApp cidComplVV [mkApp v [], vp]
e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (t,cidASimul,cidPPos,False,e1)
`mplus`
do advs <- many (cat "ADVP")
(t,v) <- pV "V2V"
inside "S" $
(do inside "NP" (cat "-NONE-")
(tmp,pol,sl,vp) <- inside "VP" pVP
guard (isVInf tmp && not sl)
let e0 = mkApp cidSlashV2V [mkApp v [], mkApp pol [], vp]
e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (t,cidASimul,cidPPos,True,e1)
`mplus`
do np <- cat "NP"
(tmp,pol,sl,vp) <- inside "VP" pVP
guard (isVInf tmp && not sl)
let e0 = mkApp cidComplSlash [mkApp cidSlashV2V [mkApp v [], mkApp pol [], vp], np]
e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (t,cidASimul,cidPPos,False,e1))
`mplus`
do advs <- many (cat "ADVP")
(t,v) <- pV "VA"
adjp <- cat "ADJP"
let e0 = mkApp cidComplVA [mkApp v [], adjp]
e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (t,cidASimul,cidPPos,False,e1)
`mplus`
do advs <- many (cat "ADVP")
(t,v) <- pV "VS"
s <- cat "S" `mplus` cat "SBAR"
let e0 = mkApp cidComplVS [mkApp v [], s]
e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (t,cidASimul,cidPPos,False,e1)
`mplus`
do advs <- many (cat "ADVP")
(t,v) <- pV "V"
let e0 = mkApp cidUseV [mkApp v []]
e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
return (t,cidASimul,cidPPos,False,e1)
pps <- many (cat "PP"
`mplus`
inside "ADVP" (cat "RB"))
let tmp = fmap (\t -> mkApp cidTTAnt [mkApp t [],mkApp a []]) t
e1 = foldl (\e pp -> mkApp (if sl then cidAdvVPSlash else cidAdvVP) [e, pp]) e0 pps
return (tmp, p, sl, e1)
pClSlash = do np <- cat "NP"
advs <- many (cat "ADVP")
(tmp,pol,sl,vp) <- do (tmp,pol,sl,vp) <- inside "VP" pVP
return (isVTense tmp,pol,sl,vp)
`mplus`
do vp <- cat "VP"
return (Nothing,meta,False,vp)
let vp1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) vp advs
return (fromMaybe (mkApp meta []) tmp
,mkApp pol []
,sl
,mkApp (if sl then cidSlashVP else cidPredVP) [np,vp1]
)
pV cat =
do v <- lookup "VB" "s VInf"
return (VInf,v)
`mplus`
do v <- lookup "VBP" "s VInf"
return (VTense cidTPres,v)
`mplus`
do v <- lookup "VBZ" "s VPres"
return (VTense cidTPres,v)
`mplus`
do v <- lookup "VBD" "s VPast"
return (VTense cidTPast,v)
`mplus`
do v <- lookup "VBN" "s VPPart"
return (VPart,v)
`mplus`
do v <- lookup "VBG" "s VPresPart"
return (VGerund,v)
where
lookup pos fld =
inside pos $
(do lemma cat fld
`mplus`
do w <- word
return (mkCId ("["++w++"_"++cat++"]")))
pVV =
do v <- lookup "VB" "s (VVF VInf)"
return (VInf,v)
`mplus`
do v <- lookup "VBP" "s (VVF VInf)"
return (VTense cidTPres,v)
`mplus`
do v <- lookup "VBZ" "s (VVF VPres)"
return (VTense cidTPres,v)
`mplus`
do v <- lookup "VBD" "s (VVF VPast)"
return (VTense cidTPast,v)
`mplus`
do v <- lookup "VBN" "s (VVF VPPart)"
return (VPart,v)
`mplus`
do v <- lookup "VBG" "s (VVF VPresPart)"
return (VGerund,v)
where
lookup pos fld =
inside pos $
(do lemma "VV" fld
`mplus`
do w <- word
return (mkCId ("["++w++"_VV]")))
pCopula =
do s <- inside "VB" word
guard (s == "be")
return VInf
`mplus`
do s <- inside "VBP" word
guard (s == "am" || s == "'m" || s == "are" || s == "'re")
return (VTense cidTPres)
`mplus`
do s <- inside "VBZ" word
guard (s == "is" || s == "'s")
return (VTense cidTPres)
`mplus`
do s <- inside "VBD" word
guard (s == "were" || s == "was")
return (VTense cidTPast)
`mplus`
do s <- inside "VBN" word
guard (s == "been")
return VPart
`mplus`
do s <- inside "VBG" word
guard (s == "being")
return VGerund
pDo =
do s <- inside "VB" word
guard (s == "do")
return VInf
`mplus`
do s <- inside "VBP" word
guard (s == "do")
return (VTense cidTPres)
`mplus`
do s <- inside "VBZ" word
guard (s == "does")
return (VTense cidTPres)
`mplus`
do s <- inside "VBD" word
guard (s == "did")
return (VTense cidTPast)
pHave =
do s <- inside "VB" word
guard (s == "have")
return VInf
`mplus`
do s <- inside "VBP" word
guard (s == "have")
return (VTense cidTPres)
`mplus`
do s <- inside "VBZ" word
guard (s == "has")
return (VTense cidTPres)
`mplus`
do s <- inside "VBD" word
guard (s == "had")
return (VTense cidTPast)
`mplus`
do s <- inside "VBN" word
guard (s == "had")
return VPart
pPol =
do w <- inside "RB" word
guard (w == "n't" || w == "not")
return cidPNeg
`mplus`
do return cidPPos
pBaseNP =
do np <- inside "NN" (lemma "NP" "s (NCase Nom)")
return (mkApp np [])
`mplus`
do m_pdt <- opt (liftM Just (cat "PDT")) Nothing
m_q <- opt (liftM Just pQuant) Nothing
m_num <- opt (liftM Just pCD ) Nothing
m_ord <- opt (liftM Just (cat "JJS")) Nothing
adjs <- many pModCN
ns <- many1 (mplus (cat "NN" >>= \n -> return (n,cidNumSg))
(cat "NNS" >>= \n -> return (n,cidNumPl)))
let (n,s) = last ns
cn0 = foldr (\(n,s) e -> mkApp cidCompoundCN [mkApp s [], n, e])
(mkApp cidUseN [n])
(init ns)
cn = foldr (\adj e -> mkApp cidAdjCN [adj, e])
cn0
adjs
num = maybe (mkApp s []) (\n -> mkApp cidNumCard [n]) m_num
mkDetQuant q num =
case m_ord of
Just ord -> mkApp cidDetQuantOrd [q,num,ord]
Nothing -> mkApp cidDetQuant [q,num]
e0 <- if s == cidNumSg
then case m_q of
Just (q,True) -> return (mkApp cidDetCN [mkDetQuant q num,cn])
Just (q,False) -> return (mkApp cidDetCN [q,cn])
Nothing -> do guard (isNothing m_num)
return (mkApp cidMassNP [cn])
else case m_q of
Just (q,True) -> return (mkApp cidDetCN [mkDetQuant q num,cn])
Just (q,False) -> return (mkApp cidDetCN [q,cn])
Nothing -> return (mkApp cidDetCN [mkDetQuant (mkApp cidIndefArt []) num,cn])
let e1 = case m_pdt of
Just pdt -> mkApp cidPredetNP [pdt,e0]
Nothing -> e0
return e1
`mplus`
do dt <- cat "QP"
n <- mplus (cat "NN") (cat "NNS")
return (mkApp cidDetCN [dt,mkApp cidUseN [n]])
`mplus`
do m_q <- opt (liftM Just pQuant) Nothing
ws2 <- many1 (inside "NNP" word `mplus` inside "NNPS" word)
let e0 = mkApp cidSymbPN
[mkApp cidMkSymb
[mkStr (unwords ws2)]]
case m_q of
Just (q,b) -> do guard b
return (mkApp cidUseQuantPN [q,e0])
Nothing -> return (mkApp cidUsePN [e0])
`mplus`
do p <- inside "PRP" (lemma "NP" "s (NCase Nom)")
return (mkApp p [])
`mplus`
do p <- cat "PRP"
return (mkApp cidUsePron [p])
`mplus`
do np <- cat "NP"
pps <- many1 (cat "PP")
prns <- many (cat "PRN")
let e0 = foldl (\e pp -> mkApp cidAdvNP [e, pp]) np pps
e1 = foldl (\e pn -> mkApp meta [e, pn]) e0 prns
return e1
`mplus`
do np <- cat "NP"
inside "," word
(t',p',sl,vp) <- inside "VP" pVP
guard (isVPart t' && sl && p' == cidPPos)
inside "," word
return (mkApp meta [np, vp])
`mplus`
do (q,b) <- pQuant
return (mkApp cidDetNP [if b
then mkApp cidDetQuant [mkApp cidIndefArt [],mkApp cidNumSg []]
else q])
`mplus`
do n <- pCD
return (mkApp cidDetNP [mkApp cidDetQuant [mkApp cidIndefArt [],mkApp cidNumCard [n]]])
pBaseNPs = do
np <- pBaseNP
(do inside "," word
(m_cc,nps) <- pBaseNPs
return (m_cc ,np:nps)
`mplus`
do cc <- cat "CC"
np2 <- pBaseNP
return (Just cc,[np,np2])
`mplus`
do return (Nothing,[np]))
pNPs = do
(t1,t2) <- do w <- inside "DT" word
case map toLower w of
"both" -> return (mkApp cidand_Conj [],mkApp cidboth7and_DConj [])
"either" -> return (mkApp cidor_Conj [],mkApp cideither7or_DConj [])
_ -> mzero
`mplus`
do return (mkApp meta [],mkApp meta [])
(m_cc,nps) <- pList
return (fmap (toDConj t1 t2) m_cc,nps)
where
toDConj t1 t2 cc
| cc == t1 = t2
| otherwise = cc
pList = do
np <- cat "NP"
(do inside "," word
(m_cc,nps) <- pList
return (m_cc ,np:nps)
`mplus`
do cc <- cat "CC"
np2 <- cat "NP"
return (Just cc,[np,np2])
`mplus`
do return (Nothing,[np]))
mkListNP nps0 =
foldr (\np1 np2 -> mkApp cidConsNP [np1,np2]) (mkApp cidBaseNP nps2) nps1
where
(nps1,nps2) = splitAt (length nps0-2) nps0
pModCN =
do v <- inside "VBN" (lemma "V2" "s VPPart")
return (mkApp cidPastPartAP [mkApp v []])
`mplus`
do v <- inside "JJ" (lemma "V2" "s VPPart")
return (mkApp cidPastPartAP [mkApp v []])
`mplus`
do v <- inside "JJ" (lemma "V" "s VPresPart")
return (mkApp cidGerundAP [mkApp v []])
`mplus`
do a <- cat "JJ"
return (mkApp cidPositA [a])
`mplus`
do a <- cat "ADJP"
return a
pCD =
do w0 <- inside "CD" word
let w = filter (/=',') w0
guard (not (null w) && all isDigit w)
let es = [mkApp (mkCId ("D_"++[d])) [] | d <- w]
e0 = foldr (\e1 e2 -> mkApp cidIIDig [e1,e2]) (mkApp cidIDig [last es]) (init es)
e1 = mkApp cidNumDigits [e0]
return e1
`mplus`
do w <- inside "CD" word
e <- case map toLower w of
"one" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot01 []]]]])
"two" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn2 []]]]]])
"three" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn3 []]]]]])
"four" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn4 []]]]]])
"five" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn5 []]]]]])
"six" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn6 []]]]]])
"seven" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn7 []]]]]])
"eight" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn8 []]]]]])
"nine" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn9 []]]]]])
_ -> mzero
return (mkApp cidNumNumeral [e])
`mplus`
do cat "CD"
pQuant =
inside "DT" pDT
`mplus`
do dt <- cat "PRP$"
return (dt,True)
`mplus`
do np <- inside "NP" $ do
np <- pBaseNP
inside "POS" word
return np
return (mkApp cidGenNP [np],True)
`mplus`
do dt <- pMany
return (dt,False)
pDT =
do dt <- mplus (lemma "Quant" "s False Sg")
(lemma "Quant" "s False Pl")
return (mkApp dt [],True)
`mplus`
do dt <- lemma "Det" "s"
return (mkApp dt [],False)
pMany =
do w <- inside "JJ" word
guard (map toLower w == "many")
return (mkApp cidmany_Det [])
pAdA = do adv <- cat "RB"
case unApp adv of
Just (f,[a]) | f == cidPositAdvAdj
-> return (mkApp cidPositAdAAdj [a])
_ -> mzero
`mplus`
do ada <- inside "RB" (lemma "AdA" "s")
return (mkApp ada [])
splitDashN (Node w []) =
case break (=='-') w of
(w1,'-':w2) -> Node w1 [] : Node "-" [Node "-" []] : splitDashN (Node w2 [])
_ -> [Node w []]
splitDashN t = [t]
meta = mkCId "?"
|