summaryrefslogtreecommitdiff
path: root/treebanks
diff options
context:
space:
mode:
authorkr.angelov <kr.angelov@gmail.com>2012-10-01 08:52:54 +0000
committerkr.angelov <kr.angelov@gmail.com>2012-10-01 08:52:54 +0000
commit6e3503bb7b6c9aac12711477b8a474ce41c1cd7a (patch)
treeef55a8a965a4a09473bc9dad97a38ab13fc59c1b /treebanks
parentde679b400acdec70a42b09c525c4c8b4f7d33f09 (diff)
move examples/PennTreebank to /treebanks/PennTreebank
Diffstat (limited to 'treebanks')
-rw-r--r--treebanks/PennTreebank/Idents.hs115
-rw-r--r--treebanks/PennTreebank/Monad.hs98
-rw-r--r--treebanks/PennTreebank/PennFormat.hs38
-rw-r--r--treebanks/PennTreebank/training.hs125
-rw-r--r--treebanks/PennTreebank/translate.hs809
5 files changed, 1185 insertions, 0 deletions
diff --git a/treebanks/PennTreebank/Idents.hs b/treebanks/PennTreebank/Idents.hs
new file mode 100644
index 000000000..b084d9214
--- /dev/null
+++ b/treebanks/PennTreebank/Idents.hs
@@ -0,0 +1,115 @@
+module Idents where
+
+import PGF
+
+cidASimul = mkCId "ASimul"
+cidAAnter = mkCId "AAnter"
+cidPositAdvAdj = mkCId "PositAdvAdj"
+cidPositAdVAdj = mkCId "PositAdVAdj"
+cidUseCl = mkCId "UseCl"
+cidPredVP = mkCId "PredVP"
+cidSlashVP = mkCId "SlashVP"
+cidComplPredVP = mkCId "ComplPredVP"
+cidAdjCN = mkCId "AdjCN"
+cidUseN = mkCId "UseN"
+cidDetQuant = mkCId "DetQuant"
+cidDetQuantOrd = mkCId "DetQuantOrd"
+cidNumSg = mkCId "NumSg"
+cidNumPl = mkCId "NumPl"
+cidDetCN = mkCId "DetCN"
+cidIndefArt = mkCId "IndefArt"
+cidUsePN = mkCId "UsePN"
+cidUseQuantPN = mkCId "UseQuantPN"
+cidSymbPN = mkCId "SymbPN"
+cidMkSymb = mkCId "MkSymb"
+cidUsePron = mkCId "UsePron"
+cidConjNP = mkCId "ConjNP"
+cidBaseNP = mkCId "BaseNP"
+cidConsNP = mkCId "ConsNP"
+cidConjCN = mkCId "ConjCN"
+cidBaseCN = mkCId "BaseCN"
+cidConsCN = mkCId "ConsCN"
+cidConjAdv = mkCId "ConjAdv"
+cidBaseAdv = mkCId "BaseAdv"
+cidConsAdv = mkCId "ConsAdv"
+cidBaseS = mkCId "BaseS"
+cidConsS = mkCId "ConsS"
+cidConjS = mkCId "ConjS"
+cidMassNP = mkCId "MassNP"
+cidAdvNP = mkCId "AdvNP"
+cidTPres = mkCId "TPres"
+cidTPast = mkCId "TPast"
+cidTFut = mkCId "TFut"
+cidTCond = mkCId "TCond"
+cidTTAnt = mkCId "TTAnt"
+cidPPos = mkCId "PPos"
+cidPNeg = mkCId "PNeg"
+cidComplSlash = mkCId "ComplSlash"
+cidSlashV2a = mkCId "SlashV2a"
+cidComplVS = mkCId "ComplVS"
+cidComplVV = mkCId "ComplVV"
+cidUseV = mkCId "UseV"
+cidAdVVP = mkCId "AdVVP"
+cidAdvVP = mkCId "AdvVP"
+cidAdvVPSlash = mkCId "AdvVPSlash"
+cidPrepNP = mkCId "PrepNP"
+cidto_Prep = mkCId "to_Prep"
+cidsuch_as_Prep= mkCId "such_as_Prep"
+cidPastPartAP = mkCId "PastPartAP"
+cidPassVPSlash = mkCId "PassVPSlash"
+cidAdvS = mkCId "AdvS"
+cidPositA = mkCId "PositA"
+cidIDig = mkCId "IDig"
+cidIIDig = mkCId "IIDig"
+cidNumCard = mkCId "NumCard"
+cidNumDigits = mkCId "NumDigits"
+cidNumNumeral = mkCId "NumNumeral"
+cidnum = mkCId "num"
+cidpot2as3 = mkCId "pot2as3"
+cidpot1as2 = mkCId "pot1as2"
+cidpot0as1 = mkCId "pot0as1"
+cidpot01 = mkCId "pot01"
+cidpot0 = mkCId "pot0"
+cidn2 = mkCId "n2"
+cidn3 = mkCId "n3"
+cidn4 = mkCId "n4"
+cidn5 = mkCId "n5"
+cidn6 = mkCId "n6"
+cidn7 = mkCId "n7"
+cidn8 = mkCId "n8"
+cidn9 = mkCId "n9"
+cidPossPron = mkCId "PossPron"
+cidCompAP = mkCId "CompAP"
+cidCompNP = mkCId "CompNP"
+cidCompAdv = mkCId "CompAdv"
+cidCompS = mkCId "CompS"
+cidCompVP = mkCId "CompVP"
+cidUseComp = mkCId "UseComp"
+cidCompoundCN = mkCId "CompoundCN"
+cidDashCN = mkCId "DashCN"
+cidProgrVP = mkCId "ProgrVP"
+cidGerundN = mkCId "GerundN"
+cidGerundAP = mkCId "GerundAP"
+cidGenNP = mkCId "GenNP"
+cidPredetNP = mkCId "PredetNP"
+cidDetNP = mkCId "DetNP"
+cidAdAP = mkCId "AdAP"
+cidAdvAP = mkCId "AdvAP"
+cidPositAdAAdj = mkCId "PositAdAAdj"
+cideither7or_DConj = mkCId "either7or_DConj"
+cidboth7and_DConj = mkCId "both7and_DConj"
+cidor_Conj = mkCId "or_Conj"
+cidand_Conj = mkCId "and_Conj"
+cidamp_Conj = mkCId "amp_Conj"
+cidSlashV2V = mkCId "SlashV2V"
+cidComplVA = mkCId "ComplVA"
+cidAdNum = mkCId "AdNum"
+cidi_Pron = mkCId "i_Pron"
+cidOrdSuperl = mkCId "OrdSuperl"
+cidno_RP = mkCId "no_RP"
+cidthat_RP = mkCId "that_RP"
+cidUseRCl = mkCId "UseRCl"
+cidRelSlash = mkCId "RelSlash"
+cidRelNP = mkCId "RelNP"
+cidRelVP = mkCId "RelVP"
+cidmany_Det = mkCId "many_Det"
diff --git a/treebanks/PennTreebank/Monad.hs b/treebanks/PennTreebank/Monad.hs
new file mode 100644
index 000000000..30fd1d7a0
--- /dev/null
+++ b/treebanks/PennTreebank/Monad.hs
@@ -0,0 +1,98 @@
+module Monad ( Rule(..), Grammar, grammar
+ , P, parse
+ , cat, word, lemma, inside, transform
+ , many, many1, opt
+ ) where
+
+import Data.Tree
+import Data.Char
+import qualified Data.Map as Map
+import Control.Monad
+import PGF hiding (Tree,parse)
+
+infix 1 :->
+
+
+data Rule t e = t :-> P t e e
+type Grammar t e = t -> PGF -> Morpho -> [Tree t] -> e
+
+grammar :: (Ord t,Show t) => ([e] -> e) -> [Rule t e] -> Grammar t e
+grammar def rules = gr
+ where
+ gr = \tag ->
+ case Map.lookup tag pmap of
+ Just f -> \pgf m ts -> case unP f gr pgf m ts of
+ Just (e,[]) -> e
+ _ -> case ts of
+ [Node w []] -> def []
+ ts -> def [gr tag pgf m ts | Node tag ts <- ts]
+ Nothing -> \pgf m ts -> case ts of
+ [Node w []] -> def []
+ ts -> def [gr tag pgf m ts | Node tag ts <- ts]
+
+ pmap = Map.fromListWith mplus (map (\(t :-> r) -> (t,r)) rules)
+
+
+newtype P t e a = P {unP :: Grammar t e -> PGF -> Morpho -> [Tree t] -> Maybe (a,[Tree t])}
+
+instance Monad (P t e) where
+ return x = P (\gr pgf m ts -> Just (x,ts))
+ f >>= g = P (\gr pgf m ts -> case unP f gr pgf m ts of
+ Just (x,ts) -> unP (g x) gr pgf m ts
+ Nothing -> Nothing)
+
+instance MonadPlus (P t e) where
+ mzero = P (\gr pgf m ts -> Nothing)
+ mplus f g = P (\gr pgf m ts -> unP f gr pgf m ts `mplus` unP g gr pgf m ts)
+
+
+parse :: Grammar t e -> PGF -> Morpho -> Tree t -> e
+parse gr pgf morpho (Node tag ts) = gr tag pgf morpho ts
+
+cat :: Eq t => t -> P t e e
+cat tag = P (\gr pgf morpho ts ->
+ case ts of
+ (Node tag1 ts1 : ts) | tag == tag1 -> Just (gr tag1 pgf morpho ts1,ts)
+ _ -> Nothing)
+
+word :: P t e t
+word = P (\gr pgf morpho ts ->
+ case ts of
+ (Node w [] : ts) -> Just (w,ts)
+ _ -> Nothing)
+
+inside :: Eq t => t -> P t e a -> P t e a
+inside tag f = P (\gr pgf morpho ts ->
+ case ts of
+ (Node tag1 ts1 : ts) | tag == tag1 -> case unP f gr pgf morpho ts1 of
+ Just (x,[]) -> Just (x,ts)
+ _ -> Nothing
+ _ -> Nothing)
+
+lemma :: String -> String -> P String e CId
+lemma cat0 an0 = P (\gr pgf morpho ts ->
+ case ts of
+ (Node w [] : ts) -> case [lemma | (lemma, an1) <- lookupMorpho morpho (map toLower w)
+ , let cat1 = maybe "" (showType []) (functionType pgf lemma)
+ , cat0 == cat1 && an0 == an1] of
+ (id:_) -> Just (id,ts)
+ _ -> Nothing
+ _ -> Nothing)
+
+transform :: ([Tree t] -> [Tree t]) -> P t e ()
+transform f = P (\gr pgf morpho ts -> Just ((),f ts))
+
+many :: P t e a -> P t e [a]
+many f = do x <- f
+ xs <- many f
+ return (x:xs)
+ `mplus`
+ do return []
+
+many1 :: P t e a -> P t e [a]
+many1 f = do x <- f
+ xs <- many f
+ return (x:xs)
+
+opt :: P t e a -> a -> P t e a
+opt f x = mplus f (return x)
diff --git a/treebanks/PennTreebank/PennFormat.hs b/treebanks/PennTreebank/PennFormat.hs
new file mode 100644
index 000000000..2aaf0a6b6
--- /dev/null
+++ b/treebanks/PennTreebank/PennFormat.hs
@@ -0,0 +1,38 @@
+module PennFormat(parseTreebank, showTree) where
+
+import Text.PrettyPrint
+import Data.Tree
+import Data.Char
+
+parseTreebank :: String -> [Tree String]
+parseTreebank [] = []
+parseTreebank (c:cs)
+ | isSpace c = parseTreebank cs
+ | c == '(' = let (ts,cs1) = parseTrees cs
+ in ts ++ parseTreebank cs1
+
+parseTrees [] = ([],[])
+parseTrees (c:cs)
+ | isSpace c = parseTrees cs
+ | c == ')' = ([],cs)
+ | c == '(' = let (w, cs1) = parseWord cs
+ (children,cs2) = parseTrees cs1
+ (rest, cs3) = parseTrees cs2
+ in (Node (normalize w) children : rest,cs3)
+ | otherwise = let (w, cs1) = parseWord (c:cs)
+ (rest, cs2) = parseTrees cs1
+ in (Node w [] : rest,cs2)
+
+normalize tag =
+ let (tag0,mod) = break (=='-') tag
+ in if null tag0
+ then tag
+ else tag0
+
+parseWord = break (\c -> isSpace c || c == '(' || c == ')')
+
+printTree (Node w []) = text w
+printTree (Node l children) = parens (text l <+> hsep (map printTree children))
+
+showTree :: Tree String -> String
+showTree = render . printTree
diff --git a/treebanks/PennTreebank/training.hs b/treebanks/PennTreebank/training.hs
new file mode 100644
index 000000000..433e5852c
--- /dev/null
+++ b/treebanks/PennTreebank/training.hs
@@ -0,0 +1,125 @@
+import PGF
+import qualified Data.Map as Map
+import Data.Maybe
+import Data.List
+
+main = do
+ pgf <- readPGF "ParseEngAbs.pgf"
+ ls <- fmap (filterExprs . zip [1..] . lines) $ readFile "log4.txt"
+ putStrLn ""
+ putStrLn ("trees: "++show (length ls))
+ let stats = foldl' (collectStats pgf)
+ (initStats pgf)
+ [(n,fromMaybe (error l) (readExpr (toQ l)),Just (mkCId "Phr"),Nothing) | (n,l) <- ls]
+
+ putStrLn ("coverage: "++show (coverage stats))
+
+ putStrLn ("Writing ParseEngAbs.probs...")
+ writeFile "ParseEngAbs.probs" (unlines [show f ++ "\t" ++ show p | (f,p) <- uprobs pgf stats])
+
+ putStrLn ("Writing ParseEngAbs2.probs...")
+ writeFile "ParseEngAbs2.probs" (unlines [show cat1 ++ "\t" ++ show cat2 ++ "\t" ++ show p | (cat1,cat2,p) <- mprobs pgf stats])
+
+ putStrLn ("Writing global.probs...")
+ writeFile "global.probs" (unlines [show f ++ "\t" ++ show p | (f,p) <- gprobs pgf stats])
+
+ putStrLn ("Writing categories.probs...")
+ writeFile "categories.probs" (unlines [show f ++ "\t" ++ show p | (f,p) <- cprobs pgf stats])
+ where
+ toQ [] = []
+ toQ ('[':cs) = let (xs,']':ys) = break (==']') cs
+ in toQ ('?' : ys)
+ toQ ('?':cs) = 'Q' : toQ cs
+ toQ (c:cs) = c : toQ cs
+
+filterExprs [] = []
+filterExprs ((n,l):ls)
+ | null l = filterExprs ls
+ | elem (head l) "+#*" = (n,drop 2 l) : filterExprs ls
+ | otherwise = filterExprs ls
+
+initStats pgf =
+ (Map.fromListWith (+)
+ ([(f,1) | f <- functions pgf] ++
+ [(cat pgf f,1) | f <- functions pgf])
+ ,Map.empty
+ ,0
+ )
+
+collectStats pgf (ustats,bstats,count) (n,e,mb_cat1,mb_cat2) =
+ case unApp e of
+ Just (f,args) -> let fcat2 = cat2 pgf f n e
+ fcat = fromMaybe (cat2 pgf f n e) mb_cat1
+ cf = fromMaybe 0 (Map.lookup f ustats)
+ cc = fromMaybe 0 (Map.lookup fcat ustats)
+ in if isJust mb_cat1 && f /= mkCId "Q" && fcat /= fcat2
+ then error (show n ++ ": " ++ showExpr [] e)
+ else
+ cf `seq` cc `seq` bstats `seq` count `seq`
+ foldl' (collectStats pgf)
+ (Map.insert f (cf+1) (Map.insert fcat (cc+1) ustats)
+ ,(if null args
+ then Map.insertWith (+) (fcat,wildCId) 1
+ else id)
+ (maybe bstats (\cat2 -> Map.insertWith (+) (cat2,fcat) 1 bstats) mb_cat2)
+ ,count+1
+ )
+ (zipWith3 (\e mb_cat1 mb_cat2 -> (n,e,mb_cat1,mb_cat2)) args (argCats f) (repeat (Just fcat)))
+ Nothing -> case unStr e of
+ Just _ -> (ustats,bstats,count+1)
+ Nothing -> error ("collectStats ("++showExpr [] e++")")
+ where
+ argCats f =
+ case fmap unType (functionType pgf f) of
+ Just (arg_tys,_,_) -> let tyCat (_,_,ty) = let (_,cat,_) = unType ty in Just cat
+ in map tyCat arg_tys
+ Nothing -> repeat Nothing
+
+coverage (ustats,bstats,count) =
+ let c = fromMaybe 0 (Map.lookup (mkCId "Q") ustats)
+ in (fromIntegral (count - c) / fromIntegral count) * 100
+
+uprobs pgf (ustats,bstats,count) =
+ [toProb f (cat pgf f) | f <- functions pgf]
+ where
+ toProb f cat =
+ let count = fromMaybe 0 (Map.lookup f ustats)
+ cat_mass = fromMaybe 0 (Map.lookup cat ustats)
+ in (f, fromIntegral count / fromIntegral cat_mass :: Double)
+
+mprobs pgf (ustats,bstats,count) =
+ concat [toProb cat | cat <- categories pgf]
+ where
+ toProb cat =
+ let mass = sum [count | ((cat1,cat2),count) <- Map.toList bstats, cat1==cat]
+ cat_count = fromMaybe 0 (Map.lookup cat ustats)
+ fun_count = sum [fromMaybe 0 (Map.lookup f ustats) | f <- functionsByCat pgf cat]
+ in (cat,mkCId "*",if cat_count == 0 then 0 else fromIntegral (cat_count - fun_count) / fromIntegral cat_count) :
+ [(cat1,cat2,fromIntegral count / fromIntegral mass)
+ | ((cat1,cat2),count) <- Map.toList bstats, cat1==cat]
+
+gprobs pgf (ustats,bstats,count) =
+ sortBy (\x y -> compare (snd y) (snd x)) [toProb f | f <- functions pgf]
+ where
+ toProb f =
+ let fcount = fromMaybe 0 (Map.lookup f ustats)
+ in (f, fromIntegral fcount / fromIntegral count :: Double)
+
+cprobs pgf (ustats,bstats,count) =
+ sortBy (\x y -> compare (snd y) (snd x)) [toProb c | c <- categories pgf]
+ where
+ mass = sum [fromMaybe 0 (Map.lookup c ustats) | c <- categories pgf]
+
+ toProb c =
+ let fcount = fromMaybe 0 (Map.lookup c ustats)
+ in (c, fromIntegral fcount / fromIntegral mass :: Double)
+
+cat pgf f =
+ case fmap unType (functionType pgf f) of
+ Just (_,cat,_) -> cat
+ Nothing -> error ("Unknown function "++showCId f)
+
+cat2 pgf f n e =
+ case fmap unType (functionType pgf f) of
+ Just (_,cat,_) -> cat
+ Nothing -> error (show n ++ ": Unknown function "++showCId f++" in "++showExpr [] e)
diff --git a/treebanks/PennTreebank/translate.hs b/treebanks/PennTreebank/translate.hs
new file mode 100644
index 000000000..35a17e22a
--- /dev/null
+++ b/treebanks/PennTreebank/translate.hs
@@ -0,0 +1,809 @@
+-- [1416,4467,4623,4871,4561,4303,3763,3137,2501,1857,1353,952,646,483,332,200,116,89,54,41,20,22,7,2,4,5,0,3,2,1,0,0,0,0,0,1]
+-- average 5
+
+import Monad
+import Idents
+import PennFormat
+
+import PGF hiding (Tree,parse)
+import Control.Monad
+import System.IO
+import System.Process
+import Data.Maybe
+import Data.List
+import Data.IORef
+import Data.Char
+import Data.Tree
+
+test = False
+
+main = do
+ pgf <- readPGF "ParseEngAbs.pgf"
+ let Just language = readLanguage "ParseEng"
+ morpho = buildMorpho pgf language
+ s <- readFile "wsj.02-21"
+ ref <- newIORef (0,0,0)
+ mapM_ (process pgf morpho ref) ((if test then take 40 else id) (parseTreebank s))
+ where
+ process pgf morpho ref t = do
+ (cn,co,l) <- readIORef ref
+ let e = (flatten . parse penn pgf morpho . prune) t
+ (cn',co') = count (cn,co) e
+ l' = l+1
+ writeIORef ref (cn',co',l')
+ hPutStrLn stdout (showExpr [] e)
+ when test $ do
+ writeFile ("tmp_tree.dot") (graphvizAbstractTree pgf (True,False) e)
+ rawSystem "dot" ["-Tpdf", "tmp_tree.dot", "-otrees/tree"++showAlign l'++".pdf"]
+ return ()
+ hPutStrLn stderr (show ((fromIntegral cn' / fromIntegral co') * 100))
+
+ count (cn,co) e = cn `seq` co `seq`
+ case unApp e of
+ Just (f,es) -> if f == meta
+ then foldl' count (cn, co+1) es
+ else foldl' count (cn+1,co+1) es
+ Nothing -> (cn+1,co+1)
+
+
+ showAlign n =
+ replicate (5 - length s) '0' ++ s
+ where
+ s = show n
+
+ prune (Node tag ts)
+ | tag == "S"
+ && not (null ts)
+ && last ts == Node "." [Node "." []] = Node tag (init ts)
+ | otherwise = Node tag ts
+
+ flatten e =
+ case unApp e of
+ Just (f,es) | f == meta -> mkApp f (concatMap grab es)
+ | otherwise -> mkApp f (map flatten es)
+ Nothing -> e
+
+ grab e =
+ case unApp e of
+ Just (f,es) | f == meta -> concatMap grab es
+ | otherwise -> [mkApp f (map flatten es)]
+ Nothing -> []
+
+
+penn :: Grammar String Expr
+penn =
+ grammar (mkApp meta)
+ [ "ADVP":-> do adv <- cat "RB"
+ case unApp adv of
+ Just (f,[a]) | f == cidPositAdvAdj -> return (mkApp cidPositAdVAdj [a])
+ _ -> mzero
+ `mplus`
+ do adV <- inside "RB" (lemma "AdV" "s")
+ return (mkApp adV [])
+ , "ADJP":-> do adas <- many pAdA
+ v <- inside "JJ" (lemma "V2" "s VPPart")
+ pps <- many (cat "PP")
+ let adj = mkApp cidPastPartAP [mkApp v []]
+ ap0 = foldr (\ada ap -> mkApp cidAdAP [ada,ap]) adj adas
+ ap = foldr (\pp ap -> mkApp cidAdvAP [ap,pp]) ap0 pps
+ return ap
+ `mplus`
+ do adas0 <- many pAdA
+ adjs <- many1 (cat "JJ")
+ let adj = last adjs
+ adas = adas0 ++ [mkApp cidPositAdAAdj [adj] | adj <- init adjs]
+ ap = foldr (\ada ap -> mkApp cidAdAP [ada,ap]) (mkApp cidPositA [adj]) adas
+ return ap
+ , "S" :-> do advs <- many $ do pp <- cat "PP"
+ inside "," word
+ return pp
+ `mplus`
+ do cat "ADVP"
+ e0 <- do (tmp,pol,sl,e) <- pClSlash
+ guard (not sl)
+ return (mkApp cidUseCl [tmp,pol,e])
+ `mplus`
+ do s <- cat "S"
+ inside "," word
+ np <- cat "NP"
+ inside "VP" $ do
+ (t,v) <- pV "VS"
+ inside "SBAR" $ do
+ cat "-NONE-"
+ inside "S" $ do
+ cat "-NONE-"
+ return (mkApp cidUseCl [mkApp cidTTAnt [ mkApp (fromMaybe meta (isVTense t)) []
+ , mkApp cidASimul []
+ ]
+ ,mkApp cidPPos []
+ ,mkApp cidComplPredVP [np,mkApp cidComplVS [mkApp v [],s]]
+ ])
+ opt (inside "." word) ""
+ return (foldr (\ad e -> mkApp cidAdvS [ad, e]) e0 advs)
+ `mplus`
+ do s1 <- cat "S"
+ opt (inside "," word) ""
+ cc <- cat "CC"
+ s2 <- cat "S"
+ return (mkApp cidConjS [cc, mkApp cidBaseS [s1,s2]])
+ , "SBAR" :-> do (do cat "-NONE-" -- missing preposition
+ return ()
+ `mplus`
+ do w <- inside "IN" word
+ guard (w == "that"))
+ cat "S"
+ , "NP" :-> do (m_cc,list_np) <- pBaseNPs
+ case m_cc of
+ Just cc -> return (mkApp cidConjNP [cc, mkListNP list_np])
+ Nothing -> if length list_np > 1
+ then return (mkApp meta list_np)
+ else return (head list_np)
+ `mplus`
+ do np <- cat "NP"
+ rs <- inside "SBAR" $
+ do rp <- cat "WHNP"
+ inside "S" $
+ do (tmp,pol,sl,e) <- pClSlash
+ guard sl
+ return (mkApp cidUseRCl [tmp,pol,mkApp cidRelSlash [rp,e]])
+ `mplus`
+ do inside "NP" (cat "-NONE-")
+ (tmp,pol,sl,vp) <- inside "VP" pVP
+ guard (not sl)
+ return (mkApp cidUseRCl [fromMaybe (mkApp meta []) (isVTense tmp)
+ ,mkApp pol []
+ ,mkApp cidRelVP [rp,vp]])
+ return (mkApp cidRelNP [np,rs])
+ `mplus`
+ do (m_cc,list_np) <- pNPs
+ case m_cc of
+ Just cc -> return (mkApp cidConjNP [cc, mkListNP list_np])
+ Nothing -> if length list_np > 1
+ then return (mkApp meta list_np)
+ else return (head list_np)
+ , "VP" :-> do (_,_,_,e) <- pVP
+ return e
+ , "PP" :-> do prep <- do cat "IN"
+ `mplus`
+ do inside "TO" word
+ return (mkApp cidto_Prep [])
+ `mplus`
+ do w1 <- inside "JJ" word
+ w2 <- inside "IN" word
+ guard (w1 == "such" && w2 == "as")
+ return (mkApp cidsuch_as_Prep [])
+ np <- cat "NP"
+ return (mkApp cidPrepNP [prep,np])
+ `mplus`
+ do pp1 <- cat "PP"
+ inside "," word
+ conj <- cat "CC"
+ pp2 <- cat "PP"
+ opt (inside "," word) ""
+ return (mkApp cidConjAdv [conj, mkApp cidBaseAdv [pp1,pp2]])
+ , "CC" :-> do cc <- word
+ case cc of
+ "and" -> return (mkApp cidand_Conj [])
+ "&" -> return (mkApp cidamp_Conj [])
+ "or" -> return (mkApp cidor_Conj [])
+ _ -> mzero
+ , "DT" :-> do (dt,b) <- pDT
+ return dt
+ , "IN" :-> do prep <- lemma "Prep" "s"
+ return (mkApp prep [])
+ , "NN" :-> do transform (concatMap splitDashN)
+ (do n <- lemma "N" "s Sg Nom"
+ (do inside "-" word
+ n2 <- lemma "N" "s Sg Nom"
+ return (mkApp cidDashCN [mkApp n [], mkApp n2 []])
+ `mplus`
+ do return (mkApp n [])))
+ `mplus`
+ do v <- lemma "V" "s VPresPart"
+ return (mkApp cidGerundN [mkApp v []])
+ , "NNS" :-> do transform (concatMap splitDashN)
+ (do n <- lemma "N" "s Pl Nom"
+ return (mkApp n [])
+ `mplus`
+ do n1 <- lemma "N" "s Sg Nom"
+ inside "-" word
+ n2 <- lemma "N" "s Pl Nom"
+ return (mkApp cidDashCN [mkApp n1 [], mkApp n2 []]))
+ , "PRP" :-> do p <- (lemma "Pron" "s (NCase Nom)"
+ `mplus`
+ lemma "Pron" "s NPAcc"
+ `mplus`
+ (do w <- word
+ guard (w == "I") -- upper case word
+ return cidi_Pron))
+ return (mkApp p [])
+ , "PRP$":-> do p <- lemma "Pron" "s (NCase Gen)"
+ return (mkApp cidPossPron [mkApp p []])
+ , "RB" :-> do a <- lemma "A" "s AAdv"
+ return (mkApp cidPositAdvAdj [mkApp a []])
+ `mplus`
+ do adv <- lemma "Adv" "s"
+ return (mkApp adv [])
+ , "QP" :-> do adn <- inside "IN" (lemma "AdN" "s")
+ num <- pCD
+ return (mkApp cidDetQuant [mkApp cidIndefArt [], mkApp cidNumCard [mkApp cidAdNum [mkApp adn [], num]]])
+ , "WHNP":-> cat "WP"
+ `mplus`
+ cat "WDT"
+ `mplus`
+ cat "WP$"
+ `mplus`
+ do cat "-NONE-"
+ return (mkApp cidno_RP [])
+ `mplus`
+ do w <- inside "IN" word
+ guard (w == "that")
+ return (mkApp cidthat_RP [])
+ , "-NONE-"
+ :-> return (mkApp meta [])
+ , "JJ" :-> do a <- lemma "A" "s (AAdj Posit Nom)"
+ return (mkApp a [])
+ , "JJR" :-> do a <- lemma "A" "s (AAdj Compar Nom)"
+ return (mkApp a [])
+ , "JJS" :-> do a <- lemma "A" "s (AAdj Superl Nom)"
+ return (mkApp cidOrdSuperl [mkApp a []])
+ , "VB" :-> do v <- mplus (lemma "V" "s VInf") (lemma "V2" "s VInf")
+ return (mkApp v [])
+ , "VBD" :-> do v <- mplus (lemma "V" "s VPast") (lemma "V2" "s VPast")
+ return (mkApp v [])
+ , "VBG" :-> do v <- mplus (lemma "V" "s VPresPart") (lemma "V2" "s VPresPart")
+ return (mkApp v [])
+ , "VBN" :-> do v <- mplus (lemma "V" "s VPPart") (lemma "V2" "s VPPart")
+ return (mkApp v [])
+ , "VBP" :-> do v <- mplus (lemma "V" "s VInf") (lemma "V2" "s VInf")
+ return (mkApp v [])
+ , "VBZ" :-> do v <- mplus (lemma "V" "s VPres") (lemma "V2" "s VPres")
+ return (mkApp v [])
+ , "PDT" :-> do pdt <- lemma "Predet" "s"
+ return (mkApp pdt [])
+ , "WP" :-> do rp <- (lemma "RP" "s (RC Masc (NCase Nom))"
+ `mplus`
+ lemma "RP" "s (RC Masc NPAcc)")
+ return (mkApp rp [])
+ , "WDT" :-> do rp <- lemma "RP" "s (RC Neutr (NCase Nom))"
+ return (mkApp rp [])
+ , "WP$" :-> do rp <- lemma "RP" "s (RC Masc (NCase Gen))"
+ return (mkApp rp [])
+ ]
+
+data VForm a
+ = VInf | VPart | VGerund | VTense a
+
+instance Functor VForm where
+ fmap f VInf = VInf
+ fmap f VPart = VPart
+ fmap f VGerund = VGerund
+ fmap f (VTense t) = VTense (f t)
+
+isVInf VInf = True
+isVInf _ = False
+
+isVPart VPart = True
+isVPart _ = False
+
+isVGerund VGerund = True
+isVGerund _ = False
+
+isVTense (VTense t) = Just t
+isVTense _ = Nothing
+
+
+pVP = do
+ (t,a,p,sl,e0) <- do t <- pCopula
+ p <- pPol
+ inside "VP" $ do
+ advs <- many (cat "ADVP")
+ (t',p',sl,e0) <- pVP
+ guard (isVPart t' && sl && p' == cidPPos)
+ let e1 = mkApp cidPassVPSlash [e0]
+ e2 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e1 advs
+ return (t,cidASimul,p,False,e2)
+ `mplus`
+ do t <- pCopula
+ p <- pPol
+ advs <- many (cat "ADVP")
+ e <- do e <- cat "ADJP"
+ return (mkApp cidCompAP [e])
+ `mplus`
+ do e <- cat "NP"
+ return (mkApp cidCompNP [e])
+ `mplus`
+ do e <- cat "NP"
+ return (mkApp cidCompNP [e])
+ `mplus`
+ do e <- cat "PP"
+ return (mkApp cidCompAdv [e])
+ `mplus`
+ do e <- cat "SBAR"
+ return (mkApp cidCompS [e])
+ `mplus`
+ do e <- inside "S" $ do
+ inside "NP" (cat "-NONE-")
+ (tmp,pol,sl,e) <- inside "VP" pVP
+ guard (isVInf tmp && not sl && pol == cidPPos)
+ return e
+ return (mkApp cidCompVP [e])
+ let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) (mkApp cidUseComp [e]) advs
+ return (t,cidASimul,p,False,e1)
+ `mplus`
+ do t <- pCopula
+ p <- pPol
+ advs <- many (cat "ADVP")
+ (tmp,pol,sl,e) <- inside "VP" pVP
+ guard (isVGerund tmp && not sl && pol == cidPPos)
+ let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e advs
+ return (t,cidASimul,p,False,mkApp cidProgrVP [e1])
+ `mplus`
+ do t <- pCopula
+ p <- pPol
+ adv <- cat "ADVP"
+ return (t,cidASimul,p,False,mkApp cidUseComp [mkApp cidCompAdv [adv]])
+ `mplus`
+ do w <- inside "MD" word
+ t <- case w of
+ "will" -> return cidTFut
+ "would" -> return cidTCond
+ _ -> mzero
+ p <- pPol
+ advs <- many (cat "ADVP")
+ (tmp,pol,sl,e0) <- inside "VP" pVP
+ guard (isVInf tmp && pol == cidPPos)
+ let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (VTense t,cidASimul,p,sl,e1)
+ `mplus`
+ do t <- pHave
+ p <- pPol
+ advs <- many (cat "ADVP")
+ (tmp,pol,sl,e0) <- inside "VP" pVP
+ guard (isVPart tmp && pol == cidPPos)
+ let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (t,cidAAnter,p,sl,e1)
+ `mplus`
+ do t <- pDo
+ p <- pPol
+ advs <- many (cat "ADVP")
+ (tmp,p',sl,e0) <- inside "VP" $ pVP
+ guard (p' == cidPPos)
+ let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (t,cidASimul,p,sl,e1)
+ `mplus`
+ do advs <- many (cat "ADVP")
+ inside "TO" word -- infinitives
+ e0 <- cat "VP"
+ let e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (VInf,cidASimul,cidPPos,False,e1)
+ `mplus`
+ do advs1 <- many (cat "ADVP")
+ (t,v) <- pV "V2"
+ pps <- many (cat "PP")
+ let e0 = mkApp cidSlashV2a [mkApp v []]
+ e1 = foldl (\e pp -> mkApp cidAdvVPSlash [e, pp]) e0 pps
+ (sl,e2) <- (do (inside "NP" (cat "-NONE-")
+ `mplus`
+ inside "SBAR" (cat "-NONE-"))
+ return (True,e1)
+ `mplus`
+ do np <- cat "NP"
+ return (False,mkApp cidComplSlash [e1, np]))
+ let e3 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e2 advs1
+ return (t,cidASimul,cidPPos,sl,e3)
+ `mplus`
+ do (t,v) <- inside "MD" $
+ (do v <- lemma "VV" "s (VVF VPres)"
+ return (cidTPres,v)
+ `mplus`
+ do v <- lemma "VV" "s (VVF VPast)"
+ return (cidTPast,v))
+ p <- pPol
+ advs <- many (cat "ADVP")
+ vp <- cat "VP"
+ let e0 = mkApp cidComplVV [mkApp v [], vp]
+ e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (VTense t,cidASimul,p,False,e1)
+ `mplus`
+ do advs <- many (cat "ADVP")
+ (t,v) <- pVV
+ vp <- inside "S" $ do
+ inside "NP" (cat "-NONE-")
+ (tmp,pol,sl,e) <- inside "VP" pVP
+ guard ((isVInf tmp || isVGerund tmp) && not sl && pol == cidPPos)
+ return e
+ let e0 = mkApp cidComplVV [mkApp v [], vp]
+ e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (t,cidASimul,cidPPos,False,e1)
+ `mplus`
+ do advs <- many (cat "ADVP")
+ (t,v) <- pV "V2V"
+ inside "S" $
+ (do inside "NP" (cat "-NONE-")
+ (tmp,pol,sl,vp) <- inside "VP" pVP
+ guard (isVInf tmp && not sl)
+ let e0 = mkApp cidSlashV2V [mkApp v [], mkApp pol [], vp]
+ e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (t,cidASimul,cidPPos,True,e1)
+ `mplus`
+ do np <- cat "NP"
+ (tmp,pol,sl,vp) <- inside "VP" pVP
+ guard (isVInf tmp && not sl)
+ let e0 = mkApp cidComplSlash [mkApp cidSlashV2V [mkApp v [], mkApp pol [], vp], np]
+ e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (t,cidASimul,cidPPos,False,e1))
+ `mplus`
+ do advs <- many (cat "ADVP")
+ (t,v) <- pV "VA"
+ adjp <- cat "ADJP"
+ let e0 = mkApp cidComplVA [mkApp v [], adjp]
+ e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (t,cidASimul,cidPPos,False,e1)
+ `mplus`
+ do advs <- many (cat "ADVP")
+ (t,v) <- pV "VS"
+ s <- cat "S" `mplus` cat "SBAR"
+ let e0 = mkApp cidComplVS [mkApp v [], s]
+ e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (t,cidASimul,cidPPos,False,e1)
+ `mplus`
+ do advs <- many (cat "ADVP")
+ (t,v) <- pV "V"
+ let e0 = mkApp cidUseV [mkApp v []]
+ e1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) e0 advs
+ return (t,cidASimul,cidPPos,False,e1)
+ pps <- many (cat "PP"
+ `mplus`
+ inside "ADVP" (cat "RB"))
+ let tmp = fmap (\t -> mkApp cidTTAnt [mkApp t [],mkApp a []]) t
+ e1 = foldl (\e pp -> mkApp (if sl then cidAdvVPSlash else cidAdvVP) [e, pp]) e0 pps
+ return (tmp, p, sl, e1)
+
+pClSlash = do np <- cat "NP"
+ advs <- many (cat "ADVP")
+ (tmp,pol,sl,vp) <- do (tmp,pol,sl,vp) <- inside "VP" pVP
+ return (isVTense tmp,pol,sl,vp)
+ `mplus`
+ do vp <- cat "VP"
+ return (Nothing,meta,False,vp)
+ let vp1 = foldr (\pp e -> mkApp cidAdVVP [pp, e]) vp advs
+ return (fromMaybe (mkApp meta []) tmp
+ ,mkApp pol []
+ ,sl
+ ,mkApp (if sl then cidSlashVP else cidPredVP) [np,vp1]
+ )
+
+pV cat =
+ do v <- lookup "VB" "s VInf"
+ return (VInf,v)
+ `mplus`
+ do v <- lookup "VBP" "s VInf"
+ return (VTense cidTPres,v)
+ `mplus`
+ do v <- lookup "VBZ" "s VPres"
+ return (VTense cidTPres,v)
+ `mplus`
+ do v <- lookup "VBD" "s VPast"
+ return (VTense cidTPast,v)
+ `mplus`
+ do v <- lookup "VBN" "s VPPart"
+ return (VPart,v)
+ `mplus`
+ do v <- lookup "VBG" "s VPresPart"
+ return (VGerund,v)
+ where
+ lookup pos fld =
+ inside pos $
+ (do lemma cat fld
+ `mplus`
+ do w <- word
+ return (mkCId ("["++w++"_"++cat++"]")))
+
+pVV =
+ do v <- lookup "VB" "s (VVF VInf)"
+ return (VInf,v)
+ `mplus`
+ do v <- lookup "VBP" "s (VVF VInf)"
+ return (VTense cidTPres,v)
+ `mplus`
+ do v <- lookup "VBZ" "s (VVF VPres)"
+ return (VTense cidTPres,v)
+ `mplus`
+ do v <- lookup "VBD" "s (VVF VPast)"
+ return (VTense cidTPast,v)
+ `mplus`
+ do v <- lookup "VBN" "s (VVF VPPart)"
+ return (VPart,v)
+ `mplus`
+ do v <- lookup "VBG" "s (VVF VPresPart)"
+ return (VGerund,v)
+ where
+ lookup pos fld =
+ inside pos $
+ (do lemma "VV" fld
+ `mplus`
+ do w <- word
+ return (mkCId ("["++w++"_VV]")))
+
+pCopula =
+ do s <- inside "VB" word
+ guard (s == "be")
+ return VInf
+ `mplus`
+ do s <- inside "VBP" word
+ guard (s == "am" || s == "'m" || s == "are" || s == "'re")
+ return (VTense cidTPres)
+ `mplus`
+ do s <- inside "VBZ" word
+ guard (s == "is" || s == "'s")
+ return (VTense cidTPres)
+ `mplus`
+ do s <- inside "VBD" word
+ guard (s == "were" || s == "was")
+ return (VTense cidTPast)
+ `mplus`
+ do s <- inside "VBN" word
+ guard (s == "been")
+ return VPart
+ `mplus`
+ do s <- inside "VBG" word
+ guard (s == "being")
+ return VGerund
+
+pDo =
+ do s <- inside "VB" word
+ guard (s == "do")
+ return VInf
+ `mplus`
+ do s <- inside "VBP" word
+ guard (s == "do")
+ return (VTense cidTPres)
+ `mplus`
+ do s <- inside "VBZ" word
+ guard (s == "does")
+ return (VTense cidTPres)
+ `mplus`
+ do s <- inside "VBD" word
+ guard (s == "did")
+ return (VTense cidTPast)
+
+pHave =
+ do s <- inside "VB" word
+ guard (s == "have")
+ return VInf
+ `mplus`
+ do s <- inside "VBP" word
+ guard (s == "have")
+ return (VTense cidTPres)
+ `mplus`
+ do s <- inside "VBZ" word
+ guard (s == "has")
+ return (VTense cidTPres)
+ `mplus`
+ do s <- inside "VBD" word
+ guard (s == "had")
+ return (VTense cidTPast)
+ `mplus`
+ do s <- inside "VBN" word
+ guard (s == "had")
+ return VPart
+
+pPol =
+ do w <- inside "RB" word
+ guard (w == "n't" || w == "not")
+ return cidPNeg
+ `mplus`
+ do return cidPPos
+
+pBaseNP =
+ do np <- inside "NN" (lemma "NP" "s (NCase Nom)")
+ return (mkApp np [])
+ `mplus`
+ do m_pdt <- opt (liftM Just (cat "PDT")) Nothing
+ m_q <- opt (liftM Just pQuant) Nothing
+ m_num <- opt (liftM Just pCD ) Nothing
+ m_ord <- opt (liftM Just (cat "JJS")) Nothing
+ adjs <- many pModCN
+ ns <- many1 (mplus (cat "NN" >>= \n -> return (n,cidNumSg))
+ (cat "NNS" >>= \n -> return (n,cidNumPl)))
+ let (n,s) = last ns
+ cn0 = foldr (\(n,s) e -> mkApp cidCompoundCN [mkApp s [], n, e])
+ (mkApp cidUseN [n])
+ (init ns)
+ cn = foldr (\adj e -> mkApp cidAdjCN [adj, e])
+ cn0
+ adjs
+ num = maybe (mkApp s []) (\n -> mkApp cidNumCard [n]) m_num
+
+ mkDetQuant q num =
+ case m_ord of
+ Just ord -> mkApp cidDetQuantOrd [q,num,ord]
+ Nothing -> mkApp cidDetQuant [q,num]
+
+ e0 <- if s == cidNumSg
+ then case m_q of
+ Just (q,True) -> return (mkApp cidDetCN [mkDetQuant q num,cn])
+ Just (q,False) -> return (mkApp cidDetCN [q,cn])
+ Nothing -> do guard (isNothing m_num)
+ return (mkApp cidMassNP [cn])
+ else case m_q of
+ Just (q,True) -> return (mkApp cidDetCN [mkDetQuant q num,cn])
+ Just (q,False) -> return (mkApp cidDetCN [q,cn])
+ Nothing -> return (mkApp cidDetCN [mkDetQuant (mkApp cidIndefArt []) num,cn])
+ let e1 = case m_pdt of
+ Just pdt -> mkApp cidPredetNP [pdt,e0]
+ Nothing -> e0
+ return e1
+ `mplus`
+ do dt <- cat "QP"
+ n <- mplus (cat "NN") (cat "NNS")
+ return (mkApp cidDetCN [dt,mkApp cidUseN [n]])
+ `mplus`
+ do m_q <- opt (liftM Just pQuant) Nothing
+ ws2 <- many1 (inside "NNP" word `mplus` inside "NNPS" word)
+ let e0 = mkApp cidSymbPN
+ [mkApp cidMkSymb
+ [mkStr (unwords ws2)]]
+ case m_q of
+ Just (q,b) -> do guard b
+ return (mkApp cidUseQuantPN [q,e0])
+ Nothing -> return (mkApp cidUsePN [e0])
+ `mplus`
+ do p <- inside "PRP" (lemma "NP" "s (NCase Nom)")
+ return (mkApp p [])
+ `mplus`
+ do p <- cat "PRP"
+ return (mkApp cidUsePron [p])
+ `mplus`
+ do np <- cat "NP"
+ pps <- many1 (cat "PP")
+ prns <- many (cat "PRN")
+ let e0 = foldl (\e pp -> mkApp cidAdvNP [e, pp]) np pps
+ e1 = foldl (\e pn -> mkApp meta [e, pn]) e0 prns
+ return e1
+ `mplus`
+ do np <- cat "NP"
+ inside "," word
+ (t',p',sl,vp) <- inside "VP" pVP
+ guard (isVPart t' && sl && p' == cidPPos)
+ inside "," word
+ return (mkApp meta [np, vp])
+ `mplus`
+ do (q,b) <- pQuant
+ return (mkApp cidDetNP [if b
+ then mkApp cidDetQuant [mkApp cidIndefArt [],mkApp cidNumSg []]
+ else q])
+ `mplus`
+ do n <- pCD
+ return (mkApp cidDetNP [mkApp cidDetQuant [mkApp cidIndefArt [],mkApp cidNumCard [n]]])
+
+pBaseNPs = do
+ np <- pBaseNP
+ (do inside "," word
+ (m_cc,nps) <- pBaseNPs
+ return (m_cc ,np:nps)
+ `mplus`
+ do cc <- cat "CC"
+ np2 <- pBaseNP
+ return (Just cc,[np,np2])
+ `mplus`
+ do return (Nothing,[np]))
+
+pNPs = do
+ (t1,t2) <- do w <- inside "DT" word
+ case map toLower w of
+ "both" -> return (mkApp cidand_Conj [],mkApp cidboth7and_DConj [])
+ "either" -> return (mkApp cidor_Conj [],mkApp cideither7or_DConj [])
+ _ -> mzero
+ `mplus`
+ do return (mkApp meta [],mkApp meta [])
+ (m_cc,nps) <- pList
+ return (fmap (toDConj t1 t2) m_cc,nps)
+ where
+ toDConj t1 t2 cc
+ | cc == t1 = t2
+ | otherwise = cc
+
+ pList = do
+ np <- cat "NP"
+ (do inside "," word
+ (m_cc,nps) <- pList
+ return (m_cc ,np:nps)
+ `mplus`
+ do cc <- cat "CC"
+ np2 <- cat "NP"
+ return (Just cc,[np,np2])
+ `mplus`
+ do return (Nothing,[np]))
+
+mkListNP nps0 =
+ foldr (\np1 np2 -> mkApp cidConsNP [np1,np2]) (mkApp cidBaseNP nps2) nps1
+ where
+ (nps1,nps2) = splitAt (length nps0-2) nps0
+
+pModCN =
+ do v <- inside "VBN" (lemma "V2" "s VPPart")
+ return (mkApp cidPastPartAP [mkApp v []])
+ `mplus`
+ do v <- inside "JJ" (lemma "V2" "s VPPart")
+ return (mkApp cidPastPartAP [mkApp v []])
+ `mplus`
+ do v <- inside "JJ" (lemma "V" "s VPresPart")
+ return (mkApp cidGerundAP [mkApp v []])
+ `mplus`
+ do a <- cat "JJ"
+ return (mkApp cidPositA [a])
+ `mplus`
+ do a <- cat "ADJP"
+ return a
+
+pCD =
+ do w0 <- inside "CD" word
+ let w = filter (/=',') w0
+ guard (not (null w) && all isDigit w)
+ let es = [mkApp (mkCId ("D_"++[d])) [] | d <- w]
+ e0 = foldr (\e1 e2 -> mkApp cidIIDig [e1,e2]) (mkApp cidIDig [last es]) (init es)
+ e1 = mkApp cidNumDigits [e0]
+ return e1
+ `mplus`
+ do w <- inside "CD" word
+ e <- case map toLower w of
+ "one" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot01 []]]]])
+ "two" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn2 []]]]]])
+ "three" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn3 []]]]]])
+ "four" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn4 []]]]]])
+ "five" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn5 []]]]]])
+ "six" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn6 []]]]]])
+ "seven" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn7 []]]]]])
+ "eight" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn8 []]]]]])
+ "nine" -> return (mkApp cidnum [mkApp cidpot2as3 [mkApp cidpot1as2 [mkApp cidpot0as1 [mkApp cidpot0 [mkApp cidn9 []]]]]])
+ _ -> mzero
+ return (mkApp cidNumNumeral [e])
+ `mplus`
+ do cat "CD"
+
+pQuant =
+ inside "DT" pDT
+ `mplus`
+ do dt <- cat "PRP$"
+ return (dt,True)
+ `mplus`
+ do np <- inside "NP" $ do
+ np <- pBaseNP
+ inside "POS" word
+ return np
+ return (mkApp cidGenNP [np],True)
+ `mplus`
+ do dt <- pMany
+ return (dt,False)
+
+pDT =
+ do dt <- mplus (lemma "Quant" "s False Sg")
+ (lemma "Quant" "s False Pl")
+ return (mkApp dt [],True)
+ `mplus`
+ do dt <- lemma "Det" "s"
+ return (mkApp dt [],False)
+
+pMany =
+ do w <- inside "JJ" word
+ guard (map toLower w == "many")
+ return (mkApp cidmany_Det [])
+
+pAdA = do adv <- cat "RB"
+ case unApp adv of
+ Just (f,[a]) | f == cidPositAdvAdj
+ -> return (mkApp cidPositAdAAdj [a])
+ _ -> mzero
+ `mplus`
+ do ada <- inside "RB" (lemma "AdA" "s")
+ return (mkApp ada [])
+
+splitDashN (Node w []) =
+ case break (=='-') w of
+ (w1,'-':w2) -> Node w1 [] : Node "-" [Node "-" []] : splitDashN (Node w2 [])
+ _ -> [Node w []]
+splitDashN t = [t]
+
+meta = mkCId "?"