summaryrefslogtreecommitdiff
path: root/treebanks
diff options
context:
space:
mode:
authorkrasimir <krasimir@chalmers.se>2015-11-18 10:16:55 +0000
committerkrasimir <krasimir@chalmers.se>2015-11-18 10:16:55 +0000
commitfe7397efe4403f94797203e6bf00daa95d749618 (patch)
tree235108f9755137fa5261564db643322c25a0b435 /treebanks
parent9335a2997c5784d36e65d019ce39409a5bde5a95 (diff)
some more conversion patterns for Susanne
Diffstat (limited to 'treebanks')
-rw-r--r--treebanks/susanne/Idents.hs11
-rw-r--r--treebanks/susanne/Parser.hs2
-rw-r--r--treebanks/susanne/convert.hs142
3 files changed, 126 insertions, 29 deletions
diff --git a/treebanks/susanne/Idents.hs b/treebanks/susanne/Idents.hs
index 14a731f76..50fbc6d26 100644
--- a/treebanks/susanne/Idents.hs
+++ b/treebanks/susanne/Idents.hs
@@ -39,6 +39,7 @@ cidConsS = app0 "ConsS"
cidConjS = app0 "ConjS"
cidMassNP = app1 "MassNP"
cidAdvNP = app2 "AdvNP"
+cidExtAdvNP = app2 "ExtAdvNP"
cidTPres = app0 "TPres"
cidTPast = app0 "TPast"
cidTFut = app0 "TFut"
@@ -64,10 +65,10 @@ cidAdvS = app2 "AdvS"
cidPositA = app1 "PositA"
cidIDig = app0 "IDig"
cidIIDig = app0 "IIDig"
-cidNumCard = app0 "NumCard"
-cidNumDigits = app0 "NumDigits"
-cidNumNumeral = app0 "NumNumeral"
-cidnum = app0 "num"
+cidNumCard = app1 "NumCard"
+cidNumDigits = app1 "NumDigits"
+cidNumNumeral = app1 "NumNumeral"
+cidnum = app1 "num"
cidpot2as3 = app0 "pot2as3"
cidpot1as2 = app0 "pot1as2"
cidpot0as1 = app0 "pot0as1"
@@ -94,7 +95,7 @@ cidDashSgN = app2 "DashSgN"
cidDashPlN = app2 "DashPlN"
cidProgrVP = app0 "ProgrVP"
cidGerundN = app0 "GerundN"
-cidGerundAP = app0 "GerundAP"
+cidPresPartAP = app1 "PresPartAP"
cidGenNP = app1 "GenNP"
cidPredetNP = app1 "PredetNP"
cidDetNP = app1 "DetNP"
diff --git a/treebanks/susanne/Parser.hs b/treebanks/susanne/Parser.hs
index f34bb3423..c3db1d3a2 100644
--- a/treebanks/susanne/Parser.hs
+++ b/treebanks/susanne/Parser.hs
@@ -19,6 +19,8 @@ instance MonadPlus P where
mzero = P (\pgf cnc ts -> Nothing)
mplus f g = P (\pgf cnc ts -> mplus (runP f pgf cnc ts) (runP g pgf cnc ts))
+getConcr = P (\pgf cnc ts -> Just (ts,cnc))
+
match convert tag_spec = P (\pgf cnc ts ->
case ts of
(t@(Phrase tag1 mods1 fn1 _ _):ts)
diff --git a/treebanks/susanne/convert.hs b/treebanks/susanne/convert.hs
index edeea2825..509063379 100644
--- a/treebanks/susanne/convert.hs
+++ b/treebanks/susanne/convert.hs
@@ -2,6 +2,7 @@ import System.Directory
import System.FilePath
import Data.List
import Data.Char(toLower)
+import Data.Maybe(fromMaybe)
import Control.Monad
import qualified Data.Map as Map
@@ -44,6 +45,9 @@ convert pgf eng t@(Phrase tag mods fn idx ts)
| tag == "Po"= case runP pPP pgf eng ts of
Just ([],x) -> x
_ -> Phrase tag mods fn idx ts'
+ | tag == "Fr"= case runP pRS pgf eng ts of
+ Just ([],x) -> x
+ _ -> Phrase tag mods fn idx ts'
| otherwise = Phrase tag mods fn idx ts'
where
ts' = map (convert pgf eng) ts
@@ -78,9 +82,9 @@ pSubject =
`mplus`
do insideOpt convert "N:S" pNP
`mplus`
- do match convert "M:s"
+ do insideOpt convert "M:s" pM
`mplus`
- do match convert "M:S"
+ do insideOpt convert "M:S" pM
`mplus`
do insideOpt convert "Ds:s" $ do
det <- pDet
@@ -157,16 +161,21 @@ pVP =
advs)
adVs)
`mplus`
- do inside "V" (match convert "VBZ")
- adVs <- many pAdV
- p <- pPol
+ do (t,p) <- inside "V" pCopula
comp <- pComp
advs <- many pVPMods
- return (cidTTAnt cidTPres cidASimul,p,foldr (\adv t -> cidAdVVP adv t)
- (foldl (\t adv -> cidAdvVP t adv)
- (cidUseComp comp)
- advs)
- adVs)
+ return (cidTTAnt t cidASimul,p,foldl (\t adv -> cidAdvVP t adv)
+ (cidUseComp comp)
+ advs)
+
+pCopula =
+ do match convert "VBZ"
+ p <- pPol
+ return (cidTPres,p)
+ `mplus`
+ do match convert "VBDZ"
+ p <- pPol
+ return (cidTPast,p)
pComp =
do adv <- insideOpt convert "R:e" pAdv
@@ -302,12 +311,13 @@ pVInf cat =
do v <- match convert "VH0" -- have
return (Active,Simple,v)
+pVPPresPart =
+ insideOpt convert "Vg" $ do
+ v <- pVPresPart "V"
+ return (cidUseV v)
+
pVPart cat =
- do v <- do lemma "VVGi" cat "s VPresPart"
- `mplus`
- do lemma "VVGt" cat "s VPresPart"
- `mplus`
- do lemma "VVGv" cat "s VPresPart"
+ do v <- pVPresPart cat
return (Active,Progressive,v)
`mplus`
do v <- pVPastPart cat
@@ -320,6 +330,13 @@ pVPres cat =
`mplus`
do lemma "VV0v" cat "s VInf"
+pVPresPart cat =
+ do lemma "VVGi" cat "s VPresPart"
+ `mplus`
+ do lemma "VVGt" cat "s VPresPart"
+ `mplus`
+ do lemma "VVGv" cat "s VPresPart"
+
pVPastPart cat =
do lemma "VVNi" cat "s VPPart"
`mplus`
@@ -347,7 +364,7 @@ pObject =
`mplus`
match convert "N:e"
`mplus`
- match convert "M:e"
+ insideOpt convert "M:e" pM
`mplus`
do insideOpt convert "Ds:e" $ do
det <- pDet
@@ -454,11 +471,12 @@ pPP =
_ -> return (cidMassNP (cidUseN n)) -- we don't know the number
return (cidPrepNP prep np)
-pNP =
+pNP =
do np <- pBaseNP
- match convert "YC"
- fr <- insideOpt convert "Fr" pRCl
- return (cidRelNP np fr)
+ mods <- many pNPMods
+ return (foldl (\t mod -> mod t)
+ np
+ mods)
`mplus`
do pBaseNP
@@ -509,12 +527,16 @@ pDet =
do det <- lemma "DA2" "Det" "s"
return (\num -> det)
`mplus`
+ do num0 <- pNumeral
+ return (\num -> cidDetQuant cidIndefArt num0)
+ `mplus`
do q <- pQuant
ord <- pOrd
return (\num -> cidDetQuantOrd q num ord)
`mplus`
- do q <- pQuant
- return (\num -> cidDetQuant q num)
+ do q <- pQuant
+ mb_num <- opt pNumeral
+ return (\num -> cidDetQuant q (fromMaybe num mb_num))
pQuant =
do match convert "AT"
@@ -563,6 +585,27 @@ pOrd =
do a <- lemma "JJT" "A" "s (AAdj Superl Nom)"
return (cidOrdSuperl a)
+pNumeral =
+ do w <- word "MC" `mplus` word "MC1"
+ cnc <- getConcr
+ case parse cnc "Numeral" w of
+ Right [(e,_)] -> do n <- toParseTree e
+ return (cidNumCard (cidNumNumeral n))
+ _ -> mzero
+ `mplus`
+ do w <- word "MCn"
+ cnc <- getConcr
+ case parse cnc "Digits" w of
+ Right [(e,_)] -> do n <- toParseTree e
+ return (cidNumCard (cidNumDigits n))
+ _ -> mzero
+ where
+ toParseTree e =
+ case unApp e of
+ Just (f,es) -> do ps <- mapM toParseTree es
+ return (App f ps)
+ Nothing -> mzero
+
pGenitive =
do np <- insideOpt convert "N" pNP
match convert "GG"
@@ -617,6 +660,9 @@ pAP =
do vp <- match convert "Tn"
return (cidPastPartAP vp)
`mplus`
+ do vp <- insideOpt convert "Tg" pVPPresPart
+ return (cidPresPartAP vp)
+ `mplus`
do insideOpt convert "J" $ do
adas <- many pAdA
ap <- pAP
@@ -743,9 +789,45 @@ pCNMods =
return (cidSubjS cidthat_Subj s)
return (\t -> cidAdvCN t adv)
`mplus`
- do fr <- insideOpt convert "Fr" pRCl
+ do fr <- insideOpt convert "Fr" pRS
return (\t -> cidRelCN t fr)
+pNPMods =
+ do adv <- insideOpt convert "Po" $ pPP
+ return (\t -> cidAdvNP t adv)
+ `mplus`
+ do adv <- insideOpt convert "P" $ pPP
+ return (\t -> cidAdvNP t adv)
+ `mplus`
+ do adv <- insideOpt convert "Fn" $ do
+ match convert "CST"
+ s <- pS
+ return (cidSubjS cidthat_Subj s)
+ return (\t -> cidAdvNP t adv)
+ `mplus`
+ do match convert "YC"
+ adv <- insideOpt convert "Po" $ pPP
+ opt (match convert "YC")
+ return (\t -> cidExtAdvNP t adv)
+ `mplus`
+ do match convert "YC"
+ adv <- insideOpt convert "P" $ pPP
+ opt (match convert "YC")
+ return (\t -> cidExtAdvNP t adv)
+ `mplus`
+ do match convert "YC"
+ adv <- insideOpt convert "Fn" $ do
+ match convert "CST"
+ s <- pS
+ return (cidSubjS cidthat_Subj s)
+ opt (match convert "YC")
+ return (\t -> cidExtAdvNP t adv)
+ `mplus`
+ do match convert "YC"
+ fr <- insideOpt convert "Fr" pRS
+ opt (match convert "YC")
+ return (\t -> cidRelNP t fr)
+
pName =
do w1 <- word "NP1s"
w2 <- word "NNL1cb"
@@ -765,7 +847,19 @@ pName =
,word "NP2x", word "NP2z"]
return (cidSymbPN (cidMkSymb (Lit w1)))
-pRCl =
+pM = do
+ num <- pNumeral
+ mods <- many pNPMods
+ return (foldl (\t mod -> mod t)
+ (cidDetNP (cidDetQuant cidIndefArt num))
+ mods)
+
+pRS =
+ do rp <- pRP
+ np <- pSubject
+ (t,p,vp) <- pVP
+ return (cidUseRCl t p (cidRelSlash rp (cidSlashVP np vp)))
+ `mplus`
do rp <- pRP
(t,p,vp) <- pVP
opt (match convert "YC")