summaryrefslogtreecommitdiff
path: root/treebanks
diff options
context:
space:
mode:
authorkr.angelov <kr.angelov@gmail.com>2013-03-21 13:39:24 +0000
committerkr.angelov <kr.angelov@gmail.com>2013-03-21 13:39:24 +0000
commitd1866472ebcb4223b0343b0a81903c9fdfee1ef7 (patch)
treeb2fd1815d01dadd2e93a68066a2a549683529298 /treebanks
parentc6e4db8f4ae94608459d3963acdb8dd9479c89f7 (diff)
added configuration file which defines the heads for all syntactic functions in ParseEng
Diffstat (limited to 'treebanks')
-rw-r--r--treebanks/PennTreebank/Dependencies.hs37
-rw-r--r--treebanks/PennTreebank/ParseEngAbs.heads223
2 files changed, 260 insertions, 0 deletions
diff --git a/treebanks/PennTreebank/Dependencies.hs b/treebanks/PennTreebank/Dependencies.hs
new file mode 100644
index 000000000..d785507b3
--- /dev/null
+++ b/treebanks/PennTreebank/Dependencies.hs
@@ -0,0 +1,37 @@
+module Dependencies where
+
+import PGF
+import qualified Data.Map as Map
+import Data.Maybe as Maybe
+
+-- | Maps a function name to the labels of its arguments, in argument order.
+-- 'getDependencies' gives meaning to the labels @head@ and @mod@.
+type HeadTable = Map.Map CId [CId]
+
+-- | Load a 'HeadTable' from a text file.
+--
+-- Every line that contains at least one word becomes an entry: the first
+-- word is the key and the remaining words are its labels. Lines without any
+-- word are skipped. When a key occurs on several lines the last one wins,
+-- because the entries are collected with 'Map.fromList'.
+readHeadTable :: FilePath -> IO HeadTable
+readHeadTable fpath = fmap (Map.fromList . mapMaybe entry . lines) (readFile fpath)
+  where
+    -- Split one line into (key, labels); a line with no words yields nothing.
+    entry l = case map mkCId (words l) of
+                []       -> Nothing
+                (f:lbls) -> Just (f,lbls)
+
+-- | Compute the head of an abstract syntax tree together with the list of
+-- @(head, modifier)@ pairs found inside it.
+--
+-- A function applied to no arguments, and any application of @MkSymb@, is a
+-- leaf: the function itself is returned as the head with no pairs.
+-- Otherwise the labels stored for the function in the 'HeadTable' are paired
+-- positionally with its arguments ('zipWith' drops surplus labels or
+-- arguments). The single argument labelled @head@ supplies the head of the
+-- whole tree and its pairs. Every argument labelled @mod@ contributes one
+-- pair from that head to the modifier's own head, followed by the pairs
+-- collected inside the modifier. Arguments carrying any other label are
+-- never inspected.
+--
+-- Calls 'error' when the expression is not a function application, when the
+-- function has no entry in the table, or when the number of arguments
+-- labelled @head@ is not exactly one.
+getDependencies :: HeadTable -> Expr -> (CId,[(CId,CId)])
+getDependencies tbl e =
+  case unApp e of
+    Nothing -> error ("this is not a function application: "++showExpr [] e)
+    Just (f,args)
+      | null args             -> (f,[])
+      | f == mkCId "MkSymb"   -> (f,[])
+      | otherwise             ->
+          case Map.lookup f tbl of
+            Nothing     -> error ("there is no head defined for function "++showCId f)
+            Just labels -> combine (zipWith analyse labels args)
+  where
+    -- Pair a label with the (lazily computed) analysis of its argument.
+    analyse lbl arg = (lbl, getDependencies tbl arg)
+
+    combine labelled =
+      case [r | (lbl,r) <- labelled, lbl == c_head] of
+        [headResult] ->
+          let (h,headDeps) = headResult
+              -- Select on the label first and only then take the result
+              -- apart. Destructuring in the generator pattern would force
+              -- the analysis of every argument, so an argument that is not
+              -- a modifier could abort the whole computation even though
+              -- its result is never used.
+              modDeps = [ (h,m):ds
+                        | (lbl,r) <- labelled
+                        , lbl == c_mod
+                        , let (m,ds) = r ]
+          in (h, headDeps ++ concat modDeps)
+        _ -> error ("there must be exactly one head in "++showExpr [] e)
+
+-- | Label marking the argument that supplies the head of a function
+-- application.
+c_head :: CId
+c_head = mkCId "head"
+
+-- | Label marking an argument that modifies the head.
+c_mod :: CId
+c_mod = mkCId "mod"
+
+-- | Count how often each @(head, modifier)@ pair occurs in a treebank.
+--
+-- Reads the head table from @ParseEngAbs.heads@ and one expression per line
+-- from @wsj.full@; lines that 'readExpr' cannot parse are skipped. The
+-- counts are written to the file @deps@, one 'show'n @((head,modifier),count)@
+-- entry per line, in the key order of the map.
+test :: IO ()
+test = do
+  tbl   <- readHeadTable "ParseEngAbs.heads"
+  trees <- fmap (mapMaybe readExpr . lines) (readFile "wsj.full")
+  let counts = Map.fromListWith (+) [(dep,1) | tree <- trees, dep <- snd (getDependencies tbl tree)]
+  writeFile "deps" (unlines [show entry | entry <- Map.toList counts])
diff --git a/treebanks/PennTreebank/ParseEngAbs.heads b/treebanks/PennTreebank/ParseEngAbs.heads
new file mode 100644
index 000000000..f77a60287
--- /dev/null
+++ b/treebanks/PennTreebank/ParseEngAbs.heads
@@ -0,0 +1,223 @@
+PositA head
+ComparA head mod
+ComplA2 head mod
+ReflA2 head
+UseA2 head
+UseComparA head
+CAdvAP mod head mod
+AdjOrd head
+SentAP head mod
+AdAP mod head
+AdvAP head mod
+PositAdvAdj head
+PrepNP head mod
+AdAdv mod head
+PositAdAAdj head
+SubjS mod head
+AdnCAdv head
+ComplV2 head mod
+ComplV3 head mod mod
+ComplV2V head mod mod
+ComplV2S head mod mod
+ComplV2Q head mod mod
+ComplV2A head mod mod
+SlashV2 mod head
+SlashVVV2 mod head mod
+NumInt head
+OrdInt head
+AdvSC head
+NumInt head
+OrdInt head
+ConjS head mod
+ConjRS head mod
+ConjAP head mod
+ConjNP mod head
+ConjAdv head mod
+ConjIAdv head mod
+ConjCN head mod
+GenNP head
+GenIP head
+GenRP mod head
+EmptyRelSlash head
+MkVPI head
+ConjVPI head mod
+ComplVPIVV head mod
+MkVPS _ _ head
+ConjVPS head mod
+PredVPS mod head
+PartVP head
+PassVPSlash head
+ExistNP head
+ExistIP head
+ProgrVP head
+ImpPl1 head
+ImpP3 mod head
+DetCN _ head
+UsePN head
+UsePron head
+PredetNP mod head
+PPartNP head mod
+AdvNP head mod
+RelNP head mod
+DetNP head
+DetQuant head _ _
+DetQuantOrd head _ _
+NumCard head
+NumDigits head
+NumNumeral head
+AdNum mod head
+OrdDigits head
+OrdNumeral head
+OrdSuperl head
+MassNP head
+PossPron head
+UseN head
+ComplN2 head mod
+ComplN3 head mod
+UseN2 head
+Use2N3 head
+Use3N3 head
+AdjCN mod head
+RelCN head mod
+AdvCN head mod
+SentCN head mod
+ApposCN head mod
+num _
+pot0 _
+pot1to19 _
+pot0as1 _
+pot1 _
+pot1plus _ _
+pot1as2 _
+pot2 _
+pot2plus _ _
+pot2as3 _
+pot3 _
+pot3plus _ _
+IDig _
+IIDig _ _
+num2digits head
+PhrUtt mod head mod
+UttS head
+UttQS head
+UttImpSg _ head
+UttImpPl _ head
+UttImpPol _ head
+UttIP head
+UttIAdv head
+UttNP head
+UttAdv head
+UttVP head
+UttCN head
+UttCard head
+UttAP head
+UttInterj head
+PConjConj head
+VocNP head
+QuestCl head
+QuestVP mod head
+QuestSlash mod head
+QuestIAdv mod head
+QuestIComp mod head
+IdetCN mod head
+IdetIP head
+AdvIP head mod
+IdetQuant head mod
+PrepIP _ head
+AdvIAdv head mod
+CompIAdv head
+CompIP head
+ComplSlashIP head mod
+AdvQVP head mod
+AddAdvQVP head mod
+QuestQVP mod head
+RelCl head
+RelVP mod head
+RelSlash mod head
+FunRP mod mod head
+PredVP mod head
+PredSCVP mod head
+SlashVP mod head
+AdvSlash head mod
+SlashPrep head mod
+SlashVS mod head mod
+ImpVP head
+EmbedS head
+EmbedQS head
+EmbedVP head
+UseCl _ _ head
+UseQCl _ _ head
+UseRCl _ _ head
+UseSlash _ _ head
+AdvS mod head
+ExtAdvS mod head
+SSubjS mod mod head
+RelS head mod
+SymbPN head
+CNNumNP head _
+MkSymb _
+TTAnt head mod
+UseV head
+ComplVV head mod
+ComplVS head mod
+ComplVQ head mod
+ComplVA head mod
+SlashV2a head
+Slash2V3 head mod
+Slash3V3 head mod
+SlashV2V head _ _ mod
+SlashV2S head mod
+SlashV2Q head mod
+SlashV2A head mod
+ComplSlash head mod
+SlashVV head mod
+SlashV2VNP head mod mod
+ReflVP head
+UseComp head
+AdvVP head mod
+AdVVP mod head
+AdvVPSlash head mod
+AdVVPSlash mod head
+VPSlashPrep head mod
+CompAP head
+CompNP head
+CompAdv head
+CompCN head
+CompoundCN _ mod head
+DashCN mod head
+GerundN head
+GerundAP head
+PastPartAP head
+OrdCompar head
+PositAdVAdj head
+UseQuantPN _ head
+SlashVPIV2V head _ mod
+SlashSlashV2V head _ _ mod
+ComplVV head _ _ mod
+PredVPosv mod head
+PredVPovs mod head
+CompS head
+CompQS head
+CompVP mod mod head
+VPSlashVS mod head
+PastPartRS _ _ head
+PresPartRS _ _ head
+ApposNP head mod
+AdAdV mod head
+UttAdV head
+BaseNP head mod
+ConsNP head mod
+BaseVPS head mod
+ConsVPS head mod
+BaseVPI head mod
+ConsVPI head mod
+BaseAP head mod
+ConsAP head mod
+BaseS head mod
+ConsS head mod
+BaseCN head mod
+ConsCN head mod
+BaseAdv head mod
+ConsAdv head mod
+BaseRS head mod
+ConsRS head mod