-- path: root/treebanks/susanne/convert.hs
-- blob: dfd2328ca3f06ce747e63782d7b04e3b0fc60002
import System.Directory
import System.FilePath
import Data.List
import Data.Char(toLower)

import PGF (readPGF, readLanguage, buildMorpho, lookupMorpho, mkCId, functionType, unType)
import SusanneFormat

-- | Language identifier handed to 'buildMorpho' in 'main'.
--
-- It is parsed from the fixed name @"ParseEng"@. If 'readLanguage' rejects
-- that name, evaluation stops with an explicit message rather than an
-- anonymous pattern-match failure from a partial top-level binding.
eng =
  case readLanguage "ParseEng" of
    Just lang -> lang
    Nothing   -> error "convert.hs: readLanguage could not parse \"ParseEng\""

-- | Entry point.
--
-- Loads the grammar from @../../ParseEngAbs.pgf@, reads every entry of the
-- @data@ directory whose name does not start with a dot (in sorted order),
-- parses the concatenated lines with 'readTreebank' and writes one shown
-- tree per line to the file @text@.
main :: IO ()
main = do
  gr <- readPGF "../../ParseEngAbs.pgf"
  -- Only needed by the conversion step below, which is currently disabled.
  let morpho = buildMorpho gr eng
  entries <- getDirectoryContents "data"
  -- Skip ".", ".." and any other dot-prefixed entry.
  let visible = [name | name <- sort entries, take 1 name /= "."]
  txts <- mapM (readFile . ("data" </>)) visible
  --let ts = concatMap (convert gr morpho) (readTreebank (lines (concat txts)))
  let ts = readTreebank (lines (concat txts))
  writeFile "text" (unlines (map show ts))

-- | Rewrite one treebank node into a list of replacement nodes.
--
-- * A 'Word' whose tag is in @droppedTags@ is removed (empty result).
-- * A 'Word' whose tag is in @lexicalTags@ is passed to 'convertLemma'
--   with the category name and morphological analysis string listed for
--   that tag; 'convertLemma' decides whether the word is replaced.
-- * Any other 'Word' is returned unchanged.
-- * A 'Phrase' tagged @"O"@ is dissolved into its converted children; every
--   other 'Phrase' is rebuilt around its converted children.
-- * Every other node (for example the 'App' nodes that 'convertLemma'
--   produces) is returned unchanged, which keeps the function total.
convert pgf morpho w@(Word _ tag _ _)
  | tag `elem` droppedTags = []
  | otherwise =
      case lookup tag lexicalTags of
        Just (cat, an) -> convertLemma pgf morpho (mkCId cat) an w
        Nothing        -> [w]
  where
    -- Tags whose words are discarded from the output.
    -- NOTE(review): these look like Susanne punctuation/markup tags — confirm.
    droppedTags = ["YB","YBL","YBR","YF","YIL","YIR","YTL","YTR", "YO"]

    -- Susanne tag -> (grammar category, analysis string the lexicon entry
    -- must carry). Tags are unique, so lookup order does not matter.
    lexicalTags =
      [ ("NN1c",  ("N",     "s Sg Nom"))
      , ("NN1n",  ("N",     "s Sg Nom"))
      , ("NN2",   ("N",     "s Pl Nom"))
      , ("JJ",    ("A",     "s (AAdj Posit Nom)"))
      , ("JB",    ("A",     "s (AAdj Posit Nom)"))
      , ("JBo",   ("A",     "s (AAdj Posit Nom)"))
      , ("AT",    ("Quant", "s False Sg"))
      , ("VVDi",  ("V",     "s VPast"))
      , ("VVDt",  ("V2",    "s VPast"))
      , ("VVDv",  ("V",     "s VPast"))
      , ("VVZi",  ("V",     "s VPres"))
      , ("VVZt",  ("V2",    "s VPres"))
      , ("VVZv",  ("V",     "s VPres"))
      , ("PPHS2", ("Pron",  "s (NCase Nom)"))
      , ("PPHO2", ("Pron",  "s NPAcc"))
      , ("RR",    ("Adv",   "s"))
      , ("II",    ("Prep",  "s"))
      , ("IO",    ("Prep",  "s"))
      ]
convert pgf morpho (Phrase tag mods fn idx ts)
  | tag == "O" = children
  | otherwise  = [Phrase tag mods fn idx children]
  where
    children = concatMap (convert pgf morpho) ts
-- Nodes that are neither 'Word' nor 'Phrase' pass through untouched instead
-- of triggering a non-exhaustive pattern failure.
convert _ _ other = [other]

-- | Try to replace a word by a grammar function application.
--
-- The lower-cased word form is looked up in @morpho@. A lookup result
-- counts as a candidate when its function has result category @cat@ in
-- @pgf@ and its analysis equals @an0@. If exactly one candidate remains
-- the word becomes @App fun []@; with zero or several candidates the
-- original word is returned unchanged.
convertLemma pgf morpho cat an0 w@(Word _ _ form _) =
  case candidates of
    [fun] -> [App fun []]
    _     -> [w]
  where
    candidates =
      [ fun
      | (fun, analysis) <- lookupMorpho morpho (map toLower form)
      , hasResultCat fun
      , analysis == an0
      ]

    -- True when the grammar knows the function and the category component
    -- of its type is the requested one.
    hasResultCat fun =
      case fmap unType (functionType pgf fun) of
        Just (_, resultCat, _) -> resultCat == cat
        Nothing                -> False