summaryrefslogtreecommitdiff
path: root/src/GF/UseGrammar/Treebank.hs
blob: 12dc598f2baa82574b08c21aec4b35f2df8cb132 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
----------------------------------------------------------------------
-- |
-- Module      : Treebank
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- Generate multilingual treebanks. AR 8\/2\/2006
--
-- (c) Aarne Ranta 2006 under GNU GPL
--
-- Purpose: to generate treebanks.
-----------------------------------------------------------------------------

module GF.UseGrammar.Treebank (mkTreebank,testTreebank) where

import GF.Compile.ShellState 
import GF.UseGrammar.Linear (linTree2string)
import GF.UseGrammar.Custom
import GF.UseGrammar.GetTree (string2tree)
import GF.Grammar.TypeCheck (annotate)
import GF.Canon.CMacros (noMark)
import GF.Grammar.Grammar (Trm)
import GF.Grammar.MMacros (exp2tree)
import GF.Grammar.Macros (zIdent)
import GF.Grammar.PrGrammar (prt_)
import GF.Grammar.Values (tree2exp)
import GF.Data.Operations
import GF.Infra.Option
import qualified GF.Grammar.Abstract as A

-- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL

-- | the main functions 
mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
mkTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem tris)
 where
   mkItem(t,i)= putInXML opts "item" (cat i)  (mkTree t ++ concatMap (mkLin t) langs)
--   mkItem(t,i)= putInXML opts "item" (cat i)  (mkTree t >>mapM_ (mkLin t) langs)
   mkTree t   = putInXML opts "tree" []       (puts $ showTree t)
   mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize sh lg t)

   langs   = [prt_ l | l <- allLanguages sh]
   comm    = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
   abstr   = "" --- "Abs" ----
   cat i   = " number=" ++ show (show i) --- " cat=" ++ show "S" ----
   lang lg = " lang=" ++ show (prt_ (zIdent lg))
   tris    = zip trees [1..]

testTreebank :: Options -> ShellState -> String -> Res
testTreebank opts sh = putInXML opts "testtreebank" [] . concatMap testOne . getTreebank . lines
 where
  testOne (e,lang,str0) = do
    let tr = annot gr e
    let str = linearize sh lang tr
    if str == str0 then ret else putInXML opts "diff" [] $ concat [
      putInXML opts "tree" [] (puts $ showTree tr),
      putInXML opts "old"  (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0,
      putInXML opts "new"  (" lang=" ++ show (prt_ (zIdent lang))) $ puts str
      ]
  gr = firstStateGrammar sh

-- string vs. IO
type Res = [String] -- IO ()
puts :: String -> Res
puts = return  -- putStrLn
ret = [] -- return ()
--

getTreebank :: [String] -> [(String,String,String)]
getTreebank ll = case ll of
  [] -> []
  l:ls -> 
    let (l1,l2)   = getItem ls
        (tr,lins) = getTree l1
        lglins    = getLins lins
    in [(tr,lang,str) | (lang,str) <- lglins] ++ getTreebank l2
 where
   getItem = span ((/="</item") . take 6)

   getTree (_:ss) = let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2)

   getLins (beg:str:end:ss) = (getLang beg, str):getLins ss
   getLins _ = []

   getLang = takeWhile (/='"') . tail . dropWhile (/='"')

annot :: StateGrammar -> String -> A.Tree
annot gr s = errVal (error "illegal tree") $ do
  let t = tree2exp $ string2tree gr s
  annotate (grammar gr) t

putInXML :: Options -> String -> String -> Res -> Res
putInXML opts tag attrs io = 
  (ifXML $ puts $ tagXML $ tag ++ attrs) ++
  io ++
  (ifXML $ puts $ tagXML $ '/':tag)
 where
  ifXML c = if oElem showXML opts then c else []


tagXML :: String -> String
tagXML s = "<" ++ s ++ ">"

--- these handy functions are borrowed from EmbedAPI

linearize mgr lang = 
  untok .
  linTree2string noMark (canModules mgr) (zIdent lang) 
 where
   sgr   = stateGrammarOfLangOpt False mgr (zIdent lang)
   untok = customOrDefault (stateOptions sgr) useUntokenizer customUntokenizer sgr

showTree t = prt_ $ tree2exp t