summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: aarne <aarne@cs.chalmers.se> 2006-09-21 15:56:39 +0000
committer: aarne <aarne@cs.chalmers.se> 2006-09-21 15:56:39 +0000
commit: 2d8cff01d4db2c127b030bc6b93982c78aaeb2a8 (patch)
tree: eb28fe63ed3bb777d67279d6cfe0015c01eca082 /src
parent: e8530abb74c03fa9071e26c1a0e527986a9e7d04 (diff)
adjusting compact tb format (not yet ready)
Diffstat (limited to 'src')
-rw-r--r-- src/GF/UseGrammar/Treebank.hs 12
1 files changed, 8 insertions, 4 deletions
diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs
index ad0f737c8..0072d88a5 100644
--- a/src/GF/UseGrammar/Treebank.hs
+++ b/src/GF/UseGrammar/Treebank.hs
@@ -209,17 +209,21 @@ tagXML s = "<" ++ s ++ ">"
mkCompactTreebank :: Options -> ShellState -> [A.Tree] -> [String]
mkCompactTreebank opts sh = printCompactTreebank . mkJustMultiTreebank opts sh
-printCompactTreebank :: MultiTreebank -> [String]
-printCompactTreebank tb = (unwords ws : "\n" : map lins tb) where
+printCompactTreebank :: (MultiTreebank,[String]) -> [String]
+printCompactTreebank (tb,lgs) = (stat:langs:unwords ws : "\n" : linss) where
ws = L.sort $ L.nub $ concat $ map (concatMap (words . snd) . snd) tb
+ linss = map lins tb
lins (_,ls) = unlines [unwords (map encode (words ws)) | (_,ws) <- ls]
encode w = maybe undefined id $ M.lookup w wmap
wmap = M.fromAscList $ zip ws (map show [0..])
+ stat = unwords $ map show [length ws, length lgs, length tb, smax]
+ langs = unwords lgs
+ smax = maximum [length (words l) | l <- linss]
-- [(String,[(String,String)])] -- tree,lang,lin
-mkJustMultiTreebank :: Options -> ShellState -> [A.Tree] -> MultiTreebank
+mkJustMultiTreebank :: Options -> ShellState -> [A.Tree] -> (MultiTreebank,[String])
mkJustMultiTreebank opts sh ts =
- [(prt_ t, [(la, lin la t) | la <- langs]) | t <- ts] where
+ ([(prt_ t, [(la, lin la t) | la <- langs]) | t <- ts],langs) where
langs = map prt_ $ allLanguages sh
lin = linearize opts sh