summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author aarne <aarne@cs.chalmers.se> 2006-09-21 20:55:49 +0000
committer aarne <aarne@cs.chalmers.se> 2006-09-21 20:55:49 +0000
commit fb89adc1cf2196df0af1e735d8070f87d810cdab (patch)
tree 21f0a2c3c716fe9c16a1761f2f6705d1d023dc4b
parent 2d8cff01d4db2c127b030bc6b93982c78aaeb2a8 (diff)
finished compact tb format (for a C++ interpreter now)
-rw-r--r-- src/GF/UseGrammar/Treebank.hs 11
1 files changed, 7 insertions, 4 deletions
diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs
index 0072d88a5..952b71877 100644
--- a/src/GF/UseGrammar/Treebank.hs
+++ b/src/GF/UseGrammar/Treebank.hs
@@ -212,13 +212,16 @@ mkCompactTreebank opts sh = printCompactTreebank . mkJustMultiTreebank opts sh
printCompactTreebank :: (MultiTreebank,[String]) -> [String]
printCompactTreebank (tb,lgs) = (stat:langs:unwords ws : "\n" : linss) where
ws = L.sort $ L.nub $ concat $ map (concatMap (words . snd) . snd) tb
- linss = map lins tb
- lins (_,ls) = unlines [unwords (map encode (words ws)) | (_,ws) <- ls]
+
+ linss = map (unwords . pad) linss0
+ linss0 = map (map (show . encode) . words) allExs
+ allExs = concat [[snd (ls !! i) | (_,ls) <- tb] | i <- [0..length lgs - 1]]
encode w = maybe undefined id $ M.lookup w wmap
- wmap = M.fromAscList $ zip ws (map show [0..])
+ wmap = M.fromAscList $ zip ws [1..]
stat = unwords $ map show [length ws, length lgs, length tb, smax]
langs = unwords lgs
- smax = maximum [length (words l) | l <- linss]
+ smax = maximum $ map length linss0
+ pad ws = ws ++ replicate (smax - length ws) "0"
-- [(String,[(String,String)])] -- tree,lang,lin
mkJustMultiTreebank :: Options -> ShellState -> [A.Tree] -> (MultiTreebank,[String])