diff options
| author | aarne <aarne@cs.chalmers.se> | 2006-09-21 20:55:49 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2006-09-21 20:55:49 +0000 |
| commit | fb89adc1cf2196df0af1e735d8070f87d810cdab (patch) | |
| tree | 21f0a2c3c716fe9c16a1761f2f6705d1d023dc4b /src | |
| parent | 2d8cff01d4db2c127b030bc6b93982c78aaeb2a8 (diff) | |
finished compact tb format (for a C++ interpreter now)
Diffstat (limited to 'src')
| -rw-r--r-- | src/GF/UseGrammar/Treebank.hs | 11 |
1 file changed, 7 insertions, 4 deletions
```diff
diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs
index 0072d88a5..952b71877 100644
--- a/src/GF/UseGrammar/Treebank.hs
+++ b/src/GF/UseGrammar/Treebank.hs
@@ -212,13 +212,16 @@ mkCompactTreebank opts sh = printCompactTreebank . mkJustMultiTreebank opts sh
 printCompactTreebank :: (MultiTreebank,[String]) -> [String]
 printCompactTreebank (tb,lgs) = (stat:langs:unwords ws : "\n" : linss) where
   ws = L.sort $ L.nub $ concat $ map (concatMap (words . snd) . snd) tb
-  linss = map lins tb
-  lins (_,ls) = unlines [unwords (map encode (words ws)) | (_,ws) <- ls]
+
+  linss = map (unwords . pad) linss0
+  linss0 = map (map (show . encode) . words) allExs
+  allExs = concat [[snd (ls !! i) | (_,ls) <- tb] | i <- [0..length lgs - 1]]
   encode w = maybe undefined id $ M.lookup w wmap
-  wmap = M.fromAscList $ zip ws (map show [0..])
+  wmap = M.fromAscList $ zip ws [1..]
   stat = unwords $ map show [length ws, length lgs, length tb, smax]
   langs = unwords lgs
-  smax = maximum [length (words l) | l <- linss]
+  smax = maximum $ map length linss0
+  pad ws = ws ++ replicate (smax - length ws) "0"
 
 -- [(String,[(String,String)])] -- tree,lang,lin
 mkJustMultiTreebank :: Options -> ShellState -> [A.Tree] -> (MultiTreebank,[String])
```
