summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2006-03-03 20:51:03 +0000
committeraarne <aarne@cs.chalmers.se>2006-03-03 20:51:03 +0000
commit3ff765620c9df08212275b5a3f207fd939a60922 (patch)
tree0c33b782770bd19620c20d21ea4211a3af0206f0 /src
parente6f115a3006a64930f4bfd889dd215074c862cbb (diff)
distinguished uni and multi treebanks
Diffstat (limited to 'src')
-rw-r--r--src/GF/Compile/ShellState.hs7
-rw-r--r--src/GF/Data/Operations.hs2
-rw-r--r--src/GF/Shell.hs26
-rw-r--r--src/GF/UseGrammar/Treebank.hs91
4 files changed, 96 insertions, 30 deletions
diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs
index 269b9adb1..0718814c5 100644
--- a/src/GF/Compile/ShellState.hs
+++ b/src/GF/Compile/ShellState.hs
@@ -75,7 +75,7 @@ data ShellState = ShSt {
transfers :: [(Ident,T.Env)] -- ^ transfer modules
}
-type Treebank = Map.Map String [(String,String)] -- lang, tree
+type Treebank = Map.Map String [String] -- string, trees
actualConcretes :: ShellState -> [((Ident,Ident),Bool)]
actualConcretes sh = nub [((c,c),b) |
@@ -480,9 +480,8 @@ addTransfer :: (Ident,T.Env) -> ShellState -> ShellState
addTransfer it@(i,_) sh =
sh {transfers = it : filter ((/= i) . fst) (transfers sh)}
-addTreebank :: (Ident,Treebank) -> ShellState -> ShellState
-addTreebank it@(i,_) sh =
- sh {treebanks = it : filter ((/= i) . fst) (treebanks sh)}
+addTreebanks :: [(Ident,Treebank)] -> ShellState -> ShellState
+addTreebanks its sh = sh {treebanks = its ++ treebanks sh}
findTreebank :: ShellState -> Ident -> Err Treebank
findTreebank sh i = maybeErr "no treebank found" $ lookup i $ treebanks sh
diff --git a/src/GF/Data/Operations.hs b/src/GF/Data/Operations.hs
index f5434486f..ac1ec85bb 100644
--- a/src/GF/Data/Operations.hs
+++ b/src/GF/Data/Operations.hs
@@ -585,7 +585,7 @@ removeAssoc :: Eq a => a -> [(a,b)] -> [(a,b)]
removeAssoc a = filter ((/=a) . fst)
-- | chop into separator-separated parts
-chunks :: String -> [String] -> [[String]]
+chunks :: Eq a => a -> [a] -> [[a]]
chunks sep ws = case span (/= sep) ws of
(a,_:b) -> a : bs where bs = chunks sep b
(a, []) -> if null a then [] else [a]
diff --git a/src/GF/Shell.hs b/src/GF/Shell.hs
index d502b74ce..cde5ff743 100644
--- a/src/GF/Shell.hs
+++ b/src/GF/Shell.hs
@@ -182,9 +182,8 @@ execC :: CommandOpt -> ShellIO
execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case comm of
CImport file | oElem (iOpt "treebank") opts -> do
- ss <- readFileIf file >>= return . lines
- let tb = pre2treebank $ getTreebank ss
- changeState (addTreebank (I.identC (takeWhile (/='.') file), tb)) sa
+ tbs <- readUniTreebanks file
+ changeState (addTreebanks tbs) sa
CImport file | oElem fromExamples opts -> do
es <- liftM nub $ getGFEFiles opts file
system $ "gf -examples" +++ unlines es
@@ -296,7 +295,7 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
CTreeBank | oElem doCompute opts -> do -- -c
let bank = prCommandArg a
- returnArg (AString $ unlines $ testTreebank opts st bank) sa
+ returnArg (AString $ unlines $ testMultiTreebank opts st bank) sa
CTreeBank | oElem getTrees opts -> do -- -trees
let bank = prCommandArg a
tes = map (string2treeErr gro) $ treesTreebank opts bank
@@ -305,21 +304,28 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
CTreeBank -> do
let ts = strees $ s2t $ snd sa
comm = "command" ----
- returnArg (AString $ unlines $ mkTreebank opts st comm ts) sa
+ returnArg (AString $ unlines $ mkMultiTreebank opts st comm ts) sa
CLookupTreebank -> do
let tbs = treebanks st
+ let s = prCommandArg a
if null tbs
then returnArg (AError "no treebank") sa
else do
let tbi = maybe (fst $ head tbs) I.identC (getOptVal opts (aOpt "treebank"))
case lookup tbi tbs of
Nothing -> returnArg (AError ("no treebank" +++ prt tbi)) sa
- Just tb -> do
- let s = prCommandArg a
- let tes = map (string2treeErr gro . snd) $ lookupTreebank tb s
- terms = [t | Ok t <- tes]
- returnArg (ATrms terms) sa
+ Just tb -> case () of
+ _ | oElem (iOpt "strings") opts -> do
+ returnArg (AString $ unlines $ map fst $ assocsTreebank tb) sa
+ _ | oElem (iOpt "raw") opts -> do
+ returnArg (AString $ unlines $ lookupTreebank tb s) sa
+ _ | oElem (iOpt "assocs") opts -> do
+ returnArg (AString $ unlines $ map printAssoc $ assocsTreebank tb) sa
+ _ -> do
+ let tes = map (string2treeErr gro) $ lookupTreebank tb s
+ terms = [t | Ok t <- tes]
+ returnArg (ATrms terms) sa
CShowTreeGraph | oElem emitCode opts -> do -- -o
returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa
diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs
index befbae0c0..f1dd5b75b 100644
--- a/src/GF/UseGrammar/Treebank.hs
+++ b/src/GF/UseGrammar/Treebank.hs
@@ -13,12 +13,19 @@
-----------------------------------------------------------------------------
module GF.UseGrammar.Treebank (
- mkTreebank,
- testTreebank,
+ mkMultiTreebank,
+ mkUniTreebank,
+ multi2uniTreebank,
+ uni2multiTreebank,
+ testMultiTreebank,
treesTreebank,
getTreebank,
+ getUniTreebank,
+ readUniTreebanks,
+ readMultiTreebank,
lookupTreebank,
- pre2treebank
+ assocsTreebank,
+ printAssoc
) where
import GF.Compile.ShellState
@@ -34,15 +41,54 @@ import GF.Grammar.PrGrammar (prt_)
import GF.Grammar.Values (tree2exp)
import GF.Data.Operations
import GF.Infra.Option
+import GF.Infra.Ident (Ident)
+import GF.Infra.UseIO
import qualified GF.Grammar.Abstract as A
import qualified Data.Map as M
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL
+-- keys are trees; format: XML file
+type MultiTreebank = [(String,[(String,String)])] -- tree,lang,lin
+
+-- keys are strings; format: string TAB tree TAB ... TAB tree
+type UniTreebank = Treebank -- M.Map String [String] -- string,tree
+
+-- both formats can be read from both kinds of files
+readUniTreebanks :: FilePath -> IO [(Ident,UniTreebank)]
+readUniTreebanks file = do
+ s <- readFileIf file
+ return $ if isMultiTreebank s
+ then multi2uniTreebank $ getTreebank $ lines s
+ else
+ let tb = getUniTreebank $ lines s
+ in [(zIdent (unsuffixFile file),tb)]
+
+readMultiTreebank :: FilePath -> IO MultiTreebank
+readMultiTreebank file = do
+ s <- readFileIf file
+ return $ if isMultiTreebank s
+ then getTreebank $ lines s
+ else uni2multiTreebank (zIdent (unsuffixFile file)) $ getUniTreebank $ lines s
+
+isMultiTreebank :: String -> Bool
+isMultiTreebank s = take 10 s == "<treebank>"
+
+multi2uniTreebank :: MultiTreebank -> [(Ident,UniTreebank)]
+multi2uniTreebank mt@((_,lls):_) = [(zIdent la, mkTb la) | (la,_) <- lls] where
+ mkTb la = M.fromListWith (++) [(s,[t]) | (t,lls) <- mt, (l,s) <- lls, l==la]
+multi2uniTreebank [] = []
+
+uni2multiTreebank :: Ident -> UniTreebank -> MultiTreebank
+uni2multiTreebank la tb =
+ [(t,[(prt_ la, s)]) | (s,ts) <- assocsTreebank tb, t <- ts]
+
-- | the main functions
-mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
-mkTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem tris)
+
+-- builds a treebank where trees are the keys, and writes a file (opt. XML)
+mkMultiTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
+mkMultiTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem tris)
where
mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs)
-- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
@@ -56,10 +102,19 @@ mkTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem t
lang lg = " lang=" ++ show (prt_ (zIdent lg))
tris = zip trees [1..]
-testTreebank :: Options -> ShellState -> String -> Res
-testTreebank opts sh = putInXML opts "testtreebank" [] .
- concatMap testOne .
- getTreebanks . lines
+-- builds a unilingual treebank where strings are the keys into an internal treebank
+
+mkUniTreebank :: Options -> ShellState -> Language -> [A.Tree] -> Treebank
+mkUniTreebank opts sh lg trees = M.fromListWith (++) [(lin t, [prt_ t]) | t <- trees]
+ where
+ lang = prt_ lg
+ lin t = linearize sh lang t
+
+-- reads a treebank and linearizes its trees again, printing all differences
+testMultiTreebank :: Options -> ShellState -> String -> Res
+testMultiTreebank opts sh = putInXML opts "testtreebank" [] .
+ concatMap testOne .
+ getTreebanks . lines
where
testOne (e,lang,str0) = do
let tr = annot gr e
@@ -71,6 +126,7 @@ testTreebank opts sh = putInXML opts "testtreebank" [] .
]
gr = firstStateGrammar sh
+-- writes all the trees of the treebank
treesTreebank :: Options -> String -> [String]
treesTreebank _ = terms . getTreebank . lines where
terms ts = [t | (t,_) <- ts]
@@ -82,13 +138,17 @@ puts = return -- putStrLn
ret = [] -- return ()
--
-type PreTreebank = [(String,[(String,String)])]
+-- here strings are keys
+assocsTreebank :: UniTreebank -> [(String,[String])]
+assocsTreebank = M.assocs
+
+printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts]
getTreebanks :: [String] -> [(String,String,String)]
getTreebanks = concatMap grps . getTreebank where
grps (t,lls) = [(t,x,y) | (x,y) <- lls]
-getTreebank :: [String] -> PreTreebank
+getTreebank :: [String] -> MultiTreebank
getTreebank ll = case ll of
l:ls@(_:_:_) ->
let (l1,l2) = getItem ls
@@ -107,11 +167,12 @@ getTreebank ll = case ll of
getLang = takeWhile (/='"') . tail . dropWhile (/='"')
-lookupTreebank :: Treebank -> String -> [(String,String)]
-lookupTreebank tb s = maybe [] id $ M.lookup s tb
+getUniTreebank :: [String] -> UniTreebank
+getUniTreebank ls = M.fromListWith (++) [(s, ts) | s:ts <- map chop ls] where
+ chop = chunks '\t'
-pre2treebank :: PreTreebank -> Treebank
-pre2treebank tb = M.fromListWith (++) [(s,[(l,t)]) | (t,ls) <- tb, (l,s) <- ls]
+lookupTreebank :: Treebank -> String -> [String]
+lookupTreebank tb s = maybe [] id $ M.lookup s tb
annot :: StateGrammar -> String -> A.Tree
annot gr s = errVal (error "illegal tree") $ do