summaryrefslogtreecommitdiff
path: root/treebanks/susanne/convert.hs
diff options
context:
space:
mode:
Diffstat (limited to 'treebanks/susanne/convert.hs')
-rw-r--r--treebanks/susanne/convert.hs14
1 files changed, 14 insertions, 0 deletions
diff --git a/treebanks/susanne/convert.hs b/treebanks/susanne/convert.hs
new file mode 100644
index 000000000..91fdc2cf4
--- /dev/null
+++ b/treebanks/susanne/convert.hs
@@ -0,0 +1,14 @@
+import System.Directory
+import System.FilePath
+import Data.List
+
+import SusanneFormat
+
+-- | Read every entry of the @data@ directory whose name does not start with
+-- a dot (in sorted name order), parse the combined lines with 'readTreebank',
+-- drop the trees matched by 'isBreak', and write the remaining trees to the
+-- file @text@, one 'show'n tree per line.
+main :: IO ()
+main = do
+ fs <- getDirectoryContents "data"
+ -- Skip "." and ".." as well as any other dot-prefixed entry.
+ let visible = filter ((/= ".") . take 1) (sort fs)
+ txts <- mapM (readFile . ("data" </>)) visible
+ -- Split each file into lines separately: concatenating first would fuse the
+ -- last line of a file lacking a trailing newline with the first line of the
+ -- next file.
+ let ts = filter (not . isBreak) (readTreebank (concatMap lines txts))
+ writeFile "text" (unlines (map show ts))
+
+-- | Predicate used by 'main' to discard trees. It holds only for a 'Phrase'
+-- labelled @"Oh"@ whose single child is a 'Word' with second field @"YB"@
+-- and third field @"<minbrk>"@; every other value yields 'False'.
+-- NOTE(review): presumably this is the corpus' minor-break marker -- confirm
+-- against the SusanneFormat module.
+isBreak tree = case tree of
+  Phrase "Oh" [Word _ "YB" "<minbrk>" _] -> True
+  _                                      -> False