summaryrefslogtreecommitdiff
path: root/src/GF/Canon/Unlex.hs
diff options
context:
space:
mode:
authoraarne <unknown>2003-09-22 13:16:55 +0000
committeraarne <unknown>2003-09-22 13:16:55 +0000
commitb1402e8bd6a68a891b00a214d6cf184d66defe19 (patch)
tree90372ac4e53dce91cf949dbf8e93be06f1d9e8bd /src/GF/Canon/Unlex.hs
Founding the newly structured GF2.0 cvs archive.
Diffstat (limited to 'src/GF/Canon/Unlex.hs')
-rw-r--r--src/GF/Canon/Unlex.hs37
1 files changed, 37 insertions, 0 deletions
diff --git a/src/GF/Canon/Unlex.hs b/src/GF/Canon/Unlex.hs
new file mode 100644
index 000000000..f665f4c85
--- /dev/null
+++ b/src/GF/Canon/Unlex.hs
@@ -0,0 +1,37 @@
+module Unlex where
+
+import Operations
+import Str
+
+import Char
+import List (isPrefixOf)
+
+-- | Elementary text postprocessing (AR 21/11/2001).
+--
+-- The input is split with 'words', rewritten token by token, and joined
+-- again with 'unwords':
+--
+-- * the first character of the first word is upper-cased, and so is the
+--   first character of the word following each @.@, @!@ or @?@ token
+--   (one leading @\<p\>@ token is skipped when looking for that word);
+-- * a free-standing @.@, @!@, @?@, @,@, @:@ or @;@ token is appended,
+--   without a space, to the token in front of it;
+-- * any other @\<p\>@ token is replaced by the token @"\\n\\n"@.
+--
+-- Since the result is assembled with 'unwords', a @"\\n\\n"@ token is still
+-- separated from its neighbours by single spaces.
+formatAsText :: String -> String
+formatAsText = unwords . format . cap . words
+  where
+    -- Punctuation is looked for in the *second* position, so that it can be
+    -- glued onto the token before it; when neither guard fires, matching
+    -- falls through to the one-token equations below.
+    format (w : c : rest)
+      | major c = (w ++ c) : format (cap rest)
+      | minor c = (w ++ c) : format rest
+    format (w : rest)
+      | para w    = "\n\n" : format rest
+      | otherwise = w : format rest
+    format [] = []
+
+    -- Upper-case the first letter of the first word, looking past a
+    -- paragraph marker if there is one.
+    cap (p : (c : cs) : rest) | para p = p : (toUpper c : cs) : rest
+    cap ((c : cs) : rest) = (toUpper c : cs) : rest
+    -- Total fallback: the empty list, and any list that starts with an
+    -- empty token, is returned unchanged instead of failing to match.
+    cap ts = ts
+
+    major = (`elem` [".", "!", "?"])  -- sentence-ending punctuation
+    minor = (`elem` [",", ":", ";"])  -- sentence-internal punctuation
+    para  = (== "<p>")                -- paragraph marker
+
+-- | Render a list of 'Str' values as formatted text.
+--
+-- Only the first element of the list is used (@take 1@); an empty list
+-- yields the empty string.  That element is flattened to a 'String' with
+-- @sstr@ (defined outside this module, presumably in "Str"), its binding
+-- tokens are resolved by 'performBinds', and the result is passed through
+-- 'formatAsText'.
+unlex :: [Str] -> String
+unlex strs = formatAsText (performBinds flattened) ----
+  where
+    flattened = concatMap sstr (take 1 strs)
+
+-- | Resolve binding tokens in a whitespace-separated token string.
+-- Modified from GF/src/Text by adding the hyphen case.
+--
+-- * @w - u@  becomes @w-u@  (the hyphen is kept, the spaces are dropped);
+-- * @w &+ u@ becomes @wu@   (the @&+@ token is dropped entirely).
+--
+-- A freshly glued token is examined again, so chains such as
+-- @a &+ b &+ c@ collapse to @abc@.  A @-@ or @&+@ token that does not have
+-- a token on both sides is left in place.
+performBinds :: String -> String
+performBinds = unwords . glue . words
+  where
+    glue (l : "-"  : r : rest) = glue ((l ++ "-" ++ r) : rest)
+    glue (l : "&+" : r : rest) = glue ((l ++ r) : rest)
+    glue (tok : rest)          = tok : glue rest
+    glue []                    = []
+