diff options
| author | aarne <unknown> | 2003-09-22 13:16:55 +0000 |
|---|---|---|
| committer | aarne <unknown> | 2003-09-22 13:16:55 +0000 |
| commit | b1402e8bd6a68a891b00a214d6cf184d66defe19 (patch) | |
| tree | 90372ac4e53dce91cf949dbf8e93be06f1d9e8bd /src/GF/Canon/Unlex.hs | |
Founding the newly structured GF2.0 cvs archive.
Diffstat (limited to 'src/GF/Canon/Unlex.hs')
| -rw-r--r-- | src/GF/Canon/Unlex.hs | 37 |
1 files changed, 37 insertions, 0 deletions
{- Reconstructed from the following diff header (file added in this commit):

   diff --git a/src/GF/Canon/Unlex.hs b/src/GF/Canon/Unlex.hs
   new file mode 100644
   index 000000000..f665f4c85
   --- /dev/null
   +++ b/src/GF/Canon/Unlex.hs
   @@ -0,0 +1,37 @@
-}
module Unlex where

import Operations
import Str

import Char
import List (isPrefixOf)

-- Elementary text postprocessing (AR, 21/11/2001).

-- | Turn a whitespace-separated token stream into running text.
--
-- The input is split with 'words' and re-joined with 'unwords', so the
-- result always has exactly one space between output tokens.  In between:
--
--   * a stand-alone @.@, @!@ or @?@ token is appended to the preceding
--     token, and the token that follows is capitalised;
--   * a stand-alone @,@, @:@ or @;@ token is appended to the preceding
--     token, with no capitalisation afterwards;
--   * a @\<p\>@ token is replaced by a @"\\n\\n"@ token (which 'unwords'
--     then surrounds with spaces like any other token);
--   * the very first token of the text is capitalised.
formatAsText :: String -> String
formatAsText = unwords . layout . capitalise . words
  where
    -- Walk the tokens, gluing punctuation to the token on its left.
    -- When neither guard of the first equation fires, matching falls
    -- through to the shorter pattern below.
    layout (tok : mark : rest)
      | isMajor mark = (tok ++ mark) : layout (capitalise rest)
      | isMinor mark = (tok ++ mark) : layout rest
    layout (tok : rest)
      | isPara tok = "\n\n" : layout rest
      | otherwise  = tok : layout rest
    layout [] = []

    -- Upper-case the first letter of the leading token; if that token is
    -- a paragraph marker, upper-case the token right after it instead.
    -- Tokens come from 'words' and are therefore never empty.
    capitalise toks = case toks of
      marker : (c : cs) : rest
        | isPara marker -> marker : (toUpper c : cs) : rest
      (c : cs) : rest   -> (toUpper c : cs) : rest
      []                -> []

    -- Sentence-final punctuation: triggers capitalisation of what follows.
    isMajor tok = tok `elem` [".", "!", "?"]
    -- Sentence-internal punctuation: attached, but no capitalisation.
    isMinor tok = tok `elem` [",", ":", ";"]
    -- Paragraph marker.
    isPara tok = tok == "<p>"

-- | Linearise a list of 'Str' values to display text.
--
-- Only the first element of the list is used (@take 1@); an empty list
-- yields the empty string.  The original author tagged this line with
-- @----@, presumably flagging the restriction as provisional.
unlex :: [Str] -> String
unlex strs = formatAsText (performBinds (concatMap sstr (take 1 strs))) ----

-- | Resolve binding tokens in a whitespace-separated token stream.
--
-- Adapted from GF/src/Text, with hyphen handling added.
--
--   * @w - u@  becomes @w-u@
--   * @w &+ u@ becomes @wu@
--
-- The merged token is fed back into the scan, so chains such as
-- @a &+ b &+ c@ collapse completely.  A binder with no token on both
-- sides is left as it is.
performBinds :: String -> String
performBinds = unwords . glue . words
  where
    glue (left : "-"  : right : rest) = glue ((left ++ "-" ++ right) : rest)
    glue (left : "&+" : right : rest) = glue ((left ++ right) : rest)
    glue (tok : rest)                 = tok : glue rest
    glue []                           = []
