-- Provenance: recovered from the cgit (v1.2.3) patch view of the commit that
-- created src/GF/Canon/Unlex.hs:
--   commit  b1402e8bd6a68a891b00a214d6cf184d66defe19
--   author  aarne
--   date    Mon, 22 Sep 2003 13:16:55 +0000
--   subject Founding the newly structured GF2.0 cvs archive.

module Unlex where

import Operations
import Str

import Char
import List (isPrefixOf)

-- elementary text postprocessing. AR 21/11/2001

-- | Re-flow a whitespace-separated token string as running text.
--
-- The input is split with 'words' and re-joined with 'unwords', so every run
-- of whitespace in the input collapses to a single space. In between:
--
-- * a token equal to @.@, @!@ or @?@ is glued onto the preceding word and the
--   word that follows is capitalised;
-- * a token equal to @,@, @:@ or @;@ is glued onto the preceding word;
-- * a paragraph-marker token is replaced by the two-newline string;
-- * the very first word is capitalised (or, if the first token is a paragraph
--   marker, the word right after it).
formatAsText :: String -> String
formatAsText = unwords . format . cap . words
  where
    -- Walk the token list, attaching punctuation to the word before it.
    format ws = case ws of
      w : c : ww | major c -> (w ++ c) : format (cap ww)
      w : c : ww | minor c -> (w ++ c) : format ww
      c : ww     | para c  -> "\n\n" : format ww
      w : ww               -> w : format ww
      []                   -> []

    -- Upper-case the first letter of the first word, stepping over one leading
    -- paragraph marker when a word follows it.
    cap (p : (c : cs) : ww) | para p = p : (toUpper c : cs) : ww
    cap ((c : cs) : ww)              = (toUpper c : cs) : ww
    -- Total fallback: covers the empty list and an empty leading token.
    -- 'words' never yields empty tokens, so behaviour on real input is
    -- unchanged.
    cap ws                           = ws

    -- Sentence-final punctuation tokens.
    major = (`elem` [".", "!", "?"])
    -- Clause-level punctuation tokens.
    minor = (`elem` [",", ":", ";"])
    -- Paragraph-marker token.
    -- NOTE(review): in the recovered text this literal was split across raw
    -- line breaks, which is not a legal string literal and could never equal a
    -- token produced by 'words'. It looks like an HTML-to-text step swallowed
    -- a literal "<p>" marker; restored as such -- confirm against the
    -- repository copy.
    para = (== "<p>")

-- | Render a list of 'Str' values as formatted text.
--
-- Only the first element of the list is used ('take 1'); the rest are
-- dropped. The original line ends in a bare @----@ marker, presumably flagging
-- this as provisional. 'sstr' comes from an imported module and is assumed to
-- turn a 'Str' into a 'String' -- TODO confirm.
unlex :: [Str] -> String
unlex = formatAsText . performBinds . concatMap sstr . take 1 ----

-- modified from GF/src/Text by adding hyphen

-- | Resolve binding tokens in a whitespace-separated token string.
--
-- A standalone @-@ token joins its two neighbours with a hyphen, and a
-- standalone @&+@ token joins its two neighbours with nothing in between.
-- The joined word is fed back into the scan, so chains such as
-- @a &+ b &+ c@ collapse completely. A binding token without a neighbour on
-- both sides is left in place.
performBinds :: String -> String
performBinds = unwords . format . words
  where
    format ws = case ws of
      w : "-"  : u : rest -> format ((w ++ "-" ++ u) : rest)
      w : "&+" : u : rest -> format ((w ++ u) : rest)
      w : rest            -> w : format rest
      []                  -> []