summaryrefslogtreecommitdiff
path: root/src/GF/Text
diff options
context:
space:
mode:
authoraarne <unknown>2004-08-15 21:02:10 +0000
committeraarne <unknown>2004-08-15 21:02:10 +0000
commitc96162ba8b228e0e2db0202c4ee155103537488a (patch)
tree5407a7e20c138ace5606e63aa891eba46c1bbf62 /src/GF/Text
parentf65d08638bb0cded9b4c74d810696e920fcebb65 (diff)
experiments with unlexer
Diffstat (limited to 'src/GF/Text')
-rw-r--r--src/GF/Text/Text.hs28
1 files changed, 28 insertions, 0 deletions
diff --git a/src/GF/Text/Text.hs b/src/GF/Text/Text.hs
index 2fbf97fd3..de29e9026 100644
--- a/src/GF/Text/Text.hs
+++ b/src/GF/Text/Text.hs
@@ -6,7 +6,25 @@ import Char
-- elementary text postprocessing. AR 21/11/2001
-- This is very primitive indeed. The functions should work on
-- token lists and not on strings. AR 5/12/2002
-- XML hack 14/8/2004; not in use yet.
-- Applies the untokenizer 'unt' to the text between XML tags while copying
-- the tags themselves through unchanged. This function is meant to be
-- applied from the top level, i.e. to a whole string.
--
-- Heuristic: '<' opens a tag only when it is immediately followed by a
-- letter (opening tag) or by '/' (closing tag). Any other '<', e.g. the
-- token "< ", is copied as an ordinary character and the text after it is
-- untokenized as usual. A tag that is never closed is copied verbatim up
-- to the end of the input, without inventing a '>'.
untokWithXML :: (String -> String) -> String -> String
untokWithXML unt s = case s of
    '<' : cs@(c : _)
      | isAlpha c || c == '/' -> '<' : tag ++ close ++ unto (drop 1 rest)
      where
        (tag, rest) = span (/= '>') cs
        -- ">" when the tag is closed, "" when the input ends inside the tag
        close = take 1 rest
    '<' : cs -> '<' : unto cs
    [] -> []
    _ -> unt txt ++ unto rest
      where
        -- s is non-empty and does not start with '<' here, so txt is
        -- non-empty and the recursion always makes progress
        (txt, rest) = span (/= '<') s
  where
    unto = untokWithXML unt
+
-- ... whereas this one is embedded on a branch of another function.
-- It expects 's' to be the input that follows an opening '<' (which the
-- caller has already consumed) and re-emits that '<' itself. Everything up
-- to the closing '>' is copied unchanged; 'unt' is applied to the text
-- after the tag. If the input contains no '>', the whole of 's' is copied
-- after the '<' and no '>' is invented.
exceptXML :: (String -> String) -> String -> String
exceptXML unt s = '<' : tag ++ close ++ unt (drop 1 rest)
  where
    (tag, rest) = span (/= '>') s
    -- ">" when the tag is closed, "" when the input ends inside the tag
    close = take 1 rest
-- Variant of formatAsText for input containing string literals: the input
-- is split into whitespace-separated words, unStringLit is applied to each
-- word, the words are rejoined with single spaces, and the result is
-- passed to formatAsText.
formatAsTextLit :: String -> String
formatAsTextLit str = formatAsText (unwords unquoted)
  where
    unquoted = [unStringLit w | w <- words str]
@@ -62,3 +80,13 @@ unStringLit s = case s of
_ -> s
where
strlim = (=='\'')
+
-- Removes all whitespace from a string: the input is split into
-- whitespace-separated words and their characters are joined back
-- together with nothing in between.
concatRemSpace :: String -> String
concatRemSpace str = [ch | w <- words str, ch <- w]
+{-
+concatRemSpace s = case s of
+ '<':cs -> exceptXML concatRemSpace cs
+ c : cs | isSpace c -> concatRemSpace cs
+ c :cs -> c : concatRemSpace cs
+ _ -> s
+-} \ No newline at end of file