From c96162ba8b228e0e2db0202c4ee155103537488a Mon Sep 17 00:00:00 2001
From: aarne <unknown>
Date: Sun, 15 Aug 2004 21:02:10 +0000
Subject: experiments with unlexer

---
 src/GF/Text/Text.hs | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'src/GF/Text')
diff --git a/src/GF/Text/Text.hs b/src/GF/Text/Text.hs
index 2fbf97fd3..de29e9026 100644
--- a/src/GF/Text/Text.hs
+++ b/src/GF/Text/Text.hs
@@ -6,7 +6,25 @@ import Char
 -- elementary text postprocessing. AR 21/11/2001
 -- This is very primitive indeed. The functions should work on
 -- token lists and not on strings. AR 5/12/2002
+-- XML hack 14/8/2004; not in use yet
 
+-- Applies @unt@ to the text outside XML tags and copies tags ('<' followed by
+-- a letter or '/', up to the next '>') verbatim. Applied from top level.
+untokWithXML :: (String -> String) -> String -> String
+untokWithXML unt s = case s of
+  '<':cs@(c:_) | isAlpha c || c == '/' -> '<':tag ++ take 1 close ++ unto (drop 1 close) where
+                  (tag,close) = span (/='>') cs -- close is "" if the tag never ends
+  '<':cs -> '<':unto cs --- any other '<' (e.g. "< ") is ordinary text
+  [] -> []
+  _ -> unt txt ++ unto rest where
+               (txt,rest) = span (/='<') s
+ where
+   unto = untokWithXML unt
+
+-- Copies a tag verbatim and applies @unt@ after it; @s@ starts just past the '<'.
+exceptXML :: (String -> String) -> String -> String
+exceptXML unt s = '<':tag ++ take 1 close ++ unt (drop 1 close) where
+  (tag,close) = span (/='>') s -- close is "" when the tag is never closed
 
 formatAsTextLit :: String -> String
 formatAsTextLit = formatAsText . unwords . map unStringLit . words 
@@ -62,3 +80,13 @@ unStringLit s = case s of
   _ -> s
  where
    strlim = (=='\'')
+
+concatRemSpace :: String -> String -- joins the words, dropping all whitespace
+concatRemSpace = filter (not . isSpace)
+{-
+concatRemSpace s = case s of
+  '<':cs -> exceptXML concatRemSpace cs
+  c : cs | isSpace c -> concatRemSpace cs
+  c :cs -> c : concatRemSpace cs
+  _ -> s
+-}
\ No newline at end of file
-- 
cgit v1.2.3