diff options
| author | aarne <unknown> | 2004-08-15 21:02:10 +0000 |
|---|---|---|
| committer | aarne <unknown> | 2004-08-15 21:02:10 +0000 |
| commit | c96162ba8b228e0e2db0202c4ee155103537488a (patch) | |
| tree | 5407a7e20c138ace5606e63aa891eba46c1bbf62 /src/GF/Text/Text.hs | |
| parent | f65d08638bb0cded9b4c74d810696e920fcebb65 (diff) | |
experiments with unlexer
Diffstat (limited to 'src/GF/Text/Text.hs')
| -rw-r--r-- | src/GF/Text/Text.hs | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/src/GF/Text/Text.hs b/src/GF/Text/Text.hs index 2fbf97fd3..de29e9026 100644 --- a/src/GF/Text/Text.hs +++ b/src/GF/Text/Text.hs @@ -6,7 +6,25 @@ import Char -- elementary text postprocessing. AR 21/11/2001 -- This is very primitive indeed. The functions should work on -- token lists and not on strings. AR 5/12/2002 +-- XML hack 14/8/2004; not in use yet +-- does not apply untokenizer within XML tags --- heuristic "< " +-- this function is applied from top level... +untokWithXML :: (String -> String) -> String -> String +untokWithXML unt s = case s of + '<':cs@(c:_) | isAlpha c -> '<':beg ++ ">" ++ unto (drop 1 rest) where + (beg,rest) = span (/='>') cs + '<':cs -> '<':unto cs --- + [] -> [] + _ -> unt beg ++ unto rest where + (beg,rest) = span (/='<') s + where + unto = untokWithXML unt + +-- ... whereas this one is embedded on a branch +exceptXML :: (String -> String) -> String -> String +exceptXML unt s = '<':beg ++ ">" ++ unt (drop 1 rest) where + (beg,rest) = span (/='>') s formatAsTextLit :: String -> String formatAsTextLit = formatAsText . unwords . map unStringLit . words @@ -62,3 +80,13 @@ unStringLit s = case s of _ -> s where strlim = (=='\'') + +concatRemSpace :: String -> String +concatRemSpace = concat . words +{- +concatRemSpace s = case s of + '<':cs -> exceptXML concatRemSpace cs + c : cs | isSpace c -> concatRemSpace cs + c :cs -> c : concatRemSpace cs + _ -> s +-}
\ No newline at end of file |
