summaryrefslogtreecommitdiff
path: root/src/GF/Text/Text.hs
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-25 16:43:48 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-25 16:43:48 +0000
commitb96b36f43de3e2f8b58d5f539daa6f6d47f25870 (patch)
tree0992334be13cec6538a1dea22fbbf26ad6bdf224 /src/GF/Text/Text.hs
parentfe367412e0aeb4ad5c02de68e6eca382e0f96984 (diff)
removed src for 2.9
Diffstat (limited to 'src/GF/Text/Text.hs')
-rw-r--r--src/GF/Text/Text.hs149
1 files changed, 0 insertions, 149 deletions
diff --git a/src/GF/Text/Text.hs b/src/GF/Text/Text.hs
deleted file mode 100644
index b55355c20..000000000
--- a/src/GF/Text/Text.hs
+++ /dev/null
@@ -1,149 +0,0 @@
-----------------------------------------------------------------------
--- |
--- Module : Text
--- Maintainer : AR
--- Stability : (stable)
--- Portability : (portable)
---
--- > CVS $Date: 2005/06/23 14:32:44 $
--- > CVS $Author: aarne $
--- > CVS $Revision: 1.10 $
---
--- elementary text postprocessing. AR 21\/11\/2001.
---
--- This is very primitive indeed. The functions should work on
--- token lists and not on strings. AR 5\/12\/2002
---
--- XML hack 14\/8\/2004; not in use yet
------------------------------------------------------------------------------
-
-module GF.Text.Text (untokWithXML,
- exceptXML,
- formatAsTextLit,
- formatAsCodeLit,
- formatAsText,
- formatAsHTML,
- formatAsLatex,
- formatAsCode,
- performBinds,
- performBindsFinnish,
- unStringLit,
- concatRemSpace
- ) where
-
-import GF.Data.Operations
-import Data.Char
-
--- | does not apply untokenizer within XML tags --- heuristic "< "
--- this function is applied from top level...
-untokWithXML :: (String -> String) -> String -> String
-untokWithXML unt s = case s of
- '<':cs@(c:_) | isAlpha c -> '<':beg ++ ">" ++ unto (drop 1 rest) where
- (beg,rest) = span (/='>') cs
- '<':cs -> '<':unto cs ---
- [] -> []
- _ -> unt beg ++ unto rest where
- (beg,rest) = span (/='<') s
- where
- unto = untokWithXML unt
-
--- | ... whereas this one is embedded on a branch
-exceptXML :: (String -> String) -> String -> String
-exceptXML unt s = '<':beg ++ ">" ++ unt (drop 1 rest) where
- (beg,rest) = span (/='>') s
-
-formatAsTextLit :: String -> String
-formatAsTextLit = formatAsText . unwords . map unStringLit . words
---- hope that there will be deforestation...
-
-formatAsCodeLit :: String -> String
-formatAsCodeLit = formatAsCode . unwords . map unStringLit . words
-
-formatAsText,formatAsHTML,formatAsLatex :: String -> String
-formatAsText = formatAsTextGen (const False) (=="&-")
-formatAsHTML = formatAsTextGen (\s -> take 1 s == "<" || last s == '>') (const False)
-formatAsLatex = formatAsTextGen ((=="\\") . take 1) (const False)
-
-formatAsTextGen :: (String -> Bool) -> (String -> Bool) -> String -> String
-formatAsTextGen tag para = unwords . format . cap . words where
- format ws = case ws of
- w : ww | capit w -> format $ (cap ww)
- w : c : ww | major c -> format $ (w ++ c) :(cap ww)
- w : c : ww | minor c -> format $ (w ++ c) : ww
- p : c : ww | openp p -> format $ (p ++ c) :ww
- p : c : ww | spanish p -> format $ (p ++ concat (cap [c])) :ww
- c : ww | para c -> "\n\n" : format ww
- w : ww -> w : format ww
- [] -> []
- cap (p:ww) | tag p = p : cap ww
- cap ((c:cs):ww) = (toUpper c : cs) : ww
- cap [] = []
- capit = (=="&|")
- major = flip elem (map singleton ".!?")
- minor = flip elem (map singleton ",:;)")
- openp = all (flip elem "(")
- spanish = all (flip elem "\161\191")
-
-formatAsCode :: String -> String
-formatAsCode = rend 0 . words where
- -- render from BNF Converter
- rend i ss = case ss of
- "[" :ts -> cons "[" $ rend i ts
- "(" :ts -> cons "(" $ rend i ts
- "{" :ts -> cons "{" $ new (i+1) $ rend (i+1) ts
- "}" : ";":ts -> new (i-1) $ space "}" $ cons ";" $ new (i-1) $ rend (i-1) ts
- "}" :ts -> new (i-1) $ cons "}" $ new (i-1) $ rend (i-1) ts
- ";" :ts -> cons ";" $ new i $ rend i ts
- t : "," :ts -> cons t $ space "," $ rend i ts
- t : ")" :ts -> cons t $ cons ")" $ rend i ts
- t : "]" :ts -> cons t $ cons "]" $ rend i ts
- t :ts -> space t $ rend i ts
- _ -> ""
- cons s t = s ++ t
- new i s = '\n' : replicate (2*i) ' ' ++ dropWhile isSpace s
- space t s = if null s then t else t ++ " " ++ s
-
-performBinds :: String -> String
-performBinds = performBindsOpt (\x y -> y)
-
-
--- The function defines an effect of the former on the latter part,
--- such as in vowel harmony. It is triggered by the binder token "&*"
-
-performBindsOpt :: (String -> String -> String) -> String -> String
-performBindsOpt harm = unwords . format . words where
- format ws = case ws of
- w : "&+" : u : ws -> format ((w ++ u) : ws)
- w : "&*" : u : ws -> format ((w ++ harm w u) : ws)
- w : ws -> w : format ws
- [] -> []
-
--- unlexer for Finnish particles
--- Notice: left associativity crucial for "tie &* ko &* han" --> "tieköhän"
-
-performBindsFinnish :: String -> String
-performBindsFinnish = performBindsOpt vowelHarmony where
- vowelHarmony w p = if any (flip elem "aouAOU") w then p else map toFront p
- toFront c = case c of
- 'A' -> '\196'
- 'O' -> '\214'
- 'a' -> '\228'
- 'o' -> '\246'
- _ -> c
-
-unStringLit :: String -> String
-unStringLit s = case s of
- c : cs | strlim c && strlim (last cs) -> init cs
- _ -> s
- where
- strlim = (=='\'')
-
-concatRemSpace :: String -> String
-concatRemSpace = concat . words
-{-
-concatRemSpace s = case s of
- '<':cs -> exceptXML concatRemSpace cs
- c : cs | isSpace c -> concatRemSpace cs
- c :cs -> c : concatRemSpace cs
- _ -> s
--}