summaryrefslogtreecommitdiff
path: root/src/GF/Text/Text.hs
blob: 08e897a9b6069a8f59c8d8054def3e2d15c9b4d0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
module Text where

import Operations
import Char

-- elementary text postprocessing. AR 21/11/2001
-- This is very primitive indeed. The functions should work on
-- token lists and not on strings. AR 5/12/2002


-- | Like 'formatAsText', but first passes every whitespace-separated
-- token through 'unStringLit'.
-- The intermediate word list is rebuilt into a string and split again by
-- 'formatAsText'; we rely on the compiler to fuse that away.
formatAsTextLit :: String -> String
formatAsTextLit str =
  formatAsText (unwords [unStringLit tok | tok <- words str])

-- | Like 'formatAsCode', but first passes every whitespace-separated
-- token through 'unStringLit'.
formatAsCodeLit :: String -> String
formatAsCodeLit str =
  formatAsCode (unwords [unStringLit tok | tok <- words str])

-- | Lay out a whitespace-separated token string as running text.
--
-- The input is split with 'words'; the first token is capitalised, then:
--
--   * a token equal to one of @.@ @!@ @?@ is appended to the token before
--     it, and the token that follows is capitalised;
--   * a token equal to one of @,@ @:@ @;@ is appended to the token before
--     it;
--   * a @<p>@ token is replaced by the string @"\n\n"@.
--
-- The result is joined with 'unwords', so the paragraph break still has a
-- space on either side.
-- (Assumes 'singleton' from "Operations" builds a one-element list.)
formatAsText :: String -> String
formatAsText input = unwords (glue (capitalise (words input)))
  where
    -- Walk the tokens left to right, attaching punctuation to the token
    -- on its left. Clause order matters: punctuation is tested first.
    glue (tok : punct : rest)
      | isMajor punct = (tok ++ punct) : glue (capitalise rest)
      | isMinor punct = (tok ++ punct) : glue rest
    glue (tok : rest)
      | isPara tok = "\n\n" : glue rest
      | otherwise  = tok : glue rest
    glue [] = []

    -- Upper-case the first letter of the leading token; when the leading
    -- token is a paragraph mark, upper-case the token after it instead.
    capitalise (mark : (h : t) : rest)
      | isPara mark = mark : (toUpper h : t) : rest
    capitalise ((h : t) : rest) = (toUpper h : t) : rest
    capitalise [] = []

    isMajor tok = tok `elem` map singleton ".!?"
    isMinor tok = tok `elem` map singleton ",:;"
    isPara tok  = tok == "<p>"

-- | Lay out a whitespace-separated token string as code by removing the
-- spaces around bracket tokens.
--
-- The input is split with 'words'. An opening-bracket token (one of
-- @(@ @[@ @{@) is glued to the token after it, and any bracket token
-- (opening, or one of @}@ @]@ @)@) is glued to the token before it. The
-- result is joined with 'unwords'.
-- (Assumes 'singleton' from "Operations" builds a one-element list.)
formatAsCode :: String -> String
formatAsCode src = unwords (attach (words src))
  where
    -- An opening bracket absorbs the (already formatted) token after it;
    -- the merged token is then formatted again with what remains.
    attach (open : tok : rest)
      | isOpen open = attach ((open ++ tok') : rest')
      where (tok' : rest') = attach (tok : rest)
    -- A token followed by any bracket absorbs the formatted bracket group.
    attach (tok : br : rest)
      | isBracket br = attach ((tok ++ br') : rest')
      where (br' : rest') = attach (br : rest)
    attach (tok : rest) = tok : attach rest
    attach [] = []

    isOpen t    = t `elem` map singleton "([{"
    isClose t   = t `elem` map singleton "}])"
    isBracket t = isOpen t || isClose t

-- | Join tokens that are connected by the binding token @&+@.
--
-- The input is split with 'words'; wherever a token is followed by @&+@
-- and a further token, the two outer tokens are concatenated and the
-- @&+@ dropped. The merged token is examined again, so chains such as
-- @a &+ b &+ c@ collapse into one token. A @&+@ with no token on one of
-- its sides is left in place. The result is joined with 'unwords'.
performBinds :: String -> String
performBinds text = unwords (bind (words text))
  where
    bind (left : "&+" : right : rest) = bind ((left ++ right) : rest)
    bind (tok : rest)                 = tok : bind rest
    bind []                           = []

-- | Strip one pair of enclosing single quotes from a token.
--
-- If the string starts and ends with @'@ and has at least two characters,
-- the first and last characters are removed; any other string is returned
-- unchanged. In particular a lone @'@ is returned as is.
unStringLit :: String -> String
unStringLit s = case s of
  -- Require a non-empty tail: 'last' and 'init' are undefined on the
  -- empty list, which is what a one-character input would give them.
  c : cs@(_ : _) | strlim c && strlim (last cs) -> init cs
  _ -> s
 where
   strlim = (=='\'')