summaryrefslogtreecommitdiff
path: root/src/GF/Canon/Unlex.hs
blob: 2a9fe118fa6166c55b1b135363f5c95ca12275b2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
----------------------------------------------------------------------
-- |
-- Module      : Unlex
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/02/18 19:21:07 $ 
-- > CVS $Author: peb $
-- > CVS $Revision: 1.7 $
--
-- Elementary text postprocessing: gluing bound tokens together and
-- formatting a token string as running text. AR 21/11/2001
-----------------------------------------------------------------------------

module Unlex (formatAsText, unlex, performBinds) where

import Operations
import Str

import Char
import List (isPrefixOf)

-- | Turn a whitespace-separated token string into running text.
--
-- * The first token is capitalised, as is the first token following a
--   major punctuation token (@.@, @!@, @?@).
-- * A major or minor (@,@, @:@, @;@) punctuation token is attached
--   directly to the token that precedes it.
-- * The paragraph token @&-@ is replaced by two newlines; when it opens
--   a sentence, the token after it is the one that gets capitalised.
--
-- Tokens are rejoined with 'unwords', so the paragraph break itself is
-- surrounded by single spaces in the result.
formatAsText :: String -> String
formatAsText = unwords . layout . capitalise . words
  where
    -- Walk the token list, attaching punctuation and expanding paragraph
    -- marks.  When neither guard of the first equation holds, matching
    -- falls through to the second equation.
    layout (tok : mark : rest)
      | isMajor mark = (tok ++ mark) : layout (capitalise rest)
      | isMinor mark = (tok ++ mark) : layout rest
    layout (tok : rest)
      | isPara tok = "\n\n" : layout rest
      | otherwise  = tok : layout rest
    layout [] = []

    -- Upper-case the first letter of the sentence-initial token, skipping
    -- over a leading paragraph mark.  Only ever applied to output of
    -- 'words', whose tokens are never empty, so the empty-token case
    -- cannot arise here.
    capitalise toks = case toks of
      brk : (h : t) : rest | isPara brk -> brk : (toUpper h : t) : rest
      (h : t) : rest                    -> (toUpper h : t) : rest
      []                                -> []

    isMajor = (`elem` [".", "!", "?"])
    isMinor = (`elem` [",", ":", ";"])
    isPara tok = tok == "&-"

-- | Render a list of 'Str' as formatted text.
--
-- Only the first element of the list is used (@take 1@); any further
-- elements are ignored.  Its 'sstr' rendering is passed through
-- 'performBinds' and then 'formatAsText'.
unlex :: [Str] -> String
unlex strs = formatAsText (performBinds (concatMap sstr (take 1 strs))) ----

-- | Glue tokens together around binding markers (modified from
-- GF/src/Text by adding the hyphen case).
--
-- * @w - u@ becomes @w-u@ (the hyphen is kept).
-- * @w &+ u@ becomes @wu@ (the marker is dropped).
--
-- The glued token is examined again, so chains such as @a &+ b &+ c@
-- collapse into a single token.  A marker that lacks a token on either
-- side is left in the output unchanged.
performBinds :: String -> String
performBinds = unwords . glue . words
  where
    glue (l : "-"  : r : rest) = glue ((l ++ "-" ++ r) : rest)
    glue (l : "&+" : r : rest) = glue ((l ++ r) : rest)
    glue (tok : rest)          = tok : glue rest
    glue []                    = []