summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Data/Str.hs
blob: 0c9ab05eca9276d1b5469f2c192cf4292434d833 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
----------------------------------------------------------------------
-- |
-- Module      : Str
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:22:09 $ 
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
--
-- (Description of the module)
-----------------------------------------------------------------------------

module GF.Data.Str (
  Str (..), Tok (..),        --- constructors needed in PrGrammar 
  str2strings, str2allStrings, str, sstr, sstrV, 
  isZeroTok, prStr, plusStr, glueStr,
  strTok, 
  allItems
) where

import GF.Data.Operations(prQuotedString)
import Data.List (isPrefixOf, intersperse) --, isSuffixOf

-- | abstract token list type. AR 2001, revised and simplified 20\/4\/2003
newtype Str = Str [Tok]  deriving (Read, Show, Eq, Ord)

-- | notice that having both pre and post would leave to inconsistent situations:
--
-- > pre {"x" ; "y" / "a"} ++ post {"b" ; "a" / "x"}
--
-- always violates a condition expressed by the one or the other
data Tok = 
   TK String
 | TN Ss [(Ss, [String])] -- ^ variants depending on next string 
--- | TP Ss [(Ss, [String])] -- variants depending on previous string
    deriving (Eq, Ord, Show, Read)


-- | a variant can itself be a token list, but for simplicity only a list of strings
-- i.e. not itself containing variants
type Ss = [String]

-- matching functions in both ways

matchPrefix :: Ss -> [(Ss,[String])] -> [String] -> Ss
matchPrefix s vs t = 
  head $ [u | 
    (u,as) <- vs, 
    any (\c -> isPrefixOf c (concat (unmarkup t))) as
  ] ++ [s]
{-
matchSuffix :: String -> Ss -> [(Ss,[String])] -> Ss
matchSuffix t s vs = 
  head ([u | (u,as) <- vs, any (\c -> isSuffixOf c t) as] ++ [s])
-}
unmarkup :: [String] -> [String]
unmarkup = filter (not . isXMLtag) where
  isXMLtag s = case s of
    '<':cs@(_:_) -> last cs == '>'
    _ -> False

str2strings :: Str -> Ss
str2strings (Str st) = alls st where 
  alls st = case st of
    TK s     : ts   -> s                   : alls ts
    TN ds vs : ts   -> matchPrefix ds vs t ++ t where t = alls ts
----    u :TP ds vs: ts -> [u] ++ matchSuffix u ds vs ++ alls ts
    []              -> []

str2allStrings :: Str -> [Ss]
str2allStrings (Str st) = alls st where 
  alls st = case st of
    TK s     : ts -> [s        : t | t <- alls ts]
    TN ds vs : [] -> [ds      ++ v | v <- map fst vs]
    TN ds vs : ts -> [matchPrefix ds vs t ++ t | t <- alls ts]
    []            -> [[]]

sstr :: Str -> String
sstr = unwords . str2strings

-- | to handle a list of variants
sstrV :: [Str] -> String
sstrV ss = case ss of
  []  -> "*"
  _   -> unwords $ intersperse "/" $ map (unwords . str2strings) ss

str :: String -> Str
str s = if null s then Str [] else Str [itS s]

itS :: String -> Tok
itS s = TK s

isZeroTok :: Str -> Bool
isZeroTok t = case t of
  Str [] -> True
  Str [TK []] -> True
  _ -> False

strTok :: Ss -> [(Ss,[String])] -> Str
strTok ds vs = Str [TN ds vs]

prStr :: Str -> String
prStr = prQuotedString . sstr

plusStr :: Str -> Str -> Str
plusStr (Str ss) (Str tt) = Str (ss ++ tt)

glueStr :: Str -> Str -> Str
glueStr (Str ss) (Str tt) = Str $ case (ss,tt) of
  ([],_) -> tt
  (_,[]) -> ss
  _ -> init ss ++ glueIt (last ss) (head tt) ++ tail tt
 where
   glueIt t u = case (t,u) of
     (TK s, TK s') -> return $ TK $ s ++ s'
     (TN ds vs, TN es ws) -> return $ TN (glues (matchPrefix ds vs es) es) 
                               [(glues (matchPrefix ds vs w) w,cs) | (w,cs) <- ws]
     (TN ds vs, TK s) -> map TK $ glues (matchPrefix ds vs [s]) [s]
     (TK s, TN es ws) -> return $ TN (glues [s] es) [(glues [s] w, c) | (w,c) <- ws]

glues :: [[a]] -> [[a]] -> [[a]]
glues ss tt = case (ss,tt) of
  ([],_) -> tt
  (_,[]) -> ss
  _ -> init ss ++ [last ss ++ head tt] ++ tail tt

-- | to create the list of all lexical items
allItems :: Str -> [String]
allItems (Str s) = concatMap allOne s where
  allOne t = case t of
    TK s -> [s]
    TN ds vs -> ds ++ concatMap fst vs