1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
----------------------------------------------------------------------
-- |
-- Module : Ident
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/15 11:43:33 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.8 $
--
-- (Description of the module)
-----------------------------------------------------------------------------
module GF.Infra.Ident (-- ** Identifiers
Ident, ident2utf8, showIdent, prefixIdent,
identS, identC, identV, identA, identAV, identW,
argIdent, isArgIdent, getArgIndex,
varStr, varX, isWildIdent, varIndex,
-- ** Raw Identifiers
RawIdent, rawIdentS, rawIdentC, ident2raw, prefixRawIdent,
isPrefixOf, showRawIdent{-,
-- ** Refreshing identifiers
IdState, initIdStateN, initIdState,
lookVar, refVar, refVarPlus-}
) where
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.ByteString.Char8 as BS(append,isPrefixOf)
-- Limit use of BS functions to the ones that work correctly on
-- UTF-8-encoded bytestrings!
import Data.Char(isDigit)
import PGF.Internal(Binary(..))
import GF.Text.Pretty
-- | the constructors labelled /INTERNAL/ are
-- internal representation never returned by the parser
data Ident =
IC {-# UNPACK #-} !RawIdent -- ^ raw identifier after parsing, resolved in Rename
| IW -- ^ wildcard
--
-- below this constructor: internal representation never returned by the parser
| IV {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int -- ^ /INTERNAL/ variable
| IA {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int -- ^ /INTERNAL/ argument of cat at position
| IAV {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int {-# UNPACK #-} !Int -- ^ /INTERNAL/ argument of cat with bindings at position
--
deriving (Eq, Ord, Show, Read)
-- | Identifiers are stored as UTF-8-encoded bytestrings.
newtype RawIdent = Id { rawId2utf8 :: UTF8.ByteString }
deriving (Eq, Ord, Show, Read)
pack = UTF8.fromString
unpack = UTF8.toString
rawIdentS = Id . pack
rawIdentC = Id
showRawIdent = unpack . rawId2utf8
prefixRawIdent (Id x) (Id y) = Id (BS.append x y)
isPrefixOf (Id x) (Id y) = BS.isPrefixOf x y
instance Binary RawIdent where
put = put . rawId2utf8
get = fmap rawIdentC get
-- | This function should be used with care, since the returned ByteString is
-- UTF-8-encoded.
ident2utf8 :: Ident -> UTF8.ByteString
ident2utf8 i = case i of
IC (Id s) -> s
IV (Id s) n -> BS.append s (pack ('_':show n))
IA (Id s) j -> BS.append s (pack ('_':show j))
IAV (Id s) b j -> BS.append s (pack ('_':show b ++ '_':show j))
IW -> pack "_"
ident2raw = Id . ident2utf8
showIdent :: Ident -> String
showIdent i = unpack $! ident2utf8 i
instance Pretty Ident where pp = pp . showIdent
identS :: String -> Ident
identS = identC . rawIdentS
identC :: RawIdent -> Ident
identV :: RawIdent -> Int -> Ident
identA :: RawIdent -> Int -> Ident
identAV:: RawIdent -> Int -> Int -> Ident
identW :: Ident
(identC, identV, identA, identAV, identW) =
(IC, IV, IA, IAV, IW)
prefixIdent :: String -> Ident -> Ident
prefixIdent pref = identC . Id . BS.append (pack pref) . ident2utf8
-- normal identifier
-- ident s = IC s
-- | to mark argument variables
argIdent :: Int -> Ident -> Int -> Ident
argIdent 0 (IC c) i = identA c i
argIdent b (IC c) i = identAV c b i
isArgIdent IA{} = True
isArgIdent IAV{} = True
isArgIdent _ = False
getArgIndex (IA _ i) = Just i
getArgIndex (IAV _ _ i) = Just i
getArgIndex (IC (Id bs))
| isDigit c =
-- (Just . read . unpack . snd . BS.spanEnd isDigit) bs -- not ok with UTF-8
(Just . read . reverse . takeWhile isDigit) s
where s@(c:_) = reverse (unpack bs)
getArgIndex x = Nothing
-- | used in lin defaults
varStr :: Ident
varStr = identA (rawIdentS "str") 0
-- | refreshing variables
varX :: Int -> Ident
varX = identV (rawIdentS "x")
isWildIdent :: Ident -> Bool
isWildIdent x = case x of
IW -> True
IC s | s == wild -> True
_ -> False
wild = Id (pack "_")
varIndex :: Ident -> Int
varIndex (IV _ n) = n
varIndex _ = -1 --- other than IV should not count
|