summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Infra/Ident.hs
blob: 7d0bed8041ef75f0d20915ea336f646fff984fa0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
----------------------------------------------------------------------
-- |
-- Module      : Ident
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/15 11:43:33 $ 
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.8 $
--
-- (Description of the module)
-----------------------------------------------------------------------------

module GF.Infra.Ident (-- ** Identifiers
              ModuleName(..), moduleNameS,
	      Ident, ident2utf8, showIdent, prefixIdent,
	      identS, identC, identV, identA, identAV, identW,
	      argIdent, isArgIdent, getArgIndex,
              varStr, varX, isWildIdent, varIndex,
              -- ** Raw Identifiers
              RawIdent, rawIdentS, rawIdentC, ident2raw, prefixRawIdent,
              isPrefixOf, showRawIdent{-,
	      -- ** Refreshing identifiers
	      IdState, initIdStateN, initIdState,
	      lookVar, refVar, refVarPlus-}
	     ) where

import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.ByteString.Char8 as BS(append,isPrefixOf)
                 -- Limit use of BS functions to the ones that work correctly on
                 -- UTF-8-encoded bytestrings!
import Data.Char(isDigit)
import PGF.Internal(Binary(..))
import GF.Text.Pretty


-- | Module names
newtype ModuleName = MN Ident deriving (Eq,Ord)

moduleNameS = MN . identS

instance Show ModuleName where showsPrec d (MN m) = showsPrec d m
instance Pretty ModuleName where pp (MN m) = pp m


-- | the constructors labelled /INTERNAL/ are
-- internal representation never returned by the parser
data Ident = 
   IC  {-# UNPACK #-} !RawIdent                                           -- ^ raw identifier after parsing, resolved in Rename
 | IW                                                                     -- ^ wildcard
--
-- below this constructor: internal representation never returned by the parser
 | IV  {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int                       -- ^ /INTERNAL/ variable
 | IA  {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int                       -- ^ /INTERNAL/ argument of cat at position
 | IAV {-# UNPACK #-} !RawIdent {-# UNPACK #-} !Int {-# UNPACK #-} !Int   -- ^ /INTERNAL/ argument of cat with bindings at position
-- 
  deriving (Eq, Ord, Show, Read)

-- | Identifiers are stored as UTF-8-encoded bytestrings.
newtype RawIdent = Id { rawId2utf8 :: UTF8.ByteString }
  deriving (Eq, Ord, Show, Read)

pack = UTF8.fromString
unpack = UTF8.toString

rawIdentS = Id . pack
rawIdentC = Id
showRawIdent = unpack . rawId2utf8

prefixRawIdent (Id x) (Id y) = Id (BS.append x y) 
isPrefixOf (Id x) (Id y) = BS.isPrefixOf x y

instance Binary RawIdent where
  put = put . rawId2utf8
  get = fmap rawIdentC get


-- | This function should be used with care, since the returned ByteString is
-- UTF-8-encoded.
ident2utf8 :: Ident -> UTF8.ByteString
ident2utf8 i = case i of
  IC (Id s) -> s
  IV (Id s) n -> BS.append s (pack ('_':show n))
  IA (Id s) j -> BS.append s (pack ('_':show j))
  IAV (Id s) b j -> BS.append s (pack ('_':show b ++ '_':show j))
  IW -> pack "_"

ident2raw = Id . ident2utf8

showIdent :: Ident -> String
showIdent i = unpack $! ident2utf8 i

instance Pretty Ident where pp = pp . showIdent

identS :: String -> Ident
identS = identC . rawIdentS

identC :: RawIdent -> Ident
identV :: RawIdent -> Int -> Ident
identA :: RawIdent -> Int -> Ident
identAV:: RawIdent -> Int -> Int -> Ident
identW :: Ident
(identC, identV, identA, identAV, identW) = 
    (IC,     IV,     IA,     IAV,     IW)


prefixIdent :: String -> Ident -> Ident
prefixIdent pref = identC . Id . BS.append (pack pref) . ident2utf8

-- normal identifier
-- ident s = IC s

-- | to mark argument variables
argIdent :: Int -> Ident -> Int -> Ident
argIdent 0 (IC c) i = identA  c i
argIdent b (IC c) i = identAV c b i

isArgIdent IA{}  = True
isArgIdent IAV{} = True
isArgIdent _     = False

getArgIndex (IA _ i)    = Just i
getArgIndex (IAV _ _ i) = Just i
getArgIndex (IC (Id bs))
  | isDigit c =
   -- (Just . read . unpack . snd . BS.spanEnd isDigit) bs -- not ok with UTF-8
      (Just . read . reverse . takeWhile isDigit) s
  where s@(c:_) = reverse (unpack bs)
getArgIndex x = Nothing

-- | used in lin defaults
varStr :: Ident
varStr = identA (rawIdentS "str") 0

-- | refreshing variables
varX :: Int -> Ident
varX = identV (rawIdentS "x")

isWildIdent :: Ident -> Bool
isWildIdent x = case x of
  IW -> True
  IC s | s == wild -> True
  _ -> False

wild = Id (pack "_")

varIndex :: Ident -> Int
varIndex (IV _ n) = n
varIndex _ = -1 --- other than IV should not count