summaryrefslogtreecommitdiff
path: root/src/GF/Grammar/Lexer.x
diff options
context:
space:
mode:
Diffstat (limited to 'src/GF/Grammar/Lexer.x')
-rw-r--r--src/GF/Grammar/Lexer.x272
1 files changed, 272 insertions, 0 deletions
diff --git a/src/GF/Grammar/Lexer.x b/src/GF/Grammar/Lexer.x
new file mode 100644
index 000000000..bc1a2277f
--- /dev/null
+++ b/src/GF/Grammar/Lexer.x
@@ -0,0 +1,272 @@
+-- -*- haskell -*-
+-- This Alex file was machine-generated by the BNF converter
+{
+module GF.Grammar.Lexer
+ ( Token(..), Posn(..)
+ , P, runP, lexer, getPosn, failLoc
+ , isReservedWord
+ ) where
+
+import GF.Infra.Ident
+import GF.Data.Operations
+import qualified Data.ByteString.Char8 as BS
+import qualified Data.Map as Map
+
+}
+
+
+$l = [a-zA-Z\192 - \255] # [\215 \247] -- isolatin1 letter FIXME
+$c = [A-Z\192-\221] # [\215] -- capital isolatin1 letter FIXME
+$s = [a-z\222-\255] # [\247] -- small isolatin1 letter FIXME
+$d = [0-9] -- digit
+$i = [$l $d _ '] -- identifier character
+$u = [\0-\255] -- universal: any character
+
+@rsyms = -- symbols and non-identifier-like reserved words
+ \; | \= | \{ | \} | \( | \) | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/
+
+:-
+"--" [.]* ; -- Toss single line comments
+"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ;
+
+$white+ ;
+@rsyms { tok (eitherResIdent (T_Ident . identC)) }
+\' ($u # \')* \' { tok (eitherResIdent (T_LString . BS.unpack)) }
+(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC)) }
+
+\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) }
+
+$d+ { tok (T_Integer . read . BS.unpack) }
+$d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . BS.unpack) }
+
+{
+
+tok f p s = f s
+
+data Token
+ = T_exclmark
+ | T_patt
+ | T_int_label
+ | T_oparen
+ | T_cparen
+ | T_star
+ | T_starstar
+ | T_plus
+ | T_plusplus
+ | T_comma
+ | T_minus
+ | T_rarrow
+ | T_dot
+ | T_alt
+ | T_colon
+ | T_semicolon
+ | T_less
+ | T_equal
+ | T_big_rarrow
+ | T_great
+ | T_questmark
+ | T_obrack
+ | T_lam
+ | T_lamlam
+ | T_cbrack
+ | T_ocurly
+ | T_bar
+ | T_ccurly
+ | T_underscore
+ | T_at
+ | T_PType
+ | T_Str
+ | T_Strs
+ | T_Tok
+ | T_Type
+ | T_abstract
+ | T_case
+ | T_cat
+ | T_concrete
+ | T_data
+ | T_def
+ | T_flags
+ | T_fn
+ | T_fun
+ | T_in
+ | T_incomplete
+ | T_instance
+ | T_interface
+ | T_let
+ | T_lin
+ | T_lincat
+ | T_lindef
+ | T_of
+ | T_open
+ | T_oper
+ | T_param
+ | T_pattern
+ | T_pre
+ | T_printname
+ | T_resource
+ | T_strs
+ | T_table
+ | T_transfer
+ | T_variants
+ | T_where
+ | T_with
+ | T_String String -- string literals
+ | T_Integer Integer -- integer literals
+ | T_Double Double -- double precision float literals
+ | T_LString String
+ | T_Ident Ident
+ | T_EOF
+
+eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token
+eitherResIdent tv s =
+ case Map.lookup s resWords of
+ Just t -> t
+ Nothing -> tv s
+
+isReservedWord :: BS.ByteString -> Bool
+isReservedWord s = Map.member s resWords
+
+resWords = Map.fromList
+ [ b "!" T_exclmark
+ , b "#" T_patt
+ , b "$" T_int_label
+ , b "(" T_oparen
+ , b ")" T_cparen
+ , b "*" T_star
+ , b "**" T_starstar
+ , b "+" T_plus
+ , b "++" T_plusplus
+ , b "," T_comma
+ , b "-" T_minus
+ , b "->" T_rarrow
+ , b "." T_dot
+ , b "/" T_alt
+ , b ":" T_colon
+ , b ";" T_semicolon
+ , b "<" T_less
+ , b "=" T_equal
+ , b "=>" T_big_rarrow
+ , b ">" T_great
+ , b "?" T_questmark
+ , b "[" T_obrack
+ , b "]" T_cbrack
+ , b "\\" T_lam
+ , b "\\\\" T_lamlam
+ , b "{" T_ocurly
+ , b "}" T_ccurly
+ , b "|" T_bar
+ , b "_" T_underscore
+ , b "@" T_at
+ , b "PType" T_PType
+ , b "Str" T_Str
+ , b "Strs" T_Strs
+ , b "Tok" T_Tok
+ , b "Type" T_Type
+ , b "abstract" T_abstract
+ , b "case" T_case
+ , b "cat" T_cat
+ , b "concrete" T_concrete
+ , b "data" T_data
+ , b "def" T_def
+ , b "flags" T_flags
+ , b "fn" T_fn
+ , b "fun" T_fun
+ , b "in" T_in
+ , b "incomplete" T_incomplete
+ , b "instance" T_instance
+ , b "interface" T_interface
+ , b "let" T_let
+ , b "lin" T_lin
+ , b "lincat" T_lincat
+ , b "lindef" T_lindef
+ , b "of" T_of
+ , b "open" T_open
+ , b "oper" T_oper
+ , b "param" T_param
+ , b "pattern" T_pattern
+ , b "pre" T_pre
+ , b "printname" T_printname
+ , b "resource" T_resource
+ , b "strs" T_strs
+ , b "table" T_table
+ , b "transfer" T_transfer
+ , b "variants" T_variants
+ , b "where" T_where
+ , b "with" T_with
+ ]
+ where b s t = (BS.pack s, t)
+
+unescapeInitTail :: String -> String
+unescapeInitTail = unesc . tail where
+ unesc s = case s of
+ '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
+ '\\':'n':cs -> '\n' : unesc cs
+ '\\':'t':cs -> '\t' : unesc cs
+ '"':[] -> []
+ c:cs -> c : unesc cs
+ _ -> []
+
+-------------------------------------------------------------------
+-- Alex wrapper code.
+-- A modified "posn" wrapper.
+-------------------------------------------------------------------
+
+data Posn = Pn {-# UNPACK #-} !Int
+ {-# UNPACK #-} !Int
+
+alexMove :: Posn -> Char -> Posn
+alexMove (Pn l c) '\n' = Pn (l+1) 1
+alexMove (Pn l c) _ = Pn l (c+1)
+
+alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar (AI p _ s) =
+ case BS.uncons s of
+ Nothing -> Nothing
+ Just (c,s) ->
+ let p' = alexMove p c
+ in p' `seq` Just (c, (AI p' c s))
+
+alexInputPrevChar :: AlexInput -> Char
+alexInputPrevChar (AI p c s) = c
+
+data AlexInput = AI {-# UNPACK #-} !Posn -- current position,
+ {-# UNPACK #-} !Char -- previous char
+ {-# UNPACK #-} !BS.ByteString -- current input string
+
+data ParseResult a
+ = POk AlexInput a
+ | PFailed Posn -- The position of the error
+ String -- The error message
+
+newtype P a = P { unP :: AlexInput -> ParseResult a }
+
+instance Monad P where
+ return a = a `seq` (P $ \s -> POk s a)
+ (P m) >>= k = P $ \ s -> case m s of
+ POk s1 a -> unP (k a) s1
+ PFailed posn err -> PFailed posn err
+ fail msg = P $ \(AI posn _ _) -> PFailed posn msg
+
+runP :: P a -> BS.ByteString -> Either (Posn,String) a
+runP (P f) txt =
+ case f (AI (Pn 1 0) ' ' txt) of
+ POk _ x -> Right x
+ PFailed pos msg -> Left (pos,msg)
+
+failLoc :: Posn -> String -> P a
+failLoc pos msg = P $ \_ -> PFailed pos msg
+
+lexer :: (Token -> P a) -> P a
+lexer cont = P go
+ where
+ go inp@(AI pos _ str) =
+ case alexScan inp 0 of
+ AlexEOF -> unP (cont T_EOF) inp
+ AlexError (AI pos _ _) -> PFailed pos "lexical error"
+ AlexSkip inp' len -> go inp'
+ AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp'
+
+getPosn :: P Posn
+getPosn = P $ \inp@(AI pos _ _) -> POk inp pos
+
+}