diff options
Diffstat (limited to 'src/GF/Grammar/Lexer.x')
| -rw-r--r-- | src/GF/Grammar/Lexer.x | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/src/GF/Grammar/Lexer.x b/src/GF/Grammar/Lexer.x new file mode 100644 index 000000000..bc1a2277f --- /dev/null +++ b/src/GF/Grammar/Lexer.x @@ -0,0 +1,272 @@ +-- -*- haskell -*- +-- This Alex file was machine-generated by the BNF converter +{ +module GF.Grammar.Lexer + ( Token(..), Posn(..) + , P, runP, lexer, getPosn, failLoc + , isReservedWord + ) where + +import GF.Infra.Ident +import GF.Data.Operations +import qualified Data.ByteString.Char8 as BS +import qualified Data.Map as Map + +} + + +$l = [a-zA-Z\192 - \255] # [\215 \247] -- isolatin1 letter FIXME +$c = [A-Z\192-\221] # [\215] -- capital isolatin1 letter FIXME +$s = [a-z\222-\255] # [\247] -- small isolatin1 letter FIXME +$d = [0-9] -- digit +$i = [$l $d _ '] -- identifier character +$u = [\0-\255] -- universal: any character + +@rsyms = -- symbols and non-identifier-like reserved words + \; | \= | \{ | \} | \( | \) | \* \* | \: | \- \> | \, | \[ | \] | \- | \. | \| | \% | \? | \< | \> | \@ | \# | \! | \* | \+ | \+ \+ | \\ | \\\\ | \= \> | \_ | \$ | \/ + +:- +"--" [.]* ; -- Toss single line comments +"{-" ([$u # \-] | \- [$u # \}])* ("-")+ "}" ; + +$white+ ; +@rsyms { tok (eitherResIdent (T_Ident . identC)) } +\' ($u # \')* \' { tok (eitherResIdent (T_LString . BS.unpack)) } +(\_ | $l)($l | $d | \_ | \')* { tok (eitherResIdent (T_Ident . identC)) } + +\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \" { tok (T_String . unescapeInitTail . BS.unpack) } + +$d+ { tok (T_Integer . read . BS.unpack) } +$d+ \. $d+ (e (\-)? $d+)? { tok (T_Double . read . BS.unpack) } + +{ + +tok f p s = f s + +data Token + = T_exclmark + | T_patt + | T_int_label + | T_oparen + | T_cparen + | T_star + | T_starstar + | T_plus + | T_plusplus + | T_comma + | T_minus + | T_rarrow + | T_dot + | T_alt + | T_colon + | T_semicolon + | T_less + | T_equal + | T_big_rarrow + | T_great + | T_questmark + | T_obrack + | T_lam + | T_lamlam + | T_cbrack + | T_ocurly + | T_bar + | T_ccurly + | T_underscore + | T_at + | T_PType + | T_Str + | T_Strs + | T_Tok + | T_Type + | T_abstract + | T_case + | T_cat + | T_concrete + | T_data + | T_def + | T_flags + | T_fn + | T_fun + | T_in + | T_incomplete + | T_instance + | T_interface + | T_let + | T_lin + | T_lincat + | T_lindef + | T_of + | T_open + | T_oper + | T_param + | T_pattern + | T_pre + | T_printname + | T_resource + | T_strs + | T_table + | T_transfer + | T_variants + | T_where + | T_with + | T_String String -- string literals + | T_Integer Integer -- integer literals + | T_Double Double -- double precision float literals + | T_LString String + | T_Ident Ident + | T_EOF + +eitherResIdent :: (BS.ByteString -> Token) -> BS.ByteString -> Token +eitherResIdent tv s = + case Map.lookup s resWords of + Just t -> t + Nothing -> tv s + +isReservedWord :: BS.ByteString -> Bool +isReservedWord s = Map.member s resWords + +resWords = Map.fromList + [ b "!" T_exclmark + , b "#" T_patt + , b "$" T_int_label + , b "(" T_oparen + , b ")" T_cparen + , b "*" T_star + , b "**" T_starstar + , b "+" T_plus + , b "++" T_plusplus + , b "," T_comma + , b "-" T_minus + , b "->" T_rarrow + , b "." T_dot + , b "/" T_alt + , b ":" T_colon + , b ";" T_semicolon + , b "<" T_less + , b "=" T_equal + , b "=>" T_big_rarrow + , b ">" T_great + , b "?" T_questmark + , b "[" T_obrack + , b "]" T_cbrack + , b "\\" T_lam + , b "\\\\" T_lamlam + , b "{" T_ocurly + , b "}" T_ccurly + , b "|" T_bar + , b "_" T_underscore + , b "@" T_at + , b "PType" T_PType + , b "Str" T_Str + , b "Strs" T_Strs + , b "Tok" T_Tok + , b "Type" T_Type + , b "abstract" T_abstract + , b "case" T_case + , b "cat" T_cat + , b "concrete" T_concrete + , b "data" T_data + , b "def" T_def + , b "flags" T_flags + , b "fn" T_fn + , b "fun" T_fun + , b "in" T_in + , b "incomplete" T_incomplete + , b "instance" T_instance + , b "interface" T_interface + , b "let" T_let + , b "lin" T_lin + , b "lincat" T_lincat + , b "lindef" T_lindef + , b "of" T_of + , b "open" T_open + , b "oper" T_oper + , b "param" T_param + , b "pattern" T_pattern + , b "pre" T_pre + , b "printname" T_printname + , b "resource" T_resource + , b "strs" T_strs + , b "table" T_table + , b "transfer" T_transfer + , b "variants" T_variants + , b "where" T_where + , b "with" T_with + ] + where b s t = (BS.pack s, t) + +unescapeInitTail :: String -> String +unescapeInitTail = unesc . tail where + unesc s = case s of + '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs + '\\':'n':cs -> '\n' : unesc cs + '\\':'t':cs -> '\t' : unesc cs + '"':[] -> [] + c:cs -> c : unesc cs + _ -> [] + +------------------------------------------------------------------- +-- Alex wrapper code. +-- A modified "posn" wrapper. +------------------------------------------------------------------- + +data Posn = Pn {-# UNPACK #-} !Int + {-# UNPACK #-} !Int + +alexMove :: Posn -> Char -> Posn +alexMove (Pn l c) '\n' = Pn (l+1) 1 +alexMove (Pn l c) _ = Pn l (c+1) + +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar (AI p _ s) = + case BS.uncons s of + Nothing -> Nothing + Just (c,s) -> + let p' = alexMove p c + in p' `seq` Just (c, (AI p' c s)) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (AI p c s) = c + +data AlexInput = AI {-# UNPACK #-} !Posn -- current position, + {-# UNPACK #-} !Char -- previous char + {-# UNPACK #-} !BS.ByteString -- current input string + +data ParseResult a + = POk AlexInput a + | PFailed Posn -- The position of the error + String -- The error message + +newtype P a = P { unP :: AlexInput -> ParseResult a } + +instance Monad P where + return a = a `seq` (P $ \s -> POk s a) + (P m) >>= k = P $ \ s -> case m s of + POk s1 a -> unP (k a) s1 + PFailed posn err -> PFailed posn err + fail msg = P $ \(AI posn _ _) -> PFailed posn msg + +runP :: P a -> BS.ByteString -> Either (Posn,String) a +runP (P f) txt = + case f (AI (Pn 1 0) ' ' txt) of + POk _ x -> Right x + PFailed pos msg -> Left (pos,msg) + +failLoc :: Posn -> String -> P a +failLoc pos msg = P $ \_ -> PFailed pos msg + +lexer :: (Token -> P a) -> P a +lexer cont = P go + where + go inp@(AI pos _ str) = + case alexScan inp 0 of + AlexEOF -> unP (cont T_EOF) inp + AlexError (AI pos _ _) -> PFailed pos "lexical error" + AlexSkip inp' len -> go inp' + AlexToken inp' len act -> unP (cont (act pos (BS.take len str))) inp' + +getPosn :: P Posn +getPosn = P $ \inp@(AI pos _ _) -> POk inp pos + +} |
