diff options
| author | bringert <unknown> | 2004-08-23 07:51:36 +0000 |
|---|---|---|
| committer | bringert <unknown> | 2004-08-23 07:51:36 +0000 |
| commit | 65f012d15513814bd2cc4ad74f54edd35ade13fe (patch) | |
| tree | 089419071773038e8357a6b97a9ec0481df2a338 /src/GF/CFGM/LexCFG.hs | |
| parent | 25ffe15333a881022047409a1c12a17dd41d1198 (diff) | |
Added CFGM format (pm -printer=cfgm) and utf8 conversion for pm.
Diffstat (limited to 'src/GF/CFGM/LexCFG.hs')
| -rw-r--r-- | src/GF/CFGM/LexCFG.hs | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/src/GF/CFGM/LexCFG.hs b/src/GF/CFGM/LexCFG.hs new file mode 100644 index 000000000..60d5ef632 --- /dev/null +++ b/src/GF/CFGM/LexCFG.hs @@ -0,0 +1,273 @@ +{-# OPTIONS -cpp #-} +{-# LINE 3 "LexCFG.x" #-} +module LexCFG where + +import ErrM + +#if __GLASGOW_HASKELL__ >= 503 +import Data.Array +import Data.Char (ord) +import Data.Array.Base (unsafeAt) +#else +import Array +import Char (ord) +#endif +alex_base :: Array Int Int +alex_base = listArray (0,11) [1,57,66,0,9,154,362,0,277,485,211,51] + +alex_table :: Array Int Int +alex_table = listArray (0,740) [0,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,2,2,2,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,3,8,-1,-1,-1,-1,-1,-1,-1,-1,-1,3,4,3,3,11,11,11,11,11,11,11,11,11,11,3,3,-1,-1,-1,-1,-1,2,2,2,2,2,3,0,0,0,2,2,2,2,2,0,0,0,0,0,0,0,0,0,2,0,0,3,-1,3,-1,-1,-1,2,11,11,11,11,11,11,11,11,11,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,-1,3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,6,0,0,0,0,0,0,0,0,6,6,6,6,6,6,6,6,6,6,0,0,0,0,-1,0,0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,9,0,0,-1,6,9,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0,0,0,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,10,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0,6,0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0,0,0,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,10,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] + +alex_check :: Array Int Int +alex_check = listArray (0,740) [-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,9,10,11,12,13,62,-1,-1,-1,9,10,11,12,13,-1,-1,-1,-1,-1,-1,-1,-1,-1,32,-1,-1,91,92,93,94,95,96,32,48,49,50,51,52,53,54,55,56,57,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,39,-1,-1,-1,-1,-1,-1,-1,-1,48,49,50,51,52,53,54,55,56,57,-1,-1,-1,-1,215,-1,-1,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,34,-1,-1,247,95,39,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,92,-1,-1,-1,-1,-1,-1,-1,34,-1,-1,-1,-1,-1,-1,-1,-1,-1,110,-1,-1,-1,-1,-1,116,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,92,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,39,248,249,250,251,252,253,254,255,48,49,50,51,52,53,54,55,56,57,-1,-1,-1,-1,-1,-1,-1,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,-1,-1,-1,-1,95,-1,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,34,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,92,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,-1,248,249,250,251,252,253,254,255,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1] + +alex_deflt :: Array Int Int +alex_deflt = listArray (0,11) [5,-1,-1,-1,-1,-1,-1,-1,9,9,-1,-1] + +alex_accept = listArray (0::Int,11) [[],[],[(AlexAccSkip)],[(AlexAcc (alex_action_1))],[],[(AlexAcc (alex_action_2))],[(AlexAcc (alex_action_2))],[(AlexAcc (alex_action_3))],[],[],[],[(AlexAcc (alex_action_4))]] +{-# LINE 31 "LexCFG.x" #-} + +tok f p s = f p s + +data Tok = + TS String -- reserved words + | TL String -- string literals + | TI String -- integer literals + | TV String -- identifiers + | TD String -- double precision float literals + | TC String -- character literals + + deriving (Eq,Show) + +data Token = + PT Posn Tok + | Err Posn + deriving Show + +tokenPos (PT (Pn _ l _) _ :_) = "line " ++ show l +tokenPos (Err (Pn _ l _) :_) = "line " ++ show l +tokenPos _ = "end of file" + +posLineCol (Pn _ l c) = (l,c) +mkPosToken t@(PT p _) = (posLineCol p, prToken t) + +prToken t = case t of + PT _ (TS s) -> s + PT _ (TI s) -> s + PT _ (TV s) -> s + PT _ (TD s) -> s + PT _ (TC s) -> s + + +eitherResIdent :: (String -> Tok) -> String -> Tok +eitherResIdent tv s = if isResWord s then (TS s) else (tv s) where + isResWord s = isInTree s $ + B "grammar" (B "end" N N) (B "startcat" N N) + +data BTree = N | B String BTree BTree deriving (Show) + +isInTree :: String -> BTree -> Bool +isInTree x tree = case tree of + N -> False + B a left right + | x < a -> isInTree x left + | x > a -> isInTree x right + | x == a -> True + +unescapeInitTail :: String -> String +unescapeInitTail = unesc . tail where + unesc s = case s of + '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs + '\\':'n':cs -> '\n' : unesc cs + '\\':'t':cs -> '\t' : unesc cs + '"':[] -> [] + c:cs -> c : unesc cs + _ -> [] + +------------------------------------------------------------------- +-- Alex wrapper code. +-- A modified "posn" wrapper. +------------------------------------------------------------------- + +data Posn = Pn !Int !Int !Int + deriving (Eq, Show) + +alexStartPos :: Posn +alexStartPos = Pn 0 1 1 + +alexMove :: Posn -> Char -> Posn +alexMove (Pn a l c) '\t' = Pn (a+1) l (((c+7) `div` 8)*8+1) +alexMove (Pn a l c) '\n' = Pn (a+1) (l+1) 1 +alexMove (Pn a l c) _ = Pn (a+1) l (c+1) + +type AlexInput = (Posn, -- current position, + Char, -- previous char + String) -- current input string + +tokens :: String -> [Token] +tokens str = go (alexStartPos, '\n', str) + where + go :: (Posn, Char, String) -> [Token] + go inp@(pos, _, str) = + case alexScan inp 0 of + AlexEOF -> [] + AlexError (pos, _, _) -> fail $ show pos ++ ": lexical error" + AlexSkip inp' len -> go inp' + AlexToken inp' len act -> act pos (take len str) : (go inp') + +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar (p, c, []) = Nothing +alexGetChar (p, _, (c:s)) = + let p' = alexMove p c + in p' `seq` Just (c, (p', c, s)) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (p, c, s) = c + +alex_action_1 = tok (\p s -> PT p (TS s)) +alex_action_2 = tok (\p s -> PT p (eitherResIdent TV s)) +alex_action_3 = tok (\p s -> PT p (TL $ unescapeInitTail s)) +alex_action_4 = tok (\p s -> PT p (TI s)) +{-# LINE 1 "GenericTemplate.hs" #-} +{-# LINE 1 "<built-in>" #-} +{-# LINE 1 "<command line>" #-} +{-# LINE 1 "GenericTemplate.hs" #-} +-- ----------------------------------------------------------------------------- +-- ALEX TEMPLATE +-- +-- This code is in the PUBLIC DOMAIN; you may copy it freely and use +-- it for any purpose whatsoever. + +-- ----------------------------------------------------------------------------- +-- INTERNALS and main scanner engine + +{-# LINE 23 "GenericTemplate.hs" #-} +{-# LINE 35 "GenericTemplate.hs" #-} + +{-# LINE 44 "GenericTemplate.hs" #-} + +{-# LINE 67 "GenericTemplate.hs" #-} +alexIndexShortOffAddr arr off = arr ! off + + +-- ----------------------------------------------------------------------------- +-- Main lexing routines + +data AlexReturn a + = AlexEOF + | AlexError !AlexInput + | AlexSkip !AlexInput !Int + | AlexToken !AlexInput !Int a + +-- alexScan :: AlexInput -> StartCode -> Maybe (AlexInput,Int,act) +alexScan input (sc) + = alexScanUser undefined input (sc) + +alexScanUser user input (sc) + = case alex_scan_tkn user input (0) input sc AlexNone of + (AlexNone, input') -> + case alexGetChar input of + Nothing -> + + + + AlexEOF + Just _ -> + + + + AlexError input + + (AlexLastSkip input len, _) -> + + + + AlexSkip input len + + (AlexLastAcc k input len, _) -> + + + + AlexToken input len k + + +-- Push the input through the DFA, remembering the most recent accepting +-- state it encountered. + +alex_scan_tkn user orig_input len input s last_acc = + input `seq` -- strict in the input + case s of + (-1) -> (last_acc, input) + _ -> alex_scan_tkn' user orig_input len input s last_acc + +alex_scan_tkn' user orig_input len input s last_acc = + let + new_acc = check_accs (alex_accept `unsafeAt` (s)) + in + new_acc `seq` + case alexGetChar input of + Nothing -> (new_acc, input) + Just (c, new_input) -> + + + + let + base = alexIndexShortOffAddr alex_base s + (ord_c) = ord c + offset = (base + ord_c) + check = alexIndexShortOffAddr alex_check offset + + new_s = if (offset >= (0)) && (check == ord_c) + then alexIndexShortOffAddr alex_table offset + else alexIndexShortOffAddr alex_deflt s + in + alex_scan_tkn user orig_input (len + (1)) new_input new_s new_acc + + where + check_accs [] = last_acc + check_accs (AlexAcc a : _) = AlexLastAcc a input (len) + check_accs (AlexAccSkip : _) = AlexLastSkip input (len) + check_accs (AlexAccPred a pred : rest) + | pred user orig_input (len) input + = AlexLastAcc a input (len) + check_accs (AlexAccSkipPred pred : rest) + | pred user orig_input (len) input + = AlexLastSkip input (len) + check_accs (_ : rest) = check_accs rest + +data AlexLastAcc a + = AlexNone + | AlexLastAcc a !AlexInput !Int + | AlexLastSkip !AlexInput !Int + +data AlexAcc a user + = AlexAcc a + | AlexAccSkip + | AlexAccPred a (AlexAccPred user) + | AlexAccSkipPred (AlexAccPred user) + +type AlexAccPred user = user -> AlexInput -> Int -> AlexInput -> Bool + +-- ----------------------------------------------------------------------------- +-- Predicates on a rule + +alexAndPred p1 p2 user in1 len in2 + = p1 user in1 len in2 && p2 user in1 len in2 + +--alexPrevCharIsPred :: Char -> AlexAccPred _ +alexPrevCharIs c _ input _ _ = c == alexInputPrevChar input + +--alexPrevCharIsOneOfPred :: Array Char Bool -> AlexAccPred _ +alexPrevCharIsOneOf arr _ input _ _ = arr ! alexInputPrevChar input + +--alexRightContext :: Int -> AlexAccPred _ +alexRightContext (sc) user _ _ input = + case alex_scan_tkn user input (0) input sc AlexNone of + (AlexNone, _) -> False + _ -> True + -- TODO: there's no need to find the longest + -- match when checking the right context, just + -- the first match will do. + +-- used by wrappers +iUnbox (i) = i |
