summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorbringert <unknown>2004-12-06 16:02:08 +0000
committerbringert <unknown>2004-12-06 16:02:08 +0000
commit798cb370029712dc96e9ca5d1fbacc698c6d3649 (patch)
tree4db09c94a9c6046439d0e1c6ce4971add0b86013 /src
parentb63729307209c8f1c942cb70af12f89a46372993 (diff)
Use HashTable to share strings in tokens when parsing GFC files.
Diffstat (limited to 'src')
-rw-r--r--src/GF/Canon/LexGFC.hs14
-rw-r--r--src/GF/Canon/LexGFC.x131
-rw-r--r--src/GF/Data/SharedString.hs18
3 files changed, 156 insertions, 7 deletions
diff --git a/src/GF/Canon/LexGFC.hs b/src/GF/Canon/LexGFC.hs
index d27e31657..56376894b 100644
--- a/src/GF/Canon/LexGFC.hs
+++ b/src/GF/Canon/LexGFC.hs
@@ -1,7 +1,7 @@
{-# OPTIONS -fglasgow-exts -cpp #-}
{-# LINE 3 "LexGFC.x" #-}
module LexGFC where
-
+import SharedString
import ErrM
#if __GLASGOW_HASKELL__ >= 503
@@ -35,10 +35,10 @@ alex_accept = listArray (0::Int,14) [[],[],[(AlexAccSkip)],[(AlexAcc (alex_actio
tok f p s = f p s
data Tok =
- TS String -- reserved words
- | TL String -- string literals
+ TS !String -- reserved words
+ | TL !String -- string literals
| TI String -- integer literals
- | TV String -- identifiers
+ | TV !String -- identifiers -- H
| TD String -- double precision float literals
| TC String -- character literals
@@ -130,9 +130,9 @@ alexGetChar (p, _, (c:s)) =
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (p, c, s) = c
-alex_action_1 = tok (\p s -> PT p (TS s))
-alex_action_2 = tok (\p s -> PT p (eitherResIdent TV s))
-alex_action_3 = tok (\p s -> PT p (TL $ unescapeInitTail s))
+alex_action_1 = tok (\p s -> PT p (TS (shareString s)))
+alex_action_2 = tok (\p s -> PT p (eitherResIdent (TV . shareString) s))
+alex_action_3 = tok (\p s -> PT p (TL $ shareString $ unescapeInitTail s))
alex_action_4 = tok (\p s -> PT p (TI s))
{-# LINE 1 "GenericTemplate.hs" #-}
{-# LINE 1 "<built-in>" #-}
diff --git a/src/GF/Canon/LexGFC.x b/src/GF/Canon/LexGFC.x
new file mode 100644
index 000000000..c9697c270
--- /dev/null
+++ b/src/GF/Canon/LexGFC.x
@@ -0,0 +1,131 @@
+-- -*- haskell -*-
+-- This Alex file was machine-generated by the BNF converter
+-- Lines with -- H have been hacked for greater performance
+{
+module LexGFC where
+import SharedString -- H
+import ErrM
+}
+
+
+$l = [a-zA-Z\192 - \255] # [\215 \247] -- isolatin1 letter FIXME
+$c = [A-Z\192-\221] # [\215] -- capital isolatin1 letter FIXME
+$s = [a-z\222-\255] # [\247] -- small isolatin1 letter FIXME
+$d = [0-9] -- digit
+$i = [$l $d _ '] -- identifier character
+$u = [\0-\255] -- universal: any character
+
+@rsyms = -- reserved words consisting of special symbols
+ \; | \= | \{ | \} | \: | \- \> | \* \* | \[ | \] | \\ | \. | \( | \) | \_ | \< | \> | \$ | \? | \= \> | \! | \+ \+ | \/ | \@ | \+ | \| | \,
+
+:-
+
+$white+ ;
+@rsyms { tok (\p s -> PT p (TS (shareString s))) } -- H
+
+$l $i* { tok (\p s -> PT p (eitherResIdent (TV . shareString) s)) } -- H
+\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \"{ tok (\p s -> PT p (TL $ shareString $ unescapeInitTail s)) } -- H
+
+$d+ { tok (\p s -> PT p (TI s)) }
+
+
+{
+
+tok f p s = f p s
+
+data Tok =
+ TS !String -- reserved words -- H
+ | TL !String -- string literals -- H
+ | TI String -- integer literals
+ | TV !String -- identifiers -- H
+ | TD String -- double precision float literals
+ | TC String -- character literals
+
+ deriving (Eq,Show,Ord)
+
+data Token =
+ PT Posn Tok
+ | Err Posn
+ deriving (Eq,Show,Ord)
+
+tokenPos (PT (Pn _ l _) _ :_) = "line " ++ show l
+tokenPos (Err (Pn _ l _) :_) = "line " ++ show l
+tokenPos _ = "end of file"
+
+posLineCol (Pn _ l c) = (l,c)
+mkPosToken t@(PT p _) = (posLineCol p, prToken t)
+
+prToken t = case t of
+ PT _ (TS s) -> s
+ PT _ (TI s) -> s
+ PT _ (TV s) -> s
+ PT _ (TD s) -> s
+ PT _ (TC s) -> s
+
+ _ -> show t
+
+eitherResIdent :: (String -> Tok) -> String -> Tok
+eitherResIdent tv s = if isResWord s then (TS s) else (tv s) where
+ isResWord s = isInTree s $
+ B "lin" (B "concrete" (B "Type" (B "Str" (B "Ints" N N) N) (B "cat" (B "abstract" N N) N)) (B "fun" (B "flags" (B "data" N N) N) (B "in" (B "grammar" N N) N))) (B "pre" (B "open" (B "of" (B "lincat" N N) N) (B "param" (B "oper" N N) N)) (B "transfer" (B "table" (B "resource" N N) N) (B "variants" N N)))
+
+data BTree = N | B String BTree BTree deriving (Show)
+
+isInTree :: String -> BTree -> Bool
+isInTree x tree = case tree of
+ N -> False
+ B a left right
+ | x < a -> isInTree x left
+ | x > a -> isInTree x right
+ | x == a -> True
+
+unescapeInitTail :: String -> String
+unescapeInitTail = unesc . tail where
+ unesc s = case s of
+ '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
+ '\\':'n':cs -> '\n' : unesc cs
+ '\\':'t':cs -> '\t' : unesc cs
+ '"':[] -> []
+ c:cs -> c : unesc cs
+ _ -> []
+
+-------------------------------------------------------------------
+-- Alex wrapper code.
+-- A modified "posn" wrapper.
+-------------------------------------------------------------------
+
+data Posn = Pn !Int !Int !Int
+ deriving (Eq, Show,Ord)
+
+alexStartPos :: Posn
+alexStartPos = Pn 0 1 1
+
+alexMove :: Posn -> Char -> Posn
+alexMove (Pn a l c) '\t' = Pn (a+1) l (((c+7) `div` 8)*8+1)
+alexMove (Pn a l c) '\n' = Pn (a+1) (l+1) 1
+alexMove (Pn a l c) _ = Pn (a+1) l (c+1)
+
+type AlexInput = (Posn, -- current position,
+ Char, -- previous char
+ String) -- current input string
+
+tokens :: String -> [Token]
+tokens str = go (alexStartPos, '\n', str)
+ where
+ go :: (Posn, Char, String) -> [Token]
+ go inp@(pos, _, str) =
+ case alexScan inp 0 of
+ AlexEOF -> []
+ AlexError (pos, _, _) -> fail $ show pos ++ ": lexical error"
+ AlexSkip inp' len -> go inp'
+ AlexToken inp' len act -> act pos (take len str) : (go inp')
+
+alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar (p, c, []) = Nothing
+alexGetChar (p, _, (c:s)) =
+ let p' = alexMove p c
+ in p' `seq` Just (c, (p', c, s))
+
+alexInputPrevChar :: AlexInput -> Char
+alexInputPrevChar (p, c, s) = c
+}
diff --git a/src/GF/Data/SharedString.hs b/src/GF/Data/SharedString.hs
new file mode 100644
index 000000000..f03f6f469
--- /dev/null
+++ b/src/GF/Data/SharedString.hs
@@ -0,0 +1,18 @@
+module SharedString (shareString) where
+
+import Data.HashTable as H
+import System.IO.Unsafe (unsafePerformIO)
+
+{-# NOINLINE stringPool #-}
+stringPool :: HashTable String String
+stringPool = unsafePerformIO $ new (==) hashString
+
+{-# NOINLINE shareString #-}
+shareString :: String -> String
+shareString s = unsafePerformIO $ do
+ mv <- H.lookup stringPool s
+ case mv of
+ Just s' -> return s'
+ Nothing -> do
+ H.insert stringPool s s
+ return s