diff options
Diffstat (limited to 'src/runtime/haskell/PGF')
| -rw-r--r-- | src/runtime/haskell/PGF/Macros.hs | 127 | ||||
| -rw-r--r-- | src/runtime/haskell/PGF/Optimize.hs | 21 |
2 files changed, 143 insertions, 5 deletions
diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index b60c8a0d4..08052ce2f 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE MagicHash, BangPatterns, FlexibleContexts #-} module PGF.Macros where import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint @@ -13,6 +14,9 @@ import qualified Data.Array as Array import Data.List import Data.Array.IArray import Text.PrettyPrint +import GHC.Prim +import GHC.Base(getTag) +import Data.Char -- operations for manipulating PGF grammars and objects @@ -241,3 +245,126 @@ computeSeq filter seq args = concatMap compute seq flattenBracketedString :: BracketedString -> [String] flattenBracketedString (Leaf w) = [w] flattenBracketedString (Bracket _ _ _ _ _ _ bss) = concatMap flattenBracketedString bss + + +-- The following is a version of Data.List.sortBy which together +-- with the sorting also eliminates duplicate values +sortNubBy cmp = mergeAll . sequences + where + sequences (a:b:xs) = + case cmp a b of + GT -> descending b [a] xs + EQ -> sequences (b:xs) + LT -> ascending b (a:) xs + sequences xs = [xs] + + descending a as [] = [a:as] + descending a as (b:bs) = + case cmp a b of + GT -> descending b (a:as) bs + EQ -> descending a as bs + LT -> (a:as) : sequences (b:bs) + + ascending a as [] = let !x = as [a] + in [x] + ascending a as (b:bs) = + case cmp a b of + GT -> let !x = as [a] + in x : sequences (b:bs) + EQ -> ascending a as bs + LT -> ascending b (\ys -> as (a:ys)) bs + + mergeAll [x] = x + mergeAll xs = mergeAll (mergePairs xs) + + mergePairs (a:b:xs) = let !x = merge a b + in x : mergePairs xs + mergePairs xs = xs + + merge as@(a:as') bs@(b:bs') = + case cmp a b of + GT -> b:merge as bs' + EQ -> a:merge as' bs' + LT -> a:merge as' bs + merge [] bs = bs + merge as [] = as + + +-- The following function does case-insensitive comparison of sequences. +-- This is used to allow case-insensitive parsing, while +-- the linearizer still has access to the original cases. +compareCaseInsensitve s1 s2 = + case compareSeq (elems s1) (elems s2) of + (EQ,c) -> c + (c, _) -> c + where + compareSeq [] [] = dup EQ + compareSeq [] _ = dup LT + compareSeq _ [] = dup GT + compareSeq (x:xs) (y:ys) = + case compareSym x y of + (EQ,EQ) -> compareSeq xs ys + (EQ,c2) -> case compareSeq xs ys of + (c1,_) -> (c1,c2) + x -> x + + compareSym s1 s2 = + case s1 of + SymCat d1 r1 + -> case s2 of + SymCat d2 r2 + -> case compare d1 d2 of + EQ -> dup (r1 `compare` r2) + x -> dup x + _ -> dup LT + SymLit d1 r1 + -> case s2 of + SymCat {} -> dup GT + SymLit d2 r2 + -> case compare d1 d2 of + EQ -> dup (r1 `compare` r2) + x -> dup x + _ -> dup LT + SymVar d1 r1 + -> if tagToEnum# (getTag s2 ># 2#) + then dup LT + else case s2 of + SymVar d2 r2 + -> case compare d1 d2 of + EQ -> dup (r1 `compare` r2) + x -> dup x + _ -> dup GT + SymKS t1 + -> if tagToEnum# (getTag s2 ># 3#) + then dup LT + else case s2 of + SymKS t2 -> t1 `compareToken` t2 + _ -> dup GT + SymKP a1 b1 + -> if tagToEnum# (getTag s2 ># 4#) + then dup LT + else case s2 of + SymKP a2 b2 + -> case compare a1 a2 of + EQ -> dup (b1 `compare` b2) + x -> dup x + _ -> dup GT + _ -> let t1 = getTag s1 + t2 = getTag s2 + in if tagToEnum# (t1 <# t2) + then dup LT + else if tagToEnum# (t1 ==# t2) + then dup EQ + else dup GT + + compareToken [] [] = dup EQ + compareToken [] _ = dup LT + compareToken _ [] = dup GT + compareToken (x:xs) (y:ys) + | x == y = compareToken xs ys + | otherwise = case compare (toLower x) (toLower y) of + EQ -> case compareToken xs ys of + (c,_) -> (c,compare x y) + c -> dup c + + dup x = (x,x) diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs index d3fb9290e..451955647 100644 --- a/src/runtime/haskell/PGF/Optimize.hs +++ b/src/runtime/haskell/PGF/Optimize.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BangPatterns, FlexibleContexts #-} module PGF.Optimize ( optimizePGF , updateProductionIndices @@ -44,9 +44,9 @@ topDownFilter startCat cnc = env2 (productions cnc) cats = Map.mapWithKey filterCatLabels (cnccats cnc) - (seqs,funs) = env3 - in cnc{ sequences = mkSetArray seqs - , cncfuns = mkSetArray funs + (seqs,funs) = reorderSeqs env3 + in cnc{ sequences = seqs + , cncfuns = funs , productions = prods , cnccats = cats , lindefs = defs @@ -171,7 +171,18 @@ topDownFilter startCat cnc = in CncCat start end lbls' Nothing -> error "unknown category" - mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] + reorderSeqs (seqs,funs) = (seqs',funs') + where + sorted = sortNubBy ciCmp (Map.toList seqs) + seqs' = mkArray (map fst sorted) + re = array (0,Map.size seqs-1) (zipWith (\(_,i) j -> (i,j)) sorted [0..]) :: Array LIndex LIndex + funs' = array (0,Map.size funs-1) [(v,CncFun fun (amap ((!) re) lins)) | (CncFun fun lins,v) <- Map.toList funs] + + ciCmp (s1,_) (s2,_) + | Map.lookup (mkCId "case_sensitive") (cflags cnc) == Just (LStr "on") + = compare s1 s2 + | otherwise = compareCaseInsensitve s1 s2 + mkArray lst = listArray (0,length lst-1) lst mapAccumLSet f b set = let (b',lst) = mapAccumL f b (Set.toList set) |
