summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/compiler/GF/Compile/GrammarToPGF.hs133
-rw-r--r--src/runtime/haskell/PGF/Macros.hs127
-rw-r--r--src/runtime/haskell/PGF/Optimize.hs21
3 files changed, 146 insertions, 135 deletions
diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs
index b83154e19..8c4d4558c 100644
--- a/src/compiler/GF/Compile/GrammarToPGF.hs
+++ b/src/compiler/GF/Compile/GrammarToPGF.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE BangPatterns, FlexibleContexts, MagicHash #-}
+{-# LANGUAGE BangPatterns, FlexibleContexts #-}
module GF.Compile.GrammarToPGF (mkCanon2pgf) where
--import GF.Compile.Export
@@ -8,16 +8,13 @@ import GF.Compile.GenerateBC
import PGF(CId,mkCId,utf8CId)
import PGF.Internal(fidInt,fidFloat,fidString,fidVar)
import PGF.Internal(updateProductionIndices)
---import qualified PGF.Macros as CM
import qualified PGF.Internal as C
import qualified PGF.Internal as D
import GF.Grammar.Predef
---import GF.Grammar.Printer
import GF.Grammar.Grammar
import qualified GF.Grammar.Lookup as Look
import qualified GF.Grammar as A
import qualified GF.Grammar.Macros as GM
---import GF.Compile.GeneratePMCFG
import GF.Infra.Ident
import GF.Infra.Option
@@ -30,9 +27,6 @@ import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Array.IArray
-import Data.Char
-import GHC.Prim
-import GHC.Base(getTag)
mkCanon2pgf :: Options -> SourceGrammar -> ModuleName -> IOE D.PGF
mkCanon2pgf opts gr am = do
@@ -65,7 +59,7 @@ mkCanon2pgf opts gr am = do
mkConcr cm = do
let cflags = err (const noOptions) mflags (lookupModule gr cm)
ciCmp | flag optCaseSensitive cflags = compare
- | otherwise = compareCaseInsensitve
+ | otherwise = C.compareCaseInsensitve
(ex_seqs,cdefs) <- addMissingPMCFGs
Map.empty
@@ -74,7 +68,7 @@ mkCanon2pgf opts gr am = do
let flags = Map.fromList [(mkCId f,x) | (f,x) <- optionsPGF cflags]
- seqs = (mkArray . sortNubBy ciCmp . concat) $
+ seqs = (mkArray . C.sortNubBy ciCmp . concat) $
(Map.keys ex_seqs : [maybe [] elems (mseqs mi) | (m,mi) <- allExtends gr cm])
ex_seqs_arr = mkMapArray ex_seqs :: Array SeqId Sequence
@@ -312,124 +306,3 @@ genPrintNames cdefs =
mkArray lst = listArray (0,length lst-1) lst
mkMapArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
-
--- The following is a version of Data.List.sortBy which together
--- with the sorting also eliminates duplicate values
-sortNubBy cmp = mergeAll . sequences
- where
- sequences (a:b:xs) =
- case cmp a b of
- GT -> descending b [a] xs
- EQ -> sequences (b:xs)
- LT -> ascending b (a:) xs
- sequences xs = [xs]
-
- descending a as [] = [a:as]
- descending a as (b:bs) =
- case cmp a b of
- GT -> descending b (a:as) bs
- EQ -> descending a as bs
- LT -> (a:as) : sequences (b:bs)
-
- ascending a as [] = let !x = as [a]
- in [x]
- ascending a as (b:bs) =
- case cmp a b of
- GT -> let !x = as [a]
- in x : sequences (b:bs)
- EQ -> ascending a as bs
- LT -> ascending b (\ys -> as (a:ys)) bs
-
- mergeAll [x] = x
- mergeAll xs = mergeAll (mergePairs xs)
-
- mergePairs (a:b:xs) = let !x = merge a b
- in x : mergePairs xs
- mergePairs xs = xs
-
- merge as@(a:as') bs@(b:bs') =
- case cmp a b of
- GT -> b:merge as bs'
- EQ -> a:merge as' bs'
- LT -> a:merge as' bs
- merge [] bs = bs
- merge as [] = as
-
--- The following function does case-insensitive comparison of sequences.
--- This is used to allow case-insensitive parsing, while
--- the linearizer still has access to the original cases.
-compareCaseInsensitve s1 s2 =
- case compareSeq (elems s1) (elems s2) of
- (EQ,c) -> c
- (c, _) -> c
- where
- compareSeq [] [] = dup EQ
- compareSeq [] _ = dup LT
- compareSeq _ [] = dup GT
- compareSeq (x:xs) (y:ys) =
- case compareSym x y of
- (EQ,EQ) -> compareSeq xs ys
- (EQ,c2) -> case compareSeq xs ys of
- (c1,_) -> (c1,c2)
- x -> x
-
- compareSym s1 s2 =
- case s1 of
- D.SymCat d1 r1
- -> case s2 of
- D.SymCat d2 r2
- -> case compare d1 d2 of
- EQ -> dup (r1 `compare` r2)
- x -> dup x
- _ -> dup LT
- D.SymLit d1 r1
- -> case s2 of
- D.SymCat {} -> dup GT
- D.SymLit d2 r2
- -> case compare d1 d2 of
- EQ -> dup (r1 `compare` r2)
- x -> dup x
- _ -> dup LT
- D.SymVar d1 r1
- -> if tagToEnum# (getTag s2 ># 2#)
- then dup LT
- else case s2 of
- D.SymVar d2 r2
- -> case compare d1 d2 of
- EQ -> dup (r1 `compare` r2)
- x -> dup x
- _ -> dup GT
- D.SymKS t1
- -> if tagToEnum# (getTag s2 ># 3#)
- then dup LT
- else case s2 of
- D.SymKS t2 -> t1 `compareToken` t2
- _ -> dup GT
- D.SymKP a1 b1
- -> if tagToEnum# (getTag s2 ># 4#)
- then dup LT
- else case s2 of
- D.SymKP a2 b2
- -> case compare a1 a2 of
- EQ -> dup (b1 `compare` b2)
- x -> dup x
- _ -> dup GT
- _ -> let t1 = getTag s1
- t2 = getTag s2
- in if tagToEnum# (t1 <# t2)
- then dup LT
- else if tagToEnum# (t1 ==# t2)
- then dup EQ
- else dup GT
-
- compareToken [] [] = dup EQ
- compareToken [] _ = dup LT
- compareToken _ [] = dup GT
- compareToken (x:xs) (y:ys)
- | x == y = compareToken xs ys
- | otherwise = case compare (toLower x) (toLower y) of
- EQ -> case compareToken xs ys of
- (c,_) -> (c,compare x y)
- c -> dup c
-
- dup x = (x,x)
diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs
index b60c8a0d4..08052ce2f 100644
--- a/src/runtime/haskell/PGF/Macros.hs
+++ b/src/runtime/haskell/PGF/Macros.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE MagicHash, BangPatterns, FlexibleContexts #-}
module PGF.Macros where
import Prelude hiding ((<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
@@ -13,6 +14,9 @@ import qualified Data.Array as Array
import Data.List
import Data.Array.IArray
import Text.PrettyPrint
+import GHC.Prim
+import GHC.Base(getTag)
+import Data.Char
-- operations for manipulating PGF grammars and objects
@@ -241,3 +245,126 @@ computeSeq filter seq args = concatMap compute seq
flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w) = [w]
flattenBracketedString (Bracket _ _ _ _ _ _ bss) = concatMap flattenBracketedString bss
+
+
+-- The following is a version of Data.List.sortBy which together
+-- with the sorting also eliminates duplicate values
+sortNubBy cmp = mergeAll . sequences
+ where
+ sequences (a:b:xs) =
+ case cmp a b of
+ GT -> descending b [a] xs
+ EQ -> sequences (b:xs)
+ LT -> ascending b (a:) xs
+ sequences xs = [xs]
+
+ descending a as [] = [a:as]
+ descending a as (b:bs) =
+ case cmp a b of
+ GT -> descending b (a:as) bs
+ EQ -> descending a as bs
+ LT -> (a:as) : sequences (b:bs)
+
+ ascending a as [] = let !x = as [a]
+ in [x]
+ ascending a as (b:bs) =
+ case cmp a b of
+ GT -> let !x = as [a]
+ in x : sequences (b:bs)
+ EQ -> ascending a as bs
+ LT -> ascending b (\ys -> as (a:ys)) bs
+
+ mergeAll [x] = x
+ mergeAll xs = mergeAll (mergePairs xs)
+
+ mergePairs (a:b:xs) = let !x = merge a b
+ in x : mergePairs xs
+ mergePairs xs = xs
+
+ merge as@(a:as') bs@(b:bs') =
+ case cmp a b of
+ GT -> b:merge as bs'
+ EQ -> a:merge as' bs'
+ LT -> a:merge as' bs
+ merge [] bs = bs
+ merge as [] = as
+
+
+-- The following function does case-insensitive comparison of sequences.
+-- This is used to allow case-insensitive parsing, while
+-- the linearizer still has access to the original cases.
+compareCaseInsensitve s1 s2 =
+ case compareSeq (elems s1) (elems s2) of
+ (EQ,c) -> c
+ (c, _) -> c
+ where
+ compareSeq [] [] = dup EQ
+ compareSeq [] _ = dup LT
+ compareSeq _ [] = dup GT
+ compareSeq (x:xs) (y:ys) =
+ case compareSym x y of
+ (EQ,EQ) -> compareSeq xs ys
+ (EQ,c2) -> case compareSeq xs ys of
+ (c1,_) -> (c1,c2)
+ x -> x
+
+ compareSym s1 s2 =
+ case s1 of
+ SymCat d1 r1
+ -> case s2 of
+ SymCat d2 r2
+ -> case compare d1 d2 of
+ EQ -> dup (r1 `compare` r2)
+ x -> dup x
+ _ -> dup LT
+ SymLit d1 r1
+ -> case s2 of
+ SymCat {} -> dup GT
+ SymLit d2 r2
+ -> case compare d1 d2 of
+ EQ -> dup (r1 `compare` r2)
+ x -> dup x
+ _ -> dup LT
+ SymVar d1 r1
+ -> if tagToEnum# (getTag s2 ># 2#)
+ then dup LT
+ else case s2 of
+ SymVar d2 r2
+ -> case compare d1 d2 of
+ EQ -> dup (r1 `compare` r2)
+ x -> dup x
+ _ -> dup GT
+ SymKS t1
+ -> if tagToEnum# (getTag s2 ># 3#)
+ then dup LT
+ else case s2 of
+ SymKS t2 -> t1 `compareToken` t2
+ _ -> dup GT
+ SymKP a1 b1
+ -> if tagToEnum# (getTag s2 ># 4#)
+ then dup LT
+ else case s2 of
+ SymKP a2 b2
+ -> case compare a1 a2 of
+ EQ -> dup (b1 `compare` b2)
+ x -> dup x
+ _ -> dup GT
+ _ -> let t1 = getTag s1
+ t2 = getTag s2
+ in if tagToEnum# (t1 <# t2)
+ then dup LT
+ else if tagToEnum# (t1 ==# t2)
+ then dup EQ
+ else dup GT
+
+ compareToken [] [] = dup EQ
+ compareToken [] _ = dup LT
+ compareToken _ [] = dup GT
+ compareToken (x:xs) (y:ys)
+ | x == y = compareToken xs ys
+ | otherwise = case compare (toLower x) (toLower y) of
+ EQ -> case compareToken xs ys of
+ (c,_) -> (c,compare x y)
+ c -> dup c
+
+ dup x = (x,x)
diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs
index d3fb9290e..451955647 100644
--- a/src/runtime/haskell/PGF/Optimize.hs
+++ b/src/runtime/haskell/PGF/Optimize.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BangPatterns, FlexibleContexts #-}
module PGF.Optimize
( optimizePGF
, updateProductionIndices
@@ -44,9 +44,9 @@ topDownFilter startCat cnc =
env2
(productions cnc)
cats = Map.mapWithKey filterCatLabels (cnccats cnc)
- (seqs,funs) = env3
- in cnc{ sequences = mkSetArray seqs
- , cncfuns = mkSetArray funs
+ (seqs,funs) = reorderSeqs env3
+ in cnc{ sequences = seqs
+ , cncfuns = funs
, productions = prods
, cnccats = cats
, lindefs = defs
@@ -171,7 +171,18 @@ topDownFilter startCat cnc =
in CncCat start end lbls'
Nothing -> error "unknown category"
- mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
+ reorderSeqs (seqs,funs) = (seqs',funs')
+ where
+ sorted = sortNubBy ciCmp (Map.toList seqs)
+ seqs' = mkArray (map fst sorted)
+ re = array (0,Map.size seqs-1) (zipWith (\(_,i) j -> (i,j)) sorted [0..]) :: Array LIndex LIndex
+ funs' = array (0,Map.size funs-1) [(v,CncFun fun (amap ((!) re) lins)) | (CncFun fun lins,v) <- Map.toList funs]
+
+ ciCmp (s1,_) (s2,_)
+ | Map.lookup (mkCId "case_sensitive") (cflags cnc) == Just (LStr "on")
+ = compare s1 s2
+ | otherwise = compareCaseInsensitve s1 s2
+
mkArray lst = listArray (0,length lst-1) lst
mapAccumLSet f b set = let (b',lst) = mapAccumL f b (Set.toList set)