summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Grammar/Grammar.hs
diff options
context:
space:
mode:
authorhallgren <hallgren@chalmers.se>2013-02-27 20:59:43 +0000
committerhallgren <hallgren@chalmers.se>2013-02-27 20:59:43 +0000
commit0feb386691bb82e13c3dcc01e27ae33d8865f2ca (patch)
treef117b97d05861b5dcf58a241c8c2f6547b32182e /src/compiler/GF/Grammar/Grammar.hs
parent95c4cbb8f5ef10d04839e73a7f0dafe8536dab2d (diff)
Faster regular expression pattern matching in the grammar compiler.
The sequence operator (x+y) was implemented by splitting the string to be matched at all positions and trying to match the parts against the two subpatterns. To reduce the number of splits, we now estimate the minimum and maximum length of the string that the subpatterns could match. For common cases, where one of the subpatterns is a string of known length, like in (x+"y") or (x + ("a"|"o"|"u"|"e")+"y"), only one split will be tried.
Diffstat (limited to 'src/compiler/GF/Grammar/Grammar.hs')
-rw-r--r--src/compiler/GF/Grammar/Grammar.hs6
1 files changed, 5 insertions, 1 deletions
diff --git a/src/compiler/GF/Grammar/Grammar.hs b/src/compiler/GF/Grammar/Grammar.hs
index 218a2bd0b..c59cd809e 100644
--- a/src/compiler/GF/Grammar/Grammar.hs
+++ b/src/compiler/GF/Grammar/Grammar.hs
@@ -430,6 +430,7 @@ data Term =
| Error String -- ^ error values returned by Predef.error
deriving (Show, Eq, Ord)
+-- | Patterns
data Patt =
PC Ident [Patt] -- ^ constructor pattern: @C p1 ... pn@ @C@
| PP QIdent [Patt] -- ^ package constructor pattern: @P.C p1 ... pn@ @P.C@
@@ -450,14 +451,17 @@ data Patt =
| PNeg Patt -- ^ negated pattern: -p
| PAlt Patt Patt -- ^ disjunctive pattern: p1 | p2
| PSeq Patt Patt -- ^ sequence of token parts: p + q
+ | PMSeq MPatt MPatt -- ^ sequence of token parts: p + q
| PRep Patt -- ^ repetition of token part: p*
| PChar -- ^ string of length one: ?
| PChars [Char] -- ^ character list: ["aeiou"]
| PMacro Ident -- #p
| PM QIdent -- #m.p
-
deriving (Show, Eq, Ord)
+-- | Measured pattern (paired with the min & max matching length)
+type MPatt = ((Int,Int),Patt)
+
-- | to guide computation and type checking of tables
data TInfo =
TRaw -- ^ received from parser; can be anything