summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Grammar/Macros.hs
diff options
context:
space:
mode:
authorhallgren <hallgren@chalmers.se>2013-02-27 20:59:43 +0000
committerhallgren <hallgren@chalmers.se>2013-02-27 20:59:43 +0000
commit0feb386691bb82e13c3dcc01e27ae33d8865f2ca (patch)
treef117b97d05861b5dcf58a241c8c2f6547b32182e /src/compiler/GF/Grammar/Macros.hs
parent95c4cbb8f5ef10d04839e73a7f0dafe8536dab2d (diff)
Faster regular expression pattern matching in the grammar compiler.
The sequence operator (x+y) was implemented by splitting the string to be matched at all positions and trying to match the parts against the two subpatterns. To reduce the number of splits, we now estimate the minimum and maximum length of the string that the subpatterns could match. For common cases, where one of the subpatterns is a string of known length, like in (x+"y") or (x + ("a"|"o"|"u"|"e")+"y"), only one split will be tried.
Diffstat (limited to 'src/compiler/GF/Grammar/Macros.hs')
-rw-r--r--src/compiler/GF/Grammar/Macros.hs4
1 files changed, 4 insertions, 0 deletions
diff --git a/src/compiler/GF/Grammar/Macros.hs b/src/compiler/GF/Grammar/Macros.hs
index 97146b197..bd7de5db4 100644
--- a/src/compiler/GF/Grammar/Macros.hs
+++ b/src/compiler/GF/Grammar/Macros.hs
@@ -483,6 +483,8 @@ composOp co trm =
ImplArg t -> liftM ImplArg (co t)
_ -> return trm -- covers K, Vr, Cn, Sort, EPatt
+composSafePattOp op = runIdentity . composPattOp (return . op)
+
composPattOp :: Monad m => (Patt -> m Patt) -> Patt -> m Patt
composPattOp op patt =
case patt of
@@ -495,6 +497,7 @@ composPattOp op patt =
PNeg p -> liftM PNeg (op p)
PAlt p1 p2 -> liftM2 PAlt (op p1) (op p2)
PSeq p1 p2 -> liftM2 PSeq (op p1) (op p2)
+ PMSeq (_,p1) (_,p2) -> liftM2 PSeq (op p1) (op p2) -- information loss
PRep p -> liftM PRep (op p)
_ -> return patt -- covers cases without subpatterns
@@ -545,6 +548,7 @@ collectPattOp op patt =
PNeg p -> op p
PAlt p1 p2 -> op p1++op p2
PSeq p1 p2 -> op p1++op p2
+ PMSeq (_,p1) (_,p2) -> op p1++op p2
PRep p -> op p
_ -> [] -- covers cases without subpatterns