summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbringert <bringert@cs.chalmers.se>2007-01-07 20:04:39 +0000
committerbringert <bringert@cs.chalmers.se>2007-01-07 20:04:39 +0000
commit090bb304666457e8c13aadbd45924a7f80459ae7 (patch)
tree5cb5c97c5a7b759b4c61812074196604053fafe1
parent7c80aca735458c56e7d66375aa33baa8d58b11d9 (diff)
Generate more compact JSGF by converting to ENBF.
-rw-r--r--src/GF/Speech/PrJSGF.hs41
-rw-r--r--src/GF/Speech/RegExp.hs26
-rw-r--r--src/GF/Speech/SRG.hs36
-rw-r--r--src/GF/Speech/TransformCFG.hs2
4 files changed, 59 insertions, 46 deletions
diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs
index d1d904dbb..9d6dca598 100644
--- a/src/GF/Speech/PrJSGF.hs
+++ b/src/GF/Speech/PrJSGF.hs
@@ -55,11 +55,11 @@ prJSGF sisr srg@(SRG{grammarName=name,startCat=start,origStartCat=origStart,rule
rule True "MAIN" [prCat start]
prRule (SRGRule cat origCat rhs) =
comment origCat $$
--- rule False cat (map prAlt (ebnfSRGAlts rhs))
- rule False cat (map prAlt rhs)
+ rule False cat (map prAlt (ebnfSRGAlts rhs))
+-- rule False cat (map prAlt rhs)
-- FIXME: use the probability
--- prAlt (EBnfSRGAlt mp n rhs) = tag sisr (profileInitSISR n) . showChar ' '. prItem sisr rhs
- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
+ prAlt (EBnfSRGAlt mp n rhs) = sep [initTag, prItem sisr n rhs, finalTag]
+-- prAlt (SRGAlt mp n rhs) = initTag <+> prItem sisr n rhs <+> finalTag
where initTag | isEmpty t = empty
| otherwise = text "<NULL>" <+> t
where t = tag sisr (profileInitSISR n)
@@ -74,25 +74,25 @@ catFormId = (++ "_cat")
prCat :: SRGCat -> Doc
prCat c = char '<' <> text c <> char '>'
-{-
-prItem :: Maybe SISRFormat -> EBnfSRGItem -> ShowS
-prItem sisr = f 1
+prItem :: Maybe SISRFormat -> CFTerm -> EBnfSRGItem -> Doc
+prItem sisr t = f 1
where
- f _ (REUnion []) = showString "<VOID>"
+ f _ (REUnion []) = text "<VOID>"
f p (REUnion xs)
- | not (null es) = wrap "[" (f 0 (REUnion nes)) "]"
- | otherwise = (if p >= 1 then paren else id) (joinS " | " (map (f 1) xs))
- where (es,nes) = partition (== REConcat []) xs
- f _ (REConcat []) = showString "<NULL>"
- f p (REConcat xs) = (if p >= 3 then paren else id) (unwordsS (map (f 2) xs))
- f p (RERepeat x) = f 3 x . showString "*"
- f _ (RESymbol s) = prSymbol sisr s
--}
+ | not (null es) = brackets (f 0 (REUnion nes))
+ | otherwise = (if p >= 1 then parens else id) (alts (map (f 1) xs))
+ where (es,nes) = partition isEpsilon xs
+ f _ (REConcat []) = text "<NULL>"
+ f p (REConcat xs) = (if p >= 3 then parens else id) (hsep (map (f 2) xs))
+ f p (RERepeat x) = f 3 x <> char '*'
+ f _ (RESymbol s) = prSymbol sisr t s
+{-
prItem :: Maybe SISRFormat -> CFTerm -> [Symbol SRGNT Token] -> Doc
prItem _ _ [] = text "<NULL>"
prItem sisr cn ss = paren $ hsep $ map (prSymbol sisr cn) ss
where paren = if length ss == 1 then id else parens
+-}
prSymbol :: Maybe SISRFormat -> CFTerm -> Symbol SRGNT Token -> Doc
prSymbol sisr cn (Cat n@(c,_)) = prCat c <+> tag sisr (catSISR cn n)
@@ -103,7 +103,7 @@ tag :: Maybe SISRFormat -> (SISRFormat -> SISRTag) -> Doc
tag Nothing _ = empty
tag (Just fmt) t = case t fmt of
[] -> empty
- ts -> char '{' <+> text (e $ prSISR ts) <+> char '}'
+ ts -> char '{' <+> (text (e $ prSISR ts)) <+> char '}'
where e [] = []
e ('}':xs) = '\\':'}':e xs
e ('\n':xs) = ' ' : e (dropWhile isSpace xs)
@@ -115,11 +115,11 @@ isPunct c = c `elem` "-_.;.,?!"
comment :: String -> Doc
comment s = text "//" <+> text s
-
+alts :: [Doc] -> Doc
+alts = sep . prepunctuate (text "| ")
rule :: Bool -> SRGCat -> [Doc] -> Doc
-rule pub c xs = p <+> prCat c <+> char '='
- $$ nest 2 (sep (prepunctuate (text "| ") xs) <+> char ';')
+rule pub c xs = sep [p <+> prCat c <+> char '=', nest 2 (alts xs) <+> char ';']
where p = if pub then text "public" else empty
-- Pretty-printing utilities
@@ -133,3 +133,4 @@ prepunctuate p (x:xs) = x : map (p <>) xs
($++$) :: Doc -> Doc -> Doc
x $++$ y = x $$ emptyLine $$ y
+
diff --git a/src/GF/Speech/RegExp.hs b/src/GF/Speech/RegExp.hs
index 2f4c7cd48..1eb6efa4d 100644
--- a/src/GF/Speech/RegExp.hs
+++ b/src/GF/Speech/RegExp.hs
@@ -1,4 +1,9 @@
-module GF.Speech.RegExp (RE(..), dfa2re, prRE) where
+module GF.Speech.RegExp (RE(..),
+ epsilonRE, nullRE,
+ isEpsilon, isNull,
+ unionRE, concatRE, seqRE,
+ repeatRE,
+ dfa2re, prRE) where
import Data.List
@@ -10,17 +15,17 @@ data RE a =
| REConcat [RE a] -- ^ REConcat [] is epsilon
| RERepeat (RE a)
| RESymbol a
- deriving (Eq,Show)
+ deriving (Eq,Ord,Show)
-dfa2re :: Show a => DFA a -> RE a
+dfa2re :: (Show a,Ord a) => DFA a -> RE a
dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops
. oneFinalState () epsilonRE . mapTransitions RESymbol
where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa
merge es = [(f,t,unionRE ls)
| ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]]
-elimStates :: Show a => DFA (RE a) -> DFA (RE a)
+elimStates :: (Show a, Ord a) => DFA (RE a) -> DFA (RE a)
elimStates fa =
case [s | (s,_) <- states fa, isInternal fa s] of
[] -> fa
@@ -31,18 +36,22 @@ elimStates fa =
ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs]
r r1 r3 = concatRE [r1, repeatRE r2, r3]
+epsilonRE :: RE a
epsilonRE = REConcat []
+nullRE :: RE a
nullRE = REUnion []
+isNull :: RE a -> Bool
isNull (REUnion []) = True
isNull _ = False
+isEpsilon :: RE a -> Bool
isEpsilon (REConcat []) = True
isEpsilon _ = False
-unionRE :: [RE a] -> RE a
-unionRE = unionOrId . concatMap toList
+unionRE :: Ord a => [RE a] -> RE a
+unionRE = unionOrId . sortNub . concatMap toList
where
toList (REUnion xs) = xs
toList x = [x]
@@ -58,11 +67,14 @@ concatRE xs | any isNull xs = nullRE
toList (REConcat xs) = xs
toList x = [x]
+seqRE :: [a] -> RE a
+seqRE = concatRE . map RESymbol
+
repeatRE :: RE a -> RE a
repeatRE x | isNull x || isEpsilon x = epsilonRE
| otherwise = RERepeat x
-finalRE :: DFA (RE a) -> RE a
+finalRE :: Ord a => DFA (RE a) -> RE a
finalRE fa = concatRE [repeatRE r1, r2,
repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])]
where
diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs
index cf74ba66e..cc03cdca6 100644
--- a/src/GF/Speech/SRG.hs
+++ b/src/GF/Speech/SRG.hs
@@ -23,8 +23,8 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..),
makeSimpleSRG, makeSRG
, lookupFM_, prtS
, topDownFilter, cfgCatToGFCat, srgTopCats
- --, EBnfSRGAlt(..), EBnfSRGItem
- --, ebnfSRGAlts
+ , EBnfSRGAlt(..), EBnfSRGItem
+ , ebnfSRGAlts
) where
import GF.Data.Operations
@@ -33,7 +33,7 @@ import GF.Infra.Ident
import GF.Formalism.CFG
import GF.Formalism.Utilities (Symbol(..), NameProfile(..)
, Profile(..), SyntaxForest
- , filterCats, mapSymbol)
+ , filterCats, mapSymbol, symbol)
import GF.Conversion.Types
import GF.Infra.Print
import GF.Speech.TransformCFG
@@ -177,8 +177,7 @@ srgTopCats srg = buildMultiMap [(oc, cat) | SRGRule cat origCat _ <- rules srg,
-- * Size-optimized EBNF SRGs
--
-{-
-data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) Name EBnfSRGItem
+data EBnfSRGAlt = EBnfSRGAlt (Maybe Double) CFTerm EBnfSRGItem
deriving (Eq,Show)
type EBnfSRGItem = RE (Symbol SRGNT Token)
@@ -186,21 +185,22 @@ type EBnfSRGItem = RE (Symbol SRGNT Token)
ebnfSRGAlts :: [SRGAlt] -> [EBnfSRGAlt]
ebnfSRGAlts alts = [EBnfSRGAlt p n (ebnfSRGItem sss)
- | ((p,n),sss) <- buildMultiMap [((p,n),ss) | SRGAlt p n ss <- alts]]
+ | ((n,p),sss) <- buildMultiMap [((n,p),ss) | SRGAlt p n ss <- alts]]
ebnfSRGItem :: [[Symbol SRGNT Token]] -> EBnfSRGItem
-ebnfSRGItem = dfa2re . mkSRGFA
-
-mkSRGFA :: [[Symbol SRGNT Token]] -> DFA (Symbol SRGNT Token)
-mkSRGFA = {- minimize . dfa2nfa . -} foldr addString (newFA ())
-
-addString :: [a] -> DFA a -> DFA a
-addString xs fa = addFinalState (last sts0) $ newTransitions ts fa'
- where (fa',ss) = newStates (replicate (length xs) ()) fa
- sts0 = startState fa : sts1
- sts1 = map fst ss
- ts = zip3 sts0 sts1 xs
--}
+ebnfSRGItem = unionRE . map mergeItems . sortGroupBy (compareBy filterCats)
+
+-- ^ Merges a list of right-hand sides which all have the same
+-- sequence of non-terminals.
+mergeItems :: [[Symbol SRGNT Token]] -> EBnfSRGItem
+--mergeItems = unionRE . map seqRE
+mergeItems [] = nullRE
+mergeItems sss | any null rss = t
+ | otherwise = concatRE [t,seqRE (head cs), mergeItems nss]
+ where (tss,rss) = unzip $ map (span isToken) sss
+ t = unionRE (map seqRE tss)
+ (cs,nss) = unzip $ map (splitAt 1) rss
+ isToken = symbol (const False) (const True)
--
-- * Utilities for building and printing SRGs
diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs
index ed1730a3d..bb6f16243 100644
--- a/src/GF/Speech/TransformCFG.hs
+++ b/src/GF/Speech/TransformCFG.hs
@@ -60,7 +60,7 @@ data CFTerm
| CFVar Int
| CFConst String
| CFMeta String
- deriving (Eq,Show)
+ deriving (Eq,Ord,Show)
type Cat_ = String
type CFSymbol_ = Symbol Cat_ Token