diff options
| author | aarne <aarne@cs.chalmers.se> | 2006-06-20 08:38:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2006-06-20 08:38:44 +0000 |
| commit | 402a113b567a96eef61946552b68df6ac6eb6712 (patch) | |
| tree | 6446bbaf195800c7e0ec71d30cba113c9bcf1f74 /src/GF/UseGrammar | |
| parent | cb168e92e23d0f620b75f6119a4cb298360e6f21 (diff) | |
made -fcfg default parser; added lexer textvars
Diffstat (limited to 'src/GF/UseGrammar')
| -rw-r--r-- | src/GF/UseGrammar/Custom.hs | 1 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Parsing.hs | 43 | ||||
| -rw-r--r-- | src/GF/UseGrammar/Tokenize.hs | 12 |
3 files changed, 31 insertions, 25 deletions
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 4314747bf..07eda0a37 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -450,6 +450,7 @@ customTokenizer = ,(strCI "chars", const $ sg . map (tS . singleton)) ,(strCI "code", const $ sg . lexHaskell) ,(strCI "codevars", \gr -> sg . (lexHaskellVar $ stateIsWord gr)) + ,(strCI "textvars", \gr -> sg . (lexTextVar $ stateIsWord gr)) ,(strCI "text", const $ sg . lexText) ,(strCI "unglue", \gr -> sg . map tS . decomposeWords (stateMorpho gr)) ,(strCI "codelit", \gr -> sg . (lexHaskellLiteral $ stateIsWord gr)) diff --git a/src/GF/UseGrammar/Parsing.hs b/src/GF/UseGrammar/Parsing.hs index 6e8965f08..e979579c9 100644 --- a/src/GF/UseGrammar/Parsing.hs +++ b/src/GF/UseGrammar/Parsing.hs @@ -54,25 +54,30 @@ parseStringMsg os sg cat s = do return (ts, unlines $ reverse ss) parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree] +parseStringC opts0 sg cat s + | oElem (iOpt "old") opts0 || + (not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do + let opts = unionOptions opts0 $ stateOptions sg + cf = stateCF sg + gr = stateGrammarST sg + cn = cncId sg + toks = customOrDefault opts useTokenizer customTokenizer sg s + parser = customOrDefault opts useParser customParser sg cat + if oElem (iOpt "cut") opts + then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks + else mapM (tokens2trms opts sg cn parser) toks >>= return . concat + +---- | or [oElem p opts0 | +---- p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do ----- to test peb's new parser 6/10/2003 ----- (obsoleted by "newer" below) --- parseStringC opts0 sg cat s --- | oElem newParser opts0 = do --- let pm = maybe "" id $ getOptVal opts0 useParser -- -parser=pm --- ct = cfCat2Cat cat --- ts <- checkErr $ NewOld.newParser pm sg ct s --- mapM (checkErr . annotate (stateGrammarST sg) . refreshMetas []) ts - --- to use peb's newer parser 7/4-05 -parseStringC opts0 sg cat s - | oElem newCParser opts0 || oElem newMParser opts0 || oElem newFParser opts0 || oElem newParser opts0 || oElem newerParser opts0 = do + | otherwise = do let opts = unionOptions opts0 $ stateOptions sg algorithm | oElem newCParser opts0 = "c" | oElem newMParser opts0 = "m" | oElem newFParser opts0 = "f" - | otherwise = "c" -- default algorithm - strategy = maybe "bottomup" id $ getOptVal opts useParser -- -parser=bottomup/topdown + | otherwise = "f" -- default algorithm: FCFG + strategy = maybe "bottomup" id $ getOptVal opts useParser + -- -parser=bottomup/topdown tokenizer = customOrDefault opts useTokenizer customTokenizer sg toks = case tokenizer s of t:_ -> t @@ -81,16 +86,6 @@ parseStringC opts0 sg cat s ts' <- mapM (checkErr . annotate (stateGrammarST sg) . refreshMetas []) ts return $ optIntOrAll opts flagNumber ts' -parseStringC opts0 sg cat s = do - let opts = unionOptions opts0 $ stateOptions sg - cf = stateCF sg - gr = stateGrammarST sg - cn = cncId sg - toks = customOrDefault opts useTokenizer customTokenizer sg s - parser = customOrDefault opts useParser customParser sg cat - if oElem (iOpt "cut") opts - then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks - else mapM (tokens2trms opts sg cn parser) toks >>= return . concat tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree] tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info diff --git a/src/GF/UseGrammar/Tokenize.hs b/src/GF/UseGrammar/Tokenize.hs index d16fdf32f..9f1ab5449 100644 --- a/src/GF/UseGrammar/Tokenize.hs +++ b/src/GF/UseGrammar/Tokenize.hs @@ -20,6 +20,7 @@ module GF.UseGrammar.Tokenize ( tokWords, lexHaskellLiteral, lexHaskellVar, lexText, + lexTextVar, lexC2M, lexC2M', lexTextLiteral, lexIgnore, @@ -58,6 +59,10 @@ isFloat s = case s of '.':cs@(_:_) -> all isDigit cs _ -> False +isString s = case s of + c:cs@(_:_) -> (c == '\'' && d == '\'') || (c == '"' && d == '"') where d = last cs + _ -> False + mkCFTok :: String -> CFTok mkCFTok s = case s of @@ -86,6 +91,7 @@ mkLit s | all isDigit s = tI s | otherwise = tL s +-- obsolete mkTL :: String -> CFTok mkTL s | isFloat s = tF s @@ -104,6 +110,7 @@ lexText :: String -> [CFTok] lexText = uncap . lx where lx s = case s of + '?':'?':cs -> tS "??" : lx cs p : cs | isMPunct p -> tS [p] : uncap (lx cs) p : cs | isPunct p -> tS [p] : lx cs s : cs | isSpace s -> lx cs @@ -177,7 +184,7 @@ unknown2string isKnown = map mkOne where | isFloat s = tF s | all isDigit s = tI s | otherwise = tL s - mkOne t@(TC s) = if isKnown s then t else mkTL s + mkOne t@(TC s) = if isKnown s then t else mkLit s mkOne t = t unknown2var :: (String -> Bool) -> [CFTok] -> [CFTok] @@ -186,6 +193,7 @@ unknown2var isKnown = map mkOne where mkOne t@(TS s) | isKnown s = t | isFloat s = tF s + | isString s = tL (init (tail s)) | all isDigit s = tI s | otherwise = tV s mkOne t@(TC s) = if isKnown s then t else tV s @@ -197,6 +205,8 @@ lexTextLiteral isKnown = unknown2string (eitherUpper isKnown) . lexText lexHaskellLiteral isKnown = unknown2string isKnown . lexHaskell lexHaskellVar isKnown = unknown2var isKnown . lexHaskell +lexTextVar isKnown = unknown2var (eitherUpper isKnown) . lexText + eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs) eitherUpper isKnown w = isKnown w |
