summaryrefslogtreecommitdiff
path: root/src/GF/UseGrammar
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2006-06-20 08:38:44 +0000
committeraarne <aarne@cs.chalmers.se>2006-06-20 08:38:44 +0000
commit402a113b567a96eef61946552b68df6ac6eb6712 (patch)
tree6446bbaf195800c7e0ec71d30cba113c9bcf1f74 /src/GF/UseGrammar
parentcb168e92e23d0f620b75f6119a4cb298360e6f21 (diff)
made -fcfg default parser; added lexer textvars
Diffstat (limited to 'src/GF/UseGrammar')
-rw-r--r--src/GF/UseGrammar/Custom.hs1
-rw-r--r--src/GF/UseGrammar/Parsing.hs43
-rw-r--r--src/GF/UseGrammar/Tokenize.hs12
3 files changed, 31 insertions, 25 deletions
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 4314747bf..07eda0a37 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -450,6 +450,7 @@ customTokenizer =
,(strCI "chars", const $ sg . map (tS . singleton))
,(strCI "code", const $ sg . lexHaskell)
,(strCI "codevars", \gr -> sg . (lexHaskellVar $ stateIsWord gr))
+ ,(strCI "textvars", \gr -> sg . (lexTextVar $ stateIsWord gr))
,(strCI "text", const $ sg . lexText)
,(strCI "unglue", \gr -> sg . map tS . decomposeWords (stateMorpho gr))
,(strCI "codelit", \gr -> sg . (lexHaskellLiteral $ stateIsWord gr))
diff --git a/src/GF/UseGrammar/Parsing.hs b/src/GF/UseGrammar/Parsing.hs
index 6e8965f08..e979579c9 100644
--- a/src/GF/UseGrammar/Parsing.hs
+++ b/src/GF/UseGrammar/Parsing.hs
@@ -54,25 +54,30 @@ parseStringMsg os sg cat s = do
return (ts, unlines $ reverse ss)
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
+parseStringC opts0 sg cat s
+ | oElem (iOpt "old") opts0 ||
+ (not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do
+ let opts = unionOptions opts0 $ stateOptions sg
+ cf = stateCF sg
+ gr = stateGrammarST sg
+ cn = cncId sg
+ toks = customOrDefault opts useTokenizer customTokenizer sg s
+ parser = customOrDefault opts useParser customParser sg cat
+ if oElem (iOpt "cut") opts
+ then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks
+ else mapM (tokens2trms opts sg cn parser) toks >>= return . concat
+
+---- | or [oElem p opts0 |
+---- p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do
----- to test peb's new parser 6/10/2003
----- (obsoleted by "newer" below)
--- parseStringC opts0 sg cat s
--- | oElem newParser opts0 = do
--- let pm = maybe "" id $ getOptVal opts0 useParser -- -parser=pm
--- ct = cfCat2Cat cat
--- ts <- checkErr $ NewOld.newParser pm sg ct s
--- mapM (checkErr . annotate (stateGrammarST sg) . refreshMetas []) ts
-
--- to use peb's newer parser 7/4-05
-parseStringC opts0 sg cat s
- | oElem newCParser opts0 || oElem newMParser opts0 || oElem newFParser opts0 || oElem newParser opts0 || oElem newerParser opts0 = do
+ | otherwise = do
let opts = unionOptions opts0 $ stateOptions sg
algorithm | oElem newCParser opts0 = "c"
| oElem newMParser opts0 = "m"
| oElem newFParser opts0 = "f"
- | otherwise = "c" -- default algorithm
- strategy = maybe "bottomup" id $ getOptVal opts useParser -- -parser=bottomup/topdown
+ | otherwise = "f" -- default algorithm: FCFG
+ strategy = maybe "bottomup" id $ getOptVal opts useParser
+ -- -parser=bottomup/topdown
tokenizer = customOrDefault opts useTokenizer customTokenizer sg
toks = case tokenizer s of
t:_ -> t
@@ -81,16 +86,6 @@ parseStringC opts0 sg cat s
ts' <- mapM (checkErr . annotate (stateGrammarST sg) . refreshMetas []) ts
return $ optIntOrAll opts flagNumber ts'
-parseStringC opts0 sg cat s = do
- let opts = unionOptions opts0 $ stateOptions sg
- cf = stateCF sg
- gr = stateGrammarST sg
- cn = cncId sg
- toks = customOrDefault opts useTokenizer customTokenizer sg s
- parser = customOrDefault opts useParser customParser sg cat
- if oElem (iOpt "cut") opts
- then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks
- else mapM (tokens2trms opts sg cn parser) toks >>= return . concat
tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info
diff --git a/src/GF/UseGrammar/Tokenize.hs b/src/GF/UseGrammar/Tokenize.hs
index d16fdf32f..9f1ab5449 100644
--- a/src/GF/UseGrammar/Tokenize.hs
+++ b/src/GF/UseGrammar/Tokenize.hs
@@ -20,6 +20,7 @@ module GF.UseGrammar.Tokenize ( tokWords,
lexHaskellLiteral,
lexHaskellVar,
lexText,
+ lexTextVar,
lexC2M, lexC2M',
lexTextLiteral,
lexIgnore,
@@ -58,6 +59,10 @@ isFloat s = case s of
'.':cs@(_:_) -> all isDigit cs
_ -> False
+isString s = case s of
+ c:cs@(_:_) -> (c == '\'' && d == '\'') || (c == '"' && d == '"') where d = last cs
+ _ -> False
+
mkCFTok :: String -> CFTok
mkCFTok s = case s of
@@ -86,6 +91,7 @@ mkLit s
| all isDigit s = tI s
| otherwise = tL s
+-- obsolete
mkTL :: String -> CFTok
mkTL s
| isFloat s = tF s
@@ -104,6 +110,7 @@ lexText :: String -> [CFTok]
lexText = uncap . lx where
lx s = case s of
+ '?':'?':cs -> tS "??" : lx cs
p : cs | isMPunct p -> tS [p] : uncap (lx cs)
p : cs | isPunct p -> tS [p] : lx cs
s : cs | isSpace s -> lx cs
@@ -177,7 +184,7 @@ unknown2string isKnown = map mkOne where
| isFloat s = tF s
| all isDigit s = tI s
| otherwise = tL s
- mkOne t@(TC s) = if isKnown s then t else mkTL s
+ mkOne t@(TC s) = if isKnown s then t else mkLit s
mkOne t = t
unknown2var :: (String -> Bool) -> [CFTok] -> [CFTok]
@@ -186,6 +193,7 @@ unknown2var isKnown = map mkOne where
mkOne t@(TS s)
| isKnown s = t
| isFloat s = tF s
+ | isString s = tL (init (tail s))
| all isDigit s = tI s
| otherwise = tV s
mkOne t@(TC s) = if isKnown s then t else tV s
@@ -197,6 +205,8 @@ lexTextLiteral isKnown = unknown2string (eitherUpper isKnown) . lexText
lexHaskellLiteral isKnown = unknown2string isKnown . lexHaskell
lexHaskellVar isKnown = unknown2var isKnown . lexHaskell
+lexTextVar isKnown = unknown2var (eitherUpper isKnown) . lexText
+
eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs)
eitherUpper isKnown w = isKnown w