summary | refs | log | tree | commit | diff
path: root/src/GF
diff options
context:
space:
mode:
author: krasimir <krasimir@chalmers.se> 2009-10-23 08:35:32 +0000
committer: krasimir <krasimir@chalmers.se> 2009-10-23 08:35:32 +0000
commit: d5f4669aec26c6a580a28b05a6005425ad663555 (patch)
tree: a0ea03cadac7cce788e3359ed48434837fcd0fcd /src/GF
parent: 70b5e2a93037603f9f5b20c46e9ad15a95a7c097 (diff)
experimental robust parser
Diffstat (limited to 'src/GF')
-rw-r--r-- src/GF/Command/Commands.hs 18
-rw-r--r-- src/GF/Data/TrieMap.hs 11
2 files changed, 26 insertions, 3 deletions
diff --git a/src/GF/Command/Commands.hs b/src/GF/Command/Commands.hs
index 1b12d82cc..d182b65ba 100644
--- a/src/GF/Command/Commands.hs
+++ b/src/GF/Command/Commands.hs
@@ -389,12 +389,17 @@ allCommands cod env@(pgf, mos) = Map.fromList [
"Shows all trees returned by parsing a string in the grammars in scope.",
"The -lang flag can be used to restrict this to fewer languages.",
"The default start category can be overridden by the -cat flag.",
- "See also the ps command for lexing and character encoding."
+ "See also the ps command for lexing and character encoding.",
+ "",
+ "The -openclass flag is experimental and allows some robustness in ",
+ "the parser. For example if -openclass=\"A,N,V\" is given, the parser",
+ "will accept unknown adjectives, nouns and verbs with the resource grammar."
],
exec = \opts -> returnFromExprs . concatMap (par opts) . toStrings,
flags = [
("cat","target category of parsing"),
- ("lang","the languages of parsing (comma-separated, no spaces)")
+ ("lang","the languages of parsing (comma-separated, no spaces)"),
+ ("openclass","list of open-class categories for robust parsing")
]
}),
("pg", emptyCommandInfo { -----
@@ -742,7 +747,9 @@ allCommands cod env@(pgf, mos) = Map.fromList [
]
where
enc = encodeUnicode cod
- par opts s = concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang]
+ par opts s = case optOpenTypes opts of
+ [] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts, canParse pgf lang]
+ open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts, canParse pgf lang]
void = ([],[])
@@ -789,6 +796,11 @@ allCommands cod env@(pgf, mos) = Map.fromList [
"" -> languages pgf
lang -> map mkCId (chunks ',' lang)
optLang opts = head $ optLangs opts ++ [wildCId]
+
+ optOpenTypes opts = case valStrOpts "openclass" "" opts of
+ "" -> []
+ cats -> mapMaybe readType (chunks ',' cats)
+
optType opts =
let str = valStrOpts "cat" (showCId $ lookStartCat pgf) opts
in case readType str of
diff --git a/src/GF/Data/TrieMap.hs b/src/GF/Data/TrieMap.hs
index 37c56fc3a..a6749d641 100644
--- a/src/GF/Data/TrieMap.hs
+++ b/src/GF/Data/TrieMap.hs
@@ -12,6 +12,9 @@ module GF.Data.TrieMap
, insertWith
, unionWith
+ , unionsWith
+
+ , elems
) where
import Prelude hiding (lookup, null)
@@ -53,3 +56,11 @@ unionWith f (Tr mb_v1 m1) (Tr mb_v2 m2) =
(Just v1,Just v2) -> Just (f v1 v2)
m = Map.unionWith (unionWith f) m1 m2
in Tr mb_v m
+
+unionsWith :: Ord k => (v -> v -> v) -> [TrieMap k v] -> TrieMap k v
+unionsWith f = foldl (unionWith f) empty
+
+elems :: TrieMap k v -> [v]
+elems tr = collect tr []
+ where
+ collect (Tr mb_v m) xs = maybe id (:) mb_v (Map.fold collect xs m)