summaryrefslogtreecommitdiff
path: root/src/compiler/GF/Command
diff options
context:
space:
mode:
authorgdetrez <gdetrez@crans.org>2011-02-10 15:00:06 +0000
committergdetrez <gdetrez@crans.org>2011-02-10 15:00:06 +0000
commit45ecae4b774aee96dcc3e9f2c5f82307982faa08 (patch)
tree21b7ce41a617b24e8b25b92e912789049e7b2def /src/compiler/GF/Command
parentd7ae73f1c7c5b95f1f08dadd314fa7143602b523 (diff)
Adding a basic lexicon-based tokenizer and the associated command in the gf shell
Diffstat (limited to 'src/compiler/GF/Command')
-rw-r--r--src/compiler/GF/Command/Commands.hs30
1 files changed, 30 insertions, 0 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 1290666cb..00d8e427a 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -964,6 +964,13 @@ allCommands env@(pgf, mos) = Map.fromList [
],
flags = [("file","the output filename")]
}),
+ ("t", emptyCommandInfo {
+ longname = "tokenize",
+ synopsis = "Tokenize string usng the vocabulary",
+ exec = execToktok env,
+ options = [],
+ flags = [("lang","The name of the concrete to use")]
+ }),
("ai", emptyCommandInfo {
longname = "abstract_info",
syntax = "ai IDENTIFIER or ai EXPR",
@@ -1251,3 +1258,26 @@ prMorphoAnalysis (w,lps) =
unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])
+-- | Handler executed for the shell command registered as "t" (longname
+-- "tokenize").
+--
+-- A tokenizer is built with 'mkTokenizer' for every language returned by
+-- @languages pgf@, then applied to the input string:
+--
+--   * no usable @lang@ flag: every tokenizer is run on the input and the
+--     results are concatenated; a tokenizer that yields 'Nothing'
+--     contributes no tokens;
+--   * @lang@ flag naming a language present in the map: only that
+--     tokenizer is run;
+--   * @lang@ flag naming a language absent from the map: no expressions are
+--     returned and the text is an "Unknown language" message.
+--
+-- The result pairs the tokens as string-literal expressions with the same
+-- tokens rendered one per line ('unlines').
+execToktok :: PGFEnv -> [Option] -> [Expr] -> IO CommandOutput
+execToktok (pgf, _) opts exprs = do
+let tokenizers = Map.fromList [ (l, mkTokenizer pgf l) | l <- languages pgf]
+case getLang opts of
+Nothing -> do
+let output = concatMap toStringList [t input | (_,t) <- Map.toList tokenizers]
+return ([ELit $ LStr o | o <- output],unlines output)
+Just lang -> case Map.lookup lang tokenizers of
+Just tok -> do
+let output = toStringList $ tok input
+return ([ELit $ LStr o | o <- output],unlines output)
+Nothing -> return ([],"Unknown language: " ++ show lang)
+-- The input is the sole string-literal argument; any other argument list
+-- (none, several, or a non-string expression) is treated as the empty string.
+where input = case exprs of
+[ELit (LStr s)] -> s
+_ -> ""
+-- Flatten a failed tokenization ('Nothing') to an empty token list.
+toStringList :: Maybe [String] -> [String]
+toStringList Nothing = []
+toStringList (Just l) = l
+-- Take the first @lang@ flag whose value is an identifier ('VId') and parse
+-- it with 'readLanguage'; all other options are skipped.
+-- NOTE(review): when 'readLanguage' returns 'Nothing' the caller falls into
+-- the all-languages branch instead of reporting an error -- confirm intended.
+getLang :: [Option] -> Maybe Language
+getLang [] = Nothing
+getLang (OFlag "lang" (VId l):_) = readLanguage l
+getLang (_:os) = getLang os