diff options
| author | gdetrez <gdetrez@crans.org> | 2011-02-10 15:00:06 +0000 |
|---|---|---|
| committer | gdetrez <gdetrez@crans.org> | 2011-02-10 15:00:06 +0000 |
| commit | 45ecae4b774aee96dcc3e9f2c5f82307982faa08 (patch) | |
| tree | 21b7ce41a617b24e8b25b92e912789049e7b2def /src/compiler | |
| parent | d7ae73f1c7c5b95f1f08dadd314fa7143602b523 (diff) | |
Adding a basic lexicon-based tokenizer and the associated command in the gf shell
Diffstat (limited to 'src/compiler')
| -rw-r--r-- | src/compiler/GF/Command/Commands.hs | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 1290666cb..00d8e427a 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -964,6 +964,13 @@ allCommands env@(pgf, mos) = Map.fromList [
        ],
      flags = [("file","the output filename")]
      }),
+  ("t", emptyCommandInfo {
+     longname = "tokenize",
+     synopsis = "Tokenize string using the vocabulary",
+     exec = execToktok env,
+     options = [],
+     flags = [("lang","The name of the concrete to use")]
+     }),
   ("ai", emptyCommandInfo {
      longname = "abstract_info",
      syntax = "ai IDENTIFIER or ai EXPR",
@@ -1251,3 +1258,26 @@ prMorphoAnalysis (w,lps) =
   unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])
 
 
+-- | Handler for the 't' ('tokenize') command. Tokenizes the single string-literal
+-- argument with the tokenizer of the concrete syntax named by the "lang" flag;
+-- without a readable "lang" flag, the tokens from every language are concatenated.
+-- Each token is returned as a string literal and as one line of the text output.
+execToktok :: PGFEnv -> [Option] -> [Expr] -> IO CommandOutput
+execToktok (pgf, _) opts exprs =
+  case getLang opts of
+    Nothing   -> result (concatMap ($ input) (Map.elems tokenizers))
+    Just lang -> case Map.lookup lang tokenizers of
+      Just tok -> result (tok input)
+      Nothing  -> return ([], "Unknown language: " ++ show lang)
+  where
+    -- One tokenizer per concrete syntax; a 'Nothing' result yields no tokens.
+    tokenizers = Map.fromList [(l, maybe [] id . mkTokenizer pgf l) | l <- languages pgf]
+    -- Any argument other than exactly one string literal is treated as "".
+    input = case exprs of
+      [ELit (LStr s)] -> s
+      _               -> ""
+    result toks = return ([ELit (LStr t) | t <- toks], unlines toks)
+    getLang :: [Option] -> Maybe Language
+    getLang []                         = Nothing
+    getLang (OFlag "lang" (VId l) : _) = readLanguage l
+    getLang (_ : os)                   = getLang os
