summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/compiler/GF/Command/Commands.hs33
-rw-r--r--src/runtime/haskell/PGF.hs4
-rw-r--r--src/runtime/haskell/PGF/Tokenizer.hs113
3 files changed, 0 insertions, 150 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 348a942d8..1255b3517 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -1120,13 +1120,6 @@ allCommands = Map.fromList [
],
flags = [("file","the output filename")]
}),
- ("t", emptyCommandInfo {
- longname = "tokenize",
- synopsis = "Tokenize string using the vocabulary",
- exec = execToktok,
- options = [],
- flags = [("lang","The name of the concrete to use")]
- }),
("ai", emptyCommandInfo {
longname = "abstract_info",
syntax = "ai IDENTIFIER or ai EXPR",
@@ -1426,32 +1419,6 @@ prMorphoAnalysis (w,lps) =
unlines (w:[showCId l ++ " : " ++ p | (l,p) <- lps])
--- This function is to be excuted when the command 'tok' is parsed
-execToktok :: Monad m => PGFEnv -> [Option] -> [Expr] -> m CommandOutput
-execToktok (pgf, _) opts exprs = do
- let tokenizers = Map.fromList [ (l, mkTokenizer pgf l) | l <- languages pgf]
- case getLang opts of
- Nothing -> do
- let output = concatMap toStringList [t input | (_,t) <- Map.toList tokenizers]
- return (fromStrings output)
- Just lang -> case Map.lookup lang tokenizers of
- Just tok -> do
- let output = toStringList $ tok input
- return (fromStrings output)
- Nothing -> return (pipeMessage ("Unknown language: " ++ show lang))
- where input = case exprs of
- [ELit (LStr s)] -> s
- _ -> ""
- toStringList :: Maybe [String] -> [String]
- toStringList Nothing = []
- toStringList (Just l) = l
- getLang :: [Option] -> Maybe Language
- getLang [] = Nothing
- getLang (OFlag "lang" (VId l):_) = readLanguage l
- getLang (_:os) = getLang os
-
-
-
trie = render . pptss . toTrie . map toATree
where
pptss [ts] = "*"<+>nest 2 (ppts ts)
diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs
index d2e70166c..07c14324f 100644
--- a/src/runtime/haskell/PGF.hs
+++ b/src/runtime/haskell/PGF.hs
@@ -119,9 +119,6 @@ module PGF(
-- extra:
morphoKnown, isInMorpho,
- -- ** Tokenizing
- mkTokenizer,
-
-- ** Visualizations
graphvizAbstractTree,
graphvizParseTree,
@@ -165,7 +162,6 @@ import PGF.Expr (Tree)
import PGF.Morphology
import PGF.Data
import PGF.Binary()
-import PGF.Tokenizer
import qualified PGF.Forest as Forest
import qualified PGF.Parse as Parse
import PGF.Utilities(replace)
diff --git a/src/runtime/haskell/PGF/Tokenizer.hs b/src/runtime/haskell/PGF/Tokenizer.hs
deleted file mode 100644
index 6b8800c57..000000000
--- a/src/runtime/haskell/PGF/Tokenizer.hs
+++ /dev/null
@@ -1,113 +0,0 @@
-{-
-GF Tokenizer.
-
-In this module are implemented function that build a fst-based tokenizer
-from a Concrete grammar.
-
--}
-{-# LANGUAGE CPP #-}
-module PGF.Tokenizer
- ( mkTokenizer
- ) where
-
-#if MIN_VERSION_base(4,8,0)
-import Prelude hiding ((<*>))
-#endif
---import Data.List (intercalate)
---import Test.QuickCheck
-import FST.TransducerInterface
-import PGF.Morphology (fullFormLexicon, buildMorpho)
-import PGF.Data (PGF, Language)
-
-
-
-data LexSymbol = Tok String
- deriving (Show, Read)
-
-type Lexicon = [LexSymbol]
-
--- | This is the construction function. Given a PGF and a Language, it
--- extract the lexicon for this language and build a tokenization fst
--- from it.
-mkTokenizer :: PGF -> Language -> (String -> Maybe [String])
-mkTokenizer pgf lang = mkTrans lexicon
- where lexicon = map (Tok . fst) lexicon'
- lexicon' = fullFormLexicon $ buildMorpho pgf lang
-
-mkTrans :: Lexicon -> (String -> Maybe [String])
-mkTrans = applyDown . lexiconTrans
-
-lexiconTrans :: Lexicon -> Transducer Char
-lexiconTrans lexicon = compile (words |> star ((spaces <|> glue) |> words)) "abcdefghijklmnopqrstuvwxyz "
- where words = foldr (<|>) (empty) $ map tokToRR lexicon
- glue = eps <*> stringReg " &+ "
-
-stringReg :: String -> Reg Char
-stringReg str = foldr (\x y -> s x |> y) eps str
-
-tokToRR:: LexSymbol -> RReg Char
-tokToRR (Tok str) = foldr ((|>) . idR . s) (idR eps) str
-
-spaces :: RReg Char
-spaces = idR $ s ' '
-
-
-
--- TESTING
-
--- verry small test lexicon
--- testLexicon :: Lexicon
--- testLexicon
--- = [ Tok "car"
--- , Tok "elf"
--- ]
-
--- myTrans :: String -> Maybe [String]
--- myTrans = mkTrans testLexicon
-
--- data TestCase = TestCase String String
--- deriving (Show, Read)
-
--- instance Arbitrary TestCase where
--- arbitrary = arbitraryTestCase
--- --coarbitrary c = variant (ord c `rem` 4)
-
--- arbitraryTestCase:: Gen TestCase
--- arbitraryTestCase = do
--- words <- listOf1 $ elements [t | Tok t <- testLexicon]
--- tokens <- intercalateSometime "+&+" words
--- return $ TestCase (linearize tokens) (intercalate " " tokens)
--- where intercalateSometime :: a -> [a] -> Gen [a]
--- intercalateSometime x (x1:x2:xs) = do
--- b <- arbitrary
--- let pre = case b of
--- True -> x1:x:[]
--- False -> x1:[]
--- suf <- intercalateSometime x (x2:xs)
--- return (pre++suf)
--- intercalateSometime _ xs = return xs
-
--- linearize :: [String] -> String
--- linearize = linearize' False
--- where linearize' _ [] = "" -- boolean indicate if the last token was a real word and not +&+
--- linearize' _ ("+&+":ss) = linearize' False ss
--- linearize' True (s:ss) = ' ':s ++ linearize' True ss
--- linearize' False (s:ss) = s ++ linearize' True ss
-
--- testTrans :: (String -> Maybe [String]) -> TestCase -> Bool
--- testTrans t (TestCase s1 s2) =
--- case t s1 of
--- Nothing -> False
--- Just l -> elem s2 l
-
--- main :: IO ()
--- main = do
--- putStrLn "\n=== Transducer ==="
--- print $ lexiconTrans lexicon
--- putStrLn "\n=== example output ==="
--- putStrLn $ "Input: " ++ show "car elfcar elf"
--- putStrLn $ "Output: " ++ (show $ mkTrans lexicon "car elfcar elf")
--- putStrLn "\n=== QuickCheck tests ==="
--- quickCheck (testTrans myTrans)
--- putStrLn "\n=== Examples of test cases ==="
--- sample (arbitrary :: Gen TestCase)