summaryrefslogtreecommitdiff
path: root/contrib/eaglesconv/EaglesConv.hs
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/eaglesconv/EaglesConv.hs')
-rw-r--r--contrib/eaglesconv/EaglesConv.hs135
1 files changed, 0 insertions, 135 deletions
diff --git a/contrib/eaglesconv/EaglesConv.hs b/contrib/eaglesconv/EaglesConv.hs
deleted file mode 100644
index aa8929496..000000000
--- a/contrib/eaglesconv/EaglesConv.hs
+++ /dev/null
@@ -1,135 +0,0 @@
--- Copyright (C) 2011 Nikita Frolov
-
--- No, we can't pipeline parsing and generation, because there is no guarantee
--- that we have collected all forms for a lemma before we've scanned the
--- complete file.
-
-import qualified Data.Text as T
-import qualified Data.Text.IO as UTF8
-import System.IO
-import System.Environment
-import Control.Monad
-import Control.Monad.State
-import qualified Data.Map as M
-import Codec.Text.IConv
-import qualified Data.ByteString.Lazy as BS
-import qualified Data.ByteString.Internal as BSI
-
-import EaglesMatcher
-
-type Lemmas = M.Map T.Text Forms
-
-main :: IO ()
-main = do
- args <- getArgs
- forM_ args $ \ f -> do
- entries <- UTF8.readFile f >>= (return . T.lines)
- lemmas <- return $ execState (collectLemmas entries) (M.empty :: Lemmas)
- mapM_ generateLin (M.assocs lemmas)
-
-collectLemmas entries = do
- forM_ entries $ \ entry -> do
- let ws = T.words entry
- lemma = head ws
- tags = toPairs $ tail ws
- lemmas <- get
- forM_ tags $ \ (form, tag) -> do
- let forms = (case M.lookup lemma lemmas of
- Just f -> f
- Nothing -> M.empty) :: Forms
- if isOpenCat . T.unpack $ tag
- then put $ M.insert lemma (M.insert tag form forms) lemmas
- else return ()
-
-generateLin :: (T.Text, Forms) -> IO ()
-generateLin (lemma, forms) = do
- let lemma' = myVeryOwnCyrillicRomanizationIConvSucks lemma
- UTF8.putStr $ T.concat [T.pack "lin ", lemma']
- UTF8.putStr $ case T.unpack . head . M.keys $ forms of
- ('N':_:_:_:g:a:'0':_) ->
- T.concat $ [T.pack "_N = mkN "]
- ++ map (quote . noun forms) [ ('N','S'), ('G','S')
- , ('D','S'), ('F','S'), ('C','S'), ('O','S')
- , ('L','S'), ('N','P'), ('G','P'), ('D','P')
- , ('F','P'), ('C','P'), ('O','P') ]
- ++ [showG g, sp, showAni a, ln]
- ('N':_:c:n:g:a:_) ->
- T.concat $ [T.pack "_PN = mkPN "
- , quote $ noun forms ('N', 'S')
- , showG g, sp
- , showN n, sp, showAni a, ln]
- ('A':_) ->
- T.concat $ [T.pack "_A = mkA ", quote $ adj forms 'P',
- if adj forms 'P' /= adj forms 'C'
- then quote $ adj forms 'C'
- else T.pack ""
- , ln]
- ('V':t) ->
- let a = case t of
- (_:_:_:_:'P':_:a':_) -> a'
- (_:_:_:_:_:a':_) -> a'
- in
- T.concat $ [T.pack "_V = mkV ", showAsp a, sp]
- ++ map (quote . verbPres forms) [ ('S','1'), ('S','2')
- , ('S','3'), ('P','1')
- , ('P','2'), ('P','3')]
- ++ [ quote $ verbPast forms ('S', 'M')
- , quote $ verbImp forms, quote $ verbInf forms, ln]
- ('D':_) ->
- T.concat $ [T.pack "_Adv = mkAdv "
- , quote . adv $ forms, ln]
- putStrLn ""
- hFlush stdout
- where quote x = T.concat [T.pack "\"", x, T.pack "\" "]
- showG 'F' = T.pack "Fem"
- showG 'A' = T.pack "Neut"
- showG _ = T.pack "Masc"
- showAni 'I' = T.pack "Inanimate"
- showAni _ = T.pack "Animate"
- showN 'P' = T.pack "Pl"
- showN _ = T.pack "Sg"
- showAsp 'F' = T.pack "Perfective"
- showAsp _ = T.pack "Imperfective"
- sp = T.singleton ' '
- ln = T.pack " ;"
-
-toPairs xs = zip (stride 2 xs) (stride 2 (drop 1 xs))
- where stride _ [] = []
- stride n (x:xs) = x : stride n (drop (n-1) xs)
-
-myVeryOwnCyrillicRomanizationIConvSucks s = T.pack . concatMap r . T.unpack $ s
- where r 'а' = "a"
- r 'б' = "b"
- r 'в' = "v"
- r 'г' = "g"
- r 'д' = "d"
- r 'е' = "je"
- r 'ё' = "jo"
- r 'ж' = "zh"
- r 'з' = "z"
- r 'и' = "i"
- r 'й' = "jj"
- r 'к' = "k"
- r 'л' = "l"
- r 'м' = "m"
- r 'н' = "n"
- r 'о' = "o"
- r 'п' = "p"
- r 'р' = "r"
- r 'с' = "s"
- r 'т' = "t"
- r 'у' = "u"
- r 'ф' = "f"
- r 'х' = "kh"
- r 'ц' = "c"
- r 'ч' = "ch"
- r 'ш' = "sh"
- r 'щ' = "shc"
- r 'ъ' = "yy"
- r 'ы' = "y"
- r 'ь' = "q"
- r 'э' = "e"
- r 'ю' = "ju"
- r 'я' = "ja"
- r '-' = "_"
- r o = [o]