diff options
Diffstat (limited to 'contrib/eaglesconv/EaglesConv.hs')
| -rw-r--r-- | contrib/eaglesconv/EaglesConv.hs | 135 |
1 files changed, 0 insertions, 135 deletions
diff --git a/contrib/eaglesconv/EaglesConv.hs b/contrib/eaglesconv/EaglesConv.hs deleted file mode 100644 index aa8929496..000000000 --- a/contrib/eaglesconv/EaglesConv.hs +++ /dev/null @@ -1,135 +0,0 @@ --- Copyright (C) 2011 Nikita Frolov - --- No, we can't pipeline parsing and generation, because there is no guarantee --- that we have collected all forms for a lemma before we've scanned the --- complete file. - -import qualified Data.Text as T -import qualified Data.Text.IO as UTF8 -import System.IO -import System.Environment -import Control.Monad -import Control.Monad.State -import qualified Data.Map as M -import Codec.Text.IConv -import qualified Data.ByteString.Lazy as BS -import qualified Data.ByteString.Internal as BSI - -import EaglesMatcher - -type Lemmas = M.Map T.Text Forms - -main :: IO () -main = do - args <- getArgs - forM_ args $ \ f -> do - entries <- UTF8.readFile f >>= (return . T.lines) - lemmas <- return $ execState (collectLemmas entries) (M.empty :: Lemmas) - mapM_ generateLin (M.assocs lemmas) - -collectLemmas entries = do - forM_ entries $ \ entry -> do - let ws = T.words entry - lemma = head ws - tags = toPairs $ tail ws - lemmas <- get - forM_ tags $ \ (form, tag) -> do - let forms = (case M.lookup lemma lemmas of - Just f -> f - Nothing -> M.empty) :: Forms - if isOpenCat . T.unpack $ tag - then put $ M.insert lemma (M.insert tag form forms) lemmas - else return () - -generateLin :: (T.Text, Forms) -> IO () -generateLin (lemma, forms) = do - let lemma' = myVeryOwnCyrillicRomanizationIConvSucks lemma - UTF8.putStr $ T.concat [T.pack "lin ", lemma'] - UTF8.putStr $ case T.unpack . head . M.keys $ forms of - ('N':_:_:_:g:a:'0':_) -> - T.concat $ [T.pack "_N = mkN "] - ++ map (quote . noun forms) [ ('N','S'), ('G','S') - , ('D','S'), ('F','S'), ('C','S'), ('O','S') - , ('L','S'), ('N','P'), ('G','P'), ('D','P') - , ('F','P'), ('C','P'), ('O','P') ] - ++ [showG g, sp, showAni a, ln] - ('N':_:c:n:g:a:_) -> - T.concat $ [T.pack "_PN = mkPN " - , quote $ noun forms ('N', 'S') - , showG g, sp - , showN n, sp, showAni a, ln] - ('A':_) -> - T.concat $ [T.pack "_A = mkA ", quote $ adj forms 'P', - if adj forms 'P' /= adj forms 'C' - then quote $ adj forms 'C' - else T.pack "" - , ln] - ('V':t) -> - let a = case t of - (_:_:_:_:'P':_:a':_) -> a' - (_:_:_:_:_:a':_) -> a' - in - T.concat $ [T.pack "_V = mkV ", showAsp a, sp] - ++ map (quote . verbPres forms) [ ('S','1'), ('S','2') - , ('S','3'), ('P','1') - , ('P','2'), ('P','3')] - ++ [ quote $ verbPast forms ('S', 'M') - , quote $ verbImp forms, quote $ verbInf forms, ln] - ('D':_) -> - T.concat $ [T.pack "_Adv = mkAdv " - , quote . adv $ forms, ln] - putStrLn "" - hFlush stdout - where quote x = T.concat [T.pack "\"", x, T.pack "\" "] - showG 'F' = T.pack "Fem" - showG 'A' = T.pack "Neut" - showG _ = T.pack "Masc" - showAni 'I' = T.pack "Inanimate" - showAni _ = T.pack "Animate" - showN 'P' = T.pack "Pl" - showN _ = T.pack "Sg" - showAsp 'F' = T.pack "Perfective" - showAsp _ = T.pack "Imperfective" - sp = T.singleton ' ' - ln = T.pack " ;" - -toPairs xs = zip (stride 2 xs) (stride 2 (drop 1 xs)) - where stride _ [] = [] - stride n (x:xs) = x : stride n (drop (n-1) xs) - -myVeryOwnCyrillicRomanizationIConvSucks s = T.pack . concatMap r . T.unpack $ s - where r 'а' = "a" - r 'б' = "b" - r 'в' = "v" - r 'г' = "g" - r 'д' = "d" - r 'е' = "je" - r 'ё' = "jo" - r 'ж' = "zh" - r 'з' = "z" - r 'и' = "i" - r 'й' = "jj" - r 'к' = "k" - r 'л' = "l" - r 'м' = "m" - r 'н' = "n" - r 'о' = "o" - r 'п' = "p" - r 'р' = "r" - r 'с' = "s" - r 'т' = "t" - r 'у' = "u" - r 'ф' = "f" - r 'х' = "kh" - r 'ц' = "c" - r 'ч' = "ch" - r 'ш' = "sh" - r 'щ' = "shc" - r 'ъ' = "yy" - r 'ы' = "y" - r 'ь' = "q" - r 'э' = "e" - r 'ю' = "ju" - r 'я' = "ja" - r '-' = "_" - r o = [o] |
