summaryrefslogtreecommitdiff
path: root/src/tools/gftest/Main.hs
diff options
context:
space:
mode:
authorInari Listenmaa <inari.listenmaa@gmail.com>2018-06-15 14:31:21 +0200
committerInari Listenmaa <inari.listenmaa@gmail.com>2018-06-15 14:31:21 +0200
commit9d2b92dbc1d9e221ce180497cd7d04e0757650a9 (patch)
tree01ea74d4e1ba6a4cea565d263369da9b4947a4b0 /src/tools/gftest/Main.hs
parent2d9240e0365161cb97accb75ccace24eb431e07e (diff)
Split gftest to a new repo
Diffstat (limited to 'src/tools/gftest/Main.hs')
-rw-r--r--src/tools/gftest/Main.hs444
1 files changed, 0 insertions, 444 deletions
diff --git a/src/tools/gftest/Main.hs b/src/tools/gftest/Main.hs
deleted file mode 100644
index f8e122318..000000000
--- a/src/tools/gftest/Main.hs
+++ /dev/null
@@ -1,444 +0,0 @@
-{-# LANGUAGE DeriveDataTypeable #-}
-
-module Main where
-
-import Grammar
-import EqRel
-
-import Control.Monad ( when )
-import Data.List ( intercalate, groupBy, sortBy, deleteFirstsBy, isInfixOf )
-import Data.Maybe ( fromMaybe, mapMaybe )
-import qualified Data.Set as S
-import qualified Data.Map as M
-
-import System.Console.CmdArgs hiding ( name, args )
-import qualified System.Console.CmdArgs as A
-import System.FilePath.Posix ( takeFileName )
-import System.IO ( stdout, hSetBuffering, BufferMode(..) )
-
-
-data GfTest
- = GfTest
- { grammar :: Maybe FilePath
- -- Languages
- , lang :: Lang
-
- -- Functions and cats
- , function :: Name
- , category :: Cat
- , tree :: String
- , start_cat :: Maybe Cat
- , show_cats :: Bool
- , show_funs :: Bool
- , funs_of_arity :: Maybe Int
- , show_coercions:: Bool
- , show_contexts :: Maybe Int
- , concr_string :: String
-
- -- Information about fields
- , equal_fields :: Bool
- , empty_fields :: Bool
- , unused_fields :: Bool
- , erased_trees :: Bool
-
- -- Compare to old grammar
- , old_grammar :: Maybe FilePath
- , only_changed_cats :: Bool
-
- -- Misc
- , treebank :: Maybe FilePath
- , count_trees :: Maybe Int
- , debug :: Bool
- , write_to_file :: Bool
-
- } deriving (Data,Typeable,Show,Eq)
-
-gftest = GfTest
- { grammar = def &= typFile &= help "Path to the grammar (PGF) you want to test"
- , lang = def &= A.typ "\"Eng Swe\""
- &= help "Concrete syntax + optional translations"
- , tree = def &= A.typ "\"UseN tree_N\""
- &= A.name "t" &= help "Test the given tree"
- , function = def &= A.typ "UseN"
- &= A.name "f" &= help "Test the given function(s)"
- , category = def &= A.typ "NP"
- &= A.name "c" &= help "Test all functions with given goal category"
- , start_cat = def &= A.typ "Utt"
- &= A.name "s" &= help "Use the given category as start category"
- , concr_string = def &= A.typ "the" &= help "Show all functions that include given string"
- , show_cats = def &= help "Show all available categories"
- , show_funs = def &= help "Show all available functions"
- , funs_of_arity = def &= A.typ "2" &= help "Show all functions of arity 2"
- , show_coercions= def &= help "Show coercions in the grammar"
- , show_contexts = def &= A.typ "8410" &= help "Show contexts for a given concrete type (given as FId)"
- , debug = def &= help "Show debug output"
- , equal_fields = def &= A.name "q" &= help "Show fields whose strings are always identical"
- , empty_fields = def &= A.name "e" &= help "Show fields whose strings are always empty"
- , unused_fields = def &= help "Show fields that never make it into the top category"
- , erased_trees = def &= A.name "r" &= help "Show trees that are erased"
- , treebank = def &= typFile
- &= A.name "b" &= help "Path to a treebank"
- , count_trees = def &= A.typ "3" &= help "Number of trees of size <3>"
- , old_grammar = def &= typFile
- &= A.name "o" &= help "Path to an earlier version of the grammar"
- , only_changed_cats = def &= help "When comparing against an earlier version of a grammar, only test functions in categories that have changed between versions"
- , write_to_file = def &= help "Write the results in a file (<GRAMMAR>_<FUN>.org)"
- }
-
-
-main :: IO ()
-main = do
- hSetBuffering stdout NoBuffering
-
- args <- cmdArgs gftest
-
- case grammar args of
- Nothing -> putStrLn "Usage: `gftest -g <PGF grammar> [OPTIONS]'\nTo see available commands, run `gftest --help' or visit https://github.com/GrammaticalFramework/GF/blob/master/src/tools/gftest/README.md"
- Just fp -> do
- let (absName,grName) = (takeFileName $ stripPGF fp, stripPGF fp ++ ".pgf") --doesn't matter if the name is given with or without ".pgf"
-
- (langName:langTrans) = case lang args of
- [] -> [ absName ++ "Eng" ] -- if no English grammar found, it will be given a default value later
- langs -> [ absName ++ t | t <- words langs ]
-
- -- Read grammar and translations
- gr <- readGrammar langName grName
- grTrans <- sequence [ readGrammar lt grName | lt <- langTrans ]
-
- -- if language given by the user was not valid, use default language from Grammar
- let langName = concrLang gr
-
- let startcat = startCat gr `fromMaybe` start_cat args
-
- testTree' t n = testTree False gr grTrans t n ctxs
- where
- s = top t
- c = snd (ctyp s)
- cs = c:[ coe
- | (cat,coe) <- coercions gr
- , c == cat ]
- ctxs = concat [ contextsFor gr sc cat
- | sc <- ccats gr startcat
- , cat <- cs ]
-
- output = -- Print to stdout or write to a file
- if write_to_file args
- then \x ->
- do let fname = concat [ langName, "_", function args, category args, ".org" ]
- writeFile fname x
- putStrLn $ "Wrote results in " ++ fname
- else putStrLn
-
-
- intersectConcrCats cats_fields intersection =
- M.fromListWith intersection
- ([ (c,fields)
- | (CC (Just c) _,fields) <- cats_fields
- ] ++
- [ (cat,fields)
- | (c@(CC Nothing _),fields) <- cats_fields
- , (CC (Just cat) _,coe) <- coercions gr
- , c == coe
- ])
-
- printStats tab =
- sequence_ [ do putStrLn $ "==> " ++ c ++ ": "
- putStrLn $ unlines (map (fs!!) xs)
- | (c,vs) <- M.toList tab
- , let fs = fieldNames gr c
- , xs@(_:_) <- [ S.toList vs ] ]
- -----------------------------------------------------------------------------
- -- Testing functions
-
- -- Test a tree
- let trees = case tree args of
- [] -> []
- ts -> [ readTree gr t | t <- lines ts ]
- output $
- unlines [ testTree' tree 1 | tree <- trees ]
-
- -- Test a function
- let substrs xs = filter (/="*") $ groupBy (\a b -> a/='*' && b/='*') xs
- let cats = case category args of
- [] -> []
- cs -> if '*' `elem` cs
- then let subs = substrs cs
- in nub [ cat | (cat,_,_,_) <- concrCats gr
- , all (`isInfixOf` cat) subs ]
- else words cs
- output $
- unlines [ testTree' t n
- | cat <- cats
- , (t,n) <- treesUsingFun gr (functionsByCat gr cat) `zip` [1..]]
-
- -- Test all functions in a category
- let funs = case function args of
- [] -> []
- fs -> if '*' `elem` fs
- then let subs = substrs fs
- in nub [ f | s <- symbols gr, let f = show s
- , all (`isInfixOf` f) subs
- , arity s >= 1 ]
- else words fs
- output $
- unlines [ testFun (debug args) gr grTrans startcat f
- | f <- funs ]
-
------------------------------------------------------------------------------
--- Information about the grammar
-
- -- Show contexts for a particular concrete category
- case show_contexts args of
- Nothing -> return ()
- Just fid -> mapM_ print
- [ ctx dummyHole
- | start <- ccats gr startcat
- , ctx <- contextsFor gr start (mkCC gr fid) ]
-
- -- Show available categories
- when (show_cats args) $ do
- putStrLn "* Categories in the grammar:"
- let concrcats = sortBy (\(_,a,_,_) (_,b,_,_) -> a `compare` b) (concrCats gr)
- sequence_ [ do putStrLn cat
- when (debug args) $
- putStrLn $ unwords $
- [ " Compiles to concrete" ] ++
- [ "categories " ++ show bg++"—"++show end
- | bg/=end ] ++
- [ "category " ++ show bg
- | bg==end ]
- | (cat,bg,end,_) <- concrcats
- , end >= 0]
-
- -- Show available functions
- when (show_funs args) $ do
- putStrLn "* Functions in the grammar:"
- putStrLn $ unlines $ nub [ show s | s <- symbols gr ]
-
- -- Show coercions in the grammar
- when (show_coercions args) $ do
- putStrLn "* Coercions in the grammar:"
- putStrLn $ unlines [ show cat++"--->"++show coe | (cat,coe) <- coercions gr ]
-
- case funs_of_arity args of
- Nothing -> return ()
- Just n -> do
- putStrLn $ "* Functions in the grammar of arity " ++ show n ++ ":"
- putStrLn $ unlines $ nub [ show s | s <- symbols gr, arity s == n ]
-
- -- Show all functions that contain the given string
- -- (e.g. English "it" appears in DefArt, ImpersCl, it_Pron, …)
- case concr_string args of
- [] -> return ()
- str -> do putStrLn $ "### The following functions contain the string '" ++ str ++ "':"
- putStr "==> "
- putStrLn $ intercalate ", " $ nub [ name s | s <- hasConcrString gr str]
-
- -- Show empty fields
- when (empty_fields args) $ do
- putStrLn "### Empty fields:"
- printStats $ intersectConcrCats (emptyFields gr) S.intersection
- putStrLn ""
-
- -- Show erased trees
- when (erased_trees args) $ do
- putStrLn "* Erased trees:"
- sequence_
- [ do putStrLn ("** " ++ intercalate "," erasedTrees ++ " : " ++ uncoerceAbsCat gr c)
- sequence_
- [ do putStrLn ("- Tree: " ++ showTree t)
- putStrLn ("- Lin: " ++ s)
- putStrLn $ unlines
- [ "- Trans: "++linearize tgr t
- | tgr <- grTrans ]
- | t <- ts
- , let s = linearize gr t
- , let erasedSymbs = [ sym | sym <- flatten t, c==snd (ctyp sym) ]
- ]
- | top <- take 1 $ ccats gr startcat
- , (c,ts) <- forgets gr top
- , let erasedTrees =
- concat [ [ showTree subtree
- | sym <- flatten t
- , let csym = snd (ctyp sym)
- , c == csym || coerces gr c csym
- , let Just subtree = subTree sym t ]
- | t <- ts ]
- ]
- putStrLn ""
-
- -- Show unused fields
- when (unused_fields args) $ do
-
- let unused =
- [ (c,S.fromList notUsed)
- | tp <- ccats gr startcat
- , (c,is) <- reachableFieldsFromTop gr tp
- , let ar = head $
- [ length (seqs f)
- | f <- symbols gr, snd (ctyp f) == c ] ++
- [ length (seqs f)
- | (b,a) <- coercions gr, a == c
- , f <- symbols gr, snd (ctyp f) == b ]
- notUsed = [ i | i <- [0..ar-1], i `notElem` is ]
- , not (null notUsed)
- ]
- putStrLn "### Unused fields:"
- printStats $ intersectConcrCats unused S.intersection
- putStrLn ""
-
- -- Show equal fields
- let tab = intersectConcrCats (equalFields gr) (/\)
- when (equal_fields args) $ do
- putStrLn "### Equal fields:"
- sequence_
- [ putStrLn ("==> " ++ c ++ ":\n" ++ cl)
- | (c,eqr) <- M.toList tab
- , let fs = fieldNames gr c
- , cl <- case eqr of
- Top -> ["TOP"]
- Classes xss -> [ unlines (map (fs!!) xs)
- | xs@(_:_:_) <- xss ]
- ]
- putStrLn ""
-
- case count_trees args of
- Nothing -> return ()
- Just n -> do let start = head $ ccats gr startcat
- let i = featCard gr start n
- let iTot = sum [ featCard gr start m | m <- [1..n] ]
- putStr $ "There are "++show iTot++" trees up to size "++show n
- putStrLn $ ", and "++show i++" of exactly size "++show n++".\nFor example: "
- putStrLn $ "* " ++ show (featIth gr start n 0)
- putStrLn $ "* " ++ show (featIth gr start n (i-1))
-
-
--------------------------------------------------------------------------------
--- Read trees from treebank.
-
- treebank' <-
- case treebank args of
- Nothing -> return []
- Just fp -> do
- tb <- readFile fp
- return [ readTree gr s
- | s <- lines tb ]
- mapM_ print treebank'
-
--------------------------------------------------------------------------------
--- Comparison with old grammar
-
- case old_grammar args of
- Nothing -> return ()
- Just fp -> do
- oldgr <- readGrammar langName (stripPGF fp ++ ".pgf")
- let ogr = oldgr { concrLang = concrLang oldgr ++ "-OLD" }
- difcats = diffCats ogr gr -- (acat, [#o, #n], olabels, nlabels)
-
- --------------------------------------------------------------------------
- -- generate statistics of the changes in the concrete categories
- let ccatChangeFile = langName ++ "-ccat-diff.org"
- writeFile ccatChangeFile ""
- sequence_
- [ appendFile ccatChangeFile $ unlines
- [ "* " ++ acat
- , show o ++ " concrete categories in the old grammar,"
- , show n ++ " concrete categories in the new grammar."
- , "** Labels only in old (" ++ show (length ol) ++ "):"
- , intercalate ", " ol
- , "** Labels only in new (" ++ show (length nl) ++ "):"
- , intercalate ", " nl ]
- | (acat, [o,n], ol, nl) <- difcats ]
- when (debug args) $
- sequence_
- [ appendFile ccatChangeFile $
- unlines $
- ("* All concrete cats in the "++age++" grammar:"):
- [ show cts | cts <- concrCats g ]
- | (g,age) <- [(ogr,"old"),(gr,"new")] ]
-
- putStrLn $ "Created file " ++ ccatChangeFile
-
- --------------------------------------------------------------------------
- -- Print out tests for all functions in the changed cats.
- -- If -f, -c or --treebank specified, use them.
-
- let f cat = (cat, treesUsingFun gr $ functionsByCat gr cat)
-
- byCat = [ f cat | cat <- cats ] -- from command line arg -c
- changed = [ f cat | (cat,_,_,_) <- difcats
- , only_changed_cats args ]
- byFun = [ (cat, treesUsingFun gr fs)
- | funName <- funs -- comes from command line arg -f
- , let fs@(s:_) = lookupSymbol gr funName
- , let cat = snd $ Grammar.typ s ]
- fromTb = [ (cat,[tree]) | tree <- treebank'++trees
- , let (CC (Just cat) _) = ccatOf tree ]
-
- treesToTest =
- case concat [byFun, byCat, changed, fromTb] of
- [] -> [ f cat -- nothing else specified -> test all functions
- | (cat,_,_,_) <- concrCats gr ]
- xs -> S.toList $ S.fromList xs
-
- writeLinFile file grammar otherGrammar = do
- writeFile file ""
- putStrLn "Testing functions in… "
- diff <- concat `fmap`
- sequence [ do let cs = [ compareTree grammar otherGrammar grTrans startcat t
- | t <- ttrees ]
- putStr $ cat ++ " \r"
- -- prevent lazy evaluation; make printout accurate
- appendFile ("/tmp/"++file) (unwords $ map show cs)
- return [ c | c@(Comparison f (x:xs)) <- cs ]
- | (cat,ttrees) <- treesToTest ]
-
- let shorterTree c1 c2 = length (funTree c1) `compare` length (funTree c2)
- writeFile file $ unlines
- [ show comp
- | comp <- sortBy shorterTree diff ]
-
- writeLinFile (langName ++ "-lin-diff.org") gr ogr
- putStrLn $ "Created file " ++ (langName ++ "-lin-diff.org")
-
- ---------------------------------------------------------------------------
- -- Print statistics about the functions: e.g., in the old grammar,
- -- all these 5 functions used to be in the same category:
- -- [DefArt,PossPron,no_Quant,this_Quant,that_Quant]
- -- but in the new grammar, they are split into two:
- -- [DefArt,PossPron,no_Quant] and [this_Quant,that_Quant].
- let groupFuns grammar = -- :: Grammar -> [[Symbol]]
- concat [ groupBy sameCCat $ sortBy compareCCat funs
- | (cat,_,_,_) <- difcats
- , let funs = functionsByCat grammar cat ]
-
- sortByName = sortBy (\s t -> name s `compare` name t)
- writeFunFile groupedFuns file grammar = do
- writeFile file ""
- sequence_ [ do appendFile file "---\n"
- appendFile file $ unlines
- [ showConcrFun gr fun
- | fun <- sortByName funs ]
- | funs <- groupedFuns ]
-
- writeFunFile (groupFuns ogr) (langName ++ "-old-funs.org") ogr
- writeFunFile (groupFuns gr) (langName ++ "-new-funs.org") gr
-
- putStrLn $ "Created files " ++ langName ++ "-(old|new)-funs.org"
-
-
- where
-
- nub = S.toList . S.fromList
-
- sameCCat :: Symbol -> Symbol -> Bool
- sameCCat s1 s2 = snd (ctyp s1) == snd (ctyp s2)
-
- compareCCat :: Symbol -> Symbol -> Ordering
- compareCCat s1 s2 = snd (ctyp s1) `compare` snd (ctyp s2)
-
- stripPGF :: String -> String
- stripPGF s = case reverse s of
- 'f':'g':'p':'.':name -> reverse name
- name -> s
-