summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples/PennTreebank/training.hs53
1 files changed, 42 insertions, 11 deletions
diff --git a/examples/PennTreebank/training.hs b/examples/PennTreebank/training.hs
index 5f50d6a78..31bc74492 100644
--- a/examples/PennTreebank/training.hs
+++ b/examples/PennTreebank/training.hs
@@ -5,12 +5,23 @@ import Data.List
main = do
pgf <- readPGF "ParseEngAbs.pgf"
- ls <- fmap lines $ readFile "log.txt"
- let stats = foldl' (collectStats pgf)
- (initStats pgf)
+ ls <- fmap (filterExprs . lines) $ readFile "log3.txt"
+ putStrLn ""
+ putStrLn ("trees: "++show (length ls))
+ let stats = foldl' (collectStats pgf)
+ (initStats pgf)
[(fromMaybe (error l) (readExpr (toQ l)),Just (mkCId "Phr"),Nothing) | l <- ls]
- mapM_ putStrLn [show f ++ "\t" ++ show p | (f,p) <- uprobs pgf stats]
- mapM_ putStrLn [show cat1 ++ "\t" ++ show cat2 ++ "\t" ++ show p | (cat1,cat2,p) <- bprobs pgf stats]
+
+ putStrLn ("coverage: "++show (coverage stats))
+
+ putStrLn ("Writing ParseEngAbs.probs...")
+ writeFile "ParseEngAbs.probs" (unlines [show f ++ "\t" ++ show p | (f,p) <- uprobs pgf stats])
+
+ putStrLn ("Writing ParseEngAbs2.probs...")
+ writeFile "ParseEngAbs2.probs" (unlines [show cat1 ++ "\t" ++ show cat2 ++ "\t" ++ show p | (cat1,cat2,p) <- bprobs pgf stats])
+
+ putStrLn ("Writing global.probs...")
+ writeFile "global.probs" (unlines [show f ++ "\t" ++ show p | (f,p) <- gprobs pgf stats])
where
toQ [] = []
toQ ('[':cs) = let (xs,']':ys) = break (==']') cs
@@ -18,29 +29,38 @@ main = do
toQ ('?':cs) = 'Q' : toQ cs
toQ (c:cs) = c : toQ cs
+filterExprs [] = []
+filterExprs (l:ls)
+ | null l = filterExprs ls
+ | elem (head l) "+#*" = drop 2 l : filterExprs ls
+ | otherwise = filterExprs ls
initStats pgf =
(Map.fromListWith (+)
([(f,1) | f <- functions pgf] ++
[(cat pgf f,1) | f <- functions pgf])
,Map.empty
+ ,0
)
-collectStats pgf (ustats,bstats) (e,mb_cat1,mb_cat2) =
+collectStats pgf (ustats,bstats,count) (e,mb_cat1,mb_cat2) =
case unApp e of
Just (f,args) -> let fcat = fromMaybe (cat2 pgf f e) mb_cat1
cf = fromMaybe 0 (Map.lookup f ustats)
cc = fromMaybe 0 (Map.lookup fcat ustats)
- in cf `seq` cc `seq` bstats `seq`
+ in cf `seq` cc `seq` bstats `seq` count `seq`
foldl' (collectStats pgf)
(Map.insert f (cf+1) (Map.insert fcat (cc+1) ustats)
,(if null args
then Map.insertWith (+) (fcat,wildCId) 1
else id)
(maybe bstats (\cat2 -> Map.insertWith (+) (cat2,fcat) 1 bstats) mb_cat2)
+ ,count+1
)
(zip3 args (argCats f) (repeat (Just fcat)))
- Nothing -> (ustats,bstats)
+ Nothing -> case unStr e of
+ Just _ -> (ustats,bstats,count+1)
+ Nothing -> error ("collectStats ("++show e++")")
where
argCats f =
case fmap unType (functionType pgf f) of
@@ -48,7 +68,11 @@ collectStats pgf (ustats,bstats) (e,mb_cat1,mb_cat2) =
in map tyCat arg_tys
Nothing -> repeat Nothing
-uprobs pgf (ustats,bstats) =
+coverage (ustats,bstats,count) =
+ let c = fromMaybe 0 (Map.lookup (mkCId "Q") ustats)
+ in (fromIntegral (count - c) / fromIntegral count) * 100
+
+uprobs pgf (ustats,bstats,count) =
[toProb f (cat pgf f) | f <- functions pgf]
where
toProb f cat =
@@ -56,7 +80,7 @@ uprobs pgf (ustats,bstats) =
cat_mass = fromMaybe 0 (Map.lookup cat ustats)
in (f, fromIntegral count / fromIntegral cat_mass :: Double)
-bprobs pgf (ustats,bstats) =
+bprobs pgf (ustats,bstats,count) =
concat [toProb cat | cat <- categories pgf]
where
toProb cat =
@@ -64,6 +88,13 @@ bprobs pgf (ustats,bstats) =
in [(cat1,cat2,fromIntegral count / fromIntegral mass)
| ((cat1,cat2),count) <- Map.toList bstats, cat1==cat]
+gprobs pgf (ustats,bstats,count) =
+ sortBy (\x y -> compare (snd y) (snd x)) [toProb f | f <- functions pgf]
+ where
+ toProb f =
+ let fcount = fromMaybe 0 (Map.lookup f ustats)
+ in (f, fromIntegral fcount / fromIntegral count :: Double)
+
cat pgf f =
case fmap unType (functionType pgf f) of
Just (_,cat,_) -> cat
@@ -72,4 +103,4 @@ cat pgf f =
cat2 pgf f e =
case fmap unType (functionType pgf f) of
Just (_,cat,_) -> cat
- Nothing -> error ("Unknown function "++showCId f++show e)
+ Nothing -> error ("Unknown function "++showCId f++" "++show e)