summaryrefslogtreecommitdiff
path: root/examples/PennTreebank/training.hs
blob: 5f50d6a78bd1f70375011a9b42c74f40c0d74337 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import PGF
import qualified Data.Map as Map
import Data.Maybe
import Data.List

main = do
  pgf <- readPGF "ParseEngAbs.pgf"
  ls <- fmap lines $ readFile "log.txt"
  let stats = foldl' (collectStats pgf) 
                     (initStats pgf) 
                     [(fromMaybe (error l) (readExpr (toQ l)),Just (mkCId "Phr"),Nothing) | l <- ls]
  mapM_ putStrLn [show f ++ "\t" ++ show p | (f,p) <- uprobs pgf stats]
  mapM_ putStrLn [show cat1 ++ "\t" ++ show cat2 ++ "\t" ++ show p | (cat1,cat2,p) <- bprobs pgf stats]
  where
    toQ []       = []
    toQ ('[':cs) = let (xs,']':ys) = break (==']') cs
                   in toQ ('?' : ys)
    toQ ('?':cs) = 'Q' : toQ cs
    toQ (c:cs)   = c   : toQ cs


initStats pgf =
  (Map.fromListWith (+)
      ([(f,1) | f <- functions pgf] ++
       [(cat pgf f,1) | f <- functions pgf])
  ,Map.empty
  )

collectStats pgf (ustats,bstats) (e,mb_cat1,mb_cat2) =
  case unApp e of
    Just (f,args) -> let fcat = fromMaybe (cat2 pgf f e) mb_cat1
                         cf   = fromMaybe 0 (Map.lookup f ustats)
                         cc   = fromMaybe 0 (Map.lookup fcat ustats)
                     in cf `seq` cc `seq` bstats `seq`
                        foldl' (collectStats pgf)
                               (Map.insert f (cf+1) (Map.insert fcat (cc+1) ustats)
                               ,(if null args
                                   then Map.insertWith (+) (fcat,wildCId) 1
                                   else id)
                                (maybe bstats (\cat2 -> Map.insertWith (+) (cat2,fcat) 1 bstats) mb_cat2)
                               )
                               (zip3 args (argCats f) (repeat (Just fcat)))
    Nothing       -> (ustats,bstats)
  where
	argCats f =
	  case fmap unType (functionType pgf f) of
	    Just (arg_tys,_,_) -> let tyCat (_,_,ty) = let (_,cat,_) = unType ty in Just cat
	                          in map tyCat arg_tys
	    Nothing            -> repeat Nothing

uprobs pgf (ustats,bstats) =
  [toProb f (cat pgf f) | f <- functions pgf]
  where
    toProb f cat =
      let count    = fromMaybe 0 (Map.lookup f ustats)
          cat_mass = fromMaybe 0 (Map.lookup cat ustats)
      in (f, fromIntegral count / fromIntegral cat_mass :: Double)

bprobs pgf (ustats,bstats) =
  concat [toProb cat | cat <- categories pgf]
  where
    toProb cat =
      let mass = sum [count | ((cat1,cat2),count) <- Map.toList bstats, cat1==cat]
      in [(cat1,cat2,fromIntegral count / fromIntegral mass) 
					| ((cat1,cat2),count) <- Map.toList bstats, cat1==cat]

cat pgf f =
  case fmap unType (functionType pgf f) of
    Just (_,cat,_) -> cat
    Nothing        -> error ("Unknown function "++showCId f)

cat2 pgf f e =
  case fmap unType (functionType pgf f) of
    Just (_,cat,_) -> cat
    Nothing        -> error ("Unknown function "++showCId f++show e)