diff options
| author | aarne <unknown> | 2005-10-30 22:44:00 +0000 |
|---|---|---|
| committer | aarne <unknown> | 2005-10-30 22:44:00 +0000 |
| commit | f9293c6b29696db51b6bab7b5171b74bd6da084b (patch) | |
| tree | 6319da9c57ebf79ade657289b6bcb47d1a8a0fa1 /src/GF/Probabilistic/Probabilistic.hs | |
| parent | 815dda6b4bfd573f9091d7a667a67076602fb205 (diff) | |
probabilistic
Diffstat (limited to 'src/GF/Probabilistic/Probabilistic.hs')
| -rw-r--r-- | src/GF/Probabilistic/Probabilistic.hs | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/src/GF/Probabilistic/Probabilistic.hs b/src/GF/Probabilistic/Probabilistic.hs new file mode 100644 index 000000000..bc69a1cf3 --- /dev/null +++ b/src/GF/Probabilistic/Probabilistic.hs @@ -0,0 +1,225 @@ +---------------------------------------------------------------------- +-- | +-- Module : Probabilistic +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/10/30 23:44:00 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.1 $ +-- +-- Probabilistic abstract syntax. AR 30\/10\/2005 +-- +-- (c) Aarne Ranta 2005 under GNU GPL +-- +-- Contents: parsing and random generation with probabilistic grammars. +-- To begin with, we use simple types and don't +-- guarantee the correctness of bindings\/dependences. +----------------------------------------------------------------------------- + +module GF.Probabilistic.Probabilistic ( + generateRandomTreesProb -- :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp] + ,checkGrammarProbs -- :: GFCGrammar -> Probs -> Err () + ,computeProbTree -- :: Probs -> Tree -> Double + ,rankByScore -- :: Ord n => [(a,n)] -> [(a,n)] + ,Probs -- = BinTree Ident Double + ,getProbsFromFile -- :: Opts -> IO Probs + ,emptyProbs -- :: Probs + ) where + +import GF.Canon.GFC +import GF.Grammar.LookAbs +import GF.Grammar.PrGrammar +import GF.Grammar.Macros +import GF.Grammar.Values +import GF.Grammar.Grammar (Cat) + +import GF.Infra.Ident +import GF.Data.Zipper +import GF.Data.Operations +import GF.Infra.Option + +import Data.Char +import Data.List +import Control.Monad +import System.Random + + +-- | generate an infinite list of trees, with their probabilities +generateRandomTreesProb :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp] +generateRandomTreesProb opts gen gr probs cat = + map str2tr $ randomTrees gen gr' cat' where + gr' = gr2sgr gr probs + cat' = prt $ snd cat + +-- | check that probabilities attached to a grammar make sense +checkGrammarProbs :: GFCGrammar -> Probs -> Err () +checkGrammarProbs gr probs = err Bad (const (return ())) $ checkSGrammar $ gr2sgr gr probs + +-- | compute the probability of a given tree +computeProbTree :: Probs -> Tree -> Double +computeProbTree probs (Tr (N (_,at,_,_,_),ts)) = case at of + AtC (_,f) -> case lookupTree prt f probs of + Ok p -> p * product (map prob ts) + _ -> product (map prob ts) + _ -> 1.0 ---- + where + prob = computeProbTree probs + +-- | rank from highest to lowest score, e.g. probability +rankByScore :: Ord n => [(a,n)] -> [(a,n)] +rankByScore = sortBy (\ (_,p) (_,q) -> compare q p) + +getProbsFromFile :: Options -> IO Probs +getProbsFromFile opts = do + s <- maybe (return "") readFile $ getOptVal opts probFile + return $ buildTree $ pProbs $ lines s + where + pProbs ss = [(zIdent f, read p) | s <- ss, [f,p] <- [words s]] + +type Probs = BinTree Ident Double + +emptyProbs :: Probs +emptyProbs = emptyBinTree + +------------------------------------------ +-- translate grammar to simpler form and generated trees back + +gr2sgr :: GFCGrammar -> Probs -> SGrammar +gr2sgr gr probs = buildTree [(c,{- fillProb -} rs) | rs@((_,(_,c)):_) <- rules] where + rules = + groupBy (\x y -> scat x == scat y) $ + sortBy (\x y -> compare (scat x) (scat y)) + [(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty] + trId (_,f) = let f' = prt f in case lookupTree prt f probs of + Ok p -> (p,f') + _ -> (2.0, f') + trTy ty = case catSkeleton ty of + Ok (mcs,mc) -> [(map trCat mcs, trCat mc)] + _ -> [] + trCat (m,c) = prt c --- + scat (_,(_,c)) = c + +str2tr :: STree -> Exp +str2tr t = case t of + SApp ((_,f),ts) -> mkApp (trId f) (map str2tr ts) + SMeta _ -> mkMeta 0 +---- SString s -> K s + where + trId = cn . zIdent + +type SGrammar = BinTree SCat [SRule] +type SIdent = String +type SRule = (SFun,SType) +type SType = ([SCat],SCat) +type SCat = SIdent +type SFun = (Double,SIdent) + +allRules gr = concat [rs | (c,rs) <- tree2list gr] + +data STree = + SApp (SFun,[STree]) +-- | SAppN (SIdent,[STree]) -- no probability given + | SMeta SCat + | SString String + | SInt Int + deriving (Show,Eq) + +probTree :: STree -> Double +probTree t = case t of + SApp ((p,_),ts) -> p * product (map probTree ts) + _ -> 1 + +rankTrees :: [STree] -> [(STree,Double)] +rankTrees ts = sortBy (\ (_,p) (_,q) -> compare q p) [(t,probTree t) | t <- ts] + +randomTrees :: StdGen -> SGrammar -> SCat -> [STree] +randomTrees gen = genTrees (randomRs (0.0, 1.0) gen) + +genTrees :: [Double] -> SGrammar -> SCat -> [STree] +genTrees ds gr cat = + let (t,k) = genTree ds gr cat + in t : genTrees (drop k ds) gr cat + +genTree :: [Double] -> SGrammar -> SCat -> (STree,Int) +genTree rs gr = gett rs where + gett ds cat = let + d:ds2 = ds + (pf,args) = getf d cat + (ts,k) = getts ds2 args + in (SApp (pf,ts), k+1) + getf d cat = hitRegion d [(p,(pf,args)) | (pf@(p,_),(args,_)) <- look cat] + getts ds cats = case cats of + c:cs -> let + (t, k) = gett ds c + (ts,ks) = getts (drop k ds) cs + in (t:ts, k + ks) + _ -> ([],0) + look cat = errVal [] $ lookupTree id cat gr + +hitRegion :: Double -> [(Double,a)] -> a +hitRegion d vs = case vs of + (p1,v1):vs2 -> + if d < p1 then v1 else hitRegion d [(p+p1,v) | (p,v) <- vs2] + +--- this should recover from rounding errors + +checkSGrammar :: SGrammar -> Err SGrammar +checkSGrammar = mapMTree chCat where + chCat (c,rs) = case sum [p | ((p,f),_) <- rs] of + s | s /= 1.0 -> Bad $ "illegal probability sum " ++ show s ++ " in " ++ c + _ -> return (c,rs) + +-- for cases where explicit probability is not given (encoded as +-- p > 1) divide the remaining mass by the number of such cases + +fillProb :: [SRule] -> [SRule] +fillProb rs = [((defa p,f),ty) | ((p,f),ty) <- rs] where + defa p = if p > 1.0 then def else p + def = (1 - sum given) / genericLength nope + (nope,given) = partition (> 1.0) [p | ((p,_),_) <- rs] + + +------------------------------------------ +-- to test outside GF + +prSTree t = case t of + SApp ((p,f),ts) -> f ++ prParenth (show p) ++ concat (map pr1 ts) + SMeta c -> '?':c + SString s -> prQuotedString s + SInt i -> show i + where + pr1 t@(SApp (_,ts)) = ' ' : (if null ts then id else prParenth) (prSTree t) + pr1 t = prSTree t + + +mkSGrammar :: [SRule] -> SGrammar +mkSGrammar rules = + buildTree [(c, fillProb rs) | rs@((_,(_,c)):_) <- rules'] where + rules' = + groupBy (\x y -> scat x == scat y) $ + sortBy (\x y -> compare (scat x) (scat y)) + rules + scat (_,(_,c)) = c + +pSRule :: String -> SRule +pSRule s = case words s of + p : f : c : cs -> + if isDigit (head p) + then ((read p, f),(init cs', last cs')) + else ((2.0, p),(init (c:cs'), last (c:cs'))) --- hack for automatic probability + where cs' = [cs !! i | i <- [0,2..length cs - 1]] + _ -> error $ "not a rule" +++ s + +exSgr = mkSGrammar $ map pSRule [ + "0.8 a : A" + ,"0.2 b : A" + ,"0.2 n : A -> S -> S" + ,"0.8 e : S" + ] + +ex1 :: IO () +ex1 = do + g <- newStdGen + mapM_ (putStrLn . prSTree) $ randomTrees g exSgr "S" |
