summaryrefslogtreecommitdiff
path: root/src/GF/Probabilistic
diff options
context:
space:
mode:
authoraarne <unknown>2005-10-30 22:44:00 +0000
committeraarne <unknown>2005-10-30 22:44:00 +0000
commitf9293c6b29696db51b6bab7b5171b74bd6da084b (patch)
tree6319da9c57ebf79ade657289b6bcb47d1a8a0fa1 /src/GF/Probabilistic
parent815dda6b4bfd573f9091d7a667a67076602fb205 (diff)
probabilistic
Diffstat (limited to 'src/GF/Probabilistic')
-rw-r--r--src/GF/Probabilistic/Probabilistic.hs225
1 files changed, 225 insertions, 0 deletions
diff --git a/src/GF/Probabilistic/Probabilistic.hs b/src/GF/Probabilistic/Probabilistic.hs
new file mode 100644
index 000000000..bc69a1cf3
--- /dev/null
+++ b/src/GF/Probabilistic/Probabilistic.hs
@@ -0,0 +1,225 @@
+----------------------------------------------------------------------
+-- |
+-- Module : Probabilistic
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+-- > CVS $Date: 2005/10/30 23:44:00 $
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.1 $
+--
+-- Probabilistic abstract syntax. AR 30\/10\/2005
+--
+-- (c) Aarne Ranta 2005 under GNU GPL
+--
+-- Contents: parsing and random generation with probabilistic grammars.
+-- To begin with, we use simple types and don't
+-- guarantee the correctness of bindings\/dependences.
+-----------------------------------------------------------------------------
+
+module GF.Probabilistic.Probabilistic (
+ generateRandomTreesProb -- :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp]
+ ,checkGrammarProbs -- :: GFCGrammar -> Probs -> Err ()
+ ,computeProbTree -- :: Probs -> Tree -> Double
+ ,rankByScore -- :: Ord n => [(a,n)] -> [(a,n)]
+ ,Probs -- = BinTree Ident Double
+ ,getProbsFromFile -- :: Opts -> IO Probs
+ ,emptyProbs -- :: Probs
+ ) where
+
+import GF.Canon.GFC
+import GF.Grammar.LookAbs
+import GF.Grammar.PrGrammar
+import GF.Grammar.Macros
+import GF.Grammar.Values
+import GF.Grammar.Grammar (Cat)
+
+import GF.Infra.Ident
+import GF.Data.Zipper
+import GF.Data.Operations
+import GF.Infra.Option
+
+import Data.Char
+import Data.List
+import Control.Monad
+import System.Random
+
+
+-- | generate an infinite list of trees, with their probabilities
+generateRandomTreesProb :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp]
+generateRandomTreesProb opts gen gr probs cat =
+ map str2tr $ randomTrees gen gr' cat' where
+ gr' = gr2sgr gr probs
+ cat' = prt $ snd cat
+
+-- | check that probabilities attached to a grammar make sense
+checkGrammarProbs :: GFCGrammar -> Probs -> Err ()
+checkGrammarProbs gr probs = err Bad (const (return ())) $ checkSGrammar $ gr2sgr gr probs
+
+-- | compute the probability of a given tree
+computeProbTree :: Probs -> Tree -> Double
+computeProbTree probs (Tr (N (_,at,_,_,_),ts)) = case at of
+ AtC (_,f) -> case lookupTree prt f probs of
+ Ok p -> p * product (map prob ts)
+ _ -> product (map prob ts)
+ _ -> 1.0 ----
+ where
+ prob = computeProbTree probs
+
+-- | rank from highest to lowest score, e.g. probability
+rankByScore :: Ord n => [(a,n)] -> [(a,n)]
+rankByScore = sortBy (\ (_,p) (_,q) -> compare q p)
+
+getProbsFromFile :: Options -> IO Probs
+getProbsFromFile opts = do
+ s <- maybe (return "") readFile $ getOptVal opts probFile
+ return $ buildTree $ pProbs $ lines s
+ where
+ pProbs ss = [(zIdent f, read p) | s <- ss, [f,p] <- [words s]]
+
+type Probs = BinTree Ident Double
+
+emptyProbs :: Probs
+emptyProbs = emptyBinTree
+
+------------------------------------------
+-- translate grammar to simpler form and generated trees back
+
+gr2sgr :: GFCGrammar -> Probs -> SGrammar
+gr2sgr gr probs = buildTree [(c,{- fillProb -} rs) | rs@((_,(_,c)):_) <- rules] where
+ rules =
+ groupBy (\x y -> scat x == scat y) $
+ sortBy (\x y -> compare (scat x) (scat y))
+ [(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty]
+ trId (_,f) = let f' = prt f in case lookupTree prt f probs of
+ Ok p -> (p,f')
+ _ -> (2.0, f')
+ trTy ty = case catSkeleton ty of
+ Ok (mcs,mc) -> [(map trCat mcs, trCat mc)]
+ _ -> []
+ trCat (m,c) = prt c ---
+ scat (_,(_,c)) = c
+
+str2tr :: STree -> Exp
+str2tr t = case t of
+ SApp ((_,f),ts) -> mkApp (trId f) (map str2tr ts)
+ SMeta _ -> mkMeta 0
+---- SString s -> K s
+ where
+ trId = cn . zIdent
+
+type SGrammar = BinTree SCat [SRule]
+type SIdent = String
+type SRule = (SFun,SType)
+type SType = ([SCat],SCat)
+type SCat = SIdent
+type SFun = (Double,SIdent)
+
+allRules gr = concat [rs | (c,rs) <- tree2list gr]
+
+data STree =
+ SApp (SFun,[STree])
+-- | SAppN (SIdent,[STree]) -- no probability given
+ | SMeta SCat
+ | SString String
+ | SInt Int
+ deriving (Show,Eq)
+
+probTree :: STree -> Double
+probTree t = case t of
+ SApp ((p,_),ts) -> p * product (map probTree ts)
+ _ -> 1
+
+rankTrees :: [STree] -> [(STree,Double)]
+rankTrees ts = sortBy (\ (_,p) (_,q) -> compare q p) [(t,probTree t) | t <- ts]
+
+randomTrees :: StdGen -> SGrammar -> SCat -> [STree]
+randomTrees gen = genTrees (randomRs (0.0, 1.0) gen)
+
+genTrees :: [Double] -> SGrammar -> SCat -> [STree]
+genTrees ds gr cat =
+ let (t,k) = genTree ds gr cat
+ in t : genTrees (drop k ds) gr cat
+
+genTree :: [Double] -> SGrammar -> SCat -> (STree,Int)
+genTree rs gr = gett rs where
+ gett ds cat = let
+ d:ds2 = ds
+ (pf,args) = getf d cat
+ (ts,k) = getts ds2 args
+ in (SApp (pf,ts), k+1)
+ getf d cat = hitRegion d [(p,(pf,args)) | (pf@(p,_),(args,_)) <- look cat]
+ getts ds cats = case cats of
+ c:cs -> let
+ (t, k) = gett ds c
+ (ts,ks) = getts (drop k ds) cs
+ in (t:ts, k + ks)
+ _ -> ([],0)
+ look cat = errVal [] $ lookupTree id cat gr
+
+hitRegion :: Double -> [(Double,a)] -> a
+hitRegion d vs = case vs of
+ (p1,v1):vs2 ->
+ if d < p1 then v1 else hitRegion d [(p+p1,v) | (p,v) <- vs2]
+
+--- this should recover from rounding errors
+
+checkSGrammar :: SGrammar -> Err SGrammar
+checkSGrammar = mapMTree chCat where
+ chCat (c,rs) = case sum [p | ((p,f),_) <- rs] of
+ s | s /= 1.0 -> Bad $ "illegal probability sum " ++ show s ++ " in " ++ c
+ _ -> return (c,rs)
+
+-- for cases where explicit probability is not given (encoded as
+-- p > 1) divide the remaining mass by the number of such cases
+
+fillProb :: [SRule] -> [SRule]
+fillProb rs = [((defa p,f),ty) | ((p,f),ty) <- rs] where
+ defa p = if p > 1.0 then def else p
+ def = (1 - sum given) / genericLength nope
+ (nope,given) = partition (> 1.0) [p | ((p,_),_) <- rs]
+
+
+------------------------------------------
+-- to test outside GF
+
+prSTree t = case t of
+ SApp ((p,f),ts) -> f ++ prParenth (show p) ++ concat (map pr1 ts)
+ SMeta c -> '?':c
+ SString s -> prQuotedString s
+ SInt i -> show i
+ where
+ pr1 t@(SApp (_,ts)) = ' ' : (if null ts then id else prParenth) (prSTree t)
+ pr1 t = prSTree t
+
+
+mkSGrammar :: [SRule] -> SGrammar
+mkSGrammar rules =
+ buildTree [(c, fillProb rs) | rs@((_,(_,c)):_) <- rules'] where
+ rules' =
+ groupBy (\x y -> scat x == scat y) $
+ sortBy (\x y -> compare (scat x) (scat y))
+ rules
+ scat (_,(_,c)) = c
+
+pSRule :: String -> SRule
+pSRule s = case words s of
+ p : f : c : cs ->
+ if isDigit (head p)
+ then ((read p, f),(init cs', last cs'))
+ else ((2.0, p),(init (c:cs'), last (c:cs'))) --- hack for automatic probability
+ where cs' = [cs !! i | i <- [0,2..length cs - 1]]
+ _ -> error $ "not a rule" +++ s
+
+exSgr = mkSGrammar $ map pSRule [
+ "0.8 a : A"
+ ,"0.2 b : A"
+ ,"0.2 n : A -> S -> S"
+ ,"0.8 e : S"
+ ]
+
+ex1 :: IO ()
+ex1 = do
+ g <- newStdGen
+ mapM_ (putStrLn . prSTree) $ randomTrees g exSgr "S"