summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/GF/Grammar/SGrammar.hs169
-rw-r--r--src/GF/Probabilistic/Probabilistic.hs74
-rw-r--r--src/GF/Shell.hs4
-rw-r--r--src/GF/Shell/HelpFile.hs12
-rw-r--r--src/GF/Shell/ShellCommands.hs6
-rw-r--r--src/GF/UseGrammar/Generate.hs107
-rw-r--r--src/HelpFile12
7 files changed, 203 insertions, 181 deletions
diff --git a/src/GF/Grammar/SGrammar.hs b/src/GF/Grammar/SGrammar.hs
new file mode 100644
index 000000000..e0c001b6b
--- /dev/null
+++ b/src/GF/Grammar/SGrammar.hs
@@ -0,0 +1,169 @@
+----------------------------------------------------------------------
+-- |
+-- Module : SGrammar
+-- Maintainer : AR
+-- Stability : (stable)
+-- Portability : (portable)
+--
+--
+-- A simple format for context-free abstract syntax used e.g. in
+-- generation. AR 31\/3\/2006
+--
+-- (c) Aarne Ranta 2004 under GNU GPL
+--
+-- Purpose: to generate corpora. We use simple types and don't
+-- guarantee the correctness of bindings\/dependences.
+-----------------------------------------------------------------------------
+
+module GF.Grammar.SGrammar where
+
+import GF.Canon.GFC
+import GF.Grammar.LookAbs
+import GF.Grammar.PrGrammar
+import GF.Grammar.Macros
+import GF.Grammar.Values
+import GF.Grammar.Grammar
+import GF.Infra.Ident (Ident)
+
+import GF.Data.Operations
+import GF.Data.Zipper
+import GF.Infra.Option
+
+import Data.List
+
+-- (c) Aarne Ranta 2006 under GNU GPL
+
+
+type SGrammar = BinTree SCat [SRule]
+type SIdent = String
+type SRule = (SFun,SType)
+type SType = ([SCat],SCat)
+type SCat = SIdent
+type SFun = (Double,SIdent)
+
+allRules gr = concat [rs | (c,rs) <- tree2list gr]
+
+data STree =
+ SApp (SFun,[STree])
+ | SMeta SCat
+ | SString String
+ | SInt Integer
+ | SFloat Double
+ deriving (Show,Eq)
+
+depth :: STree -> Int
+depth t = case t of
+ SApp (_,ts@(_:_)) -> maximum (map depth ts) + 1
+ _ -> 1
+
+type Probs = BinTree Ident Double
+
+emptyProbs :: Probs
+emptyProbs = emptyBinTree
+
+prProbs :: Probs -> String
+prProbs = unlines . map pr . tree2list where
+ pr (f,p) = prt f ++ "\t" ++ show p
+
+------------------------------------------
+-- translate grammar to simpler form and generated trees back
+
+gr2sgr :: Options -> Probs -> GFCGrammar -> SGrammar
+gr2sgr opts probs gr = buildTree [(c,norm (noexp c rs)) | rs@((_,(_,c)):_) <- rules] where
+ noe = maybe [] (chunks ',') $ getOptVal opts (aOpt "noexpand")
+ only = maybe [] (chunks ',') $ getOptVal opts (aOpt "doexpand")
+ un = getOptInt opts (aOpt "atoms")
+ rules =
+ prune $
+ groupBy (\x y -> scat x == scat y) $
+ sortBy (\x y -> compare (scat x) (scat y)) $
+ [(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty]
+ trId (_,f) = let f' = prt f in case lookupTree prt f probs of
+ Ok p -> (p,f')
+ _ -> (2.0, f')
+ trTy ty = case catSkeleton ty of
+ Ok (mcs,mc) -> [(map trCat mcs, trCat mc)]
+ _ -> []
+ trCat (m,c) = prt c ---
+ scat (_,(_,c)) = c
+
+ prune rs = maybe rs (\n -> map (onlyAtoms n) rs) $ un
+
+ norm = fillProb
+
+ onlyAtoms n rs =
+ let (rs1,rs2) = partition atom rs
+ in take n rs1 ++ rs2
+ atom = null . fst . snd
+
+ noexp c rs
+ | null only = if elem c noe then [((2.0,'?':c),([],c))] else rs
+ | otherwise = if elem c only then rs else [((2.0,'?':c),([],c))]
+
+-- for cases where explicit probability is not given (encoded as
+-- p > 1) divide the remaining mass by the number of such cases
+
+fillProb :: [SRule] -> [SRule]
+fillProb rs = [((defa p,f),ty) | ((p,f),ty) <- rs] where
+ defa p = if p > 1.0 then def else p
+ def = (1 - sum given) / genericLength nope
+ (nope,given) = partition (> 1.0) [p | ((p,_),_) <- rs]
+
+-- str2tr :: STree -> Exp
+str2tr t = case t of
+ SApp ((_,'?':c),[]) -> mkMeta 0 -- from noexpand=c
+ SApp ((_,f),ts) -> mkApp (trId f) (map str2tr ts)
+ SMeta _ -> mkMeta 0
+ SString s -> K s
+ SInt i -> EInt i
+ SFloat i -> EFloat i
+ where
+ trId = cn . zIdent
+
+-- tr2str :: Tree -> STree
+tr2str (Tr (N (_,at,val,_,_),ts)) = case (at,val) of
+ (AtC (_,f), _) -> SApp ((2.0,prt_ f),map tr2str ts)
+ (AtM _, v) -> SMeta (catOf v)
+ (AtL s, _) -> SString s
+ (AtI i, _) -> SInt i
+ (AtF i, _) -> SFloat i
+ _ -> SMeta "FAILED_TO_GENERATE" ---- err monad!
+ where
+ catOf v = case v of
+ VApp w _ -> catOf w
+ VCn (_,c) -> prt_ c
+ _ -> "FAILED_TO_GENERATE_FROM_META"
+
+
+------------------------------------------
+-- to test
+
+prSTree t = case t of
+ SApp ((_,f),ts) -> f ++ concat (map pr1 ts)
+ SMeta c -> '?':c
+ SString s -> prQuotedString s
+ SInt i -> show i
+ SFloat i -> show i
+ where
+ pr1 t@(SApp (_,ts)) = ' ' : (if null ts then id else prParenth) (prSTree t)
+ pr1 t = prSTree t
+
+pSRule :: String -> SRule
+pSRule s = case words s of
+ f : _ : cs -> ((2.0,f),(init cs', last cs'))
+ where cs' = [cs !! i | i <- [0,2..length cs - 1]]
+ _ -> error $ "not a rule" +++ s
+
+exSgr = map pSRule [
+ "Pred : NP -> VP -> S"
+ ,"Compl : TV -> NP -> VP"
+ ,"PredVV : VV -> VP -> VP"
+ ,"DefCN : CN -> NP"
+ ,"ModCN : AP -> CN -> CN"
+ ,"john : NP"
+ ,"walk : VP"
+ ,"love : TV"
+ ,"try : VV"
+ ,"girl : CN"
+ ,"big : AP"
+ ]
diff --git a/src/GF/Probabilistic/Probabilistic.hs b/src/GF/Probabilistic/Probabilistic.hs
index 935175ed9..25258db52 100644
--- a/src/GF/Probabilistic/Probabilistic.hs
+++ b/src/GF/Probabilistic/Probabilistic.hs
@@ -34,7 +34,8 @@ import GF.Grammar.LookAbs
import GF.Grammar.PrGrammar
import GF.Grammar.Macros
import GF.Grammar.Values
-import GF.Grammar.Grammar -- (Cat,EInt,K)
+import GF.Grammar.Grammar
+import GF.Grammar.SGrammar
import GF.Infra.Ident
import GF.Data.Zipper
@@ -54,13 +55,13 @@ timeout = 99
generateRandomTreesProb :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp]
generateRandomTreesProb opts gen gr probs cat =
map str2tr $ randomTrees gen gr' cat' where
- gr' = gr2sgr gr probs
+ gr' = gr2sgr opts probs gr
cat' = prt $ snd cat
-- | check that probabilities attached to a grammar make sense
checkGrammarProbs :: GFCGrammar -> Probs -> Err Probs
checkGrammarProbs gr probs =
- err Bad (return . gr2probs) $ checkSGrammar $ gr2sgr gr probs where
+ err Bad (return . gr2probs) $ checkSGrammar $ gr2sgr noOptions probs gr where
gr2probs sgr = buildTree [(zIdent f,p) | (_,rs) <- tree2list sgr, ((p,f),_) <- rs]
-- | compute the probability of a given tree
@@ -95,61 +96,9 @@ pProb s = case words s of
readD :: String -> Double
readD = read
-type Probs = BinTree Ident Double
-
-emptyProbs :: Probs
-emptyProbs = emptyBinTree
-
-prProbs :: Probs -> String
-prProbs = unlines . map pr . tree2list where
- pr (f,p) = prt f ++ "\t" ++ show p
-
------------------------------------------
-- translate grammar to simpler form and generated trees back
-gr2sgr :: GFCGrammar -> Probs -> SGrammar
-gr2sgr gr probs = buildTree [(c,fillProb rs) | rs@((_,(_,c)):_) <- rules] where
- rules =
- groupBy (\x y -> scat x == scat y) $
- sortBy (\x y -> compare (scat x) (scat y))
- [(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty]
- trId (_,f) = let f' = prt f in case lookupTree prt f probs of
- Ok p -> (p,f')
- _ -> (2.0, f')
- trTy ty = case catSkeleton ty of
- Ok (mcs,mc) -> [(map trCat mcs, trCat mc)]
- _ -> []
- trCat (m,c) = prt c ---
- scat (_,(_,c)) = c
-
-str2tr :: STree -> Exp
-str2tr t = case t of
- SApp ((_,f),ts) -> mkApp (trId f) (map str2tr ts)
- SMeta _ -> mkMeta 0
- SString s -> K s
- SInt i -> EInt i
- SFloat i -> EFloat i
- where
- trId = cn . zIdent
-
-type SGrammar = BinTree SCat [SRule]
-type SIdent = String
-type SRule = (SFun,SType)
-type SType = ([SCat],SCat)
-type SCat = SIdent
-type SFun = (Double,SIdent)
-
-allRules gr = concat [rs | (c,rs) <- tree2list gr]
-
-data STree =
- SApp (SFun,[STree])
--- | SAppN (SIdent,[STree]) -- no probability given
- | SMeta SCat
- | SString String
- | SInt Integer
- | SFloat Double
- deriving (Show,Eq)
-
probTree :: STree -> Double
probTree t = case t of
SApp ((p,_),ts) -> p * product (map probTree ts)
@@ -204,16 +153,8 @@ checkSGrammar = mapMTree chCat where
Bad $ "illegal probability sum " ++ show s ++ " in " ++ c
_ -> return (c,rs)
--- for cases where explicit probability is not given (encoded as
--- p > 1) divide the remaining mass by the number of such cases
-
-fillProb :: [SRule] -> [SRule]
-fillProb rs = [((defa p,f),ty) | ((p,f),ty) <- rs] where
- defa p = if p > 1.0 then def else p
- def = (1 - sum given) / genericLength nope
- (nope,given) = partition (> 1.0) [p | ((p,_),_) <- rs]
-
+{-
------------------------------------------
-- to test outside GF
@@ -246,7 +187,7 @@ pSRule s = case words s of
where cs' = [cs !! i | i <- [0,2..length cs - 1]]
_ -> error $ "not a rule" +++ s
-exSgr = mkSGrammar $ map pSRule [
+expSgr = mkSGrammar $ map pSRule [
"0.8 a : A"
,"0.2 b : A"
,"0.2 n : A -> S -> S"
@@ -257,3 +198,6 @@ ex1 :: IO ()
ex1 = do
g <- newStdGen
mapM_ (putStrLn . prSTree) $ randomTrees g exSgr "S"
+
+-}
+
diff --git a/src/GF/Shell.hs b/src/GF/Shell.hs
index 3af343bb2..bdbf6d62c 100644
--- a/src/GF/Shell.hs
+++ b/src/GF/Shell.hs
@@ -249,6 +249,10 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
let p = optParseArgErrMsg opts gro x
case p of
Ok (ts,msg)
+ | oElem (iOpt "fail") opts && null ts -> do
+ putStrLnFlush ("#FAIL:" +++ x) >> changeArg (const $ ATrms ts) sa
+ | oElem (iOpt "ambiguous") opts && length ts > 1 -> do
+ putStrLnFlush ("#AMBIGUOUS:" +++ x) >> changeArg (const $ ATrms ts) sa
| oElem (iOpt "prob") opts -> do
let probs = stateProbs gro
let tps = rankByScore [(t,computeProbTree probs t) | t <- ts]
diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs
index dfb01da08..57692b493 100644
--- a/src/GF/Shell/HelpFile.hs
+++ b/src/GF/Shell/HelpFile.hs
@@ -199,10 +199,12 @@ txtHelpFile =
"\n grammar (overridden by the -lang flag), in the category S (overridden" ++
"\n by the -cat flag)." ++
"\n options for batch input:" ++
- "\n -lines parse each line of input separately, ignoring empty lines" ++
- "\n -all as -lines, but also parse empty lines" ++
- "\n -prob rank results by probability" ++
- "\n -cut stop after first lexing result leading to parser success" ++
+ "\n -lines parse each line of input separately, ignoring empty lines" ++
+ "\n -all as -lines, but also parse empty lines" ++
+ "\n -prob rank results by probability" ++
+ "\n -cut stop after first lexing result leading to parser success" ++
+ "\n -fail show strings whose parse fails prefixed by #FAIL" ++
+ "\n -ambiguous show strings that have more than one parse prefixed by #AMBIGUOUS" ++
"\n options for selecting parsing method:" ++
"\n (default)parse using an overgenerating CFG" ++
"\n -cfg parse using a much less overgenerating CFG" ++
@@ -344,11 +346,13 @@ txtHelpFile =
"\n -lang use the abstract syntax of this grammar" ++
"\n -number generate (at most) this number of trees" ++
"\n -noexpand don't expand these categories (comma-separated, e.g. -noexpand=V,CN)" ++
+ "\n -doexpand only expand these categories (comma-separated, e.g. -doexpand=V,CN)" ++
"\n examples:" ++
"\n gt -depth=10 -cat=NP -- generate all NP's to depth 10 " ++
"\n gt (PredVP ? (NegVG ?)) -- generate all trees of this form" ++
"\n gt -cat=S -tr | l -- generate and linearize" ++
"\n gt -noexpand=NP | l -mark=metacat -- the only NP is meta, linearized \"?0 +NP\"" ++
+ "\n gt | l | p -lines -ambiguous | grep \"#AMBIGUOUS\" -- show ambiguous strings" ++
"\n" ++
"\nma, morphologically_analyse: ma String" ++
"\n Runs morphological analysis on each word in String and displays" ++
diff --git a/src/GF/Shell/ShellCommands.hs b/src/GF/Shell/ShellCommands.hs
index d6209cffa..b93335416 100644
--- a/src/GF/Shell/ShellCommands.hs
+++ b/src/GF/Shell/ShellCommands.hs
@@ -180,11 +180,11 @@ optionsOfCommand co = case co of
CTransformGrammar _ -> flags "printer"
CConvertLatex _ -> none
CLinearize _ -> both "utf8 table struct record all multi" "lang number unlexer mark"
- CParse -> both "cut new newer cfg mcfg n ign raw v lines all prob"
+ CParse -> both "ambiguous fail cut new newer cfg mcfg n ign raw v lines all prob"
"cat lang lexer parser number rawtrees"
CTranslate _ _ -> opts "cat lexer parser"
- CGenerateRandom -> both "cf prob" "cat lang number depth"
- CGenerateTrees -> both "metas" "atoms depth alts cat lang number noexpand"
+ CGenerateRandom -> both "cf prob" "cat lang number depth atoms noexpand doexpand"
+ CGenerateTrees -> both "metas" "atoms depth alts cat lang number noexpand doexpand"
CPutTerm -> flags "transform number"
CTreeBank -> opts "c xml trees"
CLookupTreebank -> both "assocs raw strings trees" "treebank"
diff --git a/src/GF/UseGrammar/Generate.hs b/src/GF/UseGrammar/Generate.hs
index c96bb5e40..d368056d4 100644
--- a/src/GF/UseGrammar/Generate.hs
+++ b/src/GF/UseGrammar/Generate.hs
@@ -25,7 +25,7 @@ import GF.Grammar.PrGrammar
import GF.Grammar.Macros
import GF.Grammar.Values
import GF.Grammar.Grammar (Cat)
-
+import GF.Grammar.SGrammar
import GF.Data.Operations
import GF.Data.Zipper
import GF.Infra.Option
@@ -43,59 +43,11 @@ import Data.List
generateTrees :: Options -> GFCGrammar -> Cat -> Int -> Maybe Int -> Maybe Tree -> [Exp]
generateTrees opts gr cat n mn mt = map str2tr $ generate gr' ifm cat' n mn mt'
where
- gr' = gr2sgr noe ats gr
+ gr' = gr2sgr opts emptyProbs gr
cat' = prt $ snd cat
mt' = maybe Nothing (return . tr2str) mt
ifm = oElem withMetas opts
- ats = getOptInt opts (aOpt "atoms")
- noe = maybe [] (chunks ',') $ getOptVal opts (aOpt "noexpand")
-------------------------------------------
--- translate grammar to simpler form and generated trees back
-
-gr2sgr :: [SIdent] -> Maybe Int -> GFCGrammar -> SGrammar
-gr2sgr noe un gr = buildTree [(c,noexp c rs) | rs@((_,(_,c)):_) <- prune rules] where
- rules =
- groupBy (\x y -> scat x == scat y) $
- sortBy (\x y -> compare (scat x) (scat y))
- [(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty]
- trId = prt . snd
- trTy ty = case catSkeleton ty of
- Ok (mcs,mc) -> [(map trCat mcs, trCat mc)]
- _ -> []
- trCat (m,c) = prt c ---
- scat (_,(_,c)) = c
-
- prune rs = maybe rs (\n -> map (onlyAtoms n) rs) $ un
- onlyAtoms n rs =
- let (rs1,rs2) = partition atom rs
- in take n rs1 ++ rs2
- atom = null . fst . snd
-
- noexp c rs = if elem c noe then [('?':c,([],c))] else rs
-
--- str2tr :: STree -> Exp
-str2tr t = case t of
- SApp ('?':c,[]) -> mkMeta 0 -- from noexpand=c
- SApp (f,ts) -> mkApp (trId f) (map str2tr ts)
- SMeta _ -> mkMeta 0
----- SString s -> K s
- where
- trId = cn . zIdent
-
--- tr2str :: Tree -> STree
-tr2str (Tr (N (_,at,val,_,_),ts)) = case (at,val) of
- (AtC (_,f), _) -> SApp (prt_ f,map tr2str ts)
- (AtM _, v) -> SMeta (catOf v)
- (AtL s, _) -> SString s
- (AtI i, _) -> SInt i
- (AtF i, _) -> SFloat i
- _ -> SMeta "FAILED_TO_GENERATE" ---- err monad!
- where
- catOf v = case v of
- VApp w _ -> catOf w
- VCn (_,c) -> prt_ c
- _ -> "FAILED_TO_GENERATE_FROM_META"
------------------------------------------
-- do the main thing with a simpler data structure
@@ -139,58 +91,3 @@ generate gr ifm cat i mn mt = case mt of
SApp (f,ts) -> [SApp (f,ts') | ts' <- combinations (map genM ts)]
SMeta k -> gen k
_ -> [t]
-
-type SGrammar = BinTree SCat [SRule]
-type SIdent = String
-type SRule = (SFun,SType)
-type SType = ([SCat],SCat)
-type SCat = SIdent
-type SFun = SIdent
-
-allRules gr = concat [rs | (c,rs) <- tree2list gr]
-
-data STree =
- SApp (SFun,[STree])
- | SMeta SCat
- | SString String
- | SInt Integer
- | SFloat Double
- deriving (Show,Eq)
-
-depth :: STree -> Int
-depth t = case t of
- SApp (_,ts@(_:_)) -> maximum (map depth ts) + 1
- _ -> 1
-
-------------------------------------------
--- to test
-
-prSTree t = case t of
- SApp (f,ts) -> f ++ concat (map pr1 ts)
- SMeta c -> '?':c
- SString s -> prQuotedString s
- SInt i -> show i
- SFloat i -> show i
- where
- pr1 t@(SApp (_,ts)) = ' ' : (if null ts then id else prParenth) (prSTree t)
- pr1 t = prSTree t
-
-pSRule :: String -> SRule
-pSRule s = case words s of
- f : _ : cs -> (f,(init cs', last cs'))
- where cs' = [cs !! i | i <- [0,2..length cs - 1]]
- _ -> error $ "not a rule" +++ s
-
-exSgr = map pSRule [
- "Pred : NP -> VP -> S"
- ,"Compl : TV -> NP -> VP"
- ,"PredVV : VV -> VP -> VP"
- ,"DefCN : CN -> NP"
- ,"ModCN : AP -> CN -> CN"
- ,"john : NP"
- ,"walk : VP"
- ,"love : TV"
- ,"try : VV"
- ,"girl : CN"
- ,"big : AP"
- ]
diff --git a/src/HelpFile b/src/HelpFile
index 1070a800e..97ce04186 100644
--- a/src/HelpFile
+++ b/src/HelpFile
@@ -170,10 +170,12 @@ p, parse: p String
grammar (overridden by the -lang flag), in the category S (overridden
by the -cat flag).
options for batch input:
- -lines parse each line of input separately, ignoring empty lines
- -all as -lines, but also parse empty lines
- -prob rank results by probability
- -cut stop after first lexing result leading to parser success
+ -lines parse each line of input separately, ignoring empty lines
+ -all as -lines, but also parse empty lines
+ -prob rank results by probability
+ -cut stop after first lexing result leading to parser success
+ -fail show strings whose parse fails prefixed by #FAIL
+ -ambiguous show strings that have more than one parse prefixed by #AMBIGUOUS
options for selecting parsing method:
(default)parse using an overgenerating CFG
-cfg parse using a much less overgenerating CFG
@@ -315,11 +317,13 @@ gt, generate_trees: gt Tree?
-lang use the abstract syntax of this grammar
-number generate (at most) this number of trees
-noexpand don't expand these categories (comma-separated, e.g. -noexpand=V,CN)
+ -doexpand only expand these categories (comma-separated, e.g. -doexpand=V,CN)
examples:
gt -depth=10 -cat=NP -- generate all NP's to depth 10
gt (PredVP ? (NegVG ?)) -- generate all trees of this form
gt -cat=S -tr | l -- generate and linearize
gt -noexpand=NP | l -mark=metacat -- the only NP is meta, linearized "?0 +NP"
+ gt | l | p -lines -ambiguous | grep "#AMBIGUOUS" -- show ambiguous strings
ma, morphologically_analyse: ma String
Runs morphological analysis on each word in String and displays