From af13bae2dfb9adaa7c4aa273961fc09cc7ba1b7a Mon Sep 17 00:00:00 2001 From: krasimir Date: Sun, 17 Jan 2010 17:05:21 +0000 Subject: now the linearization is completely based on PMCFG --- src/compiler/GF/Command/Commands.hs | 13 ++++------- src/compiler/GF/Compile/GeneratePMCFG.hs | 37 +++++++++++--------------------- src/compiler/GF/Compile/GrammarToPGF.hs | 32 +++++++++++++++++++++++++-- src/compiler/GF/Compile/PGFPretty.hs | 1 + src/compiler/GF/Compile/PGFtoJS.hs | 2 +- src/compiler/GF/Quiz.hs | 8 +++---- src/compiler/GF/Speech/PGFToCFG.hs | 4 ++-- src/compiler/GF/Speech/VoiceXML.hs | 1 - 8 files changed, 54 insertions(+), 44 deletions(-) (limited to 'src/compiler/GF') diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 3647d2e14..addf9b94a 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -14,7 +14,7 @@ module GF.Command.Commands ( import PGF import PGF.CId -import PGF.ShowLinearize +import PGF.Linearize import PGF.VisualizeTree import PGF.Macros import PGF.Data ---- @@ -344,9 +344,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ ("all","show all forms and variants"), ("bracket","show tree structure with brackets and paths to nodes"), ("multi","linearize to all languages (default)"), - ("record","show source-code-like record"), ("table","show all forms labelled by parameters"), - ("term", "show PGF term"), ("treebank","show the tree and tag linearizations with language names") ] ++ stringOpOptions, flags = [ @@ -797,11 +795,9 @@ allCommands cod env@(pgf, mos) = Map.fromList [ linear :: [Option] -> CId -> Expr -> String linear opts lang = let unl = unlex opts lang in case opts of - _ | isOpt "all" opts -> allLinearize unl pgf lang - _ | isOpt "table" opts -> tableLinearize unl pgf lang - _ | isOpt "term" opts -> termLinearize pgf lang - _ | isOpt "record" opts -> recordLinearize pgf lang - _ | isOpt "bracket" opts -> markLinearize pgf lang + _ | isOpt "all" opts -> unlines . concat . intersperse [[]] . map (map (unl . snd)) . tabularLinearizes pgf lang + _ | isOpt "table" opts -> unlines . concat . intersperse [[]] . map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang + _ | isOpt "bracket" opts -> unlines . markLinearizes pgf lang _ -> unl . linearize pgf lang unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ---- @@ -957,4 +953,3 @@ prMorphoAnalysis (w,lps) = morphoMissing :: Morpho -> [String] -> [String] morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)] - diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index d7bc39e7c..e6e3fdc79 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -168,10 +168,7 @@ choices nr path = BM (\c s -> let (args,_) = s | otherwise = c : addConstraint path0 index0 tcs mkRecord :: [BranchM (Value a)] -> BranchM (Value a) -mkRecord xs = BM (\c -> go xs (c . Rec)) - where - go [] c s = c [] s - go (BM m:fs) c s = go fs (\bs s -> c (m (\v s -> Return v) s : bs) s) s +mkRecord xs = BM (\c -> foldl (\c (BM m) bs s -> c (m (\v s -> Return v) s : bs) s) (c . Rec) xs []) ---------------------------------------------------------------------- @@ -202,7 +199,7 @@ protoFCat cnc_defs (n,cat) ctype = _ -> error $ "Not a record: " ++ show ctype | otherwise = ctype - loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) + loop path rcs tcs (R record) = List.foldr (\(index,term) (rcs,tcs) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs) loop path rcs tcs (S _) = (path:rcs, tcs) loop path rcs tcs (F id) = case Map.lookup id cnc_defs of @@ -229,7 +226,7 @@ go' (Variant bs) path ss = do b <- member bs go' (Return v) path ss = go v path ss go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId] -go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (zip [0..] xs) +go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (reverse (zip [0..] xs)) go (Str seqid) path ss = return (seqid : ss) go (Con i) path ss = restrictHead path i >> return ss @@ -350,7 +347,7 @@ emptyGrammarEnv cnc_defs lincats params = where (size,poly) = getMultipliers 1 [] ctype - getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record + getMultipliers m ms (R record) = foldr (\t (m,ms) -> getMultipliers m ms t) (m,ms) record getMultipliers m ms (S _) = (m,ms) getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms) getMultipliers m ms (F id) = case Map.lookup id cnc_defs of @@ -364,17 +361,11 @@ emptyGrammarEnv cnc_defs lincats params = getLabels _ t = error (show t) expandHOAS abs_defs cnc_defs lincats lindefs env = - foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats + foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats) where hoTypes :: [(Int,CId)] hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- abs_defs , (n,c) <- fst (typeSkeleton ty), n > 0] - - hoCats :: [CId] - hoCats = sortNub [c | (_,(ty,_,_)) <- abs_defs - , h <- case ty of {DTyp hyps val _ -> hyps} - , let ty = typeOfHypo h - , c <- fst (catSkeleton ty)] -- add a range of PMCFG categories for each GF high-order category add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) = @@ -386,8 +377,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env = -- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat add_hoFun env (n,cat) = - let linRec = reverse $ - [[FSymCat 0 i] | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++ + let linRec = [[FSymCat 0 i] | i <- case arg of {PFCat _ _ rcs _ -> [0..length rcs-1]}] ++ [[FSymLit i 0] | i <- [1..n]] (env1,lins) = List.mapAccumL addFSeq env linRec newLinRec = mkArray lins @@ -405,13 +395,10 @@ expandHOAS abs_defs cnc_defs lincats lindefs env = -- add one PMCFG function for each high-order category: _V : Var -> Cat add_varFun env cat = - convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef) + case Map.lookup cat lindefs of + Nothing -> env + Just lindef -> convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef) where - lindef = - case Map.lookup cat lindefs of - Nothing -> error $ "No lindef for " ++ showCId cat - Just def -> def - arg = case Map.lookup cidVar lincats of Nothing -> error $ "No lincat for " ++ showCId cat @@ -455,15 +442,15 @@ getParserInfo :: GrammarEnv -> ParserInfo getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = ParserInfo { functions = mkArray funSet , sequences = mkArray seqSet - , productions0= productions0 - , productions = filterProductions productions0 + , productions = IntMap.union prodSet coercions + , pproductions = IntMap.empty + , lproductions = Map.empty , startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet) , totalCats = last_id+1 } where mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] - productions0 = IntMap.union prodSet coercions coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] getFCats :: GrammarEnv -> ProtoFCat -> [FCat] diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index c015eff01..31c768045 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -5,7 +5,7 @@ import GF.Compile.Export import GF.Compile.GeneratePMCFG import PGF.CId -import PGF.Linearize(realize) +import PGF.Macros(updateProductionIndices) import qualified PGF.Macros as CM import qualified PGF.Data as C import qualified PGF.Data as D @@ -46,7 +46,7 @@ mkCanon2gfcc opts cnc gr = -- Adds parsers for all concretes addParsers :: Options -> D.PGF -> IO D.PGF addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)] - return pgf { D.concretes = Map.fromList cncs } + return $ updateProductionIndices $ pgf { D.concretes = Map.fromList cncs } where conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc return (lang,cnc { D.parser = Just pinfo }) @@ -586,3 +586,31 @@ requiredCanModules isSingle gr c = nub $ filter notReuse ops ++ exts where notReuse i = errVal True $ do m <- M.lookupModule gr i return $ M.isModRes m -- to exclude reused Cnc and Abs from required + + +realize :: C.Term -> String +realize = concat . take 1 . realizes + +realizes :: C.Term -> [String] +realizes = map (unwords . untokn) . realizest + +realizest :: C.Term -> [[C.Tokn]] +realizest trm = case trm of + C.R ts -> realizest (ts !! 0) + C.S ss -> map concat $ combinations $ map realizest ss + C.K t -> [[t]] + C.W s t -> [[C.KS (s ++ r)] | [C.KS r] <- realizest t] + C.FV ts -> concatMap realizest ts + C.TM s -> [[C.KS s]] + _ -> [[C.KS $ "REALIZE_ERROR " ++ show trm]] ---- debug + +untokn :: [C.Tokn] -> [String] +untokn ts = case ts of + C.KP d _ : [] -> d + C.KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss + C.KS s : ws -> s : untokn ws + [] -> [] + where + sel d vs w = case [v | C.Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of + v:_ -> v + _ -> d diff --git a/src/compiler/GF/Compile/PGFPretty.hs b/src/compiler/GF/Compile/PGFPretty.hs index 679714db5..706081999 100644 --- a/src/compiler/GF/Compile/PGFPretty.hs +++ b/src/compiler/GF/Compile/PGFPretty.hs @@ -60,6 +60,7 @@ prCnc abstr name c = prAll prLinCat (lincats c) $$ prAll prLin (lins (expand c)) pr p (P t1 t2) = prec p 3 (pr 3 t1 <> text "!" <> pr 3 t2) pr p (S ts) = prec p 2 (hsep (punctuate (text " ++") (map (pr 2) ts))) pr p (K (KS t)) = doubleQuotes (text t) + pr p (K _) = empty pr p (V i) = text ("argv_" ++ show (i+1)) pr p (C i) = text (show (i+1)) pr p (FV ts) = prec p 1 (hsep (punctuate (text " |") (map (pr 1) ts))) diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs index 0cec4121d..67d18809a 100644 --- a/src/compiler/GF/Compile/PGFtoJS.hs +++ b/src/compiler/GF/Compile/PGFtoJS.hs @@ -90,7 +90,7 @@ children = JS.Ident "cs" -- Parser parser2js :: ParserInfo -> [JS.Expr] -parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions0 p)], +parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions p)], JS.EArray $ (map ffun2js (Array.elems (functions p))), JS.EArray $ (map seq2js (Array.elems (sequences p))), JS.EObj $ map cats (Map.assocs (startCats p)), diff --git a/src/compiler/GF/Quiz.hs b/src/compiler/GF/Quiz.hs index 52d9dee6b..d0353ff79 100644 --- a/src/compiler/GF/Quiz.hs +++ b/src/compiler/GF/Quiz.hs @@ -19,7 +19,7 @@ module GF.Quiz ( ) where import PGF -import PGF.ShowLinearize +import PGF.Linearize import GF.Data.Operations import GF.Infra.UseIO import GF.Infra.Option @@ -51,11 +51,11 @@ morphologyList :: PGF -> Language -> Type -> Int -> IO [(String,[String])] morphologyList pgf ig typ number = do ts <- generateRandom pgf typ >>= return . take (max 1 number) gen <- newStdGen - let ss = map (tabularLinearize pgf ig) ts + let ss = map (tabularLinearizes pgf ig) ts let size = length (head ss) let forms = take number $ randomRs (0,size-1) gen - return [(head (snd (head pws)) +++ par, ws) | - (pws,i) <- zip ss forms, let (par,ws) = pws !! i] + return [(snd (head pws0) +++ fst (pws0 !! i), ws) | + (pwss@(pws0:_),i) <- zip ss forms, let ws = map (\pws -> snd (pws !! i)) pwss] -- | compare answer to the list of right answers, increase score and give feedback mkAnswer :: Encoding -> [String] -> String -> (Integer, String) diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index bd27deadf..4ac430704 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -37,7 +37,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang) rules :: [(FCat,Production)] - rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo) + rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.pproductions pinfo) , prod <- Set.toList set] fcatCats :: Map FCat Cat @@ -58,7 +58,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co topdownRules cat = f cat [] where - f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (pproductions pinfo)) g (FApply funid args) rules = (functions pinfo ! funid,args) : rules g (FCoerce cat) rules = f cat rules diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs index fb25d6a1e..d3939931e 100644 --- a/src/compiler/GF/Speech/VoiceXML.hs +++ b/src/compiler/GF/Speech/VoiceXML.hs @@ -16,7 +16,6 @@ import GF.Speech.SRG (getSpeechLanguage) import PGF.CId import PGF.Data import PGF.Macros -import PGF.Linearize (showPrintName) import Control.Monad (liftM) import Data.List (isPrefixOf, find, intersperse) -- cgit v1.2.3