summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/compiler/GF/Command/Commands.hs13
-rw-r--r--src/compiler/GF/Compile/GeneratePMCFG.hs37
-rw-r--r--src/compiler/GF/Compile/GrammarToPGF.hs32
-rw-r--r--src/compiler/GF/Compile/PGFPretty.hs1
-rw-r--r--src/compiler/GF/Compile/PGFtoJS.hs2
-rw-r--r--src/compiler/GF/Quiz.hs8
-rw-r--r--src/compiler/GF/Speech/PGFToCFG.hs4
-rw-r--r--src/compiler/GF/Speech/VoiceXML.hs1
-rw-r--r--src/runtime/haskell/PGF/Binary.hs19
-rw-r--r--src/runtime/haskell/PGF/Linearize.hs230
-rw-r--r--src/runtime/haskell/PGF/Macros.hs72
-rw-r--r--src/runtime/haskell/PGF/Morphology.hs24
-rw-r--r--src/runtime/haskell/PGF/PMCFG.hs32
-rw-r--r--src/runtime/haskell/PGF/Parse.hs4
-rw-r--r--src/runtime/haskell/PGF/ShowLinearize.hs114
-rw-r--r--src/runtime/haskell/PGF/VisualizeTree.hs10
16 files changed, 254 insertions, 349 deletions
diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs
index 3647d2e14..addf9b94a 100644
--- a/src/compiler/GF/Command/Commands.hs
+++ b/src/compiler/GF/Command/Commands.hs
@@ -14,7 +14,7 @@ module GF.Command.Commands (
import PGF
import PGF.CId
-import PGF.ShowLinearize
+import PGF.Linearize
import PGF.VisualizeTree
import PGF.Macros
import PGF.Data ----
@@ -344,9 +344,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [
("all","show all forms and variants"),
("bracket","show tree structure with brackets and paths to nodes"),
("multi","linearize to all languages (default)"),
- ("record","show source-code-like record"),
("table","show all forms labelled by parameters"),
- ("term", "show PGF term"),
("treebank","show the tree and tag linearizations with language names")
] ++ stringOpOptions,
flags = [
@@ -797,11 +795,9 @@ allCommands cod env@(pgf, mos) = Map.fromList [
linear :: [Option] -> CId -> Expr -> String
linear opts lang = let unl = unlex opts lang in case opts of
- _ | isOpt "all" opts -> allLinearize unl pgf lang
- _ | isOpt "table" opts -> tableLinearize unl pgf lang
- _ | isOpt "term" opts -> termLinearize pgf lang
- _ | isOpt "record" opts -> recordLinearize pgf lang
- _ | isOpt "bracket" opts -> markLinearize pgf lang
+ _ | isOpt "all" opts -> unlines . concat . intersperse [[]] . map (map (unl . snd)) . tabularLinearizes pgf lang
+ _ | isOpt "table" opts -> unlines . concat . intersperse [[]] . map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang
+ _ | isOpt "bracket" opts -> unlines . markLinearizes pgf lang
_ -> unl . linearize pgf lang
unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ----
@@ -957,4 +953,3 @@ prMorphoAnalysis (w,lps) =
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)]
-
diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs
index d7bc39e7c..e6e3fdc79 100644
--- a/src/compiler/GF/Compile/GeneratePMCFG.hs
+++ b/src/compiler/GF/Compile/GeneratePMCFG.hs
@@ -168,10 +168,7 @@ choices nr path = BM (\c s -> let (args,_) = s
| otherwise = c : addConstraint path0 index0 tcs
mkRecord :: [BranchM (Value a)] -> BranchM (Value a)
-mkRecord xs = BM (\c -> go xs (c . Rec))
- where
- go [] c s = c [] s
- go (BM m:fs) c s = go fs (\bs s -> c (m (\v s -> Return v) s : bs) s) s
+mkRecord xs = BM (\c -> foldl (\c (BM m) bs s -> c (m (\v s -> Return v) s : bs) s) (c . Rec) xs [])
----------------------------------------------------------------------
@@ -202,7 +199,7 @@ protoFCat cnc_defs (n,cat) ctype =
_ -> error $ "Not a record: " ++ show ctype
| otherwise = ctype
- loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
+ loop path rcs tcs (R record) = List.foldr (\(index,term) (rcs,tcs) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record)
loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs)
loop path rcs tcs (S _) = (path:rcs, tcs)
loop path rcs tcs (F id) = case Map.lookup id cnc_defs of
@@ -229,7 +226,7 @@ go' (Variant bs) path ss = do b <- member bs
go' (Return v) path ss = go v path ss
go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId]
-go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (zip [0..] xs)
+go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (reverse (zip [0..] xs))
go (Str seqid) path ss = return (seqid : ss)
go (Con i) path ss = restrictHead path i >> return ss
@@ -350,7 +347,7 @@ emptyGrammarEnv cnc_defs lincats params =
where
(size,poly) = getMultipliers 1 [] ctype
- getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record
+ getMultipliers m ms (R record) = foldr (\t (m,ms) -> getMultipliers m ms t) (m,ms) record
getMultipliers m ms (S _) = (m,ms)
getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms)
getMultipliers m ms (F id) = case Map.lookup id cnc_defs of
@@ -364,17 +361,11 @@ emptyGrammarEnv cnc_defs lincats params =
getLabels _ t = error (show t)
expandHOAS abs_defs cnc_defs lincats lindefs env =
- foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats
+ foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats)
where
hoTypes :: [(Int,CId)]
hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- abs_defs
, (n,c) <- fst (typeSkeleton ty), n > 0]
-
- hoCats :: [CId]
- hoCats = sortNub [c | (_,(ty,_,_)) <- abs_defs
- , h <- case ty of {DTyp hyps val _ -> hyps}
- , let ty = typeOfHypo h
- , c <- fst (catSkeleton ty)]
-- add a range of PMCFG categories for each GF high-order category
add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) =
@@ -386,8 +377,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
-- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat
add_hoFun env (n,cat) =
- let linRec = reverse $
- [[FSymCat 0 i] | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++
+ let linRec = [[FSymCat 0 i] | i <- case arg of {PFCat _ _ rcs _ -> [0..length rcs-1]}] ++
[[FSymLit i 0] | i <- [1..n]]
(env1,lins) = List.mapAccumL addFSeq env linRec
newLinRec = mkArray lins
@@ -405,13 +395,10 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
-- add one PMCFG function for each high-order category: _V : Var -> Cat
add_varFun env cat =
- convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef)
+ case Map.lookup cat lindefs of
+ Nothing -> env
+ Just lindef -> convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef)
where
- lindef =
- case Map.lookup cat lindefs of
- Nothing -> error $ "No lindef for " ++ showCId cat
- Just def -> def
-
arg =
case Map.lookup cidVar lincats of
Nothing -> error $ "No lincat for " ++ showCId cat
@@ -455,15 +442,15 @@ getParserInfo :: GrammarEnv -> ParserInfo
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
ParserInfo { functions = mkArray funSet
, sequences = mkArray seqSet
- , productions0= productions0
- , productions = filterProductions productions0
+ , productions = IntMap.union prodSet coercions
+ , pproductions = IntMap.empty
+ , lproductions = Map.empty
, startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet)
, totalCats = last_id+1
}
where
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
- productions0 = IntMap.union prodSet coercions
coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet]
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs
index c015eff01..31c768045 100644
--- a/src/compiler/GF/Compile/GrammarToPGF.hs
+++ b/src/compiler/GF/Compile/GrammarToPGF.hs
@@ -5,7 +5,7 @@ import GF.Compile.Export
import GF.Compile.GeneratePMCFG
import PGF.CId
-import PGF.Linearize(realize)
+import PGF.Macros(updateProductionIndices)
import qualified PGF.Macros as CM
import qualified PGF.Data as C
import qualified PGF.Data as D
@@ -46,7 +46,7 @@ mkCanon2gfcc opts cnc gr =
-- Adds parsers for all concretes
addParsers :: Options -> D.PGF -> IO D.PGF
addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)]
- return pgf { D.concretes = Map.fromList cncs }
+ return $ updateProductionIndices $ pgf { D.concretes = Map.fromList cncs }
where
conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc
return (lang,cnc { D.parser = Just pinfo })
@@ -586,3 +586,31 @@ requiredCanModules isSingle gr c = nub $ filter notReuse ops ++ exts where
notReuse i = errVal True $ do
m <- M.lookupModule gr i
return $ M.isModRes m -- to exclude reused Cnc and Abs from required
+
+
+realize :: C.Term -> String
+realize = concat . take 1 . realizes
+
+realizes :: C.Term -> [String]
+realizes = map (unwords . untokn) . realizest
+
+realizest :: C.Term -> [[C.Tokn]]
+realizest trm = case trm of
+ C.R ts -> realizest (ts !! 0)
+ C.S ss -> map concat $ combinations $ map realizest ss
+ C.K t -> [[t]]
+ C.W s t -> [[C.KS (s ++ r)] | [C.KS r] <- realizest t]
+ C.FV ts -> concatMap realizest ts
+ C.TM s -> [[C.KS s]]
+ _ -> [[C.KS $ "REALIZE_ERROR " ++ show trm]] ---- debug
+
+untokn :: [C.Tokn] -> [String]
+untokn ts = case ts of
+ C.KP d _ : [] -> d
+ C.KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
+ C.KS s : ws -> s : untokn ws
+ [] -> []
+ where
+ sel d vs w = case [v | C.Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
+ v:_ -> v
+ _ -> d
diff --git a/src/compiler/GF/Compile/PGFPretty.hs b/src/compiler/GF/Compile/PGFPretty.hs
index 679714db5..706081999 100644
--- a/src/compiler/GF/Compile/PGFPretty.hs
+++ b/src/compiler/GF/Compile/PGFPretty.hs
@@ -60,6 +60,7 @@ prCnc abstr name c = prAll prLinCat (lincats c) $$ prAll prLin (lins (expand c))
pr p (P t1 t2) = prec p 3 (pr 3 t1 <> text "!" <> pr 3 t2)
pr p (S ts) = prec p 2 (hsep (punctuate (text " ++") (map (pr 2) ts)))
pr p (K (KS t)) = doubleQuotes (text t)
+ pr p (K _) = empty
pr p (V i) = text ("argv_" ++ show (i+1))
pr p (C i) = text (show (i+1))
pr p (FV ts) = prec p 1 (hsep (punctuate (text " |") (map (pr 1) ts)))
diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs
index 0cec4121d..67d18809a 100644
--- a/src/compiler/GF/Compile/PGFtoJS.hs
+++ b/src/compiler/GF/Compile/PGFtoJS.hs
@@ -90,7 +90,7 @@ children = JS.Ident "cs"
-- Parser
parser2js :: ParserInfo -> [JS.Expr]
-parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions0 p)],
+parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions p)],
JS.EArray $ (map ffun2js (Array.elems (functions p))),
JS.EArray $ (map seq2js (Array.elems (sequences p))),
JS.EObj $ map cats (Map.assocs (startCats p)),
diff --git a/src/compiler/GF/Quiz.hs b/src/compiler/GF/Quiz.hs
index 52d9dee6b..d0353ff79 100644
--- a/src/compiler/GF/Quiz.hs
+++ b/src/compiler/GF/Quiz.hs
@@ -19,7 +19,7 @@ module GF.Quiz (
) where
import PGF
-import PGF.ShowLinearize
+import PGF.Linearize
import GF.Data.Operations
import GF.Infra.UseIO
import GF.Infra.Option
@@ -51,11 +51,11 @@ morphologyList :: PGF -> Language -> Type -> Int -> IO [(String,[String])]
morphologyList pgf ig typ number = do
ts <- generateRandom pgf typ >>= return . take (max 1 number)
gen <- newStdGen
- let ss = map (tabularLinearize pgf ig) ts
+ let ss = map (tabularLinearizes pgf ig) ts
let size = length (head ss)
let forms = take number $ randomRs (0,size-1) gen
- return [(head (snd (head pws)) +++ par, ws) |
- (pws,i) <- zip ss forms, let (par,ws) = pws !! i]
+ return [(snd (head pws0) +++ fst (pws0 !! i), ws) |
+ (pwss@(pws0:_),i) <- zip ss forms, let ws = map (\pws -> snd (pws !! i)) pwss]
-- | compare answer to the list of right answers, increase score and give feedback
mkAnswer :: Encoding -> [String] -> String -> (Integer, String)
diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs
index bd27deadf..4ac430704 100644
--- a/src/compiler/GF/Speech/PGFToCFG.hs
+++ b/src/compiler/GF/Speech/PGFToCFG.hs
@@ -37,7 +37,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang)
rules :: [(FCat,Production)]
- rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo)
+ rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.pproductions pinfo)
, prod <- Set.toList set]
fcatCats :: Map FCat Cat
@@ -58,7 +58,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
topdownRules cat = f cat []
where
- f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
+ f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (pproductions pinfo))
g (FApply funid args) rules = (functions pinfo ! funid,args) : rules
g (FCoerce cat) rules = f cat rules
diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs
index fb25d6a1e..d3939931e 100644
--- a/src/compiler/GF/Speech/VoiceXML.hs
+++ b/src/compiler/GF/Speech/VoiceXML.hs
@@ -16,7 +16,6 @@ import GF.Speech.SRG (getSpeechLanguage)
import PGF.CId
import PGF.Data
import PGF.Macros
-import PGF.Linearize (showPrintName)
import Control.Monad (liftM)
import Data.List (isPrefixOf, find, intersperse)
diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs
index 7d5db73af..a9a6a78dc 100644
--- a/src/runtime/haskell/PGF/Binary.hs
+++ b/src/runtime/haskell/PGF/Binary.hs
@@ -2,6 +2,7 @@ module PGF.Binary where
import PGF.CId
import PGF.Data
+import PGF.Macros
import Data.Binary
import Data.Binary.Put
import Data.Binary.Get
@@ -28,10 +29,11 @@ instance Binary PGF where
gflags <- get
abstract <- get
concretes <- get
- return (PGF{ absname=absname, cncnames=cncnames
- , gflags=gflags
- , abstract=abstract, concretes=concretes
- })
+ return $ updateProductionIndices $
+ (PGF{ absname=absname, cncnames=cncnames
+ , gflags=gflags
+ , abstract=abstract, concretes=concretes
+ })
instance Binary CId where
put (CId bs) = put bs
@@ -185,15 +187,16 @@ instance Binary Production where
_ -> decodingError
instance Binary ParserInfo where
- put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p)
+ put p = put (functions p, sequences p, productions p, totalCats p, startCats p)
get = do functions <- get
sequences <- get
- productions0<- get
+ productions <- get
totalCats <- get
startCats <- get
return (ParserInfo{functions=functions,sequences=sequences
- ,productions0=productions0
- ,productions =filterProductions productions0
+ ,productions = productions
+ ,pproductions = IntMap.empty
+ ,lproductions = Map.empty
,totalCats=totalCats,startCats=startCats})
decodingError = fail "This PGF file was compiled with different version of GF"
diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs
index de3daf11d..9058cba61 100644
--- a/src/runtime/haskell/PGF/Linearize.hs
+++ b/src/runtime/haskell/PGF/Linearize.hs
@@ -1,38 +1,81 @@
-{-# LANGUAGE ParallelListComp #-}
-module PGF.Linearize
- (linearizes,showPrintName,realize,realizes,linTree, linTreeMark,linearizesMark) where
+module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where
import PGF.CId
import PGF.Data
import PGF.Macros
-import PGF.Tree
-
+import Data.Maybe (fromJust)
+import Data.Array.IArray
+import Data.List
import Control.Monad
import qualified Data.Map as Map
-import Data.List
-
-import Debug.Trace
+import qualified Data.IntMap as IntMap
+import qualified Data.Set as Set
-- linearization and computation of concrete PGF Terms
-linearizes :: PGF -> CId -> Expr -> [String]
-linearizes pgf lang = realizes . linTree pgf lang
-
-realize :: Term -> String
-realize = concat . take 1 . realizes
+type LinTable = Array FIndex [Tokn]
-realizes :: Term -> [String]
-realizes = map (unwords . untokn) . realizest
+linearizes :: PGF -> CId -> Expr -> [String]
+linearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang (\_ _ lint -> lint)
-realizest :: Term -> [[Tokn]]
-realizest trm = case trm of
- R ts -> realizest (ts !! 0)
- S ss -> map concat $ combinations $ map realizest ss
- K t -> [[t]]
- W s t -> [[KS (s ++ r)] | [KS r] <- realizest t]
- FV ts -> concatMap realizest ts
- TM s -> [[KS s]]
- _ -> [[KS $ "REALIZE_ERROR " ++ show trm]] ---- debug
+linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
+linTree pgf lang mark e = lin0 [] [] [] Nothing e
+ where
+ cnc = lookMap (error "no lang") lang (concretes pgf)
+ pinfo = fromJust (parser cnc)
+ lp = lproductions pinfo
+
+ lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
+ lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
+ lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e []
+ | otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs])
+
+ lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
+ lin path xs mb_fid (ELit l) [] = case l of
+ LStr s -> return (mark Nothing path (ss s))
+ LInt n -> return (mark Nothing path (ss (show n)))
+ LFlt f -> return (mark Nothing path (ss (show f)))
+ lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es)
+ lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
+ lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es)
+ lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
+ lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
+
+ ss s = listArray (0,0) [[KS s]]
+
+ apply path xs mb_fid f es =
+ case Map.lookup f lp of
+ Just prods -> case lookupProds mb_fid prods of
+ Just set -> do prod <- Set.toList set
+ case prod of
+ FApply funid fids -> do guard (length fids == length es)
+ args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
+ let (FFun _ lins) = functions pinfo ! funid
+ return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
+ FCoerce fid -> apply path xs (Just fid) f es
+ Nothing -> mzero
+ Nothing -> apply path xs mb_fid _V [ELit (LStr "?")] -- function without linearization
+ where
+ lookupProds (Just fid) prods = IntMap.lookup fid prods
+ lookupProds Nothing prods
+ | f == _B || f == _V = Nothing
+ | otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
+
+ sub i path
+ | f == _B || f == _V = path
+ | otherwise = i:path
+
+ isApp (FApply _ _) = True
+ isApp _ = False
+
+ computeSeq seqid args = concatMap compute (elems seq)
+ where
+ seq = sequences pinfo ! seqid
+
+ compute (FSymCat d r) = (args !! d) ! r
+ compute (FSymLit d r) = (args !! d) ! r
+ compute (FSymKS ts) = map KS ts
+ compute (FSymKP ts alts) = [KP ts alts]
untokn :: [Tokn] -> [String]
untokn ts = case ts of
@@ -45,126 +88,23 @@ untokn ts = case ts of
v:_ -> v
_ -> d
--- Lifts all variants to the top level (except those in macros).
-liftVariants :: Term -> [Term]
-liftVariants = f
- where
- f (R ts) = liftM R $ mapM f ts
- f (P t1 t2) = liftM2 P (f t1) (f t2)
- f (S ts) = liftM S $ mapM f ts
- f (FV ts) = ts >>= f
- f (W s t) = liftM (W s) $ f t
- f t = return t
-
-linTree :: PGF -> CId -> Expr -> Term
-linTree pgf lang e = lin (expr2tree e) Nothing
+-- create a table from labels+params to variants
+tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]]
+tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (linTree pgf lang (\_ _ lint -> lint) e)
where
- cnc = lookMap (error "no lang") lang (concretes pgf)
-
- lin (Abs xs e ) mty = case lin e Nothing of
- R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
- TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
- lin (Fun fun es) mty = case Map.lookup fun (funs (abstract pgf)) of
- Just (DTyp hyps _ _,_,_) -> let argVariants = sequence [liftVariants (lin e (Just ty)) | e <- es | (_,_,ty) <- hyps]
- in variants [compute pgf lang args $ lookMap tm0 fun (lins cnc) | args <- argVariants]
- Nothing -> tm0
- lin (Lit (LStr s)) mty = R [kks (show s)] -- quoted
- lin (Lit (LInt i)) mty = R [kks (show i)]
- lin (Lit (LFlt d)) mty = R [kks (show d)]
- lin (Var x) mty = case mty of
- Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc))
- Nothing -> TM (showCId x)
- lin (Meta i) mty = case mty of
- Just (DTyp _ cat _) -> compute pgf lang [K (KS ("?" ++ show i))] (lookMap tm0 cat (lindefs cnc))
- Nothing -> TM (show i)
-
-variants :: [Term] -> Term
-variants ts = case ts of
- [t] -> t
- _ -> FV ts
-
-unvariants :: Term -> [Term]
-unvariants t = case t of
- FV ts -> ts
- _ -> [t]
-
-compute :: PGF -> CId -> [Term] -> Term -> Term
-compute pgf lang args = comp where
- comp trm = case trm of
- P r p -> proj (comp r) (comp p)
- W s t -> W s (comp t)
- R ts -> R $ map comp ts
- V i -> idx args i -- already computed
- F c -> comp $ look c -- not computed (if contains argvar)
- FV ts -> FV $ map comp ts
- S ts -> S $ filter (/= S []) $ map comp ts
- _ -> trm
-
- look = lookOper pgf lang
-
- idx xs i = if i > length xs - 1
- then trace
- ("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0
- else xs !! i
-
- proj r p = case (r,p) of
- (_, FV ts) -> FV $ map (proj r) ts
- (FV ts, _ ) -> FV $ map (\t -> proj t p) ts
- (W s t, _) -> kks (s ++ getString (proj t p))
- _ -> comp $ getField r (getIndex p)
-
- getString t = case t of
- K (KS s) -> s
- _ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR"
-
- getIndex t = case t of
- C i -> i
- TM _ -> 0 -- default value for parameter
- _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666
-
- getField t i = case t of
- R rs -> idx rs i
- TM s -> TM s
- _ -> error ("ERROR in grammar compiler: field from " ++ show t) t
-
----------
--- markup with tree positions
-
-linearizesMark :: PGF -> CId -> Expr -> [String]
-linearizesMark pgf lang = realizes . linTreeMark pgf lang
-
-linTreeMark :: PGF -> CId -> Expr -> Term
-linTreeMark pgf lang = lin [] . expr2tree
+ lbls = case unApp e of
+ Just (f,_) -> let cat = valCat (lookType pgf f)
+ in case parser (lookConcr pgf lang) >>= Map.lookup cat . startCats of
+ Just (_,_,lbls) -> elems lbls
+ Nothing -> error "No labels"
+ Nothing -> error "Not function application"
+
+
+-- show bracketed markup with references to tree structure
+markLinearizes :: PGF -> CId -> Expr -> [String]
+markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
where
- lin p (Abs xs e ) = case lin p e of
- R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
- TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
- lin p (Fun fun es) =
- let argVariants =
- mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es)
- in variants [mark (fun,p) $ compute pgf lang args $ look fun |
- args <- argVariants]
- lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted
- lin p (Lit (LInt i)) = mark p $ R [kks (show i)]
- lin p (Lit (LFlt d)) = mark p $ R [kks (show d)]
- lin p (Var x) = mark p $ TM (showCId x)
- lin p (Meta i) = mark p $ TM (show i)
-
- look = lookLin pgf lang
-
- mark :: Show a => a -> Term -> Term
- mark p t = case t of
- R ts -> R $ map (mark p) ts
- FV ts -> R $ map (mark p) ts
- S ts -> S $ bracket p ts
- K s -> S $ bracket p [t]
- W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts]
- _ -> t
- -- otherwise in normal form
-
- bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"]
- sub p i = p ++ [i]
-
--- | Show the printname of function or category
-showPrintName :: PGF -> Language -> CId -> String
-showPrintName pgf lang id = lookMap "?" id $ printnames $ lookMap (error "no lang") lang $ concretes pgf
+ mark mb_f path lint = amap (bracket mb_f path) lint
+
+ bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"]
+ bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"]
diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs
index 81f946211..bf6252f2a 100644
--- a/src/runtime/haskell/PGF/Macros.hs
+++ b/src/runtime/haskell/PGF/Macros.hs
@@ -3,10 +3,14 @@ module PGF.Macros where
import PGF.CId
import PGF.Data
import Control.Monad
-import qualified Data.Map as Map
-import qualified Data.Array as Array
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.IntMap as IntMap
+import qualified Data.IntSet as IntSet
+import qualified Data.Array as Array
import Data.Maybe
import Data.List
+import GF.Data.Utilities(sortNub)
-- operations for manipulating PGF grammars and objects
@@ -122,6 +126,10 @@ contextLength :: Type -> Int
contextLength ty = case ty of
DTyp hyps _ _ -> length hyps
+-- | Show the printname of function or category
+showPrintName :: PGF -> Language -> CId -> String
+showPrintName pgf lang id = lookMap "?" id $ printnames $ lookMap (error "no lang") lang $ concretes pgf
+
term0 :: CId -> Term
term0 = TM . showCId
@@ -151,3 +159,63 @@ cidVar = mkCId "__gfVar"
_B = mkCId "__gfB"
_V = mkCId "__gfV"
+
+updateProductionIndices :: PGF -> PGF
+updateProductionIndices pgf = pgf{concretes = fmap updateConcrete (concretes pgf)}
+ where
+ updateConcrete cnc =
+ case parser cnc of
+ Nothing -> cnc
+ Just pinfo -> let prods0 = filterProductions (productions pinfo)
+ p_prods = parseIndex pinfo prods0
+ l_prods = linIndex pinfo prods0
+ in cnc{parser = Just pinfo{pproductions = p_prods, lproductions = l_prods}}
+
+ filterProductions prods0
+ | IntMap.size prods == IntMap.size prods0 = prods
+ | otherwise = filterProductions prods
+ where
+ prods = IntMap.mapMaybe (filterProdSet prods0) prods0
+
+ filterProdSet prods set0
+ | Set.null set = Nothing
+ | otherwise = Just set
+ where
+ set = Set.filter (filterRule prods) set0
+
+ filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
+ filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
+ filterRule prods _ = True
+
+ parseIndex pinfo = IntMap.mapMaybeWithKey filterProdSet
+ where
+ filterProdSet fid prods
+ | fid `IntSet.member` ho_fids = Just prods
+ | otherwise = let prods' = Set.filter (not . is_ho_prod) prods
+ in if Set.null prods'
+ then Nothing
+ else Just prods'
+
+ is_ho_prod (FApply _ [fid]) | fid == fcatVar = True
+ is_ho_prod _ = False
+
+ ho_fids :: IntSet.IntSet
+ ho_fids = IntSet.fromList [fid | cat <- ho_cats
+ , fid <- maybe [] (\(s,e,_) -> [s..e]) (Map.lookup cat (startCats pinfo))]
+
+ ho_cats :: [CId]
+ ho_cats = sortNub [c | (ty,_,_) <- Map.elems (funs (abstract pgf))
+ , h <- case ty of {DTyp hyps val _ -> hyps}
+ , let ty = typeOfHypo h
+ , c <- fst (catSkeleton ty)]
+
+ linIndex pinfo productions =
+ Map.fromListWith (IntMap.unionWith Set.union)
+ [(fun,IntMap.singleton res (Set.singleton prod)) | (res,prods) <- IntMap.toList productions
+ , prod <- Set.toList prods
+ , fun <- getFunctions prod]
+ where
+ getFunctions (FApply funid args) = let FFun fun _ = functions pinfo Array.! funid in [fun]
+ getFunctions (FCoerce fid) = case IntMap.lookup fid productions of
+ Nothing -> []
+ Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod] \ No newline at end of file
diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs
index 9eee71a97..be786ebbb 100644
--- a/src/runtime/haskell/PGF/Morphology.hs
+++ b/src/runtime/haskell/PGF/Morphology.hs
@@ -2,11 +2,13 @@ module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho,
lookupMorpho,fullFormLexicon) where
-import PGF.ShowLinearize (collectWords)
-import PGF.Data
import PGF.CId
+import PGF.Data
import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.IntMap as IntMap
+import Data.Array.IArray
import Data.List (intersperse)
-- these 4 definitions depend on the datastructure used
@@ -17,7 +19,23 @@ type Analysis = String
newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
buildMorpho :: PGF -> Language -> Morpho
-buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang))
+buildMorpho pgf lang = Morpho $
+ case Map.lookup lang (concretes pgf) >>= parser of
+ Just pinfo -> collectWords pinfo
+ Nothing -> Map.empty
+
+collectWords pinfo = Map.fromListWith (++)
+ [(t, [(fun,lbls ! l)]) | (s,e,lbls) <- Map.elems (startCats pinfo)
+ , fid <- [s..e]
+ , FApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (pproductions pinfo))
+ , let FFun fun lins = functions pinfo ! funid
+ , (l,seqid) <- assocs lins
+ , sym <- elems (sequences pinfo ! seqid)
+ , t <- sym2tokns sym]
+ where
+ sym2tokns (FSymKS ts) = ts
+ sym2tokns (FSymKP ts alts) = ts ++ [t | Alt ts ps <- alts, t <- ts]
+ sym2tokns _ = []
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo
diff --git a/src/runtime/haskell/PGF/PMCFG.hs b/src/runtime/haskell/PGF/PMCFG.hs
index abf7e4380..0ef0e3295 100644
--- a/src/runtime/haskell/PGF/PMCFG.hs
+++ b/src/runtime/haskell/PGF/PMCFG.hs
@@ -34,12 +34,13 @@ data Alternative =
deriving (Eq,Ord,Show)
data ParserInfo
- = ParserInfo { functions :: Array FunId FFun
- , sequences :: Array SeqId FSeq
- , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
- , productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
- , startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names
- , totalCats :: {-# UNPACK #-} !FCat
+ = ParserInfo { functions :: Array FunId FFun
+ , sequences :: Array SeqId FSeq
+ , productions :: IntMap.IntMap (Set.Set Production) -- the original productions loaded from the PGF file
+ , pproductions :: IntMap.IntMap (Set.Set Production) -- productions needed for parsing
+ , lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)) -- productions needed for linearization
+ , startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names
+ , totalCats :: {-# UNPACK #-} !FCat
}
@@ -98,22 +99,3 @@ ppFCat fcat
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid
-
-
-filterProductions = closure
- where
- closure prods0
- | IntMap.size prods == IntMap.size prods0 = prods
- | otherwise = closure prods
- where
- prods = IntMap.mapMaybe (filterProdSet prods0) prods0
-
- filterProdSet prods set0
- | Set.null set = Nothing
- | otherwise = Just set
- where
- set = Set.filter (filterRule prods) set0
-
- filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
- filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
- filterRule prods _ = True
diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs
index e9936233c..5a4ccc719 100644
--- a/src/runtime/haskell/PGF/Parse.hs
+++ b/src/runtime/haskell/PGF/Parse.hs
@@ -59,7 +59,7 @@ initState pgf lang (DTyp _ start _) =
let items = case Map.lookup start (startCats pinfo) of
Just (s,e,labels) -> do cat <- range (s,e)
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
- [] cat (productions pinfo)
+ [] cat (pproductions pinfo)
let FFun fn lins = functions pinfo ! funid
(lbl,seqid) <- assocs lins
return (Active 0 0 funid seqid args (AK cat lbl))
@@ -72,7 +72,7 @@ initState pgf lang (DTyp _ start _) =
in PState pgf
pinfo
- (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
+ (Chart emptyAC [] emptyPC (pproductions pinfo) (totalCats pinfo) 0)
(TMap.singleton [] (Set.fromList items))
-- | From the current state and the next token
diff --git a/src/runtime/haskell/PGF/ShowLinearize.hs b/src/runtime/haskell/PGF/ShowLinearize.hs
deleted file mode 100644
index fa4de86c8..000000000
--- a/src/runtime/haskell/PGF/ShowLinearize.hs
+++ /dev/null
@@ -1,114 +0,0 @@
-module PGF.ShowLinearize (
- collectWords,
- tableLinearize,
- recordLinearize,
- termLinearize,
- tabularLinearize,
- allLinearize,
- markLinearize
- ) where
-
-import PGF.CId
-import PGF.Data
-import PGF.Tree
-import PGF.Macros
-import PGF.Linearize
-
-import GF.Data.Operations
-import Data.List
-import qualified Data.Map as Map
-
--- printing linearizations in different ways with source parameters
-
--- internal representation, only used internally in this module
-data Record =
- RR [(String,Record)]
- | RT [(String,Record)]
- | RFV [Record]
- | RS String
- | RCon String
-
-prRecord :: Record -> String
-prRecord = prr where
- prr t = case t of
- RR fs -> concat $
- "{" :
- (intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"]
- RT fs -> concat $
- "table {" :
- (intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"]
- RFV ts -> concat $
- "variants {" : (intersperse ";" (map prr ts)) ++ ["}"]
- RS s -> prQuotedString s
- RCon s -> s
-
--- uses the encoding of record types in PGF.paramlincat
-mkRecord :: Term -> Term -> Record
-mkRecord typ trm = case (typ,trm) of
- (_, FV ts) -> RFV $ map (mkRecord typ) ts
- (R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts]
- (S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts]
- (_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts])
- (FV ps, C i) -> RCon $ str $ ps !! i
- (S [], _) -> case realizes trm of
- [s] -> RS s
- ss -> RFV $ map RS ss
- _ -> RS $ show trm ---- printTree trm
- where
- str = realize
-
--- show all branches, without labels and params
-allLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
-allLinearize unlex pgf lang = concat . map (unlex . pr) . tabularLinearize pgf lang where
- pr (p,vs) = unlines vs
-
--- show all branches, with labels and params
-tableLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
-tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where
- pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs))
-
--- create a table from labels+params to variants
-tabularLinearize :: PGF -> CId -> Expr -> [(String,[String])]
-tabularLinearize pgf lang = branches . recLinearize pgf lang where
- branches r = case r of
- RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
- RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
- RFV rs -> concatMap branches rs
- RS s -> [([], [s])]
- RCon _ -> []
-
--- show record in GF-source-like syntax
-recordLinearize :: PGF -> CId -> Expr -> String
-recordLinearize pgf lang = prRecord . recLinearize pgf lang
-
--- create a GF-like record, forming the basis of all functions above
-recLinearize :: PGF -> CId -> Expr -> Record
-recLinearize pgf lang tree = mkRecord typ $ linTree pgf lang tree where
- typ = case expr2tree tree of
- Fun f _ -> lookParamLincat pgf lang $ valCat $ lookType pgf f
-
--- show PGF term
-termLinearize :: PGF -> CId -> Expr -> String
-termLinearize pgf lang = show . linTree pgf lang
-
--- show bracketed markup with references to tree structure
-markLinearize :: PGF -> CId -> Expr -> String
-markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang
-
-
--- for Morphology: word, lemma, tags
-collectWords :: PGF -> Language -> [(String, [(CId,String)])]
-collectWords pgf lang =
- concatMap collOne
- [(f,c,length xs) | (f,(DTyp xs c _,_,_)) <- Map.toList $ funs $ abstract pgf]
- where
- collOne (f,c,i) =
- fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888))))
- fromRec f v r = case r of
- RR rs -> concat [fromRec f v t | (_,t) <- rs]
- RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
- RFV rs -> concatMap (fromRec f v) rs
- RS s -> [(w,[(f,unwords (reverse v))]) | w <- words s, w /= "?888"] ---
--- RS s -> [(s,[(f,unwords (reverse v))])]
- RCon c -> [] ---- inherent
-
diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs
index 429551f54..8e9b28740 100644
--- a/src/runtime/haskell/PGF/VisualizeTree.hs
+++ b/src/runtime/haskell/PGF/VisualizeTree.hs
@@ -102,7 +102,7 @@ graphvizDependencyTree format debug mlab ms pgf lang exp = case format of
ifd s = if debug then s else []
- pot = readPosText $ head $ linearizesMark pgf lang exp
+ pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp
---- use Just str if you have str to match against
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
@@ -188,9 +188,7 @@ getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
---- nubrec and domins are quadratic, but could be (n log n)
graphvizParseTree :: PGF -> CId -> Expr -> String
-graphvizParseTree pgf lang = prGraph False . lin2tree pgf . linMark where
- linMark = head . linearizesMark pgf lang
- ---- use Just str if you have str to match against
+graphvizParseTree pgf lang = prGraph False . lin2tree pgf . concat . take 1 . markLinearizes pgf lang where
lin2tree pgf s = trace s $ prelude ++ nodes ++ links where
@@ -235,12 +233,12 @@ tag s = "<" ++ s ++ ">"
showp = init . tail . show
mtag = tag . ('n':) . uncommas
--- word alignments from Linearize.linearizesMark
+-- word alignments from Linearize.markLinearize
-- words are chunks like {[0,1,1,0] old}
graphvizAlignment :: PGF -> Expr -> String
graphvizAlignment pgf = prGraph True . lin2graph . linsMark where
- linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)]
+ linsMark t = [concat (take 1 (markLinearizes pgf la t)) | la <- cncnames pgf]
lin2graph :: [String] -> [String]
lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links