diff options
Diffstat (limited to 'src/tools/gftest/Grammar.hs')
| -rw-r--r-- | src/tools/gftest/Grammar.hs | 1091 |
1 files changed, 1091 insertions, 0 deletions
diff --git a/src/tools/gftest/Grammar.hs b/src/tools/gftest/Grammar.hs new file mode 100644 index 000000000..f8333e78b --- /dev/null +++ b/src/tools/gftest/Grammar.hs @@ -0,0 +1,1091 @@ +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-} + +module Grammar + ( Grammar(..), readGrammar + , Tree, top, Symbol(..), showTree + , Cat, ConcrCat(..) + , Lang, Name + + -- Categories, coercions + , ccats, ccatOf, arity + , coerces, uncoerce + , uncoerceAbsCat + + -- Testing and comparison + , testTree, testFun + , compareTree, Comparison(..) + , treesUsingFun + + -- Contexts + , contextsFor + + -- FEAT + , featIth, featCard + + -- Fields + , forgets, reachableFieldsFromTop + , emptyFields, equalFields, fieldNames + + -- misc + , showConcrFun, subTree, flatten + , diffCats, hasConcrString +) where + +import Data.Either ( lefts ) +import Data.List +import qualified Data.Map as M +import Data.Maybe +import Data.Char +import qualified Data.Set as S +import qualified Mu +import qualified FMap as F +import qualified Data.Tree as T +import EqRel + +import GHC.Exts ( the ) +import Debug.Trace + +import qualified PGF2 +import qualified PGF2.Internal as I + +-------------------------------------------------------------------------------- +-- grammar types + +-- name + +type Name = String + +-- concrete category + +type Cat = PGF2.Cat -- i.e. String + +data ConcrCat = CC (Maybe Cat) I.FId -- i.e. Int + deriving ( Eq ) + +instance Show ConcrCat where + show (CC (Just cat) fid) = cat ++ "_" ++ show fid + show (CC Nothing fid) = "_" ++ show fid + +instance Ord ConcrCat where + (CC _ fid1) `compare` (CC _ fid2) = fid1 `compare` fid2 + +ccatOf :: Tree -> ConcrCat +ccatOf (App tp _) = snd (ctyp tp) + +-- tree + +data RoseTree a + = App { top :: a, args :: [RoseTree a] } + deriving ( Eq, Ord ) + +-- from http://hackage.haskell.org/package/containers-0.5.11.0/docs/src/Data.Tree.html#foldTree +foldTree :: (a -> [b] -> b) -> RoseTree a -> b +foldTree f = go where + go (App x ts) = f x (map go ts) + +flatten :: RoseTree a -> [a] +flatten (App tp as) = tp : concatMap flatten as + +type Tree = RoseTree Symbol +type AmbTree = RoseTree [Symbol] -- used as an intermediate category for parsing + +instance Show Tree where + show = showTree + +showTree :: Tree -> String +showTree (App a []) = show a +showTree (App f xs) = unwords (show f : map showTreeArg xs) + where showTreeArg (App a []) = show a + showTreeArg t = "(" ++ showTree t ++ ")" + +subTree :: Symbol -> Tree -> Maybe Tree +subTree symb t@(App tp tr) + | symb==tp = Just t + | otherwise = listToMaybe $ mapMaybe (subTree symb) tr + +-- symbol + +type SeqId = Int + +data Symbol + = Symbol + { name :: Name + , seqs :: [SeqId] + , typ :: ([Cat], Cat) + , ctyp :: ([ConcrCat],ConcrCat) + } + deriving ( Eq, Ord ) + +instance Show Symbol where + show = name + +arity :: Symbol -> Int +arity = length . fst . ctyp + +hole :: ConcrCat -> Symbol +hole c = Symbol (show c) [] ([], "") ([],c) + +showConcrFun :: Grammar -> Symbol -> String +showConcrFun gr detCN = show detCN ++ " : " ++ args ++ show np_209 + where + (dets_cns,np_209) = ctyp detCN + args = concatMap (\x -> show x ++ " → ") dets_cns + +-- grammar + +type Lang = String + +data Grammar + = Grammar + { + concrLang :: Lang + , parse :: String -> [Tree] + , readTree :: String -> Tree + , linearize :: Tree -> String + , tabularLin :: Tree -> [(String,String)] + , concrCats :: [(PGF2.Cat,I.FId,I.FId,[String])] + , coercions :: [(ConcrCat,ConcrCat)] + , contextsTab :: M.Map ConcrCat (M.Map ConcrCat [Tree -> Tree]) + , startCat :: Cat + , symbols :: [Symbol] + , lookupSymbol :: String -> [Symbol] + , functionsByCat :: Cat -> [Symbol] + , concrSeqs :: SeqId -> [Either String (Int,Int)] + , feat :: FEAT + , nonEmptyCats :: S.Set ConcrCat + , allCats :: [ConcrCat] + } + +fieldNames :: Grammar -> Cat -> [String] +fieldNames gr c = map fst . tabularLin gr $ t + where + t:_ = [ t + | f <- functionsByCat gr c + , let (_,c') = ctyp f + , c' `S.member` nonEmptyCats gr + , t <- featAll gr c' + ] + + +-------------------------------------------------------------------------------- +-- grammar + +readGrammar :: Lang -> FilePath -> IO Grammar +readGrammar lang file = + do pgf <- PGF2.readPGF file + return (toGrammar pgf lang) + +toGrammar :: PGF2.PGF -> Lang -> Grammar +toGrammar pgf langName = + let gr = + Grammar + { concrLang = lname + + , parse = \s -> + case PGF2.parse lang (PGF2.startCat pgf) s of + PGF2.ParseOk es_fs -> map (mkTree gr.fst) es_fs + PGF2.ParseFailed i s -> error s + PGF2.ParseIncomplete -> error "Incomplete parse" + + , readTree = \s -> + case PGF2.readExpr s of + Just t -> mkTree gr t + Nothing -> error "readTree: no parse" + + , linearize = \t -> + PGF2.linearize lang (mkExpr t) + + , tabularLin = \t -> + PGF2.tabularLinearize lang (mkExpr t) + + , startCat = + mkCat (PGF2.startCat pgf) + + , concrCats = + I.concrCategories lang + + , symbols = + [ Symbol { + name = nm, + seqs = sqs, + ctyp = (argsCC, goalCC), + typ = (map (uncoerceAbsCat gr) argsCC, goalcat) + } + | (goalcat,bg,end,_) <- I.concrCategories lang + , goalfid <- [bg..end] + , I.PApply funId pargs <- I.concrProductions lang goalfid + , let goalCC = CC (Just goalcat) goalfid + , let argsCC = [ mkCC argfid | I.PArg _ argfid <- pargs ] + , let (nm,sqs) = I.concrFunction lang funId ] + + , lookupSymbol = lookupAll (symb2table `map` symbols gr) + + , functionsByCat = \c -> + [ symb + | symb <- symbols gr + , snd (typ symb) == c + , snd (ctyp symb) `elem` nonEmptyCats gr ] + + , coercions = + [ ( mkCC cfid, CC Nothing afid ) + | afid <- [0..I.concrTotalCats lang] + , I.PCoerce cfid <- I.concrProductions lang afid ] + + , contextsTab = + M.fromList + [ (top, M.fromList (contexts gr top)) + | top <- allCats gr ] + + , concrSeqs = + map cseq2Either . I.concrSequence lang + + , feat = + mkFEAT gr + + , allCats = S.toList $ S.fromList $ + [ a | f <- symbols gr, let (args,goal) = ctyp f + , a <- goal:args + ] ++ + [ c | (cat,coe) <- coercions gr + , c <- [coe,cat] + ] + , nonEmptyCats = S.fromList + [ c + | let -- all functions, organized by result type + funs = M.fromListWith (++) $ + [ (cat,[Right f]) + | f <- symbols gr + , let (_,cat) = ctyp f + ] ++ + [ (coe,[Left cat]) + | (cat,coe) <- coercions gr + ] + + -- all categories, with their dependencies + defs = + [ if or [ arity f == 0 | Right f <- fs ] + then (c, [], \_ -> True) -- has a word + else (c, ys, h) -- no word + | c <- allCats gr + , let -- relevant functions for c + fs = fromMaybe [] (M.lookup c funs) + + -- categories we depend on + ys = S.toList $ S.fromList $ + [ cat | Right f <- fs, cat <- fst (ctyp f) ] ++ + [ cat | Left cat <- fs ] + + -- compute if we're empty, given the emptiness of others + h bs = or $ + [ and [ tab M.! a | a <- args ] + | Right f <- fs + , let (args,_) = ctyp f + ] ++ + [ tab M.! cat + | Left cat <- fs + ] + where + tab = M.fromList (ys `zip` bs) + ] + , (c,True) <- allCats gr `zip` Mu.mu False defs (allCats gr) + ] + + + + } + in gr + where + -- language + (lang,lname) = case M.lookup langName (PGF2.languages pgf) of + Just la -> (la,langName) + Nothing -> let (defName,defGr) = head $ M.assocs $ PGF2.languages pgf + msg = "no grammar found with name " ++ langName ++ + ", using " ++ defName + in trace msg (defGr,defName) + + -- categories and expressions + mkCat tp = cat where (_, cat, _) = PGF2.unType tp + + mkExpr (App n []) | not (null s) && all isDigit s = + PGF2.mkInt (read s) + where + s = show n + + mkExpr (App f xs) = + PGF2.mkApp (name f) [ mkExpr x | x <- xs ] + + mkCC fid = CC ccat fid + where ccat = case [ cat | (cat,bg,end,_) <- I.concrCategories lang + , fid `elem` [bg..end] ] of + [] -> Nothing -- means it's coercion + xs -> Just $ the xs + + -- misc + symb2table s = (s, name s) + + cseq2Either (I.SymKS tok) = Left tok + cseq2Either (I.SymCat x y) = Right (x,y) + cseq2Either x = Left (show x) + +-- parsing and reading trees +mkTree :: Grammar -> PGF2.Expr -> Tree +mkTree gr = disambTree . ambTree + + where + ambTree t = -- :: PGF2.Expr -> AmbTree + case PGF2.unApp t of + Just (f,xs) -> App (lookupSymbol gr f) [ ambTree x | x <- xs ] + Nothing -> error (PGF2.showExpr [] t) + + disambTree at = -- :: AmbTree -> Tree + case foldTree reduce at of + App [x] ts -> App x [ disambTree t | t <- ts ] + App _ _ts -> error "mkTree: invalid tree" + + reduce fs as = -- :: [Symbol] -> [AmbTree] -> AmbTree + let red = [ symbol | symbol <- fs + , let argTypes = + uncoerce gr `map` fst (ctyp symbol) + , let goalTypes = + uncoerce gr `map` [ snd (ctyp s) | App [s] _ <- as ] + -- there should be only one symbol in (still ambiguous) fs + -- whose argument type matches its (already unambiguous) subtrees + , and [ intersect a r /= [] + | (a,r) <- zip argTypes goalTypes ] ] + in case red of + [x] -> App [x] as + _ -> App fs as + +-- categories and coercions +ccats :: Grammar -> Cat -> [ConcrCat] +ccats gr utt = [ cc + | cc@(CC (Just cat) _) <- S.toList (nonEmptyCats gr) + , cat == utt ] + +uncoerceAbsCat :: Grammar -> ConcrCat -> Cat +uncoerceAbsCat gr c = case c of + CC (Just cat) _ -> cat + CC Nothing _ -> the [ uncoerceAbsCat gr x | x <- uncoerce gr c ] + +uncoerce :: Grammar -> ConcrCat -> [ConcrCat] +uncoerce gr c = case c of + CC Nothing _ -> lookupAll (coercions gr) c + _ -> [c] + +coerces :: Grammar -> ConcrCat -> ConcrCat -> Bool +coerces gr coe cat = (cat,coe) `elem` coercions gr + +lookupAll :: (Eq a) => [(b,a)] -> a -> [b] +lookupAll kvs key = [ v | (v,k) <- kvs, k==key ] + +singleton [x] = True +singleton xs = False + +-------------------------------------------------------------------------------- +-- compute categories reachable from S + +reachableCatsFromTop :: Grammar -> ConcrCat -> [ConcrCat] +reachableCatsFromTop gr top = [ c | (c,True) <- cs `zip` rs ] + where + rs = Mu.mu False defs cs + cs = S.toList (nonEmptyCats gr) + + defs = + [ if c == top + then (c, [], \_ -> True) + else (c, ys, or) + | c <- cs + , let ys = S.toList $ S.fromList $ + [ b + | f <- symbols gr + , let (as,b) = ctyp f + , all (`S.member` nonEmptyCats gr) as + , c `elem` as + ] ++ + [ b + | (a,b) <- coercions gr + , a == c + , b `S.member` nonEmptyCats gr + ] + ] + +reachableFieldsFromTop :: Grammar -> ConcrCat -> [(ConcrCat,S.Set Int)] +reachableFieldsFromTop gr top = cs `zip` rs + where + rs = Mu.mu S.empty defs cs + cs = S.toList (nonEmptyCats gr) + + defs = + [ if c == top + then (c, [], \_ -> S.fromList [0]) -- this assumes the top only has one field + else (c, ys, h) + | c <- cs + , let fs = [ Right (f,k) + | f <- symbols gr + , let (as,_) = ctyp f + , all (`S.member` nonEmptyCats gr) as + , (a,k) <- as `zip` [0..] + , c == a + ] ++ + [ Left b + | (a,b) <- coercions gr + , a == c + , b `S.member` nonEmptyCats gr + ] + + ys = S.toList $ S.fromList + [ case f of + Right (f,_) -> snd (ctyp f) + Left b -> b + | f <- fs + ] + + h rs = S.unions + [ case f of + Right (f,k) -> apply (f,k) (args M.! snd (ctyp f)) + Left b -> args M.! b + | f <- fs + ] + where + args = M.fromList (ys `zip` rs) + ] + + apply (f,k) r = + S.fromList + [ j + | (sq,i) <- seqs f `zip` [0..] + , i `S.member` r + , Right (k',j) <- concrSeqs gr sq + , k' == k + ] + +-------------------------------------------------------------------------------- +-- analyzing contexts + +equalFields :: Grammar -> [(ConcrCat,EqRel Int)] +equalFields gr = cs `zip` eqrels + where + eqrels = Mu.mu Top defs cs + cs = S.toList (nonEmptyCats gr) + + defs = + [ (c, depcats, h) + | c <- cs + -- fs = everything that has c as a goal category + -- there's two possibilities: + , let fs = -- 1) c is not a coercion: functions can have c as a goal category + [ Right f + | f <- symbols gr + , all (`S.member` nonEmptyCats gr) (fst (ctyp f)) + , c == snd (ctyp f) + ] ++ + -- 2) c is a coercion: here's a list of (nonempty) categories c uncoerces into + [ Left cat + | (cat,coe) <- coercions gr + , coe == c + , cat `S.member` nonEmptyCats gr + ] + + -- all the categories c depends on + depcats = S.toList $ S.fromList $ concat + [ case f of + Right f -> fst (ctyp f) -- 1) if c is not a coercion: + -- all arg cats of the functions with c as goal cat + Left cat -> [cat] -- 2) if c is a coercion: just the cats that it uncoerces into + | f <- fs + ] + + -- Function to give to mu: + -- computes the equivalence relation, given the eq.rels of its arguments + h rs = foldr (/\) Top $ [ apply f eqs + | Right f <- fs + , let eqs = map (args M.!) (fst $ ctyp f) + ] ++ + [ args M.! cat + | Left cat <- fs + ] + where + args = M.fromList (depcats `zip` rs) + ] + where + apply f eqs = + basic [ concatMap lin (concrSeqs gr sq) + | sq <- seqs f + ] + where + lin (Left str) = [ str | not (null str) ] + lin (Right (i,j)) = [ show i ++ "#" ++ show (rep (eqs !! i) j) ] + +contextsFor :: Grammar -> ConcrCat -> ConcrCat -> [Tree -> Tree] +contextsFor gr top hole = [] `fromMaybe` M.lookup hole (contextsTab gr M.! top) + +contexts :: Grammar -> ConcrCat -> [(ConcrCat,[Tree -> Tree])] +contexts gr top = + [ (c, map (path2context . reverse . snd) (F.toList paths)) + | (c, paths) <- cs `zip` pathss + ] + where + pathss = Mu.muDiff F.nil F.isNil dif uni defs cs + cs = S.toList (nonEmptyCats gr) + + -- all symbols with at least one argument, and only good arguments + goodSyms = + [ f + | f <- symbols gr + , arity f >= 1 + , snd (ctyp f) `S.member` nonEmptyCats gr + , all (`S.member` nonEmptyCats gr) (fst (ctyp f)) + ] + + -- definitions table for fixpoint iteration + fm1 `dif` fm2 = + [ d | d@(xs,_) <- F.toList fm1, not (fm2 `F.covers` xs) ] `ins` F.nil + + fm1 `uni` fm2 = + F.toList fm1 `ins` fm2 + + paths `ins` fm = + foldl collect fm + . map snd + . sort + $ [ (size p, p) | p <- paths ] + where + collect fm (str,p) + | fm `F.covers` str = fm + | otherwise = F.add str p fm + + size (_,p) = + sum [ if i == j then 1 else smallest gr t + | (f,i) <- p + , let (ts,_) = ctyp f + , (t,j) <- ts `zip` [0..] + ] + + defs = + [ if c == top + then (c, [], \_ -> F.unit [0] []) + else (c, ys, h) + | c <- cs + + -- everything that uses c in one of the two ways: + , let fs = -- 1) Functions that take c as the kth argument + [ Right (f,k) + | f <- goodSyms + , (t,k) <- fst (ctyp f) `zip` [0..] + , t == c + ] ++ + -- 2) coercions that uncoerce to c + [ Left coe + | (cat,coe) <- coercions gr + , cat == c + , coe `S.member` nonEmptyCats gr + ] + + -- goal categories for c + ys = S.toList $ S.fromList $ + [ case f of + Right (f,_) -> snd (ctyp f) -- 1) goal category of the function that uses c + Left coe -> coe -- 2) (category of the) coercion that uncoerces to c + | f <- fs + ] + + -- function to give to Mu + h ps = ([ (apply (f,k) str, (f,k):fis) + | Right (f,k) <- fs + , (str,fis) <- args M.! snd (ctyp f) + ] ++ + [ q + | Left a <- fs + , q <- args M.! a + ]) `ins` F.nil + where + args = M.fromList (ys `zip` map F.toList ps) + ] + where -- fields of B that make it to the top + apply :: (Symbol, Int) -> [Int] -> [Int] -- fields of A that make it to the top + apply (f,k) is = + S.toList $ S.fromList $ + [ y + | (sq,i) <- seqs f `zip` [0..] + , i `elem` is + , Right (x,y) <- concrSeqs gr sq + , x == k + ] + + path2context [] x = x + path2context ((f,i):fis) x = + App f + [ if j == i + then path2context fis x + else head (featAll gr t) + | (t,j) <- fst (ctyp f) `zip` [0..] + ] + +forgets :: Grammar -> ConcrCat -> [(ConcrCat,[Tree])] +forgets gr top = + filter (not . null . snd) + [ (c, [ path2context (reverse p) (head (featAll gr c)) + | (is,p) <- F.toList paths + , length is == fields c -- all indices forgotten + ] + ) + | (c, paths) <- cs `zip` pathss + ] + where + pathss = Mu.muDiff F.nil F.isNil dif uni defs cs + cs = S.toList (nonEmptyCats gr) + + -- all symbols with at least one argument, and only good arguments + goodSyms = + [ f + | f <- symbols gr + , arity f >= 1 + , snd (ctyp f) `S.member` nonEmptyCats gr + , all (`S.member` nonEmptyCats gr) (fst (ctyp f)) + ] + + fieldsTab = + M.fromList $ + [ (b, length (seqs f)) + | f <- symbols gr + , let (as,b) = ctyp f + ] + + fields a = + head $ + [ n + | c <- a : [ b | (b,a') <- coercions gr, a' == a ] + , Just n <- [M.lookup c fieldsTab] + ] ++ + error (show a ++ " has no function creating it") + + -- definitions table for fixpoint iteration + fm1 `dif` fm2 = + [ d | d@(xs,_) <- F.toList fm1, not (fm2 `F.covers` xs) ] `ins` F.nil + + fm1 `uni` fm2 = + F.toList fm1 `ins` fm2 + + paths `ins` fm = + foldl collect fm + . map snd + . sort + $ [ (size p, p) | p <- paths ] + where + collect fm (str,p) + | fm `F.covers` str = fm + | otherwise = F.add str p fm + + size (_,p) = + sum [ if i == j then 1 else smallest gr t + | (f,i) <- p + , let (ts,_) = ctyp f + , (t,j) <- ts `zip` [0..] + ] + + defs = + [ if c == top + then (c, [], \_ -> F.unit [] []) + else (c, ys, h) + | c <- cs + + -- everything that uses c in one of the two ways: + , let fs = -- 1) Functions that take c as the kth argument + [ Right (f,k) + | f <- goodSyms + , (t,k) <- fst (ctyp f) `zip` [0..] + , t == c + ] ++ + -- 2) coercions that uncoerce to c + [ Left coe + | (cat,coe) <- coercions gr + , cat == c + , coe `S.member` nonEmptyCats gr + ] + + -- goal categories for c + ys = S.toList $ S.fromList $ + [ case f of + Right (f,_) -> snd (ctyp f) + Left coe -> coe + | f <- fs + ] + + h ps = ([ (apply (f,k) str, (f,k):fis) + | Right (f,k) <- fs + , (str,fis) <- args M.! snd (ctyp f) + , length str < fields c + ] ++ + [ q + | Left a <- fs + , q@(str,_) <- args M.! a + , length str < fields c + ]) `ins` F.nil + where + args = M.fromList (ys `zip` map F.toList ps) + ] + where + apply :: (Symbol, Int) -> [Int] -> [Int] + apply (f,k) is = + [ y + | y <- [0..fields (fst (ctyp f) !! k)-1] + , y `S.notMember` used + ] + where + used = S.fromList $ + [ y + | (sq,i) <- seqs f `zip` [0..] + , i `notElem` is + , Right (x,y) <- concrSeqs gr sq + , x == k + ] + + path2context [] x = x + path2context ((f,i):fis) x = + App f + [ if j == i + then path2context fis x + else head (featAll gr t) + | (t,j) <- fst (ctyp f) `zip` [0..] + ] + +--traceLength s xs = trace (s ++ ":" ++ show (length xs)) xs + +emptyFields :: Grammar -> [(ConcrCat,S.Set Int)] +emptyFields gr = cs `zip` fields + where + cs = S.toList (nonEmptyCats gr) + fields = Mu.mu (S.fromList [0..99999]) defs cs + + defs = + [ (c, ys, h) + | c <- cs + , let fs = -- everything that has c as a goal category + [ Right f + | f <- symbols gr + , all (`S.member` nonEmptyCats gr) (fst (ctyp f)) + , c == snd (ctyp f) + ] ++ + -- 2) c is a coercion: here's a list of (nonempty) categories c uncoerces into + [ Left cat + | (cat,coe) <- coercions gr + , coe == c + , cat `S.member` nonEmptyCats gr + ] + + -- all the categories c depends on + ys = S.toList $ S.fromList $ concat + [ case f of + Right f -> fst (ctyp f) + Left cat -> [cat] + | f <- fs + ] + + -- Function to give to mu: + -- computes whether the field is empty, given the emptiness of its arguments. + -- a field in C is empty, if there's some function + -- f :: A -> B -> C + -- and it uses only empty fields from A and B. + -- we're only looking at a given C at a time, + + h :: [S.Set Int] -> S.Set Int + h vs = foldr1 S.intersection $ [ apply f emptyfields + | Right f <- fs + , let emptyfields = map (args M.!) (fst $ ctyp f) + ] ++ + [ args M.! cat + | Left cat <- fs + ] + where + args :: M.Map ConcrCat (S.Set Int) -- empty fields of each category + args = M.fromList (ys `zip` vs) + ] + where + --apply :: Symbol -- some f :: A -> B + -- -> [S.Set Int] -- for each argument type to f, which fields are empty + -- -> S.Set Int -- empty fields in B + apply f empties = + S.fromList + [ i + | (sq,i) <- seqs f `zip` [0..] + , let isEmpty s = case s of + Left str -> str == "" + Right (k,j) -> j `S.member` (empties !! k) + , all isEmpty (concrSeqs gr sq) + ] +-------------------------------------------------------------------------------- +-- FEAT-style generator magic + +type FEAT = [ConcrCat] -> Int -> (Integer, Integer -> [Tree]) + +smallest :: Grammar -> ConcrCat -> Int +smallest gr c = head [ n | n <- [0..], featCard gr c n > 0 ] + +-- compute how many trees there are of a given size and type +featCard :: Grammar -> ConcrCat -> Int -> Integer +featCard gr c n = featCardVec gr [c] n + +-- generate the i-th tree of a given size and type +featIth :: Grammar -> ConcrCat -> Int -> Integer -> Tree +featIth gr c n i = head (featIthVec gr [c] n i) + +-- generate all trees (infinitely many) of a given type +featAll :: Grammar -> ConcrCat -> [Tree] +featAll gr c = [ featIth gr c n i | n <- [0..], i <- [0..featCard gr c n-1] ] + +-- compute how many tree-vectors there are of a given size and type-vector +featCardVec :: Grammar -> [ConcrCat] -> Int -> Integer +featCardVec gr cs n = fst (feat gr cs n) + +-- generate the i-th tree-vector of a given size and type-vector +featIthVec :: Grammar -> [ConcrCat] -> Int -> Integer -> [Tree] +featIthVec gr cs n i = snd (feat gr cs n) i + +mkFEAT :: Grammar -> FEAT +mkFEAT gr = catList + where + catList' :: FEAT + catList' [] 0 = (1, \0 -> []) + catList' [] _ = (0, error "indexing in an empty sequence") + + catList' [c] s = + parts $ + [ (n, \i -> [App f (h i)]) + | s > 0 + , f <- symbols gr + , let (xs,y) = ctyp f + , y == c + , let (n,h) = catList xs (s-1) + ] ++ + [ catList [x] s -- put (s-1) if it doesn't terminate + | s > 0 + , (x,y) <- coercions gr + , y == c + ] + + catList' (c:cs) s = + parts [ (nx*nxs, \i -> hx (i `mod` nx) ++ hxs (i `div` nx)) + | k <- [0..s] + , let (nx,hx) = catList [c] k + (nxs,hxs) = catList cs (s-k) + ] + + catList :: FEAT + catList = memoList (memoNat . catList') + where + -- all possible categories of the grammar + cats = S.toList $ S.fromList $ + [ x | f <- symbols gr + , let (xs,y) = ctyp f + , x <- y:xs ] ++ + [ z | (x,y) <- coercions gr + , z <- [x,y] ] + + memoList f = \cs -> case cs of + [] -> fNil + a:as -> fCons a as + where + fNil = f [] + fCons = (tab M.!) + tab = M.fromList [ (c, memoList (f . (c:))) | c <- cats ] + + memoNat f = (tab!!) + where + tab = [ f i | i <- [0..] ] + + parts [] = (0, error "indexing outside of a sequence") + parts ((n,h):nhs) = (n+n', \i -> if i < n then h i else h' (i-n)) + where + (n',h') = parts nhs + + +-------------------------------------------------------------------------------- +-- Functions used in Main + +-- compare two grammars +diffCats :: Grammar -> Grammar -> [(Cat,[Int],[String],[String])] +diffCats gr1 gr2 = + [ (acat1,[difFid c1, difFid c2],labels1 \\ labels2,labels2 \\ labels1) + | c1@(acat1,_i1,_j2,labels1) <- concrCats gr1 + , c2@(acat2,_i2,_j2,labels2) <- concrCats gr2 + , difFid c1 /= difFid c2 -- different amount of concrete categories + || labels1 /= labels2 -- or the labels are different + , acat1==acat2 ] + + where + difFid (_,i,j,_) = 1 + (j-i) + + +-- return a list of symbols that have a specified string, e.g. "it" in English +-- grammar appears in functions CleftAdv, CleftNP, ImpersCl, DefArt, it_Pron +hasConcrString :: Grammar -> String -> [Symbol] +hasConcrString gr str = + [ symb + | symb <- symbols gr + , str `elem` concatMap (lefts . concrSeqs gr) (seqs symb) ] + +-- nice printouts +type Context = String +type LinTree = ((Lang,Context),(Lang,String),(Lang,String),(Lang,String)) +data Comparison = Comparison { funTree :: String, linTree :: [LinTree] } +instance Show Comparison where + show c = unlines $ funTree c : map showLinTree (linTree c) + +dummyHole = App (Symbol "∅" [] ([], "") ([], CC Nothing 99999999)) [] + +showLinTree :: LinTree -> String +showLinTree ((an,hl),(l1,t1),(l2,t2),(_l,[])) = unlines ["", an++hl, l1++t1, l2++t2] +showLinTree ((an,hl),(l1,t1),(l2,t2),(l3,t3)) = unlines ["", an++hl, l1++t1, l2++t2, l3++t3] + +compareTree :: Grammar -> Grammar -> [Grammar] -> Tree -> Comparison +compareTree gr oldgr transgr t = Comparison { + funTree = "* " ++ show t +, linTree = [ ( ("** ",hl), (langName gr,newLin), (langName oldgr, oldLin), transLin ) + | ctx <- ctxs + , let hl = show (ctx dummyHole) + , let transLin = case transgr of + [] -> ("","") + g:_ -> (langName g, linearize g (ctx t)) + , let newLin = linearize gr (ctx t) + , let oldLin = linearize oldgr (ctx t) + , newLin /= oldLin ] } + where + w = top t + c = snd (ctyp w) + cs = [ coe + | (cat,coe) <- coercions gr + , c == cat ] + ctxs = concat + [ contextsFor gr sc cat + | sc <- ccats gr (startCat gr) + , cat <- cs ] + langName gr = concrLang gr ++ "> " + +type Result = String + +testFun :: Bool -> Grammar -> [Grammar] -> Cat -> Name -> Result +testFun debug gr trans startcat funname = + let test = testTree debug gr trans + in unlines [ test t n cs + | (n,(t,cs)) <- zip [1..] trees_Ctxs ] + + where + trees_Ctxs = [ (t,commonCtxs) | t <- reducedTrees + , not $ null commonCtxs ] ++ + [ (t,uniqueCtxs) | t <- allTrees + , not $ null uniqueCtxs ] + + (start:_) = ccats gr startcat + hl f c1 c2 = f (c1 dummyHole) == f (c2 dummyHole) +-- applyHole = hl id -- TODO why doesn't this work for equality of contexts? + applyHole = hl show -- :: (Tree -> Tree) -> (Tree -> Tree) -> Bool + + goalcats = map ccatOf allTrees :: [ConcrCat] -- these are not coercions (coercions can't be goals) + + coercionsThatCoverAllGoalcats = [ (c,fs) + | (c,fs) <- contexts gr start + , all (coerces gr c) goalcats ] + funs = case lookupSymbol gr funname of + [] -> error $ "Function "++funname++" not found" + fs -> fs + allTrees = treesUsingFun gr funs + ctxs = nubBy applyHole $ concatMap (contextsFor gr start) goalcats :: [Tree->Tree] + + (commonCtxs,reducedTrees) = case coercionsThatCoverAllGoalcats of + [] -> ([],[]) -- no coercion covers all goal cats -> all contexts are relevant + cs -> (cCtxs,rTrees) -- all goal cats coerce into same -> find redundant contexts + where + (coe,coercedCtxs) = head coercionsThatCoverAllGoalcats + cCtxs = intersectBy applyHole ctxs coercedCtxs + rTrees = concat $ bestExamples (head funs) gr + [ [ App newTop subtrees ] + | (App tp subtrees) <- allTrees + , let newTop = tp { ctyp = (fst $ ctyp tp, coe)} ] + uniqueCtxs = deleteFirstsBy applyHole ctxs commonCtxs + showCtx f = let t = f dummyHole in show t ++ "\t\t\t" ++ showConcrFun gr (top t) + +testTree :: Bool -> Grammar -> [Grammar] -> Tree -> Int -> [Tree -> Tree] -> Result +testTree debug gr tgrs t n ctxs = unlines + [ "* " ++ {- show n ++ ")" ++ -} show t + , showConcrFun gr w + , if debug then unlines $ tabularPrint gr t else "" + , unlines $ concat + [ [ "** " ++ show m ++ ") " ++ show (ctx (App (hole c) [])) + , langName gr ++ linearize gr (ctx t) + ] ++ + [ langName tgr ++ linearize tgr (ctx t) + | tgr <- tgrs ] + | (ctx,m) <- zip ctxs [1..] + ] + , "" ] + where + w = top t + c = snd (ctyp w) + langName gr = concrLang gr ++ "> " + + tabularPrint gr t = + let cseqs = [ concatMap showCSeq cseq + | cseq <- map (concrSeqs gr) (seqs $ top t) ] + tablins = tabularLin gr t :: [(String,String)] + in [ fieldname ++ ":\t" ++ lin ++ "\t" ++ s + | ((fieldname,lin),s) <- zip tablins cseqs ] + showCSeq (Left tok) = " " ++ show tok ++ " " + showCSeq (Right (i,j)) = " <" ++ show i ++ "," ++ show j ++ "> " + +-------------------------------------------------------------------------------- +-- Generate test trees + +treesUsingFun :: Grammar -> [Symbol] -> [Tree] +treesUsingFun gr detCNs = + [ tree + | detCN <- detCNs + , let (dets_cns,np_209) = ctyp detCN -- :: ([ConcrCat],ConcrCat) + , let bestArgs = case dets_cns of + [] -> [[]] + xs -> bestTrees detCN gr dets_cns + , tree <- App detCN `map` bestArgs ] + + +bestTrees :: Symbol -> Grammar -> [ConcrCat] -> [[Tree]] +bestTrees fun gr cats = + bestExamples fun gr $ take 200 -- change this to something else if too slow + [ featIthVec gr cats size i + | all (`S.member` nonEmptyCats gr) cats + , size <- [0..10] + , let card = featCardVec gr cats size + , i <- [0..card-1] + ] + +testsAsWellAs :: (Eq a, Eq b) => [a] -> [b] -> Bool +xs `testsAsWellAs` ys = go (xs `zip` ys) + where + go [] = + True + + go ((x,y):xys) = + and [ y' == y | (x',y') <- xys, x == x' ] && + go [ xy | xy@(x',_) <- xys, x /= x' ] + + +bestExamples :: Symbol -> Grammar -> [[Tree]] -> [[Tree]] +bestExamples fun gr vtrees = go [] vtrees_lins + where + syncategorematics = concatMap (lefts . concrSeqs gr) (seqs fun) + vtrees_lins = [ (vtree, syncategorematics ++ + concatMap (map snd . tabularLin gr) vtree) --linearise all trees at once + | vtree <- vtrees ] :: [([Tree],[String])] + + go cur [] = map fst cur + go cur (vt@(ts,lins):vts) + | any (`testsAsWellAs` lins) (map snd cur) = go cur vts + | otherwise = go' (vt:[ c | c@(_,clins) <- cur + , not (lins `testsAsWellAs` clins) ]) + vts + + go' cur vts | enough cur = map fst cur + | otherwise = go cur vts + + enough :: [([Tree],[String])] -> Bool + enough [(_,lins)] = all singleton (group $ sort lins) -- can stop earlier but let's not do that + enough _ = False +
\ No newline at end of file |
