diff options
| author | peb <unknown> | 2005-04-11 12:57:45 +0000 |
|---|---|---|
| committer | peb <unknown> | 2005-04-11 12:57:45 +0000 |
| commit | ac00f77dadd4d447803dd7cab5a36f47365325d0 (patch) | |
| tree | 2fd02b19234f8d1fcc20ee67a2367d4d4eebfcd8 /src/GF/Formalism/Utilities.hs | |
| parent | f6273f7033b85eea9a8d0cc7d31e9697ba95d5b7 (diff) | |
"Committed_by_peb"
Diffstat (limited to 'src/GF/Formalism/Utilities.hs')
| -rw-r--r-- | src/GF/Formalism/Utilities.hs | 271 |
1 files changed, 271 insertions, 0 deletions
diff --git a/src/GF/Formalism/Utilities.hs b/src/GF/Formalism/Utilities.hs new file mode 100644 index 000000000..166534bc4 --- /dev/null +++ b/src/GF/Formalism/Utilities.hs @@ -0,0 +1,271 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:50 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Basic type declarations and functions for grammar formalisms +----------------------------------------------------------------------------- + + +module GF.Formalism.Utilities where + +import Monad +import Array +import List (groupBy) + +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Data.Utilities (sameLength, foldMerge, splitBy) + +import GF.Infra.Print + +------------------------------------------------------------ +-- * symbols + +data Symbol c t = Cat c | Tok t + deriving (Eq, Ord, Show) + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +symbol fc ft (Cat cat) = fc cat +symbol fc ft (Tok tok) = ft tok + +mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u +mapSymbol fc ft = symbol (Cat . fc) (Tok . ft) + +filterCats :: [Symbol c t] -> [c] +filterCats syms = [ cat | Cat cat <- syms ] + +filterToks :: [Symbol c t] -> [t] +filterToks syms = [ tok | Tok tok <- syms ] + + +------------------------------------------------------------ +-- * edges + +data Edge s = Edge Int Int s + deriving (Eq, Ord, Show) + +instance Functor Edge where + fmap f (Edge i j s) = Edge i j (f s) + + +------------------------------------------------------------ +-- * representaions of input tokens + +data Input t = MkInput { inputEdges :: [Edge t], + inputBounds :: (Int, Int), + inputFrom :: Array Int (Assoc t [Int]), + inputTo :: Array Int (Assoc t [Int]), + inputToken :: Assoc t [(Int, Int)] + } + +makeInput :: Ord t => [Edge t] -> Input t +input :: Ord t => [t] -> Input t +inputMany :: Ord t => [[t]] -> Input t + +instance Show t => Show (Input t) where + show input = "makeInput " ++ show (inputEdges input) + +---------- + +makeInput inEdges | null inEdges = input [] + | otherwise = MkInput inEdges inBounds inFrom inTo inToken + where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ] + where minmax (a, b) (a', b') = (min a a', max b b') + inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $ + [ (i, [(tok, j)]) | Edge i j tok <- inEdges ] + inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds + [ (j, [(tok, i)]) | Edge i j tok <- inEdges ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +input toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = zipWith3 Edge [0..] [1..] toks + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +inputMany toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ] + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ] + ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ + [ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + + +------------------------------------------------------------ +-- * charts, forests & trees + +-- | The values of the chart, a list of key-daughters pairs, +-- has unique keys. In essence, it is a map from 'n' to daughters. +-- The daughters should be a set (not necessarily sorted) of rhs's. +type SyntaxChart n e = Assoc e [(n, [[e]])] + +-- better(?) representation of forests: +-- data Forest n = F (SMap n (SList [Forest n])) Bool +-- == +-- type Forest n = GeneralTrie n (SList [Forest n]) Bool +-- (the Bool == isMeta) + +data SyntaxForest n = FMeta + | FNode n [[SyntaxForest n]] + -- ^ The outer list should be a set (not necessarily sorted) + -- of possible alternatives. Ie. the outer list + -- is a disjunctive node, and the inner lists + -- are (conjunctive) concatenative nodes + deriving (Eq, Ord, Show) + +data SyntaxTree n = TMeta | TNode n [SyntaxTree n] + deriving (Eq, Ord, Show) + +forestName :: SyntaxForest n -> Maybe n +forestName (FNode n _) = Just n +forestName (FMeta) = Nothing + +treeName :: SyntaxTree n -> Maybe n +treeName (TNode n _) = Just n +treeName (TMeta) = Nothing + +instance Functor SyntaxTree where + fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees + fmap f (TMeta) = TMeta + +instance Functor SyntaxForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap f (FMeta) = FMeta + +{- måste tänka mer på detta: +compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n) +compactForests = map joinForests . groupBy eqNames . sortForests + where eqNames f g = forestName f == forestName g + sortForests = foldMerge mergeForests [] . map return + mergeForests [] gs = gs + mergeForests fs [] = fs + mergeForests fs@(f:fs') gs@(g:gs') + = case forestName f `compare` forestName g of + LT -> f : mergeForests fs' gs + GT -> g : mergeForests fs gs' + EQ -> f : g : mergeForests fs' gs' + joinForests fs = case forestName (head fs) of + Nothing -> FMeta + Just name -> FNode name $ + compactDaughters $ + concat [ fss | FNode _ fss <- fs ] + compactDaughters fss = case head fss of + [] -> [[]] + [_] -> map return $ compactForests $ concat fss + _ -> nubsort fss +-} + +-- ** conversions between representations + +forest2trees :: SyntaxForest n -> SList (SyntaxTree n) +forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees +forest2trees (FMeta) = [TMeta] + +chart2forests :: (Ord n, Ord e) => + SyntaxChart n e -- ^ The complete chart + -> (e -> Bool) -- ^ When is an edge 'FMeta'? + -> [e] -- ^ The starting edges + -> SList (SyntaxForest n) -- ^ The result has unique keys, ie. all 'n' are joined together. + -- In essence, the result is a map from 'n' to forest daughters + +-- simplest implementation +chart2forests chart isMeta = concatMap edge2forests + where edge2forests edge = if isMeta edge then [FMeta] + else map item2forest $ chart ? edge + item2forest (name, children) = FNode name $ children >>= mapM edge2forests + +{- +-- more intelligent(?) implementation, +-- requiring that charts and forests are sorted maps and sorted sets +chart2forests chart isMeta = es2fs + where e2fs e = if isMeta e then [FMeta] else map i2f $ chart ? e + es2fs es = if null metas then fs else FMeta : fs + where (metas, nonMetas) = splitBy isMeta es + fs = map i2f $ unionMap (<++>) $ map (chart ?) nonMetas + i2f (name, children) = FNode name $ + case head children of + [] -> [[]] + [_] -> map return $ es2fs $ concat children + _ -> children >>= mapM e2fs +-} + + +-- ** operations on forests + +unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) +unifyManyForests = foldM unifyForests FMeta + +-- | two forests can be unified, if either is 'FMeta', or both have the same parent, +-- and all children can be unified +unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) +unifyForests FMeta forest = return forest +unifyForests forest FMeta = return forest +unifyForests (FNode name1 children1) (FNode name2 children2) + | name1 == name2 && not (null children) = return $ FNode name1 children + | otherwise = fail "forest unification failure" + where children = [ forests | forests1 <- children1, forests2 <- children2, + sameLength forests1 forests2, + forests <- zipWithM unifyForests forests1 forests2 ] + + +-- ** operations on trees + +unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n) +unifyManyTrees = foldM unifyTrees TMeta + +-- | two trees can be unified, if either is 'TMeta', +-- or both have the same parent, and their children can be unified +unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n) +unifyTrees TMeta tree = return tree +unifyTrees tree TMeta = return tree +unifyTrees (TNode name1 children1) (TNode name2 children2) + | name1 == name2 && sameLength children1 children2 + = liftM (TNode name1) $ zipWithM unifyTrees children1 children2 + | otherwise = fail "tree unification failure" + + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print c, Print t) => Print (Symbol c t) where + prt = symbol prt (simpleShow . prt) + where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\"" + mkEsc '\\' = "\\\\" + mkEsc '\"' = "\\\"" + mkEsc '\n' = "\\n" + mkEsc '\t' = "\\t" + mkEsc chr = [chr] + prtList = prtSep " " + +instance Print t => Print (Input t) where + prt input = "input " ++ prt (inputEdges input) + +instance (Print s) => Print (Edge s) where + prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]" + prtList = prtSep "" + +instance (Print s) => Print (SyntaxTree s) where + prt (TNode s trees) = prt s ++ "^{" ++ prtSep " " trees ++ "}" + prt (TMeta) = "?" + prtList = prtAfter "\n" + +instance (Print s) => Print (SyntaxForest s) where + prt (FNode s forests) = prt s ++ "^{" ++ prtSep " | " (map (prtSep " ") forests) ++ "}" + prt (FMeta) = "?" + prtList = prtAfter "\n" + + |
