diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-05-21 09:26:44 +0000 |
| commit | 055c0d0d5a5bb0dc75904fe53df7f2e4f5732a8f (patch) | |
| tree | 0e63fb68c69c8f6ad0f78893c63420f0a3600e1c /src-3.0/GF/Formalism | |
| parent | 915a1de71783ab8446b1af9e72c7ba7dfbc12d3f (diff) | |
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
Diffstat (limited to 'src-3.0/GF/Formalism')
| -rw-r--r-- | src-3.0/GF/Formalism/CFG.hs | 50 | ||||
| -rw-r--r-- | src-3.0/GF/Formalism/FCFG.hs | 106 | ||||
| -rw-r--r-- | src-3.0/GF/Formalism/GCFG.hs | 47 | ||||
| -rw-r--r-- | src-3.0/GF/Formalism/MCFG.hs | 58 | ||||
| -rw-r--r-- | src-3.0/GF/Formalism/SimpleGFC.hs | 268 | ||||
| -rw-r--r-- | src-3.0/GF/Formalism/Utilities.hs | 423 |
6 files changed, 952 insertions, 0 deletions
diff --git a/src-3.0/GF/Formalism/CFG.hs b/src-3.0/GF/Formalism/CFG.hs new file mode 100644 index 000000000..c38adb4e2 --- /dev/null +++ b/src-3.0/GF/Formalism/CFG.hs @@ -0,0 +1,50 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- CFG formalism +----------------------------------------------------------------------------- + +module GF.Formalism.CFG where + +import GF.Formalism.Utilities +import GF.Infra.Print +import GF.Data.Assoc (accumAssoc) +import GF.Data.SortedList (groupPairs) +import GF.Data.Utilities (mapSnd) + +------------------------------------------------------------ +-- type definitions + +type CFGrammar c n t = [CFRule c n t] +data CFRule c n t = CFRule c [Symbol c t] n + deriving (Eq, Ord, Show) + +type CFChart c n t = CFGrammar (Edge c) n t + + +------------------------------------------------------------ +-- building syntax charts from grammars + +grammar2chart :: (Ord n, Ord e) => CFGrammar e n t -> SyntaxChart n e +grammar2chart cfchart = accumAssoc groupSyntaxNodes $ + [ (lhs, SNode name (filterCats rhs)) | + CFRule lhs rhs name <- cfchart ] + + +---------------------------------------------------------------------- +-- pretty-printing + +instance (Print n, Print c, Print t) => Print (CFRule c n t) where + prt (CFRule cat rhs name) = prt name ++ " : " ++ prt cat ++ + ( if null rhs then "" + else " --> " ++ prtSep " " rhs ) + prtList = prtSep "\n" + + diff --git a/src-3.0/GF/Formalism/FCFG.hs b/src-3.0/GF/Formalism/FCFG.hs new file mode 100644 index 000000000..5f9656658 --- /dev/null +++ b/src-3.0/GF/Formalism/FCFG.hs @@ -0,0 +1,106 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Krasimir Angelov +-- Stability : (stable) +-- Portability : (portable) +-- +-- Definitions of fast multiple context-free grammars +----------------------------------------------------------------------------- + +module GF.Formalism.FCFG + ( + -- * Token + FToken + + -- * Category + , FPath + , FCat + + , fcatString, fcatInt, fcatFloat, fcatVar + + -- * Symbol + , FIndex + , FSymbol(..) + + -- * Name + , FName + , isCoercionF + + -- * Grammar + , FPointPos + , FGrammar + , FRule(..) + ) where + +import Control.Monad (liftM) +import Data.List (groupBy) +import Data.Array +import qualified Data.Map as Map + +import GF.Formalism.Utilities +import qualified GF.GFCC.CId as AbsGFCC +import GF.Infra.PrintClass + + +------------------------------------------------------------ +-- Token +type FToken = String + + +------------------------------------------------------------ +-- Category +type FPath = [FIndex] +type FCat = Int + +fcatString, fcatInt, fcatFloat, fcatVar :: Int +fcatString = (-1) +fcatInt = (-2) +fcatFloat = (-3) +fcatVar = (-4) + + +------------------------------------------------------------ +-- Symbol +type FIndex = Int +data FSymbol + = FSymCat {-# UNPACK #-} !FCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int + | FSymTok FToken + + +------------------------------------------------------------ +-- Name +type FName = NameProfile AbsGFCC.CId + +isCoercionF :: FName -> Bool +isCoercionF (Name fun [Unify [0]]) = fun == AbsGFCC.CId "_" +isCoercionF _ = False + + +------------------------------------------------------------ +-- Grammar + +type FPointPos = Int +type FGrammar = ([FRule], Map.Map AbsGFCC.CId [FCat]) +data FRule = FRule FName [FCat] FCat (Array FIndex (Array FPointPos FSymbol)) + +------------------------------------------------------------ +-- pretty-printing + +instance Print AbsGFCC.CId where + prt (AbsGFCC.CId s) = s + +instance Print FSymbol where + prt (FSymCat c l n) = "($" ++ prt n ++ "!" ++ prt l ++ ")" + prt (FSymTok t) = simpleShow (prt t) + where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\"" + mkEsc '\\' = "\\\\" + mkEsc '\"' = "\\\"" + mkEsc '\n' = "\\n" + mkEsc '\t' = "\\t" + mkEsc chr = [chr] + prtList = prtSep " " + +instance Print FRule where + prt (FRule name args res lins) = prt name ++ " : " ++ (if null args then "" else prtSep " " args ++ " -> ") ++ prt res ++ + " =\n [" ++ prtSep "\n " ["("++prtSep " " [prt sym | (_,sym) <- assocs syms]++")" | (_,syms) <- assocs lins]++"]" + prtList = prtSep "\n" diff --git a/src-3.0/GF/Formalism/GCFG.hs b/src-3.0/GF/Formalism/GCFG.hs new file mode 100644 index 000000000..5242081c7 --- /dev/null +++ b/src-3.0/GF/Formalism/GCFG.hs @@ -0,0 +1,47 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/09 09:28:44 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.3 $ +-- +-- Basic GCFG formalism (derived from Pollard 1984) +----------------------------------------------------------------------------- + +module GF.Formalism.GCFG where + +import GF.Formalism.Utilities (SyntaxChart) +import GF.Data.Assoc (assocMap, accumAssoc) +import GF.Data.SortedList (nubsort, groupPairs) +import GF.Infra.PrintClass + +---------------------------------------------------------------------- + +type Grammar c n l t = [Rule c n l t] +data Rule c n l t = Rule (Abstract c n) (Concrete l t) + deriving (Eq, Ord, Show) + +data Abstract cat name = Abs cat [cat] name + deriving (Eq, Ord, Show) +data Concrete lin term = Cnc lin [lin] term + deriving (Eq, Ord, Show) + +---------------------------------------------------------------------- + +instance (Print c, Print n, Print l, Print t) => Print (Rule n c l t) where + prt (Rule abs cnc) = prt abs ++ " := " ++ prt cnc + prtList = prtSep "\n" + +instance (Print c, Print n) => Print (Abstract c n) where + prt (Abs cat args name) = prt name ++ ". " ++ prt cat ++ + ( if null args then "" + else " --> " ++ prtSep " " args ) + +instance (Print l, Print t) => Print (Concrete l t) where + prt (Cnc lcat args term) = prt term + ++ " : " ++ prt lcat ++ + ( if null args then "" + else " / " ++ prtSep " " args) diff --git a/src-3.0/GF/Formalism/MCFG.hs b/src-3.0/GF/Formalism/MCFG.hs new file mode 100644 index 000000000..e6aa965e7 --- /dev/null +++ b/src-3.0/GF/Formalism/MCFG.hs @@ -0,0 +1,58 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/09 09:28:45 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.2 $ +-- +-- Definitions of multiple context-free grammars +----------------------------------------------------------------------------- + +module GF.Formalism.MCFG where + +import Control.Monad (liftM) +import Data.List (groupBy) + +import GF.Formalism.Utilities +import GF.Formalism.GCFG + +import GF.Infra.PrintClass + + +------------------------------------------------------------ +-- grammar types + +-- | the lables in the linearization record should be in the same +-- order as specified by the linearization type @[lbl]@ +type MCFGrammar cat name lbl tok = Grammar cat name [lbl] [Lin cat lbl tok] +type MCFRule cat name lbl tok = Rule cat name [lbl] [Lin cat lbl tok] + +-- | variants are encoded as several linearizations with the same label +data Lin cat lbl tok = Lin lbl [Symbol (cat, lbl, Int) tok] + deriving (Eq, Ord, Show) + +instantiateArgs :: [cat] -> Lin cat' lbl tok -> Lin cat lbl tok +instantiateArgs args (Lin lbl lin) = Lin lbl (map instSym lin) + where instSym = mapSymbol instCat id + instCat (_, lbl, nr) = (args !! nr, lbl, nr) + +expandVariants :: Eq lbl => MCFRule cat name lbl tok -> [MCFRule cat name lbl tok] +expandVariants (Rule abs (Cnc typ typs lins)) = liftM (Rule abs . Cnc typ typs) $ + expandLins lins + where expandLins = sequence . groupBy eqLbl + eqLbl (Lin l1 _) (Lin l2 _) = l1 == l2 + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print c, Print l, Print t) => Print (Lin c l t) where + prt (Lin lbl lin) = prt lbl ++ " = " ++ prtSep " " (map (symbol prArg (show.prt)) lin) + where prArg (cat, lbl, nr) = prt cat ++ "@" ++ prt nr ++ prt lbl + prtList = prtBefore "\n\t" + + + diff --git a/src-3.0/GF/Formalism/SimpleGFC.hs b/src-3.0/GF/Formalism/SimpleGFC.hs new file mode 100644 index 000000000..ea1f9dc12 --- /dev/null +++ b/src-3.0/GF/Formalism/SimpleGFC.hs @@ -0,0 +1,268 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/08/11 14:11:46 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.7 $ +-- +-- Simplistic GFC format +----------------------------------------------------------------------------- + +module GF.Formalism.SimpleGFC where + +import Control.Monad (liftM) +import qualified GF.Canon.AbsGFC as AbsGFC +import qualified GF.Infra.Ident as Ident +import GF.Formalism.GCFG +import GF.Infra.Print + +---------------------------------------------------------------------- +-- * basic (leaf) types + +type Constr = AbsGFC.CIdent +type Var = Ident.Ident +type Label = AbsGFC.Label + +anyVar :: Var +anyVar = Ident.wildIdent + +---------------------------------------------------------------------- +-- * simple GFC + +type SimpleGrammar c n t = Grammar (Decl c) n (LinType c t) (Maybe (Term c t)) +type SimpleRule c n t = Rule (Decl c) n (LinType c t) (Maybe (Term c t)) + +-- ** dependent type declarations + +-- 'Decl x c ts' == x is of type (c applied to ts) +-- data Decl c = Decl Var c [TTerm] +-- deriving (Eq, Ord, Show) + +-- | 'Decl x t' == 'x' is of type 't' +data Decl c = Decl Var (AbsType c) deriving (Eq, Ord, Show) +-- | '[t1..tn] ::--> t' == 't1 -> ... -> tn -> t' +data AbsType c = [FOType c] ::--> FOType c deriving (Eq, Ord, Show) +-- | 'c ::@ [t1..tn]' == '(c t1 ... tn)' +data FOType c = c ::@ [TTerm] deriving (Eq, Ord, Show) + +-- including second order functions: +-- (A -> B) ==> Decl _ ([A ::@ []] ::--> (B ::@ [])) +-- (x : A -> B -> C) ==> Decl x ([A ::@ [], B ::@ []] ::--> (C ::@ [])) +-- (y : A t x -> B (t x)) ==> Decl y ([A ::@ [t:@[],TVar x]] ::--> (B ::@ [t:@[TVar x]])) + + +data TTerm = Constr :@ [TTerm] + | TVar Var + deriving (Eq, Ord, Show) + +decl2cat :: Decl c -> c +decl2cat (Decl _ (_ ::--> (cat ::@ _))) = cat + +varsInTTerm :: TTerm -> [Var] +varsInTTerm tterm = vars tterm [] + where vars (TVar x) = (x:) + vars (_ :@ ts) = foldr (.) id $ map vars ts + +tterm2term :: TTerm -> Term c t +tterm2term (con :@ terms) = con :^ map tterm2term terms +-- tterm2term (TVar x) = Var x +tterm2term term = error $ "tterm2term: illegal term" + +term2tterm :: Term c t -> TTerm +term2tterm (con :^ terms) = con :@ map term2tterm terms +-- term2tterm (Var x) = TVar x +term2tterm term = error $ "term2tterm: illegal term" + +-- ** linearization types and terms + +data LinType c t = RecT [(Label, LinType c t)] + | TblT [Term c t] (LinType c t) + | ConT [Term c t] + | StrT + deriving (Eq, Ord, Show) + +isBaseType :: LinType c t -> Bool +isBaseType (ConT _) = True +isBaseType (StrT) = True +isBaseType _ = False + +data Term c t + = Arg Int c (Path c t) -- ^ argument variable, the 'Path' is a path + -- pointing into the term + | Constr :^ [Term c t] -- ^ constructor + | Rec [(Label, Term c t)] -- ^ record + | Term c t :. Label -- ^ record projection + | Tbl [(Term c t, Term c t)] -- ^ table of patterns\/terms + | Term c t :! Term c t -- ^ table selection + | Variants [Term c t] -- ^ variants + | Term c t :++ Term c t -- ^ concatenation + | Token t -- ^ single token + | Empty -- ^ empty string + ---- | Wildcard -- ^ wildcard pattern variable + ---- | Var Var -- ^ bound pattern variable + + -- Res CIdent -- ^ resource identifier + -- Int Integer -- ^ integer + deriving (Eq, Ord, Show) + +-- ** calculations on terms + +(+.) :: Term c t -> Label -> Term c t +Variants terms +. lbl = variants $ map (+. lbl) terms +Rec record +. lbl = maybe err id $ lookup lbl record + where err = error $ "(+.): label not in record" +Arg arg cat path +. lbl = Arg arg cat (path ++. lbl) +term +. lbl = term :. lbl + +(+!) :: (Eq c, Eq t) => Term c t -> Term c t -> Term c t +Variants terms +! pat = variants $ map (+! pat) terms +term +! Variants pats = variants $ map (term +!) pats +term +! arg@(Arg _ _ _) = term :! arg +Arg arg cat path +! pat = Arg arg cat (path ++! pat) +-- cannot handle tables with pattern variales or wildcards (yet): +term@(Tbl table) +! pat = maybe (term :! pat) id $ lookup pat table +term +! pat = term :! pat + +{- does not work correctly: +lookupTbl term [] _ = term +lookupTbl _ ((Wildcard, term) : _) _ = term +lookupTbl _ ((Var x, term) : _) pat = subst x pat term +lookupTbl _ ((pat', term) : _) pat | pat == pat' = term +lookupTbl term (_ : tbl) pat = lookupTbl term tbl pat + +subst x a (Arg n c (Path path)) = Arg n c (Path (map substP path)) + where substP (Right (Var y)) | x==y = Right a + substP p = p +subst x a (con :^ ts) = con :^ map (subst x a) ts +subst x a (Rec rec) = Rec [ (l, subst x a t) | (l, t) <- rec ] +subst x a (t :. l) = subst x a t +. l +subst x a (Tbl tbl) = Tbl [ (subst x a p, subst x a t) | (p, t) <- tbl ] +subst x a (t :! s) = subst x a t +! subst x a s +subst x a (Variants ts) = variants $ map (subst x a) ts +subst x a (t1 :++ t2) = subst x a t1 ?++ subst x a t2 +subst x a (Var y) | x==y = a +subst x a t = t +-} + +(?++) :: Term c t -> Term c t -> Term c t +Variants terms ?++ term = variants $ map (?++ term) terms +term ?++ Variants terms = variants $ map (term ?++) terms +Empty ?++ term = term +term ?++ Empty = term +term1 ?++ term2 = term1 :++ term2 + +variants :: [Term c t] -> Term c t +variants terms0 = case concatMap flatten terms0 of + [term] -> term + terms -> Variants terms + where flatten (Variants ts) = ts + flatten t = [t] + +-- ** enumerations + +enumerateTerms :: (Eq c, Eq t) => Maybe (Term c t) -> LinType c t -> [Term c t] +enumerateTerms arg (StrT) = maybe err return arg + where err = error "enumeratePatterns: parameter type should not be string" +enumerateTerms arg (ConT terms) = terms +enumerateTerms arg (RecT rtype) + = liftM Rec $ mapM enumAssign rtype + where enumAssign (lbl, ctype) = liftM ((,) lbl) $ enumerateTerms arg ctype +enumerateTerms arg (TblT terms ctype) + = liftM Tbl $ mapM enumCase terms + where enumCase pat = liftM ((,) pat) $ enumerateTerms (fmap (+! pat) arg) ctype + +enumeratePatterns :: (Eq c, Eq t) => LinType c t -> [Term c t] +enumeratePatterns t = enumerateTerms Nothing t + +---------------------------------------------------------------------- +-- * paths of record projections and table selections + +-- | Note that the list of labels/selection terms is /reversed/ +newtype Path c t = Path [Either Label (Term c t)] deriving (Eq, Ord, Show) + +emptyPath :: Path c t +emptyPath = Path [] + +-- ** calculations on paths + +(++.) :: Path c t -> Label -> Path c t +Path path ++. lbl = Path (Left lbl : path) + +(++!) :: Path c t -> Term c t -> Path c t +Path path ++! sel = Path (Right sel : path) + +lintypeFollowPath :: (Print c,Print t) => Path c t -> LinType c t -> LinType c t +lintypeFollowPath (Path path0) ctype0 = follow (reverse path0) ctype0 + where follow [] ctype = ctype + follow (Right pat : path) (TblT _ ctype) = follow path ctype + follow (Left lbl : path) (RecT rec) + = maybe err (follow path) $ lookup lbl rec + where err = error $ "lintypeFollowPath: label not in record type" + ++ "\nOriginal Path: " ++ prt (Path path0) + ++ "\nOriginal CType: " ++ prt ctype0 + ++ "\nCurrent Label: " ++ prt lbl + ++ "\nCurrent RType: " ++ prt (RecT rec) + --- by AR for debugging 23/11/2005 + +termFollowPath :: (Eq c, Eq t) => Path c t -> Term c t -> Term c t +termFollowPath (Path path0) = follow (reverse path0) + where follow [] term = term + follow (Right pat : path) term = follow path (term +! pat) + follow (Left lbl : path) term = follow path (term +. lbl) + +lintype2paths :: (Eq c, Eq t) => Path c t -> LinType c t -> [Path c t] +lintype2paths path (ConT _) = [] +lintype2paths path (StrT) = [ path ] +lintype2paths path (RecT rec) = concat [ lintype2paths (path ++. lbl) ctype | + (lbl, ctype) <- rec ] +lintype2paths path (TblT pts vt)= concat [ lintype2paths (path ++! pat) vt | + pat <- pts ] + +---------------------------------------------------------------------- +-- * pretty-printing + +instance Print c => Print (Decl c) where + prt (Decl var typ) | var == anyVar = prt typ + | otherwise = "(?" ++ prt var ++ ":" ++ prt typ ++ ")" + +instance Print c => Print (AbsType c) where + prt ([] ::--> typ) = prt typ + prt (args ::--> typ) = "(" ++ prtAfter "->" args ++ prt typ ++ ")" + +instance Print c => Print (FOType c) where + prt (cat ::@ args) = prt cat ++ prtBefore " " args + +instance Print TTerm where + prt (con :@ args) + | null args = prt con + | otherwise = "(" ++ prt con ++ prtBefore " " args ++ ")" + prt (TVar var) = "?" ++ prt var + +instance (Print c, Print t) => Print (LinType c t) where + prt (RecT rec) = "{" ++ prtPairList ":" "; " rec ++ "}" + prt (TblT ts t2) = "([" ++ prtSep "|" ts ++ "] => " ++ prt t2 ++ ")" + prt (ConT ts) = "[" ++ prtSep "|" ts ++ "]" + prt (StrT) = "Str" + +instance (Print c, Print t) => Print (Term c t) where + prt (Arg n c p) = prt c ++ prt n ++ prt p + prt (c :^ []) = prt c + prt (c :^ ts) = "(" ++ prt c ++ prtBefore " " ts ++ ")" + prt (Rec rec) = "{" ++ prtPairList "=" "; " rec ++ "}" + prt (Tbl tbl) = "[" ++ prtPairList "=>" "; " tbl ++ "]" + prt (Variants ts) = "{| " ++ prtSep " | " ts ++ " |}" + prt (t1 :++ t2) = prt t1 ++ "++" ++ prt t2 + prt (Token t) = "'" ++ prt t ++ "'" + prt (Empty) = "[]" + prt (term :. lbl) = prt term ++ "." ++ prt lbl + prt (term :! sel) = prt term ++ "!" ++ prt sel +-- prt (Wildcard) = "_" +-- prt (Var var) = "?" ++ prt var + +instance (Print c, Print t) => Print (Path c t) where + prt (Path path) = concatMap prtEither (reverse path) + where prtEither (Left lbl) = "." ++ prt lbl + prtEither (Right patt) = "!" ++ prt patt diff --git a/src-3.0/GF/Formalism/Utilities.hs b/src-3.0/GF/Formalism/Utilities.hs new file mode 100644 index 000000000..d1826d095 --- /dev/null +++ b/src-3.0/GF/Formalism/Utilities.hs @@ -0,0 +1,423 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/05/13 12:40:19 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.6 $ +-- +-- Basic type declarations and functions for grammar formalisms +----------------------------------------------------------------------------- + + +module GF.Formalism.Utilities where + +import Control.Monad +import Data.Array +import Data.List (groupBy) + +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Data.Utilities (sameLength, foldMerge, splitBy) + +import GF.Infra.PrintClass + +------------------------------------------------------------ +-- * symbols + +data Symbol c t = Cat c | Tok t + deriving (Eq, Ord, Show) + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +symbol fc ft (Cat cat) = fc cat +symbol fc ft (Tok tok) = ft tok + +mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u +mapSymbol fc ft = symbol (Cat . fc) (Tok . ft) + +filterCats :: [Symbol c t] -> [c] +filterCats syms = [ cat | Cat cat <- syms ] + +filterToks :: [Symbol c t] -> [t] +filterToks syms = [ tok | Tok tok <- syms ] + +------------------------------------------------------------ +-- * edges + +data Edge s = Edge Int Int s + deriving (Eq, Ord, Show) + +instance Functor Edge where + fmap f (Edge i j s) = Edge i j (f s) + + +------------------------------------------------------------ +-- * representaions of input tokens + +data Input t = MkInput { inputEdges :: [Edge t], + inputBounds :: (Int, Int), + inputFrom :: Array Int (Assoc t [Int]), + inputTo :: Array Int (Assoc t [Int]), + inputToken :: Assoc t [(Int, Int)] + } + +makeInput :: Ord t => [Edge t] -> Input t +input :: Ord t => [t] -> Input t +inputMany :: Ord t => [[t]] -> Input t + +instance Show t => Show (Input t) where + show input = "makeInput " ++ show (inputEdges input) + +---------- + +makeInput inEdges | null inEdges = input [] + | otherwise = MkInput inEdges inBounds inFrom inTo inToken + where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ] + where minmax (a, b) (a', b') = (min a a', max b b') + inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $ + [ (i, [(tok, j)]) | Edge i j tok <- inEdges ] + inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds + [ (j, [(tok, i)]) | Edge i j tok <- inEdges ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +input toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = zipWith3 Edge [0..] [1..] toks + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +inputMany toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ] + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ] + ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ + [ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + + +------------------------------------------------------------ +-- * representations of syntactical analyses + +-- ** charts as finite maps over edges + +-- | The values of the chart, a list of key-daughters pairs, +-- has unique keys. In essence, it is a map from 'n' to daughters. +-- The daughters should be a set (not necessarily sorted) of rhs's. +type SyntaxChart n e = Assoc e [SyntaxNode n [e]] + +data SyntaxNode n e = SMeta + | SNode n [e] + | SString String + | SInt Integer + | SFloat Double + deriving (Eq,Ord) + +groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]] +groupSyntaxNodes [] = [] +groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs' + where + (ess,xs') = span xs + + span [] = ([],[]) + span xs@(SNode n es:xs') + | n0 == n = let (ess,xs) = span xs' in (es:ess,xs) + | otherwise = ([],xs) +groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs +groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs +groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs + +-- better(?) representation of forests: +-- data Forest n = F (SMap n (SList [Forest n])) Bool +-- == +-- type Forest n = GeneralTrie n (SList [Forest n]) Bool +-- (the Bool == isMeta) + +-- ** syntax forests + +data SyntaxForest n = FMeta + | FNode n [[SyntaxForest n]] + -- ^ The outer list should be a set (not necessarily sorted) + -- of possible alternatives. Ie. the outer list + -- is a disjunctive node, and the inner lists + -- are (conjunctive) concatenative nodes + | FString String + | FInt Integer + | FFloat Double + deriving (Eq, Ord, Show) + +instance Functor SyntaxForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap _ (FString s) = FString s + fmap _ (FInt n) = FInt n + fmap _ (FFloat f) = FFloat f + fmap _ (FMeta) = FMeta + +forestName :: SyntaxForest n -> Maybe n +forestName (FNode n _) = Just n +forestName _ = Nothing + +unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) +unifyManyForests = foldM unifyForests FMeta + +-- | two forests can be unified, if either is 'FMeta', or both have the same parent, +-- and all children can be unified +unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) +unifyForests FMeta forest = return forest +unifyForests forest FMeta = return forest +unifyForests (FNode name1 children1) (FNode name2 children2) + | name1 == name2 && not (null children) = return $ FNode name1 children + where children = [ forests | forests1 <- children1, forests2 <- children2, + sameLength forests1 forests2, + forests <- zipWithM unifyForests forests1 forests2 ] +unifyForests (FString s1) (FString s2) + | s1 == s2 = return $ FString s1 +unifyForests (FInt n1) (FInt n2) + | n1 == n2 = return $ FInt n1 +unifyForests (FFloat f1) (FFloat f2) + | f1 == f2 = return $ FFloat f1 +unifyForests _ _ = fail "forest unification failure" + +{- måste tänka mer på detta: +compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n) +compactForests = map joinForests . groupBy eqNames . sortForests + where eqNames f g = forestName f == forestName g + sortForests = foldMerge mergeForests [] . map return + mergeForests [] gs = gs + mergeForests fs [] = fs + mergeForests fs@(f:fs') gs@(g:gs') + = case forestName f `compare` forestName g of + LT -> f : mergeForests fs' gs + GT -> g : mergeForests fs gs' + EQ -> f : g : mergeForests fs' gs' + joinForests fs = case forestName (head fs) of + Nothing -> FMeta + Just name -> FNode name $ + compactDaughters $ + concat [ fss | FNode _ fss <- fs ] + compactDaughters fss = case head fss of + [] -> [[]] + [_] -> map return $ compactForests $ concat fss + _ -> nubsort fss +-} + +-- ** syntax trees + +data SyntaxTree n = TMeta + | TNode n [SyntaxTree n] + | TString String + | TInt Integer + | TFloat Double + deriving (Eq, Ord, Show) + +instance Functor SyntaxTree where + fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees + fmap _ (TString s) = TString s + fmap _ (TInt n) = TInt n + fmap _ (TFloat f) = TFloat f + fmap _ (TMeta) = TMeta + +treeName :: SyntaxTree n -> Maybe n +treeName (TNode n _) = Just n +treeName (TMeta) = Nothing + +unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n) +unifyManyTrees = foldM unifyTrees TMeta + +-- | two trees can be unified, if either is 'TMeta', +-- or both have the same parent, and their children can be unified +unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n) +unifyTrees TMeta tree = return tree +unifyTrees tree TMeta = return tree +unifyTrees (TNode name1 children1) (TNode name2 children2) + | name1 == name2 && sameLength children1 children2 + = liftM (TNode name1) $ zipWithM unifyTrees children1 children2 +unifyTrees (TString s1) (TString s2) + | s1 == s2 = return (TString s1) +unifyTrees (TInt n1) (TInt n2) + | n1 == n2 = return (TInt n1) +unifyTrees (TFloat f1) (TFloat f2) + | f1 == f2 = return (TFloat f1) +unifyTrees _ _ = fail "tree unification failure" + +-- ** conversions between representations + +chart2forests :: (Ord n, Ord e) => + SyntaxChart n e -- ^ The complete chart + -> (e -> Bool) -- ^ When is an edge 'FMeta'? + -> [e] -- ^ The starting edges + -> SList (SyntaxForest n) -- ^ The result has unique keys, ie. all 'n' are joined together. + -- In essence, the result is a map from 'n' to forest daughters + +-- simplest implementation + +chart2forests chart isMeta = concatMap (edge2forests []) + where edge2forests edges edge + | isMeta edge = [FMeta] + | edge `elem` edges = [] + | otherwise = map (item2forest (edge:edges)) $ chart ? edge + item2forest edges (SMeta) = FMeta + item2forest edges (SNode name children) = + FNode name $ children >>= mapM (edge2forests edges) + item2forest edges (SString s) = FString s + item2forest edges (SInt n) = FInt n + item2forest edges (SFloat f) = FFloat f + +{- -before AR inserted peb's patch 8/7/2007, this was: + +chart2forests chart isMeta = concatMap edge2forests + where edge2forests edge = if isMeta edge then [FMeta] + else map item2forest $ chart ? edge + item2forest (SMeta) = FMeta + item2forest (SNode name children) = FNode name $ children >>= mapM edge2forests + item2forest (SString s) = FString s + item2forest (SInt n) = FInt n + item2forest (SFloat f) = FFloat f + +-} + +{- +-- more intelligent(?) implementation, +-- requiring that charts and forests are sorted maps and sorted sets +chart2forests chart isMeta = es2fs + where e2fs e = if isMeta e then [FMeta] else map i2f $ chart ? e + es2fs es = if null metas then fs else FMeta : fs + where (metas, nonMetas) = splitBy isMeta es + fs = map i2f $ unionMap (<++>) $ map (chart ?) nonMetas + i2f (name, children) = FNode name $ + case head children of + [] -> [[]] + [_] -> map return $ es2fs $ concat children + _ -> children >>= mapM e2fs +-} + + +forest2trees :: SyntaxForest n -> SList (SyntaxTree n) +forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees +forest2trees (FString s) = [TString s] +forest2trees (FInt n) = [TInt n] +forest2trees (FFloat f) = [TFloat f] +forest2trees (FMeta) = [TMeta] + +---------------------------------------------------------------------- +-- * profiles + +-- | Pairing a rule name with a profile +data NameProfile a = Name a [Profile (SyntaxForest a)] + deriving (Eq, Ord, Show) + +name2fun :: NameProfile a -> a +name2fun (Name fun _) = fun + +-- | A profile is a simple representation of a function on a number of arguments. +-- We only use lists of profiles +data Profile a = Unify [Int] -- ^ The Int's are the argument positions. + -- 'Unify []' will become a metavariable, + -- 'Unify [a,b]' means that the arguments are equal, + | Constant a + deriving (Eq, Ord, Show) + +instance Functor Profile where + fmap f (Constant a) = Constant (f a) + fmap f (Unify xs) = Unify xs + +-- | a function name where the profile does not contain arguments +-- (i.e. denoting a constant, not a function) +constantNameToForest :: NameProfile a -> SyntaxForest a +constantNameToForest name@(Name fun profile) = FNode fun [map unConstant profile] + where unConstant (Constant a) = a + unConstant (Unify []) = FMeta + unConstant _ = error $ "constantNameToForest: the profile should not contain arguments" + +-- | profile application; we need some way of unifying a list of arguments +applyProfile :: ([b] -> a) -> [Profile a] -> [b] -> [a] +applyProfile unify profile args = map apply profile + where apply (Unify xs) = unify $ map (args !!) xs + apply (Constant a) = a + +-- | monadic profile application +applyProfileM :: Monad m => ([b] -> m a) -> [Profile a] -> [b] -> m [a] +applyProfileM unify profile args = mapM apply profile + where apply (Unify xs) = unify $ map (args !!) xs + apply (Constant a) = return a + +-- | profile composition: +-- +-- > applyProfile u z (ps `composeProfiles` qs) args +-- > == +-- > applyProfile u z ps (applyProfile u z qs args) +-- +-- compare with function composition +-- +-- > (p . q) arg +-- > == +-- > p (q arg) +-- +-- Note that composing an 'Constant' with two or more arguments returns an error +-- (since 'Unify' can only take arguments) -- this might change in the future, if there is a need. +composeProfiles :: [Profile a] -> [Profile a] -> [Profile a] +composeProfiles ps qs = map compose ps + where compose (Unify [x]) = qs !! x + compose (Unify xs) = Unify [ y | x <- xs, let Unify ys = qs !! x, y <- ys ] + compose constant = constant + + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print c, Print t) => Print (Symbol c t) where + prt = symbol prt (simpleShow . prt) + where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\"" + mkEsc '\\' = "\\\\" + mkEsc '\"' = "\\\"" + mkEsc '\n' = "\\n" + mkEsc '\t' = "\\t" + mkEsc chr = [chr] + prtList = prtSep " " + +instance Print t => Print (Input t) where + prt input = "input " ++ prt (inputEdges input) + +instance (Print s) => Print (Edge s) where + prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]" + prtList = prtSep "" + +instance (Print s) => Print (SyntaxTree s) where + prt (TNode s trees) + | null trees = prt s + | otherwise = "(" ++ prt s ++ prtBefore " " trees ++ ")" + prt (TString s) = show s + prt (TInt n) = show n + prt (TFloat f) = show f + prt (TMeta) = "?" + prtList = prtAfter "\n" + +instance (Print s) => Print (SyntaxForest s) where + prt (FNode s []) = "(" ++ prt s ++ " - ERROR: null forests)" + prt (FNode s [[]]) = prt s + prt (FNode s [forests]) = "(" ++ prt s ++ prtBefore " " forests ++ ")" + prt (FNode s children) = "{" ++ prtSep " | " [ prt s ++ prtBefore " " forests | + forests <- children ] ++ "}" + prt (FString s) = show s + prt (FInt n) = show n + prt (FFloat f) = show f + prt (FMeta) = "?" + prtList = prtAfter "\n" + +instance Print a => Print (Profile a) where + prt (Unify []) = "?" + prt (Unify args) = prtSep "=" args + prt (Constant a) = prt a + +instance Print a => Print (NameProfile a) where + prt (Name fun profile) = prt fun ++ prt profile + + |
