diff options
| author | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:54:35 +0000 |
|---|---|---|
| committer | aarne <aarne@cs.chalmers.se> | 2008-06-25 16:54:35 +0000 |
| commit | e9e80fc389365e24d4300d7d5390c7d833a96c50 (patch) | |
| tree | f0b58473adaa670bd8fc52ada419d8cad470ee03 /src/GF/Data/Str.hs | |
| parent | b96b36f43de3e2f8b58d5f539daa6f6d47f25870 (diff) | |
changed names of resource-1.3; added a note on homepage on release
Diffstat (limited to 'src/GF/Data/Str.hs')
| -rw-r--r-- | src/GF/Data/Str.hs | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/src/GF/Data/Str.hs b/src/GF/Data/Str.hs new file mode 100644 index 000000000..6f65764c7 --- /dev/null +++ b/src/GF/Data/Str.hs @@ -0,0 +1,134 @@ +---------------------------------------------------------------------- +-- | +-- Module : Str +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/21 16:22:09 $ +-- > CVS $Author: bringert $ +-- > CVS $Revision: 1.8 $ +-- +-- (Description of the module) +----------------------------------------------------------------------------- + +module GF.Data.Str ( + Str (..), Tok (..), --- constructors needed in PrGrammar + str2strings, str2allStrings, str, sstr, sstrV, + isZeroTok, prStr, plusStr, glueStr, + strTok, + allItems +) where + +import GF.Data.Operations +import Data.List (isPrefixOf, isSuffixOf, intersperse) + +-- | abstract token list type. AR 2001, revised and simplified 20\/4\/2003 +newtype Str = Str [Tok] deriving (Read, Show, Eq, Ord) + +-- | notice that having both pre and post would leave to inconsistent situations: +-- +-- > pre {"x" ; "y" / "a"} ++ post {"b" ; "a" / "x"} +-- +-- always violates a condition expressed by the one or the other +data Tok = + TK String + | TN Ss [(Ss, [String])] -- ^ variants depending on next string +--- | TP Ss [(Ss, [String])] -- variants depending on previous string + deriving (Eq, Ord, Show, Read) + + +-- | a variant can itself be a token list, but for simplicity only a list of strings +-- i.e. not itself containing variants +type Ss = [String] + +-- matching functions in both ways + +matchPrefix :: Ss -> [(Ss,[String])] -> [String] -> Ss +matchPrefix s vs t = + head $ [u | + (u,as) <- vs, + any (\c -> isPrefixOf c (concat (unmarkup t))) as + ] ++ [s] + +matchSuffix :: String -> Ss -> [(Ss,[String])] -> Ss +matchSuffix t s vs = + head ([u | (u,as) <- vs, any (\c -> isSuffixOf c t) as] ++ [s]) + +unmarkup :: [String] -> [String] +unmarkup = filter (not . isXMLtag) where + isXMLtag s = case s of + '<':cs@(_:_) -> last cs == '>' + _ -> False + +str2strings :: Str -> Ss +str2strings (Str st) = alls st where + alls st = case st of + TK s : ts -> s : alls ts + TN ds vs : ts -> matchPrefix ds vs t ++ t where t = alls ts +---- u :TP ds vs: ts -> [u] ++ matchSuffix u ds vs ++ alls ts + [] -> [] + +str2allStrings :: Str -> [Ss] +str2allStrings (Str st) = alls st where + alls st = case st of + TK s : ts -> [s : t | t <- alls ts] + TN ds vs : [] -> [ds ++ v | v <- map fst vs] + TN ds vs : ts -> [matchPrefix ds vs t ++ t | t <- alls ts] + [] -> [[]] + +sstr :: Str -> String +sstr = unwords . str2strings + +-- | to handle a list of variants +sstrV :: [Str] -> String +sstrV ss = case ss of + [] -> "*" + _ -> unwords $ intersperse "/" $ map (unwords . str2strings) ss + +str :: String -> Str +str s = if null s then Str [] else Str [itS s] + +itS :: String -> Tok +itS s = TK s + +isZeroTok :: Str -> Bool +isZeroTok t = case t of + Str [] -> True + Str [TK []] -> True + _ -> False + +strTok :: Ss -> [(Ss,[String])] -> Str +strTok ds vs = Str [TN ds vs] + +prStr :: Str -> String +prStr = prQuotedString . sstr + +plusStr :: Str -> Str -> Str +plusStr (Str ss) (Str tt) = Str (ss ++ tt) + +glueStr :: Str -> Str -> Str +glueStr (Str ss) (Str tt) = Str $ case (ss,tt) of + ([],_) -> tt + (_,[]) -> ss + _ -> init ss ++ glueIt (last ss) (head tt) ++ tail tt + where + glueIt t u = case (t,u) of + (TK s, TK s') -> return $ TK $ s ++ s' + (TN ds vs, TN es ws) -> return $ TN (glues (matchPrefix ds vs es) es) + [(glues (matchPrefix ds vs w) w,cs) | (w,cs) <- ws] + (TN ds vs, TK s) -> map TK $ glues (matchPrefix ds vs [s]) [s] + (TK s, TN es ws) -> return $ TN (glues [s] es) [(glues [s] w, c) | (w,c) <- ws] + +glues :: [[a]] -> [[a]] -> [[a]] +glues ss tt = case (ss,tt) of + ([],_) -> tt + (_,[]) -> ss + _ -> init ss ++ [last ss ++ head tt] ++ tail tt + +-- | to create the list of all lexical items +allItems :: Str -> [String] +allItems (Str s) = concatMap allOne s where + allOne t = case t of + TK s -> [s] + TN ds vs -> ds ++ concatMap fst vs |
