summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <unknown>2003-11-10 07:55:45 +0000
committeraarne <unknown>2003-11-10 07:55:45 +0000
commit249d506f58a8b5f8ef87295ab3dde2d13ddd3885 (patch)
tree658679a324c4f360901c38637464e8f63b59515d
parent4c99687f217ce258f821d55e68f5403233f6dea7 (diff)
Morphological analysis and glueing.
-rw-r--r--src/GF/Canon/CMacros.hs12
-rw-r--r--src/GF/Canon/MkGFC.hs3
-rw-r--r--src/GF/Compile/GetGrammar.hs3
-rw-r--r--src/GF/Compile/Rename.hs4
-rw-r--r--src/GF/Compile/ShellState.hs2
-rw-r--r--src/GF/Grammar/PrGrammar.hs18
-rw-r--r--src/GF/Source/AbsGF.hs2
-rw-r--r--src/GF/Source/PrintGF.hs2
-rw-r--r--src/GF/Source/SourceToGrammar.hs2
-rw-r--r--src/GF/UseGrammar/Custom.hs18
-rw-r--r--src/GF/UseGrammar/Linear.hs5
-rw-r--r--src/GF/UseGrammar/Morphology.hs45
-rw-r--r--src/Today.hs2
13 files changed, 84 insertions, 34 deletions
diff --git a/src/GF/Canon/CMacros.hs b/src/GF/Canon/CMacros.hs
index 17433e48b..d32f639b4 100644
--- a/src/GF/Canon/CMacros.hs
+++ b/src/GF/Canon/CMacros.hs
@@ -152,3 +152,15 @@ redirectIdent n f@(CIQ _ c) = CIQ n c
ciq n f = CIQ n f
+wordsInTerm :: Term -> [String]
+wordsInTerm trm = filter (not . null) $ case trm of
+ K (KS s) -> [s]
+ S c _ -> wo c
+ R rs -> concat [wo t | Ass _ t <- rs]
+ T _ cs -> concat [wo t | Cas _ t <- cs]
+ C s t -> wo s ++ wo t
+ FV ts -> concatMap wo ts
+ K (KP ss vs) -> ss ++ concat [s ++ t | Var s t <- vs]
+ P t _ -> wo t --- not needed ?
+ _ -> []
+ where wo = wordsInTerm
diff --git a/src/GF/Canon/MkGFC.hs b/src/GF/Canon/MkGFC.hs
index 7547280a9..8f1e46b21 100644
--- a/src/GF/Canon/MkGFC.hs
+++ b/src/GF/Canon/MkGFC.hs
@@ -12,6 +12,9 @@ import qualified Modules as M
prCanonModInfo :: CanonModule -> String
prCanonModInfo = prt . info2mod
+prCanon :: CanonGrammar -> String
+prCanon = unlines . map prCanonModInfo . M.modules
+
canon2grammar :: Canon -> CanonGrammar
canon2grammar (Gr modules) = M.MGrammar $ map mod2info modules where
mod2info m = case m of
diff --git a/src/GF/Compile/GetGrammar.hs b/src/GF/Compile/GetGrammar.hs
index a9a40ee06..32efb960b 100644
--- a/src/GF/Compile/GetGrammar.hs
+++ b/src/GF/Compile/GetGrammar.hs
@@ -79,4 +79,5 @@ oldLexer = map change . L.tokens where
(L.PT p (L.TS s)) | elem s new -> (L.PT p (L.TV (init s ++ "Z")))
_ -> t
new = words $ "abstract concrete interface incomplete " ++
- "instance out open resource reuse transfer with"
+ "instance out open resource reuse transfer with where"
+
diff --git a/src/GF/Compile/Rename.hs b/src/GF/Compile/Rename.hs
index 49e08ab6e..3a0bf5c52 100644
--- a/src/GF/Compile/Rename.hs
+++ b/src/GF/Compile/Rename.hs
@@ -76,11 +76,11 @@ renameIdentTerm :: Status -> Term -> Err Term
renameIdentTerm env@(act,imps) t =
errIn ("atomic term" +++ prt t +++ "given" +++ unwords (map (prt . fst) qualifs)) $
case t of
+ Vr (IC "Int") -> return $ Q cPredefAbs cInt -- Int and String are predefined cats
+ Vr (IC "String") -> return $ Q cPredefAbs cString
Vr c -> do
f <- lookupTreeMany prt opens c
return $ f c
- Vr (IC "Int") -> return $ Q cPredefAbs cInt -- Int and String are predefined cats
- Vr (IC "String") -> return $ Q cPredefAbs cString
Cn c -> do
f <- lookupTreeMany prt opens c
return $ f c
diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs
index ad1566f1f..4d0c8b260 100644
--- a/src/GF/Compile/ShellState.hs
+++ b/src/GF/Compile/ShellState.hs
@@ -133,7 +133,7 @@ updateShellState opts sh (gr,(sgr,rts)) = do
canModules = cgr,
srcModules = src,
cfs = zip concrs cfs,
- morphos = zip concrs (repeat emptyMorpho),
+ morphos = zip concrs (map (mkMorpho cgr) concrs),
gloptions = options (M.allFlags src), ---- canModules
readFiles = [ft | ft@(f,_) <- readFiles sh, notInrts f] ++ rts,
absCats = csi,
diff --git a/src/GF/Grammar/PrGrammar.hs b/src/GF/Grammar/PrGrammar.hs
index 607b766da..2b5648d8a 100644
--- a/src/GF/Grammar/PrGrammar.hs
+++ b/src/GF/Grammar/PrGrammar.hs
@@ -62,15 +62,20 @@ prContext co = unwords $ map prParenth [prt x +++ ":" +++ prt t | (x,t) <- co]
instance Print A.Exp where prt = C.printTree
instance Print A.Term where prt = C.printTree
-instance Print A.Patt where prt = C.printTree
instance Print A.Case where prt = C.printTree
instance Print A.Atom where prt = C.printTree
-instance Print A.CIdent where prt = C.printTree
instance Print A.CType where prt = C.printTree
instance Print A.Label where prt = C.printTree
instance Print A.Module where prt = C.printTree
instance Print A.Sort where prt = C.printTree
+instance Print A.Patt where
+ prt = C.printTree
+ prt_ = prPatt
+
+instance Print A.CIdent where
+ prt = C.printTree
+ prt_ (A.CIQ _ c) = prt c
-- printing values and trees in editing
@@ -183,6 +188,15 @@ prExp e = case e of
App _ _ -> prParenth $ prExp e
_ -> pr1 e
+prPatt :: A.Patt -> String
+prPatt p = case p of
+ A.PC c ps -> prt_ c +++ unwords (map pr1 ps)
+ _ -> prt p --- PR
+ where
+ pr1 p = case p of
+ A.PC _ (_:_) -> prParenth $ prPatt p
+ _ -> prPatt p
+
-- option -strip strips qualifications
prTermOpt opts = if oElem nostripQualif opts then prt else prExp
diff --git a/src/GF/Source/AbsGF.hs b/src/GF/Source/AbsGF.hs
index 8acf35349..28313c5ce 100644
--- a/src/GF/Source/AbsGF.hs
+++ b/src/GF/Source/AbsGF.hs
@@ -172,6 +172,8 @@ data Exp =
| EConcat Exp Exp
| EGlue Exp Exp
| ELet [LocDef] Exp
+ | ELetb [LocDef] Exp
+ | EWhere Exp [LocDef]
| EEqs [Equation]
| ELString LString
| ELin Ident
diff --git a/src/GF/Source/PrintGF.hs b/src/GF/Source/PrintGF.hs
index b406f1935..e4238ceff 100644
--- a/src/GF/Source/PrintGF.hs
+++ b/src/GF/Source/PrintGF.hs
@@ -321,6 +321,8 @@ instance Print Exp where
EConcat exp0 exp -> prPrec i 0 (concat [prt 1 exp0 , ["++"] , prt 0 exp])
EGlue exp0 exp -> prPrec i 0 (concat [prt 1 exp0 , ["+"] , prt 0 exp])
ELet locdefs exp -> prPrec i 0 (concat [["let"] , ["{"] , prt 0 locdefs , ["}"] , ["in"] , prt 0 exp])
+ ELetb locdefs exp -> prPrec i 0 (concat [["let"] , prt 0 locdefs , ["in"] , prt 0 exp])
+ EWhere exp locdefs -> prPrec i 0 (concat [prt 1 exp , ["where"] , ["{"] , prt 0 locdefs , ["}"]])
EEqs equations -> prPrec i 0 (concat [["fn"] , ["{"] , prt 0 equations , ["}"]])
ELString lstring -> prPrec i 4 (concat [prt 0 lstring])
ELin id -> prPrec i 2 (concat [["Lin"] , prt 0 id])
diff --git a/src/GF/Source/SourceToGrammar.hs b/src/GF/Source/SourceToGrammar.hs
index 4c4bc93a6..53681104c 100644
--- a/src/GF/Source/SourceToGrammar.hs
+++ b/src/GF/Source/SourceToGrammar.hs
@@ -357,6 +357,8 @@ transExp x = case x of
where
tryLoc (c,(mty,Just e)) = return (c,(mty,e))
tryLoc (c,_) = Bad $ "local definition of" +++ GP.prt c +++ "without value"
+ ELetb defs exp -> transExp $ ELet defs exp
+ EWhere exp defs -> transExp $ ELet defs exp
ELString (LString str) -> return $ G.K str
ELin id -> liftM G.LiT $ transIdent id
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index 1048aab95..4d5eb8122 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -21,11 +21,13 @@ import CFIdent
---- import CFtoGrammar
import PPrCF
+import PrLBNF
import PrGrammar
+import MkGFC
import Zipper
-----import Morphology
+import Morphology
-----import GrammarToHaskell
-----import GrammarToCanon (showCanon, showCanonOpt)
-----import qualified GrammarToGFC as GFC
@@ -141,16 +143,16 @@ customGrammarParser =
customGrammarPrinter =
customData "Grammar printers, selected by option -printer=x" $
[
----- (strCI "gf", prt) -- DEFAULT
- (strCI "cf", prCF . stateCF)
-
+ (strCI "gfc", prCanon . stateGrammarST) -- DEFAULT
+ ,(strCI "cf", prCF . stateCF)
+ ,(strCI "lbnf", prLBNF . stateCF)
+ ,(strCI "morpho", prMorpho . stateMorpho)
+ ,(strCI "opts", prOpts . stateOptions)
{- ----
(strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT
,(strCI "canon", showCanon "Lang" . stateGrammarST)
,(strCI "gfc", GFC.showGFC . stateGrammarST)
,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST)
- ,(strCI "morpho", prMorpho . stateMorpho)
- ,(strCI "opts", prOpts . stateOptions)
-}
-- add your own grammar printers here
--- also include printing via grammar2syntax!
@@ -236,6 +238,7 @@ customTokenizer =
,(strCI "chars", const $ map (tS . singleton))
,(strCI "code", const $ lexHaskell)
,(strCI "text", const $ lexText)
+ ,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr))
---- ,(strCI "codelit", lexHaskellLiteral . stateIsWord)
---- ,(strCI "textlit", lexTextLiteral . stateIsWord)
,(strCI "codeC", const $ lexC2M)
@@ -253,7 +256,8 @@ customUntokenizer =
,(strCI "textlit", const $ formatAsTextLit)
,(strCI "codelit", const $ formatAsCodeLit)
,(strCI "concat", const $ concat . words)
- ,(strCI "bind", const $ performBinds)
+ ,(strCI "glue", const $ performBinds)
+ ,(strCI "bind", const $ performBinds) -- backward compat
-- add your own untokenizers here
]
++ moreCustomUntokenizer
diff --git a/src/GF/UseGrammar/Linear.hs b/src/GF/UseGrammar/Linear.hs
index a46200b36..c439d62b2 100644
--- a/src/GF/UseGrammar/Linear.hs
+++ b/src/GF/UseGrammar/Linear.hs
@@ -159,7 +159,7 @@ linearizeToStrss gr mk e = do
R rs <- linearizeToRecord gr mk e ----
t <- lookupErr linLab0 [(r,s) | Ass r s <- rs]
return $ map strsFromTerm $ allInTable t
-
+-}
-- the value is a list of strings, not forgetting their arguments
allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]]
@@ -168,9 +168,6 @@ allLinsOfFun gr f = do
allLinValues t
-
--}
-
-- returns printname if one exists; otherwise linearizes with metas
printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String
diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs
index 102e41340..c8f00615a 100644
--- a/src/GF/UseGrammar/Morphology.hs
+++ b/src/GF/UseGrammar/Morphology.hs
@@ -3,8 +3,14 @@ module Morphology where
import AbsGFC
import GFC
import PrGrammar
+import CMacros
+import LookAbs
+import Ident
+import qualified Macros as M
+import Linear
import Operations
+import Glue
import Char
import List (sortBy, intersperse)
@@ -40,35 +46,33 @@ appMorphoOnly m s = (s, ms) where
isKnownWord :: Morpho -> String -> Bool
isKnownWord mo = not . null . snd . appMorphoOnly mo
-mkMorpho :: CanonGrammar -> Morpho
-mkMorpho gr = emptyMorpho ----
-{- ----
-mkMorpho gr = mkMorphoTree $ concat $ map mkOne $ allItems where
+mkMorpho :: CanonGrammar -> Ident -> Morpho
+---- mkMorpho gr = emptyMorpho ----
+mkMorpho gr a = mkMorphoTree $ concat $ map mkOne $ allItems where
+
mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
-- gather forms of lexical items
- allLins fun = errVal [] $ do
- ts <- allLinsOfFun gr fun
+ allLins fun@(m,f) = errVal [] $ do
+ ts <- allLinsOfFun gr (CIQ a f)
ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts
return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
- prOne f c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map tagPrt ps))
+ prOne (_,f) c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map prt_ ps))
-- gather syncategorematic words
- allSyns fun = errVal [] $ do
- tss <- allLinsOfFun gr fun
+ allSyns fun@(m,f) = errVal [] $ do
+ tss <- allLinsOfFun gr (CIQ a f)
let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
return $ concat $ map wordsInTerm ss
prSyn f s = (s, "+<syncategorematic>" ++ tagPrt f)
-- all words, Left from lexical rules and Right syncategorematic
- allItems = [lexRole t (f,c) | (f,c) <- allFuns, t <- lookType f] where
- allFuns = allFunsWithValCat ab
- lookType = errVal [] . liftM (:[]) . lookupFunType ab
- lexRole t = case typeForm t of
+ allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
+ allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]]
+ lexRole t = case M.typeForm t of
Ok ([],_,_) -> Left
_ -> Right
--}
-- printing full-form lexicon and results
@@ -82,8 +86,8 @@ prMorphoAnalysisShort :: (String,[String]) -> String
prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where
w' = if null fs then w +++ "*" else ""
-tagPrt :: Print a => a -> String
-tagPrt = ("+" ++) . prt --- could look up print name in grammar
+tagPrt :: Print a => (a,a) -> String
+tagPrt (m,c) = "+" ++ prt c --- module name
-- print all words recognized
@@ -99,6 +103,15 @@ prFullForm :: Morpho -> String
prFullForm = unlines . map prOne . tree2list where
prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps)
+-- using Huet's unglueing method to find word boundaries
+---- it would be much better to use a trie also for morphological analysis,
+---- so this is for the sake of experiment
+---- Moreover, we should specify the cases in which this happens - not all words
+
+decomposeWords :: Morpho -> String -> [String]
+decomposeWords mo s = errVal (words s) $
+ decomposeSimple (tcompileSimple (map fst $ tree2list mo)) s
+
-- auxiliaries
mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b])
diff --git a/src/Today.hs b/src/Today.hs
index 4d0cffe81..719a3da7d 100644
--- a/src/Today.hs
+++ b/src/Today.hs
@@ -1 +1 @@
-module Today where today = "Fri Nov 7 16:15:47 CET 2003"
+module Today where today = "Mon Nov 10 09:43:09 CET 2003"