summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-10 16:00:42 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-10 16:00:42 +0000
commit9628eb6314bfcb827142d1869626696de101e245 (patch)
tree6800242a4ce3cd45a6dda7a4eb93d9aab588e5be
parent9c75bddcfc0340b6e5dd06a23d686113532d2726 (diff)
preliminary version of morpho analysis
-rw-r--r--src-3.0/GF/Command/Commands.hs34
-rw-r--r--src-3.0/PGF/Morphology.hs46
-rw-r--r--src-3.0/PGF/ShowLinearize.hs1
3 files changed, 78 insertions, 3 deletions
diff --git a/src-3.0/GF/Command/Commands.hs b/src-3.0/GF/Command/Commands.hs
index 6aa19dfba..292e802b9 100644
--- a/src-3.0/GF/Command/Commands.hs
+++ b/src-3.0/GF/Command/Commands.hs
@@ -14,6 +14,7 @@ import PGF.CId
import PGF.ShowLinearize
import PGF.Macros
import PGF.Data ----
+import PGF.Morphology
import GF.Compile.Export
import GF.Infra.UseIO
import GF.Data.ErrM ----
@@ -60,9 +61,9 @@ commandHelp full (co,info) = unlines $ [
"flags: " ++ unwords (flags info)
] else []
--- this list must be kept sorted by the command name!
+-- this list no longer needs to be kept sorted by the command name
allCommands :: PGF -> Map.Map String CommandInfo
-allCommands pgf = Map.fromAscList [
+allCommands pgf = Map.fromList [
("cc", emptyCommandInfo {
longname = "compute_concrete",
synopsis = "computes concrete syntax term using the source grammar",
@@ -144,15 +145,39 @@ allCommands pgf = Map.fromAscList [
options = ["all","record","table","term"],
flags = ["lang"]
}),
+
+ ("ma", emptyCommandInfo {
+ longname = "morpho_analyse",
+ synopsis = "print the morphological analyses of all words in the string",
+ explanation = unlines [
+ "Prints all the analyses of space-separated words in the input string,",
+ "using the morphological analyser of the actual grammar (see command pf)"
+ ],
+ exec = \opts ->
+ return . fromString . unlines .
+ map prMorphoAnalysis . concatMap (morphos opts) .
+ concatMap words . toStrings
+ }),
+
("p", emptyCommandInfo {
longname = "parse",
synopsis = "parse a string to abstract syntax expression",
explanation = "Shows all trees (expressions) returned for String by the actual\n"++
"grammar (overridden by the -lang flag), in the category S (overridden\n"++
"by the -cat flag).",
- exec = \opts -> return . fromTrees . concatMap (par opts). toStrings,
+ exec = \opts -> return . fromTrees . concatMap (par opts) . toStrings,
flags = ["cat","lang"]
}),
+ ("pf", emptyCommandInfo {
+ longname = "print_fullform",
+ synopsis = "print the full-form lexicon of the actual grammar",
+ explanation = unlines [
+ "Prints all the strings in the actual grammar with their possible analyses"
+ ],
+ exec = \opts _ ->
+ return $ fromString $ concatMap
+ (prFullFormLexicon . buildMorpho pgf . mkCId) $ optLangs opts
+ }),
("pg", emptyCommandInfo {
longname = "print_grammar",
synopsis = "print the actual grammar with the given printer",
@@ -206,3 +231,6 @@ allCommands pgf = Map.fromAscList [
prGrammar opts = case valIdOpts "printer" "" opts of
"cats" -> unwords $ categories pgf
v -> prPGF (read v) pgf (prCId (absname pgf))
+
+ -- Look up one word in the lexicon of each language returned by
+ -- 'optLangs opts'; the result holds one list of analyses per language.
+ -- The lexicons depend only on the options, so they are bound outside the
+ -- lambda: a partial application such as 'morphos opts' builds each lexicon
+ -- once and reuses it for every word, instead of rebuilding it per word.
+ morphos opts = \s -> [lookupMorpho mo s | mo <- lexicons]
+   where
+     lexicons = [buildMorpho pgf (mkCId la) | la <- optLangs opts]
diff --git a/src-3.0/PGF/Morphology.hs b/src-3.0/PGF/Morphology.hs
new file mode 100644
index 000000000..97def6b9a
--- /dev/null
+++ b/src-3.0/PGF/Morphology.hs
@@ -0,0 +1,46 @@
+module PGF.Morphology where
+
+import PGF.ShowLinearize
+import PGF.Data
+import PGF.CId
+
+import qualified Data.Map as Map
+import Data.List (intersperse)
+
+-- these 4 definitions depend on the data structure used
+
+type Morpho = Map.Map String [(Lemma,Analysis)] -- string -> its (lemma, analysis) pairs, as gathered by collectWords
+
+-- | Look up a word in the lexicon.  A word that has no entry yields
+-- 'noAnalysis' rather than a failure.
+lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
+lookupMorpho lexicon word = Map.findWithDefault noAnalysis word lexicon
+
+-- | Build the lexicon for the language named by the given 'CId' from the
+-- entries that 'collectWords' produces; entries sharing a key are merged
+-- with '++'.
+buildMorpho :: PGF -> CId -> Morpho
+buildMorpho pgf lang = Map.fromListWith (++) (collectWords pgf lang)
+
+-- | Render the whole lexicon.  For every key, in the order given by
+-- 'Map.assocs', the output contains the key, the separator " : " and the
+-- text produced by 'prMorphoAnalysis' for that key's entries.
+prFullFormLexicon :: Morpho -> String
+prFullFormLexicon = unlines . map showEntry . Map.assocs
+  where
+    showEntry (form, analyses) = form ++ " : " ++ prMorphoAnalysis analyses
+
+-- | Render a list of analyses, one line per pair: the lemma, a single
+-- space, then the analysis.
+prMorphoAnalysis :: [(Lemma,Analysis)] -> String
+prMorphoAnalysis = unlines . map (\(lemma, analysis) -> unwords [lemma, analysis])
+
+type Lemma = String -- in collectWords: the function identifier rendered with prCId
+type Analysis = String -- in collectWords: category name followed by the table labels, space-separated
+
+noAnalysis :: [(Lemma,Analysis)]
+noAnalysis = [] -- what lookupMorpho returns for a string that has no entry
+
+-- | Collect the lexicon entries of one language.  Only abstract functions
+-- whose type matches @DTyp [] c _@ are considered (presumably the functions
+-- that take no arguments -- TODO confirm against the definition of 'DTyp').
+-- Each such function is linearized with 'recLinearize', and every string
+-- found in the resulting record becomes one entry.
+collectWords :: PGF -> CId -> [(String, [(Lemma,Analysis)])]
+collectWords pgf lang = concatMap collOne candidates
+  where
+    -- the third component is the number of metavariable arguments to
+    -- supply to the function; it is always 0 here
+    candidates =
+      [(f,c,0) | (f,(DTyp [] c _,_)) <- Map.toList (funs (abstract pgf))]
+    collOne (f,c,i) =
+      fromRec f [prCId c] (recLinearize pgf lang (EApp f (replicate i (EMeta 888))))
+    -- 'path' holds the category name and the table labels passed so far,
+    -- most recent first; it is reversed when an entry is emitted
+    fromRec f path r = case r of
+      RR fields   -> concatMap (fromRec f path . snd) fields
+      RT branches -> concatMap (\(label, sub) -> fromRec f (label : path) sub) branches
+      RFV alts    -> concatMap (fromRec f path) alts
+      RS s        -> [(s, [(prCId f, unwords (reverse path))])]
+      RCon _      -> [] ---- inherent
+
diff --git a/src-3.0/PGF/ShowLinearize.hs b/src-3.0/PGF/ShowLinearize.hs
index a1c1e476a..98a0806ba 100644
--- a/src-3.0/PGF/ShowLinearize.hs
+++ b/src-3.0/PGF/ShowLinearize.hs
@@ -1,4 +1,5 @@
module PGF.ShowLinearize (
+ Record (..), recLinearize, --- used in PGF.Morphology
tableLinearize,
recordLinearize,
termLinearize,