summaryrefslogtreecommitdiff
path: root/src/GF/UseGrammar/Morphology.hs
blob: 3aeb08dc72ad07090c5908ae1e879139e89ed270 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
----------------------------------------------------------------------
-- |
-- Module      : Morphology
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:49 $ 
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
--
-- Morphological analyser constructed from a GF grammar.
--
-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
-- (18\/11\/2003) but this may change since we have to use a trie 
-- for decompositions and also want to use it in the parser
-----------------------------------------------------------------------------

module GF.UseGrammar.Morphology where

import GF.Canon.AbsGFC
import GF.Canon.GFC
import GF.Grammar.PrGrammar
import GF.Canon.CMacros
import GF.Canon.Look
import GF.Grammar.LookAbs
import GF.Infra.Ident
import qualified GF.Grammar.Macros as M
import GF.UseGrammar.Linear

import GF.Data.Operations
import GF.Data.Glue

import Data.Char
import Data.List (sortBy, intersperse)
import Control.Monad (liftM)
import GF.Data.Trie2

-- construct a morphological analyser from a GF grammar. AR 11/4/2001

-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
-- (18\/11\/2003) but this may change since we have to use a trie 
-- for decompositions and also want to use it in the parser

type Morpho = Trie Char String

emptyMorpho :: Morpho
emptyMorpho = emptyTrie

appMorpho :: Morpho -> String -> (String,[String])
appMorpho = appMorphoOnly
---- add lookup for literals

-- without literals
appMorphoOnly :: Morpho -> String -> (String,[String])
appMorphoOnly m s = trieLookup m s

-- recognize word, exluding literals
isKnownWord :: Morpho -> String -> Bool
isKnownWord mo = not . null . snd . appMorphoOnly mo

mkMorpho :: CanonGrammar -> Ident -> Morpho 
mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where

  comp = ccompute gr [] -- to undo 'values' optimization

  mkOne (Left (fun,c))  = map (prOne fun c) $ allLins fun
  mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
  
  -- gather forms of lexical items
  allLins fun@(m,f) = errVal [] $ do
    ts <- lookupLin gr (CIQ a f) >>= comp >>= allAllLinValues  
    ss <- mapM (mapPairsM (mapPairsM (liftM wordsInTerm . comp))) ts
    return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
  prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)])

  -- gather syncategorematic words
  allSyns fun@(m,f) = errVal [] $ do
    tss <- allLinsOfFun gr (CIQ a f) 
    let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
    return $ concat $ map wordsInTerm ss
  prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f])

  -- all words, Left from lexical rules and Right syncategorematic
  allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
    allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]]
    lexRole t = case M.typeForm t of
      Ok ([],_,_) -> Left
      _ -> Right

-- printing full-form lexicon and results

prMorpho :: Morpho -> String
prMorpho = unlines . map prMorphoAnalysis . collapse

prMorphoAnalysis :: (String,[String]) -> String
prMorphoAnalysis (w,fs0) = 
  let fs = filter (not . null) fs0  in
  if null fs then w ++++ "*" else unlines (w:fs)

prMorphoAnalysisShort :: (String,[String]) -> String
prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where
  w' = if null fs then w +++ "*" else ""

tagPrt :: Print a => (a,a) -> String
tagPrt (m,c) = "+" ++ prt c --- module name

-- | print all words recognized
allMorphoWords :: Morpho -> [String]
allMorphoWords = map fst . collapse

-- analyse running text and show results either in short form or on separate lines

-- | analyse running text and show just the word, with "*" if not found
morphoTextStatus :: Morpho -> String -> String
morphoTextStatus mo = unlines . map (prMark . appMorpho mo) . words where
  prMark (w,fs) = if null fs then "*" +++ w else w

-- | analyse running text and show results in short form, one word per line
morphoTextShort :: Morpho -> String -> String
morphoTextShort mo = unlines . map (prMorphoAnalysisShort . appMorpho mo) . words

-- | analyse running text and show results on separate lines
morphoText :: Morpho -> String -> String
morphoText mo = unlines . map (('\n':) . prMorphoAnalysis . appMorpho mo) . words

-- format used in the Italian Verb Engine
prFullForm :: Morpho -> String
prFullForm = unlines . map prOne . collapse where
  prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps)

-- using Huet's unglueing method to find word boundaries
---- it would be much better to use a trie also for morphological analysis,
---- so this is for the sake of experiment
---- Moreover, we should specify the cases in which this happens - not all words

decomposeWords :: Morpho -> String -> [String]
decomposeWords mo s = errVal (words s) $ decomposeSimple mo s