summaryrefslogtreecommitdiff
path: root/src/GF/UseGrammar/Morphology.hs
blob: 62aeb772566c009050379e57f921cbf34cacfe16 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
----------------------------------------------------------------------
-- |
-- Module      : Morphology
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/02/24 11:46:39 $ 
-- > CVS $Author: peb $
-- > CVS $Revision: 1.7 $
--
-- Morphological analyser constructed from a GF grammar.
--
-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
-- (18\/11\/2003) but this may change since we have to use a trie 
-- for decompositions and also want to use it in the parser
-----------------------------------------------------------------------------

module Morphology where

import AbsGFC
import GFC
import PrGrammar
import CMacros
import LookAbs
import Ident
import qualified Macros as M
import Linear

import Operations
import Glue

import Char
import List (sortBy, intersperse)
import Monad (liftM)
import Trie2

-- construct a morphological analyser from a GF grammar. AR 11/4/2001

-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
-- (18\/11\/2003) but this may change since we have to use a trie 
-- for decompositions and also want to use it in the parser

type Morpho = Trie Char String

emptyMorpho :: Morpho
emptyMorpho = emptyTrie

appMorpho :: Morpho -> String -> (String,[String])
appMorpho = appMorphoOnly
---- add lookup for literals

-- without literals
appMorphoOnly :: Morpho -> String -> (String,[String])
appMorphoOnly m s = trieLookup m s

-- recognize word, exluding literals
isKnownWord :: Morpho -> String -> Bool
isKnownWord mo = not . null . snd . appMorphoOnly mo

mkMorpho :: CanonGrammar -> Ident -> Morpho 
mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where

  mkOne (Left (fun,c))  = map (prOne fun c) $ allLins fun
  mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
  
  -- gather forms of lexical items
  allLins fun@(m,f) = errVal [] $ do
    ts <- allLinsOfFun gr (CIQ a f)  
    ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts
    return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
  prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)])

  -- gather syncategorematic words
  allSyns fun@(m,f) = errVal [] $ do
    tss <- allLinsOfFun gr (CIQ a f) 
    let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
    return $ concat $ map wordsInTerm ss
  prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f])

  -- all words, Left from lexical rules and Right syncategorematic
  allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
    allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]]
    lexRole t = case M.typeForm t of
      Ok ([],_,_) -> Left
      _ -> Right

-- printing full-form lexicon and results

prMorpho :: Morpho -> String
prMorpho = unlines . map prMorphoAnalysis . collapse

prMorphoAnalysis :: (String,[String]) -> String
prMorphoAnalysis (w,fs) = unlines (w:fs)

prMorphoAnalysisShort :: (String,[String]) -> String
prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where
  w' = if null fs then w +++ "*" else ""

tagPrt :: Print a => (a,a) -> String
tagPrt (m,c) = "+" ++ prt c --- module name

-- | print all words recognized
allMorphoWords :: Morpho -> [String]
allMorphoWords = map fst . collapse

-- analyse running text and show results either in short form or on separate lines

-- | analyse running text and show results in short form
morphoTextShort :: Morpho -> String -> String
morphoTextShort mo = unwords . map (prMorphoAnalysisShort . appMorpho mo) . words

-- | analyse running text and show results on separate lines
morphoText :: Morpho -> String -> String
morphoText mo = unlines . map (('\n':) . prMorphoAnalysis . appMorpho mo) . words

-- format used in the Italian Verb Engine
prFullForm :: Morpho -> String
prFullForm = unlines . map prOne . collapse where
  prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps)

-- using Huet's unglueing method to find word boundaries
---- it would be much better to use a trie also for morphological analysis,
---- so this is for the sake of experiment
---- Moreover, we should specify the cases in which this happens - not all words

decomposeWords :: Morpho -> String -> [String]
decomposeWords mo s = errVal (words s) $ decomposeSimple mo s