1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
module GF.Text.Clitics (getClitics,getCliticsText) where
import Data.List
-- AR 6/2/2011
-- Analyse word as stem+clitic whenever
--   (1) clitic is in clitic list
--   (2) either
--       (a) stem is in Lexicon, or
--       (b) stem can be analysed as stem0+clitic0
--
-- Examples:
-- Italian amarmi = amar+mi
-- Finnish autossanikohan = autossa+ni+kohan
--
-- The analysis gives all results, including the case where the whole word is in Lexicon.
--
-- The clitics in the list are expected to be reversed.
-- | Split a word into a stem followed by zero or more clitics, returning
-- every possible analysis.
--
-- Arguments, in order:
--
--   * a lexicon membership test, applied to candidate stems in normal
--     (unreversed) spelling;
--   * the list of known clitics, each one spelled in reverse;
--   * the word to analyse.
--
-- Each analysis is a list @[stem, clitic1, ..., cliticN]@ in normal spelling
-- and surface order.  The unsplit word is the first analysis whenever it is
-- itself accepted by the lexicon test.  The result is empty if no analysis
-- ends in a stem accepted by the lexicon test.
--
-- Empty strings in the clitic list are ignored: stripping an empty clitic
-- leaves the word unchanged, so the search would never terminate.
getClitics :: (String -> Bool) -> [String] -> String -> [[String]]
getClitics isLex rclitics = map (reverse . map reverse) . clits . reverse
  where
    -- Only non-empty clitics shorten the word and so guarantee progress.
    usableClitics = filter (not . null) rclitics

    -- Works on the reversed word, so a clitic (a suffix of the word) is a
    -- prefix here.  Pieces come out reversed, outermost clitic first and
    -- stem last; the top-level 'map' undoes both reversals.
    clits rword =
      ifLex rword
        [ rclit : more
        | rclit <- usableClitics
        , Just stem <- [stripPrefix rclit rword]
        , more <- clits stem
        ]

    -- Prepend the unsplit analysis when the (re-reversed) word is a stem.
    ifLex w ws
      | isLex (reverse w) = [w] : ws
      | otherwise = ws
-- | Analyse every word of a text with 'getClitics' and render the results
-- as strings.
--
-- Within one word, the pieces of an analysis are joined by the token @&+@
-- with a space on either side.  One output string is produced for each way
-- of choosing one analysis per word, with the chosen renderings separated
-- by single spaces.  If any word has no analysis, the result is empty.
getCliticsText :: (String -> Bool) -> [String] -> [String] -> [String]
getCliticsText isLex rclitics ws =
  [unwords chosen | chosen <- mapM renderedAnalyses ws]
  where
    -- All analyses of a single word, each rendered as "stem &+ clitic ...".
    renderedAnalyses w =
      [intercalate " &+ " pieces | pieces <- getClitics isLex rclitics w]
-- example
--getClitics1 = getClitics exlex1 exclits1
--exlex1 = flip elem ["auto", "naise", "rahan","maa","maahan","maahankaan"]
--exclits1 = map reverse ["ni","ko","han","pas","nsa","kin","kaan"]
|