1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
|
--# -path=.:../abstract:../common:../../prelude
--1 Russian auxiliary operations.
-- This module contains operations that are needed to make the
-- resource syntax work. To define everything that is needed to
-- implement $Test$, it moreover contains regular lexical
-- patterns needed for $Lex$.
resource ResRus = ParamX ** open Prelude in {
flags coding=utf8 ; optimize=all ;
--2 Enumerated parameter types
--
-- These types are the ones found in school grammars.
-- Their parameter values are atomic.
-- Some parameters, such as $Number$, are inherited from $ParamX$.
param
-- Grammatical gender.
Gender = Masc | Fem | Neut ;
-- Cases. The prepositional case carries a PrepKind sub-parameter.
Case = Nom | Gen | Dat | Acc | Inst | Prepos PrepKind ;
-- Sub-kind of the prepositional case.
-- NOTE(review): PrepVNa presumably marks the special form used after "в"/"на"
-- (the so-called second prepositional) - confirm against the noun paradigms.
PrepKind = PrepOther | PrepVNa;
-- Animacy of a noun.
Animacy = Animate | Inanimate ;
-- Verb voice: active or passive.
Voice = Act | Pass ;
-- Verb aspect.
Aspect = Imperfective | Perfective ;
-- The three Russian tenses (the past is called PastRus here).
RusTense = Present | PastRus | Future ;
-- Degree = Pos | Comp | Super ;
-- Whether a pronoun form is the one used after a preposition.
AfterPrep = Yes | No ;
-- Non-possessive form, or a possessive form indexed by a GenNum value.
-- NOTE(review): the GenNum is presumably that of the possessed noun - confirm.
Possessive = NonPoss | Poss GenNum ;
-- Anteriority = Simul | Anter ;
-- Clause forms: indicative (with tense and anteriority), conditional,
-- infinitive and imperative.
-- NOTE(review): Anteriority is not declared here (the local declaration above
-- is commented out); presumably it is inherited from ParamX - confirm.
ClForm = ClIndic RusTense Anteriority | ClCondit | ClInfinit | ClImper;
-- "naked infinitive" clauses
-- A number of Russian nouns have common gender. They can
-- denote both males and females: "умница" (a clever person), "инженер" (an engineer).
-- We overlook this phenomenon for now.
-- The AfterPrep parameter is introduced in order to describe
-- the variations of the third person personal pronoun forms
-- depending on whether they come after a preposition or not.
-- Declension forms depend on Case, Animacy, Gender:
-- "большие дома" - "больших домов" (big houses - big houses'),
-- Animacy plays a role only in the Accusative case (Masc Sg and Plural forms):
-- Accusative Animate = Genitive, Accusative Inanimate = Nominative
-- "я люблю большие дома" - "я люблю больших мужчин"
-- (I love big houses - I love big men);
-- and on Number: "большой дом" - "большие дома"
-- (a big house - big houses).
-- The plural never makes a gender distinction.
-- Combined gender/number value: the singular (GSg) records a gender,
-- the plural (GPl) is a single value without gender.
GenNum = GSg Gender | GPl ;
-- Coercions between the compound gen-num type and gender and number:
oper
-- Builds a GenNum from a gender and a number.
-- The gender is kept only in the singular; the plural is always GPl.
gennum : Gender -> Number -> GenNum = \gend,num ->
  case num of {
    Pl => GPl ;
    Sg => GSg gend
  } ;
-- Projects the number out of a GenNum value.
numGenNum : GenNum -> Number = \gnValue ->
  case gnValue of {
    GPl   => Pl ;
    GSg _ => Sg
  } ;
-- The Possessive parameter is introduced in order to describe
-- the possessives of personal pronouns, which are used in the
-- Genitive constructions like "моя мама" (my mother) instead of
-- "мама моя" (the mother of mine).
--2 For $Noun$
-- Nouns decline according to number and case.
-- For the sake of a shorter description these parameters are
-- combined in the type NForm.
param
-- A noun form: a number paired with a case.
NForm = NF Number Case ;
-- Real parameter types (i.e. ones on which words and phrases depend)
-- are mostly hierarchical. The alternative would be cross-products of
-- simple parameters, but this would usually overgenerate.
-- However, we use the cross-products in complex cases
-- (for example, aspect and tense parameter in the verb description)
-- where the relationships between the parameters are non-trivial,
-- even though we are aware that some combinations do not exist
-- (for example, present perfective does not exist, but removing
-- this combination would lead to having different descriptions
-- for perfective and imperfective verbs, which we do not want for the
-- sake of uniformity).
-- A pronoun form: case, whether the form follows a preposition, and
-- whether it is a possessive form.
param PronForm = PF Case AfterPrep Possessive;
-- Pronoun type: an inflection table over PronForm plus the inherent
-- number, person, pronominal gender and the Boolean flag `pron`.
oper Pronoun = {
  s    : PronForm => Str ;
  n    : Number ;
  p    : Person ;
  g    : PronGen ;
  pron : Bool
} ;
-- Gender is not morphologically determined for first
-- and second person pronouns.
-- Pronominal gender: either a known gender (PGen) or none (PNoGen).
param PronGen = PGen Gender | PNoGen ;
-- The following coercion is useful:
oper
-- Coerces a pronominal gender to a plain Gender.
-- A pronoun without gender (PNoGen) is treated as masculine.
pgen2gen : PronGen -> Gender = \pronGender ->
  case pronGender of {
    PNoGen    => Masc ; ---- variants {Masc ; Fem} --- the best we can do for ya, tu
    PGen gend => gend
  } ;
oper
-- Extracts the case component of a pronoun form; the AfterPrep and
-- Possessive components are ignored.
extCase: PronForm -> Case = \form ->
  case form of {
    PF cas _ _ => cas
  } ;
-- Assembles a pronoun form from its three components.
mkPronForm: Case -> AfterPrep -> Possessive -> PronForm =
  \cas,afterPrep,poss -> PF cas afterPrep poss ;
-- Common noun: an inflection table over NForm plus inherent gender and animacy.
CommNoun = {
  s    : NForm => Str ;
  g    : Gender ;
  anim : Animacy
} ;
-- Noun phrase: inflected over PronForm, with inherent number, person,
-- pronominal gender, animacy and the Boolean flag `pron`.
NounPhrase : Type = {
  s    : PronForm => Str ;
  n    : Number ;
  p    : Person ;
  g    : PronGen ;
  anim : Animacy ;
  pron : Bool
} ;
-- Builds a third-person, non-pronominal noun phrase from a common noun
-- with the number fixed. Only the case component of the requested
-- pronoun form is used to select the noun form.
mkNP : Number -> CommNoun -> NounPhrase = \num,noun -> {
  s    = \\pf => noun.s ! NF num (extCase pf) ;
  n    = num ;
  p    = P3 ;
  g    = PGen noun.g ;
  anim = noun.anim ;
  pron = False
} ;
-- Uses an adjective-typed word as a noun phrase by taking its inanimate
-- neuter singular form in the requested case. The result is singular,
-- neuter, third person, inanimate and not marked as a pronoun.
det2NounPhrase : Adjective -> NounPhrase = \adj -> {
  s    = \\pf => adj.s ! (AF (extCase pf) Inanimate (GSg Neut)) ;
  n    = Sg ;
  p    = P3 ;
  g    = PGen Neut ;
  anim = Inanimate ;
  pron = False
} ;
-- Lifts a pronoun to a noun phrase. The number and animacy come from
-- the arguments; everything else is copied from the pronoun.
pron2NounPhraseNum : Pronoun -> Animacy -> Number -> NounPhrase = \pronoun, animacy, number -> {
  s    = pronoun.s ;
  n    = number ;
  p    = pronoun.p ;
  g    = pronoun.g ;
  anim = animacy ;
  pron = pronoun.pron
} ;
-- Agreement of $NP$ is a record. We'll add $Gender$ later.
-- oper Agr = {n : Number ; p : Person} ;
----2 For $Verb$
-- Mood is the main verb classification parameter.
-- The verb mood can be infinitive, subjunctive, imperative, and indicative.
-- Note: subjunctive mood is analytical, i.e. formed from the past form of the
-- indicative mood plus the particle "бы". That is why they have the same GenNum
-- parameter. We choose to keep the "redundant" form in order to indicate
-- the presence of the subjunctive mood in Russian verbs.
-- Aspect and Voice parameters are present in every mood, so Voice is put
-- before the mood parameter in the verb form description hierarchy.
-- Moreover Aspect is regarded as an inherent parameter of a verb entry.
-- The primary reason for that is that one imperfective form can have several
-- perfective forms: "ломать" - "сломать" - "поломать" (to break).
-- Besides, the perfective form could be formed from imperfective
-- by prefixation, but also by taking a completely different stem:
-- "говорить"-"сказать" (to say). In the latter case it is even natural to
-- regard them as different verb entries.
-- Another reason is that looking at the Aspect as an inherent verb parameter
-- seems to be customary in other similar projects:
-- http://starling.rinet.ru/morph.htm
-- Note: Of course, the whole inflection table has many redundancies
-- in a sense that many verbs do not have all grammatically possible
-- forms. For example, passive does not exist for the verb
-- "любить" (to love), but exists for the verb "ломаться" (to break).
-- In the present tense verbs do not conjugate according to gender;
-- nevertheless the parameter GenNum is used instead of Number, so that
-- the forms can also be used, for example, as adjectives in predication.
-- Depending on the tense verbs conjugate according to combinations
-- of gender, person and number of the verb objects.
-- Participles (Present and PastRus) and Gerund forms are not included in the
-- current description. This is the verb type used in the lexicon:
-- Lexicon-level verb: an inflection table over VerbForm plus the
-- inherent aspect.
oper Verbum : Type = {
  s   : VerbForm => Str ;
  asp : Aspect
} ;
param
-- A verb form is a voice together with a conjugation slot.
VerbForm = VFORM Voice VerbConj ;
-- Conjugation slots: indicative (gender/number and tense), imperative
-- (number and person), infinitive, and subjunctive (gender/number).
VerbConj = VIND GenNum VTense | VIMP Number Person | VINF | VSUB GenNum ;
-- Tense inside the indicative: present and future carry a person,
-- the past does not.
VTense = VPresent Person | VPast | VFuture Person ;
oper
-- Combines a Russian tense with a person into a VTense.
-- The person is dropped for the past tense.
getVTense : RusTense -> Person -> VTense= \tense,pers ->
  case tense of {
    PastRus => VPast ;
    Present => VPresent pers ;
    Future  => VFuture pers
  } ;
-- Maps the language-independent Tense to the Russian three-way tense:
-- Pres becomes Present, Fut becomes Future, and every other value falls
-- through to PastRus.
-- NOTE(review): the trailing pragma comments on the Fut and default branches
-- look like the resource library's "notpresent" preprocessor tags, which
-- would drop those lines in present-tense-only builds - confirm, and keep
-- each tag on the same line as its branch.
getTense : Tense -> RusTense= \t ->
case t of { Pres => Present
; Fut => Future --# notpresent
; _ => PastRus --# notpresent
} ;
-- Returns the voice component of a verb form.
getVoice: VerbForm -> Voice = \form ->
  case form of {
    VFORM voice _ => voice
  } ;
-- Case forms of the reflexive pronoun "себя".
-- The nominative is given as the empty string.
oper sebya : Case => Str = table {
  Nom      => "" ;
  Gen      => "себя" ;
  Dat      => "себе" ;
  Acc      => "себя" ;
  Inst     => "собой" ;
  -- Case has no constructor called Prep (the prepositional case is
  -- Prepos PrepKind), so a bare Prep here would only act as a catch-all
  -- variable. Match the prepositional case explicitly, for both PrepKind values.
  Prepos _ => "себе"
} ;
-- Syntactic verb: inflected by clause form, gender/number and person,
-- with inherent aspect (asp) and voice (w).
Verb : Type = {
  s   : ClForm => GenNum => Person => Str ;
  asp : Aspect ;
  w   : Voice
} ;
-- Verb phrases are discontinuous: the parts of a verb phrase are
-- (s) an inflected verb, (s2) verb adverbials (not negation though), and
-- (s3) complement. This discontinuity is needed in sentence formation
-- to account for word order variations.
-- Verb phrase: a Verb extended with an adverbial string (s2), a
-- complement that varies by gender and number (s3), and the Boolean
-- flag negBefore.
VerbPhrase : Type = Verb ** {
  s2        : Str ;
  s3        : Gender => Number => Str ;
  negBefore : Bool
} ;
-- This is one instance of Gazdar's *slash categories*, corresponding to his
-- $S/NP$.
-- We cannot have - nor would we want to have - a productive slash-category former.
-- Perhaps a handful more will be needed.
--
-- Notice that the slash category has the same relation to sentences as
-- transitive verbs have to verbs: it's like a *sentence taking a complement*.
-- A clause extended with the complement marker (c2) of its missing argument.
SlashNounPhrase = Clause ** {c2 : Complement} ;
-- A clause: a string that depends on polarity and clause form.
Clause = {s : Polarity => ClForm => Str} ;
-- This is the traditional $S -> NP VP$ rule.
-- Predication: combines a subject noun phrase with a verb phrase.
-- The subject is put in the nominative (non-possessive, not after a
-- preposition); the verb and the complement agree with the subject's
-- gender, number and person. Word order is subject, adverbial (s2),
-- verb, complement (s3). The polarity argument is not used, and the
-- resulting c2 is an empty string with nominative case.
predVerbPhrase : NounPhrase -> VerbPhrase -> SlashNounPhrase =
  \subj, vp -> {
    s = \\pol,clf =>
      let {
        subjGender = pgen2gen subj.g ;
        nomSubj    = subj.s ! (mkPronForm Nom No NonPoss) ;
        adverbial  = vp.s2 ;
        finiteVerb = vp.s ! clf ! (gennum subjGender subj.n) ! subj.p ;
        complement = vp.s3 ! subjGender ! subj.n
      }
      in nomSubj ++ adverbial ++ finiteVerb ++ complement ;
    c2 = {s = "" ; c = Nom}
  } ;
-- Questions are either direct ("Ты счастлив?")
-- or indirect ("Потом он спросил счастлив ли ты").
param
-- Direct (DirQ) versus indirect (IndirQ) question.
QuestForm = DirQ | IndirQ ;
---- The order of sentence is needed already in $VP$.
--
-- Order = ODir | OQuest ;
oper
-- Selects the active-voice VerbForm for a clause form and agreement
-- features. Indicative clauses use the gender/number and the tense
-- (with person where the tense has one); the conditional uses the
-- subjunctive slot; the imperative uses number and person; the
-- infinitive ignores agreement. Anteriority is ignored.
getActVerbForm : ClForm -> Gender -> Number -> Person -> VerbForm = \clf,g,n, p ->
  let {
    gn = gennum g n
  }
  in case clf of {
    ClIndic tense _ => VFORM Act (VIND gn (getVTense tense p)) ;
    ClCondit        => VFORM Act (VSUB gn) ;
    ClImper         => VFORM Act (VIMP n p) ;
    ClInfinit       => VFORM Act VINF
  } ;
-- Selects the passive-voice VerbForm for a clause form and agreement
-- features. Indicative clauses use the gender/number and the tense
-- (with person where the tense has one); the conditional uses the
-- subjunctive slot; the imperative uses number and person; the
-- infinitive ignores agreement. Anteriority is ignored.
getPassVerbForm : ClForm -> Gender -> Number -> Person -> VerbForm = \clf,g,n, p ->
  let {
    gn = gennum g n
  }
  in case clf of {
    ClIndic tense _ => VFORM Pass (VIND gn (getVTense tense p)) ;
    ClCondit        => VFORM Pass (VSUB gn) ;
    ClImper         => VFORM Pass (VIMP n p) ;
    ClInfinit       => VFORM Pass VINF
  } ;
--2 For $Adjective$
-- The short form is only inflected in gender and number.
-- Fixing this would require changing the Degree type.
param
-- Adjective forms: the full form (AF) by case, animacy and gender/number;
-- the short form (AFShort) by gender/number only; and a single
-- uninflected form AdvF.
-- NOTE(review): AdvF is presumably the adverbial form of the adjective - confirm.
AdjForm = AF Case Animacy GenNum | AFShort GenNum | AdvF;
oper
-- A complement marker: a string `s` (may be empty; presumably a
-- preposition - confirm) and the case `c` that goes with it.
Complement = {s : Str ; c : Case} ;
-- Type of words that inflect for case only.
oper Refl ={s: Case => Str};
-- Case table with "сам" in the nominative and the forms of "себя"
-- in all other cases.
oper sam: Refl = {
  s = table {
    Nom            => "сам" ;
    Gen | Acc      => "себя" ;
    Dat | Prepos _ => "себе" ;
    Inst           => "собой"
  }
} ;
-- Builds a GenNum from a pronominal gender and a number. The gender
-- goes through pgen2gen, so a genderless pronoun (e.g. "I") is
-- assumed to be male in the singular; the plural is always GPl.
pgNum : PronGen -> Number -> GenNum = \pronGender,num ->
  gennum (pgen2gen pronGender) num ;
-- _ => variants {GSg Masc ; GSg Fem} } ;
-- The "variants" version causes a "no term variants" error during linearization.
-- Projects the gender out of a GenNum. The plural, which carries no
-- gender, is mapped to Neut.
oper genGNum : GenNum -> Gender = \gnValue ->
  case gnValue of {
    GSg gend => gend ;
    GPl      => Neut
  } ;
-- Number of an adjective form. The form AdvF counts as singular.
oper numAF: AdjForm -> Number = \form ->
  case form of {
    AF _ _ gnValue  => numGenNum gnValue ;
    AFShort gnValue => numGenNum gnValue ;
    AdvF            => Sg
  } ;
-- Gender of an adjective form. The form AdvF counts as neuter.
oper genAF: AdjForm -> Gender = \form ->
  case form of {
    AF _ _ gnValue  => genGNum gnValue ;
    AFShort gnValue => genGNum gnValue ;
    AdvF            => Neut
  } ;
-- Case of an adjective form. Only full forms carry a case; the short
-- form and AdvF are treated as nominative.
oper caseAF: AdjForm -> Case = \form ->
  case form of {
    AF cas _ _ => cas ;
    AFShort _  => Nom ;
    AdvF       => Nom
  } ;
-- The Degree parameter should also be more complex, since most Russian
-- adjectives have two comparative forms:
-- attributive (syntactic (compound), declinable) -
-- "более высокий" (corresponds to "more high")
-- and predicative (indeclinable)- "выше" (higher) and more than one
-- superlative forms: "самый высокий" (corresponds to "the most high") -
-- "высочайший" (the highest).
-- Even one more parameter independent of the degree can be added,
-- since Russian adjectives in the positive degree also have two forms:
-- long (attributive and predicative) - "высокий" (high) and short (predicative) - "высок"
-- although this parameter will not be exactly orthogonal to the
-- degree parameter.
-- Short form has no case declension, so in principle
-- it can be considered as an additional case.
-- Note: although the predicative usage of the long
-- form is perfectly grammatical, it can have a slightly different meaning
-- compared to the short form.
-- For example: "он - больной" (long, predicative) vs.
-- "он - болен" (short, predicative).
--3 Adjective phrases
--
-- An adjective phrase may contain a complement, e.g. "моложе Риты".
-- Then it is used as postfix in modification, e.g. "человек, моложе Риты".
-- Boolean flag telling whether an adjective phrase is used as a postfix modifier.
IsPostfixAdj = Bool ;
-- Simple adjectives are not postfix:
-- Adjective type includes both non-degree adjective classes:
-- possessive ("мамин" [mother's], "лисий" [fox's])
-- and relative ("русский" [Russian]) adjectives.
-- An adjective: an inflection table over AdjForm.
Adjective : Type = {s : AdjForm => Str} ;
-- A special type of adjectives just having positive forms
-- (for semantic reasons) is useful, e.g. "финский".
-- An adjective phrase: an Adjective plus the postfix flag `p`.
AdjPhrase = Adjective ** {p : IsPostfixAdj} ;
-- Makes an adjective phrase by attaching the given postfix flag to an adjective.
mkAdjPhrase : Adjective -> IsPostfixAdj -> AdjPhrase =
  \adj, isPostfix -> adj ** {p = isPostfix} ;
----2 For $Relative$
--
-- RAgr = RNoAg | RAg {n : Number ; p : Person} ;
-- RCase = RPrep | RC Case ;
--
--2 For $Numeral$
-- Numeral form classes.
-- NOTE(review): presumably units, teens, tens and hundreds - confirm
-- against the numeral grammar.
param DForm = unit | teen | ten | hund ;
-- NOTE(review): presumably attributive versus independent use of a
-- numeral - confirm against the numeral grammar.
param Place = attr | indep ;
-- Selects the form of a counted word (see `mille`).
-- NOTE(review): presumably nominative, genitive singular and genitive
-- plural - confirm.
param Size = nom | sgg | plg ;
--param Gend = masc | fem | neut ;
-- Forms of "тысяча" (thousand) selected by Size: one form for nom, one
-- for sgg, and a default for the remaining value.
-- NOTE(review): the braces around nom and sgg presumably force the
-- lower-case names to be read as constructors rather than as pattern
-- variables - confirm before reformatting.
oper mille : Size => Str = table {
{nom} => "тысяча" ;
{sgg} => "тысячи" ;
_ => "тысяч"} ;
-- Builds a gender-indexed table that returns the same string for every gender.
oper gg : Str -> Gender => Str = \str -> \\_ => str ;
-- CardOrd = NCard | NOrd ;
}
|