From 604ec0a8c9c0c2da963f3abb9c00ceadd399d3aa Mon Sep 17 00:00:00 2001
From: aarne
- grammar Ex (Eng Swe);
+ grammar Ex(Eng,Swe);
abstract Ex = { abstract {
cat
S ; NP ; VP ;
fun
- Pred : NP -> VP -> S ; Pred : NP VP -> S = (Pred);
+ Pred : NP -> VP -> S ; Pred : NP,VP -> S = (Pred);
She, They : NP ; She : -> NP = (She);
Sleep : VP ; Sleep : -> VP = (Sleep);
They : -> NP = (They);
} } ;
- ;
+
concrete Eng of Ex = { concrete Eng {
lincat
S = {s : Str} ;
@@ -122,7 +123,7 @@ due to the alphabetical sorting of GFCC grammars.
param
Num = Sg | Pl ;
lin
- Pred np vp = { Pred = [($0[1], $1[0][$0[0]])] ;
+ Pred np vp = { Pred = [(($0!1),(($1!0)!($0!0)))];
s = np.s ++ vp.s ! np.n} ;
She = {s = "she" ; n = Sg} ; She = [0, "she"];
They = {s = "they" ; n = Pl} ;
@@ -141,13 +142,12 @@ due to the alphabetical sorting of GFCC grammars.
param
Num = Sg | Pl ;
lin
- Pred np vp = { Pred = [($0[1], $1[0])];
+ Pred np vp = { Pred = [(($0!0),($1!0))];
s = np.s ++ vp.s} ;
She = {s = "hon"} ; She = ["hon"];
They = {s = "de"} ; They = ["de"];
Sleep = {s = "sover"} ; Sleep = ["sover"];
- } ;
- } ;
+ } } ;
@@ -161,9 +161,9 @@ the concrete languages. The abstract syntax and the concrete
syntaxes themselves follow.
- Grammar ::= Header ";" Abstract ";" [Concrete] ";" ;
+ Grammar ::= Header ";" Abstract ";" [Concrete] ;
Header ::= "grammar" CId "(" [CId] ")" ;
- Abstract ::= "abstract" "{" [AbsDef] "}" ";" ;
+ Abstract ::= "abstract" "{" [AbsDef] "}" ;
Concrete ::= "concrete" CId "{" [CncDef] "}" ;
@@ -224,24 +224,27 @@ literal.
Linearization terms (Term) are built as follows.
+Constructor names are shown to make the later code
+examples readable.
- Term ::= "[" [Term] "]" ; -- array
- Term ::= Term "[" Term "]" ; -- access to indexed field
- Term ::= "(" [Term] ")" ; -- sequence with ++
- Term ::= Tokn ; -- token
- Term ::= "$" Integer ; -- argument subtree
- Term ::= Integer ; -- array index
- Term ::= "[|" [Term] "|]" ; -- free variation
+ R. Term ::= "[" [Term] "]" ; -- array
+ P. Term ::= "(" Term "!" Term ")" ; -- access to indexed field
+ S. Term ::= "(" [Term] ")" ; -- sequence with ++
+ K. Term ::= Tokn ; -- token
+ V. Term ::= "$" Integer ; -- argument
+ C. Term ::= Integer ; -- array index
+ FV. Term ::= "[|" [Term] "|]" ; -- free variation
+ TM. Term ::= "?" ; -- linearization of metavariable
Tokens are strings or (maybe obsolescent) prefix-dependent variant lists.
- Tokn ::= String ; - Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; - Variant ::= [String] "/" [String] ; + KS. Tokn ::= String ; + KP. Tokn ::= "[" "pre" [String] "[" [Variant] "]" "]" ; + Var. Variant ::= [String] "/" [String] ;
Three special forms of terms are introduced by the compiler @@ -250,9 +253,9 @@ their presence makes grammars much more compact. Their semantics will be explained in a later section.
- Term ::= CId ; -- global constant
- Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
- Term ::= "(" Term "@" Term ")"; -- record parameter alias
+ F. Term ::= CId ; -- global constant
+ W. Term ::= "(" String "+" Term ")" ; -- prefix + suffix table
+ RP. Term ::= "(" Term "@" Term ")"; -- record parameter alias
Identifiers are like Ident in GF and GFC, except that
@@ -282,7 +285,7 @@ in which linearization is performed.
AS s -> R [kks (show s)] -- quoted
AI i -> R [kks (show i)]
AF d -> R [kks (show d)]
- AM -> R [kks "?"] ---- TODO: proper lincat
+ AM -> TM
where
lin = linExp mcfg lang
comp = compute mcfg lang
@@ -301,6 +304,7 @@ a string using the following algorithm.
K (KP s _) -> unwords s ---- prefix choice TODO
W s t -> s ++ realize t
FV (t:_) -> realize t
+ TM -> "?"
Since the order of record fields is not necessarily @@ -320,39 +324,48 @@ needed:
-The code is cleaned from debugging information present in the working -version. -
-- compute :: GFCC -> CId -> [Term] -> Term -> Term - compute mcfg lang args = comp where - comp trm = case trm of - P r (FV ts) -> FV $ Prelude.map (comp . P r) ts +The code is presented in one-level pattern matching, to +enable reimplementations in languages that do not permit +deep patterns (such as Java and C++). + ++ compute :: GFCC -> CId -> [Term] -> Term -> Term + compute mcfg lang args = comp where + comp trm = case trm of + P r p -> proj (comp r) (comp p) + RP i t -> RP (comp i) (comp t) + W s t -> W s (comp t) + R ts -> R $ Prelude.map comp ts + V i -> idx args (fromInteger i) -- already computed + F c -> comp $ look c -- not computed (if contains V) + FV ts -> FV $ Prelude.map comp ts + S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts + _ -> trm - P r p -> case (comp r, comp p) of + look = lookLin mcfg lang - -- for the suffix optimization - (W s (R ss), p') -> case comp $ idx ss (getIndex p') of - K (KS u) -> kks (s ++ u) + idx xs i = xs !! i - (r', p') -> comp $ (getFields r') !! (getIndex p') + proj r p = case (r,p) of + (_, FV ts) -> FV $ Prelude.map (proj r) ts + (W s t, _) -> kks (s ++ getString (proj t p)) + _ -> comp $ getField r (getIndex p) - RP i t -> RP (comp i) (comp t) - W s t -> W s (comp t) - R ts -> R $ Prelude.map comp ts - V i -> args !! (fromInteger i) -- already computed - S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts - F c -> comp $ lookLin mcfg lang -- not yet computed - FV ts -> FV $ Prelude.map comp ts - _ -> trm + getString t = case t of + K (KS s) -> s + _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR" - getIndex t = case t of - C i -> fromInteger i - RP p _ -> getIndex p + getIndex t = case t of + C i -> fromInteger i + RP p _ -> getIndex p + TM -> 0 -- default value for parameter + _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0 - getFields t = case t of - R rs -> rs - RP _ r -> getFields r + getField t i = case t of + R rs -> idx rs i + RP _ r -> getField r i + TM -> TM + _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t@@ -365,7 +378,7 @@ explanation. Global constants- Term ::= CId ; + Term ::= CId ;are shorthands for complex terms. They are produced by the @@ -378,7 +391,7 @@ its definition. Prefix-suffix tables
- Term ::= "(" String "+" Term ")" ; + Term ::= "(" String "+" Term ")" ;represent tables of word forms divided to the longest common prefix @@ -410,7 +423,7 @@ take effect. The most curious construct of GFCC is the parameter array alias,
- Term ::= "(" Term "@" Term ")"; + Term ::= "(" Term "@" Term ")";This form is used as the value of parameter records, such as the type @@ -451,8 +464,8 @@ we get the encoding The GFCC computation rules are essentially
- t [(i @ r)] = t[i] - (i @ r) [j] = r[j] + (t ! (i @ _)) = (t ! i) + ((_ @ r) ! j) =(r ! j)@@ -574,11 +587,11 @@ This expression must first be translated to a case expression, which can then be translated to the GFCC term- [2,5][$0[$1]] + ([2,5] ! ($0 ! $1))-assuming that the variable $np$ is the first argument and that its -$Number$ field is the second in the record. +assuming that the variable
npis the first argument and that its +Numberfield is the second in the record.This transformation of course has to be performed recursively, since @@ -693,9 +706,71 @@ The available commands are +
Interpreter in C++
++A base-line interpreter in C++ has been started. +Its main functionality is random generation of trees and linearization of them. +
++Here are some results from running the different interpreters, compared +to running the same grammar in GF, saved in
+.gfcmformat. +The grammar contains the English, German, and Norwegian +versions of Bronzeage. The experiment was carried out on +Ubuntu Linux laptop with 1.5 GHz Intel centrino processor. +
| + | GF | +gfcc(hs) | +gfcc++ | +
|---|---|---|---|
| program size | +7249k | +803k | +113k | +
| grammar size | +336k | +119k | +119k | +
| read grammar | +1150ms | +510ms | +150ms | +
| generate 222 | +9500ms | +450ms | +800ms | +
| memory | +21M | +10M | +2M | +
+To summarize: +
+-Interpreters in Java and C++. +Interpreter in Java.
Parsing via MCFG @@ -706,7 +781,11 @@ Parsing via MCFG
-File compression of GFCC output. +Hand-written parsers for GFCC grammars to reduce code size +(and efficiency?) of interpreters. +
++Binary format and/or file compression of GFCC output.
Syntax editor based on GFCC. -- cgit v1.2.3