summaryrefslogtreecommitdiff
path: root/src/PGF/doc/syntax.txt
diff options
context:
space:
mode:
authoraarne <aarne@cs.chalmers.se>2008-06-25 16:54:35 +0000
committeraarne <aarne@cs.chalmers.se>2008-06-25 16:54:35 +0000
commite9e80fc389365e24d4300d7d5390c7d833a96c50 (patch)
treef0b58473adaa670bd8fc52ada419d8cad470ee03 /src/PGF/doc/syntax.txt
parentb96b36f43de3e2f8b58d5f539daa6f6d47f25870 (diff)
changed names of resource-1.3; added a note on homepage on release
Diffstat (limited to 'src/PGF/doc/syntax.txt')
-rw-r--r--src/PGF/doc/syntax.txt180
1 files changed, 180 insertions, 0 deletions
diff --git a/src/PGF/doc/syntax.txt b/src/PGF/doc/syntax.txt
new file mode 100644
index 000000000..db8f7c149
--- /dev/null
+++ b/src/PGF/doc/syntax.txt
@@ -0,0 +1,180 @@
+GFCC Syntax
+
+
+==Syntax of GFCC files==
+
+The parser syntax is very simple, as defined in BNF:
+```
+ Grm. Grammar ::= [RExp] ;
+
+ App. RExp ::= "(" CId [RExp] ")" ;
+ AId. RExp ::= CId ;
+ AInt. RExp ::= Integer ;
+ AStr. RExp ::= String ;
+ AFlt. RExp ::= Double ;
+ AMet. RExp ::= "?" ;
+
+ terminator RExp "" ;
+
+ token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
+```
+While a parser and a printer can be generated for many languages
+from this grammar by using the BNF Converter, a parser is also
+easy to write by hand using recursive descent.
+
+
+==Syntax of well-formed GFCC code==
+
+Here is a summary of well-formed syntax,
+with a comment on the semantics of each construction.
+```
+ Grammar ::=
+ ("grammar" CId CId*) -- abstract syntax name and concrete syntax names
+ "(" "flags" Flag* ")" -- global and abstract flags
+ "(" "abstract" Abstract ")" -- abstract syntax
+ "(" "concrete" Concrete* ")" -- concrete syntaxes
+
+ Abstract ::=
+ "(" "fun" FunDef* ")" -- function definitions
+ "(" "cat" CatDef* ")" -- category definitions
+
+ Concrete ::=
+ "(" CId -- language name
+ "flags" Flag* -- concrete flags
+ "lin" LinDef* -- linearization rules
+ "oper" LinDef* -- operations (macros)
+ "lincat" LinDef* -- linearization type definitions
+ "lindef" LinDef* -- linearization default definitions
+ "printname" LinDef* -- printname definitions
+ "param" LinDef* -- lincats with labels and parameter value names
+ ")"
+
+ Flag ::= "(" CId String ")" -- flag and value
+ FunDef ::= "(" CId Type Exp ")" -- function, type, and definition
+ CatDef ::= "(" CId Hypo* ")" -- category and context
+ LinDef ::= "(" CId Term ")" -- function and definition
+
+ Type ::=
+ "(" CId -- value category
+ "(" "H" Hypo* ")" -- argument context
+ "(" "X" Exp* ")" ")" -- arguments (of dependent value type)
+
+ Exp ::=
+ "(" CId -- function
+ "(" "B" CId* ")" -- bindings
+ "(" "X" Exp* ")" ")" -- arguments
+ | CId -- variable
+ | "?" -- metavariable
+ | "(" "Eq" Equation* ")" -- group of pattern equations
+ | Integer -- integer literal (non-negative)
+ | Float -- floating-point literal (non-negative)
+ | String -- string literal (in double quotes)
+
+ Hypo ::= "(" CId Type ")" -- variable and type
+
+ Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list
+
+ Term ::=
+ "(" "R" Term* ")" -- array (record or table)
+ | "(" "S" Term* ")" -- concatenated sequence
+ | "(" "FV" Term* ")" -- free variant list
+ | "(" "P" Term Term ")" -- access to index (projection or selection)
+ | "(" "W" String Term ")" -- token prefix with suffix list
+ | "(" "A" Integer ")" -- pointer to subtree
+ | String -- token (in double quotes)
+ | Integer -- index in array
+ | CId -- macro constant
+ | "?" -- metavariable
+```
+
+
+==GFCC interpreter==
+
+The first phase in interpreting GFCC is to parse a GFCC file and
+build an internal abstract syntax representation, as specified
+in the previous section.
+
+With this representation, linearization can be performed by
+a straightforward function from expressions (``Exp``) to terms
+(``Term``). All expressions except groups of pattern equations
+can be linearized.
+
+Here is a reference Haskell implementation of linearization:
+```
+ linExp :: GFCC -> CId -> Exp -> Term
+ linExp gfcc lang tree@(DTr _ at trees) = case at of
+ AC fun -> comp (map lin trees) $ look fun
+ AS s -> R [K (show s)] -- quoted
+ AI i -> R [K (show i)]
+ AF d -> R [K (show d)]
+ AM -> TM
+ where
+ lin = linExp gfcc lang
+ comp = compute gfcc lang
+ look = lookLin gfcc lang
+```
+TODO: bindings must be supported.
+
+Terms resulting from linearization are evaluated in
+call-by-value order, with two environments needed:
+- the grammar (a concrete syntax) to give the global constants
+- an array of terms to give the subtree linearizations
+
+
+The Haskell implementation works as follows:
+```
+compute :: GFCC -> CId -> [Term] -> Term -> Term
+compute gfcc lang args = comp where
+ comp trm = case trm of
+ P r p -> proj (comp r) (comp p)
+ W s t -> W s (comp t)
+ R ts -> R $ map comp ts
+ V i -> idx args (fromInteger i) -- already computed
+ F c -> comp $ look c -- not computed (if contains V)
+ FV ts -> FV $ Prelude.map comp ts
+ S ts -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
+ _ -> trm
+
+ look = lookOper gfcc lang
+
+ idx xs i = xs !! i
+
+ proj r p = case (r,p) of
+ (_, FV ts) -> FV $ Prelude.map (proj r) ts
+ (FV ts, _ ) -> FV $ Prelude.map (\t -> proj t p) ts
+ (W s t, _) -> kks (s ++ getString (proj t p))
+ _ -> comp $ getField r (getIndex p)
+
+ getString t = case t of
+ K (KS s) -> s
+ _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
+
+ getIndex t = case t of
+ C i -> fromInteger i
+ RP p _ -> getIndex p
+ TM -> 0 -- default value for parameter
+ _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
+
+ getField t i = case t of
+ R rs -> idx rs i
+ RP _ r -> getField r i
+ TM -> TM
+ _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
+```
+The result of linearization is usually a record, which is realized as
+a string using the following algorithm.
+```
+ realize :: Term -> String
+ realize trm = case trm of
+ R (t:_) -> realize t
+ S ss -> unwords $ map realize ss
+ K s -> s
+ W s t -> s ++ realize t
+ FV (t:_) -> realize t -- TODO: all variants
+ TM -> "?"
+```
+Notice that realization always picks the first field of a record.
+If a linearization type has more than one field, the first field
+does not necessarily contain the desired string.
+Also notice that the order of record fields in GFCC is not necessarily
+the same as in GF source.