1 files changed, 180 insertions, 0 deletions
diff --git a/src/PGF/doc/syntax.txt b/src/PGF/doc/syntax.txt
new file mode 100644
index 000000000..db8f7c149
--- /dev/null
+++ b/src/PGF/doc/syntax.txt
@@ -0,0 +1,180 @@
+GFCC Syntax 
+
+
+==Syntax of GFCC files==
+
+The parser syntax is very simple, as defined in BNF:
+```
+  Grm. Grammar ::= [RExp] ;
+
+  App.  RExp ::= "(" CId [RExp] ")" ;
+  AId.  RExp ::= CId ;
+  AInt. RExp ::= Integer ;
+  AStr. RExp ::= String ;
+  AFlt. RExp ::= Double ;
+  AMet. RExp ::= "?" ;
+
+  terminator RExp "" ;
+
+  token CId (('_' | letter) (letter | digit | '\'' | '_')*) ;
+```
+While a parser and a printer can be generated for many languages
+from this grammar by using the BNF Converter, a parser is also
+easy to write by hand using recursive descent.
+
+
+==Syntax of well-formed GFCC code==
+
+Here is a summary of well-formed syntax, 
+with a comment on the semantics of each construction.
+```
+  Grammar ::= 
+    ("grammar" CId CId*)      -- abstract syntax name and concrete syntax names
+    "(" "flags"    Flag* ")"     -- global and abstract flags
+    "(" "abstract" Abstract ")"  -- abstract syntax
+    "(" "concrete" Concrete* ")" -- concrete syntaxes
+
+  Abstract ::= 
+    "(" "fun"   FunDef* ")"      -- function definitions
+    "(" "cat"   CatDef* ")"      -- category definitions
+
+  Concrete ::= 
+    "(" CId                      -- language name
+      "flags"     Flag*          -- concrete flags
+      "lin"       LinDef*        -- linearization rules
+      "oper"      LinDef*        -- operations (macros)
+      "lincat"    LinDef*        -- linearization type definitions
+      "lindef"    LinDef*        -- linearization default definitions
+      "printname" LinDef*        -- printname definitions
+      "param"     LinDef*        -- lincats with labels and parameter value names
+    ")" 
+
+  Flag   ::= "(" CId String ")"   -- flag and value
+  FunDef ::= "(" CId Type Exp ")" -- function, type, and definition
+  CatDef ::= "(" CId Hypo* ")"    -- category and context
+  LinDef ::= "(" CId Term ")"     -- function and definition
+
+  Type ::= 
+    "(" CId                 -- value category
+      "(" "H" Hypo* ")"     --   argument context
+      "(" "X" Exp* ")" ")"  --   arguments (of dependent value type)
+
+  Exp ::=
+     "(" CId                -- function
+       "(" "B" CId* ")"     --   bindings
+       "(" "X" Exp* ")" ")" --   arguments
+   | CId                    -- variable
+   | "?"                    -- metavariable
+   | "(" "Eq" Equation* ")" -- group of pattern equations
+   | Integer                -- integer literal (non-negative)
+   | Float                  -- floating-point literal (non-negative)
+   | String                 -- string literal (in double quotes)
+
+  Hypo ::= "(" CId Type ")" -- variable and type
+
+  Equation ::= "(" "E" Exp Exp* ")" -- value and pattern list
+
+  Term ::= 
+     "(" "R"  Term* ")"       -- array (record or table)
+   | "(" "S"  Term* ")"       -- concatenated sequence
+   | "(" "FV" Term* ")"       -- free variant list
+   | "(" "P"  Term Term ")"   -- access to index (projection or selection)
+   | "(" "W"  String Term ")" -- token prefix with suffix list
+   | "(" "A"  Integer ")"     -- pointer to subtree
+   | String                   -- token (in double quotes)
+   | Integer                  -- index in array
+   | CId                      -- macro constant
+   | "?"                      -- metavariable
+```
+
+
+==GFCC interpreter==
+
+The first phase in interpreting GFCC is to parse a GFCC file and
+build an internal abstract syntax representation, as specified
+in the previous section.
+
+With this representation, linearization can be performed by
+a straightforward function from expressions (``Exp``) to terms
+(``Term``). All expressions except groups of pattern equations
+can be linearized.
+
+Here is a reference Haskell implementation of linearization:
+```
+  linExp :: GFCC -> CId -> Exp -> Term
+  linExp gfcc lang tree@(DTr _ at trees) = case at of
+    AC fun -> comp (map lin trees) $ look fun
+    AS s   -> R [K (show s)] -- quoted
+    AI i   -> R [K (show i)]
+    AF d   -> R [K (show d)]
+    AM     -> TM
+   where
+     lin  = linExp gfcc lang
+     comp = compute gfcc lang
+     look = lookLin gfcc lang
+```
+TODO: bindings must be supported.
+
+Terms resulting from linearization are evaluated in
+call-by-value order, with two environments needed:
+- the grammar (a concrete syntax) to give the global constants
+- an array of terms to give the subtree linearizations
+
+
+The Haskell implementation works as follows:
+```
+compute :: GFCC -> CId -> [Term] -> Term -> Term
+compute gfcc lang args = comp where
+  comp trm = case trm of
+    P r p  -> proj (comp r) (comp p)
+    W s t  -> W s (comp t)
+    R ts   -> R $ map comp ts
+    V i    -> idx args (fromInteger i)  -- already computed
+    F c    -> comp $ look c             -- not computed (if contains V)
+    FV ts  -> FV $ Prelude.map comp ts
+    S ts   -> S $ Prelude.filter (/= S []) $ Prelude.map comp ts
+    _ -> trm
+
+  look = lookOper gfcc lang
+
+  idx xs i = xs !! i
+
+  proj r p = case (r,p) of
+    (_,     FV ts) -> FV $ Prelude.map (proj r) ts
+    (FV ts, _    ) -> FV $ Prelude.map (\t -> proj t p) ts
+    (W s t, _)     -> kks (s ++ getString (proj t p))
+    _              -> comp $ getField r (getIndex p)
+
+  getString t = case t of
+    K (KS s) -> s
+    _ -> trace ("ERROR in grammar compiler: string from "++ show t) "ERR"
+
+  getIndex t =  case t of
+    C i    -> fromInteger i
+    RP p _ -> getIndex p
+    TM     -> 0  -- default value for parameter
+    _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 0
+
+  getField t i = case t of
+    R rs   -> idx rs i
+    RP _ r -> getField r i
+    TM     -> TM
+    _ -> trace ("ERROR in grammar compiler: field from " ++ show t) t
+```
+The result of linearization is usually a record, which is realized as
+a string using the following algorithm.
+```
+  realize :: Term -> String
+  realize trm = case trm of
+    R (t:_)  -> realize t
+    S ss     -> unwords $ map realize ss
+    K s      -> s
+    W s t    -> s ++ realize t
+    FV (t:_) -> realize t  -- TODO: all variants
+    TM       -> "?"
+```
+Notice that realization always picks the first field of a record.
+If a linearization type has more than one field, the first field
+does not necessarily contain the desired string.
+Also notice that the order of record fields in GFCC is not necessarily
+the same as in GF source.