summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraarne <aarne@chalmers.se>2010-03-02 19:10:56 +0000
committeraarne <aarne@chalmers.se>2010-03-02 19:10:56 +0000
commite4748e998453b979af46983a079f2ec3d307ada4 (patch)
treeef1c2b278850a7f6a95959b6f18469645cf1094c
parent21b10f91cbbd0a2d369dc55306bad53df1502cd7 (diff)
restored gfcc example (GF C compiler)
-rw-r--r--examples/gfcc/CleanJVM.hs59
-rw-r--r--examples/gfcc/Imper.gf53
-rw-r--r--examples/gfcc/ImperC.gf56
-rw-r--r--examples/gfcc/ImperEng.gf71
-rw-r--r--examples/gfcc/ImperJVM.gf93
-rw-r--r--examples/gfcc/JVM.hs20
-rw-r--r--examples/gfcc/Makefile12
-rw-r--r--examples/gfcc/ResImper.gf85
-rw-r--r--examples/gfcc/ResImperEng.gf16
-rw-r--r--examples/gfcc/abs.c20
-rw-r--r--examples/gfcc/complin.bbl96
-rw-r--r--examples/gfcc/complin.tex1477
-rw-r--r--examples/gfcc/factorial.c38
-rw-r--r--examples/gfcc/fibonacci.c18
-rw-r--r--examples/gfcc/runtime.j55
15 files changed, 2169 insertions, 0 deletions
diff --git a/examples/gfcc/CleanJVM.hs b/examples/gfcc/CleanJVM.hs
new file mode 100644
index 000000000..7c4c1bb54
--- /dev/null
+++ b/examples/gfcc/CleanJVM.hs
@@ -0,0 +1,59 @@
+module Main where
+
+import Char
+import System
+
+--- translation from Symbolic JVM to real Jasmin code
+
+main :: IO ()
+main = do
+ jvm:src:_ <- getArgs
+ s <- readFile jvm
+ let cls = takeWhile (/='.') src
+ let obj = cls ++ ".j"
+ writeFile obj $ boilerplate cls
+ appendFile obj $ mkJVM cls s
+ putStrLn $ "wrote file " ++ obj
+ system $ "java -jar jasmin.jar " ++ obj
+ return ()
+
+mkJVM :: String -> String -> String
+mkJVM cls = unlines . reverse . fst . foldl trans ([],([],0)) . liness where
+ trans (code,(env,v)) s = case words s of
+ ".method":p:s:f:ns
+ | f == "main" ->
+ (".method public static main([Ljava/lang/String;)V":code,([],1))
+ | otherwise ->
+ (unwords [".method",p,s, f ++ glue ns] : code,([],0))
+ "alloc":t:x:_ -> (("; " ++ s):code, ((x,v):env, v + size t))
+ ".limit":"locals":ns -> chCode (".limit locals " ++ show (length ns + 1))
+ "runtime":f:ns -> chCode $ "invokestatic " ++ "runtime/" ++ f ++ glue ns
+ "static":f:ns -> chCode $ "invokestatic " ++ cls ++ "/" ++ f ++ glue ns
+ "alloc":ns -> chCode $ "; " ++ s
+ ins:x:_ | symb ins -> chCode $ ins ++ " " ++ look x
+ "goto":ns -> chCode $ "goto " ++ glue ns
+ "ifeq":ns -> chCode $ "ifeq " ++ glue ns
+ "label":ns -> chCode $ glue ns ++ ":"
+ ";":[] -> chCode ""
+ _ -> chCode s
+ where
+ chCode c = (c:code,(env,v))
+ look x = maybe (error $ x ++ show env) show $ lookup x env
+ glue = concat --init . concat
+ symb = flip elem ["load","store"] . tail
+ size t = case t of
+ "d" -> 2
+ _ -> 1
+ liness = lines . map (\c -> if c==';' then '\n' else c)
+
+
+boilerplate :: String -> String
+boilerplate cls = unlines [
+ ".class public " ++ cls,
+ ".super java/lang/Object",
+ ".method public <init>()V",
+ "aload_0",
+ "invokenonvirtual java/lang/Object/<init>()V",
+ "return",
+ ".end method"
+ ]
diff --git a/examples/gfcc/Imper.gf b/examples/gfcc/Imper.gf
new file mode 100644
index 000000000..a207759b7
--- /dev/null
+++ b/examples/gfcc/Imper.gf
@@ -0,0 +1,53 @@
+abstract Imper = {
+
+ flags startcat = Program ;
+
+ cat
+ Program ;
+ Rec ListTyp ;
+ Typ ;
+ IsNum Typ ;
+ ListTyp ;
+ Fun ListTyp Typ ;
+ Stm ;
+ Exp Typ ;
+ Var Typ ;
+ ListExp ListTyp ;
+
+ fun
+ Empty : Program ;
+ Funct : (AS : ListTyp) -> (V : Typ) ->
+ (Fun AS V -> Rec AS) -> Program ;
+ FunctNil : (V : Typ) ->
+ Stm -> (Fun NilTyp V -> Program) -> Program ;
+ RecOne : (A : Typ) -> (Var A -> Stm) -> Program -> Rec (ConsTyp A NilTyp) ;
+ RecCons : (A : Typ) -> (AS : ListTyp) ->
+ (Var A -> Rec AS) -> Program -> Rec (ConsTyp A AS) ;
+
+ Decl : (A : Typ) -> (Var A -> Stm) -> Stm ;
+ Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ;
+ While : Exp TInt -> Stm -> Stm -> Stm ;
+ IfElse : Exp TInt -> Stm -> Stm -> Stm -> Stm ;
+ Block : Stm -> Stm -> Stm ;
+ Printf : (A : Typ) -> Exp A -> Stm -> Stm ;
+ Return : (A : Typ) -> Exp A -> Stm ;
+ Returnv : Stm ;
+ End : Stm ;
+
+ EVar : (A : Typ) -> Var A -> Exp A ;
+ EInt : Int -> Exp TInt ;
+ EFloat : Int -> Int -> Exp TFloat ;
+ ELt : (n : Typ) -> IsNum n -> Exp n -> Exp n -> Exp TInt ;
+ EAdd, EMul, ESub : (n : Typ) -> IsNum n -> Exp n -> Exp n -> Exp n ;
+ EAppNil : (V : Typ) -> Fun NilTyp V -> Exp V ;
+ EApp : (AS : ListTyp) -> (V : Typ) -> Fun AS V -> ListExp AS -> Exp V ;
+
+ TInt, TFloat : Typ ;
+ isNumInt : IsNum TInt ; isNumFloat : IsNum TFloat ;
+ NilTyp : ListTyp ;
+ ConsTyp : Typ -> ListTyp -> ListTyp ;
+
+ OneExp : (A : Typ) -> Exp A -> ListExp (ConsTyp A NilTyp) ;
+ ConsExp : (A : Typ) -> (AS : ListTyp) ->
+ Exp A -> ListExp AS -> ListExp (ConsTyp A AS) ;
+}
diff --git a/examples/gfcc/ImperC.gf b/examples/gfcc/ImperC.gf
new file mode 100644
index 000000000..57f9d9f9e
--- /dev/null
+++ b/examples/gfcc/ImperC.gf
@@ -0,0 +1,56 @@
+--# -path=.:../../lib/prelude
+concrete ImperC of Imper = open ResImper in {
+ flags lexer=codevars ; unlexer=code ; startcat=Program ;
+
+ lincat
+ Exp = PrecExp ;
+ Typ = {s,s2 : Str} ;
+ Rec = {s,s2,s3 : Str} ;
+
+ lin
+ Empty = ss [] ;
+ FunctNil val stm cont = ss (
+ val.s ++ cont.$0 ++ paren [] ++ "{" ++
+ stm.s ++ "}" ++ ";" ++ cont.s) ;
+ Funct args val rec = ss (
+ val.s ++ rec.$0 ++ paren rec.s2 ++ "{" ++
+ rec.s ++ "}" ++ ";" ++ rec.s3) ;
+
+ RecOne typ stm prg = stm ** {
+ s2 = typ.s ++ stm.$0 ;
+ s3 = prg.s
+ } ;
+ RecCons typ _ body prg = {
+ s = body.s ;
+ s2 = typ.s ++ body.$0 ++ "," ++ body.s2 ;
+ s3 = prg.s
+ } ;
+
+ Decl typ cont = continues (typ.s ++ cont.$0) cont ;
+ Assign _ x exp = continues (x.s ++ "=" ++ exp.s) ;
+ While exp loop = continue ("while" ++ paren exp.s ++ loop.s) ;
+ IfElse exp t f = continue ("if" ++ paren exp.s ++ t.s ++ "else" ++ f.s) ;
+ Block stm = continue ("{" ++ stm.s ++ "}") ;
+ Printf t e = continues ("printf" ++ paren (t.s2 ++ "," ++ e.s)) ;
+ Return _ exp = statement ("return" ++ exp.s) ;
+ Returnv = statement "return" ;
+ End = ss [] ;
+
+ EVar _ x = constant x.s ;
+ EInt n = constant n.s ;
+ EFloat a b = constant (a.s ++ "." ++ b.s) ;
+ EMul _ _ = infixL 3 "*" ;
+ EAdd _ _ = infixL 2 "+" ;
+ ESub _ _ = infixL 2 "-" ;
+ ELt _ _ = infixN 1 "<" ;
+
+ EAppNil val f = constant (f.s ++ paren []) ;
+ EApp args val f exps = constant (f.s ++ paren exps.s) ;
+
+ TInt = {s = "int" ; s2 = "\"%d\""} ;
+ TFloat = {s = "float" ; s2 = "\"%f\""} ;
+ NilTyp = ss [] ;
+ ConsTyp = cc2 ;
+ OneExp _ e = e ;
+ ConsExp _ _ e es = ss (e.s ++ "," ++ es.s) ;
+}
diff --git a/examples/gfcc/ImperEng.gf b/examples/gfcc/ImperEng.gf
new file mode 100644
index 000000000..6a2a10e7c
--- /dev/null
+++ b/examples/gfcc/ImperEng.gf
@@ -0,0 +1,71 @@
+-- # -path=.:prelude
+--# -path=.:../../lib/prelude
+
+-- Toy English phrasing of C programs. Intended use is with
+-- speech synthesis. Printed code should use HTML formatting.
+-- AR 5/10/2005.
+
+concrete ImperEng of Imper = open Prelude, ResImperEng in {
+ flags lexer=textvars ; unlexer=text ; startcat=Program ;
+
+ lincat
+ Rec = {s,s2,s3 : Str} ;
+
+ lin
+ Empty = ss [] ;
+ FunctNil val stm cont = ss (
+ ["The function"] ++ cont.$0 ++
+ "returns" ++ indef ++ val.s ++ "." ++
+ ["It is defined as follows :"] ++
+ stm.s ++
+ PARA ++
+ cont.s) ;
+ Funct args val rec = ss (
+ ["The function"] ++ rec.$0 ++
+ "takes" ++ rec.s2 ++
+ "and" ++ "returns" ++ indef ++ val.s ++ "." ++
+ ["It is defined as follows:"] ++
+ rec.s ++
+ PARA ++
+ rec.s3) ;
+
+ RecOne typ stm prg = stm ** {
+ s2 = indef ++ typ.s ++ stm.$0 ;
+ s3 = prg.s
+ } ;
+ RecCons typ _ body prg = {
+ s = body.s ;
+ s2 = indef ++ typ.s ++ body.$0 ++ "and" ++ body.s2 ;
+ s3 = prg.s
+ } ;
+
+ Decl typ cont = continues ("let" ++ cont.$0 ++ "be" ++ indef ++ typ.s) cont ;
+ Assign _ x exp = continues ("set" ++ x.s ++ "to" ++ exp.s) ;
+ While exp loop = continues (["if"] ++ exp.s ++
+ [", do the following :"] ++ loop.s ++
+ ["test the condition and repeat the loop if the condition holds"]) ;
+ IfElse exp t f = continue ("if" ++ exp.s ++ [", then"] ++ t.s ++ "Else" ++ f.s) ;
+ Block stm = continue (stm.s) ;
+ Printf t e = continues ("print" ++ e.s) ;
+ Return _ exp = statement ("return" ++ exp.s) ;
+ Returnv = statement ["return from the function"] ;
+ End = ss [] ;
+
+ EVar _ x = constant x.s ;
+ EInt n = constant n.s ;
+ EFloat a b = constant (a.s ++ "." ++ b.s) ;
+ EMul _ _ = prefix "product" ;
+ EAdd _ _ = prefix "sum" ;
+ ESub _ _ x y = ss (["the subtraction of"] ++ y.s ++ "from" ++ x.s) ;
+ ELt _ _ = comparison "smaller" ;
+
+ EAppNil val f = constant f.s ;
+ EApp args val f exps = constant (f.s ++ ["applied to"] ++ exps.s) ;
+
+ TInt = {s = "integer"} ;
+ TFloat = {s = "float"} ;
+ NilTyp = ss [] ;
+ ConsTyp = cc2 ;
+ OneExp _ e = e ;
+ ConsExp _ _ e es = ss (e.s ++ "and" ++ es.s) ;
+}
diff --git a/examples/gfcc/ImperJVM.gf b/examples/gfcc/ImperJVM.gf
new file mode 100644
index 000000000..d5d390727
--- /dev/null
+++ b/examples/gfcc/ImperJVM.gf
@@ -0,0 +1,93 @@
+--# -path=.:../../lib/prelude
+concrete ImperJVM of Imper = open ResImper in {
+
+flags lexer=codevars ; unlexer=code ; startcat=Stm ;
+
+ lincat
+ Rec = {s,s2,s3 : Str} ; -- code, storage for locals, continuation
+ Typ = {s : Str ; t : TypIdent} ;
+ Stm = Instr ;
+
+ lin
+ Empty = ss [] ;
+ FunctNil val stm cont = ss (
+ ".method" ++ "public" ++ "static" ++ cont.$0 ++ paren [] ++ val.s ++ ";" ++
+ ".limit" ++ "locals" ++ stm.s2 ++ ";" ++
+ ".limit" ++ "stack" ++ "1000" ++ ";" ++
+ stm.s ++
+ ".end" ++ "method" ++ ";" ++ ";" ++
+ cont.s
+ ) ;
+ Funct args val rec = ss (
+ ".method" ++ "public" ++ "static" ++ rec.$0 ++ paren args.s ++ val.s ++ ";" ++
+ ".limit" ++ "locals" ++ rec.s2 ++ ";" ++
+ ".limit" ++ "stack" ++ "1000" ++ ";" ++
+ rec.s ++
+ ".end" ++ "method" ++ ";" ++ ";" ++
+ rec.s3
+ ) ;
+
+ RecOne typ stm prg = instrb typ.s (
+ ["alloc"] ++ typ.s ++ stm.$0 ++ stm.s2) {s = stm.s ; s2 = stm.s2 ; s3 = prg.s};
+
+ RecCons typ _ body prg = instrb typ.s (
+ ["alloc"] ++ typ.s ++ body.$0 ++ body.s2)
+ {s = body.s ; s2 = body.s2 ; s3 = prg.s};
+
+ Decl typ cont = instrb typ.s (
+ ["alloc"] ++ typ.s ++ cont.$0
+ ) cont ;
+ Assign t x exp = instrc (exp.s ++ typInstr "store" t.t ++ x.s) ;
+ While exp loop =
+ let
+ test = "TEST_" ++ loop.s2 ;
+ end = "END_" ++ loop.s2
+ in instrl (
+ "label" ++ test ++ ";" ++
+ exp.s ++
+ "ifeq" ++ end ++ ";" ++
+ loop.s ++
+ "goto" ++ test ++ ";" ++
+ "label" ++ end
+ ) ;
+ IfElse exp t f =
+ let
+ false = "FALSE_" ++ t.s2 ++ f.s2 ;
+ true = "TRUE_" ++ t.s2 ++ f.s2
+ in instrl (
+ exp.s ++
+ "ifeq" ++ false ++ ";" ++
+ t.s ++
+ "goto" ++ true ++ ";" ++
+ "label" ++ false ++ ";" ++
+ f.s ++
+ "label" ++ true
+ ) ;
+ Block stm = instrc stm.s ;
+ Printf t e = instrc (e.s ++ "runtime" ++ typInstr "printf" t.t ++ paren (t.s) ++ "V") ;
+ Return t exp = instr (exp.s ++ typInstr "return" t.t) ;
+ Returnv = instr "return" ;
+ End = ss [] ** {s2,s3 = []} ;
+
+ EVar t x = instr (typInstr "load" t.t ++ x.s) ;
+ EInt n = instr ("ldc" ++ n.s) ;
+ EFloat a b = instr ("ldc" ++ a.s ++ "." ++ b.s) ;
+ EAdd t _ = binopt "add" t.t ;
+ ESub t _ = binopt "sub" t.t ;
+ EMul t _ = binopt "mul" t.t ;
+ ELt t _ = binop ("runtime" ++ typInstr "lt" t.t ++ paren (t.s ++ t.s) ++ "I") ;
+ EAppNil val f = instr (
+ "static" ++ f.s ++ paren [] ++ val.s
+ ) ;
+ EApp args val f exps = instr (
+ exps.s ++
+ "static" ++ f.s ++ paren args.s ++ val.s
+ ) ;
+
+ TInt = {s = "I" ; t = TIInt} ;
+ TFloat = {s = "F" ; t = TIFloat} ;
+ NilTyp = ss [] ;
+ ConsTyp = cc2 ;
+ OneExp _ e = e ;
+ ConsExp _ _ = cc2 ;
+}
diff --git a/examples/gfcc/JVM.hs b/examples/gfcc/JVM.hs
new file mode 100644
index 000000000..380570049
--- /dev/null
+++ b/examples/gfcc/JVM.hs
@@ -0,0 +1,20 @@
+module JVM where
+
+mkJVM :: String -> String
+mkJVM = unlines . reverse . fst . foldl trans ([],([],0)) . lines where
+ trans (code,(env,v)) s = case words s of
+ ".method":f:ns -> ((".method " ++ f ++ concat ns):code,([],0))
+ "alloc":t:x:_ -> (code, ((x,v):env, v + size t))
+ ".limit":"locals":ns -> chCode (".limit locals " ++ show (length ns - 1))
+ t:"_load" :x:_ -> chCode (t ++ "load " ++ look x)
+ t:"_store":x:_ -> chCode (t ++ "store " ++ look x)
+ t:"_return":_ -> chCode (t ++ "return")
+ "goto":ns -> chCode ("goto " ++ concat ns)
+ "ifzero":ns -> chCode ("ifzero " ++ concat ns)
+ _ -> chCode s
+ where
+ chCode c = (c:code,(env,v))
+ look x = maybe (x ++ show env) show $ lookup x env
+ size t = case t of
+ "d" -> 2
+ _ -> 1
diff --git a/examples/gfcc/Makefile b/examples/gfcc/Makefile
new file mode 100644
index 000000000..d260049fa
--- /dev/null
+++ b/examples/gfcc/Makefile
@@ -0,0 +1,12 @@
+all: pgf runtime
+
+pgf:
+ gf -make ImperC.gf ImperJVM.gf
+
+runtime:
+ java -jar jasmin.jar runtime.j
+
+doc:
+ pdflatex complin.tex
+ pdflatex complin.tex
+
diff --git a/examples/gfcc/ResImper.gf b/examples/gfcc/ResImper.gf
new file mode 100644
index 000000000..57cdf9434
--- /dev/null
+++ b/examples/gfcc/ResImper.gf
@@ -0,0 +1,85 @@
+resource ResImper = open Predef in {
+
+ -- precedence
+
+ param PAssoc = PN | PL | PR ;
+
+ oper
+ Prec : PType = Predef.Ints 4 ;
+ PrecExp : Type = {s : Str ; p : Prec ; a : PAssoc} ;
+
+ mkPrec : Prec -> PAssoc -> Str -> PrecExp = \p,a,f ->
+ {s = f ; p = p ; a = a} ;
+
+ usePrec : PrecExp -> Prec -> Str = \x,p ->
+ case <<x.p,p> : Prec * Prec> of {
+ <3,4> | <2,3> | <2,4> => paren x.s ;
+ <1,1> | <1,0> | <0,0> => x.s ;
+ <1,_> | <0,_> => paren x.s ;
+ _ => x.s
+ } ;
+
+ constant : Str -> PrecExp = mkPrec 4 PN ;
+
+ infixN : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y ->
+ mkPrec p PN (usePrec x (nextPrec p) ++ f ++ usePrec y (nextPrec p)) ;
+ infixL : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y ->
+ mkPrec p PL (usePrec x p ++ f ++ usePrec y (nextPrec p)) ;
+ infixR : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y ->
+ mkPrec p PR (usePrec x (nextPrec p) ++ f ++ usePrec y p) ;
+
+ nextPrec : Prec -> Prec = \p -> case <p : Prec> of {
+ 4 => 4 ;
+ n => Predef.plus n 1
+ } ;
+
+ -- string operations
+
+ SS : Type = {s : Str} ;
+ ss : Str -> SS = \s -> {s = s} ;
+ cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ;
+
+ paren : Str -> Str = \str -> "(" ++ str ++ ")" ;
+
+ continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ;
+ continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ;
+ statement : Str -> SS = \s -> ss (s ++ ";");
+
+ -- taking cases of list size
+
+ param
+ Size = Zero | One | More ;
+ oper
+ nextSize : Size -> Size = \n -> case n of {
+ Zero => One ;
+ _ => More
+ } ;
+ separator : Str -> Size -> Str = \t,n -> case n of {
+ Zero => [] ;
+ _ => t
+ } ;
+
+ -- operations for JVM
+
+ param TypIdent = TIInt | TIFloat ; -- to be continued
+
+ oper
+ typInstr : Str -> TypIdent -> Str = \instr,t -> case t of {
+ TIInt => "i" + instr ;
+ TIFloat => "f" + instr
+ } ;
+
+ Instr : Type = {s,s2,s3 : Str} ; -- code, variables, labels
+ instr : Str -> Instr = \s ->
+ statement s ** {s2,s3 = []} ;
+ instrc : Str -> Instr -> Instr = \s,i ->
+ ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = i.s3} ;
+ instrl : Str -> Instr -> Instr = \s,i ->
+ ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = "L" ++ i.s3} ;
+ instrb : Str -> Str -> Instr -> Instr = \v,s,i ->
+ ss (s ++ ";" ++ i.s) ** {s2 = v ++ i.s2 ; s3 = i.s3} ;
+ binop : Str -> SS -> SS -> SS = \op, x, y ->
+ ss (x.s ++ y.s ++ op ++ ";") ;
+ binopt : Str -> TypIdent -> SS -> SS -> SS = \op, t ->
+ binop (typInstr op t) ;
+}
diff --git a/examples/gfcc/ResImperEng.gf b/examples/gfcc/ResImperEng.gf
new file mode 100644
index 000000000..87cb30a54
--- /dev/null
+++ b/examples/gfcc/ResImperEng.gf
@@ -0,0 +1,16 @@
+resource ResImperEng = open Predef, Prelude in {
+
+ oper
+ indef = pre {"a" ;
+ "an" / strs {"a" ; "e" ; "i" ; "o" ; "A" ; "E" ; "I" ; "O" }} ;
+
+ constant : Str -> SS = ss ;
+ prefix : Str -> SS -> SS -> SS = \f,x,y ->
+ ss ("the" ++ f ++ "of" ++ x.s ++ "and" ++ y.s) ;
+ comparison : Str -> SS -> SS -> SS = \f,x,y ->
+ ss (x.s ++ "is" ++ f ++ "than" ++ y.s) ;
+ continues : Str -> SS -> SS = \s,t -> ss (s ++ "." ++ t.s) ;
+ continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ;
+ statement : Str -> SS = \s -> ss (s ++ ".");
+
+}
diff --git a/examples/gfcc/abs.c b/examples/gfcc/abs.c
new file mode 100644
index 000000000..947711c13
--- /dev/null
+++ b/examples/gfcc/abs.c
@@ -0,0 +1,20 @@
+int abs (int x){
+ int y ;
+ {
+ if (x < 0){
+ y = 0 - x ;
+ }
+ else {
+ y = x ;
+ }
+ }
+ return y ;
+ } ;
+int main () {
+ int i ;
+ i = abs (16);
+ printf ("%d",i) ;
+ return ;
+ } ;
+
+
diff --git a/examples/gfcc/complin.bbl b/examples/gfcc/complin.bbl
new file mode 100644
index 000000000..cf18b704f
--- /dev/null
+++ b/examples/gfcc/complin.bbl
@@ -0,0 +1,96 @@
+\begin{thebibliography}{10}
+
+\bibitem{aho-ullman}
+A.~Aho, R.~Sethi, and J.~Ullman.
+\newblock {\em {Compilers: Principles, Techniques, and Tools}}.
+\newblock {Addison-Wesley}, 1988.
+
+\bibitem{cayenne}
+L.~Augustsson.
+\newblock {Cayenne---a language with dependent types}.
+\newblock In {\em Proc. of {ICFP'98}}. ACM Press, September 1998.
+
+\bibitem{bnfc}
+M.~Forsberg and A.~Ranta.
+\newblock {\mbox{BNF Converter Homepage}}.
+\newblock \verb!http://www.cs.chalmers.se/~markus/BNFC/!, 2002--2004.
+
+\bibitem{harper-honsell}
+R.~Harper, F.~Honsell, and G.~Plotkin.
+\newblock {A Framework for Defining Logics}.
+\newblock {\em {JACM}}, 40(1):143--184, 1993.
+
+\bibitem{metal}
+G.~Kahn, B.~Lang, B.~Mélèse, and E.~Morcos.
+\newblock Metal: a formalism to specify formalisms.
+\newblock {\em Science of {C}omputer {P}rogramming}, 3:151--188, 1983.
+
+\bibitem{khegai}
+J.\ Khegai, B.\ Nordström, and A.\ Ranta.
+\newblock {Multilingual Syntax Editing in GF}.
+\newblock In A.~Gelbukh, editor, {\em {Intelligent Text Processing and
+ Computational Linguistics (CICLing-2003), Mexico City, February 2003}},
+ volume 2588 of {\em LNCS}, pages 453--464. Springer-Verlag, {2003}.
+%\newblock URL \url{http://www.cs.chalmers.se/~aarne/articles/mexico.ps.gz}.
+
+\bibitem{knuth-attr}
+D.~Knuth.
+\newblock Semantics of context-free languages.
+\newblock {\em Mathematical {Systems} {Theory}}, 2:127--145, 1968.
+
+\bibitem{landin}
+P.~Landin.
+\newblock The next 700 programming languages.
+\newblock {\em {Communications of the ACM}}, 9:157--166, 1966.
+
+\bibitem{magnusson-nordstr}
+L.~Magnusson and B.~Nordstr\"{o}m.
+\newblock The {ALF} proof editor and its proof engine.
+\newblock In {\em {Types for Proofs and Programs}}, LNCS 806, pages 213--237.
+ Springer, 1994.
+
+\bibitem{happy}
+S.~Marlow.
+\newblock {Happy, The Parser Generator for Haskell}, 2001.
+\newblock \verb6http://www.haskell.org/happy/6.
+
+\bibitem{jasmin}
+J.~Meyer and T.~Downing.
+\newblock {\em {Java Virtual Machine}}.
+\newblock O'Reilly, 1997.
+
+\bibitem{semBNF}
+{P. M\"aenp\"a\"a}.
+\newblock {Semantic BNF}.
+\newblock In E.~Gimenez and C.~Paulin-Mohring, editors, {\em Types for Proofs
+ and Programs, TYPES'96}, volume 1512 of {\em LNCS}, pages 196--215.
+ Springer-Verlag, 1998.
+
+\bibitem{pereira-shieber}
+F.~Pereira and S.~Shieber.
+\newblock {\em {Prolog and Natural-Language Analysis}}.
+\newblock {CSLI}, Stanford, 1987.
+
+\bibitem{twelf}
+F.~Pfenning.
+\newblock {The Twelf Project}.
+\newblock \verb!http://www-2.cs.cmu.edu/~twelf!, 2002.
+
+\bibitem{gf-jfp}
+A.~Ranta.
+\newblock {Grammatical Framework: A Type-Theoretical Grammar Formalism}.
+\newblock {\em {The Journal of Functional Programming}}, 14(2):145--189, 2004.
+%\newblock URL \url{http://www.cs.chalmers.se/~aarne/articles/gf-jfp.ps.gz}.
+
+\bibitem{gf-homepage}
+A.~Ranta, K.~Angelov, and T.~Hallgren.
+\newblock {\mbox{Grammatical Framework Homepage}}.
+\newblock \verb!http://grammaticalframework.org!, 2000--2009.
+
+\bibitem{teitelbaum}
+T.~Teitelbaum and T.~Reps.
+\newblock The {Cornell} {Program} {Synthesizer}: a syntax-directed programming
+ environment.
+\newblock {\em Commun. {ACM}}, 24(9):563--573, 1981.
+
+\end{thebibliography}
diff --git a/examples/gfcc/complin.tex b/examples/gfcc/complin.tex
new file mode 100644
index 000000000..9e624a846
--- /dev/null
+++ b/examples/gfcc/complin.tex
@@ -0,0 +1,1477 @@
+\documentclass[12pt]{article}
+
+\usepackage[latin1]{inputenc}
+%\input{psfig.sty}
+
+\setlength{\oddsidemargin}{0mm}
+%\setlength{\evensidemargin}{0mm}
+\setlength{\evensidemargin}{-2mm}
+\setlength{\topmargin}{-16mm}
+\setlength{\textheight}{240mm}
+\setlength{\textwidth}{158mm}
+
+%\setlength{\parskip}{2mm}
+%\setlength{\parindent}{0mm}
+
+\input{macros}
+
+\newcommand{\begit}{\begin{itemize}}
+\newcommand{\enit}{\end{itemize}}
+\newcommand{\HOAS}{higher-order abstract syntax}
+\newcommand{\newone}{} %%{\newpage}
+\newcommand{\heading}[1]{\subsection{#1}}
+\newcommand{\explanation}[1]{{\small #1}}
+\newcommand{\empha}[1]{{\em #1}}
+\newcommand{\commentOut}[1]{}
+\newcommand{\rarrow}{\; \rightarrow\;}
+
+\newcommand{\nocolor}{} %% {\color[rgb]{0,0,0}}
+
+
+\title{{\bf Declarative Language Definitions and Code Generation as Linearization}}
+
+\author{Aarne Ranta \\
+ Department of Computing Science \\
+ Chalmers University of Technology and the University of Gothenburg\\
+ {\tt aarne@cs.chalmers.se}}
+
+\date{30 November 2004}
+
+\begin{document}
+
+\maketitle
+
+
+\subsection*{Abstract}
+
+{\em
+This paper presents a compiler for a fragment of the C programming
+language, with JVM (Java Virtual Machine) as target language.
+The compiler is implemented in a purely declarative way:
+its definition consists of an abstract syntax of program
+structures and two concrete syntaxes matching the abstract
+syntax: one for C and one for JVM. From these grammar components,
+the compiler is derived by using the GF (Grammatical Framework)
+grammat tool: the front end consists of parsing and semantic
+checking in accordance to the C grammar, and the back end consists
+of linearization in accordance to the JVM grammar. The tool provides
+other functionalities as well, such as decompilation and interactive
+editing.
+}
+
+\section{Introduction}
+
+The experiment reported in this paper was prompted by a challenge
+posted by Lennart Augustsson to the participants of the workshop
+on Dependent Types in Programming held at Dagstuhl in September 2004.
+The challenge was to use dependent types to write a compiler from
+C to bytecode. This paper does not meet the challenge quite literally,
+since our compiler is for a different subset of C than Augustsson's
+specification, and since the bytecode that we generate is JVM instead
+of his format. But it definitely makes use of dependent types.
+
+Augustsson's challenge did not specify \textit{how} dependent
+types are to be used, and the first of the two points we make in this
+paper (and its title) reflects our interpretation:
+we use dependent types, in combination with higher-order abstract syntax (HOAS),
+to define the grammar of the source language (here, the fragment of C).
+The grammar constitutes the single, declarative source from which
+the compiler front end is derived, comprising both parser and type
+checker.
+
+The second point, code generation by linearization, means that the
+back end is likewise implemented by a grammar of the target
+language (in this case, a fragment of JVM). This grammar is the
+declarative source from which the compiler back end is derived.
+In addition, some postprocessing is needed to
+make the code conform to Jasmin assembler requirements.
+
+The complete code of the compiler is 300 lines: 250 lines for the grammars,
+50 lines for the postprocessor. The code
+is presented in the appendices of this paper.
+
+
+
+\section{The Grammatical Framework}
+
+The tool we have used for implementing the compiler is
+GF, the \empha{Grammatical Framework} \cite{gf-jfp}. GF
+is similar to a Logical Framework (LF)
+\cite{harper-honsell}
+extended with
+a notation for defining concrete syntax. GF was originally
+designed to help building multilingual
+translation systems for natural languages and also
+between formal and natural languages. The translation model
+implemented by GF is very simple:
+\begin{verbatim}
+ parsing linearization
+ ------------> ------------>
+ Language_1 Abstract Syntax Language_2
+ <------------ <------------
+ linearization parsing
+\end{verbatim}
+An abstract syntax is similar to a \empha{theory}, or a
+\empha{signature} in a logical framework. A
+concrete syntax defines, in a declarative way,
+a translation of abstract syntax trees (well-formed terms)
+into concrete language structures, and from this definition, one can
+derive both linearization and parsing.
+
+To give an example,
+a (somewhat simplified) translator for addition expressions
+consists of the abstract syntax rule
+\begin{verbatim}
+ fun EAdd : (A : Typ) -> Exp A -> Exp A -> Exp A ;
+\end{verbatim}
+the C concrete syntax rule
+\begin{verbatim}
+ lin EAdd _ x y = {s = x.s ++ "+" ++ y.s ; prec = 2} ;
+\end{verbatim}
+and the JVM concrete syntax rule
+\begin{verbatim}
+ lin EAdd t x y = {s = x.s ++ y.s ++ t.s ++ "_add"} ;
+\end{verbatim}
+The abstract syntax rule uses a type argument to capture
+the fact that addition is polymorphic (which is a simplification,
+because we will restrict the rule to numeric types only)
+and that both operands have the same type as the value.
+The C rule shows that the type information is suppressed,
+and that the expression has precedence level 2 (which is a simplification,
+since we will also treat associativity).
+The JVM rule shows how addition is translated to stack machine
+instructions, where the type of the postfixed addition instruction has to
+be made explicit. Our compiler, like any GF translation system, will
+consist of rules like these.
+
+The number of languages related to one abstract syntax in
+a translation system is of course not limited to two.
+Sometimes just one language is involved;
+GF then works much the same way as any grammar
+formalism or parser generator.
+The largest number of languages in an application known to us is 88;
+its domain are numeral expressions from 1 to 999,999 \cite{gf-homepage}.
+
+In addition to linearization and parsing, GF supports grammar-based
+\empha{multilingual authoring} \cite{khegai}: interactive editing
+of abstract syntax trees with immediate feedback as linearized texts,
+and the possibility to textual through the parsers.
+
+From the GF point of view, the goal of the compiler experiment
+is to investigate if GF is capable of implementing
+compilers using the ideas of single-source language definition
+and code generation as linearization. The working hypothesis
+was that it \textit{is} capable but inconvenient, and that,
+working out a complete example, we would find out what
+should be done to extend GF into a compiler construction tool.
+
+
+\subsection{Advantages and disadvantages}
+
+Due to the way in which it is built, our compiler has
+a number of unusual, yet attractive features:
+\bequ
+The front end is defined by a grammar of C as its single source.
+
+The grammar defines both abstract and concrete syntax, and also
+semantic well-formedness (types, variable scopes).
+
+The back end is implemented by means of a grammar of JVM providing
+another concrete syntax to the abstract syntax of C.
+
+As a result of the way JVM is defined, only semantically well formed
+JVM programs are generated.
+
+The JVM grammar can also be used as a decompiler, which translates
+JVM code back into C code.
+
+The language has an interactive editor that also supports incremental
+compilation.
+\enqu
+The problems that we encountered and their causes will be explained in
+the relevant sections of this report. To summarize,
+\bequ
+The scoping conditions resulting from \HOAS\ are slightly different
+from the standard ones of C.
+
+Our JVM syntax is slightly different from the specification, and
+hence needs some postprocessing.
+
+Using \HOAS\ to encode all bindings is sometimes cumbersome.
+\enqu
+The first shortcoming seems to be inevitable with the technique
+we use: just like lambda calculus, our C semantics allows
+overshadowing of earlier bindings by later ones.
+The second problem is systematically solved by using
+an intermediate JVM format, where symbolic variable addresses
+are used instead of numeric stack addresses.
+The last shortcoming is partly inherent in the problem of binding:
+to spell out, in any formal notation,
+what happens in complex binding structures \textit{is}
+complicated. But it also suggests ways in which GF could be
+tuned to give better support
+to compiler construction, which, after all, is not an intended
+use of GF as it is now.
+
+
+
+
+
+
+
+
+\section{The abstract syntax}
+
+An \empha{abstract syntax} in GF consists of \texttt{cat} judgements
+\[
+\mbox{\texttt{cat}} \; C \; \Gamma
+\]
+declaring basic types (depending on a context $\Gamma$),
+and \texttt{fun} judgements
+\[
+\mbox{\texttt{fun}} \; f \; \mbox{\texttt{:}} \; A
+\]
+declaring functions $f$ of any type $A$, which can be a basic type or
+a function type.
+\empha{Syntax trees} are well-formed terms of basic
+types, in $\eta$-long normal form.
+
+As for notation, each judgement form is recognized by
+its keyword (\texttt{cat}, \texttt{fun}, etc),
+and the same keyword governs all judgements
+until the next keyword is encountered.
+
+The abstract syntax that we will present is no doubt closer
+to C than to JVM. One reason is that what we are building is a
+\textit{C compiler}, and match with the target language is a
+secondary consideration. Another, more general reason is that
+C is a higher-level language and JVM which means, among
+other things, that C makes more semantic distinctions.
+In general, the abstract syntax of a translation system
+must reflect all semantic distinctions that can be made
+in the languages involved, and then it is a good idea to
+start with looking at what the most distinctive language
+needs.
+
+
+
+\subsection{Statements}
+
+Statements in C may involve variables, expressions, and
+other statements.
+The following \texttt{cat} judgements of GF define the syntactic categories
+that are needed to construct statements
+\begin{verbatim}
+ cat
+ Stm ;
+ Typ ;
+ Exp Typ ;
+ Var Typ ;
+\end{verbatim}
+The type \texttt{Typ} is the type of C's datatypes.
+The type of expressions is a dependent type,
+since it has a nonempty context, indicating that \texttt{Exp} takes
+a \texttt{Typ} as argument. The rules for \texttt{Exp}
+will thus be rules to construct well-typed expressions of
+a given type. \texttt{Var}\ is the type of variables,
+of a given type, that get bound in C's variable
+declarations.
+
+Let us start with the simplest kind of statements:
+declarations and assignments. The following \texttt{fun}
+rules define their abstract syntax:
+\begin{verbatim}
+ fun
+ Decl : (A : Typ) -> (Var A -> Stm) -> Stm ;
+ Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ;
+\end{verbatim}
+The \texttt{Decl}\ function captures the rule that
+a variable must be declared before it can be used or assigned to:
+its second argument is a \empha{continuation}, which is
+the sequence of statements that depend on (= may refer to)
+the declared variable.
+The \texttt{Assign} function uses dependent types to
+control that a variable is always assigned a value of proper
+type.
+
+We will treat all statements, except
+\texttt{return}s, in terms of continuations. A sequence of
+statements (which always has the type \texttt{Stm}) thus
+always ends in a \texttt{return}, or, abruptly, in
+an empty statement, \texttt{End}. Here are rules for some other
+statement forms:
+\begin{verbatim}
+ While : Exp TInt -> Stm -> Stm -> Stm ;
+ IfElse : Exp TInt -> Stm -> Stm -> Stm -> Stm ;
+ Block : Stm -> Stm -> Stm ;
+ Return : (A : Typ) -> Exp A -> Stm ;
+ End : Stm ;
+\end{verbatim}
+Here is an example of a piece of code and its abstract syntax.
+\begin{verbatim}
+ int x ; Decl (TNum TInt) (\x ->
+ x = 5 ; Assign (TNum TInt) x (EInt 5) (
+ return x ; Return (TNum TInt) (EVar (TNum TInt) x)))
+\end{verbatim}
+The details of expression and type
+syntax will be explained in the next section.
+
+Our binding syntax is more liberal than C's in two ways.
+First,
+lambda calculus permits overshadowing previous bindings
+by new ones, e.g. to write \verb6\x -> (\x -> f x)6.
+The corresponding overshadowing of declarations is not
+legal in C, within one and the same block.
+Secondly, we allow declarations anywhere in a block,
+not just in the beginning.
+The second deviation would be easy to mend, whereas
+the first one is inherent to the method of \HOAS.
+
+
+
+\subsection{Types and expressions}
+
+Our fragment of C has two types: integers and floats.
+Many operators of C are overloaded so that they can
+be used for both of these types, as well as for
+some other numeric types---but not for e.g.\ arrays
+and structures. We capture this distinction by a notion
+reminiscent of \empha{type classes}: we introduce a special
+category of numeric types, and a coercion of numeric types
+into types in general.
+\begin{verbatim}
+ cat
+ NumTyp ;
+ fun
+ TInt, TFloat : NumTyp ;
+ TNum : NumTyp -> Typ ;
+\end{verbatim}
+Well-typed expressions are built from constants,
+from variables, and by means of binary operations.
+\begin{verbatim}
+ EVar : (A : Typ) -> Var A -> Exp A ;
+ EInt : Int -> Exp (TNum TInt) ;
+ EFloat : Int -> Int -> Exp (TNum TFloat) ;
+ ELt : (n : NumTyp) -> let Ex = Exp (TNum n) in
+ Ex -> Ex -> Exp (TNum TInt) ;
+ EAdd, EMul, ESub : (n : NumTyp) -> let Ex = Exp (TNum n) in
+ Ex -> Ex -> Ex ;
+\end{verbatim}
+Notice that the category \texttt{Var} has no constructors,
+but its expressions are only created by
+variable bindings in \HOAS.
+Notice also that GF has a built-in type \texttt{Int} of
+integer literals, but floats are constructed by hand.
+
+Yet another expression form are function calls. To this
+end, we need a notion of (user-defined) functions and
+argument lists. The type of functions depends on an
+argument type list and a value type. Expression lists
+are formed to match type lists.
+\begin{verbatim}
+ cat
+ ListTyp ;
+ Fun ListTyp Typ ;
+ ListExp ListTyp ;
+
+ fun
+ EAppNil : (V : Typ) -> Fun NilTyp V -> Exp V ;
+ EApp : (AS : ListTyp) -> (V : Typ) ->
+ Fun AS V -> ListExp AS -> Exp V ;
+
+ NilTyp : ListTyp ;
+ ConsTyp : Typ -> ListTyp -> ListTyp ;
+
+ OneExp : (A : Typ) -> Exp A -> ListExp (ConsTyp A NilTyp) ;
+ ConsExp : (A : Typ) -> (AS : ListTyp) ->
+ Exp A -> ListExp AS -> ListExp (ConsExp A AS) ;
+\end{verbatim}
+The separation between zero-element applications and other
+applications is a concession to the concrete syntax of C:
+it in this way that we can control the use of commas so that
+they appear between arguments (\texttt{(x,y,z)}) but not
+after the last argument (\texttt{(x,y,z,)}).
+The compositionality of linearization (Section~\ref{compositionality} below)
+forbids case analysis on the length of the lists.
+
+
+\subsection{Functions}
+
+On the top level, a program is a sequence of functions.
+Each function may refer to functions defined earlier
+in the program. The idea to express the binding of
+function symbols with \HOAS\ is analogous to the binding
+of variables in statements, using a continuation.
+As with variables, the principal way to build function symbols is as
+bound variables (in addition, there can be some
+built-in functions, unlike in the case of variables).
+The continuation of can be recursive, which we express by
+making the function body into a part of the continuation;
+the category \texttt{Rec} is the combination of a function
+body and the subsequent function definitions.
+\begin{verbatim}
+ cat
+ Program ;
+ Rec ListTyp ;
+ fun
+ Empty : Program ;
+ Funct : (AS : ListTyp) -> (V : Typ) ->
+ (Fun AS V -> Rec AS) -> Program ;
+ FunctNil : (V : Typ) ->
+ Stm -> (Fun NilTyp V -> Program) -> Program ;
+\end{verbatim}
+For syntactic reasons similar to function application
+expressions in the previous section, we have distinguished between
+empty and non-empty argument lists.
+
+The tricky problem with function definitions
+is that they involve two nested binding constructions:
+the outer binding of the function symbol and the inner
+binding of the function parameter lists.
+For the latter, we could use
+vectors of variables, in the same way as vectors of
+expressions are used to give arguments to functions.
+However, this would lead to the need of cumbersome
+projection functions when using the parameters
+in the function body. A more elegant solution is
+to use \HOAS\ to build function bodies:
+\begin{verbatim}
+ RecOne : (A : Typ) ->
+ (Var A -> Stm) -> Program -> Rec (ConsTyp A NilTyp) ;
+ RecCons : (A : Typ) -> (AS : ListTyp) ->
+ (Var A -> Rec AS) -> Program -> Rec (ConsTyp A AS) ;
+\end{verbatim}
+The end result is an abstract syntax whose relation
+to concrete syntax is somewhat remote. Here is an example of
+the code of a function and its abstract syntax:
+\begin{verbatim}
+ let int = TNum TInt in
+ int fact Funct (ConsTyp int NilTyp) int (\fact ->
+ (int n) { RecOne int (\n ->
+ int f ; Decl int (\f ->
+ f = 1 ; Assign int f (EInt 1) (
+ while (1 < n) { While (ELt int (EInt 1) (EVar int n)) (Block (
+ f = n * f ; Assign int f (EMul int (EVar int n) (EVar int f)) (
+ n = n - 1 ; Assign int n (ESub int (EVar int n) (EInt 1))
+ } End))
+ return f ; (Return int (EVar int f))) End)))
+ } ; Empty)
+\end{verbatim}
+
+
+\subsection{The \texttt{printf} function}
+
+To give a valid type to the C function \texttt{printf}
+is one of the things that can be done with
+dependent types \cite{cayenne}. We have not defined \texttt{printf}
+in its full strength, partly because of the difficulties to compile
+it into JVM. But we use special cases of \texttt{printf} as
+statements, to be able to print values of different types.
+\begin{verbatim}
+ Printf : (A : Typ) -> Exp A -> Stm -> Stm ;
+\end{verbatim}
+
+
+
+
+
+\section{The concrete syntax of C}
+
+A concrete syntax, for a given abstract syntax,
+consists of \texttt{lincat} judgements
+\[
+\mbox{\texttt{lincat}} \; C \; \mbox{\texttt{=}} \; T
+\]
+defining the \empha{linearization types} $T$ of each category $C$,
+and \texttt{lin} judgements
+\[
+\mbox{\texttt{lin}} \; f \; \mbox{\texttt{=}} \; t
+\]
+defining the \empha{linearization functions} $t$ of each function $f$
+in the abstract syntax. The linearization functions are
+checked to be well-typed with respect the \texttt{lincat}
+definitions, and the syntax of GF forces them to be \empha{compositional}
+in the sense that the linearization of a complex tree is always
+a function of the linearizations of the subtrees. Schematically, if
+\[
+ f \colon A_{1} \rarrow \cdots \rarrow A_{n} \rarrow A
+\]
+then
+\[
+ \sugmap{f} \colon
+ \sugmap{A_{1}} \rarrow \cdots
+ \rarrow \sugmap{A_{n}} \rarrow \sugmap{A}
+\]
+and the linearization algorithm is simply
+\[
+ \sugmap{(f \; a_{1} \; \ldots \; a_{n})} \; = \;
+ \sugmap{f} \; \sugmap{a_{1}} \; \ldots \; \sugmap{a_{n}}
+\]
+using the \sugmap{} notation for both linearization types,
+linearization functions, and linearizations of trees.
+
+\label{compositionality}
+Because of compositionality, no case analysis on expressions
+is possible in linearization rules. The values of linearization
+therefore have to carry information on how they are used in
+different situations. Therefore linearization
+types are generally record types instead of just the string type.
+The simplest record type that is used in GF is
+\bece
+\verb6{s : Str}6
+\ence
+If the linearization type of a category is not explicitly
+given by a \texttt{lincat} judgement, this type is
+used by default.
+
+With \HOAS, a syntax tree can have variable-bindings in its
+constituents. The linearization of such a constituent
+is compositionally defined to be the record linearizing the body
+extended with fields for each of the variable symbols:
+\[
+\sugmap{(\lambda x_{0} \rarrow \cdots \rarrow \lambda x_{n} \rarrow b)}
+\;=\;
+\sugmap{b} *\!* \{\$_{0} = \sugmap{x_{0}} ; \ldots ; \$_{n} = \sugmap{x_{n}}\}
+\]
+Notice that the variable symbols can
+always be found because linearizable trees are in $\eta$-long normal form.
+Also notice that we are here using the
+\sugmap{} notation in yet another way, to denote the magic operation
+that converts variable symbols into strings.
+
+
+\subsection{Resource modules}
+
+Resource modules define auxiliary notions that can be
+used in concrete syntax. These notions include
+\empha{parameter types} defined by \texttt{param}
+judgements
+\[
+\mbox{\texttt{param}} \; P \; \mbox{\texttt{=}}
+ \; C_{1} \; \Gamma_{1} \; \mid \; \cdots \; \mid \;
+ \; C_{n} \; \Gamma_{n}
+\]
+and \empha{operations} defined by
+\texttt{oper} judgements
+\[
+\mbox{\texttt{oper}} \; f \; \mbox{\texttt{:}} \; T \; \mbox{\texttt{=}} \; t
+\]
+These judgements are
+similar to datatype and function definitions
+in functional programming, with the restriction
+that parameter types must be finite and operations
+may not be recursive. It is due to these restrictions that
+we can always derive a parsing algorithm from a set of
+linearization rules.
+
+In addition to types defined in \texttt{param} judgements,
+initial segments of natural numbers, \texttt{Ints n},
+can be used as parameter types. This is the most important parameter
+type we use in the syntax of C, to represent precedence.
+
+The following string operations are useful in almost
+all grammars. They are actually included in a GF \texttt{Prelude},
+but are here defined from scratch to make the code shown in
+the Appendices complete.
+\begin{verbatim}
+ oper
+ SS : Type = {s : Str} ;
+ ss : Str -> SS = \s -> {s = s} ;
+ cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ;
+ paren : Str -> Str = \str -> "(" ++ str ++ ")" ;
+\end{verbatim}
+
+
+
+\subsection{Precedence}
+
+We want to be able to recognize and generate one and the same expression with
+or without parentheses, depending on whether its precedence level
+is lower or higher than expected. For instance, a sum used as
+an operand of multiplication must be in parentheses. We
+capture this by defining a parameter type of
+precedence levels. Five levels are enough for the present
+fragment of C, so we use the enumeration type of
+integers from 0 to 4 to define the \empha{inherent precedence level}
+of an expression
+\begin{verbatim}
+ oper
+ Prec : PType = Predef.Ints 4 ;
+ PrecExp : Type = {s : Str ; p : Prec} ;
+\end{verbatim}
+in a resource module (see Appendix D), and
+\begin{verbatim}
+ lincat Exp = PrecExp ;
+\end{verbatim}
+in the concrete syntax of C itself.
+
+To build an expression that has a certain inherent precedence level,
+we use the operation
+\begin{verbatim}
+ mkPrec : Prec -> Str -> PrecExp = \p,s -> {s = s ; p = p} ;
+\end{verbatim}
+To use an expression of a given inherent level at some expected level,
+we define a function that says that, if the inherent level is lower
+than the expected level, parentheses are required.
+\begin{verbatim}
+ usePrec : PrecExp -> Prec -> Str = \x,p ->
+ ifThenStr
+ (less x.p p)
+ (paren x.s)
+ x.s ;
+\end{verbatim}
+(The code shown in Appendix D is at the moment more cumbersome,
+due to holes in the support for integer arithmetic in GF.)
+
+With the help of \texttt{mkPrec} and \texttt{usePrec},
+we can now define the main high-level operations that are
+used in the concrete syntax itself---constants (highest level),
+non-associative infixes, and left associative infixes:
+\begin{verbatim}
+ constant : Str -> PrecExp = mkPrec 4 ;
+
+ infixN : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y ->
+ mkPrec p (usePrec x (nextPrec p) ++ f ++ usePrec y (nextPrec p)) ;
+ infixL : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y ->
+ mkPrec p (usePrec x p ++ f ++ usePrec y (nextPrec p)) ;
+\end{verbatim}
+(The code in Appendix D adds to this an associativity parameter,
+which is redundant in GF, but which we use to instruct the Happy
+parser generator.)
+
+
+\subsection{Expressions}
+
+With the machinery introduced, the linearization rules of expressions
+are simple and concise:
+\begin{verbatim}
+ EVar _ x = constant x.s ;
+ EInt n = constant n.s ;
+ EFloat a b = constant (a.s ++ "." ++ b.s) ;
+ EMul _ = infixL 3 "*" ;
+ EAdd _ = infixL 2 "+" ;
+ ESub _ = infixL 2 "-" ;
+ ELt _ = infixN 1 "<" ;
+
+ EAppNil val f = constant (f.s ++ paren []) ;
+ EApp args val f exps = constant (f.s ++ paren exps.s) ;
+\end{verbatim}
+
+
+\subsection{Types}
+
+Types are expressed in two different ways:
+in declarations, we have \texttt{int} and \texttt{float}, but
+as formatting arguments to \texttt{printf}, we have
+\verb6"%d"6 and \verb6"%f"6, with the quotes belonging to the
+names. The simplest solution in GF is to linearize types
+to records with two string fields.
+\begin{verbatim}
+ lincat
+ Typ, NumTyp = {s,s2 : Str} ;
+ lin
+ TInt = {s = "int" ; s2 = "\"%d\""} ;
+ TFloat = {s = "float" ; s2 = "\"%f\""} ;
+\end{verbatim}
+
+
+\subsection{Statements}
+
+Statements in C have
+the simplest linearization type, \verb6{s : Str}6.
+We use a handful of auxiliary operations to regulate
+the use of semicolons on a high level.
+\begin{verbatim}
+ oper
+ continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ;
+ continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ;
+ statement : Str -> SS = \s -> ss (s ++ ";");
+\end{verbatim}
+As for declarations, which bind variables, we notice the
+projection \verb6.$06 to refer to the bound variable.
+Also notice the use of the \texttt{s2} field of the type
+in \texttt{printf}.
+\begin{verbatim}
+ lin
+ Decl typ cont = continues (typ.s ++ cont.$0) cont ;
+ Assign _ x exp = continues (x.s ++ "=" ++ exp.s) ;
+ While exp loop = continue ("while" ++ paren exp.s ++ loop.s) ;
+ IfElse exp t f = continue ("if" ++ paren exp.s ++ t.s ++ "else" ++ f.s) ;
+ Block stm = continue ("{" ++ stm.s ++ "}") ;
+ Printf t e = continues ("printf" ++ paren (t.s2 ++ "," ++ e.s)) ;
+ Return _ exp = statement ("return" ++ exp.s) ;
+ Returnv = statement "return" ;
+ End = ss [] ;
+\end{verbatim}
+
+
+\subsection{Functions and programs}
+
+The category \texttt{Rec} of recursive function bodies with continuations
+has three components: the function body itself, the parameter list, and
+the program that follows. We express this by a linearization type that
+contains three strings:
+\begin{verbatim}
+ lincat Rec = {s,s2,s3 : Str} ;
+\end{verbatim}
+The body construction rules accumulate the parameter list
+independently of the two other components:
+\begin{verbatim}
+ lin
+ RecOne typ stm prg = stm ** {
+ s2 = typ.s ++ stm.$0 ;
+ s3 = prg.s
+ } ;
+ RecCons typ _ body prg = {
+ s = body.s ;
+ s2 = typ.s ++ body.$0 ++ "," ++ body.s2 ;
+ s3 = prg.s
+ } ;
+\end{verbatim}
+The top-level program construction rules rearrange the three
+components into a linear structure:
+\begin{verbatim}
+ FunctNil val stm cont = ss (
+ val.s ++ cont.$0 ++ paren [] ++ "{" ++
+ stm.s ++ "}" ++ ";" ++ cont.s) ;
+ Funct args val rec = ss (
+ val.s ++ rec.$0 ++ paren rec.s2 ++ "{" ++
+ rec.s ++ "}" ++ ";" ++ rec.s3) ;
+\end{verbatim}
+
+
+%%\subsection{Lexing and unlexing}
+
+
+
+\section{The concrete syntax of JVM}
+
+JVM syntax is, linguistically, more straightforward than
+the syntax of C, and could even be defined by a regular
+expression. However, the JVM syntax that our compiler
+generates does not comprise full JVM, but only the fragment
+that corresponds to well-formed C programs.
+
+The JVM syntax we use is a symbolic variant of the Jasmin assembler
+\cite{jasmin}.
+The main deviation from Jasmin are
+variable addresses, as described in Section~\ref{postproc}.
+The other deviations have to do with spacing: the normal
+unlexer of GF puts spaces between constituents, whereas
+in JVM, type names are integral parts of instruction names.
+We indicate gluing uniformly by generating an underscore
+on the side from which the adjacent element is glued. Thus
+e.g.\ \verb6i _load6 becomes \verb6iload6.
+
+
+\subsection{Symbolic JVM}
+\label{postproc}
+
+What makes the translation from our abstract syntax to JVM
+tricky is that variables must be replaced by
+numeric addresses (relative to the frame pointer).
+Code generation must therefore maintain a symbol table that permits
+the lookup of variable addresses. As shown in the code
+in Appendix C, we do not treat symbol tables
+in linearization, but instead generated code in
+\empha{Symbolic JVM}---that is, JVM with symbolic addresses.
+Therefore we need a postprocessor that resolves the symbolic addresses,
+shown in Appendix E.
+
+To make the postprocessor straightforward,
+Symbolic JVM has special \texttt{alloc} instructions,
+which are not present in real JVM.
+Our compiler generates \texttt{alloc} instructions from
+variable declarations.
+The postprocessor comments out the \texttt{alloc} instructions, but we
+found it a good idea not to erase them completely, since they make the
+code more readable.
+
+The following example shows how the three representations (C, Symbolic JVM, JVM)
+look like for a piece of code.
+\begin{verbatim}
+ int x ; alloc i x ; x gets address 0
+ int y ; alloc i y ; y gets address 1
+ x = 5 ; ldc 5 ldc 5
+ i _store x istore 0
+ y = x ; i _load x iload 0
+ i _store y istore 1
+\end{verbatim}
+
+
+\subsection{Labels and jumps}
+
+A problem related to variable addresses
+is the generation of fresh labels for
+jumps. We solve this in linearization
+by maintaining a growing label suffix
+as a field of the linearization of statements into
+instructions. The problem remains that statements on the
+same nesting level, e.g.\ the two branches
+of an \texttt{if-else} statement can use the same
+labels. Making them unique must be
+added to the post-processing pass. This is
+always possible, because labels are nested in a
+disciplined way, and jumps can never go to remote labels.
+
+As it turned out laborious to thread the label counter
+to expressions, we decided to compile comparison
+expressions (\verb6x < y6) into function calls, and provide the functions in
+a run-time library. This will no more work for the
+conjunction (\verb6x && y6)
+and disjunction (\verb6x || y6), if we want to keep their semantics
+lazy, since function calls are strict in their arguments.
+
+
+
+
+
+\subsection{How to restore code generation by linearization}
+
+Since postprocessing is needed, we have not quite achieved
+the goal of code generation as linearization---if
+linearization is understood in the
+sense of GF. In GF, linearization can only depend
+on parameters from finite parameter sets. Since the size of
+a symbol table can grow indefinitely, it is not
+possible to encode linearization with updates to and
+lookups from a symbol table, as is usual in code generation.
+
+One attempt we made to achieve JVM linearization with
+numeric addresses was to alpha-convert abstract syntax syntax trees
+so that variables get indexed with integers that indicate their
+depths in the tree. This hack works in the present fragment of C
+because all variables need the same amount of memory (one word),
+but would break down if we added double-precision floats. Therefore
+we have used the less pure (from the point of view of
+code generation as linearization) method of
+symbolic addresses.
+
+It would certainly be possible to generate variable addresses
+directly in the syntax trees by using dependent types; but this
+would clutter the abstract
+syntax in a way that is hard to motivate when we are in
+the business of describing the syntax of C. The abstract syntax would
+have to, so to say, anticipate all demands of the compiler's
+target languages.
+
+
+\subsection{Problems with the JVM bytecode verifier}
+
+An inherent restriction for linearization in GF is compositionality.
+This prevents optimizations during linearization
+by clever instruction selection, elimination of superfluous
+labels and jumps, etc. One such optimization, the removal
+of unreachable code (i.e.\ code after a \texttt{return} instruction)
+is actually required by the JVM byte code verifier.
+The solution is, again, to perform this optimization in postprocessing.
+What we currently do, however, is to be careful and write
+C programs so that they always end with a return statement in the
+outermost block.
+
+Another problem related to \texttt{return} instructions is that
+both C and JVM programs have a designated \texttt{main} function.
+This function must have a certain type, which is different in C and
+JVM. In C, \texttt{main} returns an integer encoding what
+errors may have happend during execution. The JVM
+\texttt{main}, on the other hand, returns a \texttt{void}, i.e.\
+no value at all. A \texttt{main} program returning an
+integer therefore provokes a JVM bytecode verifier error.
+The postprocessor could take care of this; but currently
+we just write programs with void \texttt{return}s in the
+\texttt{main} functions.
+
+The parameter list of \texttt{main} is also different in C (empty list)
+and JVM (a string array \texttt{args}). We handle this problem
+with an \empha{ad hoc} postprocessor rule.
+
+Every function prelude in JVM must indicate the maximum space for
+local variables, and the maximum evaluation stack space (within
+the function's own stack frame). The locals limit is computed in
+linearization by maintaining a counter field. The stack limit
+is blindly set to 1000; it would be possible to set an
+accurate limit in the postprocessing phase.
+
+
+\section{Translation as linearization vs.\ transfer}
+
+Many of the problems we have encountered in code generation by
+linearization are familiar from
+translation systems for natural languages. For instance, to translate
+the English pronoun \eex{you} to German, you have to choose
+between \eex{du, ihr, Sie}; for Italian, there are four
+variants, and so on. To deal with this by linearization,
+all semantic distinctions made in any of the involved languages
+have to be present in the common abstract syntax. The usual solution to
+this problem is not a universal abstract syntax, but
+\empha{transfer}: translation does not just linearize
+the same syntax trees to another language, but uses
+a noncompositional function that translates
+trees of one language into trees of another.
+
+Using transfer in the
+back end is precisely what traditional compilers do.
+The transfer function in our case would be a noncompositional
+function from the abstract syntax of C to a different abstract
+syntax of JVM. The abstract syntax notation of GF permits
+definitions of functions, and the GF interpreter can be used
+for evaluating terms into normal form. Thus one could write
+the code generator just like in any functional language:
+by sending in an environment and a syntax tree, and
+returning a new environment with an instruction list:
+\begin{verbatim}
+ fun
+ transStm : Env -> Stm -> EnvInstr ;
+ def
+ transStm env (Decl typ cont) = ...
+ transStm env (While (ELt a b) stm cont) = ...
+ transStm env (While exp stm cont) = ...
+\end{verbatim}
+This would be cumbersome in practice, because
+GF does not have programming-language facilities
+like built-in lists and tuples, or monads. Moreover,
+the compiler could no longer be inverted into a decompiler,
+in the way true linearization can be inverted into a parser.
+
+
+
+\section{Parser generation}
+\label{bnfc}
+
+The whole GF part of the compiler (parser, type checker, Symbolic JVM
+generator) can be run in the GF interpreter.
+The weakest point of the resulting compiler, by current
+standards, is the parser. GF is a powerful grammar formalism, which
+needs a very general parser, taking care of ambiguities and other
+problems that are typical of natural languages but should be
+overcome in programming languages by design. The parser is moreover run
+in an interpreter that takes the grammar (in a suitably compiled form)
+as an argument.
+
+Fortunately, it is easy to replace the generic, interpreting GF parser
+by a compiled LR(1) parser. GF supports the translation of a concrete
+syntax into the \empha{Labelled BNF} (LBNF) format, %% \cite{lbnf},
+which in turn can be translated to parser generator code
+(Happy, Bison, or JavaCUP), by the BNF Converter \cite{bnfc}.
+The parser we are therefore using in the compiler is a Haskell
+program generated by Happy \cite{happy}.
+
+We regard parser generation
+as a first step towards developing GF into a
+production-quality compiler compiler. The efficiency of the parser
+is not the only relevant thing. Another advantage of an LR(1)
+parser generator is that it performs an analysis on the grammar
+finding conflicts, and provides a debugger. It may be
+difficult for a human to predict how a context-free grammar
+performs at parsing; it is much more difficult to do this for
+a grammar written in the abstract way that GF permits (cf.\ the
+example in Appendix B).
+
+The current version of the C grammar is ambiguous. GF's own
+parser returns all alternatives, whereas the parser generated by
+Happy rules out some of them by its normal conflict handling
+policy. This means, in practice, that extra brackets are
+sometimes needed to group staments together.
+
+
+\subsection{Another notation for \HOAS}
+
+Describing variable bindings with \HOAS\ is sometimes considered
+unintuitive. Let us consider the declaration rule of C (without
+type dependencies for simplicity):
+\begin{verbatim}
+ fun Decl : Typ -> (Var -> Stm) -> Stm ;
+ lin Decl typ stm = {s = typ.s ++ stm.$0 ++ ";" ++stm.s} ;
+\end{verbatim}
+Compare this with a corresponding LBNF rule (also using a continuation):
+\begin{verbatim}
+ Decl. Stm ::= Typ Ident ";" Stm ;
+\end{verbatim}
+To explain bindings attached to this rule, one can say, in natural language,
+that the identifier gets bound in the statement that follows.
+This means that syntax trees formed by this rule do not have
+the form \verb6(Decl typ x stm)6, but the form \verb6(Decl typ (\x -> stm))6.
+
+One way to formalize the informal binding rules stated beside
+BNF rules is to use \empha{profiles}: data structures describing
+the way in which the logical arguments of the syntax tree are
+represented by the linearized form. The declaration rule can be
+written using a profile notation as follows:
+\begin{verbatim}
+ Decl [1,(2)3]. Stm ::= Typ Ident ";" Stm ;
+\end{verbatim}
+When compiling GF grammars into LBNF, we were forced to enrich
+LBNF by a (more general) profile notation
+(cf.\ \cite{gf-jfp}, Section 3.3). This suggested at the same
+time that profiles could provide a user-fiendly notation for
+\HOAS\ avoiding the explicit use of lambda calculus.
+
+
+
+\section{Using the compiler}
+
+Our compiler is invoked, of course, by the command \texttt{gfcc}.
+It produces a JVM \texttt{.class} file, by running the
+Jasmin bytecode assembler \cite{jasmin} on a Jasmin (\texttt{.j})
+file:
+\begin{verbatim}
+ % gfcc factorial.c
+ > > wrote file factorial.j
+ Generated: factorial.class
+\end{verbatim}
+The Jasmin code is produced by a postprocessor, written in Haskell
+(Appendix E), from the Symbolic JVM format that is produced by
+linearization. The reasons why actual Jasmin is not generated
+by linearization are explained in Section~\ref{postproc} above.
+
+In addition to the batch compiler, GF provides an interactive
+syntax editor, in which C programs can be constructed by
+stepwise refinements, local changes, etc.\ \cite{khegai}. The user of the
+editor can work simultaneously on all languages involved.
+In our case, this means that changes can be done both to
+the C code and to the JVM code, and they are automatically
+carried over from one language to the other.
+\commentOut{
+A screen dump of the editor is shown in Fig~\ref{demo}.
+
+\begin{figure}
+\centerline{\psfig{figure=demo2.epsi}} \caption{
+GF editor session where an integer
+expression is expected to be given. The left window shows the
+abstract syntax tree, and the right window the evolving C and
+JVM code. The editor focus is shadowed, and the refinement alternatives
+are shown in a pop-up window.
+}
+\label{demo}
+\end{figure}
+}
+
+
+\section{Related work}
+
+The theoretical ideas behind our compiler experiment
+are familiar from various sources.
+Single-source language and compiler definitions
+can be built using attribute grammars \cite{knuth-attr}.
+The use of
+dependent types in combination with higher-order abstract syntax
+has been studied in various logical frameworks
+\cite{harper-honsell,magnusson-nordstr,twelf}.
+The addition of linearization rules to type-theoretical
+abstract syntax is studied in \cite{semBNF}, which also
+compares the method with attribute grammars.
+
+The idea of using a common abstract syntax for different
+languages was clearly exposed by Landin \cite{landin}. The view of
+code generation as linearization is a central aspect of
+the classic compiler textbook by Aho, Sethi, and Ullman
+\cite{aho-ullman}.
+The use of one and the same grammar both for parsing and linearization
+is a guiding principle of unification-based linguistic grammar
+formalisms \cite{pereira-shieber}. Interactive editors derived from
+grammars have been developed in various programming and proof
+assistants \cite{teitelbaum,metal,magnusson-nordstr}.
+
+Even though the different ideas are well-known,
+we have not seen them used together to construct a complete
+compiler. In our view, putting these ideas together is
+an attractive approach to compiling, since a compiler written
+in this way is completely declarative, hence concise,
+and therefore easy to modify and extend. What is more, if
+a new language construct is added, the GF type checker
+verifies that the addition is propagated to all components
+of the compiler. As the implementation is declarative,
+it is also self-documenting, since a human-readable
+grammar defines the syntax and static
+semantics that is actually used in the implementation.
+
+
+\section{Conclusion}
+
+The \texttt{gfcc} compiler translates a representative
+fragment of C to JVM, and growing the fragment
+does not necessarily pose any new kinds of problems.
+Using \HOAS\ and dependent types to describe the abstract
+syntax of C works fine, and defining the concrete syntax
+of C on top of this using GF linearization machinery is
+possible. To build a parser that is more efficient than
+GF's generic one, GF offers code generation for standard
+parser tools.
+
+One result of the experiment is the beginning of a
+library for dealing with typical programming language structures
+such as precedence. This library is exploited in the parser
+generator, which maps certain parameters used into GF grammars
+into precedence directives in labelled BNF grammars.
+
+The most serious difficulty with JVM code generation by linearization
+is to maintain a symbol table mapping variables to addresses.
+The solution we have chosen is to generate Symbolic JVM, that is,
+JVM with symbolic addresses, and translate the symbolic addresses to
+(relative) memory locations by a postprocessor.
+
+Since the postprocessor works uniformly for the whole Symbolic JVM,
+building a new compiler to generate JVM should now be
+possible by just writing GF grammars. The most immediate
+idea for developing GF as a compiler tool is to define
+a similar symbolic format for an intermediate language,
+which uses three-operand code and virtual registers.
+
+
+
+\bibliographystyle{plain}
+
+\bibliography{gf-bib}
+
+
+\newpage
+\subsection*{Appendix A: The abstract syntax}
+
+\small
+\begin{verbatim}
+abstract Imper = PredefAbs ** {
+ cat
+ Program ;
+ Rec ListTyp ;
+ Typ ;
+ NumTyp ;
+ ListTyp ;
+ Fun ListTyp Typ ;
+ Body ListTyp ;
+ Stm ;
+ Exp Typ ;
+ Var Typ ;
+ ListExp ListTyp ;
+
+ fun
+ Empty : Program ;
+ Funct : (AS : ListTyp) -> (V : Typ) -> (Fun AS V -> Rec AS) -> Program ;
+ FunctNil : (V : Typ) -> Stm -> (Fun NilTyp V -> Program) -> Program ;
+ RecOne : (A : Typ) -> (Var A -> Stm) -> Program -> Rec (ConsTyp A NilTyp) ;
+ RecCons : (A : Typ) -> (AS : ListTyp) ->
+ (Var A -> Rec AS) -> Program -> Rec (ConsTyp A AS) ;
+
+ Decl : (A : Typ) -> (Var A -> Stm) -> Stm ;
+ Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ;
+ While : Exp (TNum TInt) -> Stm -> Stm -> Stm ;
+ IfElse : Exp (TNum TInt) -> Stm -> Stm -> Stm -> Stm ;
+ Block : Stm -> Stm -> Stm ;
+ Printf : (A : Typ) -> Exp A -> Stm -> Stm ;
+ Return : (A : Typ) -> Exp A -> Stm ;
+ Returnv : Stm ;
+ End : Stm ;
+
+ EVar : (A : Typ) -> Var A -> Exp A ;
+ EInt : Int -> Exp (TNum TInt) ;
+ EFloat : Int -> Int -> Exp (TNum TFloat) ;
+ ELt : (n : NumTyp) -> let Ex = Exp (TNum n) in Ex -> Ex -> Exp (TNum TInt) ;
+ EAdd, EMul, ESub : (n : NumTyp) -> let Ex = Exp (TNum n) in Ex -> Ex -> Ex ;
+ EAppNil : (V : Typ) -> Fun NilTyp V -> Exp V ;
+ EApp : (AS : ListTyp) -> (V : Typ) -> Fun AS V -> ListExp AS -> Exp V ;
+
+ TNum : NumTyp -> Typ ;
+ TInt, TFloat : NumTyp ;
+ NilTyp : ListTyp ;
+ ConsTyp : Typ -> ListTyp -> ListTyp ;
+ OneExp : (A : Typ) -> Exp A -> ListExp (ConsTyp A NilTyp) ;
+ ConsExp : (A : Typ) -> (AS : ListTyp) ->
+ Exp A -> ListExp AS -> ListExp (ConsTyp A AS) ;
+}
+\end{verbatim}
+\normalsize
+\newpage
+
+
+\subsection*{Appendix B: The concrete syntax of C}
+
+\small
+\begin{verbatim}
+concrete ImperC of Imper = open ResImper in {
+ flags lexer=codevars ; unlexer=code ; startcat=Program ;
+
+ lincat
+ Exp = PrecExp ;
+ Typ, NumTyp = {s,s2 : Str} ;
+ Rec = {s,s2,s3 : Str} ;
+ lin
+ Empty = ss [] ;
+ FunctNil val stm cont = ss (
+ val.s ++ cont.$0 ++ paren [] ++ "{" ++ stm.s ++ "}" ++ ";" ++ cont.s) ;
+ Funct args val rec = ss (
+ val.s ++ rec.$0 ++ paren rec.s2 ++ "{" ++ rec.s ++ "}" ++ ";" ++ rec.s3) ;
+ RecOne typ stm prg = stm ** {
+ s2 = typ.s ++ stm.$0 ;
+ s3 = prg.s
+ } ;
+ RecCons typ _ body prg = {
+ s = body.s ;
+ s2 = typ.s ++ body.$0 ++ "," ++ body.s2 ;
+ s3 = prg.s
+ } ;
+
+ Decl typ cont = continues (typ.s ++ cont.$0) cont ;
+ Assign _ x exp = continues (x.s ++ "=" ++ exp.s) ;
+ While exp loop = continue ("while" ++ paren exp.s ++ loop.s) ;
+ IfElse exp t f = continue ("if" ++ paren exp.s ++ t.s ++ "else" ++ f.s) ;
+ Block stm = continue ("{" ++ stm.s ++ "}") ;
+ Printf t e = continues ("printf" ++ paren (t.s2 ++ "," ++ e.s)) ;
+ Return _ exp = statement ("return" ++ exp.s) ;
+ Returnv = statement "return" ;
+ End = ss [] ;
+
+ EVar _ x = constant x.s ;
+ EInt n = constant n.s ;
+ EFloat a b = constant (a.s ++ "." ++ b.s) ;
+ EMul _ = infixL 3 "*" ;
+ EAdd _ = infixL 2 "+" ;
+ ESub _ = infixL 2 "-" ;
+ ELt _ = infixN 1 "<" ;
+ EAppNil val f = constant (f.s ++ paren []) ;
+ EApp args val f exps = constant (f.s ++ paren exps.s) ;
+
+ TNum t = t ;
+ TInt = {s = "int" ; s2 = "\"%d\""} ; TFloat = {s = "float" ; s2 = "\"%f\""} ;
+ NilTyp = ss [] ; ConsTyp = cc2 ;
+ OneExp _ e = e ; ConsExp _ _ e es = ss (e.s ++ "," ++ es.s) ;
+}
+\end{verbatim}
+\normalsize
+\newpage
+
+
+\subsection*{Appendix C: The concrete syntax of JVM}
+
+\small
+\begin{verbatim}
+concrete ImperJVM of Imper = open ResImper in {
+ flags lexer=codevars ; unlexer=code ; startcat=Program ;
+
+ lincat
+ Rec = {s,s2,s3 : Str} ; -- code, storage for locals, continuation
+ Stm = Instr ;
+
+ lin
+ Empty = ss [] ;
+ FunctNil val stm cont = ss (
+ ".method" ++ "public" ++ "static" ++ cont.$0 ++ paren [] ++ val.s ++ ";" ++
+ ".limit" ++ "locals" ++ stm.s2 ++ ";" ++
+ ".limit" ++ "stack" ++ "1000" ++ ";" ++
+ stm.s ++
+ ".end" ++ "method" ++ ";" ++ ";" ++
+ cont.s
+ ) ;
+ Funct args val rec = ss (
+ ".method" ++ "public" ++ "static" ++ rec.$0 ++ paren args.s ++ val.s ++ ";" ++
+ ".limit" ++ "locals" ++ rec.s2 ++ ";" ++
+ ".limit" ++ "stack" ++ "1000" ++ ";" ++
+ rec.s ++
+ ".end" ++ "method" ++ ";" ++ ";" ++
+ rec.s3
+ ) ;
+
+ RecOne typ stm prg = instrb typ.s (
+ ["alloc"] ++ typ.s ++ stm.$0 ++ stm.s2) {s = stm.s ; s2 = stm.s2 ; s3 = prg.s};
+
+ RecCons typ _ body prg = instrb typ.s (
+ ["alloc"] ++ typ.s ++ body.$0 ++ body.s2)
+ {s = body.s ; s2 = body.s2 ; s3 = prg.s};
+
+ Decl typ cont = instrb typ.s (
+ ["alloc"] ++ typ.s ++ cont.$0
+ ) cont ;
+ Assign t x exp = instrc (exp.s ++ t.s ++ "_store" ++ x.s) ;
+ While exp loop =
+ let
+ test = "TEST_" ++ loop.s2 ;
+ end = "END_" ++ loop.s2
+ in instrl (
+ "label" ++ test ++ ";" ++
+ exp.s ++
+ "ifeq" ++ end ++ ";" ++
+ loop.s ++
+ "goto" ++ test ++ ";" ++
+ "label" ++ end
+ ) ;
+ IfElse exp t f =
+ let
+ false = "FALSE_" ++ t.s2 ++ f.s2 ;
+ true = "TRUE_" ++ t.s2 ++ f.s2
+ in instrl (
+ exp.s ++
+ "ifeq" ++ false ++ ";" ++
+ t.s ++
+ "goto" ++ true ++ ";" ++
+ "label" ++ false ++ ";" ++
+ f.s ++
+ "label" ++ true
+ ) ;
+ Block stm = instrc stm.s ;
+ Printf t e = instrc (e.s ++ "invokestatic" ++ t.s ++ "runtime/printf" ++ paren (t.s) ++ "v") ;
+ Return t exp = instr (exp.s ++ t.s ++ "_return") ;
+ Returnv = instr "return" ;
+ End = ss [] ** {s2,s3 = []} ;
+
+ EVar t x = instr (t.s ++ "_load" ++ x.s) ;
+ EInt n = instr ("ldc" ++ n.s) ;
+ EFloat a b = instr ("ldc" ++ a.s ++ "." ++ b.s) ;
+ EAdd = binopt "_add" ;
+ ESub = binopt "_sub" ;
+ EMul = binopt "_mul" ;
+ ELt t = binop ("invokestatic" ++ t.s ++ "runtime/lt" ++ paren (t.s ++ t.s) ++ "i") ;
+ EAppNil val f = instr (
+ "invokestatic" ++ f.s ++ paren [] ++ val.s
+ ) ;
+ EApp args val f exps = instr (
+ exps.s ++
+ "invokestatic" ++ f.s ++ paren args.s ++ val.s
+ ) ;
+
+ TNum t = t ;
+ TInt = ss "i" ;
+ TFloat = ss "f" ;
+ NilTyp = ss [] ;
+ ConsTyp = cc2 ;
+ OneExp _ e = e ;
+ ConsExp _ _ = cc2 ;
+}
+\end{verbatim}
+\normalsize
+\newpage
+
+\subsection*{Appendix D: Auxiliary operations for concrete syntax}
+
+\small
+\begin{verbatim}
+resource ResImper = open Predef in {
+
+ -- precedence
+
+ param PAssoc = PN | PL | PR ;
+
+ oper
+ Prec : PType = Predef.Ints 4 ;
+ PrecExp : Type = {s : Str ; p : Prec ; a : PAssoc} ;
+
+ mkPrec : Prec -> PAssoc -> Str -> PrecExp = \p,a,f ->
+ {s = f ; p = p ; a = a} ;
+
+ usePrec : PrecExp -> Prec -> Str = \x,p ->
+ case <<x.p,p> : Prec * Prec> of {
+ <3,4> | <2,3> | <2,4> => paren x.s ;
+ <1,1> | <1,0> | <0,0> => x.s ;
+ <1,_> | <0,_> => paren x.s ;
+ _ => x.s
+ } ;
+
+ constant : Str -> PrecExp = mkPrec 4 PN ;
+
+ infixN : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y ->
+ mkPrec p PN (usePrec x (nextPrec p) ++ f ++ usePrec y (nextPrec p)) ;
+ infixL : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y ->
+ mkPrec p PL (usePrec x p ++ f ++ usePrec y (nextPrec p)) ;
+ infixR : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y ->
+ mkPrec p PR (usePrec x (nextPrec p) ++ f ++ usePrec y p) ;
+
+ nextPrec : Prec -> Prec = \p -> case <p : Prec> of {
+ 4 => 4 ;
+ n => Predef.plus n 1
+ } ;
+
+ -- string operations
+
+ SS : Type = {s : Str} ;
+ ss : Str -> SS = \s -> {s = s} ;
+ cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ;
+ paren : Str -> Str = \str -> "(" ++ str ++ ")" ;
+
+ continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ;
+ continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ;
+ statement : Str -> SS = \s -> ss (s ++ ";");
+
+ -- operations for JVM
+
+ Instr : Type = {s,s2,s3 : Str} ; -- code, variables, labels
+ instr : Str -> Instr = \s ->
+ statement s ** {s2,s3 = []} ;
+ instrc : Str -> Instr -> Instr = \s,i ->
+ ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = i.s3} ;
+ instrl : Str -> Instr -> Instr = \s,i ->
+ ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = "L" ++ i.s3} ;
+ instrb : Str -> Str -> Instr -> Instr = \v,s,i ->
+ ss (s ++ ";" ++ i.s) ** {s2 = v ++ i.s2 ; s3 = i.s3} ;
+ binop : Str -> SS -> SS -> SS = \op, x, y ->
+ ss (x.s ++ y.s ++ op ++ ";") ;
+ binopt : Str -> SS -> SS -> SS -> SS = \op, t ->
+ binop (t.s ++ op) ;
+}
+\end{verbatim}
+\normalsize
+\newpage
+
+
+\subsection*{Appendix E: Translation of Symbolic JVM to Jasmin}
+
+\small
+\begin{verbatim}
+module Main where
+import Char
+import System
+
+main :: IO ()
+main = do
+ jvm:src:_ <- getArgs
+ s <- readFile jvm
+ let cls = takeWhile (/='.') src
+ let obj = cls ++ ".j"
+ writeFile obj $ boilerplate cls
+ appendFile obj $ mkJVM cls s
+ putStrLn $ "wrote file " ++ obj
+
+mkJVM :: String -> String -> String
+mkJVM cls = unlines . reverse . fst . foldl trans ([],([],0)) . lines where
+ trans (code,(env,v)) s = case words s of
+ ".method":p:s:f:ns
+ | f == "main" -> (".method public static main([Ljava/lang/String;)V":code,([],1))
+ | otherwise -> (unwords [".method",p,s, f ++ typesig ns] : code,([],0))
+ "alloc":t:x:_ -> (("; " ++ s):code, ((x,v):env, v + size t))
+ ".limit":"locals":ns -> chCode (".limit locals " ++ show (length ns))
+ "invokestatic":t:f:ns | take 8 f == "runtime/" ->
+ chCode $ "invokestatic " ++ "runtime/" ++ t ++ drop 8 f ++ typesig ns
+ "invokestatic":f:ns -> chCode $ "invokestatic " ++ cls ++ "/" ++ f ++ typesig ns
+ "alloc":ns -> chCode $ "; " ++ s
+ t:('_':instr):[";"] -> chCode $ t ++ instr
+ t:('_':instr):x:_ -> chCode $ t ++ instr ++ " " ++ look x
+ "goto":ns -> chCode $ "goto " ++ label ns
+ "ifeq":ns -> chCode $ "ifeq " ++ label ns
+ "label":ns -> chCode $ label ns ++ ":"
+ ";":[] -> chCode ""
+ _ -> chCode s
+ where
+ chCode c = (c:code,(env,v))
+ look x = maybe (error $ x ++ show env) show $ lookup x env
+ typesig = init . map toUpper . concat
+ label = init . concat
+ size t = case t of
+ "d" -> 2
+ _ -> 1
+
+boilerplate :: String -> String
+boilerplate cls = unlines [
+ ".class public " ++ cls, ".super java/lang/Object",
+ ".method public <init>()V","aload_0",
+ "invokenonvirtual java/lang/Object/<init>()V","return",
+ ".end method"]
+\end{verbatim}
+\normalsize
+\newpage
+
+
+
+
+
+
+\end{document}
+
+\begin{verbatim}
+\end{verbatim}
+
diff --git a/examples/gfcc/factorial.c b/examples/gfcc/factorial.c
new file mode 100644
index 000000000..76fee32d0
--- /dev/null
+++ b/examples/gfcc/factorial.c
@@ -0,0 +1,38 @@
+int fact (int n) {
+ int f ;
+ f = 1 ;
+ {
+ while (1 < n) {
+ f = n * f ;
+ n = n - 1 ;
+ }
+ }
+ return f ;
+} ;
+
+int factr (int n) {
+ int f ;
+ {
+ if (n < 2) {
+ f = 1 ;
+ }
+ else {
+ f = n * factr (n-1) ;
+ }
+ }
+ return f ;
+} ;
+
+int main () {
+ int n ;
+ n = 1 ;
+ {
+ while (n < 11) {
+ printf("%d",fact(n)) ;
+ printf("%d",factr(n)) ;
+ n = n+1 ;
+ }
+ }
+ return ;
+} ;
+
diff --git a/examples/gfcc/fibonacci.c b/examples/gfcc/fibonacci.c
new file mode 100644
index 000000000..80e8a0d5c
--- /dev/null
+++ b/examples/gfcc/fibonacci.c
@@ -0,0 +1,18 @@
+int mx () {
+ return 5000000 ;
+} ;
+
+int main () {
+ int lo ; int hi ;
+ lo = 1 ;
+ hi = lo ;
+ printf("%d",lo) ;
+ {
+ while (hi < mx()) {
+ printf("%d",hi) ;
+ hi = lo + hi ;
+ lo = hi - lo ;
+ }
+ }
+ return ;
+} ;
diff --git a/examples/gfcc/runtime.j b/examples/gfcc/runtime.j
new file mode 100644
index 000000000..88db0b9b8
--- /dev/null
+++ b/examples/gfcc/runtime.j
@@ -0,0 +1,55 @@
+.class public runtime
+.super java/lang/Object
+;
+; standard initializer
+.method public <init>()V
+ aload_0
+ invokenonvirtual java/lang/Object/<init>()V
+ return
+.end method
+
+.method public static ilt(II)I
+.limit locals 2
+.limit stack 2
+ iload_0
+ iload_1
+ if_icmpge Label0
+ iconst_1
+ ireturn
+ Label0:
+ iconst_0
+ ireturn
+.end method
+
+.method public static flt(FF)I
+.limit locals 2
+.limit stack 2
+ fload_0
+ fload_1
+ fcmpl
+ ifge Label0
+ iconst_1
+ ireturn
+ Label0:
+ iconst_0
+ ireturn
+.end method
+
+.method public static iprintf(I)V
+.limit locals 1
+.limit stack 1000
+ getstatic java/lang/System/out Ljava/io/PrintStream;
+ iload_0
+ invokevirtual java/io/PrintStream/println(I)V
+ return
+.end method
+
+.method public static fprintf(F)V
+.limit locals 1
+.limit stack 1000
+ getstatic java/lang/System/out Ljava/io/PrintStream;
+ fload_0
+ invokevirtual java/io/PrintStream/println(F)V
+ return
+.end method
+