diff options
Diffstat (limited to 'examples/gfcc')
| -rw-r--r-- | examples/gfcc/CleanJVM.hs | 59 | ||||
| -rw-r--r-- | examples/gfcc/Imper.gf | 53 | ||||
| -rw-r--r-- | examples/gfcc/ImperC.gf | 56 | ||||
| -rw-r--r-- | examples/gfcc/ImperJVM.gf | 93 | ||||
| -rw-r--r-- | examples/gfcc/Makefile | 12 | ||||
| -rw-r--r-- | examples/gfcc/README | 21 | ||||
| -rw-r--r-- | examples/gfcc/ResImper.gf | 85 | ||||
| -rw-r--r-- | examples/gfcc/abs.c | 20 | ||||
| -rw-r--r-- | examples/gfcc/complin.bbl | 96 | ||||
| -rw-r--r-- | examples/gfcc/complin.tex | 1488 | ||||
| -rw-r--r-- | examples/gfcc/factorial.c | 38 | ||||
| -rw-r--r-- | examples/gfcc/fibonacci.c | 18 | ||||
| -rw-r--r-- | examples/gfcc/gfcc | 2 | ||||
| -rw-r--r-- | examples/gfcc/runtime.j | 55 |
14 files changed, 0 insertions, 2096 deletions
diff --git a/examples/gfcc/CleanJVM.hs b/examples/gfcc/CleanJVM.hs deleted file mode 100644 index 7c4c1bb54..000000000 --- a/examples/gfcc/CleanJVM.hs +++ /dev/null @@ -1,59 +0,0 @@ -module Main where - -import Char -import System - ---- translation from Symbolic JVM to real Jasmin code - -main :: IO () -main = do - jvm:src:_ <- getArgs - s <- readFile jvm - let cls = takeWhile (/='.') src - let obj = cls ++ ".j" - writeFile obj $ boilerplate cls - appendFile obj $ mkJVM cls s - putStrLn $ "wrote file " ++ obj - system $ "java -jar jasmin.jar " ++ obj - return () - -mkJVM :: String -> String -> String -mkJVM cls = unlines . reverse . fst . foldl trans ([],([],0)) . liness where - trans (code,(env,v)) s = case words s of - ".method":p:s:f:ns - | f == "main" -> - (".method public static main([Ljava/lang/String;)V":code,([],1)) - | otherwise -> - (unwords [".method",p,s, f ++ glue ns] : code,([],0)) - "alloc":t:x:_ -> (("; " ++ s):code, ((x,v):env, v + size t)) - ".limit":"locals":ns -> chCode (".limit locals " ++ show (length ns + 1)) - "runtime":f:ns -> chCode $ "invokestatic " ++ "runtime/" ++ f ++ glue ns - "static":f:ns -> chCode $ "invokestatic " ++ cls ++ "/" ++ f ++ glue ns - "alloc":ns -> chCode $ "; " ++ s - ins:x:_ | symb ins -> chCode $ ins ++ " " ++ look x - "goto":ns -> chCode $ "goto " ++ glue ns - "ifeq":ns -> chCode $ "ifeq " ++ glue ns - "label":ns -> chCode $ glue ns ++ ":" - ";":[] -> chCode "" - _ -> chCode s - where - chCode c = (c:code,(env,v)) - look x = maybe (error $ x ++ show env) show $ lookup x env - glue = concat --init . concat - symb = flip elem ["load","store"] . tail - size t = case t of - "d" -> 2 - _ -> 1 - liness = lines . map (\c -> if c==';' then '\n' else c) - - -boilerplate :: String -> String -boilerplate cls = unlines [ - ".class public " ++ cls, - ".super java/lang/Object", - ".method public <init>()V", - "aload_0", - "invokenonvirtual java/lang/Object/<init>()V", - "return", - ".end method" - ] diff --git a/examples/gfcc/Imper.gf b/examples/gfcc/Imper.gf deleted file mode 100644 index a207759b7..000000000 --- a/examples/gfcc/Imper.gf +++ /dev/null @@ -1,53 +0,0 @@ -abstract Imper = { - - flags startcat = Program ; - - cat - Program ; - Rec ListTyp ; - Typ ; - IsNum Typ ; - ListTyp ; - Fun ListTyp Typ ; - Stm ; - Exp Typ ; - Var Typ ; - ListExp ListTyp ; - - fun - Empty : Program ; - Funct : (AS : ListTyp) -> (V : Typ) -> - (Fun AS V -> Rec AS) -> Program ; - FunctNil : (V : Typ) -> - Stm -> (Fun NilTyp V -> Program) -> Program ; - RecOne : (A : Typ) -> (Var A -> Stm) -> Program -> Rec (ConsTyp A NilTyp) ; - RecCons : (A : Typ) -> (AS : ListTyp) -> - (Var A -> Rec AS) -> Program -> Rec (ConsTyp A AS) ; - - Decl : (A : Typ) -> (Var A -> Stm) -> Stm ; - Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ; - While : Exp TInt -> Stm -> Stm -> Stm ; - IfElse : Exp TInt -> Stm -> Stm -> Stm -> Stm ; - Block : Stm -> Stm -> Stm ; - Printf : (A : Typ) -> Exp A -> Stm -> Stm ; - Return : (A : Typ) -> Exp A -> Stm ; - Returnv : Stm ; - End : Stm ; - - EVar : (A : Typ) -> Var A -> Exp A ; - EInt : Int -> Exp TInt ; - EFloat : Int -> Int -> Exp TFloat ; - ELt : (n : Typ) -> IsNum n -> Exp n -> Exp n -> Exp TInt ; - EAdd, EMul, ESub : (n : Typ) -> IsNum n -> Exp n -> Exp n -> Exp n ; - EAppNil : (V : Typ) -> Fun NilTyp V -> Exp V ; - EApp : (AS : ListTyp) -> (V : Typ) -> Fun AS V -> ListExp AS -> Exp V ; - - TInt, TFloat : Typ ; - isNumInt : IsNum TInt ; isNumFloat : IsNum TFloat ; - NilTyp : ListTyp ; - ConsTyp : Typ -> ListTyp -> ListTyp ; - - OneExp : (A : Typ) -> Exp A -> ListExp (ConsTyp A NilTyp) ; - ConsExp : (A : Typ) -> (AS : ListTyp) -> - Exp A -> ListExp AS -> ListExp (ConsTyp A AS) ; -} diff --git a/examples/gfcc/ImperC.gf b/examples/gfcc/ImperC.gf deleted file mode 100644 index 57f9d9f9e..000000000 --- a/examples/gfcc/ImperC.gf +++ /dev/null @@ -1,56 +0,0 @@ ---# -path=.:../../lib/prelude -concrete ImperC of Imper = open ResImper in { - flags lexer=codevars ; unlexer=code ; startcat=Program ; - - lincat - Exp = PrecExp ; - Typ = {s,s2 : Str} ; - Rec = {s,s2,s3 : Str} ; - - lin - Empty = ss [] ; - FunctNil val stm cont = ss ( - val.s ++ cont.$0 ++ paren [] ++ "{" ++ - stm.s ++ "}" ++ ";" ++ cont.s) ; - Funct args val rec = ss ( - val.s ++ rec.$0 ++ paren rec.s2 ++ "{" ++ - rec.s ++ "}" ++ ";" ++ rec.s3) ; - - RecOne typ stm prg = stm ** { - s2 = typ.s ++ stm.$0 ; - s3 = prg.s - } ; - RecCons typ _ body prg = { - s = body.s ; - s2 = typ.s ++ body.$0 ++ "," ++ body.s2 ; - s3 = prg.s - } ; - - Decl typ cont = continues (typ.s ++ cont.$0) cont ; - Assign _ x exp = continues (x.s ++ "=" ++ exp.s) ; - While exp loop = continue ("while" ++ paren exp.s ++ loop.s) ; - IfElse exp t f = continue ("if" ++ paren exp.s ++ t.s ++ "else" ++ f.s) ; - Block stm = continue ("{" ++ stm.s ++ "}") ; - Printf t e = continues ("printf" ++ paren (t.s2 ++ "," ++ e.s)) ; - Return _ exp = statement ("return" ++ exp.s) ; - Returnv = statement "return" ; - End = ss [] ; - - EVar _ x = constant x.s ; - EInt n = constant n.s ; - EFloat a b = constant (a.s ++ "." ++ b.s) ; - EMul _ _ = infixL 3 "*" ; - EAdd _ _ = infixL 2 "+" ; - ESub _ _ = infixL 2 "-" ; - ELt _ _ = infixN 1 "<" ; - - EAppNil val f = constant (f.s ++ paren []) ; - EApp args val f exps = constant (f.s ++ paren exps.s) ; - - TInt = {s = "int" ; s2 = "\"%d\""} ; - TFloat = {s = "float" ; s2 = "\"%f\""} ; - NilTyp = ss [] ; - ConsTyp = cc2 ; - OneExp _ e = e ; - ConsExp _ _ e es = ss (e.s ++ "," ++ es.s) ; -} diff --git a/examples/gfcc/ImperJVM.gf b/examples/gfcc/ImperJVM.gf deleted file mode 100644 index d5d390727..000000000 --- a/examples/gfcc/ImperJVM.gf +++ /dev/null @@ -1,93 +0,0 @@ ---# -path=.:../../lib/prelude -concrete ImperJVM of Imper = open ResImper in { - -flags lexer=codevars ; unlexer=code ; startcat=Stm ; - - lincat - Rec = {s,s2,s3 : Str} ; -- code, storage for locals, continuation - Typ = {s : Str ; t : TypIdent} ; - Stm = Instr ; - - lin - Empty = ss [] ; - FunctNil val stm cont = ss ( - ".method" ++ "public" ++ "static" ++ cont.$0 ++ paren [] ++ val.s ++ ";" ++ - ".limit" ++ "locals" ++ stm.s2 ++ ";" ++ - ".limit" ++ "stack" ++ "1000" ++ ";" ++ - stm.s ++ - ".end" ++ "method" ++ ";" ++ ";" ++ - cont.s - ) ; - Funct args val rec = ss ( - ".method" ++ "public" ++ "static" ++ rec.$0 ++ paren args.s ++ val.s ++ ";" ++ - ".limit" ++ "locals" ++ rec.s2 ++ ";" ++ - ".limit" ++ "stack" ++ "1000" ++ ";" ++ - rec.s ++ - ".end" ++ "method" ++ ";" ++ ";" ++ - rec.s3 - ) ; - - RecOne typ stm prg = instrb typ.s ( - ["alloc"] ++ typ.s ++ stm.$0 ++ stm.s2) {s = stm.s ; s2 = stm.s2 ; s3 = prg.s}; - - RecCons typ _ body prg = instrb typ.s ( - ["alloc"] ++ typ.s ++ body.$0 ++ body.s2) - {s = body.s ; s2 = body.s2 ; s3 = prg.s}; - - Decl typ cont = instrb typ.s ( - ["alloc"] ++ typ.s ++ cont.$0 - ) cont ; - Assign t x exp = instrc (exp.s ++ typInstr "store" t.t ++ x.s) ; - While exp loop = - let - test = "TEST_" ++ loop.s2 ; - end = "END_" ++ loop.s2 - in instrl ( - "label" ++ test ++ ";" ++ - exp.s ++ - "ifeq" ++ end ++ ";" ++ - loop.s ++ - "goto" ++ test ++ ";" ++ - "label" ++ end - ) ; - IfElse exp t f = - let - false = "FALSE_" ++ t.s2 ++ f.s2 ; - true = "TRUE_" ++ t.s2 ++ f.s2 - in instrl ( - exp.s ++ - "ifeq" ++ false ++ ";" ++ - t.s ++ - "goto" ++ true ++ ";" ++ - "label" ++ false ++ ";" ++ - f.s ++ - "label" ++ true - ) ; - Block stm = instrc stm.s ; - Printf t e = instrc (e.s ++ "runtime" ++ typInstr "printf" t.t ++ paren (t.s) ++ "V") ; - Return t exp = instr (exp.s ++ typInstr "return" t.t) ; - Returnv = instr "return" ; - End = ss [] ** {s2,s3 = []} ; - - EVar t x = instr (typInstr "load" t.t ++ x.s) ; - EInt n = instr ("ldc" ++ n.s) ; - EFloat a b = instr ("ldc" ++ a.s ++ "." ++ b.s) ; - EAdd t _ = binopt "add" t.t ; - ESub t _ = binopt "sub" t.t ; - EMul t _ = binopt "mul" t.t ; - ELt t _ = binop ("runtime" ++ typInstr "lt" t.t ++ paren (t.s ++ t.s) ++ "I") ; - EAppNil val f = instr ( - "static" ++ f.s ++ paren [] ++ val.s - ) ; - EApp args val f exps = instr ( - exps.s ++ - "static" ++ f.s ++ paren args.s ++ val.s - ) ; - - TInt = {s = "I" ; t = TIInt} ; - TFloat = {s = "F" ; t = TIFloat} ; - NilTyp = ss [] ; - ConsTyp = cc2 ; - OneExp _ e = e ; - ConsExp _ _ = cc2 ; -} diff --git a/examples/gfcc/Makefile b/examples/gfcc/Makefile deleted file mode 100644 index d260049fa..000000000 --- a/examples/gfcc/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -all: pgf runtime - -pgf: - gf -make ImperC.gf ImperJVM.gf - -runtime: - java -jar jasmin.jar runtime.j - -doc: - pdflatex complin.tex - pdflatex complin.tex - diff --git a/examples/gfcc/README b/examples/gfcc/README deleted file mode 100644 index 72dca27df..000000000 --- a/examples/gfcc/README +++ /dev/null @@ -1,21 +0,0 @@ -gfcc = GF C Compiler -(c) Aarne Ranta 2004-2010 under LGPL - -You need: -- gf -- jasmin assembler on your path as jasmin.jar; obtain from - http://sourceforge.net/projects/jasmin/files/ -- ghc -- javac -- java - - -Example: -- make -- ./gfcc fibonacci.c -- java -cp . fibonacci - - -More information: -- make doc -- open complin.pdf diff --git a/examples/gfcc/ResImper.gf b/examples/gfcc/ResImper.gf deleted file mode 100644 index 57cdf9434..000000000 --- a/examples/gfcc/ResImper.gf +++ /dev/null @@ -1,85 +0,0 @@ -resource ResImper = open Predef in { - - -- precedence - - param PAssoc = PN | PL | PR ; - - oper - Prec : PType = Predef.Ints 4 ; - PrecExp : Type = {s : Str ; p : Prec ; a : PAssoc} ; - - mkPrec : Prec -> PAssoc -> Str -> PrecExp = \p,a,f -> - {s = f ; p = p ; a = a} ; - - usePrec : PrecExp -> Prec -> Str = \x,p -> - case <<x.p,p> : Prec * Prec> of { - <3,4> | <2,3> | <2,4> => paren x.s ; - <1,1> | <1,0> | <0,0> => x.s ; - <1,_> | <0,_> => paren x.s ; - _ => x.s - } ; - - constant : Str -> PrecExp = mkPrec 4 PN ; - - infixN : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> - mkPrec p PN (usePrec x (nextPrec p) ++ f ++ usePrec y (nextPrec p)) ; - infixL : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> - mkPrec p PL (usePrec x p ++ f ++ usePrec y (nextPrec p)) ; - infixR : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> - mkPrec p PR (usePrec x (nextPrec p) ++ f ++ usePrec y p) ; - - nextPrec : Prec -> Prec = \p -> case <p : Prec> of { - 4 => 4 ; - n => Predef.plus n 1 - } ; - - -- string operations - - SS : Type = {s : Str} ; - ss : Str -> SS = \s -> {s = s} ; - cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ; - - paren : Str -> Str = \str -> "(" ++ str ++ ")" ; - - continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ; - continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ; - statement : Str -> SS = \s -> ss (s ++ ";"); - - -- taking cases of list size - - param - Size = Zero | One | More ; - oper - nextSize : Size -> Size = \n -> case n of { - Zero => One ; - _ => More - } ; - separator : Str -> Size -> Str = \t,n -> case n of { - Zero => [] ; - _ => t - } ; - - -- operations for JVM - - param TypIdent = TIInt | TIFloat ; -- to be continued - - oper - typInstr : Str -> TypIdent -> Str = \instr,t -> case t of { - TIInt => "i" + instr ; - TIFloat => "f" + instr - } ; - - Instr : Type = {s,s2,s3 : Str} ; -- code, variables, labels - instr : Str -> Instr = \s -> - statement s ** {s2,s3 = []} ; - instrc : Str -> Instr -> Instr = \s,i -> - ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = i.s3} ; - instrl : Str -> Instr -> Instr = \s,i -> - ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = "L" ++ i.s3} ; - instrb : Str -> Str -> Instr -> Instr = \v,s,i -> - ss (s ++ ";" ++ i.s) ** {s2 = v ++ i.s2 ; s3 = i.s3} ; - binop : Str -> SS -> SS -> SS = \op, x, y -> - ss (x.s ++ y.s ++ op ++ ";") ; - binopt : Str -> TypIdent -> SS -> SS -> SS = \op, t -> - binop (typInstr op t) ; -} diff --git a/examples/gfcc/abs.c b/examples/gfcc/abs.c deleted file mode 100644 index 947711c13..000000000 --- a/examples/gfcc/abs.c +++ /dev/null @@ -1,20 +0,0 @@ -int abs (int x){ - int y ; - { - if (x < 0){ - y = 0 - x ; - } - else { - y = x ; - } - } - return y ; - } ; -int main () { - int i ; - i = abs (16); - printf ("%d",i) ; - return ; - } ; - - diff --git a/examples/gfcc/complin.bbl b/examples/gfcc/complin.bbl deleted file mode 100644 index cf18b704f..000000000 --- a/examples/gfcc/complin.bbl +++ /dev/null @@ -1,96 +0,0 @@ -\begin{thebibliography}{10} - -\bibitem{aho-ullman} -A.~Aho, R.~Sethi, and J.~Ullman. -\newblock {\em {Compilers: Principles, Techniques, and Tools}}. -\newblock {Addison-Wesley}, 1988. - -\bibitem{cayenne} -L.~Augustsson. -\newblock {Cayenne---a language with dependent types}. -\newblock In {\em Proc. of {ICFP'98}}. ACM Press, September 1998. - -\bibitem{bnfc} -M.~Forsberg and A.~Ranta. -\newblock {\mbox{BNF Converter Homepage}}. -\newblock \verb!http://www.cs.chalmers.se/~markus/BNFC/!, 2002--2004. - -\bibitem{harper-honsell} -R.~Harper, F.~Honsell, and G.~Plotkin. -\newblock {A Framework for Defining Logics}. -\newblock {\em {JACM}}, 40(1):143--184, 1993. - -\bibitem{metal} -G.~Kahn, B.~Lang, B.~Mélèse, and E.~Morcos. -\newblock Metal: a formalism to specify formalisms. -\newblock {\em Science of {C}omputer {P}rogramming}, 3:151--188, 1983. - -\bibitem{khegai} -J.\ Khegai, B.\ Nordström, and A.\ Ranta. -\newblock {Multilingual Syntax Editing in GF}. -\newblock In A.~Gelbukh, editor, {\em {Intelligent Text Processing and - Computational Linguistics (CICLing-2003), Mexico City, February 2003}}, - volume 2588 of {\em LNCS}, pages 453--464. Springer-Verlag, {2003}. -%\newblock URL \url{http://www.cs.chalmers.se/~aarne/articles/mexico.ps.gz}. - -\bibitem{knuth-attr} -D.~Knuth. -\newblock Semantics of context-free languages. -\newblock {\em Mathematical {Systems} {Theory}}, 2:127--145, 1968. - -\bibitem{landin} -P.~Landin. -\newblock The next 700 programming languages. -\newblock {\em {Communications of the ACM}}, 9:157--166, 1966. - -\bibitem{magnusson-nordstr} -L.~Magnusson and B.~Nordstr\"{o}m. -\newblock The {ALF} proof editor and its proof engine. -\newblock In {\em {Types for Proofs and Programs}}, LNCS 806, pages 213--237. - Springer, 1994. - -\bibitem{happy} -S.~Marlow. -\newblock {Happy, The Parser Generator for Haskell}, 2001. -\newblock \verb6http://www.haskell.org/happy/6. - -\bibitem{jasmin} -J.~Meyer and T.~Downing. -\newblock {\em {Java Virtual Machine}}. -\newblock O'Reilly, 1997. - -\bibitem{semBNF} -{P. M\"aenp\"a\"a}. -\newblock {Semantic BNF}. -\newblock In E.~Gimenez and C.~Paulin-Mohring, editors, {\em Types for Proofs - and Programs, TYPES'96}, volume 1512 of {\em LNCS}, pages 196--215. - Springer-Verlag, 1998. - -\bibitem{pereira-shieber} -F.~Pereira and S.~Shieber. -\newblock {\em {Prolog and Natural-Language Analysis}}. -\newblock {CSLI}, Stanford, 1987. - -\bibitem{twelf} -F.~Pfenning. -\newblock {The Twelf Project}. -\newblock \verb!http://www-2.cs.cmu.edu/~twelf!, 2002. - -\bibitem{gf-jfp} -A.~Ranta. -\newblock {Grammatical Framework: A Type-Theoretical Grammar Formalism}. -\newblock {\em {The Journal of Functional Programming}}, 14(2):145--189, 2004. -%\newblock URL \url{http://www.cs.chalmers.se/~aarne/articles/gf-jfp.ps.gz}. - -\bibitem{gf-homepage} -A.~Ranta, K.~Angelov, and T.~Hallgren. -\newblock {\mbox{Grammatical Framework Homepage}}. -\newblock \verb!http://grammaticalframework.org!, 2000--2009. - -\bibitem{teitelbaum} -T.~Teitelbaum and T.~Reps. -\newblock The {Cornell} {Program} {Synthesizer}: a syntax-directed programming - environment. -\newblock {\em Commun. {ACM}}, 24(9):563--573, 1981. - -\end{thebibliography} diff --git a/examples/gfcc/complin.tex b/examples/gfcc/complin.tex deleted file mode 100644 index 250db07bb..000000000 --- a/examples/gfcc/complin.tex +++ /dev/null @@ -1,1488 +0,0 @@ -\documentclass[12pt]{article} - -\usepackage[latin1]{inputenc} -%\input{psfig.sty} - -\setlength{\oddsidemargin}{0mm} -%\setlength{\evensidemargin}{0mm} -\setlength{\evensidemargin}{-2mm} -\setlength{\topmargin}{-16mm} -\setlength{\textheight}{240mm} -\setlength{\textwidth}{158mm} - -%\setlength{\parskip}{2mm} -%\setlength{\parindent}{0mm} - -%\input{macros} - -\newcommand{\begit}{\begin{itemize}} -\newcommand{\enit}{\end{itemize}} -\newcommand{\HOAS}{higher-order abstract syntax} -\newcommand{\newone}{} %%{\newpage} -\newcommand{\heading}[1]{\subsection{#1}} -\newcommand{\explanation}[1]{{\small #1}} -\newcommand{\empha}[1]{{\em #1}} -\newcommand{\commentOut}[1]{} -\newcommand{\rarrow}{\; \rightarrow\;} - -\newcommand{\nocolor}{} %% {\color[rgb]{0,0,0}} - -\newcommand{\bequ}{\begin{quote}} -\newcommand{\enqu}{\end{quote}} -\newcommand{\bece}{\begin{center}} -\newcommand{\ence}{\end{center}} -\newcommand{\eex}[1]{\begin{em}#1\end{em}} -\newcommand{\sugmap}[1]{\mbox{$#1^{\mbox{\scriptsize o}}$}} - -\title{{\bf Declarative Language Definitions and Code Generation as Linearization}} - -\author{Aarne Ranta \\ - Department of Computing Science \\ - Chalmers University of Technology and the University of Gothenburg\\ - {\tt aarne@cs.chalmers.se}} - -\date{30 November 2004} - -\begin{document} - -\maketitle - - -\subsection*{Abstract} - -{\em -This paper presents a compiler for a fragment of the C programming -language, with JVM (Java Virtual Machine) as target language. -The compiler is implemented in a purely declarative way: -its definition consists of an abstract syntax of program -structures and two concrete syntaxes matching the abstract -syntax: one for C and one for JVM. From these grammar components, -the compiler is derived by using the GF (Grammatical Framework) -grammat tool: the front end consists of parsing and semantic -checking in accordance to the C grammar, and the back end consists -of linearization in accordance to the JVM grammar. The tool provides -other functionalities as well, such as decompilation and interactive -editing. -} - -{\bf Notice}. {\em -Revised 2 March 2010 for GF 3.1. The only change is that the GF -parser is now used directly, instead of going via BNFC (Section 7). -} - -\section{Introduction} - -The experiment reported in this paper was prompted by a challenge -posted by Lennart Augustsson to the participants of the workshop -on Dependent Types in Programming held at Dagstuhl in September 2004. -The challenge was to use dependent types to write a compiler from -C to bytecode. This paper does not meet the challenge quite literally, -since our compiler is for a different subset of C than Augustsson's -specification, and since the bytecode that we generate is JVM instead -of his format. But it definitely makes use of dependent types. - -Augustsson's challenge did not specify \textit{how} dependent -types are to be used, and the first of the two points we make in this -paper (and its title) reflects our interpretation: -we use dependent types, in combination with higher-order abstract syntax (HOAS), -to define the grammar of the source language (here, the fragment of C). -The grammar constitutes the single, declarative source from which -the compiler front end is derived, comprising both parser and type -checker. - -The second point, code generation by linearization, means that the -back end is likewise implemented by a grammar of the target -language (in this case, a fragment of JVM). This grammar is the -declarative source from which the compiler back end is derived. -In addition, some postprocessing is needed to -make the code conform to Jasmin assembler requirements. - -The complete code of the compiler is 300 lines: 250 lines for the grammars, -50 lines for the postprocessor. The code -is presented in the appendices of this paper. - - - -\section{The Grammatical Framework} - -The tool we have used for implementing the compiler is -GF, the \empha{Grammatical Framework} \cite{gf-jfp}. GF -is similar to a Logical Framework (LF) -\cite{harper-honsell} -extended with -a notation for defining concrete syntax. GF was originally -designed to help building multilingual -translation systems for natural languages and also -between formal and natural languages. The translation model -implemented by GF is very simple: -\begin{verbatim} - parsing linearization - ------------> ------------> - Language_1 Abstract Syntax Language_2 - <------------ <------------ - linearization parsing -\end{verbatim} -An abstract syntax is similar to a \empha{theory}, or a -\empha{signature} in a logical framework. A -concrete syntax defines, in a declarative way, -a translation of abstract syntax trees (well-formed terms) -into concrete language structures, and from this definition, one can -derive both linearization and parsing. - -To give an example, -a (somewhat simplified) translator for addition expressions -consists of the abstract syntax rule -\begin{verbatim} - fun EAdd : (A : Typ) -> Exp A -> Exp A -> Exp A ; -\end{verbatim} -the C concrete syntax rule -\begin{verbatim} - lin EAdd _ x y = {s = x.s ++ "+" ++ y.s ; prec = 2} ; -\end{verbatim} -and the JVM concrete syntax rule -\begin{verbatim} - lin EAdd t x y = {s = x.s ++ y.s ++ t.s ++ "_add"} ; -\end{verbatim} -The abstract syntax rule uses a type argument to capture -the fact that addition is polymorphic (which is a simplification, -because we will restrict the rule to numeric types only) -and that both operands have the same type as the value. -The C rule shows that the type information is suppressed, -and that the expression has precedence level 2 (which is a simplification, -since we will also treat associativity). -The JVM rule shows how addition is translated to stack machine -instructions, where the type of the postfixed addition instruction has to -be made explicit. Our compiler, like any GF translation system, will -consist of rules like these. - -The number of languages related to one abstract syntax in -a translation system is of course not limited to two. -Sometimes just one language is involved; -GF then works much the same way as any grammar -formalism or parser generator. -The largest number of languages in an application known to us is 88; -its domain are numeral expressions from 1 to 999,999 \cite{gf-homepage}. - -In addition to linearization and parsing, GF supports grammar-based -\empha{multilingual authoring} \cite{khegai}: interactive editing -of abstract syntax trees with immediate feedback as linearized texts, -and the possibility to textual through the parsers. - -From the GF point of view, the goal of the compiler experiment -is to investigate if GF is capable of implementing -compilers using the ideas of single-source language definition -and code generation as linearization. The working hypothesis -was that it \textit{is} capable but inconvenient, and that, -working out a complete example, we would find out what -should be done to extend GF into a compiler construction tool. - - -\subsection{Advantages and disadvantages} - -Due to the way in which it is built, our compiler has -a number of unusual, yet attractive features: -\bequ -The front end is defined by a grammar of C as its single source. - -The grammar defines both abstract and concrete syntax, and also -semantic well-formedness (types, variable scopes). - -The back end is implemented by means of a grammar of JVM providing -another concrete syntax to the abstract syntax of C. - -As a result of the way JVM is defined, only semantically well formed -JVM programs are generated. - -The JVM grammar can also be used as a decompiler, which translates -JVM code back into C code. - -The language has an interactive editor that also supports incremental -compilation. -\enqu -The problems that we encountered and their causes will be explained in -the relevant sections of this report. To summarize, -\bequ -The scoping conditions resulting from \HOAS\ are slightly different -from the standard ones of C. - -Our JVM syntax is slightly different from the specification, and -hence needs some postprocessing. - -Using \HOAS\ to encode all bindings is sometimes cumbersome. -\enqu -The first shortcoming seems to be inevitable with the technique -we use: just like lambda calculus, our C semantics allows -overshadowing of earlier bindings by later ones. -The second problem is systematically solved by using -an intermediate JVM format, where symbolic variable addresses -are used instead of numeric stack addresses. -The last shortcoming is partly inherent in the problem of binding: -to spell out, in any formal notation, -what happens in complex binding structures \textit{is} -complicated. But it also suggests ways in which GF could be -tuned to give better support -to compiler construction, which, after all, is not an intended -use of GF as it is now. - - - - - - - - -\section{The abstract syntax} - -An \empha{abstract syntax} in GF consists of \texttt{cat} judgements -\[ -\mbox{\texttt{cat}} \; C \; \Gamma -\] -declaring basic types (depending on a context $\Gamma$), -and \texttt{fun} judgements -\[ -\mbox{\texttt{fun}} \; f \; \mbox{\texttt{:}} \; A -\] -declaring functions $f$ of any type $A$, which can be a basic type or -a function type. -\empha{Syntax trees} are well-formed terms of basic -types, in $\eta$-long normal form. - -As for notation, each judgement form is recognized by -its keyword (\texttt{cat}, \texttt{fun}, etc), -and the same keyword governs all judgements -until the next keyword is encountered. - -The abstract syntax that we will present is no doubt closer -to C than to JVM. One reason is that what we are building is a -\textit{C compiler}, and match with the target language is a -secondary consideration. Another, more general reason is that -C is a higher-level language and JVM which means, among -other things, that C makes more semantic distinctions. -In general, the abstract syntax of a translation system -must reflect all semantic distinctions that can be made -in the languages involved, and then it is a good idea to -start with looking at what the most distinctive language -needs. - - - -\subsection{Statements} - -Statements in C may involve variables, expressions, and -other statements. -The following \texttt{cat} judgements of GF define the syntactic categories -that are needed to construct statements -\begin{verbatim} - cat - Stm ; - Typ ; - Exp Typ ; - Var Typ ; -\end{verbatim} -The type \texttt{Typ} is the type of C's datatypes. -The type of expressions is a dependent type, -since it has a nonempty context, indicating that \texttt{Exp} takes -a \texttt{Typ} as argument. The rules for \texttt{Exp} -will thus be rules to construct well-typed expressions of -a given type. \texttt{Var}\ is the type of variables, -of a given type, that get bound in C's variable -declarations. - -Let us start with the simplest kind of statements: -declarations and assignments. The following \texttt{fun} -rules define their abstract syntax: -\begin{verbatim} - fun - Decl : (A : Typ) -> (Var A -> Stm) -> Stm ; - Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ; -\end{verbatim} -The \texttt{Decl}\ function captures the rule that -a variable must be declared before it can be used or assigned to: -its second argument is a \empha{continuation}, which is -the sequence of statements that depend on (= may refer to) -the declared variable. -The \texttt{Assign} function uses dependent types to -control that a variable is always assigned a value of proper -type. - -We will treat all statements, except -\texttt{return}s, in terms of continuations. A sequence of -statements (which always has the type \texttt{Stm}) thus -always ends in a \texttt{return}, or, abruptly, in -an empty statement, \texttt{End}. Here are rules for some other -statement forms: -\begin{verbatim} - While : Exp TInt -> Stm -> Stm -> Stm ; - IfElse : Exp TInt -> Stm -> Stm -> Stm -> Stm ; - Block : Stm -> Stm -> Stm ; - Return : (A : Typ) -> Exp A -> Stm ; - End : Stm ; -\end{verbatim} -Here is an example of a piece of code and its abstract syntax. -\begin{verbatim} - int x ; Decl (TNum TInt) (\x -> - x = 5 ; Assign (TNum TInt) x (EInt 5) ( - return x ; Return (TNum TInt) (EVar (TNum TInt) x))) -\end{verbatim} -The details of expression and type -syntax will be explained in the next section. - -Our binding syntax is more liberal than C's in two ways. -First, -lambda calculus permits overshadowing previous bindings -by new ones, e.g. to write \verb6\x -> (\x -> f x)6. -The corresponding overshadowing of declarations is not -legal in C, within one and the same block. -Secondly, we allow declarations anywhere in a block, -not just in the beginning. -The second deviation would be easy to mend, whereas -the first one is inherent to the method of \HOAS. - - - -\subsection{Types and expressions} - -Our fragment of C has two types: integers and floats. -Many operators of C are overloaded so that they can -be used for both of these types, as well as for -some other numeric types---but not for e.g.\ arrays -and structures. We capture this distinction by a notion -reminiscent of \empha{type classes}: we introduce a special -category of numeric types, and a coercion of numeric types -into types in general. -\begin{verbatim} - cat - NumTyp ; - fun - TInt, TFloat : NumTyp ; - TNum : NumTyp -> Typ ; -\end{verbatim} -Well-typed expressions are built from constants, -from variables, and by means of binary operations. -\begin{verbatim} - EVar : (A : Typ) -> Var A -> Exp A ; - EInt : Int -> Exp (TNum TInt) ; - EFloat : Int -> Int -> Exp (TNum TFloat) ; - ELt : (n : NumTyp) -> let Ex = Exp (TNum n) in - Ex -> Ex -> Exp (TNum TInt) ; - EAdd, EMul, ESub : (n : NumTyp) -> let Ex = Exp (TNum n) in - Ex -> Ex -> Ex ; -\end{verbatim} -Notice that the category \texttt{Var} has no constructors, -but its expressions are only created by -variable bindings in \HOAS. -Notice also that GF has a built-in type \texttt{Int} of -integer literals, but floats are constructed by hand. - -Yet another expression form are function calls. To this -end, we need a notion of (user-defined) functions and -argument lists. The type of functions depends on an -argument type list and a value type. Expression lists -are formed to match type lists. -\begin{verbatim} - cat - ListTyp ; - Fun ListTyp Typ ; - ListExp ListTyp ; - - fun - EAppNil : (V : Typ) -> Fun NilTyp V -> Exp V ; - EApp : (AS : ListTyp) -> (V : Typ) -> - Fun AS V -> ListExp AS -> Exp V ; - - NilTyp : ListTyp ; - ConsTyp : Typ -> ListTyp -> ListTyp ; - - OneExp : (A : Typ) -> Exp A -> ListExp (ConsTyp A NilTyp) ; - ConsExp : (A : Typ) -> (AS : ListTyp) -> - Exp A -> ListExp AS -> ListExp (ConsExp A AS) ; -\end{verbatim} -The separation between zero-element applications and other -applications is a concession to the concrete syntax of C: -it in this way that we can control the use of commas so that -they appear between arguments (\texttt{(x,y,z)}) but not -after the last argument (\texttt{(x,y,z,)}). -The compositionality of linearization (Section~\ref{compositionality} below) -forbids case analysis on the length of the lists. - - -\subsection{Functions} - -On the top level, a program is a sequence of functions. -Each function may refer to functions defined earlier -in the program. The idea to express the binding of -function symbols with \HOAS\ is analogous to the binding -of variables in statements, using a continuation. -As with variables, the principal way to build function symbols is as -bound variables (in addition, there can be some -built-in functions, unlike in the case of variables). -The continuation of can be recursive, which we express by -making the function body into a part of the continuation; -the category \texttt{Rec} is the combination of a function -body and the subsequent function definitions. -\begin{verbatim} - cat - Program ; - Rec ListTyp ; - fun - Empty : Program ; - Funct : (AS : ListTyp) -> (V : Typ) -> - (Fun AS V -> Rec AS) -> Program ; - FunctNil : (V : Typ) -> - Stm -> (Fun NilTyp V -> Program) -> Program ; -\end{verbatim} -For syntactic reasons similar to function application -expressions in the previous section, we have distinguished between -empty and non-empty argument lists. - -The tricky problem with function definitions -is that they involve two nested binding constructions: -the outer binding of the function symbol and the inner -binding of the function parameter lists. -For the latter, we could use -vectors of variables, in the same way as vectors of -expressions are used to give arguments to functions. -However, this would lead to the need of cumbersome -projection functions when using the parameters -in the function body. A more elegant solution is -to use \HOAS\ to build function bodies: -\begin{verbatim} - RecOne : (A : Typ) -> - (Var A -> Stm) -> Program -> Rec (ConsTyp A NilTyp) ; - RecCons : (A : Typ) -> (AS : ListTyp) -> - (Var A -> Rec AS) -> Program -> Rec (ConsTyp A AS) ; -\end{verbatim} -The end result is an abstract syntax whose relation -to concrete syntax is somewhat remote. Here is an example of -the code of a function and its abstract syntax: -\begin{verbatim} - let int = TNum TInt in - int fact Funct (ConsTyp int NilTyp) int (\fact -> - (int n) { RecOne int (\n -> - int f ; Decl int (\f -> - f = 1 ; Assign int f (EInt 1) ( - while (1 < n) { While (ELt int (EInt 1) (EVar int n)) (Block ( - f = n * f ; Assign int f (EMul int (EVar int n) (EVar int f)) ( - n = n - 1 ; Assign int n (ESub int (EVar int n) (EInt 1)) - } End)) - return f ; (Return int (EVar int f))) End))) - } ; Empty) -\end{verbatim} - - -\subsection{The \texttt{printf} function} - -To give a valid type to the C function \texttt{printf} -is one of the things that can be done with -dependent types \cite{cayenne}. We have not defined \texttt{printf} -in its full strength, partly because of the difficulties to compile -it into JVM. But we use special cases of \texttt{printf} as -statements, to be able to print values of different types. -\begin{verbatim} - Printf : (A : Typ) -> Exp A -> Stm -> Stm ; -\end{verbatim} - - - - - -\section{The concrete syntax of C} - -A concrete syntax, for a given abstract syntax, -consists of \texttt{lincat} judgements -\[ -\mbox{\texttt{lincat}} \; C \; \mbox{\texttt{=}} \; T -\] -defining the \empha{linearization types} $T$ of each category $C$, -and \texttt{lin} judgements -\[ -\mbox{\texttt{lin}} \; f \; \mbox{\texttt{=}} \; t -\] -defining the \empha{linearization functions} $t$ of each function $f$ -in the abstract syntax. The linearization functions are -checked to be well-typed with respect the \texttt{lincat} -definitions, and the syntax of GF forces them to be \empha{compositional} -in the sense that the linearization of a complex tree is always -a function of the linearizations of the subtrees. Schematically, if -\[ - f \colon A_{1} \rarrow \cdots \rarrow A_{n} \rarrow A -\] -then -\[ - \sugmap{f} \colon - \sugmap{A_{1}} \rarrow \cdots - \rarrow \sugmap{A_{n}} \rarrow \sugmap{A} -\] -and the linearization algorithm is simply -\[ - \sugmap{(f \; a_{1} \; \ldots \; a_{n})} \; = \; - \sugmap{f} \; \sugmap{a_{1}} \; \ldots \; \sugmap{a_{n}} -\] -using the \sugmap{} notation for both linearization types, -linearization functions, and linearizations of trees. - -\label{compositionality} -Because of compositionality, no case analysis on expressions -is possible in linearization rules. The values of linearization -therefore have to carry information on how they are used in -different situations. Therefore linearization -types are generally record types instead of just the string type. -The simplest record type that is used in GF is -\bece -\verb6{s : Str}6 -\ence -If the linearization type of a category is not explicitly -given by a \texttt{lincat} judgement, this type is -used by default. - -With \HOAS, a syntax tree can have variable-bindings in its -constituents. The linearization of such a constituent -is compositionally defined to be the record linearizing the body -extended with fields for each of the variable symbols: -\[ -\sugmap{(\lambda x_{0} \rarrow \cdots \rarrow \lambda x_{n} \rarrow b)} -\;=\; -\sugmap{b} *\!* \{\$_{0} = \sugmap{x_{0}} ; \ldots ; \$_{n} = \sugmap{x_{n}}\} -\] -Notice that the variable symbols can -always be found because linearizable trees are in $\eta$-long normal form. -Also notice that we are here using the -\sugmap{} notation in yet another way, to denote the magic operation -that converts variable symbols into strings. - - -\subsection{Resource modules} - -Resource modules define auxiliary notions that can be -used in concrete syntax. These notions include -\empha{parameter types} defined by \texttt{param} -judgements -\[ -\mbox{\texttt{param}} \; P \; \mbox{\texttt{=}} - \; C_{1} \; \Gamma_{1} \; \mid \; \cdots \; \mid \; - \; C_{n} \; \Gamma_{n} -\] -and \empha{operations} defined by -\texttt{oper} judgements -\[ -\mbox{\texttt{oper}} \; f \; \mbox{\texttt{:}} \; T \; \mbox{\texttt{=}} \; t -\] -These judgements are -similar to datatype and function definitions -in functional programming, with the restriction -that parameter types must be finite and operations -may not be recursive. It is due to these restrictions that -we can always derive a parsing algorithm from a set of -linearization rules. - -In addition to types defined in \texttt{param} judgements, -initial segments of natural numbers, \texttt{Ints n}, -can be used as parameter types. This is the most important parameter -type we use in the syntax of C, to represent precedence. - -The following string operations are useful in almost -all grammars. They are actually included in a GF \texttt{Prelude}, -but are here defined from scratch to make the code shown in -the Appendices complete. -\begin{verbatim} - oper - SS : Type = {s : Str} ; - ss : Str -> SS = \s -> {s = s} ; - cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ; - paren : Str -> Str = \str -> "(" ++ str ++ ")" ; -\end{verbatim} - - - -\subsection{Precedence} - -We want to be able to recognize and generate one and the same expression with -or without parentheses, depending on whether its precedence level -is lower or higher than expected. For instance, a sum used as -an operand of multiplication must be in parentheses. We -capture this by defining a parameter type of -precedence levels. Five levels are enough for the present -fragment of C, so we use the enumeration type of -integers from 0 to 4 to define the \empha{inherent precedence level} -of an expression -\begin{verbatim} - oper - Prec : PType = Predef.Ints 4 ; - PrecExp : Type = {s : Str ; p : Prec} ; -\end{verbatim} -in a resource module (see Appendix D), and -\begin{verbatim} - lincat Exp = PrecExp ; -\end{verbatim} -in the concrete syntax of C itself. - -To build an expression that has a certain inherent precedence level, -we use the operation -\begin{verbatim} - mkPrec : Prec -> Str -> PrecExp = \p,s -> {s = s ; p = p} ; -\end{verbatim} -To use an expression of a given inherent level at some expected level, -we define a function that says that, if the inherent level is lower -than the expected level, parentheses are required. -\begin{verbatim} - usePrec : PrecExp -> Prec -> Str = \x,p -> - ifThenStr - (less x.p p) - (paren x.s) - x.s ; -\end{verbatim} -(The code shown in Appendix D is at the moment more cumbersome, -due to holes in the support for integer arithmetic in GF.) - -With the help of \texttt{mkPrec} and \texttt{usePrec}, -we can now define the main high-level operations that are -used in the concrete syntax itself---constants (highest level), -non-associative infixes, and left associative infixes: -\begin{verbatim} - constant : Str -> PrecExp = mkPrec 4 ; - - infixN : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> - mkPrec p (usePrec x (nextPrec p) ++ f ++ usePrec y (nextPrec p)) ; - infixL : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> - mkPrec p (usePrec x p ++ f ++ usePrec y (nextPrec p)) ; -\end{verbatim} -(The code in Appendix D adds to this an associativity parameter, -which is redundant in GF, but which we use to instruct the Happy -parser generator.) - - -\subsection{Expressions} - -With the machinery introduced, the linearization rules of expressions -are simple and concise: -\begin{verbatim} - EVar _ x = constant x.s ; - EInt n = constant n.s ; - EFloat a b = constant (a.s ++ "." ++ b.s) ; - EMul _ = infixL 3 "*" ; - EAdd _ = infixL 2 "+" ; - ESub _ = infixL 2 "-" ; - ELt _ = infixN 1 "<" ; - - EAppNil val f = constant (f.s ++ paren []) ; - EApp args val f exps = constant (f.s ++ paren exps.s) ; -\end{verbatim} - - -\subsection{Types} - -Types are expressed in two different ways: -in declarations, we have \texttt{int} and \texttt{float}, but -as formatting arguments to \texttt{printf}, we have -\verb6"%d"6 and \verb6"%f"6, with the quotes belonging to the -names. The simplest solution in GF is to linearize types -to records with two string fields. -\begin{verbatim} - lincat - Typ, NumTyp = {s,s2 : Str} ; - lin - TInt = {s = "int" ; s2 = "\"%d\""} ; - TFloat = {s = "float" ; s2 = "\"%f\""} ; -\end{verbatim} - - -\subsection{Statements} - -Statements in C have -the simplest linearization type, \verb6{s : Str}6. -We use a handful of auxiliary operations to regulate -the use of semicolons on a high level. -\begin{verbatim} - oper - continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ; - continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ; - statement : Str -> SS = \s -> ss (s ++ ";"); -\end{verbatim} -As for declarations, which bind variables, we notice the -projection \verb6.$06 to refer to the bound variable. -Also notice the use of the \texttt{s2} field of the type -in \texttt{printf}. -\begin{verbatim} - lin - Decl typ cont = continues (typ.s ++ cont.$0) cont ; - Assign _ x exp = continues (x.s ++ "=" ++ exp.s) ; - While exp loop = continue ("while" ++ paren exp.s ++ loop.s) ; - IfElse exp t f = continue ("if" ++ paren exp.s ++ t.s ++ "else" ++ f.s) ; - Block stm = continue ("{" ++ stm.s ++ "}") ; - Printf t e = continues ("printf" ++ paren (t.s2 ++ "," ++ e.s)) ; - Return _ exp = statement ("return" ++ exp.s) ; - Returnv = statement "return" ; - End = ss [] ; -\end{verbatim} - - -\subsection{Functions and programs} - -The category \texttt{Rec} of recursive function bodies with continuations -has three components: the function body itself, the parameter list, and -the program that follows. We express this by a linearization type that -contains three strings: -\begin{verbatim} - lincat Rec = {s,s2,s3 : Str} ; -\end{verbatim} -The body construction rules accumulate the parameter list -independently of the two other components: -\begin{verbatim} - lin - RecOne typ stm prg = stm ** { - s2 = typ.s ++ stm.$0 ; - s3 = prg.s - } ; - RecCons typ _ body prg = { - s = body.s ; - s2 = typ.s ++ body.$0 ++ "," ++ body.s2 ; - s3 = prg.s - } ; -\end{verbatim} -The top-level program construction rules rearrange the three -components into a linear structure: -\begin{verbatim} - FunctNil val stm cont = ss ( - val.s ++ cont.$0 ++ paren [] ++ "{" ++ - stm.s ++ "}" ++ ";" ++ cont.s) ; - Funct args val rec = ss ( - val.s ++ rec.$0 ++ paren rec.s2 ++ "{" ++ - rec.s ++ "}" ++ ";" ++ rec.s3) ; -\end{verbatim} - - -%%\subsection{Lexing and unlexing} - - - -\section{The concrete syntax of JVM} - -JVM syntax is, linguistically, more straightforward than -the syntax of C, and could even be defined by a regular -expression. However, the JVM syntax that our compiler -generates does not comprise full JVM, but only the fragment -that corresponds to well-formed C programs. - -The JVM syntax we use is a symbolic variant of the Jasmin assembler -\cite{jasmin}. -The main deviation from Jasmin are -variable addresses, as described in Section~\ref{postproc}. -The other deviations have to do with spacing: the normal -unlexer of GF puts spaces between constituents, whereas -in JVM, type names are integral parts of instruction names. -We indicate gluing uniformly by generating an underscore -on the side from which the adjacent element is glued. Thus -e.g.\ \verb6i _load6 becomes \verb6iload6. - - -\subsection{Symbolic JVM} -\label{postproc} - -What makes the translation from our abstract syntax to JVM -tricky is that variables must be replaced by -numeric addresses (relative to the frame pointer). -Code generation must therefore maintain a symbol table that permits -the lookup of variable addresses. As shown in the code -in Appendix C, we do not treat symbol tables -in linearization, but instead generated code in -\empha{Symbolic JVM}---that is, JVM with symbolic addresses. -Therefore we need a postprocessor that resolves the symbolic addresses, -shown in Appendix E. - -To make the postprocessor straightforward, -Symbolic JVM has special \texttt{alloc} instructions, -which are not present in real JVM. -Our compiler generates \texttt{alloc} instructions from -variable declarations. -The postprocessor comments out the \texttt{alloc} instructions, but we -found it a good idea not to erase them completely, since they make the -code more readable. - -The following example shows how the three representations (C, Symbolic JVM, JVM) -look like for a piece of code. -\begin{verbatim} - int x ; alloc i x ; x gets address 0 - int y ; alloc i y ; y gets address 1 - x = 5 ; ldc 5 ldc 5 - i _store x istore 0 - y = x ; i _load x iload 0 - i _store y istore 1 -\end{verbatim} - - -\subsection{Labels and jumps} - -A problem related to variable addresses -is the generation of fresh labels for -jumps. We solve this in linearization -by maintaining a growing label suffix -as a field of the linearization of statements into -instructions. The problem remains that statements on the -same nesting level, e.g.\ the two branches -of an \texttt{if-else} statement can use the same -labels. Making them unique must be -added to the post-processing pass. This is -always possible, because labels are nested in a -disciplined way, and jumps can never go to remote labels. - -As it turned out laborious to thread the label counter -to expressions, we decided to compile comparison -expressions (\verb6x < y6) into function calls, and provide the functions in -a run-time library. This will no more work for the -conjunction (\verb6x && y6) -and disjunction (\verb6x || y6), if we want to keep their semantics -lazy, since function calls are strict in their arguments. - - - - - -\subsection{How to restore code generation by linearization} - -Since postprocessing is needed, we have not quite achieved -the goal of code generation as linearization---if -linearization is understood in the -sense of GF. In GF, linearization can only depend -on parameters from finite parameter sets. Since the size of -a symbol table can grow indefinitely, it is not -possible to encode linearization with updates to and -lookups from a symbol table, as is usual in code generation. - -One attempt we made to achieve JVM linearization with -numeric addresses was to alpha-convert abstract syntax syntax trees -so that variables get indexed with integers that indicate their -depths in the tree. This hack works in the present fragment of C -because all variables need the same amount of memory (one word), -but would break down if we added double-precision floats. Therefore -we have used the less pure (from the point of view of -code generation as linearization) method of -symbolic addresses. - -It would certainly be possible to generate variable addresses -directly in the syntax trees by using dependent types; but this -would clutter the abstract -syntax in a way that is hard to motivate when we are in -the business of describing the syntax of C. The abstract syntax would -have to, so to say, anticipate all demands of the compiler's -target languages. - - -\subsection{Problems with the JVM bytecode verifier} - -An inherent restriction for linearization in GF is compositionality. -This prevents optimizations during linearization -by clever instruction selection, elimination of superfluous -labels and jumps, etc. One such optimization, the removal -of unreachable code (i.e.\ code after a \texttt{return} instruction) -is actually required by the JVM byte code verifier. -The solution is, again, to perform this optimization in postprocessing. -What we currently do, however, is to be careful and write -C programs so that they always end with a return statement in the -outermost block. - -Another problem related to \texttt{return} instructions is that -both C and JVM programs have a designated \texttt{main} function. -This function must have a certain type, which is different in C and -JVM. In C, \texttt{main} returns an integer encoding what -errors may have happend during execution. The JVM -\texttt{main}, on the other hand, returns a \texttt{void}, i.e.\ -no value at all. A \texttt{main} program returning an -integer therefore provokes a JVM bytecode verifier error. -The postprocessor could take care of this; but currently -we just write programs with void \texttt{return}s in the -\texttt{main} functions. - -The parameter list of \texttt{main} is also different in C (empty list) -and JVM (a string array \texttt{args}). We handle this problem -with an \empha{ad hoc} postprocessor rule. - -Every function prelude in JVM must indicate the maximum space for -local variables, and the maximum evaluation stack space (within -the function's own stack frame). The locals limit is computed in -linearization by maintaining a counter field. The stack limit -is blindly set to 1000; it would be possible to set an -accurate limit in the postprocessing phase. - - -\section{Translation as linearization vs.\ transfer} - -Many of the problems we have encountered in code generation by -linearization are familiar from -translation systems for natural languages. For instance, to translate -the English pronoun \eex{you} to German, you have to choose -between \eex{du, ihr, Sie}; for Italian, there are four -variants, and so on. To deal with this by linearization, -all semantic distinctions made in any of the involved languages -have to be present in the common abstract syntax. The usual solution to -this problem is not a universal abstract syntax, but -\empha{transfer}: translation does not just linearize -the same syntax trees to another language, but uses -a noncompositional function that translates -trees of one language into trees of another. - -Using transfer in the -back end is precisely what traditional compilers do. -The transfer function in our case would be a noncompositional -function from the abstract syntax of C to a different abstract -syntax of JVM. The abstract syntax notation of GF permits -definitions of functions, and the GF interpreter can be used -for evaluating terms into normal form. Thus one could write -the code generator just like in any functional language: -by sending in an environment and a syntax tree, and -returning a new environment with an instruction list: -\begin{verbatim} - fun - transStm : Env -> Stm -> EnvInstr ; - def - transStm env (Decl typ cont) = ... - transStm env (While (ELt a b) stm cont) = ... - transStm env (While exp stm cont) = ... -\end{verbatim} -This would be cumbersome in practice, because -GF does not have programming-language facilities -like built-in lists and tuples, or monads. Moreover, -the compiler could no longer be inverted into a decompiler, -in the way true linearization can be inverted into a parser. - - - -\section{Parser generation} -\label{bnfc} - -The whole GF part of the compiler (parser, type checker, Symbolic JVM -generator) can be run in the GF interpreter. -The weakest point of the resulting compiler, by current -standards, is the parser. GF is a powerful grammar formalism, which -needs a very general parser, taking care of ambiguities and other -problems that are typical of natural languages but should be -overcome in programming languages by design. The parser is moreover run -in an interpreter that takes the grammar (in a suitably compiled form) -as an argument. - -Fortunately, it is easy to replace the generic, interpreting GF parser -by a compiled LR(1) parser. GF supports the translation of a concrete -syntax into the \empha{Labelled BNF} (LBNF) format, %% \cite{lbnf}, -which in turn can be translated to parser generator code -(Happy, Bison, or JavaCUP), by the BNF Converter \cite{bnfc}. -The parser we are therefore using in the compiler is a Haskell -program generated by Happy \cite{happy}. - -We regard parser generation -as a first step towards developing GF into a -production-quality compiler compiler. The efficiency of the parser -is not the only relevant thing. Another advantage of an LR(1) -parser generator is that it performs an analysis on the grammar -finding conflicts, and provides a debugger. It may be -difficult for a human to predict how a context-free grammar -performs at parsing; it is much more difficult to do this for -a grammar written in the abstract way that GF permits (cf.\ the -example in Appendix B). - -The current version of the C grammar is ambiguous. GF's own -parser returns all alternatives, whereas the parser generated by -Happy rules out some of them by its normal conflict handling -policy. This means, in practice, that extra brackets are -sometimes needed to group staments together. - - -\subsection{Another notation for \HOAS} - -Describing variable bindings with \HOAS\ is sometimes considered -unintuitive. Let us consider the declaration rule of C (without -type dependencies for simplicity): -\begin{verbatim} - fun Decl : Typ -> (Var -> Stm) -> Stm ; - lin Decl typ stm = {s = typ.s ++ stm.$0 ++ ";" ++stm.s} ; -\end{verbatim} -Compare this with a corresponding LBNF rule (also using a continuation): -\begin{verbatim} - Decl. Stm ::= Typ Ident ";" Stm ; -\end{verbatim} -To explain bindings attached to this rule, one can say, in natural language, -that the identifier gets bound in the statement that follows. -This means that syntax trees formed by this rule do not have -the form \verb6(Decl typ x stm)6, but the form \verb6(Decl typ (\x -> stm))6. - -One way to formalize the informal binding rules stated beside -BNF rules is to use \empha{profiles}: data structures describing -the way in which the logical arguments of the syntax tree are -represented by the linearized form. The declaration rule can be -written using a profile notation as follows: -\begin{verbatim} - Decl [1,(2)3]. Stm ::= Typ Ident ";" Stm ; -\end{verbatim} -When compiling GF grammars into LBNF, we were forced to enrich -LBNF by a (more general) profile notation -(cf.\ \cite{gf-jfp}, Section 3.3). This suggested at the same -time that profiles could provide a user-fiendly notation for -\HOAS\ avoiding the explicit use of lambda calculus. - - - -\section{Using the compiler} - -Our compiler is invoked, of course, by the command \texttt{gfcc}. -It produces a JVM \texttt{.class} file, by running the -Jasmin bytecode assembler \cite{jasmin} on a Jasmin (\texttt{.j}) -file: -\begin{verbatim} - % gfcc factorial.c - > > wrote file factorial.j - Generated: factorial.class -\end{verbatim} -The Jasmin code is produced by a postprocessor, written in Haskell -(Appendix E), from the Symbolic JVM format that is produced by -linearization. The reasons why actual Jasmin is not generated -by linearization are explained in Section~\ref{postproc} above. - -In addition to the batch compiler, GF provides an interactive -syntax editor, in which C programs can be constructed by -stepwise refinements, local changes, etc.\ \cite{khegai}. The user of the -editor can work simultaneously on all languages involved. -In our case, this means that changes can be done both to -the C code and to the JVM code, and they are automatically -carried over from one language to the other. -\commentOut{ -A screen dump of the editor is shown in Fig~\ref{demo}. - -\begin{figure} -\centerline{\psfig{figure=demo2.epsi}} \caption{ -GF editor session where an integer -expression is expected to be given. The left window shows the -abstract syntax tree, and the right window the evolving C and -JVM code. The editor focus is shadowed, and the refinement alternatives -are shown in a pop-up window. -} -\label{demo} -\end{figure} -} - - -\section{Related work} - -The theoretical ideas behind our compiler experiment -are familiar from various sources. -Single-source language and compiler definitions -can be built using attribute grammars \cite{knuth-attr}. -The use of -dependent types in combination with higher-order abstract syntax -has been studied in various logical frameworks -\cite{harper-honsell,magnusson-nordstr,twelf}. -The addition of linearization rules to type-theoretical -abstract syntax is studied in \cite{semBNF}, which also -compares the method with attribute grammars. - -The idea of using a common abstract syntax for different -languages was clearly exposed by Landin \cite{landin}. The view of -code generation as linearization is a central aspect of -the classic compiler textbook by Aho, Sethi, and Ullman -\cite{aho-ullman}. -The use of one and the same grammar both for parsing and linearization -is a guiding principle of unification-based linguistic grammar -formalisms \cite{pereira-shieber}. Interactive editors derived from -grammars have been developed in various programming and proof -assistants \cite{teitelbaum,metal,magnusson-nordstr}. - -Even though the different ideas are well-known, -we have not seen them used together to construct a complete -compiler. In our view, putting these ideas together is -an attractive approach to compiling, since a compiler written -in this way is completely declarative, hence concise, -and therefore easy to modify and extend. What is more, if -a new language construct is added, the GF type checker -verifies that the addition is propagated to all components -of the compiler. As the implementation is declarative, -it is also self-documenting, since a human-readable -grammar defines the syntax and static -semantics that is actually used in the implementation. - - -\section{Conclusion} - -The \texttt{gfcc} compiler translates a representative -fragment of C to JVM, and growing the fragment -does not necessarily pose any new kinds of problems. -Using \HOAS\ and dependent types to describe the abstract -syntax of C works fine, and defining the concrete syntax -of C on top of this using GF linearization machinery is -possible. To build a parser that is more efficient than -GF's generic one, GF offers code generation for standard -parser tools. - -One result of the experiment is the beginning of a -library for dealing with typical programming language structures -such as precedence. This library is exploited in the parser -generator, which maps certain parameters used into GF grammars -into precedence directives in labelled BNF grammars. - -The most serious difficulty with JVM code generation by linearization -is to maintain a symbol table mapping variables to addresses. -The solution we have chosen is to generate Symbolic JVM, that is, -JVM with symbolic addresses, and translate the symbolic addresses to -(relative) memory locations by a postprocessor. - -Since the postprocessor works uniformly for the whole Symbolic JVM, -building a new compiler to generate JVM should now be -possible by just writing GF grammars. The most immediate -idea for developing GF as a compiler tool is to define -a similar symbolic format for an intermediate language, -which uses three-operand code and virtual registers. - - - -\bibliographystyle{plain} - -\bibliography{gf-bib} - - -\newpage -\subsection*{Appendix A: The abstract syntax} - -\small -\begin{verbatim} -abstract Imper = PredefAbs ** { - cat - Program ; - Rec ListTyp ; - Typ ; - NumTyp ; - ListTyp ; - Fun ListTyp Typ ; - Body ListTyp ; - Stm ; - Exp Typ ; - Var Typ ; - ListExp ListTyp ; - - fun - Empty : Program ; - Funct : (AS : ListTyp) -> (V : Typ) -> (Fun AS V -> Rec AS) -> Program ; - FunctNil : (V : Typ) -> Stm -> (Fun NilTyp V -> Program) -> Program ; - RecOne : (A : Typ) -> (Var A -> Stm) -> Program -> Rec (ConsTyp A NilTyp) ; - RecCons : (A : Typ) -> (AS : ListTyp) -> - (Var A -> Rec AS) -> Program -> Rec (ConsTyp A AS) ; - - Decl : (A : Typ) -> (Var A -> Stm) -> Stm ; - Assign : (A : Typ) -> Var A -> Exp A -> Stm -> Stm ; - While : Exp (TNum TInt) -> Stm -> Stm -> Stm ; - IfElse : Exp (TNum TInt) -> Stm -> Stm -> Stm -> Stm ; - Block : Stm -> Stm -> Stm ; - Printf : (A : Typ) -> Exp A -> Stm -> Stm ; - Return : (A : Typ) -> Exp A -> Stm ; - Returnv : Stm ; - End : Stm ; - - EVar : (A : Typ) -> Var A -> Exp A ; - EInt : Int -> Exp (TNum TInt) ; - EFloat : Int -> Int -> Exp (TNum TFloat) ; - ELt : (n : NumTyp) -> let Ex = Exp (TNum n) in Ex -> Ex -> Exp (TNum TInt) ; - EAdd, EMul, ESub : (n : NumTyp) -> let Ex = Exp (TNum n) in Ex -> Ex -> Ex ; - EAppNil : (V : Typ) -> Fun NilTyp V -> Exp V ; - EApp : (AS : ListTyp) -> (V : Typ) -> Fun AS V -> ListExp AS -> Exp V ; - - TNum : NumTyp -> Typ ; - TInt, TFloat : NumTyp ; - NilTyp : ListTyp ; - ConsTyp : Typ -> ListTyp -> ListTyp ; - OneExp : (A : Typ) -> Exp A -> ListExp (ConsTyp A NilTyp) ; - ConsExp : (A : Typ) -> (AS : ListTyp) -> - Exp A -> ListExp AS -> ListExp (ConsTyp A AS) ; -} -\end{verbatim} -\normalsize -\newpage - - -\subsection*{Appendix B: The concrete syntax of C} - -\small -\begin{verbatim} -concrete ImperC of Imper = open ResImper in { - flags lexer=codevars ; unlexer=code ; startcat=Program ; - - lincat - Exp = PrecExp ; - Typ, NumTyp = {s,s2 : Str} ; - Rec = {s,s2,s3 : Str} ; - lin - Empty = ss [] ; - FunctNil val stm cont = ss ( - val.s ++ cont.$0 ++ paren [] ++ "{" ++ stm.s ++ "}" ++ ";" ++ cont.s) ; - Funct args val rec = ss ( - val.s ++ rec.$0 ++ paren rec.s2 ++ "{" ++ rec.s ++ "}" ++ ";" ++ rec.s3) ; - RecOne typ stm prg = stm ** { - s2 = typ.s ++ stm.$0 ; - s3 = prg.s - } ; - RecCons typ _ body prg = { - s = body.s ; - s2 = typ.s ++ body.$0 ++ "," ++ body.s2 ; - s3 = prg.s - } ; - - Decl typ cont = continues (typ.s ++ cont.$0) cont ; - Assign _ x exp = continues (x.s ++ "=" ++ exp.s) ; - While exp loop = continue ("while" ++ paren exp.s ++ loop.s) ; - IfElse exp t f = continue ("if" ++ paren exp.s ++ t.s ++ "else" ++ f.s) ; - Block stm = continue ("{" ++ stm.s ++ "}") ; - Printf t e = continues ("printf" ++ paren (t.s2 ++ "," ++ e.s)) ; - Return _ exp = statement ("return" ++ exp.s) ; - Returnv = statement "return" ; - End = ss [] ; - - EVar _ x = constant x.s ; - EInt n = constant n.s ; - EFloat a b = constant (a.s ++ "." ++ b.s) ; - EMul _ = infixL 3 "*" ; - EAdd _ = infixL 2 "+" ; - ESub _ = infixL 2 "-" ; - ELt _ = infixN 1 "<" ; - EAppNil val f = constant (f.s ++ paren []) ; - EApp args val f exps = constant (f.s ++ paren exps.s) ; - - TNum t = t ; - TInt = {s = "int" ; s2 = "\"%d\""} ; TFloat = {s = "float" ; s2 = "\"%f\""} ; - NilTyp = ss [] ; ConsTyp = cc2 ; - OneExp _ e = e ; ConsExp _ _ e es = ss (e.s ++ "," ++ es.s) ; -} -\end{verbatim} -\normalsize -\newpage - - -\subsection*{Appendix C: The concrete syntax of JVM} - -\small -\begin{verbatim} -concrete ImperJVM of Imper = open ResImper in { - flags lexer=codevars ; unlexer=code ; startcat=Program ; - - lincat - Rec = {s,s2,s3 : Str} ; -- code, storage for locals, continuation - Stm = Instr ; - - lin - Empty = ss [] ; - FunctNil val stm cont = ss ( - ".method" ++ "public" ++ "static" ++ cont.$0 ++ paren [] ++ val.s ++ ";" ++ - ".limit" ++ "locals" ++ stm.s2 ++ ";" ++ - ".limit" ++ "stack" ++ "1000" ++ ";" ++ - stm.s ++ - ".end" ++ "method" ++ ";" ++ ";" ++ - cont.s - ) ; - Funct args val rec = ss ( - ".method" ++ "public" ++ "static" ++ rec.$0 ++ paren args.s ++ val.s ++ ";" ++ - ".limit" ++ "locals" ++ rec.s2 ++ ";" ++ - ".limit" ++ "stack" ++ "1000" ++ ";" ++ - rec.s ++ - ".end" ++ "method" ++ ";" ++ ";" ++ - rec.s3 - ) ; - - RecOne typ stm prg = instrb typ.s ( - ["alloc"] ++ typ.s ++ stm.$0 ++ stm.s2) {s = stm.s ; s2 = stm.s2 ; s3 = prg.s}; - - RecCons typ _ body prg = instrb typ.s ( - ["alloc"] ++ typ.s ++ body.$0 ++ body.s2) - {s = body.s ; s2 = body.s2 ; s3 = prg.s}; - - Decl typ cont = instrb typ.s ( - ["alloc"] ++ typ.s ++ cont.$0 - ) cont ; - Assign t x exp = instrc (exp.s ++ t.s ++ "_store" ++ x.s) ; - While exp loop = - let - test = "TEST_" ++ loop.s2 ; - end = "END_" ++ loop.s2 - in instrl ( - "label" ++ test ++ ";" ++ - exp.s ++ - "ifeq" ++ end ++ ";" ++ - loop.s ++ - "goto" ++ test ++ ";" ++ - "label" ++ end - ) ; - IfElse exp t f = - let - false = "FALSE_" ++ t.s2 ++ f.s2 ; - true = "TRUE_" ++ t.s2 ++ f.s2 - in instrl ( - exp.s ++ - "ifeq" ++ false ++ ";" ++ - t.s ++ - "goto" ++ true ++ ";" ++ - "label" ++ false ++ ";" ++ - f.s ++ - "label" ++ true - ) ; - Block stm = instrc stm.s ; - Printf t e = instrc (e.s ++ "invokestatic" ++ t.s ++ "runtime/printf" ++ paren (t.s) ++ "v") ; - Return t exp = instr (exp.s ++ t.s ++ "_return") ; - Returnv = instr "return" ; - End = ss [] ** {s2,s3 = []} ; - - EVar t x = instr (t.s ++ "_load" ++ x.s) ; - EInt n = instr ("ldc" ++ n.s) ; - EFloat a b = instr ("ldc" ++ a.s ++ "." ++ b.s) ; - EAdd = binopt "_add" ; - ESub = binopt "_sub" ; - EMul = binopt "_mul" ; - ELt t = binop ("invokestatic" ++ t.s ++ "runtime/lt" ++ paren (t.s ++ t.s) ++ "i") ; - EAppNil val f = instr ( - "invokestatic" ++ f.s ++ paren [] ++ val.s - ) ; - EApp args val f exps = instr ( - exps.s ++ - "invokestatic" ++ f.s ++ paren args.s ++ val.s - ) ; - - TNum t = t ; - TInt = ss "i" ; - TFloat = ss "f" ; - NilTyp = ss [] ; - ConsTyp = cc2 ; - OneExp _ e = e ; - ConsExp _ _ = cc2 ; -} -\end{verbatim} -\normalsize -\newpage - -\subsection*{Appendix D: Auxiliary operations for concrete syntax} - -\small -\begin{verbatim} -resource ResImper = open Predef in { - - -- precedence - - param PAssoc = PN | PL | PR ; - - oper - Prec : PType = Predef.Ints 4 ; - PrecExp : Type = {s : Str ; p : Prec ; a : PAssoc} ; - - mkPrec : Prec -> PAssoc -> Str -> PrecExp = \p,a,f -> - {s = f ; p = p ; a = a} ; - - usePrec : PrecExp -> Prec -> Str = \x,p -> - case <<x.p,p> : Prec * Prec> of { - <3,4> | <2,3> | <2,4> => paren x.s ; - <1,1> | <1,0> | <0,0> => x.s ; - <1,_> | <0,_> => paren x.s ; - _ => x.s - } ; - - constant : Str -> PrecExp = mkPrec 4 PN ; - - infixN : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> - mkPrec p PN (usePrec x (nextPrec p) ++ f ++ usePrec y (nextPrec p)) ; - infixL : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> - mkPrec p PL (usePrec x p ++ f ++ usePrec y (nextPrec p)) ; - infixR : Prec -> Str -> (_,_ : PrecExp) -> PrecExp = \p,f,x,y -> - mkPrec p PR (usePrec x (nextPrec p) ++ f ++ usePrec y p) ; - - nextPrec : Prec -> Prec = \p -> case <p : Prec> of { - 4 => 4 ; - n => Predef.plus n 1 - } ; - - -- string operations - - SS : Type = {s : Str} ; - ss : Str -> SS = \s -> {s = s} ; - cc2 : (_,_ : SS) -> SS = \x,y -> ss (x.s ++ y.s) ; - paren : Str -> Str = \str -> "(" ++ str ++ ")" ; - - continues : Str -> SS -> SS = \s,t -> ss (s ++ ";" ++ t.s) ; - continue : Str -> SS -> SS = \s,t -> ss (s ++ t.s) ; - statement : Str -> SS = \s -> ss (s ++ ";"); - - -- operations for JVM - - Instr : Type = {s,s2,s3 : Str} ; -- code, variables, labels - instr : Str -> Instr = \s -> - statement s ** {s2,s3 = []} ; - instrc : Str -> Instr -> Instr = \s,i -> - ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = i.s3} ; - instrl : Str -> Instr -> Instr = \s,i -> - ss (s ++ ";" ++ i.s) ** {s2 = i.s2 ; s3 = "L" ++ i.s3} ; - instrb : Str -> Str -> Instr -> Instr = \v,s,i -> - ss (s ++ ";" ++ i.s) ** {s2 = v ++ i.s2 ; s3 = i.s3} ; - binop : Str -> SS -> SS -> SS = \op, x, y -> - ss (x.s ++ y.s ++ op ++ ";") ; - binopt : Str -> SS -> SS -> SS -> SS = \op, t -> - binop (t.s ++ op) ; -} -\end{verbatim} -\normalsize -\newpage - - -\subsection*{Appendix E: Translation of Symbolic JVM to Jasmin} - -\small -\begin{verbatim} -module Main where -import Char -import System - -main :: IO () -main = do - jvm:src:_ <- getArgs - s <- readFile jvm - let cls = takeWhile (/='.') src - let obj = cls ++ ".j" - writeFile obj $ boilerplate cls - appendFile obj $ mkJVM cls s - putStrLn $ "wrote file " ++ obj - -mkJVM :: String -> String -> String -mkJVM cls = unlines . reverse . fst . foldl trans ([],([],0)) . lines where - trans (code,(env,v)) s = case words s of - ".method":p:s:f:ns - | f == "main" -> (".method public static main([Ljava/lang/String;)V":code,([],1)) - | otherwise -> (unwords [".method",p,s, f ++ typesig ns] : code,([],0)) - "alloc":t:x:_ -> (("; " ++ s):code, ((x,v):env, v + size t)) - ".limit":"locals":ns -> chCode (".limit locals " ++ show (length ns)) - "invokestatic":t:f:ns | take 8 f == "runtime/" -> - chCode $ "invokestatic " ++ "runtime/" ++ t ++ drop 8 f ++ typesig ns - "invokestatic":f:ns -> chCode $ "invokestatic " ++ cls ++ "/" ++ f ++ typesig ns - "alloc":ns -> chCode $ "; " ++ s - t:('_':instr):[";"] -> chCode $ t ++ instr - t:('_':instr):x:_ -> chCode $ t ++ instr ++ " " ++ look x - "goto":ns -> chCode $ "goto " ++ label ns - "ifeq":ns -> chCode $ "ifeq " ++ label ns - "label":ns -> chCode $ label ns ++ ":" - ";":[] -> chCode "" - _ -> chCode s - where - chCode c = (c:code,(env,v)) - look x = maybe (error $ x ++ show env) show $ lookup x env - typesig = init . map toUpper . concat - label = init . concat - size t = case t of - "d" -> 2 - _ -> 1 - -boilerplate :: String -> String -boilerplate cls = unlines [ - ".class public " ++ cls, ".super java/lang/Object", - ".method public <init>()V","aload_0", - "invokenonvirtual java/lang/Object/<init>()V","return", - ".end method"] -\end{verbatim} -\normalsize -\newpage - - - - - - -\end{document} - -\begin{verbatim} -\end{verbatim} - diff --git a/examples/gfcc/factorial.c b/examples/gfcc/factorial.c deleted file mode 100644 index 76fee32d0..000000000 --- a/examples/gfcc/factorial.c +++ /dev/null @@ -1,38 +0,0 @@ -int fact (int n) { - int f ; - f = 1 ; - { - while (1 < n) { - f = n * f ; - n = n - 1 ; - } - } - return f ; -} ; - -int factr (int n) { - int f ; - { - if (n < 2) { - f = 1 ; - } - else { - f = n * factr (n-1) ; - } - } - return f ; -} ; - -int main () { - int n ; - n = 1 ; - { - while (n < 11) { - printf("%d",fact(n)) ; - printf("%d",factr(n)) ; - n = n+1 ; - } - } - return ; -} ; - diff --git a/examples/gfcc/fibonacci.c b/examples/gfcc/fibonacci.c deleted file mode 100644 index 80e8a0d5c..000000000 --- a/examples/gfcc/fibonacci.c +++ /dev/null @@ -1,18 +0,0 @@ -int mx () { - return 5000000 ; -} ; - -int main () { - int lo ; int hi ; - lo = 1 ; - hi = lo ; - printf("%d",lo) ; - { - while (hi < mx()) { - printf("%d",hi) ; - hi = lo + hi ; - lo = hi - lo ; - } - } - return ; -} ; diff --git a/examples/gfcc/gfcc b/examples/gfcc/gfcc deleted file mode 100644 index ff0bd2855..000000000 --- a/examples/gfcc/gfcc +++ /dev/null @@ -1,2 +0,0 @@ -echo "rf -file=$1 | ps -lexcode | p | pt -number=1 | l -lang=ImperJVM | wf -file=tmp.gfcc" | gf Imper.pgf -runghc CleanJVM tmp.gfcc $1 diff --git a/examples/gfcc/runtime.j b/examples/gfcc/runtime.j deleted file mode 100644 index 88db0b9b8..000000000 --- a/examples/gfcc/runtime.j +++ /dev/null @@ -1,55 +0,0 @@ -.class public runtime -.super java/lang/Object -; -; standard initializer -.method public <init>()V - aload_0 - invokenonvirtual java/lang/Object/<init>()V - return -.end method - -.method public static ilt(II)I -.limit locals 2 -.limit stack 2 - iload_0 - iload_1 - if_icmpge Label0 - iconst_1 - ireturn - Label0: - iconst_0 - ireturn -.end method - -.method public static flt(FF)I -.limit locals 2 -.limit stack 2 - fload_0 - fload_1 - fcmpl - ifge Label0 - iconst_1 - ireturn - Label0: - iconst_0 - ireturn -.end method - -.method public static iprintf(I)V -.limit locals 1 -.limit stack 1000 - getstatic java/lang/System/out Ljava/io/PrintStream; - iload_0 - invokevirtual java/io/PrintStream/println(I)V - return -.end method - -.method public static fprintf(F)V -.limit locals 1 -.limit stack 1000 - getstatic java/lang/System/out Ljava/io/PrintStream; - fload_0 - invokevirtual java/io/PrintStream/println(F)V - return -.end method - |
