diff options
| author | kr.angelov <kr.angelov@gmail.com> | 2012-01-20 13:41:10 +0000 |
|---|---|---|
| committer | kr.angelov <kr.angelov@gmail.com> | 2012-01-20 13:41:10 +0000 |
| commit | 2eee382a62a909d5a3f2f5eda94f30fe68fd5335 (patch) | |
| tree | b0b0d513535895f244214aebf6358e172b8dce6d /src/runtime/c/pgf | |
| parent | b9728357126f8b9a6311cca17d9f0dcc2a7bfb9b (diff) | |
initial import of the C runtime
Diffstat (limited to 'src/runtime/c/pgf')
| -rw-r--r-- | src/runtime/c/pgf/data.c | 251 | ||||
| -rw-r--r-- | src/runtime/c/pgf/data.h | 388 | ||||
| -rw-r--r-- | src/runtime/c/pgf/edsl.h | 20 | ||||
| -rw-r--r-- | src/runtime/c/pgf/expr.c | 334 | ||||
| -rw-r--r-- | src/runtime/c/pgf/expr.h | 284 | ||||
| -rw-r--r-- | src/runtime/c/pgf/linearize.c | 613 | ||||
| -rw-r--r-- | src/runtime/c/pgf/linearize.h | 156 | ||||
| -rw-r--r-- | src/runtime/c/pgf/loader.c | 396 | ||||
| -rw-r--r-- | src/runtime/c/pgf/panic.c | 8 | ||||
| -rw-r--r-- | src/runtime/c/pgf/panic.h | 6 | ||||
| -rw-r--r-- | src/runtime/c/pgf/parser.c | 697 | ||||
| -rw-r--r-- | src/runtime/c/pgf/parser.h | 127 | ||||
| -rw-r--r-- | src/runtime/c/pgf/pgf.h | 78 | ||||
| -rw-r--r-- | src/runtime/c/pgf/reader.c | 843 | ||||
| -rw-r--r-- | src/runtime/c/pgf/type.h | 22 | ||||
| -rw-r--r-- | src/runtime/c/pgf/unloader.c | 248 |
16 files changed, 3586 insertions, 885 deletions
diff --git a/src/runtime/c/pgf/data.c b/src/runtime/c/pgf/data.c new file mode 100644 index 000000000..98cbf5427 --- /dev/null +++ b/src/runtime/c/pgf/data.c @@ -0,0 +1,251 @@ +#include "data.h" +#include "expr.h" +#include <gu/type.h> +#include <gu/variant.h> +#include <gu/assert.h> + + +PgfCCat pgf_ccat_string = { NULL, GU_NULL_SEQ, -1 }; +PgfCCat pgf_ccat_int = { NULL, GU_NULL_SEQ, -2 }; +PgfCCat pgf_ccat_float = { NULL, GU_NULL_SEQ, -3 }; +PgfCCat pgf_ccat_var = { NULL, GU_NULL_SEQ, -4 }; + +PgfCCatId +pgf_literal_cat(PgfLiteral lit) +{ + switch (gu_variant_tag(lit)) { + case PGF_LITERAL_STR: + return &pgf_ccat_string; + case PGF_LITERAL_INT: + return &pgf_ccat_int; + case PGF_LITERAL_FLT: + return &pgf_ccat_float; + default: + gu_impossible(); + return NULL; + } +} + +bool +pgf_tokens_equal(PgfTokens t1, PgfTokens t2) +{ + size_t len1 = gu_seq_length(t1); + size_t len2 = gu_seq_length(t2); + if (len1 != len2) { + return false; + } + for (size_t i = 0; i < len1; i++) { + GuString s1 = gu_seq_get(t1, PgfToken, i); + GuString s2 = gu_seq_get(t2, PgfToken, i); + if (!gu_string_eq(s1, s2)) { + return false; + } + } + return true; +} + + + +GU_DEFINE_TYPE(PgfTokens, GuSeq, gu_type(GuString)); + +GU_DEFINE_TYPE(PgfCId, typedef, gu_type(GuString)); + +GU_DEFINE_TYPE(GuStringL, GuList, gu_type(GuString)); + + +#define gu_type__PgfCIdMap gu_type__GuStringMap +typedef GuType_GuStringMap GuType_PgfCIdMap; +#define GU_TYPE_INIT_PgfCIdMap GU_TYPE_INIT_GuStringMap + +GU_DEFINE_TYPE(PgfCCat, struct, + GU_MEMBER_S(PgfCCat, cnccat, PgfCncCat), + GU_MEMBER(PgfCCat, prods, PgfProductionSeq)); + +GU_DEFINE_TYPE(PgfCCatId, shared, gu_type(PgfCCat)); + +GU_DEFINE_TYPE(PgfCCatIds, GuList, gu_type(PgfCCatId)); + +GU_DEFINE_TYPE(PgfCCatSeq, GuSeq, gu_type(PgfCCatId)); + +GU_DEFINE_TYPE(PgfAlternative, struct, + GU_MEMBER(PgfAlternative, form, PgfTokens), + GU_MEMBER_P(PgfAlternative, prefixes, GuStringL)); + + +GU_DEFINE_TYPE( + PgfSymbol, GuVariant, + GU_CONSTRUCTOR_S( + PGF_SYMBOL_CAT, PgfSymbolCat, + GU_MEMBER(PgfSymbolCat, d, int), + GU_MEMBER(PgfSymbolCat, r, int)), + GU_CONSTRUCTOR_S( + PGF_SYMBOL_LIT, PgfSymbolLit, + GU_MEMBER(PgfSymbolLit, d, int), + GU_MEMBER(PgfSymbolLit, r, int)), + GU_CONSTRUCTOR_S( + PGF_SYMBOL_VAR, PgfSymbolVar, + GU_MEMBER(PgfSymbolVar, d, int), + GU_MEMBER(PgfSymbolVar, r, int)), + GU_CONSTRUCTOR_S( + PGF_SYMBOL_KS, PgfSymbolKS, + GU_MEMBER(PgfSymbolKS, tokens, PgfTokens)), + GU_CONSTRUCTOR_S( + PGF_SYMBOL_KP, PgfSymbolKP, + GU_MEMBER(PgfSymbolKP, default_form, PgfTokens), + GU_MEMBER(PgfSymbolKP, n_forms, GuLength), + GU_FLEX_MEMBER(PgfSymbolKP, forms, PgfAlternative))); + +GU_DEFINE_TYPE( + PgfCncCat, struct, + GU_MEMBER(PgfCncCat, cid, PgfCId), + GU_MEMBER_P(PgfCncCat, cats, PgfCCatIds), + GU_MEMBER(PgfCncCat, n_lins, size_t), + GU_MEMBER_P(PgfCncCat, lindefs, PgfFunIds), + GU_MEMBER_P(PgfCncCat, labels, GuStringL)); + +// GU_DEFINE_TYPE(PgfSequence, GuList, gu_ptr_type(PgfSymbol)); +// GU_DEFINE_TYPE(PgfSequence, GuList, gu_type(PgfSymbol)); +GU_DEFINE_TYPE(PgfSequence, GuSeq, gu_type(PgfSymbol)); + +GU_DEFINE_TYPE(PgfFlags, GuStringMap, gu_type(PgfLiteral), &gu_null_variant); + +typedef PgfFlags* PgfFlagsP; + +GU_DEFINE_TYPE(PgfFlagsP, pointer, gu_type(PgfFlags)); + +GU_DEFINE_TYPE(PgfSequences, GuList, gu_type(PgfSequence)); + +GU_DEFINE_TYPE(PgfSeqId, typedef, gu_type(PgfSequence)); + +GU_DEFINE_TYPE( + PgfCncFun, struct, + GU_MEMBER(PgfCncFun, fun, PgfCId), + GU_MEMBER(PgfCncFun, n_lins, GuLength), + GU_FLEX_MEMBER(PgfCncFun, lins, PgfSeqId)); + +GU_DEFINE_TYPE(PgfCncFuns, GuList, + GU_TYPE_LIT(referenced, _, gu_ptr_type(PgfCncFun))); + +GU_DEFINE_TYPE(PgfFunId, shared, gu_type(PgfCncFun)); + +GU_DEFINE_TYPE(PgfFunIds, GuList, gu_type(PgfFunId)); + +GU_DEFINE_TYPE( + PgfPArg, struct, + GU_MEMBER_P(PgfPArg, hypos, PgfCCatIds), + GU_MEMBER(PgfPArg, ccat, PgfCCatId)); + +GU_DEFINE_TYPE(PgfPArgs, GuSeq, gu_type(PgfPArg)); + +GU_DEFINE_TYPE( + PgfProduction, GuVariant, + GU_CONSTRUCTOR_S( + PGF_PRODUCTION_APPLY, PgfProductionApply, + GU_MEMBER(PgfProductionApply, fun, PgfFunId), + GU_MEMBER(PgfProductionApply, args, PgfPArgs)), + GU_CONSTRUCTOR_S( + PGF_PRODUCTION_COERCE, PgfProductionCoerce, + GU_MEMBER(PgfProductionCoerce, coerce, PgfCCatId)), + GU_CONSTRUCTOR_S( + PGF_PRODUCTION_CONST, PgfProductionConst, + GU_MEMBER(PgfProductionConst, expr, PgfExpr), + GU_MEMBER(PgfProductionConst, n_toks, GuLength), + GU_FLEX_MEMBER(PgfProductionConst, toks, GuString))); + +GU_DEFINE_TYPE(PgfProductions, GuList, gu_type(PgfProduction)); +GU_DEFINE_TYPE(PgfProductionSeq, GuSeq, gu_type(PgfProduction)); + +GU_DEFINE_TYPE( + PgfPatt, GuVariant, + GU_CONSTRUCTOR_S( + PGF_PATT_APP, PgfPattApp, + GU_MEMBER(PgfPattApp, ctor, PgfCId), + GU_MEMBER(PgfPattApp, n_args, GuLength), + GU_MEMBER(PgfPattApp, args, PgfPatt)), + GU_CONSTRUCTOR_S( + PGF_PATT_LIT, PgfPattLit, + GU_MEMBER(PgfPattLit, lit, PgfLiteral)), + GU_CONSTRUCTOR_S( + PGF_PATT_VAR, PgfPattVar, + GU_MEMBER(PgfPattVar, var, PgfCId)), + GU_CONSTRUCTOR_S( + PGF_PATT_AS, PgfPattAs, + GU_MEMBER(PgfPattAs, var, PgfCId), + GU_MEMBER(PgfPattAs, patt, PgfPatt)), + GU_CONSTRUCTOR( + PGF_PATT_WILD, void), + GU_CONSTRUCTOR_S( + PGF_PATT_IMPL_ARG, PgfPattImplArg, + GU_MEMBER(PgfPattImplArg, patt, PgfPatt)), + GU_CONSTRUCTOR_S( + PGF_PATT_TILDE, PgfPattTilde, + GU_MEMBER(PgfPattTilde, expr, PgfExpr))); + +GU_DEFINE_TYPE( + PgfEquation, struct, + GU_MEMBER(PgfEquation, body, PgfExpr), + GU_MEMBER(PgfEquation, n_patts, GuLength), + GU_MEMBER(PgfEquation, patts, PgfPatt)); + +// Distinct type so we can give it special treatment in the reader +GU_DEFINE_TYPE(PgfEquationsM, GuSeq, gu_type(PgfEquation)); + +GU_DEFINE_TYPE( + PgfFunDecl, struct, + GU_MEMBER_P(PgfFunDecl, type, PgfType), + GU_MEMBER(PgfFunDecl, arity, int), + GU_MEMBER(PgfFunDecl, defns, PgfEquationsM), + GU_MEMBER(PgfFunDecl, prob, double)); + +GU_DEFINE_TYPE( + PgfCatFun, struct, + GU_MEMBER(PgfCatFun, prob, double), + GU_MEMBER(PgfCatFun, fun, PgfCId)); + +GU_DEFINE_TYPE( + PgfCat, struct, + GU_MEMBER(PgfCat, context, PgfHypos), + GU_MEMBER(PgfCat, n_functions, GuLength), + GU_FLEX_MEMBER(PgfCat, functions, PgfCatFun)); + + +GU_DEFINE_TYPE( + PgfAbstr, struct, + GU_MEMBER(PgfAbstr, aflags, PgfFlagsP), + GU_MEMBER_V(PgfAbstr, funs, + GU_TYPE_LIT(pointer, PgfCIdMap*, + GU_TYPE_LIT(PgfCIdMap, _, + gu_ptr_type(PgfFunDecl), + &gu_null_struct))), + GU_MEMBER_V(PgfAbstr, cats, + GU_TYPE_LIT(pointer, PgfCIdMap*, + GU_TYPE_LIT(PgfCIdMap, _, + gu_ptr_type(PgfCat), + &gu_null_struct)))); + +GU_DEFINE_TYPE( + PgfPrintNames, PgfCIdMap, gu_type(GuString), NULL); + +GU_DEFINE_TYPE( + PgfConcr, struct, + GU_MEMBER(PgfConcr, cflags, PgfFlagsP), + GU_MEMBER_P(PgfConcr, printnames, PgfPrintNames), + GU_MEMBER_V(PgfConcr, cnccats, + GU_TYPE_LIT(pointer, PgfCIdMap*, + GU_TYPE_LIT(PgfCIdMap, _, + gu_ptr_type(PgfCncCat), + &gu_null_struct)))); + +GU_DEFINE_TYPE( + PgfPGF, struct, + GU_MEMBER(PgfPGF, major_version, uint16_t), + GU_MEMBER(PgfPGF, minor_version, uint16_t), + GU_MEMBER(PgfPGF, gflags, PgfFlagsP), + GU_MEMBER(PgfPGF, absname, PgfCId), + GU_MEMBER(PgfPGF, abstract, PgfAbstr), + GU_MEMBER_V(PgfPGF, concretes, + GU_TYPE_LIT(pointer, PgfCIdMap*, + GU_TYPE_LIT(PgfCIdMap, _, + gu_ptr_type(PgfConcr), + &gu_null_struct)))); + diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index b17ea6d66..d80a19526 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -1,76 +1,330 @@ -#ifndef PGF_DATA_H -#define PGF_DATA_H +/* + * Copyright 2010 University of Helsinki. + * + * This file is part of libpgf. + * + * Libpgf is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * Libpgf is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libpgf. If not, see <http://www.gnu.org/licenses/>. + */ -typedef int BindType; +#ifndef PGF_DATA_H_ +#define PGF_DATA_H_ -#include "expr.h" -#include "type.h" +#include <gu/list.h> +#include <gu/variant.h> +#include <gu/map.h> +#include <gu/string.h> +#include <gu/type.h> +#include <gu/seq.h> +#include <pgf/pgf.h> +#include <pgf/expr.h> -struct _String { - int len; - unsigned int chars[]; +typedef struct PgfCCat PgfCCat; +typedef PgfCCat* PgfCCatId; +extern GU_DECLARE_TYPE(PgfCCat, struct); +extern GU_DECLARE_TYPE(PgfCCatId, shared); +typedef GuList(PgfCCatId) PgfCCatIds; +extern GU_DECLARE_TYPE(PgfCCatIds, GuList); +typedef GuSeq PgfCCatSeq; +extern GU_DECLARE_TYPE(PgfCCatSeq, GuSeq); + +typedef struct PgfAbstr PgfAbstr; +typedef struct PgfFunDecl PgfFunDecl; +typedef struct PgfConcr PgfConcr; + +typedef int PgfLength; +typedef struct GuVariant PgfSymbol; +typedef struct PgfAlternative PgfAlternative; +typedef struct PgfCncFun PgfCncFun; + + +typedef GuSeq PgfSequence; // -> PgfSymbol + +typedef PgfCncFun* PgfFunId; // key to PgfCncFuns +extern GU_DECLARE_TYPE(PgfFunId, shared); +typedef GuList(PgfCncFun*) PgfCncFuns; +extern GU_DECLARE_TYPE(PgfCncFuns, GuList); +typedef GuList(PgfFunId) PgfFunIds; +extern GU_DECLARE_TYPE(PgfFunIds, GuList); +// typedef GuStringMap PgfCIdMap; // PgfCId -> ? +#define PgfCIdMap GuStringMap +typedef PgfCIdMap PgfFlags; // PgfCId -> PgfLiteral +extern GU_DECLARE_TYPE(PgfFlags, GuMap); + +extern GU_DECLARE_TYPE(PgfType, struct); +typedef GuVariant PgfProduction; +typedef GuList(PgfProduction) PgfProductions; +extern GU_DECLARE_TYPE(PgfProductions, GuList); +typedef GuSeq PgfProductionSeq; +extern GU_DECLARE_TYPE(PgfProductionSeq, GuSeq); + +typedef struct PgfCatFun PgfCatFun; +typedef struct PgfCncCat PgfCncCat; +extern GU_DECLARE_TYPE(PgfCncCat, struct); +typedef GuVariant PgfPatt; + +typedef GuList(GuString) GuStringL; +extern GU_DECLARE_TYPE(GuStringL, GuList); +typedef GuSeq PgfTokens; // -> PgfToken +extern GU_DECLARE_TYPE(PgfTokens, GuSeq); + +bool +pgf_tokens_equal(PgfTokens t1, PgfTokens t2); + + + +typedef PgfExpr PgfTree; + +typedef struct PgfEquation PgfEquation; +typedef GuSeq PgfEquations; +typedef PgfEquations PgfEquationsM; // can be null +extern GU_DECLARE_TYPE(PgfEquationsM, GuSeq); +typedef struct PgfCat PgfCat; + +typedef PgfSequence PgfSeqId; // shared reference + +extern GU_DECLARE_TYPE(PgfSeqId, typedef); + +typedef GuList(PgfSequence) PgfSequences; + +extern GU_DECLARE_TYPE(PgfSequences, GuList); + + + + +struct PgfAbstr { + PgfFlags* aflags; + PgfCIdMap* funs; // |-> PgfFunDecl* + PgfCIdMap* cats; // |-> PgfCat* }; -struct _CId { - int len; - char chars[]; +struct PgfPGF { + uint16_t major_version; + uint16_t minor_version; + PgfFlags* gflags; + PgfCId absname; + PgfAbstr abstract; + PgfCIdMap* concretes; // |-> PgfConcr* + GuPool* pool; }; -typedef struct _CIdList { - int count; - CId names[]; -} *CIdList; - -typedef struct _AbsCat { - CId name; - Context hypos; - CIdList funs; -} *AbsCat; - -typedef struct _AbsCats { - int count; - struct _AbsCat lst[]; -} *AbsCats; - -typedef struct _AbsFun { - CId name; - Type ty; - int arrity; - Equations equs; -} *AbsFun; - -typedef struct _AbsFuns { - int count; - struct _AbsFun lst[]; -} *AbsFuns; - -struct _Flag { - CId name; - Literal value; -} ; - -typedef struct _Flags { - int count; - struct _Flag values[]; -} *Flags; - -typedef struct _Abstract { - CId name; - Flags flags; - AbsFuns funs; - AbsCats cats; -} *Abstract; - -typedef struct _Concrete { - CId name; - Flags flags; -} *Concrete; - -struct _PGF { - Flags flags; - int nConcr; - struct _Abstract abstract; - struct _Concrete concretes[]; +extern GU_DECLARE_TYPE(PgfPGF, struct); + +struct PgfFunDecl { + PgfType* type; + int arity; // Only for computational defs? + PgfEquationsM defns; // maybe null + double prob; +}; + +struct PgfCatFun { + double prob; + PgfCId fun; +}; + +struct PgfCat { + // TODO: Add cid here + PgfHypos context; + GuLength n_functions; + PgfCatFun functions[]; // XXX: resolve to PgfFunDecl*? +}; + + +struct PgfCncCat { + PgfCId cid; + PgfCCatIds* cats; + PgfFunIds* lindefs; + size_t n_lins; + + GuStringL* labels; + /**< Labels for tuples. All nested tuples, records and tables + * in the GF linearization types are flattened into a single + * tuple in the corresponding PGF concrete category. This + * field holds the labels that indicate which GF field or + * parameter (or their combination) each tuple element + * represents. */ +}; + +struct PgfCncFun { + PgfCId fun; // XXX: resolve to PgfFunDecl*? + GuLength n_lins; + PgfSeqId lins[]; +}; + +struct PgfAlternative { + PgfTokens form; + /**< The form of this variant as a list of tokens. */ + + GuStringL* prefixes; + /**< The prefixes of the following symbol that trigger this + * form. */ +}; + +struct PgfCCat { + PgfCncCat* cnccat; + PgfProductionSeq prods; + int fid; +}; + +extern PgfCCat pgf_ccat_string, pgf_ccat_int, pgf_ccat_float, pgf_ccat_var; + +typedef PgfCIdMap PgfPrintNames; +extern GU_DECLARE_TYPE(PgfPrintNames, GuStringMap); + +struct PgfConcr { + PgfFlags* cflags; + PgfPrintNames* printnames; + PgfCIdMap* cnccats; + PgfCCatSeq extra_ccats; }; -#endif +extern GU_DECLARE_TYPE(PgfConcr, struct); + +typedef enum { + PGF_SYMBOL_CAT, + PGF_SYMBOL_LIT, + PGF_SYMBOL_VAR, + PGF_SYMBOL_KS, + PGF_SYMBOL_KP +} PgfSymbolTag; + +typedef struct PgfSymbolIdx PgfSymbolIdx; + +struct PgfSymbolIdx { + int d; + int r; +}; + +typedef PgfSymbolIdx PgfSymbolCat, PgfSymbolLit, PgfSymbolVar; + +typedef struct { + PgfTokens tokens; +} PgfSymbolKS; + +typedef struct PgfSymbolKP +/** A prefix-dependent symbol. The form that this symbol takes + * depends on the form of a prefix of the following symbol. */ +{ + PgfTokens default_form; + /**< Default form that this symbol takes if none of of the + * variant forms is triggered. */ + + GuLength n_forms; + PgfAlternative forms[]; + /**< Variant forms whose choise depends on the following + * symbol. */ +} PgfSymbolKP; + + + + +// PgfProduction + +typedef enum { + PGF_PRODUCTION_APPLY, + PGF_PRODUCTION_COERCE, + PGF_PRODUCTION_CONST +} PgfProductionTag; + +typedef struct PgfPArg PgfPArg; + +struct PgfPArg { + PgfCCatId ccat; + PgfCCatIds* hypos; +}; + +GU_DECLARE_TYPE(PgfPArg, struct); + +typedef GuSeq PgfPArgs; + +GU_DECLARE_TYPE(PgfPArgs, GuSeq); + +typedef struct { + PgfFunId fun; + PgfPArgs args; +} PgfProductionApply; + +typedef struct PgfProductionCoerce +/** A coercion. This production is a logical union of the coercions of + * another FId. This allows common subsets of productions to be + * shared. */ +{ + PgfCCatId coerce; +} PgfProductionCoerce; + +typedef struct { + PgfExpr expr; // XXX + GuLength n_toks; + GuString toks[]; // XXX +} PgfProductionConst; + + +extern GU_DECLARE_TYPE(PgfProduction, GuVariant); +extern GU_DECLARE_TYPE(PgfBindType, enum); +extern GU_DECLARE_TYPE(PgfLiteral, GuVariant); + + +PgfCCatId +pgf_literal_cat(PgfLiteral lit); + +// PgfPatt + +typedef enum { + PGF_PATT_APP, + PGF_PATT_LIT, + PGF_PATT_VAR, + PGF_PATT_AS, + PGF_PATT_WILD, + PGF_PATT_IMPL_ARG, + PGF_PATT_TILDE, + PGF_PATT_NUM_TAGS +} PgfPattTag; + +typedef struct { + PgfCId ctor; + GuLength n_args; + PgfPatt args[]; +} PgfPattApp; + +typedef struct { + PgfLiteral* lit; +} PgfPattLit; + +typedef struct { + PgfCId var; +} PgfPattVar; + +typedef struct { + PgfCId var; + PgfPatt patt; +} PgfPattAs; + +typedef void PgfPattWild; + +typedef struct { + PgfPatt patt; +} PgfPattImplArg; + +typedef struct { + PgfExpr expr; +} PgfPattTilde; + +struct PgfEquation { + PgfExpr body; + GuLength n_patts; + PgfPatt patts[]; +}; + + + +#endif /* PGF_PRIVATE_H_ */ diff --git a/src/runtime/c/pgf/edsl.h b/src/runtime/c/pgf/edsl.h new file mode 100644 index 000000000..af21e6c15 --- /dev/null +++ b/src/runtime/c/pgf/edsl.h @@ -0,0 +1,20 @@ +#ifndef PGF_EDSL_H_ +#define PGF_EDSL_H_ + +#include <pgf/expr.h> + +#define APP(f, a) \ + gu_new_variant_i(PGF_EDSL_POOL, PGF_EXPR_APP, PgfExprApp, f, a) +#define APP2(f, a1, a2) APP(APP(f, a1), a2) +#define APP3(f, a1, a2, a3) APP2(APP(f, a1), a2, a3) + +#define VAR(s) \ + gu_new_variant_i(PGF_EDSL_POOL, PGF_EXPR_FUN, PgfExprFun, gu_cstring(#s)) + +#define APPV(s, a) APP(VAR(s), a) +#define APPV2(s, a1, a2) APP2(VAR(s), a1, a2) +#define APPV3(s, a1, a2, a3) APP3(VAR(s), a1, a2) + + + +#endif // PGF_EDSL_H_ diff --git a/src/runtime/c/pgf/expr.c b/src/runtime/c/pgf/expr.c new file mode 100644 index 000000000..cd5d69928 --- /dev/null +++ b/src/runtime/c/pgf/expr.c @@ -0,0 +1,334 @@ +#include "expr.h" +#include <gu/intern.h> +#include <gu/assert.h> +#include <ctype.h> + + +PgfExpr +pgf_expr_unwrap(PgfExpr expr) +{ + while (true) { + GuVariantInfo i = gu_variant_open(expr); + switch (i.tag) { + case PGF_EXPR_IMPL_ARG: { + PgfExprImplArg* eimpl = i.data; + expr = eimpl->expr; + break; + } + case PGF_EXPR_TYPED: { + PgfExprTyped* etyped = i.data; + expr = etyped->expr; + break; + } + default: + return expr; + } + } +} + +int +pgf_expr_arity(PgfExpr expr) +{ + int n = 0; + while (true) { + PgfExpr e = pgf_expr_unwrap(expr); + GuVariantInfo i = gu_variant_open(e); + switch (i.tag) { + case PGF_EXPR_APP: { + PgfExprApp* app = i.data; + expr = app->fun; + n = n + 1; + break; + } + case PGF_EXPR_FUN: + return n; + default: + return -1; + } + } +} + +PgfApplication* +pgf_expr_unapply(PgfExpr expr, GuPool* pool) +{ + int arity = pgf_expr_arity(expr); + if (arity < 0) { + return NULL; + } + PgfApplication* appl = gu_new_flex(pool, PgfApplication, args, arity); + appl->n_args = arity; + for (int n = arity - 1; n >= 0; n--) { + PgfExpr e = pgf_expr_unwrap(expr); + gu_assert(gu_variant_tag(e) == PGF_EXPR_APP); + PgfExprApp* app = gu_variant_data(e); + appl->args[n] = app->arg; + expr = app->fun; + } + PgfExpr e = pgf_expr_unwrap(expr); + gu_assert(gu_variant_tag(e) == PGF_EXPR_FUN); + PgfExprFun* fun = gu_variant_data(e); + appl->fun = fun->fun; + return appl; +} + +GU_DEFINE_TYPE(PgfBindType, enum, + GU_ENUM_C(PgfBindType, PGF_BIND_TYPE_EXPLICIT), + GU_ENUM_C(PgfBindType, PGF_BIND_TYPE_IMPLICIT)); + +GU_DEFINE_TYPE(PgfLiteral, GuVariant, + GU_CONSTRUCTOR_S(PGF_LITERAL_STR, PgfLiteralStr, + GU_MEMBER(PgfLiteralStr, val, GuString)), + GU_CONSTRUCTOR_S(PGF_LITERAL_INT, PgfLiteralInt, + GU_MEMBER(PgfLiteralInt, val, int)), + GU_CONSTRUCTOR_S(PGF_LITERAL_FLT, PgfLiteralFlt, + GU_MEMBER(PgfLiteralFlt, val, double))); + +GU_DECLARE_TYPE(PgfType, struct); + +GU_DEFINE_TYPE(PgfHypo, struct, + GU_MEMBER(PgfHypo, bindtype, PgfBindType), + GU_MEMBER(PgfHypo, cid, PgfCId), + GU_MEMBER_P(PgfHypo, type, PgfType)); + +GU_DEFINE_TYPE(PgfHypos, GuSeq, gu_type(PgfHypo)); + +GU_DEFINE_TYPE(PgfType, struct, + GU_MEMBER(PgfType, hypos, PgfHypos), + GU_MEMBER(PgfType, cid, PgfCId), + GU_MEMBER(PgfType, n_exprs, GuLength), + GU_FLEX_MEMBER(PgfType, exprs, PgfExpr)); + +GU_DEFINE_TYPE( + PgfExpr, GuVariant, + GU_CONSTRUCTOR_S( + PGF_EXPR_ABS, PgfExprAbs, + GU_MEMBER(PgfExprAbs, bind_type, PgfBindType), + GU_MEMBER(PgfExprAbs, id, GuStr), + GU_MEMBER(PgfExprAbs, body, PgfExpr)), + GU_CONSTRUCTOR_S( + PGF_EXPR_APP, PgfExprApp, + GU_MEMBER(PgfExprApp, fun, PgfExpr), + GU_MEMBER(PgfExprApp, arg, PgfExpr)), + GU_CONSTRUCTOR_S( + PGF_EXPR_LIT, PgfExprLit, + GU_MEMBER(PgfExprLit, lit, PgfLiteral)), + GU_CONSTRUCTOR_S( + PGF_EXPR_META, PgfExprMeta, + GU_MEMBER(PgfExprMeta, id, int)), + GU_CONSTRUCTOR_S( + PGF_EXPR_FUN, PgfExprFun, + GU_MEMBER(PgfExprFun, fun, GuStr)), + GU_CONSTRUCTOR_S( + PGF_EXPR_VAR, PgfExprVar, + GU_MEMBER(PgfExprVar, var, int)), + GU_CONSTRUCTOR_S( + PGF_EXPR_TYPED, PgfExprTyped, + GU_MEMBER(PgfExprTyped, expr, PgfExpr), + GU_MEMBER_P(PgfExprTyped, type, PgfType)), + GU_CONSTRUCTOR_S( + PGF_EXPR_IMPL_ARG, PgfExprImplArg, + GU_MEMBER(PgfExprImplArg, expr, PgfExpr))); + + +typedef struct PgfExprParser PgfExprParser; + +struct PgfExprParser { + GuReader* rdr; + GuIntern* intern; + GuExn* err; + GuPool* expr_pool; + const char* lookahead; + int next_char; +}; + + +static const char pgf_expr_lpar[] = "("; +static const char pgf_expr_rpar[] = ")"; +static const char pgf_expr_semic[] = ";"; + +static char +pgf_expr_parser_next(PgfExprParser* parser) +{ + if (parser->next_char >= 0) { + char ret = (char) parser->next_char; + parser->next_char = -1; + return ret; + } + return gu_getc(parser->rdr, parser->err); +} + +static const char* +pgf_expr_parser_lookahead(PgfExprParser* parser) +{ + if (parser->lookahead != NULL) { + return parser->lookahead; + } + const char* str = NULL; + char c; + do { + c = pgf_expr_parser_next(parser); + if (!gu_ok(parser->err)) { + return NULL; + } + } while (isspace(c)); + switch (c) { + case '(': + str = pgf_expr_lpar; + break; + case ')': + str = pgf_expr_rpar; + break; + case ';': + str = pgf_expr_semic; + break; + default: + if (isalpha(c)) { + GuPool* tmp_pool = gu_new_pool(); + GuCharBuf* chars = gu_new_buf(char, tmp_pool); + while (isalnum(c) || c == '_') { + gu_buf_push(chars, char, c); + c = pgf_expr_parser_next(parser); + if (!gu_ok(parser->err)) { + return NULL; + } + } + parser->next_char = (unsigned char) c; + char* tmp_str = gu_chars_str(gu_buf_seq(chars), + tmp_pool); + str = gu_intern_str(parser->intern, tmp_str); + gu_pool_free(tmp_pool); + } + } + parser->lookahead = str; + return str; +} + +static bool +pgf_expr_parser_token_is_id(const char* str) +{ + if (str == NULL || !str[0]) { + return false; + } + char c = str[0]; + return (isalpha(c) || c == '_'); +} + +static void +pgf_expr_parser_consume(PgfExprParser* parser) +{ + pgf_expr_parser_lookahead(parser); + parser->lookahead = NULL; +} + +static PgfExpr +pgf_expr_parser_expr(PgfExprParser* parser); + +static PgfExpr +pgf_expr_parser_term(PgfExprParser* parser) +{ + const char* la = pgf_expr_parser_lookahead(parser); + + if (la == pgf_expr_lpar) { + pgf_expr_parser_consume(parser); + PgfExpr expr = pgf_expr_parser_expr(parser); + la = pgf_expr_parser_lookahead(parser); + if (la == pgf_expr_rpar) { + pgf_expr_parser_consume(parser); + return expr; + } + } else if (pgf_expr_parser_token_is_id(la)) { + pgf_expr_parser_consume(parser); + GuString s = gu_str_string(la, parser->expr_pool); + return gu_new_variant_i(parser->expr_pool, + PGF_EXPR_FUN, + PgfExprFun, + s); + } + return gu_null_variant; +} + +static PgfExpr +pgf_expr_parser_expr(PgfExprParser* parser) +{ + PgfExpr expr = pgf_expr_parser_term(parser); + if (gu_variant_is_null(expr)) + { + return expr; + } + while (true) { + PgfExpr arg = pgf_expr_parser_term(parser); + if (gu_variant_is_null(arg)) { + return expr; + } + expr = gu_new_variant_i(parser->expr_pool, + PGF_EXPR_APP, + PgfExprApp, + expr, arg); + } +} + + + +PgfExpr +pgf_read_expr(GuReader* rdr, GuPool* pool, GuExn* err) +{ + GuPool* tmp_pool = gu_new_pool(); + PgfExprParser* parser = gu_new(PgfExprParser, tmp_pool); + parser->rdr = rdr; + parser->intern = gu_new_intern(pool, tmp_pool); + parser->expr_pool = pool; + parser->err = err; + parser->lookahead = NULL; + parser->next_char = -1; + PgfExpr expr = pgf_expr_parser_expr(parser); + const char* la = pgf_expr_parser_lookahead(parser); + if (la == pgf_expr_semic) { + pgf_expr_parser_consume(parser); + } else { + expr = gu_null_variant; + } + gu_pool_free(tmp_pool); + return expr; +} + +static void +pgf_expr_print_with_paren(PgfExpr expr, bool need_paren, + GuWriter* wtr, GuExn* err) +{ + GuVariantInfo ei = gu_variant_open(expr); + switch (ei.tag) { + case PGF_EXPR_FUN: { + PgfExprFun* fun = ei.data; + gu_string_write(fun->fun, wtr, err); + break; + } + case PGF_EXPR_APP: { + PgfExprApp* app = ei.data; + if (need_paren) { + gu_puts("(", wtr, err); + } + pgf_expr_print_with_paren(app->fun, false, wtr, err); + gu_puts(" ", wtr, err); + pgf_expr_print_with_paren(app->arg, true, wtr, err); + if (need_paren) { + gu_puts(")", wtr, err); + } + break; + } + case PGF_EXPR_ABS: + case PGF_EXPR_LIT: + case PGF_EXPR_META: + case PGF_EXPR_VAR: + case PGF_EXPR_TYPED: + case PGF_EXPR_IMPL_ARG: + gu_impossible(); + break; + default: + gu_impossible(); + } +} + +void +pgf_expr_print(PgfExpr expr, GuWriter* wtr, GuExn* err) { + pgf_expr_print_with_paren(expr, false, wtr, err); +} diff --git a/src/runtime/c/pgf/expr.h b/src/runtime/c/pgf/expr.h index d4d2aaea2..7ecca30bd 100644 --- a/src/runtime/c/pgf/expr.h +++ b/src/runtime/c/pgf/expr.h @@ -1,144 +1,152 @@ -#ifndef PGF_EXPR_H -#define PGF_EXPR_H +#ifndef EXPR_H_ +#define EXPR_H_ -#define LIT_STR 0 -#define LIT_INT 1 -#define LIT_FLOAT 2 +#include <gu/read.h> +#include <gu/write.h> +#include <gu/variant.h> +#include <gu/seq.h> +#include <pgf/pgf.h> -struct _Literal { - int tag; +/// Abstract syntax trees +/// @file + +/// An abstract syntax tree +typedef GuVariant PgfExpr; + +GU_DECLARE_TYPE(PgfExpr, GuVariant); + +typedef GuList(PgfExpr) PgfExprs; + +typedef struct PgfHypo PgfHypo; +typedef struct PgfType PgfType; + +typedef int PgfMetaId; + +typedef enum { + PGF_BIND_TYPE_EXPLICIT, + PGF_BIND_TYPE_IMPLICIT +} PgfBindType; + +// PgfLiteral + +typedef GuVariant PgfLiteral; + + +typedef enum { + PGF_LITERAL_STR, + PGF_LITERAL_INT, + PGF_LITERAL_FLT, + PGF_LITERAL_NUM_TAGS +} PgfLiteralTag; + +typedef struct { + GuStr val; +} PgfLiteralStr; + +typedef struct { + int val; +} PgfLiteralInt; + +typedef struct { + double val; +} PgfLiteralFlt; + + + +struct PgfHypo { + PgfBindType bindtype; + + PgfCId cid; + /**< Locally scoped name for the parameter if dependent types + * are used. "_" for normal parameters. */ + + PgfType* type; +}; + +typedef GuSeq PgfHypos; +extern GU_DECLARE_TYPE(PgfHypos, GuSeq); + +struct PgfType { + PgfHypos hypos; + PgfCId cid; /// XXX: resolve to PgfCat*? + int n_exprs; + PgfExpr exprs[]; }; -typedef struct _LiteralStr { - struct _Literal _; - String val; -} *LiteralStr; - -typedef struct _LiteralInt { - struct _Literal _; - int val; -} *LiteralInt; - -typedef struct _LiteralFloat { - struct _Literal _; - double val; -} *LiteralFloat; - -#define TAG_ABS 0 -#define TAG_APP 1 -#define TAG_LIT 2 -#define TAG_MET 3 -#define TAG_FUN 4 -#define TAG_VAR 5 -#define TAG_TYP 6 -#define TAG_IMP 7 - -struct _Expr { - int tag; + +typedef enum { + PGF_EXPR_ABS, + PGF_EXPR_APP, + PGF_EXPR_LIT, + PGF_EXPR_META, + PGF_EXPR_FUN, + PGF_EXPR_VAR, + PGF_EXPR_TYPED, + PGF_EXPR_IMPL_ARG, + PGF_EXPR_NUM_TAGS +} PgfExprTag; + +typedef struct { + PgfBindType bind_type; + PgfCId id; // + PgfExpr body; +} PgfExprAbs; + +typedef struct { + PgfExpr fun; + PgfExpr arg; +} PgfExprApp; + +typedef struct { + PgfLiteral lit; +} PgfExprLit; + +typedef struct { + PgfMetaId id; +} PgfExprMeta; + +typedef struct { + PgfCId fun; +} PgfExprFun; + +typedef struct { + int var; +} PgfExprVar; + +/**< A variable. The value is a de Bruijn index to the environment, + * beginning from the innermost variable. */ + +typedef struct { + PgfExpr expr; + PgfType* type; +} PgfExprTyped; + +typedef struct { + PgfExpr expr; +} PgfExprImplArg; + +int +pgf_expr_arity(PgfExpr expr); + +PgfExpr +pgf_expr_unwrap(PgfExpr expr); + +typedef struct PgfApplication PgfApplication; + +struct PgfApplication { + PgfCId fun; + int n_args; + PgfExpr args[]; }; -typedef struct _ExprAbs { - struct _Expr _; - BindType bt; - CId var; - Expr body; -} *ExprAbs; - -typedef struct _ExprApp { - struct _Expr _; - Expr left, right; -} *ExprApp; - -typedef struct _ExprLit { - struct _Expr _; - Literal lit; -} *ExprLit; - -typedef struct _ExprMeta { - struct _Expr _; - int id; -} *ExprMeta; - -typedef struct _ExprFun { - struct _Expr _; - CId fun; -} *ExprFun; - -typedef struct _ExprVar { - struct _Expr _; - int index; -} *ExprVar; - -typedef struct _ExprTyped { - struct _Expr _; - Expr e; - Type ty; -} *ExprTyped; - -typedef struct _ExprImplArg { - struct _Expr _; - Expr e; -} *ExprImplArg; - -#define TAG_PAPP 0 -#define TAG_PVAR 1 -#define TAG_PAT 2 -#define TAG_PWILD 3 -#define TAG_PLIT 4 -#define TAG_PIMP 5 -#define TAG_PTILDE 6 - -typedef struct _Patt { - int tag; -} *Patt; - -typedef struct _Patts { - int count; - Patt pats[]; -} *Patts; - -typedef struct _PattApp { - struct _Patt _; - CId fun; - struct _Patts args; -} *PattApp; - -typedef struct _PattVar { - struct _Patt _; - CId var; -} *PattVar; - -typedef struct _PattAt { - struct _Patt _; - CId var; - Patt pat; -} *PattAt; - -typedef struct _PattWild { - struct _Patt _; -} *PattWild; - -typedef struct _PattLit { - struct _Patt _; - Literal lit; -} *PattLit; - -typedef struct _PattImplArg { - struct _Patt _; - Patt pat; -} *PattImplArg; - -typedef struct _PattTilde { - struct _Patt _; - Expr e; -} *PattTilde; - -typedef struct _Equations { - int count; - struct _Equation { - Patts lhs; - Expr rhs; - } equs[]; -} *Equations; - -#endif +PgfApplication* +pgf_expr_unapply(PgfExpr expr, GuPool* pool); + + +PgfExpr +pgf_read_expr(GuReader* rdr, GuPool* pool, GuExn* err); + +void +pgf_expr_print(PgfExpr expr, GuWriter* wtr, GuExn* err); + +#endif /* EXPR_H_ */ diff --git a/src/runtime/c/pgf/linearize.c b/src/runtime/c/pgf/linearize.c new file mode 100644 index 000000000..ce1b99b5b --- /dev/null +++ b/src/runtime/c/pgf/linearize.c @@ -0,0 +1,613 @@ +/* + * Copyright 2010 University of Helsinki. + * + * This file is part of libpgf. + * + * Libpgf is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * Libpgf is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libpgf. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "data.h" +#include "linearize.h" +#include <gu/map.h> +#include <gu/fun.h> +#include <gu/log.h> +#include <gu/choice.h> +#include <gu/seq.h> +#include <gu/string.h> +#include <gu/assert.h> +#include <pgf/expr.h> + +typedef GuStringMap PgfLinInfer; +typedef GuSeq PgfProdSeq; + +static GU_DEFINE_TYPE(PgfProdSeq, GuSeq, gu_type(PgfProduction)); + +typedef struct PgfLinInferEntry PgfLinInferEntry; + +struct PgfLinInferEntry { + PgfCCat* cat; + PgfCncFun* fun; +}; + +static GU_DEFINE_TYPE( + PgfLinInferEntry, struct, + GU_MEMBER_P(PgfLinInferEntry, cat, PgfCCat) + // ,GU_MEMBER(PgfLinInferEntry, fun, ...) + ); + +typedef GuBuf PgfLinInfers; +static GU_DEFINE_TYPE(PgfLinInfers, GuBuf, gu_type(PgfLinInferEntry)); + +typedef GuIntMap PgfCncProds; +static GU_DEFINE_TYPE(PgfCncProds, GuIntMap, gu_type(PgfProdSeq), + &gu_null_seq); + +typedef GuStringMap PgfLinProds; +static GU_DEFINE_TYPE(PgfLinProds, GuStringMap, gu_ptr_type(PgfCncProds), + &gu_null_struct); + + +static GuHash +pgf_lzr_cats_hash_fn(GuHasher* self, const void* p) +{ + (void) self; + PgfCCatIds* cats = *(PgfCCatIds* const*)p; + size_t len = gu_list_length(cats); + uintptr_t h = 0; + for (size_t i = 0; i < len; i++) { + h = 101 * h + (uintptr_t) gu_list_index(cats, i); + } + return h; +} + +static bool +pgf_lzr_cats_eq_fn(GuEquality* self, const void* p1, const void* p2) +{ + (void) self; + PgfCCatIds* cats1 = *(PgfCCatIds* const*) p1; + PgfCCatIds* cats2 = *(PgfCCatIds* const*) p2; + int len = gu_list_length(cats1); + if (gu_list_length(cats2) != len) { + return false; + } + for (int i = 0; i < len; i++) { + PgfCCat* cat1 = gu_list_index(cats1, i); + PgfCCat* cat2 = gu_list_index(cats2, i); + if (cat1 != cat2) { + return false; + } + } + return true; +} + +static GuHasher +pgf_lzr_cats_hasher[1] = { + { + .eq = { pgf_lzr_cats_eq_fn }, + .hash = pgf_lzr_cats_hash_fn + } +}; + +typedef GuMap PgfInferMap; +static GU_DEFINE_TYPE(PgfInferMap, GuMap, + gu_ptr_type(PgfCCatIds), pgf_lzr_cats_hasher, + gu_ptr_type(PgfLinInfers), &gu_null_struct); + +typedef GuStringMap PgfFunIndices; +static GU_DEFINE_TYPE(PgfFunIndices, GuStringMap, gu_ptr_type(PgfInferMap), + &gu_null_struct); + +typedef GuBuf PgfCCatBuf; +static GU_DEFINE_TYPE(PgfCCatBuf, GuBuf, gu_ptr_type(PgfCCat)); + +typedef GuMap PgfCoerceIdx; +static GU_DEFINE_TYPE(PgfCoerceIdx, GuMap, + gu_type(PgfCCat), NULL, + gu_ptr_type(PgfCCatBuf), &gu_null_struct); + +struct PgfLzr { + PgfConcr* cnc; + GuPool* pool; + PgfFunIndices* fun_indices; + PgfCoerceIdx* coerce_idx; +}; + +GU_DEFINE_TYPE( + PgfLzr, struct, + GU_MEMBER_P(PgfLzr, cnc, PgfConcr), + GU_MEMBER_P(PgfLzr, fun_indices, PgfFunIndices), + GU_MEMBER_P(PgfLzr, coerce_idx, PgfCoerceIdx)); + + + + +static void +pgf_lzr_add_infer_entry(PgfLzr* lzr, + PgfInferMap* infer_table, + PgfCCat* cat, + PgfProductionApply* papply) +{ + PgfPArgs args = papply->args; + size_t n_args = gu_seq_length(args); + PgfCCatIds* arg_cats = gu_new_list(PgfCCatIds, lzr->pool, n_args); + for (size_t i = 0; i < n_args; i++) { + // XXX: What about the hypos in the args? + gu_list_index(arg_cats, i) = gu_seq_get(args, PgfPArg, i).ccat; + } + gu_debug("%d,%d,%d -> %d, %s", + n_args > 0 ? gu_list_index(arg_cats, 0)->fid : -1, + n_args > 1 ? gu_list_index(arg_cats, 1)->fid : -1, + n_args > 2 ? gu_list_index(arg_cats, 2)->fid : -1, + cat->fid, papply->fun->fun); + PgfLinInfers* entries = + gu_map_get(infer_table, &arg_cats, PgfLinInfers*); + if (!entries) { + entries = gu_new_buf(PgfLinInferEntry, lzr->pool); + gu_map_put(infer_table, &arg_cats, PgfLinInfers*, entries); + } else { + // XXX: arg_cats is duplicate, we ought to free it + // Display warning? + } + + PgfLinInferEntry entry = { + .cat = cat, + .fun = papply->fun + }; + gu_buf_push(entries, PgfLinInferEntry, entry); +} + + +static void +pgf_lzr_index(PgfLzr* lzr, PgfCCat* cat, PgfProduction prod) +{ + void* data = gu_variant_data(prod); + switch (gu_variant_tag(prod)) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papply = data; + PgfInferMap* infer = + gu_map_get(lzr->fun_indices, &papply->fun->fun, + PgfInferMap*); + gu_debug("index: %s -> %d", papply->fun->fun, cat->fid); + if (!infer) { + infer = gu_map_type_new(PgfInferMap, lzr->pool); + gu_map_put(lzr->fun_indices, + &papply->fun->fun, PgfInferMap*, infer); + } + pgf_lzr_add_infer_entry(lzr, infer, cat, papply); + break; + } + case PGF_PRODUCTION_COERCE: { + PgfProductionCoerce* pcoerce = data; + PgfCCatBuf* cats = gu_map_get(lzr->coerce_idx, pcoerce->coerce, + PgfCCatBuf*); + if (!cats) { + cats = gu_new_buf(PgfCCat*, lzr->pool); + gu_map_put(lzr->coerce_idx, + pcoerce->coerce, PgfCCatBuf*, cats); + } + gu_debug("coerce_idx: %d -> %d", pcoerce->coerce->fid, cat->fid); + gu_buf_push(cats, PgfCCat*, cat); + break; + } + default: + // Display warning? + break; + } +} + +static void +pgf_lzr_index_ccat(PgfLzr* lzr, PgfCCat* cat) +{ + gu_debug("ccat: %d", cat->fid); + if (gu_seq_is_null(cat->prods)) { + return; + } + size_t n_prods = gu_seq_length(cat->prods); + for (size_t i = 0; i < n_prods; i++) { + PgfProduction prod = gu_seq_get(cat->prods, PgfProduction, i); + pgf_lzr_index(lzr, cat, prod); + } +} + +typedef struct { + GuMapItor fn; + PgfLzr* lzr; +} PgfLzrIndexFn; + +static void +pgf_lzr_index_cnccat_cb(GuMapItor* fn, const void* key, void* value, + GuExn* err) +{ + (void) (key && err); + PgfLzrIndexFn* clo = (PgfLzrIndexFn*) fn; + PgfCncCat** cnccatp = value; + PgfCncCat* cnccat = *cnccatp; + gu_enter("-> cnccat: %s", cnccat->cid); + int n_ccats = gu_list_length(cnccat->cats); + for (int i = 0; i < n_ccats; i++) { + PgfCCat* cat = gu_list_index(cnccat->cats, i); + if (cat) { + pgf_lzr_index_ccat(clo->lzr, cat); + } + } + gu_exit("<-"); +} + + +PgfLzr* +pgf_new_lzr(PgfConcr* cnc, GuPool* pool) +{ + PgfLzr* lzr = gu_new(PgfLzr, pool); + lzr->cnc = cnc; + lzr->pool = pool; + lzr->fun_indices = gu_map_type_new(PgfFunIndices, pool); + lzr->coerce_idx = gu_map_type_new(PgfCoerceIdx, pool); + PgfLzrIndexFn clo = { { pgf_lzr_index_cnccat_cb }, lzr }; + gu_map_iter(cnc->cnccats, &clo.fn, NULL); + size_t n_extras = gu_seq_length(cnc->extra_ccats); + for (size_t i = 0; i < n_extras; i++) { + PgfCCat* cat = gu_seq_get(cnc->extra_ccats, PgfCCat*, i); + pgf_lzr_index_ccat(lzr, cat); + } + // TODO: prune productions with zero linearizations + return lzr; +} + +typedef struct PgfLzn PgfLzn; + +struct PgfLzn { + PgfLzr* lzr; + GuChoice* ch; + PgfExpr expr; + GuEnum en; +}; + + +// +// PgfCncTree +// + +typedef enum { + PGF_CNC_TREE_APP, + PGF_CNC_TREE_LIT, +} PgfCncTreeTag; + +typedef struct PgfCncTreeApp PgfCncTreeApp; +struct PgfCncTreeApp { + PgfCncFun* fun; + GuLength n_args; + PgfCncTree args[]; +}; + +typedef struct PgfCncTreeLit PgfCncTreeLit; +struct PgfCncTreeLit { + PgfLiteral lit; +}; + + +static PgfCCat* +pgf_lzn_pick_supercat(PgfLzn* lzn, PgfCCat* cat) +{ + gu_enter("->"); + while (true) { + PgfCCatBuf* supers = + gu_map_get(lzn->lzr->coerce_idx, cat, PgfCCatBuf*); + if (!supers) { + break; + } + gu_debug("n_supers: %d", gu_buf_length(supers)); + int ch = gu_choice_next(lzn->ch, gu_buf_length(supers) + 1); + gu_debug("choice: %d", ch); + if (ch == 0) { + break; + } + cat = gu_buf_get(supers, PgfCCat*, ch - 1); + } + gu_exit("<- %d", cat->fid); + return cat; +} + +static PgfCCat* +pgf_lzn_infer(PgfLzn* lzn, PgfExpr expr, GuPool* pool, PgfCncTree* ctree_out); + +static PgfCCat* +pgf_lzn_infer_apply_try(PgfLzn* lzn, PgfApplication* appl, + PgfInferMap* infer, GuChoiceMark* marks, + PgfCCatIds* arg_cats, int* ip, int n_args, + GuPool* pool, PgfCncTreeApp* app_out) +{ + gu_enter("f: %s, *ip: %d, n_args: %d", appl->fun, *ip, n_args); + PgfCCat* ret = NULL; + while (*ip < n_args) { + PgfCncTree* arg_treep = + (app_out == NULL ? NULL : &app_out->args[*ip]); + PgfCCat* arg_i = + pgf_lzn_infer(lzn, appl->args[*ip], pool, arg_treep); + if (arg_i == NULL) { + goto finish; + } + arg_i = pgf_lzn_pick_supercat(lzn, arg_i); + gu_list_index(arg_cats, *ip) = arg_i; + marks[++*ip] = gu_choice_mark(lzn->ch); + } + PgfLinInfers* entries = gu_map_get(infer, &arg_cats, PgfLinInfers*); + if (!entries) { + goto finish; + } + size_t n_entries = gu_buf_length(entries); + int e = gu_choice_next(lzn->ch, n_entries); + gu_debug("entry %d of %d", e, n_entries); + if (e < 0) { + goto finish; + } + PgfLinInferEntry* entry = gu_buf_index(entries, PgfLinInferEntry, e); + if (app_out != NULL) { + app_out->fun = entry->fun; + } + ret = entry->cat; +finish: + gu_exit("fid: %d", ret ? ret->fid : -1); + return ret; +} +typedef GuList(GuChoiceMark) PgfChoiceMarks; + +static PgfCCat* +pgf_lzn_infer_application(PgfLzn* lzn, PgfApplication* appl, + GuPool* pool, PgfCncTree* ctree_out) +{ + PgfInferMap* infer = + gu_map_get(lzn->lzr->fun_indices, &appl->fun, PgfInferMap*); + gu_enter("-> f: %s, n_args: %d", appl->fun, appl->n_args); + if (infer == NULL) { + gu_exit("<- couldn't find f"); + return NULL; + } + GuPool* tmp_pool = gu_new_pool(); + PgfCCat* ret = NULL; + int n = appl->n_args; + PgfCCatIds* arg_cats = gu_new_list(PgfCCatIds, tmp_pool, n); + + PgfCncTreeApp* appt = NULL; + if (ctree_out) { + appt = gu_new_flex_variant(PGF_CNC_TREE_APP, PgfCncTreeApp, + args, n, ctree_out, pool); + appt->n_args = n; + } + + PgfChoiceMarks* marksl = gu_new_list(PgfChoiceMarks, tmp_pool, n + 1); + GuChoiceMark* marks = gu_list_elems(marksl); + int i = 0; + marks[i] = gu_choice_mark(lzn->ch); + while (true) { + ret = pgf_lzn_infer_apply_try(lzn, appl, infer, + marks, arg_cats, + &i, n, pool, appt); + if (ret != NULL) { + break; + } + + do { + --i; + if (i < 0) { + goto finish; + } + gu_choice_reset(lzn->ch, marks[i]); + } while (!gu_choice_advance(lzn->ch)); + } +finish: + gu_pool_free(tmp_pool); + gu_exit("<- fid: %d", ret ? ret->fid : -1); + return ret; +} + +static PgfCCat* +pgf_lzn_infer(PgfLzn* lzn, PgfExpr expr, GuPool* pool, PgfCncTree* ctree_out) +{ + PgfCCat* ret = NULL; + GuPool* tmp_pool = gu_new_pool(); + PgfApplication* appl = pgf_expr_unapply(expr, tmp_pool); + if (appl != NULL) { + ret = pgf_lzn_infer_application(lzn, appl, pool, ctree_out); + } else { + GuVariantInfo i = gu_variant_open(pgf_expr_unwrap(expr)); + switch (i.tag) { + case PGF_EXPR_LIT: { + PgfExprLit* elit = i.data; + if (pool != NULL) { + *ctree_out = gu_new_variant_i( + pool, PGF_CNC_TREE_LIT, + PgfCncTreeLit, + .lit = elit->lit); + } + ret = pgf_literal_cat(elit->lit); + } + default: + // XXX: should we do something here? + break; + } + } + gu_pool_free(tmp_pool); + return ret; +} + + +static PgfCncTree +pgf_lzn_next(PgfLzn* lzn, GuPool* pool) +{ + // XXX: rewrite this whole mess + PgfCncTree ctree = gu_null_variant; + if (gu_variant_is_null(lzn->expr)) { + return ctree; + } + PgfCCat* cat = NULL; + GuChoiceMark mark = gu_choice_mark(lzn->ch); + do { + cat = pgf_lzn_infer(lzn, lzn->expr, NULL, NULL); + gu_choice_reset(lzn->ch, mark); + } while (!cat && gu_choice_advance(lzn->ch)); + + if (cat) { + PgfCCat* cat2 = pgf_lzn_infer(lzn, lzn->expr, pool, &ctree); + gu_assert(cat == cat2); + gu_debug("fid: %d", cat->fid); + gu_choice_reset(lzn->ch, mark); + if (!gu_choice_advance(lzn->ch)) { + lzn->expr = gu_null_variant; + } + } + return ctree; +} + +static void +pgf_cnc_tree_enum_next(GuEnum* self, void* to, GuPool* pool) +{ + PgfLzn* lzn = gu_container(self, PgfLzn, en); + PgfCncTree* toc = to; + *toc = pgf_lzn_next(lzn, pool); +} + +PgfCncTreeEnum* +pgf_lzr_concretize(PgfLzr* lzr, PgfExpr expr, GuPool* pool) +{ + PgfLzn* lzn = gu_new(PgfLzn, pool); + lzn->lzr = lzr; + lzn->expr = expr; + lzn->ch = gu_new_choice(pool); + lzn->en.next = pgf_cnc_tree_enum_next; + return &lzn->en; +} + + +int +pgf_cnc_tree_dimension(PgfCncTree ctree) +{ + GuVariantInfo cti = gu_variant_open(ctree); + switch (cti.tag) { + case PGF_CNC_TREE_LIT: + return 1; + case PGF_CNC_TREE_APP: { + PgfCncTreeApp* fapp = cti.data; + return fapp->fun->n_lins; + } + default: + gu_impossible(); + return 0; + } +} + +void +pgf_lzr_linearize(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** fnsp) +{ + PgfLinFuncs* fns = *fnsp; + GuVariantInfo cti = gu_variant_open(ctree); + + switch (cti.tag) { + case PGF_CNC_TREE_LIT: { + gu_require(lin_idx == 0); + PgfCncTreeLit* flit = cti.data; + if (fns->expr_literal) { + fns->expr_literal(fnsp, flit->lit); + } + break; + } + case PGF_CNC_TREE_APP: { + PgfCncTreeApp* fapp = cti.data; + PgfCncFun* fun = fapp->fun; + if (fns->expr_apply) { + fns->expr_apply(fnsp, fun->fun, fapp->n_args); + } + gu_require(lin_idx < fun->n_lins); + PgfSequence seq = fun->lins[lin_idx]; + size_t nsyms = gu_seq_length(seq); + PgfSymbol* syms = gu_seq_data(seq); + for (size_t i = 0; i < nsyms; i++) { + PgfSymbol sym = syms[i]; + GuVariantInfo sym_i = gu_variant_open(sym); + switch (sym_i.tag) { + case PGF_SYMBOL_CAT: + case PGF_SYMBOL_VAR: + case PGF_SYMBOL_LIT: { + PgfSymbolIdx* sidx = sym_i.data; + gu_assert((unsigned) sidx->d < fapp->n_args); + PgfCncTree argf = fapp->args[sidx->d]; + pgf_lzr_linearize(lzr, argf, sidx->r, fnsp); + break; + } + case PGF_SYMBOL_KS: { + PgfSymbolKS* ks = sym_i.data; + if (fns->symbol_tokens) { + fns->symbol_tokens(fnsp, ks->tokens); + } + break; + } + case PGF_SYMBOL_KP: { + // TODO: correct prefix-dependencies + PgfSymbolKP* kp = sym_i.data; + if (fns->symbol_tokens) { + fns->symbol_tokens(fnsp, + kp->default_form); + } + break; + } + default: + gu_impossible(); + } + } + break; + } // case PGF_CNC_TREE_APP + default: + gu_impossible(); + } +} + + + +typedef struct PgfSimpleLin PgfSimpleLin; + +struct PgfSimpleLin { + PgfLinFuncs* funcs; + GuWriter* wtr; + GuExn* err; +}; + +static void +pgf_file_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens toks) +{ + PgfSimpleLin* flin = gu_container(funcs, PgfSimpleLin, funcs); + if (!gu_ok(flin->err)) { + return; + } + size_t len = gu_seq_length(toks); + for (size_t i = 0; i < len; i++) { + PgfToken tok = gu_seq_get(toks, PgfToken, i); + gu_string_write(tok, flin->wtr, flin->err); + gu_putc(' ', flin->wtr, flin->err); + } +} + +static PgfLinFuncs pgf_file_lin_funcs = { + .symbol_tokens = pgf_file_lzn_symbol_tokens +}; + +void +pgf_lzr_linearize_simple(PgfLzr* lzr, PgfCncTree ctree, + size_t lin_idx, GuWriter* wtr, GuExn* err) +{ + PgfSimpleLin flin = { + .funcs = &pgf_file_lin_funcs, + .wtr = wtr, + .err = err + }; + pgf_lzr_linearize(lzr, ctree, lin_idx, &flin.funcs); +} diff --git a/src/runtime/c/pgf/linearize.h b/src/runtime/c/pgf/linearize.h new file mode 100644 index 000000000..db36343f2 --- /dev/null +++ b/src/runtime/c/pgf/linearize.h @@ -0,0 +1,156 @@ +/* + * Copyright 2010-2011 University of Helsinki. + * + * This file is part of libpgf. + * + * Libpgf is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * Libpgf is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libpgf. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <gu/type.h> +#include <gu/dump.h> +#include <gu/enum.h> +#include <pgf/data.h> + +/// Linearization of abstract syntax trees. +/// @file + +/** @name Linearizers + * + * Linearization begins by choosing a concrete category (#PgfConcr) for some + * grammar, and creating a new linearizer (#PgfLzr) which can then be used to + * linearize abstract syntax trees (#PgfExpr) of that grammar into the given + * concrete category. + * + * @{ + */ + + +/// A linearizer. +typedef struct PgfLzr PgfLzr; +/**< + * + * A #PgfLzr object transforms abstract syntax trees of a PGF grammar + * into sequences of token events for a single concrete category of + * that grammar. + * + */ +GU_DECLARE_TYPE(PgfLzr, struct); + + +/// Create a new linearizer. +PgfLzr* +pgf_new_lzr(PgfConcr* cnc, GuPool* pool); +/**< + * @param cnc The concrete category to linearize to. + * + * @pool + * + * @return A new linearizer. + */ + +/** @} + * + * @name Enumerating concrete syntax trees + * + * Because of the \c variants construct in GF, there may be several + * possible concrete syntax trees that correspond to a given abstract + * syntax tree. These can be enumerated with #pgf_lzr_concretize and + * #pgf_cnc_trees_next. + * + * @{ + */ + + +/// A concrete syntax tree +typedef GuVariant PgfCncTree; + +/// An enumeration of #PgfCncTree trees. +typedef GuEnum PgfCncTreeEnum; + +/// Begin enumerating concrete syntax variants. +PgfCncTreeEnum* +pgf_lzr_concretize(PgfLzr* lzr, PgfExpr expr, GuPool* pool); + +/** @} + * + * @name Linearizing concrete syntax trees + * + * An individual concrete syntax tree has several different + * linearizations, corresponding to the various fields and cases of + * corresponding GF values. The number of these linearizations, called + * the \e dimension of the tree, can be retrieved with + * #pgf_cnc_tree_dimension. + * + * A single linearization of a concrete syntax tree is performed by + * #pgf_lzr_linearize. The linearization is realized as a sequence of + * events that are notified by calling the functions of a #PgfLinFuncs + * structure that the client provides. + * + * @{ + */ + +/// Callback functions for linearization. +typedef struct PgfLinFuncs PgfLinFuncs; + +struct PgfLinFuncs +{ + /// Output tokens + void (*symbol_tokens)(PgfLinFuncs** self, PgfTokens toks); + + void (*symbol_expr)(PgfLinFuncs** self, + int argno, PgfExpr expr, int lin_idx); + + /// Begin application + void (*expr_apply)(PgfLinFuncs** self, PgfCId cid, int n_args); + + /// Output literal + void (*expr_literal)(PgfLinFuncs** self, PgfLiteral lit); + + void (*abort)(PgfLinFuncs** self); + void (*finish)(PgfLinFuncs** self); +}; + + + + + +/// Linearize a concrete syntax tree. +void +pgf_lzr_linearize(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx, + PgfLinFuncs** fnsp); + + +/// Linearize a concrete syntax tree as space-separated tokens. +void +pgf_lzr_linearize_simple(PgfLzr* lzr, PgfCncTree ctree, + size_t lin_idx, GuWriter* wtr, GuExn* err); + + +/// Return the dimension of a concrete syntax tree. +int +pgf_cnc_tree_dimension(PgfCncTree ctree); +/**< + * @param ctree A concrete syntax tree. + * + * @return The dimension of the tree, i.e. the number of different + * linearizations the tree has. + */ + +//@} + + + +extern GuTypeTable +pgf_linearize_dump_table; + diff --git a/src/runtime/c/pgf/loader.c b/src/runtime/c/pgf/loader.c deleted file mode 100644 index 0d2f40661..000000000 --- a/src/runtime/c/pgf/loader.c +++ /dev/null @@ -1,396 +0,0 @@ -#include "../pgf.h" -#include "data.h" -#include "panic.h" -#include <stdio.h> -#include <stdlib.h> - -static int readTag(FILE *f) { - return getc(f); -} - -static int readInt16(FILE *f) { - int x = getc(f); - int y = getc(f); - return ((x << 8) | y); -} - -static int readInt(FILE *f) { - unsigned int x = (unsigned int) getc(f); - if (x <= 0x7f) - return (int) x; - else { - unsigned int y = (unsigned int) readInt(f); - return (int) ((y << 7) | (x & 0x7f)) ; - } -} - -static double readFloat(FILE *f) { - double d; - fread(&d, sizeof(d), 1, f); - return d; -} - -static String readString(FILE *f) { - int len = readInt(f); - String str = (String) malloc(sizeof(struct _CId)+len*sizeof(unsigned int)); - str->len = len; - - int i; - for (i = 0; i < len; i++) { - int c = fgetc(f); - str->chars[i] = c; - } - - return str; -} - -static CId readCId(FILE *f) { - int len = readInt(f); - CId cid = (CId) malloc(sizeof(struct _CId)+len*sizeof(char)); - cid->len = len; - fread(&cid->chars, sizeof(char), len, f); - return cid; -} - -static CIdList readCIdList(FILE *f) { - int i; - int count = readInt(f); - CIdList list = (CIdList) malloc(sizeof(struct _CIdList)+count*sizeof(CId)); - - list->count = count; - for (i = 0; i < count; i++) { - list->names[i] = readCId(f); - } - - return list; -} - -static Literal readLiteral(FILE *f) { - int tag = readTag(f); - switch (tag) { - case LIT_STR: - { - LiteralStr lit = (LiteralStr) malloc(sizeof(struct _LiteralStr)); - lit->_.tag = tag; - lit->val = readString(f); - return ((Literal) lit); - } - case LIT_INT: - { - LiteralInt lit = (LiteralInt) malloc(sizeof(struct _LiteralInt)); - lit->_.tag = tag; - lit->val = readInt(f); - return ((Literal) lit); - } - case LIT_FLOAT: - { - LiteralFloat lit = (LiteralFloat) malloc(sizeof(struct _LiteralFloat)); - lit->_.tag = tag; - lit->val = readFloat(f); - return ((Literal) lit); - } - default: - __pgf_panic("Unknown literal tag"); - } -} - -static Flags readFlags(FILE *f) { - int i; - int count = readInt(f); - Flags flags = (Flags) malloc(sizeof(struct _Flags)+count*sizeof(struct _Flag)); - - flags->count = count; - for (i = 0; i < count; i++) { - flags->values[i].name = readCId(f); - flags->values[i].value = readLiteral(f); - } - - return flags; -} - -static Context readContext(FILE *f); -static Type readType(FILE *f); - -static Expr readExpr(FILE *f) { - int tag = readTag(f); - - switch (tag) { - case TAG_ABS: - { - ExprAbs e = (ExprAbs) malloc(sizeof(struct _ExprAbs)); - e->_.tag = tag; - e->bt = readTag(f); - e->var = readCId(f); - e->body = readExpr(f); - return ((Expr) e); - } - case TAG_APP: - { - ExprApp e = (ExprApp) malloc(sizeof(struct _ExprApp)); - e->_.tag = tag; - e->left = readExpr(f); - e->right = readExpr(f); - return ((Expr) e); - } - case TAG_LIT: - { - ExprLit e = (ExprLit) malloc(sizeof(struct _ExprLit)); - e->_.tag = tag; - e->lit = readLiteral(f); - return ((Expr) e); - } - case TAG_MET: - { - ExprMeta e = (ExprMeta) malloc(sizeof(struct _ExprMeta)); - e->_.tag = tag; - e->id = readInt(f); - return ((Expr) e); - } - case TAG_FUN: - { - ExprFun e = (ExprFun) malloc(sizeof(struct _ExprFun)); - e->_.tag = tag; - e->fun = readCId(f); - return ((Expr) e); - } - case TAG_VAR: - { - ExprVar e = (ExprVar) malloc(sizeof(struct _ExprVar)); - e->_.tag = tag; - e->index = readInt(f); - return ((Expr) e); - } - case TAG_TYP: - { - ExprTyped e = (ExprTyped) malloc(sizeof(struct _ExprTyped)); - e->_.tag = tag; - e->e = readExpr(f); - e->ty = readType(f); - return ((Expr) e); - } - case TAG_IMP: - { - ExprImplArg e = (ExprImplArg) malloc(sizeof(struct _ExprImplArg)); - e->_.tag = tag; - e->e = readExpr(f); - return ((Expr) e); - } - default: - __pgf_panic("Unknown expression tag"); - } -} - -static Type readType(FILE *f) { - Context hypos = readContext(f); - CId cat = readCId(f); - - int i; - int count = readInt(f); - Type ty = (Type) malloc(sizeof(struct _Type)+count*sizeof(Expr)); - - ty->hypos = hypos; - ty->cat = cat; - ty->nArgs = count; - for (i = 0; i < count; i++) { - ty->args[i] = readExpr(f); - } - - return ty; -} - -static void readHypo(FILE *f, Hypo h) { - h->bt = readTag(f); - h->var = readCId(f); - h->ty = readType(f); -} - -static Context readContext(FILE *f) { - int i; - int size = readInt(f); - Context ctxt = (Context) malloc(sizeof(struct _Context)+size*sizeof(struct _Hypo)); - - ctxt->size = size; - for (i = 0; i < size; i++) { - readHypo(f, &ctxt->hypos[i]); - } - - return ctxt; -} - -static Patt readPatt(FILE *f) { - int tag = readTag(f); - - switch (tag) { - case TAG_PAPP: - { - CId fun = readCId(f); - - int i; - int count = readInt(f); - PattApp p = (PattApp) malloc(sizeof(struct _PattApp)+count*sizeof(Patt)); - - p->_.tag = tag; - p->fun = fun; - p->args.count = count; - for (i = 0; i < count; i++) { - p->args.pats[i] = readPatt(f); - } - - return ((Patt) p); - } - case TAG_PVAR: - { - PattVar p = (PattVar) malloc(sizeof(struct _PattVar)); - p->_.tag = tag; - p->var = readCId(f); - return ((Patt) p); - } - case TAG_PAT: - { - PattAt p = (PattAt) malloc(sizeof(struct _PattAt)); - p->_.tag = tag; - p->var = readCId(f); - p->pat = readPatt(f); - return ((Patt) p); - } - case TAG_PWILD: - { - PattWild p = (PattWild) malloc(sizeof(struct _PattWild)); - p->_.tag = tag; - return ((Patt) p); - } - case TAG_PLIT: - { - PattLit p = (PattLit) malloc(sizeof(struct _PattLit)); - p->_.tag = tag; - p->lit = readLiteral(f); - return ((Patt) p); - } - case TAG_PIMP: - { - PattImplArg p = (PattImplArg) malloc(sizeof(struct _PattImplArg)); - p->_.tag = tag; - p->pat = readPatt(f); - return ((Patt) p); - } - case TAG_PTILDE: - { - PattTilde p = (PattTilde) malloc(sizeof(struct _PattTilde)); - p->_.tag = tag; - p->e = readExpr(f); - return ((Patt) p); - } - default: - __pgf_panic("Unknown pattern tag"); - } -} - -static Patts readPatts(FILE *f) { - int i; - int count = readInt(f); - Patts pats = (Patts) malloc(sizeof(struct _Patts)+count*sizeof(Patt)); - - pats->count = count; - for (i = 0; i < count; i++) { - pats->pats[i] = readPatt(f); - } - - return pats; -} - -static Equations readEquations(FILE *f) { - int i; - int count = readInt(f); - Equations equs = (Equations) malloc(sizeof(struct _Equations)+count*sizeof(struct _Equation)); - - equs->count = count; - for (i = 0; i < count; i++) { - equs->equs[i].lhs = readPatts(f); - equs->equs[i].rhs = readExpr(f); - } - - return equs; -} - -static void readAbsFun(FILE *f, AbsFun fun) { - fun->name = readCId(f); - fun->ty = readType(f); - fun->arrity = readInt(f); - if (readTag(f) != 0) - fun->equs = readEquations(f); - else - fun->equs = NULL; -} - -static AbsFuns readAbsFuns(FILE *f) { - int i; - int count = readInt(f); - AbsFuns funs = (AbsFuns) malloc(sizeof(struct _AbsFuns)+count*sizeof(struct _AbsFun)); - - funs->count = count; - for (i = 0; i < count; i++) { - readAbsFun(f, &funs->lst[i]); - } - - return funs; -} - -static void readAbsCat(FILE *f, AbsCat cat) { - cat->name = readCId(f); - cat->hypos = readContext(f); - cat->funs = readCIdList(f); -} - -static AbsCats readAbsCats(FILE *f) { - int i; - int count = readInt(f); - AbsCats cats = (AbsCats) malloc(sizeof(struct _AbsCats)+count*sizeof(struct _AbsCat)); - - cats->count = count; - for (i = 0; i < count; i++) { - readAbsCat(f, &cats->lst[i]); - } - - return cats; -} - -static void readAbstr(FILE *f, Abstract abstr) { - abstr->name = readCId(f); - abstr->flags = readFlags(f); - abstr->funs = readAbsFuns(f); - abstr->cats = readAbsCats(f); -} - -static void readConcr(FILE *f, Concrete concr) { - concr->name = readCId(f); - concr->flags = readFlags(f); -} - -PGF readPGF(char *filename) { - FILE *f = fopen(filename, "rb"); - if (f == NULL) - return NULL; - - int maj_ver = readInt16(f); - int min_ver = readInt16(f); - - Flags flags = readFlags(f); - - struct _Abstract abstr; - readAbstr(f, &abstr); - - int nConcr = readInt(f); - PGF pgf = (PGF) malloc(sizeof(struct _PGF)+sizeof(Concrete)*nConcr); - - pgf->flags = flags; - pgf->abstract = abstr; - pgf->nConcr = nConcr; - - int i; -// for (i = 0; i < nConcr; i++) { -// readConcr(f, &pgf->concretes[i]); -// } - - fclose(f); - return pgf; -} diff --git a/src/runtime/c/pgf/panic.c b/src/runtime/c/pgf/panic.c deleted file mode 100644 index 2a8553a83..000000000 --- a/src/runtime/c/pgf/panic.c +++ /dev/null @@ -1,8 +0,0 @@ -#include "panic.h" -#include <stdio.h> - -void __pgf_panic(char *msg) { - printf("%s\n",msg); - fflush(stdout); - exit(1); -} diff --git a/src/runtime/c/pgf/panic.h b/src/runtime/c/pgf/panic.h deleted file mode 100644 index 00ace7533..000000000 --- a/src/runtime/c/pgf/panic.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef PGF_PANIC_H -#define PGF_PANIC_H - -void __pgf_panic(char *msg); - -#endif diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c new file mode 100644 index 000000000..59ee66182 --- /dev/null +++ b/src/runtime/c/pgf/parser.c @@ -0,0 +1,697 @@ +#include <pgf/parser.h> +#include <gu/choice.h> +#include <gu/seq.h> +#include <gu/assert.h> +#include <gu/log.h> + +typedef struct PgfItem PgfItem; + +enum { + PGF_FID_SYNTHETIC = -999 +}; + +typedef GuBuf PgfItemBuf; +typedef GuList(PgfItemBuf*) PgfItemBufs; + + + +// GuString -> PgfItemBuf* +typedef GuMap PgfTransitions; + +typedef GuBuf PgfCCatBuf; + +struct PgfParser { + PgfConcr* concr; +}; + +struct PgfParse { + PgfParser* parser; + PgfTransitions* transitions; + PgfCCatBuf* completed; +}; + +typedef struct PgfParseResult PgfParseResult; + +struct PgfParseResult { + PgfCCatBuf* completed; + GuChoice* choice; + PgfExprEnum en; +}; + +typedef struct PgfItemBase PgfItemBase; + +struct PgfItemBase { + PgfItemBuf* conts; + PgfCCat* ccat; + PgfProduction prod; + unsigned short lin_idx; +}; + +struct PgfItem { + PgfItemBase* base; + PgfPArgs args; + PgfSymbol curr_sym; + uint16_t seq_idx; + uint8_t tok_idx; + uint8_t alt; +}; + +typedef GuMap PgfContsMap; + + +static GU_DEFINE_TYPE(PgfItemBuf, abstract, _); +static GU_DEFINE_TYPE(PgfItemBufs, abstract, _); +static GU_DEFINE_TYPE(PgfContsMap, GuMap, + gu_type(PgfCCat), NULL, + gu_ptr_type(PgfItemBufs), &gu_null_struct); + +static GU_DEFINE_TYPE(PgfGenCatMap, GuMap, + gu_type(PgfItemBuf), NULL, + gu_ptr_type(PgfCCat), &gu_null_struct); + +static GU_DEFINE_TYPE(PgfTransitions, GuStringMap, + gu_ptr_type(PgfItemBuf), &gu_null_struct); + +typedef GuMap PgfGenCatMap; + +typedef struct PgfParsing PgfParsing; + +struct PgfParsing { + PgfParse* parse; + GuPool* pool; + PgfContsMap* conts_map; + PgfGenCatMap* generated_cats; +}; + +static void +pgf_parsing_add_transition(PgfParsing* parsing, PgfToken tok, PgfItem* item) +{ + gu_debug("%s -> %p", tok, item); + PgfTransitions* tmap = parsing->parse->transitions; + PgfItemBuf* items = gu_map_get(tmap, &tok, PgfItemBuf*); + if (!items) { + items = gu_new_buf(PgfItem*, parsing->pool); + gu_map_put(tmap, &tok, PgfItemBuf*, items); + } + gu_buf_push(items, PgfItem*, item); +} + +static PgfItemBufs* +pgf_parsing_get_contss(PgfParsing* parsing, PgfCCat* cat) +{ + PgfItemBufs* contss = gu_map_get(parsing->conts_map, cat, PgfItemBufs*); + if (!contss) { + size_t n_lins = cat->cnccat->n_lins; + contss = gu_new_list(PgfItemBufs, parsing->pool, n_lins); + for (size_t i = 0; i < n_lins; i++) { + gu_list_index(contss, i) = NULL; + } + gu_map_put(parsing->conts_map, cat, PgfItemBufs*, contss); + } + return contss; +} + + +static PgfItemBuf* +pgf_parsing_get_conts(PgfParsing* parsing, PgfCCat* cat, size_t lin_idx) +{ + gu_require(lin_idx < cat->cnccat->n_lins); + PgfItemBufs* contss = pgf_parsing_get_contss(parsing, cat); + PgfItemBuf* conts = gu_list_index(contss, lin_idx); + if (!conts) { + conts = gu_new_buf(PgfItem*, parsing->pool); + gu_list_index(contss, lin_idx) = conts; + } + return conts; +} + +static PgfCCat* +pgf_parsing_create_completed(PgfParsing* parsing, PgfItemBuf* conts, + PgfCncCat* cnccat) +{ + PgfCCat* cat = gu_new(PgfCCat, parsing->pool); + cat->cnccat = cnccat; + cat->fid = PGF_FID_SYNTHETIC; + cat->prods = gu_buf_seq(gu_new_buf(PgfProduction, parsing->pool)); + gu_map_put(parsing->generated_cats, conts, PgfCCat*, cat); + return cat; +} + +static PgfCCat* +pgf_parsing_get_completed(PgfParsing* parsing, PgfItemBuf* conts) +{ + return gu_map_get(parsing->generated_cats, conts, PgfCCat*); +} + +static PgfSymbol +pgf_item_base_symbol(PgfItemBase* ibase, size_t seq_idx, GuPool* pool) +{ + GuVariantInfo i = gu_variant_open(ibase->prod); + switch (i.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = i.data; + PgfCncFun* fun = papp->fun; + gu_assert(ibase->lin_idx < fun->n_lins); + PgfSequence seq = fun->lins[ibase->lin_idx]; + gu_assert(seq_idx <= gu_seq_length(seq)); + if (seq_idx == gu_seq_length(seq)) { + return gu_null_variant; + } else { + return gu_seq_get(seq, PgfSymbol, seq_idx); + } + break; + } + case PGF_PRODUCTION_COERCE: { + gu_assert(seq_idx <= 1); + if (seq_idx == 1) { + return gu_null_variant; + } else { + return gu_new_variant_i(pool, PGF_SYMBOL_CAT, + PgfSymbolCat, + .d = 0, .r = ibase->lin_idx); + } + break; + } + default: + gu_impossible(); + } + return gu_null_variant; +} + +static PgfItem* +pgf_new_item(PgfItemBase* base, GuPool* pool) +{ + PgfItem* item = gu_new(PgfItem, pool); + GuVariantInfo pi = gu_variant_open(base->prod); + switch (pi.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = pi.data; + item->args = papp->args; + break; + } + case PGF_PRODUCTION_COERCE: { + PgfProductionCoerce* pcoerce = pi.data; + item->args = gu_new_seq(PgfPArg, 1, pool); + PgfPArg* parg = gu_seq_index(item->args, PgfPArg, 0); + parg->hypos = NULL; + parg->ccat = pcoerce->coerce; + break; + } + default: + gu_impossible(); + } + item->base = base; + item->curr_sym = pgf_item_base_symbol(item->base, 0, pool); + item->seq_idx = 0; + item->tok_idx = 0; + item->alt = 0; + return item; +} + +static PgfItem* +pgf_item_copy(PgfItem* item, GuPool* pool) +{ + PgfItem* copy = gu_new(PgfItem, pool); + memcpy(copy, item, sizeof(PgfItem)); + return copy; +} + +static void +pgf_item_advance(PgfItem* item, GuPool* pool) +{ + item->seq_idx++; + item->curr_sym = pgf_item_base_symbol(item->base, item->seq_idx, pool); +} + +static void +pgf_parsing_item(PgfParsing* parsing, PgfItem* item); + +static void +pgf_parsing_combine(PgfParsing* parsing, PgfItem* cont, PgfCCat* cat) +{ + if (cont == NULL) { + gu_buf_push(parsing->parse->completed, PgfCCat*, cat); + return; + } + PgfItem* item = pgf_item_copy(cont, parsing->pool); + size_t nargs = gu_seq_length(cont->args); + item->args = gu_new_seq(PgfPArg, nargs, parsing->pool); + memcpy(gu_seq_data(item->args), gu_seq_data(cont->args), + nargs * sizeof(PgfPArg)); + gu_assert(gu_variant_tag(item->curr_sym) == PGF_SYMBOL_CAT); + PgfSymbolCat* pcat = gu_variant_data(cont->curr_sym); + gu_seq_set(item->args, PgfPArg, pcat->d, + ((PgfPArg) { .hypos = NULL, .ccat = cat })); + pgf_item_advance(item, parsing->pool); + pgf_parsing_item(parsing, item); +} + +static void +pgf_parsing_production(PgfParsing* parsing, PgfCCat* cat, size_t lin_idx, + PgfProduction prod, PgfItemBuf* conts) +{ + PgfItemBase* base = gu_new(PgfItemBase, parsing->pool); + base->ccat = cat; + base->lin_idx = lin_idx; + base->prod = prod; + base->conts = conts; + PgfItem* item = pgf_new_item(base, parsing->pool); + pgf_parsing_item(parsing, item); +} + +static void +pgf_parsing_complete(PgfParsing* parsing, PgfItem* item) +{ + GuVariantInfo i = gu_variant_open(item->base->prod); + PgfProduction prod = gu_null_variant; + switch (i.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = i.data; + PgfProductionApply* new_papp = + gu_new_variant(PGF_PRODUCTION_APPLY, + PgfProductionApply, + &prod, parsing->pool); + new_papp->fun = papp->fun; + new_papp->args = item->args; + break; + } + case PGF_PRODUCTION_COERCE: { + PgfProductionCoerce* new_pcoerce = + gu_new_variant(PGF_PRODUCTION_COERCE, + PgfProductionCoerce, + &prod, parsing->pool); + PgfPArg* parg = gu_seq_index(item->args, PgfPArg, 0); + gu_assert(!parg->hypos || !parg->hypos->len); + new_pcoerce->coerce = parg->ccat; + break; + } + default: + gu_impossible(); + } + PgfItemBuf* conts = item->base->conts; + PgfCCat* cat = pgf_parsing_get_completed(parsing, conts); + if (cat != NULL) { + // The category has already been created. If it has also been + // predicted already, then process a new item for this production. + PgfItemBufs* contss = pgf_parsing_get_contss(parsing, cat); + size_t n_contss = gu_list_length(contss); + for (size_t i = 0; i < n_contss; i++) { + PgfItemBuf* conts2 = gu_list_index(contss, i); + /* If there are continuations for + * linearization index i, then (cat, i) has + * already been predicted. Add the new + * production immediately to the agenda, + * i.e. process it. */ + if (conts2) { + pgf_parsing_production(parsing, cat, i, + prod, conts2); + } + } + } else { + cat = pgf_parsing_create_completed(parsing, conts, + item->base->ccat->cnccat); + size_t n_conts = gu_buf_length(conts); + for (size_t i = 0; i < n_conts; i++) { + PgfItem* cont = gu_buf_get(conts, PgfItem*, i); + pgf_parsing_combine(parsing, cont, cat); + } + } + GuBuf* prodbuf = gu_seq_buf(cat->prods); + gu_buf_push(prodbuf, PgfProduction, prod); +} + + +static void +pgf_parsing_predict(PgfParsing* parsing, PgfItem* item, + PgfCCat* cat, size_t lin_idx) +{ + gu_enter("-> cat: %d", cat->fid); + if (gu_seq_is_null(cat->prods)) { + // Empty category + return; + } + PgfItemBuf* conts = pgf_parsing_get_conts(parsing, cat, lin_idx); + gu_buf_push(conts, PgfItem*, item); + if (gu_buf_length(conts) == 1) { + /* First time we encounter this linearization + * of this category at the current position, + * so predict it. */ + PgfProductionSeq prods = cat->prods; + size_t n_prods = gu_seq_length(prods); + for (size_t i = 0; i < n_prods; i++) { + PgfProduction prod = + gu_seq_get(prods, PgfProduction, i); + pgf_parsing_production(parsing, cat, lin_idx, + prod, conts); + } + } else { + /* If it has already been completed, combine. */ + PgfCCat* completed = + pgf_parsing_get_completed(parsing, conts); + if (completed) { + pgf_parsing_combine(parsing, item, completed); + } + } + gu_exit(NULL); +} + +static void +pgf_parsing_symbol(PgfParsing* parsing, PgfItem* item, PgfSymbol sym) { + switch (gu_variant_tag(sym)) { + case PGF_SYMBOL_CAT: { + PgfSymbolCat* scat = gu_variant_data(sym); + PgfPArg* parg = gu_seq_index(item->args, PgfPArg, scat->d); + gu_assert(!parg->hypos || !parg->hypos->len); + pgf_parsing_predict(parsing, item, parg->ccat, scat->r); + break; + } + case PGF_SYMBOL_KS: { + PgfSymbolKS* sks = gu_variant_data(sym); + gu_assert(item->tok_idx < gu_seq_length(sks->tokens)); + PgfToken tok = + gu_seq_get(sks->tokens, PgfToken, item->tok_idx); + pgf_parsing_add_transition(parsing, tok, item); + break; + } + case PGF_SYMBOL_KP: { + PgfSymbolKP* skp = gu_variant_data(sym); + size_t idx = item->tok_idx; + uint8_t alt = item->alt; + gu_assert(idx < gu_seq_length(skp->default_form)); + if (idx == 0) { + PgfToken tok = gu_seq_get(skp->default_form, PgfToken, 0); + pgf_parsing_add_transition(parsing, tok, item); + for (size_t i = 0; i < skp->n_forms; i++) { + PgfTokens toks = skp->forms[i].form; + PgfTokens toks2 = skp->default_form; + // XXX: do nubbing properly + bool skip = pgf_tokens_equal(toks, toks2); + for (size_t j = 0; j < i; j++) { + PgfTokens toks2 = skp->forms[j].form; + skip |= pgf_tokens_equal(toks, toks2); + } + if (skip) { + continue; + } + PgfToken tok = gu_seq_get(toks, PgfToken, 0); + pgf_parsing_add_transition(parsing, tok, item); + } + } else if (alt == 0) { + PgfToken tok = + gu_seq_get(skp->default_form, PgfToken, idx); + pgf_parsing_add_transition(parsing, tok, item); + } else { + gu_assert(alt <= skp->n_forms); + PgfToken tok = gu_seq_get(skp->forms[alt - 1].form, + PgfToken, idx); + pgf_parsing_add_transition(parsing, tok, item); + } + break; + } + case PGF_SYMBOL_LIT: + // XXX TODO proper support + break; + case PGF_SYMBOL_VAR: + // XXX TODO proper support + break; + default: + gu_impossible(); + } +} + +static void +pgf_parsing_item(PgfParsing* parsing, PgfItem* item) +{ + GuVariantInfo i = gu_variant_open(item->base->prod); + switch (i.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = i.data; + PgfCncFun* fun = papp->fun; + PgfSequence seq = fun->lins[item->base->lin_idx]; + if (item->seq_idx == gu_seq_length(seq)) { + pgf_parsing_complete(parsing, item); + } else { + PgfSymbol sym = + gu_seq_get(seq, PgfSymbol, item->seq_idx); + pgf_parsing_symbol(parsing, item, sym); + } + break; + } + case PGF_PRODUCTION_COERCE: { + PgfProductionCoerce* pcoerce = i.data; + switch (item->seq_idx) { + case 0: + pgf_parsing_predict(parsing, item, + pcoerce->coerce, + item->base->lin_idx); + break; + case 1: + pgf_parsing_complete(parsing, item); + break; + default: + gu_impossible(); + } + break; + } + default: + gu_impossible(); + } +} + +static bool +pgf_parsing_scan_toks(PgfParsing* parsing, PgfItem* old_item, + PgfToken tok, int alt, PgfTokens toks) +{ + gu_assert(old_item->tok_idx < gu_seq_length(toks)); + if (!gu_string_eq(gu_seq_get(toks, PgfToken, old_item->tok_idx), + tok)) { + return false; + } + PgfItem* item = pgf_item_copy(old_item, parsing->pool); + item->tok_idx++; + item->alt = alt; + if (item->tok_idx == gu_seq_length(toks)) { + item->tok_idx = 0; + pgf_item_advance(item, parsing->pool); + } + pgf_parsing_item(parsing, item); + return true; +} + +static void +pgf_parsing_scan(PgfParsing* parsing, PgfItem* item, PgfToken tok) +{ + bool succ = false; + GuVariantInfo i = gu_variant_open(item->curr_sym); + switch (i.tag) { + case PGF_SYMBOL_KS: { + PgfSymbolKS* ks = i.data; + succ = pgf_parsing_scan_toks(parsing, item, tok, 0, + ks->tokens); + break; + } + case PGF_SYMBOL_KP: { + PgfSymbolKP* kp = i.data; + size_t alt = item->alt; + if (item->tok_idx == 0) { + succ = pgf_parsing_scan_toks(parsing, item, tok, 0, + kp->default_form); + for (size_t i = 0; i < kp->n_forms; i++) { + // XXX: do nubbing properly + PgfTokens toks = kp->forms[i].form; + PgfTokens toks2 = kp->default_form; + bool skip = pgf_tokens_equal(toks, toks2); + for (size_t j = 0; j < i; j++) { + PgfTokens toks2 = kp->forms[j].form; + skip |= pgf_tokens_equal(toks, toks2); + } + if (!skip) { + succ |= pgf_parsing_scan_toks( + parsing, item, tok, i + 1, + kp->forms[i].form); + } + } + } else if (alt == 0) { + succ = pgf_parsing_scan_toks(parsing, item, tok, 0, + kp->default_form); + } else { + gu_assert(alt <= kp->n_forms); + succ = pgf_parsing_scan_toks(parsing, item, tok, + alt, kp->forms[alt - 1].form); + } + break; + } + default: + gu_impossible(); + } + gu_assert(succ); +} + + +static PgfParsing* +pgf_new_parsing(PgfParse* parse, GuPool* parse_pool, GuPool* out_pool) +{ + PgfParsing* parsing = gu_new(PgfParsing, out_pool); + parsing->parse = parse; + parsing->generated_cats = gu_map_type_new(PgfGenCatMap, out_pool); + parsing->conts_map = gu_map_type_new(PgfContsMap, out_pool); + parsing->pool = parse_pool; + return parsing; +} + +static PgfParse* +pgf_new_parse(PgfParser* parser, GuPool* pool) +{ + PgfParse* parse = gu_new(PgfParse, pool); + parse->parser = parser; + parse->transitions = gu_map_type_new(PgfTransitions, pool); + parse->completed = gu_new_buf(PgfCCat*, pool); + return parse; +} + +PgfParse* +pgf_parse_token(PgfParse* parse, PgfToken tok, GuPool* pool) +{ + PgfItemBuf* agenda = + gu_map_get(parse->transitions, &tok, PgfItemBuf*); + if (!agenda) { + return NULL; + } + PgfParse* next_parse = pgf_new_parse(parse->parser, pool); + GuPool* tmp_pool = gu_new_pool(); + PgfParsing* parsing = pgf_new_parsing(next_parse, pool, tmp_pool); + size_t n_items = gu_buf_length(agenda); + for (size_t i = 0; i < n_items; i++) { + PgfItem* item = gu_buf_get(agenda, PgfItem*, i); + pgf_parsing_scan(parsing, item, tok); + } + gu_pool_free(tmp_pool); + return next_parse; +} + +static PgfExpr +pgf_cat_to_expr(PgfCCat* cat, GuChoice* choice, GuPool* pool); + +static PgfExpr +pgf_production_to_expr(PgfProduction prod, GuChoice* choice, GuPool* pool) +{ + GuVariantInfo pi = gu_variant_open(prod); + switch (pi.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = pi.data; + PgfExpr expr = gu_new_variant_i(pool, PGF_EXPR_FUN, + PgfExprFun, + .fun = papp->fun->fun); + size_t n_args = gu_seq_length(papp->args); + for (size_t i = 0; i < n_args; i++) { + PgfPArg* parg = gu_seq_index(papp->args, PgfPArg, i); + gu_assert(!parg->hypos || !parg->hypos->len); + PgfExpr earg = pgf_cat_to_expr(parg->ccat, choice, pool); + expr = gu_new_variant_i(pool, PGF_EXPR_APP, + PgfExprApp, + .fun = expr, .arg = earg); + } + return expr; + } + case PGF_PRODUCTION_COERCE: { + PgfProductionCoerce* pcoerce = pi.data; + return pgf_cat_to_expr(pcoerce->coerce, choice, pool); + } + default: + gu_impossible(); + } + return gu_null_variant; +} + + +static PgfExpr +pgf_cat_to_expr(PgfCCat* cat, GuChoice* choice, GuPool* pool) +{ + if (cat->fid != PGF_FID_SYNTHETIC) { + // XXX: What should the PgfMetaId be? + return gu_new_variant_i(pool, PGF_EXPR_META, + PgfExprMeta, + .id = 0); + } + size_t n_prods = gu_seq_length(cat->prods); + int i = gu_choice_next(choice, n_prods); + if (i == -1) { + return gu_null_variant; + } + PgfProduction prod = gu_seq_get(cat->prods, PgfProduction, i); + return pgf_production_to_expr(prod, choice, pool); +} + + +static PgfExpr +pgf_parse_result_next(PgfParseResult* pr, GuPool* pool) +{ + if (pr->choice == NULL) { + return gu_null_variant; + } + size_t n_results = gu_buf_length(pr->completed); + GuChoiceMark mark = gu_choice_mark(pr->choice); + int i = gu_choice_next(pr->choice, n_results); + if (i == -1) { + return gu_null_variant; + } + PgfCCat* cat = gu_buf_get(pr->completed, PgfCCat*, i); + PgfExpr ret = pgf_cat_to_expr(cat, pr->choice, pool); + gu_choice_reset(pr->choice, mark); + if (!gu_choice_advance(pr->choice)) { + pr->choice = NULL; + }; + return ret; +} + +static void +pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool) +{ + PgfParseResult* pr = gu_container(self, PgfParseResult, en); + *(PgfExpr*)to = pgf_parse_result_next(pr, pool); +} + +PgfExprEnum* +pgf_parse_result(PgfParse* parse, GuPool* pool) +{ + return &gu_new_i(pool, PgfParseResult, + .completed = parse->completed, + .choice = gu_new_choice(pool), + .en.next = pgf_parse_result_enum_next)->en; +} + + + +// TODO: s/CId/Cat, add the cid to Cat, make Cat the key to CncCat +PgfParse* +pgf_parser_parse(PgfParser* parser, PgfCId cat, size_t lin_idx, GuPool* pool) +{ + PgfParse* parse = pgf_new_parse(parser, pool); + GuPool* tmp_pool = gu_new_pool(); + PgfParsing* parsing = pgf_new_parsing(parse, pool, tmp_pool); + PgfCncCat* cnccat = + gu_map_get(parser->concr->cnccats, &cat, PgfCncCat*); + if (!cnccat) { + // error ... + gu_impossible(); + } + gu_assert(lin_idx < cnccat->n_lins); + size_t n_ccats = gu_list_length(cnccat->cats); + for (size_t i = 0; i < n_ccats; i++) { + PgfCCat* ccat = gu_list_index(cnccat->cats, i); + if (ccat != NULL) { + pgf_parsing_predict(parsing, NULL, ccat, lin_idx); + } + } + gu_pool_free(tmp_pool); + return parse; +} + +PgfParser* +pgf_new_parser(PgfConcr* concr, GuPool* pool) +{ + gu_require(concr != NULL); + PgfParser* parser = gu_new(PgfParser, pool); + parser->concr = concr; + return parser; +} diff --git a/src/runtime/c/pgf/parser.h b/src/runtime/c/pgf/parser.h new file mode 100644 index 000000000..127bed5dc --- /dev/null +++ b/src/runtime/c/pgf/parser.h @@ -0,0 +1,127 @@ +#ifndef PGF_PARSER_H_ +#define PGF_PARSER_H_ + +#include <gu/enum.h> +#include <pgf/data.h> +#include <pgf/expr.h> + +/// Parsing +/** @file + * + * @todo Querying the parser for expected continuations + * + * @todo Literals and custom categories + * + * @todo HOAS, dependent types... + */ + +typedef struct PgfParse PgfParse; + +/** @name Creating a new parser + * + * A #PgfParser object can parse sentences of a single concrete category into + * abstract syntax trees (#PgfExpr). The parser is created with + * #pgf_new_parser. + * + * @{ + */ + +/// A parser for a single concrete category +typedef struct PgfParser PgfParser; + + +/// Create a new parser +PgfParser* +pgf_new_parser(PgfConcr* concr, GuPool* pool); +/**< + * @param concr The concrete category whose sentences are to be parsed + * + * @pool + * + * @return A newly created parser for the concrete category \p concr + */ + +/** @} + * + * @name Parsing a sentence + * + * The progress of parsing is controlled by the client code. Firstly, the + * parsing of a sentence is initiated with #pgf_parser_parse. This returns an + * initial #PgfParse object, which represents the state of the parsing. A new + * parse state is obtained by feeding a token with #pgf_parse_token. The old + * parse state is unaffected by this, so backtracking - and even branching - + * can be accomplished by retaining the earlier #PgfParse objects. + * + * @{ + */ + +/// Begin parsing +PgfParse* +pgf_parser_parse(PgfParser* parser, PgfCId cat, size_t lin_idx, GuPool* pool); +/**< + * @param parser The parser to use + * + * @param cat The identifier of the abstract category to parse + * + * @param lin_idx The index of the field of the concrete category to parse + * + * @pool + * + * @return An initial parsing state. +*/ + + +/// Feed a token to the parser +PgfParse* +pgf_parse_token(PgfParse* parse, PgfToken tok, GuPool* pool); +/**< + * @param parse The current parse state + * + * @param tok The token to feed + * + * @pool + * + * @return A new parse state obtained by feeding \p tok as an input to \p + * parse, or \c NULL if the token was unexpected. + * + * @note The new parse state partially depends on the old one, so it doesn't + * make sense to use a \p pool argument with a longer lifetime than that of + * the pool used to create \parse. + */ + + +/** @} + * @name Retrieving abstract syntax trees + * + * After the desired tokens have been fed to the parser, the resulting parse + * state can be queried for completed results. The #pgf_parse_result function + * returns an enumeration (#GuEnum) of possible abstract syntax trees whose + * linearization is the sequence of tokens fed so far. + * + * @{ + */ + + +/// An enumeration of #PgfExpr elements. +typedef GuEnum PgfExprEnum; + + +/// Retrieve the current parses from the parse state. +PgfExprEnum* +pgf_parse_result(PgfParse* parse, GuPool* pool); +/**< + * @param parse A parse state + * + * @pool + * + * @return An enumeration of #PgfExpr elements representing the abstract + * syntax trees that would linearize to the sequence of tokens fed to produce + * \p parse. The enumeration may yield zero, one or more abstract syntax + * trees, depending on whether the parse was unsuccesful, unambiguously + * succesful, or ambiguously successful. + */ + + +/** @} */ + +#endif // PGF_PARSER_H_ diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h new file mode 100644 index 000000000..8ee26aadd --- /dev/null +++ b/src/runtime/c/pgf/pgf.h @@ -0,0 +1,78 @@ +/* + * Copyright 2010 University of Helsinki. + * + * This file is part of libpgf. + * + * Libpgf is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * Libpgf is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libpgf. If not, see <http://www.gnu.org/licenses/>. + */ + +/** @file + * + * The public libpgf API. + */ + +#ifndef PGF_H_ +#define PGF_H_ + +#include <gu/exn.h> +#include <gu/mem.h> +#include <gu/in.h> +#include <gu/string.h> + + +typedef GuString PgfCId; +extern GU_DECLARE_TYPE(PgfCId, typedef); + + +/// A single lexical token +typedef GuString PgfToken; + +/// @name PGF Grammar objects +/// @{ + +typedef struct PgfPGF PgfPGF; + +/**< A representation of a PGF grammar. + */ + + +PgfPGF* +pgf_read(GuIn* in, GuPool* pool, GuExn* err); + +/**< Read a grammar from a PGF file. + * + * @param from PGF input stream. + * The stream must be positioned in the beginning of a binary + * PGF representation. After a succesful invocation, the stream is + * still open and positioned at the end of the representation. + * + * @param[out] err_out Raised error. + * If non-\c NULL, \c *err_out should be \c NULL. Then, upon + * failure, \c *err_out is set to point to a newly allocated + * error object, which the caller must free with #g_exn_free + * or #g_exn_propagate. + * + * @return A new PGF object, or \c NULL upon failure. The returned + * object must later be freed with #pgf_free. + * + */ + + +#include <gu/type.h> +GU_DECLARE_TYPE(PgfPGF, struct); + +/// @} + + +#endif // PGF_H_ diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c new file mode 100644 index 000000000..6aa07e6b7 --- /dev/null +++ b/src/runtime/c/pgf/reader.c @@ -0,0 +1,843 @@ +/* + * Copyright 2010 University of Helsinki. + * + * This file is part of libpgf. + * + * Libpgf is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * Libpgf is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libpgf. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "data.h" +#include "expr.h" +#include <gu/defs.h> +#include <gu/map.h> +#include <gu/seq.h> +#include <gu/assert.h> +#include <gu/intern.h> +#include <gu/in.h> +#include <gu/bits.h> +#include <gu/exn.h> +#include <gu/utf8.h> + +#define GU_LOG_ENABLE +#include <gu/log.h> + +typedef struct PgfIdContext PgfIdContext; + +// +// PgfReader +// + +typedef struct PgfReader PgfReader; + +struct PgfReader { + GuIn* in; + GuExn* err; + GuPool* opool; + GuPool* pool; + GuSymTable* symtab; + PgfSequences* curr_sequences; + PgfCncFuns* curr_cncfuns; + GuMap* curr_ccats; + GuMap* ccat_locs; + GuMap* curr_lindefs; + GuMap* curr_coercions; + GuTypeMap* read_to_map; + GuTypeMap* read_new_map; + void* curr_key; + GuPool* curr_pool; +}; + +typedef struct PgfReadTagExn PgfReadTagExn; + +struct PgfReadTagExn { + GuType* type; + int tag; +}; + +static GU_DEFINE_TYPE(PgfReadTagExn, abstract, _); + +static GU_DEFINE_TYPE(PgfReadExn, abstract, _); + +static uint8_t +pgf_read_u8(PgfReader* rdr) +{ + uint8_t u = gu_in_u8(rdr->in, rdr->err); + gu_debug("u8: %u", u); + return u; +} + +static uint32_t +pgf_read_uint(PgfReader* rdr) +{ + uint32_t u = 0; + int shift = 0; + uint8_t b = 0; + do { + b = pgf_read_u8(rdr); + gu_return_on_exn(rdr->err, 0); + u |= (b & ~0x80) << shift; + shift += 7; + } while (b & 0x80); + gu_debug("uint: %u", u); + return u; +} + +static int32_t +pgf_read_int(PgfReader* rdr) +{ + uint32_t u = pgf_read_uint(rdr); + return gu_decode_2c32(u, rdr->err); +} + +static GuLength +pgf_read_len(PgfReader* rdr) +{ + int32_t len = pgf_read_int(rdr); + // It's crucial that we return 0 on failure, so the + // caller can proceed without checking for error + // immediately. + gu_return_on_exn(rdr->err, 0); + if (len < 0) { + gu_raise_i(rdr->err, PgfReadTagExn, + .type = gu_type(GuLength), .tag = len); + return 0; + } + return (GuLength) len; +} + +typedef const struct PgfReadToFn PgfReadToFn; + +struct PgfReadToFn { + void (*fn)(GuType* type, PgfReader* rdr, void* to); +}; + +static void +pgf_read_to(PgfReader* rdr, GuType* type, void* to) { + PgfReadToFn* fn = gu_type_map_get(rdr->read_to_map, type); + fn->fn(type, rdr, to); +} + +typedef const struct PgfReadNewFn PgfReadNewFn; +struct PgfReadNewFn { + void* (*fn)(GuType* type, PgfReader* rdr, GuPool* pool, + size_t* size_out); +}; + +static void* +pgf_read_new(PgfReader* rdr, GuType* type, GuPool* pool, size_t* size_out) +{ + size_t size = 0; + PgfReadNewFn* fn = gu_type_map_get(rdr->read_new_map, type); + return fn->fn(type, rdr, pool, size_out ? size_out : &size); +} + +static void* +pgf_read_new_type(GuType* type, PgfReader* rdr, GuPool* pool, + size_t* size_out) +{ + GuTypeRepr* repr = gu_type_repr(type); + void* to = gu_malloc_aligned(pool, repr->size, repr->align); + pgf_read_to(rdr, type, to); + *size_out = repr->size; + return to; +} + +static void* +pgf_read_struct(GuStructRepr* stype, PgfReader* rdr, void* to, + GuPool* pool, size_t* size_out) +{ + GuTypeRepr* repr = gu_type_cast((GuType*)stype, repr); + size_t size = repr->size; + GuLength length = 0; + bool have_length = false; + uint8_t* p = NULL; + uint8_t* bto = to; + gu_enter("-> struct %s", stype->name); + + for (int i = 0; i < stype->members.len; i++) { + const GuMember* m = &stype->members.elems[i]; + gu_enter("-> %s.%s", stype->name, m->name); + if (m->is_flex) { + gu_assert(have_length && p == NULL && pool != NULL); + size_t m_size = gu_type_size(m->type); + size = gu_flex_size(size, m->offset, + length, m_size); + p = gu_malloc_aligned(pool, size, repr->align); + for (size_t j = 0; j < length; j++) { + pgf_read_to(rdr, m->type, + &p[m->offset + j * m_size]); + gu_return_on_exn(rdr->err, NULL); + } + } else { + pgf_read_to(rdr, m->type, &bto[m->offset]); + gu_return_on_exn(rdr->err, NULL); + } + if (m->type == gu_type(GuLength)) { + gu_assert(!have_length); + have_length = true; + length = gu_member(GuLength, to, m->offset); + } + gu_exit("<- %s.%s", stype->name, m->name); + } + if (p) { + memcpy(p, to, repr->size); + } + if (size_out) { + *size_out = size; + } + gu_exit("<- struct %s", stype->name); + return p; +} + +static void +pgf_read_to_struct(GuType* type, PgfReader* rdr, void* to) +{ + GuStructRepr* stype = gu_type_cast(type, struct); + pgf_read_struct(stype, rdr, to, NULL, NULL); +} + +static void* +pgf_read_new_struct(GuType* type, PgfReader* rdr, + GuPool* pool, size_t* size_out) +{ + GuStructRepr* stype = gu_type_cast(type, struct); + if (gu_struct_has_flex(stype)) { + GuPool* tmp_pool = gu_new_pool(); + void* to = gu_type_malloc(type, tmp_pool); + void* p = pgf_read_struct(stype, rdr, to, pool, size_out); + gu_pool_free(tmp_pool); + gu_assert(p); + return p; + } else { + void* to = gu_type_malloc(type, pool); + pgf_read_struct(stype, rdr, to, NULL, NULL); + return to; + } +} + + +static void +pgf_read_to_pointer(GuType* type, PgfReader* rdr, void* to) +{ + GuPointerType* ptype = (GuPointerType*) type; + GuType* pointed = ptype->pointed_type; + gu_require(gu_type_has_kind(pointed, gu_kind(struct)) || + gu_type_has_kind(pointed, gu_kind(abstract))); + GuStruct** sto = to; + *sto = pgf_read_new(rdr, pointed, rdr->opool, NULL); +} + +static void +pgf_read_to_GuVariant(GuType* type, PgfReader* rdr, void* to) +{ + GuVariantType* vtype = (GuVariantType*) type; + GuVariant* vto = to; + + uint8_t btag = pgf_read_u8(rdr); + gu_return_on_exn(rdr->err,); + if (btag >= vtype->ctors.len) { + gu_raise_i(rdr->err, PgfReadTagExn, + .type = type, .tag = btag); + return; + } + GuConstructor* ctor = &vtype->ctors.elems[btag]; + gu_enter("-> variant %s", ctor->c_name); + GuPool* tmp_pool = gu_new_pool(); + GuTypeRepr* repr = gu_type_repr(ctor->type); + size_t size = repr->size; + void* init = pgf_read_new(rdr, ctor->type, tmp_pool, &size); + *vto = gu_make_variant(btag, size, repr->align, init, rdr->opool); + gu_pool_free(tmp_pool); + gu_exit("<- variant %s", ctor->c_name); +} + +static void +pgf_read_to_enum(GuType* type, PgfReader* rdr, void* to) +{ + // For now, assume that enum values are encoded in a single octet + GuEnumType* etype = (GuEnumType*) type; + uint8_t tag = pgf_read_u8(rdr); + gu_return_on_exn(rdr->err,); + if (tag >= etype->constants.len) { + gu_raise_i(rdr->err, PgfReadTagExn, + .type = type, .tag = tag); + return; + } + GuEnumConstant* econ = &etype->constants.elems[tag]; + size_t size = gu_type_size(type); + if (size == sizeof(int8_t)) { + *((int8_t*) to) = econ->value; + } else if (size == sizeof(int16_t)) { + *((int16_t*) to) = econ->value; + } else if (size == sizeof(int32_t)) { + *((int32_t*) to) = econ->value; + } else if (size == sizeof(int64_t)) { + *((int64_t*) to) = econ->value; + } else { + gu_impossible(); + } +} + +static void +pgf_read_to_void(GuType* info, PgfReader* rdr, void* to) +{ + (void) (info && rdr && to); +} + + +static void +pgf_read_to_int(GuType* type, PgfReader* rdr, void* to) +{ + (void) type; + *(int*) to = pgf_read_int(rdr); +} + +static void +pgf_read_to_uint16_t(GuType* type, PgfReader* rdr, void* to) +{ + (void) type; + *(uint16_t*) to = gu_in_u16be(rdr->in, rdr->err); +} + +static void +pgf_read_to_GuLength(GuType* type, PgfReader* rdr, void* to) +{ + (void) type; + *(GuLength*) to = pgf_read_len(rdr); +} + +static void +pgf_read_to_double(GuType* type, PgfReader* rdr, void* to) +{ + (void) type; + *(double*) to = gu_in_f64be(rdr->in, rdr->err); +} + +static void +pgf_read_to_alias(GuType* type, PgfReader* rdr, void* to) +{ + GuTypeAlias* atype = gu_type_cast(type, alias); + pgf_read_to(rdr, atype->type, to); +} + +static void +pgf_read_into_map(GuMapType* mtype, PgfReader* rdr, GuMap* map, GuPool* pool) +{ + /* The parameter pool is the temporary pool used to store the + map. But the actual values need to be more persistent so we + store them in rdr->opool. */ + (void) pool; + GuPool* tmp_pool = gu_new_pool(); + void* key = NULL; + GuLength len = pgf_read_len(rdr); + gu_return_on_exn(rdr->err, ); + if (mtype->hasher) { + key = gu_type_malloc(mtype->key_type, tmp_pool); + } + for (size_t i = 0; i < len; i++) { + if (mtype->hasher) { + pgf_read_to(rdr, mtype->key_type, key); + } else { + key = pgf_read_new(rdr, mtype->key_type, + rdr->opool, NULL); + } + gu_return_on_exn(rdr->err, ); + rdr->curr_key = key; + /* If an old value already exists, read into + it. This allows us to create the value + object and point into it before we read the + content. */ + void* valp = gu_map_insert(map, key); + pgf_read_to(rdr, mtype->value_type, valp); + gu_return_on_exn(rdr->err, ); + } + gu_pool_free(tmp_pool); +} + +static void* +pgf_read_new_GuMap(GuType* type, PgfReader* rdr, GuPool* pool, size_t* size_out) +{ + (void) size_out; + GuMapType* mtype = (GuMapType*) type; + GuMap* map = gu_map_type_make(mtype, pool); + pgf_read_into_map(mtype, rdr, map, pool); + gu_return_on_exn(rdr->err, NULL); + return map; +} + +static void +pgf_read_to_GuString(GuType* type, PgfReader* rdr, void* to) +{ + (void) (type); + gu_enter("-> GuString"); + GuString* sp = to; + + GuPool* tmp_pool = gu_new_pool(); + GuStringBuf* sbuf = gu_string_buf(tmp_pool); + GuWriter* wtr = gu_string_buf_writer(sbuf); + + GuLength len = pgf_read_len(rdr); + + for (size_t i = 0; i < len; i++) { + GuUCS ucs = gu_in_utf8(rdr->in, rdr->err); + gu_ucs_write(ucs, wtr, rdr->err); + } + GuString str = gu_string_buf_freeze(sbuf, tmp_pool); + GuSymbol sym = gu_symtable_intern(rdr->symtab, str); + gu_pool_free(tmp_pool); + + gu_exit("<- GuString"); + *sp = sym; +} + +static void +pgf_read_to_PgfCId(GuType* type, PgfReader* rdr, void* to) +{ + (void) (type); + gu_enter("-> PgfCId"); + PgfCId* sp = to; + + GuPool* tmp_pool = gu_new_pool(); + GuStringBuf* sbuf = gu_string_buf(tmp_pool); + GuWriter* wtr = gu_string_buf_writer(sbuf); + + GuLength len = pgf_read_len(rdr); + + for (size_t i = 0; i < len; i++) { + // CIds are in latin-1 + GuUCS ucs = gu_in_u8(rdr->in, rdr->err); + gu_ucs_write(ucs, wtr, rdr->err); + } + GuString str = gu_string_buf_freeze(sbuf, tmp_pool); + GuSymbol sym = gu_symtable_intern(rdr->symtab, str); + gu_pool_free(tmp_pool); + + gu_exit("<- PgfCId"); + *sp = sym; +} + +static void +pgf_read_to_PgfCCatId(GuType* type, PgfReader* rdr, void* to) +{ + (void) (type); + PgfCCat** pto = to; + int fid = pgf_read_int(rdr); + gu_return_on_exn(rdr->err,); + PgfCCat* ccat = gu_map_get(rdr->curr_ccats, &fid, PgfCCat*); + if (ccat) { + *pto = ccat; + return; + } + GuBuf* locs = gu_map_get(rdr->ccat_locs, &fid, GuBuf*); + if (!locs) { + locs = gu_new_buf(PgfCCat**, rdr->pool); + gu_map_put(rdr->ccat_locs, &fid, GuBuf*, locs); + } + gu_buf_push(locs, PgfCCat**, pto); +} + +static void +pgf_read_to_PgfCCat(GuType* type, PgfReader* rdr, void* to) +{ + (void) type; + gu_enter("->"); + PgfCCat* cat = to; + cat->cnccat = NULL; + pgf_read_to(rdr, gu_type(PgfProductionSeq), &cat->prods); + int* fidp = rdr->curr_key; + cat->fid = *fidp; + GuBuf* locs_buf = gu_map_get(rdr->ccat_locs, fidp, GuBuf*); + if (locs_buf) { + size_t len = gu_buf_length(locs_buf); + PgfCCat*** locs = gu_buf_data(locs_buf); + for (size_t n = 0; n < len; n++) { + *(locs[n]) = cat; + } + } + gu_exit("<-"); +} + +// This is only needed because new_struct would otherwise override. +// TODO: get rid of new_struct and all the FAM mess +static void* +pgf_read_new_PgfCCat(GuType* type, PgfReader* rdr, GuPool* pool, + size_t* size_out) +{ + PgfCCat* ccat = gu_new(PgfCCat, pool); + pgf_read_to_PgfCCat(type, rdr, ccat); + *size_out = sizeof(PgfCCat); + return ccat; +} + +static void* +pgf_read_new_GuList(GuType* type, PgfReader* rdr, GuPool* pool, size_t* size_out) +{ + GuListType* ltype = gu_type_cast(type, GuList); + GuLength length = pgf_read_len(rdr); + gu_return_on_exn(rdr->err, NULL); + void* list = gu_list_type_alloc(ltype, length, pool); + for (size_t i = 0; i < length; i++) { + void* elem = gu_list_type_index(ltype, list, i); + pgf_read_to(rdr, ltype->elem_type, elem); + gu_return_on_exn(rdr->err, NULL); + } + *size_out = gu_flex_size(ltype->size, ltype->elems_offset, + length, + gu_type_size(ltype->elem_type)); + return list; +} + +static void +pgf_read_to_GuSeq(GuType* type, PgfReader* rdr, void* to) +{ + gu_enter("->"); + GuSeqType* stype = gu_type_cast(type, GuSeq); + GuLength length = pgf_read_len(rdr); + GuTypeRepr* repr = gu_type_repr(stype->elem_type); + gu_return_on_exn(rdr->err, ); + GuSeq seq = gu_make_seq(repr->size, length, rdr->opool); + uint8_t* data = gu_seq_data(seq); + for (size_t i = 0; i < length; i++) { + void* elem = &data[i * repr->size]; + pgf_read_to(rdr, stype->elem_type, elem); + gu_return_on_exn(rdr->err, ); + } + GuSeq* sto = to; + *sto = seq; + gu_exit("<-"); +} + +static void +pgf_read_to_maybe_seq(GuType* type, PgfReader* rdr, void* to) +{ + GuSeq* sto = to; + uint8_t tag = pgf_read_u8(rdr); + gu_return_on_exn(rdr->err,); + switch (tag) { + case 0: + *sto = gu_null_seq; + break; + case 1: + pgf_read_to_GuSeq(type, rdr, to); + break; + default: + gu_raise_i(rdr->err, PgfReadTagExn, + .type = type, .tag = tag); + break; + } +} + + +static void* +pgf_read_new_idarray(GuType* type, PgfReader* rdr, GuPool* pool, + size_t* size_out) +{ + (void) type; + void* list = pgf_read_new_GuList(type, rdr, rdr->curr_pool, size_out); + if (type == gu_type(PgfSequences)) { + rdr->curr_sequences = list; + } else if (type == gu_type(PgfCncFuns)) { + rdr->curr_cncfuns = list; + } else { + gu_impossible(); + } + return list; +} + +static void +pgf_read_to_PgfSeqId(GuType* type, PgfReader* rdr, void* to) +{ + (void) type; + int32_t id = pgf_read_int(rdr); + gu_return_on_exn(rdr->err,); + if (id < 0 || id >= gu_list_length(rdr->curr_sequences)) { + gu_raise(rdr->err, PgfReadExn); + return; + } + *(PgfSeqId*) to = gu_list_elems(rdr->curr_sequences)[id]; +} + + +static void +pgf_read_to_PgfFunId(GuType* type, PgfReader* rdr, void* to) +{ + (void) type; + int32_t id = pgf_read_int(rdr); + gu_return_on_exn(rdr->err,); + if (id < 0 || id >= gu_list_length(rdr->curr_cncfuns)) { + gu_raise(rdr->err, PgfReadExn); + return; + } + *(PgfFunId*) to = gu_list_elems(rdr->curr_cncfuns)[id]; +} + +static GU_DEFINE_TYPE(PgfLinDefs, GuIntMap, gu_ptr_type(PgfFunIds), + &gu_null_struct); +typedef PgfCCat PgfCCatData; +static GU_DEFINE_TYPE(PgfCCatData, typedef, gu_type(PgfCCat)); + +static GU_DEFINE_TYPE(PgfCCatMap, GuIntMap, gu_ptr_type(PgfCCat), + &gu_null_struct); + +static GU_DEFINE_TYPE(PgfCncCatMap, GuStringMap, gu_ptr_type(PgfCncCat), + &gu_null_struct); + +typedef struct { + GuMapItor fn; + GuBuf* seq; +} PgfCCatCbCtx; + +static PgfCncCat* +pgf_ccat_set_cnccat(PgfCCat* ccat, GuBuf* newly_set) +{ + if (!ccat->cnccat) { + size_t n_prods = gu_seq_length(ccat->prods); + for (size_t i = 0; i < n_prods; i++) { + PgfProduction prod = + gu_seq_get(ccat->prods, PgfProduction, i); + GuVariantInfo i = gu_variant_open(prod); + switch (i.tag) { + case PGF_PRODUCTION_COERCE: { + PgfProductionCoerce* pcoerce = i.data; + PgfCncCat* cnccat = + pgf_ccat_set_cnccat(pcoerce->coerce, + newly_set); + if (!ccat->cnccat) { + ccat->cnccat = cnccat; + } else if (ccat->cnccat != cnccat) { + // XXX: real error + gu_impossible(); + } + break; + } + case PGF_PRODUCTION_APPLY: + // Shouldn't happen with current PGF. + // XXX: real error + gu_impossible(); + break; + default: + gu_impossible(); + } + } + gu_buf_push(newly_set, PgfCCat*, ccat); + } + return ccat->cnccat; +} + + +static void +pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err) +{ + (void) (key && err); + PgfCCatCbCtx* ctx = (PgfCCatCbCtx*) fn; + PgfCCat** ccatp = value; + pgf_ccat_set_cnccat(*ccatp, ctx->seq); +} + +static void* +pgf_read_new_PgfConcr(GuType* type, PgfReader* rdr, GuPool* pool, + size_t* size_out) +{ + (void) (type && size_out); + /* We allocate indices from a temporary pool. The actual data + * is allocated from rdr->opool. Once everything is resolved + * and indices aren't needed, the temporary pool can be + * freed. */ + GuPool* tmp_pool = gu_new_pool(); + rdr->curr_pool = tmp_pool; + PgfConcr* concr = gu_new(PgfConcr, pool);; + concr->cflags = + pgf_read_new(rdr, gu_type(PgfFlags), pool, NULL); + concr->printnames = + pgf_read_new(rdr, gu_type(PgfPrintNames), pool, NULL); + rdr->curr_sequences = + pgf_read_new(rdr, gu_type(PgfSequences), pool, NULL); + rdr->curr_cncfuns = + pgf_read_new(rdr, gu_type(PgfCncFuns), pool, NULL); + GuMapType* lindefs_t = gu_type_cast(gu_type(PgfLinDefs), GuMap); + rdr->curr_lindefs = gu_map_type_make(lindefs_t, tmp_pool); + pgf_read_into_map(lindefs_t, rdr, rdr->curr_lindefs, rdr->opool); + GuMapType* ccats_t = gu_type_cast(gu_type(PgfCCatMap), GuMap); + rdr->curr_ccats = + gu_new_int_map(PgfCCat*, &gu_null_struct, tmp_pool); + rdr->ccat_locs = + gu_new_int_map(GuBuf*, &gu_null_struct, tmp_pool); + pgf_read_into_map(ccats_t, rdr, rdr->curr_ccats, rdr->opool); + concr->cnccats = pgf_read_new(rdr, gu_type(PgfCncCatMap), + rdr->opool, NULL); + + GuBuf* extra_ccats = gu_new_buf(PgfCCat*, tmp_pool); + PgfCCatCbCtx ctx = { { pgf_read_ccat_cb }, extra_ccats }; + gu_map_iter(rdr->curr_ccats, &ctx.fn, NULL); + concr->extra_ccats = gu_buf_freeze(extra_ccats, rdr->opool); + (void) pgf_read_int(rdr); // totalcats + gu_pool_free(tmp_pool); + return concr; +} + +static bool +pgf_ccat_n_lins(PgfCCat* cat, int* n_lins) { + if (gu_seq_is_null(cat->prods)) { + return true; + } + size_t n_prods = gu_seq_length(cat->prods); + for (size_t j = 0; j < n_prods; j++) { + PgfProduction prod = + gu_seq_get(cat->prods, PgfProduction, j); + GuVariantInfo i = gu_variant_open(prod); + switch (i.tag) { + case PGF_PRODUCTION_APPLY: { + PgfProductionApply* papp = i.data; + if (*n_lins == -1) { + *n_lins = (int) papp->fun->n_lins; + } else if (*n_lins != (int) papp->fun->n_lins) { + // Inconsistent n_lins for different productions! + return false; + } + break; + } + case PGF_PRODUCTION_COERCE: { + PgfProductionCoerce* pcoerce = i.data; + bool succ = pgf_ccat_n_lins(pcoerce->coerce, n_lins); + if (!succ) { + return false; + } + break; + } + default: + gu_impossible(); + } + } + return true; +} + +static void* +pgf_read_new_PgfCncCat(GuType* type, PgfReader* rdr, GuPool* pool, + size_t* size_out) +{ + PgfCId cid = *(PgfCId*) rdr->curr_key; + gu_enter("-> cid"); + (void) (type && size_out); + PgfCncCat* cnccat = gu_new(PgfCncCat, pool); + cnccat->cid = cid; + int first = pgf_read_int(rdr); + int last = pgf_read_int(rdr); + int len = last + 1 - first; + PgfCCatIds* cats = gu_new_list(PgfCCatIds, pool, len); + int n_lins = -1; + for (int i = 0; i < len; i++) { + int n = first + i; + PgfCCat* ccat = gu_map_get(rdr->curr_ccats, &n, PgfCCat*); + /* ccat can be NULL if the PGF is optimized and the + * category has been erased as useless */ + gu_list_index(cats, i) = ccat; + if (ccat != NULL) { + // TODO: error if overlap + ccat->cnccat = cnccat; + if (!pgf_ccat_n_lins(ccat, &n_lins)) { + gu_raise(rdr->err, PgfReadExn); + goto fail; + } + + } + gu_debug("range[%d] = %d", i, ccat ? ccat->fid : -1); + } + cnccat->n_lins = n_lins == -1 ? 0 : (size_t) n_lins; + cnccat->cats = cats; + cnccat->lindefs = gu_map_get(rdr->curr_lindefs, &first, PgfFunIds*); + cnccat->labels = pgf_read_new(rdr, gu_type(GuStringL), + pool, NULL); + gu_exit("<-"); + return cnccat; +fail: + gu_exit("<- fail"); + return NULL; +} + +#define PGF_READ_TO_FN(k_, fn_) \ + { gu_kind(k_), (void*) &(PgfReadToFn){ fn_ } } + +#define PGF_READ_TO(k_) \ + PGF_READ_TO_FN(k_, pgf_read_to_##k_) + + +static GuTypeTable +pgf_read_to_table = GU_TYPETABLE( + GU_SLIST_0, + PGF_READ_TO(struct), + PGF_READ_TO(GuVariant), + PGF_READ_TO(enum), + PGF_READ_TO(void), + PGF_READ_TO(int), + PGF_READ_TO(uint16_t), + PGF_READ_TO(GuLength), + PGF_READ_TO(PgfCId), + PGF_READ_TO(GuString), + PGF_READ_TO(double), + PGF_READ_TO(pointer), + PGF_READ_TO_FN(PgfEquationsM, pgf_read_to_maybe_seq), + PGF_READ_TO(GuSeq), + PGF_READ_TO(PgfCCatId), + PGF_READ_TO(PgfCCat), + PGF_READ_TO(PgfSeqId), + PGF_READ_TO(PgfFunId), + PGF_READ_TO(alias)); + +#define PGF_READ_NEW_FN(k_, fn_) \ + { gu_kind(k_), (void*) &(PgfReadNewFn){ fn_ } } + +#define PGF_READ_NEW(k_) \ + PGF_READ_NEW_FN(k_, pgf_read_new_##k_) + +static GuTypeTable +pgf_read_new_table = GU_TYPETABLE( + GU_SLIST_0, + PGF_READ_NEW(type), + PGF_READ_NEW(struct), + PGF_READ_NEW(GuMap), + PGF_READ_NEW(GuList), + PGF_READ_NEW(PgfCCat), + PGF_READ_NEW(PgfCncCat), + PGF_READ_NEW(PgfConcr), + PGF_READ_NEW_FN(PgfSequences, pgf_read_new_idarray), + PGF_READ_NEW_FN(PgfCncFuns, pgf_read_new_idarray) + ); + +static PgfReader* +pgf_new_reader(GuIn* in, GuPool* opool, GuPool* pool, GuExn* err) +{ + PgfReader* rdr = gu_new(PgfReader, pool); + rdr->opool = opool; + rdr->symtab = gu_new_symtable(opool, pool); + rdr->err = err; + rdr->in = in; + rdr->curr_sequences = NULL; + rdr->curr_cncfuns = NULL; + rdr->read_to_map = gu_new_type_map(&pgf_read_to_table, pool); + rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, pool); + rdr->pool = pool; + return rdr; +} + + +PgfPGF* +pgf_read(GuIn* in, GuPool* pool, GuExn* err) +{ + GuPool* tmp_pool = gu_new_pool(); + PgfReader* rdr = pgf_new_reader(in, pool, tmp_pool, err); + PgfPGF* pgf = pgf_read_new(rdr, gu_type(PgfPGF), pool, NULL); + gu_pool_free(tmp_pool); + gu_return_on_exn(err, NULL); + return pgf; +} diff --git a/src/runtime/c/pgf/type.h b/src/runtime/c/pgf/type.h deleted file mode 100644 index 7de9dea20..000000000 --- a/src/runtime/c/pgf/type.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef PGF_TYPE_H -#define PGF_TYPE_H - -typedef struct _Hypo { - BindType bt; - CId var; - Type ty; -} *Hypo; - -typedef struct _Context { - int size; - struct _Hypo hypos[]; -} *Context; - -struct _Type { - Context hypos; - CId cat; - int nArgs; - Expr args[]; -}; - -#endif diff --git a/src/runtime/c/pgf/unloader.c b/src/runtime/c/pgf/unloader.c deleted file mode 100644 index 6a1b0d41d..000000000 --- a/src/runtime/c/pgf/unloader.c +++ /dev/null @@ -1,248 +0,0 @@ -#include "../pgf.h" -#include "data.h" -#include "panic.h" -#include <stdlib.h> - -static void freeCId(CId id) { - free(id); -} - -static void freeCIdList(CIdList ids) { - int i; - for (i = 0; i < ids->count; i++) { - freeCId(ids->names[i]); - } - free(ids); -} - -static void freeString(String str) { - free(str); -} - -static void freeLiteral(Literal lit) { - switch (lit->tag) { - case LIT_STR: - freeString (((LiteralStr) lit)->val); - break; - } - free(lit); -} - -static void freeFlags(Flags flags) { - int i; - for (i = 0; i < flags->count; i++) { - freeCId(flags->values[i].name); - freeLiteral(flags->values[i].value); - } - free(flags); -} - -static void freeContext(Context ctxt); -static void freeType(Type ty); - -static void freeExpr(Expr e0) { - - switch (e0->tag) { - case TAG_ABS: - { - ExprAbs e = (ExprAbs) e0; - freeCId(e->var); - freeExpr(e->body); - } - break; - case TAG_APP: - { - ExprApp e = (ExprApp) e0; - freeExpr(e->left); - freeExpr(e->right); - } - break; - case TAG_LIT: - { - ExprLit e = (ExprLit) e0; - freeLiteral(e->lit); - } - break; - case TAG_MET: - { - ExprMeta e = (ExprMeta) e0; - } - break; - case TAG_FUN: - { - ExprFun e = (ExprFun) e0; - freeCId(e->fun); - } - break; - case TAG_VAR: - { - ExprVar e = (ExprVar) e0; - } - break; - case TAG_TYP: - { - ExprTyped e = (ExprTyped) e0; - freeExpr(e->e); - freeType(e->ty); - } - break; - case TAG_IMP: - { - ExprImplArg e = (ExprImplArg) e0; - freeExpr(e->e); - } - break; - default: - __pgf_panic("Unknown expression tag"); - } - - free(e0); -} - -static void freeType(Type ty) { - freeContext(ty->hypos); - freeCId(ty->cat); - - int i; - for (i = 0; i < ty->nArgs; i++) { - freeExpr(ty->args[i]); - } - - free(ty); -} - -static void freeHypo(Hypo hypo) { - freeCId(hypo->var); - freeType(hypo->ty); -} - -static void freeContext(Context ctxt) { - int i; - for (i = 0; i < ctxt->size; i++) { - freeHypo(&ctxt->hypos[i]); - } - free(ctxt); -} - -static void freePatt(Patt p0) { - switch (p0->tag) { - case TAG_PAPP: - { - int i; - PattApp p = (PattApp) p0; - - freeCId(p->fun); - for (i = 0; i < p->args.count; i++) { - freePatt(p->args.pats[i]); - } - } - break; - case TAG_PVAR: - { - PattVar p = (PattVar) p0; - freeCId(p->var); - } - break; - case TAG_PAT: - { - PattAt p = (PattAt) p0; - freeCId(p->var); - freePatt(p->pat); - } - break; - case TAG_PWILD: - { - PattWild p = (PattWild) p0; - } - break; - case TAG_PLIT: - { - PattLit p = (PattLit) p0; - freeLiteral(p->lit); - } - break; - case TAG_PIMP: - { - PattImplArg p = (PattImplArg) p0; - freePatt(p->pat); - } - break; - case TAG_PTILDE: - { - PattTilde p = (PattTilde) p0; - freeExpr(p->e); - } - break; - default: - __pgf_panic("Unknown pattern tag"); - } - - free(p0); -} - -static void freePatts(Patts pats) { - int i; - for (i = 0; i < pats->count; i++) { - freePatt(pats->pats[i]); - } - free(pats); -} - -static void freeEquations(Equations equs) { - int i; - for (i = 0; i < equs->count; i++) { - freePatts(equs->equs[i].lhs); - freeExpr(equs->equs[i].rhs); - } - free(equs); -} - -static void freeAbsFun(AbsFun fun) { - freeCId(fun->name); - freeType(fun->ty); - freeEquations(fun->equs); -} - -static void freeAbsFuns(AbsFuns funs) { - int i; - for (i = 0; i < funs->count; i++) { - freeAbsFun(&funs->lst[i]); - } - free(funs); -} - -static void freeAbsCat(AbsCat cat) { - freeCId(cat->name); - freeContext(cat->hypos); - freeCIdList(cat->funs); -} - -static void freeAbsCats(AbsCats cats) { - int i; - for (i = 0; i < cats->count; i++) { - freeAbsCat(&cats->lst[i]); - } - free(cats); -} - -static void freeAbstract(Abstract abstr) { - freeCId(abstr->name); - freeFlags(abstr->flags); - freeAbsFuns(abstr->funs); - freeAbsCats(abstr->cats); -} - -static void freeConcrete(Concrete concr) { -// freeCId(concr->name); -// freeFlags(concr->flags); -} - -void freePGF(PGF pgf) { - int i; - - freeFlags(pgf->flags); - freeAbstract(&pgf->abstract); - for (i = 0; i < pgf->nConcr; i++) - freeConcrete(&pgf->concretes[i]); - free(pgf); -} |
