diff options
author | Jari Vetoniemi <mailroxas@gmail.com> | 2018-09-26 15:29:17 +0300 |
---|---|---|
committer | Jari Vetoniemi <mailroxas@gmail.com> | 2018-09-26 15:29:17 +0300 |
commit | 715d3d48f962d17575ff9de0034f2ac89b59f975 (patch) | |
tree | 0ec8cd8e5b895bea4771b7c453cab5415fa5e6a7 | |
parent | d98285e367c29ec9eb1cacf5cf424d6910270efd (diff) |
Goodbye C compiler, hello colm compiler
-rw-r--r-- | src/bin/fspec/dump.c | 881 | ||||
-rw-r--r-- | src/compiler/compiler.lm | 298 | ||||
-rw-r--r-- | src/compiler/expr.lm | 410 | ||||
-rw-r--r-- | src/compiler/types.lm | 55 | ||||
-rw-r--r-- | src/fspec/bcode.c | 199 | ||||
-rw-r--r-- | src/fspec/bcode.h | 92 | ||||
-rw-r--r-- | src/fspec/lexer.h | 25 | ||||
-rw-r--r-- | src/fspec/memory.h | 8 | ||||
-rw-r--r-- | src/fspec/private/bcode-types.h | 16 | ||||
-rw-r--r-- | src/fspec/ragel/lexer-expr.h | 20 | ||||
-rw-r--r-- | src/fspec/ragel/lexer-expr.rl | 118 | ||||
-rw-r--r-- | src/fspec/ragel/lexer-stack.h | 42 | ||||
-rw-r--r-- | src/fspec/ragel/lexer-stack.rl | 153 | ||||
-rw-r--r-- | src/fspec/ragel/lexer.rl | 180 | ||||
-rw-r--r-- | src/fspec/ragel/validator.rl | 96 | ||||
-rw-r--r-- | src/fspec/validator.h | 19 | ||||
-rw-r--r-- | src/util/membuf.c | 31 | ||||
-rw-r--r-- | src/util/membuf.h | 14 |
18 files changed, 763 insertions, 1894 deletions
diff --git a/src/bin/fspec/dump.c b/src/bin/fspec/dump.c deleted file mode 100644 index 8ca53b2..0000000 --- a/src/bin/fspec/dump.c +++ /dev/null @@ -1,881 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <limits.h> -#include <string.h> -#include <assert.h> -#include <err.h> - -#include <iconv.h> -#include <errno.h> -#include <locale.h> -#include <langinfo.h> -#include <squash.h> - -#include <fspec/bcode.h> -#include <fspec/lexer.h> -#include <fspec/validator.h> -#include "fspec/ragel/lexer-expr.h" -#include "util/membuf.h" - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - -#if 0 - -static size_t -to_hex(const uint8_t *buf, const size_t buf_sz, char *out, const size_t out_sz, const bool reverse) -{ - assert(out); - const char nibble[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - const uint8_t nbs = sizeof(nibble) - 1; - - size_t w = 0, last_non_zero = w; - for (size_t i = 0; i < buf_sz && out_sz > 2 && w < out_sz - 2; ++i) { - for (uint8_t c = 0; c < CHAR_BIT / 8 && w < out_sz; ++c) { - const size_t idx = (reverse ? (buf_sz - 1) - i : i); - const uint8_t hi = (buf[idx] >> (4 * (c + 1))) & nbs; - const uint8_t lo = (buf[idx] >> (8 * c)) & nbs; - - if (w || hi || lo) { - out[w++] = nibble[hi]; - out[w++] = nibble[lo]; - last_non_zero = (hi || lo ? w : last_non_zero); - } - } - } - - if (!w) { - out[w++] = nibble[0]; - out[w++] = nibble[0]; - } else { - w = last_non_zero; - } - - assert(w < out_sz); - out[w] = 0; - return w; -} - -static void -print_dec(const uint8_t *buf, const size_t size, const bool is_signed) -{ - char hex[2 * sizeof(fspec_num) + 1]; - to_hex(buf, size, hex, sizeof(hex), true); - - static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t"); - - if (is_signed) { - printf("%ld", (int64_t)strtoll(hex, NULL, 16)); - } else { - printf("%lu", (uint64_t)strtoull(hex, NULL, 16)); - } -} - -static void -print_udec(const uint8_t *buf, const size_t size) -{ - print_dec(buf, size, false); -} - -static void -print_sdec(const uint8_t *buf, const size_t size) -{ - print_dec(buf, size, true); -} - -static void -print_hex(const uint8_t *buf, const size_t size) -{ - char hex[2 * sizeof(fspec_num) + 1]; - to_hex(buf, size, hex, sizeof(hex), true); - printf("0x%s", hex); -} - -static void -print_array(const uint8_t *buf, const size_t size, const size_t nmemb, void (*fun)(const uint8_t *buf, const size_t size)) -{ - const int indent = 4; - if (nmemb > 8) { - printf("{\n%*s", indent, ""); - } else if (nmemb > 1) { - printf("{ "); - } - - for (size_t n = 0; n < nmemb; ++n) { - fun(buf + n * size, size); - printf("%s", (nmemb > 1 && n + 1 < nmemb ? ", " : "")); - - if (n + 1 < nmemb && !((n + 1) % 8)) - printf("\n%*s", indent, ""); - } - - printf("%s\n", (nmemb > 8 ? "\n}" : (nmemb > 1 ? " }" : ""))); -} - -static void -print_str(const char *buf, const size_t size, const size_t nmemb) -{ - const bool has_nl = memchr(buf, '\n', size * nmemb); - if (has_nl) - puts("```"); - - for (size_t n = 0; n < size * nmemb && buf[n] != 0; ++n) - printf("%c", buf[n]); - - puts((has_nl ? "```" : "")); -} - -struct code { - const enum fspec_op *start, *end, *data; -}; - -static void -dump_ops(const struct code *code) -{ - for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, false)) { - printf("%*s- ", (*op == FSPEC_OP_ARG ? 2 : 0), ""); - switch (*op) { - case FSPEC_OP_HEADER: - printf("header\n"); - break; - - case FSPEC_OP_DECLARATION: - printf("declaration\n"); - break; - - case FSPEC_OP_READ: - printf("read\n"); - break; - - case FSPEC_OP_GOTO: - printf("goto\n"); - break; - - case FSPEC_OP_FILTER: - printf("filter\n"); - break; - - case FSPEC_OP_VISUAL: - printf("visual\n"); - break; - - case FSPEC_OP_ARG: - { - const enum fspec_arg *arg = (void*)(op + 1); - printf("arg "); - switch (*arg) { - case FSPEC_ARG_STR: - printf("str %s\n", fspec_arg_get_cstr(arg, code->data)); - break; - - case FSPEC_ARG_VAR: - printf("var %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); - break; - - case FSPEC_ARG_NUM: - printf("num %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); - break; - - case FSPEC_ARG_OFF: - printf("off %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); - break; - - case FSPEC_ARG_DAT: - printf("dat %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); - break; - - case FSPEC_ARG_EOF: - printf("eof\n"); - break; - - case FSPEC_ARG_LAST: - break; - } - } - break; - - case FSPEC_OP_LAST: - break; - } - } -} - -static const enum fspec_op* -get_last_struct(const struct code *code) -{ - const enum fspec_op *last = NULL; - for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, true)) { - const enum fspec_arg *arg; - if (*op == FSPEC_OP_DECLARATION && - (arg = fspec_op_get_arg(op, code->end, 1, 1<<FSPEC_ARG_NUM)) && - fspec_arg_get_num(arg) == FSPEC_DECLARATION_STRUCT) { - last = op; - } - } - return last; -} - -struct dynbuf { - void *data; - size_t len, written; -}; - -static inline void -dynbuf_resize(struct dynbuf *buf, const size_t size) -{ - assert(buf); - if (!(buf->data = realloc(buf->data, size))) - err(EXIT_FAILURE, "realloc(%zu)", size); - - buf->len = size; -} - -static inline void -dynbuf_resize_if_needed(struct dynbuf *buf, const size_t size) -{ - if (buf->len >= size) - return; - - dynbuf_resize(buf, size); -} - -static inline void -dynbuf_grow_if_needed(struct dynbuf *buf, const size_t nmemb) -{ - assert(buf); - if (buf->len >= nmemb && buf->written <= buf->len - nmemb) - return; - - dynbuf_resize(buf, buf->written + nmemb); -} - -static inline void -dynbuf_append(struct dynbuf *buf, const void *data, const size_t data_sz) -{ - dynbuf_grow_if_needed(buf, data_sz); - memcpy((char*)buf->data + buf->written, data, data_sz); - buf->written += data_sz; - assert(buf->written <= buf->len); -} - -static inline void -dynbuf_reset(struct dynbuf *buf) -{ - assert(buf); - buf->written = 0; -} - -static inline void -dynbuf_release(struct dynbuf *buf) -{ - assert(buf); - free(buf->data); - *buf = (struct dynbuf){0}; -} - -static void -display(const void *buf, const size_t size, const size_t nmemb, const bool is_signed, const enum fspec_visual visual) -{ - switch (visual) { - case FSPEC_VISUAL_NUL: - puts("..."); - break; - - case FSPEC_VISUAL_STR: - print_str(buf, size, nmemb); - break; - - case FSPEC_VISUAL_HEX: - print_array(buf, size, nmemb, print_hex); - break; - - case FSPEC_VISUAL_DEC: - print_array(buf, size, nmemb, (is_signed ? print_sdec : print_udec)); - break; - - case FSPEC_VISUAL_LAST: - break; - } -} - -struct decl { - struct dynbuf buf; - const char *name; - const void *start, *end; - size_t nmemb; - uint8_t size; - enum fspec_visual visual; - enum fspec_declaration declaration; -}; - -static void -decl_display(const struct decl *decl) -{ - assert(decl); - assert(decl->size * decl->nmemb <= decl->buf.len); - printf("%s: ", decl->name); - display(decl->buf.data, decl->size, decl->nmemb, false, decl->visual); -} - -static fspec_num -decl_get_num(const struct decl *decl) -{ - assert(decl); - assert(decl->nmemb == 1); - assert(decl->size * decl->nmemb <= decl->buf.len); - char hex[2 * sizeof(fspec_num) + 1]; - to_hex(decl->buf.data, decl->size, hex, sizeof(hex), true); - static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t"); - return (fspec_num)strtoull(hex, NULL, 16); -} - -static const char* -decl_get_cstr(const struct decl *decl) -{ - assert(decl); - return decl->buf.data; -} - -struct context { - struct code code; - struct decl *decl; - fspec_num decl_count; -}; - -static fspec_num -var_get_num(const struct context *context, const enum fspec_arg *arg) -{ - assert(context && arg); - return decl_get_num(&context->decl[fspec_arg_get_num(arg)]); -} - -static const char* -var_get_cstr(const struct context *context, const enum fspec_arg *arg) -{ - assert(context && arg); - return decl_get_cstr(&context->decl[fspec_arg_get_num(arg)]); -} - -enum type { - TYPE_NUM, - TYPE_STR, -}; - -static enum type -var_get_type(const struct context *context, const enum fspec_arg *arg) -{ - assert(context && arg); - const struct decl *decl = &context->decl[fspec_arg_get_num(arg)]; - switch (decl->visual) { - case FSPEC_VISUAL_DEC: - case FSPEC_VISUAL_HEX: - case FSPEC_VISUAL_NUL: - return TYPE_NUM; - - case FSPEC_VISUAL_STR: - return TYPE_STR; - - case FSPEC_VISUAL_LAST: - break; - } - return ~0; -} - -static void -filter_decompress(const struct context *context, struct decl *decl) -{ - assert(decl); - - const enum fspec_arg *arg; - if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<<FSPEC_ARG_STR))) - errx(EXIT_FAILURE, "missing compression"); - - SquashCodec *codec; - const char *algo = fspec_arg_get_cstr(arg, context->code.data); - if (!(codec = squash_get_codec(algo))) - errx(EXIT_FAILURE, "unknown compression '%s'", algo); - - SquashOptions *opts; - if (!(opts = squash_options_new(codec, NULL))) - errx(EXIT_FAILURE, "squash_options_new"); - - size_t dsize = squash_codec_get_uncompressed_size(codec, decl->buf.len, decl->buf.data); - dsize = (dsize ? dsize : decl->buf.len * 2); - - { - const enum fspec_arg *var = arg; - if ((arg = fspec_arg_next(arg, context->code.end, 1, 1<<FSPEC_ARG_NUM | 1<<FSPEC_ARG_VAR))) { - var = arg; - - switch (*var) { - case FSPEC_ARG_NUM: - dsize = fspec_arg_get_num(arg); - break; - - case FSPEC_ARG_VAR: - dsize = var_get_num(context, arg); - break; - - default: - break; - } - } - - for (; (var = fspec_arg_next(var, context->code.end, 1, 1<<FSPEC_ARG_STR));) { - const char *key = fspec_arg_get_cstr(var, context->code.data); - if (!(var = fspec_arg_next(var, context->code.end, 1, ~0))) - errx(EXIT_FAILURE, "expected argument for key '%s'", key); - - switch (*var) { - case FSPEC_ARG_STR: - squash_options_set_string(opts, key, fspec_arg_get_cstr(var, context->code.data)); - break; - - case FSPEC_ARG_NUM: - squash_options_set_int(opts, key, fspec_arg_get_num(var)); - break; - - case FSPEC_ARG_VAR: - if (var_get_type(context, var) == TYPE_STR) { - squash_options_set_string(opts, key, var_get_cstr(context, var)); - } else { - squash_options_set_int(opts, key, var_get_num(context, var)); - } - break; - - default: - break; - } - } - } - - // what a horrible api - squash_object_ref(opts); - - SquashStatus r; - struct dynbuf buf = {0}; - dynbuf_resize(&buf, dsize); - while ((r = squash_codec_decompress_with_options(codec, &buf.len, buf.data, decl->buf.len, decl->buf.data, opts)) == SQUASH_BUFFER_FULL) - dynbuf_resize(&buf, dsize *= 2); - - dynbuf_resize_if_needed(&buf, (buf.written = buf.len)); - squash_object_unref(opts); - - if (r != SQUASH_OK) - errx(EXIT_FAILURE, "squash_codec_decompress(%zu, %zu) = %d: %s", dsize, decl->buf.len, r, squash_status_to_string(r)); - - dynbuf_release(&decl->buf); - decl->buf = buf; - decl->nmemb = buf.len / decl->size; -} - -static void -filter_decode(const struct context *context, struct decl *decl) -{ - assert(decl); - - const enum fspec_arg *arg; - if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<<FSPEC_ARG_STR))) - errx(EXIT_FAILURE, "missing encoding"); - - const char *encoding = fspec_arg_get_cstr(arg, context->code.data); - - static const char *sys_encoding; - if (!sys_encoding) { - setlocale(LC_ALL, ""); - sys_encoding = nl_langinfo(CODESET); - } - - iconv_t iv; - if ((iv = iconv_open(sys_encoding, encoding)) == (iconv_t)-1) - err(EXIT_FAILURE, "iconv_open(%s, %s)", sys_encoding, encoding); - - struct dynbuf buf = {0}; - const uint8_t *in = decl->buf.data; - size_t in_left = decl->buf.written; - do { - char enc[1024], *out = enc; - size_t out_left = sizeof(enc); - - errno = 0; - if (iconv(iv, (char**)&in, &in_left, &out, &out_left) == (size_t)-1 && errno != E2BIG) - err(EXIT_FAILURE, "iconv(%s, %s)", sys_encoding, encoding); - - dynbuf_append(&buf, enc, sizeof(enc) - out_left); - } while (in_left > 0); - - iconv_close(iv); - - dynbuf_release(&decl->buf); - decl->buf = buf; - decl->nmemb = buf.len / decl->size; -} - -static void -call(const struct context *context, FILE *f) -{ - assert(context && f); - - struct decl *decl = NULL; - for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) { - if (decl && op == decl->end) { - decl_display(decl); - decl = NULL; - } - - switch (*op) { - case FSPEC_OP_DECLARATION: - { - const enum fspec_arg *arg; - arg = fspec_op_get_arg(op, context->code.end, 2, 1<<FSPEC_ARG_NUM); - decl = &context->decl[fspec_arg_get_num(arg)]; - dynbuf_reset(&decl->buf); - } - break; - - case FSPEC_OP_READ: - { - assert(decl); - const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM); - static_assert(CHAR_BIT == 8, "doesn't work otherwere right now"); - decl->size = fspec_arg_get_num(arg) / 8; - decl->nmemb = 0; - - for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) { - switch (*var) { - case FSPEC_ARG_NUM: - case FSPEC_ARG_VAR: - { - const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var)); - if (v == 0) { - goto noop; - } else if (v > 1) { - const size_t nmemb = (decl->nmemb ? decl->nmemb : 1) * v; - dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb); - const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f); - decl->buf.written += decl->size * read; - decl->nmemb += read; - } - } - break; - - case FSPEC_ARG_STR: - break; - - case FSPEC_ARG_EOF: - { - const size_t nmemb = (decl->nmemb ? decl->nmemb : 1); - size_t read = 0, r = nmemb; - while (r == nmemb) { - dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb); - read += (r = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f)); - decl->buf.written += decl->size * r; - }; - decl->nmemb += read; - } - break; - - default: - break; - } - } -noop: - - if (!fspec_arg_next(arg, context->code.end, 1, ~0)) { - dynbuf_grow_if_needed(&decl->buf, decl->size * 1); - const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, 1, f); - decl->buf.written += decl->size * read; - decl->nmemb = read; - } - - assert(decl->nmemb != 0); - } - break; - - case FSPEC_OP_GOTO: - { - decl = NULL; - const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_VAR); - const struct decl *d = &context->decl[fspec_arg_get_num(arg)]; - struct context c = *context; - c.code.start = d->start; - c.code.end = d->end; - - for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) { - switch (*var) { - case FSPEC_ARG_NUM: - case FSPEC_ARG_VAR: - { - const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var)); - for (fspec_num i = 0; i < v; ++i) - call(&c, f); - } - break; - - // XXX: How to handle STR with stdin? - // With fseek would be easy. - case FSPEC_ARG_STR: - break; - - case FSPEC_ARG_EOF: - while (!feof(f)) - call(&c, f); - break; - - default: - break; - } - } - - if (!fspec_arg_next(arg, context->code.end, 1, ~0)) - call(&c, f); - } - break; - - case FSPEC_OP_FILTER: - { - assert(decl); - const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_STR); - - const struct { - const char *name; - void (*fun)(const struct context*, struct decl*); - } map[] = { - { .name = "encoding", .fun = filter_decode }, - { .name = "compression", .fun = filter_decompress }, - }; - - const char *filter = fspec_arg_get_cstr(arg, context->code.data); - for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { - if (!strcmp(filter, map[i].name)) { - struct context c = *context; - c.code.start = op; - map[i].fun(&c, decl); - break; - } - - if (i == ARRAY_SIZE(map) - 1) - warnx("unknown filter '%s'", filter); - } - } - break; - - case FSPEC_OP_VISUAL: - { - assert(decl); - const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM); - decl->visual = fspec_arg_get_num(arg); - } - break; - - case FSPEC_OP_ARG: - case FSPEC_OP_HEADER: - case FSPEC_OP_LAST: - break; - } - } - - if (decl && context->code.end == decl->end) - decl_display(decl); -} - -static void -setup(const struct context *context) -{ - assert(context); - - for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) { - switch (*op) { - case FSPEC_OP_DECLARATION: - { - const enum fspec_arg *arg[4]; - arg[0] = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM); - arg[1] = fspec_arg_next(arg[0], context->code.end, 1, 1<<FSPEC_ARG_NUM); - arg[2] = fspec_arg_next(arg[1], context->code.end, 1, 1<<FSPEC_ARG_OFF); - arg[3] = fspec_arg_next(arg[2], context->code.end, 1, 1<<FSPEC_ARG_STR); - const fspec_num id = fspec_arg_get_num(arg[1]); - struct decl *decl = &context->decl[id]; - decl->declaration = fspec_arg_get_num(arg[0]); - decl->name = fspec_arg_get_cstr(arg[3], context->code.data); - decl->visual = FSPEC_VISUAL_DEC; - decl->start = op; - decl->end = (char*)op + fspec_arg_get_num(arg[2]); - assert(!decl->buf.data); - } - break; - - default: - break; - } - } -} - -static void -execute(const struct fspec_mem *mem) -{ - assert(mem); - - struct context context = { - .code.start = mem->data, - .code.end = (void*)((char*)mem->data + mem->len), - .code.data = mem->data - }; - - printf("output: %zu bytes\n", mem->len); - dump_ops(&context.code); - - const enum fspec_arg *arg = fspec_op_get_arg(context.code.data, context.code.end, 2, 1<<FSPEC_ARG_NUM); - context.decl_count = fspec_arg_get_num(arg); - - if (!(context.decl = calloc(context.decl_count, sizeof(*context.decl)))) - err(EXIT_FAILURE, "calloc(%zu, %zu)", context.decl_count, sizeof(*context.decl)); - - setup(&context); - - puts("\nexecution:"); - context.code.start = get_last_struct(&context.code); - assert(context.code.start); - call(&context, stdin); - - for (fspec_num i = 0; i < context.decl_count; ++i) - dynbuf_release(&context.decl[i].buf); - - free(context.decl); -} -#endif - -static FILE* -fopen_or_die(const char *path, const char *mode) -{ - assert(path && mode); - - FILE *f; - if (!(f = fopen(path, mode))) - err(EXIT_FAILURE, "fopen(%s, %s)", path, mode); - - return f; -} - -#define container_of(ptr, type, member) ((type *)((char *)(1 ? (ptr) : &((type *)0)->member) - offsetof(type, member))) - -struct expr { - struct fspec_expr expr; - struct membuf output; - FILE *file; -}; - -static size_t -fspec_expr_write(struct fspec_expr *expr, const void *output, const size_t size, const size_t nmemb) -{ - assert(expr && output); - // struct expr *l = container_of(expr, struct expr, expr); - (void)expr, (void)size, (void)nmemb; - return nmemb; -} - -static size_t -fspec_expr_read(struct fspec_expr *expr, void *input, const size_t size, const size_t nmemb) -{ - assert(expr && input); - struct expr *l = container_of(expr, struct expr, expr); - return fread(input, size, nmemb, l->file); -} - -struct lexer { - struct fspec_lexer lexer; - struct membuf output; - FILE *file; -}; - -static size_t -fspec_lexer_write(struct fspec_lexer *lexer, const enum fspec_lexer_section section, const void *output, const size_t size, const size_t nmemb) -{ - assert(lexer && output); - // struct lexer *l = container_of(lexer, struct lexer, lexer); - (void)lexer, (void)section, (void)size, (void)nmemb; - return nmemb; -} - -static size_t -fspec_lexer_read(struct fspec_lexer *lexer, void *input, const size_t size, const size_t nmemb) -{ - assert(lexer && input); - struct lexer *l = container_of(lexer, struct lexer, lexer); - return fread(input, size, nmemb, l->file); -} - -#if 0 -static size_t -fspec_validator_read(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb) -{ - assert(validator && ptr); - assert(ptr == validator->mem.input.data); - const size_t read = validator->mem.input.len / size; - assert((validator->mem.input.len && read == nmemb) || (!validator->mem.input.len && !read)); - validator->mem.input.len -= read * size; - assert(validator->mem.input.len == 0); - return read; -} -#endif - -int -main(int argc, const char *argv[]) -{ - if (argc < 2) - errx(EXIT_FAILURE, "usage: %s file.spec < data", argv[0]); - - char output[4096]; - - { - char input[4096]; - struct expr l = { - .expr = { - .ops.read = fspec_expr_read, - .ops.write = fspec_expr_write, - .mem.input = { .data = input, .len = sizeof(input) }, - }, - .file = fopen_or_die(argv[1], "rb"), - .output.mem = { .data = output, .len = sizeof(output) }, - }; - - if (!fspec_expr_parse(&l.expr, argv[1])) - exit(EXIT_FAILURE); - - fclose(l.file); - // bcode = l.expr.mem.output; - } - - { - char input[4096]; - struct lexer l = { - .lexer = { - .ops.read = fspec_lexer_read, - .ops.write = fspec_lexer_write, - .mem.input = { .data = input, .len = sizeof(input) }, - }, - .file = fopen_or_die(argv[1], "rb"), - .output.mem = { .data = output, .len = sizeof(output) }, - }; - - if (!fspec_lexer_parse(&l.lexer, argv[1])) - exit(EXIT_FAILURE); - - fclose(l.file); - // bcode = l.lexer.mem.output; - } - -#if 0 - - { - struct fspec_validator validator = { - .ops.read = fspec_validator_read, - .mem.input = bcode, - }; - - if (!fspec_validator_parse(&validator, argv[1])) - exit(EXIT_FAILURE); - } - - execute(&bcode); -#endif - return EXIT_SUCCESS; -} diff --git a/src/compiler/compiler.lm b/src/compiler/compiler.lm new file mode 100644 index 0000000..59b0ee3 --- /dev/null +++ b/src/compiler/compiler.lm @@ -0,0 +1,298 @@ +include 'expr.lm' + +context fspec + token WS / space / + + context primitive + token TYPE_SIGN / [us] / + token TYPE_BITS / [1-9][0-9]* / + int strtoull(a:str, b:int) = c_strtoull + + def type + signed:bool + bits:int + [TYPE_SIGN TYPE_BITS] { + lhs.signed = ($r1 == 's') + lhs.bits = strtoull($r2, 10) + } + end + + context container + context enum + lex + ignore / space+ / + literal `= `, `{ `} + end + + literal `enum + + int + const_int_expr(expr:collapser::collapsed) { + if (!expr || !expr.result.value || !expr.result.value.number) reject + return expr.result.value.number.value + } + + def item + value:int + [name:name::type `= expr::enum::type `, item] { lhs.value = const_int_expr(r3.collapsed) } + | [name:name::type `= expr::enum::type] { lhs.value = const_int_expr(r3.collapsed) } + | [name:name::type `, item] { lhs.value = 0 } # TODO: count + | [name:name::type] { lhs.value = 0 } # TODO: count + + def type + name:str + items:item+ + [type:`enum WS+ name::type? `{ item+ `}] { if (name::type in r3) lhs.name = $(name::type in r3) lhs.items = r5 } + end + + context strukt # <- struct is taken :( + lex + ignore / space+ / + literal `{ `} + end + + literal `struct + + def item + [data:declaration::type] + + def type + name:str + items:item+ + [type:`struct WS+ name::type? `{ item+ `}] { if (name::type in r3) lhs.name = $(name::type in r3) lhs.items = r5 } + end + + context select + lex + ignore / space+ / + literal `( `) `{ `} `* + end + + literal `select + + def item + [expr:expr::paren::type `) data:declaration::type] + | [expr:`* `) data:declaration::type] + + def type + name:str + items:item+ # BUG: marking item+ with items: in the match below causes weird behaviour + [type:`select `( expr::paren::type `) `{ item+ `}] { lhs.items = r6 } + end + + def type + [data:enum::type] | [data:strukt::type] | [data:select::type] + end + + context declaration + lex + ignore / space+ / + literal `; `| `[ `] + end + + literal `enum `struct + token VISUAL / 'nul' | 'dec' | 'hex' | 'str' / + + def visual + [WS+ name:VISUAL] + + def filter + [`| function:reference::function::type] + + def length + [`[ expr:expr::bracket::type `]] + + def extra + length:collapser::collapsed + [length* filter:filter* visual:visual?] { + f:str = '' + for l:length in repeat(r1) { + if (f != '') + f = f + '*' + + if (l.expr.collapsed.result.value) { + f = f + '(' + $l.expr.collapsed.result.value + ')' + } else { + f = f + '(' + $l.expr.collapsed + ')' + } + } + lhs.length = collapser::collapsestr(f) + } + + def type + # enum name <primitive> name <extra>; + [cref:`enum WS+ parent:name::type WS+ primitive:primitive::type WS+ name:name::type extra:extra `;] commit + # struct name name <extra>; + | [cref:`struct WS+ parent:name::type WS+ name:name::type extra:extra `;] commit + # <primitive> name <extra>; + | [primitive:primitive::type WS+ name:name::type extra:extra `;] commit + # select ((thing)) { ... } <extra>; INVALID + | [container::select::type extra `;] commit { reject } + # select ((thing)) { ... } <primitive> name <extra>; INVALID + | [container::select::type primitive::type WS+ name::type extra `;] commit { reject } + # struct (optional) { ... } <primitive> name <extra>; INVALID + | [container::strukt::type primitive::type WS+ name::type extra `;] commit { reject } + # enum (optional) { ... } <primitive> name <extra>; + | [container:container::type primitive:primitive::type WS+ name:name::type extra:extra `;] commit + # select ((expr)) { ... } name <extra>; + # struct (optional) { ... } name <extra>; + | [container:container::type name:name::type extra:extra `;] + # (enum|struct) name { ... }; + | [container:container::type `;] + end + + def source + [items:declaration::type*] commit +end + +parse source:fspec::source[stdin] + +if (!source) { + print(error) + exit(1) +} + +struct scope + names:map<str, map<str, any>> +end + +global g_scopes:list<scope> = new list<scope>() + +void +push_scope() { + s:scope = new scope() + s->names = new map<str, map<str, any>>() + g_scopes->push_head(s) +} + +void +pop_scope() +{ + g_scopes->pop_head() +} + +any +lookup_no_error(type:str, name:str) { + for s:scope in g_scopes { + cmap:map<str, any> = s->names->find(type) + if (cmap) { + var:any = cmap->find(name) + if (var) + return var + } + } + return nil +} + +any +insert(type:str, name:str, var:any) +{ + if (!name) + return var # <anon> + + if (type != 'variable' && lookup_no_error(type, name)) { + print('`', type, ' ', name, '` is already declared as a `', type, '` in current scope!\n') + exit(1) + } + + cmap:map<str, any> = g_scopes->top->names->find(type) + + if (!cmap) { + cmap = new map<str, any>() + } else if (cmap->find(name)) { + print('`', type, ' ', name, '` is already declared as a `', type, '` in current scope!\n') + exit(1) + } + + cmap->insert(name, var) + g_scopes->top->names->insert(type, cmap) + return var +} + +any +lookup(type:str, name:str) +{ + r:any = lookup_no_error(type, name) + if (!r) { + print('`', type, ' ', name, '` is not declared in this or outer scope!\n') + exit(1) + } + return r +} + +str +container_name_str(s:ref<str>) { if (!s) return '<anon>' return s } + +str +signed_str(s:ref<bool>) { if (s) return 'signed' return 'unsigned' } + +void +print_declaration(d:fspec::declaration::type) +{ + insert('variable', $d.name, d) + print('variable `', $d.name, '` is ') + + c:fspec::container::type + if (d.cref) c = lookup($d.cref, $d.parent) else c = d.container + + if (c) + print('`', c.data.type, ' ', container_name_str(c.data.name), '` ') + + if (d.primitive) + print(d.primitive.bits, ' bits and ', signed_str(d.primitive.signed)) + + print('\n') + + if (d.extra) { + if (d.extra.length) { + if (!d.extra.length.result.value || d.extra.length.result.value.reference) { + print(' it has a variable length that needs to be computed with formula `', $d.extra.length, '`\n') + } else { + if (d.extra.length.result.value.number) { + print(' it has a constant length of `', $d.extra.length.result.value, '`\n') + } else if (d.extra.length.result.value.string) { + print(' its length will increase until pattern `', $d.extra.length.result.value.string.raw, '` has been read from stream\n') + } + } + } + + for f:fspec::declaration::filter in repeat(d.extra.filter) + print(' it needs to be filtered with `', $f.function, '`\n') + + for v:fspec::declaration::visual in child(d.extra.visual) + print(' and it should be visualized as `', $v.name, '`\n') + } +} + +void +walk(s:fspec::container::type) +{ + insert($s.data.type, s.data.name, s) + if ($s.data.type == 'enum') { + for i:fspec::container::enum::item in repeat(s.data.items) + insert('variable', $i.name, i) + } else if ($s.data.type == 'struct') { + push_scope() + for d:fspec::container::strukt::item in repeat(s.data.items) { + if (d.data.container) + walk(d.data.container) + if (d.data.name) + print_declaration(d.data) + } + pop_scope() + } else if ($s.data.type == 'select') { + push_scope() + for d:fspec::container::select::item in repeat(s.data.items) { + if (d.data.container) + walk(d.data.container) + if (d.data.name) + print_declaration(d.data) + } + pop_scope() + } +} + +push_scope() +for s:fspec::declaration::type in repeat(source.items) + walk(s.container) +pop_scope() diff --git a/src/compiler/expr.lm b/src/compiler/expr.lm new file mode 100644 index 0000000..d615358 --- /dev/null +++ b/src/compiler/expr.lm @@ -0,0 +1,410 @@ +include 'types.lm' + +global RTYPE_UNKNOWN:int = 0 +global RTYPE_NUMBER:int = 1 +global RTYPE_STRING:int = 2 + +context expr + context enum + token EXPR / (any - [,}])+ / + + def type + collapsed:collapser::collapsed + [EXPR] { + lhs.collapsed = collapser::collapsestr($r1) + if (!lhs.collapsed) reject + } + end + + context paren + literal `( `) + token EXPR / (any - [()])+ / + + def syntax + [EXPR] | [`( syntax `)] + + def type + collapsed:collapser::collapsed + [syntax] { + lhs.collapsed = collapser::collapsestr($r1) + if (!lhs.collapsed) reject + } + end + + context bracket + literal `[ `] + token EXPR / (any - '[' - ']')+ / + + def syntax + [EXPR] | [`[ syntax `]] + + def type + collapsed:collapser::collapsed + [syntax] { + lhs.collapsed = collapser::collapsestr($r1) + if (!lhs.collapsed) reject + } + end + + context arg + literal `( `) + token EXPR / (any - [(),])+ / + + def syntax + [EXPR] | [`( syntax `)] + + def type + collapsed:collapser::collapsed + [syntax] { + lhs.collapsed = collapser::collapsestr($r1) + if (!lhs.collapsed) reject + } + end +end + +context reference + context function + literal `( `) `, + + def arg + [expr::arg::type `, arg] | [expr::arg::type] + + def type + [name:name::type `( args:arg* `)] + end + + context variable + def type + [name:name::type] + end + + def type + [function::type] + | [variable::type] +end + +context collapser + # BUG: lists seem to not really work well here + # implement simple native stack + int op_stack_new() = c_op_stack_new + int op_stack_free(stack:int) = c_op_stack_free + str op_stack_top(stack:int) = c_op_stack_top + bool op_stack_push(stack:int, op:str) = c_op_stack_push + str op_stack_pop(stack:int) = c_op_stack_pop + + stack:int + values:str + next_is_unary:bool + + token WS / space / + literal `+ `- + literal `( `) `+# `-# `! `~ `* `/ `% `#+ `#- `<< `>> `< `> `<= `>= `== `!= `& `^ `| `&& `|| `? `: + literal `. `[ `] + literal `sizeof + + def unary_unambi + [`!] | [`~] + + def binary_unambi + [`.] | [`*] | [`/] | [`%] | [`<<] | [`>>] | [`<] | [`>] | [`<=] | [`>=] | [`==] | [`!=] | [`&] | [`^] | [`|] | [`&&] | [`||] + + def ternary + [`:] + + context reducer + int modulo(a:int, b:int) = c_modulo + int bitnot(a:int) = c_bitnot + int bitand(a:int, b:int) = c_bitand + int bitor(a:int, b:int) = c_bitor + int bitxor(a:int, b:int) = c_bitxor + int shiftl(a:int, b:int) = c_shiftl + int shiftr(a:int, b:int) = c_shiftr + int subscript(a:str, b:int) = c_subscript + + def builtin + value:value + [`sizeof `( string::type `)] { lhs.value = parse value[$r3.length] } + + def value + rtype:int + [builtin:builtin] { lhs = r1.value } + | [number:number::type] { lhs.rtype = RTYPE_NUMBER } + | [string:string::type] { lhs.rtype = RTYPE_STRING } + | [reference:reference::type] + + def unary + [`+#] | [`-#] | [unary_unambi] + + def binary + [`#+] | [`#-] | [binary_unambi] + + def anynary + [unary] | [binary] | [ternary] + + def numop + value:value + [number::type WS `-#] { lhs.value = parse value[$(r1.value - (r1.value * 2))] } + | [number::type WS `+#] { lhs.value = parse value[$r1.value] } + | [number::type WS `!] { r:int = 0 if (r1.value == 0) r = 1 lhs.value = parse value[$r] } + | [number::type WS `~] { lhs.value = parse value[$bitnot(r1.value)] } + | [number::type WS number::type WS `*] { lhs.value = parse value[$(r1.value * r3.value)] } + | [number::type WS number::type WS `/] { lhs.value = parse value[$(r1.value / r3.value)] } + | [number::type WS number::type WS `#+] { lhs.value = parse value[$(r1.value + r3.value)] } + | [number::type WS number::type WS `#-] { lhs.value = parse value[$(r1.value - r3.value)] } + | [number::type WS number::type WS `<<] { lhs.value = parse value[$shiftl(r1.value, r3.value)] } + | [number::type WS number::type WS `>>] { lhs.value = parse value[$shiftr(r1.value, r3.value)] } + | [number::type WS number::type WS `<] { r:int = 0 if (r1.value < r3.value) r = 1 lhs.value = parse value[$r] } + | [number::type WS number::type WS `>] { r:int = 0 if (r1.value > r3.value) r = 1 lhs.value = parse value[$r] } + | [number::type WS number::type WS `<=] { r:int = 0 if (r1.value <= r3.value) r = 1 lhs.value = parse value[$r] } + | [number::type WS number::type WS `>=] { r:int = 0 if (r1.value >= r3.value) r = 1 lhs.value = parse value[$r] } + | [number::type WS number::type WS `==] { r:int = 0 if (r1.value == r3.value) r = 1 lhs.value = parse value[$r] } + | [number::type WS number::type WS `!=] { r:int = 0 if (r1.value != r3.value) r = 1 lhs.value = parse value[$r] } + | [number::type WS number::type WS `&] { lhs.value = parse value[$bitand(r1.value, r3.value)] } + | [number::type WS number::type WS `^] { lhs.value = parse value[$bitxor(r1.value, r3.value)] } + | [number::type WS number::type WS `|] { lhs.value = parse value[$bitor(r1.value, r3.value)] } + | [number::type WS number::type WS `&&] { r:int = 0 if (r1.value && r3.value) r = 1 lhs.value = parse value[$r] } + | [number::type WS number::type WS `||] { r:int = 0 if (r1.value || r3.value) r = 1 lhs.value = parse value[$r] } + | [number::type WS number::type WS number::type WS `:] { if (r1.value) lhs.value = parse value[$r3] else lhs.value = parse value[$r5] } + | [number::type WS value WS `]] commit { reject } + + # strings can only be operated with `!= and `== against other strings + def stringop + value:value + [string::type WS string::type WS `==] commit { r:int = 0 if (r1.raw == r3.raw) r = 1 lhs.value = parse value[$r] } + | [string::type WS string::type WS `!=] commit { r:int = 0 if (r1.raw != r3.raw) r = 1 lhs.value = parse value[$r] } + | [number::type WS string::type WS string::type WS `:] { if (r1.value) lhs.value = parse value[$r3] else lhs.value = parse value[$r5] } + | [string::type WS unary] commit { reject } # <unary> str + | [string::type WS number::type WS binary] commit { reject } # str <binary> num + | [number::type WS string::type WS binary] commit { reject } # num <binary> str + | [string::type WS string::type WS binary] { reject } # str <math> str + | [value WS number::type WS string::type WS ternary] commit { reject } # (v ? num : str) + | [value WS string::type WS number::type WS ternary] commit { reject } # (v ? str : num) + | [string::type WS value WS value WS ternary] commit { reject } # (str ? v : v) + | [string::type WS number::type WS `]] { + if (r1.length <= r3.value) { + print('subscript out of bounds\n') + reject + } else { + lhs.value = parse value[$subscript($r1.raw, r3.value)] + } + } + + def valueop + rtype:int + [value WS value WS `]] { lhs.rtype = RTYPE_NUMBER } + | [value WS unary] { lhs.rtype = RTYPE_NUMBER } + | [value WS value WS binary] { lhs.rtype = RTYPE_NUMBER } + | [value WS value WS value WS ternary] { if (r3.rtype != r5.rtype) reject lhs.rtype = r1.rtype } + + def operation + rtype:int + [numop] { lhs = parse operation[$r1.value] } + | [stringop] { lhs = parse operation[$r1.value] } + | [valueop] { lhs.rtype = r1.rtype } + | [value] { lhs.rtype = r1.rtype } + | [value WS] { lhs.rtype = r1.rtype } + | [operation WS] { lhs.rtype = r1.rtype } + | [operation anynary] { lhs.rtype = r1.rtype } + + def collapsed + value:value + [operation+] commit { + # we check return type of every operation to make sure we don't operate on different types + rtype:int = RTYPE_UNKNOWN + for i:operation in repeat(r1) { + if (i.rtype != RTYPE_UNKNOWN && rtype != RTYPE_UNKNOWN && i.rtype != rtype) + reject + rtype = i.rtype + } + lhs.value = parse value[$lhs] + } + end + + def operator + precedence:int + rassoc:bool + open:str + close:str + args:int + [`[] { lhs.precedence = 0 lhs.rassoc = false lhs.args = 0 lhs.open = ']' } + | [`]] { lhs.precedence = 0 lhs.rassoc = false lhs.args = 2 lhs.close = '[' } + | [`(] { lhs.precedence = 0 lhs.rassoc = false lhs.args = 0 lhs.open = ')' } + | [`)] { lhs.precedence = 0 lhs.rassoc = false lhs.args = 0 lhs.close = '(' } + | [`.] { lhs.precedence = 0 lhs.rassoc = false lhs.args = 2 } + | [`+#] { lhs.precedence = 1 lhs.rassoc = true lhs.args = 1 } + | [`-#] { lhs.precedence = 1 lhs.rassoc = true lhs.args = 1 } + | [`!] { lhs.precedence = 1 lhs.rassoc = true lhs.args = 1 } + | [`~] { lhs.precedence = 1 lhs.rassoc = true lhs.args = 1 } + | [`*] { lhs.precedence = 2 lhs.rassoc = false lhs.args = 2 } + | [`/] { lhs.precedence = 2 lhs.rassoc = false lhs.args = 2 } + | [`%] { lhs.precedence = 2 lhs.rassoc = false lhs.args = 2 } + | [`#+] { lhs.precedence = 3 lhs.rassoc = false lhs.args = 2 } + | [`#-] { lhs.precedence = 3 lhs.rassoc = false lhs.args = 2 } + | [`<<] { lhs.precedence = 4 lhs.rassoc = false lhs.args = 2 } + | [`>>] { lhs.precedence = 4 lhs.rassoc = false lhs.args = 2 } + | [`<] { lhs.precedence = 5 lhs.rassoc = false lhs.args = 2 } + | [`>] { lhs.precedence = 5 lhs.rassoc = false lhs.args = 2 } + | [`<=] { lhs.precedence = 5 lhs.rassoc = false lhs.args = 2 } + | [`>=] { lhs.precedence = 5 lhs.rassoc = false lhs.args = 2 } + | [`==] { lhs.precedence = 6 lhs.rassoc = false lhs.args = 2 } + | [`!=] { lhs.precedence = 6 lhs.rassoc = false lhs.args = 2 } + | [`&] { lhs.precedence = 7 lhs.rassoc = false lhs.args = 2 } + | [`^] { lhs.precedence = 8 lhs.rassoc = false lhs.args = 2 } + | [`|] { lhs.precedence = 9 lhs.rassoc = false lhs.args = 2 } + | [`&&] { lhs.precedence = 10 lhs.rassoc = false lhs.args = 2 } + | [`||] { lhs.precedence = 11 lhs.rassoc = false lhs.args = 2 } + | [`?] { lhs.precedence = 12 lhs.rassoc = true lhs.args = 0 lhs.open = ':' } + | [`:] { lhs.precedence = 12 lhs.rassoc = true lhs.args = 3 } + + void + operate(op:operator) + { + if (!op.args) + return 0 + + s:str = values + $op + # print('collapse: ', s, ' -> ') + r:reducer::collapsed = parse reducer::collapsed[s] + + if (!r) { + reject + } else { + # print(^r, '\n') + values = $r + ' ' + } + } + + void + flush_all() + { + while (op_stack_top(stack)) + operate(parse operator[op_stack_pop(stack)]) + } + + void + flush_until(name:str) + { + while (op_stack_top(stack) && op_stack_top(stack) != name) + operate(parse operator[op_stack_pop(stack)]) + } + + void + flush_ordered(name:str) + { + op:operator = parse operator[name] + + top:operator + if (op_stack_top(stack)) top = parse operator[op_stack_top(stack)] + while (top && (top.precedence < op.precedence || (top.precedence == op.precedence && !top.rassoc)) && !top.open) { + operate(parse operator[op_stack_pop(stack)]) + if (op_stack_top(stack)) top = parse operator[op_stack_top(stack)] else top = nil + } + + if (op.close) + flush_until(op.close) + + next_is_unary = !op.close + } + + void + stack_op(name:str) + { + flush_ordered(name) + # print('push op: ', name, '\n') + op_stack_push(stack, name) + } + + void + stack_value(value:str) + { + # print('push value: ', value, '\n') + values = values + value + ' ' + next_is_unary = false + } + + def value + [reducer::builtin] | [number::unsigned::type] | [string::type] | [reference::type] + + def ambiguous + [`+] | [`-] + + def unambiguous + [unary_unambi] | [binary_unambi] + + def binary + [ambiguous] | [binary_unambi] + + def otherops + op:str + [ambiguous] { if (next_is_unary) lhs.op = $r1 + '#' else lhs.op = '#' + $r1 } + | [unambiguous] { lhs.op = $r1 } + + def lsquare + [`[] { stack_op($lhs) } + + def rsquare + [`]] { stack_op($lhs) } + + def lparen + [`(] { stack_op($lhs) } + + def rparen + [`)] { stack_op($lhs) } + + def question + [`?] { stack_op($lhs) } + + def colon + [`:] { stack_op($lhs) } + + def constant + [number::unsigned::type] | [string::type] + + def tok#en + [value WS+ value] commit { reject } + | [binary WS* binary] commit { reject } + | [constant WS* `(] commit { reject } + | [`) WS* value] commit { reject } + | [`] WS* value] commit { reject } + | [lparen tok+ rparen] commit + | [lsquare WS* rsquare] commit + | [tok+ question tok+ colon tok+] commit + | [otherops] { stack_op(r1.op) } + | [value] { stack_value($r1) } + | [WS] { lhs = nil } + + def collapsed + result:reducer::collapsed + [tok*] commit { flush_all() lhs.result = parse reducer::collapsed[values] if (!lhs.result) reject } + + collapsed + collapse(s:stream) + { + c:collapser = new collapser() + c->stack = op_stack_new() + c->values = '' + c->next_is_unary = true + parse r:collapsed(c)[s] + op_stack_free(c->stack) + return r + } + + collapsed + collapsestr(s:str) + { + c:collapser = new collapser() + c->stack = op_stack_new() + c->values = '' + c->next_is_unary = true + parse r:collapsed(c)[s] + op_stack_free(c->stack) + return r + } +end + +# r:collapser::collapsed = collapser::collapse(stdin) +# if (r) { +# print($r.result, '\n') +# } else { +# print('invalid expression\n') +# } diff --git a/src/compiler/types.lm b/src/compiler/types.lm new file mode 100644 index 0000000..34a9026 --- /dev/null +++ b/src/compiler/types.lm @@ -0,0 +1,55 @@ +context number + context unsigned + literal `true `false + token OCT / '0'[0-7]+ / + token DEC / [0-9]+ / + token HEX / '0x' xdigit+ / + int strtoull(a:str, b:int) = c_strtoull + + def type + value:int + [`false] { lhs.value = 0 } + | [`true] { lhs.value = 1 } + | [OCT] { lhs.value = strtoull($r1, 8) } + | [DEC] { lhs.value = strtoull($r1, 10) } + | [HEX] { lhs.value = strtoull($r1, 16) } + end + + lex + ignore / space+ / + literal `+ `- + end + + def type + value:int + [unsigned::type] { lhs.value = r1.value } + | [`- type] { lhs.value = r2.value - (r2.value * 2) } + | [`+ type] { lhs.value = r2.value } +end + +context string + rl ESC / '\\' / + token ESC_CHR / ESC [abfnrtv\\'"e] / + token ESC_HEX / ESC 'x' xdigit{2} / + token ESC_OCT / ESC [0-7]{1,3} / + token CHAR / ^cntrl - ['"] - ESC / + literal `' `" + + def raw + [ESC_CHR] # TODO: how to output raw bytes? + | [ESC_HEX] # TODO: how to output raw bytes? + | [ESC_OCT] # TODO: how to output raw bytes? + | [CHAR] + + def type + length:int + [`' raw:raw* `'] { i:int = 0 for s:raw in r2 i = i + 1 lhs.length = i } + | [`" raw:raw* `"] { i:int = 0 for s:raw in r2 i = i + 1 lhs.length = i } +end + +context name + token NAME / [a-zA-Z_][a-zA-Z_0-9]* / + + def type + [NAME] +end diff --git a/src/fspec/bcode.c b/src/fspec/bcode.c deleted file mode 100644 index 70e4b08..0000000 --- a/src/fspec/bcode.c +++ /dev/null @@ -1,199 +0,0 @@ -#include <fspec/bcode.h> -#include <fspec/memory.h> -#include "private/bcode-types.h" - -#include <stdlib.h> -#include <string.h> -#include <assert.h> -#include <err.h> - -static_assert(sizeof(fspec_off) <= sizeof(((struct fspec_mem*)0)->len), "fspec_off should not be larger than what fspec_mem can represent"); -static_assert(sizeof(enum fspec_op) == sizeof(uint8_t), "enum fspec_op is expected to have size of uint8_t"); -static_assert(sizeof(struct fspec_bcode) == sizeof(enum fspec_op), "struct fspec_bcode is expected to have size of enum fspec_op"); -static_assert(sizeof(FSPEC_OP_LAST) <= 8, "op codes need more than 3 bits to be represented"); - -#if 0 -uint8_t -fspec_op_get_num_args(const struct fspec_op_code *code) -{ - return code->op >> 2; -} - -static fspec_off -arg_data_len(const enum fspec_arg *arg) -{ - assert(arg); - - switch (*arg) { - case FSPEC_ARG_NUM: - return sizeof(fspec_num); - - case FSPEC_ARG_VAR: - return sizeof(fspec_var); - - case FSPEC_ARG_STR: - case FSPEC_ARG_OFF: - return sizeof(fspec_off); - - case FSPEC_ARG_DAT: - { - struct fspec_mem mem; - fspec_arg_get_mem(arg, NULL, &mem); - return sizeof(fspec_off) + mem.len; - } - - case FSPEC_ARG_EOF: - break; - - case FSPEC_ARG_LAST: - errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); - break; - } - - return 0; -} - -static fspec_off -arg_len(const enum fspec_arg *arg) -{ - return sizeof(*arg) + arg_data_len(arg); -} - -void -fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem) -{ - assert(arg && out_mem); - - switch (*arg) { - case FSPEC_ARG_STR: - { - assert(data); - fspec_off off; - fspec_strsz len; - memcpy(&off, (char*)arg + sizeof(*arg), sizeof(off)); - memcpy(&len, (char*)data + off, sizeof(len)); - out_mem->data = (char*)data + off + sizeof(len); - out_mem->len = len; - } - break; - - case FSPEC_ARG_DAT: - { - fspec_off len; - memcpy(&len, (char*)arg + sizeof(*arg), sizeof(len)); - out_mem->data = (char*)arg + sizeof(*arg) + sizeof(len); - out_mem->len = len; - } - break; - - case FSPEC_ARG_VAR: - case FSPEC_ARG_NUM: - case FSPEC_ARG_OFF: - out_mem->data = (char*)arg + sizeof(*arg); - out_mem->len = arg_data_len(arg); - break; - - case FSPEC_ARG_EOF: - *out_mem = (struct fspec_mem){0}; - break; - - case FSPEC_ARG_LAST: - errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); - break; - } -} - -fspec_num -fspec_arg_get_num(const enum fspec_arg *arg) -{ - assert(arg && *arg < FSPEC_ARG_LAST); - fspec_num v; - switch (*arg) { - case FSPEC_ARG_NUM: - memcpy(&v, arg + sizeof(*arg), sizeof(v)); - break; - - case FSPEC_ARG_VAR: - { - fspec_var var; - memcpy(&var, arg + sizeof(*arg), sizeof(var)); - v = var; - } - break; - - case FSPEC_ARG_DAT: - case FSPEC_ARG_OFF: - { - fspec_off off; - memcpy(&off, arg + sizeof(*arg), sizeof(off)); - v = off; - } - break; - - case FSPEC_ARG_STR: - case FSPEC_ARG_EOF: - case FSPEC_ARG_LAST: - errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); - break; - } - return v; -} - -const char* -fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data) -{ - assert(arg && *arg == FSPEC_ARG_STR); - struct fspec_mem mem; - fspec_arg_get_mem(arg, data, &mem); - return (const char*)mem.data; -} - -const enum fspec_arg* -fspec_op_get_arg(const enum fspec_op *start, const void *end, const uint8_t nth, const uint32_t expect) -{ - uint8_t i = 0; - const enum fspec_arg *arg = NULL; - for (const enum fspec_op *op = fspec_op_next(start, end, false); op && i < nth; op = fspec_op_next(op, end, false)) { - if (*op != FSPEC_OP_ARG) - return NULL; - - arg = (void*)(op + 1); - assert(*arg >= 0 && *arg < FSPEC_ARG_LAST); - ++i; - } - - if (arg && !(expect & (1<<*arg))) - errx(EXIT_FAILURE, "got unexpected argument of type %u", *arg); - - return arg; -} - -const enum fspec_arg* -fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const uint32_t expect) -{ - return fspec_op_get_arg((void*)(arg - 1), end, nth, expect); -} - -const enum fspec_op* -fspec_op_next(const enum fspec_op *start, const void *end, const bool skip_args) -{ - assert(start && end); - fspec_off off = sizeof(*start); - if ((void*)start < end && *start == FSPEC_OP_ARG) - off += arg_len((void*)(start + 1)); - - for (const enum fspec_op *op = start + off; (void*)start < end && (void*)op < end; ++op) { - if (*op >= FSPEC_OP_LAST) - errx(EXIT_FAILURE, "got unexected opcode %u", *op); - - if (skip_args && *op == FSPEC_OP_ARG) { - op += arg_len((void*)(op + 1)); - continue; - } - - return op; - } - - return NULL; -} -#endif diff --git a/src/fspec/bcode.h b/src/fspec/bcode.h deleted file mode 100644 index 3d216af..0000000 --- a/src/fspec/bcode.h +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include <inttypes.h> -#include <stdint.h> -#include <stdbool.h> - -/** maximum range of numbers */ -#define PRI_FSPEC_NUM PRIu64 -typedef uint64_t fspec_num; - -enum fspec_visual { - FSPEC_VISUAL_NUL, - FSPEC_VISUAL_DEC, - FSPEC_VISUAL_HEX, - FSPEC_VISUAL_STR, - FSPEC_VISUAL_LAST, -} __attribute__((packed)); - -enum fspec_type { - FSPEC_TYPE_CODE, - FSPEC_TYPE_CALL, - FSPEC_TYPE_U8, - FSPEC_TYPE_S8, - FSPEC_TYPE_U16, - FSPEC_TYPE_S16, - FSPEC_TYPE_U32, - FSPEC_TYPE_S32, - FSPEC_TYPE_U64, - FSPEC_TYPE_S64, - FSPEC_TYPE_LAST, -} __attribute__((packed)); - -enum fspec_storage { - FSPEC_STORAGE_DATA, - FSPEC_STORAGE_LOCAL, - FSPEC_STORAGE_LAST, -} __attribute__((packed)); - -enum fspec_builtin { - FSPEC_BUILTIN_ADD, - FSPEC_BUILTIN_SUB, - FSPEC_BUILTIN_MUL, - FSPEC_BUILTIN_DIV, - FSPEC_BUILTIN_MOD, - FSPEC_BUILTIN_BIT_AND, - FSPEC_BUILTIN_BIT_OR, - FSPEC_BUILTIN_BIT_XOR, - FSPEC_BUILTIN_BIT_LEFT, - FSPEC_BUILTIN_BIT_RIGHT, - FSPEC_BUILTIN_DECLARE, - FSPEC_BUILTIN_READ, - FSPEC_BUILTIN_FILTER, - FSPEC_BUILTIN_VISUAL, - FSPEC_BUILTIN_LAST, -} __attribute__((packed)); - -enum fspec_op { - FSPEC_OP_BUILTIN, - FSPEC_OP_PUSH, - FSPEC_OP_POP, - FSPEC_OP_VAR, - FSPEC_OP_LAST, -} __attribute__((packed)); - -struct fspec_bcode { - char op, data[]; -} __attribute__((packed)); - -#if 0 -('fspc')(version) -OP_BUILTIN (declare) OP_PUSH OP_VAR8 (storage) OP_VAR8 (type) OP_VAR [name] OP_POP -OP_BUILTIN (filter) -OP_FUN FUN_ASSIGN VAR0 VAR [data] -OP_FUN FUN_READ -#endif - -#if 0 -uint8_t -fspec_op_get_num_args(const struct fspec_bcode *code); - -const struct fspec_bcode* -fspec_op_next(const struct fspec_bcode *code, const void *end, const bool skip_args); - -const struct fspec_bcode* -fspec_op_get_arg(const struct fspec_bcode *code, const void *end, const uint8_t nth, const uint32_t expect); - -const struct fspec_arg* -fspec_arg_next(const struct fspec_bcode *code, const void *end, const uint8_t nth, const uint32_t expect); - -fspec_num -fspec_ref_get_num(const struct fspec_bcode *code); -#endif diff --git a/src/fspec/lexer.h b/src/fspec/lexer.h deleted file mode 100644 index ef6e059..0000000 --- a/src/fspec/lexer.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include <fspec/memory.h> - -#include <stdbool.h> - -enum fspec_lexer_section { - FSPEC_SECTION_DATA, - FSPEC_SECTION_CODE, -}; - -struct fspec_lexer; -struct fspec_lexer { - struct { - size_t (*read)(struct fspec_lexer *lexer, void *input, const size_t size, const size_t nmemb); - size_t (*write)(struct fspec_lexer *lexer, const enum fspec_lexer_section section, const void *output, const size_t size, const size_t nmemb); - } ops; - - struct { - struct fspec_mem input; - } mem; -}; - -bool -fspec_lexer_parse(struct fspec_lexer *lexer, const char *name); diff --git a/src/fspec/memory.h b/src/fspec/memory.h deleted file mode 100644 index 768415a..0000000 --- a/src/fspec/memory.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -#include <stddef.h> - -struct fspec_mem { - void *data; - size_t len; -}; diff --git a/src/fspec/private/bcode-types.h b/src/fspec/private/bcode-types.h deleted file mode 100644 index 8c9ce74..0000000 --- a/src/fspec/private/bcode-types.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include <inttypes.h> -#include <stdint.h> - -/** maximum size of string literals */ -#define PRI_FSPEC_STRSZ PRIu8 -typedef uint8_t fspec_strsz; - -/** maximum range of variable ids */ -#define PRI_FSPEC_VAR PRIu16 -typedef uint16_t fspec_var; - -/** maximum range of bytecode offsets */ -#define PRI_FSPEC_OFF PRIu32 -typedef uint32_t fspec_off; diff --git a/src/fspec/ragel/lexer-expr.h b/src/fspec/ragel/lexer-expr.h deleted file mode 100644 index 904736d..0000000 --- a/src/fspec/ragel/lexer-expr.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include <fspec/memory.h> - -#include <stdbool.h> - -struct fspec_expr; -struct fspec_expr { - struct { - size_t (*read)(struct fspec_expr *lexer, void *input, const size_t size, const size_t nmemb); - size_t (*write)(struct fspec_expr *lexer, const void *output, const size_t size, const size_t nmemb); - } ops; - - struct { - struct fspec_mem input; - } mem; -}; - -bool -fspec_expr_parse(struct fspec_expr *lexer, const char *name); diff --git a/src/fspec/ragel/lexer-expr.rl b/src/fspec/ragel/lexer-expr.rl deleted file mode 100644 index 0f1f83d..0000000 --- a/src/fspec/ragel/lexer-expr.rl +++ /dev/null @@ -1,118 +0,0 @@ -#include "lexer-expr.h" -#include "lexer-stack.h" -#include "util/ragel/ragel.h" - -#include <stdlib.h> -#include <stdio.h> -#include <assert.h> -#include <err.h> - -static uint8_t -precedence(char op) -{ - switch (op) { - case '^': return 4; - case '*': return 3; - case '/': return 3; - case '+': return 2; - case '-': return 2; - } - errx(EXIT_FAILURE, "unknown operator %c for precedence", op); - return 0; -} - -static size_t -pop(char cur, char *mstack, size_t open) -{ - static char cvar = 'a'; - - // 1 + 2 + 4 + 3 * 2 / 2 * 2 * 2 - 2 * 2 + 5; - while (open >= 3) { - const char last_op = mstack[open - 2]; - const uint8_t last_prio = precedence(last_op); - const uint8_t new_prio = precedence(cur); - - if (last_prio <= new_prio) - break; - - printf("%c = ", cvar); - for (size_t i = open - 3; i < open; ++i) - printf("%c ", mstack[i]); - puts(";"); - open -= 3; - - mstack[open++] = cvar; - ++cvar; - } - - return open; -} - -%%{ - machine fspec_expr; - include fspec_stack "lexer-stack.rl"; - variable p ragel.p; - variable pe ragel.pe; - variable eof ragel.eof; - write data noerror nofinal; - - action op { - open = pop(fc, mstack, open); - mstack[open++] = fc; - } - - logical_operators = '&&' | '||' | '==' | '<' | '>' | '<=' | '>='; - calc_operators = '-' | '+' | '/' | '*' | '%'; - bitwise_operators = '&' | '|' | '^' | '<<' | '>>'; - - main := |* - calc_operators => op; - stack_num => { mstack[open++] = fc;}; - '(' => { }; - ')' => { }; - ' '; - ';' => { - printf("v = "); - for (size_t i = 0; i < open; ++i) - printf("%c ", mstack[i]); - puts(";"); - }; - *|; -}%% - - -bool -fspec_expr_parse(struct fspec_expr *expr, const char *name) -{ - int cs, act; - const char *ts, *te; - (void)ts; - - size_t open = 0; - char mstack[25]; - - %% write init; - - (void)fspec_expr_en_main; - assert(expr); - assert(expr->ops.read); - assert(expr->ops.write); - assert(expr->mem.input.data && expr->mem.input.len); - assert(expr->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); - - char var[256]; - struct stack stack = { .var.buf.mem = { .data = var, .len = sizeof(var) } }; - struct ragel ragel = { .name = name, .lineno = 1 }; - - // static const fspec_num version = 0; - - struct fspec_mem input = expr->mem.input; - for (bool eof = false; !ragel.error && !eof;) { - const size_t bytes = expr->ops.read(expr, input.data, 1, input.len); - const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; - ragel_feed_input(&ragel, (eof = (bytes < input.len)), &rl); - %% write exec; - } - - return !ragel.error; -} diff --git a/src/fspec/ragel/lexer-stack.h b/src/fspec/ragel/lexer-stack.h deleted file mode 100644 index eebf055..0000000 --- a/src/fspec/ragel/lexer-stack.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include "util/membuf.h" - -#include <stdint.h> - -struct varbuf { - struct membuf buf; - size_t offset; -}; - -void -varbuf_begin(struct varbuf *var); - -void -varbuf_reset(struct varbuf *var); - -void -varbuf_remove_last(struct varbuf *var); - -struct stack { - struct varbuf var; - - union { - struct fspec_mem str; - uint64_t num; - }; - - enum stack_type { - STACK_STR, - STACK_NUM, - } type; -}; - -void -stack_num(struct stack *stack, const uint8_t base); - -const struct fspec_mem* -stack_get_str(const struct stack *stack); - -uint64_t -stack_get_num(const struct stack *stack); diff --git a/src/fspec/ragel/lexer-stack.rl b/src/fspec/ragel/lexer-stack.rl deleted file mode 100644 index 940f820..0000000 --- a/src/fspec/ragel/lexer-stack.rl +++ /dev/null @@ -1,153 +0,0 @@ -#include "lexer-stack.h" - -#include <stdlib.h> -#include <assert.h> -#include <err.h> - -void -varbuf_begin(struct varbuf *var) -{ - assert(var); - var->offset = var->buf.written; - assert(var->offset <= var->buf.mem.len); -} - -void -varbuf_reset(struct varbuf *var) -{ - assert(var); - var->offset = var->buf.written = 0; -} - -void -varbuf_remove_last(struct varbuf *var) -{ - assert(var); - assert(var->buf.written >= var->offset); - const size_t size = var->buf.written - var->offset; - assert(var->buf.written >= size); - var->buf.written -= size; - assert(var->buf.written <= var->buf.mem.len); -} - -static void -stack_check_type(const struct stack *stack, const enum stack_type type) -{ - assert(stack); - - if (stack->type == type) - return; - - const char *got = (type == STACK_STR ? "str" : "num"), *expected = (stack->type == STACK_STR ? "str" : "num"); - errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", got, expected); -} - -void -stack_num(struct stack *stack, const uint8_t base) -{ - assert(stack); - membuf_terminate(&stack->var.buf, (char[]){ 0 }, 1); - const char *str = (char*)stack->var.buf.mem.data + stack->var.offset; - stack->type = STACK_NUM; - stack->num = strtoll(str, NULL, base); - varbuf_remove_last(&stack->var); -} - -const struct fspec_mem* -stack_get_str(const struct stack *stack) -{ - stack_check_type(stack, STACK_STR); - return &stack->str; -} - -uint64_t -stack_get_num(const struct stack *stack) -{ - stack_check_type(stack, STACK_NUM); - return stack->num; -} - -%%{ - machine fspec_stack; - - action stack_oct { - stack_num(&stack, 8); - } - - action stack_hex { - stack_num(&stack, 16); - } - - action stack_dec { - stack_num(&stack, 10); - } - - action stack_str { - membuf_terminate(&stack.var.buf, (char[]){ 0 }, 1); - stack.type = STACK_STR; - stack.str = stack.var.buf.mem; - stack.str.len = stack.var.buf.written; - } - - action store_esc_num { - const fspec_num v = stack_get_num(&stack); - assert(v <= 255); - membuf_append(&stack.var.buf, (uint8_t[]){ v }, sizeof(uint8_t)); - } - - action store_esc { - const struct { const char e, v; } map[] = { - { .e = 'a', .v = '\a' }, - { .e = 'b', .v = '\b' }, - { .e = 'f', .v = '\f' }, - { .e = 'n', .v = '\n' }, - { .e = 'r', .v = '\r' }, - { .e = 't', .v = '\t' }, - { .e = 'v', .v = '\v' }, - { .e = '\\', .v = '\\' }, - { .e = '\'', .v = '\'' }, - { .e = '\"', .v = '"' }, - { .e = 'e', .v = 0x1B }, - }; - - for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); ++i) { - if (fc != map[i].e) - continue; - - membuf_append(&stack.var.buf, &map[i].v, sizeof(map[i].v)); - break; - } - } - - action store { - membuf_append(&stack.var.buf, fpc, 1); - } - - action begin_num { - varbuf_begin(&stack.var); - } - - action begin_str { - varbuf_reset(&stack.var); - } - - # Semantic - quote = ['"]; - esc = [abfnrtv\\'"e]; - esc_chr = '\\'; - esc_hex = 'x' <: xdigit{2}; - hex = '0x' <: xdigit{1,}; - oct = [0-7]{1,3}; - dec = [\-+]? <: (([1-9] <: digit*) | '0'); - name = ((alpha | '_') <: (alnum | '_')*); - - # Stack - stack_name = name >begin_str $store %stack_str; - stack_hex = hex >begin_num $store %stack_hex; - stack_dec = dec >begin_num $store %stack_dec; - stack_oct = oct >begin_num $store %stack_oct; - stack_esc_hex = esc_hex >begin_num <>*store %stack_hex; - stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc); - stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote; - stack_num = stack_dec | stack_hex; -}%% diff --git a/src/fspec/ragel/lexer.rl b/src/fspec/ragel/lexer.rl deleted file mode 100644 index 8354bc0..0000000 --- a/src/fspec/ragel/lexer.rl +++ /dev/null @@ -1,180 +0,0 @@ -#include <fspec/lexer.h> -#include <fspec/bcode.h> -#include "lexer-stack.h" -#include "util/ragel/ragel.h" -#include "fspec/private/bcode-types.h" - -#include <assert.h> - -%%{ - machine fspec_lexer; - include fspec_stack "lexer-stack.rl"; - variable p ragel.p; - variable pe ragel.pe; - variable eof ragel.eof; - write data noerror nofinal; - - action arg_eof { - // codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL); - } - - action arg_num { - // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) }); - } - - action arg_str { - // const struct fspec_mem *str = stack_get_str(&state.stack); - // codebuf_append_arg_cstr(&state.out, str->data, str->len); - } - - action arg_var { - // state_append_arg_var(&state, true, stack_get_str(&state.stack)); - } - - action filter { - // codebuf_append_op(&state.out, FSPEC_OP_FILTER); - } - - action goto { - // codebuf_append_op(&state.out, FSPEC_OP_GOTO); - // state_append_arg_var(&state, false, stack_get_str(&state.stack)); - } - - action vnul { - // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); - // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL }); - } - - action vdec { - // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); - // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC }); - } - - action vhex { - // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); - // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX }); - } - - action vstr { - // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); - // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR }); - } - - action r8 { - // codebuf_append_op(&state.out, FSPEC_OP_READ); - // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 8 }); - } - - action r16 { - // codebuf_append_op(&state.out, FSPEC_OP_READ); - // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 }); - } - - action r32 { - // codebuf_append_op(&state.out, FSPEC_OP_READ); - // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 }); - } - - action r64 { - // codebuf_append_op(&state.out, FSPEC_OP_READ); - // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 }); - } - - action enum_member_end { - } - - action enum_member_start { - } - - action enum_end { - } - - action enum_start { - } - - action struct_member_end { - // state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER); - } - - action struct_member_start { - // state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack)); - } - - action struct_end { - // state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT); - } - - action struct_start { - // state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack)); - } - - action type_err { - ragel_throw_error(&ragel, "unknown type name"); - } - - action visual_err { - ragel_throw_error(&ragel, "unknown visualization"); - } - - action syntax_err { - ragel_throw_error(&ragel, "malformed input (machine failed here or in next expression)"); - } - - action line { - ragel_advance_line(&ragel); - } - - # Semantic - newline = '\n'; - valid = ^cntrl; - comment = '//' <: valid* :>> newline; - type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's64') %r64; - visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr; - - # Catchers - catch_const_expr = stack_num %arg_num; - catch_struct = 'struct ' <: stack_name; - catch_enum = 'enum ' <: stack_name; - catch_type = (catch_struct %goto | type) $!type_err; - catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var; - catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']'; - catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? <: (', ' <: catch_args)* :>> ')')?; - catch_visual = ' ' <: visual $!visual_err; - - # Abstract - struct_member = stack_name %struct_member_start :>> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) :>> ';' %struct_member_end; - struct = catch_struct %struct_start :>> ' {' <: (space | comment | struct_member)* :>> '};' %struct_end; - enum_member = stack_name %enum_member_start :>> (': ' <: catch_const_expr)? :>> ';' %enum_member_end; - enum = catch_enum %enum_start :>> ' {' <: (space | comment | enum_member)* :>> '};' %enum_end; - line = valid* :>> newline %line; - main := ((space | comment | enum | struct)* & line*) $!syntax_err; -}%% - -bool -fspec_lexer_parse(struct fspec_lexer *lexer, const char *name) -{ - int cs; - %% write init; - - (void)fspec_lexer_en_main; - assert(lexer); - assert(lexer->ops.read); - assert(lexer->mem.input.data && lexer->mem.input.len); - assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); - - char var[256]; - struct stack stack = { .var.buf.mem = { .data = var, .len = sizeof(var) } }; - struct ragel ragel = { .name = name, .lineno = 1 }; - - // static const fspec_num version = 0; - - struct fspec_mem input = lexer->mem.input; - for (bool eof = false; !ragel.error && !eof;) { - const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len); - const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; - ragel_feed_input(&ragel, (eof = (bytes < input.len)), &rl); - %% write exec; - } - - return !ragel.error; -} diff --git a/src/fspec/ragel/validator.rl b/src/fspec/ragel/validator.rl deleted file mode 100644 index 90ead21..0000000 --- a/src/fspec/ragel/validator.rl +++ /dev/null @@ -1,96 +0,0 @@ -#include <fspec/bcode.h> -#include <fspec/validator.h> -#include "util/ragel/ragel.h" -#include "fspec/private/bcode-types.h" - -#include <assert.h> - -struct stack { - union { - fspec_num num; - fspec_off off; - fspec_var var; - fspec_strsz strsz; - unsigned char b[sizeof(fspec_num)]; - } u; - uint8_t i; // writing index for u.b -}; - -struct state { - struct ragel ragel; - struct stack stack; -}; - -%%{ - machine fspec_validator; - variable p state.ragel.p; - variable pe state.ragel.pe; - variable eof state.ragel.eof; - write data noerror nofinal; - -# BLT_HEADER = 0; -# BLT_ADD = 1; -# BLT_SUB = 2; -# BLT_MUL = 3; -# BLT_DIV = 4; -# BLT_MOD = 5; -# BLT_BIT_AND = 6; -# BLT_BIT_OR = 7; -# BLT_BIT_XOR = 8; -# BLT_BIT_LEFT = 9; -# BLT_BIT_RIGHT = 10; -# BLT_DECLARE = 11; -# BLT_READ = 12; -# BLT_GOTO = 13; -# BLT_FILTER = 14; -# BLT_VISUAL = 15; -# -# builtins = BLT_HEADER | -# BLT_ADD | BLT_SUB | BLT_MUL | BLT_DIV | BLT_MOD | -# BLT_BIT_AND | BLT_BIT_OR | BLT_BIT_XOR | BLT_BIT_LEFT | BLT_BIT_RIGHT -# BLT_DECLARE | BLT_READ | BLT_GOTO | BLT_FILTER | BLT_VISUAL; -# -# OP_ARG = 0; -# OP_REF = 1; -# OP_BLT = 2 OP_ARG builtins; -# OP_FUN = 3; -# -# arg_ops = OP_REF | OP_FUN | OP_BUILTIN OP_FUN -# -# BLT_DECLARE = OP_BUILTIN 10 OP_ARG 2 OP_REF OP_REF; -# BLT_READ = OP_BUILTIN 11 OP_ARG 1..255 OP_REF (arg_ops)*; -# -# pattern = ((BLT_READ | BLT_GOTO) BLT_FILTER* BLT_VISUAL?)* $!pattern_error; -# main := (BLT_HEADER <: BLT_DECLARE* <: pattern) %check_decls $advance $!syntax_error; - main := any*; -}%% - -bool -fspec_validator_parse(struct fspec_validator *validator, const char *name) -{ - int cs; - %% write init; - - (void)fspec_validator_en_main; - assert(validator); - assert(validator->ops.read); - assert(validator->mem.input.data && validator->mem.input.len); - assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); - - struct state state = { - .ragel.name = name, - .ragel.lineno = 1, - }; - - static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union"); - - struct fspec_mem input = validator->mem.input; - for (bool eof = false; !state.ragel.error && !eof;) { - const size_t bytes = validator->ops.read(validator, input.data, 1, input.len); - const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true }; - ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); - %% write exec; - } - - return !state.ragel.error; -} diff --git a/src/fspec/validator.h b/src/fspec/validator.h deleted file mode 100644 index a20e98a..0000000 --- a/src/fspec/validator.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include <fspec/memory.h> - -#include <stdbool.h> - -struct fspec_validator; -struct fspec_validator { - struct { - size_t (*read)(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb); - } ops; - - struct { - struct fspec_mem input; - } mem; -}; - -bool -fspec_validator_parse(struct fspec_validator *validator, const char *name); diff --git a/src/util/membuf.c b/src/util/membuf.c deleted file mode 100644 index 0602679..0000000 --- a/src/util/membuf.c +++ /dev/null @@ -1,31 +0,0 @@ -#include "membuf.h" - -#include <stdlib.h> -#include <assert.h> -#include <memory.h> -#include <err.h> - -static void -membuf_bounds_check(const struct membuf *buf, const size_t nmemb) -{ - assert(buf); - - if (buf->mem.len < nmemb || buf->written > buf->mem.len - nmemb) - errx(EXIT_FAILURE, "%s: %zu bytes exceeds the maximum storage size of %zu bytes", __func__, buf->written + nmemb, buf->mem.len); -} - -void -membuf_terminate(struct membuf *buf, const void *data, const size_t data_sz) -{ - assert(data || !data_sz); - membuf_bounds_check(buf, data_sz); - memcpy((char*)buf->mem.data + buf->written, data, data_sz); -} - -void -membuf_append(struct membuf *buf, const void *data, const size_t data_sz) -{ - membuf_terminate(buf, data, data_sz); - buf->written += data_sz; - assert(buf->written <= buf->mem.len); -} diff --git a/src/util/membuf.h b/src/util/membuf.h deleted file mode 100644 index 86d8dde..0000000 --- a/src/util/membuf.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include <fspec/memory.h> - -struct membuf { - struct fspec_mem mem; - size_t written; -}; - -void -membuf_terminate(struct membuf *buf, const void *data, const size_t data_sz); - -void -membuf_append(struct membuf *buf, const void *data, const size_t data_sz); |