From 4750be2da326297830691c54adbab0a5dea14802 Mon Sep 17 00:00:00 2001 From: Jari Vetoniemi Date: Fri, 23 Feb 2018 12:54:43 +0200 Subject: wip --- src/bin/fspec/dump.c | 838 ++++++++++++++++++++++++++++++++++++++++ src/dump.c | 819 --------------------------------------- src/fspec/bcode-internal.h | 16 - src/fspec/bcode.c | 14 +- src/fspec/bcode.h | 110 ++++-- src/fspec/lexer.h | 12 +- src/fspec/lexer.rl | 616 ----------------------------- src/fspec/private/bcode-types.h | 16 + src/fspec/ragel/lexer-expr.h | 20 + src/fspec/ragel/lexer-expr.rl | 122 ++++++ src/fspec/ragel/lexer-stack.h | 42 ++ src/fspec/ragel/lexer-stack.rl | 153 ++++++++ src/fspec/ragel/lexer.rl | 180 +++++++++ src/fspec/ragel/validator.rl | 96 +++++ src/fspec/validator.h | 2 + src/fspec/validator.rl | 236 ----------- src/ragel/ragel.h | 30 -- src/ragel/ragel.rl | 91 ----- src/util/membuf.c | 31 ++ src/util/membuf.h | 14 + src/util/ragel/ragel.h | 30 ++ src/util/ragel/ragel.rl | 92 +++++ 22 files changed, 1727 insertions(+), 1853 deletions(-) create mode 100644 src/bin/fspec/dump.c delete mode 100644 src/dump.c delete mode 100644 src/fspec/bcode-internal.h delete mode 100644 src/fspec/lexer.rl create mode 100644 src/fspec/private/bcode-types.h create mode 100644 src/fspec/ragel/lexer-expr.h create mode 100644 src/fspec/ragel/lexer-expr.rl create mode 100644 src/fspec/ragel/lexer-stack.h create mode 100644 src/fspec/ragel/lexer-stack.rl create mode 100644 src/fspec/ragel/lexer.rl create mode 100644 src/fspec/ragel/validator.rl delete mode 100644 src/fspec/validator.rl delete mode 100644 src/ragel/ragel.h delete mode 100644 src/ragel/ragel.rl create mode 100644 src/util/membuf.c create mode 100644 src/util/membuf.h create mode 100644 src/util/ragel/ragel.h create mode 100644 src/util/ragel/ragel.rl (limited to 'src') diff --git a/src/bin/fspec/dump.c b/src/bin/fspec/dump.c new file mode 100644 index 0000000..07a6757 --- /dev/null +++ b/src/bin/fspec/dump.c @@ -0,0 +1,838 @@ +#include +#include 
+#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include "util/membuf.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +#if 0 + +static size_t +to_hex(const uint8_t *buf, const size_t buf_sz, char *out, const size_t out_sz, const bool reverse) +{ + assert(out); + const char nibble[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; + const uint8_t nbs = sizeof(nibble) - 1; + + size_t w = 0, last_non_zero = w; + for (size_t i = 0; i < buf_sz && out_sz > 2 && w < out_sz - 2; ++i) { + for (uint8_t c = 0; c < CHAR_BIT / 8 && w < out_sz; ++c) { + const size_t idx = (reverse ? (buf_sz - 1) - i : i); + const uint8_t hi = (buf[idx] >> (4 * (c + 1))) & nbs; + const uint8_t lo = (buf[idx] >> (8 * c)) & nbs; + + if (w || hi || lo) { + out[w++] = nibble[hi]; + out[w++] = nibble[lo]; + last_non_zero = (hi || lo ? w : last_non_zero); + } + } + } + + if (!w) { + out[w++] = nibble[0]; + out[w++] = nibble[0]; + } else { + w = last_non_zero; + } + + assert(w < out_sz); + out[w] = 0; + return w; +} + +static void +print_dec(const uint8_t *buf, const size_t size, const bool is_signed) +{ + char hex[2 * sizeof(fspec_num) + 1]; + to_hex(buf, size, hex, sizeof(hex), true); + + static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t"); + + if (is_signed) { + printf("%ld", (int64_t)strtoll(hex, NULL, 16)); + } else { + printf("%lu", (uint64_t)strtoull(hex, NULL, 16)); + } +} + +static void +print_udec(const uint8_t *buf, const size_t size) +{ + print_dec(buf, size, false); +} + +static void +print_sdec(const uint8_t *buf, const size_t size) +{ + print_dec(buf, size, true); +} + +static void +print_hex(const uint8_t *buf, const size_t size) +{ + char hex[2 * sizeof(fspec_num) + 1]; + to_hex(buf, size, hex, sizeof(hex), true); + printf("0x%s", hex); +} + +static void +print_array(const uint8_t *buf, const size_t size, const size_t nmemb, 
void (*fun)(const uint8_t *buf, const size_t size)) +{ + const int indent = 4; + if (nmemb > 8) { + printf("{\n%*s", indent, ""); + } else if (nmemb > 1) { + printf("{ "); + } + + for (size_t n = 0; n < nmemb; ++n) { + fun(buf + n * size, size); + printf("%s", (nmemb > 1 && n + 1 < nmemb ? ", " : "")); + + if (n + 1 < nmemb && !((n + 1) % 8)) + printf("\n%*s", indent, ""); + } + + printf("%s\n", (nmemb > 8 ? "\n}" : (nmemb > 1 ? " }" : ""))); +} + +static void +print_str(const char *buf, const size_t size, const size_t nmemb) +{ + const bool has_nl = memchr(buf, '\n', size * nmemb); + if (has_nl) + puts("```"); + + for (size_t n = 0; n < size * nmemb && buf[n] != 0; ++n) + printf("%c", buf[n]); + + puts((has_nl ? "```" : "")); +} + +struct code { + const enum fspec_op *start, *end, *data; +}; + +static void +dump_ops(const struct code *code) +{ + for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, false)) { + printf("%*s- ", (*op == FSPEC_OP_ARG ? 2 : 0), ""); + switch (*op) { + case FSPEC_OP_HEADER: + printf("header\n"); + break; + + case FSPEC_OP_DECLARATION: + printf("declaration\n"); + break; + + case FSPEC_OP_READ: + printf("read\n"); + break; + + case FSPEC_OP_GOTO: + printf("goto\n"); + break; + + case FSPEC_OP_FILTER: + printf("filter\n"); + break; + + case FSPEC_OP_VISUAL: + printf("visual\n"); + break; + + case FSPEC_OP_ARG: + { + const enum fspec_arg *arg = (void*)(op + 1); + printf("arg "); + switch (*arg) { + case FSPEC_ARG_STR: + printf("str %s\n", fspec_arg_get_cstr(arg, code->data)); + break; + + case FSPEC_ARG_VAR: + printf("var %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); + break; + + case FSPEC_ARG_NUM: + printf("num %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); + break; + + case FSPEC_ARG_OFF: + printf("off %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); + break; + + case FSPEC_ARG_DAT: + printf("dat %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); + break; + + case FSPEC_ARG_EOF: + printf("eof\n"); + break; + + 
case FSPEC_ARG_LAST: + break; + } + } + break; + + case FSPEC_OP_LAST: + break; + } + } +} + +static const enum fspec_op* +get_last_struct(const struct code *code) +{ + const enum fspec_op *last = NULL; + for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, true)) { + const enum fspec_arg *arg; + if (*op == FSPEC_OP_DECLARATION && + (arg = fspec_op_get_arg(op, code->end, 1, 1<data = realloc(buf->data, size))) + err(EXIT_FAILURE, "realloc(%zu)", size); + + buf->len = size; +} + +static inline void +dynbuf_resize_if_needed(struct dynbuf *buf, const size_t size) +{ + if (buf->len >= size) + return; + + dynbuf_resize(buf, size); +} + +static inline void +dynbuf_grow_if_needed(struct dynbuf *buf, const size_t nmemb) +{ + assert(buf); + if (buf->len >= nmemb && buf->written <= buf->len - nmemb) + return; + + dynbuf_resize(buf, buf->written + nmemb); +} + +static inline void +dynbuf_append(struct dynbuf *buf, const void *data, const size_t data_sz) +{ + dynbuf_grow_if_needed(buf, data_sz); + memcpy((char*)buf->data + buf->written, data, data_sz); + buf->written += data_sz; + assert(buf->written <= buf->len); +} + +static inline void +dynbuf_reset(struct dynbuf *buf) +{ + assert(buf); + buf->written = 0; +} + +static inline void +dynbuf_release(struct dynbuf *buf) +{ + assert(buf); + free(buf->data); + *buf = (struct dynbuf){0}; +} + +static void +display(const void *buf, const size_t size, const size_t nmemb, const bool is_signed, const enum fspec_visual visual) +{ + switch (visual) { + case FSPEC_VISUAL_NUL: + puts("..."); + break; + + case FSPEC_VISUAL_STR: + print_str(buf, size, nmemb); + break; + + case FSPEC_VISUAL_HEX: + print_array(buf, size, nmemb, print_hex); + break; + + case FSPEC_VISUAL_DEC: + print_array(buf, size, nmemb, (is_signed ? 
print_sdec : print_udec)); + break; + + case FSPEC_VISUAL_LAST: + break; + } +} + +struct decl { + struct dynbuf buf; + const char *name; + const void *start, *end; + size_t nmemb; + uint8_t size; + enum fspec_visual visual; + enum fspec_declaration declaration; +}; + +static void +decl_display(const struct decl *decl) +{ + assert(decl); + assert(decl->size * decl->nmemb <= decl->buf.len); + printf("%s: ", decl->name); + display(decl->buf.data, decl->size, decl->nmemb, false, decl->visual); +} + +static fspec_num +decl_get_num(const struct decl *decl) +{ + assert(decl); + assert(decl->nmemb == 1); + assert(decl->size * decl->nmemb <= decl->buf.len); + char hex[2 * sizeof(fspec_num) + 1]; + to_hex(decl->buf.data, decl->size, hex, sizeof(hex), true); + static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t"); + return (fspec_num)strtoull(hex, NULL, 16); +} + +static const char* +decl_get_cstr(const struct decl *decl) +{ + assert(decl); + return decl->buf.data; +} + +struct context { + struct code code; + struct decl *decl; + fspec_num decl_count; +}; + +static fspec_num +var_get_num(const struct context *context, const enum fspec_arg *arg) +{ + assert(context && arg); + return decl_get_num(&context->decl[fspec_arg_get_num(arg)]); +} + +static const char* +var_get_cstr(const struct context *context, const enum fspec_arg *arg) +{ + assert(context && arg); + return decl_get_cstr(&context->decl[fspec_arg_get_num(arg)]); +} + +enum type { + TYPE_NUM, + TYPE_STR, +}; + +static enum type +var_get_type(const struct context *context, const enum fspec_arg *arg) +{ + assert(context && arg); + const struct decl *decl = &context->decl[fspec_arg_get_num(arg)]; + switch (decl->visual) { + case FSPEC_VISUAL_DEC: + case FSPEC_VISUAL_HEX: + case FSPEC_VISUAL_NUL: + return TYPE_NUM; + + case FSPEC_VISUAL_STR: + return TYPE_STR; + + case FSPEC_VISUAL_LAST: + break; + } + return ~0; +} + +static void +filter_decompress(const struct context *context, 
struct decl *decl) +{ + assert(decl); + + const enum fspec_arg *arg; + if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<code.data); + if (!(codec = squash_get_codec(algo))) + errx(EXIT_FAILURE, "unknown compression '%s'", algo); + + SquashOptions *opts; + if (!(opts = squash_options_new(codec, NULL))) + errx(EXIT_FAILURE, "squash_options_new"); + + size_t dsize = squash_codec_get_uncompressed_size(codec, decl->buf.len, decl->buf.data); + dsize = (dsize ? dsize : decl->buf.len * 2); + + { + const enum fspec_arg *var = arg; + if ((arg = fspec_arg_next(arg, context->code.end, 1, 1<code.end, 1, 1<code.data); + if (!(var = fspec_arg_next(var, context->code.end, 1, ~0))) + errx(EXIT_FAILURE, "expected argument for key '%s'", key); + + switch (*var) { + case FSPEC_ARG_STR: + squash_options_set_string(opts, key, fspec_arg_get_cstr(var, context->code.data)); + break; + + case FSPEC_ARG_NUM: + squash_options_set_int(opts, key, fspec_arg_get_num(var)); + break; + + case FSPEC_ARG_VAR: + if (var_get_type(context, var) == TYPE_STR) { + squash_options_set_string(opts, key, var_get_cstr(context, var)); + } else { + squash_options_set_int(opts, key, var_get_num(context, var)); + } + break; + + default: + break; + } + } + } + + // what a horrible api + squash_object_ref(opts); + + SquashStatus r; + struct dynbuf buf = {0}; + dynbuf_resize(&buf, dsize); + while ((r = squash_codec_decompress_with_options(codec, &buf.len, buf.data, decl->buf.len, decl->buf.data, opts)) == SQUASH_BUFFER_FULL) + dynbuf_resize(&buf, dsize *= 2); + + dynbuf_resize_if_needed(&buf, (buf.written = buf.len)); + squash_object_unref(opts); + + if (r != SQUASH_OK) + errx(EXIT_FAILURE, "squash_codec_decompress(%zu, %zu) = %d: %s", dsize, decl->buf.len, r, squash_status_to_string(r)); + + dynbuf_release(&decl->buf); + decl->buf = buf; + decl->nmemb = buf.len / decl->size; +} + +static void +filter_decode(const struct context *context, struct decl *decl) +{ + assert(decl); + + const enum 
fspec_arg *arg; + if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<code.data); + + static const char *sys_encoding; + if (!sys_encoding) { + setlocale(LC_ALL, ""); + sys_encoding = nl_langinfo(CODESET); + } + + iconv_t iv; + if ((iv = iconv_open(sys_encoding, encoding)) == (iconv_t)-1) + err(EXIT_FAILURE, "iconv_open(%s, %s)", sys_encoding, encoding); + + struct dynbuf buf = {0}; + const uint8_t *in = decl->buf.data; + size_t in_left = decl->buf.written; + do { + char enc[1024], *out = enc; + size_t out_left = sizeof(enc); + + errno = 0; + if (iconv(iv, (char**)&in, &in_left, &out, &out_left) == (size_t)-1 && errno != E2BIG) + err(EXIT_FAILURE, "iconv(%s, %s)", sys_encoding, encoding); + + dynbuf_append(&buf, enc, sizeof(enc) - out_left); + } while (in_left > 0); + + iconv_close(iv); + + dynbuf_release(&decl->buf); + decl->buf = buf; + decl->nmemb = buf.len / decl->size; +} + +static void +call(const struct context *context, FILE *f) +{ + assert(context && f); + + struct decl *decl = NULL; + for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) { + if (decl && op == decl->end) { + decl_display(decl); + decl = NULL; + } + + switch (*op) { + case FSPEC_OP_DECLARATION: + { + const enum fspec_arg *arg; + arg = fspec_op_get_arg(op, context->code.end, 2, 1<decl[fspec_arg_get_num(arg)]; + dynbuf_reset(&decl->buf); + } + break; + + case FSPEC_OP_READ: + { + assert(decl); + const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<size = fspec_arg_get_num(arg) / 8; + decl->nmemb = 0; + + for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) { + switch (*var) { + case FSPEC_ARG_NUM: + case FSPEC_ARG_VAR: + { + const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var)); + if (v == 0) { + goto noop; + } else if (v > 1) { + const size_t nmemb = (decl->nmemb ? 
decl->nmemb : 1) * v; + dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb); + const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f); + decl->buf.written += decl->size * read; + decl->nmemb += read; + } + } + break; + + case FSPEC_ARG_STR: + break; + + case FSPEC_ARG_EOF: + { + const size_t nmemb = (decl->nmemb ? decl->nmemb : 1); + size_t read = 0, r = nmemb; + while (r == nmemb) { + dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb); + read += (r = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f)); + decl->buf.written += decl->size * r; + }; + decl->nmemb += read; + } + break; + + default: + break; + } + } +noop: + + if (!fspec_arg_next(arg, context->code.end, 1, ~0)) { + dynbuf_grow_if_needed(&decl->buf, decl->size * 1); + const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, 1, f); + decl->buf.written += decl->size * read; + decl->nmemb = read; + } + + assert(decl->nmemb != 0); + } + break; + + case FSPEC_OP_GOTO: + { + decl = NULL; + const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<decl[fspec_arg_get_num(arg)]; + struct context c = *context; + c.code.start = d->start; + c.code.end = d->end; + + for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) { + switch (*var) { + case FSPEC_ARG_NUM: + case FSPEC_ARG_VAR: + { + const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var)); + for (fspec_num i = 0; i < v; ++i) + call(&c, f); + } + break; + + // XXX: How to handle STR with stdin? + // With fseek would be easy. 
+ case FSPEC_ARG_STR: + break; + + case FSPEC_ARG_EOF: + while (!feof(f)) + call(&c, f); + break; + + default: + break; + } + } + + if (!fspec_arg_next(arg, context->code.end, 1, ~0)) + call(&c, f); + } + break; + + case FSPEC_OP_FILTER: + { + assert(decl); + const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<code.data); + for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { + if (!strcmp(filter, map[i].name)) { + struct context c = *context; + c.code.start = op; + map[i].fun(&c, decl); + break; + } + + if (i == ARRAY_SIZE(map) - 1) + warnx("unknown filter '%s'", filter); + } + } + break; + + case FSPEC_OP_VISUAL: + { + assert(decl); + const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<visual = fspec_arg_get_num(arg); + } + break; + + case FSPEC_OP_ARG: + case FSPEC_OP_HEADER: + case FSPEC_OP_LAST: + break; + } + } + + if (decl && context->code.end == decl->end) + decl_display(decl); +} + +static void +setup(const struct context *context) +{ + assert(context); + + for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) { + switch (*op) { + case FSPEC_OP_DECLARATION: + { + const enum fspec_arg *arg[4]; + arg[0] = fspec_op_get_arg(op, context->code.end, 1, 1<code.end, 1, 1<code.end, 1, 1<code.end, 1, 1<decl[id]; + decl->declaration = fspec_arg_get_num(arg[0]); + decl->name = fspec_arg_get_cstr(arg[3], context->code.data); + decl->visual = FSPEC_VISUAL_DEC; + decl->start = op; + decl->end = (char*)op + fspec_arg_get_num(arg[2]); + assert(!decl->buf.data); + } + break; + + default: + break; + } + } +} + +static void +execute(const struct fspec_mem *mem) +{ + assert(mem); + + struct context context = { + .code.start = mem->data, + .code.end = (void*)((char*)mem->data + mem->len), + .code.data = mem->data + }; + + printf("output: %zu bytes\n", mem->len); + dump_ops(&context.code); + + const enum fspec_arg *arg = fspec_op_get_arg(context.code.data, context.code.end, 2, 1<member) - 
offsetof(type, member))) + +struct lexer { + struct fspec_lexer lexer; + struct membuf output; + FILE *file; +}; + +static size_t +fspec_lexer_write(struct fspec_lexer *lexer, const enum fspec_lexer_section section, const void *output, const size_t size, const size_t nmemb) +{ + assert(lexer && output); + // struct lexer *l = container_of(lexer, struct lexer, lexer); + (void)lexer, (void)section, (void)size, (void)nmemb; + return nmemb; +} + +static size_t +fspec_lexer_read(struct fspec_lexer *lexer, void *input, const size_t size, const size_t nmemb) +{ + assert(lexer && input); + struct lexer *l = container_of(lexer, struct lexer, lexer); + return fread(input, size, nmemb, l->file); +} + +#if 0 +static size_t +fspec_validator_read(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb) +{ + assert(validator && ptr); + assert(ptr == validator->mem.input.data); + const size_t read = validator->mem.input.len / size; + assert((validator->mem.input.len && read == nmemb) || (!validator->mem.input.len && !read)); + validator->mem.input.len -= read * size; + assert(validator->mem.input.len == 0); + return read; +} +#endif + +int +main(int argc, const char *argv[]) +{ + if (argc < 2) + errx(EXIT_FAILURE, "usage: %s file.spec < data", argv[0]); + + char output[4096]; + + { + char input[4096]; + struct lexer l = { + .lexer = { + .ops.read = fspec_lexer_read, + .ops.write = fspec_lexer_write, + .mem.input = { .data = input, .len = sizeof(input) }, + }, + .file = fopen_or_die(argv[1], "rb"), + .output.mem = { .data = output, .len = sizeof(output) }, + }; + + if (!fspec_lexer_parse(&l.lexer, argv[1])) + exit(EXIT_FAILURE); + + fclose(l.file); + // bcode = l.lexer.mem.output; + } + +#if 0 + + { + struct fspec_validator validator = { + .ops.read = fspec_validator_read, + .mem.input = bcode, + }; + + if (!fspec_validator_parse(&validator, argv[1])) + exit(EXIT_FAILURE); + } + + execute(&bcode); +#endif + return EXIT_SUCCESS; +} diff --git a/src/dump.c 
b/src/dump.c deleted file mode 100644 index 8af7119..0000000 --- a/src/dump.c +++ /dev/null @@ -1,819 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - -static size_t -to_hex(const uint8_t *buf, const size_t buf_sz, char *out, const size_t out_sz, const bool reverse) -{ - assert(out); - const char nibble[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - const uint8_t nbs = sizeof(nibble) - 1; - - size_t w = 0, last_non_zero = w; - for (size_t i = 0; i < buf_sz && out_sz > 2 && w < out_sz - 2; ++i) { - for (uint8_t c = 0; c < CHAR_BIT / 8 && w < out_sz; ++c) { - const size_t idx = (reverse ? (buf_sz - 1) - i : i); - const uint8_t hi = (buf[idx] >> (4 * (c + 1))) & nbs; - const uint8_t lo = (buf[idx] >> (8 * c)) & nbs; - - if (w || hi || lo) { - out[w++] = nibble[hi]; - out[w++] = nibble[lo]; - last_non_zero = (hi || lo ? 
w : last_non_zero); - } - } - } - - if (!w) { - out[w++] = nibble[0]; - out[w++] = nibble[0]; - } else { - w = last_non_zero; - } - - assert(w < out_sz); - out[w] = 0; - return w; -} - -static void -print_dec(const uint8_t *buf, const size_t size, const bool is_signed) -{ - char hex[2 * sizeof(fspec_num) + 1]; - to_hex(buf, size, hex, sizeof(hex), true); - - static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t"); - - if (is_signed) { - printf("%ld", (int64_t)strtoll(hex, NULL, 16)); - } else { - printf("%lu", (uint64_t)strtoull(hex, NULL, 16)); - } -} - -static void -print_udec(const uint8_t *buf, const size_t size) -{ - print_dec(buf, size, false); -} - -static void -print_sdec(const uint8_t *buf, const size_t size) -{ - print_dec(buf, size, true); -} - -static void -print_hex(const uint8_t *buf, const size_t size) -{ - char hex[2 * sizeof(fspec_num) + 1]; - to_hex(buf, size, hex, sizeof(hex), true); - printf("0x%s", hex); -} - -static void -print_array(const uint8_t *buf, const size_t size, const size_t nmemb, void (*fun)(const uint8_t *buf, const size_t size)) -{ - const int indent = 4; - if (nmemb > 8) { - printf("{\n%*s", indent, ""); - } else if (nmemb > 1) { - printf("{ "); - } - - for (size_t n = 0; n < nmemb; ++n) { - fun(buf + n * size, size); - printf("%s", (nmemb > 1 && n + 1 < nmemb ? ", " : "")); - - if (n + 1 < nmemb && !((n + 1) % 8)) - printf("\n%*s", indent, ""); - } - - printf("%s\n", (nmemb > 8 ? "\n}" : (nmemb > 1 ? " }" : ""))); -} - -static void -print_str(const char *buf, const size_t size, const size_t nmemb) -{ - const bool has_nl = memchr(buf, '\n', size * nmemb); - if (has_nl) - puts("```"); - - for (size_t n = 0; n < size * nmemb && buf[n] != 0; ++n) - printf("%c", buf[n]); - - puts((has_nl ? 
"```" : "")); -} - -struct code { - const enum fspec_op *start, *end, *data; -}; - -static void -dump_ops(const struct code *code) -{ - for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, false)) { - printf("%*s- ", (*op == FSPEC_OP_ARG ? 2 : 0), ""); - switch (*op) { - case FSPEC_OP_HEADER: - printf("header\n"); - break; - - case FSPEC_OP_DECLARATION: - printf("declaration\n"); - break; - - case FSPEC_OP_READ: - printf("read\n"); - break; - - case FSPEC_OP_GOTO: - printf("goto\n"); - break; - - case FSPEC_OP_FILTER: - printf("filter\n"); - break; - - case FSPEC_OP_VISUAL: - printf("visual\n"); - break; - - case FSPEC_OP_ARG: - { - const enum fspec_arg *arg = (void*)(op + 1); - printf("arg "); - switch (*arg) { - case FSPEC_ARG_STR: - printf("str %s\n", fspec_arg_get_cstr(arg, code->data)); - break; - - case FSPEC_ARG_VAR: - printf("var %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); - break; - - case FSPEC_ARG_NUM: - printf("num %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); - break; - - case FSPEC_ARG_OFF: - printf("off %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); - break; - - case FSPEC_ARG_DAT: - printf("dat %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); - break; - - case FSPEC_ARG_EOF: - printf("eof\n"); - break; - - case FSPEC_ARG_LAST: - break; - } - } - break; - - case FSPEC_OP_LAST: - break; - } - } -} - -static const enum fspec_op* -get_last_struct(const struct code *code) -{ - const enum fspec_op *last = NULL; - for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, true)) { - const enum fspec_arg *arg; - if (*op == FSPEC_OP_DECLARATION && - (arg = fspec_op_get_arg(op, code->end, 1, 1<data = realloc(buf->data, size))) - err(EXIT_FAILURE, "realloc(%zu)", size); - - buf->len = size; -} - -static inline void -dynbuf_resize_if_needed(struct dynbuf *buf, const size_t size) -{ - if (buf->len >= size) - return; - - dynbuf_resize(buf, size); -} - -static inline void -dynbuf_grow_if_needed(struct 
dynbuf *buf, const size_t nmemb) -{ - assert(buf); - if (buf->len >= nmemb && buf->written <= buf->len - nmemb) - return; - - dynbuf_resize(buf, buf->written + nmemb); -} - -static inline void -dynbuf_append(struct dynbuf *buf, const void *data, const size_t data_sz) -{ - dynbuf_grow_if_needed(buf, data_sz); - memcpy((char*)buf->data + buf->written, data, data_sz); - buf->written += data_sz; - assert(buf->written <= buf->len); -} - -static inline void -dynbuf_reset(struct dynbuf *buf) -{ - assert(buf); - buf->written = 0; -} - -static inline void -dynbuf_release(struct dynbuf *buf) -{ - assert(buf); - free(buf->data); - *buf = (struct dynbuf){0}; -} - -static void -display(const void *buf, const size_t size, const size_t nmemb, const bool is_signed, const enum fspec_visual visual) -{ - switch (visual) { - case FSPEC_VISUAL_NUL: - puts("..."); - break; - - case FSPEC_VISUAL_STR: - print_str(buf, size, nmemb); - break; - - case FSPEC_VISUAL_HEX: - print_array(buf, size, nmemb, print_hex); - break; - - case FSPEC_VISUAL_DEC: - print_array(buf, size, nmemb, (is_signed ? 
print_sdec : print_udec)); - break; - - case FSPEC_VISUAL_LAST: - break; - } -} - -struct decl { - struct dynbuf buf; - const char *name; - const void *start, *end; - size_t nmemb; - uint8_t size; - enum fspec_visual visual; - enum fspec_declaration declaration; -}; - -static void -decl_display(const struct decl *decl) -{ - assert(decl); - assert(decl->size * decl->nmemb <= decl->buf.len); - printf("%s: ", decl->name); - display(decl->buf.data, decl->size, decl->nmemb, false, decl->visual); -} - -static fspec_num -decl_get_num(const struct decl *decl) -{ - assert(decl); - assert(decl->nmemb == 1); - assert(decl->size * decl->nmemb <= decl->buf.len); - char hex[2 * sizeof(fspec_num) + 1]; - to_hex(decl->buf.data, decl->size, hex, sizeof(hex), true); - static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t"); - return (fspec_num)strtoull(hex, NULL, 16); -} - -static const char* -decl_get_cstr(const struct decl *decl) -{ - assert(decl); - return decl->buf.data; -} - -struct context { - struct code code; - struct decl *decl; - fspec_num decl_count; -}; - -static fspec_num -var_get_num(const struct context *context, const enum fspec_arg *arg) -{ - assert(context && arg); - return decl_get_num(&context->decl[fspec_arg_get_num(arg)]); -} - -static const char* -var_get_cstr(const struct context *context, const enum fspec_arg *arg) -{ - assert(context && arg); - return decl_get_cstr(&context->decl[fspec_arg_get_num(arg)]); -} - -enum type { - TYPE_NUM, - TYPE_STR, -}; - -static enum type -var_get_type(const struct context *context, const enum fspec_arg *arg) -{ - assert(context && arg); - const struct decl *decl = &context->decl[fspec_arg_get_num(arg)]; - switch (decl->visual) { - case FSPEC_VISUAL_DEC: - case FSPEC_VISUAL_HEX: - case FSPEC_VISUAL_NUL: - return TYPE_NUM; - - case FSPEC_VISUAL_STR: - return TYPE_STR; - - case FSPEC_VISUAL_LAST: - break; - } - return ~0; -} - -static void -filter_decompress(const struct context *context, 
struct decl *decl) -{ - assert(decl); - - const enum fspec_arg *arg; - if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<code.data); - if (!(codec = squash_get_codec(algo))) - errx(EXIT_FAILURE, "unknown compression '%s'", algo); - - SquashOptions *opts; - if (!(opts = squash_options_new(codec, NULL))) - errx(EXIT_FAILURE, "squash_options_new"); - - size_t dsize = squash_codec_get_uncompressed_size(codec, decl->buf.len, decl->buf.data); - dsize = (dsize ? dsize : decl->buf.len * 2); - - { - const enum fspec_arg *var = arg; - if ((arg = fspec_arg_next(arg, context->code.end, 1, 1<code.end, 1, 1<code.data); - if (!(var = fspec_arg_next(var, context->code.end, 1, ~0))) - errx(EXIT_FAILURE, "expected argument for key '%s'", key); - - switch (*var) { - case FSPEC_ARG_STR: - squash_options_set_string(opts, key, fspec_arg_get_cstr(var, context->code.data)); - break; - - case FSPEC_ARG_NUM: - squash_options_set_int(opts, key, fspec_arg_get_num(var)); - break; - - case FSPEC_ARG_VAR: - if (var_get_type(context, var) == TYPE_STR) { - squash_options_set_string(opts, key, var_get_cstr(context, var)); - } else { - squash_options_set_int(opts, key, var_get_num(context, var)); - } - break; - - default: - break; - } - } - } - - // what a horrible api - squash_object_ref(opts); - - SquashStatus r; - struct dynbuf buf = {0}; - dynbuf_resize(&buf, dsize); - while ((r = squash_codec_decompress_with_options(codec, &buf.len, buf.data, decl->buf.len, decl->buf.data, opts)) == SQUASH_BUFFER_FULL) - dynbuf_resize(&buf, dsize *= 2); - - dynbuf_resize_if_needed(&buf, (buf.written = buf.len)); - squash_object_unref(opts); - - if (r != SQUASH_OK) - errx(EXIT_FAILURE, "squash_codec_decompress(%zu, %zu) = %d: %s", dsize, decl->buf.len, r, squash_status_to_string(r)); - - dynbuf_release(&decl->buf); - decl->buf = buf; - decl->nmemb = buf.len / decl->size; -} - -static void -filter_decode(const struct context *context, struct decl *decl) -{ - assert(decl); - - const enum 
fspec_arg *arg; - if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<code.data); - - static const char *sys_encoding; - if (!sys_encoding) { - setlocale(LC_ALL, ""); - sys_encoding = nl_langinfo(CODESET); - } - - iconv_t iv; - if ((iv = iconv_open(sys_encoding, encoding)) == (iconv_t)-1) - err(EXIT_FAILURE, "iconv_open(%s, %s)", sys_encoding, encoding); - - struct dynbuf buf = {0}; - const uint8_t *in = decl->buf.data; - size_t in_left = decl->buf.written; - do { - char enc[1024], *out = enc; - size_t out_left = sizeof(enc); - - errno = 0; - if (iconv(iv, (char**)&in, &in_left, &out, &out_left) == (size_t)-1 && errno != E2BIG) - err(EXIT_FAILURE, "iconv(%s, %s)", sys_encoding, encoding); - - dynbuf_append(&buf, enc, sizeof(enc) - out_left); - } while (in_left > 0); - - iconv_close(iv); - - dynbuf_release(&decl->buf); - decl->buf = buf; - decl->nmemb = buf.len / decl->size; -} - -static void -call(const struct context *context, FILE *f) -{ - assert(context && f); - - struct decl *decl = NULL; - for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) { - if (decl && op == decl->end) { - decl_display(decl); - decl = NULL; - } - - switch (*op) { - case FSPEC_OP_DECLARATION: - { - const enum fspec_arg *arg; - arg = fspec_op_get_arg(op, context->code.end, 2, 1<decl[fspec_arg_get_num(arg)]; - dynbuf_reset(&decl->buf); - } - break; - - case FSPEC_OP_READ: - { - assert(decl); - const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<size = fspec_arg_get_num(arg) / 8; - decl->nmemb = 0; - - for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) { - switch (*var) { - case FSPEC_ARG_NUM: - case FSPEC_ARG_VAR: - { - const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var)); - if (v == 0) { - goto noop; - } else if (v > 1) { - const size_t nmemb = (decl->nmemb ? 
decl->nmemb : 1) * v; - dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb); - const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f); - decl->buf.written += decl->size * read; - decl->nmemb += read; - } - } - break; - - case FSPEC_ARG_STR: - break; - - case FSPEC_ARG_EOF: - { - const size_t nmemb = (decl->nmemb ? decl->nmemb : 1); - size_t read = 0, r = nmemb; - while (r == nmemb) { - dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb); - read += (r = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f)); - decl->buf.written += decl->size * r; - }; - decl->nmemb += read; - } - break; - - default: - break; - } - } -noop: - - if (!fspec_arg_next(arg, context->code.end, 1, ~0)) { - dynbuf_grow_if_needed(&decl->buf, decl->size * 1); - const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, 1, f); - decl->buf.written += decl->size * read; - decl->nmemb = read; - } - - assert(decl->nmemb != 0); - } - break; - - case FSPEC_OP_GOTO: - { - decl = NULL; - const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<decl[fspec_arg_get_num(arg)]; - struct context c = *context; - c.code.start = d->start; - c.code.end = d->end; - - for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) { - switch (*var) { - case FSPEC_ARG_NUM: - case FSPEC_ARG_VAR: - { - const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var)); - for (fspec_num i = 0; i < v; ++i) - call(&c, f); - } - break; - - // XXX: How to handle STR with stdin? - // With fseek would be easy. 
- case FSPEC_ARG_STR: - break; - - case FSPEC_ARG_EOF: - while (!feof(f)) - call(&c, f); - break; - - default: - break; - } - } - - if (!fspec_arg_next(arg, context->code.end, 1, ~0)) - call(&c, f); - } - break; - - case FSPEC_OP_FILTER: - { - assert(decl); - const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<code.data); - for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { - if (!strcmp(filter, map[i].name)) { - struct context c = *context; - c.code.start = op; - map[i].fun(&c, decl); - break; - } - - if (i == ARRAY_SIZE(map) - 1) - warnx("unknown filter '%s'", filter); - } - } - break; - - case FSPEC_OP_VISUAL: - { - assert(decl); - const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<visual = fspec_arg_get_num(arg); - } - break; - - case FSPEC_OP_ARG: - case FSPEC_OP_HEADER: - case FSPEC_OP_LAST: - break; - } - } - - if (decl && context->code.end == decl->end) - decl_display(decl); -} - -static void -setup(const struct context *context) -{ - assert(context); - - for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) { - switch (*op) { - case FSPEC_OP_DECLARATION: - { - const enum fspec_arg *arg[4]; - arg[0] = fspec_op_get_arg(op, context->code.end, 1, 1<code.end, 1, 1<code.end, 1, 1<code.end, 1, 1<decl[id]; - decl->declaration = fspec_arg_get_num(arg[0]); - decl->name = fspec_arg_get_cstr(arg[3], context->code.data); - decl->visual = FSPEC_VISUAL_DEC; - decl->start = op; - decl->end = (char*)op + fspec_arg_get_num(arg[2]); - assert(!decl->buf.data); - } - break; - - default: - break; - } - } -} - -static void -execute(const struct fspec_mem *mem) -{ - assert(mem); - - struct context context = { - .code.start = mem->data, - .code.end = (void*)((char*)mem->data + mem->len), - .code.data = mem->data - }; - - printf("output: %zu bytes\n", mem->len); - dump_ops(&context.code); - - const enum fspec_arg *arg = fspec_op_get_arg(context.code.data, context.code.end, 2, 1<member) - 
offsetof(type, member))) - -struct lexer { - struct fspec_lexer lexer; - FILE *file; -}; - -static size_t -fspec_lexer_read(struct fspec_lexer *lexer, void *ptr, const size_t size, const size_t nmemb) -{ - assert(lexer && ptr); - struct lexer *l = container_of(lexer, struct lexer, lexer); - return fread(ptr, size, nmemb, l->file); -} - -static size_t -fspec_validator_read(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb) -{ - assert(validator && ptr); - assert(ptr == validator->mem.input.data); - const size_t read = validator->mem.input.len / size; - assert((validator->mem.input.len && read == nmemb) || (!validator->mem.input.len && !read)); - validator->mem.input.len -= read * size; - assert(validator->mem.input.len == 0); - return read; -} - -int -main(int argc, const char *argv[]) -{ - if (argc < 2) - errx(EXIT_FAILURE, "usage: %s file.spec < data", argv[0]); - - char output[4096]; - struct fspec_mem bcode = {0}; - - { - char input[4096]; - struct lexer l = { - .lexer = { - .ops.read = fspec_lexer_read, - .mem.input = { .data = input, sizeof(input) }, - .mem.output = { .data = output, sizeof(output) }, - }, - .file = fopen_or_die(argv[1], "rb"), - }; - - if (!fspec_lexer_parse(&l.lexer, argv[1])) - exit(EXIT_FAILURE); - - fclose(l.file); - bcode = l.lexer.mem.output; - } - - { - struct fspec_validator validator = { - .ops.read = fspec_validator_read, - .mem.input = bcode, - }; - - if (!fspec_validator_parse(&validator, argv[1])) - exit(EXIT_FAILURE); - } - - execute(&bcode); - return EXIT_SUCCESS; -} diff --git a/src/fspec/bcode-internal.h b/src/fspec/bcode-internal.h deleted file mode 100644 index 8c9ce74..0000000 --- a/src/fspec/bcode-internal.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include -#include - -/** maximum size of string literals */ -#define PRI_FSPEC_STRSZ PRIu8 -typedef uint8_t fspec_strsz; - -/** maximum range of variable ids */ -#define PRI_FSPEC_VAR PRIu16 -typedef uint16_t fspec_var; - -/** maximum 
range of bytecode offsets */ -#define PRI_FSPEC_OFF PRIu32 -typedef uint32_t fspec_off; diff --git a/src/fspec/bcode.c b/src/fspec/bcode.c index 0d0d3fd..70e4b08 100644 --- a/src/fspec/bcode.c +++ b/src/fspec/bcode.c @@ -1,5 +1,6 @@ #include -#include "bcode-internal.h" +#include +#include "private/bcode-types.h" #include #include @@ -8,7 +9,15 @@ static_assert(sizeof(fspec_off) <= sizeof(((struct fspec_mem*)0)->len), "fspec_off should not be larger than what fspec_mem can represent"); static_assert(sizeof(enum fspec_op) == sizeof(uint8_t), "enum fspec_op is expected to have size of uint8_t"); -static_assert(sizeof(enum fspec_arg) == sizeof(uint8_t), "enum fspec_arg is expected to have size of uint8_t"); +static_assert(sizeof(struct fspec_bcode) == sizeof(enum fspec_op), "struct fspec_bcode is expected to have size of enum fspec_op"); +static_assert(sizeof(FSPEC_OP_LAST) <= 8, "op codes need more than 3 bits to be represented"); + +#if 0 +uint8_t +fspec_op_get_num_args(const struct fspec_op_code *code) +{ + return code->op >> 2; +} static fspec_off arg_data_len(const enum fspec_arg *arg) @@ -187,3 +196,4 @@ fspec_op_next(const enum fspec_op *start, const void *end, const bool skip_args) return NULL; } +#endif diff --git a/src/fspec/bcode.h b/src/fspec/bcode.h index d84060e..3d216af 100644 --- a/src/fspec/bcode.h +++ b/src/fspec/bcode.h @@ -1,7 +1,5 @@ #pragma once -#include - #include #include #include @@ -10,34 +8,6 @@ #define PRI_FSPEC_NUM PRIu64 typedef uint64_t fspec_num; -enum fspec_arg { - FSPEC_ARG_DAT, - FSPEC_ARG_OFF, - FSPEC_ARG_NUM, - FSPEC_ARG_VAR, - FSPEC_ARG_STR, - FSPEC_ARG_EOF, - FSPEC_ARG_LAST, -} __attribute__((packed)); - -void -fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem); - -fspec_num -fspec_arg_get_num(const enum fspec_arg *arg); - -const char* -fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data); - -const enum fspec_arg* -fspec_arg_next(const enum fspec_arg *arg, const void *end, const 
uint8_t nth, const uint32_t expect); - -enum fspec_declaration { - FSPEC_DECLARATION_STRUCT, - FSPEC_DECLARATION_MEMBER, - FSPEC_DECLARATION_LAST, -} __attribute__((packed)); - enum fspec_visual { FSPEC_VISUAL_NUL, FSPEC_VISUAL_DEC, @@ -46,19 +16,77 @@ enum fspec_visual { FSPEC_VISUAL_LAST, } __attribute__((packed)); +enum fspec_type { + FSPEC_TYPE_CODE, + FSPEC_TYPE_CALL, + FSPEC_TYPE_U8, + FSPEC_TYPE_S8, + FSPEC_TYPE_U16, + FSPEC_TYPE_S16, + FSPEC_TYPE_U32, + FSPEC_TYPE_S32, + FSPEC_TYPE_U64, + FSPEC_TYPE_S64, + FSPEC_TYPE_LAST, +} __attribute__((packed)); + +enum fspec_storage { + FSPEC_STORAGE_DATA, + FSPEC_STORAGE_LOCAL, + FSPEC_STORAGE_LAST, +} __attribute__((packed)); + +enum fspec_builtin { + FSPEC_BUILTIN_ADD, + FSPEC_BUILTIN_SUB, + FSPEC_BUILTIN_MUL, + FSPEC_BUILTIN_DIV, + FSPEC_BUILTIN_MOD, + FSPEC_BUILTIN_BIT_AND, + FSPEC_BUILTIN_BIT_OR, + FSPEC_BUILTIN_BIT_XOR, + FSPEC_BUILTIN_BIT_LEFT, + FSPEC_BUILTIN_BIT_RIGHT, + FSPEC_BUILTIN_DECLARE, + FSPEC_BUILTIN_READ, + FSPEC_BUILTIN_FILTER, + FSPEC_BUILTIN_VISUAL, + FSPEC_BUILTIN_LAST, +} __attribute__((packed)); + enum fspec_op { - FSPEC_OP_ARG, - FSPEC_OP_HEADER, - FSPEC_OP_DECLARATION, - FSPEC_OP_READ, - FSPEC_OP_GOTO, - FSPEC_OP_FILTER, - FSPEC_OP_VISUAL, + FSPEC_OP_BUILTIN, + FSPEC_OP_PUSH, + FSPEC_OP_POP, + FSPEC_OP_VAR, FSPEC_OP_LAST, } __attribute__((packed)); -const enum fspec_op* -fspec_op_next(const enum fspec_op *op, const void *end, const bool skip_args); +struct fspec_bcode { + char op, data[]; +} __attribute__((packed)); + +#if 0 +('fspc')(version) +OP_BUILTIN (declare) OP_PUSH OP_VAR8 (storage) OP_VAR8 (type) OP_VAR [name] OP_POP +OP_BUILTIN (filter) +OP_FUN FUN_ASSIGN VAR0 VAR [data] +OP_FUN FUN_READ +#endif -const enum fspec_arg* -fspec_op_get_arg(const enum fspec_op *op, const void *end, const uint8_t nth, const uint32_t expect); +#if 0 +uint8_t +fspec_op_get_num_args(const struct fspec_bcode *code); + +const struct fspec_bcode* +fspec_op_next(const struct fspec_bcode *code, const void *end, 
const bool skip_args); + +const struct fspec_bcode* +fspec_op_get_arg(const struct fspec_bcode *code, const void *end, const uint8_t nth, const uint32_t expect); + +const struct fspec_arg* +fspec_arg_next(const struct fspec_bcode *code, const void *end, const uint8_t nth, const uint32_t expect); + +fspec_num +fspec_ref_get_num(const struct fspec_bcode *code); +#endif diff --git a/src/fspec/lexer.h b/src/fspec/lexer.h index 7b60e6b..ef6e059 100644 --- a/src/fspec/lexer.h +++ b/src/fspec/lexer.h @@ -2,14 +2,22 @@ #include +#include + +enum fspec_lexer_section { + FSPEC_SECTION_DATA, + FSPEC_SECTION_CODE, +}; + struct fspec_lexer; struct fspec_lexer { struct { - size_t (*read)(struct fspec_lexer *lexer, void *ptr, const size_t size, const size_t nmemb); + size_t (*read)(struct fspec_lexer *lexer, void *input, const size_t size, const size_t nmemb); + size_t (*write)(struct fspec_lexer *lexer, const enum fspec_lexer_section section, const void *output, const size_t size, const size_t nmemb); } ops; struct { - struct fspec_mem input, output; + struct fspec_mem input; } mem; }; diff --git a/src/fspec/lexer.rl b/src/fspec/lexer.rl deleted file mode 100644 index 51d1a54..0000000 --- a/src/fspec/lexer.rl +++ /dev/null @@ -1,616 +0,0 @@ -#include "ragel/ragel.h" -#include -#include -#include "bcode-internal.h" - -#include -#include -#include -#include - -#define PLACEHOLDER 0xDEADBEEF -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - -typedef uint8_t fspec_strsz; - -struct membuf { - struct fspec_mem mem; - fspec_off written; -}; - -static void -membuf_bounds_check(const struct membuf *buf, const fspec_off nmemb) -{ - assert(buf); - - if (buf->mem.len < nmemb || buf->written > buf->mem.len - nmemb) - errx(EXIT_FAILURE, "%s: %" PRI_FSPEC_OFF " bytes exceeds the maximum storage size of %zu bytes", __func__, buf->written + nmemb, buf->mem.len); -} - -static void -membuf_terminate(struct membuf *buf, const void *data, const fspec_off data_sz) -{ - membuf_bounds_check(buf, 
data_sz); - memcpy((char*)buf->mem.data + buf->written, data, data_sz); -} - -static void -membuf_replace(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz) -{ - assert(buf->mem.len >= data_sz && off <= buf->mem.len - data_sz); - memcpy((char*)buf->mem.data + off, data, data_sz); -} - -static void -membuf_append_at(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz) -{ - assert(off <= buf->written); - membuf_bounds_check(buf, data_sz); - const size_t rest = buf->written - off; - memmove((char*)buf->mem.data + off + data_sz, (char*)buf->mem.data + off, rest); - membuf_replace(buf, off, data, data_sz); - buf->written += data_sz; - assert(buf->written <= buf->mem.len); -} - -static void -membuf_append(struct membuf *buf, const void *data, const fspec_off data_sz) -{ - membuf_append_at(buf, buf->written, data, data_sz); -} - -struct varbuf { - struct membuf buf; - fspec_off offset; -}; - -static inline void -varbuf_begin(struct varbuf *var) -{ - assert(var); - var->offset = var->buf.written; - assert(var->offset <= var->buf.mem.len); -} - -static void -varbuf_reset(struct varbuf *var) -{ - assert(var); - var->offset = var->buf.written = 0; -} - -static inline void -varbuf_remove_last(struct varbuf *var) -{ - assert(var); - assert(var->buf.written >= var->offset); - const fspec_off size = var->buf.written - var->offset; - assert(var->buf.written >= size); - var->buf.written -= size; - assert(var->buf.written <= var->buf.mem.len); -} - -enum section { - SECTION_DATA, - SECTION_CODE, - SECTION_LAST, -}; - -struct codebuf { - struct membuf buf; - const void *decl[FSPEC_DECLARATION_LAST], *end[SECTION_LAST], *strings; - fspec_var declarations; -}; - -static void -codebuf_append(struct codebuf *code, const enum section section, const void *data, const fspec_off data_sz) -{ - assert(code->end[section]); - const fspec_off off = (char*)code->end[section] - (char*)code->buf.mem.data; - membuf_append_at(&code->buf, 
off, data, data_sz); - - for (enum section s = section; s < ARRAY_SIZE(code->end); ++s) { - code->end[s] = (char*)code->end[s] + data_sz; - assert((char*)code->end[s] <= (char*)code->buf.mem.data + code->buf.mem.len); - } - - if (section == SECTION_DATA) { - for (enum fspec_declaration d = 0; d < ARRAY_SIZE(code->decl); ++d) { - code->decl[d] = (code->decl[d] ? (char*)code->decl[d] + data_sz : NULL); - assert((char*)code->decl[d] <= (char*)code->buf.mem.data + code->buf.mem.len); - } - } - - assert(code->end[SECTION_DATA] <= code->end[SECTION_CODE]); - assert((char*)code->end[SECTION_CODE] == (char*)code->buf.mem.data + code->buf.written); -} - -static void -codebuf_append_op(struct codebuf *code, const enum fspec_op op) -{ - codebuf_append(code, SECTION_CODE, &op, sizeof(op)); -} - -static uint8_t -arg_sizeof(const enum fspec_arg type) -{ - switch (type) { - case FSPEC_ARG_DAT: - case FSPEC_ARG_OFF: - case FSPEC_ARG_STR: - return sizeof(fspec_off); - - case FSPEC_ARG_NUM: - return sizeof(fspec_num); - - case FSPEC_ARG_VAR: - return sizeof(fspec_var); - - case FSPEC_ARG_EOF: - break; - - case FSPEC_ARG_LAST: - errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, type); - } - - return 0; -} - -static void -codebuf_append_arg(struct codebuf *code, const enum fspec_arg type, const void *v) -{ - assert(code); - codebuf_append_op(code, FSPEC_OP_ARG); - codebuf_append(code, SECTION_CODE, &type, sizeof(type)); - codebuf_append(code, SECTION_CODE, v, arg_sizeof(type)); -} - -static void -codebuf_replace_arg(struct codebuf *code, const enum fspec_arg *arg, const enum fspec_arg type, const void *v) -{ - assert(code && arg); - assert(*arg == type); - const fspec_off off = ((char*)arg + 1) - (char*)code->buf.mem.data; - membuf_replace(&code->buf, off, v, arg_sizeof(type)); -} - -static bool -get_string_offset(const void *start, const void *end, const void *str, const fspec_strsz str_sz, void const **out_off) -{ - assert(out_off); - - while (start < end) { - 
fspec_strsz len; - memcpy(&len, start, sizeof(len)); - if (len == str_sz && !memcmp((char*)start + sizeof(len), str, len)) { - *out_off = start; - return true; - } - start = (char*)start + sizeof(len) + len + 1; - } - - return false; -} - -static void -codebuf_append_arg_cstr(struct codebuf *code, const void *str, const fspec_strsz str_sz) -{ - const void *ptr; - if (!get_string_offset(code->strings, code->end[SECTION_DATA], str, str_sz, &ptr)) { - ptr = code->end[SECTION_DATA]; - codebuf_append(code, SECTION_DATA, &str_sz, sizeof(str_sz)); - codebuf_append(code, SECTION_DATA, str, str_sz); - codebuf_append(code, SECTION_DATA, (char[]){ 0 }, 1); - } - - const fspec_off off = (char*)ptr - (char*)code->buf.mem.data; - codebuf_append_arg(code, FSPEC_ARG_STR, &off); -} - -static const enum fspec_op* -get_named_op(const enum fspec_op *start, const void *end, const void *data, const enum fspec_op op, const uint8_t nth, const void *name, const fspec_strsz name_sz, fspec_var *out_id) -{ - fspec_var id = 0; - if ((void*)start < end && *start == FSPEC_OP_DECLARATION) - id = fspec_arg_get_num(fspec_op_get_arg(start, end, 2, 1<decl[FSPEC_DECLARATION_STRUCT] : code->end[SECTION_DATA]); - return get_named_op(start, code->end[SECTION_CODE], code->buf.mem.data, FSPEC_OP_DECLARATION, 4, str->data, str->len, out_id); -} - -static bool -codebuf_append_arg_var(struct codebuf *code, const bool member, const struct fspec_mem *var) -{ - fspec_var id = -1; - if (!get_declaration(code, member, var, &id)) - return false; - - codebuf_append_arg(code, FSPEC_ARG_VAR, &id); - return true; -} - -static void -codebuf_append_declaration(struct codebuf *code, const enum fspec_declaration decl) -{ - code->decl[decl] = code->end[SECTION_CODE]; - codebuf_append_op(code, FSPEC_OP_DECLARATION); - codebuf_append_arg(code, FSPEC_ARG_NUM, (fspec_num[]){ decl }); - codebuf_append_arg(code, FSPEC_ARG_NUM, (fspec_num[]){ code->declarations++ }); - codebuf_append_arg(code, FSPEC_ARG_OFF, (fspec_off[]){ 
PLACEHOLDER }); -} - -enum stack_type { - STACK_STR, - STACK_NUM, -}; - -struct stack { - union { - struct fspec_mem str; - uint64_t num; - }; - enum stack_type type; -}; - -static const char* -stack_type_to_str(const enum stack_type type) -{ - switch (type) { - case STACK_STR: return "str"; - case STACK_NUM: return "num"; - }; - return "unknown"; -} - -static void -stack_check_type(const struct stack *stack, const enum stack_type type) -{ - assert(stack); - - if (stack->type != type) - errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type)); -} - -static const struct fspec_mem* -stack_get_str(const struct stack *stack) -{ - stack_check_type(stack, STACK_STR); - return &stack->str; -} - -static uint64_t -stack_get_num(const struct stack *stack) -{ - stack_check_type(stack, STACK_NUM); - return stack->num; -} - -struct state { - struct ragel ragel; - struct stack stack; - struct codebuf out; - struct varbuf var; -}; - -static void -state_stack_num(struct state *state, const uint8_t base) -{ - assert(state); - membuf_terminate(&state->var.buf, (char[]){ 0 }, 1); - const char *str = (char*)state->var.buf.mem.data + state->var.offset; - state->stack.type = STACK_NUM; - state->stack.num = strtoll(str + (base == 16 && *str == 'x'), NULL, base); - varbuf_remove_last(&state->var); -} - -static void -state_append_arg_var(struct state *state, const bool member, const struct fspec_mem *str) -{ - assert(state && str); - - if (!codebuf_append_arg_var(&state->out, member, str)) - ragel_throw_error(&state->ragel, "'%s' undeclared", (char*)str->data); -} - -static void -state_append_declaration(struct state *state, const enum fspec_declaration decl, const struct fspec_mem *str) -{ - assert(state && str); - - if (get_declaration(&state->out, (decl == FSPEC_DECLARATION_MEMBER), str, NULL)) - ragel_throw_error(&state->ragel, "'%s' redeclared", (char*)str->data); - - 
codebuf_append_declaration(&state->out, decl); - codebuf_append_arg_cstr(&state->out, str->data, str->len); -} - -static void -state_finish_declaration(struct state *state, const enum fspec_declaration decl) -{ - assert(state && state->out.decl[decl]); - const char *end = state->out.end[SECTION_CODE]; - const fspec_off off = end - (char*)state->out.decl[decl]; - codebuf_replace_arg(&state->out, fspec_op_get_arg(state->out.decl[decl], end, 3, 1<out.decl[decl] = NULL; -} - -%%{ - machine fspec_lexer; - variable p state.ragel.p; - variable pe state.ragel.pe; - variable eof state.ragel.eof; - write data noerror nofinal; - - action arg_eof { - codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL); - } - - action arg_num { - codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) }); - } - - action arg_str { - const struct fspec_mem *str = stack_get_str(&state.stack); - codebuf_append_arg_cstr(&state.out, str->data, str->len); - } - - action arg_var { - state_append_arg_var(&state, true, stack_get_str(&state.stack)); - } - - action filter { - codebuf_append_op(&state.out, FSPEC_OP_FILTER); - } - - action goto { - codebuf_append_op(&state.out, FSPEC_OP_GOTO); - state_append_arg_var(&state, false, stack_get_str(&state.stack)); - } - - action vnul { - codebuf_append_op(&state.out, FSPEC_OP_VISUAL); - codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL }); - } - - action vdec { - codebuf_append_op(&state.out, FSPEC_OP_VISUAL); - codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC }); - } - - action vhex { - codebuf_append_op(&state.out, FSPEC_OP_VISUAL); - codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX }); - } - - action vstr { - codebuf_append_op(&state.out, FSPEC_OP_VISUAL); - codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR }); - } - - action r8 { - codebuf_append_op(&state.out, FSPEC_OP_READ); - codebuf_append_arg(&state.out, 
FSPEC_ARG_NUM, (fspec_num[]){ 8 }); - } - - action r16 { - codebuf_append_op(&state.out, FSPEC_OP_READ); - codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 }); - } - - action r32 { - codebuf_append_op(&state.out, FSPEC_OP_READ); - codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 }); - } - - action r64 { - codebuf_append_op(&state.out, FSPEC_OP_READ); - codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 }); - } - - action member_end { - state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER); - } - - action member_start { - state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack)); - } - - action struct_end { - state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT); - } - - action struct_start { - state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack)); - } - - action stack_oct { - state_stack_num(&state, 8); - } - - action stack_hex { - state_stack_num(&state, 16); - } - - action stack_dec { - state_stack_num(&state, 10); - } - - action stack_str { - membuf_terminate(&state.var.buf, (char[]){ 0 }, 1); - state.stack.type = STACK_STR; - state.stack.str = state.var.buf.mem; - state.stack.str.len = state.var.buf.written; - } - - action store_esc_num { - const fspec_num v = stack_get_num(&state.stack); - assert(v <= 255); - const uint8_t u8 = v; - membuf_append(&state.var.buf, &u8, sizeof(u8)); - } - - action store_esc { - const struct { const char e, v; } map[] = { - { .e = 'a', .v = '\a' }, - { .e = 'b', .v = '\b' }, - { .e = 'f', .v = '\f' }, - { .e = 'n', .v = '\n' }, - { .e = 'r', .v = '\r' }, - { .e = 't', .v = '\t' }, - { .e = 'v', .v = '\v' }, - { .e = '\\', .v = '\\' }, - { .e = '\'', .v = '\'' }, - { .e = '\"', .v = '"' }, - { .e = 'e', .v = 0x1B }, - }; - - for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { - if (*state.ragel.p != map[i].e) - continue; - - membuf_append(&state.var.buf, &map[i].v, sizeof(map[i].v)); - break; - } - } - - action store { - 
membuf_append(&state.var.buf, state.ragel.p, 1); - } - - action begin_num { - varbuf_begin(&state.var); - } - - action begin_str { - varbuf_reset(&state.var); - } - - action type_err { - ragel_throw_error(&state.ragel, "unknown type name"); - } - - action visual_err { - ragel_throw_error(&state.ragel, "unknown visualization"); - } - - action syntax_err { - ragel_throw_error(&state.ragel, "malformed input (machine failed here or in next expression)"); - } - - action line { - ragel_advance_line(&state.ragel); - } - - # Semantic - quote = ['"]; - newline = '\n'; - esc = [abfnrtv\\'"e]; - esc_chr = '\\'; - esc_hex = 'x' <: xdigit{2}; - hex = '0x' <: xdigit{1,}; - oct = [0-7]{1,3}; - dec = [\-+]? <: (([1-9] <: digit*) | '0'); - valid = ^cntrl; - comment = '//' <: valid* :>> newline; - type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's64') %r64; - visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr; - reserved = 'struct' | type | visual; - name = ((alpha | '_') <: (alnum | '_')*) - reserved; - - # Stack - stack_name = name >begin_str $store %stack_str; - stack_hex = hex >begin_num $store %stack_hex; - stack_dec = dec >begin_num $store %stack_dec; - stack_oct = oct >begin_num $store %stack_oct; - stack_esc_hex = esc_hex >begin_num $store %stack_hex; - stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc); - stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote; - stack_num = stack_dec | stack_hex; - - # Catchers - catch_struct = 'struct ' <: stack_name; - catch_type = (catch_struct %goto | type) $!type_err; - catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var; - catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']'; - catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? 
<: (', ' <: catch_args)* :>> ')')?; - catch_visual = ' ' <: visual $!visual_err; - - # Abstract - member = stack_name %member_start :> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) :>> ';' %member_end; - struct = catch_struct %struct_start :>> ' {' <: (space | comment | member)* :>> '};' %struct_end; - line = valid* :>> newline %line; - main := ((space | comment | struct)* & line*) $!syntax_err; -}%% - -bool -fspec_lexer_parse(struct fspec_lexer *lexer, const char *name) -{ - int cs; - %% write init; - - (void)fspec_lexer_en_main; - assert(lexer); - assert(lexer->ops.read); - assert(lexer->mem.input.data && lexer->mem.input.len); - assert(lexer->mem.output.data && lexer->mem.output.len); - assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); - assert(lexer->mem.output.len <= (fspec_off)~0 && "output storage size exceeds fspec_off range"); - - char var[256]; - struct state state = { - .ragel.name = name, - .ragel.lineno = 1, - .var.buf.mem = { .data = var, .len = sizeof(var) }, - .out.buf.mem = lexer->mem.output, - }; - - static const fspec_num version = 0; - state.out.end[SECTION_CODE] = state.out.end[SECTION_DATA] = state.out.buf.mem.data; - codebuf_append_op(&state.out, FSPEC_OP_HEADER); - codebuf_append_arg(&state.out, FSPEC_ARG_NUM, &version); - codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ PLACEHOLDER }); - codebuf_append_arg(&state.out, FSPEC_ARG_DAT, (fspec_off[]){ PLACEHOLDER }); - state.out.end[SECTION_DATA] = state.out.end[SECTION_CODE]; - state.out.strings = state.out.end[SECTION_DATA]; - - struct fspec_mem input = lexer->mem.input; - for (bool eof = false; !state.ragel.error && !eof;) { - const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len); - const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; - ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); - %% write exec; - } - - { - const void *end = 
state.out.end[SECTION_CODE]; - codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 2, 1<mem.output.len = state.out.buf.written; - return !state.ragel.error; -} diff --git a/src/fspec/private/bcode-types.h b/src/fspec/private/bcode-types.h new file mode 100644 index 0000000..8c9ce74 --- /dev/null +++ b/src/fspec/private/bcode-types.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +/** maximum size of string literals */ +#define PRI_FSPEC_STRSZ PRIu8 +typedef uint8_t fspec_strsz; + +/** maximum range of variable ids */ +#define PRI_FSPEC_VAR PRIu16 +typedef uint16_t fspec_var; + +/** maximum range of bytecode offsets */ +#define PRI_FSPEC_OFF PRIu32 +typedef uint32_t fspec_off; diff --git a/src/fspec/ragel/lexer-expr.h b/src/fspec/ragel/lexer-expr.h new file mode 100644 index 0000000..904736d --- /dev/null +++ b/src/fspec/ragel/lexer-expr.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +#include + +struct fspec_expr; +struct fspec_expr { + struct { + size_t (*read)(struct fspec_expr *lexer, void *input, const size_t size, const size_t nmemb); + size_t (*write)(struct fspec_expr *lexer, const void *output, const size_t size, const size_t nmemb); + } ops; + + struct { + struct fspec_mem input; + } mem; +}; + +bool +fspec_expr_parse(struct fspec_expr *lexer, const char *name); diff --git a/src/fspec/ragel/lexer-expr.rl b/src/fspec/ragel/lexer-expr.rl new file mode 100644 index 0000000..2975043 --- /dev/null +++ b/src/fspec/ragel/lexer-expr.rl @@ -0,0 +1,122 @@ +#include "lexer-expr.h" +#include "lexer-stack.h" +#include "util/ragel/ragel.h" + +#include +#include +#include +#include + +static uint8_t +precedence(char op) +{ + switch (op) { + case '^': return 4; + case '*': return 3; + case '/': return 3; + case '+': return 2; + case '-': return 2; + } + errx(EXIT_FAILURE, "unknown operator %c for precedence", op); + return 0; +} + +static size_t +pop(char cur, char *mstack, size_t open) +{ + static char cvar = 'a'; + + // 1 + 2 + 4 + 3 * 
2 / 2 * 2 * 2 - 2 * 2 + 5; + while (open >= 3) { + const char last_op = mstack[open - 2]; + const uint8_t last_prio = precedence(last_op); + const uint8_t new_prio = precedence(cur); + + if (last_prio <= new_prio) + break; + + printf("%c = ", cvar); + for (size_t i = open - 3; i < open; ++i) + printf("%c ", mstack[i]); + puts(";"); + open -= 3; + + mstack[open++] = cvar; + ++cvar; + } + + return open; +} + +%%{ + machine fspec_expr; + include fspec_stack "lexer-stack.rl"; + variable p ragel.p; + variable pe ragel.pe; + variable eof ragel.eof; + write data noerror nofinal; + + action op { + open = pop(fc, mstack, open); + mstack[open++] = fc; + } + + logical_operators = '&&' | '||' | '==' | '<' | '>' | '<=' | '>='; + calc_operators = '-' | '+' | '/' | '*' | '%'; + bitwise_operators = '&' | '|' | '^' | '<<' | '>>'; + + main := |* + '+' => op; + '/' => op; + '*' => op; + '-' => op; + '^' => op; + stack_num => { mstack[open++] = fc;}; + '(' => { }; + ')' => { }; + ' '; + ';' => { + printf("v = "); + for (size_t i = 0; i < open; ++i) + printf("%c ", mstack[i]); + puts(";"); + }; + *|; +}%% + + +bool +fspec_expr_parse(struct fspec_expr *expr, const char *name) +{ + int cs, act; + const char *ts, *te; + (void)ts; + + size_t open = 0; + char mstack[25]; + + %% write init; + + (void)fspec_expr_en_main; + assert(expr); + assert(expr->ops.read); + assert(expr->ops.write); + assert(expr->mem.input.data && expr->mem.input.len); + assert(expr->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + + char var[256]; + struct stack stack = { .var.buf.mem = { .data = var, .len = sizeof(var) } }; + struct ragel ragel = { .name = name, .lineno = 1 }; + + // static const fspec_num version = 0; + + struct fspec_mem input = expr->mem.input; + for (bool eof = false; !ragel.error && !eof;) { + const size_t bytes = expr->ops.read(expr, input.data, 1, input.len); + const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; + 
ragel_feed_input(&ragel, (eof = (bytes < input.len)), &rl); + %% write exec; + } + + return !ragel.error; +} diff --git a/src/fspec/ragel/lexer-stack.h b/src/fspec/ragel/lexer-stack.h new file mode 100644 index 0000000..eebf055 --- /dev/null +++ b/src/fspec/ragel/lexer-stack.h @@ -0,0 +1,42 @@ +#pragma once + +#include "util/membuf.h" + +#include + +struct varbuf { + struct membuf buf; + size_t offset; +}; + +void +varbuf_begin(struct varbuf *var); + +void +varbuf_reset(struct varbuf *var); + +void +varbuf_remove_last(struct varbuf *var); + +struct stack { + struct varbuf var; + + union { + struct fspec_mem str; + uint64_t num; + }; + + enum stack_type { + STACK_STR, + STACK_NUM, + } type; +}; + +void +stack_num(struct stack *stack, const uint8_t base); + +const struct fspec_mem* +stack_get_str(const struct stack *stack); + +uint64_t +stack_get_num(const struct stack *stack); diff --git a/src/fspec/ragel/lexer-stack.rl b/src/fspec/ragel/lexer-stack.rl new file mode 100644 index 0000000..940f820 --- /dev/null +++ b/src/fspec/ragel/lexer-stack.rl @@ -0,0 +1,153 @@ +#include "lexer-stack.h" + +#include +#include +#include + +void +varbuf_begin(struct varbuf *var) +{ + assert(var); + var->offset = var->buf.written; + assert(var->offset <= var->buf.mem.len); +} + +void +varbuf_reset(struct varbuf *var) +{ + assert(var); + var->offset = var->buf.written = 0; +} + +void +varbuf_remove_last(struct varbuf *var) +{ + assert(var); + assert(var->buf.written >= var->offset); + const size_t size = var->buf.written - var->offset; + assert(var->buf.written >= size); + var->buf.written -= size; + assert(var->buf.written <= var->buf.mem.len); +} + +static void +stack_check_type(const struct stack *stack, const enum stack_type type) +{ + assert(stack); + + if (stack->type == type) + return; + + const char *got = (type == STACK_STR ? "str" : "num"), *expected = (stack->type == STACK_STR ? 
"str" : "num"); + errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", got, expected); +} + +void +stack_num(struct stack *stack, const uint8_t base) +{ + assert(stack); + membuf_terminate(&stack->var.buf, (char[]){ 0 }, 1); + const char *str = (char*)stack->var.buf.mem.data + stack->var.offset; + stack->type = STACK_NUM; + stack->num = strtoll(str, NULL, base); + varbuf_remove_last(&stack->var); +} + +const struct fspec_mem* +stack_get_str(const struct stack *stack) +{ + stack_check_type(stack, STACK_STR); + return &stack->str; +} + +uint64_t +stack_get_num(const struct stack *stack) +{ + stack_check_type(stack, STACK_NUM); + return stack->num; +} + +%%{ + machine fspec_stack; + + action stack_oct { + stack_num(&stack, 8); + } + + action stack_hex { + stack_num(&stack, 16); + } + + action stack_dec { + stack_num(&stack, 10); + } + + action stack_str { + membuf_terminate(&stack.var.buf, (char[]){ 0 }, 1); + stack.type = STACK_STR; + stack.str = stack.var.buf.mem; + stack.str.len = stack.var.buf.written; + } + + action store_esc_num { + const fspec_num v = stack_get_num(&stack); + assert(v <= 255); + membuf_append(&stack.var.buf, (uint8_t[]){ v }, sizeof(uint8_t)); + } + + action store_esc { + const struct { const char e, v; } map[] = { + { .e = 'a', .v = '\a' }, + { .e = 'b', .v = '\b' }, + { .e = 'f', .v = '\f' }, + { .e = 'n', .v = '\n' }, + { .e = 'r', .v = '\r' }, + { .e = 't', .v = '\t' }, + { .e = 'v', .v = '\v' }, + { .e = '\\', .v = '\\' }, + { .e = '\'', .v = '\'' }, + { .e = '\"', .v = '"' }, + { .e = 'e', .v = 0x1B }, + }; + + for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); ++i) { + if (fc != map[i].e) + continue; + + membuf_append(&stack.var.buf, &map[i].v, sizeof(map[i].v)); + break; + } + } + + action store { + membuf_append(&stack.var.buf, fpc, 1); + } + + action begin_num { + varbuf_begin(&stack.var); + } + + action begin_str { + varbuf_reset(&stack.var); + } + + # Semantic + quote = ['"]; + esc = 
[abfnrtv\\'"e]; + esc_chr = '\\'; + esc_hex = 'x' <: xdigit{2}; + hex = '0x' <: xdigit{1,}; + oct = [0-7]{1,3}; + dec = [\-+]? <: (([1-9] <: digit*) | '0'); + name = ((alpha | '_') <: (alnum | '_')*); + + # Stack + stack_name = name >begin_str $store %stack_str; + stack_hex = hex >begin_num $store %stack_hex; + stack_dec = dec >begin_num $store %stack_dec; + stack_oct = oct >begin_num $store %stack_oct; + stack_esc_hex = esc_hex >begin_num <>*store %stack_hex; + stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc); + stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote; + stack_num = stack_dec | stack_hex; +}%% diff --git a/src/fspec/ragel/lexer.rl b/src/fspec/ragel/lexer.rl new file mode 100644 index 0000000..b4a21dc --- /dev/null +++ b/src/fspec/ragel/lexer.rl @@ -0,0 +1,180 @@ +#include +#include +#include "lexer-stack.h" +#include "util/ragel/ragel.h" +#include "fspec/private/bcode-types.h" + +#include + +%%{ + machine fspec_lexer; + include fspec_stack "lexer-stack.rl"; + variable p ragel.p; + variable pe ragel.pe; + variable eof ragel.eof; + write data noerror nofinal; + + action arg_eof { + // codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL); + } + + action arg_num { + // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) }); + } + + action arg_str { + // const struct fspec_mem *str = stack_get_str(&state.stack); + // codebuf_append_arg_cstr(&state.out, str->data, str->len); + } + + action arg_var { + // state_append_arg_var(&state, true, stack_get_str(&state.stack)); + } + + action filter { + // codebuf_append_op(&state.out, FSPEC_OP_FILTER); + } + + action goto { + // codebuf_append_op(&state.out, FSPEC_OP_GOTO); + // state_append_arg_var(&state, false, stack_get_str(&state.stack)); + } + + action vnul { + // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); + // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL }); + 
} + + action vdec { + // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); + // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC }); + } + + action vhex { + // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); + // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX }); + } + + action vstr { + // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); + // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR }); + } + + action r8 { + // codebuf_append_op(&state.out, FSPEC_OP_READ); + // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 8 }); + } + + action r16 { + // codebuf_append_op(&state.out, FSPEC_OP_READ); + // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 }); + } + + action r32 { + // codebuf_append_op(&state.out, FSPEC_OP_READ); + // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 }); + } + + action r64 { + // codebuf_append_op(&state.out, FSPEC_OP_READ); + // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 }); + } + + action enum_member_end { + } + + action enum_member_start { + } + + action enum_end { + } + + action enum_start { + } + + action struct_member_end { + // state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER); + } + + action struct_member_start { + // state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack)); + } + + action struct_end { + // state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT); + } + + action struct_start { + // state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack)); + } + + action type_err { + ragel_throw_error(&ragel, "unknown type name"); + } + + action visual_err { + ragel_throw_error(&ragel, "unknown visualization"); + } + + action syntax_err { + ragel_throw_error(&ragel, "malformed input (machine failed here or in next expression)"); + } + + action line { + ragel_advance_line(&ragel); + } + + # Semantic + newline = '\n'; 
+ valid = ^cntrl; + comment = '//' <: valid* :>> newline; + type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's64') %r64; + visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr; + + # Catchers + catch_const_expr = stack_num %arg_num; + catch_struct = 'struct ' <: stack_name; + catch_enum = 'enum ' <: stack_name; + catch_type = (catch_struct %goto | type) $!type_err; + catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var; + catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']'; + catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? <: (', ' <: catch_args)* :>> ')')?; + catch_visual = ' ' <: visual $!visual_err; + + # Abstract + struct_member = stack_name %struct_member_start :>> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) :>> ';' %struct_member_end; + struct = catch_struct %struct_start :>> ' {' <: (space | comment | struct_member)* :>> '};' %struct_end; + enum_member = stack_name %enum_member_start :>> (': ' <: catch_const_expr)? 
:>> ';' %enum_member_end; + enum = catch_enum %enum_start :>> ' {' <: (space | comment | enum_member)* :>> '};' %enum_end; + line = valid* :>> newline %line; + main := ((space | comment | enum | struct)* & line*) $!syntax_err; +}%% + +bool +fspec_lexer_parse(struct fspec_lexer *lexer, const char *name) +{ + int cs; + %% write init; + + (void)fspec_lexer_en_main; + assert(lexer); + assert(lexer->ops.read); + assert(lexer->mem.input.data && lexer->mem.input.len); + assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + + char var[256]; + struct stack stack = { .var.buf.mem = { .data = var, .len = sizeof(var) } }; + struct ragel ragel = { .name = name, .lineno = 1 }; + + // static const fspec_num version = 0; + + struct fspec_mem input = lexer->mem.input; + for (bool eof = false; !ragel.error && !eof;) { + const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len); + const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; + ragel_feed_input(&ragel, (eof = (bytes < input.len)), &rl); + %% write exec; + } + + return !ragel.error; +} diff --git a/src/fspec/ragel/validator.rl b/src/fspec/ragel/validator.rl new file mode 100644 index 0000000..90ead21 --- /dev/null +++ b/src/fspec/ragel/validator.rl @@ -0,0 +1,96 @@ +#include +#include +#include "util/ragel/ragel.h" +#include "fspec/private/bcode-types.h" + +#include + +struct stack { + union { + fspec_num num; + fspec_off off; + fspec_var var; + fspec_strsz strsz; + unsigned char b[sizeof(fspec_num)]; + } u; + uint8_t i; // writing index for u.b +}; + +struct state { + struct ragel ragel; + struct stack stack; +}; + +%%{ + machine fspec_validator; + variable p state.ragel.p; + variable pe state.ragel.pe; + variable eof state.ragel.eof; + write data noerror nofinal; + +# BLT_HEADER = 0; +# BLT_ADD = 1; +# BLT_SUB = 2; +# BLT_MUL = 3; +# BLT_DIV = 4; +# BLT_MOD = 5; +# BLT_BIT_AND = 6; +# BLT_BIT_OR = 7; +# BLT_BIT_XOR = 8; +# BLT_BIT_LEFT = 9; +# 
BLT_BIT_RIGHT = 10; +# BLT_DECLARE = 11; +# BLT_READ = 12; +# BLT_GOTO = 13; +# BLT_FILTER = 14; +# BLT_VISUAL = 15; +# +# builtins = BLT_HEADER | +# BLT_ADD | BLT_SUB | BLT_MUL | BLT_DIV | BLT_MOD | +# BLT_BIT_AND | BLT_BIT_OR | BLT_BIT_XOR | BLT_BIT_LEFT | BLT_BIT_RIGHT +# BLT_DECLARE | BLT_READ | BLT_GOTO | BLT_FILTER | BLT_VISUAL; +# +# OP_ARG = 0; +# OP_REF = 1; +# OP_BLT = 2 OP_ARG builtins; +# OP_FUN = 3; +# +# arg_ops = OP_REF | OP_FUN | OP_BUILTIN OP_FUN +# +# BLT_DECLARE = OP_BUILTIN 10 OP_ARG 2 OP_REF OP_REF; +# BLT_READ = OP_BUILTIN 11 OP_ARG 1..255 OP_REF (arg_ops)*; +# +# pattern = ((BLT_READ | BLT_GOTO) BLT_FILTER* BLT_VISUAL?)* $!pattern_error; +# main := (BLT_HEADER <: BLT_DECLARE* <: pattern) %check_decls $advance $!syntax_error; + main := any*; +}%% + +bool +fspec_validator_parse(struct fspec_validator *validator, const char *name) +{ + int cs; + %% write init; + + (void)fspec_validator_en_main; + assert(validator); + assert(validator->ops.read); + assert(validator->mem.input.data && validator->mem.input.len); + assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + + struct state state = { + .ragel.name = name, + .ragel.lineno = 1, + }; + + static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union"); + + struct fspec_mem input = validator->mem.input; + for (bool eof = false; !state.ragel.error && !eof;) { + const size_t bytes = validator->ops.read(validator, input.data, 1, input.len); + const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true }; + ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); + %% write exec; + } + + return !state.ragel.error; +} diff --git a/src/fspec/validator.h b/src/fspec/validator.h index c4705b2..a20e98a 100644 --- a/src/fspec/validator.h +++ b/src/fspec/validator.h @@ -2,6 +2,8 @@ #include +#include + struct fspec_validator; struct fspec_validator { struct { diff 
--git a/src/fspec/validator.rl b/src/fspec/validator.rl deleted file mode 100644 index b00a827..0000000 --- a/src/fspec/validator.rl +++ /dev/null @@ -1,236 +0,0 @@ -#include "ragel/ragel.h" -#include -#include -#include "bcode-internal.h" - -#include - -struct stack { - union { - fspec_num num; - fspec_off off; - fspec_var var; - fspec_strsz strsz; - unsigned char b[sizeof(fspec_num)]; - } u; - uint8_t i; // writing index for u.b -}; - -struct range { - fspec_off start, end; -}; - -struct context { - struct range data; - fspec_var declarations, expected_declarations; - fspec_off str_end, decl_start, decl_end[FSPEC_DECLARATION_LAST], offset; - enum fspec_declaration last_decl_type; -}; - -struct state { - struct ragel ragel; - struct context context; - struct stack stack; -}; - -%%{ - machine fspec_validator; - variable p state.ragel.p; - variable pe state.ragel.pe; - variable eof state.ragel.eof; - write data noerror nofinal; - - action store_decls { - if (state.stack.u.num > (fspec_var)~0) - ragel_throw_error(&state.ragel, "expected declarations overflows"); - - state.context.expected_declarations = state.stack.u.num; - } - - action check_decls { - if (state.context.declarations != state.context.expected_declarations) - ragel_throw_error(&state.ragel, "expected declarations did not match with the content: expected: %" PRI_FSPEC_VAR " got: %" PRI_FSPEC_VAR, state.context.expected_declarations, state.context.declarations); - } - - action mark_dat { - // we can replace this logic with fspec generated code in future - // struct str { len: u32; str: u8[len]['\0']; } - // struct dat { len: u32; strings: struct str[$::len]; } - if (state.context.offset > (fspec_off)~0 - state.stack.u.off) - ragel_throw_error(&state.ragel, "dat section length overflows"); - - state.context.data = (struct range){ .start = state.context.offset, .end = state.stack.u.off }; - } - - action test_inside_dat { - state.context.offset < (state.context.data.start + state.context.data.end) - } - - 
action mark_str { - if (state.context.offset >= (fspec_off)~0 - state.stack.u.strsz) // >= for null byte - ragel_throw_error(&state.ragel, "str length overflows"); - - state.context.str_end = state.context.offset + state.stack.u.strsz; - } - - action test_inside_str { - state.context.offset < state.context.str_end - } - - action check_var { - if (state.context.declarations <= state.stack.u.var) - ragel_throw_error(&state.ragel, "refenced undeclared variable"); - } - - action check_str { - if (state.stack.u.off < state.context.data.start) { - ragel_throw_error(&state.ragel, "str before data section range: %" PRI_FSPEC_OFF " <= %" PRI_FSPEC_OFF, state.stack.u.off, state.context.data.start + state.context.data.end); - } else if (state.context.data.start + state.context.data.end <= state.stack.u.off) { - ragel_throw_error(&state.ragel, "str after data section range: %" PRI_FSPEC_OFF " <= %" PRI_FSPEC_OFF, state.context.data.start + state.context.data.end, state.stack.u.off); - } - } - - action check_decl_type { - if (state.stack.u.num >= FSPEC_DECLARATION_LAST) - ragel_throw_error(&state.ragel, "invalid declaration type: %" PRI_FSPEC_NUM, state.stack.u.num); - - state.context.last_decl_type = state.stack.u.num; - } - - action check_decl_num { - if (state.context.declarations >= (fspec_var)~0) - ragel_throw_error(&state.ragel, "declarations overflows"); - - if (state.context.declarations != state.stack.u.num) - ragel_throw_error(&state.ragel, "invalid declaration number: %" PRI_FSPEC_NUM " expected: %" PRI_FSPEC_VAR, state.stack.u.num, state.context.declarations); - - ++state.context.declarations; - } - - action start_decl { - state.context.decl_start = state.context.offset; - } - - action mark_decl { - const fspec_off sz = (state.context.offset - state.context.decl_start); - assert(sz <= state.stack.u.off); - - if (state.context.offset > (fspec_off)~0 - state.stack.u.off - sz) - ragel_throw_error(&state.ragel, "declaration length overflows"); - - 
state.context.decl_end[state.context.last_decl_type] = state.context.offset + state.stack.u.off - sz; - } - - action check_struct { - if (state.context.last_decl_type != FSPEC_DECLARATION_STRUCT) - ragel_throw_error(&state.ragel, "expected struct declaration"); - } - - action check_member { - if (state.context.last_decl_type != FSPEC_DECLARATION_MEMBER) - ragel_throw_error(&state.ragel, "expected member declaration"); - } - - action check_member_end { - if (state.context.decl_end[FSPEC_DECLARATION_MEMBER] != state.context.offset) - ragel_throw_error(&state.ragel, "invalid member end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_MEMBER], state.context.offset); - } - - action check_struct_end { - if (state.context.decl_end[FSPEC_DECLARATION_STRUCT] != state.context.offset) - ragel_throw_error(&state.ragel, "invalid struct end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_STRUCT], state.context.offset); - } - - action check_visual_type { - if (state.stack.u.num >= FSPEC_VISUAL_LAST) - ragel_throw_error(&state.ragel, "invalid visual type: %" PRI_FSPEC_NUM, state.stack.u.num); - } - - action arg_error { - ragel_throw_error(&state.ragel, "malformed argument"); - } - - action op_error { - ragel_throw_error(&state.ragel, "unexpected argument"); - } - - action pattern_error { - ragel_throw_error(&state.ragel, "unexpected pattern"); - } - - action syntax_error { - ragel_throw_error(&state.ragel, "unexpected byte"); - } - - action store { - if (state.stack.i < sizeof(state.stack.u.b)) - state.stack.u.b[state.stack.i++] = fc; - } - - action flush { - state.stack.i = 0; - } - - action advance { - ++state.context.offset; - } - - stack1 = any{1} >flush $store; - stack2 = any{2} >flush $store; - stack4 = any{4} >flush $store; - stack8 = any{8} >flush $store; - - ARG_DAT = 0 stack4 %*mark_dat ((stack1 %*mark_str (any when test_inside_str)* 0) when test_inside_dat)*; - ARG_OFF = 1 stack4; - ARG_NUM 
= 2 stack8; - ARG_VAR = 3 stack2 %check_var; - ARG_STR = 4 stack4 %check_str; - ARG_EOF = 5; - - OP_ARG_DAT = 0 ARG_DAT $!arg_error; - OP_ARG_OFF = 0 ARG_OFF $!arg_error; - OP_ARG_NUM = 0 ARG_NUM $!arg_error; - OP_ARG_VAR = 0 ARG_VAR $!arg_error; - OP_ARG_STR = 0 ARG_STR $!arg_error; - OP_ARG_EOF = 0 ARG_EOF $!arg_error; - - OP_HEADER = 1 (OP_ARG_NUM OP_ARG_NUM %store_decls OP_ARG_DAT) $!op_error; - OP_DECLARATION = 2 >start_decl (OP_ARG_NUM %check_decl_type OP_ARG_NUM %check_decl_num OP_ARG_OFF %mark_decl OP_ARG_STR) $!op_error; - OP_READ = 3 (OP_ARG_NUM (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error; - OP_GOTO = 4 (OP_ARG_VAR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error; - OP_FILTER = 5 (OP_ARG_STR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR)*) $!op_error; - OP_VISUAL = 6 (OP_ARG_NUM %check_visual_type) $!op_error; - - pattern = (OP_DECLARATION %check_struct <: (OP_DECLARATION %check_member (OP_READ | OP_GOTO) OP_FILTER* OP_VISUAL? %check_member_end)*)* %check_struct_end $!pattern_error; - main := (OP_HEADER <: pattern) %check_decls $advance $!syntax_error; -}%% - -bool -fspec_validator_parse(struct fspec_validator *validator, const char *name) -{ - int cs; - %% write init; - - (void)fspec_validator_en_main; - assert(validator); - assert(validator->ops.read); - assert(validator->mem.input.data && validator->mem.input.len); - assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); - - struct state state = { - .ragel.name = name, - .ragel.lineno = 1, - }; - - static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union"); - - struct fspec_mem input = validator->mem.input; - for (bool eof = false; !state.ragel.error && !eof;) { - const size_t bytes = validator->ops.read(validator, input.data, 1, input.len); - const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true }; - ragel_feed_input(&state.ragel, 
(eof = (bytes < input.len)), &rl); - %% write exec; - } - - return !state.ragel.error; -} diff --git a/src/ragel/ragel.h b/src/ragel/ragel.h deleted file mode 100644 index b2c7572..0000000 --- a/src/ragel/ragel.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include -#include - -struct ragel_mem { - const char *data, *end; - bool binary; // binary input bit -}; - -struct ragel { - struct ragel_mem input; // block of input data - uint64_t lineno; // current line - const char *p, *pe, *eof; // see ragel doc - const char *cl; // current line start - const char *name; // may be current file name for example - bool error; // error thrown bit -}; - -__attribute__((format(printf, 2, 3))) void -ragel_throw_error(struct ragel *ragel, const char *fmt, ...); - -void -ragel_set_name(struct ragel *ragel, const char *name); - -void -ragel_advance_line(struct ragel *ragel); - -void -ragel_feed_input(struct ragel *ragel, const bool eof, const struct ragel_mem *input); diff --git a/src/ragel/ragel.rl b/src/ragel/ragel.rl deleted file mode 100644 index 7e51030..0000000 --- a/src/ragel/ragel.rl +++ /dev/null @@ -1,91 +0,0 @@ -#include "ragel.h" -#include -#include -#include -#include - -%%{ - machine ragel; - write data noerror nofinal; - - action red { fputs("\x1b[31m", stderr); } - action reset { fputs("\x1b[0m", stderr); } - action end { fputs("\x1b[0m\n", stderr); } - action mark { fputc((fpc == *error ? 
'^' : '~'), stderr); } - action lead { fputc(' ', stderr); } - action char { fputc(fc, stderr); } - - valid = ^cntrl - space - punct; - mark_token = (space valid | punct) ${ *error = fpc; }; - search_err := ((any | mark_token) when { fpc != ragel->pe && fpc <= ragel->p })*; - - word = print | valid*; - until_err = (any when { fpc != *error })*; - print_err := (until_err <: (word - '\n') >red %reset <: (print - '\n')*) $char >*lead %!end %/end; - print_mark := (until_err $lead <: (any | word) >red $mark) >*lead %!end %/end; -}%% - -static void -ragel_exec_error(const struct ragel *ragel, const int start_cs, const char **error) -{ - (void)ragel_start; - assert(ragel && ragel->cl && error); - int cs = start_cs; - const char *p = ragel->cl, *pe = ragel->pe, *eof = ragel->eof; - assert(p <= pe); - %% write exec; -} - -void -ragel_throw_error(struct ragel *ragel, const char *fmt, ...) -{ - assert(ragel && fmt); - ragel->error = true; - - const char *error = ragel->p; - - if (!ragel->input.binary) - ragel_exec_error(ragel, ragel_en_search_err, &error); - - const char *name = (ragel->name ? 
ragel->name : ""); - assert(error >= ragel->cl); - uint64_t column = (error - ragel->cl); - fprintf(stderr, "\x1b[37m%s:%" PRIu64 ":%" PRIu64 " \x1b[31merror: \x1b[0m", name, ragel->lineno, column); - - va_list args; - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); - fputc('\n', stderr); - - if (!ragel->input.binary) { - ragel_exec_error(ragel, ragel_en_print_err, &error); - ragel_exec_error(ragel, ragel_en_print_mark, &error); - } -} - -void -ragel_set_name(struct ragel *ragel, const char *name) -{ - assert(ragel); - ragel->name = name; -} - -void -ragel_advance_line(struct ragel *ragel) -{ - assert(ragel); - ++ragel->lineno; - ragel->cl = ragel->p; -} - -void -ragel_feed_input(struct ragel *ragel, const bool eof, const struct ragel_mem *input) -{ - assert(ragel); - ragel->input = *input; - ragel->cl = ragel->p = ragel->input.data; - ragel->pe = ragel->input.end; - ragel->eof = (eof ? ragel->pe : NULL); - assert(ragel->p <= ragel->pe); -} diff --git a/src/util/membuf.c b/src/util/membuf.c new file mode 100644 index 0000000..0602679 --- /dev/null +++ b/src/util/membuf.c @@ -0,0 +1,31 @@ +#include "membuf.h" + +#include +#include +#include +#include + +static void +membuf_bounds_check(const struct membuf *buf, const size_t nmemb) +{ + assert(buf); + + if (buf->mem.len < nmemb || buf->written > buf->mem.len - nmemb) + errx(EXIT_FAILURE, "%s: %zu bytes exceeds the maximum storage size of %zu bytes", __func__, buf->written + nmemb, buf->mem.len); +} + +void +membuf_terminate(struct membuf *buf, const void *data, const size_t data_sz) +{ + assert(data || !data_sz); + membuf_bounds_check(buf, data_sz); + memcpy((char*)buf->mem.data + buf->written, data, data_sz); +} + +void +membuf_append(struct membuf *buf, const void *data, const size_t data_sz) +{ + membuf_terminate(buf, data, data_sz); + buf->written += data_sz; + assert(buf->written <= buf->mem.len); +} diff --git a/src/util/membuf.h b/src/util/membuf.h new file mode 100644 index 
0000000..86d8dde --- /dev/null +++ b/src/util/membuf.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +struct membuf { + struct fspec_mem mem; + size_t written; +}; + +void +membuf_terminate(struct membuf *buf, const void *data, const size_t data_sz); + +void +membuf_append(struct membuf *buf, const void *data, const size_t data_sz); diff --git a/src/util/ragel/ragel.h b/src/util/ragel/ragel.h new file mode 100644 index 0000000..b2c7572 --- /dev/null +++ b/src/util/ragel/ragel.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +struct ragel_mem { + const char *data, *end; + bool binary; // binary input bit +}; + +struct ragel { + struct ragel_mem input; // block of input data + uint64_t lineno; // current line + const char *p, *pe, *eof; // see ragel doc + const char *cl; // current line start + const char *name; // may be current file name for example + bool error; // error thrown bit +}; + +__attribute__((format(printf, 2, 3))) void +ragel_throw_error(struct ragel *ragel, const char *fmt, ...); + +void +ragel_set_name(struct ragel *ragel, const char *name); + +void +ragel_advance_line(struct ragel *ragel); + +void +ragel_feed_input(struct ragel *ragel, const bool eof, const struct ragel_mem *input); diff --git a/src/util/ragel/ragel.rl b/src/util/ragel/ragel.rl new file mode 100644 index 0000000..c52f27b --- /dev/null +++ b/src/util/ragel/ragel.rl @@ -0,0 +1,92 @@ +#include "ragel.h" + +#include +#include +#include +#include + +%%{ + machine ragel; + write data noerror nofinal; + + action red { fputs("\x1b[31m", stderr); } + action reset { fputs("\x1b[0m", stderr); } + action end { fputs("\x1b[0m\n", stderr); } + action mark { fputc((fpc == *error ? 
'^' : '~'), stderr); } + action lead { fputc(' ', stderr); } + action char { fputc(fc, stderr); } + + valid = ^cntrl - space - punct; + mark_token = (space valid | punct) ${ *error = fpc; }; + search_err := ((any | mark_token) when { fpc != ragel->pe && fpc <= ragel->p })*; + + word = print | valid*; + until_err = (any when { fpc != *error })*; + print_err := (until_err <: (word - '\n') >red %reset <: (print - '\n')*) $char >*lead %!end %/end; + print_mark := (until_err $lead <: (any | word) >red $mark) >*lead %!end %/end; +}%% + +static void +ragel_exec_error(const struct ragel *ragel, const int start_cs, const char **error) +{ + (void)ragel_start; + assert(ragel && ragel->cl && error); + int cs = start_cs; + const char *p = ragel->cl, *pe = ragel->pe, *eof = ragel->eof; + assert(p <= pe); + %% write exec; +} + +void +ragel_throw_error(struct ragel *ragel, const char *fmt, ...) +{ + assert(ragel && fmt); + ragel->error = true; + + const char *error = ragel->p; + + if (!ragel->input.binary) + ragel_exec_error(ragel, ragel_en_search_err, &error); + + const char *name = (ragel->name ? 
ragel->name : ""); + assert(error >= ragel->cl); + uint64_t column = (error - ragel->cl); + fprintf(stderr, "\x1b[37m%s:%" PRIu64 ":%" PRIu64 " \x1b[31merror: \x1b[0m", name, ragel->lineno, column); + + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fputc('\n', stderr); + + if (!ragel->input.binary) { + ragel_exec_error(ragel, ragel_en_print_err, &error); + ragel_exec_error(ragel, ragel_en_print_mark, &error); + } +} + +void +ragel_set_name(struct ragel *ragel, const char *name) +{ + assert(ragel); + ragel->name = name; +} + +void +ragel_advance_line(struct ragel *ragel) +{ + assert(ragel); + ++ragel->lineno; + ragel->cl = ragel->p; +} + +void +ragel_feed_input(struct ragel *ragel, const bool eof, const struct ragel_mem *input) +{ + assert(ragel); + ragel->input = *input; + ragel->cl = ragel->p = ragel->input.data; + ragel->pe = ragel->input.end; + ragel->eof = (eof ? ragel->pe : NULL); + assert(ragel->p <= ragel->pe); +} -- cgit v1.2.3