diff options
| -rw-r--r-- | Makefile | 15 | ||||
| -rw-r--r-- | spec/elf.fspec | 8 | ||||
| -rw-r--r-- | src/bin/fspec/dump.c (renamed from src/dump.c) | 33 | ||||
| -rw-r--r-- | src/fspec/bcode.c | 14 | ||||
| -rw-r--r-- | src/fspec/bcode.h | 110 | ||||
| -rw-r--r-- | src/fspec/lexer.h | 12 | ||||
| -rw-r--r-- | src/fspec/lexer.rl | 616 | ||||
| -rw-r--r-- | src/fspec/private/bcode-types.h (renamed from src/fspec/bcode-internal.h) | 0 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-expr.h | 20 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-expr.rl | 122 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-stack.h | 42 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-stack.rl | 153 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer.rl | 180 | ||||
| -rw-r--r-- | src/fspec/ragel/validator.rl | 96 | ||||
| -rw-r--r-- | src/fspec/validator.h | 2 | ||||
| -rw-r--r-- | src/fspec/validator.rl | 236 | ||||
| -rw-r--r-- | src/util/membuf.c | 31 | ||||
| -rw-r--r-- | src/util/membuf.h | 14 | ||||
| -rw-r--r-- | src/util/ragel/ragel.h (renamed from src/ragel/ragel.h) | 0 | ||||
| -rw-r--r-- | src/util/ragel/ragel.rl (renamed from src/ragel/ragel.rl) | 1 | ||||
| -rw-r--r-- | vim/filespec.vim | 2 | 
21 files changed, 796 insertions, 911 deletions
@@ -23,14 +23,17 @@ all: $(bins)  $(bins): %:  	$(LINK.c) $(filter %.c %.a,$^) $(LDLIBS) -o $@ -fspec-ragel.a: src/ragel/ragel.h src/ragel/ragel.c -fspec-bcode.a: src/fspec/memory.h src/fspec/bcode.h src/fspec/bcode.c -fspec-lexer.a: src/ragel/ragel.h src/fspec/lexer.h src/fspec/lexer.c -fspec-validator.a: src/ragel/ragel.h src/fspec/validator.h src/fspec/validator.c +fspec-membuf.a: src/util/membuf.h src/util/membuf.c +fspec-ragel.a: src/util/ragel/ragel.h src/util/ragel/ragel.c +fspec-lexer-stack.a: src/fspec/ragel/lexer-stack.h src/fspec/ragel/lexer-stack.c +fspec-lexer-expr.a: src/fspec/ragel/lexer-expr.h src/fspec/ragel/lexer-expr.c +fspec-bcode.a: src/fspec/memory.h src/fspec/private/bcode-types.h src/fspec/bcode.h src/fspec/bcode.c fspec-ragel.a +fspec-lexer.a: src/fspec/lexer.h src/fspec/ragel/lexer.c fspec-lexer-stack.a fspec-lexer-expr.a fspec-bcode.a +fspec-validator.a: src/fspec/validator.h src/fspec/ragel/validator.c fspec-ragel.a  fspec-dump: private CPPFLAGS += $(shell pkg-config --cflags-only-I squash-0.8)  fspec-dump: private LDLIBS += $(shell pkg-config --libs-only-l squash-0.8) -fspec-dump: src/dump.c fspec-ragel.a fspec-bcode.a fspec-lexer.a fspec-validator.a +fspec-dump: src/bin/fspec/dump.c fspec-ragel.a fspec-membuf.a fspec-bcode.a fspec-lexer-stack.a fspec-lexer-expr.a fspec-lexer.a fspec-validator.a  dec2bin: src/bin/misc/dec2bin.c @@ -48,7 +51,7 @@ install-bin: $(bins)  install: install-bin  clean: -	$(RM) src/ragel/ragel.c src/fspec/lexer.c src/fspec/validator.c +	$(RM) src/util/ragel/*.c src/fspec/ragel/*.c  	$(RM) $(bins) *.a  .PHONY: all clean install diff --git a/spec/elf.fspec b/spec/elf.fspec index e9f3f3b..2ad5f78 100644 --- a/spec/elf.fspec +++ b/spec/elf.fspec @@ -1,3 +1,11 @@ +enum foo { +   foo: 0x1; +   bar: 0x2; +   eaf: 0x3; +   eaf: 0xDEADBEEF; +   bar; +}; +  struct elf64 {     e_entry: u64 hex;     e_phoff: u64; diff --git a/src/dump.c b/src/bin/fspec/dump.c index 8af7119..07a6757 100644 --- a/src/dump.c +++ b/src/bin/fspec/dump.c @@ -14,9 +14,12 @@  #include <fspec/bcode.h>  #include <fspec/lexer.h>  #include <fspec/validator.h> +#include "util/membuf.h"  #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#if 0 +  static size_t  to_hex(const uint8_t *buf, const size_t buf_sz, char *out, const size_t out_sz, const bool reverse)  { @@ -737,6 +740,7 @@ execute(const struct fspec_mem *mem)     free(context.decl);  } +#endif  static FILE*  fopen_or_die(const char *path, const char *mode) @@ -754,17 +758,28 @@ fopen_or_die(const char *path, const char *mode)  struct lexer {     struct fspec_lexer lexer; +   struct membuf output;     FILE *file;  };  static size_t -fspec_lexer_read(struct fspec_lexer *lexer, void *ptr, const size_t size, const size_t nmemb) +fspec_lexer_write(struct fspec_lexer *lexer, const enum fspec_lexer_section section, const void *output, const size_t size, const size_t nmemb) +{ +   assert(lexer && output); +   // struct lexer *l = container_of(lexer, struct lexer, lexer); +   (void)lexer, (void)section, (void)size, (void)nmemb; +   return nmemb; +} + +static size_t +fspec_lexer_read(struct fspec_lexer *lexer, void *input, const size_t size, const size_t nmemb)  { -   assert(lexer && ptr); +   assert(lexer && input);     struct lexer *l = container_of(lexer, struct lexer, lexer); -   return fread(ptr, size, nmemb, l->file); +   return fread(input, size, nmemb, l->file);  } +#if 0  static size_t  fspec_validator_read(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb)  { @@ -776,6 +791,7 @@ fspec_validator_read(struct fspec_validator *validator, void *ptr, const size_t     assert(validator->mem.input.len == 0);     return read;  } +#endif  int  main(int argc, const char *argv[]) @@ -784,26 +800,28 @@ main(int argc, const char *argv[])        errx(EXIT_FAILURE, "usage: %s file.spec < data", argv[0]);     char output[4096]; -   struct fspec_mem bcode = {0};     {        char input[4096];        struct lexer l = {           .lexer = {              .ops.read = fspec_lexer_read, -            .mem.input = { .data = input, sizeof(input) }, -            .mem.output = { .data = output, sizeof(output) }, +            .ops.write = fspec_lexer_write, +            .mem.input = { .data = input, .len = sizeof(input) },           },           .file = fopen_or_die(argv[1], "rb"), +         .output.mem = { .data = output, .len = sizeof(output) },        };        if (!fspec_lexer_parse(&l.lexer, argv[1]))           exit(EXIT_FAILURE);        fclose(l.file); -      bcode = l.lexer.mem.output; +      // bcode = l.lexer.mem.output;     } +#if 0 +     {        struct fspec_validator validator = {           .ops.read = fspec_validator_read, @@ -815,5 +833,6 @@ main(int argc, const char *argv[])     }     execute(&bcode); +#endif     return EXIT_SUCCESS;  } diff --git a/src/fspec/bcode.c b/src/fspec/bcode.c index 0d0d3fd..70e4b08 100644 --- a/src/fspec/bcode.c +++ b/src/fspec/bcode.c @@ -1,5 +1,6 @@  #include <fspec/bcode.h> -#include "bcode-internal.h" +#include <fspec/memory.h> +#include "private/bcode-types.h"  #include <stdlib.h>  #include <string.h> @@ -8,7 +9,15 @@  static_assert(sizeof(fspec_off) <= sizeof(((struct fspec_mem*)0)->len), "fspec_off should not be larger than what fspec_mem can represent");  static_assert(sizeof(enum fspec_op) == sizeof(uint8_t), "enum fspec_op is expected to have size of uint8_t"); -static_assert(sizeof(enum fspec_arg) == sizeof(uint8_t), "enum fspec_arg is expected to have size of uint8_t"); +static_assert(sizeof(struct fspec_bcode) == sizeof(enum fspec_op), "struct fspec_bcode is expected to have size of enum fspec_op"); +static_assert(sizeof(FSPEC_OP_LAST) <= 8, "op codes need more than 3 bits to be represented"); + +#if 0 +uint8_t +fspec_op_get_num_args(const struct fspec_op_code *code) +{ +   return code->op >> 2; +}  static fspec_off  arg_data_len(const enum fspec_arg *arg) @@ -187,3 +196,4 @@ fspec_op_next(const enum fspec_op *start, const void *end, const bool skip_args)     return NULL;  } +#endif diff --git a/src/fspec/bcode.h b/src/fspec/bcode.h index d84060e..3d216af 100644 --- a/src/fspec/bcode.h +++ b/src/fspec/bcode.h @@ -1,7 +1,5 @@  #pragma once -#include <fspec/memory.h> -  #include <inttypes.h>  #include <stdint.h>  #include <stdbool.h> @@ -10,34 +8,6 @@  #define PRI_FSPEC_NUM PRIu64  typedef uint64_t fspec_num; -enum fspec_arg { -   FSPEC_ARG_DAT, -   FSPEC_ARG_OFF, -   FSPEC_ARG_NUM, -   FSPEC_ARG_VAR, -   FSPEC_ARG_STR, -   FSPEC_ARG_EOF, -   FSPEC_ARG_LAST, -} __attribute__((packed)); - -void -fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem); - -fspec_num -fspec_arg_get_num(const enum fspec_arg *arg); - -const char* -fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data); - -const enum fspec_arg* -fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const uint32_t expect); - -enum fspec_declaration { -   FSPEC_DECLARATION_STRUCT, -   FSPEC_DECLARATION_MEMBER, -   FSPEC_DECLARATION_LAST, -} __attribute__((packed)); -  enum fspec_visual {     FSPEC_VISUAL_NUL,     FSPEC_VISUAL_DEC, @@ -46,19 +16,77 @@ enum fspec_visual {     FSPEC_VISUAL_LAST,  } __attribute__((packed)); +enum fspec_type { +   FSPEC_TYPE_CODE, +   FSPEC_TYPE_CALL, +   FSPEC_TYPE_U8, +   FSPEC_TYPE_S8, +   FSPEC_TYPE_U16, +   FSPEC_TYPE_S16, +   FSPEC_TYPE_U32, +   FSPEC_TYPE_S32, +   FSPEC_TYPE_U64, +   FSPEC_TYPE_S64, +   FSPEC_TYPE_LAST, +} __attribute__((packed)); + +enum fspec_storage { +   FSPEC_STORAGE_DATA, +   FSPEC_STORAGE_LOCAL, +   FSPEC_STORAGE_LAST, +} __attribute__((packed)); + +enum fspec_builtin { +   FSPEC_BUILTIN_ADD, +   FSPEC_BUILTIN_SUB, +   FSPEC_BUILTIN_MUL, +   FSPEC_BUILTIN_DIV, +   FSPEC_BUILTIN_MOD, +   FSPEC_BUILTIN_BIT_AND, +   FSPEC_BUILTIN_BIT_OR, +   FSPEC_BUILTIN_BIT_XOR, +   FSPEC_BUILTIN_BIT_LEFT, +   FSPEC_BUILTIN_BIT_RIGHT, +   FSPEC_BUILTIN_DECLARE, +   FSPEC_BUILTIN_READ, +   FSPEC_BUILTIN_FILTER, +   FSPEC_BUILTIN_VISUAL, +   FSPEC_BUILTIN_LAST, +} __attribute__((packed)); +  enum fspec_op { -   FSPEC_OP_ARG, -   FSPEC_OP_HEADER, -   FSPEC_OP_DECLARATION, -   FSPEC_OP_READ, -   FSPEC_OP_GOTO, -   FSPEC_OP_FILTER, -   FSPEC_OP_VISUAL, +   FSPEC_OP_BUILTIN, +   FSPEC_OP_PUSH, +   FSPEC_OP_POP, +   FSPEC_OP_VAR,     FSPEC_OP_LAST,  } __attribute__((packed)); -const enum fspec_op* -fspec_op_next(const enum fspec_op *op, const void *end, const bool skip_args); +struct fspec_bcode { +   char op, data[]; +} __attribute__((packed)); + +#if 0 +('fspc')(version) +OP_BUILTIN (declare) OP_PUSH OP_VAR8 (storage) OP_VAR8 (type) OP_VAR [name] OP_POP +OP_BUILTIN (filter) +OP_FUN FUN_ASSIGN VAR0 VAR [data] +OP_FUN FUN_READ +#endif -const enum fspec_arg* -fspec_op_get_arg(const enum fspec_op *op, const void *end, const uint8_t nth, const uint32_t expect); +#if 0 +uint8_t +fspec_op_get_num_args(const struct fspec_bcode *code); + +const struct fspec_bcode* +fspec_op_next(const struct fspec_bcode *code, const void *end, const bool skip_args); + +const struct fspec_bcode* +fspec_op_get_arg(const struct fspec_bcode *code, const void *end, const uint8_t nth, const uint32_t expect); + +const struct fspec_arg* +fspec_arg_next(const struct fspec_bcode *code, const void *end, const uint8_t nth, const uint32_t expect); + +fspec_num +fspec_ref_get_num(const struct fspec_bcode *code); +#endif diff --git a/src/fspec/lexer.h b/src/fspec/lexer.h index 7b60e6b..ef6e059 100644 --- a/src/fspec/lexer.h +++ b/src/fspec/lexer.h @@ -2,14 +2,22 @@  #include <fspec/memory.h> +#include <stdbool.h> + +enum fspec_lexer_section { +   FSPEC_SECTION_DATA, +   FSPEC_SECTION_CODE, +}; +  struct fspec_lexer;  struct fspec_lexer {     struct { -      size_t (*read)(struct fspec_lexer *lexer, void *ptr, const size_t size, const size_t nmemb); +      size_t (*read)(struct fspec_lexer *lexer, void *input, const size_t size, const size_t nmemb); +      size_t (*write)(struct fspec_lexer *lexer, const enum fspec_lexer_section section, const void *output, const size_t size, const size_t nmemb);     } ops;     struct { -      struct fspec_mem input, output; +      struct fspec_mem input;     } mem;  }; diff --git a/src/fspec/lexer.rl b/src/fspec/lexer.rl deleted file mode 100644 index 51d1a54..0000000 --- a/src/fspec/lexer.rl +++ /dev/null @@ -1,616 +0,0 @@ -#include "ragel/ragel.h" -#include <fspec/bcode.h> -#include <fspec/lexer.h> -#include "bcode-internal.h" - -#include <stdlib.h> -#include <string.h> -#include <assert.h> -#include <err.h> - -#define PLACEHOLDER 0xDEADBEEF -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - -typedef uint8_t fspec_strsz; - -struct membuf { -   struct fspec_mem mem; -   fspec_off written; -}; - -static void -membuf_bounds_check(const struct membuf *buf, const fspec_off nmemb) -{ -   assert(buf); - -   if (buf->mem.len < nmemb || buf->written > buf->mem.len - nmemb) -      errx(EXIT_FAILURE, "%s: %" PRI_FSPEC_OFF " bytes exceeds the maximum storage size of %zu bytes", __func__, buf->written + nmemb, buf->mem.len); -} - -static void -membuf_terminate(struct membuf *buf, const void *data, const fspec_off data_sz) -{ -   membuf_bounds_check(buf, data_sz); -   memcpy((char*)buf->mem.data + buf->written, data, data_sz); -} - -static void -membuf_replace(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz) -{ -   assert(buf->mem.len >= data_sz && off <= buf->mem.len - data_sz); -   memcpy((char*)buf->mem.data + off, data, data_sz); -} - -static void -membuf_append_at(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz) -{ -   assert(off <= buf->written); -   membuf_bounds_check(buf, data_sz); -   const size_t rest = buf->written - off; -   memmove((char*)buf->mem.data + off + data_sz, (char*)buf->mem.data + off, rest); -   membuf_replace(buf, off, data, data_sz); -   buf->written += data_sz; -   assert(buf->written <= buf->mem.len); -} - -static void -membuf_append(struct membuf *buf, const void *data, const fspec_off data_sz) -{ -   membuf_append_at(buf, buf->written, data, data_sz); -} - -struct varbuf { -   struct membuf buf; -   fspec_off offset; -}; - -static inline void -varbuf_begin(struct varbuf *var) -{ -   assert(var); -   var->offset = var->buf.written; -   assert(var->offset <= var->buf.mem.len); -} - -static void -varbuf_reset(struct varbuf *var) -{ -   assert(var); -   var->offset = var->buf.written = 0; -} - -static inline void -varbuf_remove_last(struct varbuf *var) -{ -   assert(var); -   assert(var->buf.written >= var->offset); -   const fspec_off size = var->buf.written - var->offset; -   assert(var->buf.written >= size); -   var->buf.written -= size; -   assert(var->buf.written <= var->buf.mem.len); -} - -enum section { -   SECTION_DATA, -   SECTION_CODE, -   SECTION_LAST, -}; - -struct codebuf { -   struct membuf buf; -   const void *decl[FSPEC_DECLARATION_LAST], *end[SECTION_LAST], *strings; -   fspec_var declarations; -}; - -static void -codebuf_append(struct codebuf *code, const enum section section, const void *data, const fspec_off data_sz) -{ -   assert(code->end[section]); -   const fspec_off off = (char*)code->end[section] - (char*)code->buf.mem.data; -   membuf_append_at(&code->buf, off, data, data_sz); - -   for (enum section s = section; s < ARRAY_SIZE(code->end); ++s) { -      code->end[s] = (char*)code->end[s] + data_sz; -      assert((char*)code->end[s] <= (char*)code->buf.mem.data + code->buf.mem.len); -   } - -   if (section == SECTION_DATA) { -      for (enum fspec_declaration d = 0; d < ARRAY_SIZE(code->decl); ++d) { -         code->decl[d] = (code->decl[d] ? (char*)code->decl[d] + data_sz : NULL); -         assert((char*)code->decl[d] <= (char*)code->buf.mem.data + code->buf.mem.len); -      } -   } - -   assert(code->end[SECTION_DATA] <= code->end[SECTION_CODE]); -   assert((char*)code->end[SECTION_CODE] == (char*)code->buf.mem.data + code->buf.written); -} - -static void -codebuf_append_op(struct codebuf *code, const enum fspec_op op) -{ -   codebuf_append(code, SECTION_CODE, &op, sizeof(op)); -} - -static uint8_t -arg_sizeof(const enum fspec_arg type) -{ -   switch (type) { -      case FSPEC_ARG_DAT: -      case FSPEC_ARG_OFF: -      case FSPEC_ARG_STR: -         return sizeof(fspec_off); - -      case FSPEC_ARG_NUM: -         return sizeof(fspec_num); - -      case FSPEC_ARG_VAR: -         return sizeof(fspec_var); - -      case FSPEC_ARG_EOF: -         break; - -      case FSPEC_ARG_LAST: -         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, type); -   } - -   return 0; -} - -static void -codebuf_append_arg(struct codebuf *code, const enum fspec_arg type, const void *v) -{ -   assert(code); -   codebuf_append_op(code, FSPEC_OP_ARG); -   codebuf_append(code, SECTION_CODE, &type, sizeof(type)); -   codebuf_append(code, SECTION_CODE, v, arg_sizeof(type)); -} - -static void -codebuf_replace_arg(struct codebuf *code, const enum fspec_arg *arg, const enum fspec_arg type, const void *v) -{ -   assert(code && arg); -   assert(*arg == type); -   const fspec_off off = ((char*)arg + 1) - (char*)code->buf.mem.data; -   membuf_replace(&code->buf, off, v, arg_sizeof(type)); -} - -static bool -get_string_offset(const void *start, const void *end, const void *str, const fspec_strsz str_sz, void const **out_off) -{ -   assert(out_off); - -   while (start < end) { -      fspec_strsz len; -      memcpy(&len, start, sizeof(len)); -      if (len == str_sz && !memcmp((char*)start + sizeof(len), str, len)) { -         *out_off = start; -         return true; -      } -      start = (char*)start + sizeof(len) + len + 1; -   } - -   return false; -} - -static void -codebuf_append_arg_cstr(struct codebuf *code, const void *str, const fspec_strsz str_sz) -{ -   const void *ptr; -   if (!get_string_offset(code->strings, code->end[SECTION_DATA], str, str_sz, &ptr)) { -      ptr = code->end[SECTION_DATA]; -      codebuf_append(code, SECTION_DATA, &str_sz, sizeof(str_sz)); -      codebuf_append(code, SECTION_DATA, str, str_sz); -      codebuf_append(code, SECTION_DATA, (char[]){ 0 }, 1); -   } - -   const fspec_off off = (char*)ptr - (char*)code->buf.mem.data; -   codebuf_append_arg(code, FSPEC_ARG_STR, &off); -} - -static const enum fspec_op* -get_named_op(const enum fspec_op *start, const void *end, const void *data, const enum fspec_op op, const uint8_t nth, const void *name, const fspec_strsz name_sz, fspec_var *out_id) -{ -   fspec_var id = 0; -   if ((void*)start < end && *start == FSPEC_OP_DECLARATION) -      id = fspec_arg_get_num(fspec_op_get_arg(start, end, 2, 1<<FSPEC_ARG_NUM)); - -   for (const enum fspec_op *p = start; p; p = fspec_op_next(p, end, true)) { -      const enum fspec_arg *arg; -      if (*p != op || !(arg = fspec_op_get_arg(p, end, nth, 1<<FSPEC_ARG_STR))) -         continue; - -      struct fspec_mem str; -      fspec_arg_get_mem(arg, data, &str); -      if (str.len == name_sz && !memcmp(name, str.data, name_sz)) { -         if (out_id) -            *out_id = id; - -         return p; -      } - -      ++id; -   } - -   return NULL; -} - -static const enum fspec_op* -get_declaration(struct codebuf *code, const bool member, const struct fspec_mem *str, fspec_var *out_id) -{ -   const void *start = (member ? code->decl[FSPEC_DECLARATION_STRUCT] : code->end[SECTION_DATA]); -   return get_named_op(start, code->end[SECTION_CODE], code->buf.mem.data, FSPEC_OP_DECLARATION, 4, str->data, str->len, out_id); -} - -static bool -codebuf_append_arg_var(struct codebuf *code, const bool member, const struct fspec_mem *var) -{ -   fspec_var id = -1; -   if (!get_declaration(code, member, var, &id)) -      return false; - -   codebuf_append_arg(code, FSPEC_ARG_VAR, &id); -   return true; -} - -static void -codebuf_append_declaration(struct codebuf *code, const enum fspec_declaration decl) -{ -   code->decl[decl] = code->end[SECTION_CODE]; -   codebuf_append_op(code, FSPEC_OP_DECLARATION); -   codebuf_append_arg(code, FSPEC_ARG_NUM, (fspec_num[]){ decl }); -   codebuf_append_arg(code, FSPEC_ARG_NUM, (fspec_num[]){ code->declarations++ }); -   codebuf_append_arg(code, FSPEC_ARG_OFF, (fspec_off[]){ PLACEHOLDER }); -} - -enum stack_type { -   STACK_STR, -   STACK_NUM, -}; - -struct stack { -   union { -      struct fspec_mem str; -      uint64_t num; -   }; -   enum stack_type type; -}; - -static const char* -stack_type_to_str(const enum stack_type type) -{ -   switch (type) { -      case STACK_STR: return "str"; -      case STACK_NUM: return "num"; -   }; -   return "unknown"; -} - -static void -stack_check_type(const struct stack *stack, const enum stack_type type) -{ -   assert(stack); - -   if (stack->type != type) -      errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type)); -} - -static const struct fspec_mem* -stack_get_str(const struct stack *stack) -{ -   stack_check_type(stack, STACK_STR); -   return &stack->str; -} - -static uint64_t -stack_get_num(const struct stack *stack) -{ -   stack_check_type(stack, STACK_NUM); -   return stack->num; -} - -struct state { -   struct ragel ragel; -   struct stack stack; -   struct codebuf out; -   struct varbuf var; -}; - -static void -state_stack_num(struct state *state, const uint8_t base) -{ -   assert(state); -   membuf_terminate(&state->var.buf, (char[]){ 0 }, 1); -   const char *str = (char*)state->var.buf.mem.data + state->var.offset; -   state->stack.type = STACK_NUM; -   state->stack.num = strtoll(str + (base == 16 && *str == 'x'), NULL, base); -   varbuf_remove_last(&state->var); -} - -static void -state_append_arg_var(struct state *state, const bool member, const struct fspec_mem *str) -{ -   assert(state && str); - -   if (!codebuf_append_arg_var(&state->out, member, str)) -      ragel_throw_error(&state->ragel, "'%s' undeclared", (char*)str->data); -} - -static void -state_append_declaration(struct state *state, const enum fspec_declaration decl, const struct fspec_mem *str) -{ -   assert(state && str); - -   if (get_declaration(&state->out, (decl == FSPEC_DECLARATION_MEMBER), str, NULL)) -      ragel_throw_error(&state->ragel, "'%s' redeclared", (char*)str->data); - -   codebuf_append_declaration(&state->out, decl); -   codebuf_append_arg_cstr(&state->out, str->data, str->len); -} - -static void -state_finish_declaration(struct state *state, const enum fspec_declaration decl) -{ -   assert(state && state->out.decl[decl]); -   const char *end = state->out.end[SECTION_CODE]; -   const fspec_off off = end - (char*)state->out.decl[decl]; -   codebuf_replace_arg(&state->out, fspec_op_get_arg(state->out.decl[decl], end, 3, 1<<FSPEC_ARG_OFF), FSPEC_ARG_OFF, &off); -   state->out.decl[decl] = NULL; -} - -%%{ -   machine fspec_lexer; -   variable p state.ragel.p; -   variable pe state.ragel.pe; -   variable eof state.ragel.eof; -   write data noerror nofinal; - -   action arg_eof { -      codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL); -   } - -   action arg_num { -      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) }); -   } - -   action arg_str { -      const struct fspec_mem *str = stack_get_str(&state.stack); -      codebuf_append_arg_cstr(&state.out, str->data, str->len); -   } - -   action arg_var { -      state_append_arg_var(&state, true, stack_get_str(&state.stack)); -   } - -   action filter { -      codebuf_append_op(&state.out, FSPEC_OP_FILTER); -   } - -   action goto { -      codebuf_append_op(&state.out, FSPEC_OP_GOTO); -      state_append_arg_var(&state, false, stack_get_str(&state.stack)); -   } - -   action vnul { -      codebuf_append_op(&state.out, FSPEC_OP_VISUAL); -      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL }); -   } - -   action vdec { -      codebuf_append_op(&state.out, FSPEC_OP_VISUAL); -      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC }); -   } - -   action vhex { -      codebuf_append_op(&state.out, FSPEC_OP_VISUAL); -      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX }); -   } - -   action vstr { -      codebuf_append_op(&state.out, FSPEC_OP_VISUAL); -      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR }); -   } - -   action r8 { -      codebuf_append_op(&state.out, FSPEC_OP_READ); -      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 8 }); -   } - -   action r16 { -      codebuf_append_op(&state.out, FSPEC_OP_READ); -      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 }); -   } - -   action r32 { -      codebuf_append_op(&state.out, FSPEC_OP_READ); -      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 }); -   } - -   action r64 { -      codebuf_append_op(&state.out, FSPEC_OP_READ); -      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 }); -   } - -   action member_end { -      state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER); -   } - -   action member_start { -      state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack)); -   } - -   action struct_end { -      state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT); -   } - -   action struct_start { -      state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack)); -   } - -   action stack_oct { -      state_stack_num(&state, 8); -   } - -   action stack_hex { -      state_stack_num(&state, 16); -   } - -   action stack_dec { -      state_stack_num(&state, 10); -   } - -   action stack_str { -      membuf_terminate(&state.var.buf, (char[]){ 0 }, 1); -      state.stack.type = STACK_STR; -      state.stack.str = state.var.buf.mem; -      state.stack.str.len = state.var.buf.written; -   } - -   action store_esc_num { -      const fspec_num v = stack_get_num(&state.stack); -      assert(v <= 255); -      const uint8_t u8 = v; -      membuf_append(&state.var.buf, &u8, sizeof(u8)); -   } - -   action store_esc { -      const struct { const char e, v; } map[] = { -         { .e = 'a', .v = '\a' }, -         { .e = 'b', .v = '\b' }, -         { .e = 'f', .v = '\f' }, -         { .e = 'n', .v = '\n' }, -         { .e = 'r', .v = '\r' }, -         { .e = 't', .v = '\t' }, -         { .e = 'v', .v = '\v' }, -         { .e = '\\', .v = '\\' }, -         { .e = '\'', .v = '\'' }, -         { .e = '\"', .v = '"' }, -         { .e = 'e', .v = 0x1B }, -      }; - -      for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { -         if (*state.ragel.p != map[i].e) -            continue; - -         membuf_append(&state.var.buf, &map[i].v, sizeof(map[i].v)); -         break; -      } -   } - -   action store { -      membuf_append(&state.var.buf, state.ragel.p, 1); -   } - -   action begin_num { -      varbuf_begin(&state.var); -   } - -   action begin_str { -      varbuf_reset(&state.var); -   } - -   action type_err { -      ragel_throw_error(&state.ragel, "unknown type name"); -   } - -   action visual_err { -      ragel_throw_error(&state.ragel, "unknown visualization"); -   } - -   action syntax_err { -      ragel_throw_error(&state.ragel, "malformed input (machine failed here or in next expression)"); -   } - -   action line { -      ragel_advance_line(&state.ragel); -   } - -   # Semantic -   quote = ['"]; -   newline = '\n'; -   esc = [abfnrtv\\'"e]; -   esc_chr = '\\'; -   esc_hex = 'x' <: xdigit{2}; -   hex = '0x' <: xdigit{1,}; -   oct = [0-7]{1,3}; -   dec = [\-+]? <: (([1-9] <: digit*) | '0'); -   valid = ^cntrl; -   comment = '//' <: valid* :>> newline; -   type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's64') %r64; -   visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr; -   reserved = 'struct' | type | visual; -   name = ((alpha | '_') <: (alnum | '_')*) - reserved; - -   # Stack -   stack_name = name >begin_str $store %stack_str; -   stack_hex = hex >begin_num $store %stack_hex; -   stack_dec = dec >begin_num $store %stack_dec; -   stack_oct = oct >begin_num $store %stack_oct; -   stack_esc_hex = esc_hex >begin_num $store %stack_hex; -   stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc); -   stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote; -   stack_num = stack_dec | stack_hex; - -   # Catchers -   catch_struct = 'struct ' <: stack_name; -   catch_type = (catch_struct %goto | type) $!type_err; -   catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var; -   catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']'; -   catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? <: (', ' <: catch_args)* :>> ')')?; -   catch_visual = ' ' <: visual $!visual_err; - -   # Abstract -   member = stack_name %member_start :> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) :>> ';' %member_end; -   struct = catch_struct %struct_start :>> ' {' <: (space | comment | member)* :>> '};' %struct_end; -   line = valid* :>> newline %line; -   main := ((space | comment | struct)* & line*) $!syntax_err; -}%% - -bool -fspec_lexer_parse(struct fspec_lexer *lexer, const char *name) -{ -   int cs; -   %% write init; - -   (void)fspec_lexer_en_main; -   assert(lexer); -   assert(lexer->ops.read); -   assert(lexer->mem.input.data && lexer->mem.input.len); -   assert(lexer->mem.output.data && lexer->mem.output.len); -   assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); -   assert(lexer->mem.output.len <= (fspec_off)~0 && "output storage size exceeds fspec_off range"); - -   char var[256]; -   struct state state = { -      .ragel.name = name, -      .ragel.lineno = 1, -      .var.buf.mem = { .data = var, .len = sizeof(var) }, -      .out.buf.mem = lexer->mem.output, -   }; - -   static const fspec_num version = 0; -   state.out.end[SECTION_CODE] = state.out.end[SECTION_DATA] = state.out.buf.mem.data; -   codebuf_append_op(&state.out, FSPEC_OP_HEADER); -   codebuf_append_arg(&state.out, FSPEC_ARG_NUM, &version); -   codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ PLACEHOLDER }); -   codebuf_append_arg(&state.out, FSPEC_ARG_DAT, (fspec_off[]){ PLACEHOLDER }); -   state.out.end[SECTION_DATA] = state.out.end[SECTION_CODE]; -   state.out.strings = state.out.end[SECTION_DATA]; - -   struct fspec_mem input = lexer->mem.input; -   for (bool eof = false; !state.ragel.error && !eof;) { -      const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len); -      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; -      ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); -      %% write exec; -   } - -   { -      const void *end = state.out.end[SECTION_CODE]; -      codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 2, 1<<FSPEC_ARG_NUM), FSPEC_ARG_NUM, (fspec_num[]){ state.out.declarations }); -      const fspec_off off = (char*)state.out.end[SECTION_DATA] - (char*)state.out.strings; -      codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 3, 1<<FSPEC_ARG_DAT), FSPEC_ARG_DAT, &off); -   } - -   lexer->mem.output.len = state.out.buf.written; -   return !state.ragel.error; -} diff --git a/src/fspec/bcode-internal.h b/src/fspec/private/bcode-types.h index 8c9ce74..8c9ce74 100644 --- a/src/fspec/bcode-internal.h +++ b/src/fspec/private/bcode-types.h diff --git a/src/fspec/ragel/lexer-expr.h b/src/fspec/ragel/lexer-expr.h new file mode 100644 index 0000000..904736d --- /dev/null +++ b/src/fspec/ragel/lexer-expr.h @@ -0,0 +1,20 @@ +#pragma once + +#include <fspec/memory.h> + +#include <stdbool.h> + +struct fspec_expr; +struct fspec_expr { +   struct { +      size_t (*read)(struct fspec_expr *lexer, void *input, const size_t size, const size_t nmemb); +      size_t (*write)(struct fspec_expr *lexer, const void *output, const size_t size, const size_t nmemb); +   } ops; + +   struct { +      struct fspec_mem input; +   } mem; +}; + +bool +fspec_expr_parse(struct fspec_expr *lexer, const char *name); diff --git a/src/fspec/ragel/lexer-expr.rl b/src/fspec/ragel/lexer-expr.rl new file mode 100644 index 0000000..2975043 --- /dev/null +++ b/src/fspec/ragel/lexer-expr.rl @@ -0,0 +1,122 @@ +#include "lexer-expr.h" +#include "lexer-stack.h" +#include "util/ragel/ragel.h" + +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <err.h> + +static uint8_t +precedence(char op) +{ +   switch (op) { +      case '^': return 4; +      case '*': return 3; +      case '/': return 3; +      case '+': return 2; +      case '-': return 2; +   } +   errx(EXIT_FAILURE, "unknown operator %c for precedence", op); +   return 0; +} + +static size_t +pop(char cur, char *mstack, size_t open) +{ +   static char cvar = 'a'; + +   // 1 + 2 + 4 + 3 * 2 / 2 * 2 * 2 - 2 * 2 + 5; +   while (open >= 3) { +      const char last_op = mstack[open - 2]; +      const uint8_t last_prio = precedence(last_op); +      const uint8_t new_prio = precedence(cur); + +      if (last_prio <= new_prio) +         break; + +      printf("%c = ", cvar); +      for (size_t i = open - 3; i < open; ++i) +         printf("%c ", mstack[i]); +      puts(";"); +      open -= 3; + +      mstack[open++] = cvar; +      ++cvar; +   } + +   return open; +} + +%%{ +   machine fspec_expr; +   include fspec_stack "lexer-stack.rl"; +   variable p ragel.p; +   variable pe ragel.pe; +   variable eof ragel.eof; +   write data noerror nofinal; + +   action op { +      open = pop(fc, mstack, open); +      mstack[open++] = fc; +   } + +   logical_operators = '&&' | '||' | '==' | '<' | '>' | '<=' | '>='; +   calc_operators = '-' | '+' | '/' | '*' | '%'; +   bitwise_operators = '&' | '|' | '^' | '<<' | '>>'; + +   main := |* +      '+' => op; +      '/' => op; +      '*' => op; +      '-' => op; +      '^' => op; +      stack_num => { mstack[open++] = fc;}; +      '(' => { }; +      ')' => { }; +      ' '; +      ';' => { +         printf("v = "); +         for (size_t i = 0; i < open; ++i) +            printf("%c ", mstack[i]); +         puts(";"); +      }; +      *|; +}%% + + +bool +fspec_expr_parse(struct fspec_expr *expr, const char *name) +{ +   int cs, act; +   const char *ts, *te; +   (void)ts; + +   size_t open = 0; +   char mstack[25]; + +   %% write init; + +   (void)fspec_expr_en_main; +   assert(expr); +   assert(expr->ops.read); +   assert(expr->ops.write); +   assert(expr->mem.input.data && expr->mem.input.len); +   assert(expr->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + +   char var[256]; +   struct stack stack = { .var.buf.mem = { .data = var, .len = sizeof(var) } }; +   struct ragel ragel = { .name = name, .lineno = 1 }; + +   // static const fspec_num version = 0; + +   struct fspec_mem input = expr->mem.input; +   for (bool eof = false; !ragel.error && !eof;) { +      const size_t bytes = expr->ops.read(expr, input.data, 1, input.len); +      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; +      ragel_feed_input(&ragel, (eof = (bytes < input.len)), &rl); +      %% write exec; +   } + +   return !ragel.error; +} diff --git a/src/fspec/ragel/lexer-stack.h b/src/fspec/ragel/lexer-stack.h new file mode 100644 index 0000000..eebf055 --- /dev/null +++ b/src/fspec/ragel/lexer-stack.h @@ -0,0 +1,42 @@ +#pragma once + +#include "util/membuf.h" + +#include <stdint.h> + +struct varbuf { +   struct membuf buf; +   size_t offset; +}; + +void +varbuf_begin(struct varbuf *var); + +void +varbuf_reset(struct varbuf *var); + +void +varbuf_remove_last(struct varbuf *var); + +struct stack { +   struct varbuf var; + +   union { +      struct fspec_mem str; +      uint64_t num; +   }; + +   enum stack_type { +      STACK_STR, +      STACK_NUM, +   } type; +}; + +void +stack_num(struct stack *stack, const uint8_t base); + +const struct fspec_mem* +stack_get_str(const struct stack *stack); + +uint64_t +stack_get_num(const struct stack *stack); diff --git a/src/fspec/ragel/lexer-stack.rl b/src/fspec/ragel/lexer-stack.rl new file mode 100644 index 0000000..940f820 --- /dev/null +++ b/src/fspec/ragel/lexer-stack.rl @@ -0,0 +1,153 @@ +#include "lexer-stack.h" + +#include <stdlib.h> +#include <assert.h> +#include <err.h> + +void +varbuf_begin(struct varbuf *var) +{ +   assert(var); +   var->offset = var->buf.written; +   assert(var->offset <= var->buf.mem.len); +} + +void +varbuf_reset(struct varbuf *var) +{ +   assert(var); +   var->offset = var->buf.written = 0; +} + +void +varbuf_remove_last(struct varbuf *var) +{ +   assert(var); +   assert(var->buf.written >= var->offset); +   const size_t size = var->buf.written - var->offset; +   assert(var->buf.written >= size); +   var->buf.written -= size; +   assert(var->buf.written <= var->buf.mem.len); +} + +static void +stack_check_type(const struct stack *stack, const enum stack_type type) +{ +   assert(stack); + +   if (stack->type == type) +      return; + +   const char *got = (type == STACK_STR ? "str" : "num"), *expected = (stack->type == STACK_STR ? "str" : "num"); +   errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", got, expected); +} + +void +stack_num(struct stack *stack, const uint8_t base) +{ +   assert(stack); +   membuf_terminate(&stack->var.buf, (char[]){ 0 }, 1); +   const char *str = (char*)stack->var.buf.mem.data + stack->var.offset; +   stack->type = STACK_NUM; +   stack->num = strtoll(str, NULL, base); +   varbuf_remove_last(&stack->var); +} + +const struct fspec_mem* +stack_get_str(const struct stack *stack) +{ +   stack_check_type(stack, STACK_STR); +   return &stack->str; +} + +uint64_t +stack_get_num(const struct stack *stack) +{ +   stack_check_type(stack, STACK_NUM); +   return stack->num; +} + +%%{ +   machine fspec_stack; + +   action stack_oct { +      stack_num(&stack, 8); +   } + +   action stack_hex { +      stack_num(&stack, 16); +   } + +   action stack_dec { +      stack_num(&stack, 10); +   } + +   action stack_str { +      membuf_terminate(&stack.var.buf, (char[]){ 0 }, 1); +      stack.type = STACK_STR; +      stack.str = stack.var.buf.mem; +      stack.str.len = stack.var.buf.written; +   } + +   action store_esc_num { +      const fspec_num v = stack_get_num(&stack); +      assert(v <= 255); +      membuf_append(&stack.var.buf, (uint8_t[]){ v }, sizeof(uint8_t)); +   } + +   action store_esc { +      const struct { const char e, v; } map[] = { +         { .e = 'a', .v = '\a' }, +         { .e = 'b', .v = '\b' }, +         { .e = 'f', .v = '\f' }, +         { .e = 'n', .v = '\n' }, +         { .e = 'r', .v = '\r' }, +         { .e = 't', .v = '\t' }, +         { .e = 'v', .v = '\v' }, +         { .e = '\\', .v = '\\' }, +         { .e = '\'', .v = '\'' }, +         { .e = '\"', .v = '"' }, +         { .e = 'e', .v = 0x1B }, +      }; + +      for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); ++i) { +         if (fc != map[i].e) +            continue; + +         membuf_append(&stack.var.buf, &map[i].v, sizeof(map[i].v)); +         break; +      } +   } + +   action store { +      membuf_append(&stack.var.buf, fpc, 1); +   } + +   action begin_num { +      varbuf_begin(&stack.var); +   } + +   action begin_str { +      varbuf_reset(&stack.var); +   } + +   # Semantic +   quote = ['"]; +   esc = [abfnrtv\\'"e]; +   esc_chr = '\\'; +   esc_hex = 'x' <: xdigit{2}; +   hex = '0x' <: xdigit{1,}; +   oct = [0-7]{1,3}; +   dec = [\-+]? <: (([1-9] <: digit*) | '0'); +   name = ((alpha | '_') <: (alnum | '_')*); + +   # Stack +   stack_name = name >begin_str $store %stack_str; +   stack_hex = hex >begin_num $store %stack_hex; +   stack_dec = dec >begin_num $store %stack_dec; +   stack_oct = oct >begin_num $store %stack_oct; +   stack_esc_hex = esc_hex >begin_num <>*store %stack_hex; +   stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc); +   stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote; +   stack_num = stack_dec | stack_hex; +}%% diff --git a/src/fspec/ragel/lexer.rl b/src/fspec/ragel/lexer.rl new file mode 100644 index 0000000..b4a21dc --- /dev/null +++ b/src/fspec/ragel/lexer.rl @@ -0,0 +1,180 @@ +#include <fspec/lexer.h> +#include <fspec/bcode.h> +#include "lexer-stack.h" +#include "util/ragel/ragel.h" +#include "fspec/private/bcode-types.h" + +#include <assert.h> + +%%{ +   machine fspec_lexer; +   include fspec_stack "lexer-stack.rl"; +   variable p ragel.p; +   variable pe ragel.pe; +   variable eof ragel.eof; +   write data noerror nofinal; + +   action arg_eof { +      // codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL); +   } + +   action arg_num { +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) }); +   } + +   action arg_str { +      // const struct fspec_mem *str = stack_get_str(&state.stack); +      // codebuf_append_arg_cstr(&state.out, str->data, str->len); +   } + +   action arg_var { +      // state_append_arg_var(&state, true, stack_get_str(&state.stack)); +   } + +   action filter { +      // codebuf_append_op(&state.out, FSPEC_OP_FILTER); +   } + +   action goto { +      // codebuf_append_op(&state.out, FSPEC_OP_GOTO); +      // state_append_arg_var(&state, false, stack_get_str(&state.stack)); +   } + +   action vnul { +      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL }); +   } + +   action vdec { +      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC }); +   } + +   action vhex { +      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX }); +   } + +   action vstr { +      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR }); +   } + +   action r8 { +      // codebuf_append_op(&state.out, FSPEC_OP_READ); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 8 }); +   } + +   action r16 { +      // codebuf_append_op(&state.out, FSPEC_OP_READ); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 }); +   } + +   action r32 { +      // codebuf_append_op(&state.out, FSPEC_OP_READ); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 }); +   } + +   action r64 { +      // codebuf_append_op(&state.out, FSPEC_OP_READ); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 }); +   } + +   action enum_member_end { +   } + +   action enum_member_start { +   } + +   action enum_end { +   } + +   action enum_start { +   } + +   action struct_member_end { +      // state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER); +   } + +   action struct_member_start { +      // state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack)); +   } + +   action struct_end { +      // state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT); +   } + +   action struct_start { +      // state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack)); +   } + +   action type_err { +      ragel_throw_error(&ragel, "unknown type name"); +   } + +   action visual_err { +      ragel_throw_error(&ragel, "unknown visualization"); +   } + +   action syntax_err { +      ragel_throw_error(&ragel, "malformed input (machine failed here or in next expression)"); +   } + +   action line { +      ragel_advance_line(&ragel); +   } + +   # Semantic +   newline = '\n'; +   valid = ^cntrl; +   comment = '//' <: valid* :>> newline; +   type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's32') %r64; +   visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr; + +   # Catchers +   catch_const_expr = stack_num %arg_num; +   catch_struct = 'struct ' <: stack_name; +   catch_enum = 'enum ' <: stack_name; +   catch_type = (catch_struct %goto | type) $!type_err; +   catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var; +   catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']'; +   catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? <: (', ' <: catch_args)* :>> ')')?; +   catch_visual = ' ' <: visual $!visual_err; + +   # Abstract +   struct_member = stack_name %struct_member_start :>> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) :>> ';' %struct_member_end; +   struct = catch_struct %struct_start :>> ' {' <: (space | comment | struct_member)* :>> '};' %struct_end; +   enum_member = stack_name %enum_member_start :>> (': ' <: catch_const_expr)? :>> ';' %enum_member_end; +   enum = catch_enum %enum_start :>> ' {' <: (space | comment | enum_member)* :>> '};' %enum_end; +   line = valid* :>> newline %line; +   main := ((space | comment | enum | struct)* & line*) $!syntax_err; +}%% + +bool +fspec_lexer_parse(struct fspec_lexer *lexer, const char *name) +{ +   int cs; +   %% write init; + +   (void)fspec_lexer_en_main; +   assert(lexer); +   assert(lexer->ops.read); +   assert(lexer->mem.input.data && lexer->mem.input.len); +   assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + +   char var[256]; +   struct stack stack = { .var.buf.mem = { .data = var, .len = sizeof(var) } }; +   struct ragel ragel = { .name = name, .lineno = 1 }; + +   // static const fspec_num version = 0; + +   struct fspec_mem input = lexer->mem.input; +   for (bool eof = false; !ragel.error && !eof;) { +      const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len); +      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; +      ragel_feed_input(&ragel, (eof = (bytes < input.len)), &rl); +      %% write exec; +   } + +   return !ragel.error; +} diff --git a/src/fspec/ragel/validator.rl b/src/fspec/ragel/validator.rl new file mode 100644 index 0000000..90ead21 --- /dev/null +++ b/src/fspec/ragel/validator.rl @@ -0,0 +1,96 @@ +#include <fspec/bcode.h> +#include <fspec/validator.h> +#include "util/ragel/ragel.h" +#include "fspec/private/bcode-types.h" + +#include <assert.h> + +struct stack { +   union { +      fspec_num num; +      fspec_off off; +      fspec_var var; +      fspec_strsz strsz; +      unsigned char b[sizeof(fspec_num)]; +   } u; +   uint8_t i; // writing index for u.b +}; + +struct state { +   struct ragel ragel; +   struct stack stack; +}; + +%%{ +   machine fspec_validator; +   variable p state.ragel.p; +   variable pe state.ragel.pe; +   variable eof state.ragel.eof; +   write data noerror nofinal; + +#   BLT_HEADER = 0; +#   BLT_ADD = 1; +#   BLT_SUB = 2; +#   BLT_MUL = 3; +#   BLT_DIV = 4; +#   BLT_MOD = 5; +#   BLT_BIT_AND = 6; +#   BLT_BIT_OR = 7; +#   BLT_BIT_XOR = 8; +#   BLT_BIT_LEFT = 9; +#   BLT_BIT_RIGHT = 10; +#   BLT_DECLARE = 11; +#   BLT_READ = 12; +#   BLT_GOTO = 13; +#   BLT_FILTER = 14; +#   BLT_VISUAL = 15; +# +#   builtins = BLT_HEADER | +#              BLT_ADD | BLT_SUB | BLT_MUL | BLT_DIV | BLT_MOD | +#              BLT_BIT_AND | BLT_BIT_OR | BLT_BIT_XOR | BLT_BIT_LEFT | BLT_BIT_RIGHT +#              BLT_DECLARE | BLT_READ | BLT_GOTO | BLT_FILTER | BLT_VISUAL; +# +#   OP_ARG = 0; +#   OP_REF = 1; +#   OP_BLT = 2 OP_ARG builtins; +#   OP_FUN = 3; +# +#   arg_ops = OP_REF | OP_FUN | OP_BUILTIN OP_FUN +# +#   BLT_DECLARE = OP_BUILTIN 10 OP_ARG 2 OP_REF OP_REF; +#   BLT_READ = OP_BUILTIN 11 OP_ARG 1..255 OP_REF (arg_ops)*; +# +#   pattern = ((BLT_READ | BLT_GOTO) BLT_FILTER* BLT_VISUAL?)* $!pattern_error; +#   main := (BLT_HEADER <: BLT_DECLARE* <: pattern) %check_decls $advance $!syntax_error; +   main := any*; +}%% + +bool +fspec_validator_parse(struct fspec_validator *validator, const char *name) +{ +   int cs; +   %% write init; + +   (void)fspec_validator_en_main; +   assert(validator); +   assert(validator->ops.read); +   assert(validator->mem.input.data && validator->mem.input.len); +   assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + +   struct state state = { +      .ragel.name = name, +      .ragel.lineno = 1, +   }; + +   static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union"); + +   struct fspec_mem input = validator->mem.input; +   for (bool eof = false; !state.ragel.error && !eof;) { +      const size_t bytes = validator->ops.read(validator, input.data, 1, input.len); +      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true }; +      ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); +      %% write exec; +   } + +   return !state.ragel.error; +} diff --git a/src/fspec/validator.h b/src/fspec/validator.h index c4705b2..a20e98a 100644 --- a/src/fspec/validator.h +++ b/src/fspec/validator.h @@ -2,6 +2,8 @@  #include <fspec/memory.h> +#include <stdbool.h> +  struct fspec_validator;  struct fspec_validator {     struct { diff --git a/src/fspec/validator.rl b/src/fspec/validator.rl deleted file mode 100644 index b00a827..0000000 --- a/src/fspec/validator.rl +++ /dev/null @@ -1,236 +0,0 @@ -#include "ragel/ragel.h" -#include <fspec/bcode.h> -#include <fspec/validator.h> -#include "bcode-internal.h" - -#include <assert.h> - -struct stack { -   union { -      fspec_num num; -      fspec_off off; -      fspec_var var; -      fspec_strsz strsz; -      unsigned char b[sizeof(fspec_num)]; -   } u; -   uint8_t i; // writing index for u.b -}; - -struct range { -   fspec_off start, end; -}; - -struct context { -   struct range data; -   fspec_var declarations, expected_declarations; -   fspec_off str_end, decl_start, decl_end[FSPEC_DECLARATION_LAST], offset; -   enum fspec_declaration last_decl_type; -}; - -struct state { -   struct ragel ragel; -   struct context context; -   struct stack stack; -}; - -%%{ -   machine fspec_validator; -   variable p state.ragel.p; -   variable pe state.ragel.pe; -   variable eof state.ragel.eof; -   write data noerror nofinal; - -   action store_decls { -      if (state.stack.u.num > (fspec_var)~0) -         ragel_throw_error(&state.ragel, "expected declarations overflows"); - -      state.context.expected_declarations = state.stack.u.num; -   } - -   action check_decls { -      if (state.context.declarations != state.context.expected_declarations) -         ragel_throw_error(&state.ragel, "expected declarations did not match with the content: expected: %" PRI_FSPEC_VAR " got: %" PRI_FSPEC_VAR, state.context.expected_declarations, state.context.declarations); -   } - -   action mark_dat { -      // we can replace this logic with fspec generated code in future -      // struct str { len: u32; str: u8[len]['\0']; } -      // struct dat { len: u32; strings: struct str[$::len]; } -      if (state.context.offset > (fspec_off)~0 - state.stack.u.off) -         ragel_throw_error(&state.ragel, "dat section length overflows"); - -      state.context.data = (struct range){ .start = state.context.offset, .end = state.stack.u.off }; -   } - -   action test_inside_dat { -      state.context.offset < (state.context.data.start + state.context.data.end) -   } - -   action mark_str { -      if (state.context.offset >= (fspec_off)~0 - state.stack.u.strsz) // >= for null byte -         ragel_throw_error(&state.ragel, "str length overflows"); - -      state.context.str_end = state.context.offset + state.stack.u.strsz; -   } - -   action test_inside_str { -      state.context.offset < state.context.str_end -   } - -   action check_var { -      if (state.context.declarations <= state.stack.u.var) -         ragel_throw_error(&state.ragel, "refenced undeclared variable"); -   } - -   action check_str { -      if (state.stack.u.off < state.context.data.start) { -         ragel_throw_error(&state.ragel, "str before data section range: %" PRI_FSPEC_OFF " <= %" PRI_FSPEC_OFF, state.stack.u.off, state.context.data.start + state.context.data.end); -      } else if (state.context.data.start + state.context.data.end <= state.stack.u.off) { -         ragel_throw_error(&state.ragel, "str after data section range: %" PRI_FSPEC_OFF " <= %" PRI_FSPEC_OFF, state.context.data.start + state.context.data.end, state.stack.u.off); -      } -   } - -   action check_decl_type { -      if (state.stack.u.num >= FSPEC_DECLARATION_LAST) -         ragel_throw_error(&state.ragel, "invalid declaration type: %" PRI_FSPEC_NUM, state.stack.u.num); - -      state.context.last_decl_type = state.stack.u.num; -   } - -   action check_decl_num { -      if (state.context.declarations >= (fspec_var)~0) -         ragel_throw_error(&state.ragel, "declarations overflows"); - -      if (state.context.declarations != state.stack.u.num) -         ragel_throw_error(&state.ragel, "invalid declaration number: %" PRI_FSPEC_NUM " expected: %" PRI_FSPEC_VAR, state.stack.u.num, state.context.declarations); - -      ++state.context.declarations; -   } - -   action start_decl { -      state.context.decl_start = state.context.offset; -   } - -   action mark_decl { -      const fspec_off sz = (state.context.offset - state.context.decl_start); -      assert(sz <= state.stack.u.off); - -      if (state.context.offset > (fspec_off)~0 - state.stack.u.off - sz) -         ragel_throw_error(&state.ragel, "declaration length overflows"); - -      state.context.decl_end[state.context.last_decl_type] = state.context.offset + state.stack.u.off - sz; -   } - -   action check_struct { -      if (state.context.last_decl_type != FSPEC_DECLARATION_STRUCT) -         ragel_throw_error(&state.ragel, "expected struct declaration"); -   } - -   action check_member { -      if (state.context.last_decl_type != FSPEC_DECLARATION_MEMBER) -         ragel_throw_error(&state.ragel, "expected member declaration"); -   } - -   action check_member_end { -      if (state.context.decl_end[FSPEC_DECLARATION_MEMBER] != state.context.offset) -         ragel_throw_error(&state.ragel, "invalid member end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_MEMBER], state.context.offset); -   } - -   action check_struct_end { -      if (state.context.decl_end[FSPEC_DECLARATION_STRUCT] != state.context.offset) -         ragel_throw_error(&state.ragel, "invalid struct end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_STRUCT], state.context.offset); -   } - -   action check_visual_type { -      if (state.stack.u.num >= FSPEC_VISUAL_LAST) -         ragel_throw_error(&state.ragel, "invalid visual type: %" PRI_FSPEC_NUM, state.stack.u.num); -   } - -   action arg_error { -      ragel_throw_error(&state.ragel, "malformed argument"); -   } - -   action op_error { -      ragel_throw_error(&state.ragel, "unexpected argument"); -   } - -   action pattern_error { -      ragel_throw_error(&state.ragel, "unexpected pattern"); -   } - -   action syntax_error { -      ragel_throw_error(&state.ragel, "unexpected byte"); -   } - -   action store { -      if (state.stack.i < sizeof(state.stack.u.b)) -         state.stack.u.b[state.stack.i++] = fc; -   } - -   action flush { -      state.stack.i = 0; -   } - -   action advance { -      ++state.context.offset; -   } - -   stack1 = any{1} >flush $store; -   stack2 = any{2} >flush $store; -   stack4 = any{4} >flush $store; -   stack8 = any{8} >flush $store; - -   ARG_DAT = 0 stack4 %*mark_dat ((stack1 %*mark_str (any when test_inside_str)* 0) when test_inside_dat)*; -   ARG_OFF = 1 stack4; -   ARG_NUM = 2 stack8; -   ARG_VAR = 3 stack2 %check_var; -   ARG_STR = 4 stack4 %check_str; -   ARG_EOF = 5; - -   OP_ARG_DAT = 0 ARG_DAT $!arg_error; -   OP_ARG_OFF = 0 ARG_OFF $!arg_error; -   OP_ARG_NUM = 0 ARG_NUM $!arg_error; -   OP_ARG_VAR = 0 ARG_VAR $!arg_error; -   OP_ARG_STR = 0 ARG_STR $!arg_error; -   OP_ARG_EOF = 0 ARG_EOF $!arg_error; - -   OP_HEADER = 1 (OP_ARG_NUM OP_ARG_NUM %store_decls OP_ARG_DAT) $!op_error; -   OP_DECLARATION = 2 >start_decl (OP_ARG_NUM %check_decl_type OP_ARG_NUM %check_decl_num OP_ARG_OFF %mark_decl OP_ARG_STR) $!op_error; -   OP_READ = 3 (OP_ARG_NUM (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error; -   OP_GOTO = 4 (OP_ARG_VAR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error; -   OP_FILTER = 5 (OP_ARG_STR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR)*) $!op_error; -   OP_VISUAL = 6 (OP_ARG_NUM %check_visual_type) $!op_error; - -   pattern = (OP_DECLARATION %check_struct <: (OP_DECLARATION %check_member (OP_READ | OP_GOTO) OP_FILTER* OP_VISUAL? %check_member_end)*)* %check_struct_end $!pattern_error; -   main := (OP_HEADER <: pattern) %check_decls $advance $!syntax_error; -}%% - -bool -fspec_validator_parse(struct fspec_validator *validator, const char *name) -{ -   int cs; -   %% write init; - -   (void)fspec_validator_en_main; -   assert(validator); -   assert(validator->ops.read); -   assert(validator->mem.input.data && validator->mem.input.len); -   assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); - -   struct state state = { -      .ragel.name = name, -      .ragel.lineno = 1, -   }; - -   static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union"); - -   struct fspec_mem input = validator->mem.input; -   for (bool eof = false; !state.ragel.error && !eof;) { -      const size_t bytes = validator->ops.read(validator, input.data, 1, input.len); -      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true }; -      ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); -      %% write exec; -   } - -   return !state.ragel.error; -} diff --git a/src/util/membuf.c b/src/util/membuf.c new file mode 100644 index 0000000..0602679 --- /dev/null +++ b/src/util/membuf.c @@ -0,0 +1,31 @@ +#include "membuf.h" + +#include <stdlib.h> +#include <assert.h> +#include <memory.h> +#include <err.h> + +static void +membuf_bounds_check(const struct membuf *buf, const size_t nmemb) +{ +   assert(buf); + +   if (buf->mem.len < nmemb || buf->written > buf->mem.len - nmemb) +      errx(EXIT_FAILURE, "%s: %zu bytes exceeds the maximum storage size of %zu bytes", __func__, buf->written + nmemb, buf->mem.len); +} + +void +membuf_terminate(struct membuf *buf, const void *data, const size_t data_sz) +{ +   assert(data || !data_sz); +   membuf_bounds_check(buf, data_sz); +   memcpy((char*)buf->mem.data + buf->written, data, data_sz); +} + +void +membuf_append(struct membuf *buf, const void *data, const size_t data_sz) +{ +   membuf_terminate(buf, data, data_sz); +   buf->written += data_sz; +   assert(buf->written <= buf->mem.len); +} diff --git a/src/util/membuf.h b/src/util/membuf.h new file mode 100644 index 0000000..86d8dde --- /dev/null +++ b/src/util/membuf.h @@ -0,0 +1,14 @@ +#pragma once + +#include <fspec/memory.h> + +struct membuf { +   struct fspec_mem mem; +   size_t written; +}; + +void +membuf_terminate(struct membuf *buf, const void *data, const size_t data_sz); + +void +membuf_append(struct membuf *buf, const void *data, const size_t data_sz); diff --git a/src/ragel/ragel.h b/src/util/ragel/ragel.h index b2c7572..b2c7572 100644 --- a/src/ragel/ragel.h +++ b/src/util/ragel/ragel.h diff --git a/src/ragel/ragel.rl b/src/util/ragel/ragel.rl index 7e51030..c52f27b 100644 --- a/src/ragel/ragel.rl +++ b/src/util/ragel/ragel.rl @@ -1,4 +1,5 @@  #include "ragel.h" +  #include <inttypes.h>  #include <stdio.h>  #include <stdarg.h> diff --git a/vim/filespec.vim b/vim/filespec.vim index 077f41c..19c9945 100644 --- a/vim/filespec.vim +++ b/vim/filespec.vim @@ -9,7 +9,7 @@ syn keyword	fsTodo		contained TODO FIXME XXX  syn cluster	fsCommentGroup	contains=fsTodo,fsBadContinuation  syn region	fsComment	start="//" skip="\\$" end="$" keepend contains=@fsCommentGroup,@Spell -syn keyword	fsStructure	struct union +syn keyword	fsStructure	enum struct union  syn keyword	fsType		s8 s16 s32 s64  syn keyword	fsType		u8 u16 u32 u64  syn keyword	fsConstant	nul dec hex str  | 
