diff options
Diffstat (limited to 'src/fspec')
| -rw-r--r-- | src/fspec/bcode-internal.h | 16 | ||||
| -rw-r--r-- | src/fspec/bcode.c | 189 | ||||
| -rw-r--r-- | src/fspec/bcode.h | 64 | ||||
| -rw-r--r-- | src/fspec/lexer.h | 17 | ||||
| -rw-r--r-- | src/fspec/lexer.rl | 616 | ||||
| -rw-r--r-- | src/fspec/memory.h | 8 | ||||
| -rw-r--r-- | src/fspec/validator.h | 17 | ||||
| -rw-r--r-- | src/fspec/validator.rl | 237 | 
8 files changed, 1164 insertions, 0 deletions
| diff --git a/src/fspec/bcode-internal.h b/src/fspec/bcode-internal.h new file mode 100644 index 0000000..8c9ce74 --- /dev/null +++ b/src/fspec/bcode-internal.h @@ -0,0 +1,16 @@ +#pragma once + +#include <inttypes.h> +#include <stdint.h> + +/** maximum size of string literals */ +#define PRI_FSPEC_STRSZ PRIu8 +typedef uint8_t fspec_strsz; + +/** maximum range of variable ids */ +#define PRI_FSPEC_VAR PRIu16 +typedef uint16_t fspec_var; + +/** maximum range of bytecode offsets */ +#define PRI_FSPEC_OFF PRIu32 +typedef uint32_t fspec_off; diff --git a/src/fspec/bcode.c b/src/fspec/bcode.c new file mode 100644 index 0000000..0a89260 --- /dev/null +++ b/src/fspec/bcode.c @@ -0,0 +1,189 @@ +#include <fspec/bcode.h> +#include "bcode-internal.h" + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <err.h> + +static_assert(sizeof(fspec_off) <= sizeof(((struct fspec_mem*)0)->len), "fspec_off should not be larger than what fspec_mem can represent"); +static_assert(sizeof(enum fspec_op) == sizeof(uint8_t), "enum fspec_op is expected to have size of uint8_t"); +static_assert(sizeof(enum fspec_arg) == sizeof(uint8_t), "enum fspec_arg is expected to have size of uint8_t"); + +static fspec_off +arg_data_len(const enum fspec_arg *arg) +{ +   assert(arg); + +   switch (*arg) { +      case FSPEC_ARG_NUM: +         return sizeof(fspec_num); + +      case FSPEC_ARG_VAR: +         return sizeof(fspec_var); + +      case FSPEC_ARG_STR: +      case FSPEC_ARG_OFF: +         return sizeof(fspec_off); + +      case FSPEC_ARG_DAT: +         { +            struct fspec_mem mem; +            fspec_arg_get_mem(arg, NULL, &mem); +            return sizeof(fspec_off) + mem.len; +         } + +      case FSPEC_ARG_EOF: +         break; + +      case FSPEC_ARG_LAST: +         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); +         break; +   } + +   return 0; +} + +static fspec_off +arg_len(const enum fspec_arg *arg) +{ +   return sizeof(*arg) + arg_data_len(arg); +} + +void +fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem) +{ +   assert(arg && out_mem); + +   switch (*arg) { +      case FSPEC_ARG_STR: +         { +            assert(data); +            fspec_off off; +            fspec_strsz len; +            memcpy(&off, (char*)arg + sizeof(*arg), sizeof(off)); +            memcpy(&len, (char*)data + off, sizeof(len)); +            out_mem->data = (char*)data + off + sizeof(len); +            out_mem->len = len; +         } +         break; + +      case FSPEC_ARG_DAT: +         { +            fspec_off len; +            memcpy(&len, (char*)arg + sizeof(*arg), sizeof(len)); +            out_mem->data = (char*)arg + sizeof(*arg) + sizeof(len); +            out_mem->len = len; +         } +         break; + +      case FSPEC_ARG_VAR: +      case FSPEC_ARG_NUM: +      case FSPEC_ARG_OFF: +         out_mem->data = (char*)arg + sizeof(*arg); +         out_mem->len = arg_data_len(arg); +         break; + +      case FSPEC_ARG_EOF: +         *out_mem = (struct fspec_mem){0}; +         break; + +      case FSPEC_ARG_LAST: +         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); +         break; +   } +} + +fspec_num +fspec_arg_get_num(const enum fspec_arg *arg) +{ +   assert(arg); +   fspec_num v; +   switch (*arg) { +      case FSPEC_ARG_NUM: +         memcpy(&v, arg + sizeof(*arg), sizeof(v)); +         break; + +      case FSPEC_ARG_VAR: +         { +            fspec_var var; +            memcpy(&var, arg + sizeof(*arg), sizeof(var)); +            v = var; +         } +         break; + +      case FSPEC_ARG_DAT: +      case FSPEC_ARG_OFF: +         { +            fspec_off off; +            memcpy(&off, arg + sizeof(*arg), sizeof(off)); +            v = off; +         } +         break; + +      case FSPEC_ARG_STR: +      case FSPEC_ARG_EOF: +      case FSPEC_ARG_LAST: +         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); +         break; +   } +   return v; +} + +const char* +fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data) +{ +   assert(arg && *arg == FSPEC_ARG_STR); +   struct fspec_mem mem; +   fspec_arg_get_mem(arg, data, &mem); +   return (const char*)mem.data; +} + +const enum fspec_arg* +fspec_op_get_arg(const enum fspec_op *start, const void *end, const uint8_t nth, const uint32_t expect) +{ +   uint8_t i = 0; +   const enum fspec_arg *arg = NULL; +   for (const enum fspec_op *op = fspec_op_next(start, end, false); op && i < nth; op = fspec_op_next(op, end, false)) { +      if (*op != FSPEC_OP_ARG) +         return NULL; + +      arg = (void*)(op + 1); +      assert(*arg >= 0 && *arg < FSPEC_ARG_LAST); +      ++i; +   } + +   if (arg && !(expect & (1<<*arg))) +      errx(EXIT_FAILURE, "got unexpected argument of type %u", *arg); + +   return arg; +} + +const enum fspec_arg* +fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const uint32_t expect) +{ +   return fspec_op_get_arg((void*)(arg - 1), end, nth, expect); +} + +const enum fspec_op* +fspec_op_next(const enum fspec_op *start, const void *end, const bool skip_args) +{ +   assert(start && end); +   fspec_off off = sizeof(*start); +   if ((void*)start < end && *start == FSPEC_OP_ARG) +      off += arg_len((void*)(start + 1)); + +   for (const enum fspec_op *op = start + off; (void*)start < end && (void*)op < end; ++op) { +      if (*op >= FSPEC_OP_LAST) +         errx(EXIT_FAILURE, "got unexected opcode %u", *op); + +      if (skip_args && *op == FSPEC_OP_ARG) { +         op += arg_len((void*)(op + 1)); +         continue; +      } + +      return op; +   } + +   return NULL; +} diff --git a/src/fspec/bcode.h b/src/fspec/bcode.h new file mode 100644 index 0000000..d84060e --- /dev/null +++ b/src/fspec/bcode.h @@ -0,0 +1,64 @@ +#pragma once + +#include <fspec/memory.h> + +#include <inttypes.h> +#include <stdint.h> +#include <stdbool.h> + +/** maximum range of numbers */ +#define PRI_FSPEC_NUM PRIu64 +typedef uint64_t fspec_num; + +enum fspec_arg { +   FSPEC_ARG_DAT, +   FSPEC_ARG_OFF, +   FSPEC_ARG_NUM, +   FSPEC_ARG_VAR, +   FSPEC_ARG_STR, +   FSPEC_ARG_EOF, +   FSPEC_ARG_LAST, +} __attribute__((packed)); + +void +fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem); + +fspec_num +fspec_arg_get_num(const enum fspec_arg *arg); + +const char* +fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data); + +const enum fspec_arg* +fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const uint32_t expect); + +enum fspec_declaration { +   FSPEC_DECLARATION_STRUCT, +   FSPEC_DECLARATION_MEMBER, +   FSPEC_DECLARATION_LAST, +} __attribute__((packed)); + +enum fspec_visual { +   FSPEC_VISUAL_NUL, +   FSPEC_VISUAL_DEC, +   FSPEC_VISUAL_HEX, +   FSPEC_VISUAL_STR, +   FSPEC_VISUAL_LAST, +} __attribute__((packed)); + +enum fspec_op { +   FSPEC_OP_ARG, +   FSPEC_OP_HEADER, +   FSPEC_OP_DECLARATION, +   FSPEC_OP_READ, +   FSPEC_OP_GOTO, +   FSPEC_OP_FILTER, +   FSPEC_OP_VISUAL, +   FSPEC_OP_LAST, +} __attribute__((packed)); + +const enum fspec_op* +fspec_op_next(const enum fspec_op *op, const void *end, const bool skip_args); + +const enum fspec_arg* +fspec_op_get_arg(const enum fspec_op *op, const void *end, const uint8_t nth, const uint32_t expect); diff --git a/src/fspec/lexer.h b/src/fspec/lexer.h new file mode 100644 index 0000000..7b60e6b --- /dev/null +++ b/src/fspec/lexer.h @@ -0,0 +1,17 @@ +#pragma once + +#include <fspec/memory.h> + +struct fspec_lexer; +struct fspec_lexer { +   struct { +      size_t (*read)(struct fspec_lexer *lexer, void *ptr, const size_t size, const size_t nmemb); +   } ops; + +   struct { +      struct fspec_mem input, output; +   } mem; +}; + +bool +fspec_lexer_parse(struct fspec_lexer *lexer, const char *name); diff --git a/src/fspec/lexer.rl b/src/fspec/lexer.rl new file mode 100644 index 0000000..81390e2 --- /dev/null +++ b/src/fspec/lexer.rl @@ -0,0 +1,616 @@ +#include "ragel/ragel.h" +#include <fspec/bcode.h> +#include <fspec/lexer.h> +#include "bcode-internal.h" + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <err.h> + +#define PLACEHOLDER 0xDEADBEEF +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +typedef uint8_t fspec_strsz; + +struct membuf { +   struct fspec_mem mem; +   fspec_off written; +}; + +static void +membuf_bounds_check(const struct membuf *buf, const fspec_off nmemb) +{ +   assert(buf); + +   if (buf->mem.len < nmemb || buf->written > buf->mem.len - nmemb) +      errx(EXIT_FAILURE, "%s: %" PRI_FSPEC_OFF " bytes exceeds the maximum storage size of %zu bytes", __func__, buf->written + nmemb, buf->mem.len); +} + +static void +membuf_terminate(struct membuf *buf, const void *data, const fspec_off data_sz) +{ +   membuf_bounds_check(buf, data_sz); +   memcpy((char*)buf->mem.data + buf->written, data, data_sz); +} + +static void +membuf_replace(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz) +{ +   assert(buf->mem.len >= data_sz && off <= buf->mem.len - data_sz); +   memcpy((char*)buf->mem.data + off, data, data_sz); +} + +static void +membuf_append_at(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz) +{ +   assert(off <= buf->written); +   membuf_bounds_check(buf, data_sz); +   const size_t rest = buf->written - off; +   memmove((char*)buf->mem.data + off + data_sz, (char*)buf->mem.data + off, rest); +   membuf_replace(buf, off, data, data_sz); +   buf->written += data_sz; +   assert(buf->written <= buf->mem.len); +} + +static void +membuf_append(struct membuf *buf, const void *data, const fspec_off data_sz) +{ +   membuf_append_at(buf, buf->written, data, data_sz); +} + +struct varbuf { +   struct membuf buf; +   fspec_off offset; +}; + +static inline void +varbuf_begin(struct varbuf *var) +{ +   assert(var); +   var->offset = var->buf.written; +   assert(var->offset <= var->buf.mem.len); +} + +static void +varbuf_reset(struct varbuf *var) +{ +   assert(var); +   var->offset = var->buf.written = 0; +} + +static inline void +varbuf_remove_last(struct varbuf *var) +{ +   assert(var); +   assert(var->buf.written >= var->offset); +   const fspec_off size = var->buf.written - var->offset; +   assert(var->buf.written >= size); +   var->buf.written -= size; +   assert(var->buf.written <= var->buf.mem.len); +} + +enum section { +   SECTION_DATA, +   SECTION_CODE, +   SECTION_LAST, +}; + +struct codebuf { +   struct membuf buf; +   const void *decl[FSPEC_DECLARATION_LAST], *end[SECTION_LAST], *strings; +   fspec_var declarations; +}; + +static void +codebuf_append(struct codebuf *code, const enum section section, const void *data, const fspec_off data_sz) +{ +   assert(code->end[section]); +   const fspec_off off = (char*)code->end[section] - (char*)code->buf.mem.data; +   membuf_append_at(&code->buf, off, data, data_sz); + +   for (enum section s = section; s < ARRAY_SIZE(code->end); ++s) { +      code->end[s] = (char*)code->end[s] + data_sz; +      assert((char*)code->end[s] <= (char*)code->buf.mem.data + code->buf.mem.len); +   } + +   if (section == SECTION_DATA) { +      for (enum fspec_declaration d = 0; d < ARRAY_SIZE(code->decl); ++d) { +         code->decl[d] = (code->decl[d] ? (char*)code->decl[d] + data_sz : NULL); +         assert((char*)code->decl[d] <= (char*)code->buf.mem.data + code->buf.mem.len); +      } +   } + +   assert(code->end[SECTION_DATA] <= code->end[SECTION_CODE]); +   assert((char*)code->end[SECTION_CODE] == (char*)code->buf.mem.data + code->buf.written); +} + +static void +codebuf_append_op(struct codebuf *code, const enum fspec_op op) +{ +   codebuf_append(code, SECTION_CODE, &op, sizeof(op)); +} + +static uint8_t +arg_sizeof(const enum fspec_arg type) +{ +   switch (type) { +      case FSPEC_ARG_DAT: +      case FSPEC_ARG_OFF: +      case FSPEC_ARG_STR: +         return sizeof(fspec_off); + +      case FSPEC_ARG_NUM: +         return sizeof(fspec_num); + +      case FSPEC_ARG_VAR: +         return sizeof(fspec_var); + +      case FSPEC_ARG_EOF: +         break; + +      case FSPEC_ARG_LAST: +         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, type); +   } + +   return 0; +} + +static void +codebuf_append_arg(struct codebuf *code, const enum fspec_arg type, const void *v) +{ +   assert(code); +   codebuf_append_op(code, FSPEC_OP_ARG); +   codebuf_append(code, SECTION_CODE, &type, sizeof(type)); +   codebuf_append(code, SECTION_CODE, v, arg_sizeof(type)); +} + +static void +codebuf_replace_arg(struct codebuf *code, const enum fspec_arg *arg, const enum fspec_arg type, const void *v) +{ +   assert(code && arg); +   assert(*arg == type); +   const fspec_off off = ((char*)arg + 1) - (char*)code->buf.mem.data; +   membuf_replace(&code->buf, off, v, arg_sizeof(type)); +} + +static bool +get_string_offset(const void *start, const void *end, const void *str, const fspec_strsz str_sz, void const **out_off) +{ +   assert(out_off); + +   while (start < end) { +      fspec_strsz len; +      memcpy(&len, start, sizeof(len)); +      if (len == str_sz && !memcmp((char*)start + sizeof(len), str, len)) { +         *out_off = start; +         return true; +      } +      start = (char*)start + sizeof(len) + len + 1; +   } + +   return false; +} + +static void +codebuf_append_arg_cstr(struct codebuf *code, const void *str, const fspec_strsz str_sz) +{ +   const void *ptr; +   if (!get_string_offset(code->strings, code->end[SECTION_DATA], str, str_sz, &ptr)) { +      ptr = code->end[SECTION_DATA]; +      codebuf_append(code, SECTION_DATA, &str_sz, sizeof(str_sz)); +      codebuf_append(code, SECTION_DATA, str, str_sz); +      codebuf_append(code, SECTION_DATA, (char[]){ 0 }, 1); +   } + +   const fspec_off off = (char*)ptr - (char*)code->buf.mem.data; +   codebuf_append_arg(code, FSPEC_ARG_STR, &off); +} + +static const enum fspec_op* +get_named_op(const enum fspec_op *start, const void *end, const void *data, const enum fspec_op op, const uint8_t nth, const void *name, const fspec_strsz name_sz, fspec_var *out_id) +{ +   fspec_var id = 0; +   if ((void*)start < end && *start == FSPEC_OP_DECLARATION) +      id = fspec_arg_get_num(fspec_op_get_arg(start, end, 2, 1<<FSPEC_ARG_NUM)); + +   for (const enum fspec_op *p = start; p; p = fspec_op_next(p, end, true)) { +      const enum fspec_arg *arg; +      if (*p != op || !(arg = fspec_op_get_arg(p, end, nth, 1<<FSPEC_ARG_STR))) +         continue; + +      struct fspec_mem str; +      fspec_arg_get_mem(arg, data, &str); +      if (str.len == name_sz && !memcmp(name, str.data, name_sz)) { +         if (out_id) +            *out_id = id; + +         return p; +      } + +      ++id; +   } + +   return NULL; +} + +static const enum fspec_op* +get_declaration(struct codebuf *code, const bool member, const struct fspec_mem *str, fspec_var *out_id) +{ +   const void *start = (member ? code->decl[FSPEC_DECLARATION_STRUCT] : code->end[SECTION_DATA]); +   return get_named_op(start, code->end[SECTION_CODE], code->buf.mem.data, FSPEC_OP_DECLARATION, 4, str->data, str->len, out_id); +} + +static bool +codebuf_append_arg_var(struct codebuf *code, const bool member, const struct fspec_mem *var) +{ +   fspec_var id = -1; +   if (!get_declaration(code, member, var, &id)) +      return false; + +   codebuf_append_arg(code, FSPEC_ARG_VAR, &id); +   return true; +} + +static void +codebuf_append_declaration(struct codebuf *code, const enum fspec_declaration decl) +{ +   code->decl[decl] = code->end[SECTION_CODE]; +   codebuf_append_op(code, FSPEC_OP_DECLARATION); +   codebuf_append_arg(code, FSPEC_ARG_NUM, (fspec_num[]){ decl }); +   codebuf_append_arg(code, FSPEC_ARG_NUM, (fspec_num[]){ code->declarations++ }); +   codebuf_append_arg(code, FSPEC_ARG_OFF, (fspec_off[]){ PLACEHOLDER }); +} + +enum stack_type { +   STACK_STR, +   STACK_NUM, +}; + +struct stack { +   union { +      struct fspec_mem str; +      uint64_t num; +   }; +   enum stack_type type; +}; + +static const char* +stack_type_to_str(const enum stack_type type) +{ +   switch (type) { +      case STACK_STR: return "str"; +      case STACK_NUM: return "num"; +   }; +   return "unknown"; +} + +static void +stack_check_type(const struct stack *stack, const enum stack_type type) +{ +   assert(stack); + +   if (stack->type != type) +      errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type)); +} + +static const struct fspec_mem* +stack_get_str(const struct stack *stack) +{ +   stack_check_type(stack, STACK_STR); +   return &stack->str; +} + +static uint64_t +stack_get_num(const struct stack *stack) +{ +   stack_check_type(stack, STACK_NUM); +   return stack->num; +} + +struct state { +   struct ragel ragel; +   struct stack stack; +   struct codebuf out; +   struct varbuf var; +}; + +static void +state_stack_num(struct state *state, const uint8_t base) +{ +   assert(state); +   membuf_terminate(&state->var.buf, (char[]){ 0 }, 1); +   const char *str = (char*)state->var.buf.mem.data + state->var.offset; +   state->stack.type = STACK_NUM; +   state->stack.num = strtoll(str + (base == 16 && *str == 'x'), NULL, base); +   varbuf_remove_last(&state->var); +} + +static void +state_append_arg_var(struct state *state, const bool member, const struct fspec_mem *str) +{ +   assert(state && str); + +   if (!codebuf_append_arg_var(&state->out, member, str)) +      ragel_throw_error(&state->ragel, "'%s' undeclared", (char*)str->data); +} + +static void +state_append_declaration(struct state *state, const enum fspec_declaration decl, const struct fspec_mem *str) +{ +   assert(state && str); + +   if (get_declaration(&state->out, (decl == FSPEC_DECLARATION_MEMBER), str, NULL)) +      ragel_throw_error(&state->ragel, "'%s' redeclared", (char*)str->data); + +   codebuf_append_declaration(&state->out, decl); +   codebuf_append_arg_cstr(&state->out, str->data, str->len); +} + +static void +state_finish_declaration(struct state *state, const enum fspec_declaration decl) +{ +   assert(state && state->out.decl[decl]); +   const char *end = state->out.end[SECTION_CODE]; +   const fspec_off off = end - (char*)state->out.decl[decl]; +   codebuf_replace_arg(&state->out, fspec_op_get_arg(state->out.decl[decl], end, 3, 1<<FSPEC_ARG_OFF), FSPEC_ARG_OFF, &off); +   state->out.decl[decl] = NULL; +} + +%%{ +   machine fspec_lexer; +   variable p state.ragel.p; +   variable pe state.ragel.pe; +   variable eof state.ragel.eof; +   write data noerror nofinal; + +   action arg_eof { +      codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL); +   } + +   action arg_num { +      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) }); +   } + +   action arg_str { +      const struct fspec_mem *str = stack_get_str(&state.stack); +      codebuf_append_arg_cstr(&state.out, str->data, str->len); +   } + +   action arg_var { +      state_append_arg_var(&state, true, stack_get_str(&state.stack)); +   } + +   action filter { +      codebuf_append_op(&state.out, FSPEC_OP_FILTER); +   } + +   action goto { +      codebuf_append_op(&state.out, FSPEC_OP_GOTO); +      state_append_arg_var(&state, false, stack_get_str(&state.stack)); +   } + +   action vnul { +      codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL }); +   } + +   action vdec { +      codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC }); +   } + +   action vhex { +      codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX }); +   } + +   action vstr { +      codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR }); +   } + +   action r8 { +      codebuf_append_op(&state.out, FSPEC_OP_READ); +      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 8 }); +   } + +   action r16 { +      codebuf_append_op(&state.out, FSPEC_OP_READ); +      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 }); +   } + +   action r32 { +      codebuf_append_op(&state.out, FSPEC_OP_READ); +      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 }); +   } + +   action r64 { +      codebuf_append_op(&state.out, FSPEC_OP_READ); +      codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 }); +   } + +   action member_end { +      state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER); +   } + +   action member_start { +      state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack)); +   } + +   action struct_end { +      state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT); +   } + +   action struct_start { +      state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack)); +   } + +   action stack_oct { +      state_stack_num(&state, 8); +   } + +   action stack_hex { +      state_stack_num(&state, 16); +   } + +   action stack_dec { +      state_stack_num(&state, 10); +   } + +   action stack_str { +      membuf_terminate(&state.var.buf, (char[]){ 0 }, 1); +      state.stack.type = STACK_STR; +      state.stack.str = state.var.buf.mem; +      state.stack.str.len = state.var.buf.written; +   } + +   action store_esc_num { +      const fspec_num v = stack_get_num(&state.stack); +      assert(v <= 255); +      const uint8_t u8 = v; +      membuf_append(&state.var.buf, &u8, sizeof(u8)); +   } + +   action store_esc { +      const struct { const char e, v; } map[] = { +         { .e = 'a', .v = '\a' }, +         { .e = 'b', .v = '\b' }, +         { .e = 'f', .v = '\f' }, +         { .e = 'n', .v = '\n' }, +         { .e = 'r', .v = '\r' }, +         { .e = 't', .v = '\t' }, +         { .e = 'v', .v = '\v' }, +         { .e = '\\', .v = '\\' }, +         { .e = '\'', .v = '\'' }, +         { .e = '\"', .v = '"' }, +         { .e = 'e', .v = 0x1B }, +      }; + +      for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { +         if (*state.ragel.p != map[i].e) +            continue; + +         membuf_append(&state.var.buf, &map[i].v, sizeof(map[i].v)); +         break; +      } +   } + +   action store { +      membuf_append(&state.var.buf, state.ragel.p, 1); +   } + +   action begin_num { +      varbuf_begin(&state.var); +   } + +   action begin_str { +      varbuf_reset(&state.var); +   } + +   action type_err { +      ragel_throw_error(&state.ragel, "unknown type name"); +   } + +   action visual_err { +      ragel_throw_error(&state.ragel, "unknown visualization"); +   } + +   action syntax_err { +      ragel_throw_error(&state.ragel, "malformed input (machine failed here or in next expression)"); +   } + +   action line { +      ragel_advance_line(&state.ragel); +   } + +   # Semantic +   quote = ['"]; +   newline = '\n'; +   esc = [abfnrtv\\'"e]; +   esc_chr = '\\'; +   esc_hex = 'x' <: xdigit{2}; +   hex = '0' <: esc_hex; +   oct = [0-7]{1,3}; +   dec = [\-+]? <: (([1-9] <: digit*) | '0'); +   valid = ^cntrl; +   comment = '//' <: valid* :>> newline; +   type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's32') %r64; +   visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr; +   reserved = 'struct' | type | visual; +   name = ((alpha | '_') <: (alnum | '_')*) - reserved; + +   # Stack +   stack_name = name >begin_str $store %stack_str; +   stack_hex = hex >begin_num $store %stack_hex; +   stack_dec = dec >begin_num $store %stack_dec; +   stack_oct = oct >begin_num $store %stack_oct; +   stack_esc_hex = esc_hex >begin_num $store %stack_hex; +   stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc); +   stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote; +   stack_num = stack_dec | stack_hex; + +   # Catchers +   catch_struct = 'struct ' <: stack_name; +   catch_type = (catch_struct %goto | type) $!type_err; +   catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var; +   catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']'; +   catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? <: (', ' <: catch_args)* :>> ')')?; +   catch_visual = ' ' <: visual $!visual_err; + +   # Abstract +   member = stack_name %member_start :> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) :>> ';' %member_end; +   struct = catch_struct %struct_start :>> ' {' <: (space | comment | member)* :>> '};' %struct_end; +   line = valid* :>> newline %line; +   main := ((space | comment | struct)* & line*) $!syntax_err; +}%% + +bool +fspec_lexer_parse(struct fspec_lexer *lexer, const char *name) +{ +   int cs; +   %% write init; + +   (void)fspec_lexer_en_main; +   assert(lexer); +   assert(lexer->ops.read); +   assert(lexer->mem.input.data && lexer->mem.input.len); +   assert(lexer->mem.output.data && lexer->mem.output.len); +   assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); +   assert(lexer->mem.output.len <= (fspec_off)~0 && "output storage size exceeds fspec_off range"); + +   char var[256]; +   struct state state = { +      .ragel.name = name, +      .ragel.lineno = 1, +      .var.buf.mem = { .data = var, .len = sizeof(var) }, +      .out.buf.mem = lexer->mem.output, +   }; + +   static const fspec_num version = 0; +   state.out.end[SECTION_CODE] = state.out.end[SECTION_DATA] = state.out.buf.mem.data; +   codebuf_append_op(&state.out, FSPEC_OP_HEADER); +   codebuf_append_arg(&state.out, FSPEC_ARG_NUM, &version); +   codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ PLACEHOLDER }); +   codebuf_append_arg(&state.out, FSPEC_ARG_DAT, (fspec_off[]){ PLACEHOLDER }); +   state.out.end[SECTION_DATA] = state.out.end[SECTION_CODE]; +   state.out.strings = state.out.end[SECTION_DATA]; + +   struct fspec_mem input = lexer->mem.input; +   for (bool eof = false; !state.ragel.error && !eof;) { +      const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len); +      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; +      ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); +      %% write exec; +   } + +   { +      const void *end = state.out.end[SECTION_CODE]; +      codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 2, 1<<FSPEC_ARG_NUM), FSPEC_ARG_NUM, (fspec_num[]){ state.out.declarations }); +      const fspec_off off = (char*)state.out.end[SECTION_DATA] - (char*)state.out.strings; +      codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 3, 1<<FSPEC_ARG_DAT), FSPEC_ARG_DAT, &off); +   } + +   lexer->mem.output.len = state.out.buf.written; +   return !state.ragel.error; +} diff --git a/src/fspec/memory.h b/src/fspec/memory.h new file mode 100644 index 0000000..768415a --- /dev/null +++ b/src/fspec/memory.h @@ -0,0 +1,8 @@ +#pragma once + +#include <stddef.h> + +struct fspec_mem { +   void *data; +   size_t len; +}; diff --git a/src/fspec/validator.h b/src/fspec/validator.h new file mode 100644 index 0000000..c4705b2 --- /dev/null +++ b/src/fspec/validator.h @@ -0,0 +1,17 @@ +#pragma once + +#include <fspec/memory.h> + +struct fspec_validator; +struct fspec_validator { +   struct { +      size_t (*read)(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb); +   } ops; + +   struct { +      struct fspec_mem input; +   } mem; +}; + +bool +fspec_validator_parse(struct fspec_validator *validator, const char *name); diff --git a/src/fspec/validator.rl b/src/fspec/validator.rl new file mode 100644 index 0000000..093348d --- /dev/null +++ b/src/fspec/validator.rl @@ -0,0 +1,237 @@ +#include "ragel/ragel.h" +#include <fspec/bcode.h> +#include <fspec/validator.h> +#include "bcode-internal.h" + +#include <assert.h> + +struct stack { +   union { +      fspec_num num; +      fspec_off off; +      fspec_var var; +      fspec_strsz strsz; +      unsigned char b[sizeof(fspec_num)]; +   } u; +   uint8_t i; // writing index for u.b +}; + +struct range { +   fspec_off start, end; +}; + +struct context { +   struct range data; +   fspec_var declarations, expected_declarations; +   fspec_off str_end, decl_start, decl_end[FSPEC_DECLARATION_LAST], offset; +   enum fspec_declaration last_decl_type; +}; + +struct state { +   struct ragel ragel; +   struct context context; +   struct stack stack; +   bool valid; +}; + +%%{ +   machine fspec_validator; +   variable p state.ragel.p; +   variable pe state.ragel.pe; +   variable eof state.ragel.eof; +   write data noerror nofinal; + +   action store_decls { +      if (state.stack.u.num > (fspec_var)~0) +         ragel_throw_error(&state.ragel, "expected declarations overflows"); + +      state.context.expected_declarations = state.stack.u.num; +   } + +   action check_decls { +      if (state.context.declarations != state.context.expected_declarations) +         ragel_throw_error(&state.ragel, "expected declarations did not match with the content: expected: %" PRI_FSPEC_VAR " got: %" PRI_FSPEC_VAR, state.context.expected_declarations, state.context.declarations); +   } + +   action mark_dat { +      // we can replace this logic with fspec generated code in future +      // struct str { len: u32; str: u8[len]['\0']; } +      // struct dat { len: u32; strings: struct str[$::len]; } +      if (state.context.offset > (fspec_off)~0 - state.stack.u.off) +         ragel_throw_error(&state.ragel, "dat section length overflows"); + +      state.context.data = (struct range){ .start = state.context.offset, .end = state.stack.u.off }; +   } + +   action test_inside_dat { +      state.context.offset < (state.context.data.start + state.context.data.end) +   } + +   action mark_str { +      if (state.context.offset >= (fspec_off)~0 - state.stack.u.strsz) // >= for null byte +         ragel_throw_error(&state.ragel, "str length overflows"); + +      state.context.str_end = state.context.offset + state.stack.u.strsz; +   } + +   action test_inside_str { +      state.context.offset < state.context.str_end +   } + +   action check_var { +      if (state.context.declarations <= state.stack.u.var) +         ragel_throw_error(&state.ragel, "refenced undeclared variable"); +   } + +   action check_str { +      if (state.stack.u.off < state.context.data.start) { +         ragel_throw_error(&state.ragel, "str before data section range: %" PRI_FSPEC_OFF " <= %" PRI_FSPEC_OFF, state.stack.u.off, state.context.data.start + state.context.data.end); +      } else if (state.context.data.start + state.context.data.end <= state.stack.u.off) { +         ragel_throw_error(&state.ragel, "str after data section range: %" PRI_FSPEC_OFF " <= %" PRI_FSPEC_OFF, state.context.data.start + state.context.data.end, state.stack.u.off); +      } +   } + +   action check_decl_type { +      if (state.stack.u.num >= FSPEC_DECLARATION_LAST) +         ragel_throw_error(&state.ragel, "invalid declaration type: %" PRI_FSPEC_NUM, state.stack.u.num); + +      state.context.last_decl_type = state.stack.u.num; +   } + +   action check_decl_num { +      if (state.context.declarations >= (fspec_var)~0) +         ragel_throw_error(&state.ragel, "declarations overflows"); + +      if (state.context.declarations != state.stack.u.num) +         ragel_throw_error(&state.ragel, "invalid declaration number: %" PRI_FSPEC_NUM " expected: %" PRI_FSPEC_VAR, state.stack.u.num, state.context.declarations); + +      ++state.context.declarations; +   } + +   action start_decl { +      state.context.decl_start = state.context.offset; +   } + +   action mark_decl { +      const fspec_off sz = (state.context.offset - state.context.decl_start); +      assert(sz <= state.stack.u.off); + +      if (state.context.offset > (fspec_off)~0 - state.stack.u.off - sz) +         ragel_throw_error(&state.ragel, "declaration length overflows"); + +      state.context.decl_end[state.context.last_decl_type] = state.context.offset + state.stack.u.off - sz; +   } + +   action check_struct { +      if (state.context.last_decl_type != FSPEC_DECLARATION_STRUCT) +         ragel_throw_error(&state.ragel, "expected struct declaration"); +   } + +   action check_member { +      if (state.context.last_decl_type != FSPEC_DECLARATION_MEMBER) +         ragel_throw_error(&state.ragel, "expected member declaration"); +   } + +   action check_member_end { +      if (state.context.decl_end[FSPEC_DECLARATION_MEMBER] != state.context.offset) +         ragel_throw_error(&state.ragel, "invalid member end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_MEMBER], state.context.offset); +   } + +   action check_struct_end { +      if (state.context.decl_end[FSPEC_DECLARATION_STRUCT] != state.context.offset) +         ragel_throw_error(&state.ragel, "invalid struct end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_STRUCT], state.context.offset); +   } + +   action check_visual_type { +      if (state.stack.u.num >= FSPEC_VISUAL_LAST) +         ragel_throw_error(&state.ragel, "invalid visual type: %" PRI_FSPEC_NUM, state.stack.u.num); +   } + +   action arg_error { +      ragel_throw_error(&state.ragel, "malformed argument"); +   } + +   action op_error { +      ragel_throw_error(&state.ragel, "unexpected argument"); +   } + +   action pattern_error { +      ragel_throw_error(&state.ragel, "unexpected pattern"); +   } + +   action syntax_error { +      ragel_throw_error(&state.ragel, "unexpected byte"); +   } + +   action store { +      if (state.stack.i < sizeof(state.stack.u.b)) +         state.stack.u.b[state.stack.i++] = fc; +   } + +   action flush { +      state.stack.i = 0; +   } + +   action advance { +      ++state.context.offset; +   } + +   stack1 = any{1} >flush $store; +   stack2 = any{2} >flush $store; +   stack4 = any{4} >flush $store; +   stack8 = any{8} >flush $store; + +   ARG_DAT = 0 stack4 %*mark_dat ((stack1 %*mark_str (any when test_inside_str)* 0) when test_inside_dat)*; +   ARG_OFF = 1 stack4; +   ARG_NUM = 2 stack8; +   ARG_VAR = 3 stack2 %check_var; +   ARG_STR = 4 stack4 %check_str; +   ARG_EOF = 5; + +   OP_ARG_DAT = 0 ARG_DAT $!arg_error; +   OP_ARG_OFF = 0 ARG_OFF $!arg_error; +   OP_ARG_NUM = 0 ARG_NUM $!arg_error; +   OP_ARG_VAR = 0 ARG_VAR $!arg_error; +   OP_ARG_STR = 0 ARG_STR $!arg_error; +   OP_ARG_EOF = 0 ARG_EOF $!arg_error; + +   OP_HEADER = 1 (OP_ARG_NUM OP_ARG_NUM %store_decls OP_ARG_DAT) $!op_error; +   OP_DECLARATION = 2 >start_decl (OP_ARG_NUM %check_decl_type OP_ARG_NUM %check_decl_num OP_ARG_OFF %mark_decl OP_ARG_STR) $!op_error; +   OP_READ = 3 (OP_ARG_NUM (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error; +   OP_GOTO = 4 (OP_ARG_VAR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error; +   OP_FILTER = 5 (OP_ARG_STR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR)*) $!op_error; +   OP_VISUAL = 6 (OP_ARG_NUM %check_visual_type) $!op_error; + +   pattern = (OP_DECLARATION %check_struct <: (OP_DECLARATION %check_member (OP_READ | OP_GOTO) OP_FILTER? OP_VISUAL? %check_member_end)*)* %check_struct_end $!pattern_error; +   main := (OP_HEADER <: pattern) %check_decls $advance $!syntax_error; +}%% + +bool +fspec_validator_parse(struct fspec_validator *validator, const char *name) +{ +   int cs; +   %% write init; + +   (void)fspec_validator_en_main; +   assert(validator); +   assert(validator->ops.read); +   assert(validator->mem.input.data && validator->mem.input.len); +   assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + +   struct state state = { +      .ragel.name = name, +      .ragel.lineno = 1, +   }; + +   static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union"); + +   struct fspec_mem input = validator->mem.input; +   for (bool eof = false; !state.ragel.error && !eof;) { +      const size_t bytes = validator->ops.read(validator, input.data, 1, input.len); +      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true }; +      ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); +      %% write exec; +   } + +   return !state.ragel.error; +} | 
