summaryrefslogtreecommitdiff
path: root/src/fspec
diff options
context:
space:
mode:
Diffstat (limited to 'src/fspec')
-rw-r--r--src/fspec/bcode-internal.h16
-rw-r--r--src/fspec/bcode.c189
-rw-r--r--src/fspec/bcode.h64
-rw-r--r--src/fspec/lexer.h17
-rw-r--r--src/fspec/lexer.rl616
-rw-r--r--src/fspec/memory.h8
-rw-r--r--src/fspec/validator.h17
-rw-r--r--src/fspec/validator.rl237
8 files changed, 1164 insertions, 0 deletions
diff --git a/src/fspec/bcode-internal.h b/src/fspec/bcode-internal.h
new file mode 100644
index 0000000..8c9ce74
--- /dev/null
+++ b/src/fspec/bcode-internal.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <inttypes.h>
+#include <stdint.h>
+
+/** integer type for the length of a string literal (its width bounds the maximum size), plus its printf specifier */
+#define PRI_FSPEC_STRSZ PRIu8
+typedef uint8_t fspec_strsz;
+
+/** integer type for variable ids (its width bounds the maximum range), plus its printf specifier */
+#define PRI_FSPEC_VAR PRIu16
+typedef uint16_t fspec_var;
+
+/** integer type for bytecode offsets and lengths (its width bounds the maximum range), plus its printf specifier */
+#define PRI_FSPEC_OFF PRIu32
+typedef uint32_t fspec_off;
diff --git a/src/fspec/bcode.c b/src/fspec/bcode.c
new file mode 100644
index 0000000..0a89260
--- /dev/null
+++ b/src/fspec/bcode.c
@@ -0,0 +1,189 @@
+#include <fspec/bcode.h>
+#include "bcode-internal.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <err.h>
+
+/* Compile-time checks: offsets must fit into fspec_mem.len, and op / arg tags must be exactly one byte wide. */
+static_assert(sizeof(fspec_off) <= sizeof(((struct fspec_mem*)0)->len), "fspec_off should not be larger than what fspec_mem can represent");
+static_assert(sizeof(enum fspec_op) == sizeof(uint8_t), "enum fspec_op is expected to have size of uint8_t");
+static_assert(sizeof(enum fspec_arg) == sizeof(uint8_t), "enum fspec_arg is expected to have size of uint8_t");
+
+/**
+ * Number of payload bytes stored after the type tag of `arg`.
+ * Returns 0 for FSPEC_ARG_EOF, exits the process for FSPEC_ARG_LAST.
+ */
+static fspec_off
+arg_data_len(const enum fspec_arg *arg)
+{
+    assert(arg);
+    fspec_off payload = 0;
+
+    switch (*arg) {
+        case FSPEC_ARG_NUM:
+            payload = sizeof(fspec_num);
+            break;
+
+        case FSPEC_ARG_VAR:
+            payload = sizeof(fspec_var);
+            break;
+
+        case FSPEC_ARG_STR:
+        case FSPEC_ARG_OFF:
+            payload = sizeof(fspec_off);
+            break;
+
+        case FSPEC_ARG_DAT:
+            {
+                /* a length field followed by that many inline bytes */
+                struct fspec_mem blob;
+                fspec_arg_get_mem(arg, NULL, &blob);
+                payload = sizeof(fspec_off) + blob.len;
+            }
+            break;
+
+        case FSPEC_ARG_EOF:
+            break;
+
+        case FSPEC_ARG_LAST:
+            errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg);
+            break;
+    }
+
+    return payload;
+}
+
+/** Total encoded size of an argument: its one-byte type tag plus its payload. */
+static fspec_off
+arg_len(const enum fspec_arg *arg)
+{
+    const fspec_off tag_sz = sizeof(*arg);
+    return tag_sz + arg_data_len(arg);
+}
+
+/**
+ * Resolve the memory region an argument refers to.
+ *
+ * @param arg      pointer to the argument's type tag inside the bytecode
+ * @param data     base pointer that STR offsets are relative to (only needed for FSPEC_ARG_STR)
+ * @param out_mem  receives pointer and length of the referenced bytes
+ *
+ * STR yields the string bytes behind its length prefix, DAT yields the inline
+ * blob, VAR/NUM/OFF yield the raw payload, EOF yields an empty region.
+ */
+void
+fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem)
+{
+    assert(arg && out_mem);
+    char *payload = (char*)arg + sizeof(*arg);
+
+    switch (*arg) {
+        case FSPEC_ARG_STR:
+            {
+                assert(data);
+                fspec_off str_off;
+                fspec_strsz str_len;
+                memcpy(&str_off, payload, sizeof(str_off));
+                char *entry = (char*)data + str_off;
+                memcpy(&str_len, entry, sizeof(str_len));
+                out_mem->data = entry + sizeof(str_len);
+                out_mem->len = str_len;
+            }
+            break;
+
+        case FSPEC_ARG_DAT:
+            {
+                fspec_off blob_len;
+                memcpy(&blob_len, payload, sizeof(blob_len));
+                out_mem->data = payload + sizeof(blob_len);
+                out_mem->len = blob_len;
+            }
+            break;
+
+        case FSPEC_ARG_VAR:
+        case FSPEC_ARG_NUM:
+        case FSPEC_ARG_OFF:
+            out_mem->data = payload;
+            out_mem->len = arg_data_len(arg);
+            break;
+
+        case FSPEC_ARG_EOF:
+            out_mem->data = NULL;
+            out_mem->len = 0;
+            break;
+
+        case FSPEC_ARG_LAST:
+            errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg);
+            break;
+    }
+}
+
+/**
+ * Read the numeric payload of an argument, widened to fspec_num.
+ *
+ * NUM is read as fspec_num, VAR as fspec_var, DAT/OFF as fspec_off (for DAT
+ * this is the length field). STR, EOF and LAST exit the process.
+ *
+ * @param arg  pointer to the argument's type tag inside the bytecode
+ * @return     the payload value; 0 if the tag matches no known type
+ */
+fspec_num
+fspec_arg_get_num(const enum fspec_arg *arg)
+{
+    assert(arg);
+    /* byte arithmetic, same as fspec_arg_get_mem; does not depend on sizeof(enum) */
+    const char *payload = (const char*)arg + sizeof(*arg);
+    /* initialised so an out-of-range tag cannot return an indeterminate value */
+    fspec_num v = 0;
+
+    switch (*arg) {
+        case FSPEC_ARG_NUM:
+            memcpy(&v, payload, sizeof(v));
+            break;
+
+        case FSPEC_ARG_VAR:
+            {
+                fspec_var var;
+                memcpy(&var, payload, sizeof(var));
+                v = var;
+            }
+            break;
+
+        case FSPEC_ARG_DAT:
+        case FSPEC_ARG_OFF:
+            {
+                fspec_off off;
+                memcpy(&off, payload, sizeof(off));
+                v = off;
+            }
+            break;
+
+        case FSPEC_ARG_STR:
+        case FSPEC_ARG_EOF:
+        case FSPEC_ARG_LAST:
+            errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg);
+            break;
+    }
+
+    return v;
+}
+
+/**
+ * Return a pointer to the bytes of a FSPEC_ARG_STR argument.
+ * `data` is the base pointer the stored string offset is relative to.
+ */
+const char*
+fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data)
+{
+    assert(arg && *arg == FSPEC_ARG_STR);
+    struct fspec_mem str;
+    fspec_arg_get_mem(arg, data, &str);
+    const char *cstr = str.data;
+    return cstr;
+}
+
+/**
+ * Fetch the nth (1-based) argument that follows the op at `start`.
+ *
+ * @param start   op whose arguments are inspected
+ * @param end     one past the last valid bytecode byte
+ * @param nth     which argument to return; 0 always yields NULL
+ * @param expect  bitmask of accepted types, bit (1 << FSPEC_ARG_x) per type
+ * @return        pointer to the argument's type tag, or NULL when the op has
+ *                fewer than `nth` arguments
+ *
+ * Exits the process when the argument exists but its type is not in `expect`.
+ */
+const enum fspec_arg*
+fspec_op_get_arg(const enum fspec_op *start, const void *end, const uint8_t nth, const uint32_t expect)
+{
+    uint8_t i = 0;
+    const enum fspec_arg *arg = NULL;
+    for (const enum fspec_op *op = fspec_op_next(start, end, false); op && i < nth; op = fspec_op_next(op, end, false)) {
+        if (*op != FSPEC_OP_ARG)
+            return NULL;
+
+        arg = (const void*)(op + 1);
+        assert(*arg >= 0 && *arg < FSPEC_ARG_LAST);
+        ++i;
+    }
+
+    /* bytecode ended before the nth argument: do not hand back an earlier one */
+    if (i < nth)
+        return NULL;
+
+    if (arg && !(expect & ((uint32_t)1 << *arg)))
+        errx(EXIT_FAILURE, "got unexpected argument of type %u", *arg);
+
+    return arg;
+}
+
+/**
+ * Fetch the nth argument after `arg` by restarting the search from the
+ * FSPEC_OP_ARG byte that precedes the argument's type tag.
+ */
+const enum fspec_arg*
+fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const uint32_t expect)
+{
+    const enum fspec_op *owner = (const void*)(arg - 1);
+    return fspec_op_get_arg(owner, end, nth, expect);
+}
+
+/**
+ * Advance to the op that follows `start`.
+ *
+ * @param start      current op
+ * @param end        one past the last valid bytecode byte
+ * @param skip_args  when true, FSPEC_OP_ARG entries are stepped over so only
+ *                   non-argument ops are returned
+ * @return           the next op, or NULL when the end is reached
+ *
+ * Exits the process when an opcode outside the known range is encountered.
+ */
+const enum fspec_op*
+fspec_op_next(const enum fspec_op *start, const void *end, const bool skip_args)
+{
+    assert(start && end);
+    fspec_off off = sizeof(*start);
+    /* an argument op carries a type tag and payload that must be stepped over too */
+    if ((void*)start < end && *start == FSPEC_OP_ARG)
+        off += arg_len((void*)(start + 1));
+
+    for (const enum fspec_op *op = start + off; (void*)start < end && (void*)op < end; ++op) {
+        if (*op >= FSPEC_OP_LAST)
+            errx(EXIT_FAILURE, "got unexpected opcode %u", *op);
+
+        if (skip_args && *op == FSPEC_OP_ARG) {
+            /* the loop's ++op accounts for the op byte itself */
+            op += arg_len((void*)(op + 1));
+            continue;
+        }
+
+        return op;
+    }
+
+    return NULL;
+}
diff --git a/src/fspec/bcode.h b/src/fspec/bcode.h
new file mode 100644
index 0000000..d84060e
--- /dev/null
+++ b/src/fspec/bcode.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <fspec/memory.h>
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+/** maximum range of numbers */
+#define PRI_FSPEC_NUM PRIu64
+typedef uint64_t fspec_num;
+
+/** type tag of a bytecode argument; stored as a single byte */
+enum fspec_arg {
+ FSPEC_ARG_DAT, /* length field followed by inline bytes */
+ FSPEC_ARG_OFF, /* offset value */
+ FSPEC_ARG_NUM, /* fspec_num value */
+ FSPEC_ARG_VAR, /* variable id */
+ FSPEC_ARG_STR, /* offset of a length-prefixed string */
+ FSPEC_ARG_EOF, /* no payload */
+ FSPEC_ARG_LAST, /* number of argument types, not a valid tag */
+} __attribute__((packed));
+
+/** store the memory region referenced by arg in out_mem; data is the base for string offsets */
+void
+fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem);
+
+/** read the numeric payload of a NUM, VAR, OFF or DAT argument */
+fspec_num
+fspec_arg_get_num(const enum fspec_arg *arg);
+
+/** pointer to the bytes of a STR argument; data is the base for string offsets */
+const char*
+fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data);
+
+/** nth argument following arg, checked against the expect bitmask of (1 << type) bits */
+const enum fspec_arg*
+fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const uint32_t expect);
+
+/** kind of a declaration op */
+enum fspec_declaration {
+ FSPEC_DECLARATION_STRUCT,
+ FSPEC_DECLARATION_MEMBER,
+ FSPEC_DECLARATION_LAST, /* number of kinds, not a valid value */
+} __attribute__((packed));
+
+/** visualization selected by a visual op */
+enum fspec_visual {
+ FSPEC_VISUAL_NUL,
+ FSPEC_VISUAL_DEC,
+ FSPEC_VISUAL_HEX,
+ FSPEC_VISUAL_STR,
+ FSPEC_VISUAL_LAST, /* number of visualizations, not a valid value */
+} __attribute__((packed));
+
+/** bytecode opcode; stored as a single byte */
+enum fspec_op {
+ FSPEC_OP_ARG, /* followed by an enum fspec_arg tag and its payload */
+ FSPEC_OP_HEADER,
+ FSPEC_OP_DECLARATION,
+ FSPEC_OP_READ,
+ FSPEC_OP_GOTO,
+ FSPEC_OP_FILTER,
+ FSPEC_OP_VISUAL,
+ FSPEC_OP_LAST, /* number of opcodes, not a valid value */
+} __attribute__((packed));
+
+/** op following op, or NULL at end; with skip_args argument ops are stepped over */
+const enum fspec_op*
+fspec_op_next(const enum fspec_op *op, const void *end, const bool skip_args);
+
+/** nth argument of op, checked against the expect bitmask of (1 << type) bits */
+const enum fspec_arg*
+fspec_op_get_arg(const enum fspec_op *op, const void *end, const uint8_t nth, const uint32_t expect);
diff --git a/src/fspec/lexer.h b/src/fspec/lexer.h
new file mode 100644
index 0000000..7b60e6b
--- /dev/null
+++ b/src/fspec/lexer.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <fspec/memory.h>
+
+#include <stdbool.h>
+
+/**
+ * Lexer configuration supplied by the caller.
+ * ops.read   fills ptr with input and returns the number of bytes read
+ * mem.input  scratch buffer the input is read into
+ * mem.output buffer that receives the generated bytecode; its len is
+ *            updated to the number of bytes written
+ */
+struct fspec_lexer;
+struct fspec_lexer {
+    struct {
+        size_t (*read)(struct fspec_lexer *lexer, void *ptr, const size_t size, const size_t nmemb);
+    } ops;
+
+    struct {
+        struct fspec_mem input, output;
+    } mem;
+};
+
+/** Lex the input named `name` into bytecode; returns false when an error was reported. */
+bool
+fspec_lexer_parse(struct fspec_lexer *lexer, const char *name);
diff --git a/src/fspec/lexer.rl b/src/fspec/lexer.rl
new file mode 100644
index 0000000..81390e2
--- /dev/null
+++ b/src/fspec/lexer.rl
@@ -0,0 +1,616 @@
+#include "ragel/ragel.h"
+#include <fspec/bcode.h>
+#include <fspec/lexer.h>
+#include "bcode-internal.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <err.h>
+
+/** dummy value for arguments whose real value is patched in later */
+#define PLACEHOLDER 0xDEADBEEF
+/** element count of a real array (not a pointer) */
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+/* fspec_strsz is provided by bcode-internal.h, included above */
+
+/** fixed-capacity byte buffer: mem is the storage, written the number of bytes in use */
+struct membuf {
+ struct fspec_mem mem;
+ fspec_off written;
+};
+
+/** Exit the process unless `nmemb` more bytes fit behind the bytes already written. */
+static void
+membuf_bounds_check(const struct membuf *buf, const fspec_off nmemb)
+{
+    assert(buf);
+    const size_t capacity = buf->mem.len;
+
+    if (capacity >= nmemb && buf->written <= capacity - nmemb)
+        return;
+
+    errx(EXIT_FAILURE, "%s: %" PRI_FSPEC_OFF " bytes exceeds the maximum storage size of %zu bytes", __func__, buf->written + nmemb, buf->mem.len);
+}
+
+/** Copy `data` behind the written bytes without advancing `written`. */
+static void
+membuf_terminate(struct membuf *buf, const void *data, const fspec_off data_sz)
+{
+    membuf_bounds_check(buf, data_sz);
+    char *tail = (char*)buf->mem.data + buf->written;
+    memcpy(tail, data, data_sz);
+}
+
+/** Overwrite `data_sz` bytes at offset `off`; the range must lie inside the storage. */
+static void
+membuf_replace(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz)
+{
+    assert(buf->mem.len >= data_sz && off <= buf->mem.len - data_sz);
+    char *dst = (char*)buf->mem.data + off;
+    memcpy(dst, data, data_sz);
+}
+
+/**
+ * Insert `data` at offset `off`, shifting the already written bytes behind
+ * `off` towards the end of the buffer.
+ */
+static void
+membuf_append_at(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz)
+{
+    assert(off <= buf->written);
+    membuf_bounds_check(buf, data_sz);
+
+    /* open a gap of data_sz bytes at off */
+    char *gap = (char*)buf->mem.data + off;
+    const size_t tail_len = buf->written - off;
+    memmove(gap + data_sz, gap, tail_len);
+
+    membuf_replace(buf, off, data, data_sz);
+    buf->written += data_sz;
+    assert(buf->written <= buf->mem.len);
+}
+
+/** Append `data` behind the bytes written so far. */
+static void
+membuf_append(struct membuf *buf, const void *data, const fspec_off data_sz)
+{
+    const fspec_off tail = buf->written;
+    membuf_append_at(buf, tail, data, data_sz);
+}
+
+/** scratch buffer for the token being collected; offset marks where the latest token begins */
+struct varbuf {
+ struct membuf buf;
+ fspec_off offset;
+};
+
+/** Mark the current write position as the start of a new token. */
+static inline void
+varbuf_begin(struct varbuf *var)
+{
+    assert(var);
+    const fspec_off mark = var->buf.written;
+    var->offset = mark;
+    assert(var->offset <= var->buf.mem.len);
+}
+
+/** Discard everything collected so far. */
+static void
+varbuf_reset(struct varbuf *var)
+{
+    assert(var);
+    var->buf.written = 0;
+    var->offset = 0;
+}
+
+/** Drop the bytes written since the last varbuf_begin(). */
+static inline void
+varbuf_remove_last(struct varbuf *var)
+{
+    assert(var);
+    assert(var->buf.written >= var->offset);
+    const fspec_off pending = var->buf.written - var->offset;
+    assert(var->buf.written >= pending);
+    var->buf.written = var->buf.written - pending;
+    assert(var->buf.written <= var->buf.mem.len);
+}
+
+/** regions of the output buffer; each has its own end pointer in struct codebuf */
+enum section {
+ SECTION_DATA,
+ SECTION_CODE,
+ SECTION_LAST,
+};
+
+/**
+ * Output buffer for the generated bytecode.
+ * decl         start of the currently open declaration per kind, NULL when none is open
+ * end          current end of each section
+ * strings      start of the string table searched by get_string_offset()
+ * declarations number of declarations emitted so far
+ */
+struct codebuf {
+ struct membuf buf;
+ const void *decl[FSPEC_DECLARATION_LAST], *end[SECTION_LAST], *strings;
+ fspec_var declarations;
+};
+
+/**
+ * Insert `data` at the end of `section` and fix up every pointer that the
+ * insertion moved.
+ */
+static void
+codebuf_append(struct codebuf *code, const enum section section, const void *data, const fspec_off data_sz)
+{
+ assert(code->end[section]);
+ const fspec_off off = (char*)code->end[section] - (char*)code->buf.mem.data;
+ membuf_append_at(&code->buf, off, data, data_sz);
+
+ /* the end of this section and of every later section moved by data_sz */
+ for (enum section s = section; s < ARRAY_SIZE(code->end); ++s) {
+ code->end[s] = (char*)code->end[s] + data_sz;
+ assert((char*)code->end[s] <= (char*)code->buf.mem.data + code->buf.mem.len);
+ }
+
+ /* inserting into the data section shifts the code, so open declaration pointers move as well */
+ if (section == SECTION_DATA) {
+ for (enum fspec_declaration d = 0; d < ARRAY_SIZE(code->decl); ++d) {
+ code->decl[d] = (code->decl[d] ? (char*)code->decl[d] + data_sz : NULL);
+ assert((char*)code->decl[d] <= (char*)code->buf.mem.data + code->buf.mem.len);
+ }
+ }
+
+ assert(code->end[SECTION_DATA] <= code->end[SECTION_CODE]);
+ assert((char*)code->end[SECTION_CODE] == (char*)code->buf.mem.data + code->buf.written);
+}
+
+/** Emit a single opcode byte at the end of the code section. */
+static void
+codebuf_append_op(struct codebuf *code, const enum fspec_op op)
+{
+    const enum fspec_op opcode = op;
+    codebuf_append(code, SECTION_CODE, &opcode, sizeof(opcode));
+}
+
+/**
+ * Size in bytes of the fixed payload emitted for an argument of `type`.
+ * EOF has no payload; LAST exits the process.
+ */
+static uint8_t
+arg_sizeof(const enum fspec_arg type)
+{
+    uint8_t size = 0;
+
+    switch (type) {
+        case FSPEC_ARG_DAT:
+        case FSPEC_ARG_OFF:
+        case FSPEC_ARG_STR:
+            size = sizeof(fspec_off);
+            break;
+
+        case FSPEC_ARG_NUM:
+            size = sizeof(fspec_num);
+            break;
+
+        case FSPEC_ARG_VAR:
+            size = sizeof(fspec_var);
+            break;
+
+        case FSPEC_ARG_EOF:
+            break;
+
+        case FSPEC_ARG_LAST:
+            errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, type);
+    }
+
+    return size;
+}
+
+/**
+ * Emit an argument: FSPEC_OP_ARG, the type tag, then the payload read from `v`.
+ *
+ * @param code  output buffer
+ * @param type  argument type; determines how many bytes are read from `v`
+ * @param v     payload source; may be NULL for types without payload (EOF)
+ */
+static void
+codebuf_append_arg(struct codebuf *code, const enum fspec_arg type, const void *v)
+{
+    assert(code);
+    codebuf_append_op(code, FSPEC_OP_ARG);
+    codebuf_append(code, SECTION_CODE, &type, sizeof(type));
+
+    /* skip the copy for empty payloads: memcpy with a NULL source is undefined even for size 0 */
+    const uint8_t payload_sz = arg_sizeof(type);
+    if (payload_sz > 0) {
+        assert(v);
+        codebuf_append(code, SECTION_CODE, v, payload_sz);
+    }
+}
+
+/** Overwrite the payload of an already emitted argument; its type must equal `type`. */
+static void
+codebuf_replace_arg(struct codebuf *code, const enum fspec_arg *arg, const enum fspec_arg type, const void *v)
+{
+    assert(code && arg);
+    assert(*arg == type);
+    /* the payload starts one byte behind the type tag */
+    const char *payload = (const char*)arg + 1;
+    const fspec_off off = payload - (char*)code->buf.mem.data;
+    membuf_replace(&code->buf, off, v, arg_sizeof(type));
+}
+
+/**
+ * Search the string table [start, end) for a string equal to `str`.
+ * Each entry is a length byte, the string bytes and one trailing byte.
+ * On success *out_off points at the entry's length byte.
+ */
+static bool
+get_string_offset(const void *start, const void *end, const void *str, const fspec_strsz str_sz, void const **out_off)
+{
+    assert(out_off);
+
+    const char *entry = start;
+    while ((const void*)entry < end) {
+        fspec_strsz len;
+        memcpy(&len, entry, sizeof(len));
+        const char *bytes = entry + sizeof(len);
+
+        if (len == str_sz && memcmp(bytes, str, len) == 0) {
+            *out_off = entry;
+            return true;
+        }
+
+        entry = bytes + len + 1;
+    }
+
+    return false;
+}
+
+/**
+ * Emit a STR argument for `str`. The string is added to the data section
+ * only if an identical one is not stored there yet.
+ */
+static void
+codebuf_append_arg_cstr(struct codebuf *code, const void *str, const fspec_strsz str_sz)
+{
+    const void *entry;
+    const bool known = get_string_offset(code->strings, code->end[SECTION_DATA], str, str_sz, &entry);
+
+    if (!known) {
+        /* new entry: length byte, string bytes, terminating zero */
+        entry = code->end[SECTION_DATA];
+        const char nul = 0;
+        codebuf_append(code, SECTION_DATA, &str_sz, sizeof(str_sz));
+        codebuf_append(code, SECTION_DATA, str, str_sz);
+        codebuf_append(code, SECTION_DATA, &nul, 1);
+    }
+
+    const fspec_off off = (char*)entry - (char*)code->buf.mem.data;
+    codebuf_append_arg(code, FSPEC_ARG_STR, &off);
+}
+
+/**
+ * Find an op of kind `op` in [start, end) whose nth argument is a string
+ * equal to `name`.
+ *
+ * @param data    base pointer for string offsets
+ * @param out_id  optional; receives the id counted for the match. Counting
+ *                starts at the id stored in `start` when that is a
+ *                declaration, else at 0, and grows by one per non-matching
+ *                candidate.
+ * @return        the matching op, or NULL when none exists
+ */
+static const enum fspec_op*
+get_named_op(const enum fspec_op *start, const void *end, const void *data, const enum fspec_op op, const uint8_t nth, const void *name, const fspec_strsz name_sz, fspec_var *out_id)
+{
+    fspec_var id = 0;
+    if ((void*)start < end && *start == FSPEC_OP_DECLARATION)
+        id = fspec_arg_get_num(fspec_op_get_arg(start, end, 2, 1<<FSPEC_ARG_NUM));
+
+    /* the range may be empty (start == end); never dereference at or past end */
+    for (const enum fspec_op *p = start; p && (const void*)p < end; p = fspec_op_next(p, end, true)) {
+        const enum fspec_arg *arg;
+        if (*p != op || !(arg = fspec_op_get_arg(p, end, nth, 1<<FSPEC_ARG_STR)))
+            continue;
+
+        struct fspec_mem str;
+        fspec_arg_get_mem(arg, data, &str);
+        if (str.len == name_sz && !memcmp(name, str.data, name_sz)) {
+            if (out_id)
+                *out_id = id;
+
+            return p;
+        }
+
+        ++id;
+    }
+
+    return NULL;
+}
+
+/**
+ * Look up a declaration by name. Member lookups start at the currently open
+ * struct declaration, other lookups at the end of the data section.
+ */
+static const enum fspec_op*
+get_declaration(struct codebuf *code, const bool member, const struct fspec_mem *str, fspec_var *out_id)
+{
+    const void *start;
+    if (member)
+        start = code->decl[FSPEC_DECLARATION_STRUCT];
+    else
+        start = code->end[SECTION_DATA];
+
+    return get_named_op(start, code->end[SECTION_CODE], code->buf.mem.data, FSPEC_OP_DECLARATION, 4, str->data, str->len, out_id);
+}
+
+static bool
+codebuf_append_arg_var(struct codebuf *code, const bool member, const struct fspec_mem *var)
+{
+ fspec_var id = -1;
+ if (!get_declaration(code, member, var, &id))
+ return false;
+
+ codebuf_append_arg(code, FSPEC_ARG_VAR, &id);
+ return true;
+}
+
+/**
+ * Open a declaration of kind `decl`: remember where it starts and emit the
+ * op with its kind, its running id and a placeholder offset argument.
+ */
+static void
+codebuf_append_declaration(struct codebuf *code, const enum fspec_declaration decl)
+{
+    code->decl[decl] = code->end[SECTION_CODE];
+    codebuf_append_op(code, FSPEC_OP_DECLARATION);
+
+    const fspec_num kind = decl;
+    codebuf_append_arg(code, FSPEC_ARG_NUM, &kind);
+
+    const fspec_num id = code->declarations++;
+    codebuf_append_arg(code, FSPEC_ARG_NUM, &id);
+
+    const fspec_off pending = PLACEHOLDER;
+    codebuf_append_arg(code, FSPEC_ARG_OFF, &pending);
+}
+
+/** which union member of struct stack is currently valid */
+enum stack_type {
+ STACK_STR,
+ STACK_NUM,
+};
+
+/** single-slot value holder: the most recently lexed string or number */
+struct stack {
+ union {
+ struct fspec_mem str;
+ uint64_t num;
+ };
+ enum stack_type type;
+};
+
+static const char*
+stack_type_to_str(const enum stack_type type)
+{
+ switch (type) {
+ case STACK_STR: return "str";
+ case STACK_NUM: return "num";
+ };
+ return "unknown";
+}
+
+/** Exit the process unless the value held by `stack` has the requested type. */
+static void
+stack_check_type(const struct stack *stack, const enum stack_type type)
+{
+    assert(stack);
+
+    if (stack->type == type)
+        return;
+
+    errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type));
+}
+
+/** Return the held string; exits the process when a number is held instead. */
+static const struct fspec_mem*
+stack_get_str(const struct stack *stack)
+{
+    stack_check_type(stack, STACK_STR);
+    const struct fspec_mem *held = &stack->str;
+    return held;
+}
+
+/** Return the held number; exits the process when a string is held instead. */
+static uint64_t
+stack_get_num(const struct stack *stack)
+{
+    stack_check_type(stack, STACK_NUM);
+    const uint64_t held = stack->num;
+    return held;
+}
+
+/** everything the lexer actions operate on */
+struct state {
+ struct ragel ragel; /* machine cursor and error state */
+ struct stack stack; /* most recently lexed value */
+ struct codebuf out; /* generated bytecode */
+ struct varbuf var; /* token being collected */
+};
+
+/**
+ * Convert the token collected since the last varbuf_begin() into a number,
+ * store it on the stack and remove the token from the scratch buffer.
+ *
+ * @param base  numeric base of the token (8, 10 or 16); for base 16 a
+ *              leading 'x' left by an escape sequence is skipped
+ */
+static void
+state_stack_num(struct state *state, const uint8_t base)
+{
+    assert(state);
+    membuf_terminate(&state->var.buf, (char[]){ 0 }, 1);
+    const char *str = (char*)state->var.buf.mem.data + state->var.offset;
+    state->stack.type = STACK_NUM;
+    /* unsigned conversion keeps the full uint64_t range; a sign still wraps as it did with strtoll */
+    state->stack.num = strtoull(str + (base == 16 && *str == 'x'), NULL, base);
+    varbuf_remove_last(&state->var);
+}
+
+static void
+state_append_arg_var(struct state *state, const bool member, const struct fspec_mem *str)
+{
+ assert(state && str);
+
+ if (!codebuf_append_arg_var(&state->out, member, str))
+ ragel_throw_error(&state->ragel, "'%s' undeclared", (char*)str->data);
+}
+
+/**
+ * Open a declaration named `str`. A lexer error is reported when the name is
+ * already declared; the declaration is emitted either way.
+ */
+static void
+state_append_declaration(struct state *state, const enum fspec_declaration decl, const struct fspec_mem *str)
+{
+    assert(state && str);
+
+    const bool member = (decl == FSPEC_DECLARATION_MEMBER);
+    const bool taken = (get_declaration(&state->out, member, str, NULL) != NULL);
+    if (taken)
+        ragel_throw_error(&state->ragel, "'%s' redeclared", (char*)str->data);
+
+    codebuf_append_declaration(&state->out, decl);
+    codebuf_append_arg_cstr(&state->out, str->data, str->len);
+}
+
+/**
+ * Close the open declaration of kind `decl`: replace its placeholder offset
+ * argument with the declaration's size in bytes and forget its start.
+ */
+static void
+state_finish_declaration(struct state *state, const enum fspec_declaration decl)
+{
+    assert(state && state->out.decl[decl]);
+    const char *begin = state->out.decl[decl];
+    const char *end = state->out.end[SECTION_CODE];
+    const fspec_off off = end - begin;
+
+    const enum fspec_arg *slot = fspec_op_get_arg(state->out.decl[decl], end, 3, 1<<FSPEC_ARG_OFF);
+    codebuf_replace_arg(&state->out, slot, FSPEC_ARG_OFF, &off);
+    state->out.decl[decl] = NULL;
+}
+
+%%{
+ # Lexer machine. The ragel cursor variables live in state.ragel.
+ machine fspec_lexer;
+ variable p state.ragel.p;
+ variable pe state.ragel.pe;
+ variable eof state.ragel.eof;
+ write data noerror nofinal;
+
+ # Argument emitters: append one argument built from the value on the stack.
+ action arg_eof {
+ codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL);
+ }
+
+ action arg_num {
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) });
+ }
+
+ action arg_str {
+ const struct fspec_mem *str = stack_get_str(&state.stack);
+ codebuf_append_arg_cstr(&state.out, str->data, str->len);
+ }
+
+ action arg_var {
+ state_append_arg_var(&state, true, stack_get_str(&state.stack));
+ }
+
+ # Op emitters.
+ action filter {
+ codebuf_append_op(&state.out, FSPEC_OP_FILTER);
+ }
+
+ action goto {
+ codebuf_append_op(&state.out, FSPEC_OP_GOTO);
+ state_append_arg_var(&state, false, stack_get_str(&state.stack));
+ }
+
+ # Visual ops: FSPEC_OP_VISUAL followed by the visualization as a number argument.
+ action vnul {
+ codebuf_append_op(&state.out, FSPEC_OP_VISUAL);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL });
+ }
+
+ action vdec {
+ codebuf_append_op(&state.out, FSPEC_OP_VISUAL);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC });
+ }
+
+ action vhex {
+ codebuf_append_op(&state.out, FSPEC_OP_VISUAL);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX });
+ }
+
+ action vstr {
+ codebuf_append_op(&state.out, FSPEC_OP_VISUAL);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR });
+ }
+
+ # Read ops: FSPEC_OP_READ followed by the width in bits as a number argument.
+ action r8 {
+ codebuf_append_op(&state.out, FSPEC_OP_READ);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 8 });
+ }
+
+ action r16 {
+ codebuf_append_op(&state.out, FSPEC_OP_READ);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 });
+ }
+
+ action r32 {
+ codebuf_append_op(&state.out, FSPEC_OP_READ);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 });
+ }
+
+ action r64 {
+ codebuf_append_op(&state.out, FSPEC_OP_READ);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 });
+ }
+
+ # Declaration open and close; the name is taken from the stack.
+ action member_end {
+ state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER);
+ }
+
+ action member_start {
+ state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack));
+ }
+
+ action struct_end {
+ state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT);
+ }
+
+ action struct_start {
+ state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack));
+ }
+
+ # Convert the collected digits into a number on the stack.
+ action stack_oct {
+ state_stack_num(&state, 8);
+ }
+
+ action stack_hex {
+ state_stack_num(&state, 16);
+ }
+
+ action stack_dec {
+ state_stack_num(&state, 10);
+ }
+
+ # Put the collected bytes on the stack as a string; the terminator is not counted in len.
+ action stack_str {
+ membuf_terminate(&state.var.buf, (char[]){ 0 }, 1);
+ state.stack.type = STACK_STR;
+ state.stack.str = state.var.buf.mem;
+ state.stack.str.len = state.var.buf.written;
+ }
+
+ # Append the byte given by a numeric escape sequence to the token buffer.
+ # Three octal digits can exceed one byte; that is input, so report it
+ # as a parse error instead of asserting.
+ action store_esc_num {
+     const fspec_num v = stack_get_num(&state.stack);
+     if (v > 255)
+         ragel_throw_error(&state.ragel, "escape sequence out of range");
+
+     const uint8_t u8 = v;
+     membuf_append(&state.var.buf, &u8, sizeof(u8));
+ }
+
+ # Append the character a single-letter escape stands for; unknown letters append nothing.
+ action store_esc {
+ const struct { const char e, v; } map[] = {
+ { .e = 'a', .v = '\a' },
+ { .e = 'b', .v = '\b' },
+ { .e = 'f', .v = '\f' },
+ { .e = 'n', .v = '\n' },
+ { .e = 'r', .v = '\r' },
+ { .e = 't', .v = '\t' },
+ { .e = 'v', .v = '\v' },
+ { .e = '\\', .v = '\\' },
+ { .e = '\'', .v = '\'' },
+ { .e = '\"', .v = '"' },
+ { .e = 'e', .v = 0x1B },
+ };
+
+ for (size_t i = 0; i < ARRAY_SIZE(map); ++i) {
+ if (*state.ragel.p != map[i].e)
+ continue;
+
+ membuf_append(&state.var.buf, &map[i].v, sizeof(map[i].v));
+ break;
+ }
+ }
+
+ # Append the current input byte to the token buffer.
+ action store {
+ membuf_append(&state.var.buf, state.ragel.p, 1);
+ }
+
+ # A number starts behind whatever is already collected, a string starts from scratch.
+ action begin_num {
+ varbuf_begin(&state.var);
+ }
+
+ action begin_str {
+ varbuf_reset(&state.var);
+ }
+
+ # Error reporting.
+ action type_err {
+ ragel_throw_error(&state.ragel, "unknown type name");
+ }
+
+ action visual_err {
+ ragel_throw_error(&state.ragel, "unknown visualization");
+ }
+
+ action syntax_err {
+ ragel_throw_error(&state.ragel, "malformed input (machine failed here or in next expression)");
+ }
+
+ # Line bookkeeping for diagnostics.
+ action line {
+ ragel_advance_line(&state.ragel);
+ }
+
+ # Semantic
+ # Basic tokens. Numbers come as hex (0xNN), octal (escape sequences only) and signed decimal.
+ quote = ['"];
+ newline = '\n';
+ esc = [abfnrtv\\'"e];
+ esc_chr = '\\';
+ esc_hex = 'x' <: xdigit{2};
+ hex = '0' <: esc_hex;
+ oct = [0-7]{1,3};
+ dec = [\-+]? <: (([1-9] <: digit*) | '0');
+ valid = ^cntrl;
+ comment = '//' <: valid* :>> newline;
+ # Each type name emits a read op of the matching width; the signed 64 bit name is s64.
+ type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's64') %r64;
+ visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr;
+ reserved = 'struct' | type | visual;
+ name = ((alpha | '_') <: (alnum | '_')*) - reserved;
+
+ # Stack
+ # Tokens that collect their bytes and leave a string or number on the stack.
+ stack_name = name >begin_str $store %stack_str;
+ stack_hex = hex >begin_num $store %stack_hex;
+ stack_dec = dec >begin_num $store %stack_dec;
+ stack_oct = oct >begin_num $store %stack_oct;
+ stack_esc_hex = esc_hex >begin_num $store %stack_hex;
+ stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc);
+ stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote;
+ stack_num = stack_dec | stack_hex;
+
+ # Catchers
+ # Constructs that turn stack values into ops and arguments.
+ catch_struct = 'struct ' <: stack_name;
+ catch_type = (catch_struct %goto | type) $!type_err;
+ catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var;
+ catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']';
+ catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? <: (', ' <: catch_args)* :>> ')')?;
+ catch_visual = ' ' <: visual $!visual_err;
+
+ # Abstract
+ # Top level structure: structs containing members, with comments and whitespace in between.
+ member = stack_name %member_start :> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) :>> ';' %member_end;
+ struct = catch_struct %struct_start :>> ' {' <: (space | comment | member)* :>> '};' %struct_end;
+ line = valid* :>> newline %line;
+ main := ((space | comment | struct)* & line*) $!syntax_err;
+}%%
+
+/**
+ * Lex the input delivered by lexer->ops.read into bytecode stored in
+ * lexer->mem.output.
+ *
+ * @param lexer  read callback plus input scratch and output buffers
+ * @param name   name stored in the ragel state
+ * @return       true when no error was reported
+ *
+ * On return lexer->mem.output.len holds the number of bytecode bytes written.
+ */
+bool
+fspec_lexer_parse(struct fspec_lexer *lexer, const char *name)
+{
+ int cs;
+ %% write init;
+
+ (void)fspec_lexer_en_main;
+ assert(lexer);
+ assert(lexer->ops.read);
+ assert(lexer->mem.input.data && lexer->mem.input.len);
+ assert(lexer->mem.output.data && lexer->mem.output.len);
+ assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range");
+ assert(lexer->mem.output.len <= (fspec_off)~0 && "output storage size exceeds fspec_off range");
+
+ /* scratch storage for the token being collected */
+ char var[256];
+ struct state state = {
+ .ragel.name = name,
+ .ragel.lineno = 1,
+ .var.buf.mem = { .data = var, .len = sizeof(var) },
+ .out.buf.mem = lexer->mem.output,
+ };
+
+ /* header: version, declaration count and data length; the last two are patched after lexing */
+ static const fspec_num version = 0;
+ state.out.end[SECTION_CODE] = state.out.end[SECTION_DATA] = state.out.buf.mem.data;
+ codebuf_append_op(&state.out, FSPEC_OP_HEADER);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, &version);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ PLACEHOLDER });
+ codebuf_append_arg(&state.out, FSPEC_ARG_DAT, (fspec_off[]){ PLACEHOLDER });
+ /* the data section starts empty right behind the header; strings are inserted there */
+ state.out.end[SECTION_DATA] = state.out.end[SECTION_CODE];
+ state.out.strings = state.out.end[SECTION_DATA];
+
+ /* feed the machine chunk by chunk; a short read marks the end of input */
+ struct fspec_mem input = lexer->mem.input;
+ for (bool eof = false; !state.ragel.error && !eof;) {
+ const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len);
+ const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes };
+ ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl);
+ %% write exec;
+ }
+
+ /* patch the header placeholders with the final declaration count and string table size */
+ {
+ const void *end = state.out.end[SECTION_CODE];
+ codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 2, 1<<FSPEC_ARG_NUM), FSPEC_ARG_NUM, (fspec_num[]){ state.out.declarations });
+ const fspec_off off = (char*)state.out.end[SECTION_DATA] - (char*)state.out.strings;
+ codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 3, 1<<FSPEC_ARG_DAT), FSPEC_ARG_DAT, &off);
+ }
+
+ lexer->mem.output.len = state.out.buf.written;
+ return !state.ragel.error;
+}
diff --git a/src/fspec/memory.h b/src/fspec/memory.h
new file mode 100644
index 0000000..768415a
--- /dev/null
+++ b/src/fspec/memory.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include <stddef.h>
+
+/** a region of memory: start pointer and length in bytes */
+struct fspec_mem {
+ void *data;
+ size_t len;
+};
diff --git a/src/fspec/validator.h b/src/fspec/validator.h
new file mode 100644
index 0000000..c4705b2
--- /dev/null
+++ b/src/fspec/validator.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <fspec/memory.h>
+
+#include <stdbool.h>
+
+/**
+ * Validator configuration supplied by the caller.
+ * ops.read  fills ptr with bytecode and returns the number of bytes read
+ * mem.input scratch buffer the bytecode is read into
+ */
+struct fspec_validator;
+struct fspec_validator {
+    struct {
+        size_t (*read)(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb);
+    } ops;
+
+    struct {
+        struct fspec_mem input;
+    } mem;
+};
+
+/** Validate the bytecode named `name`; returns false when an error was reported. */
+bool
+fspec_validator_parse(struct fspec_validator *validator, const char *name);
diff --git a/src/fspec/validator.rl b/src/fspec/validator.rl
new file mode 100644
index 0000000..093348d
--- /dev/null
+++ b/src/fspec/validator.rl
@@ -0,0 +1,237 @@
+#include "ragel/ragel.h"
+#include <fspec/bcode.h>
+#include <fspec/validator.h>
+#include "bcode-internal.h"
+
+#include <assert.h>
+
+/** the most recently read argument payload, assembled byte by byte through u.b */
+struct stack {
+ union {
+ fspec_num num;
+ fspec_off off;
+ fspec_var var;
+ fspec_strsz strsz;
+ unsigned char b[sizeof(fspec_num)];
+ } u;
+ uint8_t i; // writing index for u.b
+};
+
+/** data section bounds; note that end holds the section length, users add it to start */
+struct range {
+ fspec_off start, end;
+};
+
+/** bookkeeping gathered while walking the bytecode */
+struct context {
+ struct range data;
+ fspec_var declarations, expected_declarations;
+ fspec_off str_end, decl_start, decl_end[FSPEC_DECLARATION_LAST], offset;
+ enum fspec_declaration last_decl_type;
+};
+
+/** everything the validator actions operate on */
+struct state {
+ struct ragel ragel;
+ struct context context;
+ struct stack stack;
+ bool valid;
+};
+
+%%{
+ # Validator machine. The ragel cursor variables live in state.ragel.
+ machine fspec_validator;
+ variable p state.ragel.p;
+ variable pe state.ragel.pe;
+ variable eof state.ragel.eof;
+ write data noerror nofinal;
+
+ # Remember the declaration count announced by the header.
+ action store_decls {
+ if (state.stack.u.num > (fspec_var)~0)
+ ragel_throw_error(&state.ragel, "expected declarations overflows");
+
+ state.context.expected_declarations = state.stack.u.num;
+ }
+
+ # Compare the announced declaration count with the number actually seen.
+ action check_decls {
+ if (state.context.declarations != state.context.expected_declarations)
+ ragel_throw_error(&state.ragel, "expected declarations did not match with the content: expected: %" PRI_FSPEC_VAR " got: %" PRI_FSPEC_VAR, state.context.expected_declarations, state.context.declarations);
+ }
+
+ # Record where the data section starts and how long it is.
+ action mark_dat {
+ // we can replace this logic with fspec generated code in future
+ // struct str { len: u32; str: u8[len]['\0']; }
+ // struct dat { len: u32; strings: struct str[$::len]; }
+ if (state.context.offset > (fspec_off)~0 - state.stack.u.off)
+ ragel_throw_error(&state.ragel, "dat section length overflows");
+
+ state.context.data = (struct range){ .start = state.context.offset, .end = state.stack.u.off };
+ }
+
+ # Condition: the current offset is still inside the data section.
+ action test_inside_dat {
+ state.context.offset < (state.context.data.start + state.context.data.end)
+ }
+
+ # Record where the string that starts here ends.
+ action mark_str {
+ if (state.context.offset >= (fspec_off)~0 - state.stack.u.strsz) // >= for null byte
+ ragel_throw_error(&state.ragel, "str length overflows");
+
+ state.context.str_end = state.context.offset + state.stack.u.strsz;
+ }
+
+ # Condition: the current offset is still inside the current string.
+ action test_inside_str {
+ state.context.offset < state.context.str_end
+ }
+
+ # A variable argument must refer to a declaration that has already been seen.
+ action check_var {
+     if (state.context.declarations <= state.stack.u.var)
+         ragel_throw_error(&state.ragel, "referenced undeclared variable");
+ }
+
+ # A string argument must point inside the data section; each message
+ # prints the bound that was actually violated.
+ action check_str {
+     if (state.stack.u.off < state.context.data.start) {
+         ragel_throw_error(&state.ragel, "str before data section range: %" PRI_FSPEC_OFF " < %" PRI_FSPEC_OFF, state.stack.u.off, state.context.data.start);
+     } else if (state.context.data.start + state.context.data.end <= state.stack.u.off) {
+         ragel_throw_error(&state.ragel, "str after data section range: %" PRI_FSPEC_OFF " <= %" PRI_FSPEC_OFF, state.context.data.start + state.context.data.end, state.stack.u.off);
+     }
+ }
+
+ # Check the declaration kind. Only a valid kind is remembered, because
+ # last_decl_type is later used as an index into decl_end.
+ action check_decl_type {
+     if (state.stack.u.num >= FSPEC_DECLARATION_LAST) {
+         ragel_throw_error(&state.ragel, "invalid declaration type: %" PRI_FSPEC_NUM, state.stack.u.num);
+     } else {
+         state.context.last_decl_type = state.stack.u.num;
+     }
+ }
+
+ # Declaration ids must count up from zero without gaps.
+ action check_decl_num {
+ if (state.context.declarations >= (fspec_var)~0)
+ ragel_throw_error(&state.ragel, "declarations overflows");
+
+ if (state.context.declarations != state.stack.u.num)
+ ragel_throw_error(&state.ragel, "invalid declaration number: %" PRI_FSPEC_NUM " expected: %" PRI_FSPEC_VAR, state.stack.u.num, state.context.declarations);
+
+ ++state.context.declarations;
+ }
+
+ # Remember the offset at which the declaration op begins.
+ action start_decl {
+ state.context.decl_start = state.context.offset;
+ }
+
+ # Compute where the current declaration ends from its stored length.
+ # sz is the part of the declaration consumed so far. The length comes
+ # from the input, so a bad value is a parse error, not an assertion.
+ action mark_decl {
+     const fspec_off sz = (state.context.offset - state.context.decl_start);
+
+     if (state.stack.u.off < sz) {
+         ragel_throw_error(&state.ragel, "declaration length is smaller than its header");
+     } else if (state.context.offset > (fspec_off)~0 - (state.stack.u.off - sz)) {
+         ragel_throw_error(&state.ragel, "declaration length overflows");
+     } else {
+         state.context.decl_end[state.context.last_decl_type] = state.context.offset + (state.stack.u.off - sz);
+     }
+ }
+
+ # The declaration just read must be of the expected kind.
+ action check_struct {
+ if (state.context.last_decl_type != FSPEC_DECLARATION_STRUCT)
+ ragel_throw_error(&state.ragel, "expected struct declaration");
+ }
+
+ action check_member {
+ if (state.context.last_decl_type != FSPEC_DECLARATION_MEMBER)
+ ragel_throw_error(&state.ragel, "expected member declaration");
+ }
+
+ # The declaration must end exactly at the offset computed by mark_decl.
+ action check_member_end {
+ if (state.context.decl_end[FSPEC_DECLARATION_MEMBER] != state.context.offset)
+ ragel_throw_error(&state.ragel, "invalid member end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_MEMBER], state.context.offset);
+ }
+
+ action check_struct_end {
+ if (state.context.decl_end[FSPEC_DECLARATION_STRUCT] != state.context.offset)
+ ragel_throw_error(&state.ragel, "invalid struct end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_STRUCT], state.context.offset);
+ }
+
+ action check_visual_type {
+ if (state.stack.u.num >= FSPEC_VISUAL_LAST)
+ ragel_throw_error(&state.ragel, "invalid visual type: %" PRI_FSPEC_NUM, state.stack.u.num);
+ }
+
+ # Error reporting.
+ action arg_error {
+ ragel_throw_error(&state.ragel, "malformed argument");
+ }
+
+ action op_error {
+ ragel_throw_error(&state.ragel, "unexpected argument");
+ }
+
+ action pattern_error {
+ ragel_throw_error(&state.ragel, "unexpected pattern");
+ }
+
+ action syntax_error {
+ ragel_throw_error(&state.ragel, "unexpected byte");
+ }
+
+ # Payload assembly: flush restarts the byte index, store appends the current byte.
+ action store {
+ if (state.stack.i < sizeof(state.stack.u.b))
+ state.stack.u.b[state.stack.i++] = fc;
+ }
+
+ action flush {
+ state.stack.i = 0;
+ }
+
+ # Count consumed bytes.
+ action advance {
+ ++state.context.offset;
+ }
+
+ # Fixed width payloads of 1, 2, 4 and 8 bytes collected into the stack.
+ stack1 = any{1} >flush $store;
+ stack2 = any{2} >flush $store;
+ stack4 = any{4} >flush $store;
+ stack8 = any{8} >flush $store;
+
+ # Arguments: type tag byte followed by the payload. Tag values follow enum fspec_arg.
+ ARG_DAT = 0 stack4 %*mark_dat ((stack1 %*mark_str (any when test_inside_str)* 0) when test_inside_dat)*;
+ ARG_OFF = 1 stack4;
+ ARG_NUM = 2 stack8;
+ ARG_VAR = 3 stack2 %check_var;
+ ARG_STR = 4 stack4 %check_str;
+ ARG_EOF = 5;
+
+ # An argument is always introduced by the argument op byte 0.
+ OP_ARG_DAT = 0 ARG_DAT $!arg_error;
+ OP_ARG_OFF = 0 ARG_OFF $!arg_error;
+ OP_ARG_NUM = 0 ARG_NUM $!arg_error;
+ OP_ARG_VAR = 0 ARG_VAR $!arg_error;
+ OP_ARG_STR = 0 ARG_STR $!arg_error;
+ OP_ARG_EOF = 0 ARG_EOF $!arg_error;
+
+ # Ops with the arguments each accepts. Opcode values follow enum fspec_op.
+ OP_HEADER = 1 (OP_ARG_NUM OP_ARG_NUM %store_decls OP_ARG_DAT) $!op_error;
+ OP_DECLARATION = 2 >start_decl (OP_ARG_NUM %check_decl_type OP_ARG_NUM %check_decl_num OP_ARG_OFF %mark_decl OP_ARG_STR) $!op_error;
+ OP_READ = 3 (OP_ARG_NUM (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error;
+ OP_GOTO = 4 (OP_ARG_VAR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error;
+ OP_FILTER = 5 (OP_ARG_STR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR)*) $!op_error;
+ OP_VISUAL = 6 (OP_ARG_NUM %check_visual_type) $!op_error;
+
+ # Overall layout: a header followed by struct declarations that contain member declarations.
+ pattern = (OP_DECLARATION %check_struct <: (OP_DECLARATION %check_member (OP_READ | OP_GOTO) OP_FILTER? OP_VISUAL? %check_member_end)*)* %check_struct_end $!pattern_error;
+ main := (OP_HEADER <: pattern) %check_decls $advance $!syntax_error;
+}%%
+
+/**
+ * Validate the bytecode delivered by validator->ops.read.
+ *
+ * @param validator  read callback plus input scratch buffer
+ * @param name       name stored in the ragel state
+ * @return           true when no error was reported
+ */
+bool
+fspec_validator_parse(struct fspec_validator *validator, const char *name)
+{
+ int cs;
+ %% write init;
+
+ (void)fspec_validator_en_main;
+ assert(validator);
+ assert(validator->ops.read);
+ assert(validator->mem.input.data && validator->mem.input.len);
+ assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range");
+
+ struct state state = {
+ .ragel.name = name,
+ .ragel.lineno = 1,
+ };
+
+ /* the store action indexes u.b, so b must span the whole union */
+ static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union");
+
+ /* feed the machine chunk by chunk; a short read marks the end of input */
+ struct fspec_mem input = validator->mem.input;
+ for (bool eof = false; !state.ragel.error && !eof;) {
+ const size_t bytes = validator->ops.read(validator, input.data, 1, input.len);
+ const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true };
+ ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl);
+ %% write exec;
+ }
+
+ return !state.ragel.error;
+}