3 files changed, 642 insertions, 0 deletions
diff --git a/src/ragel/fspec.h b/src/ragel/fspec.h
new file mode 100644
index 0000000..68998f4
--- /dev/null
+++ b/src/ragel/fspec.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+struct fspec_bytes { // a byte sequence described by pointer and length
+   const uint8_t *data; // first byte of the sequence
+   size_t size; // number of bytes at data
+};
+
+enum fspec_kind_bits { // bit flags stored in fspec_kind.flags
+   FSPEC_KIND_IGNORE = 1<<0, // set by the "pad" kind
+   FSPEC_KIND_HEXADECIMAL = 1<<1, // set by the "hex" kind
+   FSPEC_KIND_ENCODING = 1<<2, // set by the "ascii", "utf8" and "sjis" kinds
+};
+
+struct fspec_kind { // optional "= kind" annotation of a field
+   const char *name; // kind keyword, e.g. "hex"
+   uint32_t flags; // combination of enum fspec_kind_bits
+};
+
+enum fspec_array_type { // selects the active member of the union in struct fspec_array
+   FSPEC_ARRAY_FIXED, // nmemb: numeric element count
+   FSPEC_ARRAY_MATCH, // match: byte string written in quotes
+   FSPEC_ARRAY_VAR, // var: name of a variable declared earlier in the same container
+};
+
+struct fspec_array { // the "[...]" part of a field declaration
+   enum fspec_array_type type; // tells which union member below is valid
+
+   union {
+      struct fspec_bytes match; // valid for FSPEC_ARRAY_MATCH
+      const char *var; // valid for FSPEC_ARRAY_VAR
+      size_t nmemb; // valid for FSPEC_ARRAY_FIXED
+   };
+};
+
+enum fspec_type_bits { // bit flags stored in fspec_type.flags
+   FSPEC_TYPE_SIGNED = 1<<0, // set for the s8/s16/s32/s64 types
+   FSPEC_TYPE_CONTAINER = 1<<1, // set when the type names a container ("struct name")
+};
+
+struct fspec_type { // type of a field
+   const char *name; // type keyword, or the container name for container types
+   size_t size; // size in bytes for the built-in integer types; left 0 for container types
+   uint32_t flags; // combination of enum fspec_type_bits
+};
+
+struct fspec_field { // one parsed field declaration, as passed to ops.field
+   struct fspec_type type; // declared type
+   struct fspec_array array; // array specification; defaults to FIXED with nmemb 1
+   struct fspec_kind kind; // optional kind annotation; zeroed when absent
+   const char *name; // field name
+};
+
+struct fspec_container { // the container whose fields are currently being reported
+   const char *name; // text captured for the container, including the leading "struct "
+};
+
+struct fspec;
+struct fspec { // parser context supplied by the caller of fspec_parse
+   struct {
+      void (*field)(struct fspec *fspec, const struct fspec_container *container, const struct fspec_field *field); // called by the parser for every completed field declaration
+      size_t (*read)(struct fspec *fspec, char *buf, const size_t size, const size_t nmemb); // fills buf with input; the parser treats the return value as the number of bytes read
+   } ops;
+
+   struct {
+      // XXX: replace with ops.alloc, ops.free
+      //      on dump.c we can then just provide implementation that still uses reasonable amount of static memory
+      //      but we don't limit the code from working with regular dynamic memory
+      uint8_t *data; // storage the parser uses for captured strings
+      size_t size; // capacity of data in bytes
+   } mem;
+};
+
+void fspec_parse(struct fspec *fspec); // pulls input through fspec->ops.read and reports every parsed field through fspec->ops.field; both callbacks must be set
diff --git a/src/ragel/fspec.rl b/src/ragel/fspec.rl
new file mode 100644
index 0000000..8493cf1
--- /dev/null
+++ b/src/ragel/fspec.rl
@@ -0,0 +1,329 @@
+#include "fspec.h"
+#include "ragel.h"
+
+// It's a pretty good base so far.
+// Using ragel_search_str for typechecking variable declarations is a hack.
+// State should have a hashmap for fields/containers.
+//
+// XXX: Maybe drop the whole container thing and just give field a const char *parent; that points to the keypath of the container.
+//      Then we would have a flat structure like, "foo, foo.var, foo.b, ..."
+
+static const struct fspec_container default_container = {0}; // copied into state.container whenever a container name is parsed
+static const struct fspec_field default_field = { .array.nmemb = 1 }; // copied into state.field whenever a field type is parsed; array type stays FSPEC_ARRAY_FIXED (0) with one element
+
+enum stack_type { // kind of token most recently pushed by the machine
+   STACK_VAR, // identifier or keyword, held in stack.var
+   STACK_STR, // quoted byte string, held in stack.str
+   STACK_NUM, // hexadecimal or decimal literal, held in stack.num
+};
+
+struct stack { // holds exactly one value: the last token pushed
+   enum stack_type type; // tells which union member below is valid
+
+   union {
+      struct fspec_bytes str; // valid for STACK_STR
+      const char *var; // valid for STACK_VAR
+      uint64_t num; // valid for STACK_NUM
+   };
+};
+
+struct state { // everything fspec_parse keeps while the machine runs
+   struct ragel ragel; // input block, cursor pointers and string store
+   struct stack stack; // most recently pushed token
+   struct fspec_field field; // field currently being assembled
+   struct fspec_container container; // container currently being parsed
+   size_t container_data_offset; // offset of the store cursor recorded when the container name was parsed
+};
+
+static const char* // printable name of a stack_type, used in diagnostics
+stack_type_to_str(const enum stack_type type)
+{
+   if (type == STACK_VAR)
+      return "var";
+   if (type == STACK_STR)
+      return "str";
+   if (type == STACK_NUM)
+      return "num";
+   assert(0 && "should not happen"); // value outside the enum
+   return "unknown";
+}
+
+static void
+stack_check_type(const struct ragel *ragel, const struct stack *stack, const enum stack_type type)
+{
+   assert(ragel && stack);
+
+   if (stack->type != type)
+      ragel_throw_error(ragel, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type));
+}
+
+static const char* // returns the variable name on the stack; throws a parse error if the stack holds another type
+stack_get_var(const struct ragel *ragel, const struct stack *stack)
+{
+   assert(ragel && stack);
+   stack_check_type(ragel, stack, STACK_VAR);
+   return stack->var;
+}
+
+static const struct fspec_bytes* // returns a pointer to the byte string inside *stack; throws a parse error if the stack holds another type
+stack_get_str(const struct ragel *ragel, const struct stack *stack)
+{
+   assert(ragel && stack);
+   stack_check_type(ragel, stack, STACK_STR);
+   return &stack->str;
+}
+
+static uint64_t // returns the number on the stack; throws a parse error if the stack holds another type
+stack_get_num(const struct ragel *ragel, const struct stack *stack)
+{
+   assert(ragel && stack);
+   stack_check_type(ragel, stack, STACK_NUM);
+   return stack->num;
+}
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+static void // resolves a type name into *out_type; throws "invalid type" when the name is unknown
+fspec_type_from_str(const struct ragel *ragel, const char *str, struct fspec_type *out_type)
+{
+   assert(ragel && str && out_type); // out_type is written on every successful path
+   // Built-in integer types; static so the table is not rebuilt on every call.
+   static const struct fspec_type types[] = {
+      { .name = "u8", .size = sizeof(uint8_t) },
+      { .name = "u16", .size = sizeof(uint16_t) },
+      { .name = "u32", .size = sizeof(uint32_t) },
+      { .name = "u64", .size = sizeof(uint64_t) },
+      { .name = "s8", .size = sizeof(int8_t), .flags = FSPEC_TYPE_SIGNED },
+      { .name = "s16", .size = sizeof(int16_t), .flags = FSPEC_TYPE_SIGNED },
+      { .name = "s32", .size = sizeof(int32_t), .flags = FSPEC_TYPE_SIGNED },
+      { .name = "s64", .size = sizeof(int64_t), .flags = FSPEC_TYPE_SIGNED },
+   };
+
+   for (size_t i = 0; i < ARRAY_SIZE(types); ++i) {
+      if (!strcmp(str, types[i].name)) {
+         *out_type = types[i];
+         return;
+      }
+   }
+
+   // Otherwise accept the name as a container type if an identical string is already present in the store.
+   if (ragel_search_str(ragel, 0, str)) {
+      *out_type = (struct fspec_type){ .name = str, .flags = FSPEC_TYPE_CONTAINER };
+      return;
+   }
+
+   ragel_throw_error(ragel, "invalid type");
+}
+
+static void // resolves a kind keyword into *out_kind; throws "invalid kind" when the keyword is unknown
+fspec_kind_from_str(const struct ragel *ragel, const char *str, struct fspec_kind *out_kind)
+{
+   assert(ragel && str && out_kind); // out_kind is written on the successful path
+
+   // Known kinds; static so the table is not rebuilt on every call.
+   static const struct fspec_kind kinds[] = {
+      { .name = "pad", .flags = FSPEC_KIND_IGNORE },
+      { .name = "hex", .flags = FSPEC_KIND_HEXADECIMAL },
+      { .name = "ascii", .flags = FSPEC_KIND_ENCODING },
+      { .name = "utf8", .flags = FSPEC_KIND_ENCODING },
+      { .name = "sjis", .flags = FSPEC_KIND_ENCODING },
+   };
+
+   for (size_t i = 0; i < ARRAY_SIZE(kinds); ++i) {
+      if (!strcmp(str, kinds[i].name)) {
+         *out_kind = kinds[i];
+         return;
+      }
+   }
+
+   ragel_throw_error(ragel, "invalid kind");
+}
+
+static void // rejects encoding kinds on fields whose type is not exactly one byte wide
+check_field_kind(const struct ragel *ragel, const struct fspec_field *field)
+{
+   assert(ragel && field);
+   const bool is_encoding = (field->kind.flags & FSPEC_KIND_ENCODING) != 0;
+   if (is_encoding && field->type.size != sizeof(uint8_t))
+      ragel_throw_error(ragel, "invalid kind: %s kind only allowed for u8 and s8 types", field->kind.name);
+}
+
+%%{
+   # File specification parser.
+
+   machine fspec;
+   variable p state.ragel.p;
+   variable pe state.ragel.pe;
+   variable eof state.ragel.eof;
+   write data noerror nofinal;
+
+   action field { // a field declaration is complete: report it through the user callback
+      fspec->ops.field(fspec, &state.container, &state.field);
+   }
+
+   action field_kind { // resolve the pushed kind keyword and validate it against the field type
+      fspec_kind_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.kind);
+      check_field_kind(&state.ragel, &state.field);
+   }
+
+   action field_array { // turn the token pushed inside the brackets into the array specification of the field
+      switch (state.stack.type) {
+         case STACK_NUM: // numeric literal: fixed element count
+            state.field.array.type = FSPEC_ARRAY_FIXED;
+            state.field.array.nmemb = stack_get_num(&state.ragel, &state.stack);
+            break;
+
+         case STACK_STR: // quoted string: stored as the match bytes
+            state.field.array.type = FSPEC_ARRAY_MATCH;
+            state.field.array.match = *stack_get_str(&state.ragel, &state.stack);
+            break;
+
+         case STACK_VAR: // identifier: must already be stored at or after the offset recorded for the current container
+            state.field.array.type = FSPEC_ARRAY_VAR;
+            state.field.array.var = stack_get_var(&state.ragel, &state.stack);
+
+            if (!ragel_search_str(&state.ragel, state.container_data_offset, state.field.array.var))
+               ragel_throw_error(&state.ragel, "undeclared variable '%s'", state.field.array.var);
+            break;
+
+         default:
+            ragel_throw_error(&state.ragel, "array can't contain the stack type of '%s'", stack_type_to_str(state.stack.type));
+            break;
+      }
+   }
+
+   action field_name { // the pushed identifier is the name of the field
+      state.field.name = stack_get_var(&state.ragel, &state.stack);
+   }
+
+   action field_type { // a new field starts: reset it to the defaults, then resolve the pushed type name
+      state.field = default_field;
+      fspec_type_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.type);
+   }
+
+   action container_name { // a new container starts: reset it and remember where its name sits in the store
+      state.container = default_container;
+      state.container.name = stack_get_var(&state.ragel, &state.stack);
+      state.container_data_offset = state.ragel.mem.cur - state.ragel.mem.data; // later used as the search start for array variables
+   }
+
+   action push_var { // push the string at the store cursor as a variable name
+      state.stack.type = STACK_VAR;
+      state.stack.var = (char*)state.ragel.mem.cur;
+   }
+
+   action push_hex { // push the hexadecimal literal at the store cursor as a number
+      state.stack.type = STACK_NUM;
+      state.stack.num = strtoull((char*)state.ragel.mem.cur, NULL, 16); // unsigned parse: strtoll would clamp values above LLONG_MAX
+   }
+
+   action push_dec { // push the decimal literal at the store cursor as a number
+      state.stack.type = STACK_NUM;
+      state.stack.num = strtoull((char*)state.ragel.mem.cur, NULL, 10); // unsigned parse: strtoll would clamp values above LLONG_MAX
+   }
+
+   action push_str { // push the bytes between the store cursor and the end of written data as a byte string
+      state.stack.type = STACK_STR;
+      state.stack.str.data = state.ragel.mem.cur;
+      state.stack.str.size = (state.ragel.mem.data + state.ragel.mem.written) - state.ragel.mem.cur; // length without the terminator
+   }
+
+   action convert_escape { // replace the stored escape sequence with the byte it denotes
+      ragel_convert_escape(&state.ragel);
+   }
+
+   action remove { // drop the string at the store cursor from the store
+      ragel_remove_last_data(&state.ragel);
+   }
+
+   action finish { // deduplicate the string at the store cursor against earlier stored strings
+      ragel_finish_data(&state.ragel);
+   }
+
+   action store { // append the current input byte to the store
+      ragel_store_data(&state.ragel);
+   }
+
+   action begin { // start a new string in the store
+      ragel_begin_data(&state.ragel);
+   }
+
+   action invalid_kind { // error action embedded in catch_kind
+      ragel_throw_error(&state.ragel, "invalid kind");
+   }
+
+   action invalid_type { // error action embedded in catch_type
+      ragel_throw_error(&state.ragel, "invalid type");
+   }
+
+   action error { // error action embedded in main
+      ragel_throw_error(&state.ragel, "malformed input (machine failed here or in previous or next expression)");
+   }
+
+   action line { // a newline was consumed: bump the line counter
+      ragel_advance_line(&state.ragel);
+   }
+
+   # Semantic: character classes and token shapes, no actions attached yet
+   ws = space; # any whitespace character
+   valid = ^cntrl; # anything that is not a control character
+   es = '\\';
+   delim = ';';
+   quote = ['"];
+   bopen = '{';
+   bclose = '}';
+   newline = '\n';
+   octal = [0-7];
+   hex = '0x' <: xdigit+; # hexadecimal literal with a mandatory 0x prefix
+   decimal = ([1-9] <: digit*) | '0'; # decimal literal without leading zeros
+   comment = '//' <: valid* :>> newline; # line comment running up to the newline
+   escape = es <: ('x' <: xdigit+ | [abfnrtv\\'"e] | octal{1,3}); # backslash escapes accepted inside strings
+   type = 'u8' | 'u16' | 'u32' | 'u64' | 's8' | 's16' | 's32' | 's64'; # built-in integer type keywords
+   kind = 'ascii' | 'utf8' | 'sjis' | 'hex' | 'pad'; # kind keywords
+   reserved = 'struct' | type | kind; # words that can not be used as variable names
+   var = ((alpha | '_') <: (alnum | '_')*) - reserved; # identifier that is not a reserved word
+
+   # Catchers: each matches one token, copies its bytes into the store and pushes it on the stack
+   catch_var = var >begin $store %finish %push_var;
+   catch_struct = ('struct' $store ws+ >store <: var $store) >begin %finish %push_var;
+   catch_type = (catch_struct | type >begin $store %push_var %remove) $!invalid_type;
+   catch_hex = hex >begin $store %push_hex %remove;
+   catch_decimal = decimal >begin $store %push_dec %remove;
+   catch_string = quote <: (escape %convert_escape | print)* >begin $store %finish %push_str :>> quote;
+   catch_array = '[' <: (catch_hex | catch_decimal | catch_string | catch_var) :>> ']';
+   catch_kind = '=' ws* <: kind >begin $store %push_var %remove $!invalid_kind;
+
+   # Actions: declarations built from the catchers, with the semantic actions attached
+   field = catch_type %field_type ws+ <: catch_var %field_name ws* <: (catch_array %field_array ws*)? <: (catch_kind %field_kind ws*)? :>> delim %field;
+   container = catch_struct %container_name ws* :>> bopen <: (ws | comment | field)* :>> bclose ws* delim;
+   line = valid* :>> newline @line; # runs the line action at every newline
+   main := (ws | comment | container)* & line* $!error; # structure machine intersected with the line counter; the error action runs when matching fails
+}%%
+
+void // runs the fspec machine over all input delivered by fspec->ops.read; errors terminate through ragel_throw_error
+fspec_parse(struct fspec *fspec)
+{
+   int cs; // current state of the generated machine
+   %% write init;
+
+   (void)fspec_en_main; // presumably silences an unused-variable warning for the generated constant
+   assert(fspec);
+   assert(fspec->ops.read);
+   assert(fspec->ops.field);
+
+   struct state state = {
+      .ragel = {
+         .lineno = 1, // line numbers are 1-based
+         .mem = {
+            .data = fspec->mem.data, // the string store lives in caller-provided memory
+            .size = fspec->mem.size,
+         },
+      },
+   };
+
+   for (bool ok = true; ok;) { // one iteration per input block; the final, short block is still executed before the loop ends
+      const size_t bytes = fspec->ops.read(fspec, state.ragel.buf, 1, sizeof(state.ragel.buf));
+      ok = ragel_confirm_input(&state.ragel, bytes);
+      %% write exec;
+   }
+}
diff --git a/src/ragel/ragel.h b/src/ragel/ragel.h
new file mode 100644
index 0000000..af06f4a
--- /dev/null
+++ b/src/ragel/ragel.h
@@ -0,0 +1,237 @@
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+#include <limits.h>
+#include <err.h>
+
+struct ragel { // shared parser plumbing: string store, current input block and cursor
+   struct {
+      uint8_t *data; // data\0another_data\0
+      const uint8_t *cur; // data\0another_data\0cursor (start of the string currently being captured)
+      size_t written, size; // amount of data written / size of data
+   } mem;
+
+   char buf[4096]; // block of input data
+   const char *p, *pe, *eof; // see ragel doc
+   size_t lineno; // current line
+};
+
+static inline void
+ragel_get_current_line(const struct ragel *ragel, size_t *out_lineno, size_t *out_ls, size_t *out_le, size_t *out_ws, size_t *out_we)
+{
+   assert(out_ls && out_le && out_ws && out_we);
+   assert(ragel->p >= ragel->buf && ragel->pe >= ragel->p);
+
+   size_t ls, le, ws, we;
+   size_t off = ragel->p - ragel->buf;
+   size_t lineno = ragel->lineno;
+   const size_t end = ragel->pe - ragel->buf;
+
+   // rewind to first non-space
+   for (; off > 0 && (isspace(ragel->buf[off]) || !ragel->buf[off]); --off) {
+      if (lineno > 0 && ragel->buf[off] == '\n')
+         --lineno;
+   }
+
+   for (ls = off; ls > 0 && ragel->buf[ls] != '\n'; --ls); // beginning of line
+   for (le = off; le < end && ragel->buf[le] != '\n'; ++le); // end of line
+   for (; ls < le && isspace(ragel->buf[ls]); ++ls); // strip leading whitespace
+   for (ws = off; ws > ls && isspace(ragel->buf[ws]); --ws); // rewind to first non-space
+   for (; ws > 0 && ws > ls && !isspace(ragel->buf[ws - 1]); --ws); // find word start
+   for (we = ws; we < le && !isspace(ragel->buf[we]); ++we); // find word ending
+
+   assert(we >= ws && ws >= ls && le >= ls && le >= we);
+   *out_lineno = lineno;
+   *out_ls = ls;
+   *out_le = le;
+   *out_ws = ws;
+   *out_we = we;
+}
+
+__attribute__((format(printf, 2, 3))) // lets the compiler check fmt against the variadic arguments
+static inline void // prints a formatted error with the offending line and a marker under the word, then exits; never returns
+ragel_throw_error(const struct ragel *ragel, const char *fmt, ...)
+{
+   assert(ragel && fmt);
+
+   size_t lineno, ls, le, ws, we;
+   ragel_get_current_line(ragel, &lineno, &ls, &le, &ws, &we);
+   assert(le - ls <= INT_MAX && ws - ls <= INT_MAX); // both are passed to printf as int widths below
+
+   char msg[255]; // formatted message; vsnprintf truncates anything longer
+   va_list args;
+   va_start(args, fmt);
+   vsnprintf(msg, sizeof(msg), fmt, args);
+   va_end(args);
+
+   const int indent = 8;
+   const size_t mark = (we - ws ? we - ws : 1), cur = (ragel->p - ragel->buf) - ws; // mark: marker width (at least 1); cur: position of p relative to the word start
+   warnx("\x1b[37m%zu: \x1b[31merror: \x1b[0m%s\n%*s%.*s", lineno, msg, indent, "", (int)(le - ls), ragel->buf + ls); // message, then the source line indented
+   fprintf(stderr, "%*s%*s\x1b[31m", indent, "", (int)(ws - ls), ""); // pad up to the column where the word starts
+   for (size_t i = 0; i < mark; ++i) fputs((i == cur ? "^" : "~"), stderr); // caret at the cursor position, tildes elsewhere
+   fputs("\x1b[0m\n", stderr);
+
+   exit(EXIT_FAILURE);
+}
+
+static inline void // throws unless nmemb more bytes plus one terminator byte still fit in the store
+ragel_bounds_check_data(const struct ragel *ragel, const size_t nmemb)
+{
+   assert(ragel);
+   const size_t capacity = ragel->mem.size, used = ragel->mem.written;
+   if (capacity < nmemb || used >= capacity - nmemb)
+      ragel_throw_error(ragel, "data storage limit exceeded: %zu bytes exceeds the maximum store size of %zu bytes", used, capacity);
+}
+
+static inline void // replaces the last nmemb stored bytes with a single byte and re-terminates the store
+ragel_replace_data(struct ragel *ragel, const size_t nmemb, char replacement)
+{
+   assert(ragel);
+   if (ragel->mem.written < nmemb)
+      ragel_throw_error(ragel, "parse error: received escape conversion with mem.written of %zu, expected >= %zu", ragel->mem.written, nmemb);
+   const size_t at = ragel->mem.written - nmemb; // index of the first byte being replaced
+   ragel->mem.data[at] = replacement;
+   ragel->mem.written = at + 1;
+   ragel->mem.data[ragel->mem.written] = 0;
+}
+
+static inline void // replaces the escape sequence stored at mem.cur with the single byte it denotes
+ragel_convert_escape(struct ragel *ragel)
+{
+   assert(ragel);
+
+   if (ragel->mem.written < 2)
+      ragel_throw_error(ragel, "parse error: received escape conversion with mem.written of %zu, expected >= 2", ragel->mem.written);
+
+   const struct {
+      const char *e; // escape prefix as it appears in the store
+      const char v, b; // v: literal replacement byte; b: numeric base of the digits after the prefix (0 = use v)
+   } map[] = {
+      { .e = "\\a", .v = '\a' },
+      { .e = "\\b", .v = '\b' },
+      { .e = "\\f", .v = '\f' },
+      { .e = "\\n", .v = '\n' },
+      { .e = "\\r", .v = '\r' },
+      { .e = "\\t", .v = '\t' },
+      { .e = "\\v", .v = '\v' },
+      { .e = "\\\\", .v = '\\' },
+      { .e = "\\'", .v = '\'' },
+      { .e = "\\\"", .v = '"' },
+      { .e = "\\e", .v = '\x1b' }, // ESC; spelled numerically because '\e' is a compiler extension, not ISO C
+      { .e = "\\x", .b = 16 },
+      { .e = "\\", .b = 8 }, // must stay last: a bare backslash prefix matches every sequence
+   };
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+   const char *cur = (char*)ragel->mem.cur;
+   const size_t cur_sz = strlen(cur);
+   for (size_t i = 0; i < ARRAY_SIZE(map); ++i) {
+      if (!strncmp(cur, map[i].e, strlen(map[i].e))) {
+         const char v = (!map[i].b ? map[i].v : strtol(cur + strlen(map[i].e), NULL, map[i].b));
+         assert((map[i].b == 8 && cur_sz >= 2) || (map[i].b == 16 && cur_sz >= 2) || (map[i].b == 0 && cur_sz == 2));
+         assert(map[i].b != 8 || isdigit((unsigned char)cur[1])); // cast: <ctype.h> functions do not accept negative char values
+         ragel_replace_data(ragel, cur_sz, v);
+         return;
+      }
+   }
+#undef ARRAY_SIZE
+
+   ragel_throw_error(ragel, "parse error: received unknown escape conversion");
+}
+
+static inline void // prints every stored string from offset onwards, one per line
+ragel_dump_data(struct ragel *ragel, const size_t offset)
+{
+   const uint8_t *const stop = ragel->mem.data + ragel->mem.written, *it = ragel->mem.data + offset;
+   for (; it && it < stop; it = memchr(it, 0, stop - it), it += (it != NULL)) // hop from one terminator to the start of the next string
+      printf("%s\n", it);
+}
+
+static inline const uint8_t* // returns the first stored string at or after offset whose first size bytes equal data, or NULL
+ragel_search_data(const struct ragel *ragel, const size_t offset, const uint8_t *data, const size_t size)
+{
+   assert(ragel && data);
+   const uint8_t *const stop = ragel->mem.data + ragel->mem.written;
+   const uint8_t *it = ragel->mem.data + offset;
+   while (it && it < stop && (size_t)(stop - it) >= size) {
+      if (memcmp(data, it, size) == 0)
+         return it;
+      it = memchr(it, 0, stop - it), it += (it != NULL); // jump to the terminator of this string, then step over it
+   }
+   return NULL;
+}
+
+static inline const uint8_t* // searches the store for a string equal to str; returns its address in the store or NULL
+ragel_search_str(const struct ragel *ragel, const size_t offset, const char *str)
+{
+   return ragel_search_data(ragel, offset, (const uint8_t*)str, strlen(str) + 1); // +1 includes the terminator, so only whole strings match
+}
+
+static inline void // drops the string starting at mem.cur, together with the separator in front of it, from the store
+ragel_remove_last_data(struct ragel *ragel)
+{
+   assert(ragel && ragel->mem.cur >= ragel->mem.data);
+   const uint8_t *end = ragel->mem.data + ragel->mem.written;
+   const size_t size = (size_t)(end - ragel->mem.cur) + (ragel->mem.cur > ragel->mem.data); // the first string has no separator in front (see ragel_begin_data), so only count one when cur > data
+   assert(ragel->mem.written >= size);
+   ragel->mem.written -= size;
+   ragel->mem.data[ragel->mem.written] = 0;
+}
+
+static inline void // deduplicates: when an identical string was stored earlier, drops the new copy and points mem.cur at the old one
+ragel_finish_data(struct ragel *ragel)
+{
+   assert(ragel);
+   const size_t len = (ragel->mem.data + ragel->mem.written) - ragel->mem.cur; // bytes in the current string
+   const uint8_t *const earlier = ragel_search_data(ragel, 0, ragel->mem.cur, len + 1); // +1 also compares the terminator
+   if (!earlier)
+      return;
+   ragel_remove_last_data(ragel);
+   ragel->mem.cur = earlier;
+}
+
+static inline void // appends the input byte at ragel->p to the store and keeps the store terminated
+ragel_store_data(struct ragel *ragel)
+{
+   ragel_bounds_check_data(ragel, 1); // throws when the byte plus terminator would not fit
+   ragel->mem.data[ragel->mem.written++] = *ragel->p;
+   ragel->mem.data[ragel->mem.written] = 0;
+}
+
+static inline void // starts a new string in the store and points mem.cur at its first byte
+ragel_begin_data(struct ragel *ragel)
+{
+   ragel_bounds_check_data(ragel, 1);
+   ragel->mem.written += (ragel->mem.written > 0); // step over the terminator of the previous string; the very first string starts at offset 0
+   ragel->mem.cur = ragel->mem.data + ragel->mem.written;
+}
+
+static inline void // increments the current line number
+ragel_advance_line(struct ragel *ragel)
+{
+   assert(ragel);
+   ++ragel->lineno;
+}
+
+static inline bool // points p/pe/eof at the bytes just read into buf; returns false once a short read signals the end of input
+ragel_confirm_input(struct ragel *ragel, const size_t bytes)
+{
+   assert(ragel);
+   if (bytes > sizeof(ragel->buf))
+      errx(EXIT_FAILURE, "%s: gave larger buffer than %zu", __func__, sizeof(ragel->buf));
+
+   // Only a completely filled buffer means more input may follow; anything shorter is treated as the last block.
+   const bool more = (bytes == sizeof(ragel->buf));
+   ragel->p = ragel->buf;
+   ragel->pe = ragel->buf + bytes;
+   ragel->eof = (more ? NULL : ragel->pe);
+   return more;
+}