summaryrefslogtreecommitdiff
path: root/src/ragel
diff options
context:
space:
mode:
author: Jari Vetoniemi <mailroxas@gmail.com>, 2017-03-30 17:31:44 +0300
committer: Jari Vetoniemi <mailroxas@gmail.com>, 2017-04-13 14:49:46 +0300
commit: 76b8c9e03c97b16d9ff97f3b79c0ecbff0f5e7f2 (patch)
tree: 70f1d22a923d1c01b22b2fade3b6e96365990dda /src/ragel
Initial commit
Diffstat (limited to 'src/ragel')
-rw-r--r-- src/ragel/fspec.h 77
-rw-r--r-- src/ragel/fspec.rl 329
-rw-r--r-- src/ragel/ragel.h 236
3 files changed, 642 insertions, 0 deletions
diff --git a/src/ragel/fspec.h b/src/ragel/fspec.h
new file mode 100644
index 0000000..68998f4
--- /dev/null
+++ b/src/ragel/fspec.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+struct fspec_bytes { // pointer + length view of a byte sequence
+ const uint8_t *data; // first byte; never written through this struct
+ size_t size; // number of bytes starting at data
+};
+
+enum fspec_kind_bits { // bit flags stored in fspec_kind.flags
+ FSPEC_KIND_IGNORE = 1<<0, // set by the "pad" kind
+ FSPEC_KIND_HEXADECIMAL = 1<<1, // set by the "hex" kind
+ FSPEC_KIND_ENCODING = 1<<2, // set by the "ascii", "utf8" and "sjis" kinds
+};
+
+struct fspec_kind { // the `= kind` annotation of a field
+ const char *name; // kind keyword, e.g. "hex"
+ uint32_t flags; // combination of enum fspec_kind_bits
+};
+
+enum fspec_array_type { // selects the valid member of the union in struct fspec_array
+ FSPEC_ARRAY_FIXED, // numeric length, stored in nmemb
+ FSPEC_ARRAY_MATCH, // quoted string, stored in match
+ FSPEC_ARRAY_VAR, // variable name, stored in var
+};
+
+struct fspec_array { // contents of a field's `[...]` suffix
+ enum fspec_array_type type; // tells which union member below is valid
+
+ union {
+ struct fspec_bytes match; // FSPEC_ARRAY_MATCH: bytes of the quoted string
+ const char *var; // FSPEC_ARRAY_VAR: name of a variable the parser found declared earlier
+ size_t nmemb; // FSPEC_ARRAY_FIXED: number from the spec (the parser's default field uses 1)
+ };
+};
+
+enum fspec_type_bits { // bit flags stored in fspec_type.flags
+ FSPEC_TYPE_SIGNED = 1<<0, // set for the built-in s8/s16/s32/s64 types
+ FSPEC_TYPE_CONTAINER = 1<<1, // set when the type is not built-in but its name was found in the parser's string storage
+};
+
+struct fspec_type { // type part of a field declaration
+ const char *name; // type name taken from the spec
+ size_t size; // sizeof the matching C integer type; left 0 for container types
+ uint32_t flags; // combination of enum fspec_type_bits
+};
+
+struct fspec_field { // one parsed field declaration, handed to ops.field
+ struct fspec_type type; // declared type
+ struct fspec_array array; // optional `[...]` part
+ struct fspec_kind kind; // optional `= kind` part; all zero when the field has none
+ const char *name; // field identifier
+};
+
+struct fspec_container { // the struct whose fields are currently being reported
+ const char *name; // filled in by the parser's container_name action
+};
+
+struct fspec;
+struct fspec { // parser context supplied by the caller of fspec_parse()
+ struct {
+ void (*field)(struct fspec *fspec, const struct fspec_container *container, const struct fspec_field *field); // invoked for every completed field declaration
+ size_t (*read)(struct fspec *fspec, char *buf, const size_t size, const size_t nmemb); // input source; fspec_parse() passes size 1 and nmemb = its buffer size and treats the result as a byte count
+ } ops; // fspec_parse() asserts that both callbacks are non-NULL
+
+ struct {
+ // XXX: replace with ops.alloc, ops.free
+ // on dump.c we can then just provide implementation that still uses reasonable amount of static memory
+ // but we don't limit the code from working with regular dynamic memory
+ uint8_t *data; // buffer the parser uses to store captured strings
+ size_t size; // capacity of data in bytes
+ } mem;
+};
+
+void fspec_parse(struct fspec *fspec); // reads the whole spec through fspec->ops.read and reports each field through fspec->ops.field
diff --git a/src/ragel/fspec.rl b/src/ragel/fspec.rl
new file mode 100644
index 0000000..8493cf1
--- /dev/null
+++ b/src/ragel/fspec.rl
@@ -0,0 +1,329 @@
+#include "fspec.h"
+#include "ragel.h"
+
+// It's pretty good base so far.
+// Using ragel_search_str to type-check variable declarations is a hack.
+// State should have hashmap for fields/containers.
+//
+// XXX: Maybe drop whole container thing and just give field const char *parent; that points to keypath of container.
+// Then we would have flat structure like, "foo, foo.var, foo.b, ..."
+
+static const struct fspec_container default_container = {0}; // template copied into state.container whenever a struct name is parsed
+static const struct fspec_field default_field = { .array.nmemb = 1 }; // template copied into state.field for each new field: array type 0 (FSPEC_ARRAY_FIXED) with one member
+
+enum stack_type { // selects the valid member of the union in struct stack
+ STACK_VAR, // identifier or keyword text, stored in var
+ STACK_STR, // quoted string bytes, stored in str
+ STACK_NUM, // hexadecimal or decimal number, stored in num
+};
+
+struct stack { // the most recently captured token; each push_* action overwrites it
+ enum stack_type type; // tells which union member below is valid
+
+ union {
+ struct fspec_bytes str; // STACK_STR
+ const char *var; // STACK_VAR
+ uint64_t num; // STACK_NUM
+ };
+};
+
+struct state { // all mutable parser state used by fspec_parse()
+ struct ragel ragel; // input block, cursor and string storage
+ struct stack stack; // last captured token
+ struct fspec_field field; // field currently being assembled
+ struct fspec_container container; // struct currently being parsed
+ size_t container_data_offset; // storage offset recorded when the struct name was parsed; array variable lookups start here
+};
+
+/**
+ * Human readable name of a stack value type, used in error messages.
+ * A value outside enum stack_type trips an assertion; with assertions
+ * compiled out the function returns "unknown".
+ */
+static const char*
+stack_type_to_str(const enum stack_type type)
+{
+    const char *name = NULL;
+
+    switch (type) {
+        case STACK_VAR: name = "var"; break;
+        case STACK_STR: name = "str"; break;
+        case STACK_NUM: name = "num"; break;
+    }
+
+    if (name)
+        return name;
+
+    assert(0 && "should not happen");
+    return "unknown";
+}
+
+/**
+ * Verify that the value on the stack has the wanted type.
+ * Reports a parse error through ragel_throw_error() when it does not.
+ */
+static void
+stack_check_type(const struct ragel *ragel, const struct stack *stack, const enum stack_type type)
+{
+    assert(ragel && stack);
+
+    if (stack->type == type)
+        return;
+
+    ragel_throw_error(ragel, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type));
+}
+
+/**
+ * Fetch the stack value as a variable name.
+ * The type is checked first; a mismatch is reported as a parse error.
+ */
+static const char*
+stack_get_var(const struct ragel *ragel, const struct stack *stack)
+{
+    assert(ragel && stack);
+    stack_check_type(ragel, stack, STACK_VAR);
+
+    const char *name = stack->var;
+    return name;
+}
+
+/**
+ * Fetch the stack value as a byte string.
+ * The type is checked first; a mismatch is reported as a parse error.
+ * The returned pointer refers to the member inside *stack.
+ */
+static const struct fspec_bytes*
+stack_get_str(const struct ragel *ragel, const struct stack *stack)
+{
+    assert(ragel && stack);
+    stack_check_type(ragel, stack, STACK_STR);
+
+    const struct fspec_bytes *bytes = &stack->str;
+    return bytes;
+}
+
+/**
+ * Fetch the stack value as a number.
+ * The type is checked first; a mismatch is reported as a parse error.
+ */
+static uint64_t
+stack_get_num(const struct ragel *ragel, const struct stack *stack)
+{
+    assert(ragel && stack);
+    stack_check_type(ragel, stack, STACK_NUM);
+
+    const uint64_t value = stack->num;
+    return value;
+}
+
+// Number of elements in a true array (not a pointer); the argument is parenthesized so any array expression is safe.
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+/**
+ * Resolve a type name from the spec into a struct fspec_type.
+ *
+ * Built-in integer names (u8..u64, s8..s64) yield their size and signedness.
+ * Any other name is accepted as a container type only when an identical
+ * string is already present in the parser's string storage; its size is
+ * left at 0. Everything else is reported as an "invalid type" parse error.
+ *
+ * @param ragel     parser state, used for the storage lookup and error reporting
+ * @param str       NUL-terminated type name; kept by reference in out_type->name for container types
+ * @param out_type  receives the resolved type
+ */
+static void
+fspec_type_from_str(const struct ragel *ragel, const char *str, struct fspec_type *out_type)
+{
+    assert(ragel && str && out_type);
+
+    // built once instead of on every call
+    static const struct fspec_type builtin[] = {
+        { .name = "u8", .size = sizeof(uint8_t) },
+        { .name = "u16", .size = sizeof(uint16_t) },
+        { .name = "u32", .size = sizeof(uint32_t) },
+        { .name = "u64", .size = sizeof(uint64_t) },
+        { .name = "s8", .size = sizeof(int8_t), .flags = FSPEC_TYPE_SIGNED },
+        { .name = "s16", .size = sizeof(int16_t), .flags = FSPEC_TYPE_SIGNED },
+        { .name = "s32", .size = sizeof(int32_t), .flags = FSPEC_TYPE_SIGNED },
+        { .name = "s64", .size = sizeof(int64_t), .flags = FSPEC_TYPE_SIGNED },
+    };
+
+    for (size_t i = 0; i < ARRAY_SIZE(builtin); ++i) {
+        if (strcmp(str, builtin[i].name) == 0) {
+            *out_type = builtin[i];
+            return;
+        }
+    }
+
+    if (ragel_search_str(ragel, 0, str)) {
+        *out_type = (struct fspec_type){ .name = str, .flags = FSPEC_TYPE_CONTAINER };
+        return;
+    }
+
+    ragel_throw_error(ragel, "invalid type");
+}
+
+/**
+ * Resolve a kind keyword from the spec into a struct fspec_kind.
+ *
+ * Known keywords are "pad", "hex", "ascii", "utf8" and "sjis"; anything
+ * else is reported as an "invalid kind" parse error.
+ *
+ * @param ragel     parser state, used for error reporting
+ * @param str       NUL-terminated kind keyword
+ * @param out_kind  receives the resolved kind (its name points at a string literal)
+ */
+static void
+fspec_kind_from_str(const struct ragel *ragel, const char *str, struct fspec_kind *out_kind)
+{
+    assert(ragel && str && out_kind);
+
+    // built once instead of on every call
+    static const struct fspec_kind known[] = {
+        { .name = "pad", .flags = FSPEC_KIND_IGNORE },
+        { .name = "hex", .flags = FSPEC_KIND_HEXADECIMAL },
+        { .name = "ascii", .flags = FSPEC_KIND_ENCODING },
+        { .name = "utf8", .flags = FSPEC_KIND_ENCODING },
+        { .name = "sjis", .flags = FSPEC_KIND_ENCODING },
+    };
+
+    for (size_t i = 0; i < ARRAY_SIZE(known); ++i) {
+        if (strcmp(str, known[i].name) == 0) {
+            *out_kind = known[i];
+            return;
+        }
+    }
+
+    ragel_throw_error(ragel, "invalid kind");
+}
+
+/**
+ * Validate the kind of a field against its type.
+ * Encoding kinds are only accepted on one byte wide types; any other
+ * combination is reported as a parse error.
+ */
+static void
+check_field_kind(const struct ragel *ragel, const struct fspec_field *field)
+{
+    assert(ragel && field);
+
+    const bool is_encoding = (field->kind.flags & FSPEC_KIND_ENCODING);
+    if (!is_encoding)
+        return;
+
+    if (field->type.size == sizeof(uint8_t))
+        return;
+
+    ragel_throw_error(ragel, "invalid kind: %s kind only allowed for u8 and s8 types", field->kind.name);
+}
+
+%%{
+ # File specification parser.
+
+ machine fspec;
+ variable p state.ragel.p;
+ variable pe state.ragel.pe;
+ variable eof state.ragel.eof;
+ write data noerror nofinal;
+
+ action field {
+ fspec->ops.field(fspec, &state.container, &state.field);
+ }
+
+ action field_kind {
+ fspec_kind_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.kind);
+ check_field_kind(&state.ragel, &state.field);
+ }
+
+ action field_array {
+ switch (state.stack.type) {
+ case STACK_NUM:
+ state.field.array.type = FSPEC_ARRAY_FIXED;
+ state.field.array.nmemb = stack_get_num(&state.ragel, &state.stack);
+ break;
+
+ case STACK_STR:
+ state.field.array.type = FSPEC_ARRAY_MATCH;
+ state.field.array.match = *stack_get_str(&state.ragel, &state.stack);
+ break;
+
+ case STACK_VAR:
+ state.field.array.type = FSPEC_ARRAY_VAR;
+ state.field.array.var = stack_get_var(&state.ragel, &state.stack);
+
+ if (!ragel_search_str(&state.ragel, state.container_data_offset, state.field.array.var))
+ ragel_throw_error(&state.ragel, "undeclared variable '%s'", state.field.array.var);
+ break;
+
+ default:
+ ragel_throw_error(&state.ragel, "array can't contain the stack type of '%s'", stack_type_to_str(state.stack.type));
+ break;
+ }
+ }
+
+ action field_name {
+ state.field.name = stack_get_var(&state.ragel, &state.stack);
+ }
+
+ action field_type {
+ state.field = default_field;
+ fspec_type_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.type);
+ }
+
+ action container_name {
+ state.container = default_container;
+ state.container.name = stack_get_var(&state.ragel, &state.stack);
+ state.container_data_offset = state.ragel.mem.cur - state.ragel.mem.data;
+ }
+
+ action push_var {
+ state.stack.type = STACK_VAR;
+ state.stack.var = (char*)state.ragel.mem.cur;
+ }
+
+ action push_hex {
+    // Push the captured hexadecimal literal as a number.
+    // strtoull, not strtoll: the result is stored in a uint64_t and
+    // strtoll clamps every value above INT64_MAX.
+    state.stack.type = STACK_NUM;
+    state.stack.num = strtoull((char*)state.ragel.mem.cur, NULL, 16);
+ }
+
+ action push_dec {
+    // Push the captured decimal literal as a number.
+    // strtoull, not strtoll: the result is stored in a uint64_t and
+    // strtoll clamps every value above INT64_MAX.
+    state.stack.type = STACK_NUM;
+    state.stack.num = strtoull((char*)state.ragel.mem.cur, NULL, 10);
+ }
+
+ action push_str {
+ state.stack.type = STACK_STR;
+ state.stack.str.data = state.ragel.mem.cur;
+ state.stack.str.size = (state.ragel.mem.data + state.ragel.mem.written) - state.ragel.mem.cur;
+ }
+
+ action convert_escape {
+ ragel_convert_escape(&state.ragel);
+ }
+
+ action remove {
+ ragel_remove_last_data(&state.ragel);
+ }
+
+ action finish {
+ ragel_finish_data(&state.ragel);
+ }
+
+ action store {
+ ragel_store_data(&state.ragel);
+ }
+
+ action begin {
+ ragel_begin_data(&state.ragel);
+ }
+
+ action invalid_kind {
+ ragel_throw_error(&state.ragel, "invalid kind");
+ }
+
+ action invalid_type {
+ ragel_throw_error(&state.ragel, "invalid type");
+ }
+
+ action error {
+ ragel_throw_error(&state.ragel, "malformed input (machine failed here or in previous or next expression)");
+ }
+
+ action line {
+ ragel_advance_line(&state.ragel);
+ }
+
+ # Semantic
+ ws = space;
+ valid = ^cntrl;
+ es = '\\';
+ delim = ';';
+ quote = ['"];
+ bopen = '{';
+ bclose = '}';
+ newline = '\n';
+ octal = [0-7];
+ hex = '0x' <: xdigit+;
+ decimal = ([1-9] <: digit*) | '0';
+ comment = '//' <: valid* :>> newline;
+ escape = es <: ('x' <: xdigit+ | [abfnrtv\\'"e] | octal{1,3});
+ type = 'u8' | 'u16' | 'u32' | 'u64' | 's8' | 's16' | 's32' | 's64';
+ kind = 'ascii' | 'utf8' | 'sjis' | 'hex' | 'pad';
+ reserved = 'struct' | type | kind;
+ var = ((alpha | '_') <: (alnum | '_')*) - reserved;
+
+ # Catchers
+ catch_var = var >begin $store %finish %push_var;
+ catch_struct = ('struct' $store ws+ >store <: var $store) >begin %finish %push_var;
+ catch_type = (catch_struct | type >begin $store %push_var %remove) $!invalid_type;
+ catch_hex = hex >begin $store %push_hex %remove;
+ catch_decimal = decimal >begin $store %push_dec %remove;
+ catch_string = quote <: (escape %convert_escape | print)* >begin $store %finish %push_str :>> quote;
+ catch_array = '[' <: (catch_hex | catch_decimal | catch_string | catch_var) :>> ']';
+ catch_kind = '=' ws* <: kind >begin $store %push_var %remove $!invalid_kind;
+
+ # Actions
+ field = catch_type %field_type ws+ <: catch_var %field_name ws* <: (catch_array %field_array ws*)? <: (catch_kind %field_kind ws*)? :>> delim %field;
+ container = catch_struct %container_name ws* :>> bopen <: (ws | comment | field)* :>> bclose ws* delim;
+ line = valid* :>> newline @line;
+ main := (ws | comment | container)* & line* $!error;
+}%%
+
+void
+fspec_parse(struct fspec *fspec)
+{
+ int cs; // current state of the generated state machine
+ %% write init;
+
+ (void)fspec_en_main; // presumably silences an unused-symbol warning for the generated constant — confirm against Ragel output
+ assert(fspec);
+ assert(fspec->ops.read); // both callbacks are mandatory
+ assert(fspec->ops.field);
+
+ struct state state = {
+ .ragel = {
+ .lineno = 1, // line numbers in error messages start at 1
+ .mem = {
+ .data = fspec->mem.data, // string storage is provided by the caller
+ .size = fspec->mem.size,
+ },
+ },
+ };
+
+ for (bool ok = true; ok;) { // one iteration per input block; ends after the first short block
+ const size_t bytes = fspec->ops.read(fspec, state.ragel.buf, 1, sizeof(state.ragel.buf));
+ ok = ragel_confirm_input(&state.ragel, bytes); // sets p/pe/eof; false once fewer bytes than the buffer size were read
+ %% write exec;
+ }
+}
diff --git a/src/ragel/ragel.h b/src/ragel/ragel.h
new file mode 100644
index 0000000..af06f4a
--- /dev/null
+++ b/src/ragel/ragel.h
@@ -0,0 +1,236 @@
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+#include <limits.h>
+#include <err.h>
+
+struct ragel { // state shared by the helper functions in this header
+ struct {
+ uint8_t *data; // NUL separated entries: data\0another_data\0
+ const uint8_t *cur; // start of the entry being captured, or of an identical earlier entry after ragel_finish_data()
+ size_t written, size; // bytes used in data (the trailing NUL is not counted) / capacity of data
+ } mem;
+
+ char buf[4096]; // block of input data
+ const char *p, *pe, *eof; // see ragel doc
+ size_t lineno; // current line
+};
+
+/**
+ * Locate the line and the word around the current parse position, for
+ * error reporting. All offsets are relative to ragel->buf.
+ *
+ * @param ragel       parser state; p and pe must point into buf with p <= pe
+ * @param out_lineno  ragel->lineno minus the newlines skipped while rewinding over trailing whitespace
+ * @param out_ls      start of the line, after leading whitespace
+ * @param out_le      end of the line (offset of the newline or of the end of input)
+ * @param out_ws      start of the word at the position
+ * @param out_we      end of that word
+ */
+static inline void
+ragel_get_current_line(const struct ragel *ragel, size_t *out_lineno, size_t *out_ls, size_t *out_le, size_t *out_ws, size_t *out_we)
+{
+    assert(ragel && out_lineno && out_ls && out_le && out_ws && out_we);
+    assert(ragel->p >= ragel->buf && ragel->pe >= ragel->p);
+
+    // <ctype.h> functions take unsigned char values; plain char may be negative for non-ASCII input
+    const unsigned char *buf = (const unsigned char*)ragel->buf;
+    const size_t end = ragel->pe - ragel->buf;
+    size_t off = ragel->p - ragel->buf;
+    size_t lineno = ragel->lineno;
+    size_t ls, le, ws, we;
+
+    // p == pe is one past the valid input (and one past buf itself when the block is full): step back onto the last byte
+    if (off > 0 && off == end)
+        --off;
+
+    // rewind to first non-space
+    for (; off > 0 && (isspace(buf[off]) || !buf[off]); --off) {
+        if (lineno > 0 && buf[off] == '\n')
+            --lineno;
+    }
+
+    for (ls = off; ls > 0 && buf[ls] != '\n'; --ls); // beginning of line
+    for (le = off; le < end && buf[le] != '\n'; ++le); // end of line
+    for (; ls < le && isspace(buf[ls]); ++ls); // strip leading whitespace
+
+    // stripping can move ls past off when the position itself is whitespace at offset 0; keep the word inside the line
+    ws = (off > ls ? off : ls);
+    for (; ws > ls && isspace(buf[ws]); --ws); // rewind to first non-space
+    for (; ws > ls && !isspace(buf[ws - 1]); --ws); // find word start
+    for (we = ws; we < le && !isspace(buf[we]); ++we); // find word ending
+
+    assert(we >= ws && ws >= ls && le >= ls && le >= we);
+    *out_lineno = lineno;
+    *out_ls = ls;
+    *out_le = le;
+    *out_ws = ws;
+    *out_we = we;
+}
+
+/**
+ * Report a parse error and terminate the process.
+ *
+ * Prints the line number and the formatted message, then the offending
+ * source line, then a marker line that underlines the current word with
+ * '~' and puts '^' at the parse position. Exits with EXIT_FAILURE.
+ */
+__attribute__((format(printf, 2, 3)))
+static inline void
+ragel_throw_error(const struct ragel *ragel, const char *fmt, ...)
+{
+    assert(ragel && fmt);
+
+    size_t lineno, line_start, line_end, word_start, word_end;
+    ragel_get_current_line(ragel, &lineno, &line_start, &line_end, &word_start, &word_end);
+    assert(line_end - line_start <= INT_MAX && word_start - line_start <= INT_MAX);
+
+    char msg[255];
+    va_list ap;
+    va_start(ap, fmt);
+    vsnprintf(msg, sizeof(msg), fmt, ap);
+    va_end(ap);
+
+    const int indent = 8;
+    const size_t word_len = word_end - word_start;
+    const size_t mark = (word_len ? word_len : 1); // always draw at least one marker
+    const size_t cur = (ragel->p - ragel->buf) - word_start; // parse position relative to the word
+
+    warnx("\x1b[37m%zu: \x1b[31merror: \x1b[0m%s\n%*s%.*s", lineno, msg, indent, "", (int)(line_end - line_start), ragel->buf + line_start);
+    fprintf(stderr, "%*s%*s\x1b[31m", indent, "", (int)(word_start - line_start), "");
+
+    size_t i = 0;
+    while (i < mark) {
+        fputs((i == cur ? "^" : "~"), stderr);
+        ++i;
+    }
+
+    fputs("\x1b[0m\n", stderr);
+    exit(EXIT_FAILURE);
+}
+
+/**
+ * Ensure that nmemb more bytes plus a terminating NUL fit into the string storage.
+ * Reports a parse error when they do not.
+ */
+static inline void
+ragel_bounds_check_data(const struct ragel *ragel, const size_t nmemb)
+{
+    assert(ragel);
+
+    const size_t capacity = ragel->mem.size;
+    const size_t used = ragel->mem.written;
+
+    if (capacity >= nmemb && used < capacity - nmemb)
+        return;
+
+    ragel_throw_error(ragel, "data storage limit exceeded: %zu bytes exceeds the maximum store size of %zu bytes", ragel->mem.written, ragel->mem.size);
+}
+
+/**
+ * Replace the last nmemb stored bytes with a single byte and re-terminate the storage.
+ * Reports a parse error when fewer than nmemb bytes are stored.
+ */
+static inline void
+ragel_replace_data(struct ragel *ragel, const size_t nmemb, char replacement)
+{
+    assert(ragel);
+
+    if (ragel->mem.written < nmemb)
+        ragel_throw_error(ragel, "parse error: received escape conversion with mem.written of %zu, expected >= %zu", ragel->mem.written, nmemb);
+
+    const size_t at = ragel->mem.written - nmemb;
+    ragel->mem.data[at] = replacement;
+    ragel->mem.written = at + 1;
+    ragel->mem.data[ragel->mem.written] = 0;
+}
+
+/**
+ * Convert the escape sequence stored at mem.cur into the character it denotes.
+ *
+ * The text at mem.cur is matched against the known escape prefixes; on a
+ * match everything from mem.cur to the end of the storage is replaced by
+ * the single converted character. "\x" is followed by hexadecimal digits
+ * and a bare backslash by octal digits. An unknown sequence, or fewer
+ * than two stored bytes, is reported as a parse error.
+ */
+static inline void
+ragel_convert_escape(struct ragel *ragel)
+{
+    assert(ragel);
+
+    if (ragel->mem.written < 2)
+        ragel_throw_error(ragel, "parse error: received escape conversion with mem.written of %zu, expected >= 2", ragel->mem.written);
+
+    static const struct {
+        const char *e; // escape prefix as it appears in the storage
+        char v;        // replacement for single character escapes
+        char b;        // numeric base of the digits that follow, 0 when there are none
+    } map[] = {
+        { .e = "\\a", .v = '\a' },
+        { .e = "\\b", .v = '\b' },
+        { .e = "\\f", .v = '\f' },
+        { .e = "\\n", .v = '\n' },
+        { .e = "\\r", .v = '\r' },
+        { .e = "\\t", .v = '\t' },
+        { .e = "\\v", .v = '\v' },
+        { .e = "\\\\", .v = '\\' },
+        { .e = "\\'", .v = '\'' },
+        { .e = "\\\"", .v = '"' },
+        { .e = "\\e", .v = '\x1b' }, // ESC; the '\e' literal is a compiler extension, not ISO C
+        { .e = "\\x", .b = 16 },
+        { .e = "\\", .b = 8 }, // must stay last: it is a prefix of every entry above
+    };
+
+    const char *cur = (const char*)ragel->mem.cur;
+    const size_t cur_sz = strlen(cur);
+
+    // no ARRAY_SIZE macro here: defining and undefining it in a header would clobber the includer's own macro
+    for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); ++i) {
+        const size_t prefix = strlen(map[i].e);
+        if (strncmp(cur, map[i].e, prefix) != 0)
+            continue;
+
+        const char v = (!map[i].b ? map[i].v : (char)strtol(cur + prefix, NULL, map[i].b));
+        assert((map[i].b == 8 && cur_sz >= 2) || (map[i].b == 16 && cur_sz >= 2) || (map[i].b == 0 && cur_sz == 2));
+        assert(map[i].b != 8 || isdigit((unsigned char)cur[1]));
+        ragel_replace_data(ragel, cur_sz, v);
+        return;
+    }
+
+    ragel_throw_error(ragel, "parse error: received unknown escape conversion");
+}
+
+/**
+ * Print every NUL separated entry of the string storage to stdout, one
+ * per line, starting at the given byte offset.
+ */
+static inline void
+ragel_dump_data(struct ragel *ragel, const size_t offset)
+{
+    assert(ragel);
+
+    const uint8_t *end = ragel->mem.data + ragel->mem.written;
+    const uint8_t *p = ragel->mem.data + offset;
+
+    while (p && p < end) {
+        printf("%s\n", (const char*)p); // %s expects a char pointer
+
+        // continue after the terminator of this entry; stop when there is none before the end
+        const uint8_t *nul = memchr(p, 0, (size_t)(end - p));
+        if (!nul)
+            break;
+
+        p = nul + 1;
+    }
+}
+
+/**
+ * Search the string storage for an entry that begins with the given bytes.
+ *
+ * Only the start of each NUL separated entry is compared, beginning with
+ * the entry at the given byte offset. Returns a pointer to the matching
+ * entry, or NULL when none matches.
+ */
+static inline const uint8_t*
+ragel_search_data(const struct ragel *ragel, const size_t offset, const uint8_t *data, const size_t size)
+{
+    assert(ragel && data);
+
+    const uint8_t *end = ragel->mem.data + ragel->mem.written;
+    const uint8_t *entry = ragel->mem.data + offset;
+
+    while (entry && entry < end && (size_t)(end - entry) >= size) {
+        if (memcmp(data, entry, size) == 0)
+            return entry;
+
+        // advance to the byte after the terminator of this entry
+        const uint8_t *nul = memchr(entry, 0, end - entry);
+        entry = (nul ? nul + 1 : NULL);
+    }
+
+    return NULL;
+}
+
+/**
+ * Search the string storage for an entry equal to a NUL-terminated string.
+ * The terminator is part of the comparison, so only whole entries match.
+ * Returns a pointer to the matching entry, or NULL when none matches.
+ */
+static inline const uint8_t*
+ragel_search_str(const struct ragel *ragel, const size_t offset, const char *str)
+{
+    assert(ragel && str); // checked here because strlen() below would dereference a NULL str first
+    return ragel_search_data(ragel, offset, (const uint8_t*)str, strlen(str) + 1);
+}
+
+/**
+ * Drop the entry that starts at mem.cur, together with the separator in
+ * front of it, and re-terminate the storage.
+ *
+ * The first entry in the storage has no separator in front of it
+ * (ragel_begin_data() only skips one when data is already stored), so
+ * removing it simply empties the storage instead of underflowing
+ * mem.written.
+ */
+static inline void
+ragel_remove_last_data(struct ragel *ragel)
+{
+    assert(ragel);
+
+    const uint8_t *end = ragel->mem.data + ragel->mem.written;
+    assert(ragel->mem.cur >= ragel->mem.data && ragel->mem.cur <= end);
+
+    const size_t start = ragel->mem.cur - ragel->mem.data;
+    ragel->mem.written = (start > 0 ? start - 1 : 0);
+    ragel->mem.data[ragel->mem.written] = 0;
+}
+
+/**
+ * Finish the entry at mem.cur, sharing storage with an identical earlier entry.
+ * When the same bytes (terminator included) are already stored, the new
+ * copy is removed and mem.cur is pointed at the earlier one.
+ */
+static inline void
+ragel_finish_data(struct ragel *ragel)
+{
+    assert(ragel);
+
+    const uint8_t *tail = ragel->mem.data + ragel->mem.written;
+    const size_t size = tail - ragel->mem.cur + 1; // entry length including its terminator
+    const uint8_t *earlier = ragel_search_data(ragel, 0, ragel->mem.cur, size);
+
+    if (!earlier)
+        return;
+
+    ragel_remove_last_data(ragel);
+    ragel->mem.cur = earlier;
+}
+
+/**
+ * Append the input character at ragel->p to the string storage and keep
+ * the storage NUL-terminated. Reports a parse error when the storage is full.
+ */
+static inline void
+ragel_store_data(struct ragel *ragel)
+{
+    ragel_bounds_check_data(ragel, 1);
+
+    uint8_t *dst = ragel->mem.data + ragel->mem.written;
+    dst[0] = *ragel->p;
+    dst[1] = 0;
+    ragel->mem.written += 1;
+}
+
+/**
+ * Start a new entry in the string storage and point mem.cur at it.
+ * When data is already stored, the terminator of the previous entry is
+ * kept as the separator. Reports a parse error when the storage is full.
+ */
+static inline void
+ragel_begin_data(struct ragel *ragel)
+{
+    ragel_bounds_check_data(ragel, 1);
+
+    if (ragel->mem.written > 0)
+        ++ragel->mem.written;
+
+    ragel->mem.cur = ragel->mem.data + ragel->mem.written;
+}
+
+/**
+ * Count one more input line.
+ */
+static inline void
+ragel_advance_line(struct ragel *ragel)
+{
+    assert(ragel);
+    ragel->lineno += 1;
+}
+
+/**
+ * Set up p, pe and eof for a freshly read block of `bytes` bytes in ragel->buf.
+ *
+ * A block shorter than the buffer is taken as the last one: eof is set to
+ * pe and false is returned. Otherwise eof is NULL and true is returned.
+ * Exits the process when bytes exceeds the buffer size.
+ */
+static inline bool
+ragel_confirm_input(struct ragel *ragel, const size_t bytes)
+{
+    assert(ragel);
+
+    const size_t capacity = sizeof(ragel->buf);
+    if (bytes > capacity)
+        errx(EXIT_FAILURE, "%s: gave larger buffer than %zu", __func__, sizeof(ragel->buf));
+
+    ragel->p = ragel->buf;
+    ragel->pe = ragel->buf + bytes;
+
+    if (bytes < capacity) {
+        ragel->eof = ragel->pe;
+        return false;
+    }
+
+    ragel->eof = NULL;
+    return true;
+}