From 76b8c9e03c97b16d9ff97f3b79c0ecbff0f5e7f2 Mon Sep 17 00:00:00 2001
From: Jari Vetoniemi
Date: Thu, 30 Mar 2017 17:31:44 +0300
Subject: Initial commit

---
 src/ragel/fspec.h  |  91 +++++++++++++
 src/ragel/fspec.rl | 344 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/ragel/ragel.h  | 261 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 696 insertions(+)
 create mode 100644 src/ragel/fspec.h
 create mode 100644 src/ragel/fspec.rl
 create mode 100644 src/ragel/ragel.h

(limited to 'src/ragel')

diff --git a/src/ragel/fspec.h b/src/ragel/fspec.h
new file mode 100644
index 0000000..68998f4
--- /dev/null
+++ b/src/ragel/fspec.h
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+// Non-owning view of a byte sequence.
+struct fspec_bytes {
+   const uint8_t *data;
+   size_t size;
+};
+
+// Flag bits for fspec_kind.flags.
+enum fspec_kind_bits {
+   FSPEC_KIND_IGNORE = 1<<0,
+   FSPEC_KIND_HEXADECIMAL = 1<<1,
+   FSPEC_KIND_ENCODING = 1<<2,
+};
+
+// Kind annotation of a field ("pad", "hex", "ascii", "utf8" or "sjis").
+struct fspec_kind {
+   const char *name;
+   uint32_t flags;
+};
+
+// Tells which member of the union inside fspec_array is in use.
+enum fspec_array_type {
+   FSPEC_ARRAY_FIXED, // nmemb
+   FSPEC_ARRAY_MATCH, // match
+   FSPEC_ARRAY_VAR, // var
+};
+
+// Array specifier of a field.
+struct fspec_array {
+   enum fspec_array_type type;
+
+   union {
+      struct fspec_bytes match;
+      const char *var;
+      size_t nmemb;
+   };
+};
+
+// Flag bits for fspec_type.flags.
+enum fspec_type_bits {
+   FSPEC_TYPE_SIGNED = 1<<0,
+   FSPEC_TYPE_CONTAINER = 1<<1,
+};
+
+// Type of a field. The parser leaves size as 0 for container types.
+struct fspec_type {
+   const char *name;
+   size_t size;
+   uint32_t flags;
+};
+
+// One parsed field declaration, passed to ops.field.
+struct fspec_field {
+   struct fspec_type type;
+   struct fspec_array array;
+   struct fspec_kind kind;
+   const char *name;
+};
+
+// Container that encloses the fields reported through ops.field.
+struct fspec_container {
+   const char *name;
+};
+
+// Parser instance: caller-provided callbacks and backing memory.
+struct fspec;
+struct fspec {
+   struct {
+      // Called for every parsed field.
+      void (*field)(struct fspec *fspec, const struct fspec_container *container, const struct fspec_field *field);
+      // Reads input into buf and returns the item count; fspec_parse passes size 1 and treats a short count as end of input.
+      size_t (*read)(struct fspec *fspec, char *buf, const size_t size, const size_t nmemb);
+   } ops;
+
+   struct {
+      // XXX: replace with ops.alloc, ops.free
+      // on dump.c we can then just provide implementation that still uses reasonable amount of static memory
+      // but we don't limit the code from working with regular dynamic memory
+      uint8_t *data;
+      size_t size;
+   } mem;
+};
+
+// Parses the specification supplied by fspec->ops.read and reports each field through fspec->ops.field.
+// Parse errors are printed to stderr and terminate the process.
+void fspec_parse(struct fspec *fspec);
diff --git a/src/ragel/fspec.rl b/src/ragel/fspec.rl
new file mode 100644
index 0000000..8493cf1
--- /dev/null
+++ b/src/ragel/fspec.rl
@@ -0,0 +1,344 @@
+#include "fspec.h"
+#include "ragel.h"
+
+// It's pretty good base so far.
+// ragel_search_str for typechecking variable declaration is hack.
+// State should have hashmap for fields/containers.
+//
+// XXX: Maybe drop whole container thing and just give field const char *parent; that points to keypath of container.
+// Then we would have flat structure like, "foo, foo.var, foo.b, ..."
+
+static const struct fspec_container default_container = {0};
+static const struct fspec_field default_field = { .array.nmemb = 1 };
+
+// Type of the value most recently pushed by the push_* actions.
+enum stack_type {
+   STACK_VAR,
+   STACK_STR,
+   STACK_NUM,
+};
+
+// Single-slot "stack": only the last pushed value is kept.
+struct stack {
+   enum stack_type type;
+
+   union {
+      struct fspec_bytes str;
+      const char *var;
+      uint64_t num;
+   };
+};
+
+// Parser state shared by the machine actions.
+struct state {
+   struct ragel ragel;
+   struct stack stack;
+   struct fspec_field field;
+   struct fspec_container container;
+   size_t container_data_offset;
+};
+
+// Returns a printable name for a stack type (used in error messages).
+static const char*
+stack_type_to_str(const enum stack_type type)
+{
+   switch (type) {
+      case STACK_VAR: return "var";
+      case STACK_STR: return "str";
+      case STACK_NUM: return "num";
+   }
+
+   assert(0 && "should not happen");
+   return "unknown";
+}
+
+// Throws a parse error unless the last pushed value has the requested type.
+static void
+stack_check_type(const struct ragel *ragel, const struct stack *stack, const enum stack_type type)
+{
+   assert(ragel && stack);
+
+   if (stack->type != type)
+      ragel_throw_error(ragel, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type));
+}
+
+// Returns the last pushed value as a variable name; throws on type mismatch.
+static const char*
+stack_get_var(const struct ragel *ragel, const struct stack *stack)
+{
+   assert(ragel && stack);
+   stack_check_type(ragel, stack, STACK_VAR);
+   return stack->var;
+}
+
+// Returns the last pushed value as a byte string; throws on type mismatch.
+static const struct fspec_bytes*
+stack_get_str(const struct ragel *ragel, const struct stack *stack)
+{
+   assert(ragel && stack);
+   stack_check_type(ragel, stack, STACK_STR);
+   return &stack->str;
+}
+
+// Returns the last pushed value as a number; throws on type mismatch.
+static uint64_t
+stack_get_num(const struct ragel *ragel, const struct stack *stack)
+{
+   assert(ragel && stack);
+   stack_check_type(ragel, stack, STACK_NUM);
+   return stack->num;
+}
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+// Resolves a type name: builtin integer types come from the table below,
+// any other name must already exist in the parser's data storage and is treated as a container type.
+static void
+fspec_type_from_str(const struct ragel *ragel, const char *str, struct fspec_type *out_type)
+{
+   assert(ragel && str);
+
+   const struct fspec_type types[] = {
+      { .name = "u8", .size = sizeof(uint8_t) },
+      { .name = "u16", .size = sizeof(uint16_t) },
+      { .name = "u32", .size = sizeof(uint32_t) },
+      { .name = "u64", .size = sizeof(uint64_t) },
+      { .name = "s8", .size = sizeof(int8_t), .flags = FSPEC_TYPE_SIGNED },
+      { .name = "s16", .size = sizeof(int16_t), .flags = FSPEC_TYPE_SIGNED },
+      { .name = "s32", .size = sizeof(int32_t), .flags = FSPEC_TYPE_SIGNED },
+      { .name = "s64", .size = sizeof(int64_t), .flags = FSPEC_TYPE_SIGNED },
+   };
+
+   for (size_t i = 0; i < ARRAY_SIZE(types); ++i) {
+      if (strcmp(str, types[i].name))
+         continue;
+
+      *out_type = types[i];
+      return;
+   }
+
+   if (ragel_search_str(ragel, 0, str)) {
+      *out_type = (struct fspec_type){ .name = str, .flags = FSPEC_TYPE_CONTAINER };
+      return;
+   }
+
+   ragel_throw_error(ragel, "invalid type");
+}
+
+// Resolves a kind name from the table below; throws on an unknown kind.
+static void
+fspec_kind_from_str(const struct ragel *ragel, const char *str, struct fspec_kind *out_kind)
+{
+   assert(ragel && str);
+
+   const struct fspec_kind kinds[] = {
+      { .name = "pad", .flags = FSPEC_KIND_IGNORE },
+      { .name = "hex", .flags = FSPEC_KIND_HEXADECIMAL },
+      { .name = "ascii", .flags = FSPEC_KIND_ENCODING },
+      { .name = "utf8", .flags = FSPEC_KIND_ENCODING },
+      { .name = "sjis", .flags = FSPEC_KIND_ENCODING },
+   };
+
+   for (size_t i = 0; i < ARRAY_SIZE(kinds); ++i) {
+      if (strcmp(str, kinds[i].name))
+         continue;
+
+      *out_kind = kinds[i];
+      return;
+   }
+
+   ragel_throw_error(ragel, "invalid kind");
+}
+
+// Encoding kinds are only accepted on one byte wide types (u8 and s8).
+static void
+check_field_kind(const struct ragel *ragel, const struct fspec_field *field)
+{
+   assert(ragel && field);
+
+   if ((field->kind.flags & FSPEC_KIND_ENCODING) && field->type.size != sizeof(uint8_t))
+      ragel_throw_error(ragel, "invalid kind: %s kind only allowed for u8 and s8 types", field->kind.name);
+}
+
+%%{
+   # File specification parser.
+
+   machine fspec;
+   variable p state.ragel.p;
+   variable pe state.ragel.pe;
+   variable eof state.ragel.eof;
+   write data noerror nofinal;
+
+   action field {
+      fspec->ops.field(fspec, &state.container, &state.field);
+   }
+
+   action field_kind {
+      fspec_kind_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.kind);
+      check_field_kind(&state.ragel, &state.field);
+   }
+
+   action field_array {
+      switch (state.stack.type) {
+         case STACK_NUM:
+            state.field.array.type = FSPEC_ARRAY_FIXED;
+            state.field.array.nmemb = stack_get_num(&state.ragel, &state.stack);
+            break;
+
+         case STACK_STR:
+            state.field.array.type = FSPEC_ARRAY_MATCH;
+            state.field.array.match = *stack_get_str(&state.ragel, &state.stack);
+            break;
+
+         case STACK_VAR:
+            state.field.array.type = FSPEC_ARRAY_VAR;
+            state.field.array.var = stack_get_var(&state.ragel, &state.stack);
+
+            if (!ragel_search_str(&state.ragel, state.container_data_offset, state.field.array.var))
+               ragel_throw_error(&state.ragel, "undeclared variable '%s'", state.field.array.var);
+            break;
+
+         default:
+            ragel_throw_error(&state.ragel, "array can't contain the stack type of '%s'", stack_type_to_str(state.stack.type));
+            break;
+      }
+   }
+
+   action field_name {
+      state.field.name = stack_get_var(&state.ragel, &state.stack);
+   }
+
+   action field_type {
+      state.field = default_field;
+      fspec_type_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.type);
+   }
+
+   action container_name {
+      state.container = default_container;
+      state.container.name = stack_get_var(&state.ragel, &state.stack);
+      state.container_data_offset = state.ragel.mem.cur - state.ragel.mem.data;
+   }
+
+   action push_var {
+      state.stack.type = STACK_VAR;
+      state.stack.var = (char*)state.ragel.mem.cur;
+   }
+
+   # Numbers are converted with strtoull because the stack slot is an unsigned 64-bit value.
+   action push_hex {
+      state.stack.type = STACK_NUM;
+      state.stack.num = strtoull((char*)state.ragel.mem.cur, NULL, 16);
+   }
+
+   action push_dec {
+      state.stack.type = STACK_NUM;
+      state.stack.num = strtoull((char*)state.ragel.mem.cur, NULL, 10);
+   }
+
+   action push_str {
+      state.stack.type = STACK_STR;
+      state.stack.str.data = state.ragel.mem.cur;
+      state.stack.str.size = (state.ragel.mem.data + state.ragel.mem.written) - state.ragel.mem.cur;
+   }
+
+   action convert_escape {
+      ragel_convert_escape(&state.ragel);
+   }
+
+   action remove {
+      ragel_remove_last_data(&state.ragel);
+   }
+
+   action finish {
+      ragel_finish_data(&state.ragel);
+   }
+
+   action store {
+      ragel_store_data(&state.ragel);
+   }
+
+   action begin {
+      ragel_begin_data(&state.ragel);
+   }
+
+   action invalid_kind {
+      ragel_throw_error(&state.ragel, "invalid kind");
+   }
+
+   action invalid_type {
+      ragel_throw_error(&state.ragel, "invalid type");
+   }
+
+   action error {
+      ragel_throw_error(&state.ragel, "malformed input (machine failed here or in previous or next expression)");
+   }
+
+   action line {
+      ragel_advance_line(&state.ragel);
+   }
+
+   # Semantic
+   ws = space;
+   valid = ^cntrl;
+   es = '\\';
+   delim = ';';
+   quote = ['"];
+   bopen = '{';
+   bclose = '}';
+   newline = '\n';
+   octal = [0-7];
+   hex = '0x' <: xdigit+;
+   decimal = ([1-9] <: digit*) | '0';
+   comment = '//' <: valid* :>> newline;
+   escape = es <: ('x' <: xdigit+ | [abfnrtv\\'"e] | octal{1,3});
+   type = 'u8' | 'u16' | 'u32' | 'u64' | 's8' | 's16' | 's32' | 's64';
+   kind = 'ascii' | 'utf8' | 'sjis' | 'hex' | 'pad';
+   reserved = 'struct' | type | kind;
+   var = ((alpha | '_') <: (alnum | '_')*) - reserved;
+
+   # Catchers
+   catch_var = var >begin $store %finish %push_var;
+   catch_struct = ('struct' $store ws+ >store <: var $store) >begin %finish %push_var;
+   catch_type = (catch_struct | type >begin $store %push_var %remove) $!invalid_type;
+   catch_hex = hex >begin $store %push_hex %remove;
+   catch_decimal = decimal >begin $store %push_dec %remove;
+   catch_string = quote <: (escape %convert_escape | print)* >begin $store %finish %push_str :>> quote;
+   catch_array = '[' <: (catch_hex | catch_decimal | catch_string | catch_var) :>> ']';
+   catch_kind = '=' ws* <: kind >begin $store %push_var %remove $!invalid_kind;
+
+   # Actions
+   field = catch_type %field_type ws+ <: catch_var %field_name ws* <: (catch_array %field_array ws*)? <: (catch_kind %field_kind ws*)? :>> delim %field;
+   container = catch_struct %container_name ws* :>> bopen <: (ws | comment | field)* :>> bclose ws* delim;
+   line = valid* :>> newline @line;
+   main := (ws | comment | container)* & line* $!error;
+}%%
+
+// Runs the fspec machine over the input delivered block by block through fspec->ops.read.
+// Every completed field is reported through fspec->ops.field; parse errors terminate the process.
+void
+fspec_parse(struct fspec *fspec)
+{
+   int cs;
+   %% write init;
+
+   (void)fspec_en_main;
+   assert(fspec);
+   assert(fspec->ops.read);
+   assert(fspec->ops.field);
+
+   struct state state = {
+      .ragel = {
+         .lineno = 1,
+         .mem = {
+            .data = fspec->mem.data,
+            .size = fspec->mem.size,
+         },
+      },
+   };
+
+   for (bool ok = true; ok;) {
+      const size_t bytes = fspec->ops.read(fspec, state.ragel.buf, 1, sizeof(state.ragel.buf));
+      ok = ragel_confirm_input(&state.ragel, bytes);
+      %% write exec;
+   }
+}
diff --git a/src/ragel/ragel.h b/src/ragel/ragel.h
new file mode 100644
index 0000000..af06f4a
--- /dev/null
+++ b/src/ragel/ragel.h
@@ -0,0 +1,261 @@
+#pragma once
+
+#include <assert.h>
+#include <ctype.h>
+#include <err.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Input block, parse position and string storage shared by the helpers below.
+struct ragel {
+   struct {
+      uint8_t *data; // data\0another_data\0
+      const uint8_t *cur; // data\0another_data\0cursor
+      size_t written, size; // amount of data written / size of data
+   } mem;
+
+   char buf[4096]; // block of input data
+   const char *p, *pe, *eof; // see ragel doc
+   size_t lineno; // current line
+};
+
+// Locates the line and the word around the current parse position for error reporting.
+// Outputs are offsets into ragel->buf: [ls, le) is the line without leading whitespace, [ws, we) the word.
+static inline void
+ragel_get_current_line(const struct ragel *ragel, size_t *out_lineno, size_t *out_ls, size_t *out_le, size_t *out_ws, size_t *out_we)
+{
+   assert(out_ls && out_le && out_ws && out_we);
+   assert(ragel->p >= ragel->buf && ragel->pe >= ragel->p);
+
+   // <ctype.h> functions need values in the unsigned char range
+   const unsigned char *buf = (const unsigned char*)ragel->buf;
+   size_t ls, le, ws, we;
+   size_t off = ragel->p - ragel->buf;
+   size_t lineno = ragel->lineno;
+   const size_t end = ragel->pe - ragel->buf;
+
+   // p may equal pe; step back to the last valid byte instead of reading past the input
+   if (off >= end && end > 0)
+      off = end - 1;
+
+   // rewind to first non-space
+   for (; off > 0 && (isspace(buf[off]) || !buf[off]); --off) {
+      if (lineno > 0 && buf[off] == '\n')
+         --lineno;
+   }
+
+   for (ls = off; ls > 0 && buf[ls] != '\n'; --ls); // beginning of line
+   for (le = off; le < end && buf[le] != '\n'; ++le); // end of line
+   for (; ls < le && isspace(buf[ls]); ++ls); // strip leading whitespace
+   for (ws = off; ws > ls && isspace(buf[ws]); --ws); // rewind to first non-space
+   for (; ws > 0 && ws > ls && !isspace(buf[ws - 1]); --ws); // find word start
+   if (ws < ls) ws = ls; // position was inside the stripped leading whitespace
+   for (we = ws; we < le && !isspace(buf[we]); ++we); // find word ending
+
+   assert(we >= ws && ws >= ls && le >= ls && le >= we);
+   *out_lineno = lineno;
+   *out_ls = ls;
+   *out_le = le;
+   *out_ws = ws;
+   *out_we = we;
+}
+
+// Prints the formatted message with the offending line and a marker under the current word, then exits.
+__attribute__((format(printf, 2, 3)))
+static inline void
+ragel_throw_error(const struct ragel *ragel, const char *fmt, ...)
+{
+   assert(ragel && fmt);
+
+   size_t lineno, ls, le, ws, we;
+   ragel_get_current_line(ragel, &lineno, &ls, &le, &ws, &we);
+   assert(le - ls <= INT_MAX && ws - ls <= INT_MAX);
+
+   char msg[255];
+   va_list args;
+   va_start(args, fmt);
+   vsnprintf(msg, sizeof(msg), fmt, args);
+   va_end(args);
+
+   const int indent = 8;
+   const size_t mark = (we - ws ? we - ws : 1), cur = (ragel->p - ragel->buf) - ws;
+   warnx("\x1b[37m%zu: \x1b[31merror: \x1b[0m%s\n%*s%.*s", lineno, msg, indent, "", (int)(le - ls), ragel->buf + ls);
+   fprintf(stderr, "%*s%*s\x1b[31m", indent, "", (int)(ws - ls), "");
+   for (size_t i = 0; i < mark; ++i) fputs((i == cur ? "^" : "~"), stderr);
+   fputs("\x1b[0m\n", stderr);
+
+   exit(EXIT_FAILURE);
+}
+
+// Throws unless nmemb more bytes plus the terminating zero fit into the data storage.
+static inline void
+ragel_bounds_check_data(const struct ragel *ragel, const size_t nmemb)
+{
+   assert(ragel);
+
+   if (ragel->mem.size < nmemb || ragel->mem.written >= ragel->mem.size - nmemb)
+      ragel_throw_error(ragel, "data storage limit exceeded: %zu bytes exceeds the maximum store size of %zu bytes", ragel->mem.written, ragel->mem.size);
+}
+
+// Replaces the last nmemb stored bytes with a single byte and re-terminates the data.
+static inline void
+ragel_replace_data(struct ragel *ragel, const size_t nmemb, char replacement)
+{
+   assert(ragel);
+
+   if (ragel->mem.written < nmemb)
+      ragel_throw_error(ragel, "parse error: received escape conversion with mem.written of %zu, expected >= %zu", ragel->mem.written, nmemb);
+
+   ragel->mem.data[(ragel->mem.written -= nmemb)] = replacement;
+   ragel->mem.data[++ragel->mem.written] = 0;
+}
+
+// Converts the escape sequence stored at mem.cur into the byte it denotes.
+static inline void
+ragel_convert_escape(struct ragel *ragel)
+{
+   assert(ragel);
+
+   if (ragel->mem.written < 2)
+      ragel_throw_error(ragel, "parse error: received escape conversion with mem.written of %zu, expected >= 2", ragel->mem.written);
+
+   const struct {
+      const char *e;
+      const char v, b;
+   } map[] = {
+      { .e = "\\a", .v = '\a' },
+      { .e = "\\b", .v = '\b' },
+      { .e = "\\f", .v = '\f' },
+      { .e = "\\n", .v = '\n' },
+      { .e = "\\r", .v = '\r' },
+      { .e = "\\t", .v = '\t' },
+      { .e = "\\v", .v = '\v' },
+      { .e = "\\\\", .v = '\\' },
+      { .e = "\\'", .v = '\'' },
+      { .e = "\\\"", .v = '"' },
+      { .e = "\\e", .v = '\x1b' }, // ESC; the '\e' literal is a compiler extension
+      { .e = "\\x", .b = 16 },
+      { .e = "\\", .b = 8 },
+   };
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+   const char *cur = (char*)ragel->mem.cur;
+   const size_t cur_sz = strlen(cur);
+   for (size_t i = 0; i < ARRAY_SIZE(map); ++i) {
+      if (!strncmp(cur, map[i].e, strlen(map[i].e))) {
+         const char v = (!map[i].b ? map[i].v : (char)strtol(cur + strlen(map[i].e), NULL, map[i].b));
+         assert((map[i].b == 8 && cur_sz >= 2) || (map[i].b == 16 && cur_sz >= 2) || (map[i].b == 0 && cur_sz == 2));
+         assert(map[i].b != 8 || isdigit((unsigned char)cur[1]));
+         ragel_replace_data(ragel, cur_sz, v);
+         return;
+      }
+   }
+#undef ARRAY_SIZE
+
+   ragel_throw_error(ragel, "parse error: received unknown escape conversion");
+}
+
+// Prints every zero terminated entry of the data storage starting from offset, one per line.
+static inline void
+ragel_dump_data(struct ragel *ragel, const size_t offset)
+{
+   const uint8_t *end = ragel->mem.data + ragel->mem.written;
+   for (const uint8_t *p = ragel->mem.data + offset; p && p < end; p = (uint8_t*)memchr(p, 0, end - p), p += !!p)
+      printf("%s\n", (const char*)p);
+}
+
+// Searches the zero separated entries of the data storage, starting from offset, for one that begins with the given bytes.
+// Returns a pointer to the matching entry or NULL.
+static inline const uint8_t*
+ragel_search_data(const struct ragel *ragel, const size_t offset, const uint8_t *data, const size_t size)
+{
+   assert(ragel && data);
+
+   const uint8_t *end = ragel->mem.data + ragel->mem.written;
+   for (const uint8_t *p = ragel->mem.data + offset; p && p < end && (size_t)(end - p) >= size; p = (uint8_t*)memchr(p, 0, end - p), p += !!p) {
+      if (!memcmp(data, p, size))
+         return p;
+   }
+
+   return NULL;
+}
+
+// Like ragel_search_data, but matches a whole zero terminated string.
+static inline const uint8_t*
+ragel_search_str(const struct ragel *ragel, const size_t offset, const char *str)
+{
+   return ragel_search_data(ragel, offset, (const uint8_t*)str, strlen(str) + 1);
+}
+
+// Drops the entry that starts at mem.cur together with the separator in front of it.
+static inline void
+ragel_remove_last_data(struct ragel *ragel)
+{
+   assert(ragel);
+   const uint8_t *end = ragel->mem.data + ragel->mem.written;
+   const size_t size = end - ragel->mem.cur + 1;
+   assert(ragel->mem.written >= size);
+   ragel->mem.written -= size;
+   ragel->mem.data[ragel->mem.written] = 0;
+}
+
+// Deduplicates the entry at mem.cur: if an identical entry was stored earlier, the new copy is dropped and mem.cur points to the old one.
+static inline void
+ragel_finish_data(struct ragel *ragel)
+{
+   assert(ragel);
+
+   const uint8_t *end = ragel->mem.data + ragel->mem.written, *p;
+   if ((p = ragel_search_data(ragel, 0, ragel->mem.cur, end - ragel->mem.cur + 1))) {
+      ragel_remove_last_data(ragel);
+      ragel->mem.cur = p;
+   }
+}
+
+// Appends the current input byte to the data storage and keeps it zero terminated.
+static inline void
+ragel_store_data(struct ragel *ragel)
+{
+   ragel_bounds_check_data(ragel, 1);
+   ragel->mem.data[ragel->mem.written++] = *ragel->p;
+   ragel->mem.data[ragel->mem.written] = 0;
+}
+
+// Starts a new entry after the previous one's terminator and points mem.cur at it.
+static inline void
+ragel_begin_data(struct ragel *ragel)
+{
+   ragel_bounds_check_data(ragel, 1);
+   ragel->mem.written += (ragel->mem.written > 0);
+   ragel->mem.cur = ragel->mem.data + ragel->mem.written;
+}
+
+// Increments the current line number.
+static inline void
+ragel_advance_line(struct ragel *ragel)
+{
+   assert(ragel);
+   ++ragel->lineno;
+}
+
+// Points p/pe at the freshly read block of `bytes` bytes; a short block also sets eof.
+// Returns true while more input is expected.
+static inline bool
+ragel_confirm_input(struct ragel *ragel, const size_t bytes)
+{
+   assert(ragel);
+
+   if (bytes > sizeof(ragel->buf))
+      errx(EXIT_FAILURE, "%s: gave larger buffer than %zu", __func__, sizeof(ragel->buf));
+
+   const bool in_eof = (bytes < sizeof(ragel->buf));
+   ragel->p = ragel->buf;
+   ragel->pe = ragel->p + bytes;
+   ragel->eof = (in_eof ? ragel->pe : NULL);
+   return !in_eof;
+}
-- 
cgit v1.2.3