summaryrefslogtreecommitdiff
path: root/src/ragel/fspec.rl
diff options
context:
space:
mode:
Diffstat (limited to 'src/ragel/fspec.rl')
-rw-r--r--src/ragel/fspec.rl329
1 files changed, 329 insertions, 0 deletions
diff --git a/src/ragel/fspec.rl b/src/ragel/fspec.rl
new file mode 100644
index 0000000..8493cf1
--- /dev/null
+++ b/src/ragel/fspec.rl
@@ -0,0 +1,329 @@
+#include "fspec.h"
+#include "ragel.h"
+
+// It's a pretty good base so far.
+// Using ragel_search_str for typechecking variable declarations is a hack.
+// State should have a hashmap for fields/containers.
+//
+// XXX: Maybe drop whole container thing and just give field const char *parent; that points to keypath of container.
+// Then we would have flat structure like, "foo, foo.var, foo.b, ..."
+
+// Zero-initialized template; the container_name action copies it into state.container before setting the name.
+static const struct fspec_container default_container = {0};
+// Template copied into state.field by the field_type action; array.nmemb starts at 1.
+static const struct fspec_field default_field = { .array.nmemb = 1 };
+
+// Tag describing which kind of value the push_* parser actions stored last in struct stack.
+enum stack_type {
+ STACK_VAR, // text pointer stored by push_var
+ STACK_STR, // byte string stored by push_str
+ STACK_NUM, // number stored by push_hex / push_dec
+};
+
+// Single-slot value holder: every push_* action overwrites the previous value.
+struct stack {
+ enum stack_type type; // tells which union member below was written last
+
+ union {
+ struct fspec_bytes str; // written together with STACK_STR
+ const char *var; // written together with STACK_VAR
+ uint64_t num; // written together with STACK_NUM
+ };
+};
+
+// Everything the parser actions operate on; one instance lives on the stack of fspec_parse.
+struct state {
+ struct ragel ragel; // scanner pointers, input buffer and data memory handed to the ragel_* helpers
+ struct stack stack; // last pushed value
+ struct fspec_field field; // field under construction, reset by the field_type action
+ struct fspec_container container; // container under construction, reset by the container_name action
+ size_t container_data_offset; // ragel.mem.cur - ragel.mem.data as recorded by container_name; passed to ragel_search_str for array variables
+};
+
+static const char*
+stack_type_to_str(const enum stack_type type)
+{
+ switch (type) {
+ case STACK_VAR: return "var";
+ case STACK_STR: return "str";
+ case STACK_NUM: return "num";
+ };
+
+ assert(0 && "should not happen");
+ return "unknown";
+}
+
+// Verifies that the value currently on the stack has the requested type and
+// reports a mismatch through ragel_throw_error, naming both types.
+static void
+stack_check_type(const struct ragel *ragel, const struct stack *stack, const enum stack_type type)
+{
+    assert(ragel && stack);
+
+    // Matching type: nothing to report.
+    if (stack->type == type)
+        return;
+
+    ragel_throw_error(ragel, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type));
+}
+
+// Returns the text pointer held by the stack after checking that the last
+// pushed value is a STACK_VAR (a mismatch is reported by stack_check_type).
+static const char*
+stack_get_var(const struct ragel *ragel, const struct stack *stack)
+{
+ assert(ragel && stack);
+ stack_check_type(ragel, stack, STACK_VAR);
+ return stack->var;
+}
+
+// Returns a pointer to the byte string held inside the stack after checking
+// that the last pushed value is a STACK_STR. The pointer refers to the stack
+// itself, so it is overwritten by the next push.
+static const struct fspec_bytes*
+stack_get_str(const struct ragel *ragel, const struct stack *stack)
+{
+ assert(ragel && stack);
+ stack_check_type(ragel, stack, STACK_STR);
+ return &stack->str;
+}
+
+// Returns the number held by the stack after checking that the last pushed
+// value is a STACK_NUM (a mismatch is reported by stack_check_type).
+static uint64_t
+stack_get_num(const struct ragel *ragel, const struct stack *stack)
+{
+ assert(ragel && stack);
+ stack_check_type(ragel, stack, STACK_NUM);
+ return stack->num;
+}
+
+// Element count of the array x. Only meaningful for real arrays, not for
+// pointers or array parameters. The argument is parenthesized so that any
+// expression can be passed safely.
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+// Resolves the type name str into *out_type.
+//
+// The builtin integer names u8..u64 and s8..s64 map to the size of the
+// matching fixed-width C type, with FSPEC_TYPE_SIGNED set for the s* names.
+// Any other name is accepted as a container type when ragel_search_str
+// reports a hit for it; in that case the str pointer itself is stored, not a
+// copy. Everything else is reported as "invalid type" via ragel_throw_error.
+static void
+fspec_type_from_str(const struct ragel *ragel, const char *str, struct fspec_type *out_type)
+{
+    assert(ragel && str && out_type);
+
+    // Immutable lookup table; static so it is not rebuilt on every call.
+    static const struct fspec_type types[] = {
+        { .name = "u8", .size = sizeof(uint8_t) },
+        { .name = "u16", .size = sizeof(uint16_t) },
+        { .name = "u32", .size = sizeof(uint32_t) },
+        { .name = "u64", .size = sizeof(uint64_t) },
+        { .name = "s8", .size = sizeof(int8_t), .flags = FSPEC_TYPE_SIGNED },
+        { .name = "s16", .size = sizeof(int16_t), .flags = FSPEC_TYPE_SIGNED },
+        { .name = "s32", .size = sizeof(int32_t), .flags = FSPEC_TYPE_SIGNED },
+        { .name = "s64", .size = sizeof(int64_t), .flags = FSPEC_TYPE_SIGNED },
+    };
+
+    for (size_t i = 0; i < ARRAY_SIZE(types); ++i) {
+        if (strcmp(str, types[i].name) == 0) {
+            *out_type = types[i];
+            return;
+        }
+    }
+
+    // Not a builtin. NOTE(review): the 0 passed here is presumably the offset
+    // to start searching the stored data from - confirm against ragel.h.
+    if (ragel_search_str(ragel, 0, str)) {
+        *out_type = (struct fspec_type){ .name = str, .flags = FSPEC_TYPE_CONTAINER };
+        return;
+    }
+
+    ragel_throw_error(ragel, "invalid type");
+}
+
+// Resolves the kind name str into *out_kind.
+//
+// Known names are "pad" (FSPEC_KIND_IGNORE), "hex" (FSPEC_KIND_HEXADECIMAL)
+// and the encodings "ascii", "utf8" and "sjis" (FSPEC_KIND_ENCODING). Any
+// other name is reported as "invalid kind" via ragel_throw_error.
+static void
+fspec_kind_from_str(const struct ragel *ragel, const char *str, struct fspec_kind *out_kind)
+{
+    assert(ragel && str && out_kind);
+
+    // Immutable lookup table; static so it is not rebuilt on every call.
+    static const struct fspec_kind kinds[] = {
+        { .name = "pad", .flags = FSPEC_KIND_IGNORE },
+        { .name = "hex", .flags = FSPEC_KIND_HEXADECIMAL },
+        { .name = "ascii", .flags = FSPEC_KIND_ENCODING },
+        { .name = "utf8", .flags = FSPEC_KIND_ENCODING },
+        { .name = "sjis", .flags = FSPEC_KIND_ENCODING },
+    };
+
+    for (size_t i = 0; i < ARRAY_SIZE(kinds); ++i) {
+        if (strcmp(str, kinds[i].name) == 0) {
+            *out_kind = kinds[i];
+            return;
+        }
+    }
+
+    ragel_throw_error(ragel, "invalid kind");
+}
+
+// Rejects encoding kinds on fields whose type is not one byte wide.
+// The violation is reported through ragel_throw_error with the kind name.
+static void
+check_field_kind(const struct ragel *ragel, const struct fspec_field *field)
+{
+    assert(ragel && field);
+
+    // Only encoding kinds carry a size restriction.
+    if (!(field->kind.flags & FSPEC_KIND_ENCODING))
+        return;
+
+    // One-byte types are fine.
+    if (field->type.size == sizeof(uint8_t))
+        return;
+
+    ragel_throw_error(ragel, "invalid kind: %s kind only allowed for u8 and s8 types", field->kind.name);
+}
+
+%%{
+ # File specification parser.
+
+ machine fspec;
+ # Redirect the scanner pointers to the struct state instance local to fspec_parse.
+ variable p state.ragel.p;
+ variable pe state.ragel.pe;
+ variable eof state.ragel.eof;
+ # Emit the machine data, leaving out the error-state and first-final-state constants.
+ write data noerror nofinal;
+
+ # Hands the completed field and the container it belongs to over to the field callback.
+ action field {
+ fspec->ops.field(fspec, &state.container, &state.field);
+ }
+
+ # Resolves the kind name on the stack into state.field.kind, then validates it against the field type.
+ action field_kind {
+ fspec_kind_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.kind);
+ check_field_kind(&state.ragel, &state.field);
+ }
+
+ # Fills state.field.array from the value on the stack:
+ #  - number: fixed element count (FSPEC_ARRAY_FIXED)
+ #  - string: byte sequence stored as array.match (FSPEC_ARRAY_MATCH)
+ #  - name: variable reference (FSPEC_ARRAY_VAR), which must be found by
+ #    ragel_search_str starting at the offset recorded by container_name
+ # Any other stack type is reported as an error.
+ action field_array {
+ switch (state.stack.type) {
+ case STACK_NUM:
+ state.field.array.type = FSPEC_ARRAY_FIXED;
+ state.field.array.nmemb = stack_get_num(&state.ragel, &state.stack);
+ break;
+
+ case STACK_STR:
+ state.field.array.type = FSPEC_ARRAY_MATCH;
+ state.field.array.match = *stack_get_str(&state.ragel, &state.stack);
+ break;
+
+ case STACK_VAR:
+ state.field.array.type = FSPEC_ARRAY_VAR;
+ state.field.array.var = stack_get_var(&state.ragel, &state.stack);
+
+ if (!ragel_search_str(&state.ragel, state.container_data_offset, state.field.array.var))
+ ragel_throw_error(&state.ragel, "undeclared variable '%s'", state.field.array.var);
+ break;
+
+ default:
+ ragel_throw_error(&state.ragel, "array can't contain the stack type of '%s'", stack_type_to_str(state.stack.type));
+ break;
+ }
+ }
+
+ # Takes the name on the stack as the name of the field under construction.
+ action field_name {
+ state.field.name = stack_get_var(&state.ragel, &state.stack);
+ }
+
+ # Starts a new field: resets state.field to default_field, then resolves the type name on the stack.
+ action field_type {
+ state.field = default_field;
+ fspec_type_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.type);
+ }
+
+ # Starts a new container: resets state.container, names it from the stack and
+ # records the current data offset, which field_array later uses for variable lookup.
+ action container_name {
+ state.container = default_container;
+ state.container.name = stack_get_var(&state.ragel, &state.stack);
+ state.container_data_offset = state.ragel.mem.cur - state.ragel.mem.data;
+ }
+
+ # Pushes a pointer to the current position in the data memory as a STACK_VAR; the text is not copied.
+ action push_var {
+ state.stack.type = STACK_VAR;
+ state.stack.var = (char*)state.ragel.mem.cur;
+ }
+
+ # Converts the hexadecimal text at the current data position and pushes it as a STACK_NUM.
+ # The target is a uint64_t, so the unsigned conversion is used; the signed one
+ # would clamp values above INT64_MAX.
+ # NOTE(review): no finish action runs before this one in catch_hex; this assumes
+ # the stored text is already NUL-terminated - confirm against ragel_store_data.
+ # Overflow (ERANGE) is not checked.
+ action push_hex {
+ state.stack.type = STACK_NUM;
+ state.stack.num = strtoull((char*)state.ragel.mem.cur, NULL, 16);
+ }
+
+ # Converts the decimal text at the current data position and pushes it as a STACK_NUM.
+ # The target is a uint64_t, so the unsigned conversion is used; the signed one
+ # would clamp values above INT64_MAX.
+ # NOTE(review): no finish action runs before this one in catch_decimal; this assumes
+ # the stored text is already NUL-terminated - confirm against ragel_store_data.
+ # Overflow (ERANGE) is not checked.
+ action push_dec {
+ state.stack.type = STACK_NUM;
+ state.stack.num = strtoull((char*)state.ragel.mem.cur, NULL, 10);
+ }
+
+ # Pushes a STACK_STR that starts at the current data position and spans
+ # everything up to mem.data + mem.written; the bytes are referenced, not copied.
+ action push_str {
+ state.stack.type = STACK_STR;
+ state.stack.str.data = state.ragel.mem.cur;
+ state.stack.str.size = (state.ragel.mem.data + state.ragel.mem.written) - state.ragel.mem.cur;
+ }
+
+ # The actions below are thin wrappers: each forwards to one ragel_* helper with the shared parser state.
+
+ # Used by catch_string on leaving an escape sequence.
+ action convert_escape {
+ ragel_convert_escape(&state.ragel);
+ }
+
+ # Used by the catchers whose text is only needed until it has been pushed (builtin types, numbers, kinds).
+ action remove {
+ ragel_remove_last_data(&state.ragel);
+ }
+
+ # Used by the catchers whose text stays referenced after the push (names and strings).
+ action finish {
+ ragel_finish_data(&state.ragel);
+ }
+
+ # Embedded on the transitions of a caught token.
+ action store {
+ ragel_store_data(&state.ragel);
+ }
+
+ # Embedded on entering a caught token.
+ action begin {
+ ragel_begin_data(&state.ragel);
+ }
+
+ # Error action of catch_kind.
+ action invalid_kind {
+ ragel_throw_error(&state.ragel, "invalid kind");
+ }
+
+ # Error action of catch_type.
+ action invalid_type {
+ ragel_throw_error(&state.ragel, "invalid type");
+ }
+
+ # Error action of the main machine.
+ action error {
+ ragel_throw_error(&state.ragel, "malformed input (machine failed here or in previous or next expression)");
+ }
+
+ # Fired by the line machine on every newline.
+ action line {
+ ragel_advance_line(&state.ragel);
+ }
+
+ # Semantic
+ # Basic character classes and tokens; none of these carry actions.
+ ws = space;
+ # Any character outside the control character class.
+ valid = ^cntrl;
+ es = '\\';
+ delim = ';';
+ quote = ['"];
+ bopen = '{';
+ bclose = '}';
+ newline = '\n';
+ octal = [0-7];
+ # 0x prefix followed by at least one hexadecimal digit.
+ hex = '0x' <: xdigit+;
+ # Either a single zero or a number without leading zeros.
+ decimal = ([1-9] <: digit*) | '0';
+ # Double slash up to and including the end of the line.
+ comment = '//' <: valid* :>> newline;
+ # Backslash followed by x and hexadecimal digits, a single escape letter, or one to three octal digits.
+ escape = es <: ('x' <: xdigit+ | [abfnrtv\\'"e] | octal{1,3});
+ type = 'u8' | 'u16' | 'u32' | 'u64' | 's8' | 's16' | 's32' | 's64';
+ kind = 'ascii' | 'utf8' | 'sjis' | 'hex' | 'pad';
+ reserved = 'struct' | type | kind;
+ # Identifier: a letter or underscore, then letters, digits or underscores, excluding the reserved words.
+ var = ((alpha | '_') <: (alnum | '_')*) - reserved;
+
+ # Catchers
+ # Tokens with data capture attached. Embedding operators used here:
+ # > runs the action on entering, $ on every transition, % on leaving,
+ # and $! registers an error action.
+ # Names and strings run finish before being pushed; builtin types, numbers
+ # and kinds are pushed and then dropped again with remove.
+ catch_var = var >begin $store %finish %push_var;
+ catch_struct = ('struct' $store ws+ >store <: var $store) >begin %finish %push_var;
+ catch_type = (catch_struct | type >begin $store %push_var %remove) $!invalid_type;
+ catch_hex = hex >begin $store %push_hex %remove;
+ catch_decimal = decimal >begin $store %push_dec %remove;
+ catch_string = quote <: (escape %convert_escape | print)* >begin $store %finish %push_str :>> quote;
+ # Array subscript: a number, a quoted string or a variable name between square brackets.
+ catch_array = '[' <: (catch_hex | catch_decimal | catch_string | catch_var) :>> ']';
+ # Kind suffix: an equals sign, optional whitespace, then one of the kind keywords.
+ catch_kind = '=' ws* <: kind >begin $store %push_var %remove $!invalid_kind;
+
+ # Actions
+ # field: type, name, optional array subscript, optional kind, then the delimiter;
+ # the field action runs once the delimiter has been consumed.
+ field = catch_type %field_type ws+ <: catch_var %field_name ws* <: (catch_array %field_array ws*)? <: (catch_kind %field_kind ws*)? :>> delim %field;
+ # container: struct name, then a braced body of whitespace, comments and fields, closed by a delimiter.
+ container = catch_struct %container_name ws* :>> bopen <: (ws | comment | field)* :>> bclose ws* delim;
+ # line: matches whole lines and fires the line action on each newline.
+ line = valid* :>> newline @line;
+ # main: the structural grammar intersected with the line machine, so both run over
+ # the same input; the error action is registered on the result.
+ main := (ws | comment | container)* & line* $!error;
+}%%
+
+// Parses a file specification: input is pulled through fspec->ops.read and
+// fspec->ops.field is invoked by the field action for every parsed field.
+//
+// fspec must be non-NULL and must provide both ops.read and ops.field.
+// The parser data memory is taken from fspec->mem.
+void
+fspec_parse(struct fspec *fspec)
+{
+ int cs; // current machine state, initialized by the generated init code below
+ %% write init;
+
+ (void)fspec_en_main; // reference the symbol so it does not count as unused
+ assert(fspec);
+ assert(fspec->ops.read);
+ assert(fspec->ops.field);
+
+ // Line numbering starts at 1; every other scanner field starts zeroed.
+ struct state state = {
+ .ragel = {
+ .lineno = 1,
+ .mem = {
+ .data = fspec->mem.data,
+ .size = fspec->mem.size,
+ },
+ },
+ };
+
+ for (bool ok = true; ok;) {
+ // Refill the scanner buffer: element size 1, at most sizeof(buf) elements.
+ const size_t bytes = fspec->ops.read(fspec, state.ragel.buf, 1, sizeof(state.ragel.buf));
+ ok = ragel_confirm_input(&state.ragel, bytes);
+ // The machine still runs on the iteration where ok turns false; the loop ends afterwards.
+ %% write exec;
+ }
+}