diff options
-rw-r--r-- | Makefile | 61 | ||||
-rw-r--r-- | spec/ability.fspec | 21 | ||||
-rw-r--r-- | spec/eaf.fspec | 17 | ||||
-rw-r--r-- | spec/emz.fspec | 7 | ||||
-rw-r--r-- | spec/ftable.fspec | 2 | ||||
-rw-r--r-- | spec/name.fspec | 11 | ||||
-rw-r--r-- | spec/spell.fspec | 36 | ||||
-rw-r--r-- | spec/vtable.fspec | 2 | ||||
-rw-r--r-- | src/bin/fw/uneaf.c | 208 | ||||
-rw-r--r-- | src/bin/misc/dec2bin.c (renamed from src/utils/dec2bin.c) | 2 | ||||
-rw-r--r-- | src/bin/xi/xi2path.c (renamed from src/xi/xi2path.c) | 2 | ||||
-rw-r--r-- | src/bin/xi/xi2path.h (renamed from src/xi/xi2path.h) | 0 | ||||
-rw-r--r-- | src/bin/xi/xidec.c (renamed from src/xi/xidec.c) | 2 | ||||
-rw-r--r-- | src/bin/xi/xifile.c (renamed from src/xi/xifile.c) | 3 | ||||
-rw-r--r-- | src/bin/xi/xils.c (renamed from src/xi/xils.c) | 2 | ||||
-rw-r--r-- | src/dump.c | 834 | ||||
-rw-r--r-- | src/fspec/bcode-internal.h | 16 | ||||
-rw-r--r-- | src/fspec/bcode.c | 189 | ||||
-rw-r--r-- | src/fspec/bcode.h | 64 | ||||
-rw-r--r-- | src/fspec/lexer.h | 17 | ||||
-rw-r--r-- | src/fspec/lexer.rl | 616 | ||||
-rw-r--r-- | src/fspec/memory.h | 8 | ||||
-rw-r--r-- | src/fspec/validator.h | 17 | ||||
-rw-r--r-- | src/fspec/validator.rl | 237 | ||||
-rw-r--r-- | src/ragel/fspec.h | 77 | ||||
-rw-r--r-- | src/ragel/fspec.rl | 329 | ||||
-rw-r--r-- | src/ragel/ragel.h | 244 | ||||
-rw-r--r-- | src/ragel/ragel.rl | 88 | ||||
-rw-r--r-- | vim/filespec.vim | 14 |
29 files changed, 2259 insertions, 867 deletions
@@ -1,42 +1,55 @@ PREFIX ?= /usr/local bindir ?= /bin -WARNINGS := -Wall -Wextra -Wformat=2 -Winit-self -Wfloat-equal -Wcast-align -Wpointer-arith -CFLAGS += -std=c11 $(WARNINGS) +MAKEFLAGS += --no-builtin-rules -all: fspec-dump dec2bin xidec xi2path xils xifile +# GCC 7: -Wstringop-overflow=, -Walloc-size-larger-than=, -Wduplicated-{branches,cond} +WARNINGS := -Wall -Wextra -Wpedantic -Wformat=2 -Wstrict-aliasing=3 -Wstrict-overflow=5 -Wstack-usage=12500 \ + -Wfloat-equal -Wcast-align -Wpointer-arith -Wchar-subscripts + +override CFLAGS ?= -g +override CFLAGS += -std=c11 $(WARNINGS) +override CPPFLAGS += -Isrc + +bins = fspec-dump dec2bin xidec xi2path xils xifile uneaf +all: $(bins) %.c: %.rl ragel $^ -fspec-dump: src/ragel/ragel.h src/ragel/fspec.h src/ragel/fspec.c src/dump.c - $(LINK.c) $(filter %.c,$^) $(LDLIBS) -o $@ +%.a: + $(LINK.c) -c $(filter %.c,$^) $(LDLIBS) -o $@ + +$(bins): %: + $(LINK.c) $(filter %.c %.a,$^) $(LDLIBS) -o $@ + +fspec-ragel.a: src/ragel/ragel.h src/ragel/ragel.c +fspec-bcode.a: src/fspec/memory.h src/fspec/bcode.h src/fspec/bcode.c +fspec-lexer.a: src/ragel/ragel.h src/fspec/lexer.h src/fspec/lexer.c +fspec-validator.a: src/ragel/ragel.h src/fspec/validator.h src/fspec/validator.c -dec2bin: src/utils/dec2bin.c - $(LINK.c) $(filter %.c,$^) $(LDLIBS) -o $@ +fspec-dump: private CPPFLAGS += $(shell pkg-config --cflags-only-I squash-0.8) +fspec-dump: private LDLIBS += $(shell pkg-config --libs-only-l squash-0.8) +fspec-dump: src/dump.c fspec-ragel.a fspec-bcode.a fspec-lexer.a fspec-validator.a -xidec: src/xi/xidec.c - $(LINK.c) $(filter %.c,$^) $(LDLIBS) -o $@ +dec2bin: src/bin/misc/dec2bin.c -xi2path: src/xi/xi2path.c - $(LINK.c) $(filter %.c,$^) $(LDLIBS) -o $@ +xidec: src/bin/xi/xidec.c +xi2path: src/bin/xi/xi2path.c +xils: src/bin/xi/xils.c +xifile: src/bin/xi/xifile.c -xils: src/xi/xils.c - $(LINK.c) $(filter %.c,$^) $(LDLIBS) -o $@ +uneaf: private LDLIBS += $(shell pkg-config --libs-only-l zlib) +uneaf: src/bin/fw/uneaf.c -xifile: 
src/xi/xifile.c - $(LINK.c) $(filter %.c,$^) $(LDLIBS) -o $@ +install-bin: $(bins) + install -dm755 "$(DESTDIR)$(PREFIX)$(bindir)" + install -m755 $^ "$(DESTDIR)$(PREFIX)$(bindir)/" -install: - install -Dm755 $(DESTDIR)$(PREFIX)$(bindir)/fspec-dump - install -Dm755 $(DESTDIR)$(PREFIX)$(bindir)/dec2bin - install -Dm755 $(DESTDIR)$(PREFIX)$(bindir)/xidec - install -Dm755 $(DESTDIR)$(PREFIX)$(bindir)/xi2path - install -Dm755 $(DESTDIR)$(PREFIX)$(bindir)/xils - install -Dm755 $(DESTDIR)$(PREFIX)$(bindir)/xifile +install: install-bin clean: - $(RM) src/ragel/fspec.c - $(RM) fspec-dump dec2bin xidec xi2path xils xifile + $(RM) src/ragel/ragel.c src/fspec/lexer.c src/fspec/validator.c + $(RM) $(bins) *.a .PHONY: all clean install diff --git a/spec/ability.fspec b/spec/ability.fspec index 3c2c890..4498d69 100644 --- a/spec/ability.fspec +++ b/spec/ability.fspec @@ -1,11 +1,14 @@ -// Abilities struct ability { - u16 index; - u16 icon_id; - u16 mp_cost; - u16 unknown; - u16 targets; - u8 name[32] = sjis; // The kind actually depends on ROM section - u8 description[256] = sjis; // ^ Ditto, we probably can't express this - u8 padding[726] = pad; + index: u16; + icon_id: u16; + mp_cost: u16; + unknown: u16; + targets: u16; + name: u8[32] | encoding('sjis') str; // The encoding actually depends on ROM region + description: u8[256] | encoding('sjis') str; // ^ Ditto, we can't express this (we need parser options) + padding: u8[726] nul; +}; + +struct dat { + ability: struct ability[$]; }; diff --git a/spec/eaf.fspec b/spec/eaf.fspec new file mode 100644 index 0000000..e9c5702 --- /dev/null +++ b/spec/eaf.fspec @@ -0,0 +1,17 @@ +struct file { + path: u8[256] | encoding('ascii') str; + offset: u64; + size: u64; + padding: u8[16] nul; +}; + +struct eaf { + header: u8[4] | matches('#EAF') str; + major: u16; + minor: u16; + size: u64; + count: u32; + unknown: u64; + padding: u8[100] nul; + files: struct file[count]; +}; diff --git a/spec/emz.fspec b/spec/emz.fspec new file mode 100644 
index 0000000..0fe02a1 --- /dev/null +++ b/spec/emz.fspec @@ -0,0 +1,7 @@ +struct emz { + header: u8[4] | matches('#EMZ') str; + unknown: u32 hex; // most likely redunancy check (crc32?) + size: u32; + offset: u32; // always 16? + data: u8[$] | compression('deflate', size) hex; +}; diff --git a/spec/ftable.fspec b/spec/ftable.fspec index 615b7b3..39fdd26 100644 --- a/spec/ftable.fspec +++ b/spec/ftable.fspec @@ -1,3 +1,3 @@ struct ftable { - u16 id; + id: u16[$] hex; }; diff --git a/spec/name.fspec b/spec/name.fspec index 69f75de..d4e0f7c 100644 --- a/spec/name.fspec +++ b/spec/name.fspec @@ -1,5 +1,8 @@ -// NPC IDs -struct name = { - u8 name[28] = ascii; // The kind actually depends on ROM section - u32 id; +struct name { + name: u8[28] | encoding('ascii') str; // The encoding actually depends on ROM region + id: u32; +}; + +struct dat { + name: struct name[$]; }; diff --git a/spec/spell.fspec b/spec/spell.fspec index f65b5ad..68aa5fb 100644 --- a/spec/spell.fspec +++ b/spec/spell.fspec @@ -1,18 +1,22 @@ struct spell { - u16 index; - u16 type; // 1-6 for White/Black/Summon/Ninja/Bard/Blue - u16 element; - u16 targets; - u16 skill; - u16 mp_cost; - u8 casting_time; // in quarter of seconds - u8 recast_delay; // in quarter of seconds - u8 level[24]; // 1 byte per job, 0xxFF if not learnable, first slot is NONE job so always 0xFF - u16 id; // 0 for "unused" spells; often, but not always, equal to index - u8 unknown; - u8 jp_name[20] = sjis; - u8 en_name[20] = ascii; - u8 jp_description[128] = sjis; - u8 en_description[128] = ascii; - u8 padding[687] = pad; + index: u16; + type: u16; // 1-6 for White/Black/Summon/Ninja/Bard/Blue + element: u16; + targets: u16; + skill: u16; + mp_cost: u16; + casting_time: u8; // in quarter of seconds + recast_delay: u8; // in quarter of seconds + level: u8[24] hex; // 1 byte per job, 0xxFF if not learnable, first slot is NONE job so always 0xFF + id: u16; // 0 for "unused" spells; often, but not always, equal to index + unknown: u8; + 
jp_name: u8[20] | encoding('sjis') str; + en_name: u8[20] | encoding('ascii') str; + jp_description: u8[128] | encoding('sjis') str; + en_description: u8[128] | encoding('ascii') str; + padding: u8[687] nul; +}; + +struct dat { + spell: struct spell[$]; }; diff --git a/spec/vtable.fspec b/spec/vtable.fspec index 0fc8701..de281b8 100644 --- a/spec/vtable.fspec +++ b/spec/vtable.fspec @@ -1,3 +1,3 @@ struct vtable { - u8 exist; + exist: u8[$] hex; }; diff --git a/src/bin/fw/uneaf.c b/src/bin/fw/uneaf.c new file mode 100644 index 0000000..f23c6d6 --- /dev/null +++ b/src/bin/fw/uneaf.c @@ -0,0 +1,208 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <assert.h> +#include <err.h> +#include <sys/stat.h> +#include <zlib.h> + +static const char *stdin_name = "/dev/stdin"; + +int ZEXPORT uncompress2 (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong *sourceLen; +{ + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong len, left; + Byte buf[1]; /* for detection of incomplete stream when *destLen == 0 */ + + len = *sourceLen; + if (*destLen) { + left = *destLen; + *destLen = 0; + } + else { + left = 1; + dest = buf; + } + + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = inflateInit2(&stream, -15); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = len > (uLong)max ? 
max : (uInt)len; + len -= stream.avail_in; + } + err = inflate(&stream, Z_NO_FLUSH); + } while (err == Z_OK); + + *sourceLen -= len + stream.avail_in; + if (dest != buf) + *destLen = stream.total_out; + else if (stream.total_out && err == Z_BUF_ERROR) + left = 1; + + inflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : + err == Z_NEED_DICT ? Z_DATA_ERROR : + err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR : + err; +} + +static void +zdeflate(const uint8_t *buf, const size_t buf_sz, uint8_t **out_dec, size_t *inout_dec_sz) +{ + uLongf dsize = (*inout_dec_sz ? *inout_dec_sz : buf_sz * 2), bsize; + int ret = Z_OK; + + do { + if (!(*out_dec = realloc(*out_dec, (bsize = dsize)))) + err(EXIT_FAILURE, "realloc(%zu)", dsize); + dsize *= 2; + } while ((ret = uncompress(*out_dec, &bsize, buf, buf_sz)) == Z_BUF_ERROR && !*inout_dec_sz); + + if (ret != Z_OK) + errx(EXIT_FAILURE, "uncompress(%zu, %zu) == %d", (size_t)(dsize / 2), buf_sz, ret); + + *inout_dec_sz = bsize; +} + +static FILE* +fopen_or_die(const char *path, const char *mode) +{ + assert(path && mode); + + FILE *f; + if (!(f = fopen(path, mode))) + err(EXIT_FAILURE, "fopen(%s, %s)", path, mode); + + return f; +} + +static void +mkdirp(const char *path) +{ + assert(path); + for (const char *s = path; *s; ++s) { + if (*s != '/') + continue; + + *(char*)s = 0; + mkdir(path, 0755); + *(char*)s = '/'; + } +} + +static void +write_data_to(const uint8_t *data, const size_t size, const char *path) +{ + assert(data && path); + mkdirp(path); + FILE *f = fopen_or_die(path, "wb"); + + struct header { + uint8_t magic[4]; + uint32_t unknown; + uint32_t size; + uint32_t offset; + } __attribute__((packed)) header; + + memcpy(&header, data, sizeof(header)); + warnx("%s", path); + + if (!memcmp(header.magic, "#EMZ", sizeof(header.magic))) { + uint8_t *buf = NULL; + size_t dec_size = header.size; + zdeflate(data + header.offset, size - header.offset, &buf, &dec_size); + fwrite(buf, 1, dec_size, f); + free(buf); + } 
else { + fwrite(data, 1, size, f); + } + + fclose(f); +} + +static void +unpack(const char *path, const char *outdir) +{ + assert(path); + const char *name = (!strcmp(path, "-") ? stdin_name : path); + FILE *f = (name == stdin_name ? stdin : fopen_or_die(name, "rb")); + + struct header { + uint8_t magic[4]; + uint16_t major, minor; + uint64_t size; + uint32_t count; + uint64_t unknown; + uint8_t padding[100]; + } __attribute__((packed)) header; + + if (fread(&header, 1, sizeof(header), f) != sizeof(header)) + err(EXIT_FAILURE, "fread(%zu)", sizeof(header)); + + if (memcmp(header.magic, "#EAF", sizeof(header.magic))) + errx(EXIT_FAILURE, "'%s' is not a #EAF file", name); + + for (size_t i = 0; i < header.count; ++i) { + struct file { + char path[256]; + uint64_t offset, size; + uint8_t padding[16]; + } __attribute__((packed)) file; + + if (fread(&file, 1, sizeof(file), f) != sizeof(file)) + err(EXIT_FAILURE, "fread(%zu)", sizeof(file)); + + fpos_t pos; + fgetpos(f, &pos); + + uint8_t *data; + if (!(data = malloc(file.size))) + err(EXIT_FAILURE, "malloc(%zu)", file.size); + + fseek(f, file.offset, SEEK_SET); + if (fread(data, 1, file.size, f) != file.size) + err(EXIT_FAILURE, "fread(%zu)", file.size); + + char path[4096]; + snprintf(path, sizeof(path), "%s/%s", outdir, file.path); + write_data_to(data, file.size, path); + free(data); + fsetpos(f, &pos); + } + + fclose(f); +} + +int +main(int argc, char *argv[]) +{ + if (argc < 3) + errx(EXIT_FAILURE, "usage: %s outdir file ...", argv[0]); + + for (int i = 2; i < argc; ++i) + unpack(argv[i], argv[1]); + + return EXIT_SUCCESS; +} diff --git a/src/utils/dec2bin.c b/src/bin/misc/dec2bin.c index 97e59bb..be1dd5e 100644 --- a/src/utils/dec2bin.c +++ b/src/bin/misc/dec2bin.c @@ -8,7 +8,7 @@ int main(int argc, char *argv[]) { if (argc < 3) - errx(EXIT_FAILURE, "usage: %s <u8|u16|u32|u64> number\n", argv[0]); + errx(EXIT_FAILURE, "usage: %s <u8|u16|u32|u64> number", argv[0]); const struct { const char *t; diff --git 
a/src/xi/xi2path.c b/src/bin/xi/xi2path.c index bd9c702..4b4c519 100644 --- a/src/xi/xi2path.c +++ b/src/bin/xi/xi2path.c @@ -7,7 +7,7 @@ int main(int argc, char *argv[]) { if (argc < 2) - errx(EXIT_FAILURE, "usage: %s id\n", argv[0]); + errx(EXIT_FAILURE, "usage: %s id", argv[0]); char path[12]; xi2path(path, strtol(argv[1], NULL, 10)); diff --git a/src/xi/xi2path.h b/src/bin/xi/xi2path.h index 954c554..954c554 100644 --- a/src/xi/xi2path.h +++ b/src/bin/xi/xi2path.h diff --git a/src/xi/xidec.c b/src/bin/xi/xidec.c index 3df917f..cb4c1bb 100644 --- a/src/xi/xidec.c +++ b/src/bin/xi/xidec.c @@ -93,7 +93,7 @@ int main(int argc, char *argv[]) { if (argc < 2) - errx(EXIT_FAILURE, "usage: %s (name | ability | spell | item | text) < data\n", argv[0]); + errx(EXIT_FAILURE, "usage: %s (name | ability | spell | item | text) < data", argv[0]); #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) diff --git a/src/xi/xifile.c b/src/bin/xi/xifile.c index f1b2111..fb7f2f7 100644 --- a/src/xi/xifile.c +++ b/src/bin/xi/xifile.c @@ -153,6 +153,7 @@ detect(const char *path) printf("%s: unknown\n", name); } } + fclose(f); } @@ -160,7 +161,7 @@ int main(int argc, char *argv[]) { if (argc < 2) - errx(EXIT_FAILURE, "usage: %s file\n", argv[0]); + errx(EXIT_FAILURE, "usage: %s file ...", argv[0]); for (int i = 1; i < argc; ++i) detect(argv[i]); diff --git a/src/xi/xils.c b/src/bin/xi/xils.c index 9c9a75e..b29b54b 100644 --- a/src/xi/xils.c +++ b/src/bin/xi/xils.c @@ -78,7 +78,7 @@ main(int argc, char *argv[]) } if (!gamedir) - errx(EXIT_FAILURE, "usage: %s [-a|-v] gamedir\n", argv[0]); + errx(EXIT_FAILURE, "usage: %s [-a|-v] gamedir", argv[0]); dump_tables(gamedir, (const char*[]){ "FTABLE.DAT", "VTABLE.DAT" }, 1, print_all, verbose); @@ -6,13 +6,19 @@ #include <err.h> #include <iconv.h> +#include <errno.h> #include <locale.h> #include <langinfo.h> +#include <squash.h> -#include "ragel/fspec.h" +#include <fspec/bcode.h> +#include <fspec/lexer.h> +#include <fspec/validator.h> + +#define 
ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) static size_t -to_hex(const char *buf, const size_t buf_sz, char *out, const size_t out_sz, const bool reverse) +to_hex(const uint8_t *buf, const size_t buf_sz, char *out, const size_t out_sz, const bool reverse) { assert(out); const char nibble[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; @@ -35,234 +41,688 @@ to_hex(const char *buf, const size_t buf_sz, char *out, const size_t out_sz, con } static void -print_decimal(const char *buf, const bool is_signed, const size_t size, const size_t nmemb) +print_dec(const uint8_t *buf, const size_t size, const bool is_signed) { - if (nmemb > 1) - printf("{ "); + char hex[2 * sizeof(fspec_num) + 1]; + to_hex(buf, size, hex, sizeof(hex), true); - for (size_t n = 0; n < nmemb; ++n) { - char hex[2 * sizeof(uint64_t) + 1]; - to_hex(buf + size * n, size, hex, sizeof(hex), true); - const char *delim = (nmemb > 1 && n + 1 < nmemb ? ", " : ""); - - if (is_signed) { - printf("%ld%s", (int64_t)strtoll(hex, NULL, 16), delim); - } else { - printf("%lu%s", (uint64_t)strtoull(hex, NULL, 16), delim); - } + static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t"); + + if (is_signed) { + printf("%ld", (int64_t)strtoll(hex, NULL, 16)); + } else { + printf("%lu", (uint64_t)strtoull(hex, NULL, 16)); } +} + +static void +print_udec(const uint8_t *buf, const size_t size) +{ + print_dec(buf, size, false); +} + +static void +print_sdec(const uint8_t *buf, const size_t size) +{ + print_dec(buf, size, true); +} - printf("%s\n", (nmemb > 1 ? 
" }" : "")); +static void +print_hex(const uint8_t *buf, const size_t size) +{ + char hex[2 * sizeof(fspec_num) + 1]; + to_hex(buf, size, hex, sizeof(hex), false); + printf("%s", hex); } static void -print_hex(const char *buf, const size_t size, const size_t nmemb) +print_array(const uint8_t *buf, const size_t size, const size_t nmemb, void (*fun)(const uint8_t *buf, const size_t size)) { - if (nmemb > 1) + const int indent = 4; + if (nmemb > 8) { + printf("{\n%*s", indent, ""); + } else if (nmemb > 1) { printf("{ "); + } for (size_t n = 0; n < nmemb; ++n) { - char hex[2 * sizeof(uint64_t) + 1]; - to_hex(buf + size * n, size, hex, sizeof(hex), false); - printf("%s%s", hex, (nmemb > 1 && n + 1 < nmemb ? ", " : "")); + fun(buf + n * size, size); + printf("%s", (nmemb > 1 && n + 1 < nmemb ? ", " : "")); + + if (!((n + 1) % 8)) + printf("\n%*s", indent, ""); } - printf("%s\n", (nmemb > 1 ? " }" : "")); + printf("%s\n", (nmemb > 8 ? "\n}" : (nmemb > 1 ? " }" : ""))); } static void -print_chars(const char *buf, const size_t size, const size_t nmemb) +print_str(const char *buf, const size_t size, const size_t nmemb) { - assert(size == sizeof(char)); + const bool has_nl = memchr(buf, '\n', size * nmemb); + if (has_nl) + puts("```"); - for (size_t n = 0; n < nmemb && buf[n] != 0; ++n) + for (size_t n = 0; n < size * nmemb && buf[n] != 0; ++n) printf("%c", buf[n]); + + puts((has_nl ? "```" : "")); } +struct code { + const enum fspec_op *start, *end, *data; +}; + static void -print_encoded(const char *buf, const char *from, const char *to, const size_t size, const size_t nmemb) +dump_ops(const struct code *code) { - assert(from && size == sizeof(char)); + for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, false)) { + printf("%*s- ", (*op == FSPEC_OP_ARG ? 
2 : 0), ""); + switch (*op) { + case FSPEC_OP_HEADER: + printf("header\n"); + break; + + case FSPEC_OP_DECLARATION: + printf("declaration\n"); + break; + + case FSPEC_OP_READ: + printf("read\n"); + break; + + case FSPEC_OP_GOTO: + printf("goto\n"); + break; + + case FSPEC_OP_FILTER: + printf("filter\n"); + break; + + case FSPEC_OP_VISUAL: + printf("visual\n"); + break; + + case FSPEC_OP_ARG: + { + const enum fspec_arg *arg = (void*)(op + 1); + printf("arg "); + switch (*arg) { + case FSPEC_ARG_STR: + printf("str %s\n", fspec_arg_get_cstr(arg, code->data)); + break; + + case FSPEC_ARG_VAR: + printf("var %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); + break; + + case FSPEC_ARG_NUM: + printf("num %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); + break; + + case FSPEC_ARG_OFF: + printf("off %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); + break; + + case FSPEC_ARG_DAT: + printf("dat %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); + break; + + case FSPEC_ARG_EOF: + printf("eof\n"); + break; + + case FSPEC_ARG_LAST: + break; + } + } + break; - if (!to) { - static const char *sys_encoding; - if (!sys_encoding) { - setlocale(LC_ALL, ""); - sys_encoding = nl_langinfo(CODESET); + case FSPEC_OP_LAST: + break; } + } +} - to = sys_encoding; +static const enum fspec_op* +get_last_struct(const struct code *code) +{ + const enum fspec_op *last = NULL; + for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, true)) { + const enum fspec_arg *arg; + if (*op == FSPEC_OP_DECLARATION && + (arg = fspec_op_get_arg(op, code->end, 1, 1<<FSPEC_ARG_NUM)) && + fspec_arg_get_num(arg) == FSPEC_DECLARATION_STRUCT) { + last = op; + } } + return last; +} - iconv_t iv; - if ((iv = iconv_open(to, from)) == (iconv_t)-1) - err(EXIT_FAILURE, "iconv_open(%s, %s)", to, from); +struct dynbuf { + void *data; + size_t len, written; +}; - const char *in = buf; - size_t in_left = nmemb; - do { - char enc[1024], *out = enc; - size_t out_left = sizeof(enc); +static inline void 
+dynbuf_resize(struct dynbuf *buf, const size_t size) +{ + assert(buf); + if (!(buf->data = realloc(buf->data, size))) + err(EXIT_FAILURE, "realloc(%zu)", size); - if (iconv(iv, (char**)&in, &in_left, &out, &out_left) == (size_t)-1) - err(EXIT_FAILURE, "iconv(%s, %s)", to, from); + buf->len = size; +} - print_chars(enc, 1, sizeof(enc) - out_left); - } while (in_left > 0); +static inline void +dynbuf_resize_if_needed(struct dynbuf *buf, const size_t size) +{ + if (buf->len >= size) + return; - iconv_close(iv); - puts(""); + dynbuf_resize(buf, size); } -struct container; -struct field { - struct fspec_field f; - struct container *c, *link; - uint64_t value; -}; +static inline void +dynbuf_grow_if_needed(struct dynbuf *buf, const size_t nmemb) +{ + assert(buf); + if (buf->len >= nmemb && buf->written <= buf->len - nmemb) + return; -struct container { - struct fspec_container c; - struct field fields[255]; - size_t num_fields; -}; + dynbuf_resize(buf, buf->written + nmemb); +} -static size_t -field_get_buffer(const struct field *field, FILE *f, char **buf) +static inline void +dynbuf_append(struct dynbuf *buf, const void *data, const size_t data_sz) { - assert(field && f && buf); - - switch (field->f.array.type) { - case FSPEC_ARRAY_FIXED: - if (!(*buf = calloc(field->f.array.nmemb, field->f.type.size))) - err(EXIT_FAILURE, "calloc(%zu, %zu)", field->f.array.nmemb, field->f.type.size); + dynbuf_grow_if_needed(buf, data_sz); + memcpy((char*)buf->data + buf->written, data, data_sz); + buf->written += data_sz; + assert(buf->written <= buf->len); +} - if (fread(*buf, field->f.type.size, field->f.array.nmemb, f) != field->f.array.nmemb) - return 0; +static inline void +dynbuf_reset(struct dynbuf *buf) +{ + assert(buf); + buf->written = 0; +} - return field->f.array.nmemb; +static inline void +dynbuf_release(struct dynbuf *buf) +{ + assert(buf); + free(buf->data); + *buf = (struct dynbuf){0}; +} - case FSPEC_ARRAY_MATCH: - { - size_t off = 0; - const size_t msz = 
field->f.array.match.size; - for (size_t len = 0;; ++off) { - if (off >= (len ? len - 1 : len) && !(*buf = realloc(*buf, (len += 1024)))) - err(EXIT_FAILURE, "realloc(%zu)", len); +static void +display(const void *buf, const size_t size, const size_t nmemb, const bool is_signed, const enum fspec_visual visual) +{ + switch (visual) { + case FSPEC_VISUAL_NUL: + puts("..."); + break; - assert(off < len); - if (fread(*buf + off, 1, 1, f) != 1) - return 0; + case FSPEC_VISUAL_STR: + print_str(buf, size, nmemb); + break; - if (off >= msz && !memcmp(field->f.array.match.data, *buf + off - msz, msz)) - break; - } + case FSPEC_VISUAL_HEX: + print_array(buf, size, nmemb, print_hex); + break; - (*buf)[off] = 0; - return off; - } + case FSPEC_VISUAL_DEC: + print_array(buf, size, nmemb, (is_signed ? print_sdec : print_udec)); break; - case FSPEC_ARRAY_VAR: - for (size_t i = 0; i < field->c->num_fields; ++i) { - if (!strcmp(field->c->fields[i].f.name, field->f.array.var)) - return field->c->fields[i].value; - } + case FSPEC_VISUAL_LAST: break; } - - return 0; } +struct decl { + struct dynbuf buf; + const char *name; + const void *start, *end; + size_t nmemb; + uint8_t size; + enum fspec_visual visual; + enum fspec_declaration declaration; +}; + static void -container_process(struct container *container, FILE *f); +decl_display(const struct decl *decl) +{ + assert(decl); + assert(decl->size * decl->nmemb <= decl->buf.len); + printf("%s: ", decl->name); + display(decl->buf.data, decl->size, decl->nmemb, false, decl->visual); +} + +static fspec_num +decl_get_num(const struct decl *decl) +{ + assert(decl); + assert(decl->nmemb == 1); + assert(decl->size * decl->nmemb <= decl->buf.len); + char hex[2 * sizeof(fspec_num) + 1]; + to_hex(decl->buf.data, decl->size, hex, sizeof(hex), true); + static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t"); + return (fspec_num)strtoull(hex, NULL, 16); +} + +static const char* +decl_get_cstr(const struct decl 
*decl) +{ + assert(decl); + return decl->buf.data; +} + +struct context { + struct code code; + struct decl *decl; + fspec_num decl_count; +}; + +static fspec_num +var_get_num(const struct context *context, const enum fspec_arg *arg) +{ + assert(context && arg); + return decl_get_num(&context->decl[fspec_arg_get_num(arg)]); +} + +static const char* +var_get_cstr(const struct context *context, const enum fspec_arg *arg) +{ + assert(context && arg); + return decl_get_cstr(&context->decl[fspec_arg_get_num(arg)]); +} + +enum type { + TYPE_NUM, + TYPE_STR, +}; + +static enum type +var_get_type(const struct context *context, const enum fspec_arg *arg) +{ + assert(context && arg); + const struct decl *decl = &context->decl[fspec_arg_get_num(arg)]; + switch (decl->visual) { + case FSPEC_VISUAL_DEC: + case FSPEC_VISUAL_HEX: + case FSPEC_VISUAL_NUL: + return TYPE_NUM; + + case FSPEC_VISUAL_STR: + return TYPE_STR; + + case FSPEC_VISUAL_LAST: + break; + } + return ~0; +} static void -field_process(struct field *field, FILE *f) +filter_decompress(const struct context *context, struct decl *decl) { - assert(field && f); + assert(decl); - char *buf = NULL; - const size_t nmemb = field_get_buffer(field, f, &buf); + const enum fspec_arg *arg; + if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<<FSPEC_ARG_STR))) + errx(EXIT_FAILURE, "missing compression"); - if (field->link) { - for (size_t i = 0; i < nmemb; ++i) - container_process(field->link, f); - } else { - printf("%s(%zu) %s[%zu] = ", field->f.type.name, field->f.type.size, field->f.name, nmemb); + SquashCodec *codec; + const char *algo = fspec_arg_get_cstr(arg, context->code.data); + if (!(codec = squash_get_codec(algo))) + errx(EXIT_FAILURE, "unknown compression '%s'", algo); - if (field->f.kind.flags & FSPEC_KIND_IGNORE) { - puts("..."); - } else if (field->f.kind.flags & FSPEC_KIND_ENCODING) { - print_encoded(buf, field->f.kind.name, NULL, field->f.type.size, nmemb); - } else if (field->f.kind.flags 
& FSPEC_KIND_HEXADECIMAL) { - print_hex(buf, field->f.type.size, nmemb); - } else { - print_decimal(buf, (field->f.type.flags & FSPEC_TYPE_SIGNED), field->f.type.size, nmemb); + SquashOptions *opts; + if (!(opts = squash_options_new(codec, NULL))) + errx(EXIT_FAILURE, "squash_options_new"); + + size_t dsize = squash_codec_get_uncompressed_size(codec, decl->buf.len, decl->buf.data); + dsize = (dsize ? dsize : decl->buf.len * 2); + + { + const enum fspec_arg *var = arg; + if ((arg = fspec_arg_next(arg, context->code.end, 1, 1<<FSPEC_ARG_NUM | 1<<FSPEC_ARG_VAR))) { + var = arg; + + switch (*var) { + case FSPEC_ARG_NUM: + dsize = fspec_arg_get_num(arg); + break; + + case FSPEC_ARG_VAR: + dsize = var_get_num(context, arg); + break; + + default: + break; + } } - if (nmemb == 1) { - char hex[2 * sizeof(uint64_t) + 1]; - to_hex(buf, field->f.type.size, hex, sizeof(hex), true); - field->value = strtoull(hex, NULL, 16); + for (; (var = fspec_arg_next(var, context->code.end, 1, 1<<FSPEC_ARG_STR));) { + const char *key = fspec_arg_get_cstr(var, context->code.data); + if (!(var = fspec_arg_next(var, context->code.end, 1, ~0))) + errx(EXIT_FAILURE, "expected argument for key '%s'", key); + + switch (*var) { + case FSPEC_ARG_STR: + squash_options_set_string(opts, key, fspec_arg_get_cstr(var, context->code.data)); + break; + + case FSPEC_ARG_NUM: + squash_options_set_int(opts, key, fspec_arg_get_num(var)); + break; + + case FSPEC_ARG_VAR: + if (var_get_type(context, var) == TYPE_STR) { + squash_options_set_string(opts, key, var_get_cstr(context, var)); + } else { + squash_options_set_int(opts, key, var_get_num(context, var)); + } + break; + + default: + break; + } } } - free(buf); + // what a horrible api + squash_object_ref(opts); + + SquashStatus r; + struct dynbuf buf = {0}; + dynbuf_resize(&buf, dsize); + while ((r = squash_codec_decompress_with_options(codec, &buf.len, buf.data, decl->buf.len, decl->buf.data, opts)) == SQUASH_BUFFER_FULL) + dynbuf_resize(&buf, dsize *= 2); + 
+ dynbuf_resize_if_needed(&buf, (buf.written = buf.len)); + squash_object_unref(opts); + + if (r != SQUASH_OK) + errx(EXIT_FAILURE, "squash_codec_decompress(%zu, %zu) = %d: %s", dsize, decl->buf.len, r, squash_status_to_string(r)); + + dynbuf_release(&decl->buf); + decl->buf = buf; + decl->nmemb = buf.len / decl->size; } static void -container_process(struct container *container, FILE *f) +filter_decode(const struct context *context, struct decl *decl) { - assert(container && f); + assert(decl); - for (size_t i = 0; i < container->num_fields; ++i) - field_process(&container->fields[i], f); -} + const enum fspec_arg *arg; + if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<<FSPEC_ARG_STR))) + errx(EXIT_FAILURE, "missing encoding"); -#define container_of(ptr, type, member) ((type *)((char *)(1 ? (ptr) : &((type *)0)->member) - offsetof(type, member))) + const char *encoding = fspec_arg_get_cstr(arg, context->code.data); -struct fspec_file { - // TODO: Rethink container/field - // I think I want just flat structure of key / value pairs in the end - // Especially if I want to express members of struct members (e.g. 
struct a { struct b b; u8 c[b.x]; };) - struct container containers[32]; - struct fspec fspec; - FILE *handle; - size_t num_containers; -}; + static const char *sys_encoding; + if (!sys_encoding) { + setlocale(LC_ALL, ""); + sys_encoding = nl_langinfo(CODESET); + } + + iconv_t iv; + if ((iv = iconv_open(sys_encoding, encoding)) == (iconv_t)-1) + err(EXIT_FAILURE, "iconv_open(%s, %s)", sys_encoding, encoding); + + struct dynbuf buf = {0}; + const uint8_t *in = decl->buf.data; + size_t in_left = decl->buf.written; + do { + char enc[1024], *out = enc; + size_t out_left = sizeof(enc); + + errno = 0; + if (iconv(iv, (char**)&in, &in_left, &out, &out_left) == (size_t)-1 && errno != E2BIG) + err(EXIT_FAILURE, "iconv(%s, %s)", sys_encoding, encoding); + + dynbuf_append(&buf, enc, sizeof(enc) - out_left); + } while (in_left > 0); + + iconv_close(iv); + + dynbuf_release(&decl->buf); + decl->buf = buf; + decl->nmemb = buf.len / decl->size; +} static void -fspec_field(struct fspec *fspec, const struct fspec_container *container, const struct fspec_field *field) +call(const struct context *context, FILE *f) { - assert(fspec && container); - struct fspec_file *f = container_of(fspec, struct fspec_file, fspec); + assert(context && f); - if (!f->num_containers || memcmp(container, &f->containers[f->num_containers - 1].c, sizeof(*container))) - f->containers[f->num_containers++].c = *container; + struct decl *decl = NULL; + for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) { + if (decl && op == decl->end) { + decl_display(decl); + decl = NULL; + } - struct container *c = &f->containers[f->num_containers - 1]; + switch (*op) { + case FSPEC_OP_DECLARATION: + { + const enum fspec_arg *arg; + arg = fspec_op_get_arg(op, context->code.end, 2, 1<<FSPEC_ARG_NUM); + decl = &context->decl[fspec_arg_get_num(arg)]; + dynbuf_reset(&decl->buf); + } + break; + + case FSPEC_OP_READ: + { + assert(decl); + const enum fspec_arg *arg = 
fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM); + static_assert(CHAR_BIT == 8, "doesn't work otherwere right now"); + decl->size = fspec_arg_get_num(arg) / 8; + decl->nmemb = 0; + + for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) { + switch (*var) { + case FSPEC_ARG_NUM: + case FSPEC_ARG_VAR: + { + const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var)); + if (v == 0) { + goto noop; + } else if (v > 1) { + const size_t nmemb = (decl->nmemb ? decl->nmemb : 1) * v; + dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb); + const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f); + decl->buf.written += decl->size * read; + decl->nmemb += read; + } + } + break; + + case FSPEC_ARG_STR: + break; + + case FSPEC_ARG_EOF: + { + const size_t nmemb = (decl->nmemb ? decl->nmemb : 1); + size_t read = 0, r = nmemb; + while (r == nmemb) { + dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb); + read += (r = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f)); + decl->buf.written += decl->size * r; + }; + decl->nmemb += read; + } + break; + + default: + break; + } + } +noop: + + if (!fspec_arg_next(arg, context->code.end, 1, ~0)) { + dynbuf_grow_if_needed(&decl->buf, decl->size * 1); + const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, 1, f); + decl->buf.written += decl->size * read; + decl->nmemb = read; + } + } + break; + + case FSPEC_OP_GOTO: + { + decl = NULL; + const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_VAR); + const struct decl *d = &context->decl[fspec_arg_get_num(arg)]; + struct context c = *context; + c.code.start = d->start; + c.code.end = d->end; + + for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) { + switch (*var) { + case FSPEC_ARG_NUM: + case FSPEC_ARG_VAR: + { + const fspec_num v = (*var == 
FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var)); + for (fspec_num i = 0; i < v; ++i) + call(&c, f); + } + break; + + // XXX: How to handle STR with stdin? + // With fseek would be easy. + case FSPEC_ARG_STR: + break; + + case FSPEC_ARG_EOF: + while (!feof(f)) + call(&c, f); + break; + + default: + break; + } + } + + if (!fspec_arg_next(arg, context->code.end, 1, ~0)) + call(&c, f); + } + break; + + case FSPEC_OP_FILTER: + { + assert(decl); + const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_STR); + + const struct { + const char *name; + void (*fun)(const struct context*, struct decl*); + } map[] = { + { .name = "encoding", .fun = filter_decode }, + { .name = "compression", .fun = filter_decompress }, + }; + + const char *filter = fspec_arg_get_cstr(arg, context->code.data); + for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { + if (!strcmp(filter, map[i].name)) { + struct context c = *context; + c.code.start = op; + map[i].fun(&c, decl); + break; + } + + if (i == ARRAY_SIZE(map) - 1) + warnx("unknown filter '%s'", filter); + } + } + break; - if (field->type.flags & FSPEC_TYPE_CONTAINER) { - for (size_t i = 0; i < f->num_containers - 1; ++i) { - if (strcmp(field->type.name, f->containers[i].c.name)) - continue; + case FSPEC_OP_VISUAL: + { + assert(decl); + const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM); + decl->visual = fspec_arg_get_num(arg); + } + break; - c->fields[c->num_fields].link = &f->containers[i]; - break; + case FSPEC_OP_ARG: + case FSPEC_OP_HEADER: + case FSPEC_OP_LAST: + break; } } - c->fields[c->num_fields].c = c; - c->fields[c->num_fields++].f = *field; + if (decl && context->code.end == decl->end) + decl_display(decl); } -static size_t -fspec_read(struct fspec *fspec, char *buf, const size_t size, const size_t nmemb) +static void +setup(const struct context *context) { - assert(fspec && buf); - struct fspec_file *f = container_of(fspec, struct fspec_file, 
fspec); - return fread(buf, size, nmemb, f->handle); + assert(context); + + for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) { + switch (*op) { + case FSPEC_OP_DECLARATION: + { + const enum fspec_arg *arg[4]; + arg[0] = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM); + arg[1] = fspec_arg_next(arg[0], context->code.end, 1, 1<<FSPEC_ARG_NUM); + arg[2] = fspec_arg_next(arg[1], context->code.end, 1, 1<<FSPEC_ARG_OFF); + arg[3] = fspec_arg_next(arg[2], context->code.end, 1, 1<<FSPEC_ARG_STR); + const fspec_num id = fspec_arg_get_num(arg[1]); + struct decl *decl = &context->decl[id]; + decl->declaration = fspec_arg_get_num(arg[0]); + decl->name = fspec_arg_get_cstr(arg[3], context->code.data); + decl->visual = FSPEC_VISUAL_DEC; + decl->start = op; + decl->end = (char*)op + fspec_arg_get_num(arg[2]); + assert(!decl->buf.data); + } + break; + + default: + break; + } + } +} + +static void +execute(const struct fspec_mem *mem) +{ + assert(mem); + + struct context context = { + .code.start = mem->data, + .code.end = (void*)((char*)mem->data + mem->len), + .code.data = mem->data + }; + + printf("output: %zu bytes\n", mem->len); + dump_ops(&context.code); + + const enum fspec_arg *arg = fspec_op_get_arg(context.code.data, context.code.end, 2, 1<<FSPEC_ARG_NUM); + context.decl_count = fspec_arg_get_num(arg); + + if (!(context.decl = calloc(context.decl_count, sizeof(*context.decl)))) + err(EXIT_FAILURE, "calloc(%zu, %zu)", context.decl_count, sizeof(*context.decl)); + + setup(&context); + + puts("\nexecution:"); + context.code.start = get_last_struct(&context.code); + assert(context.code.start); + call(&context, stdin); + + for (fspec_num i = 0; i < context.decl_count; ++i) + dynbuf_release(&context.decl[i].buf); + + free(context.decl); } static FILE* @@ -277,34 +737,70 @@ fopen_or_die(const char *path, const char *mode) return f; } +#define container_of(ptr, type, member) ((type *)((char *)(1 ? 
(ptr) : &((type *)0)->member) - offsetof(type, member))) + +struct lexer { + struct fspec_lexer lexer; + FILE *file; +}; + +static size_t +fspec_lexer_read(struct fspec_lexer *lexer, void *ptr, const size_t size, const size_t nmemb) +{ + assert(lexer && ptr); + struct lexer *l = container_of(lexer, struct lexer, lexer); + return fread(ptr, size, nmemb, l->file); +} + +static size_t +fspec_validator_read(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb) +{ + assert(validator && ptr); + assert(ptr == validator->mem.input.data); + const size_t read = validator->mem.input.len / size; + assert((validator->mem.input.len && read == nmemb) || (!validator->mem.input.len && !read)); + validator->mem.input.len -= read * size; + assert(validator->mem.input.len == 0); + return read; +} + int main(int argc, const char *argv[]) { if (argc < 2) - errx(EXIT_FAILURE, "usage: %s file.spec < data\n", argv[0]); + errx(EXIT_FAILURE, "usage: %s file.spec < data", argv[0]); + + char output[4096]; + struct fspec_mem bcode = {0}; + + { + char input[4096]; + struct lexer l = { + .lexer = { + .ops.read = fspec_lexer_read, + .mem.input = { .data = input, sizeof(input) }, + .mem.output = { .data = output, sizeof(output) }, + }, + .file = fopen_or_die(argv[1], "rb"), + }; - uint8_t data[4096] = {0}; + if (!fspec_lexer_parse(&l.lexer, argv[1])) + exit(EXIT_FAILURE); - struct fspec_file file = { - .fspec = { - .ops = { - .read = fspec_read, - .field = fspec_field, - }, - .mem = { - .data = data, - .size = sizeof(data), - }, - }, - .handle = fopen_or_die(argv[1], "rb"), - }; + fclose(l.file); + bcode = l.lexer.mem.output; + } - fspec_parse(&file.fspec); + { + struct fspec_validator validator = { + .ops.read = fspec_validator_read, + .mem.input = bcode, + }; - if (!file.num_containers) - errx(EXIT_FAILURE, "'%s' contains no containers", argv[1]); + if (!fspec_validator_parse(&validator, argv[1])) + exit(EXIT_FAILURE); + } - 
container_process(&file.containers[file.num_containers - 1], stdin); - fclose(file.handle); + execute(&bcode); return EXIT_SUCCESS; } diff --git a/src/fspec/bcode-internal.h b/src/fspec/bcode-internal.h new file mode 100644 index 0000000..8c9ce74 --- /dev/null +++ b/src/fspec/bcode-internal.h @@ -0,0 +1,16 @@ +#pragma once + +#include <inttypes.h> +#include <stdint.h> + +/** maximum size of string literals */ +#define PRI_FSPEC_STRSZ PRIu8 +typedef uint8_t fspec_strsz; + +/** maximum range of variable ids */ +#define PRI_FSPEC_VAR PRIu16 +typedef uint16_t fspec_var; + +/** maximum range of bytecode offsets */ +#define PRI_FSPEC_OFF PRIu32 +typedef uint32_t fspec_off; diff --git a/src/fspec/bcode.c b/src/fspec/bcode.c new file mode 100644 index 0000000..0a89260 --- /dev/null +++ b/src/fspec/bcode.c @@ -0,0 +1,189 @@ +#include <fspec/bcode.h> +#include "bcode-internal.h" + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <err.h> + +static_assert(sizeof(fspec_off) <= sizeof(((struct fspec_mem*)0)->len), "fspec_off should not be larger than what fspec_mem can represent"); +static_assert(sizeof(enum fspec_op) == sizeof(uint8_t), "enum fspec_op is expected to have size of uint8_t"); +static_assert(sizeof(enum fspec_arg) == sizeof(uint8_t), "enum fspec_arg is expected to have size of uint8_t"); + +static fspec_off +arg_data_len(const enum fspec_arg *arg) +{ + assert(arg); + + switch (*arg) { + case FSPEC_ARG_NUM: + return sizeof(fspec_num); + + case FSPEC_ARG_VAR: + return sizeof(fspec_var); + + case FSPEC_ARG_STR: + case FSPEC_ARG_OFF: + return sizeof(fspec_off); + + case FSPEC_ARG_DAT: + { + struct fspec_mem mem; + fspec_arg_get_mem(arg, NULL, &mem); + return sizeof(fspec_off) + mem.len; + } + + case FSPEC_ARG_EOF: + break; + + case FSPEC_ARG_LAST: + errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); + break; + } + + return 0; +} + +static fspec_off +arg_len(const enum fspec_arg *arg) +{ + return sizeof(*arg) + 
arg_data_len(arg); +} + +void +fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem) +{ + assert(arg && out_mem); + + switch (*arg) { + case FSPEC_ARG_STR: + { + assert(data); + fspec_off off; + fspec_strsz len; + memcpy(&off, (char*)arg + sizeof(*arg), sizeof(off)); + memcpy(&len, (char*)data + off, sizeof(len)); + out_mem->data = (char*)data + off + sizeof(len); + out_mem->len = len; + } + break; + + case FSPEC_ARG_DAT: + { + fspec_off len; + memcpy(&len, (char*)arg + sizeof(*arg), sizeof(len)); + out_mem->data = (char*)arg + sizeof(*arg) + sizeof(len); + out_mem->len = len; + } + break; + + case FSPEC_ARG_VAR: + case FSPEC_ARG_NUM: + case FSPEC_ARG_OFF: + out_mem->data = (char*)arg + sizeof(*arg); + out_mem->len = arg_data_len(arg); + break; + + case FSPEC_ARG_EOF: + *out_mem = (struct fspec_mem){0}; + break; + + case FSPEC_ARG_LAST: + errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); + break; + } +} + +fspec_num +fspec_arg_get_num(const enum fspec_arg *arg) +{ + assert(arg); + fspec_num v; + switch (*arg) { + case FSPEC_ARG_NUM: + memcpy(&v, arg + sizeof(*arg), sizeof(v)); + break; + + case FSPEC_ARG_VAR: + { + fspec_var var; + memcpy(&var, arg + sizeof(*arg), sizeof(var)); + v = var; + } + break; + + case FSPEC_ARG_DAT: + case FSPEC_ARG_OFF: + { + fspec_off off; + memcpy(&off, arg + sizeof(*arg), sizeof(off)); + v = off; + } + break; + + case FSPEC_ARG_STR: + case FSPEC_ARG_EOF: + case FSPEC_ARG_LAST: + errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); + break; + } + return v; +} + +const char* +fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data) +{ + assert(arg && *arg == FSPEC_ARG_STR); + struct fspec_mem mem; + fspec_arg_get_mem(arg, data, &mem); + return (const char*)mem.data; +} + +const enum fspec_arg* +fspec_op_get_arg(const enum fspec_op *start, const void *end, const uint8_t nth, const uint32_t expect) +{ + uint8_t i = 0; + const enum fspec_arg *arg = NULL; + for 
(const enum fspec_op *op = fspec_op_next(start, end, false); op && i < nth; op = fspec_op_next(op, end, false)) { + if (*op != FSPEC_OP_ARG) + return NULL; + + arg = (void*)(op + 1); + assert(*arg >= 0 && *arg < FSPEC_ARG_LAST); + ++i; + } + + if (arg && !(expect & (1<<*arg))) + errx(EXIT_FAILURE, "got unexpected argument of type %u", *arg); + + return arg; +} + +const enum fspec_arg* +fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const uint32_t expect) +{ + return fspec_op_get_arg((void*)(arg - 1), end, nth, expect); +} + +const enum fspec_op* +fspec_op_next(const enum fspec_op *start, const void *end, const bool skip_args) +{ + assert(start && end); + fspec_off off = sizeof(*start); + if ((void*)start < end && *start == FSPEC_OP_ARG) + off += arg_len((void*)(start + 1)); + + for (const enum fspec_op *op = start + off; (void*)start < end && (void*)op < end; ++op) { + if (*op >= FSPEC_OP_LAST) + errx(EXIT_FAILURE, "got unexected opcode %u", *op); + + if (skip_args && *op == FSPEC_OP_ARG) { + op += arg_len((void*)(op + 1)); + continue; + } + + return op; + } + + return NULL; +} diff --git a/src/fspec/bcode.h b/src/fspec/bcode.h new file mode 100644 index 0000000..d84060e --- /dev/null +++ b/src/fspec/bcode.h @@ -0,0 +1,64 @@ +#pragma once + +#include <fspec/memory.h> + +#include <inttypes.h> +#include <stdint.h> +#include <stdbool.h> + +/** maximum range of numbers */ +#define PRI_FSPEC_NUM PRIu64 +typedef uint64_t fspec_num; + +enum fspec_arg { + FSPEC_ARG_DAT, + FSPEC_ARG_OFF, + FSPEC_ARG_NUM, + FSPEC_ARG_VAR, + FSPEC_ARG_STR, + FSPEC_ARG_EOF, + FSPEC_ARG_LAST, +} __attribute__((packed)); + +void +fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem); + +fspec_num +fspec_arg_get_num(const enum fspec_arg *arg); + +const char* +fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data); + +const enum fspec_arg* +fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const 
uint32_t expect); + +enum fspec_declaration { + FSPEC_DECLARATION_STRUCT, + FSPEC_DECLARATION_MEMBER, + FSPEC_DECLARATION_LAST, +} __attribute__((packed)); + +enum fspec_visual { + FSPEC_VISUAL_NUL, + FSPEC_VISUAL_DEC, + FSPEC_VISUAL_HEX, + FSPEC_VISUAL_STR, + FSPEC_VISUAL_LAST, +} __attribute__((packed)); + +enum fspec_op { + FSPEC_OP_ARG, + FSPEC_OP_HEADER, + FSPEC_OP_DECLARATION, + FSPEC_OP_READ, + FSPEC_OP_GOTO, + FSPEC_OP_FILTER, + FSPEC_OP_VISUAL, + FSPEC_OP_LAST, +} __attribute__((packed)); + +const enum fspec_op* +fspec_op_next(const enum fspec_op *op, const void *end, const bool skip_args); + +const enum fspec_arg* +fspec_op_get_arg(const enum fspec_op *op, const void *end, const uint8_t nth, const uint32_t expect); diff --git a/src/fspec/lexer.h b/src/fspec/lexer.h new file mode 100644 index 0000000..7b60e6b --- /dev/null +++ b/src/fspec/lexer.h @@ -0,0 +1,17 @@ +#pragma once + +#include <fspec/memory.h> + +struct fspec_lexer; +struct fspec_lexer { + struct { + size_t (*read)(struct fspec_lexer *lexer, void *ptr, const size_t size, const size_t nmemb); + } ops; + + struct { + struct fspec_mem input, output; + } mem; +}; + +bool +fspec_lexer_parse(struct fspec_lexer *lexer, const char *name); diff --git a/src/fspec/lexer.rl b/src/fspec/lexer.rl new file mode 100644 index 0000000..81390e2 --- /dev/null +++ b/src/fspec/lexer.rl @@ -0,0 +1,616 @@ +#include "ragel/ragel.h" +#include <fspec/bcode.h> +#include <fspec/lexer.h> +#include "bcode-internal.h" + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <err.h> + +#define PLACEHOLDER 0xDEADBEEF +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +typedef uint8_t fspec_strsz; + +struct membuf { + struct fspec_mem mem; + fspec_off written; +}; + +static void +membuf_bounds_check(const struct membuf *buf, const fspec_off nmemb) +{ + assert(buf); + + if (buf->mem.len < nmemb || buf->written > buf->mem.len - nmemb) + errx(EXIT_FAILURE, "%s: %" PRI_FSPEC_OFF " bytes exceeds the maximum storage 
size of %zu bytes", __func__, buf->written + nmemb, buf->mem.len); +} + +static void +membuf_terminate(struct membuf *buf, const void *data, const fspec_off data_sz) +{ + membuf_bounds_check(buf, data_sz); + memcpy((char*)buf->mem.data + buf->written, data, data_sz); +} + +static void +membuf_replace(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz) +{ + assert(buf->mem.len >= data_sz && off <= buf->mem.len - data_sz); + memcpy((char*)buf->mem.data + off, data, data_sz); +} + +static void +membuf_append_at(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz) +{ + assert(off <= buf->written); + membuf_bounds_check(buf, data_sz); + const size_t rest = buf->written - off; + memmove((char*)buf->mem.data + off + data_sz, (char*)buf->mem.data + off, rest); + membuf_replace(buf, off, data, data_sz); + buf->written += data_sz; + assert(buf->written <= buf->mem.len); +} + +static void +membuf_append(struct membuf *buf, const void *data, const fspec_off data_sz) +{ + membuf_append_at(buf, buf->written, data, data_sz); +} + +struct varbuf { + struct membuf buf; + fspec_off offset; +}; + +static inline void +varbuf_begin(struct varbuf *var) +{ + assert(var); + var->offset = var->buf.written; + assert(var->offset <= var->buf.mem.len); +} + +static void +varbuf_reset(struct varbuf *var) +{ + assert(var); + var->offset = var->buf.written = 0; +} + +static inline void +varbuf_remove_last(struct varbuf *var) +{ + assert(var); + assert(var->buf.written >= var->offset); + const fspec_off size = var->buf.written - var->offset; + assert(var->buf.written >= size); + var->buf.written -= size; + assert(var->buf.written <= var->buf.mem.len); +} + +enum section { + SECTION_DATA, + SECTION_CODE, + SECTION_LAST, +}; + +struct codebuf { + struct membuf buf; + const void *decl[FSPEC_DECLARATION_LAST], *end[SECTION_LAST], *strings; + fspec_var declarations; +}; + +static void +codebuf_append(struct codebuf *code, const enum section 
section, const void *data, const fspec_off data_sz) +{ + assert(code->end[section]); + const fspec_off off = (char*)code->end[section] - (char*)code->buf.mem.data; + membuf_append_at(&code->buf, off, data, data_sz); + + for (enum section s = section; s < ARRAY_SIZE(code->end); ++s) { + code->end[s] = (char*)code->end[s] + data_sz; + assert((char*)code->end[s] <= (char*)code->buf.mem.data + code->buf.mem.len); + } + + if (section == SECTION_DATA) { + for (enum fspec_declaration d = 0; d < ARRAY_SIZE(code->decl); ++d) { + code->decl[d] = (code->decl[d] ? (char*)code->decl[d] + data_sz : NULL); + assert((char*)code->decl[d] <= (char*)code->buf.mem.data + code->buf.mem.len); + } + } + + assert(code->end[SECTION_DATA] <= code->end[SECTION_CODE]); + assert((char*)code->end[SECTION_CODE] == (char*)code->buf.mem.data + code->buf.written); +} + +static void +codebuf_append_op(struct codebuf *code, const enum fspec_op op) +{ + codebuf_append(code, SECTION_CODE, &op, sizeof(op)); +} + +static uint8_t +arg_sizeof(const enum fspec_arg type) +{ + switch (type) { + case FSPEC_ARG_DAT: + case FSPEC_ARG_OFF: + case FSPEC_ARG_STR: + return sizeof(fspec_off); + + case FSPEC_ARG_NUM: + return sizeof(fspec_num); + + case FSPEC_ARG_VAR: + return sizeof(fspec_var); + + case FSPEC_ARG_EOF: + break; + + case FSPEC_ARG_LAST: + errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, type); + } + + return 0; +} + +static void +codebuf_append_arg(struct codebuf *code, const enum fspec_arg type, const void *v) +{ + assert(code); + codebuf_append_op(code, FSPEC_OP_ARG); + codebuf_append(code, SECTION_CODE, &type, sizeof(type)); + codebuf_append(code, SECTION_CODE, v, arg_sizeof(type)); +} + +static void +codebuf_replace_arg(struct codebuf *code, const enum fspec_arg *arg, const enum fspec_arg type, const void *v) +{ + assert(code && arg); + assert(*arg == type); + const fspec_off off = ((char*)arg + 1) - (char*)code->buf.mem.data; + membuf_replace(&code->buf, off, v, arg_sizeof(type)); 
+} + +static bool +get_string_offset(const void *start, const void *end, const void *str, const fspec_strsz str_sz, void const **out_off) +{ + assert(out_off); + + while (start < end) { + fspec_strsz len; + memcpy(&len, start, sizeof(len)); + if (len == str_sz && !memcmp((char*)start + sizeof(len), str, len)) { + *out_off = start; + return true; + } + start = (char*)start + sizeof(len) + len + 1; + } + + return false; +} + +static void +codebuf_append_arg_cstr(struct codebuf *code, const void *str, const fspec_strsz str_sz) +{ + const void *ptr; + if (!get_string_offset(code->strings, code->end[SECTION_DATA], str, str_sz, &ptr)) { + ptr = code->end[SECTION_DATA]; + codebuf_append(code, SECTION_DATA, &str_sz, sizeof(str_sz)); + codebuf_append(code, SECTION_DATA, str, str_sz); + codebuf_append(code, SECTION_DATA, (char[]){ 0 }, 1); + } + + const fspec_off off = (char*)ptr - (char*)code->buf.mem.data; + codebuf_append_arg(code, FSPEC_ARG_STR, &off); +} + +static const enum fspec_op* +get_named_op(const enum fspec_op *start, const void *end, const void *data, const enum fspec_op op, const uint8_t nth, const void *name, const fspec_strsz name_sz, fspec_var *out_id) +{ + fspec_var id = 0; + if ((void*)start < end && *start == FSPEC_OP_DECLARATION) + id = fspec_arg_get_num(fspec_op_get_arg(start, end, 2, 1<<FSPEC_ARG_NUM)); + + for (const enum fspec_op *p = start; p; p = fspec_op_next(p, end, true)) { + const enum fspec_arg *arg; + if (*p != op || !(arg = fspec_op_get_arg(p, end, nth, 1<<FSPEC_ARG_STR))) + continue; + + struct fspec_mem str; + fspec_arg_get_mem(arg, data, &str); + if (str.len == name_sz && !memcmp(name, str.data, name_sz)) { + if (out_id) + *out_id = id; + + return p; + } + + ++id; + } + + return NULL; +} + +static const enum fspec_op* +get_declaration(struct codebuf *code, const bool member, const struct fspec_mem *str, fspec_var *out_id) +{ + const void *start = (member ? 
code->decl[FSPEC_DECLARATION_STRUCT] : code->end[SECTION_DATA]); + return get_named_op(start, code->end[SECTION_CODE], code->buf.mem.data, FSPEC_OP_DECLARATION, 4, str->data, str->len, out_id); +} + +static bool +codebuf_append_arg_var(struct codebuf *code, const bool member, const struct fspec_mem *var) +{ + fspec_var id = -1; + if (!get_declaration(code, member, var, &id)) + return false; + + codebuf_append_arg(code, FSPEC_ARG_VAR, &id); + return true; +} + +static void +codebuf_append_declaration(struct codebuf *code, const enum fspec_declaration decl) +{ + code->decl[decl] = code->end[SECTION_CODE]; + codebuf_append_op(code, FSPEC_OP_DECLARATION); + codebuf_append_arg(code, FSPEC_ARG_NUM, (fspec_num[]){ decl }); + codebuf_append_arg(code, FSPEC_ARG_NUM, (fspec_num[]){ code->declarations++ }); + codebuf_append_arg(code, FSPEC_ARG_OFF, (fspec_off[]){ PLACEHOLDER }); +} + +enum stack_type { + STACK_STR, + STACK_NUM, +}; + +struct stack { + union { + struct fspec_mem str; + uint64_t num; + }; + enum stack_type type; +}; + +static const char* +stack_type_to_str(const enum stack_type type) +{ + switch (type) { + case STACK_STR: return "str"; + case STACK_NUM: return "num"; + }; + return "unknown"; +} + +static void +stack_check_type(const struct stack *stack, const enum stack_type type) +{ + assert(stack); + + if (stack->type != type) + errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type)); +} + +static const struct fspec_mem* +stack_get_str(const struct stack *stack) +{ + stack_check_type(stack, STACK_STR); + return &stack->str; +} + +static uint64_t +stack_get_num(const struct stack *stack) +{ + stack_check_type(stack, STACK_NUM); + return stack->num; +} + +struct state { + struct ragel ragel; + struct stack stack; + struct codebuf out; + struct varbuf var; +}; + +static void +state_stack_num(struct state *state, const uint8_t base) +{ + assert(state); + 
membuf_terminate(&state->var.buf, (char[]){ 0 }, 1); + const char *str = (char*)state->var.buf.mem.data + state->var.offset; + state->stack.type = STACK_NUM; + state->stack.num = strtoll(str + (base == 16 && *str == 'x'), NULL, base); + varbuf_remove_last(&state->var); +} + +static void +state_append_arg_var(struct state *state, const bool member, const struct fspec_mem *str) +{ + assert(state && str); + + if (!codebuf_append_arg_var(&state->out, member, str)) + ragel_throw_error(&state->ragel, "'%s' undeclared", (char*)str->data); +} + +static void +state_append_declaration(struct state *state, const enum fspec_declaration decl, const struct fspec_mem *str) +{ + assert(state && str); + + if (get_declaration(&state->out, (decl == FSPEC_DECLARATION_MEMBER), str, NULL)) + ragel_throw_error(&state->ragel, "'%s' redeclared", (char*)str->data); + + codebuf_append_declaration(&state->out, decl); + codebuf_append_arg_cstr(&state->out, str->data, str->len); +} + +static void +state_finish_declaration(struct state *state, const enum fspec_declaration decl) +{ + assert(state && state->out.decl[decl]); + const char *end = state->out.end[SECTION_CODE]; + const fspec_off off = end - (char*)state->out.decl[decl]; + codebuf_replace_arg(&state->out, fspec_op_get_arg(state->out.decl[decl], end, 3, 1<<FSPEC_ARG_OFF), FSPEC_ARG_OFF, &off); + state->out.decl[decl] = NULL; +} + +%%{ + machine fspec_lexer; + variable p state.ragel.p; + variable pe state.ragel.pe; + variable eof state.ragel.eof; + write data noerror nofinal; + + action arg_eof { + codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL); + } + + action arg_num { + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) }); + } + + action arg_str { + const struct fspec_mem *str = stack_get_str(&state.stack); + codebuf_append_arg_cstr(&state.out, str->data, str->len); + } + + action arg_var { + state_append_arg_var(&state, true, stack_get_str(&state.stack)); + } + + action filter { + 
codebuf_append_op(&state.out, FSPEC_OP_FILTER); + } + + action goto { + codebuf_append_op(&state.out, FSPEC_OP_GOTO); + state_append_arg_var(&state, false, stack_get_str(&state.stack)); + } + + action vnul { + codebuf_append_op(&state.out, FSPEC_OP_VISUAL); + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL }); + } + + action vdec { + codebuf_append_op(&state.out, FSPEC_OP_VISUAL); + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC }); + } + + action vhex { + codebuf_append_op(&state.out, FSPEC_OP_VISUAL); + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX }); + } + + action vstr { + codebuf_append_op(&state.out, FSPEC_OP_VISUAL); + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR }); + } + + action r8 { + codebuf_append_op(&state.out, FSPEC_OP_READ); + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 8 }); + } + + action r16 { + codebuf_append_op(&state.out, FSPEC_OP_READ); + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 }); + } + + action r32 { + codebuf_append_op(&state.out, FSPEC_OP_READ); + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 }); + } + + action r64 { + codebuf_append_op(&state.out, FSPEC_OP_READ); + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 }); + } + + action member_end { + state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER); + } + + action member_start { + state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack)); + } + + action struct_end { + state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT); + } + + action struct_start { + state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack)); + } + + action stack_oct { + state_stack_num(&state, 8); + } + + action stack_hex { + state_stack_num(&state, 16); + } + + action stack_dec { + state_stack_num(&state, 10); + } + + action stack_str { + 
membuf_terminate(&state.var.buf, (char[]){ 0 }, 1); + state.stack.type = STACK_STR; + state.stack.str = state.var.buf.mem; + state.stack.str.len = state.var.buf.written; + } + + action store_esc_num { + const fspec_num v = stack_get_num(&state.stack); + assert(v <= 255); + const uint8_t u8 = v; + membuf_append(&state.var.buf, &u8, sizeof(u8)); + } + + action store_esc { + const struct { const char e, v; } map[] = { + { .e = 'a', .v = '\a' }, + { .e = 'b', .v = '\b' }, + { .e = 'f', .v = '\f' }, + { .e = 'n', .v = '\n' }, + { .e = 'r', .v = '\r' }, + { .e = 't', .v = '\t' }, + { .e = 'v', .v = '\v' }, + { .e = '\\', .v = '\\' }, + { .e = '\'', .v = '\'' }, + { .e = '\"', .v = '"' }, + { .e = 'e', .v = 0x1B }, + }; + + for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { + if (*state.ragel.p != map[i].e) + continue; + + membuf_append(&state.var.buf, &map[i].v, sizeof(map[i].v)); + break; + } + } + + action store { + membuf_append(&state.var.buf, state.ragel.p, 1); + } + + action begin_num { + varbuf_begin(&state.var); + } + + action begin_str { + varbuf_reset(&state.var); + } + + action type_err { + ragel_throw_error(&state.ragel, "unknown type name"); + } + + action visual_err { + ragel_throw_error(&state.ragel, "unknown visualization"); + } + + action syntax_err { + ragel_throw_error(&state.ragel, "malformed input (machine failed here or in next expression)"); + } + + action line { + ragel_advance_line(&state.ragel); + } + + # Semantic + quote = ['"]; + newline = '\n'; + esc = [abfnrtv\\'"e]; + esc_chr = '\\'; + esc_hex = 'x' <: xdigit{2}; + hex = '0' <: esc_hex; + oct = [0-7]{1,3}; + dec = [\-+]? 
<: (([1-9] <: digit*) | '0'); + valid = ^cntrl; + comment = '//' <: valid* :>> newline; + type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's64') %r64; + visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr; + reserved = 'struct' | type | visual; + name = ((alpha | '_') <: (alnum | '_')*) - reserved; + + # Stack + stack_name = name >begin_str $store %stack_str; + stack_hex = hex >begin_num $store %stack_hex; + stack_dec = dec >begin_num $store %stack_dec; + stack_oct = oct >begin_num $store %stack_oct; + stack_esc_hex = esc_hex >begin_num $store %stack_hex; + stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc); + stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote; + stack_num = stack_dec | stack_hex; + + # Catchers + catch_struct = 'struct ' <: stack_name; + catch_type = (catch_struct %goto | type) $!type_err; + catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var; + catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']'; + catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? <: (', ' <: catch_args)* :>> ')')?; + catch_visual = ' ' <: visual $!visual_err; + + # Abstract + member = stack_name %member_start :> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) 
:>> ';' %member_end; + struct = catch_struct %struct_start :>> ' {' <: (space | comment | member)* :>> '};' %struct_end; + line = valid* :>> newline %line; + main := ((space | comment | struct)* & line*) $!syntax_err; +}%% + +bool +fspec_lexer_parse(struct fspec_lexer *lexer, const char *name) +{ + int cs; + %% write init; + + (void)fspec_lexer_en_main; + assert(lexer); + assert(lexer->ops.read); + assert(lexer->mem.input.data && lexer->mem.input.len); + assert(lexer->mem.output.data && lexer->mem.output.len); + assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + assert(lexer->mem.output.len <= (fspec_off)~0 && "output storage size exceeds fspec_off range"); + + char var[256]; + struct state state = { + .ragel.name = name, + .ragel.lineno = 1, + .var.buf.mem = { .data = var, .len = sizeof(var) }, + .out.buf.mem = lexer->mem.output, + }; + + static const fspec_num version = 0; + state.out.end[SECTION_CODE] = state.out.end[SECTION_DATA] = state.out.buf.mem.data; + codebuf_append_op(&state.out, FSPEC_OP_HEADER); + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, &version); + codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ PLACEHOLDER }); + codebuf_append_arg(&state.out, FSPEC_ARG_DAT, (fspec_off[]){ PLACEHOLDER }); + state.out.end[SECTION_DATA] = state.out.end[SECTION_CODE]; + state.out.strings = state.out.end[SECTION_DATA]; + + struct fspec_mem input = lexer->mem.input; + for (bool eof = false; !state.ragel.error && !eof;) { + const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len); + const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; + ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); + %% write exec; + } + + { + const void *end = state.out.end[SECTION_CODE]; + codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 2, 1<<FSPEC_ARG_NUM), FSPEC_ARG_NUM, (fspec_num[]){ state.out.declarations }); + const fspec_off off = 
(char*)state.out.end[SECTION_DATA] - (char*)state.out.strings; + codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 3, 1<<FSPEC_ARG_DAT), FSPEC_ARG_DAT, &off); + } + + lexer->mem.output.len = state.out.buf.written; + return !state.ragel.error; +} diff --git a/src/fspec/memory.h b/src/fspec/memory.h new file mode 100644 index 0000000..768415a --- /dev/null +++ b/src/fspec/memory.h @@ -0,0 +1,8 @@ +#pragma once + +#include <stddef.h> + +struct fspec_mem { + void *data; + size_t len; +}; diff --git a/src/fspec/validator.h b/src/fspec/validator.h new file mode 100644 index 0000000..c4705b2 --- /dev/null +++ b/src/fspec/validator.h @@ -0,0 +1,17 @@ +#pragma once + +#include <fspec/memory.h> + +struct fspec_validator; +struct fspec_validator { + struct { + size_t (*read)(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb); + } ops; + + struct { + struct fspec_mem input; + } mem; +}; + +bool +fspec_validator_parse(struct fspec_validator *validator, const char *name); diff --git a/src/fspec/validator.rl b/src/fspec/validator.rl new file mode 100644 index 0000000..093348d --- /dev/null +++ b/src/fspec/validator.rl @@ -0,0 +1,237 @@ +#include "ragel/ragel.h" +#include <fspec/bcode.h> +#include <fspec/validator.h> +#include "bcode-internal.h" + +#include <assert.h> + +struct stack { + union { + fspec_num num; + fspec_off off; + fspec_var var; + fspec_strsz strsz; + unsigned char b[sizeof(fspec_num)]; + } u; + uint8_t i; // writing index for u.b +}; + +struct range { + fspec_off start, end; +}; + +struct context { + struct range data; + fspec_var declarations, expected_declarations; + fspec_off str_end, decl_start, decl_end[FSPEC_DECLARATION_LAST], offset; + enum fspec_declaration last_decl_type; +}; + +struct state { + struct ragel ragel; + struct context context; + struct stack stack; + bool valid; +}; + +%%{ + machine fspec_validator; + variable p state.ragel.p; + variable pe state.ragel.pe; + variable eof 
state.ragel.eof;
+   write data noerror nofinal;
+
+   action store_decls {
+      // Remember the declaration count announced by the header.
+      if (state.stack.u.num > (fspec_var)~0)
+         ragel_throw_error(&state.ragel, "expected declarations overflows");
+
+      state.context.expected_declarations = state.stack.u.num;
+   }
+
+   action check_decls {
+      // The number of declarations seen must equal the count stored by store_decls.
+      if (state.context.declarations != state.context.expected_declarations)
+         ragel_throw_error(&state.ragel, "expected declarations did not match with the content: expected: %" PRI_FSPEC_VAR " got: %" PRI_FSPEC_VAR, state.context.expected_declarations, state.context.declarations);
+   }
+
+   action mark_dat {
+      // we can replace this logic with fspec generated code in future
+      // struct str { len: u32; str: u8[len]['\0']; }
+      // struct dat { len: u32; strings: struct str[$::len]; }
+      if (state.context.offset > (fspec_off)~0 - state.stack.u.off)
+         ragel_throw_error(&state.ragel, "dat section length overflows");
+
+      // NOTE: .end holds the value read from the stack; users of this range add it to .start
+      state.context.data = (struct range){ .start = state.context.offset, .end = state.stack.u.off };
+   }
+
+   action test_inside_dat {
+      state.context.offset < (state.context.data.start + state.context.data.end)
+   }
+
+   action mark_str {
+      if (state.context.offset >= (fspec_off)~0 - state.stack.u.strsz) // >= for null byte
+         ragel_throw_error(&state.ragel, "str length overflows");
+
+      state.context.str_end = state.context.offset + state.stack.u.strsz;
+   }
+
+   action test_inside_str {
+      state.context.offset < state.context.str_end
+   }
+
+   action check_var {
+      // A variable argument must index a declaration that has already been counted.
+      if (state.context.declarations <= state.stack.u.var)
+         ragel_throw_error(&state.ragel, "referenced undeclared variable");
+   }
+
+   action check_str {
+      // A string argument must point inside [data.start, data.start + data.end).
+      // The first diagnostic reports the bound that was actually violated (data.start).
+      if (state.stack.u.off < state.context.data.start) {
+         ragel_throw_error(&state.ragel, "str before data section range: %" PRI_FSPEC_OFF " < %" PRI_FSPEC_OFF, state.stack.u.off, state.context.data.start);
+      } else if (state.context.data.start + state.context.data.end <= state.stack.u.off) {
+         ragel_throw_error(&state.ragel, "str after data section range: %" PRI_FSPEC_OFF " 
<= %" PRI_FSPEC_OFF, state.context.data.start + state.context.data.end, state.stack.u.off); + } + } + + action check_decl_type { + if (state.stack.u.num >= FSPEC_DECLARATION_LAST) + ragel_throw_error(&state.ragel, "invalid declaration type: %" PRI_FSPEC_NUM, state.stack.u.num); + + state.context.last_decl_type = state.stack.u.num; + } + + action check_decl_num { + if (state.context.declarations >= (fspec_var)~0) + ragel_throw_error(&state.ragel, "declarations overflows"); + + if (state.context.declarations != state.stack.u.num) + ragel_throw_error(&state.ragel, "invalid declaration number: %" PRI_FSPEC_NUM " expected: %" PRI_FSPEC_VAR, state.stack.u.num, state.context.declarations); + + ++state.context.declarations; + } + + action start_decl { + state.context.decl_start = state.context.offset; + } + + action mark_decl { + const fspec_off sz = (state.context.offset - state.context.decl_start); + assert(sz <= state.stack.u.off); + + if (state.context.offset > (fspec_off)~0 - state.stack.u.off - sz) + ragel_throw_error(&state.ragel, "declaration length overflows"); + + state.context.decl_end[state.context.last_decl_type] = state.context.offset + state.stack.u.off - sz; + } + + action check_struct { + if (state.context.last_decl_type != FSPEC_DECLARATION_STRUCT) + ragel_throw_error(&state.ragel, "expected struct declaration"); + } + + action check_member { + if (state.context.last_decl_type != FSPEC_DECLARATION_MEMBER) + ragel_throw_error(&state.ragel, "expected member declaration"); + } + + action check_member_end { + if (state.context.decl_end[FSPEC_DECLARATION_MEMBER] != state.context.offset) + ragel_throw_error(&state.ragel, "invalid member end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_MEMBER], state.context.offset); + } + + action check_struct_end { + if (state.context.decl_end[FSPEC_DECLARATION_STRUCT] != state.context.offset) + ragel_throw_error(&state.ragel, "invalid struct end: %" PRI_FSPEC_OFF " expected: %" 
PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_STRUCT], state.context.offset); + } + + action check_visual_type { + if (state.stack.u.num >= FSPEC_VISUAL_LAST) + ragel_throw_error(&state.ragel, "invalid visual type: %" PRI_FSPEC_NUM, state.stack.u.num); + } + + action arg_error { + ragel_throw_error(&state.ragel, "malformed argument"); + } + + action op_error { + ragel_throw_error(&state.ragel, "unexpected argument"); + } + + action pattern_error { + ragel_throw_error(&state.ragel, "unexpected pattern"); + } + + action syntax_error { + ragel_throw_error(&state.ragel, "unexpected byte"); + } + + action store { + if (state.stack.i < sizeof(state.stack.u.b)) + state.stack.u.b[state.stack.i++] = fc; + } + + action flush { + state.stack.i = 0; + } + + action advance { + ++state.context.offset; + } + + stack1 = any{1} >flush $store; + stack2 = any{2} >flush $store; + stack4 = any{4} >flush $store; + stack8 = any{8} >flush $store; + + ARG_DAT = 0 stack4 %*mark_dat ((stack1 %*mark_str (any when test_inside_str)* 0) when test_inside_dat)*; + ARG_OFF = 1 stack4; + ARG_NUM = 2 stack8; + ARG_VAR = 3 stack2 %check_var; + ARG_STR = 4 stack4 %check_str; + ARG_EOF = 5; + + OP_ARG_DAT = 0 ARG_DAT $!arg_error; + OP_ARG_OFF = 0 ARG_OFF $!arg_error; + OP_ARG_NUM = 0 ARG_NUM $!arg_error; + OP_ARG_VAR = 0 ARG_VAR $!arg_error; + OP_ARG_STR = 0 ARG_STR $!arg_error; + OP_ARG_EOF = 0 ARG_EOF $!arg_error; + + OP_HEADER = 1 (OP_ARG_NUM OP_ARG_NUM %store_decls OP_ARG_DAT) $!op_error; + OP_DECLARATION = 2 >start_decl (OP_ARG_NUM %check_decl_type OP_ARG_NUM %check_decl_num OP_ARG_OFF %mark_decl OP_ARG_STR) $!op_error; + OP_READ = 3 (OP_ARG_NUM (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error; + OP_GOTO = 4 (OP_ARG_VAR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error; + OP_FILTER = 5 (OP_ARG_STR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR)*) $!op_error; + OP_VISUAL = 6 (OP_ARG_NUM %check_visual_type) $!op_error; + + pattern = (OP_DECLARATION %check_struct <: 
(OP_DECLARATION %check_member (OP_READ | OP_GOTO) OP_FILTER? OP_VISUAL? %check_member_end)*)* %check_struct_end $!pattern_error; + main := (OP_HEADER <: pattern) %check_decls $advance $!syntax_error; +}%% + +bool +fspec_validator_parse(struct fspec_validator *validator, const char *name) +{ + int cs; + %% write init; + + (void)fspec_validator_en_main; + assert(validator); + assert(validator->ops.read); + assert(validator->mem.input.data && validator->mem.input.len); + assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + + struct state state = { + .ragel.name = name, + .ragel.lineno = 1, + }; + + static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union"); + + struct fspec_mem input = validator->mem.input; + for (bool eof = false; !state.ragel.error && !eof;) { + const size_t bytes = validator->ops.read(validator, input.data, 1, input.len); + const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true }; + ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); + %% write exec; + } + + return !state.ragel.error; +} diff --git a/src/ragel/fspec.h b/src/ragel/fspec.h deleted file mode 100644 index 68998f4..0000000 --- a/src/ragel/fspec.h +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once - -#include <stddef.h> -#include <stdint.h> -#include <stdbool.h> - -struct fspec_bytes { - const uint8_t *data; - size_t size; -}; - -enum fspec_kind_bits { - FSPEC_KIND_IGNORE = 1<<0, - FSPEC_KIND_HEXADECIMAL = 1<<1, - FSPEC_KIND_ENCODING = 1<<2, -}; - -struct fspec_kind { - const char *name; - uint32_t flags; -}; - -enum fspec_array_type { - FSPEC_ARRAY_FIXED, - FSPEC_ARRAY_MATCH, - FSPEC_ARRAY_VAR, -}; - -struct fspec_array { - enum fspec_array_type type; - - union { - struct fspec_bytes match; - const char *var; - size_t nmemb; - }; -}; - -enum fspec_type_bits { - FSPEC_TYPE_SIGNED = 1<<0, - FSPEC_TYPE_CONTAINER = 1<<1, -}; - -struct 
fspec_type { - const char *name; - size_t size; - uint32_t flags; -}; - -struct fspec_field { - struct fspec_type type; - struct fspec_array array; - struct fspec_kind kind; - const char *name; -}; - -struct fspec_container { - const char *name; -}; - -struct fspec; -struct fspec { - struct { - void (*field)(struct fspec *fspec, const struct fspec_container *container, const struct fspec_field *field); - size_t (*read)(struct fspec *fspec, char *buf, const size_t size, const size_t nmemb); - } ops; - - struct { - // XXX: replace with ops.alloc, ops.free - // on dump.c we can then just provide implementation that still uses reasonable amount of static memory - // but we don't limit the code from working with regular dynamic memory - uint8_t *data; - size_t size; - } mem; -}; - -void fspec_parse(struct fspec *fspec); diff --git a/src/ragel/fspec.rl b/src/ragel/fspec.rl deleted file mode 100644 index 8493cf1..0000000 --- a/src/ragel/fspec.rl +++ /dev/null @@ -1,329 +0,0 @@ -#include "fspec.h" -#include "ragel.h" - -// It's pretty good base so far. -// ragel_search_str for typechecking variable delcaration is hack. -// State should have hashmap for fields/containers. -// -// XXX: Maybe drop whole container thing and just give field const char *parent; that points to keypath of container. -// Then we would have flat structure like, "foo, foo.var, foo.b, ..." 
- -static const struct fspec_container default_container = {0}; -static const struct fspec_field default_field = { .array.nmemb = 1 }; - -enum stack_type { - STACK_VAR, - STACK_STR, - STACK_NUM, -}; - -struct stack { - enum stack_type type; - - union { - struct fspec_bytes str; - const char *var; - uint64_t num; - }; -}; - -struct state { - struct ragel ragel; - struct stack stack; - struct fspec_field field; - struct fspec_container container; - size_t container_data_offset; -}; - -static const char* -stack_type_to_str(const enum stack_type type) -{ - switch (type) { - case STACK_VAR: return "var"; - case STACK_STR: return "str"; - case STACK_NUM: return "num"; - }; - - assert(0 && "should not happen"); - return "unknown"; -} - -static void -stack_check_type(const struct ragel *ragel, const struct stack *stack, const enum stack_type type) -{ - assert(ragel && stack); - - if (stack->type != type) - ragel_throw_error(ragel, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type)); -} - -static const char* -stack_get_var(const struct ragel *ragel, const struct stack *stack) -{ - assert(ragel && stack); - stack_check_type(ragel, stack, STACK_VAR); - return stack->var; -} - -static const struct fspec_bytes* -stack_get_str(const struct ragel *ragel, const struct stack *stack) -{ - assert(ragel && stack); - stack_check_type(ragel, stack, STACK_STR); - return &stack->str; -} - -static uint64_t -stack_get_num(const struct ragel *ragel, const struct stack *stack) -{ - assert(ragel && stack); - stack_check_type(ragel, stack, STACK_NUM); - return stack->num; -} - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - -static void -fspec_type_from_str(const struct ragel *ragel, const char *str, struct fspec_type *out_type) -{ - assert(ragel && str); - - const struct fspec_type types[] = { - { .name = "u8", .size = sizeof(uint8_t) }, - { .name = "u16", .size = sizeof(uint16_t) }, - { .name = "u32", .size = 
sizeof(uint32_t) }, - { .name = "u64", .size = sizeof(uint64_t) }, - { .name = "s8", .size = sizeof(int8_t), .flags = FSPEC_TYPE_SIGNED }, - { .name = "s16", .size = sizeof(int16_t), .flags = FSPEC_TYPE_SIGNED }, - { .name = "s32", .size = sizeof(int32_t), .flags = FSPEC_TYPE_SIGNED }, - { .name = "s64", .size = sizeof(int64_t), .flags = FSPEC_TYPE_SIGNED }, - }; - - for (size_t i = 0; i < ARRAY_SIZE(types); ++i) { - if (strcmp(str, types[i].name)) - continue; - - *out_type = types[i]; - return; - } - - if (ragel_search_str(ragel, 0, str)) { - *out_type = (struct fspec_type){ .name = str, .flags = FSPEC_TYPE_CONTAINER }; - return; - } - - ragel_throw_error(ragel, "invalid type"); -} - -static void -fspec_kind_from_str(const struct ragel *ragel, const char *str, struct fspec_kind *out_kind) -{ - assert(ragel && str); - - const struct fspec_kind kinds[] = { - { .name = "pad", .flags = FSPEC_KIND_IGNORE }, - { .name = "hex", .flags = FSPEC_KIND_HEXADECIMAL }, - { .name = "ascii", .flags = FSPEC_KIND_ENCODING }, - { .name = "utf8", .flags = FSPEC_KIND_ENCODING }, - { .name = "sjis", .flags = FSPEC_KIND_ENCODING }, - }; - - for (size_t i = 0; i < ARRAY_SIZE(kinds); ++i) { - if (strcmp(str, kinds[i].name)) - continue; - - *out_kind = kinds[i]; - return; - } - - ragel_throw_error(ragel, "invalid kind"); -} - -static void -check_field_kind(const struct ragel *ragel, const struct fspec_field *field) -{ - assert(ragel && field); - - if ((field->kind.flags & FSPEC_KIND_ENCODING) && field->type.size != sizeof(uint8_t)) - ragel_throw_error(ragel, "invalid kind: %s kind only allowed for u8 and s8 types", field->kind.name); -} - -%%{ - # File specification parser. 
- - machine fspec; - variable p state.ragel.p; - variable pe state.ragel.pe; - variable eof state.ragel.eof; - write data noerror nofinal; - - action field { - fspec->ops.field(fspec, &state.container, &state.field); - } - - action field_kind { - fspec_kind_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.kind); - check_field_kind(&state.ragel, &state.field); - } - - action field_array { - switch (state.stack.type) { - case STACK_NUM: - state.field.array.type = FSPEC_ARRAY_FIXED; - state.field.array.nmemb = stack_get_num(&state.ragel, &state.stack); - break; - - case STACK_STR: - state.field.array.type = FSPEC_ARRAY_MATCH; - state.field.array.match = *stack_get_str(&state.ragel, &state.stack); - break; - - case STACK_VAR: - state.field.array.type = FSPEC_ARRAY_VAR; - state.field.array.var = stack_get_var(&state.ragel, &state.stack); - - if (!ragel_search_str(&state.ragel, state.container_data_offset, state.field.array.var)) - ragel_throw_error(&state.ragel, "undeclared variable '%s'", state.field.array.var); - break; - - default: - ragel_throw_error(&state.ragel, "array can't contain the stack type of '%s'", stack_type_to_str(state.stack.type)); - break; - } - } - - action field_name { - state.field.name = stack_get_var(&state.ragel, &state.stack); - } - - action field_type { - state.field = default_field; - fspec_type_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.type); - } - - action container_name { - state.container = default_container; - state.container.name = stack_get_var(&state.ragel, &state.stack); - state.container_data_offset = state.ragel.mem.cur - state.ragel.mem.data; - } - - action push_var { - state.stack.type = STACK_VAR; - state.stack.var = (char*)state.ragel.mem.cur; - } - - action push_hex { - state.stack.type = STACK_NUM; - state.stack.num = strtoll((char*)state.ragel.mem.cur, NULL, 16); - } - - action push_dec { - state.stack.type = STACK_NUM; - state.stack.num = 
strtoll((char*)state.ragel.mem.cur, NULL, 10); - } - - action push_str { - state.stack.type = STACK_STR; - state.stack.str.data = state.ragel.mem.cur; - state.stack.str.size = (state.ragel.mem.data + state.ragel.mem.written) - state.ragel.mem.cur; - } - - action convert_escape { - ragel_convert_escape(&state.ragel); - } - - action remove { - ragel_remove_last_data(&state.ragel); - } - - action finish { - ragel_finish_data(&state.ragel); - } - - action store { - ragel_store_data(&state.ragel); - } - - action begin { - ragel_begin_data(&state.ragel); - } - - action invalid_kind { - ragel_throw_error(&state.ragel, "invalid kind"); - } - - action invalid_type { - ragel_throw_error(&state.ragel, "invalid type"); - } - - action error { - ragel_throw_error(&state.ragel, "malformed input (machine failed here or in previous or next expression)"); - } - - action line { - ragel_advance_line(&state.ragel); - } - - # Semantic - ws = space; - valid = ^cntrl; - es = '\\'; - delim = ';'; - quote = ['"]; - bopen = '{'; - bclose = '}'; - newline = '\n'; - octal = [0-7]; - hex = '0x' <: xdigit+; - decimal = ([1-9] <: digit*) | '0'; - comment = '//' <: valid* :>> newline; - escape = es <: ('x' <: xdigit+ | [abfnrtv\\'"e] | octal{1,3}); - type = 'u8' | 'u16' | 'u32' | 'u64' | 's8' | 's16' | 's32' | 's64'; - kind = 'ascii' | 'utf8' | 'sjis' | 'hex' | 'pad'; - reserved = 'struct' | type | kind; - var = ((alpha | '_') <: (alnum | '_')*) - reserved; - - # Catchers - catch_var = var >begin $store %finish %push_var; - catch_struct = ('struct' $store ws+ >store <: var $store) >begin %finish %push_var; - catch_type = (catch_struct | type >begin $store %push_var %remove) $!invalid_type; - catch_hex = hex >begin $store %push_hex %remove; - catch_decimal = decimal >begin $store %push_dec %remove; - catch_string = quote <: (escape %convert_escape | print)* >begin $store %finish %push_str :>> quote; - catch_array = '[' <: (catch_hex | catch_decimal | catch_string | catch_var) :>> ']'; - catch_kind 
= '=' ws* <: kind >begin $store %push_var %remove $!invalid_kind; - - # Actions - field = catch_type %field_type ws+ <: catch_var %field_name ws* <: (catch_array %field_array ws*)? <: (catch_kind %field_kind ws*)? :>> delim %field; - container = catch_struct %container_name ws* :>> bopen <: (ws | comment | field)* :>> bclose ws* delim; - line = valid* :>> newline @line; - main := (ws | comment | container)* & line* $!error; -}%% - -void -fspec_parse(struct fspec *fspec) -{ - int cs; - %% write init; - - (void)fspec_en_main; - assert(fspec); - assert(fspec->ops.read); - assert(fspec->ops.field); - - struct state state = { - .ragel = { - .lineno = 1, - .mem = { - .data = fspec->mem.data, - .size = fspec->mem.size, - }, - }, - }; - - for (bool ok = true; ok;) { - const size_t bytes = fspec->ops.read(fspec, state.ragel.buf, 1, sizeof(state.ragel.buf)); - ok = ragel_confirm_input(&state.ragel, bytes); - %% write exec; - } -} diff --git a/src/ragel/ragel.h b/src/ragel/ragel.h index af06f4a..b2c7572 100644 --- a/src/ragel/ragel.h +++ b/src/ragel/ragel.h @@ -1,236 +1,30 @@ #pragma once -#include <stdlib.h> -#include <stdio.h> -#include <stdarg.h> -#include <stddef.h> +#include <stdint.h> #include <stdbool.h> -#include <string.h> -#include <ctype.h> -#include <assert.h> -#include <limits.h> -#include <err.h> -struct ragel { - struct { - uint8_t *data; // data\0another_data\0 - const uint8_t *cur; // data\0another_data\0cursor - size_t written, size; // amount of data written / size of data - } mem; +struct ragel_mem { + const char *data, *end; + bool binary; // binary input bit +}; - char buf[4096]; // block of input data +struct ragel { + struct ragel_mem input; // block of input data + uint64_t lineno; // current line const char *p, *pe, *eof; // see ragel doc - size_t lineno; // current line + const char *cl; // current line start + const char *name; // may be current file name for example + bool error; // error thrown bit }; -static inline void 
-ragel_get_current_line(const struct ragel *ragel, size_t *out_lineno, size_t *out_ls, size_t *out_le, size_t *out_ws, size_t *out_we) -{ - assert(out_ls && out_le && out_ws && out_we); - assert(ragel->p >= ragel->buf && ragel->pe >= ragel->p); - - size_t ls, le, ws, we; - size_t off = ragel->p - ragel->buf; - size_t lineno = ragel->lineno; - const size_t end = ragel->pe - ragel->buf; - - // rewind to first non-space - for (; off > 0 && (isspace(ragel->buf[off]) || !ragel->buf[off]); --off) { - if (lineno > 0 && ragel->buf[off] == '\n') - --lineno; - } - - for (ls = off; ls > 0 && ragel->buf[ls] != '\n'; --ls); // beginning of line - for (le = off; le < end && ragel->buf[le] != '\n'; ++le); // end of line - for (; ls < le && isspace(ragel->buf[ls]); ++ls); // strip leading whitespace - for (ws = off; ws > ls && isspace(ragel->buf[ws]); --ws); // rewind to first non-space - for (; ws > 0 && ws > ls && !isspace(ragel->buf[ws - 1]); --ws); // find word start - for (we = ws; we < le && !isspace(ragel->buf[we]); ++we); // find word ending - - assert(we >= ws && ws >= ls && le >= ls && le >= we); - *out_lineno = lineno; - *out_ls = ls; - *out_le = le; - *out_ws = ws; - *out_we = we; -} - -__attribute__((format(printf, 2, 3))) -static inline void -ragel_throw_error(const struct ragel *ragel, const char *fmt, ...) -{ - assert(ragel && fmt); - - size_t lineno, ls, le, ws, we; - ragel_get_current_line(ragel, &lineno, &ls, &le, &ws, &we); - assert(le - ls <= INT_MAX && ws - ls <= INT_MAX); - - char msg[255]; - va_list args; - va_start(args, fmt); - vsnprintf(msg, sizeof(msg), fmt, args); - va_end(args); - - const int indent = 8; - const size_t mark = (we - ws ? we - ws : 1), cur = (ragel->p - ragel->buf) - ws; - warnx("\x1b[37m%zu: \x1b[31merror: \x1b[0m%s\n%*s%.*s", lineno, msg, indent, "", (int)(le - ls), ragel->buf + ls); - fprintf(stderr, "%*s%*s\x1b[31m", indent, "", (int)(ws - ls), ""); - for (size_t i = 0; i < mark; ++i) fputs((i == cur ? 
"^" : "~"), stderr); - fputs("\x1b[0m\n", stderr); - - exit(EXIT_FAILURE); -} - -static inline void -ragel_bounds_check_data(const struct ragel *ragel, const size_t nmemb) -{ - assert(ragel); - - if (ragel->mem.size < nmemb || ragel->mem.written >= ragel->mem.size - nmemb) - ragel_throw_error(ragel, "data storage limit exceeded: %zu bytes exceeds the maximum store size of %zu bytes", ragel->mem.written, ragel->mem.size); -} - -static inline void -ragel_replace_data(struct ragel *ragel, const size_t nmemb, char replacement) -{ - assert(ragel); - - if (ragel->mem.written < nmemb) - ragel_throw_error(ragel, "parse error: received escape conversion with mem.written of %zu, expected >= %zu", ragel->mem.written, nmemb); - - ragel->mem.data[(ragel->mem.written -= nmemb)] = replacement; - ragel->mem.data[++ragel->mem.written] = 0; -} - -static inline void -ragel_convert_escape(struct ragel *ragel) -{ - assert(ragel); - - if (ragel->mem.written < 2) - ragel_throw_error(ragel, "parse error: received escape conversion with mem.written of %zu, expected >= 2", ragel->mem.written); - - const struct { - const char *e; - const char v, b; - } map[] = { - { .e = "\\a", .v = '\a' }, - { .e = "\\b", .v = '\b' }, - { .e = "\\f", .v = '\f' }, - { .e = "\\n", .v = '\n' }, - { .e = "\\r", .v = '\r' }, - { .e = "\\t", .v = '\t' }, - { .e = "\\v", .v = '\v' }, - { .e = "\\\\", .v = '\\' }, - { .e = "\\'", .v = '\'' }, - { .e = "\\\"", .v = '"' }, - { .e = "\\e", .v = '\e' }, - { .e = "\\x", .b = 16 }, - { .e = "\\", .b = 8 }, - }; - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - const char *cur = (char*)ragel->mem.cur; - const size_t cur_sz = strlen(cur); - for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { - if (!strncmp(cur, map[i].e, strlen(map[i].e))) { - const char v = (!map[i].b ? 
map[i].v : strtol(cur + strlen(map[i].e), NULL, map[i].b)); - assert((map[i].b == 8 && cur_sz >= 2) || (map[i].b == 16 && cur_sz >= 2) || (map[i].b == 0 && cur_sz == 2)); - assert(map[i].b != 8 || isdigit(cur[1])); - ragel_replace_data(ragel, cur_sz, v); - return; - } - } -#undef ARRAY_SIZE - - ragel_throw_error(ragel, "parse error: received unknown escape conversion"); -} - -static inline void -ragel_dump_data(struct ragel *ragel, const size_t offset) -{ - const uint8_t *end = ragel->mem.data + ragel->mem.written; - for (const uint8_t *p = ragel->mem.data + offset; p && p < end; p = (uint8_t*)memchr(p, 0, end - p), p += !!p) - printf("%s\n", p); -} - -static inline const uint8_t* -ragel_search_data(const struct ragel *ragel, const size_t offset, const uint8_t *data, const size_t size) -{ - assert(ragel && data); - - const uint8_t *end = ragel->mem.data + ragel->mem.written; - for (const uint8_t *p = ragel->mem.data + offset; p && p < end && (size_t)(end - p) >= size; p = (uint8_t*)memchr(p, 0, end - p), p += !!p) { - if (!memcmp(data, p, size)) - return p; - } - - return NULL; -} - -static inline const uint8_t* -ragel_search_str(const struct ragel *ragel, const size_t offset, const char *str) -{ - return ragel_search_data(ragel, offset, (const uint8_t*)str, strlen(str) + 1); -} - -static inline void -ragel_remove_last_data(struct ragel *ragel) -{ - assert(ragel); - const uint8_t *end = ragel->mem.data + ragel->mem.written; - const size_t size = end - ragel->mem.cur + 1; - assert(ragel->mem.written >= size); - ragel->mem.written -= size; - ragel->mem.data[ragel->mem.written] = 0; -} - -static inline void -ragel_finish_data(struct ragel *ragel) -{ - assert(ragel); - - const uint8_t *end = ragel->mem.data + ragel->mem.written, *p; - if ((p = ragel_search_data(ragel, 0, ragel->mem.cur, end - ragel->mem.cur + 1))) { - ragel_remove_last_data(ragel); - ragel->mem.cur = p; - } -} - -static inline void -ragel_store_data(struct ragel *ragel) -{ - 
ragel_bounds_check_data(ragel, 1); - ragel->mem.data[ragel->mem.written++] = *ragel->p; - ragel->mem.data[ragel->mem.written] = 0; -} - -static inline void -ragel_begin_data(struct ragel *ragel) -{ - ragel_bounds_check_data(ragel, 1); - ragel->mem.written += (ragel->mem.written > 0); - ragel->mem.cur = ragel->mem.data + ragel->mem.written; -} - -static inline void -ragel_advance_line(struct ragel *ragel) -{ - assert(ragel); - ++ragel->lineno; -} +__attribute__((format(printf, 2, 3))) void +ragel_throw_error(struct ragel *ragel, const char *fmt, ...); -static inline bool -ragel_confirm_input(struct ragel *ragel, const size_t bytes) -{ - assert(ragel); +void +ragel_set_name(struct ragel *ragel, const char *name); - if (bytes > sizeof(ragel->buf)) - errx(EXIT_FAILURE, "%s: gave larger buffer than %zu", __func__, sizeof(ragel->buf)); +void +ragel_advance_line(struct ragel *ragel); - const bool in_eof = (bytes < sizeof(ragel->buf)); - ragel->p = ragel->buf; - ragel->pe = ragel->p + bytes; - ragel->eof = (in_eof ? 
ragel->pe : NULL); - return !in_eof; -} +void +ragel_feed_input(struct ragel *ragel, const bool eof, const struct ragel_mem *input); diff --git a/src/ragel/ragel.rl b/src/ragel/ragel.rl new file mode 100644 index 0000000..48c4229 --- /dev/null +++ b/src/ragel/ragel.rl @@ -0,0 +1,88 @@ +#include "ragel.h" +#include <inttypes.h> +#include <stdio.h> +#include <stdarg.h> +#include <assert.h> + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +%%{ + machine ragel; + write data noerror nofinal; + + action red { fputs("\x1b[31m", stderr); } + action reset { fputs("\x1b[0m", stderr); } + action end { fputs("\x1b[0m\n", stderr); } + action mark { fputc('^', stderr); } + action tail { fputc('~', stderr); } + action lead { fputc(' ', stderr); } + + word = alnum*; + token = ' ' | punct; + until_err = (any when { fpc != *error })*; + search_err := ((any | token %{ *error = fpc; }) when { fpc != ragel->p })*; + print_err := (until_err %red <: word %reset <: (any - '\n')*) ${ fputc(fc, stderr); } >lead %!end %/end; + print_mark := (until_err ${ fputc(' ', stderr); } %red %mark <: any word $tail) >lead %!end %/end; +}%% + +static void +ragel_exec_error(const struct ragel *ragel, const int start_cs, const char **error) +{ + (void)ragel_start; + assert(ragel && ragel->cl && error); + int cs = start_cs; + const char *p = ragel->cl, *pe = ragel->pe, *eof = ragel->eof; + %% write exec; +} + +void +ragel_throw_error(struct ragel *ragel, const char *fmt, ...) +{ + assert(ragel && fmt); + ragel->error = true; + + const char *error = ragel->p; + + if (!ragel->input.binary) + ragel_exec_error(ragel, ragel_en_search_err, &error); + + const char *name = (ragel->name ? 
ragel->name : ""); + uint64_t column = (error - ragel->cl); + fprintf(stderr, "\x1b[37m%s:%" PRIu64 ":%" PRIu64 " \x1b[31merror: \x1b[0m", name, ragel->lineno, column); + + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fputc('\n', stderr); + + if (!ragel->input.binary) { + ragel_exec_error(ragel, ragel_en_print_err, &error); + ragel_exec_error(ragel, ragel_en_print_mark, &error); + } +} + +void +ragel_set_name(struct ragel *ragel, const char *name) +{ + assert(ragel); + ragel->name = name; +} + +void +ragel_advance_line(struct ragel *ragel) +{ + assert(ragel); + ++ragel->lineno; + ragel->cl = ragel->p; +} + +void +ragel_feed_input(struct ragel *ragel, const bool eof, const struct ragel_mem *input) +{ + assert(ragel); + ragel->input = *input; + ragel->cl = ragel->p = ragel->input.data; + ragel->pe = ragel->input.end; + ragel->eof = (eof ? ragel->pe : NULL); +} diff --git a/vim/filespec.vim b/vim/filespec.vim index 5e43fc5..077f41c 100644 --- a/vim/filespec.vim +++ b/vim/filespec.vim @@ -12,7 +12,7 @@ syn region fsComment start="//" skip="\\$" end="$" keepend contains=@fsCommentGr syn keyword fsStructure struct union syn keyword fsType s8 s16 s32 s64 syn keyword fsType u8 u16 u32 u64 -syn keyword fsKind ascii utf8 sjis pad hex +syn keyword fsConstant nul dec hex str syn case ignore syn match fsNumbers display transparent "\<\d\|\.\d" contains=fsNumber,fsFloat,fsOctalError,fsOctal @@ -29,12 +29,12 @@ syn match fsOctalError display contained "0\o*[89]\d*" syn case match syn match fsSpecial display contained "\\\(x\x\+\|\o\{1,3}\|.\|$\)" -syn region fsString start=+"+ skip=+\\\\\|\\"+ end=+"+ contains=fsSpecial,@Spell extend -syn match fsCharacter "'[^']*'" contains=fsSpecial +syn match fsString1 "'[^']*'" contains=fsSpecial +syn match fsString2 '"[^"]*"' contains=fsSpecial syn match fsBlock "[{}]" syn match fsBracket "[\[\]]" -syn match fsOperator display "[-+&|<>=!*\/~.,;:%&^?()]" contains=fsComment,fsKind +syn match fsOperator 
display "[-+&|<>=!*\/~.,;:%&^?()]" contains=fsComment " Define the default highlighting. " Only used when an item doesn't have highlighting yet @@ -42,14 +42,14 @@ hi def link fsTodo Todo hi def link fsComment Comment hi def link fsStructure Structure hi def link fsType Type -hi def link fsKind Constant +hi def link fsConstant Constant hi def link fsNumber Number hi def link fsOctal Number hi def link fsOctalZero PreProc hi def link fsFloat Float hi def link fsOctalError Error -hi def link fsString Constant -hi def link fsCharacter Character +hi def link fsString1 Character +hi def link fsString2 Character hi def link fsSpecial SpecialChar hi def link fsBlock Constant hi def link fsBracket Constant |