summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJari Vetoniemi <mailroxas@gmail.com>2017-04-20 16:49:35 +0300
committerJari Vetoniemi <mailroxas@gmail.com>2017-05-01 22:58:22 +0300
commit29086b1d12a2c28cffdbfbf0b3990a7bd75506b9 (patch)
tree8acd48bc30932812744c0adb102d7a7add494357 /src
parent76b8c9e03c97b16d9ff97f3b79c0ecbff0f5e7f2 (diff)
work in progress
Diffstat (limited to 'src')
-rw-r--r--src/bin/fw/uneaf.c208
-rw-r--r--src/bin/misc/dec2bin.c (renamed from src/utils/dec2bin.c)2
-rw-r--r--src/bin/xi/xi2path.c (renamed from src/xi/xi2path.c)2
-rw-r--r--src/bin/xi/xi2path.h (renamed from src/xi/xi2path.h)0
-rw-r--r--src/bin/xi/xidec.c (renamed from src/xi/xidec.c)2
-rw-r--r--src/bin/xi/xifile.c (renamed from src/xi/xifile.c)3
-rw-r--r--src/bin/xi/xils.c (renamed from src/xi/xils.c)2
-rw-r--r--src/dump.c834
-rw-r--r--src/fspec/bcode-internal.h16
-rw-r--r--src/fspec/bcode.c189
-rw-r--r--src/fspec/bcode.h64
-rw-r--r--src/fspec/lexer.h17
-rw-r--r--src/fspec/lexer.rl616
-rw-r--r--src/fspec/memory.h8
-rw-r--r--src/fspec/validator.h17
-rw-r--r--src/fspec/validator.rl237
-rw-r--r--src/ragel/fspec.h77
-rw-r--r--src/ragel/fspec.rl329
-rw-r--r--src/ragel/ragel.h244
-rw-r--r--src/ragel/ragel.rl88
20 files changed, 2150 insertions, 805 deletions
diff --git a/src/bin/fw/uneaf.c b/src/bin/fw/uneaf.c
new file mode 100644
index 0000000..f23c6d6
--- /dev/null
+++ b/src/bin/fw/uneaf.c
@@ -0,0 +1,208 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <err.h>
+#include <sys/stat.h>
+#include <zlib.h>
+
+static const char *stdin_name = "/dev/stdin";
+/*
+ * Inflate a deflate stream that is held entirely in memory.
+ *
+ * NOTE(review): this looks like zlib's own uncompress2() with the init call
+ * changed to inflateInit2(..., -15); per the zlib manual a negative
+ * windowBits selects raw deflate (no zlib header or trailer). zlib >= 1.2.9
+ * exports a function with this exact name, so confirm that this non-static
+ * definition does not clash at link time. No caller is visible in this diff.
+ *
+ * dest/destLen:     output buffer and its capacity. After inflation *destLen
+ *                   holds the number of bytes produced. If *destLen is 0 on
+ *                   entry a one byte scratch buffer is used instead and
+ *                   *destLen is not written.
+ * source/sourceLen: input buffer and its length. After inflation *sourceLen
+ *                   holds the number of input bytes consumed.
+ *
+ * Returns Z_OK when inflate() reported Z_STREAM_END, Z_DATA_ERROR for
+ * Z_NEED_DICT or for Z_BUF_ERROR while output space remains, and otherwise
+ * the inflateInit2()/inflate() result unchanged.
+ */
+int ZEXPORT uncompress2 (dest, destLen, source, sourceLen)
+ Bytef *dest;
+ uLongf *destLen;
+ const Bytef *source;
+ uLong *sourceLen;
+{
+ z_stream stream;
+ int err;
+ const uInt max = (uInt)-1; /* largest slice avail_in/avail_out can describe */
+ uLong len, left; /* input not yet handed to zlib / output space not yet handed to zlib */
+ Byte buf[1]; /* for detection of incomplete stream when *destLen == 0 */
+
+ len = *sourceLen;
+ if (*destLen) {
+ left = *destLen;
+ *destLen = 0;
+ }
+ else {
+ left = 1;
+ dest = buf;
+ }
+
+ stream.next_in = (z_const Bytef *)source;
+ stream.avail_in = 0; /* filled in slice by slice inside the loop */
+ stream.zalloc = (alloc_func)0;
+ stream.zfree = (free_func)0;
+ stream.opaque = (voidpf)0;
+
+ err = inflateInit2(&stream, -15); /* negative windowBits: raw deflate, 32K window */
+ if (err != Z_OK) return err;
+
+ stream.next_out = dest;
+ stream.avail_out = 0; /* filled in slice by slice inside the loop */
+
+ do {
+ if (stream.avail_out == 0) { /* hand over the next slice of output space */
+ stream.avail_out = left > (uLong)max ? max : (uInt)left;
+ left -= stream.avail_out;
+ }
+ if (stream.avail_in == 0) { /* hand over the next slice of input */
+ stream.avail_in = len > (uLong)max ? max : (uInt)len;
+ len -= stream.avail_in;
+ }
+ err = inflate(&stream, Z_NO_FLUSH);
+ } while (err == Z_OK);
+
+ *sourceLen -= len + stream.avail_in; /* consumed = total - never handed over - handed over but unread */
+ if (dest != buf)
+ *destLen = stream.total_out;
+ else if (stream.total_out && err == Z_BUF_ERROR)
+ left = 1; /* makes the Z_BUF_ERROR test below map to Z_DATA_ERROR */
+
+ inflateEnd(&stream);
+ return err == Z_STREAM_END ? Z_OK :
+ err == Z_NEED_DICT ? Z_DATA_ERROR :
+ err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR :
+ err;
+}
+
+/*
+ * Inflate a zlib stream into a heap buffer.
+ *
+ * buf/buf_sz:   compressed input.
+ * out_dec:      in: NULL or a malloc'd buffer to reuse; out: the realloc'd
+ *               buffer holding the result (the caller frees it).
+ * inout_dec_sz: in: expected decompressed size, or 0 if unknown;
+ *               out: actual decompressed size.
+ *
+ * With a size hint exactly one attempt is made. Without one the buffer
+ * starts at twice the input size and doubles for as long as zlib reports
+ * Z_BUF_ERROR. Every failure terminates the process.
+ *
+ * NOTE(review): despite the name this inflates, and it calls zlib's
+ * uncompress() (zlib-wrapped data) rather than the raw-deflate
+ * uncompress2() defined earlier in this file -- confirm which of the two
+ * the #EMZ payload actually needs.
+ */
+static void
+zdeflate(const uint8_t *buf, const size_t buf_sz, uint8_t **out_dec, size_t *inout_dec_sz)
+{
+    assert(out_dec && inout_dec_sz);
+
+    const size_t hint = *inout_dec_sz;
+    uLongf dsize = (hint ? hint : buf_sz * 2), bsize;
+
+    /* Empty input without a hint: 0 * 2 stays 0, which used to loop forever
+     * (or die on realloc(ptr, 0) returning NULL). Start from a real size. */
+    if (!dsize)
+        dsize = 64;
+
+    int ret;
+    for (;;) {
+        bsize = dsize;
+        if (!(*out_dec = realloc(*out_dec, bsize)))
+            err(EXIT_FAILURE, "realloc(%zu)", (size_t)bsize);
+
+        ret = uncompress(*out_dec, &bsize, buf, buf_sz);
+
+        /* only keep growing when the size was a guess to begin with */
+        if (ret != Z_BUF_ERROR || hint)
+            break;
+
+        if (dsize > (uLongf)-1 / 2)
+            errx(EXIT_FAILURE, "uncompress(%zu, %zu): output size overflow", (size_t)dsize, buf_sz);
+
+        dsize *= 2;
+    }
+
+    if (ret != Z_OK)
+        errx(EXIT_FAILURE, "uncompress(%zu, %zu) == %d", (size_t)dsize, buf_sz, ret);
+
+    *inout_dec_sz = bsize;
+}
+
+/*
+ * fopen() wrapper that never returns NULL: on failure it reports the path,
+ * the mode and the errno text through err() and exits with EXIT_FAILURE.
+ */
+static FILE*
+fopen_or_die(const char *path, const char *mode)
+{
+    assert(path && mode);
+
+    FILE *stream = fopen(path, mode);
+    if (stream == NULL)
+        err(EXIT_FAILURE, "fopen(%s, %s)", path, mode);
+
+    return stream;
+}
+
+/*
+ * Create every parent directory of `path`, i.e. each prefix that ends just
+ * before a '/'. The last component is taken to be a file name and is not
+ * created.
+ *
+ * Best effort: mkdir() results are ignored on purpose because most calls hit
+ * directories that already exist; a real failure shows up as soon as
+ * write_data_to() tries to open the file.
+ */
+static void
+mkdirp(const char *path)
+{
+    assert(path);
+
+    /* Work on a private copy. The previous version cast away const and
+     * patched the caller's string in place, which is undefined behaviour
+     * when the argument is a string literal. */
+    const size_t len = strlen(path);
+    char *copy;
+    if (!(copy = malloc(len + 1)))
+        err(EXIT_FAILURE, "malloc(%zu)", len + 1);
+
+    memcpy(copy, path, len + 1);
+
+    for (char *s = copy; *s; ++s) {
+        /* s == copy is the root of an absolute path: nothing to create */
+        if (*s != '/' || s == copy)
+            continue;
+
+        *s = 0;
+        mkdir(copy, 0755);
+        *s = '/';
+    }
+
+    free(copy);
+}
+
+/*
+ * Write one archive member to `path`, creating parent directories first.
+ * A member that starts with the "#EMZ" magic is inflated before it is
+ * written; anything else is written verbatim. Every I/O failure is fatal.
+ *
+ * NOTE(review): the header fields are used in host byte order -- presumably
+ * the format is little-endian; confirm before running on a big-endian host.
+ */
+static void
+write_data_to(const uint8_t *data, const size_t size, const char *path)
+{
+    assert(data && path);
+    mkdirp(path);
+    FILE *f = fopen_or_die(path, "wb");
+
+    struct header {
+        uint8_t magic[4];
+        uint32_t unknown;
+        uint32_t size;
+        uint32_t offset;
+    } __attribute__((packed)) header;
+
+    warnx("%s", path);
+
+    /* Only look at the header when the member is large enough to hold one;
+     * the unconditional memcpy used to read past the end of short members. */
+    if (size >= sizeof(header) && !memcmp(data, "#EMZ", sizeof(header.magic))) {
+        memcpy(&header, data, sizeof(header));
+
+        /* size - offset would wrap around for a bogus offset */
+        if (header.offset > size)
+            errx(EXIT_FAILURE, "'%s': payload offset %zu is past the end of the data (%zu)", path, (size_t)header.offset, size);
+
+        uint8_t *buf = NULL;
+        size_t dec_size = header.size;
+        zdeflate(data + header.offset, size - header.offset, &buf, &dec_size);
+
+        if (fwrite(buf, 1, dec_size, f) != dec_size)
+            err(EXIT_FAILURE, "fwrite(%s, %zu)", path, dec_size);
+
+        free(buf);
+    } else if (fwrite(data, 1, size, f) != size) {
+        err(EXIT_FAILURE, "fwrite(%s, %zu)", path, size);
+    }
+
+    /* fclose() flushes, so a full disk may only be reported here */
+    if (fclose(f) != 0)
+        err(EXIT_FAILURE, "fclose(%s)", path);
+}
+
+/*
+ * Return nonzero if any '/'-separated component of `path` is "..".
+ */
+static int
+has_parent_ref(const char *path)
+{
+    for (const char *p = path;;) {
+        const char *end = strchr(p, '/');
+        const size_t len = (end ? (size_t)(end - p) : strlen(p));
+
+        if (len == 2 && p[0] == '.' && p[1] == '.')
+            return 1;
+
+        if (!end)
+            return 0;
+
+        p = end + 1;
+    }
+}
+
+/*
+ * Extract every member of the #EAF archive at `path` ("-" means stdin) into
+ * the directory `outdir`.
+ *
+ * The archive starts with a fixed header followed by `count` entry records;
+ * each record names a member and gives its absolute offset and size. For
+ * every record the stream position is saved, the member is read from its
+ * offset and written out, and the position is restored for the next record.
+ * The stream therefore has to be seekable; a pipe on stdin is rejected with
+ * an error instead of silently extracting garbage.
+ *
+ * Entries whose path contains a ".." component are skipped with a warning so
+ * that an archive cannot write outside of `outdir`. All other problems are
+ * fatal.
+ *
+ * NOTE(review): header and entry fields are used in host byte order --
+ * presumably the format is little-endian; confirm.
+ */
+static void
+unpack(const char *path, const char *outdir)
+{
+    assert(path && outdir);
+    const char *name = (!strcmp(path, "-") ? stdin_name : path);
+    FILE *f = (name == stdin_name ? stdin : fopen_or_die(name, "rb"));
+
+    struct header {
+        uint8_t magic[4];
+        uint16_t major, minor;
+        uint64_t size;
+        uint32_t count;
+        uint64_t unknown;
+        uint8_t padding[100];
+    } __attribute__((packed)) header;
+
+    if (fread(&header, 1, sizeof(header), f) != sizeof(header))
+        err(EXIT_FAILURE, "fread(%zu)", sizeof(header));
+
+    if (memcmp(header.magic, "#EAF", sizeof(header.magic)))
+        errx(EXIT_FAILURE, "'%s' is not a #EAF file", name);
+
+    for (size_t i = 0; i < header.count; ++i) {
+        struct file {
+            char path[256];
+            uint64_t offset, size;
+            uint8_t padding[16];
+        } __attribute__((packed)) file;
+
+        if (fread(&file, 1, sizeof(file), f) != sizeof(file))
+            err(EXIT_FAILURE, "fread(%zu)", sizeof(file));
+
+        /* the on-disk name is not guaranteed to be NUL-terminated */
+        file.path[sizeof(file.path) - 1] = 0;
+
+        if (has_parent_ref(file.path)) {
+            warnx("skipping '%s': path escapes the output directory", file.path);
+            continue;
+        }
+
+        /* fseek() takes a long and malloc() a size_t; refuse values that do
+         * not survive the conversion instead of truncating them */
+        const long offset = (long)file.offset;
+        const size_t size = (size_t)file.size;
+        if (offset < 0 || (uint64_t)offset != file.offset || (uint64_t)size != file.size)
+            errx(EXIT_FAILURE, "'%s': offset or size out of range", file.path);
+
+        fpos_t pos;
+        if (fgetpos(f, &pos) != 0)
+            err(EXIT_FAILURE, "fgetpos(%s)", name);
+
+        /* malloc(0) may legitimately return NULL; empty members are fine */
+        uint8_t *data;
+        if (!(data = malloc(size ? size : 1)))
+            err(EXIT_FAILURE, "malloc(%zu)", size);
+
+        if (fseek(f, offset, SEEK_SET) != 0)
+            err(EXIT_FAILURE, "fseek(%s, %ld)", name, offset);
+
+        if (fread(data, 1, size, f) != size)
+            err(EXIT_FAILURE, "fread(%zu)", size);
+
+        char out[4096];
+        const int n = snprintf(out, sizeof(out), "%s/%s", outdir, file.path);
+        if (n < 0 || (size_t)n >= sizeof(out))
+            errx(EXIT_FAILURE, "output path too long: '%s/%s'", outdir, file.path);
+
+        write_data_to(data, size, out);
+        free(data);
+
+        if (fsetpos(f, &pos) != 0)
+            err(EXIT_FAILURE, "fsetpos(%s)", name);
+    }
+
+    /* leave stdin open so that a later "-" argument still works */
+    if (f != stdin)
+        fclose(f);
+}
+
+/*
+ * Entry point: argv[1] is the output directory, every following argument is
+ * an archive that gets unpacked into it.
+ */
+int
+main(int argc, char *argv[])
+{
+    if (argc < 3)
+        errx(EXIT_FAILURE, "usage: %s outdir file ...", argv[0]);
+
+    const char *outdir = argv[1];
+    for (char **arg = argv + 2; arg < argv + argc; ++arg)
+        unpack(*arg, outdir);
+
+    return EXIT_SUCCESS;
+}
diff --git a/src/utils/dec2bin.c b/src/bin/misc/dec2bin.c
index 97e59bb..be1dd5e 100644
--- a/src/utils/dec2bin.c
+++ b/src/bin/misc/dec2bin.c
@@ -8,7 +8,7 @@ int
main(int argc, char *argv[])
{
if (argc < 3)
- errx(EXIT_FAILURE, "usage: %s <u8|u16|u32|u64> number\n", argv[0]);
+ errx(EXIT_FAILURE, "usage: %s <u8|u16|u32|u64> number", argv[0]);
const struct {
const char *t;
diff --git a/src/xi/xi2path.c b/src/bin/xi/xi2path.c
index bd9c702..4b4c519 100644
--- a/src/xi/xi2path.c
+++ b/src/bin/xi/xi2path.c
@@ -7,7 +7,7 @@ int
main(int argc, char *argv[])
{
if (argc < 2)
- errx(EXIT_FAILURE, "usage: %s id\n", argv[0]);
+ errx(EXIT_FAILURE, "usage: %s id", argv[0]);
char path[12];
xi2path(path, strtol(argv[1], NULL, 10));
diff --git a/src/xi/xi2path.h b/src/bin/xi/xi2path.h
index 954c554..954c554 100644
--- a/src/xi/xi2path.h
+++ b/src/bin/xi/xi2path.h
diff --git a/src/xi/xidec.c b/src/bin/xi/xidec.c
index 3df917f..cb4c1bb 100644
--- a/src/xi/xidec.c
+++ b/src/bin/xi/xidec.c
@@ -93,7 +93,7 @@ int
main(int argc, char *argv[])
{
if (argc < 2)
- errx(EXIT_FAILURE, "usage: %s (name | ability | spell | item | text) < data\n", argv[0]);
+ errx(EXIT_FAILURE, "usage: %s (name | ability | spell | item | text) < data", argv[0]);
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
diff --git a/src/xi/xifile.c b/src/bin/xi/xifile.c
index f1b2111..fb7f2f7 100644
--- a/src/xi/xifile.c
+++ b/src/bin/xi/xifile.c
@@ -153,6 +153,7 @@ detect(const char *path)
printf("%s: unknown\n", name);
}
}
+
fclose(f);
}
@@ -160,7 +161,7 @@ int
main(int argc, char *argv[])
{
if (argc < 2)
- errx(EXIT_FAILURE, "usage: %s file\n", argv[0]);
+ errx(EXIT_FAILURE, "usage: %s file ...", argv[0]);
for (int i = 1; i < argc; ++i)
detect(argv[i]);
diff --git a/src/xi/xils.c b/src/bin/xi/xils.c
index 9c9a75e..b29b54b 100644
--- a/src/xi/xils.c
+++ b/src/bin/xi/xils.c
@@ -78,7 +78,7 @@ main(int argc, char *argv[])
}
if (!gamedir)
- errx(EXIT_FAILURE, "usage: %s [-a|-v] gamedir\n", argv[0]);
+ errx(EXIT_FAILURE, "usage: %s [-a|-v] gamedir", argv[0]);
dump_tables(gamedir, (const char*[]){ "FTABLE.DAT", "VTABLE.DAT" }, 1, print_all, verbose);
diff --git a/src/dump.c b/src/dump.c
index 641bb55..8ff1466 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -6,13 +6,19 @@
#include <err.h>
#include <iconv.h>
+#include <errno.h>
#include <locale.h>
#include <langinfo.h>
+#include <squash.h>
-#include "ragel/fspec.h"
+#include <fspec/bcode.h>
+#include <fspec/lexer.h>
+#include <fspec/validator.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
static size_t
-to_hex(const char *buf, const size_t buf_sz, char *out, const size_t out_sz, const bool reverse)
+to_hex(const uint8_t *buf, const size_t buf_sz, char *out, const size_t out_sz, const bool reverse)
{
assert(out);
const char nibble[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
@@ -35,234 +41,688 @@ to_hex(const char *buf, const size_t buf_sz, char *out, const size_t out_sz, con
}
static void
-print_decimal(const char *buf, const bool is_signed, const size_t size, const size_t nmemb)
+print_dec(const uint8_t *buf, const size_t size, const bool is_signed)
{
- if (nmemb > 1)
- printf("{ ");
+ char hex[2 * sizeof(fspec_num) + 1];
+ to_hex(buf, size, hex, sizeof(hex), true);
- for (size_t n = 0; n < nmemb; ++n) {
- char hex[2 * sizeof(uint64_t) + 1];
- to_hex(buf + size * n, size, hex, sizeof(hex), true);
- const char *delim = (nmemb > 1 && n + 1 < nmemb ? ", " : "");
-
- if (is_signed) {
- printf("%ld%s", (int64_t)strtoll(hex, NULL, 16), delim);
- } else {
- printf("%lu%s", (uint64_t)strtoull(hex, NULL, 16), delim);
- }
+ static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t");
+
+ if (is_signed) {
+ printf("%ld", (int64_t)strtoll(hex, NULL, 16));
+ } else {
+ printf("%lu", (uint64_t)strtoull(hex, NULL, 16));
}
+}
+/*
+ * Print one `size`-byte element of `buf` as an unsigned decimal. Thin adapter
+ * that gives print_dec() the two-argument shape print_array() expects from
+ * its callback.
+ */
+static void
+print_udec(const uint8_t *buf, const size_t size)
+{
+ print_dec(buf, size, false);
+}
+/*
+ * Print one `size`-byte element of `buf` as a signed decimal. Thin adapter
+ * that gives print_dec() the two-argument shape print_array() expects from
+ * its callback.
+ *
+ * NOTE(review): print_dec() parses a hex rendering of the bytes with
+ * strtoll(), so values narrower than 64 bits presumably never come out
+ * negative -- confirm whether sign extension is missing.
+ */
+static void
+print_sdec(const uint8_t *buf, const size_t size)
+{
+ print_dec(buf, size, true);
+}
- printf("%s\n", (nmemb > 1 ? " }" : ""));
+static void
+print_hex(const uint8_t *buf, const size_t size)
+{
+ char hex[2 * sizeof(fspec_num) + 1];
+ to_hex(buf, size, hex, sizeof(hex), false);
+ printf("%s", hex);
}
static void
-print_hex(const char *buf, const size_t size, const size_t nmemb)
+print_array(const uint8_t *buf, const size_t size, const size_t nmemb, void (*fun)(const uint8_t *buf, const size_t size))
{
- if (nmemb > 1)
+ const int indent = 4;
+ if (nmemb > 8) {
+ printf("{\n%*s", indent, "");
+ } else if (nmemb > 1) {
printf("{ ");
+ }
for (size_t n = 0; n < nmemb; ++n) {
- char hex[2 * sizeof(uint64_t) + 1];
- to_hex(buf + size * n, size, hex, sizeof(hex), false);
- printf("%s%s", hex, (nmemb > 1 && n + 1 < nmemb ? ", " : ""));
+ fun(buf + n * size, size);
+ printf("%s", (nmemb > 1 && n + 1 < nmemb ? ", " : ""));
+
+ if (!((n + 1) % 8))
+ printf("\n%*s", indent, "");
}
- printf("%s\n", (nmemb > 1 ? " }" : ""));
+ printf("%s\n", (nmemb > 8 ? "\n}" : (nmemb > 1 ? " }" : "")));
}
static void
-print_chars(const char *buf, const size_t size, const size_t nmemb)
+print_str(const char *buf, const size_t size, const size_t nmemb)
{
- assert(size == sizeof(char));
+ const bool has_nl = memchr(buf, '\n', size * nmemb);
+ if (has_nl)
+ puts("```");
- for (size_t n = 0; n < nmemb && buf[n] != 0; ++n)
+ for (size_t n = 0; n < size * nmemb && buf[n] != 0; ++n)
printf("%c", buf[n]);
+
+ puts((has_nl ? "```" : ""));
}
+struct code {
+ const enum fspec_op *start, *end, *data;
+};
+
static void
-print_encoded(const char *buf, const char *from, const char *to, const size_t size, const size_t nmemb)
+dump_ops(const struct code *code)
{
- assert(from && size == sizeof(char));
+ for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, false)) {
+ printf("%*s- ", (*op == FSPEC_OP_ARG ? 2 : 0), "");
+ switch (*op) {
+ case FSPEC_OP_HEADER:
+ printf("header\n");
+ break;
+
+ case FSPEC_OP_DECLARATION:
+ printf("declaration\n");
+ break;
+
+ case FSPEC_OP_READ:
+ printf("read\n");
+ break;
+
+ case FSPEC_OP_GOTO:
+ printf("goto\n");
+ break;
+
+ case FSPEC_OP_FILTER:
+ printf("filter\n");
+ break;
+
+ case FSPEC_OP_VISUAL:
+ printf("visual\n");
+ break;
+
+ case FSPEC_OP_ARG:
+ {
+ const enum fspec_arg *arg = (void*)(op + 1);
+ printf("arg ");
+ switch (*arg) {
+ case FSPEC_ARG_STR:
+ printf("str %s\n", fspec_arg_get_cstr(arg, code->data));
+ break;
+
+ case FSPEC_ARG_VAR:
+ printf("var %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg));
+ break;
+
+ case FSPEC_ARG_NUM:
+ printf("num %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg));
+ break;
+
+ case FSPEC_ARG_OFF:
+ printf("off %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg));
+ break;
+
+ case FSPEC_ARG_DAT:
+ printf("dat %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg));
+ break;
+
+ case FSPEC_ARG_EOF:
+ printf("eof\n");
+ break;
+
+ case FSPEC_ARG_LAST:
+ break;
+ }
+ }
+ break;
- if (!to) {
- static const char *sys_encoding;
- if (!sys_encoding) {
- setlocale(LC_ALL, "");
- sys_encoding = nl_langinfo(CODESET);
+ case FSPEC_OP_LAST:
+ break;
}
+ }
+}
- to = sys_encoding;
+static const enum fspec_op*
+get_last_struct(const struct code *code)
+{
+ const enum fspec_op *last = NULL;
+ for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, true)) {
+ const enum fspec_arg *arg;
+ if (*op == FSPEC_OP_DECLARATION &&
+ (arg = fspec_op_get_arg(op, code->end, 1, 1<<FSPEC_ARG_NUM)) &&
+ fspec_arg_get_num(arg) == FSPEC_DECLARATION_STRUCT) {
+ last = op;
+ }
}
+ return last;
+}
- iconv_t iv;
- if ((iv = iconv_open(to, from)) == (iconv_t)-1)
- err(EXIT_FAILURE, "iconv_open(%s, %s)", to, from);
+struct dynbuf {
+ void *data;
+ size_t len, written;
+};
- const char *in = buf;
- size_t in_left = nmemb;
- do {
- char enc[1024], *out = enc;
- size_t out_left = sizeof(enc);
+static inline void
+dynbuf_resize(struct dynbuf *buf, const size_t size)
+{
+ assert(buf);
+ if (!(buf->data = realloc(buf->data, size)))
+ err(EXIT_FAILURE, "realloc(%zu)", size);
- if (iconv(iv, (char**)&in, &in_left, &out, &out_left) == (size_t)-1)
- err(EXIT_FAILURE, "iconv(%s, %s)", to, from);
+ buf->len = size;
+}
- print_chars(enc, 1, sizeof(enc) - out_left);
- } while (in_left > 0);
+static inline void
+dynbuf_resize_if_needed(struct dynbuf *buf, const size_t size)
+{
+ if (buf->len >= size)
+ return;
- iconv_close(iv);
- puts("");
+ dynbuf_resize(buf, size);
}
-struct container;
-struct field {
- struct fspec_field f;
- struct container *c, *link;
- uint64_t value;
-};
+static inline void
+dynbuf_grow_if_needed(struct dynbuf *buf, const size_t nmemb)
+{
+ assert(buf);
+ if (buf->len >= nmemb && buf->written <= buf->len - nmemb)
+ return;
-struct container {
- struct fspec_container c;
- struct field fields[255];
- size_t num_fields;
-};
+ dynbuf_resize(buf, buf->written + nmemb);
+}
-static size_t
-field_get_buffer(const struct field *field, FILE *f, char **buf)
+static inline void
+dynbuf_append(struct dynbuf *buf, const void *data, const size_t data_sz)
{
- assert(field && f && buf);
-
- switch (field->f.array.type) {
- case FSPEC_ARRAY_FIXED:
- if (!(*buf = calloc(field->f.array.nmemb, field->f.type.size)))
- err(EXIT_FAILURE, "calloc(%zu, %zu)", field->f.array.nmemb, field->f.type.size);
+ dynbuf_grow_if_needed(buf, data_sz);
+ memcpy((char*)buf->data + buf->written, data, data_sz);
+ buf->written += data_sz;
+ assert(buf->written <= buf->len);
+}
- if (fread(*buf, field->f.type.size, field->f.array.nmemb, f) != field->f.array.nmemb)
- return 0;
+static inline void /* Mark the buffer as empty; the allocation and `len` are kept for reuse. */
+dynbuf_reset(struct dynbuf *buf)
+{
+ assert(buf);
+ buf->written = 0;
+}
- return field->f.array.nmemb;
+static inline void /* Free the storage and zero the whole struct (data, len, written). */
+dynbuf_release(struct dynbuf *buf)
+{
+ assert(buf);
+ free(buf->data);
+ *buf = (struct dynbuf){0}; /* leaves data == NULL, so a second release is harmless */
+}
- case FSPEC_ARRAY_MATCH:
- {
- size_t off = 0;
- const size_t msz = field->f.array.match.size;
- for (size_t len = 0;; ++off) {
- if (off >= (len ? len - 1 : len) && !(*buf = realloc(*buf, (len += 1024))))
- err(EXIT_FAILURE, "realloc(%zu)", len);
+static void
+display(const void *buf, const size_t size, const size_t nmemb, const bool is_signed, const enum fspec_visual visual)
+{
+ switch (visual) {
+ case FSPEC_VISUAL_NUL:
+ puts("...");
+ break;
- assert(off < len);
- if (fread(*buf + off, 1, 1, f) != 1)
- return 0;
+ case FSPEC_VISUAL_STR:
+ print_str(buf, size, nmemb);
+ break;
- if (off >= msz && !memcmp(field->f.array.match.data, *buf + off - msz, msz))
- break;
- }
+ case FSPEC_VISUAL_HEX:
+ print_array(buf, size, nmemb, print_hex);
+ break;
- (*buf)[off] = 0;
- return off;
- }
+ case FSPEC_VISUAL_DEC:
+ print_array(buf, size, nmemb, (is_signed ? print_sdec : print_udec));
break;
- case FSPEC_ARRAY_VAR:
- for (size_t i = 0; i < field->c->num_fields; ++i) {
- if (!strcmp(field->c->fields[i].f.name, field->f.array.var))
- return field->c->fields[i].value;
- }
+ case FSPEC_VISUAL_LAST:
break;
}
-
- return 0;
}
+struct decl {
+ struct dynbuf buf;
+ const char *name;
+ const void *start, *end;
+ size_t nmemb;
+ uint8_t size;
+ enum fspec_visual visual;
+ enum fspec_declaration declaration;
+};
+
static void
-container_process(struct container *container, FILE *f);
+decl_display(const struct decl *decl)
+{
+ assert(decl);
+ assert(decl->size * decl->nmemb <= decl->buf.len);
+ printf("%s: ", decl->name);
+ display(decl->buf.data, decl->size, decl->nmemb, false, decl->visual);
+}
+
+static fspec_num
+decl_get_num(const struct decl *decl)
+{
+ assert(decl);
+ assert(decl->nmemb == 1);
+ assert(decl->size * decl->nmemb <= decl->buf.len);
+ char hex[2 * sizeof(fspec_num) + 1];
+ to_hex(decl->buf.data, decl->size, hex, sizeof(hex), true);
+ static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t");
+ return (fspec_num)strtoull(hex, NULL, 16);
+}
+
+static const char*
+decl_get_cstr(const struct decl *decl)
+{
+ assert(decl);
+ return decl->buf.data;
+}
+
+struct context {
+ struct code code;
+ struct decl *decl;
+ fspec_num decl_count;
+};
+
+static fspec_num
+var_get_num(const struct context *context, const enum fspec_arg *arg)
+{
+ assert(context && arg);
+ return decl_get_num(&context->decl[fspec_arg_get_num(arg)]);
+}
+
+static const char*
+var_get_cstr(const struct context *context, const enum fspec_arg *arg)
+{
+ assert(context && arg);
+ return decl_get_cstr(&context->decl[fspec_arg_get_num(arg)]);
+}
+
+enum type {
+ TYPE_NUM,
+ TYPE_STR,
+};
+
+static enum type
+var_get_type(const struct context *context, const enum fspec_arg *arg)
+{
+ assert(context && arg);
+ const struct decl *decl = &context->decl[fspec_arg_get_num(arg)];
+ switch (decl->visual) {
+ case FSPEC_VISUAL_DEC:
+ case FSPEC_VISUAL_HEX:
+ case FSPEC_VISUAL_NUL:
+ return TYPE_NUM;
+
+ case FSPEC_VISUAL_STR:
+ return TYPE_STR;
+
+ case FSPEC_VISUAL_LAST:
+ break;
+ }
+ return ~0;
+}
static void
-field_process(struct field *field, FILE *f)
+filter_decompress(const struct context *context, struct decl *decl)
{
- assert(field && f);
+ assert(decl);
- char *buf = NULL;
- const size_t nmemb = field_get_buffer(field, f, &buf);
+ const enum fspec_arg *arg;
+ if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<<FSPEC_ARG_STR)))
+ errx(EXIT_FAILURE, "missing compression");
- if (field->link) {
- for (size_t i = 0; i < nmemb; ++i)
- container_process(field->link, f);
- } else {
- printf("%s(%zu) %s[%zu] = ", field->f.type.name, field->f.type.size, field->f.name, nmemb);
+ SquashCodec *codec;
+ const char *algo = fspec_arg_get_cstr(arg, context->code.data);
+ if (!(codec = squash_get_codec(algo)))
+ errx(EXIT_FAILURE, "unknown compression '%s'", algo);
- if (field->f.kind.flags & FSPEC_KIND_IGNORE) {
- puts("...");
- } else if (field->f.kind.flags & FSPEC_KIND_ENCODING) {
- print_encoded(buf, field->f.kind.name, NULL, field->f.type.size, nmemb);
- } else if (field->f.kind.flags & FSPEC_KIND_HEXADECIMAL) {
- print_hex(buf, field->f.type.size, nmemb);
- } else {
- print_decimal(buf, (field->f.type.flags & FSPEC_TYPE_SIGNED), field->f.type.size, nmemb);
+ SquashOptions *opts;
+ if (!(opts = squash_options_new(codec, NULL)))
+ errx(EXIT_FAILURE, "squash_options_new");
+
+ size_t dsize = squash_codec_get_uncompressed_size(codec, decl->buf.len, decl->buf.data);
+ dsize = (dsize ? dsize : decl->buf.len * 2);
+
+ {
+ const enum fspec_arg *var = arg;
+ if ((arg = fspec_arg_next(arg, context->code.end, 1, 1<<FSPEC_ARG_NUM | 1<<FSPEC_ARG_VAR))) {
+ var = arg;
+
+ switch (*var) {
+ case FSPEC_ARG_NUM:
+ dsize = fspec_arg_get_num(arg);
+ break;
+
+ case FSPEC_ARG_VAR:
+ dsize = var_get_num(context, arg);
+ break;
+
+ default:
+ break;
+ }
}
- if (nmemb == 1) {
- char hex[2 * sizeof(uint64_t) + 1];
- to_hex(buf, field->f.type.size, hex, sizeof(hex), true);
- field->value = strtoull(hex, NULL, 16);
+ for (; (var = fspec_arg_next(var, context->code.end, 1, 1<<FSPEC_ARG_STR));) {
+ const char *key = fspec_arg_get_cstr(var, context->code.data);
+ if (!(var = fspec_arg_next(var, context->code.end, 1, ~0)))
+ errx(EXIT_FAILURE, "expected argument for key '%s'", key);
+
+ switch (*var) {
+ case FSPEC_ARG_STR:
+ squash_options_set_string(opts, key, fspec_arg_get_cstr(var, context->code.data));
+ break;
+
+ case FSPEC_ARG_NUM:
+ squash_options_set_int(opts, key, fspec_arg_get_num(var));
+ break;
+
+ case FSPEC_ARG_VAR:
+ if (var_get_type(context, var) == TYPE_STR) {
+ squash_options_set_string(opts, key, var_get_cstr(context, var));
+ } else {
+ squash_options_set_int(opts, key, var_get_num(context, var));
+ }
+ break;
+
+ default:
+ break;
+ }
}
}
- free(buf);
+ // what a horrible api
+ squash_object_ref(opts);
+
+ SquashStatus r;
+ struct dynbuf buf = {0};
+ dynbuf_resize(&buf, dsize);
+ while ((r = squash_codec_decompress_with_options(codec, &buf.len, buf.data, decl->buf.len, decl->buf.data, opts)) == SQUASH_BUFFER_FULL)
+ dynbuf_resize(&buf, dsize *= 2);
+
+ dynbuf_resize_if_needed(&buf, (buf.written = buf.len));
+ squash_object_unref(opts);
+
+ if (r != SQUASH_OK)
+ errx(EXIT_FAILURE, "squash_codec_decompress(%zu, %zu) = %d: %s", dsize, decl->buf.len, r, squash_status_to_string(r));
+
+ dynbuf_release(&decl->buf);
+ decl->buf = buf;
+ decl->nmemb = buf.len / decl->size;
}
static void
-container_process(struct container *container, FILE *f)
+filter_decode(const struct context *context, struct decl *decl)
{
- assert(container && f);
+ assert(decl);
- for (size_t i = 0; i < container->num_fields; ++i)
- field_process(&container->fields[i], f);
-}
+ const enum fspec_arg *arg;
+ if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<<FSPEC_ARG_STR)))
+ errx(EXIT_FAILURE, "missing encoding");
-#define container_of(ptr, type, member) ((type *)((char *)(1 ? (ptr) : &((type *)0)->member) - offsetof(type, member)))
+ const char *encoding = fspec_arg_get_cstr(arg, context->code.data);
-struct fspec_file {
- // TODO: Rethink container/field
- // I think I want just flat structure of key / value pairs in the end
- // Especially if I want to express members of struct members (e.g. struct a { struct b b; u8 c[b.x]; };)
- struct container containers[32];
- struct fspec fspec;
- FILE *handle;
- size_t num_containers;
-};
+ static const char *sys_encoding;
+ if (!sys_encoding) {
+ setlocale(LC_ALL, "");
+ sys_encoding = nl_langinfo(CODESET);
+ }
+
+ iconv_t iv;
+ if ((iv = iconv_open(sys_encoding, encoding)) == (iconv_t)-1)
+ err(EXIT_FAILURE, "iconv_open(%s, %s)", sys_encoding, encoding);
+
+ struct dynbuf buf = {0};
+ const uint8_t *in = decl->buf.data;
+ size_t in_left = decl->buf.written;
+ do {
+ char enc[1024], *out = enc;
+ size_t out_left = sizeof(enc);
+
+ errno = 0;
+ if (iconv(iv, (char**)&in, &in_left, &out, &out_left) == (size_t)-1 && errno != E2BIG)
+ err(EXIT_FAILURE, "iconv(%s, %s)", sys_encoding, encoding);
+
+ dynbuf_append(&buf, enc, sizeof(enc) - out_left);
+ } while (in_left > 0);
+
+ iconv_close(iv);
+
+ dynbuf_release(&decl->buf);
+ decl->buf = buf;
+ decl->nmemb = buf.len / decl->size;
+}
static void
-fspec_field(struct fspec *fspec, const struct fspec_container *container, const struct fspec_field *field)
+call(const struct context *context, FILE *f)
{
- assert(fspec && container);
- struct fspec_file *f = container_of(fspec, struct fspec_file, fspec);
+ assert(context && f);
- if (!f->num_containers || memcmp(container, &f->containers[f->num_containers - 1].c, sizeof(*container)))
- f->containers[f->num_containers++].c = *container;
+ struct decl *decl = NULL;
+ for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) {
+ if (decl && op == decl->end) {
+ decl_display(decl);
+ decl = NULL;
+ }
- struct container *c = &f->containers[f->num_containers - 1];
+ switch (*op) {
+ case FSPEC_OP_DECLARATION:
+ {
+ const enum fspec_arg *arg;
+ arg = fspec_op_get_arg(op, context->code.end, 2, 1<<FSPEC_ARG_NUM);
+ decl = &context->decl[fspec_arg_get_num(arg)];
+ dynbuf_reset(&decl->buf);
+ }
+ break;
+
+ case FSPEC_OP_READ:
+ {
+ assert(decl);
+ const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM);
+ static_assert(CHAR_BIT == 8, "doesn't work otherwere right now");
+ decl->size = fspec_arg_get_num(arg) / 8;
+ decl->nmemb = 0;
+
+ for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) {
+ switch (*var) {
+ case FSPEC_ARG_NUM:
+ case FSPEC_ARG_VAR:
+ {
+ const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var));
+ if (v == 0) {
+ goto noop;
+ } else if (v > 1) {
+ const size_t nmemb = (decl->nmemb ? decl->nmemb : 1) * v;
+ dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb);
+ const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f);
+ decl->buf.written += decl->size * read;
+ decl->nmemb += read;
+ }
+ }
+ break;
+
+ case FSPEC_ARG_STR:
+ break;
+
+ case FSPEC_ARG_EOF:
+ {
+ const size_t nmemb = (decl->nmemb ? decl->nmemb : 1);
+ size_t read = 0, r = nmemb;
+ while (r == nmemb) {
+ dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb);
+ read += (r = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f));
+ decl->buf.written += decl->size * r;
+ };
+ decl->nmemb += read;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+noop:
+
+ if (!fspec_arg_next(arg, context->code.end, 1, ~0)) {
+ dynbuf_grow_if_needed(&decl->buf, decl->size * 1);
+ const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, 1, f);
+ decl->buf.written += decl->size * read;
+ decl->nmemb = read;
+ }
+ }
+ break;
+
+ case FSPEC_OP_GOTO:
+ {
+ decl = NULL;
+ const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_VAR);
+ const struct decl *d = &context->decl[fspec_arg_get_num(arg)];
+ struct context c = *context;
+ c.code.start = d->start;
+ c.code.end = d->end;
+
+ for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) {
+ switch (*var) {
+ case FSPEC_ARG_NUM:
+ case FSPEC_ARG_VAR:
+ {
+ const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var));
+ for (fspec_num i = 0; i < v; ++i)
+ call(&c, f);
+ }
+ break;
+
+ // XXX: How to handle STR with stdin?
+ // With fseek would be easy.
+ case FSPEC_ARG_STR:
+ break;
+
+ case FSPEC_ARG_EOF:
+ while (!feof(f))
+ call(&c, f);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (!fspec_arg_next(arg, context->code.end, 1, ~0))
+ call(&c, f);
+ }
+ break;
+
+ case FSPEC_OP_FILTER:
+ {
+ assert(decl);
+ const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_STR);
+
+ const struct {
+ const char *name;
+ void (*fun)(const struct context*, struct decl*);
+ } map[] = {
+ { .name = "encoding", .fun = filter_decode },
+ { .name = "compression", .fun = filter_decompress },
+ };
+
+ const char *filter = fspec_arg_get_cstr(arg, context->code.data);
+ for (size_t i = 0; i < ARRAY_SIZE(map); ++i) {
+ if (!strcmp(filter, map[i].name)) {
+ struct context c = *context;
+ c.code.start = op;
+ map[i].fun(&c, decl);
+ break;
+ }
+
+ if (i == ARRAY_SIZE(map) - 1)
+ warnx("unknown filter '%s'", filter);
+ }
+ }
+ break;
- if (field->type.flags & FSPEC_TYPE_CONTAINER) {
- for (size_t i = 0; i < f->num_containers - 1; ++i) {
- if (strcmp(field->type.name, f->containers[i].c.name))
- continue;
+ case FSPEC_OP_VISUAL:
+ {
+ assert(decl);
+ const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM);
+ decl->visual = fspec_arg_get_num(arg);
+ }
+ break;
- c->fields[c->num_fields].link = &f->containers[i];
- break;
+ case FSPEC_OP_ARG:
+ case FSPEC_OP_HEADER:
+ case FSPEC_OP_LAST:
+ break;
}
}
- c->fields[c->num_fields].c = c;
- c->fields[c->num_fields++].f = *field;
+ if (decl && context->code.end == decl->end)
+ decl_display(decl);
}
-static size_t
-fspec_read(struct fspec *fspec, char *buf, const size_t size, const size_t nmemb)
+static void
+setup(const struct context *context)
{
- assert(fspec && buf);
- struct fspec_file *f = container_of(fspec, struct fspec_file, fspec);
- return fread(buf, size, nmemb, f->handle);
+ assert(context);
+
+ for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) {
+ switch (*op) {
+ case FSPEC_OP_DECLARATION:
+ {
+ const enum fspec_arg *arg[4];
+ arg[0] = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM);
+ arg[1] = fspec_arg_next(arg[0], context->code.end, 1, 1<<FSPEC_ARG_NUM);
+ arg[2] = fspec_arg_next(arg[1], context->code.end, 1, 1<<FSPEC_ARG_OFF);
+ arg[3] = fspec_arg_next(arg[2], context->code.end, 1, 1<<FSPEC_ARG_STR);
+ const fspec_num id = fspec_arg_get_num(arg[1]);
+ struct decl *decl = &context->decl[id];
+ decl->declaration = fspec_arg_get_num(arg[0]);
+ decl->name = fspec_arg_get_cstr(arg[3], context->code.data);
+ decl->visual = FSPEC_VISUAL_DEC;
+ decl->start = op;
+ decl->end = (char*)op + fspec_arg_get_num(arg[2]);
+ assert(!decl->buf.data);
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+static void
+execute(const struct fspec_mem *mem)
+{
+ assert(mem);
+
+ struct context context = {
+ .code.start = mem->data,
+ .code.end = (void*)((char*)mem->data + mem->len),
+ .code.data = mem->data
+ };
+
+ printf("output: %zu bytes\n", mem->len);
+ dump_ops(&context.code);
+
+ const enum fspec_arg *arg = fspec_op_get_arg(context.code.data, context.code.end, 2, 1<<FSPEC_ARG_NUM);
+ context.decl_count = fspec_arg_get_num(arg);
+
+ if (!(context.decl = calloc(context.decl_count, sizeof(*context.decl))))
+ err(EXIT_FAILURE, "calloc(%zu, %zu)", context.decl_count, sizeof(*context.decl));
+
+ setup(&context);
+
+ puts("\nexecution:");
+ context.code.start = get_last_struct(&context.code);
+ assert(context.code.start);
+ call(&context, stdin);
+
+ for (fspec_num i = 0; i < context.decl_count; ++i)
+ dynbuf_release(&context.decl[i].buf);
+
+ free(context.decl);
}
static FILE*
@@ -277,34 +737,70 @@ fopen_or_die(const char *path, const char *mode)
return f;
}
+/** Recover a pointer to the enclosing `type` from `ptr`, which points at its `member`; the `1 ? (ptr) : &...->member` arm only type-checks `ptr` against the member. */
+#define container_of(ptr, type, member) ((type *)((char *)(1 ? (ptr) : &((type *)0)->member) - offsetof(type, member)))
+
+/** Lexer instance paired with the FILE it reads from; fspec_lexer_read() gets back here via container_of(). */
+struct lexer {
+ struct fspec_lexer lexer;
+ FILE *file;
+};
+
+/** Read callback for the lexer: forwards to fread() on the FILE owned by the enclosing struct lexer. */
+static size_t
+fspec_lexer_read(struct fspec_lexer *lexer, void *ptr, const size_t size, const size_t nmemb)
+{
+   assert(lexer && ptr);
+   FILE *source = container_of(lexer, struct lexer, lexer)->file;
+   return fread(ptr, size, nmemb, source);
+}
+
+/**
+ * Read callback for the validator. The bytecode already sits in
+ * validator->mem.input, so nothing is copied: the first call reports the
+ * whole buffer as read and zeroes the remaining length, the next call
+ * reports 0 items.
+ */
+static size_t
+fspec_validator_read(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb)
+{
+   assert(validator && ptr);
+   struct fspec_mem *input = &validator->mem.input;
+   assert(ptr == input->data);
+
+   const size_t items = input->len / size;
+   assert(input->len ? items == nmemb : !items);
+
+   input->len -= items * size;
+   assert(!input->len);
+   return items;
+}
+
+/**
+ * Usage: <prog> file.spec < data
+ * Compiles the spec in argv[1] to bytecode, validates the bytecode, then
+ * executes it (execute() reads the data from stdin).
+ */
int
main(int argc, const char *argv[])
{
if (argc < 2)
- errx(EXIT_FAILURE, "usage: %s file.spec < data\n", argv[0]);
+ errx(EXIT_FAILURE, "usage: %s file.spec < data", argv[0]);
+
+ // bytecode storage; declared here because `bcode` keeps pointing at it after the lexer scope ends
+ char output[4096];
+ struct fspec_mem bcode = {0};
+
+ // stage 1: lex the spec file into `output`
+ {
+ char input[4096];
+ struct lexer l = {
+ .lexer = {
+ .ops.read = fspec_lexer_read,
+ .mem.input = { .data = input, sizeof(input) },
+ .mem.output = { .data = output, sizeof(output) },
+ },
+ .file = fopen_or_die(argv[1], "rb"),
+ };
- uint8_t data[4096] = {0};
+ if (!fspec_lexer_parse(&l.lexer, argv[1]))
+ exit(EXIT_FAILURE);
- struct fspec_file file = {
- .fspec = {
- .ops = {
- .read = fspec_read,
- .field = fspec_field,
- },
- .mem = {
- .data = data,
- .size = sizeof(data),
- },
- },
- .handle = fopen_or_die(argv[1], "rb"),
- };
+ fclose(l.file);
+ // the lexer shrinks mem.output.len to the number of bytes it wrote
+ bcode = l.lexer.mem.output;
+ }
- fspec_parse(&file.fspec);
+ // stage 2: validate the generated bytecode in place
+ {
+ struct fspec_validator validator = {
+ .ops.read = fspec_validator_read,
+ .mem.input = bcode,
+ };
- if (!file.num_containers)
- errx(EXIT_FAILURE, "'%s' contains no containers", argv[1]);
+ if (!fspec_validator_parse(&validator, argv[1]))
+ exit(EXIT_FAILURE);
+ }
- container_process(&file.containers[file.num_containers - 1], stdin);
- fclose(file.handle);
+ // stage 3: run the bytecode
+ execute(&bcode);
return EXIT_SUCCESS;
}
diff --git a/src/fspec/bcode-internal.h b/src/fspec/bcode-internal.h
new file mode 100644
index 0000000..8c9ce74
--- /dev/null
+++ b/src/fspec/bcode-internal.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <inttypes.h>
+#include <stdint.h>
+
+/** maximum size of string literals */
+#define PRI_FSPEC_STRSZ PRIu8
+typedef uint8_t fspec_strsz;
+
+/** maximum range of variable ids */
+#define PRI_FSPEC_VAR PRIu16
+typedef uint16_t fspec_var;
+
+/** maximum range of bytecode offsets */
+#define PRI_FSPEC_OFF PRIu32
+typedef uint32_t fspec_off;
diff --git a/src/fspec/bcode.c b/src/fspec/bcode.c
new file mode 100644
index 0000000..0a89260
--- /dev/null
+++ b/src/fspec/bcode.c
@@ -0,0 +1,189 @@
+#include <fspec/bcode.h>
+#include "bcode-internal.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <err.h>
+
+static_assert(sizeof(fspec_off) <= sizeof(((struct fspec_mem*)0)->len), "fspec_off should not be larger than what fspec_mem can represent");
+static_assert(sizeof(enum fspec_op) == sizeof(uint8_t), "enum fspec_op is expected to have size of uint8_t");
+static_assert(sizeof(enum fspec_arg) == sizeof(uint8_t), "enum fspec_arg is expected to have size of uint8_t");
+
+/** Size in bytes of the payload that follows the type byte of @p arg (0 for FSPEC_ARG_EOF). */
+static fspec_off
+arg_data_len(const enum fspec_arg *arg)
+{
+   assert(arg);
+   fspec_off len = 0;
+
+   switch (*arg) {
+      case FSPEC_ARG_DAT:
+         {
+            // a DAT payload is its own length field followed by that many bytes
+            struct fspec_mem blob;
+            fspec_arg_get_mem(arg, NULL, &blob);
+            len = sizeof(fspec_off) + blob.len;
+         }
+         break;
+
+      case FSPEC_ARG_OFF:
+      case FSPEC_ARG_STR:
+         len = sizeof(fspec_off);
+         break;
+
+      case FSPEC_ARG_NUM:
+         len = sizeof(fspec_num);
+         break;
+
+      case FSPEC_ARG_VAR:
+         len = sizeof(fspec_var);
+         break;
+
+      case FSPEC_ARG_EOF:
+         break;
+
+      case FSPEC_ARG_LAST:
+         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg);
+         break;
+   }
+
+   return len;
+}
+
+/** Total encoded size of an argument: the type byte plus its payload. */
+static fspec_off
+arg_len(const enum fspec_arg *arg)
+{
+   const fspec_off tag = sizeof(*arg);
+   return tag + arg_data_len(arg);
+}
+
+/**
+ * Resolve the memory an argument refers to.
+ *
+ * STR: the payload is an offset into @p data where a length-prefixed string
+ *      lives; out_mem covers the string bytes (after the length prefix).
+ * DAT: the payload is a length followed by inline bytes; out_mem covers them.
+ * VAR/NUM/OFF: out_mem covers the raw payload.
+ * EOF: out_mem is zeroed.
+ *
+ * @param data base of the bytecode buffer; only required for STR.
+ */
+void
+fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem)
+{
+   assert(arg && out_mem);
+   char *payload = (char*)arg + sizeof(*arg);
+
+   switch (*arg) {
+      case FSPEC_ARG_STR:
+         {
+            assert(data);
+            fspec_off off;
+            memcpy(&off, payload, sizeof(off));
+            char *entry = (char*)data + off;
+            fspec_strsz len;
+            memcpy(&len, entry, sizeof(len));
+            *out_mem = (struct fspec_mem){ .data = entry + sizeof(len), .len = len };
+         }
+         break;
+
+      case FSPEC_ARG_DAT:
+         {
+            fspec_off len;
+            memcpy(&len, payload, sizeof(len));
+            *out_mem = (struct fspec_mem){ .data = payload + sizeof(len), .len = len };
+         }
+         break;
+
+      case FSPEC_ARG_VAR:
+      case FSPEC_ARG_NUM:
+      case FSPEC_ARG_OFF:
+         *out_mem = (struct fspec_mem){ .data = payload, .len = arg_data_len(arg) };
+         break;
+
+      case FSPEC_ARG_EOF:
+         *out_mem = (struct fspec_mem){0};
+         break;
+
+      case FSPEC_ARG_LAST:
+         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg);
+         break;
+   }
+}
+
+/**
+ * Read the numeric value of an argument, widened to fspec_num.
+ *
+ * NUM is read as fspec_num, VAR as fspec_var, DAT and OFF as fspec_off
+ * (for DAT that is its length field). STR, EOF and LAST terminate the
+ * program via errx().
+ */
+fspec_num
+fspec_arg_get_num(const enum fspec_arg *arg)
+{
+   assert(arg);
+
+   // address the payload in bytes, the same way fspec_arg_get_mem() does,
+   // instead of relying on enum-element pointer arithmetic
+   const char *payload = (const char*)arg + sizeof(*arg);
+   fspec_num v = 0;
+
+   switch (*arg) {
+      case FSPEC_ARG_NUM:
+         memcpy(&v, payload, sizeof(v));
+         break;
+
+      case FSPEC_ARG_VAR:
+         {
+            fspec_var var;
+            memcpy(&var, payload, sizeof(var));
+            v = var;
+         }
+         break;
+
+      case FSPEC_ARG_DAT:
+      case FSPEC_ARG_OFF:
+         {
+            fspec_off off;
+            memcpy(&off, payload, sizeof(off));
+            v = off;
+         }
+         break;
+
+      case FSPEC_ARG_STR:
+      case FSPEC_ARG_EOF:
+      case FSPEC_ARG_LAST:
+         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg);
+         break;
+   }
+
+   return v;
+}
+
+/** Return the bytes of a STR argument as a C string; @p data is the base of the bytecode buffer. */
+const char*
+fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data)
+{
+   assert(arg && *arg == FSPEC_ARG_STR);
+   struct fspec_mem str = {0};
+   fspec_arg_get_mem(arg, data, &str);
+   return str.data;
+}
+
+/**
+ * Return the nth (1-based) argument following the op at @p start.
+ *
+ * @param expect bitmask of accepted types, built as (1 << FSPEC_ARG_x);
+ *               an argument of any other type terminates the program.
+ * @return the argument, or NULL when fewer than @p nth arguments directly
+ *         follow the op (a different op intervenes or the bytecode ends).
+ */
+const enum fspec_arg*
+fspec_op_get_arg(const enum fspec_op *start, const void *end, const uint8_t nth, const uint32_t expect)
+{
+   uint8_t i = 0;
+   const enum fspec_arg *arg = NULL;
+   for (const enum fspec_op *op = fspec_op_next(start, end, false); op && i < nth; op = fspec_op_next(op, end, false)) {
+      if (*op != FSPEC_OP_ARG)
+         return NULL;
+
+      arg = (void*)(op + 1);
+      assert(*arg >= 0 && *arg < FSPEC_ARG_LAST);
+      ++i;
+   }
+
+   // the bytecode ended before the nth argument: do not hand back an earlier one
+   if (i < nth)
+      return NULL;
+
+   if (arg && !(expect & (1<<*arg)))
+      errx(EXIT_FAILURE, "got unexpected argument of type %u", *arg);
+
+   return arg;
+}
+
+/**
+ * Return the nth argument after @p arg, with the same contract as
+ * fspec_op_get_arg(). A NULL @p arg yields NULL, so lookups can be chained
+ * without forming an invalid pointer.
+ */
+const enum fspec_arg*
+fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const uint32_t expect)
+{
+   if (!arg)
+      return NULL;
+
+   // the FSPEC_OP_ARG opcode sits one byte before the argument's type byte
+   return fspec_op_get_arg((const void*)(arg - 1), end, nth, expect);
+}
+
+/**
+ * Return the op that follows @p start, or NULL when @p end is reached.
+ *
+ * @param skip_args when true, FSPEC_OP_ARG entries (with their payload) are
+ *                  stepped over, so the next non-argument op is returned.
+ */
+const enum fspec_op*
+fspec_op_next(const enum fspec_op *start, const void *end, const bool skip_args)
+{
+   assert(start && end);
+   fspec_off off = sizeof(*start);
+   if ((void*)start < end && *start == FSPEC_OP_ARG)
+      off += arg_len((void*)(start + 1));
+
+   for (const enum fspec_op *op = start + off; (void*)start < end && (void*)op < end; ++op) {
+      if (*op >= FSPEC_OP_LAST)
+         errx(EXIT_FAILURE, "got unexpected opcode %u", *op);
+
+      if (skip_args && *op == FSPEC_OP_ARG) {
+         // jump over the type byte and payload; the loop's ++op consumes the opcode byte
+         op += arg_len((void*)(op + 1));
+         continue;
+      }
+
+      return op;
+   }
+
+   return NULL;
+}
diff --git a/src/fspec/bcode.h b/src/fspec/bcode.h
new file mode 100644
index 0000000..d84060e
--- /dev/null
+++ b/src/fspec/bcode.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <fspec/memory.h>
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+/** maximum range of numbers */
+#define PRI_FSPEC_NUM PRIu64
+typedef uint64_t fspec_num;
+
+/** Argument type tag; one byte in the bytecode, followed by the payload described per value. */
+enum fspec_arg {
+ /* fspec_off length followed by that many inline bytes */
+ FSPEC_ARG_DAT,
+ /* fspec_off payload */
+ FSPEC_ARG_OFF,
+ /* fspec_num payload */
+ FSPEC_ARG_NUM,
+ /* fspec_var payload */
+ FSPEC_ARG_VAR,
+ /* fspec_off payload locating a length-prefixed string in the bytecode buffer */
+ FSPEC_ARG_STR,
+ /* no payload */
+ FSPEC_ARG_EOF,
+ /* number of argument types; not a valid tag */
+ FSPEC_ARG_LAST,
+} __attribute__((packed));
+
+void
+fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem);
+
+fspec_num
+fspec_arg_get_num(const enum fspec_arg *arg);
+
+const char*
+fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data);
+
+const enum fspec_arg*
+fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const uint32_t expect);
+
+/** Kind of a declaration; stored as the first NUM argument of FSPEC_OP_DECLARATION. */
+enum fspec_declaration {
+ FSPEC_DECLARATION_STRUCT,
+ FSPEC_DECLARATION_MEMBER,
+ /* number of declaration kinds; not a valid kind */
+ FSPEC_DECLARATION_LAST,
+} __attribute__((packed));
+
+/** Visualization selector; stored as the NUM argument of FSPEC_OP_VISUAL. */
+enum fspec_visual {
+ FSPEC_VISUAL_NUL,
+ FSPEC_VISUAL_DEC,
+ FSPEC_VISUAL_HEX,
+ FSPEC_VISUAL_STR,
+ /* number of visualizations; not a valid selector */
+ FSPEC_VISUAL_LAST,
+} __attribute__((packed));
+
+/** Opcode; one byte in the bytecode. The argument layouts below are what the lexer emits. */
+enum fspec_op {
+ /* introduces an argument: followed by an enum fspec_arg byte and its payload */
+ FSPEC_OP_ARG,
+ /* NUM version, NUM declaration count, DAT string section */
+ FSPEC_OP_HEADER,
+ /* NUM kind, NUM id, OFF length, STR name */
+ FSPEC_OP_DECLARATION,
+ /* NUM bit width (8, 16, 32 or 64) */
+ FSPEC_OP_READ,
+ /* VAR id of the referenced declaration */
+ FSPEC_OP_GOTO,
+ /* STR filter name, then the filter's own arguments if any */
+ FSPEC_OP_FILTER,
+ /* NUM enum fspec_visual value */
+ FSPEC_OP_VISUAL,
+ /* number of opcodes; not a valid opcode */
+ FSPEC_OP_LAST,
+} __attribute__((packed));
+
+const enum fspec_op*
+fspec_op_next(const enum fspec_op *op, const void *end, const bool skip_args);
+
+const enum fspec_arg*
+fspec_op_get_arg(const enum fspec_op *op, const void *end, const uint8_t nth, const uint32_t expect);
diff --git a/src/fspec/lexer.h b/src/fspec/lexer.h
new file mode 100644
index 0000000..7b60e6b
--- /dev/null
+++ b/src/fspec/lexer.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <fspec/memory.h>
+
+#include <stdbool.h>
+
+struct fspec_lexer;
+
+/** Lexer configuration supplied by the caller. */
+struct fspec_lexer {
+   struct {
+      /** fread()-style callback that fills @p ptr with up to @p nmemb items of @p size bytes. */
+      size_t (*read)(struct fspec_lexer *lexer, void *ptr, const size_t size, const size_t nmemb);
+   } ops;
+
+   struct {
+      /** input: scratch buffer handed to ops.read; output: storage for the generated bytecode. */
+      struct fspec_mem input, output;
+   } mem;
+};
+
+/**
+ * Compile the spec obtained through lexer->ops.read into bytecode stored in
+ * lexer->mem.output; on return mem.output.len holds the bytes written.
+ *
+ * @param name name of the input, kept in the parser state.
+ * @return false when an error was reported while parsing.
+ */
+bool
+fspec_lexer_parse(struct fspec_lexer *lexer, const char *name);
diff --git a/src/fspec/lexer.rl b/src/fspec/lexer.rl
new file mode 100644
index 0000000..81390e2
--- /dev/null
+++ b/src/fspec/lexer.rl
@@ -0,0 +1,616 @@
+#include "ragel/ragel.h"
+#include <fspec/bcode.h>
+#include <fspec/lexer.h>
+#include "bcode-internal.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <err.h>
+
+#define PLACEHOLDER 0xDEADBEEF
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+typedef uint8_t fspec_strsz;
+
+/** Fixed-capacity byte buffer: `mem` is the backing storage, `written` the number of bytes in use. */
+struct membuf {
+ struct fspec_mem mem;
+ fspec_off written;
+};
+
+/** Terminate the program unless @p nmemb more bytes fit after the bytes already written. */
+static void
+membuf_bounds_check(const struct membuf *buf, const fspec_off nmemb)
+{
+   assert(buf);
+   const size_t capacity = buf->mem.len;
+
+   // written as subtractions so the comparison itself cannot overflow
+   if (nmemb > capacity || capacity - nmemb < buf->written)
+      errx(EXIT_FAILURE, "%s: %" PRI_FSPEC_OFF " bytes exceeds the maximum storage size of %zu bytes", __func__, buf->written + nmemb, buf->mem.len);
+}
+
+/** Copy @p data just past the written bytes WITHOUT advancing `written` (used to place a terminator). */
+static void
+membuf_terminate(struct membuf *buf, const void *data, const fspec_off data_sz)
+{
+   membuf_bounds_check(buf, data_sz);
+   char *tail = (char*)buf->mem.data + buf->written;
+   memcpy(tail, data, data_sz);
+}
+
+/** Overwrite @p data_sz bytes at offset @p off; the range must lie inside the backing storage. */
+static void
+membuf_replace(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz)
+{
+   assert(data_sz <= buf->mem.len && off <= buf->mem.len - data_sz);
+   char *dst = (char*)buf->mem.data + off;
+   memcpy(dst, data, data_sz);
+}
+
+/** Insert @p data at offset @p off, shifting the bytes already written after it towards the end. */
+static void
+membuf_append_at(struct membuf *buf, const fspec_off off, const void *data, const fspec_off data_sz)
+{
+   assert(off <= buf->written);
+   membuf_bounds_check(buf, data_sz);
+
+   // open a gap of data_sz bytes at `off`
+   char *gap = (char*)buf->mem.data + off;
+   const size_t tail = buf->written - off;
+   memmove(gap + data_sz, gap, tail);
+
+   membuf_replace(buf, off, data, data_sz);
+   buf->written += data_sz;
+   assert(buf->written <= buf->mem.len);
+}
+
+/** Append @p data after the bytes already written. */
+static void
+membuf_append(struct membuf *buf, const void *data, const fspec_off data_sz)
+{
+   const fspec_off tail = buf->written;
+   membuf_append_at(buf, tail, data, data_sz);
+}
+
+/** Scratch buffer for the token being lexed; `offset` marks where the most recently begun value starts inside `buf`. */
+struct varbuf {
+ struct membuf buf;
+ fspec_off offset;
+};
+
+/** Mark the current write position as the start of a new value. */
+static inline void
+varbuf_begin(struct varbuf *var)
+{
+   assert(var);
+   const fspec_off mark = var->buf.written;
+   var->offset = mark;
+   assert(var->offset <= var->buf.mem.len);
+}
+
+/** Discard everything in the scratch buffer. */
+static void
+varbuf_reset(struct varbuf *var)
+{
+   assert(var);
+   var->buf.written = 0;
+   var->offset = 0;
+}
+
+/** Drop the bytes written since the last varbuf_begin(), i.e. rewind `written` to `offset`. */
+static inline void
+varbuf_remove_last(struct varbuf *var)
+{
+   assert(var);
+   assert(var->offset <= var->buf.written);
+   var->buf.written = var->offset;
+   assert(var->buf.written <= var->buf.mem.len);
+}
+
+/** Regions of the output buffer, in layout order: the data (string) section precedes the code section. */
+enum section {
+ SECTION_DATA,
+ SECTION_CODE,
+ /* number of sections; not a valid section */
+ SECTION_LAST,
+};
+
+/**
+ * Bytecode under construction.
+ * decl[]: start of the currently open declaration of each kind, or NULL.
+ * end[]: one past the last byte of each section.
+ * strings: start of the string table inside the data section.
+ * declarations: number of declarations emitted so far (also the next id).
+ */
+struct codebuf {
+ struct membuf buf;
+ const void *decl[FSPEC_DECLARATION_LAST], *end[SECTION_LAST], *strings;
+ fspec_var declarations;
+};
+
+/**
+ * Insert @p data at the end of @p section and fix up every pointer that
+ * sits behind the insertion point: the end markers of this and all later
+ * sections, and, for data-section inserts, the open declaration pointers
+ * (which live in the code section).
+ */
+static void
+codebuf_append(struct codebuf *code, const enum section section, const void *data, const fspec_off data_sz)
+{
+   assert(code->end[section]);
+   char *base = code->buf.mem.data;
+   const fspec_off off = (char*)code->end[section] - base;
+   membuf_append_at(&code->buf, off, data, data_sz);
+
+   const char *limit = base + code->buf.mem.len;
+
+   for (enum section s = section; s < ARRAY_SIZE(code->end); ++s) {
+      code->end[s] = (char*)code->end[s] + data_sz;
+      assert((char*)code->end[s] <= limit);
+   }
+
+   if (section == SECTION_DATA) {
+      for (enum fspec_declaration d = 0; d < ARRAY_SIZE(code->decl); ++d) {
+         if (code->decl[d])
+            code->decl[d] = (char*)code->decl[d] + data_sz;
+
+         assert((char*)code->decl[d] <= limit);
+      }
+   }
+
+   assert(code->end[SECTION_DATA] <= code->end[SECTION_CODE]);
+   assert((char*)code->end[SECTION_CODE] == base + code->buf.written);
+}
+
+/** Emit a single opcode byte at the end of the code section. */
+static void
+codebuf_append_op(struct codebuf *code, const enum fspec_op op)
+{
+   const enum fspec_op opcode = op;
+   codebuf_append(code, SECTION_CODE, &opcode, sizeof(opcode));
+}
+
+/** Size in bytes of the fixed payload the lexer writes for an argument of @p type (0 for EOF). */
+static uint8_t
+arg_sizeof(const enum fspec_arg type)
+{
+   uint8_t size = 0;
+
+   switch (type) {
+      case FSPEC_ARG_NUM:
+         size = sizeof(fspec_num);
+         break;
+
+      case FSPEC_ARG_VAR:
+         size = sizeof(fspec_var);
+         break;
+
+      case FSPEC_ARG_DAT:
+      case FSPEC_ARG_OFF:
+      case FSPEC_ARG_STR:
+         size = sizeof(fspec_off);
+         break;
+
+      case FSPEC_ARG_EOF:
+         break;
+
+      case FSPEC_ARG_LAST:
+         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, type);
+   }
+
+   return size;
+}
+
+/**
+ * Emit an argument: FSPEC_OP_ARG, the type byte, then arg_sizeof(type)
+ * bytes copied from @p v.
+ *
+ * @param v payload source; may be NULL for types without a payload
+ *          (FSPEC_ARG_EOF), in which case nothing is copied.
+ */
+static void
+codebuf_append_arg(struct codebuf *code, const enum fspec_arg type, const void *v)
+{
+   assert(code);
+   codebuf_append_op(code, FSPEC_OP_ARG);
+   codebuf_append(code, SECTION_CODE, &type, sizeof(type));
+
+   // a zero-sized payload comes with v == NULL; passing that on to memcpy() is undefined
+   const uint8_t size = arg_sizeof(type);
+   if (size > 0)
+      codebuf_append(code, SECTION_CODE, v, size);
+}
+
+/** Overwrite the payload of an already emitted argument of the given @p type with the value at @p v. */
+static void
+codebuf_replace_arg(struct codebuf *code, const enum fspec_arg *arg, const enum fspec_arg type, const void *v)
+{
+   assert(code && arg);
+   assert(*arg == type);
+
+   // the payload starts right after the one-byte type tag
+   const char *payload = (const char*)arg + 1;
+   const fspec_off off = payload - (char*)code->buf.mem.data;
+   membuf_replace(&code->buf, off, v, arg_sizeof(type));
+}
+
+/**
+ * Search the string table in [start, end) for @p str. Each entry is a
+ * length byte, the characters, and a terminating byte.
+ *
+ * @param out_off receives the address of the matching entry (its length byte).
+ * @return true when a match was found.
+ */
+static bool
+get_string_offset(const void *start, const void *end, const void *str, const fspec_strsz str_sz, void const **out_off)
+{
+   assert(out_off);
+
+   const char *entry = start;
+   const char *const stop = end;
+
+   while (entry < stop) {
+      fspec_strsz len;
+      memcpy(&len, entry, sizeof(len));
+
+      if (len == str_sz && memcmp(entry + sizeof(len), str, len) == 0) {
+         *out_off = entry;
+         return true;
+      }
+
+      // skip the length byte, the characters and the terminator
+      entry += sizeof(len) + len + 1;
+   }
+
+   return false;
+}
+
+/**
+ * Emit a STR argument for @p str. The string is added to the string table
+ * (length byte, characters, zero terminator) unless an identical entry is
+ * already there; the argument stores the entry's offset from the buffer start.
+ */
+static void
+codebuf_append_arg_cstr(struct codebuf *code, const void *str, const fspec_strsz str_sz)
+{
+   const void *entry;
+   const bool known = get_string_offset(code->strings, code->end[SECTION_DATA], str, str_sz, &entry);
+
+   if (!known) {
+      // the new entry lands where the data section currently ends
+      entry = code->end[SECTION_DATA];
+      const char nul = 0;
+      codebuf_append(code, SECTION_DATA, &str_sz, sizeof(str_sz));
+      codebuf_append(code, SECTION_DATA, str, str_sz);
+      codebuf_append(code, SECTION_DATA, &nul, sizeof(nul));
+   }
+
+   const fspec_off off = (char*)entry - (char*)code->buf.mem.data;
+   codebuf_append_arg(code, FSPEC_ARG_STR, &off);
+}
+
+/**
+ * Find the first op of kind `op` in [start, end) whose nth argument is a
+ * STR equal to `name` (`name_sz` bytes). `data` is the buffer base used to
+ * resolve STR offsets.
+ *
+ * out_id, when non-NULL, receives a running id: it starts at the value of
+ * the second argument of `start` if `start` is a declaration (0 otherwise)
+ * and is incremented for every op of kind `op` that was compared and did
+ * not match.
+ *
+ * Returns the matching op or NULL.
+ */
+static const enum fspec_op*
+get_named_op(const enum fspec_op *start, const void *end, const void *data, const enum fspec_op op, const uint8_t nth, const void *name, const fspec_strsz name_sz, fspec_var *out_id)
+{
+ fspec_var id = 0;
+ if ((void*)start < end && *start == FSPEC_OP_DECLARATION)
+ id = fspec_arg_get_num(fspec_op_get_arg(start, end, 2, 1<<FSPEC_ARG_NUM));
+
+ for (const enum fspec_op *p = start; p; p = fspec_op_next(p, end, true)) {
+ const enum fspec_arg *arg;
+ // ops of another kind, or without the nth argument, do not advance the id
+ if (*p != op || !(arg = fspec_op_get_arg(p, end, nth, 1<<FSPEC_ARG_STR)))
+ continue;
+
+ struct fspec_mem str;
+ fspec_arg_get_mem(arg, data, &str);
+ if (str.len == name_sz && !memcmp(name, str.data, name_sz)) {
+ if (out_id)
+ *out_id = id;
+
+ return p;
+ }
+
+ ++id;
+ }
+
+ return NULL;
+}
+
+/**
+ * Look up a declaration by name (its 4th argument). Member lookups start
+ * at the currently open struct declaration, other lookups at the beginning
+ * of the code section.
+ */
+static const enum fspec_op*
+get_declaration(struct codebuf *code, const bool member, const struct fspec_mem *str, fspec_var *out_id)
+{
+   const void *from = code->end[SECTION_DATA];
+
+   if (member)
+      from = code->decl[FSPEC_DECLARATION_STRUCT];
+
+   return get_named_op(from, code->end[SECTION_CODE], code->buf.mem.data, FSPEC_OP_DECLARATION, 4, str->data, str->len, out_id);
+}
+
+/** Emit a VAR argument holding the id of the declaration named @p var; returns false when no such declaration exists. */
+static bool
+codebuf_append_arg_var(struct codebuf *code, const bool member, const struct fspec_mem *var)
+{
+   fspec_var id = -1;
+   const bool found = (get_declaration(code, member, var, &id) != NULL);
+
+   if (found)
+      codebuf_append_arg(code, FSPEC_ARG_VAR, &id);
+
+   return found;
+}
+
+/**
+ * Open a declaration: remember where it starts, then emit the op with its
+ * kind, a freshly assigned id and a placeholder length.
+ */
+static void
+codebuf_append_declaration(struct codebuf *code, const enum fspec_declaration decl)
+{
+   code->decl[decl] = code->end[SECTION_CODE];
+   codebuf_append_op(code, FSPEC_OP_DECLARATION);
+
+   const fspec_num kind = decl;
+   codebuf_append_arg(code, FSPEC_ARG_NUM, &kind);
+
+   const fspec_num id = code->declarations++;
+   codebuf_append_arg(code, FSPEC_ARG_NUM, &id);
+
+   const fspec_off length = PLACEHOLDER;
+   codebuf_append_arg(code, FSPEC_ARG_OFF, &length);
+}
+
+/** Discriminator for struct stack. */
+enum stack_type {
+ STACK_STR,
+ STACK_NUM,
+};
+
+/** The single most recently lexed value: a string or a number, as told by `type`. */
+struct stack {
+ union {
+ struct fspec_mem str;
+ uint64_t num;
+ };
+ enum stack_type type;
+};
+
+/** Human-readable name of a stack type, for diagnostics. */
+static const char*
+stack_type_to_str(const enum stack_type type)
+{
+   if (type == STACK_STR)
+      return "str";
+
+   if (type == STACK_NUM)
+      return "num";
+
+   return "unknown";
+}
+
+/** Terminate the program when the stacked value is not of the requested @p type. */
+static void
+stack_check_type(const struct stack *stack, const enum stack_type type)
+{
+   assert(stack);
+
+   if (stack->type == type)
+      return;
+
+   errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type));
+}
+
+/** Return the stacked string; terminates the program if the stacked value is not a string. */
+static const struct fspec_mem*
+stack_get_str(const struct stack *stack)
+{
+   stack_check_type(stack, STACK_STR);
+   const struct fspec_mem *str = &stack->str;
+   return str;
+}
+
+/** Return the stacked number; terminates the program if the stacked value is not a number. */
+static uint64_t
+stack_get_num(const struct stack *stack)
+{
+   stack_check_type(stack, STACK_NUM);
+   const uint64_t num = stack->num;
+   return num;
+}
+
+/** Lexer state: ragel bookkeeping, the last lexed value, the bytecode being built, and the token scratch buffer. */
+struct state {
+ struct ragel ragel;
+ struct stack stack;
+ struct codebuf out;
+ struct varbuf var;
+};
+
+/**
+ * Convert the digits collected since the last varbuf_begin() to a number
+ * in the given @p base, stack it, and drop the digits from the scratch
+ * buffer. A leading 'x' is skipped for base 16.
+ */
+static void
+state_stack_num(struct state *state, const uint8_t base)
+{
+   assert(state);
+   struct varbuf *var = &state->var;
+   membuf_terminate(&var->buf, (char[]){ 0 }, 1);
+
+   const char *digits = (char*)var->buf.mem.data + var->offset;
+   if (base == 16 && *digits == 'x')
+      ++digits;
+
+   state->stack.type = STACK_NUM;
+   state->stack.num = strtoll(digits, NULL, base);
+   varbuf_remove_last(var);
+}
+
+/** Emit a VAR argument for the declaration named @p str, reporting a lexer error when it is undeclared. */
+static void
+state_append_arg_var(struct state *state, const bool member, const struct fspec_mem *str)
+{
+   assert(state && str);
+   const bool declared = codebuf_append_arg_var(&state->out, member, str);
+
+   if (!declared)
+      ragel_throw_error(&state->ragel, "'%s' undeclared", (char*)str->data);
+}
+
+/** Open a declaration named @p str, reporting a lexer error first when the name is already declared. */
+static void
+state_append_declaration(struct state *state, const enum fspec_declaration decl, const struct fspec_mem *str)
+{
+   assert(state && str);
+   const bool is_member = (decl == FSPEC_DECLARATION_MEMBER);
+
+   if (get_declaration(&state->out, is_member, str, NULL) != NULL)
+      ragel_throw_error(&state->ragel, "'%s' redeclared", (char*)str->data);
+
+   codebuf_append_declaration(&state->out, decl);
+   codebuf_append_arg_cstr(&state->out, str->data, str->len);
+}
+
+/**
+ * Close the open declaration of kind @p decl: patch its placeholder length
+ * (3rd argument) with the distance from the declaration op to the current
+ * end of the code section, then mark the declaration as closed.
+ */
+static void
+state_finish_declaration(struct state *state, const enum fspec_declaration decl)
+{
+   assert(state && state->out.decl[decl]);
+   const void *start = state->out.decl[decl];
+   const char *end = state->out.end[SECTION_CODE];
+   const fspec_off off = end - (char*)start;
+
+   const enum fspec_arg *length = fspec_op_get_arg(start, end, 3, 1<<FSPEC_ARG_OFF);
+   codebuf_replace_arg(&state->out, length, FSPEC_ARG_OFF, &off);
+   state->out.decl[decl] = NULL;
+}
+
+%%{
+ machine fspec_lexer;
+ variable p state.ragel.p;
+ variable pe state.ragel.pe;
+ variable eof state.ragel.eof;
+ write data noerror nofinal;
+
+ action arg_eof {
+ codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL);
+ }
+
+ action arg_num {
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) });
+ }
+
+ action arg_str {
+ const struct fspec_mem *str = stack_get_str(&state.stack);
+ codebuf_append_arg_cstr(&state.out, str->data, str->len);
+ }
+
+ action arg_var {
+ state_append_arg_var(&state, true, stack_get_str(&state.stack));
+ }
+
+ action filter {
+ codebuf_append_op(&state.out, FSPEC_OP_FILTER);
+ }
+
+ action goto {
+ codebuf_append_op(&state.out, FSPEC_OP_GOTO);
+ state_append_arg_var(&state, false, stack_get_str(&state.stack));
+ }
+
+ action vnul {
+ codebuf_append_op(&state.out, FSPEC_OP_VISUAL);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL });
+ }
+
+ action vdec {
+ codebuf_append_op(&state.out, FSPEC_OP_VISUAL);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC });
+ }
+
+ action vhex {
+ codebuf_append_op(&state.out, FSPEC_OP_VISUAL);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX });
+ }
+
+ action vstr {
+ codebuf_append_op(&state.out, FSPEC_OP_VISUAL);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR });
+ }
+
+ action r8 {
+ codebuf_append_op(&state.out, FSPEC_OP_READ);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 8 });
+ }
+
+ action r16 {
+ codebuf_append_op(&state.out, FSPEC_OP_READ);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 });
+ }
+
+ action r32 {
+ codebuf_append_op(&state.out, FSPEC_OP_READ);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 });
+ }
+
+ action r64 {
+ codebuf_append_op(&state.out, FSPEC_OP_READ);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 });
+ }
+
+ action member_end {
+ state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER);
+ }
+
+ action member_start {
+ state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack));
+ }
+
+ action struct_end {
+ state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT);
+ }
+
+ action struct_start {
+ state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack));
+ }
+
+ action stack_oct {
+ state_stack_num(&state, 8);
+ }
+
+ action stack_hex {
+ state_stack_num(&state, 16);
+ }
+
+ action stack_dec {
+ state_stack_num(&state, 10);
+ }
+
+ action stack_str {
+ membuf_terminate(&state.var.buf, (char[]){ 0 }, 1);
+ state.stack.type = STACK_STR;
+ state.stack.str = state.var.buf.mem;
+ state.stack.str.len = state.var.buf.written;
+ }
+
+ action store_esc_num {
+ const fspec_num v = stack_get_num(&state.stack);
+ assert(v <= 255);
+ const uint8_t u8 = v;
+ membuf_append(&state.var.buf, &u8, sizeof(u8));
+ }
+
+ action store_esc {
+ const struct { const char e, v; } map[] = {
+ { .e = 'a', .v = '\a' },
+ { .e = 'b', .v = '\b' },
+ { .e = 'f', .v = '\f' },
+ { .e = 'n', .v = '\n' },
+ { .e = 'r', .v = '\r' },
+ { .e = 't', .v = '\t' },
+ { .e = 'v', .v = '\v' },
+ { .e = '\\', .v = '\\' },
+ { .e = '\'', .v = '\'' },
+ { .e = '\"', .v = '"' },
+ { .e = 'e', .v = 0x1B },
+ };
+
+ for (size_t i = 0; i < ARRAY_SIZE(map); ++i) {
+ if (*state.ragel.p != map[i].e)
+ continue;
+
+ membuf_append(&state.var.buf, &map[i].v, sizeof(map[i].v));
+ break;
+ }
+ }
+
+ action store {
+ membuf_append(&state.var.buf, state.ragel.p, 1);
+ }
+
+ action begin_num {
+ varbuf_begin(&state.var);
+ }
+
+ action begin_str {
+ varbuf_reset(&state.var);
+ }
+
+ action type_err {
+ ragel_throw_error(&state.ragel, "unknown type name");
+ }
+
+ action visual_err {
+ ragel_throw_error(&state.ragel, "unknown visualization");
+ }
+
+ action syntax_err {
+ ragel_throw_error(&state.ragel, "malformed input (machine failed here or in next expression)");
+ }
+
+ action line {
+ ragel_advance_line(&state.ragel);
+ }
+
+ # Semantic
+ quote = ['"];
+ newline = '\n';
+ esc = [abfnrtv\\'"e];
+ esc_chr = '\\';
+ esc_hex = 'x' <: xdigit{2};
+ hex = '0' <: esc_hex;
+ oct = [0-7]{1,3};
+ dec = [\-+]? <: (([1-9] <: digit*) | '0');
+ valid = ^cntrl;
+ comment = '//' <: valid* :>> newline;
+ # each width accepts its unsigned and signed spelling; the 64-bit pair is u64/s64
+ type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's64') %r64;
+ visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr;
+ reserved = 'struct' | type | visual;
+ name = ((alpha | '_') <: (alnum | '_')*) - reserved;
+
+ # Stack
+ stack_name = name >begin_str $store %stack_str;
+ stack_hex = hex >begin_num $store %stack_hex;
+ stack_dec = dec >begin_num $store %stack_dec;
+ stack_oct = oct >begin_num $store %stack_oct;
+ stack_esc_hex = esc_hex >begin_num $store %stack_hex;
+ stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc);
+ stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote;
+ stack_num = stack_dec | stack_hex;
+
+ # Catchers
+ catch_struct = 'struct ' <: stack_name;
+ catch_type = (catch_struct %goto | type) $!type_err;
+ catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var;
+ catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']';
+ catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? <: (', ' <: catch_args)* :>> ')')?;
+ catch_visual = ' ' <: visual $!visual_err;
+
+ # Abstract
+ member = stack_name %member_start :> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) :>> ';' %member_end;
+ struct = catch_struct %struct_start :>> ' {' <: (space | comment | member)* :>> '};' %struct_end;
+ line = valid* :>> newline %line;
+ main := ((space | comment | struct)* & line*) $!syntax_err;
+}%%
+
+/**
+ * Compile the spec delivered by lexer->ops.read into bytecode written to
+ * lexer->mem.output. On return mem.output.len is the number of bytes
+ * written. Returns false when the state machine reported an error.
+ */
+bool
+fspec_lexer_parse(struct fspec_lexer *lexer, const char *name)
+{
+ int cs;
+ %% write init;
+
+ (void)fspec_lexer_en_main;
+ assert(lexer);
+ assert(lexer->ops.read);
+ assert(lexer->mem.input.data && lexer->mem.input.len);
+ assert(lexer->mem.output.data && lexer->mem.output.len);
+ assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range");
+ assert(lexer->mem.output.len <= (fspec_off)~0 && "output storage size exceeds fspec_off range");
+
+ // scratch storage for the token currently being lexed
+ char var[256];
+ struct state state = {
+ .ragel.name = name,
+ .ragel.lineno = 1,
+ .var.buf.mem = { .data = var, .len = sizeof(var) },
+ .out.buf.mem = lexer->mem.output,
+ };
+
+ // header: version, declaration count and string-section length; the latter two are patched below
+ static const fspec_num version = 0;
+ state.out.end[SECTION_CODE] = state.out.end[SECTION_DATA] = state.out.buf.mem.data;
+ codebuf_append_op(&state.out, FSPEC_OP_HEADER);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, &version);
+ codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ PLACEHOLDER });
+ codebuf_append_arg(&state.out, FSPEC_ARG_DAT, (fspec_off[]){ PLACEHOLDER });
+ // the data section (string table) starts right after the header
+ state.out.end[SECTION_DATA] = state.out.end[SECTION_CODE];
+ state.out.strings = state.out.end[SECTION_DATA];
+
+ struct fspec_mem input = lexer->mem.input;
+ // feed the machine one buffer at a time; a short read marks the final buffer
+ for (bool eof = false; !state.ragel.error && !eof;) {
+ const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len);
+ const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes };
+ ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl);
+ %% write exec;
+ }
+
+ // patch the header placeholders with the final declaration count and string-section size
+ {
+ const void *end = state.out.end[SECTION_CODE];
+ codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 2, 1<<FSPEC_ARG_NUM), FSPEC_ARG_NUM, (fspec_num[]){ state.out.declarations });
+ const fspec_off off = (char*)state.out.end[SECTION_DATA] - (char*)state.out.strings;
+ codebuf_replace_arg(&state.out, fspec_op_get_arg(state.out.buf.mem.data, end, 3, 1<<FSPEC_ARG_DAT), FSPEC_ARG_DAT, &off);
+ }
+
+ lexer->mem.output.len = state.out.buf.written;
+ return !state.ragel.error;
+}
diff --git a/src/fspec/memory.h b/src/fspec/memory.h
new file mode 100644
index 0000000..768415a
--- /dev/null
+++ b/src/fspec/memory.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include <stddef.h>
+
+/** A pointer to a memory region together with its length in bytes. */
+struct fspec_mem {
+ void *data;
+ size_t len;
+};
diff --git a/src/fspec/validator.h b/src/fspec/validator.h
new file mode 100644
index 0000000..c4705b2
--- /dev/null
+++ b/src/fspec/validator.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <fspec/memory.h>
+
+#include <stdbool.h>
+
+struct fspec_validator;
+
+/** Validator configuration supplied by the caller. */
+struct fspec_validator {
+   struct {
+      /** fread()-style callback that provides up to @p nmemb items of @p size bytes in @p ptr. */
+      size_t (*read)(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb);
+   } ops;
+
+   struct {
+      /** buffer the bytecode is read into */
+      struct fspec_mem input;
+   } mem;
+};
+
+/**
+ * Validate bytecode obtained through validator->ops.read.
+ *
+ * @param name name of the input.
+ * @return callers treat false as a validation failure.
+ */
+bool
+fspec_validator_parse(struct fspec_validator *validator, const char *name);
diff --git a/src/fspec/validator.rl b/src/fspec/validator.rl
new file mode 100644
index 0000000..093348d
--- /dev/null
+++ b/src/fspec/validator.rl
@@ -0,0 +1,237 @@
+#include "ragel/ragel.h"
+#include <fspec/bcode.h>
+#include <fspec/validator.h>
+#include "bcode-internal.h"
+
+#include <assert.h>
+
+/** Value being assembled byte by byte from the input: bytes are stored into u.b and read back through the typed members. */
+struct stack {
+ union {
+ fspec_num num;
+ fspec_off off;
+ fspec_var var;
+ fspec_strsz strsz;
+ unsigned char b[sizeof(fspec_num)];
+ } u;
+ uint8_t i; // writing index for u.b
+};
+
+/** A pair of bytecode offsets. NOTE(review): mark_dat stores a length in `end`, and users add start + end to get the upper bound. */
+struct range {
+ fspec_off start, end;
+};
+
+/**
+ * Validation bookkeeping.
+ * data: location of the data (string) section.
+ * declarations / expected_declarations: declarations seen so far / count announced by the header.
+ * str_end: offset where the current string ends.
+ * decl_start: offset where the current declaration began.
+ * decl_end[]: expected end offset of the last declaration of each kind.
+ * offset: current position in the bytecode.
+ * last_decl_type: kind of the most recently parsed declaration.
+ */
+struct context {
+ struct range data;
+ fspec_var declarations, expected_declarations;
+ fspec_off str_end, decl_start, decl_end[FSPEC_DECLARATION_LAST], offset;
+ enum fspec_declaration last_decl_type;
+};
+
+/** Validator state: ragel bookkeeping, validation context, the value being assembled, and a validity flag. */
+struct state {
+ struct ragel ragel;
+ struct context context;
+ struct stack stack;
+ bool valid;
+};
+
+%%{
+ machine fspec_validator;
+ variable p state.ragel.p;
+ variable pe state.ragel.pe;
+ variable eof state.ragel.eof;
+ write data noerror nofinal;
+
+ action store_decls {
+ if (state.stack.u.num > (fspec_var)~0)
+ ragel_throw_error(&state.ragel, "expected declarations overflows");
+
+ state.context.expected_declarations = state.stack.u.num;
+ }
+
+ action check_decls {
+ if (state.context.declarations != state.context.expected_declarations)
+ ragel_throw_error(&state.ragel, "expected declarations did not match with the content: expected: %" PRI_FSPEC_VAR " got: %" PRI_FSPEC_VAR, state.context.expected_declarations, state.context.declarations);
+ }
+
+ action mark_dat {
+ // we can replace this logic with fspec generated code in future
+ // struct str { len: u32; str: u8[len]['\0']; }
+ // struct dat { len: u32; strings: struct str[$::len]; }
+ if (state.context.offset > (fspec_off)~0 - state.stack.u.off)
+ ragel_throw_error(&state.ragel, "dat section length overflows");
+
+ state.context.data = (struct range){ .start = state.context.offset, .end = state.stack.u.off };
+ }
+
+ action test_inside_dat {
+ state.context.offset < (state.context.data.start + state.context.data.end)
+ }
+
+ action mark_str {
+ if (state.context.offset >= (fspec_off)~0 - state.stack.u.strsz) // >= for null byte
+ ragel_throw_error(&state.ragel, "str length overflows");
+
+ state.context.str_end = state.context.offset + state.stack.u.strsz;
+ }
+
+ action test_inside_str {
+ state.context.offset < state.context.str_end
+ }
+
+ # a VAR argument must name a declaration that has already been seen
+ action check_var {
+ if (state.context.declarations <= state.stack.u.var)
+ ragel_throw_error(&state.ragel, "referenced undeclared variable");
+ }
+
+ # a STR argument must point inside the data section: [data.start, data.start + data.end)
+ action check_str {
+ if (state.stack.u.off < state.context.data.start) {
+ ragel_throw_error(&state.ragel, "str before data section range: %" PRI_FSPEC_OFF " < %" PRI_FSPEC_OFF, state.stack.u.off, state.context.data.start);
+ } else if (state.context.data.start + state.context.data.end <= state.stack.u.off) {
+ ragel_throw_error(&state.ragel, "str after data section range: %" PRI_FSPEC_OFF " <= %" PRI_FSPEC_OFF, state.context.data.start + state.context.data.end, state.stack.u.off);
+ }
+ }
+
+ action check_decl_type {
+ if (state.stack.u.num >= FSPEC_DECLARATION_LAST)
+ ragel_throw_error(&state.ragel, "invalid declaration type: %" PRI_FSPEC_NUM, state.stack.u.num);
+
+ state.context.last_decl_type = state.stack.u.num;
+ }
+
+ action check_decl_num {
+ if (state.context.declarations >= (fspec_var)~0)
+ ragel_throw_error(&state.ragel, "declarations overflows");
+
+ if (state.context.declarations != state.stack.u.num)
+ ragel_throw_error(&state.ragel, "invalid declaration number: %" PRI_FSPEC_NUM " expected: %" PRI_FSPEC_VAR, state.stack.u.num, state.context.declarations);
+
+ ++state.context.declarations;
+ }
+
+ action start_decl {
+ state.context.decl_start = state.context.offset;
+ }
+
+ # Runs after the OFF argument of a declaration: record where this declaration must end.
+ action mark_decl {
+     // Bytes of this declaration consumed since start_decl ran.
+     const fspec_off sz = (state.context.offset - state.context.decl_start);
+
+     // stack.u.off is read straight from the input, so it is validated rather than asserted.
+     // The end equals decl_start + off (== offset + off - sz); testing against decl_start
+     // keeps the overflow check itself from wrapping around.
+     if (state.stack.u.off < sz)
+         ragel_throw_error(&state.ragel, "declaration length %" PRI_FSPEC_OFF " is shorter than the %" PRI_FSPEC_OFF " bytes already read", state.stack.u.off, sz);
+     else if (state.context.decl_start > (fspec_off)~0 - state.stack.u.off)
+         ragel_throw_error(&state.ragel, "declaration length overflows");
+     else
+         state.context.decl_end[state.context.last_decl_type] = state.context.decl_start + state.stack.u.off;
+ }
+
+ # The declaration that was just parsed must be of the struct kind.
+ action check_struct {
+ if (state.context.last_decl_type != FSPEC_DECLARATION_STRUCT)
+ ragel_throw_error(&state.ragel, "expected struct declaration");
+ }
+
+ # The declaration that was just parsed must be of the member kind.
+ action check_member {
+ if (state.context.last_decl_type != FSPEC_DECLARATION_MEMBER)
+ ragel_throw_error(&state.ragel, "expected member declaration");
+ }
+
+ # The end recorded for the member declaration (see mark_decl) must match the current offset.
+ action check_member_end {
+ if (state.context.decl_end[FSPEC_DECLARATION_MEMBER] != state.context.offset)
+ ragel_throw_error(&state.ragel, "invalid member end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_MEMBER], state.context.offset);
+ }
+
+ # The end recorded for the struct declaration (see mark_decl) must match the current offset.
+ action check_struct_end {
+ if (state.context.decl_end[FSPEC_DECLARATION_STRUCT] != state.context.offset)
+ ragel_throw_error(&state.ragel, "invalid struct end: %" PRI_FSPEC_OFF " expected: %" PRI_FSPEC_OFF, state.context.decl_end[FSPEC_DECLARATION_STRUCT], state.context.offset);
+ }
+
+ # Runs after the NUM argument of OP_VISUAL: the value must be below FSPEC_VISUAL_LAST.
+ action check_visual_type {
+ if (state.stack.u.num >= FSPEC_VISUAL_LAST)
+ ragel_throw_error(&state.ragel, "invalid visual type: %" PRI_FSPEC_NUM, state.stack.u.num);
+ }
+
+ # Error action attached to every OP_ARG_* rule.
+ action arg_error {
+ ragel_throw_error(&state.ragel, "malformed argument");
+ }
+
+ # Error action attached to every OP_* rule.
+ action op_error {
+ ragel_throw_error(&state.ragel, "unexpected argument");
+ }
+
+ # Error action attached to `pattern`.
+ action pattern_error {
+ ragel_throw_error(&state.ragel, "unexpected pattern");
+ }
+
+ # Error action attached to `main`.
+ action syntax_error {
+ ragel_throw_error(&state.ragel, "unexpected byte");
+ }
+
+ # Append the current input byte to the scratch buffer; bytes beyond its capacity are dropped.
+ action store {
+ if (state.stack.i < sizeof(state.stack.u.b))
+ state.stack.u.b[state.stack.i++] = fc;
+ }
+
+ # Rewind the scratch buffer index so the next value starts at byte 0.
+ action flush {
+ state.stack.i = 0;
+ }
+
+ # Count one consumed input byte (embedded on every transition of `main`).
+ action advance {
+ ++state.context.offset;
+ }
+
+ # Fixed-width reads of 1/2/4/8 bytes: reset the scratch index on entry, then store every byte.
+ stack1 = any{1} >flush $store;
+ stack2 = any{2} >flush $store;
+ stack4 = any{4} >flush $store;
+ stack8 = any{8} >flush $store;
+
+ # Typed arguments: a tag byte (0-5) followed by the payload.
+ # DAT is a 4-byte value followed, while test_inside_dat holds, by entries made of
+ # one byte, then bytes while test_inside_str holds, then a 0 byte.
+ ARG_DAT = 0 stack4 %*mark_dat ((stack1 %*mark_str (any when test_inside_str)* 0) when test_inside_dat)*;
+ ARG_OFF = 1 stack4;
+ ARG_NUM = 2 stack8;
+ ARG_VAR = 3 stack2 %check_var;
+ ARG_STR = 4 stack4 %check_str;
+ ARG_EOF = 5;
+
+ # Inside an operation every argument is introduced by a 0 byte.
+ OP_ARG_DAT = 0 ARG_DAT $!arg_error;
+ OP_ARG_OFF = 0 ARG_OFF $!arg_error;
+ OP_ARG_NUM = 0 ARG_NUM $!arg_error;
+ OP_ARG_VAR = 0 ARG_VAR $!arg_error;
+ OP_ARG_STR = 0 ARG_STR $!arg_error;
+ OP_ARG_EOF = 0 ARG_EOF $!arg_error;
+
+ # Operations: an opcode byte (1-6) followed by the arguments that opcode accepts.
+ OP_HEADER = 1 (OP_ARG_NUM OP_ARG_NUM %store_decls OP_ARG_DAT) $!op_error;
+ OP_DECLARATION = 2 >start_decl (OP_ARG_NUM %check_decl_type OP_ARG_NUM %check_decl_num OP_ARG_OFF %mark_decl OP_ARG_STR) $!op_error;
+ OP_READ = 3 (OP_ARG_NUM (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error;
+ OP_GOTO = 4 (OP_ARG_VAR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR | OP_ARG_EOF)*) $!op_error;
+ OP_FILTER = 5 (OP_ARG_STR (OP_ARG_NUM | OP_ARG_VAR | OP_ARG_STR)*) $!op_error;
+ OP_VISUAL = 6 (OP_ARG_NUM %check_visual_type) $!op_error;
+
+ # A struct declaration followed by any number of members; each member is a declaration,
+ # a READ or GOTO, then an optional FILTER and an optional VISUAL.
+ pattern = (OP_DECLARATION %check_struct <: (OP_DECLARATION %check_member (OP_READ | OP_GOTO) OP_FILTER? OP_VISUAL? %check_member_end)*)* %check_struct_end $!pattern_error;
+ # Whole input: the header followed by the pattern; `advance` runs for every consumed byte.
+ main := (OP_HEADER <: pattern) %check_decls $advance $!syntax_error;
+}%%
+
+// Run the validator machine over input pulled through validator->ops.read.
+// `name` is stored as the ragel name, which ragel_throw_error() prints in front of messages.
+// Returns true when no error was flagged during the run.
+bool
+fspec_validator_parse(struct fspec_validator *validator, const char *name)
+{
+ int cs;
+ %% write init;
+
+ // Reference the generated entry-point constant (presumably to avoid an unused warning).
+ (void)fspec_validator_en_main;
+ assert(validator);
+ assert(validator->ops.read);
+ assert(validator->mem.input.data && validator->mem.input.len);
+ assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range");
+
+ struct state state = {
+ .ragel.name = name,
+ .ragel.lineno = 1,
+ };
+
+ // The `store` action fills stack.u.b, so b must span the whole union.
+ static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union");
+
+ // The caller-provided buffer is refilled and fed to the machine one chunk at a time.
+ struct fspec_mem input = validator->mem.input;
+ for (bool eof = false; !state.ragel.error && !eof;) {
+ const size_t bytes = validator->ops.read(validator, input.data, 1, input.len);
+ const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true };
+ // A read shorter than the buffer is treated as the end of input.
+ ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl);
+ %% write exec;
+ }
+
+ return !state.ragel.error;
+}
diff --git a/src/ragel/fspec.h b/src/ragel/fspec.h
deleted file mode 100644
index 68998f4..0000000
--- a/src/ragel/fspec.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#pragma once
-
-#include <stddef.h>
-#include <stdint.h>
-#include <stdbool.h>
-
-struct fspec_bytes {
- const uint8_t *data;
- size_t size;
-};
-
-enum fspec_kind_bits {
- FSPEC_KIND_IGNORE = 1<<0,
- FSPEC_KIND_HEXADECIMAL = 1<<1,
- FSPEC_KIND_ENCODING = 1<<2,
-};
-
-struct fspec_kind {
- const char *name;
- uint32_t flags;
-};
-
-enum fspec_array_type {
- FSPEC_ARRAY_FIXED,
- FSPEC_ARRAY_MATCH,
- FSPEC_ARRAY_VAR,
-};
-
-struct fspec_array {
- enum fspec_array_type type;
-
- union {
- struct fspec_bytes match;
- const char *var;
- size_t nmemb;
- };
-};
-
-enum fspec_type_bits {
- FSPEC_TYPE_SIGNED = 1<<0,
- FSPEC_TYPE_CONTAINER = 1<<1,
-};
-
-struct fspec_type {
- const char *name;
- size_t size;
- uint32_t flags;
-};
-
-struct fspec_field {
- struct fspec_type type;
- struct fspec_array array;
- struct fspec_kind kind;
- const char *name;
-};
-
-struct fspec_container {
- const char *name;
-};
-
-struct fspec;
-struct fspec {
- struct {
- void (*field)(struct fspec *fspec, const struct fspec_container *container, const struct fspec_field *field);
- size_t (*read)(struct fspec *fspec, char *buf, const size_t size, const size_t nmemb);
- } ops;
-
- struct {
- // XXX: replace with ops.alloc, ops.free
- // on dump.c we can then just provide implementation that still uses reasonable amount of static memory
- // but we don't limit the code from working with regular dynamic memory
- uint8_t *data;
- size_t size;
- } mem;
-};
-
-void fspec_parse(struct fspec *fspec);
diff --git a/src/ragel/fspec.rl b/src/ragel/fspec.rl
deleted file mode 100644
index 8493cf1..0000000
--- a/src/ragel/fspec.rl
+++ /dev/null
@@ -1,329 +0,0 @@
-#include "fspec.h"
-#include "ragel.h"
-
-// It's pretty good base so far.
-// ragel_search_str for typechecking variable delcaration is hack.
-// State should have hashmap for fields/containers.
-//
-// XXX: Maybe drop whole container thing and just give field const char *parent; that points to keypath of container.
-// Then we would have flat structure like, "foo, foo.var, foo.b, ..."
-
-static const struct fspec_container default_container = {0};
-static const struct fspec_field default_field = { .array.nmemb = 1 };
-
-enum stack_type {
- STACK_VAR,
- STACK_STR,
- STACK_NUM,
-};
-
-struct stack {
- enum stack_type type;
-
- union {
- struct fspec_bytes str;
- const char *var;
- uint64_t num;
- };
-};
-
-struct state {
- struct ragel ragel;
- struct stack stack;
- struct fspec_field field;
- struct fspec_container container;
- size_t container_data_offset;
-};
-
-static const char*
-stack_type_to_str(const enum stack_type type)
-{
- switch (type) {
- case STACK_VAR: return "var";
- case STACK_STR: return "str";
- case STACK_NUM: return "num";
- };
-
- assert(0 && "should not happen");
- return "unknown";
-}
-
-static void
-stack_check_type(const struct ragel *ragel, const struct stack *stack, const enum stack_type type)
-{
- assert(ragel && stack);
-
- if (stack->type != type)
- ragel_throw_error(ragel, "tried to get '%s' from stack, but the last pushed type was '%s'", stack_type_to_str(type), stack_type_to_str(stack->type));
-}
-
-static const char*
-stack_get_var(const struct ragel *ragel, const struct stack *stack)
-{
- assert(ragel && stack);
- stack_check_type(ragel, stack, STACK_VAR);
- return stack->var;
-}
-
-static const struct fspec_bytes*
-stack_get_str(const struct ragel *ragel, const struct stack *stack)
-{
- assert(ragel && stack);
- stack_check_type(ragel, stack, STACK_STR);
- return &stack->str;
-}
-
-static uint64_t
-stack_get_num(const struct ragel *ragel, const struct stack *stack)
-{
- assert(ragel && stack);
- stack_check_type(ragel, stack, STACK_NUM);
- return stack->num;
-}
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
-
-static void
-fspec_type_from_str(const struct ragel *ragel, const char *str, struct fspec_type *out_type)
-{
- assert(ragel && str);
-
- const struct fspec_type types[] = {
- { .name = "u8", .size = sizeof(uint8_t) },
- { .name = "u16", .size = sizeof(uint16_t) },
- { .name = "u32", .size = sizeof(uint32_t) },
- { .name = "u64", .size = sizeof(uint64_t) },
- { .name = "s8", .size = sizeof(int8_t), .flags = FSPEC_TYPE_SIGNED },
- { .name = "s16", .size = sizeof(int16_t), .flags = FSPEC_TYPE_SIGNED },
- { .name = "s32", .size = sizeof(int32_t), .flags = FSPEC_TYPE_SIGNED },
- { .name = "s64", .size = sizeof(int64_t), .flags = FSPEC_TYPE_SIGNED },
- };
-
- for (size_t i = 0; i < ARRAY_SIZE(types); ++i) {
- if (strcmp(str, types[i].name))
- continue;
-
- *out_type = types[i];
- return;
- }
-
- if (ragel_search_str(ragel, 0, str)) {
- *out_type = (struct fspec_type){ .name = str, .flags = FSPEC_TYPE_CONTAINER };
- return;
- }
-
- ragel_throw_error(ragel, "invalid type");
-}
-
-static void
-fspec_kind_from_str(const struct ragel *ragel, const char *str, struct fspec_kind *out_kind)
-{
- assert(ragel && str);
-
- const struct fspec_kind kinds[] = {
- { .name = "pad", .flags = FSPEC_KIND_IGNORE },
- { .name = "hex", .flags = FSPEC_KIND_HEXADECIMAL },
- { .name = "ascii", .flags = FSPEC_KIND_ENCODING },
- { .name = "utf8", .flags = FSPEC_KIND_ENCODING },
- { .name = "sjis", .flags = FSPEC_KIND_ENCODING },
- };
-
- for (size_t i = 0; i < ARRAY_SIZE(kinds); ++i) {
- if (strcmp(str, kinds[i].name))
- continue;
-
- *out_kind = kinds[i];
- return;
- }
-
- ragel_throw_error(ragel, "invalid kind");
-}
-
-static void
-check_field_kind(const struct ragel *ragel, const struct fspec_field *field)
-{
- assert(ragel && field);
-
- if ((field->kind.flags & FSPEC_KIND_ENCODING) && field->type.size != sizeof(uint8_t))
- ragel_throw_error(ragel, "invalid kind: %s kind only allowed for u8 and s8 types", field->kind.name);
-}
-
-%%{
- # File specification parser.
-
- machine fspec;
- variable p state.ragel.p;
- variable pe state.ragel.pe;
- variable eof state.ragel.eof;
- write data noerror nofinal;
-
- action field {
- fspec->ops.field(fspec, &state.container, &state.field);
- }
-
- action field_kind {
- fspec_kind_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.kind);
- check_field_kind(&state.ragel, &state.field);
- }
-
- action field_array {
- switch (state.stack.type) {
- case STACK_NUM:
- state.field.array.type = FSPEC_ARRAY_FIXED;
- state.field.array.nmemb = stack_get_num(&state.ragel, &state.stack);
- break;
-
- case STACK_STR:
- state.field.array.type = FSPEC_ARRAY_MATCH;
- state.field.array.match = *stack_get_str(&state.ragel, &state.stack);
- break;
-
- case STACK_VAR:
- state.field.array.type = FSPEC_ARRAY_VAR;
- state.field.array.var = stack_get_var(&state.ragel, &state.stack);
-
- if (!ragel_search_str(&state.ragel, state.container_data_offset, state.field.array.var))
- ragel_throw_error(&state.ragel, "undeclared variable '%s'", state.field.array.var);
- break;
-
- default:
- ragel_throw_error(&state.ragel, "array can't contain the stack type of '%s'", stack_type_to_str(state.stack.type));
- break;
- }
- }
-
- action field_name {
- state.field.name = stack_get_var(&state.ragel, &state.stack);
- }
-
- action field_type {
- state.field = default_field;
- fspec_type_from_str(&state.ragel, stack_get_var(&state.ragel, &state.stack), &state.field.type);
- }
-
- action container_name {
- state.container = default_container;
- state.container.name = stack_get_var(&state.ragel, &state.stack);
- state.container_data_offset = state.ragel.mem.cur - state.ragel.mem.data;
- }
-
- action push_var {
- state.stack.type = STACK_VAR;
- state.stack.var = (char*)state.ragel.mem.cur;
- }
-
- action push_hex {
- state.stack.type = STACK_NUM;
- state.stack.num = strtoll((char*)state.ragel.mem.cur, NULL, 16);
- }
-
- action push_dec {
- state.stack.type = STACK_NUM;
- state.stack.num = strtoll((char*)state.ragel.mem.cur, NULL, 10);
- }
-
- action push_str {
- state.stack.type = STACK_STR;
- state.stack.str.data = state.ragel.mem.cur;
- state.stack.str.size = (state.ragel.mem.data + state.ragel.mem.written) - state.ragel.mem.cur;
- }
-
- action convert_escape {
- ragel_convert_escape(&state.ragel);
- }
-
- action remove {
- ragel_remove_last_data(&state.ragel);
- }
-
- action finish {
- ragel_finish_data(&state.ragel);
- }
-
- action store {
- ragel_store_data(&state.ragel);
- }
-
- action begin {
- ragel_begin_data(&state.ragel);
- }
-
- action invalid_kind {
- ragel_throw_error(&state.ragel, "invalid kind");
- }
-
- action invalid_type {
- ragel_throw_error(&state.ragel, "invalid type");
- }
-
- action error {
- ragel_throw_error(&state.ragel, "malformed input (machine failed here or in previous or next expression)");
- }
-
- action line {
- ragel_advance_line(&state.ragel);
- }
-
- # Semantic
- ws = space;
- valid = ^cntrl;
- es = '\\';
- delim = ';';
- quote = ['"];
- bopen = '{';
- bclose = '}';
- newline = '\n';
- octal = [0-7];
- hex = '0x' <: xdigit+;
- decimal = ([1-9] <: digit*) | '0';
- comment = '//' <: valid* :>> newline;
- escape = es <: ('x' <: xdigit+ | [abfnrtv\\'"e] | octal{1,3});
- type = 'u8' | 'u16' | 'u32' | 'u64' | 's8' | 's16' | 's32' | 's64';
- kind = 'ascii' | 'utf8' | 'sjis' | 'hex' | 'pad';
- reserved = 'struct' | type | kind;
- var = ((alpha | '_') <: (alnum | '_')*) - reserved;
-
- # Catchers
- catch_var = var >begin $store %finish %push_var;
- catch_struct = ('struct' $store ws+ >store <: var $store) >begin %finish %push_var;
- catch_type = (catch_struct | type >begin $store %push_var %remove) $!invalid_type;
- catch_hex = hex >begin $store %push_hex %remove;
- catch_decimal = decimal >begin $store %push_dec %remove;
- catch_string = quote <: (escape %convert_escape | print)* >begin $store %finish %push_str :>> quote;
- catch_array = '[' <: (catch_hex | catch_decimal | catch_string | catch_var) :>> ']';
- catch_kind = '=' ws* <: kind >begin $store %push_var %remove $!invalid_kind;
-
- # Actions
- field = catch_type %field_type ws+ <: catch_var %field_name ws* <: (catch_array %field_array ws*)? <: (catch_kind %field_kind ws*)? :>> delim %field;
- container = catch_struct %container_name ws* :>> bopen <: (ws | comment | field)* :>> bclose ws* delim;
- line = valid* :>> newline @line;
- main := (ws | comment | container)* & line* $!error;
-}%%
-
-void
-fspec_parse(struct fspec *fspec)
-{
- int cs;
- %% write init;
-
- (void)fspec_en_main;
- assert(fspec);
- assert(fspec->ops.read);
- assert(fspec->ops.field);
-
- struct state state = {
- .ragel = {
- .lineno = 1,
- .mem = {
- .data = fspec->mem.data,
- .size = fspec->mem.size,
- },
- },
- };
-
- for (bool ok = true; ok;) {
- const size_t bytes = fspec->ops.read(fspec, state.ragel.buf, 1, sizeof(state.ragel.buf));
- ok = ragel_confirm_input(&state.ragel, bytes);
- %% write exec;
- }
-}
diff --git a/src/ragel/ragel.h b/src/ragel/ragel.h
index af06f4a..b2c7572 100644
--- a/src/ragel/ragel.h
+++ b/src/ragel/ragel.h
@@ -1,236 +1,30 @@
#pragma once
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <stddef.h>
+#include <stdint.h>
#include <stdbool.h>
-#include <string.h>
-#include <ctype.h>
-#include <assert.h>
-#include <limits.h>
-#include <err.h>
-struct ragel {
- struct {
- uint8_t *data; // data\0another_data\0
- const uint8_t *cur; // data\0another_data\0cursor
- size_t written, size; // amount of data written / size of data
- } mem;
+struct ragel_mem {
+ const char *data, *end;
+ bool binary; // binary input bit
+};
- char buf[4096]; // block of input data
+struct ragel {
+ struct ragel_mem input; // block of input data
+ uint64_t lineno; // current line
const char *p, *pe, *eof; // see ragel doc
- size_t lineno; // current line
+ const char *cl; // current line start
+ const char *name; // may be current file name for example
+ bool error; // error thrown bit
};
-static inline void
-ragel_get_current_line(const struct ragel *ragel, size_t *out_lineno, size_t *out_ls, size_t *out_le, size_t *out_ws, size_t *out_we)
-{
- assert(out_ls && out_le && out_ws && out_we);
- assert(ragel->p >= ragel->buf && ragel->pe >= ragel->p);
-
- size_t ls, le, ws, we;
- size_t off = ragel->p - ragel->buf;
- size_t lineno = ragel->lineno;
- const size_t end = ragel->pe - ragel->buf;
-
- // rewind to first non-space
- for (; off > 0 && (isspace(ragel->buf[off]) || !ragel->buf[off]); --off) {
- if (lineno > 0 && ragel->buf[off] == '\n')
- --lineno;
- }
-
- for (ls = off; ls > 0 && ragel->buf[ls] != '\n'; --ls); // beginning of line
- for (le = off; le < end && ragel->buf[le] != '\n'; ++le); // end of line
- for (; ls < le && isspace(ragel->buf[ls]); ++ls); // strip leading whitespace
- for (ws = off; ws > ls && isspace(ragel->buf[ws]); --ws); // rewind to first non-space
- for (; ws > 0 && ws > ls && !isspace(ragel->buf[ws - 1]); --ws); // find word start
- for (we = ws; we < le && !isspace(ragel->buf[we]); ++we); // find word ending
-
- assert(we >= ws && ws >= ls && le >= ls && le >= we);
- *out_lineno = lineno;
- *out_ls = ls;
- *out_le = le;
- *out_ws = ws;
- *out_we = we;
-}
-
-__attribute__((format(printf, 2, 3)))
-static inline void
-ragel_throw_error(const struct ragel *ragel, const char *fmt, ...)
-{
- assert(ragel && fmt);
-
- size_t lineno, ls, le, ws, we;
- ragel_get_current_line(ragel, &lineno, &ls, &le, &ws, &we);
- assert(le - ls <= INT_MAX && ws - ls <= INT_MAX);
-
- char msg[255];
- va_list args;
- va_start(args, fmt);
- vsnprintf(msg, sizeof(msg), fmt, args);
- va_end(args);
-
- const int indent = 8;
- const size_t mark = (we - ws ? we - ws : 1), cur = (ragel->p - ragel->buf) - ws;
- warnx("\x1b[37m%zu: \x1b[31merror: \x1b[0m%s\n%*s%.*s", lineno, msg, indent, "", (int)(le - ls), ragel->buf + ls);
- fprintf(stderr, "%*s%*s\x1b[31m", indent, "", (int)(ws - ls), "");
- for (size_t i = 0; i < mark; ++i) fputs((i == cur ? "^" : "~"), stderr);
- fputs("\x1b[0m\n", stderr);
-
- exit(EXIT_FAILURE);
-}
-
-static inline void
-ragel_bounds_check_data(const struct ragel *ragel, const size_t nmemb)
-{
- assert(ragel);
-
- if (ragel->mem.size < nmemb || ragel->mem.written >= ragel->mem.size - nmemb)
- ragel_throw_error(ragel, "data storage limit exceeded: %zu bytes exceeds the maximum store size of %zu bytes", ragel->mem.written, ragel->mem.size);
-}
-
-static inline void
-ragel_replace_data(struct ragel *ragel, const size_t nmemb, char replacement)
-{
- assert(ragel);
-
- if (ragel->mem.written < nmemb)
- ragel_throw_error(ragel, "parse error: received escape conversion with mem.written of %zu, expected >= %zu", ragel->mem.written, nmemb);
-
- ragel->mem.data[(ragel->mem.written -= nmemb)] = replacement;
- ragel->mem.data[++ragel->mem.written] = 0;
-}
-
-static inline void
-ragel_convert_escape(struct ragel *ragel)
-{
- assert(ragel);
-
- if (ragel->mem.written < 2)
- ragel_throw_error(ragel, "parse error: received escape conversion with mem.written of %zu, expected >= 2", ragel->mem.written);
-
- const struct {
- const char *e;
- const char v, b;
- } map[] = {
- { .e = "\\a", .v = '\a' },
- { .e = "\\b", .v = '\b' },
- { .e = "\\f", .v = '\f' },
- { .e = "\\n", .v = '\n' },
- { .e = "\\r", .v = '\r' },
- { .e = "\\t", .v = '\t' },
- { .e = "\\v", .v = '\v' },
- { .e = "\\\\", .v = '\\' },
- { .e = "\\'", .v = '\'' },
- { .e = "\\\"", .v = '"' },
- { .e = "\\e", .v = '\e' },
- { .e = "\\x", .b = 16 },
- { .e = "\\", .b = 8 },
- };
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
- const char *cur = (char*)ragel->mem.cur;
- const size_t cur_sz = strlen(cur);
- for (size_t i = 0; i < ARRAY_SIZE(map); ++i) {
- if (!strncmp(cur, map[i].e, strlen(map[i].e))) {
- const char v = (!map[i].b ? map[i].v : strtol(cur + strlen(map[i].e), NULL, map[i].b));
- assert((map[i].b == 8 && cur_sz >= 2) || (map[i].b == 16 && cur_sz >= 2) || (map[i].b == 0 && cur_sz == 2));
- assert(map[i].b != 8 || isdigit(cur[1]));
- ragel_replace_data(ragel, cur_sz, v);
- return;
- }
- }
-#undef ARRAY_SIZE
-
- ragel_throw_error(ragel, "parse error: received unknown escape conversion");
-}
-
-static inline void
-ragel_dump_data(struct ragel *ragel, const size_t offset)
-{
- const uint8_t *end = ragel->mem.data + ragel->mem.written;
- for (const uint8_t *p = ragel->mem.data + offset; p && p < end; p = (uint8_t*)memchr(p, 0, end - p), p += !!p)
- printf("%s\n", p);
-}
-
-static inline const uint8_t*
-ragel_search_data(const struct ragel *ragel, const size_t offset, const uint8_t *data, const size_t size)
-{
- assert(ragel && data);
-
- const uint8_t *end = ragel->mem.data + ragel->mem.written;
- for (const uint8_t *p = ragel->mem.data + offset; p && p < end && (size_t)(end - p) >= size; p = (uint8_t*)memchr(p, 0, end - p), p += !!p) {
- if (!memcmp(data, p, size))
- return p;
- }
-
- return NULL;
-}
-
-static inline const uint8_t*
-ragel_search_str(const struct ragel *ragel, const size_t offset, const char *str)
-{
- return ragel_search_data(ragel, offset, (const uint8_t*)str, strlen(str) + 1);
-}
-
-static inline void
-ragel_remove_last_data(struct ragel *ragel)
-{
- assert(ragel);
- const uint8_t *end = ragel->mem.data + ragel->mem.written;
- const size_t size = end - ragel->mem.cur + 1;
- assert(ragel->mem.written >= size);
- ragel->mem.written -= size;
- ragel->mem.data[ragel->mem.written] = 0;
-}
-
-static inline void
-ragel_finish_data(struct ragel *ragel)
-{
- assert(ragel);
-
- const uint8_t *end = ragel->mem.data + ragel->mem.written, *p;
- if ((p = ragel_search_data(ragel, 0, ragel->mem.cur, end - ragel->mem.cur + 1))) {
- ragel_remove_last_data(ragel);
- ragel->mem.cur = p;
- }
-}
-
-static inline void
-ragel_store_data(struct ragel *ragel)
-{
- ragel_bounds_check_data(ragel, 1);
- ragel->mem.data[ragel->mem.written++] = *ragel->p;
- ragel->mem.data[ragel->mem.written] = 0;
-}
-
-static inline void
-ragel_begin_data(struct ragel *ragel)
-{
- ragel_bounds_check_data(ragel, 1);
- ragel->mem.written += (ragel->mem.written > 0);
- ragel->mem.cur = ragel->mem.data + ragel->mem.written;
-}
-
-static inline void
-ragel_advance_line(struct ragel *ragel)
-{
- assert(ragel);
- ++ragel->lineno;
-}
+// Report a printf-style formatted error on stderr and set ragel->error.
+__attribute__((format(printf, 2, 3))) void
+ragel_throw_error(struct ragel *ragel, const char *fmt, ...);
-static inline bool
-ragel_confirm_input(struct ragel *ragel, const size_t bytes)
-{
- assert(ragel);
+// Set the name stored in the ragel state; the pointer is kept, not copied.
+void
+ragel_set_name(struct ragel *ragel, const char *name);
- if (bytes > sizeof(ragel->buf))
- errx(EXIT_FAILURE, "%s: gave larger buffer than %zu", __func__, sizeof(ragel->buf));
+// Increment the line counter and mark the current position as the start of the line.
+void
+ragel_advance_line(struct ragel *ragel);
- const bool in_eof = (bytes < sizeof(ragel->buf));
- ragel->p = ragel->buf;
- ragel->pe = ragel->p + bytes;
- ragel->eof = (in_eof ? ragel->pe : NULL);
- return !in_eof;
-}
+// Point the ragel state at a new block of input; `eof` tells whether it is the last block.
+void
+ragel_feed_input(struct ragel *ragel, const bool eof, const struct ragel_mem *input);
diff --git a/src/ragel/ragel.rl b/src/ragel/ragel.rl
new file mode 100644
index 0000000..48c4229
--- /dev/null
+++ b/src/ragel/ragel.rl
@@ -0,0 +1,88 @@
+#include "ragel.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <assert.h>
+
+// Element count of a true array (not a pointer); the argument is parenthesized before indexing.
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+%%{
+ # Helper machines that ragel_throw_error() runs over the current line to locate and print the error.
+ machine ragel;
+ write data noerror nofinal;
+
+ # Output primitives; all of them write to stderr.
+ action red { fputs("\x1b[31m", stderr); }
+ action reset { fputs("\x1b[0m", stderr); }
+ action end { fputs("\x1b[0m\n", stderr); }
+ action mark { fputc('^', stderr); }
+ action tail { fputc('~', stderr); }
+ action lead { fputc(' ', stderr); }
+
+ word = alnum*;
+ token = ' ' | punct;
+ # Consume input until the cursor reaches *error.
+ until_err = (any when { fpc != *error })*;
+ # Scan up to ragel->p, moving *error each time a space/punct token is left
+ # (presumably leaving it at the start of the word that contains ragel->p — confirm).
+ search_err := ((any | token %{ *error = fpc; }) when { fpc != ragel->p })*;
+ # Echo the line, switching to red at *error for the alnum word found there.
+ print_err := (until_err %red <: word %reset <: (any - '\n')*) ${ fputc(fc, stderr); } >lead %!end %/end;
+ # Print spaces up to *error, then a red '^' followed by one '~' per alnum character after it.
+ print_mark := (until_err ${ fputc(' ', stderr); } %red %mark <: any word $tail) >lead %!end %/end;
+}%%
+
+// Run one of the helper machines (selected by start_cs) from the start of the current line.
+// `error` is read and/or updated by the machine's embedded actions.
+static void
+ragel_exec_error(const struct ragel *ragel, const int start_cs, const char **error)
+{
+ (void)ragel_start;
+ assert(ragel && ragel->cl && error);
+ int cs = start_cs;
+ // Local cursor variables so the caller's p/pe/eof are left untouched.
+ const char *p = ragel->cl, *pe = ragel->pe, *eof = ragel->eof;
+ %% write exec;
+}
+
+// Flag the state as failed and print "name:line:column error: <message>" to stderr.
+// For non-binary input the offending line and a marker line are printed as well.
+// The function returns normally; callers detect the failure through ragel->error.
+void
+ragel_throw_error(struct ragel *ragel, const char *fmt, ...)
+{
+    assert(ragel && fmt);
+    ragel->error = true;
+
+    // The source excerpt is only meaningful for textual input.
+    const bool textual = !ragel->input.binary;
+    const char *error = ragel->p;
+
+    // Let the search machine refine the reported position.
+    if (textual)
+        ragel_exec_error(ragel, ragel_en_search_err, &error);
+
+    // The column is the distance from the start of the current line.
+    const uint64_t column = (error - ragel->cl);
+    fprintf(stderr, "\x1b[37m%s:%" PRIu64 ":%" PRIu64 " \x1b[31merror: \x1b[0m", (ragel->name ? ragel->name : ""), ragel->lineno, column);
+
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+    fputc('\n', stderr);
+
+    if (!textual)
+        return;
+
+    ragel_exec_error(ragel, ragel_en_print_err, &error);
+    ragel_exec_error(ragel, ragel_en_print_mark, &error);
+}
+
+// Store `name` in the state; the pointer is kept as-is, not copied.
+void
+ragel_set_name(struct ragel *ragel, const char *name)
+{
+    assert(ragel != NULL);
+    ragel->name = name;
+}
+
+// Start a new line: the current position becomes the line start and the line counter grows by one.
+void
+ragel_advance_line(struct ragel *ragel)
+{
+    assert(ragel);
+    ragel->cl = ragel->p;
+    ragel->lineno += 1;
+}
+
+// Hand a new block of input to the state.
+// The block descriptor is copied, the cursor and line start are reset to its beginning,
+// and `eof` is set to the block end only when this is the last block.
+void
+ragel_feed_input(struct ragel *ragel, const bool eof, const struct ragel_mem *input)
+{
+    // `input` is dereferenced below, so it is checked like `ragel`.
+    assert(ragel && input);
+    ragel->input = *input;
+    ragel->cl = ragel->p = ragel->input.data;
+    ragel->pe = ragel->input.end;
+    ragel->eof = (eof ? ragel->pe : NULL);
+}