diff options
| author | Jari Vetoniemi <mailroxas@gmail.com> | 2018-09-26 15:29:17 +0300 | 
|---|---|---|
| committer | Jari Vetoniemi <mailroxas@gmail.com> | 2018-09-26 15:29:17 +0300 | 
| commit | 715d3d48f962d17575ff9de0034f2ac89b59f975 (patch) | |
| tree | 0ec8cd8e5b895bea4771b7c453cab5415fa5e6a7 | |
| parent | d98285e367c29ec9eb1cacf5cf424d6910270efd (diff) | |
Goodbye C compiler, hello colm compiler
| -rw-r--r-- | src/bin/fspec/dump.c | 881 | ||||
| -rw-r--r-- | src/compiler/compiler.lm | 298 | ||||
| -rw-r--r-- | src/compiler/expr.lm | 410 | ||||
| -rw-r--r-- | src/compiler/types.lm | 55 | ||||
| -rw-r--r-- | src/fspec/bcode.c | 199 | ||||
| -rw-r--r-- | src/fspec/bcode.h | 92 | ||||
| -rw-r--r-- | src/fspec/lexer.h | 25 | ||||
| -rw-r--r-- | src/fspec/memory.h | 8 | ||||
| -rw-r--r-- | src/fspec/private/bcode-types.h | 16 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-expr.h | 20 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-expr.rl | 118 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-stack.h | 42 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-stack.rl | 153 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer.rl | 180 | ||||
| -rw-r--r-- | src/fspec/ragel/validator.rl | 96 | ||||
| -rw-r--r-- | src/fspec/validator.h | 19 | ||||
| -rw-r--r-- | src/util/membuf.c | 31 | ||||
| -rw-r--r-- | src/util/membuf.h | 14 | 
18 files changed, 763 insertions, 1894 deletions
| diff --git a/src/bin/fspec/dump.c b/src/bin/fspec/dump.c deleted file mode 100644 index 8ca53b2..0000000 --- a/src/bin/fspec/dump.c +++ /dev/null @@ -1,881 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <limits.h> -#include <string.h> -#include <assert.h> -#include <err.h> - -#include <iconv.h> -#include <errno.h> -#include <locale.h> -#include <langinfo.h> -#include <squash.h> - -#include <fspec/bcode.h> -#include <fspec/lexer.h> -#include <fspec/validator.h> -#include "fspec/ragel/lexer-expr.h" -#include "util/membuf.h" - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - -#if 0 - -static size_t -to_hex(const uint8_t *buf, const size_t buf_sz, char *out, const size_t out_sz, const bool reverse) -{ -   assert(out); -   const char nibble[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; -   const uint8_t nbs = sizeof(nibble) - 1; - -   size_t w = 0, last_non_zero = w; -   for (size_t i = 0; i < buf_sz && out_sz > 2 && w < out_sz - 2; ++i) { -      for (uint8_t c = 0; c < CHAR_BIT / 8 && w < out_sz; ++c) { -         const size_t idx = (reverse ? (buf_sz - 1) - i : i); -         const uint8_t hi = (buf[idx] >> (4 * (c + 1))) & nbs; -         const uint8_t lo = (buf[idx] >> (8 * c)) & nbs; - -         if (w || hi || lo) { -            out[w++] = nibble[hi]; -            out[w++] = nibble[lo]; -            last_non_zero = (hi || lo ? w : last_non_zero); -         } -      } -   } - -   if (!w) { -      out[w++] = nibble[0]; -      out[w++] = nibble[0]; -   } else { -      w = last_non_zero; -   } - -   assert(w < out_sz); -   out[w] = 0; -   return w; -} - -static void -print_dec(const uint8_t *buf, const size_t size, const bool is_signed) -{ -   char hex[2 * sizeof(fspec_num) + 1]; -   to_hex(buf, size, hex, sizeof(hex), true); - -   static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t"); - -   if (is_signed) { -      printf("%ld", (int64_t)strtoll(hex, NULL, 16)); -   } else { -      printf("%lu", (uint64_t)strtoull(hex, NULL, 16)); -   } -} - -static void -print_udec(const uint8_t *buf, const size_t size) -{ -   print_dec(buf, size, false); -} - -static void -print_sdec(const uint8_t *buf, const size_t size) -{ -   print_dec(buf, size, true); -} - -static void -print_hex(const uint8_t *buf, const size_t size) -{ -   char hex[2 * sizeof(fspec_num) + 1]; -   to_hex(buf, size, hex, sizeof(hex), true); -   printf("0x%s", hex); -} - -static void -print_array(const uint8_t *buf, const size_t size, const size_t nmemb, void (*fun)(const uint8_t *buf, const size_t size)) -{ -   const int indent = 4; -   if (nmemb > 8) { -      printf("{\n%*s", indent, ""); -   } else if (nmemb > 1) { -      printf("{ "); -   } - -   for (size_t n = 0; n < nmemb; ++n) { -      fun(buf + n * size, size); -      printf("%s", (nmemb > 1 && n + 1 < nmemb ? ", " : "")); - -      if (n + 1 < nmemb && !((n + 1) % 8)) -         printf("\n%*s", indent, ""); -   } - -   printf("%s\n", (nmemb > 8 ? "\n}" : (nmemb > 1 ? " }" : ""))); -} - -static void -print_str(const char *buf, const size_t size, const size_t nmemb) -{ -   const bool has_nl = memchr(buf, '\n', size * nmemb); -   if (has_nl) -      puts("```"); - -   for (size_t n = 0; n < size * nmemb && buf[n] != 0; ++n) -      printf("%c", buf[n]); - -   puts((has_nl ? "```" : "")); -} - -struct code { -   const enum fspec_op *start, *end, *data; -}; - -static void -dump_ops(const struct code *code) -{ -   for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, false)) { -      printf("%*s- ", (*op == FSPEC_OP_ARG ? 2 : 0), ""); -      switch (*op) { -         case FSPEC_OP_HEADER: -            printf("header\n"); -            break; - -         case FSPEC_OP_DECLARATION: -            printf("declaration\n"); -            break; - -         case FSPEC_OP_READ: -            printf("read\n"); -            break; - -         case FSPEC_OP_GOTO: -            printf("goto\n"); -            break; - -         case FSPEC_OP_FILTER: -            printf("filter\n"); -            break; - -         case FSPEC_OP_VISUAL: -            printf("visual\n"); -            break; - -         case FSPEC_OP_ARG: -            { -               const enum fspec_arg *arg = (void*)(op + 1); -               printf("arg "); -               switch (*arg) { -                  case FSPEC_ARG_STR: -                     printf("str %s\n", fspec_arg_get_cstr(arg, code->data)); -                     break; - -                  case FSPEC_ARG_VAR: -                     printf("var %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); -                     break; - -                  case FSPEC_ARG_NUM: -                     printf("num %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); -                     break; - -                  case FSPEC_ARG_OFF: -                     printf("off %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); -                     break; - -                  case FSPEC_ARG_DAT: -                     printf("dat %" PRI_FSPEC_NUM "\n", fspec_arg_get_num(arg)); -                     break; - -                  case FSPEC_ARG_EOF: -                     printf("eof\n"); -                     break; - -                  case FSPEC_ARG_LAST: -                     break; -               } -            } -            break; - -         case FSPEC_OP_LAST: -            break; -      } -   } -} - -static const enum fspec_op* -get_last_struct(const struct code *code) -{ -   const enum fspec_op *last = NULL; -   for (const enum fspec_op *op = code->start; op; op = fspec_op_next(op, code->end, true)) { -      const enum fspec_arg *arg; -      if (*op == FSPEC_OP_DECLARATION && -         (arg = fspec_op_get_arg(op, code->end, 1, 1<<FSPEC_ARG_NUM)) && -         fspec_arg_get_num(arg) == FSPEC_DECLARATION_STRUCT) { -         last = op; -      } -   } -   return last; -} - -struct dynbuf { -   void *data; -   size_t len, written; -}; - -static inline void -dynbuf_resize(struct dynbuf *buf, const size_t size) -{ -   assert(buf); -   if (!(buf->data = realloc(buf->data, size))) -      err(EXIT_FAILURE, "realloc(%zu)", size); - -   buf->len = size; -} - -static inline void -dynbuf_resize_if_needed(struct dynbuf *buf, const size_t size) -{ -   if (buf->len >= size) -      return; - -   dynbuf_resize(buf, size); -} - -static inline void -dynbuf_grow_if_needed(struct dynbuf *buf, const size_t nmemb) -{ -   assert(buf); -   if (buf->len >= nmemb && buf->written <= buf->len - nmemb) -      return; - -   dynbuf_resize(buf, buf->written + nmemb); -} - -static inline void -dynbuf_append(struct dynbuf *buf, const void *data, const size_t data_sz) -{ -   dynbuf_grow_if_needed(buf, data_sz); -   memcpy((char*)buf->data + buf->written, data, data_sz); -   buf->written += data_sz; -   assert(buf->written <= buf->len); -} - -static inline void -dynbuf_reset(struct dynbuf *buf) -{ -   assert(buf); -   buf->written = 0; -} - -static inline void -dynbuf_release(struct dynbuf *buf) -{ -   assert(buf); -   free(buf->data); -   *buf = (struct dynbuf){0}; -} - -static void -display(const void *buf, const size_t size, const size_t nmemb, const bool is_signed, const enum fspec_visual visual) -{ -   switch (visual) { -      case FSPEC_VISUAL_NUL: -         puts("..."); -         break; - -      case FSPEC_VISUAL_STR: -         print_str(buf, size, nmemb); -         break; - -      case FSPEC_VISUAL_HEX: -         print_array(buf, size, nmemb, print_hex); -         break; - -      case FSPEC_VISUAL_DEC: -         print_array(buf, size, nmemb, (is_signed ? print_sdec : print_udec)); -         break; - -      case FSPEC_VISUAL_LAST: -         break; -   } -} - -struct decl { -   struct dynbuf buf; -   const char *name; -   const void *start, *end; -   size_t nmemb; -   uint8_t size; -   enum fspec_visual visual; -   enum fspec_declaration declaration; -}; - -static void -decl_display(const struct decl *decl) -{ -   assert(decl); -   assert(decl->size * decl->nmemb <= decl->buf.len); -   printf("%s: ", decl->name); -   display(decl->buf.data, decl->size, decl->nmemb, false, decl->visual); -} - -static fspec_num -decl_get_num(const struct decl *decl) -{ -   assert(decl); -   assert(decl->nmemb == 1); -   assert(decl->size * decl->nmemb <= decl->buf.len); -   char hex[2 * sizeof(fspec_num) + 1]; -   to_hex(decl->buf.data, decl->size, hex, sizeof(hex), true); -   static_assert(sizeof(fspec_num) <= sizeof(uint64_t), "fspec_num is larger than uint64_t"); -   return (fspec_num)strtoull(hex, NULL, 16); -} - -static const char* -decl_get_cstr(const struct decl *decl) -{ -   assert(decl); -   return decl->buf.data; -} - -struct context { -   struct code code; -   struct decl *decl; -   fspec_num decl_count; -}; - -static fspec_num -var_get_num(const struct context *context, const enum fspec_arg *arg) -{ -   assert(context && arg); -   return decl_get_num(&context->decl[fspec_arg_get_num(arg)]); -} - -static const char* -var_get_cstr(const struct context *context, const enum fspec_arg *arg) -{ -   assert(context && arg); -   return decl_get_cstr(&context->decl[fspec_arg_get_num(arg)]); -} - -enum type { -   TYPE_NUM, -   TYPE_STR, -}; - -static enum type -var_get_type(const struct context *context, const enum fspec_arg *arg) -{ -   assert(context && arg); -   const struct decl *decl = &context->decl[fspec_arg_get_num(arg)]; -   switch (decl->visual) { -      case FSPEC_VISUAL_DEC: -      case FSPEC_VISUAL_HEX: -      case FSPEC_VISUAL_NUL: -         return TYPE_NUM; - -      case FSPEC_VISUAL_STR: -         return TYPE_STR; - -      case FSPEC_VISUAL_LAST: -         break; -   } -   return ~0; -} - -static void -filter_decompress(const struct context *context, struct decl *decl) -{ -   assert(decl); - -   const enum fspec_arg *arg; -   if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<<FSPEC_ARG_STR))) -      errx(EXIT_FAILURE, "missing compression"); - -   SquashCodec *codec; -   const char *algo = fspec_arg_get_cstr(arg, context->code.data); -   if (!(codec = squash_get_codec(algo))) -      errx(EXIT_FAILURE, "unknown compression '%s'", algo); - -   SquashOptions *opts; -   if (!(opts = squash_options_new(codec, NULL))) -      errx(EXIT_FAILURE, "squash_options_new"); - -   size_t dsize = squash_codec_get_uncompressed_size(codec, decl->buf.len, decl->buf.data); -   dsize = (dsize ? dsize : decl->buf.len * 2); - -   { -      const enum fspec_arg *var = arg; -      if ((arg = fspec_arg_next(arg, context->code.end, 1, 1<<FSPEC_ARG_NUM | 1<<FSPEC_ARG_VAR))) { -         var = arg; - -         switch (*var) { -            case FSPEC_ARG_NUM: -               dsize = fspec_arg_get_num(arg); -               break; - -            case FSPEC_ARG_VAR: -               dsize = var_get_num(context, arg); -               break; - -            default: -               break; -         } -      } - -      for (; (var = fspec_arg_next(var, context->code.end, 1, 1<<FSPEC_ARG_STR));) { -         const char *key = fspec_arg_get_cstr(var, context->code.data); -         if (!(var = fspec_arg_next(var, context->code.end, 1, ~0))) -            errx(EXIT_FAILURE, "expected argument for key '%s'", key); - -         switch (*var) { -            case FSPEC_ARG_STR: -               squash_options_set_string(opts, key, fspec_arg_get_cstr(var, context->code.data)); -               break; - -            case FSPEC_ARG_NUM: -               squash_options_set_int(opts, key, fspec_arg_get_num(var)); -               break; - -            case FSPEC_ARG_VAR: -               if (var_get_type(context, var) == TYPE_STR) { -                  squash_options_set_string(opts, key, var_get_cstr(context, var)); -               } else { -                  squash_options_set_int(opts, key, var_get_num(context, var)); -               } -               break; - -            default: -               break; -         } -      } -   } - -   // what a horrible api -   squash_object_ref(opts); - -   SquashStatus r; -   struct dynbuf buf = {0}; -   dynbuf_resize(&buf, dsize); -   while ((r = squash_codec_decompress_with_options(codec, &buf.len, buf.data, decl->buf.len, decl->buf.data, opts)) == SQUASH_BUFFER_FULL) -      dynbuf_resize(&buf, dsize *= 2); - -   dynbuf_resize_if_needed(&buf, (buf.written = buf.len)); -   squash_object_unref(opts); - -   if (r != SQUASH_OK) -      errx(EXIT_FAILURE, "squash_codec_decompress(%zu, %zu) = %d: %s", dsize, decl->buf.len, r, squash_status_to_string(r)); - -   dynbuf_release(&decl->buf); -   decl->buf = buf; -   decl->nmemb = buf.len / decl->size; -} - -static void -filter_decode(const struct context *context, struct decl *decl) -{ -   assert(decl); - -   const enum fspec_arg *arg; -   if (!(arg = fspec_op_get_arg(context->code.start, context->code.end, 2, 1<<FSPEC_ARG_STR))) -      errx(EXIT_FAILURE, "missing encoding"); - -   const char *encoding = fspec_arg_get_cstr(arg, context->code.data); - -   static const char *sys_encoding; -   if (!sys_encoding) { -      setlocale(LC_ALL, ""); -      sys_encoding = nl_langinfo(CODESET); -   } - -   iconv_t iv; -   if ((iv = iconv_open(sys_encoding, encoding)) == (iconv_t)-1) -      err(EXIT_FAILURE, "iconv_open(%s, %s)", sys_encoding, encoding); - -   struct dynbuf buf = {0}; -   const uint8_t *in = decl->buf.data; -   size_t in_left = decl->buf.written; -   do { -      char enc[1024], *out = enc; -      size_t out_left = sizeof(enc); - -      errno = 0; -      if (iconv(iv, (char**)&in, &in_left, &out, &out_left) == (size_t)-1 && errno != E2BIG) -         err(EXIT_FAILURE, "iconv(%s, %s)", sys_encoding, encoding); - -      dynbuf_append(&buf, enc, sizeof(enc) - out_left); -   } while (in_left > 0); - -   iconv_close(iv); - -   dynbuf_release(&decl->buf); -   decl->buf = buf; -   decl->nmemb = buf.len / decl->size; -} - -static void -call(const struct context *context, FILE *f) -{ -   assert(context && f); - -   struct decl *decl = NULL; -   for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) { -      if (decl && op == decl->end) { -         decl_display(decl); -         decl = NULL; -      } - -      switch (*op) { -         case FSPEC_OP_DECLARATION: -            { -               const enum fspec_arg *arg; -               arg = fspec_op_get_arg(op, context->code.end, 2, 1<<FSPEC_ARG_NUM); -               decl = &context->decl[fspec_arg_get_num(arg)]; -               dynbuf_reset(&decl->buf); -            } -            break; - -         case FSPEC_OP_READ: -            { -               assert(decl); -               const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM); -               static_assert(CHAR_BIT == 8, "doesn't work otherwere right now"); -               decl->size = fspec_arg_get_num(arg) / 8; -               decl->nmemb = 0; - -               for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) { -                  switch (*var) { -                     case FSPEC_ARG_NUM: -                     case FSPEC_ARG_VAR: -                        { -                           const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var)); -                           if (v == 0) { -                              goto noop; -                           } else if (v > 1) { -                              const size_t nmemb = (decl->nmemb ? decl->nmemb : 1) * v; -                              dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb); -                              const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f); -                              decl->buf.written += decl->size * read; -                              decl->nmemb += read; -                           } -                        } -                        break; - -                     case FSPEC_ARG_STR: -                        break; - -                     case FSPEC_ARG_EOF: -                        { -                           const size_t nmemb = (decl->nmemb ? decl->nmemb : 1); -                           size_t read = 0, r = nmemb; -                           while (r == nmemb) { -                              dynbuf_grow_if_needed(&decl->buf, decl->size * nmemb); -                              read += (r = fread((char*)decl->buf.data + decl->buf.written, decl->size, nmemb, f)); -                              decl->buf.written += decl->size * r; -                           }; -                           decl->nmemb += read; -                        } -                        break; - -                     default: -                        break; -                  } -               } -noop: - -               if (!fspec_arg_next(arg, context->code.end, 1, ~0)) { -                  dynbuf_grow_if_needed(&decl->buf, decl->size * 1); -                  const size_t read = fread((char*)decl->buf.data + decl->buf.written, decl->size, 1, f); -                  decl->buf.written += decl->size * read; -                  decl->nmemb = read; -               } - -               assert(decl->nmemb != 0); -            } -            break; - -         case FSPEC_OP_GOTO: -            { -               decl = NULL; -               const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_VAR); -               const struct decl *d = &context->decl[fspec_arg_get_num(arg)]; -               struct context c = *context; -               c.code.start = d->start; -               c.code.end = d->end; - -               for (const enum fspec_arg *var = arg; (var = fspec_arg_next(var, context->code.end, 1, ~0));) { -                  switch (*var) { -                     case FSPEC_ARG_NUM: -                     case FSPEC_ARG_VAR: -                        { -                           const fspec_num v = (*var == FSPEC_ARG_NUM ? fspec_arg_get_num(var) : var_get_num(context, var)); -                           for (fspec_num i = 0; i < v; ++i) -                              call(&c, f); -                        } -                        break; - -                     // XXX: How to handle STR with stdin? -                     // With fseek would be easy. -                     case FSPEC_ARG_STR: -                        break; - -                     case FSPEC_ARG_EOF: -                        while (!feof(f)) -                           call(&c, f); -                        break; - -                     default: -                        break; -                  } -               } - -               if (!fspec_arg_next(arg, context->code.end, 1, ~0)) -                  call(&c, f); -            } -            break; - -         case FSPEC_OP_FILTER: -            { -               assert(decl); -               const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_STR); - -               const struct { -                  const char *name; -                  void (*fun)(const struct context*, struct decl*); -               } map[] = { -                  { .name = "encoding", .fun = filter_decode }, -                  { .name = "compression", .fun = filter_decompress }, -               }; - -               const char *filter = fspec_arg_get_cstr(arg, context->code.data); -               for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { -                  if (!strcmp(filter, map[i].name)) { -                     struct context c = *context; -                     c.code.start = op; -                     map[i].fun(&c, decl); -                     break; -                  } - -                  if (i == ARRAY_SIZE(map) - 1) -                     warnx("unknown filter '%s'", filter); -               } -            } -            break; - -         case FSPEC_OP_VISUAL: -            { -               assert(decl); -               const enum fspec_arg *arg = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM); -               decl->visual = fspec_arg_get_num(arg); -            } -            break; - -         case FSPEC_OP_ARG: -         case FSPEC_OP_HEADER: -         case FSPEC_OP_LAST: -            break; -      } -   } - -   if (decl && context->code.end == decl->end) -      decl_display(decl); -} - -static void -setup(const struct context *context) -{ -   assert(context); - -   for (const enum fspec_op *op = context->code.start; op; op = fspec_op_next(op, context->code.end, true)) { -      switch (*op) { -         case FSPEC_OP_DECLARATION: -            { -               const enum fspec_arg *arg[4]; -               arg[0] = fspec_op_get_arg(op, context->code.end, 1, 1<<FSPEC_ARG_NUM); -               arg[1] = fspec_arg_next(arg[0], context->code.end, 1, 1<<FSPEC_ARG_NUM); -               arg[2] = fspec_arg_next(arg[1], context->code.end, 1, 1<<FSPEC_ARG_OFF); -               arg[3] = fspec_arg_next(arg[2], context->code.end, 1, 1<<FSPEC_ARG_STR); -               const fspec_num id = fspec_arg_get_num(arg[1]); -               struct decl *decl = &context->decl[id]; -               decl->declaration = fspec_arg_get_num(arg[0]); -               decl->name = fspec_arg_get_cstr(arg[3], context->code.data); -               decl->visual = FSPEC_VISUAL_DEC; -               decl->start = op; -               decl->end = (char*)op + fspec_arg_get_num(arg[2]); -               assert(!decl->buf.data); -            } -            break; - -         default: -            break; -      } -   } -} - -static void -execute(const struct fspec_mem *mem) -{ -   assert(mem); - -   struct context context = { -      .code.start = mem->data, -      .code.end = (void*)((char*)mem->data + mem->len), -      .code.data = mem->data -   }; - -   printf("output: %zu bytes\n", mem->len); -   dump_ops(&context.code); - -   const enum fspec_arg *arg = fspec_op_get_arg(context.code.data, context.code.end, 2, 1<<FSPEC_ARG_NUM); -   context.decl_count = fspec_arg_get_num(arg); - -   if (!(context.decl = calloc(context.decl_count, sizeof(*context.decl)))) -      err(EXIT_FAILURE, "calloc(%zu, %zu)", context.decl_count, sizeof(*context.decl)); - -   setup(&context); - -   puts("\nexecution:"); -   context.code.start = get_last_struct(&context.code); -   assert(context.code.start); -   call(&context, stdin); - -   for (fspec_num i = 0; i < context.decl_count; ++i) -      dynbuf_release(&context.decl[i].buf); - -   free(context.decl); -} -#endif - -static FILE* -fopen_or_die(const char *path, const char *mode) -{ -   assert(path && mode); - -   FILE *f; -   if (!(f = fopen(path, mode))) -      err(EXIT_FAILURE, "fopen(%s, %s)", path, mode); - -   return f; -} - -#define container_of(ptr, type, member) ((type *)((char *)(1 ? (ptr) : &((type *)0)->member) - offsetof(type, member))) - -struct expr { -   struct fspec_expr expr; -   struct membuf output; -   FILE *file; -}; - -static size_t -fspec_expr_write(struct fspec_expr *expr, const void *output, const size_t size, const size_t nmemb) -{ -   assert(expr && output); -   // struct expr *l = container_of(expr, struct expr, expr); -   (void)expr, (void)size, (void)nmemb; -   return nmemb; -} - -static size_t -fspec_expr_read(struct fspec_expr *expr, void *input, const size_t size, const size_t nmemb) -{ -   assert(expr && input); -   struct expr *l = container_of(expr, struct expr, expr); -   return fread(input, size, nmemb, l->file); -} - -struct lexer { -   struct fspec_lexer lexer; -   struct membuf output; -   FILE *file; -}; - -static size_t -fspec_lexer_write(struct fspec_lexer *lexer, const enum fspec_lexer_section section, const void *output, const size_t size, const size_t nmemb) -{ -   assert(lexer && output); -   // struct lexer *l = container_of(lexer, struct lexer, lexer); -   (void)lexer, (void)section, (void)size, (void)nmemb; -   return nmemb; -} - -static size_t -fspec_lexer_read(struct fspec_lexer *lexer, void *input, const size_t size, const size_t nmemb) -{ -   assert(lexer && input); -   struct lexer *l = container_of(lexer, struct lexer, lexer); -   return fread(input, size, nmemb, l->file); -} - -#if 0 -static size_t -fspec_validator_read(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb) -{ -   assert(validator && ptr); -   assert(ptr == validator->mem.input.data); -   const size_t read = validator->mem.input.len / size; -   assert((validator->mem.input.len && read == nmemb) || (!validator->mem.input.len && !read)); -   validator->mem.input.len -= read * size; -   assert(validator->mem.input.len == 0); -   return read; -} -#endif - -int -main(int argc, const char *argv[]) -{ -   if (argc < 2) -      errx(EXIT_FAILURE, "usage: %s file.spec < data", argv[0]); - -   char output[4096]; - -   { -      char input[4096]; -      struct expr l = { -         .expr = { -            .ops.read = fspec_expr_read, -            .ops.write = fspec_expr_write, -            .mem.input = { .data = input, .len = sizeof(input) }, -         }, -         .file = fopen_or_die(argv[1], "rb"), -         .output.mem = { .data = output, .len = sizeof(output) }, -      }; - -      if (!fspec_expr_parse(&l.expr, argv[1])) -         exit(EXIT_FAILURE); - -      fclose(l.file); -      // bcode = l.expr.mem.output; -   } - -   { -      char input[4096]; -      struct lexer l = { -         .lexer = { -            .ops.read = fspec_lexer_read, -            .ops.write = fspec_lexer_write, -            .mem.input = { .data = input, .len = sizeof(input) }, -         }, -         .file = fopen_or_die(argv[1], "rb"), -         .output.mem = { .data = output, .len = sizeof(output) }, -      }; - -      if (!fspec_lexer_parse(&l.lexer, argv[1])) -         exit(EXIT_FAILURE); - -      fclose(l.file); -      // bcode = l.lexer.mem.output; -   } - -#if 0 - -   { -      struct fspec_validator validator = { -         .ops.read = fspec_validator_read, -         .mem.input = bcode, -      }; - -      if (!fspec_validator_parse(&validator, argv[1])) -         exit(EXIT_FAILURE); -   } - -   execute(&bcode); -#endif -   return EXIT_SUCCESS; -} diff --git a/src/compiler/compiler.lm b/src/compiler/compiler.lm new file mode 100644 index 0000000..59b0ee3 --- /dev/null +++ b/src/compiler/compiler.lm @@ -0,0 +1,298 @@ +include 'expr.lm' + +context fspec +   token WS / space / + +   context primitive +      token TYPE_SIGN / [us] / +      token TYPE_BITS / [1-9][0-9]* / +      int strtoull(a:str, b:int) = c_strtoull + +      def type +         signed:bool +         bits:int +         [TYPE_SIGN TYPE_BITS] { +            lhs.signed = ($r1 == 's') +            lhs.bits = strtoull($r2, 10) +         } +   end + +   context container +      context enum +         lex +            ignore / space+ / +            literal `= `, `{ `} +         end + +         literal `enum + +         int +         const_int_expr(expr:collapser::collapsed) { +            if (!expr || !expr.result.value || !expr.result.value.number) reject +            return expr.result.value.number.value +         } + +         def item +            value:int +            [name:name::type `= expr::enum::type `, item] { lhs.value = const_int_expr(r3.collapsed) } +         |  [name:name::type `= expr::enum::type] { lhs.value = const_int_expr(r3.collapsed) } +         |  [name:name::type `, item] { lhs.value = 0 } # TODO: count +         |  [name:name::type] { lhs.value = 0 } # TODO: count + +         def type +            name:str +            items:item+ +            [type:`enum WS+ name::type? `{ item+ `}] { if (name::type in r3) lhs.name = $(name::type in r3) lhs.items = r5 } +      end + +      context strukt # <- struct is taken :( +         lex +            ignore / space+ / +            literal `{ `} +         end + +         literal `struct + +         def item +            [data:declaration::type] + +         def type +            name:str +            items:item+ +            [type:`struct WS+ name::type? `{ item+ `}] { if (name::type in r3) lhs.name = $(name::type in r3) lhs.items = r5 } +      end + +      context select +         lex +            ignore / space+ / +            literal `( `) `{ `} `* +         end + +         literal `select + +         def item +            [expr:expr::paren::type `) data:declaration::type] +         |  [expr:`* `) data:declaration::type] + +         def type +            name:str +            items:item+ # BUG: marking item+ with items: in the match below causes weird behaviour +            [type:`select `( expr::paren::type `) `{ item+ `}] { lhs.items = r6 } +      end + +      def type +         [data:enum::type] | [data:strukt::type] | [data:select::type] +   end + +   context declaration +      lex +         ignore / space+ / +         literal `; `| `[ `] +      end + +      literal `enum `struct +      token VISUAL / 'nul' | 'dec' | 'hex' | 'str' / + +      def visual +         [WS+ name:VISUAL] + +      def filter +         [`| function:reference::function::type] + +      def length +         [`[ expr:expr::bracket::type `]] + +      def extra +         length:collapser::collapsed +         [length* filter:filter* visual:visual?] { +            f:str = '' +            for l:length in repeat(r1) { +               if (f != '') +                  f = f + '*' + +               if (l.expr.collapsed.result.value) { +                  f = f + '(' + $l.expr.collapsed.result.value + ')' +               } else { +                  f = f + '(' + $l.expr.collapsed + ')' +               } +            } +            lhs.length = collapser::collapsestr(f) +         } + +      def type +         # enum name <primitive> name <extra>; +         [cref:`enum WS+ parent:name::type WS+ primitive:primitive::type WS+ name:name::type extra:extra `;] commit +         # struct name name <extra>; +      |  [cref:`struct WS+ parent:name::type WS+ name:name::type extra:extra `;] commit +         # <primitive> name <extra>; +      |  [primitive:primitive::type WS+ name:name::type extra:extra `;] commit +         # select ((thing)) { ... } <extra>; INVALID +      |  [container::select::type extra `;] commit { reject } +         # select ((thing)) { ... } <primitive> name <extra>; INVALID +      |  [container::select::type primitive::type WS+ name::type extra `;] commit { reject } +         # struct (optional) { ... } <primitive> name <extra>; INVALID +      |  [container::strukt::type primitive::type WS+ name::type extra `;] commit { reject } +         # enum (optional) { ... } <primitive> name <extra>; +      |  [container:container::type primitive:primitive::type WS+ name:name::type extra:extra `;] commit +         # select ((expr)) { ... } name <extra>; +         # struct (optional) { ... } name <extra>; +      |  [container:container::type name:name::type extra:extra `;] +         # (enum|struct) name { ... }; +      |  [container:container::type `;] +   end + +   def source +      [items:declaration::type*] commit +end + +parse source:fspec::source[stdin] + +if (!source) { +   print(error) +   exit(1) +} + +struct scope +   names:map<str, map<str, any>> +end + +global g_scopes:list<scope> = new list<scope>() + +void +push_scope() { +   s:scope = new scope() +   s->names = new map<str, map<str, any>>() +   g_scopes->push_head(s) +} + +void +pop_scope() +{ +   g_scopes->pop_head() +} + +any +lookup_no_error(type:str, name:str) { +   for s:scope in g_scopes { +      cmap:map<str, any> = s->names->find(type) +      if (cmap) { +         var:any = cmap->find(name) +         if (var) +            return var +      } +   } +   return nil +} + +any +insert(type:str, name:str, var:any) +{ +   if (!name) +      return var # <anon> + +   if (type != 'variable' && lookup_no_error(type, name)) { +      print('`', type, ' ', name, '` is already declared as a `', type, '` in current scope!\n') +      exit(1) +   } + +   cmap:map<str, any> = g_scopes->top->names->find(type) + +   if (!cmap) { +      cmap = new map<str, any>() +   } else if (cmap->find(name)) { +      print('`', type, ' ', name, '` is already declared as a `', type, '` in current scope!\n') +      exit(1) +   } + +   cmap->insert(name, var) +   g_scopes->top->names->insert(type, cmap) +   return var +} + +any +lookup(type:str, name:str) +{ +   r:any = lookup_no_error(type, name) +   if (!r) { +      print('`', type, ' ', name, '` is not declared in this or outer scope!\n') +      exit(1) +   } +   return r +} + +str +container_name_str(s:ref<str>) { if (!s) return '<anon>' return s } + +str +signed_str(s:ref<bool>) { if (s) return 'signed' return 'unsigned' } + +void +print_declaration(d:fspec::declaration::type) +{ +   insert('variable', $d.name, d) +   print('variable `', $d.name, '` is ') + +   c:fspec::container::type +   if (d.cref) c = lookup($d.cref, $d.parent) else c = d.container + +   if (c) +      print('`', c.data.type, ' ', container_name_str(c.data.name), '` ') + +   if (d.primitive) +      print(d.primitive.bits, ' bits and ', signed_str(d.primitive.signed)) + +   print('\n') + +   if (d.extra) { +      if (d.extra.length) { +         if (!d.extra.length.result.value || d.extra.length.result.value.reference) { +            print('   it has a variable length that needs to be computed with formula `', $d.extra.length, '`\n') +         } else { +            if (d.extra.length.result.value.number) { +               print('   it has a constant length of `', $d.extra.length.result.value, '`\n') +            } else if (d.extra.length.result.value.string) { +               print('   its length will increase until pattern `', $d.extra.length.result.value.string.raw, '` has been read from stream\n') +            } +         } +      } + +      for f:fspec::declaration::filter in repeat(d.extra.filter) +         print('   it needs to be filtered with `', $f.function, '`\n') + +      for v:fspec::declaration::visual in child(d.extra.visual) +         print('   and it should be visualized as `', $v.name, '`\n') +   } +} + +void +walk(s:fspec::container::type) +{ +   insert($s.data.type, s.data.name, s) +   if ($s.data.type == 'enum') { +      for i:fspec::container::enum::item in repeat(s.data.items) +         insert('variable', $i.name, i) +   } else if ($s.data.type == 'struct') { +      push_scope() +      for d:fspec::container::strukt::item in repeat(s.data.items) { +         if (d.data.container) +            walk(d.data.container) +         if (d.data.name) +            print_declaration(d.data) +      } +      pop_scope() +   } else if ($s.data.type == 'select') { +      push_scope() +      for d:fspec::container::select::item in repeat(s.data.items) { +         if (d.data.container) +            walk(d.data.container) +         if (d.data.name) +            print_declaration(d.data) +      } +      pop_scope() +   } +} + +push_scope() +for s:fspec::declaration::type in repeat(source.items) +   walk(s.container) +pop_scope() diff --git a/src/compiler/expr.lm b/src/compiler/expr.lm new file mode 100644 index 0000000..d615358 --- /dev/null +++ b/src/compiler/expr.lm @@ -0,0 +1,410 @@ +include 'types.lm' + +global RTYPE_UNKNOWN:int = 0 +global RTYPE_NUMBER:int = 1 +global RTYPE_STRING:int = 2 + +context expr +   context enum +      token EXPR / (any - [,}])+ / + +      def type +         collapsed:collapser::collapsed +         [EXPR] { +            lhs.collapsed = collapser::collapsestr($r1) +            if (!lhs.collapsed) reject +         } +   end + +   context paren +      literal `( `) +      token EXPR / (any - [()])+ / + +      def syntax +         [EXPR] | [`( syntax `)] + +      def type +         collapsed:collapser::collapsed +         [syntax] { +            lhs.collapsed = collapser::collapsestr($r1) +            if (!lhs.collapsed) reject +         } +   end + +   context bracket +      literal `[ `] +      token EXPR / (any - '[' - ']')+ / + +      def syntax +         [EXPR] | [`[ syntax `]] + +      def type +         collapsed:collapser::collapsed +         [syntax] { +            lhs.collapsed = collapser::collapsestr($r1) +            if (!lhs.collapsed) reject +         } +   end + +   context arg +      literal `( `) +      token EXPR / (any - [(),])+ / + +      def syntax +         [EXPR] | [`( syntax `)] + +      def type +         collapsed:collapser::collapsed +         [syntax] { +            lhs.collapsed = collapser::collapsestr($r1) +            if (!lhs.collapsed) reject +         } +   end +end + +context reference +   context function +      literal `( `) `, + +      def arg +         [expr::arg::type `, arg] | [expr::arg::type] + +      def type +         [name:name::type `( args:arg* `)] +   end + +   context variable +      def type +         [name:name::type] +   end + +   def type +      [function::type] +   |  [variable::type] +end + +context collapser +   # BUG: lists seem to not really work well here +   #      implement simple native stack +   int op_stack_new() = c_op_stack_new +   int op_stack_free(stack:int) = c_op_stack_free +   str op_stack_top(stack:int) = c_op_stack_top +   bool op_stack_push(stack:int, op:str) = c_op_stack_push +   str op_stack_pop(stack:int) = c_op_stack_pop + +   stack:int +   values:str +   next_is_unary:bool + +   token WS / space / +   literal `+ `- +   literal `( `) `+# `-# `! `~ `* `/ `% `#+ `#- `<< `>> `< `> `<= `>= `== `!= `& `^ `| `&& `|| `? `: +   literal `. `[ `] +   literal `sizeof + +   def unary_unambi +      [`!] | [`~] + +   def binary_unambi +      [`.] | [`*] | [`/] | [`%] | [`<<] | [`>>] | [`<] | [`>] | [`<=] | [`>=] | [`==] | [`!=] | [`&] | [`^] | [`|] | [`&&] | [`||] + +   def ternary +      [`:] + +   context reducer +      int modulo(a:int, b:int) = c_modulo +      int bitnot(a:int) = c_bitnot +      int bitand(a:int, b:int) = c_bitand +      int bitor(a:int, b:int) = c_bitor +      int bitxor(a:int, b:int) = c_bitxor +      int shiftl(a:int, b:int) = c_shiftl +      int shiftr(a:int, b:int) = c_shiftr +      int subscript(a:str, b:int) = c_subscript + +      def builtin +         value:value +         [`sizeof `( string::type `)] { lhs.value = parse value[$r3.length] } + +      def value +         rtype:int +         [builtin:builtin] { lhs = r1.value } +      |  [number:number::type] { lhs.rtype = RTYPE_NUMBER } +      |  [string:string::type] { lhs.rtype = RTYPE_STRING } +      |  [reference:reference::type] + +      def unary +         [`+#] | [`-#] | [unary_unambi] + +      def binary +         [`#+] | [`#-] | [binary_unambi] + +      def anynary +         [unary] | [binary] | [ternary] + +      def numop +         value:value +         [number::type WS `-#] { lhs.value = parse value[$(r1.value - (r1.value * 2))] } +      |  [number::type WS `+#] { lhs.value = parse value[$r1.value] } +      |  [number::type WS `!] { r:int = 0 if (r1.value == 0) r = 1 lhs.value = parse value[$r] } +      |  [number::type WS `~] { lhs.value = parse value[$bitnot(r1.value)] } +      |  [number::type WS number::type WS `*] { lhs.value = parse value[$(r1.value * r3.value)] } +      |  [number::type WS number::type WS `/] { lhs.value = parse value[$(r1.value / r3.value)] } +      |  [number::type WS number::type WS `#+] { lhs.value = parse value[$(r1.value + r3.value)] } +      |  [number::type WS number::type WS `#-] { lhs.value = parse value[$(r1.value - r3.value)] } +      |  [number::type WS number::type WS `<<] { lhs.value = parse value[$shiftl(r1.value, r3.value)] } +      |  [number::type WS number::type WS `>>] { lhs.value = parse value[$shiftr(r1.value, r3.value)] } +      |  [number::type WS number::type WS `<] { r:int = 0 if (r1.value < r3.value) r = 1 lhs.value = parse value[$r] } +      |  [number::type WS number::type WS `>] { r:int = 0 if (r1.value > r3.value) r = 1 lhs.value = parse value[$r] } +      |  [number::type WS number::type WS `<=] { r:int = 0 if (r1.value <= r3.value) r = 1 lhs.value = parse value[$r] } +      |  [number::type WS number::type WS `>=] { r:int = 0 if (r1.value >= r3.value) r = 1 lhs.value = parse value[$r] } +      |  [number::type WS number::type WS `==] { r:int = 0 if (r1.value == r3.value) r = 1 lhs.value = parse value[$r] } +      |  [number::type WS number::type WS `!=] { r:int = 0 if (r1.value != r3.value) r = 1 lhs.value = parse value[$r] } +      |  [number::type WS number::type WS `&] { lhs.value = parse value[$bitand(r1.value, r3.value)] } +      |  [number::type WS number::type WS `^] { lhs.value = parse value[$bitxor(r1.value, r3.value)] } +      |  [number::type WS number::type WS `|] { lhs.value = parse value[$bitor(r1.value, r3.value)] } +      |  [number::type WS number::type WS `&&] { r:int = 0 if (r1.value && r3.value) r = 1 lhs.value = parse value[$r] } +      |  [number::type WS number::type WS `||] { r:int = 0 if (r1.value || r3.value) r = 1 lhs.value = parse value[$r] } +      |  [number::type WS number::type WS number::type WS `:] { if (r1.value) lhs.value = parse value[$r3] else lhs.value = parse value[$r5] } +      |  [number::type WS value WS `]] commit { reject } + +      # strings can only be operated with `!= and `== against other strings +      def stringop +         value:value +         [string::type WS string::type WS `==] commit { r:int = 0 if (r1.raw == r3.raw) r = 1 lhs.value = parse value[$r] } +      |  [string::type WS string::type WS `!=] commit { r:int = 0 if (r1.raw != r3.raw) r = 1 lhs.value = parse value[$r] } +      |  [number::type WS string::type WS string::type WS `:] { if (r1.value) lhs.value = parse value[$r3] else lhs.value = parse value[$r5] } +      |  [string::type WS unary] commit { reject } # <unary> str +      |  [string::type WS number::type WS binary] commit { reject } # str <binary> num +      |  [number::type WS string::type WS binary] commit { reject } # num <binary> str +      |  [string::type WS string::type WS binary] { reject } # str <math> str +      |  [value WS number::type WS string::type WS ternary] commit { reject } # (v ? num : str) +      |  [value WS string::type WS number::type WS ternary] commit { reject } # (v ? str : num) +      |  [string::type WS value WS value WS ternary] commit { reject } # (str ? v : v) +      |  [string::type WS number::type WS `]] { +         if (r1.length <= r3.value) { +            print('subscript out of bounds\n') +            reject +         } else { +            lhs.value = parse value[$subscript($r1.raw, r3.value)] +         } +      } + +      def valueop +         rtype:int +         [value WS value WS `]] { lhs.rtype = RTYPE_NUMBER } +      |  [value WS unary] { lhs.rtype = RTYPE_NUMBER } +      |  [value WS value WS binary] { lhs.rtype = RTYPE_NUMBER } +      |  [value WS value WS value WS ternary] { if (r3.rtype != r5.rtype) reject lhs.rtype = r1.rtype } + +      def operation +         rtype:int +          [numop] { lhs = parse operation[$r1.value] } +       |  [stringop] { lhs = parse operation[$r1.value] } +       |  [valueop] { lhs.rtype = r1.rtype } +       |  [value] { lhs.rtype = r1.rtype } +       |  [value WS] { lhs.rtype = r1.rtype } +       |  [operation WS] { lhs.rtype = r1.rtype } +       |  [operation anynary] { lhs.rtype = r1.rtype } + +      def collapsed +         value:value +         [operation+] commit { +            # we check return type of every operation to make sure we don't operate on different types +            rtype:int = RTYPE_UNKNOWN +            for i:operation in repeat(r1) { +               if (i.rtype != RTYPE_UNKNOWN && rtype != RTYPE_UNKNOWN && i.rtype != rtype) +                  reject +               rtype = i.rtype +            } +            lhs.value = parse value[$lhs] +         } +   end + +   def operator +      precedence:int +      rassoc:bool +      open:str +      close:str +      args:int +      [`[]  { lhs.precedence = 0  lhs.rassoc = false lhs.args = 0 lhs.open = ']' } +   |  [`]]  { lhs.precedence = 0  lhs.rassoc = false lhs.args = 2 lhs.close = '[' } +   |  [`(]  { lhs.precedence = 0  lhs.rassoc = false lhs.args = 0 lhs.open = ')' } +   |  [`)]  { lhs.precedence = 0  lhs.rassoc = false lhs.args = 0 lhs.close = '(' } +   |  [`.]  { lhs.precedence = 0  lhs.rassoc = false lhs.args = 2 } +   |  [`+#] { lhs.precedence = 1  lhs.rassoc = true  lhs.args = 1 } +   |  [`-#] { lhs.precedence = 1  lhs.rassoc = true  lhs.args = 1 } +   |  [`!]  { lhs.precedence = 1  lhs.rassoc = true  lhs.args = 1 } +   |  [`~]  { lhs.precedence = 1  lhs.rassoc = true  lhs.args = 1 } +   |  [`*]  { lhs.precedence = 2  lhs.rassoc = false lhs.args = 2 } +   |  [`/]  { lhs.precedence = 2  lhs.rassoc = false lhs.args = 2 } +   |  [`%]  { lhs.precedence = 2  lhs.rassoc = false lhs.args = 2 } +   |  [`#+] { lhs.precedence = 3  lhs.rassoc = false lhs.args = 2 } +   |  [`#-] { lhs.precedence = 3  lhs.rassoc = false lhs.args = 2 } +   |  [`<<] { lhs.precedence = 4  lhs.rassoc = false lhs.args = 2 } +   |  [`>>] { lhs.precedence = 4  lhs.rassoc = false lhs.args = 2 } +   |  [`<]  { lhs.precedence = 5  lhs.rassoc = false lhs.args = 2 } +   |  [`>]  { lhs.precedence = 5  lhs.rassoc = false lhs.args = 2 } +   |  [`<=] { lhs.precedence = 5  lhs.rassoc = false lhs.args = 2 } +   |  [`>=] { lhs.precedence = 5  lhs.rassoc = false lhs.args = 2 } +   |  [`==] { lhs.precedence = 6  lhs.rassoc = false lhs.args = 2 } +   |  [`!=] { lhs.precedence = 6  lhs.rassoc = false lhs.args = 2 } +   |  [`&]  { lhs.precedence = 7  lhs.rassoc = false lhs.args = 2 } +   |  [`^]  { lhs.precedence = 8  lhs.rassoc = false lhs.args = 2 } +   |  [`|]  { lhs.precedence = 9  lhs.rassoc = false lhs.args = 2 } +   |  [`&&] { lhs.precedence = 10 lhs.rassoc = false lhs.args = 2 } +   |  [`||] { lhs.precedence = 11 lhs.rassoc = false lhs.args = 2 } +   |  [`?]  { lhs.precedence = 12 lhs.rassoc = true  lhs.args = 0 lhs.open = ':' } +   |  [`:]  { lhs.precedence = 12 lhs.rassoc = true  lhs.args = 3 } + +   void +   operate(op:operator) +   { +      if (!op.args) +         return 0 + +      s:str = values + $op +      # print('collapse: ', s, ' -> ') +      r:reducer::collapsed = parse reducer::collapsed[s] + +      if (!r) { +         reject +      } else { +         # print(^r, '\n') +         values = $r + ' ' +      } +   } + +   void +   flush_all() +   { +      while (op_stack_top(stack)) +         operate(parse operator[op_stack_pop(stack)]) +   } + +   void +   flush_until(name:str) +   { +      while (op_stack_top(stack) && op_stack_top(stack) != name) +         operate(parse operator[op_stack_pop(stack)]) +   } + +   void +   flush_ordered(name:str) +   { +      op:operator = parse operator[name] + +      top:operator +      if (op_stack_top(stack)) top = parse operator[op_stack_top(stack)] +      while (top && (top.precedence < op.precedence || (top.precedence == op.precedence && !top.rassoc)) && !top.open) { +         operate(parse operator[op_stack_pop(stack)]) +         if (op_stack_top(stack)) top = parse operator[op_stack_top(stack)] else top = nil +      } + +      if (op.close) +         flush_until(op.close) + +      next_is_unary = !op.close +   } + +   void +   stack_op(name:str) +   { +      flush_ordered(name) +      # print('push op: ', name, '\n') +      op_stack_push(stack, name) +   } + +   void +   stack_value(value:str) +   { +      # print('push value: ', value, '\n') +      values = values + value + ' ' +      next_is_unary = false +   } + +   def value +      [reducer::builtin] | [number::unsigned::type] | [string::type] | [reference::type] + +   def ambiguous +      [`+] | [`-] + +   def unambiguous +      [unary_unambi] | [binary_unambi] + +   def binary +      [ambiguous] | [binary_unambi] + +   def otherops +      op:str +      [ambiguous] { if (next_is_unary) lhs.op = $r1 + '#' else lhs.op = '#' + $r1 } +   |  [unambiguous] { lhs.op = $r1 } + +   def lsquare +      [`[] { stack_op($lhs) } + +   def rsquare +      [`]] { stack_op($lhs) } + +   def lparen +      [`(] { stack_op($lhs) } + +   def rparen +      [`)] { stack_op($lhs) } + +   def question +      [`?] { stack_op($lhs) } + +   def colon +      [`:] { stack_op($lhs) } + +   def constant +      [number::unsigned::type] | [string::type] + +   def tok#en +      [value WS+ value] commit { reject } +   |  [binary WS* binary] commit { reject } +   |  [constant WS* `(] commit { reject } +   |  [`) WS* value] commit { reject } +   |  [`] WS* value] commit { reject } +   |  [lparen tok+ rparen] commit +   |  [lsquare WS* rsquare] commit +   |  [tok+ question tok+ colon tok+] commit +   |  [otherops] { stack_op(r1.op) } +   |  [value] { stack_value($r1) } +   |  [WS] { lhs = nil } + +   def collapsed +      result:reducer::collapsed +      [tok*] commit { flush_all() lhs.result = parse reducer::collapsed[values] if (!lhs.result) reject } + +   collapsed +   collapse(s:stream) +   { +      c:collapser = new collapser() +      c->stack = op_stack_new() +      c->values = '' +      c->next_is_unary = true +      parse r:collapsed(c)[s] +      op_stack_free(c->stack) +      return r +   } + +   collapsed +   collapsestr(s:str) +   { +      c:collapser = new collapser() +      c->stack = op_stack_new() +      c->values = '' +      c->next_is_unary = true +      parse r:collapsed(c)[s] +      op_stack_free(c->stack) +      return r +   } +end + +# r:collapser::collapsed = collapser::collapse(stdin) +# if (r) { +#    print($r.result, '\n') +# } else { +#    print('invalid expression\n') +# } diff --git a/src/compiler/types.lm b/src/compiler/types.lm new file mode 100644 index 0000000..34a9026 --- /dev/null +++ b/src/compiler/types.lm @@ -0,0 +1,55 @@ +context number +   context unsigned +      literal `true `false +      token OCT / '0'[0-7]+ / +      token DEC / [0-9]+ / +      token HEX / '0x' xdigit+ / +      int strtoull(a:str, b:int) = c_strtoull + +      def type +         value:int +         [`false] { lhs.value = 0 } +      |  [`true] { lhs.value = 1 } +      |  [OCT] { lhs.value = strtoull($r1, 8) } +      |  [DEC] { lhs.value = strtoull($r1, 10) } +      |  [HEX] { lhs.value = strtoull($r1, 16) } +   end + +   lex +      ignore / space+ / +      literal `+ `- +   end + +   def type +      value:int +      [unsigned::type] { lhs.value = r1.value } +   |  [`- type] { lhs.value = r2.value - (r2.value * 2) } +   |  [`+ type] { lhs.value = r2.value } +end + +context string +   rl ESC / '\\' / +   token ESC_CHR / ESC [abfnrtv\\'"e] / +   token ESC_HEX / ESC 'x' xdigit{2} / +   token ESC_OCT / ESC [0-7]{1,3} / +   token CHAR / ^cntrl - ['"] - ESC / +   literal `' `" + +   def raw +      [ESC_CHR] # TODO: how to output raw bytes? +   |  [ESC_HEX] # TODO: how to output raw bytes? +   |  [ESC_OCT] # TODO: how to output raw bytes? +   |  [CHAR] + +   def type +      length:int +      [`' raw:raw* `'] { i:int = 0 for s:raw in r2 i = i + 1 lhs.length = i } +   |  [`" raw:raw* `"] { i:int = 0 for s:raw in r2 i = i + 1 lhs.length = i } +end + +context name +   token NAME / [a-zA-Z_][a-zA-Z_0-9]* / + +   def type +      [NAME] +end diff --git a/src/fspec/bcode.c b/src/fspec/bcode.c deleted file mode 100644 index 70e4b08..0000000 --- a/src/fspec/bcode.c +++ /dev/null @@ -1,199 +0,0 @@ -#include <fspec/bcode.h> -#include <fspec/memory.h> -#include "private/bcode-types.h" - -#include <stdlib.h> -#include <string.h> -#include <assert.h> -#include <err.h> - -static_assert(sizeof(fspec_off) <= sizeof(((struct fspec_mem*)0)->len), "fspec_off should not be larger than what fspec_mem can represent"); -static_assert(sizeof(enum fspec_op) == sizeof(uint8_t), "enum fspec_op is expected to have size of uint8_t"); -static_assert(sizeof(struct fspec_bcode) == sizeof(enum fspec_op), "struct fspec_bcode is expected to have size of enum fspec_op"); -static_assert(sizeof(FSPEC_OP_LAST) <= 8, "op codes need more than 3 bits to be represented"); - -#if 0 -uint8_t -fspec_op_get_num_args(const struct fspec_op_code *code) -{ -   return code->op >> 2; -} - -static fspec_off -arg_data_len(const enum fspec_arg *arg) -{ -   assert(arg); - -   switch (*arg) { -      case FSPEC_ARG_NUM: -         return sizeof(fspec_num); - -      case FSPEC_ARG_VAR: -         return sizeof(fspec_var); - -      case FSPEC_ARG_STR: -      case FSPEC_ARG_OFF: -         return sizeof(fspec_off); - -      case FSPEC_ARG_DAT: -         { -            struct fspec_mem mem; -            fspec_arg_get_mem(arg, NULL, &mem); -            return sizeof(fspec_off) + mem.len; -         } - -      case FSPEC_ARG_EOF: -         break; - -      case FSPEC_ARG_LAST: -         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); -         break; -   } - -   return 0; -} - -static fspec_off -arg_len(const enum fspec_arg *arg) -{ -   return sizeof(*arg) + arg_data_len(arg); -} - -void -fspec_arg_get_mem(const enum fspec_arg *arg, const void *data, struct fspec_mem *out_mem) -{ -   assert(arg && out_mem); - -   switch (*arg) { -      case FSPEC_ARG_STR: -         { -            assert(data); -            fspec_off off; -            fspec_strsz len; -            memcpy(&off, (char*)arg + sizeof(*arg), sizeof(off)); -            memcpy(&len, (char*)data + off, sizeof(len)); -            out_mem->data = (char*)data + off + sizeof(len); -            out_mem->len = len; -         } -         break; - -      case FSPEC_ARG_DAT: -         { -            fspec_off len; -            memcpy(&len, (char*)arg + sizeof(*arg), sizeof(len)); -            out_mem->data = (char*)arg + sizeof(*arg) + sizeof(len); -            out_mem->len = len; -         } -         break; - -      case FSPEC_ARG_VAR: -      case FSPEC_ARG_NUM: -      case FSPEC_ARG_OFF: -         out_mem->data = (char*)arg + sizeof(*arg); -         out_mem->len = arg_data_len(arg); -         break; - -      case FSPEC_ARG_EOF: -         *out_mem = (struct fspec_mem){0}; -         break; - -      case FSPEC_ARG_LAST: -         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); -         break; -   } -} - -fspec_num -fspec_arg_get_num(const enum fspec_arg *arg) -{ -   assert(arg && *arg < FSPEC_ARG_LAST); -   fspec_num v; -   switch (*arg) { -      case FSPEC_ARG_NUM: -         memcpy(&v, arg + sizeof(*arg), sizeof(v)); -         break; - -      case FSPEC_ARG_VAR: -         { -            fspec_var var; -            memcpy(&var, arg + sizeof(*arg), sizeof(var)); -            v = var; -         } -         break; - -      case FSPEC_ARG_DAT: -      case FSPEC_ARG_OFF: -         { -            fspec_off off; -            memcpy(&off, arg + sizeof(*arg), sizeof(off)); -            v = off; -         } -         break; - -      case FSPEC_ARG_STR: -      case FSPEC_ARG_EOF: -      case FSPEC_ARG_LAST: -         errx(EXIT_FAILURE, "%s: unexpected argument type %u", __func__, *arg); -         break; -   } -   return v; -} - -const char* -fspec_arg_get_cstr(const enum fspec_arg *arg, const void *data) -{ -   assert(arg && *arg == FSPEC_ARG_STR); -   struct fspec_mem mem; -   fspec_arg_get_mem(arg, data, &mem); -   return (const char*)mem.data; -} - -const enum fspec_arg* -fspec_op_get_arg(const enum fspec_op *start, const void *end, const uint8_t nth, const uint32_t expect) -{ -   uint8_t i = 0; -   const enum fspec_arg *arg = NULL; -   for (const enum fspec_op *op = fspec_op_next(start, end, false); op && i < nth; op = fspec_op_next(op, end, false)) { -      if (*op != FSPEC_OP_ARG) -         return NULL; - -      arg = (void*)(op + 1); -      assert(*arg >= 0 && *arg < FSPEC_ARG_LAST); -      ++i; -   } - -   if (arg && !(expect & (1<<*arg))) -      errx(EXIT_FAILURE, "got unexpected argument of type %u", *arg); - -   return arg; -} - -const enum fspec_arg* -fspec_arg_next(const enum fspec_arg *arg, const void *end, const uint8_t nth, const uint32_t expect) -{ -   return fspec_op_get_arg((void*)(arg - 1), end, nth, expect); -} - -const enum fspec_op* -fspec_op_next(const enum fspec_op *start, const void *end, const bool skip_args) -{ -   assert(start && end); -   fspec_off off = sizeof(*start); -   if ((void*)start < end && *start == FSPEC_OP_ARG) -      off += arg_len((void*)(start + 1)); - -   for (const enum fspec_op *op = start + off; (void*)start < end && (void*)op < end; ++op) { -      if (*op >= FSPEC_OP_LAST) -         errx(EXIT_FAILURE, "got unexected opcode %u", *op); - -      if (skip_args && *op == FSPEC_OP_ARG) { -         op += arg_len((void*)(op + 1)); -         continue; -      } - -      return op; -   } - -   return NULL; -} -#endif diff --git a/src/fspec/bcode.h b/src/fspec/bcode.h deleted file mode 100644 index 3d216af..0000000 --- a/src/fspec/bcode.h +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include <inttypes.h> -#include <stdint.h> -#include <stdbool.h> - -/** maximum range of numbers */ -#define PRI_FSPEC_NUM PRIu64 -typedef uint64_t fspec_num; - -enum fspec_visual { -   FSPEC_VISUAL_NUL, -   FSPEC_VISUAL_DEC, -   FSPEC_VISUAL_HEX, -   FSPEC_VISUAL_STR, -   FSPEC_VISUAL_LAST, -} __attribute__((packed)); - -enum fspec_type { -   FSPEC_TYPE_CODE, -   FSPEC_TYPE_CALL, -   FSPEC_TYPE_U8, -   FSPEC_TYPE_S8, -   FSPEC_TYPE_U16, -   FSPEC_TYPE_S16, -   FSPEC_TYPE_U32, -   FSPEC_TYPE_S32, -   FSPEC_TYPE_U64, -   FSPEC_TYPE_S64, -   FSPEC_TYPE_LAST, -} __attribute__((packed)); - -enum fspec_storage { -   FSPEC_STORAGE_DATA, -   FSPEC_STORAGE_LOCAL, -   FSPEC_STORAGE_LAST, -} __attribute__((packed)); - -enum fspec_builtin { -   FSPEC_BUILTIN_ADD, -   FSPEC_BUILTIN_SUB, -   FSPEC_BUILTIN_MUL, -   FSPEC_BUILTIN_DIV, -   FSPEC_BUILTIN_MOD, -   FSPEC_BUILTIN_BIT_AND, -   FSPEC_BUILTIN_BIT_OR, -   FSPEC_BUILTIN_BIT_XOR, -   FSPEC_BUILTIN_BIT_LEFT, -   FSPEC_BUILTIN_BIT_RIGHT, -   FSPEC_BUILTIN_DECLARE, -   FSPEC_BUILTIN_READ, -   FSPEC_BUILTIN_FILTER, -   FSPEC_BUILTIN_VISUAL, -   FSPEC_BUILTIN_LAST, -} __attribute__((packed)); - -enum fspec_op { -   FSPEC_OP_BUILTIN, -   FSPEC_OP_PUSH, -   FSPEC_OP_POP, -   FSPEC_OP_VAR, -   FSPEC_OP_LAST, -} __attribute__((packed)); - -struct fspec_bcode { -   char op, data[]; -} __attribute__((packed)); - -#if 0 -('fspc')(version) -OP_BUILTIN (declare) OP_PUSH OP_VAR8 (storage) OP_VAR8 (type) OP_VAR [name] OP_POP -OP_BUILTIN (filter) -OP_FUN FUN_ASSIGN VAR0 VAR [data] -OP_FUN FUN_READ -#endif - -#if 0 -uint8_t -fspec_op_get_num_args(const struct fspec_bcode *code); - -const struct fspec_bcode* -fspec_op_next(const struct fspec_bcode *code, const void *end, const bool skip_args); - -const struct fspec_bcode* -fspec_op_get_arg(const struct fspec_bcode *code, const void *end, const uint8_t nth, const uint32_t expect); - -const struct fspec_arg* -fspec_arg_next(const struct fspec_bcode *code, const void *end, const uint8_t nth, const uint32_t expect); - -fspec_num -fspec_ref_get_num(const struct fspec_bcode *code); -#endif diff --git a/src/fspec/lexer.h b/src/fspec/lexer.h deleted file mode 100644 index ef6e059..0000000 --- a/src/fspec/lexer.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include <fspec/memory.h> - -#include <stdbool.h> - -enum fspec_lexer_section { -   FSPEC_SECTION_DATA, -   FSPEC_SECTION_CODE, -}; - -struct fspec_lexer; -struct fspec_lexer { -   struct { -      size_t (*read)(struct fspec_lexer *lexer, void *input, const size_t size, const size_t nmemb); -      size_t (*write)(struct fspec_lexer *lexer, const enum fspec_lexer_section section, const void *output, const size_t size, const size_t nmemb); -   } ops; - -   struct { -      struct fspec_mem input; -   } mem; -}; - -bool -fspec_lexer_parse(struct fspec_lexer *lexer, const char *name); diff --git a/src/fspec/memory.h b/src/fspec/memory.h deleted file mode 100644 index 768415a..0000000 --- a/src/fspec/memory.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -#include <stddef.h> - -struct fspec_mem { -   void *data; -   size_t len; -}; diff --git a/src/fspec/private/bcode-types.h b/src/fspec/private/bcode-types.h deleted file mode 100644 index 8c9ce74..0000000 --- a/src/fspec/private/bcode-types.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include <inttypes.h> -#include <stdint.h> - -/** maximum size of string literals */ -#define PRI_FSPEC_STRSZ PRIu8 -typedef uint8_t fspec_strsz; - -/** maximum range of variable ids */ -#define PRI_FSPEC_VAR PRIu16 -typedef uint16_t fspec_var; - -/** maximum range of bytecode offsets */ -#define PRI_FSPEC_OFF PRIu32 -typedef uint32_t fspec_off; diff --git a/src/fspec/ragel/lexer-expr.h b/src/fspec/ragel/lexer-expr.h deleted file mode 100644 index 904736d..0000000 --- a/src/fspec/ragel/lexer-expr.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include <fspec/memory.h> - -#include <stdbool.h> - -struct fspec_expr; -struct fspec_expr { -   struct { -      size_t (*read)(struct fspec_expr *lexer, void *input, const size_t size, const size_t nmemb); -      size_t (*write)(struct fspec_expr *lexer, const void *output, const size_t size, const size_t nmemb); -   } ops; - -   struct { -      struct fspec_mem input; -   } mem; -}; - -bool -fspec_expr_parse(struct fspec_expr *lexer, const char *name); diff --git a/src/fspec/ragel/lexer-expr.rl b/src/fspec/ragel/lexer-expr.rl deleted file mode 100644 index 0f1f83d..0000000 --- a/src/fspec/ragel/lexer-expr.rl +++ /dev/null @@ -1,118 +0,0 @@ -#include "lexer-expr.h" -#include "lexer-stack.h" -#include "util/ragel/ragel.h" - -#include <stdlib.h> -#include <stdio.h> -#include <assert.h> -#include <err.h> - -static uint8_t -precedence(char op) -{ -   switch (op) { -      case '^': return 4; -      case '*': return 3; -      case '/': return 3; -      case '+': return 2; -      case '-': return 2; -   } -   errx(EXIT_FAILURE, "unknown operator %c for precedence", op); -   return 0; -} - -static size_t -pop(char cur, char *mstack, size_t open) -{ -   static char cvar = 'a'; - -   // 1 + 2 + 4 + 3 * 2 / 2 * 2 * 2 - 2 * 2 + 5; -   while (open >= 3) { -      const char last_op = mstack[open - 2]; -      const uint8_t last_prio = precedence(last_op); -      const uint8_t new_prio = precedence(cur); - -      if (last_prio <= new_prio) -         break; - -      printf("%c = ", cvar); -      for (size_t i = open - 3; i < open; ++i) -         printf("%c ", mstack[i]); -      puts(";"); -      open -= 3; - -      mstack[open++] = cvar; -      ++cvar; -   } - -   return open; -} - -%%{ -   machine fspec_expr; -   include fspec_stack "lexer-stack.rl"; -   variable p ragel.p; -   variable pe ragel.pe; -   variable eof ragel.eof; -   write data noerror nofinal; - -   action op { -      open = pop(fc, mstack, open); -      mstack[open++] = fc; -   } - -   logical_operators = '&&' | '||' | '==' | '<' | '>' | '<=' | '>='; -   calc_operators = '-' | '+' | '/' | '*' | '%'; -   bitwise_operators = '&' | '|' | '^' | '<<' | '>>'; - -   main := |* -      calc_operators => op; -      stack_num => { mstack[open++] = fc;}; -      '(' => { }; -      ')' => { }; -      ' '; -      ';' => { -         printf("v = "); -         for (size_t i = 0; i < open; ++i) -            printf("%c ", mstack[i]); -         puts(";"); -      }; -      *|; -}%% - - -bool -fspec_expr_parse(struct fspec_expr *expr, const char *name) -{ -   int cs, act; -   const char *ts, *te; -   (void)ts; - -   size_t open = 0; -   char mstack[25]; - -   %% write init; - -   (void)fspec_expr_en_main; -   assert(expr); -   assert(expr->ops.read); -   assert(expr->ops.write); -   assert(expr->mem.input.data && expr->mem.input.len); -   assert(expr->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); - -   char var[256]; -   struct stack stack = { .var.buf.mem = { .data = var, .len = sizeof(var) } }; -   struct ragel ragel = { .name = name, .lineno = 1 }; - -   // static const fspec_num version = 0; - -   struct fspec_mem input = expr->mem.input; -   for (bool eof = false; !ragel.error && !eof;) { -      const size_t bytes = expr->ops.read(expr, input.data, 1, input.len); -      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; -      ragel_feed_input(&ragel, (eof = (bytes < input.len)), &rl); -      %% write exec; -   } - -   return !ragel.error; -} diff --git a/src/fspec/ragel/lexer-stack.h b/src/fspec/ragel/lexer-stack.h deleted file mode 100644 index eebf055..0000000 --- a/src/fspec/ragel/lexer-stack.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include "util/membuf.h" - -#include <stdint.h> - -struct varbuf { -   struct membuf buf; -   size_t offset; -}; - -void -varbuf_begin(struct varbuf *var); - -void -varbuf_reset(struct varbuf *var); - -void -varbuf_remove_last(struct varbuf *var); - -struct stack { -   struct varbuf var; - -   union { -      struct fspec_mem str; -      uint64_t num; -   }; - -   enum stack_type { -      STACK_STR, -      STACK_NUM, -   } type; -}; - -void -stack_num(struct stack *stack, const uint8_t base); - -const struct fspec_mem* -stack_get_str(const struct stack *stack); - -uint64_t -stack_get_num(const struct stack *stack); diff --git a/src/fspec/ragel/lexer-stack.rl b/src/fspec/ragel/lexer-stack.rl deleted file mode 100644 index 940f820..0000000 --- a/src/fspec/ragel/lexer-stack.rl +++ /dev/null @@ -1,153 +0,0 @@ -#include "lexer-stack.h" - -#include <stdlib.h> -#include <assert.h> -#include <err.h> - -void -varbuf_begin(struct varbuf *var) -{ -   assert(var); -   var->offset = var->buf.written; -   assert(var->offset <= var->buf.mem.len); -} - -void -varbuf_reset(struct varbuf *var) -{ -   assert(var); -   var->offset = var->buf.written = 0; -} - -void -varbuf_remove_last(struct varbuf *var) -{ -   assert(var); -   assert(var->buf.written >= var->offset); -   const size_t size = var->buf.written - var->offset; -   assert(var->buf.written >= size); -   var->buf.written -= size; -   assert(var->buf.written <= var->buf.mem.len); -} - -static void -stack_check_type(const struct stack *stack, const enum stack_type type) -{ -   assert(stack); - -   if (stack->type == type) -      return; - -   const char *got = (type == STACK_STR ? "str" : "num"), *expected = (stack->type == STACK_STR ? "str" : "num"); -   errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", got, expected); -} - -void -stack_num(struct stack *stack, const uint8_t base) -{ -   assert(stack); -   membuf_terminate(&stack->var.buf, (char[]){ 0 }, 1); -   const char *str = (char*)stack->var.buf.mem.data + stack->var.offset; -   stack->type = STACK_NUM; -   stack->num = strtoll(str, NULL, base); -   varbuf_remove_last(&stack->var); -} - -const struct fspec_mem* -stack_get_str(const struct stack *stack) -{ -   stack_check_type(stack, STACK_STR); -   return &stack->str; -} - -uint64_t -stack_get_num(const struct stack *stack) -{ -   stack_check_type(stack, STACK_NUM); -   return stack->num; -} - -%%{ -   machine fspec_stack; - -   action stack_oct { -      stack_num(&stack, 8); -   } - -   action stack_hex { -      stack_num(&stack, 16); -   } - -   action stack_dec { -      stack_num(&stack, 10); -   } - -   action stack_str { -      membuf_terminate(&stack.var.buf, (char[]){ 0 }, 1); -      stack.type = STACK_STR; -      stack.str = stack.var.buf.mem; -      stack.str.len = stack.var.buf.written; -   } - -   action store_esc_num { -      const fspec_num v = stack_get_num(&stack); -      assert(v <= 255); -      membuf_append(&stack.var.buf, (uint8_t[]){ v }, sizeof(uint8_t)); -   } - -   action store_esc { -      const struct { const char e, v; } map[] = { -         { .e = 'a', .v = '\a' }, -         { .e = 'b', .v = '\b' }, -         { .e = 'f', .v = '\f' }, -         { .e = 'n', .v = '\n' }, -         { .e = 'r', .v = '\r' }, -         { .e = 't', .v = '\t' }, -         { .e = 'v', .v = '\v' }, -         { .e = '\\', .v = '\\' }, -         { .e = '\'', .v = '\'' }, -         { .e = '\"', .v = '"' }, -         { .e = 'e', .v = 0x1B }, -      }; - -      for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); ++i) { -         if (fc != map[i].e) -            continue; - -         membuf_append(&stack.var.buf, &map[i].v, sizeof(map[i].v)); -         break; -      } -   } - -   action store { -      membuf_append(&stack.var.buf, fpc, 1); -   } - -   action begin_num { -      varbuf_begin(&stack.var); -   } - -   action begin_str { -      varbuf_reset(&stack.var); -   } - -   # Semantic -   quote = ['"]; -   esc = [abfnrtv\\'"e]; -   esc_chr = '\\'; -   esc_hex = 'x' <: xdigit{2}; -   hex = '0x' <: xdigit{1,}; -   oct = [0-7]{1,3}; -   dec = [\-+]? <: (([1-9] <: digit*) | '0'); -   name = ((alpha | '_') <: (alnum | '_')*); - -   # Stack -   stack_name = name >begin_str $store %stack_str; -   stack_hex = hex >begin_num $store %stack_hex; -   stack_dec = dec >begin_num $store %stack_dec; -   stack_oct = oct >begin_num $store %stack_oct; -   stack_esc_hex = esc_hex >begin_num <>*store %stack_hex; -   stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc); -   stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote; -   stack_num = stack_dec | stack_hex; -}%% diff --git a/src/fspec/ragel/lexer.rl b/src/fspec/ragel/lexer.rl deleted file mode 100644 index 8354bc0..0000000 --- a/src/fspec/ragel/lexer.rl +++ /dev/null @@ -1,180 +0,0 @@ -#include <fspec/lexer.h> -#include <fspec/bcode.h> -#include "lexer-stack.h" -#include "util/ragel/ragel.h" -#include "fspec/private/bcode-types.h" - -#include <assert.h> - -%%{ -   machine fspec_lexer; -   include fspec_stack "lexer-stack.rl"; -   variable p ragel.p; -   variable pe ragel.pe; -   variable eof ragel.eof; -   write data noerror nofinal; - -   action arg_eof { -      // codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL); -   } - -   action arg_num { -      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) }); -   } - -   action arg_str { -      // const struct fspec_mem *str = stack_get_str(&state.stack); -      // codebuf_append_arg_cstr(&state.out, str->data, str->len); -   } - -   action arg_var { -      // state_append_arg_var(&state, true, stack_get_str(&state.stack)); -   } - -   action filter { -      // codebuf_append_op(&state.out, FSPEC_OP_FILTER); -   } - -   action goto { -      // codebuf_append_op(&state.out, FSPEC_OP_GOTO); -      // state_append_arg_var(&state, false, stack_get_str(&state.stack)); -   } - -   action vnul { -      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); -      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL }); -   } - -   action vdec { -      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); -      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC }); -   } - -   action vhex { -      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); -      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX }); -   } - -   action vstr { -      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); -      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR }); -   } - -   action r8 { -      // codebuf_append_op(&state.out, FSPEC_OP_READ); -      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 8 }); -   } - -   action r16 { -      // codebuf_append_op(&state.out, FSPEC_OP_READ); -      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 }); -   } - -   action r32 { -      // codebuf_append_op(&state.out, FSPEC_OP_READ); -      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 }); -   } - -   action r64 { -      // codebuf_append_op(&state.out, FSPEC_OP_READ); -      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 }); -   } - -   action enum_member_end { -   } - -   action enum_member_start { -   } - -   action enum_end { -   } - -   action enum_start { -   } - -   action struct_member_end { -      // state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER); -   } - -   action struct_member_start { -      // state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack)); -   } - -   action struct_end { -      // state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT); -   } - -   action struct_start { -      // state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack)); -   } - -   action type_err { -      ragel_throw_error(&ragel, "unknown type name"); -   } - -   action visual_err { -      ragel_throw_error(&ragel, "unknown visualization"); -   } - -   action syntax_err { -      ragel_throw_error(&ragel, "malformed input (machine failed here or in next expression)"); -   } - -   action line { -      ragel_advance_line(&ragel); -   } - -   # Semantic -   newline = '\n'; -   valid = ^cntrl; -   comment = '//' <: valid* :>> newline; -   type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's64') %r64; -   visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr; - -   # Catchers -   catch_const_expr = stack_num %arg_num; -   catch_struct = 'struct ' <: stack_name; -   catch_enum = 'enum ' <: stack_name; -   catch_type = (catch_struct %goto | type) $!type_err; -   catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var; -   catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']'; -   catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? <: (', ' <: catch_args)* :>> ')')?; -   catch_visual = ' ' <: visual $!visual_err; - -   # Abstract -   struct_member = stack_name %struct_member_start :>> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) :>> ';' %struct_member_end; -   struct = catch_struct %struct_start :>> ' {' <: (space | comment | struct_member)* :>> '};' %struct_end; -   enum_member = stack_name %enum_member_start :>> (': ' <: catch_const_expr)? :>> ';' %enum_member_end; -   enum = catch_enum %enum_start :>> ' {' <: (space | comment | enum_member)* :>> '};' %enum_end; -   line = valid* :>> newline %line; -   main := ((space | comment | enum | struct)* & line*) $!syntax_err; -}%% - -bool -fspec_lexer_parse(struct fspec_lexer *lexer, const char *name) -{ -   int cs; -   %% write init; - -   (void)fspec_lexer_en_main; -   assert(lexer); -   assert(lexer->ops.read); -   assert(lexer->mem.input.data && lexer->mem.input.len); -   assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); - -   char var[256]; -   struct stack stack = { .var.buf.mem = { .data = var, .len = sizeof(var) } }; -   struct ragel ragel = { .name = name, .lineno = 1 }; - -   // static const fspec_num version = 0; - -   struct fspec_mem input = lexer->mem.input; -   for (bool eof = false; !ragel.error && !eof;) { -      const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len); -      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; -      ragel_feed_input(&ragel, (eof = (bytes < input.len)), &rl); -      %% write exec; -   } - -   return !ragel.error; -} diff --git a/src/fspec/ragel/validator.rl b/src/fspec/ragel/validator.rl deleted file mode 100644 index 90ead21..0000000 --- a/src/fspec/ragel/validator.rl +++ /dev/null @@ -1,96 +0,0 @@ -#include <fspec/bcode.h> -#include <fspec/validator.h> -#include "util/ragel/ragel.h" -#include "fspec/private/bcode-types.h" - -#include <assert.h> - -struct stack { -   union { -      fspec_num num; -      fspec_off off; -      fspec_var var; -      fspec_strsz strsz; -      unsigned char b[sizeof(fspec_num)]; -   } u; -   uint8_t i; // writing index for u.b -}; - -struct state { -   struct ragel ragel; -   struct stack stack; -}; - -%%{ -   machine fspec_validator; -   variable p state.ragel.p; -   variable pe state.ragel.pe; -   variable eof state.ragel.eof; -   write data noerror nofinal; - -#   BLT_HEADER = 0; -#   BLT_ADD = 1; -#   BLT_SUB = 2; -#   BLT_MUL = 3; -#   BLT_DIV = 4; -#   BLT_MOD = 5; -#   BLT_BIT_AND = 6; -#   BLT_BIT_OR = 7; -#   BLT_BIT_XOR = 8; -#   BLT_BIT_LEFT = 9; -#   BLT_BIT_RIGHT = 10; -#   BLT_DECLARE = 11; -#   BLT_READ = 12; -#   BLT_GOTO = 13; -#   BLT_FILTER = 14; -#   BLT_VISUAL = 15; -# -#   builtins = BLT_HEADER | -#              BLT_ADD | BLT_SUB | BLT_MUL | BLT_DIV | BLT_MOD | -#              BLT_BIT_AND | BLT_BIT_OR | BLT_BIT_XOR | BLT_BIT_LEFT | BLT_BIT_RIGHT -#              BLT_DECLARE | BLT_READ | BLT_GOTO | BLT_FILTER | BLT_VISUAL; -# -#   OP_ARG = 0; -#   OP_REF = 1; -#   OP_BLT = 2 OP_ARG builtins; -#   OP_FUN = 3; -# -#   arg_ops = OP_REF | OP_FUN | OP_BUILTIN OP_FUN -# -#   BLT_DECLARE = OP_BUILTIN 10 OP_ARG 2 OP_REF OP_REF; -#   BLT_READ = OP_BUILTIN 11 OP_ARG 1..255 OP_REF (arg_ops)*; -# -#   pattern = ((BLT_READ | BLT_GOTO) BLT_FILTER* BLT_VISUAL?)* $!pattern_error; -#   main := (BLT_HEADER <: BLT_DECLARE* <: pattern) %check_decls $advance $!syntax_error; -   main := any*; -}%% - -bool -fspec_validator_parse(struct fspec_validator *validator, const char *name) -{ -   int cs; -   %% write init; - -   (void)fspec_validator_en_main; -   assert(validator); -   assert(validator->ops.read); -   assert(validator->mem.input.data && validator->mem.input.len); -   assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); - -   struct state state = { -      .ragel.name = name, -      .ragel.lineno = 1, -   }; - -   static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union"); - -   struct fspec_mem input = validator->mem.input; -   for (bool eof = false; !state.ragel.error && !eof;) { -      const size_t bytes = validator->ops.read(validator, input.data, 1, input.len); -      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true }; -      ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); -      %% write exec; -   } - -   return !state.ragel.error; -} diff --git a/src/fspec/validator.h b/src/fspec/validator.h deleted file mode 100644 index a20e98a..0000000 --- a/src/fspec/validator.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include <fspec/memory.h> - -#include <stdbool.h> - -struct fspec_validator; -struct fspec_validator { -   struct { -      size_t (*read)(struct fspec_validator *validator, void *ptr, const size_t size, const size_t nmemb); -   } ops; - -   struct { -      struct fspec_mem input; -   } mem; -}; - -bool -fspec_validator_parse(struct fspec_validator *validator, const char *name); diff --git a/src/util/membuf.c b/src/util/membuf.c deleted file mode 100644 index 0602679..0000000 --- a/src/util/membuf.c +++ /dev/null @@ -1,31 +0,0 @@ -#include "membuf.h" - -#include <stdlib.h> -#include <assert.h> -#include <memory.h> -#include <err.h> - -static void -membuf_bounds_check(const struct membuf *buf, const size_t nmemb) -{ -   assert(buf); - -   if (buf->mem.len < nmemb || buf->written > buf->mem.len - nmemb) -      errx(EXIT_FAILURE, "%s: %zu bytes exceeds the maximum storage size of %zu bytes", __func__, buf->written + nmemb, buf->mem.len); -} - -void -membuf_terminate(struct membuf *buf, const void *data, const size_t data_sz) -{ -   assert(data || !data_sz); -   membuf_bounds_check(buf, data_sz); -   memcpy((char*)buf->mem.data + buf->written, data, data_sz); -} - -void -membuf_append(struct membuf *buf, const void *data, const size_t data_sz) -{ -   membuf_terminate(buf, data, data_sz); -   buf->written += data_sz; -   assert(buf->written <= buf->mem.len); -} diff --git a/src/util/membuf.h b/src/util/membuf.h deleted file mode 100644 index 86d8dde..0000000 --- a/src/util/membuf.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include <fspec/memory.h> - -struct membuf { -   struct fspec_mem mem; -   size_t written; -}; - -void -membuf_terminate(struct membuf *buf, const void *data, const size_t data_sz); - -void -membuf_append(struct membuf *buf, const void *data, const size_t data_sz); | 
