diff options
| author | Jari Vetoniemi <mailroxas@gmail.com> | 2018-02-23 12:54:43 +0200 | 
|---|---|---|
| committer | Jari Vetoniemi <mailroxas@gmail.com> | 2018-02-23 12:54:43 +0200 | 
| commit | 4750be2da326297830691c54adbab0a5dea14802 (patch) | |
| tree | ae7b9643ce3fdbf6c0ddc78c2626fb81416f1404 /src/fspec/ragel | |
| parent | 22fb54a917676e61f19d773cc4f931300ace925e (diff) | |
wip
Diffstat (limited to 'src/fspec/ragel')
| -rw-r--r-- | src/fspec/ragel/lexer-expr.h | 20 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-expr.rl | 122 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-stack.h | 42 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer-stack.rl | 153 | ||||
| -rw-r--r-- | src/fspec/ragel/lexer.rl | 180 | ||||
| -rw-r--r-- | src/fspec/ragel/validator.rl | 96 | 
6 files changed, 613 insertions, 0 deletions
| diff --git a/src/fspec/ragel/lexer-expr.h b/src/fspec/ragel/lexer-expr.h new file mode 100644 index 0000000..904736d --- /dev/null +++ b/src/fspec/ragel/lexer-expr.h @@ -0,0 +1,20 @@ +#pragma once + +#include <fspec/memory.h> + +#include <stdbool.h> + +struct fspec_expr; +struct fspec_expr { +   struct { +      size_t (*read)(struct fspec_expr *lexer, void *input, const size_t size, const size_t nmemb); +      size_t (*write)(struct fspec_expr *lexer, const void *output, const size_t size, const size_t nmemb); +   } ops; + +   struct { +      struct fspec_mem input; +   } mem; +}; + +bool +fspec_expr_parse(struct fspec_expr *lexer, const char *name); diff --git a/src/fspec/ragel/lexer-expr.rl b/src/fspec/ragel/lexer-expr.rl new file mode 100644 index 0000000..2975043 --- /dev/null +++ b/src/fspec/ragel/lexer-expr.rl @@ -0,0 +1,122 @@ +#include "lexer-expr.h" +#include "lexer-stack.h" +#include "util/ragel/ragel.h" + +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <err.h> + +static uint8_t +precedence(char op) +{ +   switch (op) { +      case '^': return 4; +      case '*': return 3; +      case '/': return 3; +      case '+': return 2; +      case '-': return 2; +   } +   errx(EXIT_FAILURE, "unknown operator %c for precedence", op); +   return 0; +} + +static size_t +pop(char cur, char *mstack, size_t open) +{ +   static char cvar = 'a'; + +   // 1 + 2 + 4 + 3 * 2 / 2 * 2 * 2 - 2 * 2 + 5; +   while (open >= 3) { +      const char last_op = mstack[open - 2]; +      const uint8_t last_prio = precedence(last_op); +      const uint8_t new_prio = precedence(cur); + +      if (last_prio <= new_prio) +         break; + +      printf("%c = ", cvar); +      for (size_t i = open - 3; i < open; ++i) +         printf("%c ", mstack[i]); +      puts(";"); +      open -= 3; + +      mstack[open++] = cvar; +      ++cvar; +   } + +   return open; +} + +%%{ +   machine fspec_expr; +   include fspec_stack "lexer-stack.rl"; +   variable p ragel.p; +   
variable pe ragel.pe; +   variable eof ragel.eof; +   write data noerror nofinal; + +   action op { +      open = pop(fc, mstack, open); +      mstack[open++] = fc; +   } + +   logical_operators = '&&' | '||' | '==' | '<' | '>' | '<=' | '>='; +   calc_operators = '-' | '+' | '/' | '*' | '%'; +   bitwise_operators = '&' | '|' | '^' | '<<' | '>>'; + +   main := |* +      '+' => op; +      '/' => op; +      '*' => op; +      '-' => op; +      '^' => op; +      stack_num => { mstack[open++] = fc;}; +      '(' => { }; +      ')' => { }; +      ' '; +      ';' => { +         printf("v = "); +         for (size_t i = 0; i < open; ++i) +            printf("%c ", mstack[i]); +         puts(";"); +      }; +      *|; +}%% + + +bool +fspec_expr_parse(struct fspec_expr *expr, const char *name) +{ +   int cs, act; +   const char *ts, *te; +   (void)ts; + +   size_t open = 0; +   char mstack[25]; + +   %% write init; + +   (void)fspec_expr_en_main; +   assert(expr); +   assert(expr->ops.read); +   assert(expr->ops.write); +   assert(expr->mem.input.data && expr->mem.input.len); +   assert(expr->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + +   char var[256]; +   struct stack stack = { .var.buf.mem = { .data = var, .len = sizeof(var) } }; +   struct ragel ragel = { .name = name, .lineno = 1 }; + +   // static const fspec_num version = 0; + +   struct fspec_mem input = expr->mem.input; +   for (bool eof = false; !ragel.error && !eof;) { +      const size_t bytes = expr->ops.read(expr, input.data, 1, input.len); +      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; +      ragel_feed_input(&ragel, (eof = (bytes < input.len)), &rl); +      %% write exec; +   } + +   return !ragel.error; +} diff --git a/src/fspec/ragel/lexer-stack.h b/src/fspec/ragel/lexer-stack.h new file mode 100644 index 0000000..eebf055 --- /dev/null +++ b/src/fspec/ragel/lexer-stack.h @@ -0,0 +1,42 @@ +#pragma once + +#include "util/membuf.h" + 
+#include <stdint.h> + +struct varbuf { +   struct membuf buf; +   size_t offset; +}; + +void +varbuf_begin(struct varbuf *var); + +void +varbuf_reset(struct varbuf *var); + +void +varbuf_remove_last(struct varbuf *var); + +struct stack { +   struct varbuf var; + +   union { +      struct fspec_mem str; +      uint64_t num; +   }; + +   enum stack_type { +      STACK_STR, +      STACK_NUM, +   } type; +}; + +void +stack_num(struct stack *stack, const uint8_t base); + +const struct fspec_mem* +stack_get_str(const struct stack *stack); + +uint64_t +stack_get_num(const struct stack *stack); diff --git a/src/fspec/ragel/lexer-stack.rl b/src/fspec/ragel/lexer-stack.rl new file mode 100644 index 0000000..940f820 --- /dev/null +++ b/src/fspec/ragel/lexer-stack.rl @@ -0,0 +1,153 @@ +#include "lexer-stack.h" + +#include <stdlib.h> +#include <assert.h> +#include <err.h> + +void +varbuf_begin(struct varbuf *var) +{ +   assert(var); +   var->offset = var->buf.written; +   assert(var->offset <= var->buf.mem.len); +} + +void +varbuf_reset(struct varbuf *var) +{ +   assert(var); +   var->offset = var->buf.written = 0; +} + +void +varbuf_remove_last(struct varbuf *var) +{ +   assert(var); +   assert(var->buf.written >= var->offset); +   const size_t size = var->buf.written - var->offset; +   assert(var->buf.written >= size); +   var->buf.written -= size; +   assert(var->buf.written <= var->buf.mem.len); +} + +static void +stack_check_type(const struct stack *stack, const enum stack_type type) +{ +   assert(stack); + +   if (stack->type == type) +      return; + +   const char *wanted = (type == STACK_STR ? "str" : "num"), *pushed = (stack->type == STACK_STR ? 
"str" : "num"); /* wanted: type the caller asked for, pushed: type currently on the stack */ +   errx(EXIT_FAILURE, "tried to get '%s' from stack, but the last pushed type was '%s'", wanted, pushed); +} + +void +stack_num(struct stack *stack, const uint8_t base) +{ +   assert(stack); +   membuf_terminate(&stack->var.buf, (char[]){ 0 }, 1); +   const char *str = (char*)stack->var.buf.mem.data + stack->var.offset; +   stack->type = STACK_NUM; +   stack->num = strtoull(str, NULL, base); /* num is uint64_t: strtoll would clamp literals above LLONG_MAX */ +   varbuf_remove_last(&stack->var); +} + +const struct fspec_mem* +stack_get_str(const struct stack *stack) +{ +   stack_check_type(stack, STACK_STR); +   return &stack->str; +} + +uint64_t +stack_get_num(const struct stack *stack) +{ +   stack_check_type(stack, STACK_NUM); +   return stack->num; +} + +%%{ +   machine fspec_stack; + +   action stack_oct { +      stack_num(&stack, 8); +   } + +   action stack_hex { +      stack_num(&stack, 16); +   } + +   action stack_dec { +      stack_num(&stack, 10); +   } + +   action stack_str { +      membuf_terminate(&stack.var.buf, (char[]){ 0 }, 1); +      stack.type = STACK_STR; +      stack.str = stack.var.buf.mem; +      stack.str.len = stack.var.buf.written; +   } + +   action store_esc_num { +      const fspec_num v = stack_get_num(&stack); +      assert(v <= 255); +      membuf_append(&stack.var.buf, (uint8_t[]){ v }, sizeof(uint8_t)); +   } + +   action store_esc { +      const struct { const char e, v; } map[] = { +         { .e = 'a', .v = '\a' }, +         { .e = 'b', .v = '\b' }, +         { .e = 'f', .v = '\f' }, +         { .e = 'n', .v = '\n' }, +         { .e = 'r', .v = '\r' }, +         { .e = 't', .v = '\t' }, +         { .e = 'v', .v = '\v' }, +         { .e = '\\', .v = '\\' }, +         { .e = '\'', .v = '\'' }, +         { .e = '\"', .v = '"' }, +         { .e = 'e', .v = 0x1B }, +      }; + +      for (size_t i = 0; i < sizeof(map) / sizeof(map[0]); ++i) { +         if (fc != map[i].e) +            continue; + +         membuf_append(&stack.var.buf, &map[i].v, sizeof(map[i].v)); +         break; + 
     } +   } + +   action store { +      membuf_append(&stack.var.buf, fpc, 1); +   } + +   action begin_num { +      varbuf_begin(&stack.var); +   } + +   action begin_str { +      varbuf_reset(&stack.var); +   } + +   # Semantic +   quote = ['"]; +   esc = [abfnrtv\\'"e]; +   esc_chr = '\\'; +   esc_hex = 'x' <: xdigit{2}; +   hex = '0x' <: xdigit{1,}; +   oct = [0-7]{1,3}; +   dec = [\-+]? <: (([1-9] <: digit*) | '0'); +   name = ((alpha | '_') <: (alnum | '_')*); + +   # Stack +   stack_name = name >begin_str $store %stack_str; +   stack_hex = hex >begin_num $store %stack_hex; +   stack_dec = dec >begin_num $store %stack_dec; +   stack_oct = oct >begin_num $store %stack_oct; +   stack_esc_hex = esc_hex >begin_num <>*store %stack_hex; +   stack_esc = esc_chr <: ((stack_esc_hex | stack_oct) %store_esc_num | esc %~store_esc); +   stack_str = quote <: ((stack_esc? <: print? $store) - zlen)* >begin_str %stack_str :>> quote; +   stack_num = stack_dec | stack_hex; +}%% diff --git a/src/fspec/ragel/lexer.rl b/src/fspec/ragel/lexer.rl new file mode 100644 index 0000000..b4a21dc --- /dev/null +++ b/src/fspec/ragel/lexer.rl @@ -0,0 +1,180 @@ +#include <fspec/lexer.h> +#include <fspec/bcode.h> +#include "lexer-stack.h" +#include "util/ragel/ragel.h" +#include "fspec/private/bcode-types.h" + +#include <assert.h> + +%%{ +   machine fspec_lexer; +   include fspec_stack "lexer-stack.rl"; +   variable p ragel.p; +   variable pe ragel.pe; +   variable eof ragel.eof; +   write data noerror nofinal; + +   action arg_eof { +      // codebuf_append_arg(&state.out, FSPEC_ARG_EOF, NULL); +   } + +   action arg_num { +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ stack_get_num(&state.stack) }); +   } + +   action arg_str { +      // const struct fspec_mem *str = stack_get_str(&state.stack); +      // codebuf_append_arg_cstr(&state.out, str->data, str->len); +   } + +   action arg_var { +      // state_append_arg_var(&state, true, stack_get_str(&state.stack)); +  
 } + +   action filter { +      // codebuf_append_op(&state.out, FSPEC_OP_FILTER); +   } + +   action goto { +      // codebuf_append_op(&state.out, FSPEC_OP_GOTO); +      // state_append_arg_var(&state, false, stack_get_str(&state.stack)); +   } + +   action vnul { +      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_NUL }); +   } + +   action vdec { +      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_DEC }); +   } + +   action vhex { +      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_HEX }); +   } + +   action vstr { +      // codebuf_append_op(&state.out, FSPEC_OP_VISUAL); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ FSPEC_VISUAL_STR }); +   } + +   action r8 { +      // codebuf_append_op(&state.out, FSPEC_OP_READ); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 8 }); +   } + +   action r16 { +      // codebuf_append_op(&state.out, FSPEC_OP_READ); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 16 }); +   } + +   action r32 { +      // codebuf_append_op(&state.out, FSPEC_OP_READ); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 32 }); +   } + +   action r64 { +      // codebuf_append_op(&state.out, FSPEC_OP_READ); +      // codebuf_append_arg(&state.out, FSPEC_ARG_NUM, (fspec_num[]){ 64 }); +   } + +   action enum_member_end { +   } + +   action enum_member_start { +   } + +   action enum_end { +   } + +   action enum_start { +   } + +   action struct_member_end { +      // state_finish_declaration(&state, FSPEC_DECLARATION_MEMBER); +   } + +   action struct_member_start { +      // state_append_declaration(&state, FSPEC_DECLARATION_MEMBER, stack_get_str(&state.stack)); +   } + +   action struct_end { + 
     // state_finish_declaration(&state, FSPEC_DECLARATION_STRUCT); +   } + +   action struct_start { +      // state_append_declaration(&state, FSPEC_DECLARATION_STRUCT, stack_get_str(&state.stack)); +   } + +   action type_err { +      ragel_throw_error(&ragel, "unknown type name"); +   } + +   action visual_err { +      ragel_throw_error(&ragel, "unknown visualization"); +   } + +   action syntax_err { +      ragel_throw_error(&ragel, "malformed input (machine failed here or in next expression)"); +   } + +   action line { +      ragel_advance_line(&ragel); +   } + +   # Semantic +   newline = '\n'; +   valid = ^cntrl; +   comment = '//' <: valid* :>> newline; +   type = ('u8' | 's8') %r8 | ('u16' | 's16') %r16 | ('u32' | 's32') %r32 | ('u64' | 's64') %r64; # each width accepts its unsigned and signed spelling +   visual = 'nul' %vnul | 'dec' %vdec | 'hex' %vhex | 'str' %vstr; + +   # Catchers +   catch_const_expr = stack_num %arg_num; +   catch_struct = 'struct ' <: stack_name; +   catch_enum = 'enum ' <: stack_name; +   catch_type = (catch_struct %goto | type) $!type_err; +   catch_args = stack_num %arg_num | stack_str %arg_str | stack_name %arg_var; +   catch_array = '[' <: (catch_args | '$' %arg_eof) :>> ']'; +   catch_filter = ' | ' %filter <: stack_name %arg_str :>> ('(' <: catch_args? <: (', ' <: catch_args)* :>> ')')?; +   catch_visual = ' ' <: visual $!visual_err; + +   # Abstract +   struct_member = stack_name %struct_member_start :>> ': ' <: (catch_type <: catch_array* catch_filter* catch_visual?) :>> ';' %struct_member_end; +   struct = catch_struct %struct_start :>> ' {' <: (space | comment | struct_member)* :>> '};' %struct_end; +   enum_member = stack_name %enum_member_start :>> (': ' <: catch_const_expr)? 
:>> ';' %enum_member_end; +   enum = catch_enum %enum_start :>> ' {' <: (space | comment | enum_member)* :>> '};' %enum_end; +   line = valid* :>> newline %line; +   main := ((space | comment | enum | struct)* & line*) $!syntax_err; +}%% + +bool +fspec_lexer_parse(struct fspec_lexer *lexer, const char *name) +{ +   int cs; +   %% write init; + +   (void)fspec_lexer_en_main; +   assert(lexer); +   assert(lexer->ops.read); +   assert(lexer->mem.input.data && lexer->mem.input.len); +   assert(lexer->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + +   char var[256]; +   struct stack stack = { .var.buf.mem = { .data = var, .len = sizeof(var) } }; +   struct ragel ragel = { .name = name, .lineno = 1 }; + +   // static const fspec_num version = 0; + +   struct fspec_mem input = lexer->mem.input; +   for (bool eof = false; !ragel.error && !eof;) { +      const size_t bytes = lexer->ops.read(lexer, input.data, 1, input.len); +      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes }; +      ragel_feed_input(&ragel, (eof = (bytes < input.len)), &rl); +      %% write exec; +   } + +   return !ragel.error; +} diff --git a/src/fspec/ragel/validator.rl b/src/fspec/ragel/validator.rl new file mode 100644 index 0000000..90ead21 --- /dev/null +++ b/src/fspec/ragel/validator.rl @@ -0,0 +1,96 @@ +#include <fspec/bcode.h> +#include <fspec/validator.h> +#include "util/ragel/ragel.h" +#include "fspec/private/bcode-types.h" + +#include <assert.h> + +struct stack { +   union { +      fspec_num num; +      fspec_off off; +      fspec_var var; +      fspec_strsz strsz; +      unsigned char b[sizeof(fspec_num)]; +   } u; +   uint8_t i; // writing index for u.b +}; + +struct state { +   struct ragel ragel; +   struct stack stack; +}; + +%%{ +   machine fspec_validator; +   variable p state.ragel.p; +   variable pe state.ragel.pe; +   variable eof state.ragel.eof; +   write data noerror nofinal; + +#   BLT_HEADER = 0; +#   BLT_ADD = 
1; +#   BLT_SUB = 2; +#   BLT_MUL = 3; +#   BLT_DIV = 4; +#   BLT_MOD = 5; +#   BLT_BIT_AND = 6; +#   BLT_BIT_OR = 7; +#   BLT_BIT_XOR = 8; +#   BLT_BIT_LEFT = 9; +#   BLT_BIT_RIGHT = 10; +#   BLT_DECLARE = 11; +#   BLT_READ = 12; +#   BLT_GOTO = 13; +#   BLT_FILTER = 14; +#   BLT_VISUAL = 15; +# +#   builtins = BLT_HEADER | +#              BLT_ADD | BLT_SUB | BLT_MUL | BLT_DIV | BLT_MOD | +#              BLT_BIT_AND | BLT_BIT_OR | BLT_BIT_XOR | BLT_BIT_LEFT | BLT_BIT_RIGHT +#              BLT_DECLARE | BLT_READ | BLT_GOTO | BLT_FILTER | BLT_VISUAL; +# +#   OP_ARG = 0; +#   OP_REF = 1; +#   OP_BLT = 2 OP_ARG builtins; +#   OP_FUN = 3; +# +#   arg_ops = OP_REF | OP_FUN | OP_BUILTIN OP_FUN +# +#   BLT_DECLARE = OP_BUILTIN 10 OP_ARG 2 OP_REF OP_REF; +#   BLT_READ = OP_BUILTIN 11 OP_ARG 1..255 OP_REF (arg_ops)*; +# +#   pattern = ((BLT_READ | BLT_GOTO) BLT_FILTER* BLT_VISUAL?)* $!pattern_error; +#   main := (BLT_HEADER <: BLT_DECLARE* <: pattern) %check_decls $advance $!syntax_error; +   main := any*; +}%% + +bool +fspec_validator_parse(struct fspec_validator *validator, const char *name) +{ +   int cs; +   %% write init; + +   (void)fspec_validator_en_main; +   assert(validator); +   assert(validator->ops.read); +   assert(validator->mem.input.data && validator->mem.input.len); +   assert(validator->mem.input.len <= (size_t)~0 && "input storage size exceeds size_t range"); + +   struct state state = { +      .ragel.name = name, +      .ragel.lineno = 1, +   }; + +   static_assert(sizeof(state.stack.u) == sizeof(state.stack.u.b), "bytes doesn't represent the largest member in union"); + +   struct fspec_mem input = validator->mem.input; +   for (bool eof = false; !state.ragel.error && !eof;) { +      const size_t bytes = validator->ops.read(validator, input.data, 1, input.len); +      const struct ragel_mem rl = { .data = input.data, .end = (char*)input.data + bytes, .binary = true }; +      ragel_feed_input(&state.ragel, (eof = (bytes < input.len)), &rl); +      %% 
write exec; +   } + +   return !state.ragel.error; +} | 
