diff options
| -rw-r--r-- | Makefile | 5 | ||||
| -rw-r--r-- | misc/filespec.vim (renamed from vim/filespec.vim) | 0 | ||||
| -rw-r--r-- | misc/radare2/Makefile | 30 | ||||
| -rw-r--r-- | misc/radare2/asm_fspec.c | 142 | ||||
| -rw-r--r-- | spec/ability.fspec | 103 | ||||
| -rw-r--r-- | spec/bsw.fspec | 2 | ||||
| -rw-r--r-- | spec/eaf.fspec | 15 | ||||
| -rw-r--r-- | spec/elf.fspec | 20 | ||||
| -rw-r--r-- | spec/spell.fspec | 38 | ||||
| -rw-r--r-- | src/bin/fspec-dump.c | 540 | ||||
| -rw-r--r-- | src/bin/utils.h | 81 | ||||
| -rw-r--r-- | src/compiler/compiler.lm | 424 | ||||
| -rw-r--r-- | src/compiler/expr.lm | 4 | ||||
| -rw-r--r-- | src/compiler/native.c | 80 | 
14 files changed, 1314 insertions, 170 deletions
| @@ -8,11 +8,11 @@ WARNINGS := -Wall -Wextra -Wpedantic -Wformat=2 -Wstrict-aliasing=3 -Wstrict-ove  	-Wfloat-equal -Wcast-align -Wpointer-arith -Wchar-subscripts -Warray-bounds=2  override CFLAGS ?= -g -override CFLAGS += -std=c11 $(WARNINGS) +override CFLAGS += -std=c11 -D_DEFAULT_SOURCE $(WARNINGS)  override CPPFLAGS += -Isrc  override COLMFLAGS += -Isrc/compiler -bins = fspec-info dec2bin xidec xi2path xils xifile uneaf +bins = fspec-info fspec-dump dec2bin xidec xi2path xils xifile uneaf  all: $(bins)  %.c: %.lm @@ -35,6 +35,7 @@ fspec-compiler.a: src/compiler/compiler.c fspec-compiler-native.a  fspec-info: private LDLIBS += -lcolm  fspec-info: src/bin/fspec-info.c fspec-compiler.a fspec-compiler-native.a +fspec-dump: src/bin/fspec-dump.c  dec2bin: src/bin/misc/dec2bin.c diff --git a/vim/filespec.vim b/misc/filespec.vim index 15664d7..15664d7 100644 --- a/vim/filespec.vim +++ b/misc/filespec.vim diff --git a/misc/radare2/Makefile b/misc/radare2/Makefile new file mode 100644 index 0000000..aff2fdb --- /dev/null +++ b/misc/radare2/Makefile @@ -0,0 +1,30 @@ +MAKEFLAGS += --no-builtin-rules +R2_PLUGIN_PATH ?= $(shell r2 -H R2_USER_PLUGINS) + +# GCC 7: -Wstringop-overflow=, -Walloc-size-larger-than=, -Wduplicated-{branches,cond} +WARNINGS := -Wall -Wextra -Wformat=2 -Wstrict-aliasing=3 -Wstrict-overflow=5 -Wstack-usage=12500 \ +	-Wfloat-equal -Wcast-align -Wpointer-arith -Wchar-subscripts -Warray-bounds=2 + +override CFLAGS ?= -g +override CFLAGS += -std=c11 $(WARNINGS) + +libs = asm_fspec.so +all: $(libs) + +%.so: +	$(LINK.c) -fPIC -shared $(filter %.c,$^) $(LDLIBS) -o $@ + +asm_fspec.so: private CFLAGS += $(shell pkg-config --cflags r_anal) +asm_fspec.so: private LDLIBS += $(shell pkg-config --libs-only-l r_anal) +asm_fspec.so: asm_fspec.c + +install: $(libs) +	install -Dm755 $^ -t "$(R2_PLUGIN_PATH)" + +uninstall: +	$(RM) "$(R2_PLUGIN_PATH)"/asm_fspec.so + +clean: +	$(RM) $(libs) + +.PHONY: all clean install uninstall diff --git a/misc/radare2/asm_fspec.c 
b/misc/radare2/asm_fspec.c new file mode 100644 index 0000000..fa7c1ad --- /dev/null +++ b/misc/radare2/asm_fspec.c @@ -0,0 +1,142 @@ +/* radare - LGPL - Copyright 2018 - Jari Vetoniemi */ + +#include <stdio.h> +#include <string.h> +#include <r_types.h> +#include <r_util.h> +#include <r_lib.h> +#include <r_asm.h> + +enum fspec_instruction { +   INS_VERSION, +   INS_REG, +   INS_PUSH, +   INS_PUSHR, +   INS_STORE, +   INS_OP, +   INS_QUEUE, +   INS_IO, +   INS_EXEC, +   INS_CALL, +   INS_JMP, +   INS_JMPIF +}; + +enum fspec_operation { +   OP_UNM, +   OP_LNOT, +   OP_BNOT, +   OP_MUL, +   OP_DIV, +   OP_MOD, +   OP_ADD, +   OP_SUB, +   OP_SHIFTL, +   OP_SHIFTR, +   OP_LESS, +   OP_LESSEQ, +   OP_EQ, +   OP_NOTEQ, +   OP_BAND, +   OP_BOR, +   OP_BXOR, +   OP_LAND, +   OP_LOR, +   OP_CTERNARY, +   OP_SUBSCRIPT +}; + +static const char* +ins_name_str(const enum fspec_instruction name) +{ +   switch (name) { +      case INS_VERSION: return "version"; +      case INS_REG: return "reg"; +      case INS_PUSH: return "push"; +      case INS_PUSHR: return "pushr"; +      case INS_STORE: return "store"; +      case INS_OP: return "op"; +      case INS_QUEUE: return "queue"; +      case INS_IO: return "io"; +      case INS_EXEC: return "exec"; +      case INS_CALL: return "call"; +      case INS_JMP: return "jmp"; +      case INS_JMPIF: return "jmpif"; +   } +   return "invalid"; +} + +static const char* +op_name_str(const enum fspec_operation op) +{ +   switch (op) { +      case OP_UNM: return "unm"; +      case OP_LNOT: return "lnot"; +      case OP_BNOT: return "bnot"; +      case OP_MUL: return "mul"; +      case OP_DIV: return "div"; +      case OP_MOD: return "mod"; +      case OP_ADD: return "add"; +      case OP_SUB: return "sub"; +      case OP_SHIFTL: return "shiftl"; +      case OP_SHIFTR: return "shiftr"; +      case OP_LESS: return "less"; +      case OP_LESSEQ: return "lesseq"; +      case OP_EQ: return "eq"; +      case OP_NOTEQ: return "noteq"; +      case 
OP_BAND: return "band"; +      case OP_BOR: return "bor"; +      case OP_BXOR: return "bxor"; +      case OP_LAND: return "land"; +      case OP_LOR: return "lor"; +      case OP_CTERNARY: return "cternary"; +      case OP_SUBSCRIPT: return "subscript"; +   } +   return "invalid"; +} + +static int +disassemble(RAsm *a, RAsmOp *op, const ut8 *buf, int len) +{ +   union { +      struct { unsigned name:5; unsigned n:2; uint64_t v:57; } ins; +      uint8_t v[16]; +   } u = {0}; + +   memcpy(u.v, buf, R_MIN(sizeof(u.v[0]), len)); +   const uint8_t insw = sizeof(uint16_t) * (1 << u.ins.n); +   memcpy(u.v, buf, R_MIN(insw, len)); +   const char *buf_asm = "invalid"; + +   if (u.ins.name == INS_OP) +      buf_asm = sdb_fmt("%s %s", ins_name_str(u.ins.name), op_name_str(u.ins.v)); +   else if (u.ins.n == 0) +      buf_asm = sdb_fmt("%s 0x%02x", ins_name_str(u.ins.name), (uint16_t)u.ins.v); +   else if (u.ins.n == 1) +      buf_asm = sdb_fmt("%s 0x%04x", ins_name_str(u.ins.name), (uint32_t)u.ins.v); +   else if (u.ins.n == 2) +      buf_asm = sdb_fmt("%s 0x%08x", ins_name_str(u.ins.name), (uint64_t)u.ins.v); +   else +      return 0; + +   r_strbuf_set(&op->buf_asm, buf_asm); +   return (op->size = insw + (u.ins.name == INS_REG ? 
u.ins.v : 0)); +} + +RAsmPlugin r_asm_plugin_fspec = { +   .name = "fspec", +   .license = "LGPL3", +   .desc = "fspec disassembly plugin", +   .arch = "fspec", +   .bits = 16 | 32 | 64, +   .endian = R_SYS_ENDIAN_LITTLE, +   .disassemble = disassemble +}; + +#ifndef CORELIB +R_API RLibStruct radare_plugin = { +   .type = R_LIB_TYPE_ASM, +   .data = &r_asm_plugin_fspec, +   .version = R2_VERSION +}; +#endif diff --git a/spec/ability.fspec b/spec/ability.fspec index 80a07fd..aab2bcf 100644 --- a/spec/ability.fspec +++ b/spec/ability.fspec @@ -1,11 +1,94 @@ -struct ability { -   u16 index; -   u16 icon_id; -   u16 mp_cost; -   u16 unknown; -   u16 targets; -   u8 name[32] | encoding('sjis') str; // The encoding actually depends on ROM region -   u8 description[256] | encoding('sjis') str; // ^ Ditto, we can't express this (we need parser options) -   u8 padding[726] nul; -   struct ability ability[until (false)]; +struct dat { +   struct asd { +      u16 index; +      u16 icon_id; +      u16 mp_cost; +      u16 unknown; +      u16 targets; +      u8 name[32] | encoding('sjis') str; // The encoding actually depends on ROM region +      u8 description[256] | encoding('sjis') str; // ^ Ditto, we can't express this (we need parser options) +      u8 padding[726] nul; +   } ability[until (false)];  }; + +// # Instructions +// +// Instructions are variable-length with minimum size of 16 bits and maximum size of 128 bits[1]. +// They are encoded with the following schema `5:name 2:n (16 * 2^n - 7):modifiers`, where `name` specifies the +// instruction name and `n` specifies the total width of the instruction in bits with the formula `16 * 2^n`. 
+// +// Here's a table that shows how `n` maps to the instruction size: +// n | instruction size in bits +// 0 | 16 +// 1 | 32 +// 2 | 64 +// 3 | 128 +// +// In the list below the notation for instruction and its modifiers is `NAME<1:arg1, 1:arg2> [STACK1] (STACK2)` +// where `NAME` is the name of instruction, and inside the `<>` brackets are the modifiers for the instruction. +// The optional number before the modifier tells the modifier size in bits; if omitted, the rest of the bits are used. +// Inside `[]` brackets are the arguments that will be popped from the stack. `()` parentheses are used instead, +// if the stack argument is optional. +// +// List of instructions: +// Name             | Hex  | Description +// VERSION<version> | 0x00 | Indicates the version of this bytecode. +// REG<len>         | 0x01 | Allocates a new register. +//                  *      * If `len` is not zero, the next `len` bytes will be stored in this register. +// PUSH<v>          | 0x02 | Pushes `v` to the stack. +// PUSHR<R>         | 0x03 | Pushes the contents of register at index `R` to the stack. +// STORE<R> [v]     | 0x04 | Stores `v` into register at index `R`. +// OP<op> [...]     | 0x05 | Performs operation specified by the `op`, and pushes the result to the stack. +// QUEUE<len> (...) | 0x06 | Queues next `len` bytes for execution for the next `IO` instruction. +//                  *      * The code is executed before or after `IO` instruction, depending on whether the VM is packing or unpacking. +// IO<sz> [R] (...) | 0x07 | Unpacking: Reads data from external VM input (usually a file) to register at index `R`. +//                  *      * Packing: Writes data to external VM output (usually a file) from register at index `R`. +//                  *      * `sz` is the size of the element in bits. +//                  *      * Rest of the stack is the number of elements; if empty, the number of elements to read/write is 1. +// EXEC<R> (...)    
| 0x08 | Executes instructions stored in register at index `R`. +//                  *      * Rest of the stack is the number of times to execute, if empty, execution happens only once. +// CALL<R> (...)    | 0x09 | Calls a function. The name of the function is stored in register at index `R`. +// JMP<off>         | 0x0A | Jumps to the `off` (in bytes). +// JMPIF<off> [v]   | 0x0B | Performs `JMP`, if `v` is true. +// STRUCT<R> [R2]   | 0x0C | Describes register at index `R` as struct structure. Register at index `R2` contains the name of the struct. +// SELECT<R>        | 0x0D | Describes register at index `R` as select structure. +// FIELD<R> (...)   | 0x0E | Links field to last structure. Register at index `R` contains the name of the field. +//                  *      * Rest of the stack contains register indices for registers that are instances of this field. +// BSZ<s:1, v> [sz] | 0x0F | Describes last field as a primitive field. `s` describes whether the field is signed. `v` describes how the +//                  *      * field should be represented. `sz` contains the size of field in bits. +// REF<R>           | 0x10 | Describes field as a substructure. Register at index `R` contains the structure definition. +// FDIMENSION<sz>   | 0x11 | Adds fixed dimension to a field. `sz` indicates the size of the dimension. +// VDIMENSION       | 0x12 | Adds variable length dimension to a field. +// ENUM<R>          | 0x13 | Links field to an enum. +// +// List of operations for the `OP` instruction: +// Name      | Hex  | Argc | Description +// UNM       | 0x00 | 1    | Unary minus operation `-r1`. +// LNOT      | 0x01 | 1    | Logical not operation `!r1`. +// BNOT      | 0x02 | 1    | Bitwise not operation `~r1`. +// MUL       | 0x03 | 2    | Multiplication operation `r1 * r2`. +// DIV       | 0x04 | 2    | Division operation `r1 / r2`. +// MOD       | 0x05 | 2    | Modulo operation `r1 % r2`. +// ADD       | 0x06 | 2    | Addition operation `r1 + r2`. 
+// SUB       | 0x07 | 2    | Subtraction operation `r1 - r2`. +// SHIFTL    | 0x08 | 2    | Left shift operation `r1 << r2`. +// SHIFTR    | 0x09 | 2    | Right shift operation `r1 >> r2`. +// LESS      | 0x0A | 2    | Less than operation `r1 < r2`. +// LESSEQ    | 0x0B | 2    | Less or equal operation `r1 <= r2`. +// EQ        | 0x0C | 2    | Equal operation `r1 == r2`. +// NOTEQ     | 0x0D | 2    | Not equal operation `r1 != r2`. +// BAND      | 0x0E | 2    | Bitwise and operation `r1 & r2`. +// BOR       | 0x0F | 2    | Bitwise or operation `r1 | r2`. +// BXOR      | 0x10 | 2    | Bitwise xor operation `r1 ^ r2`. +// LAND      | 0x11 | 2    | Logical and operation `r1 && r2`. +// LOR       | 0x12 | 2    | Logical or operation `r1 || r2`. +// CTERNARY  | 0x13 | 3    | Conditional ternary operation `r1 ? r2 : r3` +// SUBSCRIPT | 0x14 | 2    | Subscript operation `r1[r2]` +// +// List of visuals for the `PIO` instruction: +// Name | Hex  | Representation +// NUL  | 0x00 | None +// DEC  | 0x01 | Decimal +// HEX  | 0x02 | Hexadecimal +// STR  | 0x03 | String +// FLT  | 0x04 | Float diff --git a/spec/bsw.fspec b/spec/bsw.fspec index 5e868eb..3714337 100644 --- a/spec/bsw.fspec +++ b/spec/bsw.fspec @@ -1,4 +1,4 @@ -struct v { +struct bsw {     u8 header[4] | matches('CWS\0') str;     u32 size dec;     u8 data[until(false)] | compression('zlib', size) hex; diff --git a/spec/eaf.fspec b/spec/eaf.fspec index 139539d..3373de9 100644 --- a/spec/eaf.fspec +++ b/spec/eaf.fspec @@ -1,10 +1,3 @@ -struct file { -   u8 path[256] | encoding('ascii') str; -   u64 offset; -   u64 size; -   u8 padding[16] nul; -}; -  struct eaf {     u8 header[4] | matches('#EAF') str;     u16 major; @@ -13,5 +6,11 @@ struct eaf {     u32 count;     u64 unknown;     u8 padding[100] nul; -   struct file files[count]; + +   struct { +      u8 path[256] | encoding('ascii') str; +      u64 offset hex; +      u64 size; +      u8 padding[16] nul; +   } metadata[count];  }; diff --git a/spec/elf.fspec 
b/spec/elf.fspec index 6f70459..a9eb1a0 100644 --- a/spec/elf.fspec +++ b/spec/elf.fspec @@ -198,9 +198,23 @@ struct elf {        u16 e_shnum;        u16 e_shstrndx; -      // TODO: need to handle offsets -      struct program_header e_ph[e_phnum:e_phentsize]; -      struct section_header e_sh[e_shnum:e_shentsize]; +      // TODO: would be nicer if we didn't need this select +      select (e_ident.ei_class) { +         e_ident.CLASS_32) +            struct { +               seek(arch.elf32.e_phoff); +               struct program_header e_ph[e_phnum:e_phentsize]; +               seek(arch.elf32.e_shoff); +               struct section_header e_sh[e_shnum:e_shentsize]; +            } elf32; +         e_ident.CLASS_64) +            struct { +               seek(arch.elf64.e_phoff); +               struct program_header e_ph[e_phnum:e_phentsize]; +               seek(arch.elf64.e_shoff); +               struct section_header e_sh[e_shnum:e_shentsize]; +            } elf64; +      } headers;     };     select (e_ident.ei_data) { diff --git a/spec/spell.fspec b/spec/spell.fspec index e1c012a..73b9cc4 100644 --- a/spec/spell.fspec +++ b/spec/spell.fspec @@ -1,22 +1,20 @@ -struct spell { -   u16 index; -   u16 type; // 1-6 for White/Black/Summon/Ninja/Bard/Blue -   u16 element; -   u16 targets; -   u16 skill; -   u16 mp_cost; -   u8 casting_time; // in quarter of seconds -   u8 recast_delay; // in quarter of seconds -   u8 level[24] hex; // 1 byte per job, 0xxFF if not learnable, first slot is NONE job so always 0xFF -   u16 id; // 0 for "unused" spells; often, but not always, equal to index -   u8 unknown; -   u8 jp_name[20] | encoding('sjis') str; -   u8 en_name[20] | encoding('ascii') str; -   u8 jp_description[128] | encoding('sjis') str; -   u8 en_description[128] | encoding('ascii') str; -   u8 padding[687] nul; -}; -  struct dat { -   struct spell spell[until (false)]; +   struct { +      u16 index; +      u16 type; // 1-6 for White/Black/Summon/Ninja/Bard/Blue +   
   u16 element; +      u16 targets; +      u16 skill; +      u16 mp_cost; +      u8 casting_time; // in quarter of seconds +      u8 recast_delay; // in quarter of seconds +      u8 level[24] hex; // 1 byte per job, 0xxFF if not learnable, first slot is NONE job so always 0xFF +      u16 id; // 0 for "unused" spells; often, but not always, equal to index +      u8 unknown; +      u8 jp_name[20] | encoding('sjis') str; +      u8 en_name[20] | encoding('ascii') str; +      u8 jp_description[128] | encoding('sjis') str; +      u8 en_description[128] | encoding('ascii') str; +      u8 padding[687] nul; +   } spell[until (false)];  }; diff --git a/src/bin/fspec-dump.c b/src/bin/fspec-dump.c new file mode 100644 index 0000000..195674f --- /dev/null +++ b/src/bin/fspec-dump.c @@ -0,0 +1,540 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdbool.h> +#include <limits.h> +#include <ctype.h> +#include <assert.h> +#include <err.h> + +#include "utils.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#define DIV_ROUND_UP(a, b) (1 + ((a - 1) / b)) + +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +static const int INDSTP = 3; + +static size_t +to_hex(const uint8_t *buf, const size_t buf_sz, char *out, const size_t out_sz, const bool reverse) +{ +   assert(out); +   const char nibble[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; +   const uint8_t nbs = sizeof(nibble) - 1; + +   size_t w = 0, last_non_zero = w; +   for (size_t i = 0; i < buf_sz && out_sz > 2 && w < out_sz - 2; ++i) { +      for (uint8_t c = 0; c < CHAR_BIT / 8 && w < out_sz; ++c) { +         const size_t idx = (reverse ? 
(buf_sz - 1) - i : i); +         const uint8_t hi = (buf[idx] >> (4 * (c + 1))) & nbs; +         const uint8_t lo = (buf[idx] >> (8 * c)) & nbs; + +         if (w || hi || lo) { +            out[w++] = nibble[hi]; +            out[w++] = nibble[lo]; +            last_non_zero = (hi || lo ? w : last_non_zero); +         } +      } +   } + +   if (!w) { +      out[w++] = nibble[0]; +      out[w++] = nibble[0]; +   } else { +      w = last_non_zero; +   } + +   assert(w < out_sz); +   out[w] = 0; +   return w; +} + +static void +print_dec(const uint8_t *buf, const size_t size, const bool is_signed) +{ +   char hex[2 * sizeof(uint64_t) + 1]; +   to_hex(buf, size, hex, sizeof(hex), true); + +   if (is_signed) { +      printf("%ld", (int64_t)strtoll(hex, NULL, 16)); +   } else { +      printf("%lu", (uint64_t)strtoull(hex, NULL, 16)); +   } +} + +static void +print_udec(const uint8_t *buf, const size_t size) +{ +   print_dec(buf, size, false); +} + +static void +print_sdec(const uint8_t *buf, const size_t size) +{ +   print_dec(buf, size, true); +} + +static void +print_hex(const uint8_t *buf, const size_t size) +{ +   char hex[2 * sizeof(uint64_t) + 1]; +   to_hex(buf, size, hex, sizeof(hex), true); +   printf("0x%s", hex); +} + +static void +print_array(const uint8_t *buf, const size_t size, const size_t nmemb, void (*fun)(const uint8_t *buf, const size_t size), const int sindent) +{ +   const int indent = sindent + INDSTP; +   if (nmemb > 8) { +      printf("{\n%*s", indent, ""); +   } else if (nmemb > 1) { +      printf("{ "); +   } + +   for (size_t n = 0; n < nmemb; ++n) { +      fun(buf + n * size, size); +      printf("%s", (nmemb > 1 && n + 1 < nmemb ? ", " : "")); + +      if (n + 1 < nmemb && !((n + 1) % 8)) +         printf("\n%*s", indent, ""); +   } + +   if (nmemb > 8) { +      printf("\n%*s}\n", sindent, ""); +   } else { +      printf("%s\n", (nmemb > 1 ? 
" }" : "")); +   } +} + +static void +print_str(const char *buf, const size_t size, const size_t nmemb) +{ +   const bool has_nl = memchr(buf, '\n', size * nmemb); +   if (has_nl) +      puts("```"); + +   for (size_t n = 0; n < size * nmemb && buf[n] != 0; ++n) +      printf("%c", buf[n]); + +   puts((has_nl ? "```" : "")); +} + +enum fspec_visual { +   VISUAL_NUL, +   VISUAL_DEC, +   VISUAL_HEX, +   VISUAL_STR, +   VISUAL_FLT +}; + +static void +fspec_print(const uint8_t *buf, const size_t size, const size_t nmemb, const bool sign, const enum fspec_visual visual, const int indent) +{ +   const uint64_t szb = DIV_ROUND_UP(size, CHAR_BIT); +   switch (visual) { +      case VISUAL_STR: +         print_str((char*)buf, szb, nmemb); +         break; + +      case VISUAL_HEX: +         print_array(buf, szb, nmemb, print_hex, indent); +         break; + +      case VISUAL_DEC: +         print_array(buf, szb, nmemb, (sign ? print_sdec : print_udec), indent); +         break; + +      case VISUAL_FLT: +         // TODO +         break; + +      case VISUAL_NUL: +         break; +   } +} + +enum fspec_instruction { +   INS_VERSION, +   INS_REG, +   INS_PUSH, +   INS_PUSHR, +   INS_STORE, +   INS_OP, +   INS_QUEUE, +   INS_IO, +   INS_EXEC, +   INS_CALL, +   INS_JMP, +   INS_JMPIF, +}; + +enum fspec_operation { +   OP_UNM, +   OP_LNOT, +   OP_BNOT, +   OP_MUL, +   OP_DIV, +   OP_MOD, +   OP_ADD, +   OP_SUB, +   OP_SHIFTL, +   OP_SHIFTR, +   OP_LESS, +   OP_LESSEQ, +   OP_EQ, +   OP_NOTEQ, +   OP_BAND, +   OP_BOR, +   OP_BXOR, +   OP_LAND, +   OP_LOR, +   OP_CTERNARY, +   OP_SUBSCRIPT +}; + +struct fspec_register { +   uint64_t off, len; +   uint8_t shift[2]; +}; + +struct fspec_stack { +   struct fspec_register value[64]; +   uint8_t numv; +}; + +struct fspec_istream { +   size_t (*read)(void *ctx, void *buf, const size_t size); +}; + +struct fspec_buffer { +   uint8_t *data; +   uint64_t ptr, size; +}; + +static void +fspec_buffer_write(struct fspec_buffer *buf, const void 
*data, const size_t size) +{ +   assert(buf->ptr + size <= buf->size); +   memcpy(buf->data + buf->ptr, data, size); +   buf->ptr += size; +} + +struct fspec_ctx { +   struct fspec_buffer mem; +   struct fspec_stack S, R; +   struct fspec_istream ir, binary; +}; + +static void +stack_push_num(struct fspec_stack *stack, struct fspec_buffer *buf, const uint64_t v) +{ +   assert(stack->numv < ARRAY_SIZE(stack->value)); +   const uint8_t bsz = DIV_ROUND_UP(__builtin_ctzl((v ? v : 1)), CHAR_BIT); +   stack->value[stack->numv++] = (struct fspec_register){ .off = buf->ptr, .len = bsz }; +   const union { uint8_t u8[sizeof(v)]; uint64_t v; } u = { .v = v }; +   fspec_buffer_write(buf, u.u8, bsz); +} + +static void +stack_push(struct fspec_stack *stack, struct fspec_register *value) +{ +   assert(stack->numv < ARRAY_SIZE(stack->value)); +   stack->value[stack->numv++] = *value; +} + +static void +stack_pop(struct fspec_stack *stack, struct fspec_register *out_value) +{ +   assert(stack->numv > 0); +   *out_value = stack->value[--stack->numv]; +} + +static uint64_t +stack_pop_num(struct fspec_stack *stack, const struct fspec_buffer *buf) +{ +   assert(stack->numv > 0); +   const struct fspec_register v = stack->value[--stack->numv]; +   union { uint8_t u8[sizeof(uint64_t)]; uint64_t v; } u = {0}; +   memcpy(u.u8, buf->data + v.off, MIN(v.len, sizeof(u.u8))); +   return (u.v << v.shift[0]) >> v.shift[1]; +} + +static bool +is_binary(const uint8_t *data, const uint64_t len) +{ +   for (uint64_t i = 0; i < len; ++i) +      if (!isprint(data[i])) +         return true; +   return false; +} + +static void +fspec_seek(struct fspec_ctx *ctx) +{ +   const uint64_t off = stack_pop_num(&ctx->S, &ctx->mem); +   // fseek(ctx->input, off, SEEK_SET); +} + +static uint64_t +math(const enum fspec_operation op, const uint64_t r[3]) +{ +   switch (op) { +      case OP_UNM: return -r[0]; +      case OP_LNOT: return !r[0]; +      case OP_BNOT: return ~r[0]; +      case OP_MUL: return r[0] * 
r[1]; +      case OP_DIV: return r[0] / r[1]; +      case OP_MOD: return r[0] % r[1]; +      case OP_ADD: return r[0] + r[1]; +      case OP_SUB: return r[0] - r[1]; +      case OP_SHIFTL: return r[0] << r[1]; +      case OP_SHIFTR: return r[0] >> r[1]; +      case OP_LESS: return r[0] < r[1]; +      case OP_LESSEQ: return r[0] <= r[1]; +      case OP_EQ: return r[0] == r[1]; +      case OP_NOTEQ: return r[0] != r[1]; +      case OP_BAND: return r[0] & r[1]; +      case OP_BOR: return r[0] | r[1]; +      case OP_BXOR: return r[0] ^ r[1]; +      case OP_LAND: return r[0] && r[1]; +      case OP_LOR: return r[0] || r[1]; +      case OP_CTERNARY: return r[0] ? r[1] : r[2]; +      case OP_SUBSCRIPT: assert(0 && "should not happen"); +   } +   return 0; +} + +static void +do_op(struct fspec_ctx *ctx, const enum fspec_operation op) +{ +   const struct { +      char *name; +      uint8_t args; +   } map[] = { +      { "UNM", 1 }, { "LNOT", 1 }, { "BNOT", 1 }, // unary +      { "MUL", 2 }, { "DIV", 2 }, { "MOD", 2 }, { "ADD", 2 }, { "SUB", 2 }, // binary math +      { "SHIFTL", 2 }, { "SHIFTR", 2 }, // bitshifts +      { "LESS", 2 }, { "LESSEQ", 2 }, { "EQ", 2 }, { "NOTEQ", 2 }, // logical comparison +      { "BAND", 2 }, { "BOR", 2 }, { "BXOR", 2 }, // bitwise operations +      { "LAND", 2 }, { "LOR", 2 }, // logical and && or +      { "CTERNARY", 3 }, // ternary +      { "SUBSCRIPT", 2 }, // subscript +   }; + +   assert(op < ARRAY_SIZE(map)); + +   uint64_t r[3]; +   fprintf(stderr, "%s: ", map[op].name); +   for (uint8_t i = 0; i < map[op].args; ++i) { +      r[i] = stack_pop_num(&ctx->S, &ctx->mem); +      fprintf(stderr, "%lu%s", r[i], (i + 1 < map[op].args ? 
", " : "\n")); +   } + +   stack_push_num(&ctx->S, &ctx->mem, math(op, r)); +} + +static bool +fspec_execute(struct fspec_ctx *ctx, const uint8_t *ir, const uint64_t irlen, const int ind) +{ +   const struct filter { +      const char *name, **unpacking, **packing; +   } filters[] = { +      { "encoding", (const char*[]){ "iconv", "-f", NULL }, (const char*[]){ "iconv", "-t", NULL } } +   }; + +   const struct function { +      const char *name; +      void (*fun)(struct fspec_ctx *ctx); +   } functions[] = { +      { "seek", fspec_seek } +   }; + +   for (const uint8_t *pc = ir; pc < ir + irlen;) { +      union { +         struct { unsigned name:5; unsigned n:2; uint64_t v:57; } ins; +         uint8_t v[16]; +      } u = {0}; + +      memcpy(u.v, pc, sizeof(u.v[0])); +      const uint8_t insw = sizeof(uint16_t) * (1 << u.ins.n); +      memcpy(u.v, pc, insw); +      pc += insw; + +      const uint64_t insv = u.ins.v; +      switch (u.ins.name) { +         case INS_VERSION: +            fprintf(stderr, "VERSION: %lu\n", insv); +            break; +         case INS_REG: +            stack_push(&ctx->R, (struct fspec_register[]){{ .off = pc - ctx->mem.data, .len = insv }}); +            if (is_binary(pc, insv)) { +               fprintf(stderr, "REG len: %lu, [binary data]\n", insv); +            } else { +               fprintf(stderr, "REG len: %lu, %.*s\n", insv, (int)insv, (char*)pc); +            } +            pc += insv; +            break; +         case INS_PUSH: +            fprintf(stderr, "PUSH v: %lu\n", insv); +            stack_push(&ctx->S, (struct fspec_register[]){{ .off = pc - ctx->mem.data - insw, .len = insw, .shift = {0,7} }}); +            break; +         case INS_PUSHR: +            fprintf(stderr, "PUSHR R: %lu\n", insv); +            stack_push(&ctx->S, &ctx->R.value[insv]); +            break; +         case INS_STORE: +            fprintf(stderr, "STORE R: %lu\n", insv); +            stack_pop(&ctx->S, &ctx->R.value[insv]); +            
break; +         case INS_OP: +            fprintf(stderr, "OP op: %lu\n", insv); +            do_op(ctx, insv); +            break; +         case INS_QUEUE: +            fprintf(stderr, "QUEUE len: %lu\n", insv); +            break; +         case INS_IO: { +               const uint64_t R = stack_pop_num(&ctx->S, &ctx->mem); +               fprintf(stderr, "IO: sz: %lu, R: %lu\n", insv, R); +               ctx->R.value[R].off = ctx->mem.ptr; +               const uint64_t szb = DIV_ROUND_UP(insv, CHAR_BIT), bpe = (szb * CHAR_BIT) / insv; +               uint64_t nmemb = 1; +               do { +                  nmemb *= (ctx->S.numv ? stack_pop_num(&ctx->S, &ctx->mem) : 1) / bpe; +                  assert(ctx->mem.ptr + szb * nmemb <= ctx->mem.size); +                  ctx->mem.ptr += ctx->binary.read(ctx, ctx->mem.data + ctx->mem.ptr, szb * nmemb); +               } while (ctx->S.numv); +               if (ctx->mem.ptr == ctx->R.value[R].off) +                  return true; +               ctx->R.value[R].len = ctx->mem.ptr - ctx->R.value[R].off; +            } +            break; +         case INS_EXEC: { +               fprintf(stderr, "EXEC R: %lu\n", insv); +               uint64_t nmemb = 1; +               do { +                  nmemb *= (ctx->S.numv ? 
stack_pop_num(&ctx->S, &ctx->mem) : 1); +                  for (uint64_t i = 0; i < nmemb; ++i) +                     if (fspec_execute(ctx, ctx->mem.data + ctx->R.value[insv].off, ctx->R.value[insv].len, ind + INDSTP)) +                        return true; +               } while (ctx->S.numv); +            } +            break; +         case INS_CALL: { +               fprintf(stderr, "CALL R: %lu\n", insv); +               ctx->S.numv = 0; +#if 0 +               const struct filter *filter = NULL; +               const struct fspec_data *name = &ctx->D[num - 1]; +               for (size_t i = 0; i < ARRAY_SIZE(filters); ++i) { +                  if (strlen(filters[i].name) != name->len || memcmp(filters[i].name, (char*)ctx->code + name->off, name->len)) +                     continue; + +                  filter = &filters[i]; +                  break; +               } + +               if (filter) { +                  size_t i; +                  const char *args[32]; +                  for (i = 0; filters->unpacking[i]; ++i) { +                     args[i] = filters->unpacking[i]; +                     fprintf(stderr, "%zu: %s\n", i, args[i]); +                  } + +                  size_t aw = 0; +                  char additional[1024]; +                  memset(additional, 0, sizeof(additional)); +                  for (; ctx->S.written; ++i) { +                     const struct fspec_value v = stack_pop(&ctx->S); +                     if (v.type == FSPEC_VALUE_NUMBER) { +                        aw += snprintf(additional, sizeof(additional) - aw, "%lu", v.u.num) + 1; +                     } else if (v.type == FSPEC_VALUE_DATA) { +                        args[i] = additional + aw; +                        memcpy(additional + aw, (char*)ctx->code + v.u.data.off, v.u.data.len); +                        aw += v.u.data.len + 1; +                     } else if (v.type == FSPEC_VALUE_FIELD) { +                        args[i] = additional + aw; +               
         memcpy(additional + aw, (char*)ctx->binary + v.u.data.off, v.u.data.len); +                        aw += v.u.data.len + 1; +                     } +                     fprintf(stderr, "%zu: %s\n", i, args[i]); +                  } +                  args[i] = NULL; + +                  struct proc p; +                  if (proc_open(args[0], (char*const*)args, &p)) { +                     ctx->bsz -= write(p.fds[0], ctx->binary + last->u.primitive.data.off, ctx->bsz - last->u.primitive.data.off); +                     close_fd(&p.fds[0]); +                     assert(ctx->bsz == last->u.primitive.data.off); +                     ssize_t rd; +                     for (; (rd = read(p.fds[1], ctx->binary + last->u.primitive.data.off, 1024)) == 1024; ctx->bsz += rd); +                     ctx->bsz += rd; +                     proc_close(&p); +                  } else { +                     warn("failed to spawn: %s", args[0]); +                  } +               } else { +                  ctx->S.numv = 0; +               } +#endif +            } +            break; +         case INS_JMP: +            fprintf(stderr, "JMP off: %lu\n", insv); +            pc = ir + insv; +            break; +         case INS_JMPIF: +            fprintf(stderr, "JMPIF off: %lu\n", insv); +            const uint64_t r1 = stack_pop_num(&ctx->S, &ctx->mem); +            pc = (r1 ? 
ir + insv : pc); +            break; +         default: +            errx(EXIT_FAILURE, "unknown instruction: %u\n", u.ins.name); +      } +   } +   return false; +} + +static FILE *input; + +static size_t +read_binary(void *ctx, void *ptr, const size_t size) +{ +   return fread(ptr, 1, size, input); +} + +static size_t +read_ir(void *ctx, void *ptr, const size_t size) +{ +   return fread(ptr, 1, size, stdin); +} + +int +main(int argc, char *argv[]) +{ +   input = fopen(argv[1], "rb"); + + +   struct fspec_ctx ctx = { +      .mem = { .data = calloc(4096, 4096), .size = 4096 * 4096 }, +      .ir.read = read_ir, +      .binary.read = read_binary, +      .R.numv = 1 +   }; + +   ctx.mem.ptr += ctx.ir.read(&ctx, ctx.mem.data, ctx.mem.size); +   fspec_execute(&ctx, ctx.mem.data, ctx.mem.ptr, 0); +   fspec_execute(&ctx, ctx.mem.data + ctx.R.value[ctx.R.numv - 1].off, ctx.R.value[ctx.R.numv - 1].len, 0); + +   for (uint64_t i = 0; i < ctx.R.numv; ++i) { +      printf("REG%lu: ", i); +      fspec_print(ctx.mem.data + ctx.R.value[i].off, 1, ctx.R.value[i].len, false, VISUAL_HEX, 0); +   } + +   return EXIT_SUCCESS; +} diff --git a/src/bin/utils.h b/src/bin/utils.h new file mode 100644 index 0000000..3baa1b1 --- /dev/null +++ b/src/bin/utils.h @@ -0,0 +1,81 @@ +#pragma once + +// #include <spawn.h> +#include <sys/wait.h> +#include <unistd.h> + +struct proc { +   pid_t pid; +   int fds[2]; +}; + +static inline void +close_fd(int *fd) +{ +   assert(fd); +   if (*fd >= 0) +      close(*fd); +} + +static inline bool +proc_open(const char *file, char *const argv[], struct proc *out_proc) +{ +   assert(file && argv && out_proc); +   *out_proc = (struct proc){0}; + +   int pipes[4]; +   pipe(&pipes[0]); /* parent */ +   pipe(&pipes[2]); /* child */ + +#if 0 +   // Doesn't work, no idea why +   posix_spawn_file_actions_t fa; +   if (posix_spawn_file_actions_init(&fa) != 0 || +       posix_spawn_file_actions_addclose(&fa, pipes[0]) != 0 || +       
posix_spawn_file_actions_addclose(&fa, pipes[3]) != 0 || +       posix_spawn_file_actions_adddup2(&fa, pipes[2], 0) != 0 || +       posix_spawn_file_actions_adddup2(&fa, pipes[1], 1) != 0 || +       posix_spawn_file_actions_addclose(&fa, pipes[2]) != 0 || +       posix_spawn_file_actions_addclose(&fa, pipes[1]) != 0 || +       posix_spawnp(&out_proc->pid, file, &fa, NULL, argv, NULL) != 0) { +      posix_spawn_file_actions_destroy(&fa); +      for (uint8_t i = 0; i < ARRAY_SIZE(pipes); ++i) +         close(pipes[i]); +      return false; +   } +   posix_spawn_file_actions_destroy(&fa); +#else +   if ((out_proc->pid = fork()) > 0) { +      out_proc->fds[0] = pipes[3]; +      out_proc->fds[1] = pipes[0]; +      close(pipes[1]); +      close(pipes[2]); +      return true; +   } else { +      close(pipes[0]); +      close(pipes[3]); +      dup2(pipes[2], 0); +      dup2(pipes[1], 1); +      close(pipes[2]); +      close(pipes[1]); +      execvp(file, argv); +      _exit(0); +   } +#endif + +   out_proc->fds[0] = pipes[3]; +   out_proc->fds[1] = pipes[0]; +   close(pipes[1]); +   close(pipes[2]); +   return true; +} + +static inline void +proc_close(struct proc *proc) +{ +   assert(proc); +   waitpid(proc->pid, NULL, 0); +   close_fd(&proc->fds[0]); +   close_fd(&proc->fds[1]); +   *proc = (struct proc){0}; +} diff --git a/src/compiler/compiler.lm b/src/compiler/compiler.lm index 3571527..d0864b4 100644 --- a/src/compiler/compiler.lm +++ b/src/compiler/compiler.lm @@ -40,7 +40,7 @@ context fspec           def declaration              value:int -            [name:reference::variable::type `= expr::enum::type] { lhs.value = const_int_expr(r3.collapsed) enum_set(lhs.value) enum_inc() } +            [name:reference::variable::type `= expr::enum::type] commit { lhs.value = const_int_expr(r3.collapsed) enum_set(lhs.value) enum_inc() }           |  [name:reference::variable::type] { lhs.value = enum_get() enum_inc() }           def item @@ -81,14 +81,13 @@ context fspec        
context select           lex              ignore / '//' [^\n]* '\n' | space+ / -            literal `( `) `{ `} `* +            literal `( `) `{ `} `* `:           end           literal `select           def item -            [`* `) data:declaration::type] -         |  [expr:expr::paren::type `) data:declaration::type] +            [expr:expr::select::type `) data:declaration::type]           def type              name:str @@ -105,7 +104,7 @@ context fspec        lex           ignore / '//' [^\n]* '\n' | space+ /           literal `: `[ `] `| `; -         token VISUAL / 'nul' | 'dec' | 'hex' | 'str' / +         token VISUAL / 'nul' | 'dec' | 'hex' | 'str' | 'flt' /        end        literal `enum `struct @@ -116,39 +115,12 @@ context fspec        def filter           [`| function:reference::function::type] -      def subscript +      def dimension           [`[ expr:expr::bracket::type `: slice:expr::bracket::type `]]        |  [`[ expr:expr::bracket::type `]]        def extra -         # if set, this field has trivial length, otherwise need to read subscripts -         length:collapser::collapsed -         [subscript:subscript* filter:filter* visual:visual?] { -            f:str = '' -            has_slice:bool -            for l:subscript in repeat(r1) { -               if (l.slice) { -                  f = '' -                  has_slice = true -                  break -               } - -               if (f != '') -                  f = f + '*' - -               if (l.expr.collapsed.result.value) { -                  f = f + '(' + $l.expr.collapsed.result.value + ')' -               } else { -                  f = f + '(' + $l.expr.collapsed + ')' -               } -            } - -            if (f == '' && !has_slice) -               f = '1' - -            if (f != '') -               lhs.length = collapser::stream(f) -         } +         [dimension:dimension* filter:filter* visual:visual?]        
def type           # enum name <primitive> name <extra>; @@ -170,6 +142,8 @@ context fspec        |  [container:container::type name:reference::variable::type extra:extra `;]           # (enum|struct) name { ... } <filters>;        |  [container:container::type filter:filter* `;] +         # name(...); +      |  [function:reference::function::type `;]     end     def source @@ -211,14 +185,18 @@ pop_scope()  }  any +lookup_in_scope_no_error(type:str, name:str, s:scope) +{ +   cmap:map<str, any> = s->names->find(type) +   if (cmap) return cmap->find(name) +   return nil +} + +any  lookup_no_error(type:str, name:str) {     for s:scope in g_scopes { -      cmap:map<str, any> = s->names->find(type) -      if (cmap) { -         var:any = cmap->find(name) -         if (var) -            return var -      } +      v:any = lookup_in_scope_no_error(type, name, s) +      if (v) return v     }     return nil  } @@ -249,9 +227,10 @@ insert(type:str, name:str, var:any)  }  any -lookup(type:str, name:str) +lookup(type:str, name:str, s:scope)  { -   r:any = lookup_no_error(type, name) +   r:any = nil +   if (s) r = lookup_in_scope_no_error(type, name, s) else r = lookup_no_error(type, name)     if (!r) {        print('`', type, ' ', name, '` is not declared in this or outer scope!\n')        exit(1) @@ -259,119 +238,324 @@ lookup(type:str, name:str)     return r  } -str -container_name_str(s:str) { if (!s) return '<anon>' return s } +global g_cscope:map<any, scope> = new map<any, scope>() +global g_regs:map<any, int> = new map<any, int>() +global g_offs:map<any, int> = new map<any, int>() +global g_ops:map<str, int> = new map<str, int>() +global g_visuals:map<str, int> = new map<str, int>() +global g_types:map<str, int> = new map<str, int>() + +g_ops->insert('-#', 0) +g_ops->insert('!', 1) +g_ops->insert('~', 2) +g_ops->insert('*', 3) +g_ops->insert('/', 4) +g_ops->insert('%', 5) +g_ops->insert('#+', 6) +g_ops->insert('#-', 7) +g_ops->insert('<<', 8) +g_ops->insert('>>', 9) 
+g_ops->insert('<', 10) +g_ops->insert('<=', 11) +g_ops->insert('==', 12) +g_ops->insert('!=', 13) +g_ops->insert('&', 14) +g_ops->insert('|', 15) +g_ops->insert('^', 16) +g_ops->insert('&&', 17) +g_ops->insert('||', 18) +g_ops->insert(':', 19) +g_ops->insert(']', 20) + +g_visuals->insert('nul', 0) +g_visuals->insert('dec', 1) +g_visuals->insert('hex', 2) +g_visuals->insert('str', 3) +g_visuals->insert('flt', 4) + +g_types->insert('enum', 0) +g_types->insert('struct', 1) +g_types->insert('select', 2) # UNION + +global INS_VERSION:int = 0 +global INS_REG:int = 1 +global INS_PUSH:int = 2 +global INS_PUSHR:int = 3 +global INS_STORE:int = 4 +global INS_OP:int = 5 +global INS_QUEUE:int = 6 +global INS_IO:int = 7 +global INS_EXEC:int = 8 +global INS_CALL:int = 9 +global INS_JMP:int = 10 +global INS_JMPIF:int = 11 + +int insbuf_written() = c_insbuf_written +str flush_insbuf() = c_flush_insbuf +void write_ins(ins:int, num:int) = c_write_ins +void write_ins_with_data(ins:int, data:str) = c_write_ins_with_data + +global g_regc:int = 1 + +void +new_reg(v:any, data:str) +{ +   if (g_regs->find(v)) { +      print('Register for `', $v, '` already exists!\n') +      exit(1) +   } -str -signed_str(s:bool) { if (s) return 'signed' return 'unsigned' } +   if (!data) data = '' +   write_ins_with_data(INS_REG, data) +   print(flush_insbuf()) +   g_regs->insert(v, g_regc) +   g_regc = g_regc + 1 +} -global INDSTP:str = '░  ' +void +write_data_if_not_there(v:str) +{ +   if (!g_regs->find(v)) +      new_reg(v, v) +}  void -print_declaration(d:fspec::declaration::type, ind:str) +find_data_in(s:any)  { -   insert('variable', $d.name, d) -   print(ind, 'variable `', $d.name, "` that's ") +   for v:string::type in s +      write_data_if_not_there(v.raw) +   for v:reference::function::type in s +      write_data_if_not_there($v.name) +} -   c:fspec::container::type -   if (d.cref) c = lookup($d.cref, $d.parent) else c = d.container +write_ins(INS_VERSION, 1) +for e:expr::paren::type in source 
+   find_data_in(e.collapsed) +for e:expr::bracket::type in source +   find_data_in(e.collapsed) +for e:expr::arg::type in source +   find_data_in(e.collapsed) +for f:reference::function::type in source +   write_data_if_not_there($f.name) +for d:fspec::declaration::type in source { +   if (d.name) +      write_data_if_not_there($d.name) +   if (d.container && d.container.data.name) +      write_data_if_not_there($d.container.data.name) +} -   if (c) { -      print('`', c.data.type, ' ', container_name_str(c.data.name), '` ') +global g_fcr:int = g_regc +write_ins_with_data(INS_REG, '') +g_regc = g_regc + 1 -      if (c.data.select) -         print('with expression `', $c.data.select, '` ') +void +write_expr(expr:collapser::reducer::collapsed) +{ +   for o:collapser::reducer::operation in repeat(expr) { +      for v:collapser::reducer::value in child(o) { +         if (v.number) { +            write_ins(INS_PUSH, v.number.value) +         } else if (v.string) { +            write_ins(INS_PUSHR, g_regs->find(v.string.raw)) +         } else if (v.reference) { +            if (v.reference.variable) { +               write_ins(INS_PUSHR, g_regs->find(lookup('variable', $v.reference.variable.name, nil))) +            } else if (v.reference.function) { +               if ($v.reference.function.name != 'until') { +                  for a:expr::arg::type in v.reference.function +                     write_expr(a.collapsed.result) +                  write_ins(INS_CALL, g_regs->find($v.reference.function.name)) +               } +            } +         } +      } +      for vop:collapser::reducer::valueop in child(o) { +         if ($vop.op == '.') { +            s:scope = nil +            d:fspec::declaration::type = nil +            off:int = 0 +            for r:reference::variable::type in vop { +               d = lookup('variable', $r.name, s) +               if (d.container) { +                  s = g_cscope->find(%d.container) +                  off = 
g_offs->find(%d.container) +               } +            } +            write_ins(INS_PUSHR, off + g_regs->find(%d)) +         } else { +            write_ins(INS_OP, g_ops->find($vop.op)) +         } +      }     } +} -   if (d.primitive) -      print(d.primitive.bits, ' bits and ', signed_str(d.primitive.signed)) +void +write_postexpr(expr:collapser::reducer::collapsed, start:int) +{ +   for o:collapser::reducer::operation in repeat(expr) { +      for v:collapser::reducer::value in child(o) { +         if (v.reference && v.reference.function) { +            if ($v.reference.function.name == 'until') { +               for a:expr::arg::type in v.reference.function +                  write_expr(a.collapsed.result) +               write_ins(INS_OP, g_ops->find('!')) +               write_ins(INS_JMPIF, start) +            } +         } +      } +   } +} -   print('\n') +void +write_declaration(d:fspec::declaration::type, index:int) +{ +   if (!d.name) { +      print('something went wrong!\n') +      exit(1) +   } +   c:fspec::container::type = d.container +   if (d.cref) c = lookup($d.cref, $d.parent, nil) + +   locs:map<any, int> = new map<any, int>()     if (d.extra) { -      if (d.extra.length) { -         if (!d.extra.length.result.value || d.extra.length.result.value.reference) { -            print(ind, INDSTP, 'it has a variable length that needs to be computed with formula `', $d.extra.length, '`\n') -         } else { -            if (d.extra.length.result.value.number) { -               print(ind, INDSTP, 'it has a constant length of ', $d.extra.length.result.value, '\n') -            } else if (d.extra.length.result.value.string) { -               print(ind, INDSTP, '   its length will increase until pattern `', d.extra.length.result.value.string.escaped, '` has been read from stream\n') -            } -         } -      } else { -         print(ind, INDSTP, 'the subscripts contain slices, and thus needs some runtime loops to be computed\n') +      for 
l:fspec::declaration::dimension in repeat(d.extra.dimension) { +         locs->insert(%l, insbuf_written()) +         write_expr(l.expr.collapsed.result)        } -      for f:fspec::declaration::filter in repeat(d.extra.filter) -         print(ind, INDSTP, 'it needs to be filtered with `', $f.function, '`\n') +      #for f:fspec::declaration::filter in repeat(d.extra.filter) { +      #   for a:expr::arg::type in f +      #      write_expr(a.collapsed.result) +      #   write_ins(INS_CALL, g_regs->find($f.function.name)) +      #} +   } + +   if (!c) { +      write_ins(INS_PUSHR, g_fcr) +      write_ins(INS_PUSH, index) +      write_ins(INS_OP, g_ops->find('#+')) +      write_ins(INS_IO, d.primitive.bits) +   } else { +      write_ins(INS_PUSH, g_offs->find(%c)) +      write_ins(INS_STORE, g_fcr) +      write_ins(INS_EXEC, g_regs->find(%c)) +   } -      for v:fspec::declaration::visual in child(d.extra.visual) -         print(ind, INDSTP, 'it should be visualized as `', $v.name, '`\n') +   if (d.extra) { +      for l:fspec::declaration::dimension in repeat(d.extra.dimension) +         write_postexpr(l.expr.collapsed.result, locs->find(%l))     }  }  void -walk(d:fspec::declaration::type, ind:str) +walk1(d:fspec::declaration::type)  {     if (!d.container) {        print('something went wrong!\n')        exit(1)     } -   s:fspec::container::type = d.container -   insert($s.data.type, s.data.name, s) +   for i:fspec::container::strukt::item in repeat(d.container.data.items) +      if (i.data.container) +         walk1(i.data) -   if (d.name) { -      print_declaration(d, ind) -      ind = ind + INDSTP -      print(ind, 'and it contains\n') -   } else { -      print(ind, 'container `', s.data.type, ' ', container_name_str(s.data.name), '`') -      if ($d.filter != '') { -         print('\n') -         ind = ind + INDSTP -         for f:fspec::declaration::filter in repeat(d.filter) -            print(ind, 'it needs to be filtered with `', $f.function, '`\n') -        
 print(ind, 'and it contains\n') -      } else { -         print(' that contains\n') -      } +   for i:fspec::container::select::item in repeat(d.container.data.items) +      if (i.data.container) +         walk1(i.data) + +   g_offs->insert(%d.container, g_regc) + +   for i:fspec::container::enum::item in repeat(d.container.data.items) { +      # somehow need to get this constant time (not reg)     } -   if ($s.data.type == 'enum') { -      for i:fspec::container::enum::item in repeat(s.data.items) { -         print(ind, INDSTP, 'constant `', $i.decl.name, '` which value is ', $i.decl.value, '\n') -         insert('variable', $i.decl.name, i) -      } -   } else if ($s.data.type == 'struct') { +   for i:fspec::container::strukt::item in repeat(d.container.data.items) { +      if (i.data.primitive) +         new_reg(%i.data, nil) +   } + +   for i:fspec::container::select::item in repeat(d.container.data.items) { +      if (i.data.primitive) +         new_reg(%i.data, nil) +   } +} + +void +walk2(d:fspec::declaration::type) +{ +   if (!d.container) { +      print('something went wrong!\n') +      exit(1) +   } + +   insert($d.container.data.type, d.container.data.name, %d.container) + +   if ($d.container.data.type != 'enum')        push_scope() -      for i:fspec::container::strukt::item in repeat(s.data.items) { -         if (i.data.container) -            walk(i.data, ind + INDSTP) -         else -            print_declaration(i.data, ind + INDSTP) + +   for i:fspec::container::enum::item in repeat(d.container.data.items) +      insert('variable', $i.decl.name, %i.decl) + +   for i:fspec::container::strukt::item in repeat(d.container.data.items) { +      if (i.data.container) +         walk2(i.data) +      if (i.data.name) +         insert('variable', $i.data.name, %i.data) +   } + +   for i:fspec::container::select::item in repeat(d.container.data.items) { +      if (i.data.container) +         walk2(i.data) +      if (i.data.name) +         insert('variable', 
$i.data.name, %i.data) +   } + +   if (!d.name) { +      write_ins(INS_PUSH, g_offs->find(%d.container)) +      write_ins(INS_STORE, g_fcr) +   } + +   index:int = 0 +   for i:fspec::container::strukt::item in repeat(d.container.data.items) +      if (i.data.name) { +         write_declaration(i.data, index) +         index = index + 1 +      } else if (i.data.function) { +         for a:expr::arg::type in i.data.function +            write_expr(a.collapsed.result) +         write_ins(INS_CALL, g_regs->find($i.data.function.name))        } -      pop_scope() -   } else if ($s.data.type == 'select') { -      ind = ind + INDSTP -      push_scope() -      for i:fspec::container::select::item in repeat(s.data.items) { -         if (i.expr) -            print(ind, 'in case of (', $i.expr, ')\n') -         else -            print(ind, 'or otherwise\n') - -         if (i.data.container) -            walk(i.data, ind + INDSTP) -         else -            print_declaration(i.data, ind + INDSTP) + +   for i:fspec::container::select::item in repeat(d.container.data.items) { +      if (i.data.name) { +         if (i.expr) { +            write_expr(d.container.data.select.collapsed.result) +            write_expr(i.expr.collapsed.result) +            write_ins(INS_OP, g_ops->find('==')) +         } +         write_declaration(i.data, index) +         index = index + 1        } -      pop_scope()     } + +   if (insbuf_written()) { +      new_reg(%d.container, flush_insbuf()) +      print(flush_insbuf()) +   } + +   g_cscope->insert(%d, g_scopes->top) + +   if ($d.container.data.type != 'enum') +      pop_scope()  } +for d:fspec::declaration::type in repeat(source.items) +   walk1(d) +  push_scope()  for d:fspec::declaration::type in repeat(source.items) -   walk(d, '') +   walk2(d)  pop_scope() + +print(flush_insbuf()) diff --git a/src/compiler/expr.lm b/src/compiler/expr.lm index feff7af..7c650b4 100644 --- a/src/compiler/expr.lm +++ b/src/compiler/expr.lm @@ -37,9 +37,9 @@ 
context expr        def type           collapsed:collapser::collapsed -         [syntax+] { +         [syntax+] commit {              lhs.collapsed = collapser::stream($r1) -            if (!lhs.collapsed) reject +            if (!lhs.collapsed && $r1 != '*') reject           }     end diff --git a/src/compiler/native.c b/src/compiler/native.c index d0ca252..bbb0060 100644 --- a/src/compiler/native.c +++ b/src/compiler/native.c @@ -81,8 +81,7 @@ c_op_stack_top(program_t *prg, tree_t **sp, value_t a)        return NULL;     const union opstr op = { .u8 = stack->data[stack->index - 1] }; -   tree_t *s = construct_string(prg, colm_string_alloc_pointer(prg, opstr_lookup + op.offset, 1 + op.size)); -   return (str_t*)upref(prg, s); +   return (str_t*)upref(prg, construct_string(prg, colm_string_alloc_pointer(prg, opstr_lookup + op.offset, 1 + op.size)));  }  value_t @@ -111,6 +110,77 @@ c_op_stack_pop(program_t *prg, tree_t **sp, value_t a)     return r;  } +static uint8_t +bits_for_n(const uint8_t n, const uint8_t used) +{ +   return 16 * (1 << n) - used; +} + +static uint8_t +n_for_v(const uint64_t v, const uint8_t used) +{ +   const uint8_t bits = __builtin_ctzl((v ? v : 1)); +   if (used <= 16 && bits < bits_for_n(0, used)) +      return 0; +   else if (used <= 32 && bits < bits_for_n(1, used)) +      return 1; +   else if (used <= 64 && bits < bits_for_n(2, used)) +      return 2; + +   errx(EXIT_FAILURE, "numbers over 57 bits not supported right now.. 
sorry :D"); +   return 3; +} + +static void +vle_instruction(const uint8_t name, const uint64_t v, uint8_t out[16], uint8_t *out_written) +{ +   assert(out && out_written); +   const union { +      struct { unsigned name:5; unsigned n:2; uint64_t v:57; } ins; +      uint8_t v[16]; +   } u = { .ins = { .name = name, .n = n_for_v(v, 7), .v = v } }; +   *out_written = sizeof(uint16_t) * (1 << u.ins.n); +   memcpy(out, u.v, *out_written); +} + +struct insbuf { +   uint8_t data[sizeof(uint16_t) * 1024]; +   size_t written; +} insbuf = {0}; + +str_t* +c_flush_insbuf(program_t *prg, tree_t **sp) +{ +   tree_t *s = upref(prg, construct_string(prg, string_alloc_full(prg, insbuf.data, insbuf.written))); +   insbuf.written = 0; +   return (str_t*)s; +} + +int +c_insbuf_written(program_t *prg, tree_t **sp) +{ +   return insbuf.written; +} + +void +c_write_ins(program_t *prg, tree_t **sp, value_t a, value_t b) +{ +   uint8_t out[16], written; +   vle_instruction(a, b, out, &written); +   memcpy(&insbuf.data[insbuf.written], out, written); +   insbuf.written += written; +} + +void +c_write_ins_with_data(program_t *prg, tree_t **sp, value_t a, str_t *b) +{ +   assert(b); +   c_write_ins(prg, sp, a, b->value->length); +   memcpy(&insbuf.data[insbuf.written], b->value->data, b->value->length); +   insbuf.written += b->value->length; +   colm_tree_downref(prg, sp, (tree_t*)b); +} +  value_t  c_strtoull(program_t *prg, tree_t **sp, str_t *a, value_t b)  { @@ -131,6 +201,8 @@ str_t*  c_esc2chr(program_t *prg, tree_t **sp, str_t *a)  {     assert(a); +   const char hay = *a->value->data; +   colm_tree_downref(prg, sp, (tree_t*)a);     static const struct { const char e, v; } map[] = {        { .e = 'a', .v = '\a' }, @@ -147,14 +219,14 @@ c_esc2chr(program_t *prg, tree_t **sp, str_t *a)     };     for (size_t i = 0; i < ARRAY_SIZE(map); ++i) { -      if (*a->value->data != map[i].e) +      if (hay != map[i].e)           continue;        tree_t *s = construct_string(prg, 
colm_string_alloc_pointer(prg, &map[i].v, 1));        return (str_t*)upref(prg, s);     } -   errx(EXIT_FAILURE, "%s: unknown escape character `%c`", __func__, *a->value->data); +   errx(EXIT_FAILURE, "%s: unknown escape character `%c`", __func__, hay);     return NULL;  } | 
