Fixed bytecode generation errors.
h3rald h3rald@h3rald.com
Fri, 20 Dec 2024 13:08:02 +0100
6 files changed,
54 insertions(+),
41 deletions(-)
M
Makefile
→
Makefile
@@ -2,7 +2,7 @@ CC = gcc
CFLAGS = -Wall -Wextra -g LDFLAGS = -.PHONY: wasm, playground, clean, test, web +.PHONY: wasm, playground, clean hex: src/hex.c $(CC) $(CFLAGS) $(LDFLAGS) $< -o hex@@ -30,8 +30,10 @@ rm -f hex.exe
rm -f hex.js rm -f hex.wasm -test: - ./hex scripts/test.hex +test: hex + ./hex -b scripts/test.hex + ./hex scripts/test.hbx -web: playground - ./hex scripts/web.hex +web: playground hex + ./hex -b scripts/web.hex + ./hex scripts/web.hbx
M
scripts/test.hex
→
scripts/test.hex
@@ -120,7 +120,7 @@
("rm test.txt" exec 0x0 ==) ("rm test.txt" run 0x0 get 0x1 ==) ("ls web/contents" run 0x1 get "\n" split len 0x7 ==) - (args 0x1 get "scripts/test.hex" ==) + (args 0x1 get "scripts/test.hbx" ==) ;56 ((args len 0x2 ==) ("two") ("no") if "two" ==)@@ -209,7 +209,7 @@ ;112
("=" ord 0x3d ==) (0x3d chr "=" ==) - ("é" ord 0xffffffff ==) + (0xffffffff dec "-1" ==) (0x80 chr "" ==) ;116@@ -219,7 +219,6 @@ (("t" chr) (error) try "[symbol chr] Integer required" ==)
((() chr) (error) try "[symbol chr] Integer required" ==) ;120 - (0xffffffff dec "-1" ==) ) "tests" :
M
src/hex.h
→
src/hex.h
@@ -279,7 +279,6 @@ int hex_push_symbol(hex_context_t *ctx, hex_token_t *token);
hex_item_t hex_pop(hex_context_t *ctx); // Parser and interpreter -char *hex_process_string(const char *value); hex_token_t *hex_next_token(hex_context_t *ctx, const char **input, hex_file_position_t *position); int32_t hex_parse_integer(const char *hex_str); int hex_parse_quotation(hex_context_t *ctx, const char **input, hex_item_t *result, hex_file_position_t *position);@@ -296,6 +295,8 @@ void hex_lpad(const char *str, int total_length);
void hex_encode_length(uint8_t **bytecode, size_t *size, size_t length); int hex_is_binary(const uint8_t *data, size_t size); char *hex_bytes_to_string(const uint8_t *bytes, size_t size); +char *hex_process_string(const char *value); +size_t hex_min_bytes_to_encode_integer(int32_t value); // Native symbols int hex_symbol_store(hex_context_t *ctx);
M
src/stack.c
→
src/stack.c
@@ -137,32 +137,24 @@ {
hex_debug_item(ctx, "FREE", item); if (item.type == HEX_TYPE_STRING && item.data.str_value != NULL) { - hex_debug(ctx, "FREE: ** string start"); item.data.str_value = NULL; free(item.data.str_value); - hex_debug(ctx, "FREE: ** string end"); } else if (item.type == HEX_TYPE_QUOTATION && item.data.quotation_value != NULL) { - hex_debug(ctx, "FREE: ** quotation start"); hex_free_list(ctx, item.data.quotation_value, item.quotation_size); item.data.quotation_value = NULL; - hex_debug(ctx, "FREE: ** quotation end"); } else if (item.type == HEX_TYPE_NATIVE_SYMBOL && item.token->value != NULL) { - hex_debug(ctx, "FREE: ** native symbol start"); item.token = NULL; hex_free_token(item.token); - hex_debug(ctx, "FREE: ** native symbol end"); } else if (item.type == HEX_TYPE_USER_SYMBOL && item.token->value != NULL) { - hex_debug(ctx, "FREE: ** user symbol start"); item.token = NULL; hex_free_token(item.token); - hex_debug(ctx, "FREE: ** user symbol end"); } else {
M
src/utils.c
→
src/utils.c
@@ -356,3 +356,27 @@ }
*dst = '\0'; return processed_str; } + +size_t hex_min_bytes_to_encode_integer(int32_t value) +{ + // If value is negative, we need to return 4 bytes because we must preserve the sign bits. + if (value < 0) + { + return 4; + } + + // For positive values, check the minimal number of bytes needed. + for (int bytes = 1; bytes <= 4; bytes++) + { + int32_t mask = (1 << (bytes * 8)) - 1; + int32_t truncated_value = value & mask; + + // If the truncated value is equal to the original, this is the minimal byte size + if (truncated_value == value) + { + return bytes; + } + } + + return 4; // Default to 4 bytes if no smaller size is found. +}
M
src/vm.c
→
src/vm.c
@@ -24,37 +24,21 @@ }
(*bytecode)[*size] = HEX_OP_PUSHIN; *size += 1; // opcode // Encode the length of the integer value - size_t int_length = 0; - if (value >= -0x80 && value < 0x80) - { - int_length = 1; - } - else if (value >= -0x8000 && value < 0x8000) - { - int_length = 2; - } - else if (value >= -0x800000 && value < 0x800000) - { - int_length = 3; - } - else - { - int_length = 4; - } - hex_encode_length(bytecode, size, int_length); + size_t bytes = hex_min_bytes_to_encode_integer(value); + hex_encode_length(bytecode, size, bytes); // Encode the integer value in the minimum number of bytes, in little endian - if (value >= -0x80 && value < 0x80) + if (bytes == 1) { (*bytecode)[*size] = value & 0xFF; *size += 1; } - else if (value >= -0x8000 && value < 0x8000) + else if (bytes == 2) { (*bytecode)[*size] = value & 0xFF; (*bytecode)[*size + 1] = (value >> 8) & 0xFF; *size += 2; } - else if (value >= -0x800000 && value < 0x800000) + else if (bytes == 3) { (*bytecode)[*size] = value & 0xFF; (*bytecode)[*size + 1] = (value >> 8) & 0xFF;@@ -96,6 +80,16 @@ *bytecode = new_bytecode;
} (*bytecode)[*size] = HEX_OP_PUSHST; *size += 1; // opcode + // Check for multi-byte characters + for (size_t i = 0; i < len; i++) + { + if ((value[i] & 0x80) != 0) + { + hex_error(ctx, "[add bytecode string] Multi-byte characters are not supported - Cannot encode string: \"%s\"", value); + free(str); + return 1; + } + } hex_encode_length(bytecode, size, len); memcpy(&(*bytecode)[*size], value, len); *size += len;@@ -278,6 +272,7 @@ break;
} else { + (*n_items)--; // Decrement the number of items if it's not a valid token (it will be incremented anyway) // Ignore other tokens }@@ -340,7 +335,7 @@
*bytecode += length; *size -= length; - hex_debug(ctx, ">> PUSHIN: %d", value); + hex_debug(ctx, ">> PUSHIN: 0x%X", value); hex_item_t item = hex_integer_item(ctx, value); *result = item; return 0;@@ -367,7 +362,7 @@ } while (**bytecode & 0x80);
if (*size < length) { - hex_error(ctx, "[interpret bytecode string] Bytecode size too small to contain the string"); + hex_error(ctx, "[interpret bytecode string] Bytecode size (%d) too small to contain a string of length %d", *size, length); return 1; }@@ -423,7 +418,7 @@ hex_error(ctx, "(%d,%d) Unable to reference native symbol: %s (bytecode)", token->position.line, token->position.column, token->value);
hex_free_token(token); return 1; } - hex_debug(ctx, ">> NATSYM: %02X (%s)", opcode, symbol); + hex_debug(ctx, ">> NATSYM: 0x%02X (%s)", opcode, symbol); *result = item; return 0; }