all repos — hex @ e5cbcde17790686e02e40daa8fb0039a939fcb5b

A tiny, minimalist, slightly-esoteric concatenative programming language.

Fixed bytecode generation errors.
h3rald h3rald@h3rald.com
Fri, 20 Dec 2024 13:08:02 +0100
commit

e5cbcde17790686e02e40daa8fb0039a939fcb5b

parent

a18f67cb1d1617dea62f44333faac9857b586cdd

6 files changed, 54 insertions(+), 41 deletions(-)

jump to
M MakefileMakefile

@@ -2,7 +2,7 @@ CC = gcc

CFLAGS = -Wall -Wextra -g LDFLAGS = -.PHONY: wasm, playground, clean, test, web +.PHONY: wasm, playground, clean hex: src/hex.c $(CC) $(CFLAGS) $(LDFLAGS) $< -o hex

@@ -30,8 +30,10 @@ rm -f hex.exe

rm -f hex.js rm -f hex.wasm -test: - ./hex scripts/test.hex +test: hex + ./hex -b scripts/test.hex + ./hex scripts/test.hbx -web: playground - ./hex scripts/web.hex +web: playground hex + ./hex -b scripts/web.hex + ./hex scripts/web.hbx
M scripts/test.hexscripts/test.hex

@@ -120,7 +120,7 @@

("rm test.txt" exec 0x0 ==) ("rm test.txt" run 0x0 get 0x1 ==) ("ls web/contents" run 0x1 get "\n" split len 0x7 ==) - (args 0x1 get "scripts/test.hex" ==) + (args 0x1 get "scripts/test.hbx" ==) ;56 ((args len 0x2 ==) ("two") ("no") if "two" ==)

@@ -209,7 +209,7 @@ ;112

("=" ord 0x3d ==) (0x3d chr "=" ==) - ("é" ord 0xffffffff ==) + (0xffffffff dec "-1" ==) (0x80 chr "" ==) ;116

@@ -219,7 +219,6 @@ (("t" chr) (error) try "[symbol chr] Integer required" ==)

((() chr) (error) try "[symbol chr] Integer required" ==) ;120 - (0xffffffff dec "-1" ==) ) "tests" :
M src/hex.hsrc/hex.h

@@ -279,7 +279,6 @@ int hex_push_symbol(hex_context_t *ctx, hex_token_t *token);

hex_item_t hex_pop(hex_context_t *ctx); // Parser and interpreter -char *hex_process_string(const char *value); hex_token_t *hex_next_token(hex_context_t *ctx, const char **input, hex_file_position_t *position); int32_t hex_parse_integer(const char *hex_str); int hex_parse_quotation(hex_context_t *ctx, const char **input, hex_item_t *result, hex_file_position_t *position);

@@ -296,6 +295,8 @@ void hex_lpad(const char *str, int total_length);

void hex_encode_length(uint8_t **bytecode, size_t *size, size_t length); int hex_is_binary(const uint8_t *data, size_t size); char *hex_bytes_to_string(const uint8_t *bytes, size_t size); +char *hex_process_string(const char *value); +size_t hex_min_bytes_to_encode_integer(int32_t value); // Native symbols int hex_symbol_store(hex_context_t *ctx);
M src/stack.csrc/stack.c

@@ -137,32 +137,24 @@ {

hex_debug_item(ctx, "FREE", item); if (item.type == HEX_TYPE_STRING && item.data.str_value != NULL) { - hex_debug(ctx, "FREE: ** string start"); item.data.str_value = NULL; free(item.data.str_value); - hex_debug(ctx, "FREE: ** string end"); } else if (item.type == HEX_TYPE_QUOTATION && item.data.quotation_value != NULL) { - hex_debug(ctx, "FREE: ** quotation start"); hex_free_list(ctx, item.data.quotation_value, item.quotation_size); item.data.quotation_value = NULL; - hex_debug(ctx, "FREE: ** quotation end"); } else if (item.type == HEX_TYPE_NATIVE_SYMBOL && item.token->value != NULL) { - hex_debug(ctx, "FREE: ** native symbol start"); item.token = NULL; hex_free_token(item.token); - hex_debug(ctx, "FREE: ** native symbol end"); } else if (item.type == HEX_TYPE_USER_SYMBOL && item.token->value != NULL) { - hex_debug(ctx, "FREE: ** user symbol start"); item.token = NULL; hex_free_token(item.token); - hex_debug(ctx, "FREE: ** user symbol end"); } else {
M src/utils.csrc/utils.c

@@ -356,3 +356,27 @@ }

*dst = '\0'; return processed_str; } + +size_t hex_min_bytes_to_encode_integer(int32_t value) +{ + // If value is negative, we need to return 4 bytes because we must preserve the sign bits. + if (value < 0) + { + return 4; + } + + // For positive values, check the minimal number of bytes needed. + for (int bytes = 1; bytes <= 4; bytes++) + { + int32_t mask = (1 << (bytes * 8)) - 1; + int32_t truncated_value = value & mask; + + // If the truncated value is equal to the original, this is the minimal byte size + if (truncated_value == value) + { + return bytes; + } + } + + return 4; // Default to 4 bytes if no smaller size is found. +}
M src/vm.csrc/vm.c

@@ -24,37 +24,21 @@ }

(*bytecode)[*size] = HEX_OP_PUSHIN; *size += 1; // opcode // Encode the length of the integer value - size_t int_length = 0; - if (value >= -0x80 && value < 0x80) - { - int_length = 1; - } - else if (value >= -0x8000 && value < 0x8000) - { - int_length = 2; - } - else if (value >= -0x800000 && value < 0x800000) - { - int_length = 3; - } - else - { - int_length = 4; - } - hex_encode_length(bytecode, size, int_length); + size_t bytes = hex_min_bytes_to_encode_integer(value); + hex_encode_length(bytecode, size, bytes); // Encode the integer value in the minimum number of bytes, in little endian - if (value >= -0x80 && value < 0x80) + if (bytes == 1) { (*bytecode)[*size] = value & 0xFF; *size += 1; } - else if (value >= -0x8000 && value < 0x8000) + else if (bytes == 2) { (*bytecode)[*size] = value & 0xFF; (*bytecode)[*size + 1] = (value >> 8) & 0xFF; *size += 2; } - else if (value >= -0x800000 && value < 0x800000) + else if (bytes == 3) { (*bytecode)[*size] = value & 0xFF; (*bytecode)[*size + 1] = (value >> 8) & 0xFF;

@@ -96,6 +80,16 @@ *bytecode = new_bytecode;

} (*bytecode)[*size] = HEX_OP_PUSHST; *size += 1; // opcode + // Check for multi-byte characters + for (size_t i = 0; i < len; i++) + { + if ((value[i] & 0x80) != 0) + { + hex_error(ctx, "[add bytecode string] Multi-byte characters are not supported - Cannot encode string: \"%s\"", value); + free(str); + return 1; + } + } hex_encode_length(bytecode, size, len); memcpy(&(*bytecode)[*size], value, len); *size += len;

@@ -278,6 +272,7 @@ break;

} else { + (*n_items)--; // Decrement the number of items if it's not a valid token (it will be incremeneted anyway) // Ignore other tokens }

@@ -340,7 +335,7 @@

*bytecode += length; *size -= length; - hex_debug(ctx, ">> PUSHIN: %d", value); + hex_debug(ctx, ">> PUSHIN: 0x%X", value); hex_item_t item = hex_integer_item(ctx, value); *result = item; return 0;

@@ -367,7 +362,7 @@ } while (**bytecode & 0x80);

if (*size < length) { - hex_error(ctx, "[interpret bytecode string] Bytecode size too small to contain the string"); + hex_error(ctx, "[interpret bytecode string] Bytecode size (%d) too small to contain a string of length %d", *size, length); return 1; }

@@ -423,7 +418,7 @@ hex_error(ctx, "(%d,%d) Unable to reference native symbol: %s (bytecode)", token->position.line, token->position.column, token->value);

hex_free_token(token); return 1; } - hex_debug(ctx, ">> NATSYM: %02X (%s)", opcode, symbol); + hex_debug(ctx, ">> NATSYM: 0x%02X (%s)", opcode, symbol); *result = item; return 0; }