Reviewed utility functions, changed some sizes.
h3rald h3rald@h3rald.com
Fri, 20 Dec 2024 08:08:40 +0100
5 files changed,
117 insertions(+),
149 deletions(-)
M
CHANGELOG.md
→
CHANGELOG.md
@@ -1,37 +1,1 @@
-<article> - <h2>Changelog</h2> - <h3>v0.2.0 — <em>Under development</em></h3> - -<h4>New Features</h4> -<ul> - <li>Implemented a virtual machine with a bytecode compiler and interpreter.</li> - <li>{{sym-read}}, {{sym-write}}, {{sym-append}} now support reading and writing from/to binary files as well.</li> - <li>{{sym-!}} can now evaluate a quotation of integers as hex bytecode.</li> -</ul> - -<h4>Fixes</h4> -<ul> - <li>Ensured that {{sym-dec}} is able to print negative integers in decimal format.</li> -</ul> - -<h4>Chores</h4> -<ul> - <li>Split the source code to different files, and now relying on an <a - href="https://github.com/h3rald/hex/blob/master/scripts/amalgamate.sh">amalgamate.sh</a> script to - concatenate them together before compiling</li> -</ul> -<h3>v0.1.0 — 2024-12-14</h3> - -<p>Initial release, featuring:</p> -<ul> - <li>A multi-platform executable for the <em>hex</em> interpreter.</li> - <li>Integrated REPL.</li> - <li>Integrated help and manual.</li> - <li>Debug mode.</li> - <li>0x40 (64) native symbols.</li> - <li>Support for 32bit hexadecimal integers, strings, and quotations (lists).</li> - <li>A complete <a href="https://hex.2c.fyi">web site</a> with more documentation and even an interactive playground. - </li> -</ul> - -</article> +<article> <h2>Changelog</h2> <h3>v0.2.0 — <em>Under development</em></h3> <h4>New Features</h4> <ul> <li>Implemented a virtual machine with a bytecode compiler and interpreter.</li> <li>{{sym-read}}, {{sym-write}}, {{sym-append}} now support reading and writing from/to binary files as well.</li> <li>{{sym-!}} can now evaluate a quotation of integers as hex bytecode.</li> <li>Increased maximum stack size to 256 items.</li> </ul> <h4>Fixes</h4> <ul> <li>Ensured that {{sym-dec}} is able to print negative integers in decimal format.</li> <li>Ensured that symbol identifiers cannot be longer than 256 characters.</li> </ul> <h4>Chores</h4> <ul> <li>Split the source code to different files, and now relying on an <a href="https://github.com/h3rald/hex/blob/master/scripts/amalgamate.sh">amalgamate.sh</a> script to concatenate them together before compiling</li> </ul> <h3>v0.1.0 — 2024-12-14</h3> <p>Initial release, featuring:</p> <ul> <li>A multi-platform executable for the <em>hex</em> interpreter.</li> <li>Integrated REPL.</li> <li>Integrated help and manual.</li> <li>Debug mode.</li> <li>0x40 (64) native symbols.</li> <li>Support for 32bit hexadecimal integers, strings, and quotations (lists).</li> <li>A complete <a href="https://hex.2c.fyi">web site</a> with more documentation and even an interactive playground. </li> </ul> </article>
M
src/hex.h
→
src/hex.h
@@ -279,7 +279,7 @@ int hex_push_symbol(hex_context_t *ctx, hex_token_t *token);
hex_item_t hex_pop(hex_context_t *ctx); // Parser and interpreter -char *hex_process_string(hex_context_t *ctx, const char *value); +char *hex_process_string(const char *value); hex_token_t *hex_next_token(hex_context_t *ctx, const char **input, hex_file_position_t *position); int32_t hex_parse_integer(const char *hex_str); int hex_parse_quotation(hex_context_t *ctx, const char **input, hex_item_t *result, hex_file_position_t *position);
M
src/stack.c
→
src/stack.c
@@ -61,68 +61,15 @@ }
return result; } -char *hex_process_string(hex_context_t *ctx, const char *value) +hex_item_t hex_string_item(hex_context_t *ctx, const char *value) { - int len = strlen(value); - char *processed_str = (char *)malloc(len + 1); - if (!processed_str) + char *str = hex_process_string(value); + if (str == NULL) { - hex_error(ctx, "Memory allocation failed"); - return NULL; + hex_error(ctx, "Failed to allocate memory for string"); + return (hex_item_t){.type = HEX_TYPE_INVALID}; } - - char *dst = processed_str; - const char *src = value; - while (*src) - { - if (*src == '\\' && *(src + 1)) - { - src++; - switch (*src) - { - case 'n': - *dst++ = '\n'; - break; - case 't': - *dst++ = '\t'; - break; - case 'r': - *dst++ = '\r'; - break; - case 'b': - *dst++ = '\b'; - break; - case 'f': - *dst++ = '\f'; - break; - case 'v': - *dst++ = '\v'; - break; - // case '\\': - // *dst++ = '\\'; - // break; - case '\"': - *dst++ = '\"'; - break; - default: - *dst++ = '\\'; - *dst++ = *src; - break; - } - } - else - { - *dst++ = *src; - } - src++; - } - *dst = '\0'; - return processed_str; -} - -hex_item_t hex_string_item(hex_context_t *ctx, const char *value) -{ - hex_item_t item = {.type = HEX_TYPE_STRING, .data.str_value = hex_process_string(ctx, value)}; + hex_item_t item = {.type = HEX_TYPE_STRING, .data.str_value = str}; return item; }
M
src/utils.c
→
src/utils.c
@@ -138,6 +138,39 @@ break;
} } +void hex_encode_length(uint8_t **bytecode, size_t *size, size_t length) +{ + while (length >= 0x80) + { + (*bytecode)[*size] = (length & 0x7F) | 0x80; + length >>= 7; + (*size)++; + } + (*bytecode)[*size] = length & 0x7F; + (*size)++; +} + +int hex_is_binary(const uint8_t *data, size_t size) +{ + const double binary_threshold = 0.1; // 10% of bytes being non-printable + size_t non_printable_count = 0; + for (size_t i = 0; i < size; i++) + { + uint8_t byte = data[i]; + // Check if the byte is a printable ASCII character or a common control character. + if (!((byte >= 32 && byte <= 126) || byte == 9 || byte == 10 || byte == 13)) + { + non_printable_count++; + } + // Early exit if the threshold is exceeded. + if ((double)non_printable_count / size > binary_threshold) + { + return 1; + } + } + return 0; +} + void hex_print_item(FILE *stream, hex_item_t item) { switch (item.type)@@ -177,15 +210,7 @@ case '\"':
fprintf(stream, "\\\""); break; default: - if ((unsigned char)*c < 32 || (unsigned char)*c > 126) - { - // Escape non-printable characters as hex (e.g., \x1F) - fprintf(stream, "\\x%02x", (unsigned char)*c); - } - else - { - fputc(*c, stream); - } + fputc(*c, stream); break; } }@@ -219,39 +244,6 @@ break;
} } -void hex_encode_length(uint8_t **bytecode, size_t *size, size_t length) -{ - while (length >= 0x80) - { - (*bytecode)[*size] = (length & 0x7F) | 0x80; - length >>= 7; - (*size)++; - } - (*bytecode)[*size] = length & 0x7F; - (*size)++; -} - -int hex_is_binary(const uint8_t *data, size_t size) -{ - const double binary_threshold = 0.1; // 10% of bytes being non-printable - size_t non_printable_count = 0; - for (size_t i = 0; i < size; i++) - { - uint8_t byte = data[i]; - // Check if the byte is a printable ASCII character or a common control character. - if (!((byte >= 32 && byte <= 126) || byte == 9 || byte == 10 || byte == 13)) - { - non_printable_count++; - } - // Early exit if the threshold is exceeded. - if ((double)non_printable_count / size > binary_threshold) - { - return 1; - } - } - return 0; -} - char *hex_bytes_to_string(const uint8_t *bytes, size_t size) { char *str = (char *)malloc(size * 6 + 1); // Allocate enough space for worst case (\uXXXX format)@@ -280,7 +272,7 @@ {
i++; // Skip the '\n' part of the '\r\n' sequence } *ptr++ = '\\'; - *ptr++ = 'n'; + *ptr++ = 'r'; break; case '\b': *ptr++ = '\\';@@ -294,20 +286,73 @@ case '\v':
*ptr++ = '\\'; *ptr++ = 'v'; break; - //case '\\': - // *ptr++ = '\\'; // Correctly handle backslash - // *ptr++ = '\\'; - // break; + case '\\': + *ptr++ = '\\'; + break; case '\"': *ptr++ = '\\'; *ptr++ = '\"'; break; default: - *ptr++ = byte; // Copy printable ASCII characters as is + *ptr++ = byte; break; } } - *ptr = '\0'; // Null-terminate the string - + *ptr = '\0'; return str; } + +char *hex_process_string(const char *value) +{ + int len = strlen(value); + char *processed_str = (char *)malloc(len + 1); + if (!processed_str) + { + return NULL; + } + + char *dst = processed_str; + const char *src = value; + while (*src) + { + if (*src == '\\' && *(src + 1)) + { + src++; + switch (*src) + { + case 'n': + *dst++ = '\n'; + break; + case 't': + *dst++ = '\t'; + break; + case 'r': + *dst++ = '\r'; + break; + case 'b': + *dst++ = '\b'; + break; + case 'f': + *dst++ = '\f'; + break; + case 'v': + *dst++ = '\v'; + break; + case '\"': + *dst++ = '\"'; + break; + default: + *dst++ = '\\'; + *dst++ = *src; + break; + } + } + else + { + *dst++ = *src; + } + src++; + } + *dst = '\0'; + return processed_str; +}
M
src/vm.c
→
src/vm.c
@@ -74,7 +74,13 @@ }
int hex_bytecode_string(hex_context_t *ctx, uint8_t **bytecode, size_t *size, size_t *capacity, const char *value) { - hex_debug(ctx, "PUSHST: \"%s\"", hex_process_string(ctx, value)); + char *str = hex_process_string(value); + if (!str) + { + hex_error(ctx, "Memory allocation failed"); + return 1; + } + hex_debug(ctx, "PUSHST: \"%s\"", str); size_t len = strlen(value); // Check if we need to resize the buffer (size + strlen + opcode (1) + max encoded length (4)) if (*size + len + 1 + 4 > *capacity)@@ -378,7 +384,13 @@ *size -= length;
hex_item_t item = hex_string_item(ctx, value); *result = item; - hex_debug(ctx, ">> PUSHST: \"%s\"", hex_process_string(ctx, value)); + char *str = hex_process_string(value); + if (!str) + { + hex_error(ctx, "Memory allocation failed"); + return 1; + } + hex_debug(ctx, ">> PUSHST: \"%s\"", str); return 0; }