code.H3RALD.com — hex: 565880095ec78d7659fa3b0f14cafbdd55225d39

Reviewed utility functions, changed some sizes.

h3rald h3rald@h3rald.com

Fri, 20 Dec 2024 08:08:40 +0100

commit

565880095ec78d7659fa3b0f14cafbdd55225d39

parent

9c2fced49b54df5e043c631771bd235dd73add64

5 files changed, 117 insertions(+), 149 deletions(-)

jump to

CHANGELOG.md

src/hex.h

src/stack.c

src/utils.c

src/vm.c

M CHANGELOG.md → CHANGELOG.md

@@ -1,37 +1,1 @@ 
-<article>
-    <h2>Changelog</h2>
-    <h3>v0.2.0 &mdash; <em>Under development</em></h3>
-
-<h4>New Features</h4>
-<ul>
-    <li>Implemented a virtual machine with a bytecode compiler and interpreter.</li>
-    <li>{{sym-read}}, {{sym-write}}, {{sym-append}} now support reading and writing from/to binary files as well.</li>
-    <li>{{sym-!}} can now evaluate a quotation of integers as hex bytecode.</li>
-</ul>
-
-<h4>Fixes</h4>
-<ul>
-    <li>Ensured that {{sym-dec}} is able to print negative integers in decimal format.</li>
-</ul>
-
-<h4>Chores</h4>
-<ul>
-    <li>Split the source code to different files, and now relying on an <a
-            href="https://github.com/h3rald/hex/blob/master/scripts/amalgamate.sh">amalgamate.sh</a> script to
-        concatenate them together before compiling</li>
-</ul>
-<h3>v0.1.0 &mdash; 2024-12-14</h3>
-
-<p>Initial release, featuring:</p>
-<ul>
-    <li>A multi-platform executable for the <em>hex</em> interpreter.</li>
-    <li>Integrated REPL.</li>
-    <li>Integrated help and manual.</li>
-    <li>Debug mode.</li>
-    <li>0x40 (64) native symbols.</li>
-    <li>Support for 32bit hexadecimal integers, strings, and quotations (lists).</li>
-    <li>A complete <a href="https://hex.2c.fyi">web site</a> with more documentation and even an interactive playground.
-    </li>
-</ul>
-
-</article>
+<article>
    <h2>Changelog</h2>
    <h3>v0.2.0 &mdash; <em>Under development</em></h3>

<h4>New Features</h4>
<ul>
    <li>Implemented a virtual machine with a bytecode compiler and interpreter.</li>
    <li>{{sym-read}}, {{sym-write}}, {{sym-append}} now support reading and writing from/to binary files as well.</li>
    <li>{{sym-!}} can now evaluate a quotation of integers as hex bytecode.</li>
    <li>Increased maximum stack size to 256 items.</li>
</ul>

<h4>Fixes</h4>
<ul>
    <li>Ensured that {{sym-dec}} is able to print negative integers in decimal format.</li>
    <li>Ensured that symbol identifiers cannot be longer than 256 characters.</li>
</ul>

<h4>Chores</h4>
<ul>
    <li>Split the source code to different files, and now relying on an <a
            href="https://github.com/h3rald/hex/blob/master/scripts/amalgamate.sh">amalgamate.sh</a> script to
        concatenate them together before compiling</li>
</ul>
<h3>v0.1.0 &mdash; 2024-12-14</h3>

<p>Initial release, featuring:</p>
<ul>
    <li>A multi-platform executable for the <em>hex</em> interpreter.</li>
    <li>Integrated REPL.</li>
    <li>Integrated help and manual.</li>
    <li>Debug mode.</li>
    <li>0x40 (64) native symbols.</li>
    <li>Support for 32bit hexadecimal integers, strings, and quotations (lists).</li>
    <li>A complete <a href="https://hex.2c.fyi">web site</a> with more documentation and even an interactive playground.
    </li>
</ul>

</article>

M src/hex.h → src/hex.h

@@ -279,7 +279,7 @@ int hex_push_symbol(hex_context_t *ctx, hex_token_t *token);
 hex_item_t hex_pop(hex_context_t *ctx);
 
 // Parser and interpreter
-char *hex_process_string(hex_context_t *ctx, const char *value);
+char *hex_process_string(const char *value);
 hex_token_t *hex_next_token(hex_context_t *ctx, const char **input, hex_file_position_t *position);
 int32_t hex_parse_integer(const char *hex_str);
 int hex_parse_quotation(hex_context_t *ctx, const char **input, hex_item_t *result, hex_file_position_t *position);

M src/stack.c → src/stack.c

@@ -61,68 +61,15 @@ }
     return result;
 }
 
-char *hex_process_string(hex_context_t *ctx, const char *value)
+hex_item_t hex_string_item(hex_context_t *ctx, const char *value)
 {
-    int len = strlen(value);
-    char *processed_str = (char *)malloc(len + 1);
-    if (!processed_str)
+    char *str = hex_process_string(value);
+    if (str == NULL)
     {
-        hex_error(ctx, "Memory allocation failed");
-        return NULL;
+        hex_error(ctx, "Failed to allocate memory for string");
+        return (hex_item_t){.type = HEX_TYPE_INVALID};
     }
-
-    char *dst = processed_str;
-    const char *src = value;
-    while (*src)
-    {
-        if (*src == '\\' && *(src + 1))
-        {
-            src++;
-            switch (*src)
-            {
-            case 'n':
-                *dst++ = '\n';
-                break;
-            case 't':
-                *dst++ = '\t';
-                break;
-            case 'r':
-                *dst++ = '\r';
-                break;
-            case 'b':
-                *dst++ = '\b';
-                break;
-            case 'f':
-                *dst++ = '\f';
-                break;
-            case 'v':
-                *dst++ = '\v';
-                break;
-            // case '\\':
-            //     *dst++ = '\\';
-            //     break;
-            case '\"':
-                *dst++ = '\"';
-                break;
-            default:
-                *dst++ = '\\';
-                *dst++ = *src;
-                break;
-            }
-        }
-        else
-        {
-            *dst++ = *src;
-        }
-        src++;
-    }
-    *dst = '\0';
-    return processed_str;
-}
-
-hex_item_t hex_string_item(hex_context_t *ctx, const char *value)
-{
-    hex_item_t item = {.type = HEX_TYPE_STRING, .data.str_value = hex_process_string(ctx, value)};
+    hex_item_t item = {.type = HEX_TYPE_STRING, .data.str_value = str};
     return item;
 }

M src/utils.c → src/utils.c

@@ -138,6 +138,39 @@ break;
     }
 }
 
+void hex_encode_length(uint8_t **bytecode, size_t *size, size_t length)
+{
+    while (length >= 0x80)
+    {
+        (*bytecode)[*size] = (length & 0x7F) | 0x80;
+        length >>= 7;
+        (*size)++;
+    }
+    (*bytecode)[*size] = length & 0x7F;
+    (*size)++;
+}
+
+int hex_is_binary(const uint8_t *data, size_t size)
+{
+    const double binary_threshold = 0.1; // 10% of bytes being non-printable
+    size_t non_printable_count = 0;
+    for (size_t i = 0; i < size; i++)
+    {
+        uint8_t byte = data[i];
+        // Check if the byte is a printable ASCII character or a common control character.
+        if (!((byte >= 32 && byte <= 126) || byte == 9 || byte == 10 || byte == 13))
+        {
+            non_printable_count++;
+        }
+        // Early exit if the threshold is exceeded.
+        if ((double)non_printable_count / size > binary_threshold)
+        {
+            return 1;
+        }
+    }
+    return 0;
+}
+
 void hex_print_item(FILE *stream, hex_item_t item)
 {
     switch (item.type)
@@ -177,15 +210,7 @@ case '\"':
                 fprintf(stream, "\\\"");
                 break;
             default:
-                if ((unsigned char)*c < 32 || (unsigned char)*c > 126)
-                {
-                    // Escape non-printable characters as hex (e.g., \x1F)
-                    fprintf(stream, "\\x%02x", (unsigned char)*c);
-                }
-                else
-                {
-                    fputc(*c, stream);
-                }
+                fputc(*c, stream);
                 break;
             }
         }
@@ -219,39 +244,6 @@ break;
     }
 }
 
-void hex_encode_length(uint8_t **bytecode, size_t *size, size_t length)
-{
-    while (length >= 0x80)
-    {
-        (*bytecode)[*size] = (length & 0x7F) | 0x80;
-        length >>= 7;
-        (*size)++;
-    }
-    (*bytecode)[*size] = length & 0x7F;
-    (*size)++;
-}
-
-int hex_is_binary(const uint8_t *data, size_t size)
-{
-    const double binary_threshold = 0.1; // 10% of bytes being non-printable
-    size_t non_printable_count = 0;
-    for (size_t i = 0; i < size; i++)
-    {
-        uint8_t byte = data[i];
-        // Check if the byte is a printable ASCII character or a common control character.
-        if (!((byte >= 32 && byte <= 126) || byte == 9 || byte == 10 || byte == 13))
-        {
-            non_printable_count++;
-        }
-        // Early exit if the threshold is exceeded.
-        if ((double)non_printable_count / size > binary_threshold)
-        {
-            return 1;
-        }
-    }
-    return 0;
-}
-
 char *hex_bytes_to_string(const uint8_t *bytes, size_t size)
 {
     char *str = (char *)malloc(size * 6 + 1); // Allocate enough space for worst case (\uXXXX format)
@@ -280,7 +272,7 @@ {
                 i++; // Skip the '\n' part of the '\r\n' sequence
             }
             *ptr++ = '\\';
-            *ptr++ = 'n';
+            *ptr++ = 'r';
             break;
         case '\b':
             *ptr++ = '\\';
@@ -294,20 +286,73 @@ case '\v':
             *ptr++ = '\\';
             *ptr++ = 'v';
             break;
-        //case '\\':
-        //    *ptr++ = '\\'; // Correctly handle backslash
-        //    *ptr++ = '\\';
-        //    break;
+        case '\\':
+            *ptr++ = '\\';
+            break;
         case '\"':
             *ptr++ = '\\';
             *ptr++ = '\"';
             break;
         default:
-            *ptr++ = byte; // Copy printable ASCII characters as is
+            *ptr++ = byte;
             break;
         }
     }
-    *ptr = '\0'; // Null-terminate the string
-
+    *ptr = '\0';
     return str;
 }
+
+char *hex_process_string(const char *value)
+{
+    int len = strlen(value);
+    char *processed_str = (char *)malloc(len + 1);
+    if (!processed_str)
+    {
+        return NULL;
+    }
+
+    char *dst = processed_str;
+    const char *src = value;
+    while (*src)
+    {
+        if (*src == '\\' && *(src + 1))
+        {
+            src++;
+            switch (*src)
+            {
+            case 'n':
+                *dst++ = '\n';
+                break;
+            case 't':
+                *dst++ = '\t';
+                break;
+            case 'r':
+                *dst++ = '\r';
+                break;
+            case 'b':
+                *dst++ = '\b';
+                break;
+            case 'f':
+                *dst++ = '\f';
+                break;
+            case 'v':
+                *dst++ = '\v';
+                break;
+            case '\"':
+                *dst++ = '\"';
+                break;
+            default:
+                *dst++ = '\\';
+                *dst++ = *src;
+                break;
+            }
+        }
+        else
+        {
+            *dst++ = *src;
+        }
+        src++;
+    }
+    *dst = '\0';
+    return processed_str;
+}

M src/vm.c → src/vm.c

@@ -74,7 +74,13 @@ }
 
 int hex_bytecode_string(hex_context_t *ctx, uint8_t **bytecode, size_t *size, size_t *capacity, const char *value)
 {
-    hex_debug(ctx, "PUSHST: \"%s\"", hex_process_string(ctx, value));
+    char *str = hex_process_string(value);
+    if (!str)
+    {
+        hex_error(ctx, "Memory allocation failed");
+        return 1;
+    }
+    hex_debug(ctx, "PUSHST: \"%s\"", str);
     size_t len = strlen(value);
     // Check if we need to resize the buffer (size + strlen + opcode (1) + max encoded length (4))
     if (*size + len + 1 + 4 > *capacity)
@@ -378,7 +384,13 @@ *size -= length;
 
     hex_item_t item = hex_string_item(ctx, value);
     *result = item;
-    hex_debug(ctx, ">> PUSHST: \"%s\"", hex_process_string(ctx, value));
+    char *str = hex_process_string(value);
+    if (!str)
+    {
+        hex_error(ctx, "Memory allocation failed");
+        return 1;
+    }
+    hex_debug(ctx, ">> PUSHST: \"%s\"", str);
     return 0;
 }

all repos — hex @ 565880095ec78d7659fa3b0f14cafbdd55225d39

A tiny, minimalist, slightly-esoteric concatenative programming language.