Implementing symbol table.
h3rald h3rald@h3rald.com
Thu, 19 Dec 2024 07:35:29 +0100
7 files changed,
213 insertions(+),
15 deletions(-)
A
.vscode/settings.json
@@ -0,0 +1,8 @@
+{ + "files.associations": { + "*.erd": "json", + "*.vuerd": "json", + "*.mjs": "javascript", + "stdio.h": "c" + } +}
M
Makefile
→
Makefile
@@ -7,7 +7,7 @@
hex: src/hex.c $(CC) $(CFLAGS) $(LDFLAGS) $< -o hex -src/hex.c: src/hex.h src/error.c src/help.c src/helpers.c src/interpreter.c src/main.c src/parser.c src/registry.c src/stack.c src/stacktrace.c src/symbols.c src/vm.c +src/hex.c: src/hex.h src/error.c src/help.c src/helpers.c src/interpreter.c src/main.c src/parser.c src/registry.c src/stack.c src/stacktrace.c src/symbols.c src/vm.c src/symboltable.c bash scripts/amalgamate.sh web/assets/hex.wasm: src/hex.c web/assets/hex-playground.js
M
scripts/amalgamate.sh
→
scripts/amalgamate.sh
@@ -9,6 +9,7 @@ "src/error.c"
"src/help.c" "src/stacktrace.c" "src/parser.c" + "src/symboltable.c" "src/vm.c" "src/interpreter.c" "src/helpers.c"
M
src/hex.h
→
src/hex.h
@@ -26,8 +26,8 @@ #define HEX_REGISTRY_SIZE 1024
#define HEX_STACK_SIZE 128 #define HEX_STACK_TRACE_SIZE 16 #define HEX_NATIVE_SYMBOLS 64 - -const uint8_t HEX_BYTECODE_HEADER[6] = {0x01, 0x68, 0x65, 0x78, 0x01, 0x02}; +#define HEX_MAX_SYMBOL_LENGTH 255 +#define HEX_MAX_USER_SYMBOLS 65535 // Type Definitions typedef enum hex_item_type_t@@ -128,6 +128,12 @@ int errors_enabled;
int stack_trace_enabled; } hex_settings_t; +typedef struct hex_symbol_table_t +{ + char **symbols; + uint16_t count; +} hex_symbol_table_t; + typedef struct hex_context_t { hex_stack_t stack;@@ -135,6 +141,7 @@ hex_registry_t registry;
hex_stack_trace_t stack_trace; hex_settings_t settings; hex_doc_dictionary_t docs; + hex_symbol_table_t symbol_table; int hashbang; char error[256]; int argc;@@ -364,6 +371,16 @@ int hex_interpret_bytecode_native_symbol(hex_context_t *ctx, uint8_t opcode, size_t position, hex_item_t *result);
int hex_interpret_bytecode_user_symbol(hex_context_t *ctx, uint8_t **bytecode, size_t *size, size_t position, hex_item_t *result); int hex_interpret_bytecode_quotation(hex_context_t *ctx, uint8_t **bytecode, size_t *size, size_t position, hex_item_t *result); int hex_interpret_bytecode(hex_context_t *ctx, uint8_t *bytecode, size_t size); +void hex_header(hex_context_t *ctx, uint8_t header[8]); +int hex_validate_header(uint8_t header[8]); + +// Symbol table +void hex_symboltable_init(hex_context_t *ctx); +void hex_symboltable_free(hex_context_t *ctx); +int hex_symboltable_set(hex_context_t *ctx, const char *symbol); +int hex_symboltable_get(hex_context_t *ctx, const char *symbol); +void hex_decode_bytecode_symboltable(hex_context_t *ctx, const uint8_t *bytecode, size_t size); +uint8_t *hex_encode_bytecode_symboltable(hex_context_t *ctx, size_t *out_size); // REPL and initialization void hex_register_symbols(hex_context_t *ctx);
M
src/main.c
→
src/main.c
@@ -283,7 +283,14 @@ hex_error(ctx, "Failed to write file: %s", filename);
return 1; } hex_debug(ctx, "Writing bytecode to file: %s", filename); - fwrite(HEX_BYTECODE_HEADER, 1, 6, file); + uint8_t header[8]; + hex_header(ctx, header); + fwrite(header, 1, sizeof(header), file); + uint8_t *symbol_table = NULL; + size_t symbol_table_size = 0; + symbol_table = hex_encode_bytecode_symboltable(ctx, &symbol_table_size); + fwrite(symbol_table, 1, symbol_table_size, file); + free(symbol_table); fwrite(bytecode, 1, size, file); fclose(file); hex_debug(ctx, "Bytecode file written: %s", filename);
A
src/symboltable.c
@@ -0,0 +1,121 @@
+#ifndef HEX_H +#include "hex.h" +#endif + +void hex_symboltable_init(hex_context_t *ctx) +{ + ctx->symbol_table.count = 0; + ctx->symbol_table.symbols = malloc(HEX_MAX_USER_SYMBOLS * sizeof(char *)); +} + +void hex_symboltable_free(hex_context_t *ctx) +{ + for (uint16_t i = 0; i < ctx->symbol_table.count; ++i) + { + free(ctx->symbol_table.symbols[i]); + } + free(ctx->symbol_table.symbols); + ctx->symbol_table.count = 0; +} + +// Add a symbol to the table if it does not already exist +// Returns 0 on success, -1 if the symbol is too long or table is full +int hex_symboltable_set(hex_context_t *ctx, const char *symbol) +{ + hex_symbol_table_t *table = &ctx->symbol_table; + size_t len = strlen(symbol); + + // Check symbol length + if (len > HEX_MAX_SYMBOL_LENGTH) + { + return -1; // Symbol too long + } + + // Check if table is full + if (table->count >= HEX_MAX_USER_SYMBOLS) + { + return -1; // Table full + } + + // Check if symbol already exists + for (uint16_t i = 0; i < table->count; ++i) + { + if (strcmp(table->symbols[i], symbol) == 0) + { + return 0; // Symbol already exists, no-op + } + } + + // Add the symbol + table->symbols[table->count] = strdup(symbol); + table->count++; + return 0; +} + +// Get the index of a symbol in the table, or -1 if not found +int hex_symboltable_get(hex_context_t *ctx, const char *symbol) +{ + hex_symbol_table_t *table = &ctx->symbol_table; + for (uint16_t i = 0; i < table->count; ++i) + { + if (strcmp(table->symbols[i], symbol) == 0) + { + return i; + } + } + return -1; // Symbol not found +} + +// Decode a bytecode's symbol table into the hex_symbol_table_t structure +// Assumes input is well-formed +void hex_decode_bytecode_symboltable(hex_context_t *ctx, const uint8_t *bytecode, size_t size) +{ + hex_symbol_table_t *table = &ctx->symbol_table; + table->count = 0; + size_t offset = 0; + + while (offset < size) + { + if (table->count >= HEX_MAX_USER_SYMBOLS) + { + break; // Prevent overflow + } + + uint8_t str_len = bytecode[offset++]; + char *symbol = malloc(str_len + 1); + memcpy(symbol, bytecode + offset, str_len); + symbol[str_len] = '\0'; + offset += str_len; + + hex_symboltable_set(ctx, symbol); + free(symbol); + } +} + +// Encode the symbol table into a bytecode representation +// Returns bytecode buffer and sets out_size to the bytecode length +uint8_t *hex_encode_bytecode_symboltable(hex_context_t *ctx, size_t *out_size) +{ + hex_symbol_table_t *table = &ctx->symbol_table; + size_t total_size = 0; + + // Calculate total size + for (uint16_t i = 0; i < table->count; ++i) + { + total_size += 1 + strlen(table->symbols[i]); + } + + uint8_t *bytecode = malloc(total_size); + size_t offset = 0; + + for (uint16_t i = 0; i < table->count; ++i) + { + size_t len = strlen(table->symbols[i]); + bytecode[offset++] = (uint8_t)len; + memcpy(bytecode + offset, table->symbols[i], len); + offset += len; + } + + *out_size = total_size; + return bytecode; +}
M
src/vm.c
→
src/vm.c
@@ -530,11 +530,14 @@ hex_debug(ctx, "NATSYM[1]: (total size: %d) %s", *size, value);
} else { - hex_debug(ctx, "LOOKUP[%d]: %s", strlen(value), value); - // Check if we need to resize the buffer (size + strlen + opcode (1) + max encoded length (4)) - if (*size + strlen(value) + 1 + 4 > *capacity) + // Add to symbol table + hex_symboltable_set(ctx, value); + int index = hex_symboltable_get(ctx, value); + hex_debug(ctx, "LOOKUP[%d]: %d (%s)", index, value); + // Check if we need to resize the buffer (size + 1 opcode + 2 max index) + if (*size + 1 + 2 > *capacity) { - *capacity = (*size + strlen(value) + 1 + 4) * 2; + *capacity = (*size + 1 + 2) * 2; uint8_t *new_bytecode = (uint8_t *)realloc(*bytecode, *capacity); if (!new_bytecode) {@@ -545,9 +548,10 @@ *bytecode = new_bytecode;
} (*bytecode)[*size] = HEX_OP_LOOKUP; *size += 1; // opcode - encode_length(bytecode, size, strlen(value)); - memcpy(&(*bytecode)[*size], value, strlen(value)); - *size += strlen(value); + // Add index to bytecode (little endian) + (*bytecode)[*size] = index & 0xFF; + (*bytecode)[*size + 1] = (index >> 8) & 0xFF; + *size += 2; } return 0; }@@ -581,6 +585,7 @@ hex_token_t *token;
size_t capacity = 128; size_t size = 0; uint8_t *bytecode = (uint8_t *)malloc(capacity); + hex_symboltable_init(ctx); if (!bytecode) { hex_error(ctx, "Memory allocation failed");@@ -964,13 +969,29 @@ int hex_interpret_bytecode(hex_context_t *ctx, uint8_t *bytecode, size_t size)
{ size_t bytecode_size = size; size_t position = bytecode_size; - if (size < 6 || memcmp(bytecode, HEX_BYTECODE_HEADER, 6) != 0) + uint8_t header[8]; + memcpy(header, bytecode, 8); + int symbol_table_size = hex_validate_header(header); + if (symbol_table_size < 0) { - hex_error(ctx, "Invalid or missing bytecode header"); + hex_error(ctx, "Invalid bytecode header"); return 1; } - bytecode += 6; - size -= 6; + bytecode += 8; + size -= 8; + // Extract the symbol table + if (symbol_table_size > 0) + { + hex_decode_bytecode_symboltable(ctx, bytecode, symbol_table_size); + bytecode += symbol_table_size; + size -= symbol_table_size; + } + // Debug: Print all symbols in the symbol table + hex_debug(ctx, "Symbol Table:"); + for (size_t i = 0; i < ctx->symbol_table.count; i++) + { + hex_debug(ctx, "Symbol %zu: %s", i, ctx->symbol_table.symbols[i]); + } while (size > 0) { position = bytecode_size - size;@@ -1027,3 +1048,26 @@ }
} return 0; } + +void hex_header(hex_context_t *ctx, uint8_t header[8]) +{ + header[0] = 0x01; + header[1] = 'h'; + header[2] = 'e'; + header[3] = 'x'; + header[4] = 0x01; // version + uint16_t symbol_table_size = (uint16_t)ctx->symbol_table.count; + header[5] = symbol_table_size & 0xFF; + header[6] = (symbol_table_size >> 8) & 0xFF; + header[7] = 0x02; +} + +int hex_validate_header(uint8_t header[8]) +{ + if (header[0] != 0x01 || header[1] != 'h' || header[2] != 'e' || header[3] != 'x' || header[4] != 0x01 || header[7] != 0x02) + { + return -1; + } + uint16_t symbol_table_size = header[5] | (header[6] << 8); + return symbol_table_size; +}