Started implementing vm, refactoring.
@@ -16,7 +16,7 @@ }
int hex_get_doc(hex_doc_dictionary_t *docs, const char *key, hex_doc_entry_t *result) { - for (int i = 0; i < docs->size; i++) + for (size_t i = 0; i < docs->size; i++) { if (strcmp(docs->entries[i].name, key) == 0) {
@@ -118,7 +118,7 @@ fprintf(stream, "%s", item.token->value);
break; case HEX_TYPE_QUOTATION: fprintf(stream, "("); - for (int i = 0; i < item.quotation_size; i++) + for (size_t i = 0; i < item.quotation_size; i++) { if (i > 0) {@@ -199,7 +199,7 @@ break;
case HEX_TYPE_QUOTATION: fprintf(stream, "("); - for (int i = 0; i < item.quotation_size; i++) + for (size_t i = 0; i < item.quotation_size; i++) { if (i > 0) {@@ -218,8 +218,3 @@ fprintf(stream, "<unknown>");
break; } } - -int hex_is_symbol(hex_token_t *token, char *value) -{ - return strcmp(token->value, value) == 0; -}
@@ -27,6 +27,8 @@ #define HEX_STACK_SIZE 128
#define HEX_STACK_TRACE_SIZE 16 #define HEX_NATIVE_SYMBOLS 64 +const uint8_t HEX_BYTECODE_HEADER[6] = {0x01, 0x48, 0x45, 0x78, 0x01, 0x02}; + // Type Definitions typedef enum hex_item_type_t {@@ -75,8 +77,8 @@ char *str_value;
int (*fn_value)(hex_context_t *); struct hex_item_t **quotation_value; } data; - hex_token_t *token; // Token containing stack information (valid for HEX_TYPE_NATIVE_SYMBOL and HEX_TYPE_USER_SYMBOL) - int quotation_size; // Size of the quotation (valid for HEX_TYPE_QUOTATION) + hex_token_t *token; // Token containing stack information (valid for HEX_TYPE_NATIVE_SYMBOL and HEX_TYPE_USER_SYMBOL) + size_t quotation_size; // Size of the quotation (valid for HEX_TYPE_QUOTATION) } hex_item_t; typedef struct hex_registry_entry@@ -88,8 +90,8 @@
typedef struct hex_stack_trace_t { hex_token_t entries[HEX_STACK_TRACE_SIZE]; - int start; // Index of the oldest item - int size; // Current number of items in the buffer + int start; // Index of the oldest item + size_t size; // Current number of items in the buffer } hex_stack_trace_t; typedef struct hex_stack_t@@ -101,7 +103,7 @@
typedef struct hex_registry_t { hex_registry_entry_t entries[HEX_REGISTRY_SIZE]; - int size; + size_t size; } hex_registry_t; typedef struct hex_doc_entry_t@@ -115,7 +117,7 @@
typedef struct hex_doc_dictionary_t { hex_doc_entry_t entries[64]; - int size; + size_t size; } hex_doc_dictionary_t; typedef struct hex_settings_t@@ -138,6 +140,96 @@ int argc;
char **argv; } hex_context_t; +// Opcodes +typedef enum hex_opcode_t +{ + // Core Operations: <op> [prefix] <len> <data> + HEX_OP_LOOKUP = 0x00, + HEX_OP_PUSHIN = 0x01, + HEX_OP_PUSHST = 0x02, + HEX_OP_PUSHQT = 0x03, + + // Native Symbols + HEX_OP_STORE = 0x10, + HEX_OP_FREE = 0x11, + + HEX_OP_IF = 0x12, + HEX_OP_WHEN = 0x13, + HEX_OP_WHILE = 0x14, + HEX_OP_ERROR = 0x15, + HEX_OP_TRY = 0x16, + + HEX_OP_DUP = 0x17, + HEX_OP_STACK = 0x18, + HEX_OP_CLEAR = 0x19, + HEX_OP_POP = 0x1A, + HEX_OP_SWAP = 0x1B, + + HEX_OP_I = 0x1C, + HEX_OP_EVAL = 0x1D, + HEX_OP_QUOTE = 0x1E, + + HEX_OP_ADD = 0x1F, + HEX_OP_SUB = 0x20, + HEX_OP_MUL = 0x21, + HEX_OP_DIV = 0x22, + HEX_OP_MOD = 0x23, + + HEX_OP_BITAND = 0x24, + HEX_OP_BITOR = 0x25, + HEX_OP_BITXOR = 0x26, + HEX_OP_BITNOT = 0x27, + HEX_OP_SHL = 0x28, + HEX_OP_SHR = 0x29, + + HEX_OP_EQUAL = 0x2A, + HEX_OP_NOTEQUAL = 0x2B, + HEX_OP_GREATER = 0x2C, + HEX_OP_LESS = 0x2D, + HEX_OP_GREATEREQUAL = 0x2E, + HEX_OP_LESSEQUAL = 0x2F, + + HEX_OP_AND = 0x30, + HEX_OP_OR = 0x31, + HEX_OP_NOT = 0x32, + HEX_OP_XOR = 0x33, + + HEX_OP_INT = 0x34, + HEX_OP_STR = 0x35, + HEX_OP_DEC = 0x36, + HEX_OP_HEX = 0x37, + HEX_OP_ORD = 0x38, + HEX_OP_CHR = 0x39, + HEX_OP_TYPE = 0x3A, + + HEX_OP_CAT = 0x3B, + HEX_OP_LEN = 0x3C, + HEX_OP_GET = 0x3D, + HEX_OP_INDEX = 0x3E, + HEX_OP_JOIN = 0x3F, + + HEX_OP_SPLIT = 0x40, + HEX_OP_REPLACE = 0x41, + + HEX_OP_EACH = 0x42, + HEX_OP_MAP = 0x43, + HEX_OP_FILTER = 0x44, + + HEX_OP_PUTS = 0x45, + HEX_OP_WARN = 0x46, + HEX_OP_PRINT = 0x47, + HEX_OP_GETS = 0x48, + + HEX_OP_READ = 0x49, + HEX_OP_WRITE = 0x4A, + HEX_OP_APPEND = 0x4B, + HEX_OP_ARGS = 0x4C, + HEX_OP_EXIT = 0x4D, + HEX_OP_EXEC = 0x4E, + HEX_OP_RUN = 0x4F, + +} hex_opcode_t; + // Help System void hex_doc(hex_doc_dictionary_t *docs, const char *name, const char *description, const char *input, const char *output); int hex_get_doc(hex_doc_dictionary_t *docs, const char *key, hex_doc_entry_t *result);@@ -148,7 +240,7 @@
// Free data void hex_free_item(hex_context_t *ctx, hex_item_t item); void hex_free_token(hex_token_t *token); -void hex_free_list(hex_context_t *ctx, hex_item_t **quotation, int size); +void hex_free_list(hex_context_t *ctx, hex_item_t **quotation, size_t size); // Symbol management int hex_valid_user_symbol(hex_context_t *ctx, const char *symbol);@@ -168,13 +260,13 @@
// Item constructors hex_item_t hex_string_item(hex_context_t *ctx, const char *value); hex_item_t hex_integer_item(hex_context_t *ctx, int value); -hex_item_t hex_quotation_item(hex_context_t *ctx, hex_item_t **quotation, int size); +hex_item_t hex_quotation_item(hex_context_t *ctx, hex_item_t **quotation, size_t size); // Stack management int hex_push(hex_context_t *ctx, hex_item_t item); int hex_push_integer(hex_context_t *ctx, int value); int hex_push_string(hex_context_t *ctx, const char *value); -int hex_push_quotation(hex_context_t *ctx, hex_item_t **quotation, int size); +int hex_push_quotation(hex_context_t *ctx, hex_item_t **quotation, size_t size); int hex_push_symbol(hex_context_t *ctx, hex_token_t *token); hex_item_t hex_pop(hex_context_t *ctx);@@ -190,7 +282,6 @@ char *hex_itoa(int num, int base);
char *hex_itoa_dec(int num); char *hex_itoa_hex(int num); void hex_raw_print_item(FILE *stream, hex_item_t item); -int hex_is_symbol(hex_token_t *token, char *value); char *hex_type(hex_item_type_t type); // Native symbols@@ -258,6 +349,10 @@ int hex_symbol_dup(hex_context_t *ctx);
int hex_symbol_stack(hex_context_t *ctx); int hex_symbol_clear(hex_context_t *ctx); int hex_symbol_pop(hex_context_t *ctx); + +// VM +int hex_bytecode(hex_context_t *ctx, const char *input, uint8_t **output, size_t *output_size, hex_file_position_t *position); +int hex_quotation_bytecode(hex_context_t *ctx, hex_item_t *quotation, const char *input, hex_file_position_t *position, uint8_t **bytecode, size_t *size, size_t *capacity); // REPL and initialization void hex_register_symbols(hex_context_t *ctx);
@@ -149,12 +149,8 @@
// Tokenize and process the input hex_interpret(ctx, line, "<repl>", 1, 1); // Print the top item of the stack - if (ctx->stack.top >= 0) - { - hex_print_item(stdout, ctx->stack.entries[ctx->stack.top]); - // hex_print_item(stdout, HEX_STACK[HEX_TOP]); - printf("\n"); - } + hex_print_item(stdout, ctx->stack.entries[ctx->stack.top]); + printf("\n"); return 0; }@@ -262,7 +258,7 @@ "SYMBOLS\n"
" +---------+----------------------------+-------------------------------------------------+\n" " | Symbol | Input -> Output | Description |\n" " +---------+----------------------------+-------------------------------------------------+\n"); - for (int i = 0; i < docs->size; i++) + for (size_t i = 0; i < docs->size; i++) { printf(" | "); hex_rpad(docs->entries[i].name, 7);@@ -336,17 +332,26 @@ {
char *fileContent = hex_read_file(&ctx, file); if (generate_bytecode) { - uint8_t **bytecode; - size_t *bytecode_size; - if (hex_bytecode(&ctx, fileContent, &bytecode, &bytecode_size, file, 1, 1) != 0) + uint8_t *bytecode; + size_t bytecode_size = 0; + hex_file_position_t position; + position.column = 1; + position.line = 1 + ctx.hashbang; + position.filename = file; + if (hex_bytecode(&ctx, fileContent, &bytecode, &bytecode_size, &position) != 0) { hex_error(&ctx, "Failed to generate bytecode"); return 1; } - for (int i = 0; i < *bytecode_size; i++) + for (size_t i = 0; i < 6; i++) + { + printf("%02x ", HEX_BYTECODE_HEADER[i]); + } + for (size_t i = 0; i < bytecode_size; i++) { - printf("%02x ", (*bytecode)[i]); + printf("%02x ", bytecode[i]); } + printf("\n"); } else {
@@ -247,8 +247,8 @@
int hex_parse_quotation(hex_context_t *ctx, const char **input, hex_item_t *result, hex_file_position_t *position) { hex_item_t **quotation = NULL; - int capacity = 2; - int size = 0; + size_t capacity = 2; + size_t size = 0; int balanced = 1; quotation = (hex_item_t **)malloc(capacity * sizeof(hex_item_t *));
@@ -39,7 +39,7 @@ if (!native && hex_valid_user_symbol(ctx, key) == 0)
{ return 1; } - for (int i = 0; i < ctx->registry.size; i++) + for (size_t i = 0; i < ctx->registry.size; i++) { if (strcmp(ctx->registry.entries[i].key, key) == 0) {@@ -85,7 +85,7 @@ // Get a symbol value from the registry
int hex_get_symbol(hex_context_t *ctx, const char *key, hex_item_t *result) { - for (int i = 0; i < ctx->registry.size; i++) + for (size_t i = 0; i < ctx->registry.size; i++) { if (strcmp(ctx->registry.entries[i].key, key) == 0) {
@@ -133,7 +133,7 @@ hex_item_t item = {.type = HEX_TYPE_INTEGER, .data.int_value = value};
return item; } -hex_item_t hex_quotation_item(hex_context_t *ctx, hex_item_t **quotation, int size) +hex_item_t hex_quotation_item(hex_context_t *ctx, hex_item_t **quotation, size_t size) { (void)(ctx); hex_item_t item = {.type = HEX_TYPE_QUOTATION, .data.quotation_value = quotation, .quotation_size = size};@@ -150,7 +150,7 @@ {
return HEX_PUSH(ctx, hex_integer_item(ctx, value)); } -int hex_push_quotation(hex_context_t *ctx, hex_item_t **quotation, int size) +int hex_push_quotation(hex_context_t *ctx, hex_item_t **quotation, size_t size) { return HEX_PUSH(ctx, hex_quotation_item(ctx, quotation, size)); }@@ -222,9 +222,9 @@ hex_debug(ctx, "FREE: ** nothing to free");
} } -void hex_free_list(hex_context_t *ctx, hex_item_t **quotation, int size) +void hex_free_list(hex_context_t *ctx, hex_item_t **quotation, size_t size) { - for (int i = 0; i < size; i++) + for (size_t i = 0; i < size; i++) { HEX_FREE(ctx, *quotation[i]); }
@@ -34,7 +34,7 @@ return;
} fprintf(stderr, "[stack trace] (most recent symbol first):\n"); - for (int i = 0; i < ctx->stack_trace.size; i++) + for (size_t i = 0; i < ctx->stack_trace.size; i++) { int index = (ctx->stack_trace.start + ctx->stack_trace.size - 1 - i) % HEX_STACK_TRACE_SIZE; hex_token_t token = ctx->stack_trace.entries[index];
@@ -62,13 +62,13 @@ hex_error(ctx, "Cannot free native symbol '%s'", item.data.str_value);
HEX_FREE(ctx, item); return 1; } - for (int i = 0; i < ctx->registry.size; i++) + for (size_t i = 0; i < ctx->registry.size; i++) { if (strcmp(ctx->registry.entries[i].key, item.data.str_value) == 0) { free(ctx->registry.entries[i].key); HEX_FREE(ctx, ctx->registry.entries[i].value); - for (int j = i; j < ctx->registry.size - 1; j++) + for (size_t j = i; j < ctx->registry.size - 1; j++) { ctx->registry.entries[j] = ctx->registry.entries[j + 1]; }@@ -110,7 +110,7 @@ hex_error(ctx, "Symbol '.' requires a quotation");
HEX_FREE(ctx, item); return 1; } - for (int i = 0; i < item.quotation_size; i++) + for (size_t i = 0; i < item.quotation_size; i++) { if (hex_push(ctx, *item.data.quotation_value[i]) != 0) {@@ -660,7 +660,7 @@ return 0;
} else { - for (int i = 0; i < a.quotation_size; i++) + for (size_t i = 0; i < a.quotation_size; i++) { if (!hex_equal(*a.data.quotation_value[i], *b.data.quotation_value[i])) {@@ -1124,7 +1124,7 @@ {
hex_error(ctx, "Index must be an integer"); result = 1; } - else if (index.data.int_value < 0 || index.data.int_value >= list.quotation_size) + else if (index.data.int_value < 0 || (size_t)index.data.int_value >= list.quotation_size) { hex_error(ctx, "Index out of range"); result = 1;@@ -1185,7 +1185,7 @@ }
int result = -1; if (list.type == HEX_TYPE_QUOTATION) { - for (int i = 0; i < list.quotation_size; i++) + for (size_t i = 0; i < list.quotation_size; i++) { if (hex_equal(*list.data.quotation_value[i], item)) {@@ -1234,7 +1234,7 @@ int result = 0;
if (list.type == HEX_TYPE_QUOTATION && separator.type == HEX_TYPE_STRING) { int length = 0; - for (int i = 0; i < list.quotation_size; i++) + for (size_t i = 0; i < list.quotation_size; i++) { if (list.data.quotation_value[i]->type == HEX_TYPE_STRING) {@@ -1260,7 +1260,7 @@ HEX_FREE(ctx, separator);
return 1; } newStr[0] = '\0'; - for (int i = 0; i < list.quotation_size; i++) + for (size_t i = 0; i < list.quotation_size; i++) { strcat(newStr, list.data.quotation_value[i]->data.str_value); if (i < list.quotation_size - 1)@@ -1305,7 +1305,7 @@ {
if (strlen(separator.data.str_value) == 0) { // Separator is an empty string: split into individual characters - int size = strlen(str.data.str_value); + size_t size = strlen(str.data.str_value); hex_item_t **quotation = (hex_item_t **)malloc(size * sizeof(hex_item_t *)); if (!quotation) {@@ -1314,7 +1314,7 @@ result = 1;
} else { - for (int i = 0; i < size; i++) + for (size_t i = 0; i < size; i++) { quotation[i] = (hex_item_t *)malloc(sizeof(hex_item_t)); if (!quotation[i])@@ -1344,8 +1344,8 @@ else
{ // Separator is not empty: split as usual char *token = strtok(str.data.str_value, separator.data.str_value); - int capacity = 2; - int size = 0; + size_t capacity = 2; + size_t size = 0; hex_item_t **quotation = (hex_item_t **)malloc(capacity * sizeof(hex_item_t *)); if (!quotation) {@@ -1635,7 +1635,7 @@ return 1;
} else { - for (int i = 0; i < ctx->argc; i++) + for (size_t i = 0; i < (size_t)ctx->argc; i++) { quotation[i] = (hex_item_t *)malloc(sizeof(hex_item_t)); quotation[i]->type = HEX_TYPE_STRING;@@ -1891,7 +1891,7 @@ return 1;
} else { - for (int i = 0; i < condition.quotation_size; i++) + for (size_t i = 0; i < condition.quotation_size; i++) { if (hex_push(ctx, *condition.data.quotation_value[i]) != 0) {@@ -1904,7 +1904,7 @@ }
HEX_POP(ctx, evalResult); if (evalResult.type == HEX_TYPE_INTEGER && evalResult.data.int_value > 0) { - for (int i = 0; i < thenBlock.quotation_size; i++) + for (size_t i = 0; i < thenBlock.quotation_size; i++) { if (hex_push(ctx, *thenBlock.data.quotation_value[i]) != 0) {@@ -1917,7 +1917,7 @@ }
} else { - for (int i = 0; i < elseBlock.quotation_size; i++) + for (size_t i = 0; i < elseBlock.quotation_size; i++) { if (hex_push(ctx, *elseBlock.data.quotation_value[i]) != 0) {@@ -1956,7 +1956,7 @@ result = 1;
} else { - for (int i = 0; i < condition.quotation_size; i++) + for (size_t i = 0; i < condition.quotation_size; i++) { if (hex_push(ctx, *condition.data.quotation_value[i]) != 0) {@@ -1967,7 +1967,7 @@ }
HEX_POP(ctx, evalResult); if (evalResult.type == HEX_TYPE_INTEGER && evalResult.data.int_value > 0) { - for (int i = 0; i < action.quotation_size; i++) + for (size_t i = 0; i < action.quotation_size; i++) { if (hex_push(ctx, *action.data.quotation_value[i]) != 0) {@@ -2012,7 +2012,7 @@ else
{ while (1) { - for (int i = 0; i < condition.quotation_size; i++) + for (size_t i = 0; i < condition.quotation_size; i++) { if (hex_push(ctx, *condition.data.quotation_value[i]) != 0) {@@ -2026,7 +2026,7 @@ if (evalResult.type == HEX_TYPE_INTEGER && evalResult.data.int_value == 0)
{ break; } - for (int i = 0; i < action.quotation_size; i++) + for (size_t i = 0; i < action.quotation_size; i++) { if (hex_push(ctx, *action.data.quotation_value[i]) != 0) {@@ -2065,7 +2065,7 @@ return 1;
} else { - for (int i = 0; i < list.quotation_size; i++) + for (size_t i = 0; i < list.quotation_size; i++) { if (hex_push(ctx, *list.data.quotation_value[i]) != 0) {@@ -2073,7 +2073,7 @@ HEX_FREE(ctx, action);
HEX_FREE(ctx, list); return 1; } - for (int j = 0; j < action.quotation_size; j++) + for (size_t j = 0; j < action.quotation_size; j++) { if (hex_push(ctx, *action.data.quotation_value[j]) != 0) {@@ -2125,7 +2125,7 @@ strncpy(prevError, ctx->error, sizeof(ctx->error));
ctx->error[0] = '\0'; ctx->settings.errors_enabled = 0; - for (int i = 0; i < try_block.quotation_size; i++) + for (size_t i = 0; i < try_block.quotation_size; i++) { if (hex_push(ctx, *try_block.data.quotation_value[i]) != 0) {@@ -2136,7 +2136,7 @@ ctx->settings.errors_enabled = 1;
if (strcmp(ctx->error, "")) { - for (int i = 0; i < catch_block.quotation_size; i++) + for (size_t i = 0; i < catch_block.quotation_size; i++) { if (hex_push(ctx, *catch_block.data.quotation_value[i]) != 0) {@@ -2232,7 +2232,7 @@ HEX_FREE(ctx, action);
HEX_FREE(ctx, list); return 1; } - for (int i = 0; i < list.quotation_size; i++) + for (size_t i = 0; i < list.quotation_size; i++) { if (hex_push(ctx, *list.data.quotation_value[i]) != 0) {@@ -2241,7 +2241,7 @@ HEX_FREE(ctx, list);
hex_free_list(ctx, quotation, i); return 1; } - for (int j = 0; j < action.quotation_size; j++) + for (size_t j = 0; j < action.quotation_size; j++) { if (hex_push(ctx, *action.data.quotation_value[j]) != 0) {@@ -2298,8 +2298,8 @@ HEX_FREE(ctx, action);
HEX_FREE(ctx, list); return 1; } - int count = 0; - for (int i = 0; i < list.quotation_size; i++) + size_t count = 0; + for (size_t i = 0; i < list.quotation_size; i++) { if (hex_push(ctx, *list.data.quotation_value[i]) != 0) {@@ -2308,7 +2308,7 @@ HEX_FREE(ctx, list);
hex_free_list(ctx, quotation, count); return 1; } - for (int j = 0; j < action.quotation_size; j++) + for (size_t j = 0; j < action.quotation_size; j++) { if (hex_push(ctx, *action.data.quotation_value[j]) != 0) {@@ -2339,7 +2339,7 @@ {
hex_error(ctx, "An error occurred while filtering the list"); HEX_FREE(ctx, action); HEX_FREE(ctx, list); - for (int i = 0; i < count; i++) + for (size_t i = 0; i < count; i++) { HEX_FREE(ctx, *quotation[i]); }@@ -2409,7 +2409,7 @@ hex_error(ctx, "Memory allocation failed");
return 1; } int count = 0; - for (int i = 0; i <= ctx->stack.top; i++) + for (size_t i = 0; i <= (size_t)ctx->stack.top + 1; i++) { quotation[i] = (hex_item_t *)malloc(sizeof(hex_item_t)); if (!quotation[i])@@ -2419,7 +2419,6 @@ hex_free_list(ctx, quotation, count);
return 1; } *quotation[i] = ctx->stack.entries[i]; - //*quotation[i] = HEX_STACK[i]; count++; }@@ -2434,11 +2433,9 @@ }
int hex_symbol_clear(hex_context_t *ctx) { - - while (ctx->stack.top >= 0) + for (size_t i = 0; i <= (size_t)ctx->stack.top; i++) { - HEX_FREE(ctx, ctx->stack.entries[ctx->stack.top--]); - // HEX_FREE(ctx, HEX_STACK[HEX_TOP--]); + HEX_FREE(ctx, ctx->stack.entries[i]); } ctx->stack.top = -1; return 0;
@@ -6,99 +6,6 @@ ////////////////////////////////////////
// Virtual Machine // //////////////////////////////////////// -uint8_t HEX_BYTECODE_HEADER[6] = {0x01, 0x48, 0x45, 0x78, 0x01, 0x02}; - -// Opcodes - -typedef enum hex_opcode_t -{ - // Core Operations: <op> [prefix] <len> <data> - HEX_OP_LOOKUP = 0x00, - HEX_OP_PUSHIN = 0x01, - HEX_OP_PUSHST = 0x02, - HEX_OP_PUSHQT = 0x03, - - // Native Symbols - HEX_OP_STORE = 0x10, - HEX_OP_FREE = 0x11, - - HEX_OP_IF = 0x12, - HEX_OP_WHEN = 0x13, - HEX_OP_WHILE = 0x14, - HEX_OP_ERROR = 0x15, - HEX_OP_TRY = 0x16, - - HEX_OP_DUP = 0x17, - HEX_OP_STACK = 0x18, - HEX_OP_CLEAR = 0x19, - HEX_OP_POP = 0x1A, - HEX_OP_SWAP = 0x1B, - - HEX_OP_I = 0x1C, - HEX_OP_EVAL = 0x1D, - HEX_OP_QUOTE = 0x1E, - - HEX_OP_ADD = 0x1F, - HEX_OP_SUB = 0x20, - HEX_OP_MUL = 0x21, - HEX_OP_DIV = 0x22, - HEX_OP_MOD = 0x23, - - HEX_OP_BITAND = 0x24, - HEX_OP_BITOR = 0x25, - HEX_OP_BITXOR = 0x26, - HEX_OP_BITNOT = 0x27, - HEX_OP_SHL = 0x28, - HEX_OP_SHR = 0x29, - - HEX_OP_EQUAL = 0x2A, - HEX_OP_NOTEQUAL = 0x2B, - HEX_OP_GREATER = 0x2C, - HEX_OP_LESS = 0x2D, - HEX_OP_GREATEREQUAL = 0x2E, - HEX_OP_LESSEQUAL = 0x2F, - - HEX_OP_AND = 0x30, - HEX_OP_OR = 0x31, - HEX_OP_NOT = 0x32, - HEX_OP_XOR = 0x33, - - HEX_OP_INT = 0x34, - HEX_OP_STR = 0x35, - HEX_OP_DEC = 0x36, - HEX_OP_HEX = 0x37, - HEX_OP_ORD = 0x38, - HEX_OP_CHR = 0x39, - HEX_OP_TYPE = 0x3A, - - HEX_OP_CAT = 0x3B, - HEX_OP_LEN = 0x3C, - HEX_OP_GET = 0x3D, - HEX_OP_INDEX = 0x3E, - HEX_OP_JOIN = 0x3F, - - HEX_OP_SPLIT = 0x40, - HEX_OP_REPLACE = 0x41, - - HEX_OP_EACH = 0x42, - HEX_OP_MAP = 0x43, - HEX_OP_FILTER = 0x44, - - HEX_OP_PUTS = 0x45, - HEX_OP_WARN = 0x46, - HEX_OP_PRINT = 0x47, - HEX_OP_GETS = 0x48, - - HEX_OP_READ = 0x49, - HEX_OP_WRITE = 0x4A, - HEX_OP_APPEND = 0x4B, - HEX_OP_ARGS = 0x4C, - HEX_OP_EXIT = 0x4D, - HEX_OP_EXEC = 0x4E, - HEX_OP_RUN = 0x4F, - -} hex_opcode_t; - static void encode_length(uint8_t **bytecode, size_t *size, size_t *capacity, size_t length) { if (length < 0x80)@@ -131,9 +38,270 @@ (*bytecode)[(*size)++] = (uint8_t)(length & 
0xFF);
} } -int hex_bytecode(hex_context_t *ctx, const char *input, uint8_t **output, size_t *output_size, const char *filename, int line, int column) +static uint8_t get_opcode(char *symbol) { - hex_file_position_t position = {filename, line, column}; + // Native Symbols + if (strcmp(symbol, ":") == 0) + { + return HEX_OP_STORE; + } + else if (strcmp(symbol, "#") == 0) + { + return HEX_OP_FREE; + } + else if (strcmp(symbol, "if") == 0) + { + return HEX_OP_IF; + } + else if (strcmp(symbol, "when") == 0) + { + return HEX_OP_WHEN; + } + else if (strcmp(symbol, "while") == 0) + { + return HEX_OP_WHILE; + } + else if (strcmp(symbol, "error") == 0) + { + return HEX_OP_ERROR; + } + else if (strcmp(symbol, "try") == 0) + { + return HEX_OP_TRY; + } + else if (strcmp(symbol, "dup") == 0) + { + return HEX_OP_DUP; + } + else if (strcmp(symbol, "stack") == 0) + { + return HEX_OP_STACK; + } + else if (strcmp(symbol, "clear") == 0) + { + return HEX_OP_CLEAR; + } + else if (strcmp(symbol, "pop") == 0) + { + return HEX_OP_POP; + } + else if (strcmp(symbol, "swap") == 0) + { + return HEX_OP_SWAP; + } + else if (strcmp(symbol, ".") == 0) + { + return HEX_OP_I; + } + else if (strcmp(symbol, "!") == 0) + { + return HEX_OP_EVAL; + } + else if (strcmp(symbol, "'") == 0) + { + return HEX_OP_QUOTE; + } + else if (strcmp(symbol, "+") == 0) + { + return HEX_OP_ADD; + } + else if (strcmp(symbol, "-") == 0) + { + return HEX_OP_SUB; + } + else if (strcmp(symbol, "*") == 0) + { + return HEX_OP_MUL; + } + else if (strcmp(symbol, "/") == 0) + { + return HEX_OP_DIV; + } + else if (strcmp(symbol, "%") == 0) + { + return HEX_OP_MOD; + } + else if (strcmp(symbol, "&") == 0) + { + return HEX_OP_BITAND; + } + else if (strcmp(symbol, "|") == 0) + { + return HEX_OP_BITOR; + } + else if (strcmp(symbol, "^") == 0) + { + return HEX_OP_BITXOR; + } + else if (strcmp(symbol, "~") == 0) + { + return HEX_OP_BITNOT; + } + else if (strcmp(symbol, "<<") == 0) + { + return HEX_OP_SHL; + } + else if (strcmp(symbol, ">>") == 
0) + { + return HEX_OP_SHR; + } + else if (strcmp(symbol, "==") == 0) + { + return HEX_OP_EQUAL; + } + else if (strcmp(symbol, "!=") == 0) + { + return HEX_OP_NOTEQUAL; + } + else if (strcmp(symbol, ">") == 0) + { + return HEX_OP_GREATER; + } + else if (strcmp(symbol, "<") == 0) + { + return HEX_OP_LESS; + } + else if (strcmp(symbol, ">=") == 0) + { + return HEX_OP_GREATEREQUAL; + } + else if (strcmp(symbol, "<=") == 0) + { + return HEX_OP_LESSEQUAL; + } + else if (strcmp(symbol, "and") == 0) + { + return HEX_OP_AND; + } + else if (strcmp(symbol, "or") == 0) + { + return HEX_OP_OR; + } + else if (strcmp(symbol, "not") == 0) + { + return HEX_OP_NOT; + } + else if (strcmp(symbol, "xor") == 0) + { + return HEX_OP_XOR; + } + else if (strcmp(symbol, "int") == 0) + { + return HEX_OP_INT; + } + else if (strcmp(symbol, "str") == 0) + { + return HEX_OP_STR; + } + else if (strcmp(symbol, "dec") == 0) + { + return HEX_OP_DEC; + } + else if (strcmp(symbol, "hex") == 0) + { + return HEX_OP_HEX; + } + else if (strcmp(symbol, "ord") == 0) + { + return HEX_OP_ORD; + } + else if (strcmp(symbol, "chr") == 0) + { + return HEX_OP_CHR; + } + else if (strcmp(symbol, "type") == 0) + { + return HEX_OP_TYPE; + } + else if (strcmp(symbol, "cat") == 0) + { + return HEX_OP_CAT; + } + else if (strcmp(symbol, "len") == 0) + { + return HEX_OP_LEN; + } + else if (strcmp(symbol, "get") == 0) + { + return HEX_OP_GET; + } + else if (strcmp(symbol, "index") == 0) + { + return HEX_OP_INDEX; + } + else if (strcmp(symbol, "join") == 0) + { + return HEX_OP_JOIN; + } + else if (strcmp(symbol, "split") == 0) + { + return HEX_OP_SPLIT; + } + else if (strcmp(symbol, "replace") == 0) + { + return HEX_OP_REPLACE; + } + else if (strcmp(symbol, "each") == 0) + { + return HEX_OP_EACH; + } + else if (strcmp(symbol, "map") == 0) + { + return HEX_OP_MAP; + } + else if (strcmp(symbol, "filter") == 0) + { + return HEX_OP_FILTER; + } + else if (strcmp(symbol, "puts") == 0) + { + return HEX_OP_PUTS; + } + else if 
(strcmp(symbol, "warn") == 0) + { + return HEX_OP_WARN; + } + else if (strcmp(symbol, "print") == 0) + { + return HEX_OP_PRINT; + } + else if (strcmp(symbol, "gets") == 0) + { + return HEX_OP_GETS; + } + else if (strcmp(symbol, "read") == 0) + { + return HEX_OP_READ; + } + else if (strcmp(symbol, "write") == 0) + { + return HEX_OP_WRITE; + } + else if (strcmp(symbol, "append") == 0) + { + return HEX_OP_APPEND; + } + else if (strcmp(symbol, "args") == 0) + { + return HEX_OP_ARGS; + } + else if (strcmp(symbol, "exit") == 0) + { + return HEX_OP_EXIT; + } + else if (strcmp(symbol, "exec") == 0) + { + return HEX_OP_EXEC; + } + else if (strcmp(symbol, "run") == 0) + { + return HEX_OP_RUN; + } + return 0; +} + +int hex_bytecode(hex_context_t *ctx, const char *input, uint8_t **output, size_t *output_size, hex_file_position_t *position) +{ hex_token_t *token; size_t capacity = 128; size_t size = 0;@@ -144,7 +312,7 @@ hex_error(ctx, "Memory allocation failed");
return 1; } - while ((token = hex_next_token(ctx, &input, &position)) != NULL) + while ((token = hex_next_token(ctx, &input, position)) != NULL) { if (size >= capacity) {@@ -177,51 +345,41 @@ break;
case HEX_TOKEN_SYMBOL: if (hex_valid_native_symbol(ctx, token->value)) { + bytecode[size++] = get_opcode(token->value); + } + else + { + // Lookup user symbol + bytecode[size++] = HEX_OP_LOOKUP; size_t sym_len = strlen(token->value); encode_length(&bytecode, &size, &capacity, sym_len); memcpy(&bytecode[size], token->value, sym_len); size += sym_len; } - else - { - hex_error(ctx, "(%d,%d) Invalid symbol: %s", position.line, position.column, token->value); - hex_free_token(token); - free(bytecode); - return 1; - } break; case HEX_TOKEN_QUOTATION_START: { bytecode[size++] = HEX_OP_PUSHQT; hex_item_t quotation; - if (hex_parse_quotation(ctx, &input, &quotation, &position) != 0) + if (hex_parse_quotation(ctx, &input, &quotation, position) != 0) { hex_free_token(token); free(bytecode); return 1; } - // Recursively translate quotation to opcodes - uint8_t *quotation_bytecode; - size_t quotation_size; - if (hex_bytecode(ctx, quotation.data.quotation_value, &quotation_bytecode, &quotation_size, filename, line, column) != 0) + encode_length(&bytecode, &size, &capacity, quotation.quotation_size); + if (hex_quotation_bytecode(ctx, &quotation, input, position, &bytecode, &size, &capacity) != 0) { hex_free_token(token); free(bytecode); return 1; } - encode_length(&bytecode, &size, &capacity, quotation_size); - memcpy(&bytecode[size], quotation_bytecode, quotation_size); - size += quotation_size; - free(quotation_bytecode); break; } default: - hex_error(ctx, "(%d,%d) Unexpected token: %s", position.line, position.column, token->value); - hex_free_token(token); - free(bytecode); - return 1; + // Ignore other tokens + break; } - hex_free_token(token); }@@ -229,3 +387,65 @@ *output = bytecode;
*output_size = size; return 0; } + +int hex_quotation_bytecode(hex_context_t *ctx, hex_item_t *quotation, const char *input, hex_file_position_t *position, uint8_t **bytecode, size_t *size, size_t *capacity) +{ + for (size_t i = 0; i < quotation->quotation_size; ++i) + { + hex_token_t *token = quotation->data.quotation_value[i]->token; + switch (token->type) + { + case HEX_TOKEN_INTEGER: + (*bytecode)[(*size)++] = HEX_OP_PUSHIN; + int32_t value = hex_parse_integer(token->value); + encode_length(bytecode, size, capacity, sizeof(int32_t)); + memcpy(&(*bytecode)[*size], &value, sizeof(int32_t)); + *size += sizeof(int32_t); + break; + + case HEX_TOKEN_STRING: + (*bytecode)[(*size)++] = HEX_OP_PUSHST; + size_t len = strlen(token->value); + encode_length(bytecode, size, capacity, len); + memcpy(&(*bytecode)[*size], token->value, len); + *size += len; + break; + + case HEX_TOKEN_SYMBOL: + if (hex_valid_native_symbol(ctx, token->value)) + { + (*bytecode)[(*size)++] = get_opcode(token->value); + } + else + { + // Lookup user symbol + (*bytecode)[(*size)++] = HEX_OP_LOOKUP; + size_t sym_len = strlen(token->value); + encode_length(bytecode, size, capacity, sym_len); + memcpy(&(*bytecode)[*size], token->value, sym_len); + *size += sym_len; + } + break; + + case HEX_TOKEN_QUOTATION_START: + { + (*bytecode)[(*size)++] = HEX_OP_PUSHQT; + hex_item_t nested_quotation; + if (hex_parse_quotation(ctx, &input, &nested_quotation, position) != 0) + { + return 1; + } + encode_length(bytecode, size, capacity, nested_quotation.quotation_size); + if (hex_quotation_bytecode(ctx, &nested_quotation, input, position, bytecode, size, capacity) != 0) + { + return 1; + } + break; + } + default: + // Ignore other tokens + break; + } + } + return 0; +}