all repos — hex @ 1937c9760ea99c6b1df88eb2e56c47748eabbcc1

A tiny, minimalist, slightly-esoteric concatenative programming language.

Implementing vm.
h3rald h3rald@h3rald.com
Mon, 16 Dec 2024 19:06:12 +0100
commit

1937c9760ea99c6b1df88eb2e56c47748eabbcc1

parent

1111f7327f832999c4d971cb8ff09bdfeebe9a46

A .vscode/settings.json

@@ -0,0 +1,10 @@

+{ + "files.associations": { + "*.erd": "json", + "*.vuerd": "json", + "__bit_reference": "c", + "__split_buffer": "c", + "string": "c", + "vector": "c" + } +}
M src/helpers.c → src/helpers.c

@@ -114,7 +114,7 @@ fprintf(stream, "%s", item.data.str_value);

break; case HEX_TYPE_USER_SYMBOL: case HEX_TYPE_NATIVE_SYMBOL: - fprintf(stream, "%s", item.token->value); + fprintf(stream, "%s", item.token->data.value); break; case HEX_TYPE_QUOTATION: fprintf(stream, "(");

@@ -194,7 +194,7 @@ break;

case HEX_TYPE_USER_SYMBOL: case HEX_TYPE_NATIVE_SYMBOL: - fprintf(stream, "%s", item.token->value); + fprintf(stream, "%s", item.token->data.value); break; case HEX_TYPE_QUOTATION:
M src/hex.h → src/hex.h

@@ -61,7 +61,13 @@

typedef struct hex_token_t { hex_token_type_t type; - char *value; + union + { + char *value; + struct hex_token_t **quotation_value; + + } data; + size_t quotation_size; hex_file_position_t position; } hex_token_t;

@@ -351,8 +357,9 @@ int hex_symbol_clear(hex_context_t *ctx);

int hex_symbol_pop(hex_context_t *ctx); // VM -int hex_bytecode(hex_context_t *ctx, const char *input, uint8_t **output, size_t *output_size, hex_file_position_t *position); -int hex_quotation_bytecode(hex_context_t *ctx, hex_item_t *quotation, const char *input, hex_file_position_t *position, uint8_t **bytecode, size_t *size, size_t *capacity); +int hex_bytecode(hex_context_t *ctx, const char *input, uint8_t **output, size_t *output_size, hex_file_position_t *position, int *open_quotations); +// int hex_quotation_bytecode(hex_context_t *ctx, hex_item_t *quotation, const char *input, hex_file_position_t *position, uint8_t **bytecode, size_t *size, size_t *capacity); +int hex_tokenize_quotation(hex_context_t *ctx, const char **input, hex_token_t *result, hex_file_position_t *position); // REPL and initialization void hex_register_symbols(hex_context_t *ctx);
M src/interpreter.c → src/interpreter.c

@@ -34,11 +34,11 @@ {

int result = 0; if (token->type == HEX_TOKEN_INTEGER) { - result = hex_push_integer(ctx, hex_parse_integer(token->value)); + result = hex_push_integer(ctx, hex_parse_integer(token->data.value)); } else if (token->type == HEX_TOKEN_STRING) { - result = hex_push_string(ctx, token->value); + result = hex_push_string(ctx, token->data.value); } else if (token->type == HEX_TOKEN_SYMBOL) {
M src/main.c → src/main.c

@@ -353,9 +353,15 @@ hex_file_position_t position;

position.column = 1; position.line = 1 + ctx.hashbang; position.filename = file; - if (hex_bytecode(&ctx, fileContent, &bytecode, &bytecode_size, &position) != 0) + int open_quotations = 0; + if (hex_bytecode(&ctx, fileContent, &bytecode, &bytecode_size, &position, &open_quotations) != 0) { hex_error(&ctx, "Failed to generate bytecode"); + return 1; + } + if (open_quotations != 0) + { + hex_error(&ctx, "File contains unbalanced quotations"); return 1; } if (hex_write_bytecode_file(&ctx, strcat(file, "b"), bytecode, bytecode_size) != 0)
M src/parser.c → src/parser.c

@@ -32,7 +32,7 @@ return NULL; // End of input

} hex_token_t *token = (hex_token_t *)malloc(sizeof(hex_token_t)); - token->value = NULL; + token->data.value = NULL; token->position.line = position->line; token->position.column = position->column;

@@ -46,9 +46,9 @@ ptr++;

position->column++; } int len = ptr - start; - token->value = (char *)malloc(len + 1); - strncpy(token->value, start, len); - token->value[len] = '\0'; + token->data.value = (char *)malloc(len + 1); + strncpy(token->data.value, start, len); + token->data.value[len] = '\0'; token->type = HEX_TOKEN_COMMENT; } else if (strncmp(ptr, "#|", 2) == 0)

@@ -81,9 +81,9 @@ }

ptr += 2; // Skip the "|#" suffix position->column += 2; int len = ptr - start; - token->value = (char *)malloc(len + 1); - strncpy(token->value, start, len); - token->value[len] = '\0'; + token->data.value = (char *)malloc(len + 1); + strncpy(token->data.value, start, len); + token->data.value[len] = '\0'; token->type = HEX_TOKEN_COMMENT; } else if (*ptr == '"')

@@ -136,8 +136,8 @@ hex_error(ctx, "(%d,%d) Unterminated string", position->line, position->column);

return token; } - token->value = (char *)malloc(len + 1); - char *dst = token->value; + token->data.value = (char *)malloc(len + 1); + char *dst = token->data.value; ptr = start; while (*ptr != '\0' && *ptr != '"')

@@ -175,9 +175,9 @@ ptr++;

position->column++; } int len = ptr - start; - token->value = (char *)malloc(len + 1); - strncpy(token->value, start, len); - token->value[len] = '\0'; + token->data.value = (char *)malloc(len + 1); + strncpy(token->data.value, start, len); + token->data.value[len] = '\0'; token->type = HEX_TOKEN_INTEGER; } else if (*ptr == '(')

@@ -203,10 +203,10 @@ position->column++;

} int len = ptr - start; - token->value = (char *)malloc(len + 1); - strncpy(token->value, start, len); - token->value[len] = '\0'; - if (hex_valid_native_symbol(ctx, token->value) || hex_valid_user_symbol(ctx, token->value)) + token->data.value = (char *)malloc(len + 1); + strncpy(token->data.value, start, len); + token->data.value[len] = '\0'; + if (hex_valid_native_symbol(ctx, token->data.value) || hex_valid_user_symbol(ctx, token->data.value)) { token->type = HEX_TOKEN_SYMBOL; }

@@ -282,23 +282,23 @@ hex_item_t *item = (hex_item_t *)malloc(sizeof(hex_item_t));

if (token->type == HEX_TOKEN_INTEGER) { - *item = hex_integer_item(ctx, hex_parse_integer(token->value)); + *item = hex_integer_item(ctx, hex_parse_integer(token->data.value)); quotation[size] = item; size++; } else if (token->type == HEX_TOKEN_STRING) { - *item = hex_string_item(ctx, token->value); + *item = hex_string_item(ctx, token->data.value); quotation[size] = item; size++; } else if (token->type == HEX_TOKEN_SYMBOL) { - if (hex_valid_native_symbol(ctx, token->value)) + if (hex_valid_native_symbol(ctx, token->data.value)) { item->type = HEX_TYPE_NATIVE_SYMBOL; hex_item_t value; - if (hex_get_symbol(ctx, token->value, &value)) + if (hex_get_symbol(ctx, token->data.value, &value)) { item->token = token; item->type = HEX_TYPE_NATIVE_SYMBOL;

@@ -306,7 +306,7 @@ item->data.fn_value = value.data.fn_value;

} else { - hex_error(ctx, "(%d,%d) Unable to reference native symbol: %s", position->line, position->column, token->value); + hex_error(ctx, "(%d,%d) Unable to reference native symbol: %s", position->line, position->column, token->data.value); hex_free_token(token); hex_free_list(ctx, quotation, size); return 1;

@@ -339,7 +339,7 @@ // Ignore comments

} else { - hex_error(ctx, "(%d,%d) Unexpected token in quotation: %d", position->line, position->column, token->value); + hex_error(ctx, "(%d,%d) Unexpected token in quotation: %d", position->line, position->column, token->data.value); hex_free_token(token); hex_free_list(ctx, quotation, size); return 1;
M src/stack.c → src/stack.c

@@ -11,7 +11,7 @@ void hex_free_token(hex_token_t *token)

{ if (token) { - free(token->value); + free(token->data.value); free(token); } }

@@ -30,13 +30,13 @@ int result = 0;

if (item.type == HEX_TYPE_USER_SYMBOL) { hex_item_t value; - if (hex_get_symbol(ctx, item.token->value, &value)) + if (hex_get_symbol(ctx, item.token->data.value, &value)) { result = HEX_PUSH(ctx, value); } else { - hex_error(ctx, "Undefined user symbol: %s", item.token->value); + hex_error(ctx, "Undefined user symbol: %s", item.token->data.value); HEX_FREE(ctx, value); result = 1; }

@@ -159,14 +159,14 @@ int hex_push_symbol(hex_context_t *ctx, hex_token_t *token)

{ add_to_stack_trace(ctx, token); hex_item_t value; - if (hex_get_symbol(ctx, token->value, &value)) + if (hex_get_symbol(ctx, token->data.value, &value)) { value.token = token; return HEX_PUSH(ctx, value); } else { - hex_error(ctx, "Undefined symbol: %s", token->value); + hex_error(ctx, "Undefined symbol: %s", token->data.value); return 1; } }

@@ -202,14 +202,14 @@ hex_free_list(ctx, item.data.quotation_value, item.quotation_size);

item.data.quotation_value = NULL; hex_debug(ctx, "FREE: ** quotation end"); } - else if (item.type == HEX_TYPE_NATIVE_SYMBOL && item.token->value != NULL) + else if (item.type == HEX_TYPE_NATIVE_SYMBOL && item.token->data.value != NULL) { hex_debug(ctx, "FREE: ** native symbol start"); item.token = NULL; hex_free_token(item.token); hex_debug(ctx, "FREE: ** native symbol end"); } - else if (item.type == HEX_TYPE_USER_SYMBOL && item.token->value != NULL) + else if (item.type == HEX_TYPE_USER_SYMBOL && item.token->data.value != NULL) { hex_debug(ctx, "FREE: ** user symbol start"); item.token = NULL;
M src/stacktrace.c → src/stacktrace.c

@@ -38,6 +38,6 @@ for (size_t i = 0; i < ctx->stack_trace.size; i++)

{ int index = (ctx->stack_trace.start + ctx->stack_trace.size - 1 - i) % HEX_STACK_TRACE_SIZE; hex_token_t token = ctx->stack_trace.entries[index]; - fprintf(stderr, " %s (%s:%d:%d)\n", token.value, token.position.filename, token.position.line, token.position.column); + fprintf(stderr, " %s (%s:%d:%d)\n", token.data.value, token.position.filename, token.position.line, token.position.column); } }
M src/symbols.c → src/symbols.c

@@ -638,7 +638,7 @@ return 0;

} if (a.type == HEX_TYPE_NATIVE_SYMBOL || a.type == HEX_TYPE_USER_SYMBOL) { - return (strcmp(a.token->value, b.token->value) == 0); + return (strcmp(a.token->data.value, b.token->data.value) == 0); } if (a.type != b.type) {

@@ -729,7 +729,7 @@ }

} else if (hex_is_type_symbol(it_a)) { - int cmp = strcmp(it_a->token->value, it_b->token->value); + int cmp = strcmp(it_a->token->data.value, it_b->token->data.value); if (cmp != 0) { is_greater = cmp > 0;
M src/vm.c → src/vm.c

@@ -300,7 +300,7 @@ }

return 0; } -int hex_bytecode(hex_context_t *ctx, const char *input, uint8_t **output, size_t *output_size, hex_file_position_t *position) +int hex_bytecode(hex_context_t *ctx, const char *input, uint8_t **output, size_t *output_size, hex_file_position_t *position, int *open_quotations) { hex_token_t *token; size_t capacity = 128;

@@ -311,7 +311,9 @@ {

hex_error(ctx, "Memory allocation failed"); return 1; } - + printf("-------\n"); + printf("%s", input); + printf("-------\n"); while ((token = hex_next_token(ctx, &input, position)) != NULL) { if (size >= capacity)

@@ -329,7 +331,7 @@ switch (token->type)

{ case HEX_TOKEN_INTEGER: bytecode[size++] = HEX_OP_PUSHIN; - int32_t value = hex_parse_integer(token->value); + int32_t value = hex_parse_integer(token->data.value); encode_length(&bytecode, &size, &capacity, sizeof(int32_t)); memcpy(&bytecode[size], &value, sizeof(int32_t)); size += sizeof(int32_t);

@@ -338,16 +340,16 @@ break;

case HEX_TOKEN_STRING: bytecode[size++] = HEX_OP_PUSHST; - size_t len = strlen(token->value); + size_t len = strlen(token->data.value); encode_length(&bytecode, &size, &capacity, len); - memcpy(&bytecode[size], token->value, len); - hex_debug(ctx, "PUSHST[%d]: %s", len, token->value); + memcpy(&bytecode[size], token->data.value, len); + hex_debug(ctx, "PUSHST[%d]: %s", len, token->data.value); size += len; break; case HEX_TOKEN_SYMBOL: - if (hex_valid_native_symbol(ctx, token->value)) + if (hex_valid_native_symbol(ctx, token->data.value)) { - char *symbol = token->value; + char *symbol = token->data.value; bytecode[size++] = get_opcode(symbol); hex_debug(ctx, "NATSYM[1]: %s", symbol); }

@@ -355,48 +357,129 @@ else

{ // Lookup user symbol bytecode[size++] = HEX_OP_LOOKUP; - size_t sym_len = strlen(token->value); + size_t sym_len = strlen(token->data.value); encode_length(&bytecode, &size, &capacity, sym_len); - memcpy(&bytecode[size], token->value, sym_len); + memcpy(&bytecode[size], token->data.value, sym_len); size += sym_len; - hex_debug(ctx, "LOOKUP[%d]: %s", sym_len, token->value); + hex_debug(ctx, "LOOKUP[%d]: %s", sym_len, token->data.value); } break; case HEX_TOKEN_QUOTATION_START: { bytecode[size++] = HEX_OP_PUSHQT; - hex_item_t quotation; - if (hex_parse_quotation(ctx, &input, &quotation, position) != 0) - { - hex_error(ctx, "Failed to parse quotation"); - hex_free_token(token); - free(bytecode); - return 1; - } + hex_token_t quotation; + hex_tokenize_quotation(ctx, &input, &quotation, position); hex_debug(ctx, "PUSHQT[%d]: <start>", quotation.quotation_size); encode_length(&bytecode, &size, &capacity, quotation.quotation_size); - if (hex_quotation_bytecode(ctx, &quotation, input, position, &bytecode, &size, &capacity) != 0) + // TODO: Incorrect, here we already tokenized the quotation input. 
+ for (size_t i = 0; i < quotation.quotation_size; i++) { - hex_error(ctx, "Failed to generate bytecode for quotation"); - hex_free_token(token); - free(bytecode); - return 1; + if (hex_bytecode(ctx, input, output, output_size, position, open_quotations) != 0) + { + hex_error(ctx, "Failed to generate bytecode for quotation"); + hex_free_token(token); + free(bytecode); + return 1; + } } hex_debug(ctx, "PUSHQT[%d]: <end>", quotation.quotation_size); break; } + case HEX_TOKEN_QUOTATION_END: + open_quotations--; + break; default: // Ignore other tokens break; } hex_free_token(token); } - *output = bytecode; *output_size = size; return 0; } +int hex_tokenize_quotation(hex_context_t *ctx, const char **input, hex_token_t *result, hex_file_position_t *position) +{ + size_t capacity = 2; + size_t size = 0; + hex_token_t **quotation = (hex_token_t **)malloc(capacity * sizeof(hex_token_t *)); + int balanced = 1; + if (!quotation) + { + hex_error(ctx, "Memory allocation failed"); + return 1; + } + + hex_token_t *token; + while ((token = hex_next_token(ctx, input, position)) != NULL) + { + if (token->type == HEX_TOKEN_QUOTATION_END) + { + balanced--; + break; + } + + if (size >= capacity) + { + capacity *= 2; + quotation = (hex_token_t **)realloc(quotation, capacity * sizeof(hex_token_t *)); + if (!quotation) + { + hex_error(ctx, "(%d,%d), Memory allocation failed", position->line, position->column); + return 1; + } + } + + if (token->type == HEX_TOKEN_INTEGER || token->type == HEX_TOKEN_STRING) + { + + quotation[size] = token; + size++; + } + else if (token->type == HEX_TOKEN_SYMBOL) + { + token->position.filename = strdup(position->filename); + quotation[size] = token; + size++; + } + else if (token->type == HEX_TOKEN_QUOTATION_START) + { + hex_token_t nested_result; + if (hex_tokenize_quotation(ctx, input, &nested_result, position) != 0) + { + hex_free_token(token); + return 1; + } + quotation[size]->data.quotation_value = nested_result.data.quotation_value; + 
quotation[size]->quotation_size = nested_result.quotation_size; + size++; + } + else if (token->type == HEX_TOKEN_COMMENT) + { + // Ignore comments + } + else + { + hex_error(ctx, "(%d,%d) Unexpected token in quotation: %d", position->line, position->column, token->data.value); + hex_free_token(token); + return 1; + } + } + + if (balanced != 0) + { + hex_error(ctx, "(%d,%d) Unterminated quotation", position->line, position->column); + hex_free_token(token); + return 1; + } + + result->data.quotation_value = quotation; + result->quotation_size = size; + return 0; +} + +/* int hex_quotation_bytecode(hex_context_t *ctx, hex_item_t *quotation, const char *input, hex_file_position_t *position, uint8_t **bytecode, size_t *size, size_t *capacity) { for (size_t i = 0; i < quotation->quotation_size; ++i)

@@ -420,18 +503,18 @@ *size += len;

hex_debug(ctx, "PUSHST[%d]: %s", len, item->data.str_value); break; case HEX_TYPE_NATIVE_SYMBOL: - char *symbol = item->token->value; + char *symbol = item->token->data.value; (*bytecode)[(*size)++] = get_opcode(symbol); hex_debug(ctx, "NATSYM[1]: %s", symbol); break; case HEX_TYPE_USER_SYMBOL: // Lookup user symbol (*bytecode)[(*size)++] = HEX_OP_LOOKUP; - size_t sym_len = strlen(item->token->value); + size_t sym_len = strlen(item->token->data.value); encode_length(bytecode, size, capacity, sym_len); - memcpy(&(*bytecode)[*size], item->token->value, sym_len); + memcpy(&(*bytecode)[*size], item->token->data.value, sym_len); *size += sym_len; - hex_debug(ctx, "LOOKUP[%d]: %s", sym_len, item->token->value); + hex_debug(ctx, "LOOKUP[%d]: %s", sym_len, item->token->data.value); break; case HEX_TYPE_QUOTATION: (*bytecode)[(*size)++] = HEX_OP_PUSHQT;

@@ -453,3 +536,4 @@ }

} return 0; } +*/