all repos — hex @ 14696c9366a20d23ebd25f1ece9ffd2aa4739739

A tiny, minimalist, slightly-esoteric concatenative programming language.

String processing
h3rald h3rald@h3rald.com
Fri, 03 Jan 2025 08:05:22 +0100
commit

14696c9366a20d23ebd25f1ece9ffd2aa4739739

parent

1d6ca69e0ccfc5eff2c44c460b8c834db1a61e3e

1 files changed, 55 insertions(+), 100 deletions(-)

jump to
M src/utils.c

@@ -307,66 +307,45 @@ *ptr = '\0';

return str; } -char *hex_process_string(const char *value) -{ - int len = strlen(value); - char *processed_str = (char *)malloc(len + 1); - if (!processed_str) - { - return NULL; - } +char* hex_process_string(const char* input) { + size_t len = strlen(input); + size_t new_len = 0; - char *dst = processed_str; - const char *src = value; - while (*src) - { - if (*src == '\\' && *(src + 1)) - { - src++; - switch (*src) - { - case 'n': - *dst++ = '\n'; - break; - case 't': - *dst++ = '\t'; - break; - case 'r': - *dst++ = '\r'; - break; - case 'b': - *dst++ = '\b'; - break; - case 'f': - *dst++ = '\f'; - break; - case 'v': - *dst++ = '\v'; - break; - case '\"': - *dst++ = '\"'; + // First pass: Calculate the length of the escaped string + for (size_t i = 0; i < len; i++) { + switch (input[i]) { + case '\n': case '\t': case '\r': case '\\': + case '\"': case '\'': case '\v': case '\f': + case '\a': case '\b': + new_len += 2; // Escaped version adds one extra character break; - case '\\': - *dst++ = '\\'; - if (*(src + 2)) - { - *dst++ = *(src + 2); - src++; - } default: - *dst++ = '\\'; - *dst++ = *src; - break; - } + new_len++; } - else - { - *dst++ = *src; + } + + // Allocate memory for the escaped string + char* escaped = (char*)malloc(new_len + 1); + + // Second pass: Build the escaped string + size_t j = 0; + for (size_t i = 0; i < len; i++) { + switch (input[i]) { + case '\n': escaped[j++] = '\\'; escaped[j++] = 'n'; break; + case '\t': escaped[j++] = '\\'; escaped[j++] = 't'; break; + case '\r': escaped[j++] = '\\'; escaped[j++] = 'r'; break; + case '\\': escaped[j++] = '\\'; escaped[j++] = '\\'; break; + case '\"': escaped[j++] = '\\'; escaped[j++] = '\"'; break; + case '\'': escaped[j++] = '\\'; escaped[j++] = '\''; break; + case '\v': escaped[j++] = '\\'; escaped[j++] = 'v'; break; + case '\f': escaped[j++] = '\\'; escaped[j++] = 'f'; break; + case '\a': escaped[j++] = '\\'; escaped[j++] = 'a'; break; + case '\b': escaped[j++] = '\\'; escaped[j++] = 'b'; break; 
+ default: escaped[j++] = input[i]; } - src++; } - *dst = '\0'; - return processed_str; + escaped[j] = '\0'; // Null-terminate the string + return escaped; } size_t hex_min_bytes_to_encode_integer(int32_t value)

@@ -394,57 +373,33 @@ return 4; // Default to 4 bytes if no smaller size is found.

} char *hex_unescape_string(const char *input) { - if (input == NULL) { - return NULL; // Handle null input - } - - // Allocate memory for the output string (worst-case size: same as input) - char *output = (char *)malloc(strlen(input) + 1); - if (output == NULL) { - fprintf(stderr, "Memory allocation failed\n"); - return NULL; - } - - const char *src = input; - char *dst = output; + size_t len = strlen(input); + char* unescaped = (char*)malloc(len + 1); - while (*src) { - if (*src == '\\') { - if (*(src + 1) == '\\') { - // Handle escaped backslash - *dst = '\\'; - src += 2; // Skip both backslashes - if (*(src + 2)) { - *dst = *(src + 2); - src += 1; - } - } else { - // Handle other escape sequences - switch (*(src + 1)) { - case 'n': *dst = '\n'; break; - case 't': *dst = '\t'; break; - case 'r': *dst = '\r'; break; - case '\'': *dst = '\''; break; - case '\"': *dst = '\"'; break; - case 'b': *dst = '\b'; break; - case 'f': *dst = '\f'; break; - case 'v': *dst = '\v'; break; - default: - // Unknown escape sequence, copy as-is - *dst++ = '\\'; - *dst = *(src + 1); - } - src += 2; // Skip backslash and escape character + size_t j = 0; + for (size_t i = 0; i < len; i++) { + if (input[i] == '\\' && i + 1 < len) { + switch (input[i + 1]) { + case 'n': unescaped[j++] = '\n'; i++; break; + case 't': unescaped[j++] = '\t'; i++; break; + case 'r': unescaped[j++] = '\r'; i++; break; + case '\\': unescaped[j++] = '\\'; i++; break; + case '\"': unescaped[j++] = '\"'; i++; break; + case '\'': unescaped[j++] = '\''; i++; break; + case 'v': unescaped[j++] = '\v'; i++; break; + case 'f': unescaped[j++] = '\f'; i++; break; + case 'a': unescaped[j++] = '\a'; i++; break; + case 'b': unescaped[j++] = '\b'; i++; break; + default: unescaped[j++] = input[i]; } } else { - // Copy normal characters - *dst = *src; - src++; + unescaped[j++] = input[i]; } - dst++; } + unescaped[j] = '\0'; // Null-terminate the string + return unescaped; +} - *dst = '\0'; // Null-terminate the output string 
- return output; +// Normalize newlines f }