# vendor/slre.nim
#
# Copyright (c) 2004-2005 Sergey Lyubka <valenok@gmail.com>
# All rights reserved
#
# "THE BEER-WARE LICENSE" (Revision 42):
# Sergey Lyubka wrote this file. As long as you retain this notice you
# can do whatever you want with this stuff. If we meet some day, and you think
# this stuff is worth it, you can buy me a beer in return.
#
#
# This is a regular expression library that implements a subset of Perl RE.
# Please refer to http://slre.sourceforge.net for detailed description.
#
# Usage example (parsing HTTP request):
#
# struct slre slre;
# struct cap captures[4 + 1]; // Number of bracket pairs + 1
# ...
#
# slre_compile(&slre,"^(GET|POST) (\S+) HTTP/(\S+?)\r\n");
#
# if (slre_match(&slre, buf, len, captures)) {
# printf("Request line length: %d\n", captures[0].len);
# printf("Method: %.*s\n", captures[1].len, captures[1].ptr);
# printf("URI: %.*s\n", captures[2].len, captures[2].ptr);
# }
#
# Supported syntax:
# ^ Match beginning of a buffer
# $ Match end of a buffer
# () Grouping and substring capturing
# [...] Match any character from set
# [^...] Match any character but ones from set
# \s Match whitespace
# \S Match non-whitespace
# \d Match decimal digit
# \r Match carriage return
# \n Match newline
# + Match one or more times (greedy)
# +? Match one or more times (non-greedy)
# * Match zero or more times (greedy)
# *? Match zero or more times (non-greedy)
# ? Match zero or once
# \xDD Match byte with hex value 0xDD
# \meta Match one of the meta characters: ^$().[*+?\
#
# Compile and link the bundled SLRE C source together with this module.
{.compile: "vendor/slre/libslre.c".}
#
# Compiled regular expression
#
type
  slre* = object
    ## Compiled regular expression. Filled in by `slre_compile` and passed
    ## by pointer to `slre_match`.
    code*, data*: array[256, cuchar]
    code_size*, data_size*: cint
    num_caps*: cint   ## Number of bracket pairs
    anchored*: cint   ## Must match from string start
    err_str*: cstring ## Error string
#
# Captured substring
#
type
  cap* = object
    ## One captured substring, described as a pointer plus a length.
    value*: cstring ## Pointer to the substring
    len*: cint      ## Substring length
#
# Compile regular expression. If success, 1 is returned.
# If error, 0 is returned and slre.err_str points to the error message.
#
proc slre_compile(a2: ptr slre; re: cstring): cint {.importc, cdecl.}
  ## Compiles the regular expression `re` into `a2`.
  ##
  ## Returns 1 on success. Returns 0 on error, in which case `a2.err_str`
  ## points to the error message.
  ##
  ## Declared `cdecl` explicitly so that the call uses the C calling
  ## convention instead of relying on Nim's default convention.
#
# Return 1 if match, 0 if no match.
# If `captured_substrings' array is not NULL, then it is filled with the
# values of captured substrings. captured_substrings[0] element is always
# a full matched substring. The round bracket captures start from
# captured_substrings[1].
# It is assumed that the size of captured_substrings array is enough to
# hold all captures. The caller function must make sure it is! So, the
# array_size = number_of_round_bracket_pairs + 1
#
proc slre_match(a2: ptr slre; buf: cstring; buf_len: cint;
                captured_substrings: openarray[cap]): cint {.importc, cdecl.}
  ## Matches the first `buf_len` bytes of `buf` against the compiled
  ## expression `a2`.
  ##
  ## Returns 1 on a match and 0 otherwise. `captured_substrings[0]` receives
  ## the full match and the round-bracket captures start at index 1; the
  ## caller must supply at least `number_of_bracket_pairs + 1` elements.
  ##
  ## Declared `cdecl` explicitly so that the call uses the C calling
  ## convention instead of relying on Nim's default convention.
  ##
  ## NOTE(review): an `openarray` parameter on an imported proc is passed as
  ## a pointer plus a hidden length argument, while the C function is
  ## documented as taking only the array - confirm this is intended.
# High level API
from strutils import contains, replace, parseInt
from sequtils import delete
proc match*(s: string, re: string): seq[string] =
  ## Matches `s` against the regular expression `re`.
  ##
  ## Returns the matched substrings: the full match first, followed by every
  ## round-bracket capture that received a value. Returns an empty sequence
  ## when `s` does not match.
  ##
  ## Raises `ValueError` carrying the SLRE error string when `re` cannot be
  ## compiled.
  # Keep the compiled expression on the stack: it is only needed for the
  # duration of this call, so there is nothing to free afterwards.
  var compiled: slre
  if slre_compile(addr compiled, re) != 1:
    raise newException(ValueError, $(compiled.err_str))
  # One slot for the full match plus one per bracket pair, so the matcher
  # never writes past the end of the capture buffer.
  var captures = newSeq[cap](int(compiled.num_caps) + 1)
  result = newSeq[string](0)
  if slre_match(addr compiled, s.cstring, s.len.cint, captures) == 1:
    for c in captures:
      if c.value != nil:
        # Copy exactly `len` bytes of the capture instead of converting the
        # whole remainder of the subject and trimming it afterwards.
        let n = int(c.len)
        var piece = newString(n)
        if n > 0:
          copyMem(addr piece[0], cast[pointer](c.value), n)
        result.add piece
proc expandCaptures(tmpl: string, caps: openArray[string]): string =
  ## Returns `tmpl` with every `$N` (N being one decimal digit) replaced by
  ## `caps[N]`. A placeholder whose index has no capture is copied verbatim.
  result = newStringOfCap(tmpl.len)
  var i = 0
  while i < tmpl.len:
    if tmpl[i] == '$' and i + 1 < tmpl.len and tmpl[i + 1] in {'0'..'9'}:
      let idx = ord(tmpl[i + 1]) - ord('0')
      if idx < caps.len:
        result.add caps[idx]
        inc i, 2
        continue
    result.add tmpl[i]
    inc i

proc gsub*(s_find: string, re: string, s_replace: string): string =
  ## Replaces the text matched by `re` in `s_find` with `s_replace`.
  ##
  ## When `re` contains round-bracket groups, `$N` placeholders in
  ## `s_replace` are expanded to the corresponding capture (`$0` being the
  ## full match). The matched text is then substituted literally, so every
  ## occurrence of that exact text in `s_find` is replaced.
  ##
  ## Returns `s_find` unchanged when `re` does not match. Raises `ValueError`
  ## when `re` cannot be compiled.
  let matches = s_find.match(re)
  if matches.len == 0:
    return s_find
  # Expand placeholders in the replacement text only, in a single pass:
  # substituted capture text is never rescanned, and `$N` sequences that
  # belong to the subject string are left untouched.
  let replacement =
    if matches.len > 1: expandCaptures(s_replace, matches)
    else: s_replace
  result = s_find.replace(matches[0], replacement)
|