Merge branches/quickjs to trunk. This is the way.

git-svn-id: https://www.unprompted.com/svn/projects/tildefriends/trunk@3621 ed5197a5-7fde-0310-b194-c3ffbd925b24
This commit is contained in:
2021-01-02 18:10:00 +00:00
parent d293637741
commit 79022e1e1f
703 changed files with 419987 additions and 30640 deletions

View File

@@ -75,7 +75,7 @@ typedef struct {
int capture_count;
int total_capture_count; /* -1 = not computed yet */
int has_named_captures; /* -1 = don't know, 0 = no, 1 = yes */
void *mem_opaque;
void *opaque;
DynBuf group_names;
union {
char error_msg[TMP_BUF_SIZE];
@@ -110,12 +110,14 @@ static inline int is_digit(int c) {
return c >= '0' && c <= '9';
}
/* insert 'len' bytes at position 'pos' */
static void dbuf_insert(DynBuf *s, int pos, int len)
/* insert 'len' bytes at position 'pos'. Return < 0 if error. */
static int dbuf_insert(DynBuf *s, int pos, int len)
{
dbuf_realloc(s, s->size + len);
if (dbuf_realloc(s, s->size + len))
return -1;
memmove(s->buf + pos + len, s->buf + pos, s->size - pos);
s->size += len;
return 0;
}
/* canonicalize with the specific JS regexp rules */
@@ -228,7 +230,7 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c)
invert = c & 1;
c_pt = char_range_table[c >> 1];
len = *c_pt++;
cr_init(cr, s->mem_opaque, lre_realloc);
cr_init(cr, s->opaque, lre_realloc);
for(i = 0; i < len * 2; i++) {
if (cr_add_point(cr, c_pt[i]))
goto fail;
@@ -434,8 +436,14 @@ static int __attribute__((format(printf, 2, 3))) re_parse_error(REParseState *s,
return -1;
}
/* Return -1 in case of overflow */
static int parse_digits(const uint8_t **pp)
static int re_parse_out_of_memory(REParseState *s)
{
return re_parse_error(s, "out of memory");
}
/* If allow_overflow is false, return -1 in case of
overflow. Otherwise return INT32_MAX. */
static int parse_digits(const uint8_t **pp, BOOL allow_overflow)
{
const uint8_t *p;
uint64_t v;
@@ -448,8 +456,12 @@ static int parse_digits(const uint8_t **pp)
if (c < '0' || c > '9')
break;
v = v * 10 + c - '0';
if (v >= INT32_MAX)
return -1;
if (v >= INT32_MAX) {
if (allow_overflow)
v = INT32_MAX;
else
return -1;
}
p++;
}
*pp = p;
@@ -557,7 +569,8 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16)
}
}
break;
case '0' ... '7':
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
c -= '0';
if (allow_utf16 == 2) {
/* only accept \0 not followed by digit */
@@ -612,7 +625,7 @@ static int parse_unicode_property(REParseState *s, CharRange *cr,
p++;
q = name;
while (is_unicode_char(*p)) {
if ((q - name) > sizeof(name) - 1)
if ((q - name) >= sizeof(name) - 1)
goto unknown_property_name;
*q++ = *p++;
}
@@ -621,7 +634,7 @@ static int parse_unicode_property(REParseState *s, CharRange *cr,
if (*p == '=') {
p++;
while (is_unicode_char(*p)) {
if ((q - value) > sizeof(value) - 1)
if ((q - value) >= sizeof(value) - 1)
return re_parse_error(s, "unknown unicode property value");
*q++ = *p++;
}
@@ -638,7 +651,7 @@ static int parse_unicode_property(REParseState *s, CharRange *cr,
} else if (!strcmp(name, "Script_Extensions") || !strcmp(name, "scx")) {
script_ext = TRUE;
do_script:
cr_init(cr, s->mem_opaque, lre_realloc);
cr_init(cr, s->opaque, lre_realloc);
ret = unicode_script(cr, value, script_ext);
if (ret) {
cr_free(cr);
@@ -648,7 +661,7 @@ static int parse_unicode_property(REParseState *s, CharRange *cr,
goto out_of_memory;
}
} else if (!strcmp(name, "General_Category") || !strcmp(name, "gc")) {
cr_init(cr, s->mem_opaque, lre_realloc);
cr_init(cr, s->opaque, lre_realloc);
ret = unicode_general_category(cr, value);
if (ret) {
cr_free(cr);
@@ -658,7 +671,7 @@ static int parse_unicode_property(REParseState *s, CharRange *cr,
goto out_of_memory;
}
} else if (value[0] == '\0') {
cr_init(cr, s->mem_opaque, lre_realloc);
cr_init(cr, s->opaque, lre_realloc);
ret = unicode_general_category(cr, name);
if (ret == -1) {
cr_free(cr);
@@ -688,7 +701,7 @@ static int parse_unicode_property(REParseState *s, CharRange *cr,
*pp = p;
return 0;
out_of_memory:
return re_parse_error(s, "out of memory");
return re_parse_out_of_memory(s);
}
#endif /* CONFIG_ALL_UNICODE */
@@ -851,7 +864,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
CharRange cr1_s, *cr1 = &cr1_s;
BOOL invert;
cr_init(cr, s->mem_opaque, lre_realloc);
cr_init(cr, s->opaque, lre_realloc);
p = *pp;
p++; /* skip '[' */
invert = FALSE;
@@ -923,7 +936,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
*pp = p;
return 0;
memory_error:
re_parse_error(s, "out of memory");
re_parse_out_of_memory(s);
fail:
cr_free(cr);
return -1;
@@ -1134,9 +1147,13 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
}
}
capture_index++;
if (capture_index >= CAPTURE_COUNT_MAX)
goto done;
}
} else {
capture_index++;
if (capture_index >= CAPTURE_COUNT_MAX)
goto done;
}
break;
case '\\':
@@ -1150,6 +1167,7 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
break;
}
}
done:
if (capture_name)
return -1;
else
@@ -1225,14 +1243,27 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
re_emit_op(s, REOP_prev);
break;
case '{':
/* As an extension (see ES6 annex B), we accept '{' not
followed by digits as a normal atom */
if (!is_digit(p[1])) {
if (s->is_utf16)
goto invalid_quant_count;
if (s->is_utf16) {
return re_parse_error(s, "syntax error");
} else if (!is_digit(p[1])) {
/* Annex B: we accept '{' not followed by digits as a
normal atom */
goto parse_class_atom;
} else {
const uint8_t *p1 = p + 1;
/* Annex B: error if it is like a repetition count */
parse_digits(&p1, TRUE);
if (*p1 == ',') {
p1++;
if (is_digit(*p1)) {
parse_digits(&p1, TRUE);
}
}
if (*p1 != '}') {
goto parse_class_atom;
}
}
/* fall tru */
/* fall thru */
case '*':
case '+':
case '?':
@@ -1277,6 +1308,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
return -1;
re_emit_op(s, REOP_match);
/* jump after the 'match' after the lookahead is successful */
if (dbuf_error(&s->byte_code))
return -1;
put_u32(s->byte_code.buf + pos, s->byte_code.size - (pos + 4));
} else if (p[2] == '<') {
p += 3;
@@ -1383,11 +1416,13 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
}
}
goto normal_char;
case '1' ... '9':
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8':
case '9':
{
const uint8_t *q = ++p;
c = parse_digits(&p);
c = parse_digits(&p, FALSE);
if (c < 0 || (c >= s->capture_count && c >= re_count_captures(s))) {
if (!s->is_utf16) {
/* Annex B.1.4: accept legacy octal */
@@ -1407,7 +1442,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
}
goto normal_char;
}
return re_parse_error(s, "back reference out of range in reguar expression");
return re_parse_error(s, "back reference out of range in regular expression");
}
emit_back_reference:
last_atom_start = s->byte_code.size;
@@ -1484,32 +1519,38 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
quant_max = 1;
goto quantifier;
case '{':
/* As an extension (see ES6 annex B), we accept '{' not
followed by digits as a normal atom */
if (!is_digit(p[1])) {
if (s->is_utf16)
goto invalid_quant_count;
break;
}
p++;
quant_min = parse_digits(&p);
if (quant_min < 0) {
invalid_quant_count:
return re_parse_error(s, "invalid repetition count");
}
quant_max = quant_min;
if (*p == ',') {
p++;
if (is_digit(*p)) {
quant_max = parse_digits(&p);
if (quant_max < 0 || quant_max < quant_min)
{
const uint8_t *p1 = p;
/* As an extension (see ES6 annex B), we accept '{' not
followed by digits as a normal atom */
if (!is_digit(p[1])) {
if (s->is_utf16)
goto invalid_quant_count;
} else {
quant_max = INT32_MAX; /* infinity */
break;
}
p++;
quant_min = parse_digits(&p, TRUE);
quant_max = quant_min;
if (*p == ',') {
p++;
if (is_digit(*p)) {
quant_max = parse_digits(&p, TRUE);
if (quant_max < quant_min) {
invalid_quant_count:
return re_parse_error(s, "invalid repetition count");
}
} else {
quant_max = INT32_MAX; /* infinity */
}
}
if (*p != '}' && !s->is_utf16) {
/* Annex B: normal atom if invalid '{' syntax */
p = p1;
break;
}
if (re_parse_expect(s, &p, '}'))
return -1;
}
if (re_parse_expect(s, &p, '}'))
return -1;
quantifier:
greedy = TRUE;
if (*p == '?') {
@@ -1524,12 +1565,15 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
if (quant_max > 0) {
/* specific optimization for simple quantifiers */
if (dbuf_error(&s->byte_code))
goto out_of_memory;
len = re_is_simple_quantifier(s->byte_code.buf + last_atom_start,
s->byte_code.size - last_atom_start);
if (len > 0) {
re_emit_op(s, REOP_match);
dbuf_insert(&s->byte_code, last_atom_start, 17);
if (dbuf_insert(&s->byte_code, last_atom_start, 17))
goto out_of_memory;
pos = last_atom_start;
s->byte_code.buf[pos++] = REOP_simple_greedy_quant;
put_u32(&s->byte_code.buf[pos],
@@ -1545,6 +1589,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
}
}
if (dbuf_error(&s->byte_code))
goto out_of_memory;
add_zero_advance_check = (re_check_advance(s->byte_code.buf + last_atom_start,
s->byte_code.size - last_atom_start) == 0);
} else {
@@ -1558,7 +1604,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
/* need to reset the capture in case the atom is
not executed */
if (last_capture_count != s->capture_count) {
dbuf_insert(&s->byte_code, last_atom_start, 3);
if (dbuf_insert(&s->byte_code, last_atom_start, 3))
goto out_of_memory;
s->byte_code.buf[last_atom_start++] = REOP_save_reset;
s->byte_code.buf[last_atom_start++] = last_capture_count;
s->byte_code.buf[last_atom_start++] = s->capture_count - 1;
@@ -1566,12 +1613,14 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
if (quant_max == 0) {
s->byte_code.size = last_atom_start;
} else if (quant_max == 1) {
dbuf_insert(&s->byte_code, last_atom_start, 5);
if (dbuf_insert(&s->byte_code, last_atom_start, 5))
goto out_of_memory;
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
greedy;
put_u32(s->byte_code.buf + last_atom_start + 1, len);
} else if (quant_max == INT32_MAX) {
dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check);
if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check))
goto out_of_memory;
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
greedy;
put_u32(s->byte_code.buf + last_atom_start + 1,
@@ -1587,7 +1636,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
re_emit_goto(s, REOP_goto, last_atom_start);
}
} else {
dbuf_insert(&s->byte_code, last_atom_start, 10);
if (dbuf_insert(&s->byte_code, last_atom_start, 10))
goto out_of_memory;
pos = last_atom_start;
s->byte_code.buf[pos++] = REOP_push_i32;
put_u32(s->byte_code.buf + pos, quant_max);
@@ -1605,7 +1655,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
if (quant_min == 1) {
/* nothing to add */
} else {
dbuf_insert(&s->byte_code, last_atom_start, 5);
if (dbuf_insert(&s->byte_code, last_atom_start, 5))
goto out_of_memory;
s->byte_code.buf[last_atom_start] = REOP_push_i32;
put_u32(s->byte_code.buf + last_atom_start + 1,
quant_min);
@@ -1646,6 +1697,8 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
done:
s->buf_ptr = p;
return 0;
out_of_memory:
return re_parse_out_of_memory(s);
}
static int re_parse_alternative(REParseState *s, BOOL is_backward_dir)
@@ -1686,6 +1739,9 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
{
int start, len, pos;
if (lre_check_stack_overflow(s->opaque, 0))
return re_parse_error(s, "stack overflow");
start = s->byte_code.size;
if (re_parse_alternative(s, is_backward_dir))
return -1;
@@ -1695,7 +1751,9 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir)
len = s->byte_code.size - start;
/* insert a split before the first alternative */
dbuf_insert(&s->byte_code, start, 5);
if (dbuf_insert(&s->byte_code, start, 5)) {
return re_parse_out_of_memory(s);
}
s->byte_code.buf[start] = REOP_split_next_first;
put_u32(s->byte_code.buf + start + 1, len + 5);
@@ -1769,7 +1827,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
BOOL is_sticky;
memset(s, 0, sizeof(*s));
s->mem_opaque = opaque;
s->opaque = opaque;
s->buf_ptr = (const uint8_t *)buf;
s->buf_end = s->buf_ptr + buf_len;
s->buf_start = s->buf_ptr;
@@ -1820,7 +1878,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
}
if (dbuf_error(&s->byte_code)) {
re_parse_error(s, "out of memory");
re_parse_out_of_memory(s);
goto error;
}
@@ -2486,6 +2544,17 @@ int lre_get_flags(const uint8_t *bc_buf)
return bc_buf[RE_HEADER_FLAGS];
}
/* Return NULL if no group names. Otherwise, return a pointer to
'capture_count - 1' zero terminated UTF-8 strings. */
const char *lre_get_groupnames(const uint8_t *bc_buf)
{
uint32_t re_bytecode_len;
if ((lre_get_flags(bc_buf) & LRE_FLAG_NAMED_GROUPS) == 0)
return NULL;
re_bytecode_len = get_u32(bc_buf + 3);
return (const char *)(bc_buf + 7 + re_bytecode_len);
}
#ifdef TEST
BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size)