2023-11-07 17:30:39 +00:00
|
|
|
|
/* Copyright libuv contributors. All rights reserved.
|
2021-01-02 18:10:00 +00:00
|
|
|
|
*
|
|
|
|
|
* Permission to use, copy, modify, and/or distribute this software for any
|
|
|
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
|
|
|
* copyright notice and this permission notice appear in all copies.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
|
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
|
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
|
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* Derived from https://github.com/bnoordhuis/punycode
|
|
|
|
|
* but updated to support IDNA 2008.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "uv.h"
|
2023-11-07 17:30:39 +00:00
|
|
|
|
#include "uv-common.h"
|
2021-01-02 18:10:00 +00:00
|
|
|
|
#include "idna.h"
|
2021-07-27 22:08:18 +00:00
|
|
|
|
#include <assert.h>
|
2021-01-02 18:10:00 +00:00
|
|
|
|
#include <string.h>
|
2022-03-07 21:34:07 +00:00
|
|
|
|
#include <limits.h> /* UINT_MAX */
|
2021-01-02 18:10:00 +00:00
|
|
|
|
|
2023-11-07 17:30:39 +00:00
|
|
|
|
|
|
|
|
|
static int32_t uv__wtf8_decode1(const char** input) {
|
|
|
|
|
uint32_t code_point;
|
|
|
|
|
uint8_t b1;
|
|
|
|
|
uint8_t b2;
|
|
|
|
|
uint8_t b3;
|
|
|
|
|
uint8_t b4;
|
|
|
|
|
|
|
|
|
|
b1 = **input;
|
|
|
|
|
if (b1 <= 0x7F)
|
|
|
|
|
return b1; /* ASCII code point */
|
|
|
|
|
if (b1 < 0xC2)
|
|
|
|
|
return -1; /* invalid: continuation byte */
|
|
|
|
|
code_point = b1;
|
|
|
|
|
|
|
|
|
|
b2 = *++*input;
|
|
|
|
|
if ((b2 & 0xC0) != 0x80)
|
|
|
|
|
return -1; /* invalid: not a continuation byte */
|
|
|
|
|
code_point = (code_point << 6) | (b2 & 0x3F);
|
|
|
|
|
if (b1 <= 0xDF)
|
|
|
|
|
return 0x7FF & code_point; /* two-byte character */
|
|
|
|
|
|
|
|
|
|
b3 = *++*input;
|
|
|
|
|
if ((b3 & 0xC0) != 0x80)
|
|
|
|
|
return -1; /* invalid: not a continuation byte */
|
|
|
|
|
code_point = (code_point << 6) | (b3 & 0x3F);
|
|
|
|
|
if (b1 <= 0xEF)
|
|
|
|
|
return 0xFFFF & code_point; /* three-byte character */
|
|
|
|
|
|
|
|
|
|
b4 = *++*input;
|
|
|
|
|
if ((b4 & 0xC0) != 0x80)
|
|
|
|
|
return -1; /* invalid: not a continuation byte */
|
|
|
|
|
code_point = (code_point << 6) | (b4 & 0x3F);
|
|
|
|
|
if (b1 <= 0xF4) {
|
|
|
|
|
code_point &= 0x1FFFFF;
|
|
|
|
|
if (code_point <= 0x10FFFF)
|
|
|
|
|
return code_point; /* four-byte character */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* code point too large */
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2021-01-02 18:10:00 +00:00
|
|
|
|
static unsigned uv__utf8_decode1_slow(const char** p,
|
|
|
|
|
const char* pe,
|
|
|
|
|
unsigned a) {
|
|
|
|
|
unsigned b;
|
|
|
|
|
unsigned c;
|
|
|
|
|
unsigned d;
|
|
|
|
|
unsigned min;
|
|
|
|
|
|
|
|
|
|
if (a > 0xF7)
|
|
|
|
|
return -1;
|
|
|
|
|
|
2021-07-27 22:08:18 +00:00
|
|
|
|
switch (pe - *p) {
|
2021-01-02 18:10:00 +00:00
|
|
|
|
default:
|
|
|
|
|
if (a > 0xEF) {
|
|
|
|
|
min = 0x10000;
|
|
|
|
|
a = a & 7;
|
|
|
|
|
b = (unsigned char) *(*p)++;
|
|
|
|
|
c = (unsigned char) *(*p)++;
|
|
|
|
|
d = (unsigned char) *(*p)++;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
/* Fall through. */
|
|
|
|
|
case 2:
|
|
|
|
|
if (a > 0xDF) {
|
|
|
|
|
min = 0x800;
|
|
|
|
|
b = 0x80 | (a & 15);
|
|
|
|
|
c = (unsigned char) *(*p)++;
|
|
|
|
|
d = (unsigned char) *(*p)++;
|
|
|
|
|
a = 0;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
/* Fall through. */
|
|
|
|
|
case 1:
|
|
|
|
|
if (a > 0xBF) {
|
|
|
|
|
min = 0x80;
|
|
|
|
|
b = 0x80;
|
|
|
|
|
c = 0x80 | (a & 31);
|
|
|
|
|
d = (unsigned char) *(*p)++;
|
|
|
|
|
a = 0;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2021-07-27 22:08:18 +00:00
|
|
|
|
/* Fall through. */
|
|
|
|
|
case 0:
|
2021-01-02 18:10:00 +00:00
|
|
|
|
return -1; /* Invalid continuation byte. */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (0x80 != (0xC0 & (b ^ c ^ d)))
|
|
|
|
|
return -1; /* Invalid sequence. */
|
|
|
|
|
|
|
|
|
|
b &= 63;
|
|
|
|
|
c &= 63;
|
|
|
|
|
d &= 63;
|
|
|
|
|
a = (a << 18) | (b << 12) | (c << 6) | d;
|
|
|
|
|
|
|
|
|
|
if (a < min)
|
|
|
|
|
return -1; /* Overlong sequence. */
|
|
|
|
|
|
|
|
|
|
if (a > 0x10FFFF)
|
|
|
|
|
return -1; /* Four-byte sequence > U+10FFFF. */
|
|
|
|
|
|
|
|
|
|
if (a >= 0xD800 && a <= 0xDFFF)
|
|
|
|
|
return -1; /* Surrogate pair. */
|
|
|
|
|
|
|
|
|
|
return a;
|
|
|
|
|
}
|
|
|
|
|
|
2023-11-07 17:30:39 +00:00
|
|
|
|
|
2021-01-02 18:10:00 +00:00
|
|
|
|
unsigned uv__utf8_decode1(const char** p, const char* pe) {
|
|
|
|
|
unsigned a;
|
|
|
|
|
|
2021-07-27 22:08:18 +00:00
|
|
|
|
assert(*p < pe);
|
|
|
|
|
|
2021-01-02 18:10:00 +00:00
|
|
|
|
a = (unsigned char) *(*p)++;
|
|
|
|
|
|
|
|
|
|
if (a < 128)
|
|
|
|
|
return a; /* ASCII, common case. */
|
|
|
|
|
|
|
|
|
|
return uv__utf8_decode1_slow(p, pe, a);
|
|
|
|
|
}
|
|
|
|
|
|
2023-11-07 17:30:39 +00:00
|
|
|
|
|
2021-01-02 18:10:00 +00:00
|
|
|
|
static int uv__idna_toascii_label(const char* s, const char* se,
|
|
|
|
|
char** d, char* de) {
|
|
|
|
|
static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
|
|
|
|
|
const char* ss;
|
|
|
|
|
unsigned c;
|
|
|
|
|
unsigned h;
|
|
|
|
|
unsigned k;
|
|
|
|
|
unsigned n;
|
|
|
|
|
unsigned m;
|
|
|
|
|
unsigned q;
|
|
|
|
|
unsigned t;
|
|
|
|
|
unsigned x;
|
|
|
|
|
unsigned y;
|
|
|
|
|
unsigned bias;
|
|
|
|
|
unsigned delta;
|
|
|
|
|
unsigned todo;
|
|
|
|
|
int first;
|
|
|
|
|
|
|
|
|
|
h = 0;
|
|
|
|
|
ss = s;
|
|
|
|
|
todo = 0;
|
|
|
|
|
|
2021-07-27 22:08:18 +00:00
|
|
|
|
/* Note: after this loop we've visited all UTF-8 characters and know
|
|
|
|
|
* they're legal so we no longer need to check for decode errors.
|
|
|
|
|
*/
|
|
|
|
|
while (s < se) {
|
|
|
|
|
c = uv__utf8_decode1(&s, se);
|
|
|
|
|
|
2022-03-07 21:34:07 +00:00
|
|
|
|
if (c == UINT_MAX)
|
2021-07-27 22:08:18 +00:00
|
|
|
|
return UV_EINVAL;
|
|
|
|
|
|
2021-01-02 18:10:00 +00:00
|
|
|
|
if (c < 128)
|
|
|
|
|
h++;
|
|
|
|
|
else
|
|
|
|
|
todo++;
|
|
|
|
|
}
|
|
|
|
|
|
2021-07-27 22:08:18 +00:00
|
|
|
|
/* Only write "xn--" when there are non-ASCII characters. */
|
2021-01-02 18:10:00 +00:00
|
|
|
|
if (todo > 0) {
|
|
|
|
|
if (*d < de) *(*d)++ = 'x';
|
|
|
|
|
if (*d < de) *(*d)++ = 'n';
|
|
|
|
|
if (*d < de) *(*d)++ = '-';
|
|
|
|
|
if (*d < de) *(*d)++ = '-';
|
|
|
|
|
}
|
|
|
|
|
|
2021-07-27 22:08:18 +00:00
|
|
|
|
/* Write ASCII characters. */
|
2021-01-02 18:10:00 +00:00
|
|
|
|
x = 0;
|
|
|
|
|
s = ss;
|
2021-07-27 22:08:18 +00:00
|
|
|
|
while (s < se) {
|
|
|
|
|
c = uv__utf8_decode1(&s, se);
|
2022-03-07 21:34:07 +00:00
|
|
|
|
assert(c != UINT_MAX);
|
2021-07-27 22:08:18 +00:00
|
|
|
|
|
2021-01-02 18:10:00 +00:00
|
|
|
|
if (c > 127)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
if (*d < de)
|
|
|
|
|
*(*d)++ = c;
|
|
|
|
|
|
|
|
|
|
if (++x == h)
|
|
|
|
|
break; /* Visited all ASCII characters. */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (todo == 0)
|
|
|
|
|
return h;
|
|
|
|
|
|
|
|
|
|
/* Only write separator when we've written ASCII characters first. */
|
|
|
|
|
if (h > 0)
|
|
|
|
|
if (*d < de)
|
|
|
|
|
*(*d)++ = '-';
|
|
|
|
|
|
|
|
|
|
n = 128;
|
|
|
|
|
bias = 72;
|
|
|
|
|
delta = 0;
|
|
|
|
|
first = 1;
|
|
|
|
|
|
|
|
|
|
while (todo > 0) {
|
|
|
|
|
m = -1;
|
|
|
|
|
s = ss;
|
2021-07-27 22:08:18 +00:00
|
|
|
|
|
|
|
|
|
while (s < se) {
|
|
|
|
|
c = uv__utf8_decode1(&s, se);
|
2022-03-07 21:34:07 +00:00
|
|
|
|
assert(c != UINT_MAX);
|
2021-07-27 22:08:18 +00:00
|
|
|
|
|
2021-01-02 18:10:00 +00:00
|
|
|
|
if (c >= n)
|
|
|
|
|
if (c < m)
|
|
|
|
|
m = c;
|
2021-07-27 22:08:18 +00:00
|
|
|
|
}
|
2021-01-02 18:10:00 +00:00
|
|
|
|
|
|
|
|
|
x = m - n;
|
|
|
|
|
y = h + 1;
|
|
|
|
|
|
|
|
|
|
if (x > ~delta / y)
|
|
|
|
|
return UV_E2BIG; /* Overflow. */
|
|
|
|
|
|
|
|
|
|
delta += x * y;
|
|
|
|
|
n = m;
|
|
|
|
|
|
|
|
|
|
s = ss;
|
2021-07-27 22:08:18 +00:00
|
|
|
|
while (s < se) {
|
|
|
|
|
c = uv__utf8_decode1(&s, se);
|
2022-03-07 21:34:07 +00:00
|
|
|
|
assert(c != UINT_MAX);
|
2021-07-27 22:08:18 +00:00
|
|
|
|
|
2021-01-02 18:10:00 +00:00
|
|
|
|
if (c < n)
|
|
|
|
|
if (++delta == 0)
|
|
|
|
|
return UV_E2BIG; /* Overflow. */
|
|
|
|
|
|
|
|
|
|
if (c != n)
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
for (k = 36, q = delta; /* empty */; k += 36) {
|
|
|
|
|
t = 1;
|
|
|
|
|
|
|
|
|
|
if (k > bias)
|
|
|
|
|
t = k - bias;
|
|
|
|
|
|
|
|
|
|
if (t > 26)
|
|
|
|
|
t = 26;
|
|
|
|
|
|
|
|
|
|
if (q < t)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
/* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
|
|
|
|
|
* 10 <= y <= 35, we can optimize the long division
|
|
|
|
|
* into a table-based reciprocal multiplication.
|
|
|
|
|
*/
|
|
|
|
|
x = q - t;
|
|
|
|
|
y = 36 - t; /* 10 <= y <= 35 since 1 <= t <= 26. */
|
|
|
|
|
q = x / y;
|
|
|
|
|
t = t + x % y; /* 1 <= t <= 35 because of y. */
|
|
|
|
|
|
|
|
|
|
if (*d < de)
|
|
|
|
|
*(*d)++ = alphabet[t];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (*d < de)
|
|
|
|
|
*(*d)++ = alphabet[q];
|
|
|
|
|
|
|
|
|
|
delta /= 2;
|
|
|
|
|
|
|
|
|
|
if (first) {
|
|
|
|
|
delta /= 350;
|
|
|
|
|
first = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* No overflow check is needed because |delta| was just
|
|
|
|
|
* divided by 2 and |delta+delta >= delta + delta/h|.
|
|
|
|
|
*/
|
|
|
|
|
h++;
|
|
|
|
|
delta += delta / h;
|
|
|
|
|
|
|
|
|
|
for (bias = 0; delta > 35 * 26 / 2; bias += 36)
|
|
|
|
|
delta /= 35;
|
|
|
|
|
|
|
|
|
|
bias += 36 * delta / (delta + 38);
|
|
|
|
|
delta = 0;
|
|
|
|
|
todo--;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
delta++;
|
|
|
|
|
n++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2023-11-07 17:30:39 +00:00
|
|
|
|
|
|
|
|
|
ssize_t uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
|
2021-01-02 18:10:00 +00:00
|
|
|
|
const char* si;
|
|
|
|
|
const char* st;
|
|
|
|
|
unsigned c;
|
|
|
|
|
char* ds;
|
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
|
|
ds = d;
|
|
|
|
|
|
2021-07-27 22:08:18 +00:00
|
|
|
|
si = s;
|
|
|
|
|
while (si < se) {
|
2021-01-02 18:10:00 +00:00
|
|
|
|
st = si;
|
|
|
|
|
c = uv__utf8_decode1(&si, se);
|
|
|
|
|
|
2022-03-07 21:34:07 +00:00
|
|
|
|
if (c == UINT_MAX)
|
2021-07-27 22:08:18 +00:00
|
|
|
|
return UV_EINVAL;
|
|
|
|
|
|
2021-01-02 18:10:00 +00:00
|
|
|
|
if (c != '.')
|
|
|
|
|
if (c != 0x3002) /* 。 */
|
|
|
|
|
if (c != 0xFF0E) /* . */
|
|
|
|
|
if (c != 0xFF61) /* 。 */
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
rc = uv__idna_toascii_label(s, st, &d, de);
|
|
|
|
|
|
|
|
|
|
if (rc < 0)
|
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
|
|
if (d < de)
|
|
|
|
|
*d++ = '.';
|
|
|
|
|
|
|
|
|
|
s = si;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (s < se) {
|
|
|
|
|
rc = uv__idna_toascii_label(s, se, &d, de);
|
|
|
|
|
|
|
|
|
|
if (rc < 0)
|
|
|
|
|
return rc;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (d < de)
|
|
|
|
|
*d++ = '\0';
|
|
|
|
|
|
|
|
|
|
return d - ds; /* Number of bytes written. */
|
|
|
|
|
}
|
2023-11-07 17:30:39 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ssize_t uv_wtf8_length_as_utf16(const char* source_ptr) {
|
|
|
|
|
size_t w_target_len = 0;
|
|
|
|
|
int32_t code_point;
|
|
|
|
|
|
|
|
|
|
do {
|
|
|
|
|
code_point = uv__wtf8_decode1(&source_ptr);
|
|
|
|
|
if (code_point < 0)
|
|
|
|
|
return -1;
|
|
|
|
|
if (code_point > 0xFFFF)
|
|
|
|
|
w_target_len++;
|
|
|
|
|
w_target_len++;
|
|
|
|
|
} while (*source_ptr++);
|
|
|
|
|
|
|
|
|
|
return w_target_len;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void uv_wtf8_to_utf16(const char* source_ptr,
|
|
|
|
|
uint16_t* w_target,
|
|
|
|
|
size_t w_target_len) {
|
|
|
|
|
int32_t code_point;
|
|
|
|
|
|
|
|
|
|
do {
|
|
|
|
|
code_point = uv__wtf8_decode1(&source_ptr);
|
|
|
|
|
/* uv_wtf8_length_as_utf16 should have been called and checked first. */
|
|
|
|
|
assert(code_point >= 0);
|
|
|
|
|
if (code_point > 0x10000) {
|
|
|
|
|
assert(code_point < 0x10FFFF);
|
|
|
|
|
*w_target++ = (((code_point - 0x10000) >> 10) + 0xD800);
|
|
|
|
|
*w_target++ = ((code_point - 0x10000) & 0x3FF) + 0xDC00;
|
|
|
|
|
w_target_len -= 2;
|
|
|
|
|
} else {
|
|
|
|
|
*w_target++ = code_point;
|
|
|
|
|
w_target_len -= 1;
|
|
|
|
|
}
|
|
|
|
|
} while (*source_ptr++);
|
|
|
|
|
|
|
|
|
|
assert(w_target_len == 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int32_t uv__get_surrogate_value(const uint16_t* w_source_ptr,
|
|
|
|
|
ssize_t w_source_len) {
|
|
|
|
|
uint16_t u;
|
|
|
|
|
uint16_t next;
|
|
|
|
|
|
|
|
|
|
u = w_source_ptr[0];
|
|
|
|
|
if (u >= 0xD800 && u <= 0xDBFF && w_source_len != 1) {
|
|
|
|
|
next = w_source_ptr[1];
|
|
|
|
|
if (next >= 0xDC00 && next <= 0xDFFF)
|
|
|
|
|
return 0x10000 + ((u - 0xD800) << 10) + (next - 0xDC00);
|
|
|
|
|
}
|
|
|
|
|
return u;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
size_t uv_utf16_length_as_wtf8(const uint16_t* w_source_ptr,
|
|
|
|
|
ssize_t w_source_len) {
|
|
|
|
|
size_t target_len;
|
|
|
|
|
int32_t code_point;
|
|
|
|
|
|
|
|
|
|
target_len = 0;
|
|
|
|
|
while (w_source_len) {
|
|
|
|
|
code_point = uv__get_surrogate_value(w_source_ptr, w_source_len);
|
|
|
|
|
/* Can be invalid UTF-8 but must be valid WTF-8. */
|
|
|
|
|
assert(code_point >= 0);
|
|
|
|
|
if (w_source_len < 0 && code_point == 0)
|
|
|
|
|
break;
|
|
|
|
|
if (code_point < 0x80)
|
|
|
|
|
target_len += 1;
|
|
|
|
|
else if (code_point < 0x800)
|
|
|
|
|
target_len += 2;
|
|
|
|
|
else if (code_point < 0x10000)
|
|
|
|
|
target_len += 3;
|
|
|
|
|
else {
|
|
|
|
|
target_len += 4;
|
|
|
|
|
w_source_ptr++;
|
|
|
|
|
if (w_source_len > 0)
|
|
|
|
|
w_source_len--;
|
|
|
|
|
}
|
|
|
|
|
w_source_ptr++;
|
|
|
|
|
if (w_source_len > 0)
|
|
|
|
|
w_source_len--;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return target_len;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int uv_utf16_to_wtf8(const uint16_t* w_source_ptr,
|
|
|
|
|
ssize_t w_source_len,
|
|
|
|
|
char** target_ptr,
|
|
|
|
|
size_t* target_len_ptr) {
|
|
|
|
|
size_t target_len;
|
|
|
|
|
char* target;
|
|
|
|
|
char* target_end;
|
|
|
|
|
int32_t code_point;
|
|
|
|
|
|
|
|
|
|
/* If *target_ptr is provided, then *target_len_ptr must be its length
|
|
|
|
|
* (excluding space for NUL), otherwise we will compute the target_len_ptr
|
|
|
|
|
* length and may return a new allocation in *target_ptr if target_ptr is
|
|
|
|
|
* provided. */
|
|
|
|
|
if (target_ptr == NULL || *target_ptr == NULL) {
|
|
|
|
|
target_len = uv_utf16_length_as_wtf8(w_source_ptr, w_source_len);
|
|
|
|
|
if (target_len_ptr != NULL)
|
|
|
|
|
*target_len_ptr = target_len;
|
|
|
|
|
} else {
|
|
|
|
|
target_len = *target_len_ptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (target_ptr == NULL)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
if (*target_ptr == NULL) {
|
|
|
|
|
target = uv__malloc(target_len + 1);
|
|
|
|
|
if (target == NULL) {
|
|
|
|
|
return UV_ENOMEM;
|
|
|
|
|
}
|
|
|
|
|
*target_ptr = target;
|
|
|
|
|
} else {
|
|
|
|
|
target = *target_ptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
target_end = target + target_len;
|
|
|
|
|
|
|
|
|
|
while (target != target_end && w_source_len) {
|
|
|
|
|
code_point = uv__get_surrogate_value(w_source_ptr, w_source_len);
|
|
|
|
|
/* Can be invalid UTF-8 but must be valid WTF-8. */
|
|
|
|
|
assert(code_point >= 0);
|
|
|
|
|
if (w_source_len < 0 && code_point == 0) {
|
|
|
|
|
w_source_len = 0;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (code_point < 0x80) {
|
|
|
|
|
*target++ = code_point;
|
|
|
|
|
} else if (code_point < 0x800) {
|
|
|
|
|
*target++ = 0xC0 | (code_point >> 6);
|
|
|
|
|
if (target == target_end)
|
|
|
|
|
break;
|
|
|
|
|
*target++ = 0x80 | (code_point & 0x3F);
|
|
|
|
|
} else if (code_point < 0x10000) {
|
|
|
|
|
*target++ = 0xE0 | (code_point >> 12);
|
|
|
|
|
if (target == target_end)
|
|
|
|
|
break;
|
|
|
|
|
*target++ = 0x80 | ((code_point >> 6) & 0x3F);
|
|
|
|
|
if (target == target_end)
|
|
|
|
|
break;
|
|
|
|
|
*target++ = 0x80 | (code_point & 0x3F);
|
|
|
|
|
} else {
|
|
|
|
|
*target++ = 0xF0 | (code_point >> 18);
|
|
|
|
|
if (target == target_end)
|
|
|
|
|
break;
|
|
|
|
|
*target++ = 0x80 | ((code_point >> 12) & 0x3F);
|
|
|
|
|
if (target == target_end)
|
|
|
|
|
break;
|
|
|
|
|
*target++ = 0x80 | ((code_point >> 6) & 0x3F);
|
|
|
|
|
if (target == target_end)
|
|
|
|
|
break;
|
|
|
|
|
*target++ = 0x80 | (code_point & 0x3F);
|
|
|
|
|
/* uv__get_surrogate_value consumed 2 input characters */
|
|
|
|
|
w_source_ptr++;
|
|
|
|
|
if (w_source_len > 0)
|
|
|
|
|
w_source_len--;
|
|
|
|
|
}
|
|
|
|
|
target_len = target - *target_ptr;
|
|
|
|
|
w_source_ptr++;
|
|
|
|
|
if (w_source_len > 0)
|
|
|
|
|
w_source_len--;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (target != target_end && target_len_ptr != NULL)
|
|
|
|
|
/* Did not fill all of the provided buffer, so update the target_len_ptr
|
|
|
|
|
* output with the space used. */
|
|
|
|
|
*target_len_ptr = target - *target_ptr;
|
|
|
|
|
|
|
|
|
|
/* Check if input fit into target exactly. */
|
|
|
|
|
if (w_source_len < 0 && target == target_end && w_source_ptr[0] == 0)
|
|
|
|
|
w_source_len = 0;
|
|
|
|
|
|
|
|
|
|
*target++ = '\0';
|
|
|
|
|
|
|
|
|
|
/* Characters remained after filling the buffer, compute the remaining length now. */
|
|
|
|
|
if (w_source_len) {
|
|
|
|
|
if (target_len_ptr != NULL)
|
|
|
|
|
*target_len_ptr = target_len + uv_utf16_length_as_wtf8(w_source_ptr, w_source_len);
|
|
|
|
|
return UV_ENOBUFS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|