// UTF-16 code unit type and a 32-bit type wide enough for any Unicode code
// point; both are used throughout the conversion helpers in this file.
using UChar = char16_t;
using UChar32 = uint32_t;

// Returns true when |c| has no bits set above the low seven (c <= 0x7F).
bool isASCII(UChar c) { return !(c & ~0x7F); }

// U+FFFD REPLACEMENT CHARACTER, written in place of input that cannot be
// converted.
const UChar replacementCharacter = 0xFFFD;
// Returns the total byte length (2, 3 or 4) of the UTF-8 sequence introduced
// by lead byte |b0|, or 0 when |b0| cannot start a multi-byte sequence. Any
// byte whose top two bits are not both set (ASCII and 10xxxxxx continuation
// bytes) and any byte of the form 11111xxx yields 0.
inline int inlineUTF8SequenceLengthNonASCII(char b0) {
  if ((b0 & 0xC0) != 0xC0) return 0;  // top two bits are not 11
  if ((b0 & 0xE0) == 0xC0) return 2;  // 110xxxxx
  if ((b0 & 0xF0) == 0xE0) return 3;  // 1110xxxx
  if ((b0 & 0xF8) == 0xF0) return 4;  // 11110xxx
  // 11111xxx is not a valid lead byte; report it the same way as the first
  // check does so every path returns a value.
  return 0;
}
29inline int inlineUTF8SequenceLength(
char b0) {
30 return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
// Table indexed by a byte count: convertUTF16ToUTF8 ORs
// firstByteMark[bytesToWrite] into the first byte of each sequence it writes.
// NOTE(review): the array is declared with 7 entries but only the first four
// initializers are visible here; the remainder of the initializer list looks
// cut off in this listing. Restore it from the upstream file rather than
// guessing the values.
38static const unsigned char firstByteMark[7] = {0x00, 0x00, 0xC0, 0xE0,
// Status type returned by convertUTF16ToUTF8 and convertUTF8ToUTF16; callers
// in this file compare the result against `conversionOK`.
// NOTE(review): the enumerator list and closing brace are not visible in this
// listing.
41enum ConversionResult {
// Reads UTF-16 code units from [*sourceStart, sourceEnd) and writes bytes
// through *targetStart, never past targetEnd; by its name the output is UTF-8.
// Returns a ConversionResult (initialised to conversionOK).
// NOTE(review): the leading numerals on each line look like listing line
// numbers fused in by extraction, and many interior lines (declaration of
// `ch`, branch bodies, case labels, error paths, the final write-back and
// return) are missing from this view. How |strict| is used is not visible.
48ConversionResult convertUTF16ToUTF8(
const UChar** sourceStart,
49 const UChar* sourceEnd,
char** targetStart,
50 char* targetEnd,
bool strict) {
51 ConversionResult
result = conversionOK;
// Work on local cursors copied from the in/out pointers.
52 const UChar* source = *sourceStart;
53 char* target = *targetStart;
54 while (source < sourceEnd) {
56 uint32_t bytesToWrite = 0;
// (ch | byteMark) & byteMask keeps the low six bits of ch and forces the top
// two bits to 10, i.e. the 10xxxxxx shape of a UTF-8 continuation byte.
57 const UChar32 byteMask = 0xBF;
58 const UChar32 byteMark = 0x80;
// NOTE(review): the initializer of oldSource and the declaration of ch are
// missing here.
59 const UChar* oldSource =
61 ch =
static_cast<uint16_t>(*source++);
// 0xD800..0xDBFF is the lead (high) surrogate range: look at the next unit.
63 if (ch >= 0xD800 && ch <= 0xDBFF) {
65 if (source < sourceEnd) {
66 UChar32 ch2 =
static_cast<uint16_t>(*source);
// A trail (low) surrogate follows: combine the pair into one code point at or
// above 0x10000.
68 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
69 ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x0010000;
// Trail surrogate with no preceding lead; the handling inside this branch is
// not visible.
83 if (ch >= 0xDC00 && ch <= 0xDFFF) {
// Range ladder on the code point. The branch bodies are not visible;
// presumably each selects bytesToWrite for that range — TODO confirm.
90 if (ch <
static_cast<UChar32
>(0x80)) {
92 }
else if (ch <
static_cast<UChar32
>(0x800)) {
94 }
else if (ch <
static_cast<UChar32
>(0x10000)) {
96 }
else if (ch <
static_cast<UChar32
>(0x110000)) {
// Substitutes U+FFFD; presumably the fall-through for values >= 0x110000
// (the enclosing `else` is not visible).
100 ch = replacementCharacter;
// Advance first, then check for overrun and back off if the sequence does
// not fit before targetEnd.
103 target += bytesToWrite;
104 if (target > targetEnd) {
106 target -= bytesToWrite;
// Bytes are emitted back-to-front via *--target: up to three continuation
// bytes, then the first byte tagged with firstByteMark[bytesToWrite].
// NOTE(review): the case labels and whatever updates ch between writes are
// missing from this view.
110 switch (bytesToWrite) {
112 *--target =
static_cast<char>((ch | byteMark) & byteMask);
116 *--target =
static_cast<char>((ch | byteMark) & byteMask);
120 *--target =
static_cast<char>((ch | byteMark) & byteMask);
124 *--target =
static_cast<char>(ch | firstByteMark[bytesToWrite]);
// The backward writes left target at the start of the sequence; step past it.
126 target += bytesToWrite;
// True when code point |c| lies in the Basic Multilingual Plane (<= 0xFFFF).
#define U_IS_BMP(c) (static_cast<uint32_t>(c) <= 0xFFFF)

// True when |c| is a supplementary code point (0x10000..0x10FFFF). The
// unsigned subtraction makes values below 0x10000 wrap to a large number, so
// one comparison covers both bounds.
#define U_IS_SUPPLEMENTARY(c) (static_cast<uint32_t>((c)-0x010000) <= 0xFFFFF)

// True when |c| is a surrogate code point (0xD800..0xDFFF).
#define U_IS_SURROGATE(c) (((c)&0xFFFFF800) == 0xD800)

// Lead (first) surrogate for a supplementary code point. 0xD7C0 is
// 0xD800 - (0x10000 >> 10), which folds the 0x10000 bias into the constant.
#define U16_LEAD(supplementary) \
  static_cast<UChar>(((supplementary) >> 10) + 0xD7C0)

// Trail (second) surrogate for a supplementary code point: the low ten bits
// placed in the 0xDC00..0xDFFF range.
#define U16_TRAIL(supplementary) \
  static_cast<UChar>(((supplementary)&0x3FF) | 0xDC00)
// Checks the |length|-byte sequence starting at |source| and returns false as
// soon as one of the byte-range tests below fails.
// NOTE(review): the leading numerals look like fused listing line numbers, and
// the declaration of `a`, the switch/case scaffolding, and the final return
// are missing from this view.
178static bool isLegalUTF8(
const unsigned char* source,
int length) {
// srcptr starts one past the last byte; the checks walk backwards with
// *--srcptr.
180 const unsigned char* srcptr = source +
length;
// Trailing bytes must lie in 0x80..0xBF.
186 if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
189 if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
// Only the upper bound is tested here; the lower bound for this byte is
// handled by the comparisons that follow.
192 if ((a = (*--srcptr)) > 0xBF)
return false;
// Tighter bounds on the byte held in `a`. The case labels selecting each test
// are not visible; presumably they key on the lead byte *source — TODO confirm.
197 if (a < 0xA0)
return false;
200 if (a > 0x9F)
return false;
203 if (a < 0x90)
return false;
206 if (a > 0x8F)
return false;
209 if (a < 0x80)
return false;
// Lead bytes 0x80..0xC1 are rejected: 0x80..0xBF are continuation bytes and
// 0xC0/0xC1 could only start overlong two-byte forms.
214 if (*source >= 0x80 && *source < 0xC2)
return false;
// Lead bytes above 0xF4 would encode values beyond U+10FFFF.
216 if (*source > 0xF4)
return false;
// Per-length constants subtracted by readUTF8Sequence, which returns
// `character - offsetsFromUTF8[length - 1]`.
// NOTE(review): six entries are declared but only the first and the last two
// initializers are visible in this listing; the middle entries are missing.
223static const UChar32 offsetsFromUTF8[6] = {0x00000000UL,
227 static_cast<UChar32
>(0xFA082080UL),
228 static_cast<UChar32
>(0x82082080UL)};
// Accumulates bytes read through |sequence| into `character` and returns that
// sum minus offsetsFromUTF8[length - 1]. |sequence| is taken by reference and
// is advanced once per byte consumed (`*sequence++`).
// NOTE(review): the declaration of `character`, the switch on |length| and
// whatever happens between the additions (presumably a shift — TODO confirm)
// are missing from this view, as is the closing brace.
230static inline UChar32 readUTF8Sequence(
const char*& sequence,
size_t length) {
// Each byte is widened through unsigned char so a negative `char` does not
// sign-extend into the sum.
236 character +=
static_cast<unsigned char>(*sequence++);
240 character +=
static_cast<unsigned char>(*sequence++);
244 character +=
static_cast<unsigned char>(*sequence++);
248 character +=
static_cast<unsigned char>(*sequence++);
252 character +=
static_cast<unsigned char>(*sequence++);
256 character +=
static_cast<unsigned char>(*sequence++);
259 return character - offsetsFromUTF8[length - 1];
// Reads bytes from [*sourceStart, sourceEnd), decodes them one sequence at a
// time and writes UChar units through *targetStart, bounded by targetEnd.
// When |sourceAllASCII| is non-null it receives whether everything OR-ed into
// orAllData stayed within 0x00..0x7F.
// NOTE(review): the leading numerals look like fused listing line numbers, and
// the last parameter, the declaration of orAllData, most branch bodies, the
// write-back of the cursors and the return are missing from this view.
262ConversionResult convertUTF8ToUTF16(
const char** sourceStart,
263 const char* sourceEnd,
UChar** targetStart,
264 UChar* targetEnd,
bool* sourceAllASCII,
266 ConversionResult
result = conversionOK;
267 const char* source = *sourceStart;
268 UChar* target = *targetStart;
270 while (source < sourceEnd) {
// Sequence length is derived from the first byte alone.
271 int utf8SequenceLength = inlineUTF8SequenceLength(*source);
// Fewer bytes remain than the lead byte announces; the handling inside this
// branch is not visible.
272 if (sourceEnd - source < utf8SequenceLength) {
277 if (!isLegalUTF8(
reinterpret_cast<const unsigned char*
>(source),
278 utf8SequenceLength)) {
// source is passed by reference, so decoding moves it forward.
283 UChar32
character = readUTF8Sequence(source, utf8SequenceLength);
// No room for one output unit: step source back over the sequence just read.
285 if (target >= targetEnd) {
286 source -= utf8SequenceLength;
// The conditions guarding the next three statements are not visible; they
// either rewind source or emit U+FFFD and record it in orAllData.
295 source -= utf8SequenceLength;
299 *target++ = replacementCharacter;
300 orAllData |= replacementCharacter;
// Requires space for two output units (presumably a surrogate pair — TODO
// confirm); otherwise rewind as above.
307 if (target + 1 >= targetEnd) {
308 source -= utf8SequenceLength;
317 source -= utf8SequenceLength;
321 *target++ = replacementCharacter;
322 orAllData |= replacementCharacter;
// All-ASCII means no bit above the low seven was ever OR-ed in.
329 if (sourceAllASCII) *sourceAllASCII = !(orAllData & ~0x7F);
336static inline void putUTF8Triple(
char*& buffer,
UChar ch) {
337 *buffer++ =
static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
338 *buffer++ =
static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
339 *buffer++ =
static_cast<char>((ch & 0x3F) | 0x80);
// Body fragment of a UTF-16 -> UTF-8 string conversion that builds a
// std::string via convertUTF16ToUTF8.
// NOTE(review): the signature line is missing from this view (presumably
// `std::string UTF16ToUTF8(const UChar* stringStart, size_t length)` — confirm
// against the upstream file), as are the declaration of characters_end, parts
// of the error handling, the return statement and the closing braces. The
// leading numerals look like fused listing line numbers.
// Null or empty input yields an empty string.
344 if (!stringStart || !length)
return std::string();
// Refuse lengths for which length * 3 could not be represented in `unsigned`.
356 if (length > std::numeric_limits<unsigned>::max() / 3)
return std::string();
// Output is pre-sized to three bytes per input unit and trimmed at the end.
358 std::string output(length * 3,
'\0');
359 const UChar* characters = stringStart;
// NOTE(review): &*output.end() dereferences the end iterator, which checked
// standard-library builds may reject; output.data() + output.size() would
// express the same address without the dereference.
361 char* buffer = &*output.begin();
362 char* buffer_end = &*output.end();
363 while (characters < characters_end) {
// Convert with strict = true; both cursors are passed by address so the
// callee can report progress.
365 ConversionResult
result = convertUTF16ToUTF8(
366 &characters, characters_end, &buffer, buffer_end,
true);
371 if (
result != conversionOK) {
// Emits U+FFFD as three bytes; the surrounding statements of this error path
// are not visible.
377 putUTF8Triple(buffer, replacementCharacter);
// Trim the string to the number of bytes actually written.
382 output.resize(buffer - output.data());
386std::basic_string<UChar>
UTF8ToUTF16(
const char* stringStart,
size_t length) {
387 if (!stringStart || !length)
return std::basic_string<UChar>();
388 std::vector<UChar> buffer(length);
389 UChar* bufferStart = buffer.data();
391 UChar* bufferCurrent = bufferStart;
392 const char* stringCurrent =
reinterpret_cast<const char*
>(stringStart);
393 if (convertUTF8ToUTF16(&stringCurrent,
394 reinterpret_cast<const char*
>(stringStart + length),
395 &bufferCurrent, bufferCurrent + buffer.size(),
nullptr,
396 true) != conversionOK)
397 return std::basic_string<UChar>();
398 size_t utf16Length = bufferCurrent - bufferStart;
399 return std::basic_string<UChar>(bufferStart, bufferStart + utf16Length);
std::optional< TNode< JSArray > > a
ZoneVector< RpoNumber > & result
InstructionOperand source
BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL int character
std::basic_string< UChar > UTF8ToUTF16(const char *stringStart, size_t length)
std::string UTF16ToUTF8(const UChar *stringStart, size_t length)
#define DCHECK_LE(v1, v2)
#define DCHECK_NE(v1, v2)
#define U_IS_SURROGATE(c)
#define U_IS_SUPPLEMENTARY(c)
#define U16_TRAIL(supplementary)
#define U16_LEAD(supplementary)