v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
unicode-decoder.h
Go to the documentation of this file.
1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_STRINGS_UNICODE_DECODER_H_
6#define V8_STRINGS_UNICODE_DECODER_H_
7
8#include "src/base/vector.h"
10
11namespace v8 {
12namespace internal {
13
14// The return value may point to the first aligned word containing the first
15// non-one-byte character, rather than directly to the non-one-byte character.
16// If the return value is >= the passed length, the entire string was
17// one-byte.
18inline uint32_t NonAsciiStart(const uint8_t* chars, uint32_t length) {
19 const uint8_t* start = chars;
20 const uint8_t* limit = chars + length;
21
22 if (static_cast<size_t>(length) >= kIntptrSize) {
23 // Check unaligned bytes.
24 while (!IsAligned(reinterpret_cast<intptr_t>(chars), kIntptrSize)) {
25 if (*chars > unibrow::Utf8::kMaxOneByteChar) {
26 return static_cast<uint32_t>(chars - start);
27 }
28 ++chars;
29 }
30 // Check aligned words.
32 const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80;
33 while (chars + sizeof(uintptr_t) <= limit) {
34 if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
35 return static_cast<uint32_t>(chars - start);
36 }
37 chars += sizeof(uintptr_t);
38 }
39 }
40 // Check remaining unaligned bytes.
41 while (chars < limit) {
42 if (*chars > unibrow::Utf8::kMaxOneByteChar) {
43 return static_cast<uint32_t>(chars - start);
44 }
45 ++chars;
46 }
47
48 return static_cast<uint32_t>(chars - start);
49}
50
// Common base of the decoders declared in this header. It is parameterized on
// the concrete decoder type (Decoder) so that is_invalid() can be forwarded to
// that type through a static_cast instead of a virtual call.
51template <class Decoder>
53 public:
// Classification of the decoded input. The enumerator order is significant:
// is_one_byte() is implemented as `encoding_ <= Encoding::kLatin1`, so kAscii
// and kLatin1 must precede the other enumerators.
54 enum class Encoding : uint8_t { kAscii, kLatin1, kUtf16, kInvalid };
55
// Delegates to the is_invalid() of the derived decoder type.
56 bool is_invalid() const {
57 return static_cast<const Decoder&>(*this).is_invalid();
58 }
// True when encoding_ is exactly Encoding::kAscii.
59 bool is_ascii() const { return encoding_ == Encoding::kAscii; }
// True when encoding_ is Encoding::kAscii or Encoding::kLatin1.
60 bool is_one_byte() const { return encoding_ <= Encoding::kLatin1; }
// Returns the stored utf16_length_ (presumably the decoded length in UTF-16
// code units; confirm against where the field is computed).
61 int utf16_length() const {
63 return utf16_length_;
64 }
// Returns the stored non_ascii_start_ (presumably the offset produced by
// NonAsciiStart() for the input; confirm against the constructor).
65 int non_ascii_start() const {
67 return non_ascii_start_;
68 }
69
// Declared only; the definition is not part of this header. Takes the output
// buffer |out| and the input bytes |data|.
70 template <typename Char>
71 void Decode(Char* out, base::Vector<const uint8_t> data);
72
73 protected:
78};
79
81 : public Utf8DecoderBase<Utf8Decoder> {
82 public:
85
86 // This decoder never fails; an invalid byte sequence decodes to U+FFFD and
87 // then the decode continues.
// Consequently is_invalid() always returns false. The DCHECK_NE asserts that
// encoding_ is not Encoding::kInvalid for this decoder.
88 bool is_invalid() const {
89 DCHECK_NE(encoding_, Encoding::kInvalid);
90 return false;
91 }
92};
93
94#if V8_ENABLE_WEBASSEMBLY
95// Like Utf8Decoder above, except that instead of replacing invalid sequences
96// with U+FFFD, we have a separate Encoding::kInvalid state, and we also accept
97// isolated surrogates.
98class Wtf8Decoder : public Utf8DecoderBase<Wtf8Decoder> {
99 public:
100 explicit Wtf8Decoder(base::Vector<const uint8_t> data)
101 : Utf8DecoderBase(data) {}
102
103 bool is_invalid() const { return encoding_ == Encoding::kInvalid; }
104};
105
106// Like Utf8Decoder above, except that instead of replacing invalid sequences
107// with U+FFFD, we have a separate Encoding::kInvalid state.
108class StrictUtf8Decoder : public Utf8DecoderBase<StrictUtf8Decoder> {
109 public:
110 explicit StrictUtf8Decoder(base::Vector<const uint8_t> data)
111 : Utf8DecoderBase(data) {}
112
113 bool is_invalid() const { return encoding_ == Encoding::kInvalid; }
114};
115#endif // V8_ENABLE_WEBASSEMBLY
116
117} // namespace internal
118} // namespace v8
119
120#endif // V8_STRINGS_UNICODE_DECODER_H_
static const unsigned kMaxOneByteChar
Definition unicode.h:179
void Decode(Char *out, base::Vector< const uint8_t > data)
Utf8DecoderBase(base::Vector< const uint8_t > data)
Utf8Decoder(base::Vector< const uint8_t > data)
int start
constexpr uintptr_t kUintptrAllBitsSet
Definition v8-internal.h:94
uint32_t NonAsciiStart(const uint8_t *chars, uint32_t length)
constexpr int kIntptrSize
Definition globals.h:408
#define DCHECK_NE(v1, v2)
Definition logging.h:486
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define V8_EXPORT_PRIVATE
Definition macros.h:460
constexpr bool IsAligned(T value, U alignment)
Definition macros.h:403