v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
string-hasher-inl.h
Go to the documentation of this file.
1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_STRINGS_STRING_HASHER_INL_H_
6#define V8_STRINGS_STRING_HASHER_INL_H_
7
9// Include the non-inl header before the rest of the headers.
10
11#include "src/common/globals.h"
12#include "src/utils/utils.h"
13
14#ifdef __SSE2__
15#include <emmintrin.h>
16#elif defined(__ARM_NEON__)
17#include <arm_neon.h>
18#endif
19
20// Comment inserted to prevent header reordering.
21#include <type_traits>
22
26#include "src/utils/utils-inl.h"
27#include "third_party/rapidhash-v8/rapidhash.h"
28
29namespace v8 {
30namespace internal {
31
32namespace detail {
33V8_EXPORT_PRIVATE uint64_t HashConvertingTo8Bit(const uint16_t* chars,
34 uint32_t length, uint64_t seed);
35
36template <typename T>
37uint32_t ConvertRawHashToUsableHash(T raw_hash) {
38 // Limit to the supported bits.
39 const int32_t hash = static_cast<int32_t>(raw_hash & String::HashBits::kMax);
40 // Ensure that the hash is kZeroHash, if the computed value is 0.
41 return hash == 0 ? StringHasher::kZeroHash : hash;
42}
43
44V8_INLINE bool IsOnly8Bit(const uint16_t* chars, unsigned len) {
45 // TODO(leszeks): This could be SIMD for efficiency on large strings, if we
46 // need it.
47 for (unsigned i = 0; i < len; ++i) {
48 if (chars[i] > 255) {
49 return false;
50 }
51 }
52 return true;
53}
54
55V8_INLINE uint64_t GetRapidHash(const uint8_t* chars, uint32_t length,
56 uint64_t seed) {
57 return rapidhash(chars, length, seed);
58}
59
60V8_INLINE uint64_t GetRapidHash(const uint16_t* chars, uint32_t length,
61 uint64_t seed) {
62 // For 2-byte strings we need to preserve the same hash for strings in just
63 // the latin-1 range.
64 if (V8_UNLIKELY(IsOnly8Bit(chars, length))) {
65 return detail::HashConvertingTo8Bit(chars, length, seed);
66 }
67 return rapidhash(reinterpret_cast<const uint8_t*>(chars), 2 * length, seed);
68}
69
70template <typename uchar>
71V8_INLINE uint32_t GetUsableRapidHash(const uchar* chars, uint32_t length,
72 uint64_t seed) {
73 return ConvertRawHashToUsableHash(GetRapidHash(chars, length, seed));
74}
75} // namespace detail
76
82
89
90uint32_t StringHasher::GetTrivialHash(uint32_t length) {
92 // The hash of a large string is simply computed from the length.
93 // Ensure that the max length is small enough to be encoded without losing
94 // information.
96 uint32_t hash = length;
98}
99
100uint32_t StringHasher::MakeArrayIndexHash(uint32_t value, uint32_t length) {
101 // For array indexes mix the length into the hash as an array index could
102 // be zero.
106
108 value |= length << String::ArrayIndexLengthBits::kShift;
109
113 return value;
114}
115
116namespace detail {
117
119
120// The array index type depends on the architecture, so that the multiplication
121// in TryParseArrayIndex stays fast.
122#if V8_HOST_ARCH_64_BIT
123using ArrayIndexT = uint64_t;
124#else
125using ArrayIndexT = uint32_t;
126#endif
127
128template <typename uchar>
130 uint32_t length, uint32_t& i,
131 ArrayIndexT& index) {
132 DCHECK_GT(length, 0);
134
135 // The leading character can only be a zero for the string "0"; otherwise this
136 // isn't a valid index string.
137 index = chars[0] - '0';
138 i = 1;
139 if (index > 9) return kNonIndex;
140 if (index == 0) {
141 if (length > 1) return kNonIndex;
142 DCHECK_EQ(length, 1);
143 return kSuccess;
144 }
145
146 if (length > String::kMaxArrayIndexSize) return kOverflow;
147
148 // TODO(leszeks): Use SIMD for this loop.
149 for (; i < length; i++) {
150 uchar c = chars[i];
151 uint32_t val = c - '0';
152 if (val > 9) return kNonIndex;
153 index = (10 * index) + val;
154 }
155 if constexpr (sizeof(index) == sizeof(uint64_t)) {
156 // If we have a large type for index, we'll never overflow it, so we can
157 // have a simple comparison for array index overflow.
158 if (index > String::kMaxArrayIndex) {
159 return kOverflow;
160 }
161 } else {
162 DCHECK(sizeof(index) == sizeof(uint32_t));
163 // If index is a 32-bit int, we have to get a bit creative with the overflow
164 // check.
166 // If length is String::kMaxArrayIndexSize, and we know there is no zero
167 // prefix, the minimum valid value is 1 followed by length - 1 zeros. If
168 // our value is smaller than this, then we overflowed.
169 //
170 // Additionally, String::kMaxArrayIndex is UInt32Max - 1, so we can fold
171 // in a check that index < UInt32Max by adding 1 to both sides, making
172 // index = UInt32Max overflows, and only then checking for overflow.
173 constexpr uint32_t kMinValidValue =
175 if (index + 1 < kMinValidValue + 1) {
176 // We won't try an integer index if there is overflow, so just return
177 // non-index.
179 return kNonIndex;
180 }
181 }
182 }
183 DCHECK_LT(index, TenToThe(length));
184 DCHECK_GE(index, TenToThe(length - 1));
185 return kSuccess;
186}
187
188// The following function wouldn't do anything on 32-bit platforms, because
189// kMaxArrayIndexSize == kMaxIntegerIndexSize there.
190#if V8_HOST_ARCH_64_BIT
191template <typename uchar>
192V8_INLINE IndexParseResult TryParseIntegerIndex(const uchar* chars,
193 uint32_t length, uint32_t i,
194 ArrayIndexT index) {
195 DCHECK_GT(length, 0);
197 DCHECK_GT(i, 0);
198 DCHECK_GT(index, 0);
200
201 for (; i < length; i++) {
202 // We should never be anywhere near overflowing, so we can just do
203 // one range check at the end.
204 static_assert(kMaxSafeIntegerUint64 < (kMaxUInt64 / 100));
205 DCHECK_LT(index, kMaxUInt64 / 100);
206
207 uchar c = chars[i];
208 uint32_t val = c - '0';
209 if (val > 9) return kNonIndex;
210 index = (10 * index) + val;
211 }
212 if (index > kMaxSafeIntegerUint64) return kOverflow;
213
214 return kSuccess;
215}
216#else
218#endif
219
220} // namespace detail
221
222template <typename char_t>
223uint32_t StringHasher::HashSequentialString(const char_t* chars_raw,
224 uint32_t length, uint64_t seed) {
225 static_assert(std::is_integral_v<char_t>);
226 static_assert(sizeof(char_t) <= 2);
227 using uchar = std::make_unsigned_t<char_t>;
228 const uchar* chars = reinterpret_cast<const uchar*>(chars_raw);
229 DCHECK_IMPLIES(length > 0, chars != nullptr);
230 if (length >= 1) {
231 if (length <= String::kMaxIntegerIndexSize) {
232 // Possible array or integer index; try to compute the array index hash.
234
236 uint32_t i;
237 switch (detail::TryParseArrayIndex(chars, length, i, index)) {
238 case detail::kSuccess:
240 return MakeArrayIndexHash(static_cast<uint32_t>(index), length);
242 // A non-index result from TryParseArrayIndex means we don't need to
243 // check for integer indices.
244 break;
245 case detail::kOverflow: {
246#if V8_HOST_ARCH_64_BIT
247 // On 64-bit, we might have a valid integer index even if the value
248 // overflowed an array index.
249 static_assert(String::kMaxArrayIndexSize <
251 switch (detail::TryParseIntegerIndex(chars, length, i, index)) {
252 case detail::kSuccess: {
253 uint32_t hash = String::CreateHashFieldValue(
254 detail::GetUsableRapidHash(chars, length, seed),
257 // The hash accidentally looks like a cached index. Fix that by
258 // setting a bit that looks like a longer-than-cacheable string
259 // length.
262 }
264 return hash;
265 }
268 break;
269 }
270#else
271 static_assert(String::kMaxArrayIndexSize ==
273#endif
274 break;
275 }
276 }
277 // If the we failed to compute an index hash, this falls through into the
278 // non-index hash case.
279 } else if (length > String::kMaxHashCalcLength) {
280 // We should never go down this path if we might have an index value.
283 return GetTrivialHash(length);
284 }
285 }
286
287 // Non-index hash.
289 detail::GetUsableRapidHash(chars, length, seed),
291}
292
293std::size_t SeededStringHasher::operator()(const char* name) const {
295 name, static_cast<uint32_t>(strlen(name)), hashseed_);
296}
297
298} // namespace internal
299} // namespace v8
300
301#endif // V8_STRINGS_STRING_HASHER_INL_H_
static constexpr U kMax
Definition bit-field.h:44
static constexpr int kShift
Definition bit-field.h:39
static const uint32_t kMaxArrayIndex
Definition name.h:152
static const int kMaxCachedArrayIndexLength
Definition name.h:150
static bool IsIntegerIndex(uint32_t raw_hash_field)
Definition name-inl.h:106
static uint32_t CreateHashFieldValue(uint32_t hash, HashFieldType type)
Definition name-inl.h:132
static bool ContainsCachedArrayIndex(uint32_t hash)
Definition name-inl.h:290
static const int kMaxArrayIndexSize
Definition name.h:155
static const int kArrayIndexValueBits
Definition name.h:169
static const int kMaxIntegerIndexSize
Definition name.h:163
V8_INLINE void AddCharacter(uint16_t c)
static V8_INLINE uint32_t GetTrivialHash(uint32_t length)
static V8_INLINE uint32_t MakeArrayIndexHash(uint32_t value, uint32_t length)
static const int kZeroHash
static uint32_t HashSequentialString(const char_t *chars, uint32_t length, uint64_t seed)
static const uint32_t kMaxHashCalcLength
Definition string.h:515
static const uint32_t kMaxLength
Definition string.h:511
V8_INLINE bool IsOnly8Bit(const uint16_t *chars, unsigned len)
V8_EXPORT_PRIVATE uint64_t HashConvertingTo8Bit(const uint16_t *chars, uint32_t length, uint64_t seed)
V8_INLINE IndexParseResult TryParseArrayIndex(const uchar *chars, uint32_t length, uint32_t &i, ArrayIndexT &index)
uint32_t ConvertRawHashToUsableHash(T raw_hash)
V8_INLINE uint64_t GetRapidHash(const uint8_t *chars, uint32_t length, uint64_t seed)
V8_INLINE uint32_t GetUsableRapidHash(const uchar *chars, uint32_t length, uint64_t seed)
constexpr uint64_t kMaxSafeIntegerUint64
Definition globals.h:1983
constexpr uint64_t kMaxUInt64
Definition globals.h:390
constexpr uint64_t TenToThe(uint32_t exponent)
Definition utils.h:544
return value
Definition map-inl.h:893
#define DCHECK_LE(v1, v2)
Definition logging.h:490
#define DCHECK_IMPLIES(v1, v2)
Definition logging.h:493
#define DCHECK_GE(v1, v2)
Definition logging.h:488
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_LT(v1, v2)
Definition logging.h:489
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define DCHECK_GT(v1, v2)
Definition logging.h:487
#define V8_EXPORT_PRIVATE
Definition macros.h:460
std::size_t operator()(const char *name) const
#define V8_INLINE
Definition v8config.h:500
#define V8_UNLIKELY(condition)
Definition v8config.h:660