v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
swiss-hash-table-helpers.h
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_OBJECTS_SWISS_HASH_TABLE_HELPERS_H_
#define V8_OBJECTS_SWISS_HASH_TABLE_HELPERS_H_

// Collection of swiss table helpers that are independent from a specific
// container, like SwissNameDictionary. Taken almost verbatim from Abseil;
// comments in this file indicate what is taken from which Abseil file.

#include <climits>
#include <cstdint>
#include <type_traits>

#include "src/base/bits.h"
#include "src/base/logging.h"
#include "src/base/memory.h"

// The following #defines are taken from Abseil's have_sse.h (but renamed).
#ifndef V8_SWISS_TABLE_HAVE_SSE2_HOST
#if (defined(__SSE2__) ||  \
     (defined(_MSC_VER) && \
      (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2))))
#define V8_SWISS_TABLE_HAVE_SSE2_HOST 1
#else
#define V8_SWISS_TABLE_HAVE_SSE2_HOST 0
#endif
#endif

#ifndef V8_SWISS_TABLE_HAVE_SSSE3_HOST
#if defined(__SSSE3__)
#define V8_SWISS_TABLE_HAVE_SSSE3_HOST 1
#else
#define V8_SWISS_TABLE_HAVE_SSSE3_HOST 0
#endif
#endif

#if V8_SWISS_TABLE_HAVE_SSSE3_HOST && !V8_SWISS_TABLE_HAVE_SSE2_HOST
#error "Bad configuration!"
#endif

// Unlike Abseil, we cannot select SSE purely by host capabilities. When
// creating a snapshot, the group width must be compatible. The SSE
// implementation uses a group width of 16, whereas the non-SSE version uses 8.
// Thus we select the group size based on target capabilities and, if the host
// does not match, select a polyfill implementation. This means that, in
// supported cross-compiling configurations, we must be able to determine
// matching target capabilities from the host.
#ifndef V8_SWISS_TABLE_HAVE_SSE2_TARGET
#if V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_X64
// x64 always has SSE2, and ia32 without SSE2 is not supported by V8.
#define V8_SWISS_TABLE_HAVE_SSE2_TARGET 1
#else
#define V8_SWISS_TABLE_HAVE_SSE2_TARGET 0
#endif
#endif

#if V8_SWISS_TABLE_HAVE_SSE2_HOST
#include <emmintrin.h>
#endif

#if V8_SWISS_TABLE_HAVE_SSSE3_HOST
#include <tmmintrin.h>
#endif

namespace v8 {
namespace internal {
namespace swiss_table {

// All definitions below are taken from Abseil's raw_hash_set.h with only minor
// changes, like using existing V8 versions of certain helper functions.

// Denotes the group of the control table currently being probed.
// Implements quadratic probing by advancing by i groups after the i-th
// (unsuccessful) probe.
template <size_t GroupSize>
class ProbeSequence {
 public:
  ProbeSequence(uint32_t hash, uint32_t mask) {
    // Mask must be a power of 2 minus 1.
    DCHECK_EQ(0, ((mask + 1) & mask));
    mask_ = mask;
    offset_ = hash & mask_;
  }
  uint32_t offset() const { return offset_; }
  uint32_t offset(int i) const { return (offset_ + i) & mask_; }

  void next() {
    index_ += GroupSize;
    offset_ += index_;
    offset_ &= mask_;
  }

  size_t index() const { return index_; }

 private:
  // Used for modulo calculation.
  uint32_t mask_;

  // The index/offset into the control table, meaning that {ctrl[offset_]} is
  // the start of the group currently being probed, assuming that |ctrl| is the
  // pointer to the beginning of the control table.
  uint32_t offset_;

  // States the number of probes that have been performed (starting at 0),
  // multiplied by GroupSize.
  uint32_t index_ = 0;
};

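// Example (illustrative, not part of the Abseil-derived helpers): with
// GroupSize 16 and mask 63, a hash of 42 visits group offsets as follows:
//   ProbeSequence<16> seq(42, 63);
//   seq.offset();  // 42
//   seq.next();
//   seq.offset();  // (42 + 16) & 63 == 58
//   seq.next();
//   seq.offset();  // (58 + 32) & 63 == 26
// The stride grows by GroupSize after every probe; for a power-of-two table
// this triangular-number sequence visits every group before any repeat.
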
// An abstraction over a bitmask. It provides an easy way to iterate through
// the indexes of the set bits of a bitmask. When Shift=0 (platforms with SSE),
// this is a true bitmask.
// When Shift=3 (used on non-SSE platforms), we obtain a "byte mask", where
// each logical bit is represented by a full byte. The logical bit 0 is
// represented as 0x00, whereas 1 is represented as 0x80. Other values must not
// appear.
//
// For example:
//   for (int i : BitMask<uint32_t, 16>(0x5)) -> yields 0, 2
//   for (int i : BitMask<uint64_t, 8, 3>(0x0000000080800000)) -> yields 2, 3
template <class T, int SignificantBits, int Shift = 0>
class BitMask {
  static_assert(std::is_unsigned<T>::value);
  static_assert(Shift == 0 || Shift == 3);

 public:
  // These are useful for unit tests (gunit).
  using value_type = int;
  using iterator = BitMask;
  using const_iterator = BitMask;

  explicit BitMask(T mask) : mask_(mask) {}
  BitMask& operator++() {
    // Clear the least significant bit that is set.
    mask_ &= (mask_ - 1);
    return *this;
  }
  explicit operator bool() const { return mask_ != 0; }
  int operator*() const { return LowestBitSet(); }
  int LowestBitSet() const { return TrailingZeros(); }
  int HighestBitSet() const {
    return (sizeof(T) * CHAR_BIT - base::bits::CountLeadingZeros(mask_) - 1) >>
           Shift;
  }

  BitMask begin() const { return *this; }
  BitMask end() const { return BitMask(0); }

  int TrailingZeros() const {
    DCHECK_NE(mask_, 0);
    return base::bits::CountTrailingZerosNonZero(mask_) >> Shift;
  }

  int LeadingZeros() const {
    constexpr int total_significant_bits = SignificantBits << Shift;
    constexpr int extra_bits = sizeof(T) * 8 - total_significant_bits;
    return base::bits::CountLeadingZeros(mask_ << extra_bits) >> Shift;
  }

 private:
  friend bool operator==(const BitMask& a, const BitMask& b) {
    return a.mask_ == b.mask_;
  }
  friend bool operator!=(const BitMask& a, const BitMask& b) {
    return a.mask_ != b.mask_;
  }

  T mask_;
};

using ctrl_t = signed char;
using h2_t = uint8_t;

// The values here are selected for maximum performance. See the static asserts
// below for details.
enum Ctrl : ctrl_t {
  kEmpty = -128,   // 0b10000000
  kDeleted = -2,   // 0b11111110
  kSentinel = -1,  // 0b11111111
};
static_assert(
    kEmpty & kDeleted & kSentinel & 0x80,
    "Special markers need to have the MSB to make checking for them efficient");
static_assert(kEmpty < kSentinel && kDeleted < kSentinel,
              "kEmpty and kDeleted must be smaller than kSentinel to make the "
              "SIMD test of IsEmptyOrDeleted() efficient");
static_assert(kSentinel == -1,
              "kSentinel must be -1 to elide loading it from memory into SIMD "
              "registers (pcmpeqd xmm, xmm)");
static_assert(kEmpty == -128,
              "kEmpty must be -128 to make the SIMD check for its "
              "existence efficient (psignb xmm, xmm)");
static_assert(~kEmpty & ~kDeleted & kSentinel & 0x7F,
              "kEmpty and kDeleted must share an unset bit that is not shared "
              "by kSentinel to make the scalar test for MatchEmptyOrDeleted() "
              "efficient");
static_assert(kDeleted == -2,
              "kDeleted must be -2 to make the implementation of "
              "ConvertSpecialToEmptyAndFullToDeleted efficient");

// See below for the explanation of H2. Just here for documentation purposes;
// Swiss Table implementations rely on this being 7.
static constexpr int kH2Bits = 7;

static constexpr int kNotFullMask = (1 << kH2Bits);
static_assert(
    kEmpty & kDeleted & kSentinel & kNotFullMask,
    "Special markers need to have the MSB to make checking for them efficient");

// Extracts H1 from the given overall hash, which means discarding the lowest 7
// bits of the overall hash. H1 is used to determine the first group to probe.
inline static uint32_t H1(uint32_t hash) { return (hash >> kH2Bits); }

// Extracts H2 from the given overall hash, which means using only the lowest 7
// bits of the overall hash. H2 is stored in the control table byte for each
// present entry.
inline static swiss_table::ctrl_t H2(uint32_t hash) {
  return hash & ((1 << kH2Bits) - 1);
}

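// Example (illustrative): for the overall hash 0xABCD (0b1010101111001101),
//   H2(0xABCD) == 0b1001101 == 0x4D   (the low 7 bits, stored in the control
//                                      byte of a present entry)
//   H1(0xABCD) == 0xABCD >> 7 == 0x157  (used to seed the probe sequence)
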
#if V8_SWISS_TABLE_HAVE_SSE2_HOST
struct GroupSse2Impl {
  static constexpr size_t kWidth = 16;  // the number of slots per group

  explicit GroupSse2Impl(const ctrl_t* pos) {
    ctrl = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
  }

  // Returns a bitmask representing the positions of slots that match |hash|.
  BitMask<uint32_t, kWidth> Match(h2_t hash) const {
    auto match = _mm_set1_epi8(hash);
    return BitMask<uint32_t, kWidth>(
        _mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl)));
  }

  // Returns a bitmask representing the positions of empty slots.
  BitMask<uint32_t, kWidth> MatchEmpty() const {
#if V8_SWISS_TABLE_HAVE_SSSE3_HOST
    // This only works because kEmpty is -128.
    return BitMask<uint32_t, kWidth>(
        _mm_movemask_epi8(_mm_sign_epi8(ctrl, ctrl)));
#else
    return Match(static_cast<h2_t>(kEmpty));
#endif
  }

  __m128i ctrl;
};
#endif  // V8_SWISS_TABLE_HAVE_SSE2_HOST

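// Example (illustrative): Match above broadcasts |hash| into all 16 lanes
// (_mm_set1_epi8), compares all 16 control bytes at once (_mm_cmpeq_epi8,
// yielding 0xFF in each matching lane), and compresses the lane MSBs into a
// 16-bit mask (_mm_movemask_epi8). If the group's control bytes were
// {0x4D, kEmpty, 0x4D, kSentinel, ...}, Match(0x4D) would yield 0 and 2.
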
// A portable, inefficient version of GroupSse2Impl. This exists so SSE2-less
// hosts can generate snapshots for SSE2-capable targets.
struct GroupSse2Polyfill {
  static constexpr size_t kWidth = 16;  // the number of slots per group

  explicit GroupSse2Polyfill(const ctrl_t* pos) { memcpy(ctrl_, pos, kWidth); }

  // Returns a bitmask representing the positions of slots that match |hash|.
  BitMask<uint32_t, kWidth> Match(h2_t hash) const {
    uint32_t mask = 0;
    for (size_t i = 0; i < kWidth; i++) {
      if (static_cast<h2_t>(ctrl_[i]) == hash) {
        mask |= 1u << i;
      }
    }
    return BitMask<uint32_t, kWidth>(mask);
  }

  // Returns a bitmask representing the positions of empty slots.
  BitMask<uint32_t, kWidth> MatchEmpty() const {
    return Match(static_cast<h2_t>(kEmpty));
  }

 private:
  uint32_t MatchEmptyOrDeletedMask() const {
    uint32_t mask = 0;
    for (size_t i = 0; i < kWidth; i++) {
      if (ctrl_[i] < kSentinel) {
        mask |= 1u << i;
      }
    }
    return mask;
  }

  ctrl_t ctrl_[kWidth];
};

struct GroupPortableImpl {
  static constexpr size_t kWidth = 8;  // the number of slots per group

  explicit GroupPortableImpl(const ctrl_t* pos)
      : ctrl(base::ReadLittleEndianValue<uint64_t>(
            reinterpret_cast<uintptr_t>(const_cast<ctrl_t*>(pos)))) {}

  static constexpr uint64_t kMsbs = 0x8080808080808080ULL;
  static constexpr uint64_t kLsbs = 0x0101010101010101ULL;

  // Returns a bitmask representing the positions of slots that match |hash|.
  BitMask<uint64_t, kWidth, 3> Match(h2_t hash) const {
    // For the technique, see:
    // http://graphics.stanford.edu/~seander/bithacks.html##ValueInWord
    // (Determine if a word has a byte equal to n).
    //
    // Caveat: there are false positives, but:
    // - they only occur if |hash| actually appears elsewhere in |ctrl|
    // - they never occur on kEmpty, kDeleted, kSentinel
    // - they will be handled gracefully by subsequent checks in code
    //
    // Example:
    //   v = 0x1716151413121110
    //   hash = 0x12
    //   retval = (v - lsbs) & ~v & msbs = 0x0000000080800000
    auto x = ctrl ^ (kLsbs * hash);
    return BitMask<uint64_t, kWidth, 3>((x - kLsbs) & ~x & kMsbs);
  }

  // Returns a bitmask representing the positions of empty slots.
  BitMask<uint64_t, kWidth, 3> MatchEmpty() const {
    return BitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 6)) & kMsbs);
  }

  uint64_t ctrl;
};

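// Note (illustrative): MatchEmpty() above exploits the control encodings.
// kEmpty (0b10000000) is the only value with the MSB set whose bit 1 is
// clear; (~ctrl << 6) moves the negation of each byte's bit 1 into its MSB
// position, so ANDing with ctrl and kMsbs keeps exactly the kEmpty bytes.
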
// Determine which Group implementation SwissNameDictionary uses.
#if defined(V8_ENABLE_SWISS_NAME_DICTIONARY) && DEBUG
// TODO(v8:11388) If v8_enable_swiss_name_dictionary is enabled, we are supposed
// to use SwissNameDictionary as the dictionary backing store. If we want to use
// the SIMD version of SwissNameDictionary, that would require us to compile SSE
// instructions into the snapshot that exceed the minimum requirements for V8
// SSE support. Therefore, this fails a DCHECK. However, given the experimental
// nature of v8_enable_swiss_name_dictionary mode, we only expect this to be run
// by developers/bots that always have the necessary instructions. This means
// that if v8_enable_swiss_name_dictionary is enabled and debug mode isn't, we
// ignore the DCHECK that would fail in debug mode. However, if both
// v8_enable_swiss_name_dictionary and debug mode are enabled, we must fall back
// to the non-SSE implementation. Given that V8 requires SSE2, there should be a
// solution that doesn't require the workaround present here. Instead, the
// backend should only use SSE2 when compiling the SIMD version of
// SwissNameDictionary into the builtin.
using Group = GroupPortableImpl;
#elif V8_SWISS_TABLE_HAVE_SSE2_TARGET
// Use a matching group size between host and target.
#if V8_SWISS_TABLE_HAVE_SSE2_HOST
using Group = GroupSse2Impl;
#else
#if V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
// If we do not detect SSE2 when building for the ia32/x64 target, the
// V8_SWISS_TABLE_HAVE_SSE2_TARGET logic will incorrectly cause the final output
// to use the inefficient polyfill implementation. Detect this case and warn if
// it happens.
#warning "Did not detect required SSE2 support on ia32/x64."
#endif
using Group = GroupSse2Polyfill;
#endif
#else
using Group = GroupPortableImpl;
#endif

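// Example (illustrative sketch, not part of this header): how a container
// built on these helpers, such as SwissNameDictionary, locates a key. |ctrl|
// is the control table and |mask| the capacity mask; key comparison is
// elided.
//
//   ProbeSequence<Group::kWidth> seq(H1(hash), mask);
//   while (true) {
//     Group group(ctrl + seq.offset());
//     for (int i : group.Match(static_cast<h2_t>(H2(hash)))) {
//       uint32_t candidate = seq.offset(i);
//       // ... if the key stored at |candidate| matches, return it ...
//     }
//     // An empty slot anywhere in the group means the key is absent.
//     if (group.MatchEmpty()) break;
//     seq.next();
//   }
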
}  // namespace swiss_table
}  // namespace internal
}  // namespace v8

#endif  // V8_OBJECTS_SWISS_HASH_TABLE_HELPERS_H_