v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
memcopy.h
// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_UTILS_MEMCOPY_H_
#define V8_UTILS_MEMCOPY_H_

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>

#include "src/base/bits.h"
#include "src/base/logging.h"
#include "src/base/macros.h"
#include "src/utils/utils.h"

namespace v8 {
namespace internal {

using Address = uintptr_t;

// ----------------------------------------------------------------------------
// Generated memcpy/memmove for ia32 and arm.

void init_memcopy_functions();

#if defined(V8_TARGET_ARCH_IA32)
// Limit below which the extra overhead of the MemCopy function is likely
// to outweigh the benefits of faster copying.
const size_t kMinComplexMemCopy = 64;

// Copy memory area. No restrictions.
V8_EXPORT_PRIVATE void MemMove(void* dest, const void* src, size_t size);
using MemMoveFunction = void (*)(void* dest, const void* src, size_t size);

// Keep the distinction of "move" vs. "copy" for the benefit of other
// architectures.
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  MemMove(dest, src, size);
}
#elif defined(V8_HOST_ARCH_ARM)
using MemCopyUint8Function = void (*)(uint8_t* dest, const uint8_t* src,
                                      size_t size);
V8_EXPORT_PRIVATE extern MemCopyUint8Function memcopy_uint8_function;
V8_INLINE void MemCopyUint8Wrapper(uint8_t* dest, const uint8_t* src,
                                   size_t chars) {
  memcpy(dest, src, chars);
}
// For values < 16, the assembler function is slower than the inlined C code.
const size_t kMinComplexMemCopy = 16;
V8_INLINE void MemCopy(void* dest, const void* src, size_t size) {
  (*memcopy_uint8_function)(reinterpret_cast<uint8_t*>(dest),
                            reinterpret_cast<const uint8_t*>(src), size);
}
V8_EXPORT_PRIVATE V8_INLINE void MemMove(void* dest, const void* src,
                                         size_t size) {
  memmove(dest, src, size);
}

// For values < 12, the assembler function is slower than the inlined C code.
const int kMinComplexConvertMemCopy = 12;
#else
#if defined(V8_OPTIMIZE_WITH_NEON)
// We intentionally use misaligned reads/writes for NEON intrinsics, so
// alignment sanitization is explicitly disabled.
// Overlapping writes help to save instructions, e.g. doing 2 two-byte writes
// instead of 3 one-byte writes for count == 3.
template <typename IntType>
V8_INLINE V8_CLANG_NO_SANITIZE("alignment") void OverlappingWrites(
    void* dst, const void* src, size_t count) {
  *reinterpret_cast<IntType*>(dst) = *reinterpret_cast<const IntType*>(src);
  *reinterpret_cast<IntType*>(static_cast<uint8_t*>(dst) + count -
                              sizeof(IntType)) =
      *reinterpret_cast<const IntType*>(static_cast<const uint8_t*>(src) +
                                        count - sizeof(IntType));
}
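// Worked example for {OverlappingWrites} above (illustrative): for count == 3
// and IntType == uint16_t, the first store writes bytes [0, 2) of dst and the
// second writes bytes [1, 3). Byte 1 is written twice with identical data
// (the ranges may not overlap, so both reads see the same source bytes),
// yielding a 3-byte copy without any single-byte instructions.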

V8_CLANG_NO_SANITIZE("alignment")
inline void MemCopy(void* dst, const void* src, size_t count) {
  auto* dst_u = static_cast<uint8_t*>(dst);
  const auto* src_u = static_cast<const uint8_t*>(src);
  // Common cases. Handle before doing clz.
  if (count == 0) {
    return;
  }
  if (count == 1) {
    *dst_u = *src_u;
    return;
  }
  const size_t order =
      sizeof(count) * CHAR_BIT - base::bits::CountLeadingZeros(count - 1);
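  // {order} is the bit width of {count - 1}, i.e. ceil(log2(count)) for
  // count >= 2. Worked example (illustrative): count == 7 gives
  // count - 1 == 0b110, so order == 3 and the [5, 8] case below copies it
  // with two overlapping 4-byte writes.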
  switch (order) {
    case 1:  // count: [2, 2]
      *reinterpret_cast<uint16_t*>(dst_u) =
          *reinterpret_cast<const uint16_t*>(src_u);
      return;
    case 2:  // count: [3, 4]
      OverlappingWrites<uint16_t>(dst_u, src_u, count);
      return;
    case 3:  // count: [5, 8]
      OverlappingWrites<uint32_t>(dst_u, src_u, count);
      return;
    case 4:  // count: [9, 16]
      OverlappingWrites<uint64_t>(dst_u, src_u, count);
      return;
    case 5:  // count: [17, 32]
      vst1q_u8(dst_u, vld1q_u8(src_u));
      vst1q_u8(dst_u + count - sizeof(uint8x16_t),
               vld1q_u8(src_u + count - sizeof(uint8x16_t)));
      return;
    default:  // count: [33, ...]
      vst1q_u8(dst_u, vld1q_u8(src_u));
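      // The store above covers bytes [0, 16). Starting the loop at
      // count % 16 makes its final iteration end exactly at {count}; any
      // overlap with the head store simply rewrites the same bytes.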
      for (size_t i = count % sizeof(uint8x16_t); i < count;
           i += sizeof(uint8x16_t)) {
        vst1q_u8(dst_u + i, vld1q_u8(src_u + i));
      }
      return;
  }
}
#else  // !defined(V8_OPTIMIZE_WITH_NEON)
// Copy memory area to disjoint memory area.
inline void MemCopy(void* dest, const void* src, size_t size) {
  // Fast path for small sizes. The compiler will expand the {memcpy} for small
  // fixed sizes to a sequence of move instructions. This avoids the overhead
  // of the general {memcpy} function.
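  // For instance, {memcpy(dest, src, 8)} typically lowers to a single 8-byte
  // load/store pair on 64-bit targets when optimizations are enabled
  // (compiler-dependent).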
  switch (size) {
#define CASE(N)           \
  case N:                 \
    memcpy(dest, src, N); \
    return;
    CASE(1)
    CASE(2)
    CASE(3)
    CASE(4)
    CASE(5)
    CASE(6)
    CASE(7)
    CASE(8)
    CASE(9)
    CASE(10)
    CASE(11)
    CASE(12)
    CASE(13)
    CASE(14)
    CASE(15)
    CASE(16)
#undef CASE
    default:
      memcpy(dest, src, size);
      return;
  }
}
#endif  // !defined(V8_OPTIMIZE_WITH_NEON)
#if V8_TARGET_BIG_ENDIAN
inline void MemCopyAndSwitchEndianness(void* dst, void* src,
                                       size_t num_elements,
                                       size_t element_size) {
#define COPY_LOOP(type, reverse)                            \
  {                                                         \
    for (uint32_t i = 0; i < num_elements; i++) {           \
      type t;                                               \
      type* s = reinterpret_cast<type*>(src) + i;           \
      type* d = reinterpret_cast<type*>(dst) + i;           \
      memcpy(&t, reinterpret_cast<void*>(s), element_size); \
      t = reverse(t);                                       \
      memcpy(reinterpret_cast<void*>(d), &t, element_size); \
    }                                                       \
    return;                                                 \
  }

  switch (element_size) {
    case 1:
      MemCopy(dst, src, num_elements);
      return;
    case 2:
      COPY_LOOP(uint16_t, ByteReverse16);
    case 4:
      COPY_LOOP(uint32_t, ByteReverse32);
    case 8:
      COPY_LOOP(uint64_t, ByteReverse64);
    default:
      UNREACHABLE();
  }
#undef COPY_LOOP
}
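// Illustrative use: on a big-endian host, copying two 32-bit elements while
// reversing each element's bytes:
//   uint32_t src[2] = {0x11223344, 0x55667788};
//   uint32_t dst[2];
//   MemCopyAndSwitchEndianness(dst, src, 2, sizeof(uint32_t));
// afterwards dst[0] reads back as 0x44332211 and dst[1] as 0x88776655.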
#endif
V8_EXPORT_PRIVATE inline void MemMove(void* dest, const void* src,
                                      size_t size) {
  // Fast path for small sizes. The compiler will expand the {memmove} for
  // small fixed sizes to a sequence of move instructions. This avoids the
  // overhead of the general {memmove} function.
  switch (size) {
#define CASE(N)            \
  case N:                  \
    memmove(dest, src, N); \
    return;
    CASE(1)
    CASE(2)
    CASE(3)
    CASE(4)
    CASE(5)
    CASE(6)
    CASE(7)
    CASE(8)
    CASE(9)
    CASE(10)
    CASE(11)
    CASE(12)
    CASE(13)
    CASE(14)
    CASE(15)
    CASE(16)
#undef CASE
    default:
      memmove(dest, src, size);
      return;
  }
}
const size_t kMinComplexMemCopy = 8;
#endif  // V8_TARGET_ARCH_IA32

// Copies words from |src| to |dst|. The data spans must not overlap.
// |src| and |dst| must be TWord-size aligned.
template <size_t kBlockCopyLimit, typename T>
inline void CopyImpl(T* dst_ptr, const T* src_ptr, size_t count) {
  constexpr int kTWordSize = sizeof(T);
#ifdef DEBUG
  Address dst = reinterpret_cast<Address>(dst_ptr);
  Address src = reinterpret_cast<Address>(src_ptr);
  DCHECK(IsAligned(dst, kTWordSize));
  DCHECK(IsAligned(src, kTWordSize));
  DCHECK(((src <= dst) && ((src + count * kTWordSize) <= dst)) ||
         ((dst <= src) && ((dst + count * kTWordSize) <= src)));
#endif
  if (count == 0) return;

  // Use block-copying MemCopy if the segment we're copying is big
  // enough to justify the extra call/setup overhead.
  if (count < kBlockCopyLimit) {
    do {
      count--;
      *dst_ptr++ = *src_ptr++;
    } while (count > 0);
  } else {
    MemCopy(dst_ptr, src_ptr, count * kTWordSize);
  }
}

// Copies kSystemPointerSize-sized words from |src| to |dst|. The data spans
// must not overlap. |src| and |dst| must be kSystemPointerSize-aligned.
inline void CopyWords(Address dst, const Address src, size_t num_words) {
  static const size_t kBlockCopyLimit = 16;
  CopyImpl<kBlockCopyLimit>(reinterpret_cast<Address*>(dst),
                            reinterpret_cast<const Address*>(src), num_words);
}
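// Illustrative use (word-aligned, non-overlapping halves of one buffer):
//   Address buffer[32];
//   CopyWords(reinterpret_cast<Address>(&buffer[16]),
//             reinterpret_cast<Address>(&buffer[0]), 16);
// With kBlockCopyLimit == 16, a 16-word span is dispatched to MemCopy, while
// any shorter span is copied by the inline word loop in CopyImpl.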

// Copies data from |src| to |dst|. The data spans must not overlap.
template <typename T>
inline void CopyBytes(T* dst, const T* src, size_t num_bytes) {
  static_assert(sizeof(T) == 1);
  if (num_bytes == 0) return;
  CopyImpl<kMinComplexMemCopy>(dst, src, num_bytes);
}

inline void MemsetUint32(uint32_t* dest, uint32_t value, size_t counter) {
#if V8_HOST_ARCH_IA32 || V8_HOST_ARCH_X64
#define STOS "stosl"
#endif

#if defined(MEMORY_SANITIZER)
  // MemorySanitizer does not understand inline assembly.
#undef STOS
#endif

#if defined(__GNUC__) && defined(STOS)
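  // Constraint notes (GCC/Clang x86): "+&c" pins {counter} to the count
  // register (ECX/RCX) and "+&D" pins {dest} to the destination register
  // (EDI/RDI), both read-write and early-clobbered; "a" passes {value} in
  // EAX. After {cld} clears the direction flag, {rep stosl} stores EAX to
  // [EDI] {counter} times, advancing the pointer by 4 bytes per store.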
  asm volatile(
      "cld;"
      "rep ; " STOS
      : "+&c"(counter), "+&D"(dest)
      : "a"(value)
      : "memory", "cc");
#else
  for (size_t i = 0; i < counter; i++) {
    dest[i] = value;
  }
#endif

#undef STOS
}

inline void MemsetPointer(Address* dest, Address value, size_t counter) {
#if V8_HOST_ARCH_IA32
#define STOS "stosl"
#elif V8_HOST_ARCH_X64
#define STOS "stosq"
#endif

#if defined(MEMORY_SANITIZER)
  // MemorySanitizer does not understand inline assembly.
#undef STOS
#endif

#if defined(__GNUC__) && defined(STOS)
  asm volatile(
      "cld;"
      "rep ; " STOS
      : "+&c"(counter), "+&D"(dest)
      : "a"(value)
      : "memory", "cc");
#else
  for (size_t i = 0; i < counter; i++) {
    dest[i] = value;
  }
#endif

#undef STOS
}

template <typename T, typename U>
inline void MemsetPointer(T** dest, U* value, size_t counter) {
#ifdef DEBUG
  T* a = nullptr;
  U* b = nullptr;
  a = b;  // Fake assignment to check assignability.
  USE(a);
#endif  // DEBUG
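  // The fake assignment above is a compile-time check (DEBUG builds only)
  // that a U* converts to a T*, e.g. a derived-class pointer filling an
  // array of base-class pointers.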
  MemsetPointer(reinterpret_cast<Address*>(dest),
                reinterpret_cast<Address>(value), counter);
}

template <typename T>
inline void MemsetPointer(T** dest, std::nullptr_t, size_t counter) {
  MemsetPointer(reinterpret_cast<Address*>(dest), Address{0}, counter);
}

// Copy from 8-bit/16-bit chars to 8-bit/16-bit chars. Values are
// zero-extended if needed. Ranges are not allowed to overlap.
// The separate declaration is needed for V8_NONNULL, which is not allowed on
// a definition.
template <typename SrcType, typename DstType>
void CopyChars(DstType* dst, const SrcType* src, size_t count) V8_NONNULL(1, 2);

template <typename SrcType, typename DstType>
void CopyChars(DstType* dst, const SrcType* src, size_t count) {
  static_assert(std::is_integral<SrcType>::value);
  static_assert(std::is_integral<DstType>::value);
  using SrcTypeUnsigned = typename std::make_unsigned<SrcType>::type;
  using DstTypeUnsigned = typename std::make_unsigned<DstType>::type;

#ifdef DEBUG
  // Check for no overlap, otherwise {std::copy_n} cannot be used.
  Address src_start = reinterpret_cast<Address>(src);
  Address src_end = src_start + count * sizeof(SrcType);
  Address dst_start = reinterpret_cast<Address>(dst);
  Address dst_end = dst_start + count * sizeof(DstType);
  DCHECK(src_end <= dst_start || dst_end <= src_start);
#endif

  auto* dst_u = reinterpret_cast<DstTypeUnsigned*>(dst);
  auto* src_u = reinterpret_cast<const SrcTypeUnsigned*>(src);

#if defined(V8_OPTIMIZE_WITH_NEON)
  if constexpr (sizeof(DstType) == 1 && sizeof(SrcType) == 1) {
    // Use the SIMD-optimized memcpy.
    MemCopy(dst, src, count);
    return;
  }
#endif  // defined(V8_OPTIMIZE_WITH_NEON)

  // Atom CPUs in particular profit from this explicit instantiation for small
  // counts. It gives up to a 20 percent improvement for microbenchmarks such
  // as joining an array of small integers (2019-10-16).
  switch (count) {
#define CASE(N)                   \
  case N:                         \
    std::copy_n(src_u, N, dst_u); \
    return;
    CASE(1)
    CASE(2)
    CASE(3)
    CASE(4)
    CASE(5)
    CASE(6)
    CASE(7)
    CASE(8)
    CASE(9)
    CASE(10)
    CASE(11)
    CASE(12)
    CASE(13)
    CASE(14)
    CASE(15)
    CASE(16)
#undef CASE
    default:
      std::copy_n(src_u, count, dst_u);
      return;
  }
}
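// Illustrative use: widening 8-bit characters into a 16-bit buffer
// zero-extends each value (the ranges must not overlap):
//   const uint8_t narrow[4] = {'V', '8', '!', '\0'};
//   uint16_t wide[4];
//   CopyChars(wide, narrow, 4);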

}  // namespace internal
}  // namespace v8

#endif  // V8_UTILS_MEMCOPY_H_