v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
string.h
Go to the documentation of this file.
1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_OBJECTS_STRING_H_
6#define V8_OBJECTS_STRING_H_
7
8#include <memory>
9#include <optional>
10
11#include "src/base/bits.h"
14#include "src/base/strings.h"
15#include "src/common/globals.h"
16#include "src/heap/heap.h"
18#include "src/objects/map.h"
19#include "src/objects/name.h"
20#include "src/objects/smi.h"
21#include "src/objects/tagged.h"
24#include "third_party/simdutf/simdutf.h"
25
26// Has to be the last include (doesn't have include guards):
28
29namespace v8::internal {
30
31namespace maglev {
32class CheckedInternalizedString;
33class BuiltinStringFromCharCode;
34class MaglevGraphBuilder;
35} // namespace maglev
36
37namespace wasm {
38namespace baseline {
39class LiftoffCompiler;
40} // namespace baseline
41} // namespace wasm
42
44
45enum InstanceType : uint16_t;
46
47// The characteristics of a string are stored in its map. Retrieving these
48// few bits of information is moderately expensive, involving two memory
49// loads where the second is dependent on the first. To improve efficiency
50// the shape of the string is given its own class so that it can be retrieved
51// once and used for several string operations. A StringShape is small enough
52// to be passed by value and is immutable, but be aware that flattening a
53// string can potentially alter its shape. Also be aware that a GC caused by
54// something else can alter the shape of a string due to ConsString
55// shortcutting. Keeping these restrictions in mind has proven to be error-
56// prone and so we no longer put StringShapes in variables unless there is a
57// concrete performance benefit at that particular point in the code.
59 public:
60 V8_INLINE explicit StringShape(const Tagged<String> s);
61 V8_INLINE explicit StringShape(const Tagged<String> s,
62 PtrComprCageBase cage_base);
65 V8_INLINE bool IsSequential() const;
66 V8_INLINE bool IsExternal() const;
67 V8_INLINE bool IsCons() const;
68 V8_INLINE bool IsSliced() const;
69 V8_INLINE bool IsThin() const;
70 V8_INLINE bool IsDirect() const;
71 V8_INLINE bool IsIndirect() const;
72 V8_INLINE bool IsUncachedExternal() const;
73 V8_INLINE bool IsExternalOneByte() const;
74 V8_INLINE bool IsExternalTwoByte() const;
75 V8_INLINE bool IsSequentialOneByte() const;
76 V8_INLINE bool IsSequentialTwoByte() const;
77 V8_INLINE bool IsInternalized() const;
78 V8_INLINE bool IsShared() const;
80 V8_INLINE uint32_t encoding_tag() const;
83#ifdef DEBUG
84 inline uint32_t type() const { return type_; }
85 inline void invalidate() { valid_ = false; }
86 inline bool valid() const { return valid_; }
87#else
88 inline void invalidate() {}
89#endif
90
91 inline bool operator==(const StringShape& that) const {
92 return that.type_ == this->type_;
93 }
94
95 private:
96 uint32_t type_;
97#ifdef DEBUG
98 inline void set_valid() { valid_ = true; }
99 bool valid_;
100#else
101 inline void set_valid() {}
102#endif
103};
104
105// The String abstract class captures JavaScript string values:
106//
107// Ecma-262:
108// 4.3.16 String Value
109// A string value is a member of the type String and is a finite
110// ordered sequence of zero or more 16-bit unsigned integer values.
111//
112// All string values have a length field.
113V8_OBJECT class String : public Name {
114 public:
116
117 // Representation of the flat content of a String.
118 // A non-flat string doesn't have flat content.
119 // A flat string has content that's encoded as a sequence of either
120 // one-byte chars or two-byte UC16.
121 // Returned by String::GetFlatContent().
122 // Not safe to use from concurrent background threads.
123 // TODO(solanes): Move FlatContent into FlatStringReader, and make it private.
124 // This would de-duplicate code, as well as taking advantage of the fact that
125 // FlatStringReader is relocatable.
127 public:
128 inline ~FlatContent();
129
130 // Returns true if the string is flat and this structure contains content.
131 bool IsFlat() const { return state_ != NON_FLAT; }
132 // Returns true if the structure contains one-byte content.
133 bool IsOneByte() const { return state_ == ONE_BYTE; }
134 // Returns true if the structure contains two-byte content.
135 bool IsTwoByte() const { return state_ == TWO_BYTE; }
136
137 // Return the one byte content of the string. Only use if IsOneByte()
138 // returns true.
143 // Return the two-byte content of the string. Only use if IsTwoByte()
144 // returns true.
149
150 base::uc16 Get(uint32_t i) const {
151 DCHECK(i < length_);
153 if (state_ == ONE_BYTE) return onebyte_start[i];
154 return twobyte_start[i];
155 }
156
157 bool UsesSameString(const FlatContent& other) const {
158 return onebyte_start == other.onebyte_start;
159 }
160
161 // It is almost always a bug if the contents of a FlatContent changes during
162 // its lifetime, which can happen due to GC or bugs in concurrent string
163 // access. Rarely, callers need the ability to GC and have ensured safety in
164 // other ways, such as in IrregexpInterpreter. Those callers can disable the
165 // checksum verification with this call.
167#ifdef ENABLE_SLOW_DCHECKS
169#endif
170 }
171
172 uint32_t length() const { return length_; }
173
174 private:
176
177 // Constructors only used by String::GetFlatContent().
178 inline FlatContent(const uint8_t* start, uint32_t length,
179 const DisallowGarbageCollection& no_gc);
180 inline FlatContent(const base::uc16* start, uint32_t length,
181 const DisallowGarbageCollection& no_gc);
183 : onebyte_start(nullptr), length_(0), state_(NON_FLAT), no_gc_(no_gc) {}
184
185 union {
186 const uint8_t* onebyte_start;
188 };
189 uint32_t length_;
192
193 static constexpr uint32_t kChecksumVerificationDisabled = 0;
194
195#ifdef ENABLE_SLOW_DCHECKS
196 inline uint32_t ComputeChecksum() const;
197
198 uint32_t checksum_;
199#endif
200
201 friend class String;
202 friend class IterableSubString;
204
205 template <typename IsolateT>
207 void MakeThin(IsolateT* isolate, Tagged<String> canonical);
208
209 template <typename Char>
211 const DisallowGarbageCollection& no_gc);
212
213 // Get chars from sequential or external strings. May only be called when a
214 // SharedStringAccessGuard is not needed (i.e. on the main thread or on
215 // read-only strings).
216 template <typename Char>
217 inline const Char* GetDirectStringChars(
218 const DisallowGarbageCollection& no_gc) const;
219
220 // Get chars from sequential or external strings.
221 template <typename Char>
222 inline const Char* GetDirectStringChars(
223 const DisallowGarbageCollection& no_gc,
224 const SharedStringAccessGuardIfNeeded& access_guard) const;
225
226 // Returns the address of the character at an offset into this string.
227 // Requires: this->IsFlat()
228 const uint8_t* AddressOfCharacterAt(uint32_t start_index,
229 const DisallowGarbageCollection& no_gc);
230
231 inline uint32_t length() const;
232 inline uint32_t length(AcquireLoadTag) const;
233
234 inline void set_length(uint32_t hash);
235 inline void set_length(uint32_t hash, ReleaseStoreTag);
236
237 // Returns whether this string has only one-byte chars, i.e. all of them can
238 // be one-byte encoded. This might be the case even if the string is
239 // two-byte. Such strings may appear when the embedder prefers
240 // two-byte external representations even for one-byte data.
241 inline bool IsOneByteRepresentation() const;
242 inline bool IsTwoByteRepresentation() const;
243
244 // Cons and slices have an encoding flag that may not represent the actual
245 // encoding of the underlying string. This is taken into account here.
246 // This function is static because that helps it get inlined.
247 // Requires: string.IsFlat()
248 static inline bool IsOneByteRepresentationUnderneath(Tagged<String> string);
249
250 // Get and set individual two byte chars in the string.
251 inline void Set(uint32_t index, uint16_t value);
252 // Get individual two byte char in the string. Repeated calls
253 // to this method are not efficient unless the string is flat.
254 // If it is called from a background thread, the LocalIsolate version should
255 // be used.
256 V8_INLINE uint16_t Get(uint32_t index) const;
257 V8_INLINE uint16_t Get(uint32_t index, Isolate* isolate) const;
258 V8_INLINE uint16_t Get(uint32_t index, LocalIsolate* local_isolate) const;
259 // Method to pass down the access_guard. Useful for recursive calls such as
260 // ThinStrings where we go String::Get into ThinString::Get into String::Get
261 // again for the internalized string.
262 V8_INLINE uint16_t
263 Get(uint32_t index,
264 const SharedStringAccessGuardIfNeeded& access_guard) const;
265
266 // ES6 section 7.1.3.1 ToNumber Applied to the String Type
267 template <template <typename> typename HandleType>
268 requires(std::is_convertible_v<HandleType<String>, DirectHandle<String>>)
269 static HandleType<Number> ToNumber(Isolate* isolate,
270 HandleType<String> subject);
271
272 // Flattens the string. Checks first inline to see if it is
273 // necessary. The given `string` is in-place flattened, i.e. both
274 //
275 // `t = String::Flatten(s); s->IsFlat()` and
276 // `t = String::Flatten(s); t->IsFlat()`
277 //
278 // hold. `t` may be an unwrapped but semantically equivalent component of `s`.
279 //
280 // Non-flat ConsStrings are physically flattened by allocating a sequential
281 // string with the same data as the given string. The input `string` is
282 // mutated to a degenerate form, where the first component is the new
283 // sequential string and the second component is the empty string. This form
284 // is considered flat, i.e. the string is in-place flattened.
285 //
286 // Degenerate cons strings are handled specially by the garbage
287 // collector (see IsShortcutCandidate).
288
289 template <typename T, template <typename> typename HandleType>
290 requires(std::is_convertible_v<HandleType<T>, DirectHandle<String>>)
291 static V8_INLINE HandleType<String> Flatten(
292 Isolate* isolate, HandleType<T> string,
294 template <typename T, template <typename> typename HandleType>
295 requires(std::is_convertible_v<HandleType<T>, DirectHandle<String>>)
296 static V8_INLINE HandleType<String> Flatten(
297 LocalIsolate* isolate, HandleType<T> string,
299
300 // Tries to return the content of a flat string as a structure holding either
301 // a flat vector of char or of base::uc16.
302 // If the string isn't flat, and therefore doesn't have flat content, the
303 // returned structure will report so, and can't provide a vector of either
304 // kind.
305 // When using a SharedStringAccessGuard, the guard must outlive the
306 // returned FlatContent.
312
313 // Returns the parent of a sliced string or first part of a flat cons string.
314 // Requires: StringShape(this).IsIndirect() && this->IsFlat()
315 inline Tagged<String> GetUnderlying() const;
316
317 // Shares the string. Checks inline if the string is already shared or can be
318 // shared by transitioning its map in-place. If neither is possible, flattens
319 // and copies into a new shared sequential string.
320 template <typename T, template <typename> typename HandleType>
321 requires(std::is_convertible_v<HandleType<T>, DirectHandle<String>>)
322 static inline HandleType<String> Share(Isolate* isolate,
323 HandleType<T> string);
324
325 // String relational comparison, implemented according to ES6 section 7.2.11
326 // Abstract Relational Comparison (step 5): The comparison of Strings uses a
327 // simple lexicographic ordering on sequences of code unit values. There is no
328 // attempt to use the more complex, semantically oriented definitions of
329 // character or string equality and collating order defined in the Unicode
330 // specification. Therefore String values that are canonically equal according
331 // to the Unicode standard could test as unequal. In effect this algorithm
332 // assumes that both Strings are already in normalized form. Also, note that
333 // for strings containing supplementary characters, lexicographic ordering on
334 // sequences of UTF-16 code unit values differs from that on sequences of code
335 // point values.
339
340 // Perform ES6 21.1.3.8, including checking arguments.
344 // Perform string match of pattern on subject, starting at start index.
345 // Caller must ensure that 0 <= start_index <= sub->length(), as this does not
346 // check any arguments.
347 static int IndexOf(Isolate* isolate, DirectHandle<String> receiver,
348 DirectHandle<String> search, uint32_t start_index);
349
350 static Tagged<Object> LastIndexOf(Isolate* isolate,
354
355 // Encapsulates logic related to a match and its capture groups as required
356 // by GetSubstitution.
357 class Match {
358 public:
362
363 // A named capture can be unmatched (either not specified in the pattern,
364 // or specified but unmatched in the current string), or matched.
366
367 virtual int CaptureCount() = 0;
368 virtual bool HasNamedCaptures() = 0;
370 bool* capture_exists) = 0;
372 CaptureState* state) = 0;
373
374 virtual ~Match() = default;
375 };
376
377 // ES#sec-getsubstitution
378 // GetSubstitution(matched, str, position, captures, replacement)
379 // Expand the $-expressions in the string and return a new string with
380 // the result.
381 // A {start_index} can be passed to specify where to start scanning the
382 // replacement string.
384 Isolate* isolate, Match* match, DirectHandle<String> replacement,
385 uint32_t start_index = 0);
386
387 // String equality operations.
388 inline bool Equals(Tagged<String> other) const;
389 inline static bool Equals(Isolate* isolate, DirectHandle<String> one,
391
393
394 // Check if this string matches the given vector of characters, either as a
395 // whole string or just a prefix.
396 //
397 // The Isolate is passed as "evidence" that this call is on the main thread,
398 // and to distinguish from the LocalIsolate overload.
399 template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
400 inline bool IsEqualTo(base::Vector<const Char> str, Isolate* isolate) const;
401
402 // Check if this string matches the given vector of characters, either as a
403 // whole string or just a prefix.
404 //
405 // This is main-thread only, like the Isolate* overload, but additionally
406 // computes the PtrComprCageBase for IsEqualToImpl.
407 template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
408 inline bool IsEqualTo(base::Vector<const Char> str) const;
409
410 // Check if this string matches the given vector of characters, either as a
411 // whole string or just a prefix.
412 //
413 // The LocalIsolate is passed to provide access to the string access lock,
414 // which is taken when reading the string's contents on a background thread.
415 template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
416 inline bool IsEqualTo(base::Vector<const Char> str,
417 LocalIsolate* isolate) const;
418
421
422 // Returns true if the |str| is a valid ECMAScript identifier.
423 static bool IsIdentifier(Isolate* isolate, DirectHandle<String> str);
424
425 // Return a UTF8 representation of this string.
426 //
427 // The output string is null terminated and any null characters in the source
428 // string are replaced with spaces. The length of the output buffer is
429 // returned in length_output if that is not a null pointer. This string
430 // should be nearly flat, otherwise the performance of this method may be
431 // very slow (quadratic in the length).
432 std::unique_ptr<char[]> ToCString(uint32_t offset, uint32_t length,
433 size_t* length_output = nullptr);
434
435 V8_EXPORT_PRIVATE std::unique_ptr<char[]> ToCString(
436 size_t* length_output = nullptr);
437
438 // Externalization.
439 template <typename T>
440 bool MarkForExternalizationDuringGC(Isolate* isolate, T* resource);
441 template <typename T>
443 void MakeExternalDuringGC(Isolate* isolate, T* resource);
445 Isolate* isolate, v8::String::ExternalStringResource* resource);
449
450 // Conversion.
451 // "array index": an index allowed by the ES spec for JSArrays.
452 inline bool AsArrayIndex(uint32_t* index);
453
454 // This is used for calculating array indices but differs from an
455 // Array Index in the regard that this does not support the full
456 // array index range. This only supports non-negative numbers less
457 // than or equal to INT_MAX.
458 //
459 // String::AsArrayIndex might be a better fit if you're looking to
460 // calculate the array index.
461 //
462 // if val < 0 or val > INT_MAX, returns -1
463 // if 0 <= val <= INT_MAX, returns val
464 static int32_t ToArrayIndex(Address addr);
465
466 // "integer index": the string is the decimal representation of an
467 // integer in the range of a size_t. Useful for TypedArray accesses.
468 inline bool AsIntegerIndex(size_t* index);
469
470 // Trimming.
472
473 V8_EXPORT_PRIVATE void PrintOn(FILE* out);
474 V8_EXPORT_PRIVATE void PrintOn(std::ostream& out);
475
476 // Printing utility functions.
477 // - PrintUC16 prints the raw string contents to the given stream.
478 // Non-printable characters are formatted as hex, but otherwise the string
479 // is printed as-is.
480 // - StringShortPrint and StringPrint have extra formatting: they add a
481 // prefix and suffix depending on the string kind, may add other information
482 // such as the string heap object address, may truncate long strings, etc.
483 const char* PrefixForDebugPrint() const;
484 const char* SuffixForDebugPrint() const;
485 void StringShortPrint(StringStream* accumulator);
486 void PrintUC16(std::ostream& os, int start = 0, int end = -1);
487 void PrintUC16(StringStream* accumulator, int start, int end);
488
489 // Dispatched behavior.
490#if defined(DEBUG) || defined(OBJECT_PRINT)
491 char* ToAsciiArray();
492#endif
495
496 inline bool IsFlat() const;
497 inline bool IsShared() const;
498
499 // Max char codes.
502 static const int kMaxUtf16CodeUnit = 0xffff;
503 static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
504 static const base::uc32 kMaxCodePoint = 0x10ffff;
505
506 // Maximal string length.
507 // The max length is different on 32 and 64 bit platforms. Max length for
508 // 32-bit platforms is ~268.4M chars. On 64-bit platforms, max length is
509 // ~536.8M chars.
510 // See include/v8.h for the definition.
511 static const uint32_t kMaxLength = v8::String::kMaxLength;
512
513 // Max length for computing hash. For strings longer than this limit the
514 // string length is used as the hash value.
515 static const uint32_t kMaxHashCalcLength = 16383;
516
517 // Limit for truncation in short printing.
518 static const uint32_t kMaxShortPrintLength = 1024;
519
520 // Helper function for flattening strings.
521 template <typename SinkCharT>
523 static void WriteToFlat(Tagged<String> source, SinkCharT* sink,
524 uint32_t start, uint32_t length);
525 template <typename SinkCharT>
526 static void WriteToFlat(Tagged<String> source, SinkCharT* sink,
527 uint32_t start, uint32_t length,
528 const SharedStringAccessGuardIfNeeded& access_guard);
529
530 // TODO(jgruber): This is an ongoing performance experiment. Once done, we'll
531 // rename this to something more appropriate.
532 //
533 // `src_index` and `length` always refer to the desired substring within
534 // `src`. `dst` is guaranteed to fit `length`, and is written to
535 // starting at index 0.
536 template <typename SinkCharT>
538 static void WriteToFlat2(SinkCharT* dst, Tagged<ConsString> src,
539 uint32_t src_index, uint32_t length,
541 const DisallowGarbageCollection& no_gc);
542
543 // Computes the number of bytes required for the Utf8 encoding of the string.
544 //
545 // Note: if the given string is not already flat, it will be flattened by
546 // this operation to improve the performance of utf8 encoding.
547 static inline size_t Utf8Length(Isolate* isolate,
548 DirectHandle<String> string);
549
550 // Encodes the given string as Utf8 into the provided buffer.
551 //
552 // This operation will write at most {capacity} bytes into the output buffer
553 // but may write fewer bytes. The number of bytes written is returned. If the
554 // result should be null terminated, a null terminator will always be
555 // written, even if not the entire string could be encoded. As such, when
556 // null termination is requested, the capacity must be larger than zero.
557 //
558 // Note: if the given string is not already flat, it will be flattened by
559 // this operation to improve the performance of utf8 encoding.
560 enum class Utf8EncodingFlag {
561 kNoFlags = 0,
562 kNullTerminate = 1u << 0,
563 kReplaceInvalid = 1u << 1,
564 };
566 static size_t WriteUtf8(Isolate* isolate, DirectHandle<String> string,
567 char* buffer, size_t capacity,
568 Utf8EncodingFlags flags,
569 size_t* processed_characters_return = nullptr);
570
571 // Returns true if this string has no unpaired surrogates and false otherwise.
572 static inline bool IsWellFormedUnicode(Isolate* isolate,
573 DirectHandle<String> string);
574
575 static inline bool IsAscii(const char* chars, uint32_t length) {
576 return simdutf::validate_ascii(chars, length);
577 }
578
579 static inline bool IsAscii(const uint8_t* chars, uint32_t length) {
580 return simdutf::validate_ascii(reinterpret_cast<const char*>(chars),
581 length);
582 }
583
584 static inline uint32_t NonOneByteStart(const base::uc16* chars,
585 uint32_t length) {
586 DCHECK(IsAligned(reinterpret_cast<Address>(chars), sizeof(base::uc16)));
587 const uint16_t* start = chars;
588 const uint16_t* limit = chars + length;
589
590 if (static_cast<size_t>(length) >= kUIntptrSize) {
591 // Check unaligned chars.
592 while (!IsAligned(reinterpret_cast<Address>(chars), kUIntptrSize)) {
593 if (*chars > unibrow::Latin1::kMaxChar) {
594 return static_cast<uint32_t>(chars - start);
595 }
596 ++chars;
597 }
598
599 // Check aligned words.
600 static_assert(unibrow::Latin1::kMaxChar == 0xFF);
601#ifdef V8_TARGET_LITTLE_ENDIAN
602 const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0xFF00;
603#else
604 const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0x00FF;
605#endif
606 while (chars + sizeof(uintptr_t) <= limit) {
607 if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
608 break;
609 }
610 chars += (sizeof(uintptr_t) / sizeof(base::uc16));
611 }
612 }
613
614 // Check remaining unaligned chars, or find non-one-byte char in word.
615 while (chars < limit) {
616 if (*chars > unibrow::Latin1::kMaxChar) {
617 return static_cast<uint32_t>(chars - start);
618 }
619 ++chars;
620 }
621
622 return static_cast<uint32_t>(chars - start);
623 }
624
625 static inline bool IsOneByte(const base::uc16* chars, uint32_t length) {
626 return NonOneByteStart(chars, length) >= length;
627 }
628
629 // May only be called when a SharedStringAccessGuard is not needed (i.e. on
630 // the main thread or on read-only strings).
631 template <class Visitor>
632 static inline Tagged<ConsString> VisitFlat(Visitor* visitor,
633 Tagged<String> string,
634 int offset = 0);
635
636 template <class Visitor>
637 static inline Tagged<ConsString> VisitFlat(
638 Visitor* visitor, Tagged<String> string, int offset,
639 const SharedStringAccessGuardIfNeeded& access_guard);
640
641 static uint32_t constexpr kInlineLineEndsSize = 32;
643
644 template <typename IsolateT>
645 static LineEndsVector CalculateLineEndsVector(IsolateT* isolate,
647 bool include_ending_line);
648
649 template <typename IsolateT>
650 static Handle<FixedArray> CalculateLineEnds(IsolateT* isolate,
652 bool include_ending_line);
653
654 // Returns true if string can be internalized without copying. In such cases
655 // the string is inserted into the string table and its map is changed to an
656 // internalized equivalent.
657 static inline bool IsInPlaceInternalizable(Tagged<String> string);
658 static inline bool IsInPlaceInternalizable(InstanceType instance_type);
659
661 InstanceType instance_type);
662
663 // Run different behavior for each concrete string class type by forwarding
664 // to a dispatcher which is overloaded on that class.
665 template <typename TDispatcher>
666 V8_INLINE auto DispatchToSpecificType(TDispatcher&& dispatcher) const
667 // Help out the type deduction in case TDispatcher returns different
668 // types for different strings.
669 -> std::common_type_t<
670 decltype(dispatcher(Tagged<SeqOneByteString>{})),
671 decltype(dispatcher(Tagged<SeqTwoByteString>{})),
672 decltype(dispatcher(Tagged<ExternalOneByteString>{})),
673 decltype(dispatcher(Tagged<ExternalTwoByteString>{})),
674 decltype(dispatcher(Tagged<ThinString>{})),
675 decltype(dispatcher(Tagged<ConsString>{})),
676 decltype(dispatcher(Tagged<SlicedString>{}))>;
677
678 // Similar to the above, but using instance type. Since there is no
679 // string to cast, the dispatcher has static methods for handling
680 // each concrete type.
681 template <typename TDispatcher, typename... TArgs>
682 static inline auto DispatchToSpecificTypeWithoutCast(
683 InstanceType instance_type, TArgs&&... args);
684
685 private:
686 friend class Name;
687 friend class CodeStubAssembler;
690 friend class SandboxTesting;
692
693 friend struct OffsetsForDebug;
694 friend class Accessors;
701
702 // Implementation of the Get() public methods. Do not use directly.
703 V8_INLINE uint16_t
704 GetImpl(uint32_t index,
705 const SharedStringAccessGuardIfNeeded& access_guard) const;
706
707 // Implementation of the IsEqualTo() public methods. Do not use directly.
708 template <EqualityType kEqType, typename Char>
711 const SharedStringAccessGuardIfNeeded& access_guard) const;
712
713 // Out-of-line IsEqualToImpl for ConsString.
714 template <typename Char>
717 const SharedStringAccessGuardIfNeeded& access_guard);
718
719 // Note: This is an inline method template and exporting it for windows
720 // component builds works only without the EXPORT_TEMPLATE_DECLARE macro.
721 template <template <typename> typename HandleType>
722 requires(std::is_convertible_v<HandleType<String>, DirectHandle<String>>)
723 V8_EXPORT_PRIVATE inline static HandleType<String> SlowFlatten(
724 Isolate* isolate, HandleType<ConsString> cons, AllocationType allocation);
725
726 V8_EXPORT_PRIVATE V8_INLINE static std::optional<FlatContent>
728 Tagged<String> string, uint32_t offset,
729 uint32_t length,
734
735 template <template <typename> typename HandleType>
736 requires(std::is_convertible_v<HandleType<String>, DirectHandle<String>>)
738 String> SlowShare(Isolate* isolate, HandleType<String> source);
739
740 // Slow case of String::Equals. This implementation works on any strings
741 // but it is most efficient on strings that are almost flat.
745
746 V8_EXPORT_PRIVATE static bool SlowEquals(Isolate* isolate,
749
750 // Slow case of AsArrayIndex.
751 V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
752 V8_EXPORT_PRIVATE bool SlowAsIntegerIndex(size_t* index);
753
754 // Compute and set the hash code.
755 // The value returned is always a computed hash, even if the value stored is
756 // a forwarding index.
758 V8_EXPORT_PRIVATE uint32_t
760
761 uint32_t length_;
763
764template <>
766 static constexpr int kHeaderSize = sizeof(String);
767
768 // There are several defining limits imposed by our current implementation:
769 // - any string's length must fit into a Smi.
770 static_assert(String::kMaxLength <= kSmiMaxValue,
771 "String length must fit into a Smi");
772 // - adding two string lengths must still fit into a 32-bit int without
773 // overflow
774 static_assert(String::kMaxLength * 2 <= kMaxInt,
775 "String::kMaxLength * 2 must fit into an int32");
776 // - any heap object's size in bytes must be able to fit into a Smi, because
777 // its space on the heap might be filled with a Filler; for strings this
778 // means SeqTwoByteString::kMaxSize must be able to fit into a Smi.
779 static_assert(String::kMaxLength * 2 + kHeaderSize <= kSmiMaxValue,
780 "String object size in bytes must fit into a Smi");
781 // - any heap object's size in bytes must be able to fit into an int, because
782 // that's what our object handling code uses almost everywhere.
783 static_assert(String::kMaxLength * 2 + kHeaderSize <= kMaxInt,
784 "String object size in bytes must fit into an int");
785};
786
787// clang-format off
789 void String::WriteToFlat(Tagged<String> source, uint8_t* sink, uint32_t from,
790 uint32_t to);
792 void String::WriteToFlat(Tagged<String> source, uint16_t* sink, uint32_t from,
793 uint32_t to);
795 void String::WriteToFlat(Tagged<String> source, uint8_t* sink, uint32_t from,
796 uint32_t to, const SharedStringAccessGuardIfNeeded&);
798 void String::WriteToFlat(Tagged<String> source, uint16_t* sink, uint32_t from,
799 uint32_t to, const SharedStringAccessGuardIfNeeded&);
800// clang-format on
801
803 public:
804 inline SubStringRange(Tagged<String> string,
805 const DisallowGarbageCollection& no_gc, int first = 0,
806 int length = -1);
807 class iterator;
808 inline iterator begin();
809 inline iterator end();
810
811 private:
816};
817
818// The SeqString abstract class captures sequential string values.
819class SeqString : public String {
820 public:
821 // Truncate the string in-place if possible and return the result.
822 // In case of new_length == 0, the empty string is returned without
823 // truncating the original string.
824 V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Isolate* isolate,
825 Handle<SeqString> string,
826 uint32_t new_length);
827
829 const int data_size;
830 const int padding_size;
831 bool operator==(const DataAndPaddingSizes& other) const {
832 return data_size == other.data_size && padding_size == other.padding_size;
833 }
834 };
835 DataAndPaddingSizes GetDataAndPaddingSizes() const;
836
837 // Zero out only the padding bytes of this string.
838 void ClearPadding();
839
841};
842
844 // TODO(neis): Possibly move some stuff from String here.
846
847// The OneByteString class captures sequential one-byte string objects.
848// Each character in the OneByteString is a one-byte character.
850 public:
851 static const bool kHasOneByteEncoding = true;
852 using Char = uint8_t;
853
854 V8_INLINE static constexpr int32_t DataSizeFor(int32_t length);
855 V8_INLINE static constexpr int32_t SizeFor(int32_t length);
856
857 // Dispatched behavior. The non SharedStringAccessGuardIfNeeded method is also
858 // defined for convenience and it will check that the access guard is not
859 // needed.
860 inline uint8_t Get(uint32_t index) const;
861 inline uint8_t Get(uint32_t index,
862 const SharedStringAccessGuardIfNeeded& access_guard) const;
863 inline void SeqOneByteStringSet(uint32_t index, uint16_t value);
864 inline void SeqOneByteStringSetChars(uint32_t index, const uint8_t* string,
865 uint32_t length);
866
867 // Get the address of the characters in this string.
868 inline Address GetCharsAddress() const;
869
870 // Get a pointer to the characters of the string. May only be called when a
871 // SharedStringAccessGuard is not needed (i.e. on the main thread or on
872 // read-only strings).
873 V8_INLINE uint8_t* GetChars(const DisallowGarbageCollection& no_gc);
874
875 // Get a pointer to the characters of the string.
876 V8_INLINE uint8_t* GetChars(
877 const DisallowGarbageCollection& no_gc,
878 const SharedStringAccessGuardIfNeeded& access_guard);
879
880 DataAndPaddingSizes GetDataAndPaddingSizes() const;
881
882 // Initializes padding bytes. Potentially zeros tail of the payload too!
883 inline void clear_padding_destructively(uint32_t length);
884
885 // Maximal memory usage for a single sequential one-byte string.
886 static const uint32_t kMaxCharsSize = kMaxLength;
887
888 inline int AllocatedSize() const;
889
890 // A SeqOneByteString has different maps depending on whether it is shared.
891 static inline bool IsCompatibleMap(Tagged<Map> map, ReadOnlyRoots roots);
892
893 class BodyDescriptor;
894
895 private:
896 friend struct OffsetsForDebug;
897 friend class CodeStubAssembler;
902 friend class SandboxTesting;
906
909
910template <>
913
914 static constexpr int kHeaderSize = sizeof(SeqOneByteString);
915 static constexpr int kMaxSize =
917
918 static_assert(static_cast<int>((kMaxSize - kHeaderSize) /
919 sizeof(SeqOneByteString::Char)) >=
921};
922
923// The SeqTwoByteString class captures sequential unicode string objects.
924// Each character in the SeqTwoByteString is a two-byte uint16_t.
926 public:
927 static const bool kHasOneByteEncoding = false;
928 using Char = uint16_t;
929
930 V8_INLINE static constexpr int32_t DataSizeFor(int32_t length);
931 V8_INLINE static constexpr int32_t SizeFor(int32_t length);
932
933 // Dispatched behavior.
934 inline uint16_t Get(
935 uint32_t index,
936 const SharedStringAccessGuardIfNeeded& access_guard) const;
937 inline void SeqTwoByteStringSet(uint32_t index, uint16_t value);
938
939 // Get the address of the characters in this string.
940 inline Address GetCharsAddress() const;
941
942 // Get a pointer to the characters of the string. May only be called when a
943 // SharedStringAccessGuard is not needed (i.e. on the main thread or on
944 // read-only strings).
945 inline base::uc16* GetChars(const DisallowGarbageCollection& no_gc);
946
947 // Get a pointer to the characters of the string.
948 inline base::uc16* GetChars(
949 const DisallowGarbageCollection& no_gc,
950 const SharedStringAccessGuardIfNeeded& access_guard);
951
953
954 // Initializes padding bytes. Potentially zeros tail of the payload too!
955 inline void clear_padding_destructively(uint32_t length);
956
957 // Maximal memory usage for a single sequential two-byte string.
958 static const uint32_t kMaxCharsSize = kMaxLength * sizeof(Char);
959
960 inline int AllocatedSize() const;
961
962 // A SeqTwoByteString has different maps depending on whether it is shared.
963 static inline bool IsCompatibleMap(Tagged<Map> map, ReadOnlyRoots roots);
964
965 class BodyDescriptor;
966
967 private:
968 friend struct OffsetsForDebug;
969 friend class CodeStubAssembler;
978
981
982template <>
985
986 static constexpr int kHeaderSize = sizeof(SeqTwoByteString);
987 static constexpr int kMaxSize =
989
990 static_assert(static_cast<int>((kMaxSize - kHeaderSize) /
991 sizeof(SeqTwoByteString::Char)) >=
993};
994
995// The ConsString class describes string values built by using the
996// addition operator on strings. A ConsString is a pair where the
997// first and second components are pointers to other string values.
998// One or both components of a ConsString can be pointers to other
999// ConsStrings, creating a binary tree of ConsStrings where the leaves
1000// are non-ConsString string values. The string value represented by
1001// a ConsString can be obtained by concatenating the leaf string
1002// values in a left-to-right depth-first traversal of the tree.
1004 public:
1005 inline Tagged<String> first() const;
1006 inline void set_first(Tagged<String> value,
1008
1009 inline Tagged<String> second() const;
1010 inline void set_second(Tagged<String> value,
1012
1013 // Doesn't check that the result is a string, even in debug mode. This is
1014 // useful during GC where the mark bits confuse the checks.
1015 inline Tagged<Object> unchecked_first() const;
1016
1017 // Doesn't check that the result is a string, even in debug mode. This is
1018 // useful during GC where the mark bits confuse the checks.
1019 inline Tagged<Object> unchecked_second() const;
1020
1021 V8_INLINE bool IsFlat() const;
1022
1023 // Dispatched behavior.
1024 V8_EXPORT_PRIVATE uint16_t
1025 Get(uint32_t index,
1026 const SharedStringAccessGuardIfNeeded& access_guard) const;
1027
1028 // Minimum length for a cons string.
1029 static const uint32_t kMinLength = 13;
1030
1032
1033 private:
1034 friend struct ObjectTraits<ConsString>;
1035 friend struct OffsetsForDebug;
1036 friend class V8HeapExplorer;
1037 friend class CodeStubAssembler;
1040 friend class SandboxTesting;
1045
1047
1051
1052template <>
1055 FixedBodyDescriptor<offsetof(ConsString, first_), sizeof(ConsString),
1056 sizeof(ConsString)>;
1057};
1058
1059// The ThinString class describes string objects that are just references
1060// to another string object. They are used for in-place internalization when
1061// the original string cannot actually be internalized in-place: in these
1062// cases, the original string is converted to a ThinString pointing at its
1063// internalized version (which is allocated as a new object).
1064// In terms of memory layout and most algorithms operating on strings,
1065// ThinStrings can be thought of as "one-part cons strings".
1067 public:
1068 inline Tagged<String> actual() const;
1069 inline void set_actual(Tagged<String> value,
1071
1072 inline Tagged<HeapObject> unchecked_actual() const;
1073
1074 V8_EXPORT_PRIVATE uint16_t
1075 Get(uint32_t index,
1076 const SharedStringAccessGuardIfNeeded& access_guard) const;
1077
1079
1080 private:
1081 friend struct ObjectTraits<ThinString>;
1082 friend struct OffsetsForDebug;
1083 friend class V8HeapExplorer;
1084 friend class CodeStubAssembler;
1092
1094
1097
1098template <>
1101 FixedBodyDescriptor<offsetof(ThinString, actual_), sizeof(ThinString),
1102 sizeof(ThinString)>;
1103};
1104
1105// The Sliced String class describes strings that are substrings of another
1106// sequential string. The motivation is to save time and memory when creating
1107// a substring. A Sliced String is described as a pointer to the parent,
1108// the offset from the start of the parent string and the length. Using
1109// a Sliced String therefore requires unpacking of the parent string and
1110// adding the offset to the start address. Substrings of a Sliced String
1111// are not nested since the double indirection is simplified when creating
1112// such a substring.
1113// Currently missing features are:
1114// - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
1116 public:
1117 inline Tagged<String> parent() const;
1118 inline void set_parent(Tagged<String> parent,
1120
1121 inline int32_t offset() const;
1122 inline void set_offset(int32_t offset);
1123
1124 // Dispatched behavior.
1125 V8_EXPORT_PRIVATE uint16_t
1126 Get(uint32_t index,
1127 const SharedStringAccessGuardIfNeeded& access_guard) const;
1128
1129 // Minimum length for a sliced string.
1130 static const uint32_t kMinLength = 13;
1131
1133 private:
1134 friend struct ObjectTraits<SlicedString>;
1135 friend struct OffsetsForDebug;
1136 friend class V8HeapExplorer;
1137 friend class CodeStubAssembler;
1138 friend class SandboxTesting;
1143
1145
1149
1150template <>
1154 sizeof(SlicedString)>;
1155};
1156
1157// TODO(leszeks): Build this out into a full V8 class.
1162
1163// The ExternalString class describes string values that are backed by
1164// a string resource that lies outside the V8 heap. ExternalStrings
1165// consist of the length field common to all strings and a pointer to the
1166// external resource. It is important to ensure (externally) that the
1167// resource is not deallocated while the ExternalString is live in the
1168// V8 heap.
1169//
1170// The API expects that all ExternalStrings are created through the
1171// API. Therefore, ExternalStrings should not be used internally.
1173 public:
1174 class BodyDescriptor;
1175
1177
1178 inline void InitExternalPointerFields(Isolate* isolate);
1179 inline void VisitExternalPointers(ObjectVisitor* visitor);
1180
1181 // Return whether the external string data pointer is not cached.
1182 inline bool is_uncached() const;
1183 // Size in bytes of the external payload.
1184 int ExternalPayloadSize() const;
1185
1186 // Used in the serializer/deserializer.
1187 inline Address resource_as_address() const;
1188 inline void set_address_as_resource(Isolate* isolate, Address address);
1189 inline uint32_t GetResourceRefForDeserialization();
1190 inline void SetResourceRefForSerialization(uint32_t ref);
1191
1192 // Disposes string's resource object if it has not already been disposed.
1193 inline void DisposeResource(Isolate* isolate);
1194
1196 Isolate* isolate);
1197
1198 private:
1200 friend struct OffsetsForDebug;
1201 friend class CodeStubAssembler;
1204
1205 protected:
1208
1209template <>
1212
1213 static_assert(offsetof(ExternalString, resource_) ==
1215};
1216
1217// The ExternalOneByteString class is an external string backed by a
1218// one-byte string.
1220 public:
1221 static const bool kHasOneByteEncoding = true;
1222 using Char = uint8_t;
1223
1225
1226 // The underlying resource.
1227 inline const Resource* resource() const;
1228
1229 // It is assumed that the previous resource is null. If it is not null, then
1230 // it is the responsibility of the caller to handle the previous resource.
1231 inline void SetResource(Isolate* isolate, const Resource* buffer);
1232
1233 // Used only during serialization.
1234 inline void set_resource(Isolate* isolate, const Resource* buffer);
1235
1236 // Update the pointer cache to the external character array.
1237 // The cached pointer is always valid, as the external character array does
1238 // not move during lifetime. Deserialization is the only exception, after
1239 // which the pointer cache has to be refreshed.
1240 inline void update_data_cache(Isolate* isolate);
1241
1242 inline const uint8_t* GetChars() const;
1243
1244 // Dispatched behavior.
1245 inline uint8_t Get(uint32_t index,
1246 const SharedStringAccessGuardIfNeeded& access_guard) const;
1247
1248 private:
1249 // The underlying resource as a non-const pointer.
1250 inline Resource* mutable_resource();
1252
1253static_assert(sizeof(ExternalOneByteString) == sizeof(ExternalString));
1254
1255// The ExternalTwoByteString class is an external string backed by a UTF-16
1256// encoded string.
1258 public:
1259 static const bool kHasOneByteEncoding = false;
1260 using Char = uint16_t;
1261
1263
1264 // The underlying string resource.
1265 inline const Resource* resource() const;
1266
1267 // It is assumed that the previous resource is null. If it is not null, then
1268 // it is the responsibility of the caller to handle the previous resource.
1269 inline void SetResource(Isolate* isolate, const Resource* buffer);
1270
1271 // Used only during serialization.
1272 inline void set_resource(Isolate* isolate, const Resource* buffer);
1273
1274 // Update the pointer cache to the external character array.
1275 // The cached pointer is always valid, as the external character array does
1276 // not move during lifetime. Deserialization is the only exception, after
1277 // which the pointer cache has to be refreshed.
1278 inline void update_data_cache(Isolate* isolate);
1279
1280 inline const uint16_t* GetChars() const;
1281
1282 // Dispatched behavior.
1283 inline uint16_t Get(
1284 uint32_t index,
1285 const SharedStringAccessGuardIfNeeded& access_guard) const;
1286
1287 // For regexp code.
1288 inline const uint16_t* ExternalTwoByteStringGetData(uint32_t start);
1289
1290 private:
1291 // The underlying resource as a non-const pointer.
1292 inline Resource* mutable_resource();
1294
1295static_assert(sizeof(ExternalTwoByteString) == sizeof(ExternalString));
1296
1297// A flat string reader provides random access to the contents of a
1298// string independent of the character width of the string. The handle
1299// must be valid as long as the reader is being used.
1300// Not safe to use from concurrent background threads.
1302 public:
1304 void PostGarbageCollection() override;
1305 inline base::uc32 Get(uint32_t index) const;
1306 template <typename Char>
1307 inline Char Get(uint32_t index) const;
1308 uint32_t length() const { return length_; }
1309
1310 private:
1313 uint32_t const length_;
1314 const void* start_;
1315};
1316
1317// This maintains an off-stack representation of the stack frames required
1318// to traverse a ConsString, allowing an entirely iterative and restartable
1319// traversal of the entire string.
1321 public:
1322 inline ConsStringIterator() = default;
1323 inline explicit ConsStringIterator(Tagged<ConsString> cons_string,
1324 int offset = 0) {
1325 Reset(cons_string, offset);
1326 }
1329 inline void Reset(Tagged<ConsString> cons_string, int offset = 0) {
1330 depth_ = 0;
1331 // Next will always return a null Tagged<String>.
1332 if (cons_string.is_null()) return;
1333 Initialize(cons_string, offset);
1334 }
1335 // Returns a null Tagged<String> when complete. The offset_out parameter will
1336 // be set to the offset within the returned segment that the user should
1337 // start looking at, to match the offset passed into the constructor or
1338 // Reset -- this will only be non-zero immediately after construction or
1339 // Reset, and only if those had a non-zero offset.
1340 inline Tagged<String> Next(int* offset_out) {
1341 *offset_out = 0;
1342 if (depth_ == 0) return Tagged<String>();
1343 return Continue(offset_out);
1344 }
1345
1346 private:
1347 static const int kStackSize = 32;
1348 // Use a mask instead of doing modulo operations for stack wrapping.
1349 static const int kDepthMask = kStackSize - 1;
1350 static_assert(base::bits::IsPowerOfTwo(kStackSize),
1351 "kStackSize must be power of two");
1352 static inline int OffsetForDepth(int depth);
1353
1354 inline void PushLeft(Tagged<ConsString> string);
1355 inline void PushRight(Tagged<ConsString> string);
1356 inline void AdjustMaximumDepth();
1357 inline void Pop();
1358 inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
1360 V8_EXPORT_PRIVATE Tagged<String> Continue(int* offset_out);
1361 Tagged<String> NextLeaf(bool* blew_stack);
1362 Tagged<String> Search(int* offset_out);
1363
1364 // Stack must always contain only frames for which right traversal
1365 // has not yet been performed.
1370 uint32_t consumed_;
1371};
1372
1374
1375template <typename Char>
1377
1378template <>
1383
1384template <>
1389
1390} // namespace v8::internal
1391
1393
1394#endif // V8_OBJECTS_STRING_H_
#define one
static const uint16_t kMaxChar
Definition unicode.h:142
static constexpr int kMaxLength
V8_EXPORT_PRIVATE Tagged< String > Continue(int *offset_out)
Definition string.cc:2084
void Reset(Tagged< ConsString > cons_string, int offset=0)
Definition string.h:1329
void PushRight(Tagged< ConsString > string)
Tagged< String > NextLeaf(bool *blew_stack)
Definition string.cc:2160
Tagged< ConsString > frames_[kStackSize]
Definition string.h:1366
Tagged< String > Search(int *offset_out)
Definition string.cc:2101
V8_EXPORT_PRIVATE void Initialize(Tagged< ConsString > cons_string, int offset)
Definition string.cc:2073
static int OffsetForDepth(int depth)
Tagged< String > Next(int *offset_out)
Definition string.h:1340
static const int kStackSize
Definition string.h:1347
ConsStringIterator & operator=(const ConsStringIterator &)=delete
Tagged< ConsString > root_
Definition string.h:1367
void PushLeft(Tagged< ConsString > string)
static const int kDepthMask
Definition string.h:1349
ConsStringIterator(const ConsStringIterator &)=delete
ConsStringIterator(Tagged< ConsString > cons_string, int offset=0)
Definition string.h:1323
V8_EXPORT_PRIVATE uint16_t Get(uint32_t index, const SharedStringAccessGuardIfNeeded &access_guard) const
Definition string.cc:2005
static const uint32_t kMinLength
Definition string.h:1029
friend struct OffsetsForDebug
Definition string.h:1035
V8_INLINE bool IsFlat() const
void set_first(Tagged< String > value, WriteBarrierMode mode=UPDATE_WRITE_BARRIER)
Tagged< String > first() const
Tagged< Object > unchecked_second() const
friend class SandboxTesting
Definition string.h:1040
TaggedMember< String > second_
Definition string.h:1049
void set_second(Tagged< String > value, WriteBarrierMode mode=UPDATE_WRITE_BARRIER)
Tagged< Object > unchecked_first() const
TaggedMember< String > first_
Definition string.h:1048
Tagged< String > second() const
friend class TorqueGeneratedConsStringAsserts
Definition string.h:1044
static const bool kHasOneByteEncoding
Definition string.h:1221
const Resource * resource() const
const uint8_t * GetChars() const
void update_data_cache(Isolate *isolate)
void SetResource(Isolate *isolate, const Resource *buffer)
uint8_t Get(uint32_t index, const SharedStringAccessGuardIfNeeded &access_guard) const
void set_resource(Isolate *isolate, const Resource *buffer)
void VisitExternalPointers(ObjectVisitor *visitor)
friend struct OffsetsForDebug
Definition string.h:1200
void SetResourceRefForSerialization(uint32_t ref)
void DisposeResource(Isolate *isolate)
int ExternalPayloadSize() const
Definition string.cc:2045
void InitExternalPointerFields(Isolate *isolate)
uint32_t GetResourceRefForDeserialization()
Address resource_as_address() const
friend class TorqueGeneratedExternalStringAsserts
Definition string.h:1203
void InitExternalPointerFieldsDuringExternalization(Tagged< Map > new_map, Isolate *isolate)
Definition string.cc:123
void set_address_as_resource(Isolate *isolate, Address address)
ExternalPointerMember< kExternalStringResourceDataTag > resource_data_
Definition string.h:1206
static const bool kHasOneByteEncoding
Definition string.h:1259
void SetResource(Isolate *isolate, const Resource *buffer)
uint16_t Get(uint32_t index, const SharedStringAccessGuardIfNeeded &access_guard) const
const uint16_t * GetChars() const
const uint16_t * ExternalTwoByteStringGetData(uint32_t start)
void set_resource(Isolate *isolate, const Resource *buffer)
void update_data_cache(Isolate *isolate)
const Resource * resource() const
uint32_t length() const
Definition string.h:1308
DirectHandle< String > str_
Definition string.h:1311
static const int kStringResourceOffset
friend class StringBuiltinsAssembler
Definition string.h:900
friend class ToDirectStringAssembler
Definition string.h:898
static const uint32_t kMaxCharsSize
Definition string.h:886
friend struct OffsetsForDebug
Definition string.h:896
friend class StringFromCharCodeAssembler
Definition string.h:901
friend class CodeStubAssembler
Definition string.h:897
friend class TorqueGeneratedSeqOneByteStringAsserts
Definition string.h:905
friend class IntlBuiltinsAssembler
Definition string.h:899
uint16_t Get(uint32_t index, const SharedStringAccessGuardIfNeeded &access_guard) const
static bool IsCompatibleMap(Tagged< Map > map, ReadOnlyRoots roots)
friend struct OffsetsForDebug
Definition string.h:968
void clear_padding_destructively(uint32_t length)
static V8_INLINE constexpr int32_t DataSizeFor(int32_t length)
static const uint32_t kMaxCharsSize
Definition string.h:958
friend class TorqueGeneratedSeqTwoByteStringAsserts
Definition string.h:977
DataAndPaddingSizes GetDataAndPaddingSizes() const
Definition string.cc:1978
friend class StringFromCharCodeAssembler
Definition string.h:973
void SeqTwoByteStringSet(uint32_t index, uint16_t value)
static const bool kHasOneByteEncoding
Definition string.h:927
static V8_INLINE constexpr int32_t SizeFor(int32_t length)
base::uc16 * GetChars(const DisallowGarbageCollection &no_gc)
friend struct OffsetsForDebug
Definition string.h:1135
Tagged< String > parent() const
V8_EXPORT_PRIVATE uint16_t Get(uint32_t index, const SharedStringAccessGuardIfNeeded &access_guard) const
Definition string.cc:2040
friend class SandboxTesting
Definition string.h:1138
TaggedMember< Smi > offset_
Definition string.h:1147
void set_parent(Tagged< String > parent, WriteBarrierMode mode=UPDATE_WRITE_BARRIER)
static const uint32_t kMinLength
Definition string.h:1130
TaggedMember< String > parent_
Definition string.h:1146
friend class TorqueGeneratedSlicedStringAsserts
Definition string.h:1142
void set_offset(int32_t offset)
V8_INLINE bool IsShared() const
Definition string-inl.h:200
V8_INLINE uint32_t encoding_tag() const
Definition string-inl.h:212
V8_INLINE bool IsThin() const
Definition string-inl.h:174
V8_INLINE bool IsExternalOneByte() const
Definition string-inl.h:238
V8_INLINE bool IsInternalized() const
Definition string-inl.h:163
V8_INLINE StringShape(const Tagged< String > s)
Definition string-inl.h:141
V8_INLINE bool IsIndirect() const
Definition string-inl.h:182
V8_INLINE uint32_t representation_encoding_and_shared_tag() const
Definition string-inl.h:220
V8_INLINE bool IsCons() const
Definition string-inl.h:170
V8_INLINE StringRepresentationTag representation_tag() const
Definition string-inl.h:207
V8_INLINE bool IsSliced() const
Definition string-inl.h:178
V8_INLINE bool IsExternalTwoByte() const
Definition string-inl.h:247
bool operator==(const StringShape &that) const
Definition string.h:91
V8_INLINE bool IsExternal() const
Definition string-inl.h:188
V8_INLINE bool IsSequentialOneByte() const
Definition string-inl.h:230
V8_INLINE uint32_t representation_and_encoding_tag() const
Definition string-inl.h:216
V8_INLINE bool IsDirect() const
Definition string-inl.h:186
V8_INLINE bool IsSequentialTwoByte() const
Definition string-inl.h:234
V8_INLINE bool IsUncachedExternal() const
Definition string-inl.h:196
V8_INLINE bool IsSequential() const
Definition string-inl.h:192
base::Vector< const uint8_t > ToOneByteVector() const
Definition string.h:139
static constexpr uint32_t kChecksumVerificationDisabled
Definition string.h:193
const uint8_t * onebyte_start
Definition string.h:186
FlatContent(const DisallowGarbageCollection &no_gc)
Definition string.h:182
bool UsesSameString(const FlatContent &other) const
Definition string.h:157
base::Vector< const base::uc16 > ToUC16Vector() const
Definition string.h:145
const DisallowGarbageCollection & no_gc_
Definition string.h:191
FlatContent(const uint8_t *start, uint32_t length, const DisallowGarbageCollection &no_gc)
Definition string-inl.h:890
const base::uc16 * twobyte_start
Definition string.h:187
base::uc16 Get(uint32_t i) const
Definition string.h:150
virtual int CaptureCount()=0
virtual ~Match()=default
virtual DirectHandle< String > GetMatch()=0
virtual MaybeDirectHandle< String > GetNamedCapture(DirectHandle< String > name, CaptureState *state)=0
virtual DirectHandle< String > GetSuffix()=0
virtual bool HasNamedCaptures()=0
virtual MaybeDirectHandle< String > GetCapture(int i, bool *capture_exists)=0
virtual DirectHandle< String > GetPrefix()=0
void StringShortPrint(StringStream *accumulator)
Definition string.cc:604
static V8_NOINLINE bool IsConsStringEqualToImpl(Tagged< ConsString > string, base::Vector< const Char > str, const SharedStringAccessGuardIfNeeded &access_guard)
static void WriteToFlat(Tagged< String > source, SinkCharT *sink, uint32_t start, uint32_t length)
Definition string.cc:772
V8_EXPORT_PRIVATE void PrintOn(FILE *out)
Definition string.cc:1910
static const uint32_t kMaxHashCalcLength
Definition string.h:515
void set_length(uint32_t hash)
Definition string-inl.h:133
friend class SharedStringTableInsertionKey
Definition string.h:689
friend struct OffsetsForDebug
Definition string.h:693
static const uint32_t kMaxLength
Definition string.h:511
static V8_INLINE HandleType< String > Flatten(Isolate *isolate, HandleType< T > string, AllocationType allocation=AllocationType::kYoung)
V8_EXPORT_PRIVATE uint32_t ComputeAndSetRawHash()
Definition string.cc:1827
static size_t WriteUtf8(Isolate *isolate, DirectHandle< String > string, char *buffer, size_t capacity, Utf8EncodingFlags flags, size_t *processed_characters_return=nullptr)
Definition string.cc:1113
static int32_t ToArrayIndex(Address addr)
Definition string.cc:648
static Tagged< Object > IndexOf(Isolate *isolate, DirectHandle< Object > receiver, DirectHandle< Object > search, DirectHandle< Object > position)
Definition string.cc:1426
const uint8_t * AddressOfCharacterAt(uint32_t start_index, const DisallowGarbageCollection &no_gc)
Definition string.cc:2206
static LineEndsVector CalculateLineEndsVector(IsolateT *isolate, DirectHandle< String > string, bool include_ending_line)
Definition string.cc:1164
V8_INLINE auto DispatchToSpecificType(TDispatcher &&dispatcher) const -> std::common_type_t< decltype(dispatcher(Tagged< SeqOneByteString >{})), decltype(dispatcher(Tagged< SeqTwoByteString >{})), decltype(dispatcher(Tagged< ExternalOneByteString >{})), decltype(dispatcher(Tagged< ExternalTwoByteString >{})), decltype(dispatcher(Tagged< ThinString >{})), decltype(dispatcher(Tagged< ConsString >{})), decltype(dispatcher(Tagged< SlicedString >{}))>
Definition string-inl.h:295
static uint32_t constexpr kInlineLineEndsSize
Definition string.h:641
V8_INLINE uint16_t GetImpl(uint32_t index, const SharedStringAccessGuardIfNeeded &access_guard) const
Definition string-inl.h:984
static bool IsAscii(const char *chars, uint32_t length)
Definition string.h:575
static const uint32_t kMaxOneByteCharCodeU
Definition string.h:501
bool IsTwoByteRepresentation() const
Definition string-inl.h:368
static bool IsAscii(const uint8_t *chars, uint32_t length)
Definition string.h:579
V8_EXPORT_PRIVATE static V8_INLINE std::optional< FlatContent > TryGetFlatContentFromDirectString(const DisallowGarbageCollection &no_gc, Tagged< String > string, uint32_t offset, uint32_t length, const SharedStringAccessGuardIfNeeded &)
Definition string-inl.h:855
static auto DispatchToSpecificTypeWithoutCast(InstanceType instance_type, TArgs &&... args)
Definition string-inl.h:257
static bool IsIdentifier(Isolate *isolate, DirectHandle< String > str)
Definition string.cc:1789
static const int32_t kMaxOneByteCharCode
Definition string.h:500
static V8_WARN_UNUSED_RESULT ComparisonResult Compare(Isolate *isolate, DirectHandle< String > x, DirectHandle< String > y)
Definition string.cc:1353
void MakeExternalDuringGC(Isolate *isolate, T *resource)
Definition string.cc:289
static uint32_t NonOneByteStart(const base::uc16 *chars, uint32_t length)
Definition string.h:584
void MakeThin(IsolateT *isolate, Tagged< String > canonical)
Definition string.cc:134
V8_INLINE base::Vector< const Char > GetCharVector(const DisallowGarbageCollection &no_gc)
const char * PrefixForDebugPrint() const
Definition string.cc:568
V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t *index)
Definition string.cc:1881
uint32_t length() const
Definition string-inl.h:127
static bool IsInPlaceInternalizable(Tagged< String > string)
static HandleType< String > Share(Isolate *isolate, HandleType< T > string)
Definition string-inl.h:946
V8_EXPORT_PRIVATE bool HasOneBytePrefix(base::Vector< const char > str)
Definition string.cc:1762
static bool IsOneByteRepresentationUnderneath(Tagged< String > string)
Definition string-inl.h:373
V8_EXPORT_PRIVATE bool IsOneByteEqualTo(base::Vector< const char > str)
Definition string-inl.h:681
V8_EXPORT_PRIVATE FlatContent SlowGetFlatContent(const DisallowGarbageCollection &no_gc, const SharedStringAccessGuardIfNeeded &)
V8_EXPORT_PRIVATE bool SlowAsIntegerIndex(size_t *index)
Definition string.cc:1895
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< String > GetSubstitution(Isolate *isolate, Match *match, DirectHandle< String > replacement, uint32_t start_index=0)
Definition string.cc:1495
friend class SandboxTesting
Definition string.h:690
static const uint32_t kMaxUtf16CodeUnitU
Definition string.h:503
bool IsShared() const
static const base::uc32 kMaxCodePoint
Definition string.h:504
static const int kMaxUtf16CodeUnit
Definition string.h:502
static size_t Utf8Length(Isolate *isolate, DirectHandle< String > string)
V8_EXPORT_PRIVATE bool SlowEquals(Tagged< String > other) const
Definition string.cc:1219
V8_INLINE uint16_t Get(uint32_t index) const
Definition string-inl.h:964
bool IsOneByteRepresentation() const
Definition string-inl.h:364
static Handle< FixedArray > CalculateLineEnds(IsolateT *isolate, DirectHandle< String > string, bool include_ending_line)
Definition string.cc:1194
static V8_INLINE HandleType< String > Flatten(LocalIsolate *isolate, HandleType< T > string, AllocationType allocation=AllocationType::kYoung)
void Set(uint32_t index, uint16_t value)
Definition string-inl.h:992
static V8_EXPORT_PRIVATE HandleType< String > SlowFlatten(Isolate *isolate, HandleType< ConsString > cons, AllocationType allocation)
Definition string-inl.h:714
static HandleType< Number > ToNumber(Isolate *isolate, HandleType< String > subject)
Definition string.cc:661
V8_INLINE bool IsEqualToImpl(base::Vector< const Char > str, const SharedStringAccessGuardIfNeeded &access_guard) const
EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE) static HandleType< String > SlowShare(Isolate *isolate
const char * SuffixForDebugPrint() const
Definition string.cc:598
bool IsEqualTo(base::Vector< const Char > str, Isolate *isolate) const
Definition string-inl.h:554
bool AsArrayIndex(uint32_t *index)
static bool IsWellFormedUnicode(Isolate *isolate, DirectHandle< String > string)
static void WriteToFlat2(SinkCharT *dst, Tagged< ConsString > src, uint32_t src_index, uint32_t length, const SharedStringAccessGuardIfNeeded &aguard, const DisallowGarbageCollection &no_gc)
Definition string.cc:1074
Tagged< String > GetUnderlying() const
static bool IsOneByte(const base::uc16 *chars, uint32_t length)
Definition string.h:625
V8_EXPORT_PRIVATE bool MakeExternal(Isolate *isolate, v8::String::ExternalStringResource *resource)
bool IsFlat() const
friend class wasm::baseline::LiftoffCompiler
Definition string.h:699
friend class TorqueGeneratedStringAsserts
Definition string.h:700
bool Equals(Tagged< String > other) const
Definition string-inl.h:535
std::unique_ptr< char[]> ToCString(uint32_t offset, uint32_t length, size_t *length_output=nullptr)
Definition string.cc:711
static const uint32_t kMaxShortPrintLength
Definition string.h:518
const Char * GetDirectStringChars(const DisallowGarbageCollection &no_gc) const
Definition string-inl.h:686
static bool IsInPlaceInternalizableExcludingExternal(InstanceType instance_type)
void PrintUC16(std::ostream &os, int start=0, int end=-1)
Definition string.cc:621
V8_EXPORT_PRIVATE V8_INLINE FlatContent GetFlatContent(const DisallowGarbageCollection &no_gc)
Definition string-inl.h:885
bool SupportsExternalization(v8::String::Encoding)
Definition string.cc:529
V8_OBJECT_INNER_CLASS class v8::internal::String::FlatContent V8_OBJECT_INNER_CLASS_END
static Tagged< Object > LastIndexOf(Isolate *isolate, DirectHandle< Object > receiver, DirectHandle< Object > search, DirectHandle< Object > position)
Definition string.cc:1689
static Tagged< ConsString > VisitFlat(Visitor *visitor, Tagged< String > string, int offset=0)
bool AsIntegerIndex(size_t *index)
bool MarkForExternalizationDuringGC(Isolate *isolate, T *resource)
const DisallowGarbageCollection & no_gc_
Definition string.h:815
Tagged< String > string_
Definition string.h:812
V8_INLINE constexpr bool is_null() const
Definition tagged.h:502
Tagged< String > actual() const
friend struct OffsetsForDebug
Definition string.h:1082
TaggedMember< String > actual_
Definition string.h:1095
V8_EXPORT_PRIVATE uint16_t Get(uint32_t index, const SharedStringAccessGuardIfNeeded &access_guard) const
Definition string.cc:2035
friend class TorqueGeneratedThinStringAsserts
Definition string.h:1091
Tagged< HeapObject > unchecked_actual() const
void set_actual(Tagged< String > value, WriteBarrierMode mode=UPDATE_WRITE_BARRIER)
ExternalPointerMember< kExternalStringResourceTag > resource_
Definition string.h:1160
#define OBJECT_POINTER_ALIGN(value)
Definition globals.h:1783
int start
int end
base::Vector< const DirectHandle< Object > > args
Definition execution.cc:74
#define EXPORT_TEMPLATE_DECLARE(export)
int32_t offset
TNode< Object > receiver
int x
int position
Definition liveedit.cc:290
const int length_
Definition mul-fft.cc:473
constexpr bool IsPowerOfTwo(T value)
Definition bits.h:187
uint32_t uc32
Definition strings.h:19
uint16_t uc16
Definition strings.h:18
V8_INLINE const Operation & Get(const Graph &graph, OpIndex index)
Definition graph.h:1231
@ UPDATE_WRITE_BARRIER
Definition objects.h:55
Tagged(T object) -> Tagged< T >
constexpr int kUIntptrSize
Definition globals.h:409
v8::internal::LoadHandler V8_OBJECT_END
constexpr uintptr_t kUintptrAllBitsSet
Definition v8-internal.h:94
const int kSmiMaxValue
constexpr int kMaxInt
Definition globals.h:374
Definition c-api.cc:87
#define V8_OBJECT_INNER_CLASS
#define DECL_VERIFIER(Name)
#define V8_OBJECT
#define DECL_PRINTER(Name)
#define EXPORT_DECL_VERIFIER(Name)
BytecodeSequenceNode * parent_
static constexpr int kMaxSize
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define V8_EXPORT_PRIVATE
Definition macros.h:460
constexpr bool IsAligned(T value, U alignment)
Definition macros.h:403
bool operator==(const DataAndPaddingSizes &other) const
Definition string.h:831
#define V8_INLINE
Definition v8config.h:500
#define V8_WARN_UNUSED_RESULT
Definition v8config.h:671
#define V8_NOINLINE
Definition v8config.h:586
wasm::ValueType type