v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
string.cc
Go to the documentation of this file.
1// Copyright 2019 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
10#include "src/common/globals.h"
14#include "src/heap/heap-inl.h"
22#include "src/objects/map.h"
23#include "src/objects/oddball.h"
32#include "src/utils/ostreams.h"
34
35namespace v8 {
36namespace internal {
37
38template <template <typename> typename HandleType>
39 requires(std::is_convertible_v<HandleType<String>, DirectHandle<String>>)
40HandleType<String> String::SlowShare(Isolate* isolate,
41 HandleType<String> source) {
42 DCHECK(v8_flags.shared_string_table);
43 HandleType<String> flat =
44 Flatten(isolate, source, AllocationType::kSharedOld);
45
46 // Do not recursively call Share, so directly compute the sharing strategy for
47 // the flat string, which could already be a copy or an existing string from
48 // e.g. a shortcut ConsString.
49 MaybeDirectHandle<Map> new_map;
50 switch (isolate->factory()->ComputeSharingStrategyForString(flat, &new_map)) {
52 break;
54 // A relaxed write is sufficient here, because at this point the string
55 // has not yet escaped the current thread.
57 flat->set_map_no_write_barrier(isolate, *new_map.ToHandleChecked());
58 return flat;
60 return flat;
61 }
62
63 uint32_t length = flat->length();
64 if (flat->IsOneByteRepresentation()) {
65 HandleType<SeqOneByteString> copy =
66 isolate->factory()->NewRawSharedOneByteString(length).ToHandleChecked();
68 WriteToFlat(*flat, copy->GetChars(no_gc), 0, length);
69 return copy;
70 }
71 HandleType<SeqTwoByteString> copy =
72 isolate->factory()->NewRawSharedTwoByteString(length).ToHandleChecked();
74 WriteToFlat(*flat, copy->GetChars(no_gc), 0, length);
75 return copy;
76}
77
78template V8_EXPORT_PRIVATE DirectHandle<String> String::SlowShare(
79 Isolate* isolate, DirectHandle<String> source);
80template V8_EXPORT_PRIVATE IndirectHandle<String> String::SlowShare(
81 Isolate* isolate, IndirectHandle<String> source);
82
83namespace {
84
85template <class StringClass>
86void MigrateExternalStringResource(Isolate* isolate,
89 Address to_resource_address = to->resource_as_address();
90 if (to_resource_address == kNullAddress) {
91 Tagged<StringClass> cast_from = Cast<StringClass>(from);
92 // |to| is a just-created internalized copy of |from|. Migrate the resource.
93 to->SetResource(isolate, cast_from->resource());
94 // Zap |from|'s resource pointer to reflect the fact that |from| has
95 // relinquished ownership of its resource.
96 isolate->heap()->UpdateExternalString(
97 from, Cast<ExternalString>(from)->ExternalPayloadSize(), 0);
98 cast_from->SetResource(isolate, nullptr);
99 } else if (to_resource_address != from->resource_as_address()) {
100 // |to| already existed and has its own resource. Finalize |from|.
101 isolate->heap()->FinalizeExternalString(from);
102 }
103}
104
105void MigrateExternalString(Isolate* isolate, Tagged<String> string,
106 Tagged<String> internalized) {
107 if (IsExternalOneByteString(internalized)) {
108 MigrateExternalStringResource(isolate, Cast<ExternalString>(string),
109 Cast<ExternalOneByteString>(internalized));
110 } else if (IsExternalTwoByteString(internalized)) {
111 MigrateExternalStringResource(isolate, Cast<ExternalString>(string),
112 Cast<ExternalTwoByteString>(internalized));
113 } else {
114 // If the external string is duped into an existing non-external
115 // internalized string, free its resource (it's about to be rewritten
116 // into a ThinString below).
117 isolate->heap()->FinalizeExternalString(string);
118 }
119}
120
121} // namespace
122
124 Tagged<Map> new_map, Isolate* isolate) {
125 resource_.Init(address(), isolate, kNullAddress);
126 bool is_uncached = (new_map->instance_type() & kUncachedExternalStringMask) ==
128 if (!is_uncached) {
130 }
131}
132
133template <typename IsolateT>
134void String::MakeThin(IsolateT* isolate, Tagged<String> internalized) {
136 DCHECK_NE(this, internalized);
137 DCHECK(IsInternalizedString(internalized));
138
139 Tagged<Map> initial_map = map(kAcquireLoad);
140 StringShape initial_shape(initial_map);
141
142 DCHECK(!initial_shape.IsThin());
143
144#ifdef DEBUG
145 // Check that shared strings can only transition to ThinStrings on the main
146 // thread when no other thread is active.
147 // The exception is during serialization, as no strings have escaped the
148 // thread yet.
149 if (initial_shape.IsShared() && !isolate->has_active_deserializer()) {
150 isolate->AsIsolate()->global_safepoint()->AssertActive();
151 }
152#endif
153
154 bool may_contain_recorded_slots = initial_shape.IsIndirect();
155 int old_size = SizeFromMap(initial_map);
156 ReadOnlyRoots roots(isolate);
157 Tagged<Map> target_map = internalized->IsOneByteRepresentation()
158 ? roots.thin_one_byte_string_map()
159 : roots.thin_two_byte_string_map();
160 if (initial_shape.IsExternal()) {
161 // Notify GC about the layout change before the transition to avoid
162 // concurrent marking from observing any in-between state (e.g.
163 // ExternalString map where the resource external pointer is overwritten
164 // with a tagged pointer).
165 // ExternalString -> ThinString transitions can only happen on the
166 // main-thread.
167 isolate->AsIsolate()->heap()->NotifyObjectLayoutChange(
170 MigrateExternalString(isolate->AsIsolate(), this, internalized);
171 }
172
173 // Update actual first and then do release store on the map word. This ensures
174 // that the concurrent marker will read the pointer when visiting a
175 // ThinString.
177 thin->set_actual(internalized);
178
179 DCHECK_GE(old_size, sizeof(ThinString));
180 int size_delta = old_size - sizeof(ThinString);
181 if (size_delta != 0) {
182 if (!Heap::IsLargeObject(thin)) {
183 isolate->heap()->NotifyObjectSizeChange(
184 thin, old_size, sizeof(ThinString),
185 may_contain_recorded_slots ? ClearRecordedSlots::kYes
187 } else {
188 // We don't need special handling for the combination IsLargeObject &&
189 // may_contain_recorded_slots, because indirect strings never get that
190 // large.
191 DCHECK(!may_contain_recorded_slots);
192 }
193 }
194
195 if (initial_shape.IsExternal()) {
196 set_map(isolate, target_map, kReleaseStore);
197 } else {
198 set_map_safe_transition(isolate, target_map, kReleaseStore);
199 }
200}
201
203 Isolate* isolate, Tagged<String> internalized);
205 LocalIsolate* isolate, Tagged<String> internalized);
206
207template <typename T>
208bool String::MarkForExternalizationDuringGC(Isolate* isolate, T* resource) {
209 uint32_t raw_hash = raw_hash_field(kAcquireLoad);
210 if (IsExternalForwardingIndex(raw_hash)) return false;
211 if (IsInternalizedForwardingIndex(raw_hash)) {
212 const int forwarding_index = ForwardingIndexValueBits::decode(raw_hash);
213 if (!isolate->string_forwarding_table()->TryUpdateExternalResource(
214 forwarding_index, resource)) {
215 // The external resource was concurrently updated by another thread.
216 return false;
217 }
218 resource->Unaccount(reinterpret_cast<v8::Isolate*>(isolate));
219 raw_hash = Name::IsExternalForwardingIndexBit::update(raw_hash, true);
221 return true;
222 }
223 // We need to store the hash in the forwarding table, as all non-external
224 // shared strings are in-place internalizable. In case the string gets
225 // internalized, we have to ensure that we can get the hash from the
226 // forwarding table to satisfy the invariant that all internalized strings
227 // have a computed hash value.
228 if (!IsHashFieldComputed(raw_hash)) {
229 raw_hash = EnsureRawHash();
230 }
231 DCHECK(IsHashFieldComputed(raw_hash));
232 resource->Unaccount(reinterpret_cast<v8::Isolate*>(isolate));
233 int forwarding_index =
234 isolate->string_forwarding_table()->AddExternalResourceAndHash(
235 this, resource, raw_hash);
238
239 return true;
240}
241
242namespace {
243
244template <bool is_one_byte>
245Tagged<Map> ComputeExternalStringMap(Isolate* isolate, Tagged<String> string,
246 int size) {
247 ReadOnlyRoots roots(isolate);
248 StringShape shape(string, isolate);
249 const bool is_internalized = shape.IsInternalized();
250 const bool is_shared = shape.IsShared();
251 if constexpr (is_one_byte) {
252 if (size < static_cast<int>(sizeof(ExternalString))) {
253 if (is_internalized) {
254 return roots.uncached_external_internalized_one_byte_string_map();
255 } else {
256 return is_shared ? roots.shared_uncached_external_one_byte_string_map()
257 : roots.uncached_external_one_byte_string_map();
258 }
259 } else {
260 if (is_internalized) {
261 return roots.external_internalized_one_byte_string_map();
262 } else {
263 return is_shared ? roots.shared_external_one_byte_string_map()
264 : roots.external_one_byte_string_map();
265 }
266 }
267 } else {
268 if (size < static_cast<int>(sizeof(ExternalString))) {
269 if (is_internalized) {
270 return roots.uncached_external_internalized_two_byte_string_map();
271 } else {
272 return is_shared ? roots.shared_uncached_external_two_byte_string_map()
273 : roots.uncached_external_two_byte_string_map();
274 }
275 } else {
276 if (is_internalized) {
277 return roots.external_internalized_two_byte_string_map();
278 } else {
279 return is_shared ? roots.shared_external_two_byte_string_map()
280 : roots.external_two_byte_string_map();
281 }
282 }
283 }
284}
285
286} // namespace
287
288template <typename T>
289void String::MakeExternalDuringGC(Isolate* isolate, T* resource) {
290 isolate->heap()->safepoint()->AssertActive();
291 DCHECK_NE(isolate->heap()->gc_state(), Heap::NOT_IN_GC);
292
293 constexpr bool is_one_byte =
294 std::is_base_of_v<v8::String::ExternalOneByteStringResource, T>;
295 int size = this->Size(); // Byte size of the original string.
296 DCHECK_GE(size, sizeof(UncachedExternalString));
297
298 // Morph the string to an external string by replacing the map and
299 // reinitializing the fields. This won't work if the space the existing
300 // string occupies is too small for a regular external string. Instead, we
301 // resort to an uncached external string instead, omitting the field caching
302 // the address of the backing store. When we encounter uncached external
303 // strings in generated code, we need to bailout to runtime.
304 Tagged<Map> new_map =
305 ComputeExternalStringMap<is_one_byte>(isolate, this, size);
306
307 // Byte size of the external String object.
308 int new_size = this->SizeFromMap(new_map);
309
310 // Shared strings are never indirect.
311 DCHECK(!StringShape(this).IsIndirect());
312
313 if (!isolate->heap()->IsLargeObject(this)) {
314 isolate->heap()->NotifyObjectSizeChange(this, size, new_size,
316 }
317
318 // The external pointer slots must be initialized before the new map is
319 // installed. Otherwise, a GC marking thread may see the new map before the
320 // slots are initialized and attempt to mark the (invalid) external pointers
321 // table entries as alive.
322 static_cast<ExternalString*>(this)
323 ->InitExternalPointerFieldsDuringExternalization(new_map, isolate);
324
325 // We are storing the new map using release store after creating a filler in
326 // the NotifyObjectSizeChange call for the left-over space to avoid races with
327 // the sweeper thread.
328 this->set_map(isolate, new_map, kReleaseStore);
329
330 if constexpr (is_one_byte) {
332 self->SetResource(isolate, resource);
333 } else {
335 self->SetResource(isolate, resource);
336 }
337 isolate->heap()->RegisterExternalString(this);
338}
339
345
346bool String::MakeExternal(Isolate* isolate,
348 // Disallow garbage collection to avoid possible GC vs string access deadlock.
350
351 // Externalizing twice leaks the external resource, so it's
352 // prohibited by the API.
353 DCHECK(
355 DCHECK(resource->IsCacheable());
356#ifdef ENABLE_SLOW_DCHECKS
357 if (v8_flags.enable_slow_asserts) {
358 // Assert that the resource and the string are equivalent.
359 DCHECK(static_cast<size_t>(this->length()) == resource->length());
360 base::ScopedVector<base::uc16> smart_chars(this->length());
361 String::WriteToFlat(this, smart_chars.begin(), 0, this->length());
362 DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
363 resource->length() * sizeof(smart_chars[0])));
364 }
365#endif // ENABLE_SLOW_DCHECKS
366 int size = this->Size(); // Byte size of the original string.
367 // Abort if size does not allow in-place conversion.
368 if (size < static_cast<int>(sizeof(UncachedExternalString))) return false;
369 // Read-only strings cannot be made external, since that would mutate the
370 // string.
371 if (HeapLayout::InReadOnlySpace(this)) return false;
372 if (IsShared()) {
373 return MarkForExternalizationDuringGC(isolate, resource);
374 }
375 // For strings in the shared space we need the shared space isolate instead of
376 // the current isolate.
378 resource->Unaccount(reinterpret_cast<v8::Isolate*>(isolate));
379 isolate = isolate->shared_space_isolate();
380 }
381 bool is_internalized = IsInternalizedString(this);
382 bool has_pointers = StringShape(this).IsIndirect();
383
384 base::MutexGuardIf mutex_guard(isolate->internalized_string_access(),
385 is_internalized);
386 // Morph the string to an external string by replacing the map and
387 // reinitializing the fields. This won't work if the space the existing
388 // string occupies is too small for a regular external string. Instead, we
389 // resort to an uncached external string instead, omitting the field caching
390 // the address of the backing store. When we encounter uncached external
391 // strings in generated code, we need to bailout to runtime.
392 constexpr bool is_one_byte = false;
393 Tagged<Map> new_map =
394 ComputeExternalStringMap<is_one_byte>(isolate, this, size);
395
396 // Byte size of the external String object.
397 int new_size = this->SizeFromMap(new_map);
398
399 if (has_pointers) {
400 isolate->heap()->NotifyObjectLayoutChange(
403 }
404
405 if (!isolate->heap()->IsLargeObject(this)) {
406 isolate->heap()->NotifyObjectSizeChange(
407 this, size, new_size,
409 } else {
410 // We don't need special handling for the combination IsLargeObject &&
411 // has_pointers, because indirect strings never get that large.
412 DCHECK(!has_pointers);
413 }
414
415 // The external pointer slots must be initialized before the new map is
416 // installed. Otherwise, a GC marking thread may see the new map before the
417 // slots are initialized and attempt to mark the (invalid) external pointers
418 // table entries as alive.
419 static_cast<ExternalString*>(this)
420 ->InitExternalPointerFieldsDuringExternalization(new_map, isolate);
421
422 // We are storing the new map using release store after creating a filler in
423 // the NotifyObjectSizeChange call for the left-over space to avoid races with
424 // the sweeper thread.
425 this->set_map(isolate, new_map, kReleaseStore);
426
428 self->SetResource(isolate, resource);
429 isolate->heap()->RegisterExternalString(this);
430 // Force regeneration of the hash value.
431 if (is_internalized) self->EnsureHash();
432 return true;
433}
434
437 // Disallow garbage collection to avoid possible GC vs string access deadlock.
439
440 // Externalizing twice leaks the external resource, so it's
441 // prohibited by the API.
442 DCHECK(
444 DCHECK(resource->IsCacheable());
445#ifdef ENABLE_SLOW_DCHECKS
446 if (v8_flags.enable_slow_asserts) {
447 // Assert that the resource and the string are equivalent.
448 DCHECK(static_cast<size_t>(this->length()) == resource->length());
449 if (this->IsTwoByteRepresentation()) {
450 base::ScopedVector<uint16_t> smart_chars(this->length());
451 String::WriteToFlat(this, smart_chars.begin(), 0, this->length());
452 DCHECK(String::IsOneByte(smart_chars.begin(), this->length()));
453 }
454 base::ScopedVector<char> smart_chars(this->length());
455 String::WriteToFlat(this, smart_chars.begin(), 0, this->length());
456 DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
457 resource->length() * sizeof(smart_chars[0])));
458 }
459#endif // ENABLE_SLOW_DCHECKS
460 int size = this->Size(); // Byte size of the original string.
461 // Abort if size does not allow in-place conversion.
462 if (size < static_cast<int>(sizeof(UncachedExternalString))) return false;
463 // Read-only strings cannot be made external, since that would mutate the
464 // string.
465 if (HeapLayout::InReadOnlySpace(this)) return false;
466 if (IsShared()) {
467 return MarkForExternalizationDuringGC(isolate, resource);
468 }
469 // For strings in the shared space we need the shared space isolate instead of
470 // the current isolate.
472 resource->Unaccount(reinterpret_cast<v8::Isolate*>(isolate));
473 isolate = isolate->shared_space_isolate();
474 }
475 bool is_internalized = IsInternalizedString(this);
476 bool has_pointers = StringShape(this).IsIndirect();
477
478 base::MutexGuardIf mutex_guard(isolate->internalized_string_access(),
479 is_internalized);
480 // Morph the string to an external string by replacing the map and
481 // reinitializing the fields. This won't work if the space the existing
482 // string occupies is too small for a regular external string. Instead, we
483 // resort to an uncached external string instead, omitting the field caching
484 // the address of the backing store. When we encounter uncached external
485 // strings in generated code, we need to bailout to runtime.
486 constexpr bool is_one_byte = true;
487 Tagged<Map> new_map =
488 ComputeExternalStringMap<is_one_byte>(isolate, this, size);
489
490 if (!isolate->heap()->IsLargeObject(this)) {
491 // Byte size of the external String object.
492 int new_size = this->SizeFromMap(new_map);
493
494 if (has_pointers) {
496 isolate->heap()->NotifyObjectLayoutChange(
499 }
500 isolate->heap()->NotifyObjectSizeChange(
501 this, size, new_size,
503 } else {
504 // We don't need special handling for the combination IsLargeObject &&
505 // has_pointers, because indirect strings never get that large.
506 DCHECK(!has_pointers);
507 }
508
509 // The external pointer slots must be initialized before the new map is
510 // installed. Otherwise, a GC marking thread may see the new map before the
511 // slots are initialized and attempt to mark the (invalid) external pointers
512 // table entries as alive.
513 static_cast<ExternalString*>(this)
514 ->InitExternalPointerFieldsDuringExternalization(new_map, isolate);
515
516 // We are storing the new map using release store after creating a filler in
517 // the NotifyObjectSizeChange call for the left-over space to avoid races with
518 // the sweeper thread.
519 this->set_map(isolate, new_map, kReleaseStore);
520
522 self->SetResource(isolate, resource);
523 isolate->heap()->RegisterExternalString(this);
524 // Force regeneration of the hash value.
525 if (is_internalized) self->EnsureHash();
526 return true;
527}
528
530 if (IsThinString(this)) {
531 return i::Cast<i::ThinString>(this)->actual()->SupportsExternalization(
532 encoding);
533 }
534
535 // RO_SPACE strings cannot be externalized.
536 if (HeapLayout::InReadOnlySpace(this)) {
537 return false;
538 }
539
540#if V8_COMPRESS_POINTERS && !V8_ENABLE_SANDBOX
541 // In this configuration, small strings may not be in-place externalizable.
542 if (this->Size() < static_cast<int>(sizeof(UncachedExternalString))) {
543 return false;
544 }
545#else
546 DCHECK_LE(sizeof(UncachedExternalString), this->Size());
547#endif
548
549 StringShape shape(this);
550
551 // Already an external string.
552 if (shape.IsExternal()) {
553 return false;
554 }
555
556 // Only strings in old space can be externalized.
558 return false;
559 }
560
561 // Encoding changes are not supported.
562 static_assert(kStringEncodingMask == 1 << 3);
563 static_assert(v8::String::Encoding::ONE_BYTE_ENCODING == 1 << 3);
564 static_assert(v8::String::Encoding::TWO_BYTE_ENCODING == 0);
565 return shape.encoding_tag() == static_cast<uint32_t>(encoding);
566}
567
568const char* String::PrefixForDebugPrint() const {
569 StringShape shape(this);
571 if (shape.IsInternalized()) {
572 return "u#";
573 } else if (shape.IsCons()) {
574 return "uc\"";
575 } else if (shape.IsThin()) {
576 return "u>\"";
577 } else if (shape.IsExternal()) {
578 return "ue\"";
579 } else {
580 return "u\"";
581 }
582 } else {
583 if (shape.IsInternalized()) {
584 return "#";
585 } else if (shape.IsCons()) {
586 return "c\"";
587 } else if (shape.IsThin()) {
588 return ">\"";
589 } else if (shape.IsExternal()) {
590 return "e\"";
591 } else {
592 return "\"";
593 }
594 }
595 UNREACHABLE();
596}
597
598const char* String::SuffixForDebugPrint() const {
599 StringShape shape(this);
600 if (shape.IsInternalized()) return "";
601 return "\"";
602}
603
605 const uint32_t len = length();
606 accumulator->Add("<String[%u]: ", len);
607 accumulator->Add(PrefixForDebugPrint());
608
609 if (len > kMaxShortPrintLength) {
610 accumulator->Add("...<truncated>>");
611 accumulator->Add(SuffixForDebugPrint());
612 accumulator->Put('>');
613 return;
614 }
615
616 PrintUC16(accumulator, 0, len);
617 accumulator->Add(SuffixForDebugPrint());
618 accumulator->Put('>');
619}
620
621void String::PrintUC16(std::ostream& os, int start, int end) {
622 if (end < 0) end = length();
623 StringCharacterStream stream(this, start);
624 for (int i = start; i < end && stream.HasMore(); i++) {
625 os << AsUC16(stream.GetNext());
626 }
627}
628
629void String::PrintUC16(StringStream* accumulator, int start, int end) {
630 if (end < 0) end = length();
631 StringCharacterStream stream(this, start);
632 for (int i = start; i < end && stream.HasMore(); i++) {
633 uint16_t c = stream.GetNext();
634 if (c == '\n') {
635 accumulator->Add("\\n");
636 } else if (c == '\r') {
637 accumulator->Add("\\r");
638 } else if (c == '\\') {
639 accumulator->Add("\\\\");
640 } else if (!std::isprint(c)) {
641 accumulator->Add("\\x%02x", c);
642 } else {
643 accumulator->Put(static_cast<char>(c));
644 }
645 }
646}
647
650 Tagged<String> key(addr);
651
652 uint32_t index;
653 if (!key->AsArrayIndex(&index)) return -1;
654 if (index <= INT_MAX) return index;
655 return -1;
656}
657
658// static
659template <template <typename> typename HandleType>
660 requires(std::is_convertible_v<HandleType<String>, DirectHandle<String>>)
661HandleType<Number> String::ToNumber(Isolate* isolate,
662 HandleType<String> subject) {
663 return isolate->factory()->NewNumber(
664 StringToDouble(isolate, subject, ALLOW_NON_DECIMAL_PREFIX));
665}
666
669 DirectHandle<String> subject);
672 IndirectHandle<String> subject);
673
675 const DisallowGarbageCollection& no_gc,
676 const SharedStringAccessGuardIfNeeded& access_guard) {
677 USE(no_gc);
678 Tagged<String> string = this;
679 StringShape shape(string);
680 uint32_t offset = 0;
681
682 // Extract cons- and sliced strings.
683 if (shape.IsCons()) {
685 if (!cons->IsFlat()) return FlatContent(no_gc);
686 string = cons->first();
687 shape = StringShape(string);
688 } else if (shape.IsSliced()) {
690 offset = slice->offset();
691 string = slice->parent();
692 shape = StringShape(string);
693 }
694
695 DCHECK(!shape.IsCons());
696 DCHECK(!shape.IsSliced());
697
698 // Extract thin strings.
699 if (shape.IsThin()) {
701 string = thin->actual();
702 shape = StringShape(string);
703 }
704
705 DCHECK(shape.IsDirect());
706 return TryGetFlatContentFromDirectString(no_gc, string, offset, length(),
707 access_guard)
708 .value();
709}
710
711std::unique_ptr<char[]> String::ToCString(uint32_t offset, uint32_t length,
712 size_t* length_return) {
713 DCHECK_LE(length, this->length());
714 DCHECK_LE(offset, this->length() - length);
715
716 StringCharacterStream stream(this, offset);
717
718 // First, compute the required size of the output buffer.
719 size_t utf8_bytes = 0;
720 uint32_t remaining_chars = length;
722 while (stream.HasMore() && remaining_chars-- != 0) {
723 uint16_t character = stream.GetNext();
724 utf8_bytes += unibrow::Utf8::Length(character, last);
725 last = character;
726 }
727 if (length_return) {
728 *length_return = utf8_bytes;
729 }
730
731 // Second, allocate the output buffer.
732 size_t capacity = utf8_bytes + 1;
733 char* result = NewArray<char>(capacity);
734
735 // Third, encode the string into the output buffer.
736 stream.Reset(this, offset);
737 size_t pos = 0;
738 remaining_chars = length;
740 while (stream.HasMore() && remaining_chars-- != 0) {
741 uint16_t character = stream.GetNext();
742 if (character == 0) {
743 character = ' ';
744 }
745
746 // Ensure that there's sufficient space for this character and the null
747 // terminator. This should normally always be the case, unless there is
748 // in-sandbox memory corruption.
749 // Alternatively, we could also over-allocate the output buffer by three
750 // bytes (the maximum we can write OOB) or consider allocating it inside
751 // the sandbox, but it's not clear if that would be worth the effort as the
752 // performance overhead of this check appears to be negligible in practice.
753 SBXCHECK_LE(unibrow::Utf8::Length(character, last) + 1, capacity - pos);
754
756
757 last = character;
758 }
759
760 DCHECK_LT(pos, capacity);
761 result[pos++] = 0;
762
763 return std::unique_ptr<char[]>(result);
764}
765
766std::unique_ptr<char[]> String::ToCString(size_t* length_return) {
767 return ToCString(0, length(), length_return);
768}
769
770// static
771template <typename SinkCharT>
772void String::WriteToFlat(Tagged<String> source, SinkCharT* sink, uint32_t start,
773 uint32_t length) {
775 return WriteToFlat(source, sink, start, length,
777}
778// static
779template <typename SinkCharT>
780void String::WriteToFlat(Tagged<String> source, SinkCharT* sink, uint32_t start,
781 uint32_t length,
782 const SharedStringAccessGuardIfNeeded& access_guard) {
784 if (length == 0) return;
785 while (true) {
786 DCHECK_GT(length, 0);
787 DCHECK_LE(length, source->length());
788 DCHECK_LT(start, source->length());
789 DCHECK_LE(start + length, source->length());
790
791 if (source->DispatchToSpecificType(base::overloaded{
792 [&](Tagged<SeqOneByteString> str) {
793 CopyChars(sink, str->GetChars(no_gc, access_guard) + start,
794 length);
795 return true;
796 },
797 [&](Tagged<SeqTwoByteString> str) {
798 CopyChars(sink, str->GetChars(no_gc, access_guard) + start,
799 length);
800 return true;
801 },
803 CopyChars(sink, str->GetChars() + start, length);
804 return true;
805 },
807 CopyChars(sink, str->GetChars() + start, length);
808 return true;
809 },
810 [&](Tagged<ConsString> cons_string) {
811 Tagged<String> first = cons_string->first();
812 uint32_t boundary = first->length();
813 // Here we explicitly use signed ints as the values can become
814 // negative. The sum of {first_length} and {second_length} is
815 // always {length}, but the values can become negative, in which
816 // case no characters of the respective string are needed.
817 int32_t first_length = boundary - start;
818 int32_t second_length = length - first_length;
819 DCHECK_EQ(static_cast<uint32_t>(first_length + second_length),
820 length);
821 if (second_length >= first_length) {
822 DCHECK_GT(second_length, 0);
823 // Right hand side is longer. Recurse over left.
824 if (first_length > 0) {
825 DCHECK_LT(first_length, length);
826 DCHECK_LT(second_length, length);
827
828 WriteToFlat(first, sink, start, first_length, access_guard);
829 if (start == 0 && cons_string->second() == first) {
830 DCHECK_LE(boundary * 2, length);
831 CopyChars(sink + boundary, sink, boundary);
832 return true;
833 }
834 sink += first_length;
835 start = 0;
836 length -= first_length;
837 } else {
838 start -= boundary;
839 }
840 source = cons_string->second();
841 } else {
842 DCHECK_GT(first_length, 0);
843 // Left hand side is longer. Recurse over right.
844 if (second_length > 0) {
845 DCHECK_LT(first_length, length);
846 DCHECK_LT(second_length, length);
847
848 uint32_t second_start = first_length;
849 DCHECK_EQ(second_start + second_length, length);
850 Tagged<String> second = cons_string->second();
851 // When repeatedly appending to a string, we get a cons string
852 // that is unbalanced to the left, a list, essentially. We
853 // inline the common case of sequential one-byte right child.
854 if (second_length == 1) {
855 sink[second_start] =
856 static_cast<SinkCharT>(second->Get(0, access_guard));
857 } else if (IsSeqOneByteString(second)) {
858 CopyChars(sink + second_start,
859 Cast<SeqOneByteString>(second)->GetChars(
860 no_gc, access_guard),
861 second_length);
862 } else {
863 WriteToFlat(second, sink + second_start, 0, second_length,
864 access_guard);
865 }
866 length -= second_length;
867 }
868 source = first;
869 }
870 return length == 0;
871 },
872 [&](Tagged<SlicedString> slice) {
873 uint32_t offset = slice->offset();
874 source = slice->parent();
875 start += offset;
876 return false;
877 },
878 [&](Tagged<ThinString> thin_string) {
879 source = thin_string->actual();
880 return false;
881 }})) {
882 return;
883 }
884 }
885 UNREACHABLE();
886}
887
888namespace {
889
890template <typename SinkCharT>
891SinkCharT* WriteNonConsToFlat2(Tagged<String> src, StringShape shape,
892 SinkCharT* dst, uint32_t src_index,
893 uint32_t length,
894 const SharedStringAccessGuardIfNeeded& aguard,
895 const DisallowGarbageCollection& no_gc) {
896 DCHECK(!shape.IsCons());
897 DCHECK_LE(src_index + length, src->length());
898 DCHECK_EQ(shape, StringShape{src});
899
900 switch (shape.representation_and_encoding_tag()) {
902 auto s = Cast<SeqOneByteString>(src);
903 CopyChars(dst, s->GetChars(no_gc, aguard) + src_index, length);
904 return dst + length;
905 }
907 auto s = Cast<SeqTwoByteString>(src);
908 CopyChars(dst, s->GetChars(no_gc, aguard) + src_index, length);
909 return dst + length;
910 }
912 auto s = Cast<ExternalOneByteString>(src);
913 CopyChars(dst, s->GetChars() + src_index, length);
914 return dst + length;
915 }
917 auto s = Cast<ExternalTwoByteString>(src);
918 CopyChars(dst, s->GetChars() + src_index, length);
919 return dst + length;
920 }
923 auto s = Cast<SlicedString>(src);
924 Tagged<String> parent = s->parent();
925 return WriteNonConsToFlat2(parent, StringShape{parent}, dst,
926 src_index + s->offset(), length, aguard,
927 no_gc);
928 }
931 Tagged<String> actual = Cast<ThinString>(src)->actual();
932 return WriteNonConsToFlat2(actual, StringShape{actual}, dst, src_index,
933 length, aguard, no_gc);
934 }
937 UNREACHABLE();
938 }
939
940 UNREACHABLE();
941}
942
// Compile-time selector passed as the kVariant template argument of
// WriteToFlat2Impl.
943enum WriteToFlatImplVariant {
// NOTE(review): presumably a fast path restricted to sequential one-byte
// leaves; the code that uses it is not visible here, so confirm.
944 kWTFSeqOneByte,
// The variant for which WriteToFlat2Impl consults the
// WriteToFlat_RepeatOptimizer before descending into a string.
945 kWTFGeneric,
946};
947
948// A SmallVector-based stack with a cached top element. The cached top is vital
949// for arm64 performance. This would be more natural within a class, but sadly
950// arm64 performance regresses significantly if so, since that also causes the
951// cached top to be spilled onto the stack.
952using wtf_stack_t = base::SmallVector<Tagged<String>, 32>;
953using wtf_stack_top_t = Tagged<String>;
954
// Pushes |value| onto the logical stack made of the cached |top| element plus
// the backing |stack|. If |top| currently holds a string it is spilled into
// |stack| first; |value| then becomes the new cached top.
955V8_INLINE void wtf_push(wtf_stack_top_t& top, wtf_stack_t& stack,
956 Tagged<String> value) {
957 if (!top.is_null()) stack.push_back(top);
958 top = value;
959}
960
// Pops the most recently pushed string from the logical stack made of the
// cached |top| element plus the backing |stack|.
//
// Returns true and stores the popped string in |*value| when one is
// available; returns false (leaving |*value| untouched) when both the cached
// top and |stack| are empty.
961V8_INLINE bool wtf_try_pop(wtf_stack_top_t& top, wtf_stack_t& stack,
962 Tagged<String>* value) {
// Preferred path: the cached top is occupied. Hand it out and reset it to
// the null value so the next pop falls through to |stack|.
963 if (V8_LIKELY(!top.is_null())) {
964 *value = top;
965 top = {};
966 return true;
967 }
// Cached top is empty: take the last element of the backing vector, if any.
968 if (V8_LIKELY(!stack.empty())) {
969 *value = stack.back();
970 stack.pop_back();
971 return true;
972 }
973 return false;
974}
975
976// Omits repeated flattening of one string (based on pointer identity) by
977// remembering its first flattened position, and simply copying that region
978// when encountering it again.
979template <typename SinkCharT>
980class WriteToFlat_RepeatOptimizer final {
981 public:
982 V8_INLINE void RecordFirstOccurrence(Tagged<String> s,
983 const SinkCharT* position) {
984 enabled_ = true;
985 auto it = first_occurrence_.find(s.ptr());
986 if (it == first_occurrence_.end()) {
987 first_occurrence_.insert({s.ptr(), position});
988 }
989 }
990
991 V8_INLINE bool TryApply(Tagged<String> s, SinkCharT** current_position) {
992 if (V8_UNLIKELY(enabled_)) {
993 auto it = first_occurrence_.find(s.ptr());
994 if (it != first_occurrence_.end()) {
995 const SinkCharT* previous_position = it->second;
996 if (*current_position != previous_position) {
997 uint32_t length = s->length();
998 DCHECK_LE(*current_position, previous_position - length);
999 previous_position -= length;
1000 (*current_position) -= length;
1001 CopyChars(*current_position, previous_position, length);
1002 return true;
1003 }
1004 }
1005 }
1006 return false;
1007 }
1008
1009 V8_INLINE bool enabled() const { return enabled_; }
1010
1011 private:
1012 // Only enable once we've seen a candidate, to reduce overhead.
1013 bool enabled_ = false;
1014 // Maps a Tagged<String>::ptr() to its first flattened occurrence.
1015 std::unordered_map<Address, const SinkCharT*> first_occurrence_;
1016};
1017
// Drains the work stack (`top` + `stack`). Each popped string is descended
// along its `second` links until a non-cons leaf is reached; that leaf's
// characters are written immediately before the reverse cursor `rdst`, which
// is then left pointing at the start of what was written.
//
// kVariant selects the loop flavour:
//  - kWTFSeqOneByte copies sequential one-byte leaves directly. On the first
//    leaf of any other shape, or once `ropt` is enabled, it pushes that leaf
//    back and returns with work still outstanding.
//  - kWTFGeneric consults `ropt` for repeated strings and writes leaves via
//    WriteNonConsToFlat2; it only returns once the work stack is empty.
1018template <WriteToFlatImplVariant kVariant, typename SinkCharT>
1019V8_INLINE void WriteToFlat2Impl(SinkCharT*& rdst, wtf_stack_top_t& top,
1020 wtf_stack_t& stack,
1021 WriteToFlat_RepeatOptimizer<SinkCharT>& ropt,
1022 const SharedStringAccessGuardIfNeeded& aguard,
1023 const DisallowGarbageCollection& no_gc) {
1024 Tagged<String> s;
1025 while (V8_LIKELY(wtf_try_pop(top, stack, &s))) {
1026 StringShape shape{s};
1027
1028 if constexpr (kVariant == kWTFGeneric) {
// A string whose flattened form already exists elsewhere in the output is
// copied from there instead of being traversed again.
1029 if (V8_UNLIKELY(ropt.TryApply(s, &rdst))) continue;
1030 }
1031
1032 // Descend into the rightmost leaf and push left branches onto the stack.
1033 //
1034 // Alternatively, we could always flatten the shorter side first, where
1035 // substring length is used as a heuristic for substring tree depth, in
1036 // order to minimize stack size. That approach has different trade-offs,
1037 // for example: the stack would have to store both the string and the
1038 // current `rdst` value, and the write sequence may be less cache-friendly.
1039 while (shape.IsCons()) {
1040 auto cons = Cast<ConsString>(s);
1041 auto first = cons->first();
1042 wtf_push(top, stack, first);
1043 s = cons->second();
// Both children are the same object: remember where its first copy ends so
// the second visit can be served by a plain copy.
1044 if (V8_UNLIKELY(s == first)) {
1045 ropt.RecordFirstOccurrence(s, rdst);
1046 }
1047 shape = StringShape{s};
1048 }
1049
1050 if constexpr (kVariant == kWTFSeqOneByte) {
1051 if (!shape.IsSequentialOneByte() || V8_UNLIKELY(ropt.enabled())) {
1052 // Exit the specialized variant. Note the caller MUST follow up with
1053 // the kWTFGeneric variant.
1054 wtf_push(top, stack, s);
1055 return;
1056 }
1057 uint8_t* chars = Cast<SeqOneByteString>(s)->GetChars(no_gc, aguard);
1058 uint32_t length = s->length();
1059 rdst -= length;
1060 CopyChars(rdst, chars, length);
1061 } else {
1062 static_assert(kVariant == kWTFGeneric);
1063 uint32_t length = s->length();
1064 rdst -= length;
1065 WriteNonConsToFlat2(s, shape, rdst, 0, length, aguard, no_gc);
1066 }
1067 }
1068}
1069
1070} // namespace
1071
1072// static
// Flattens the non-flat cons string `src` into `dst`, writing `length`
// characters. Only whole-string flattening is supported: `src_index` must be 0
// and `length` must equal src->length() (both DCHECK-enforced below).
1073template <typename SinkCharT>
1074void String::WriteToFlat2(SinkCharT* dst, Tagged<ConsString> src,
1075 uint32_t src_index, uint32_t length,
1076 const SharedStringAccessGuardIfNeeded& aguard,
1077 const DisallowGarbageCollection& no_gc) {
1078 DCHECK_NE(length, 0);
1079 DCHECK(!src->IsFlat());
1080 DCHECK_LE(src_index + length, src->length());
1081
1082 // Limitations of the current implementation, which only supports flattening
1083 // the entire string.
1084 DCHECK_EQ(src_index, 0);
1085 DCHECK_EQ(length, src->length());
1086
1087 // The most common form of cons strings are degenerate unbalanced left-heavy
1088 // binary trees (i.e. where `second` is a flat string and `first` another
1089 // cons string). This form is created when building a string by appending
1090 // repeatedly: `str = "a" + "b" + ... + "z";`
1091 //
1092 // To optimize for this, we flatten in reverse-DFS order, i.e. right-to-left.
1093 // This way, the stack never grows beyond size 1. Additionally, we elide the
1094 // stack push for the element that will immediately be processed next.
1095 // Finally, the iterative algorithm is split into two physically separate
1096 // loops - the first is optimized for cases when the cons tree contains only
1097 // sequential one-byte strings. The second handles all other cases
1098 // generically.
1099 //
1100 // Note this implementation is highly tuned. Please don't change anything
1101 // without watching benchmark scores.
1102
1103 SinkCharT* rdst = dst + length; // Reverse cursor.
// Seed the work stack with the root's children: `second` sits in the cached
// top so that it is processed first, `first` waits in the spill stack.
1104 wtf_stack_t stack{src->first()};
1105 wtf_stack_top_t top = src->second();
1106 WriteToFlat_RepeatOptimizer<SinkCharT> ropt;
1107
// The specialized pass may return early with work left on the stack; the
// generic pass then finishes whatever remains (a no-op if nothing does).
1108 WriteToFlat2Impl<kWTFSeqOneByte>(rdst, top, stack, ropt, aguard, no_gc);
1109 WriteToFlat2Impl<kWTFGeneric>(rdst, top, stack, ropt, aguard, no_gc);
1110}
1111
1112// static
1113size_t String::WriteUtf8(Isolate* isolate, DirectHandle<String> string,
1114 char* buffer, size_t capacity, Utf8EncodingFlags flags,
1115 size_t* processed_characters_return) {
1116 DCHECK_IMPLIES(flags & Utf8EncodingFlag::kNullTerminate, capacity > 0);
1117 DCHECK_IMPLIES(capacity > 0, buffer != nullptr);
1118
1119 string = Flatten(isolate, string);
1120
1122 FlatContent content = string->GetFlatContent(no_gc);
1123 DCHECK(content.IsFlat());
1124
1125 auto encoding_result = content.IsOneByte()
1127 content.ToOneByteVector(), buffer, capacity,
1128 flags & Utf8EncodingFlag::kNullTerminate,
1129 flags & Utf8EncodingFlag::kReplaceInvalid)
1131 content.ToUC16Vector(), buffer, capacity,
1132 flags & Utf8EncodingFlag::kNullTerminate,
1133 flags & Utf8EncodingFlag::kReplaceInvalid);
1134
1135 if (processed_characters_return != nullptr) {
1136 *processed_characters_return = encoding_result.characters_processed;
1137 }
1138
1139 return encoding_result.bytes_written;
1140}
1141
1142template <typename SourceChar>
1145 bool include_ending_line) {
1146 const int src_len = src.length();
1147 for (int i = 0; i < src_len - 1; i++) {
1148 SourceChar current = src[i];
1149 SourceChar next = src[i + 1];
1150 if (IsLineTerminatorSequence(current, next)) line_ends->push_back(i);
1151 }
1152
1153 if (src_len > 0 && IsLineTerminatorSequence(src[src_len - 1], 0)) {
1154 line_ends->push_back(src_len - 1);
1155 }
1156 if (include_ending_line) {
1157 // Include one character beyond the end of script. The rewriter uses that
1158 // position for the implicit return statement.
1159 line_ends->push_back(src_len);
1160 }
1161}
1162
1163template <typename IsolateT>
1164String::LineEndsVector String::CalculateLineEndsVector(
1165 IsolateT* isolate, DirectHandle<String> src, bool include_ending_line) {
1166 src = Flatten(isolate, src);
1167 // Rough estimate of line count based on a roughly estimated average
1168 // length of packed code. Most scripts have < 32 lines.
1169 int line_count_estimate = (src->length() >> 6) + 16;
1170 LineEndsVector line_ends;
1171 line_ends.reserve(line_count_estimate);
1172 {
1174 // Dispatch on type of strings.
1175 String::FlatContent content = src->GetFlatContent(no_gc);
1176 DCHECK(content.IsFlat());
1177 if (content.IsOneByte()) {
1178 CalculateLineEndsImpl(&line_ends, content.ToOneByteVector(),
1179 include_ending_line);
1180 } else {
1181 CalculateLineEndsImpl(&line_ends, content.ToUC16Vector(),
1182 include_ending_line);
1183 }
1184 }
1185 return line_ends;
1186}
1187
1188template String::LineEndsVector String::CalculateLineEndsVector(
1189 Isolate* isolate, DirectHandle<String> src, bool include_ending_line);
1190template String::LineEndsVector String::CalculateLineEndsVector(
1191 LocalIsolate* isolate, DirectHandle<String> src, bool include_ending_line);
1192
1193template <typename IsolateT>
1194Handle<FixedArray> String::CalculateLineEnds(IsolateT* isolate,
1196 bool include_ending_line) {
1197 LineEndsVector line_ends =
1198 CalculateLineEndsVector(isolate, src, include_ending_line);
1199 int line_count = static_cast<int>(line_ends.size());
1200 Handle<FixedArray> array =
1201 isolate->factory()->NewFixedArray(line_count, AllocationType::kOld);
1202 {
1204 Tagged<FixedArray> raw_array = *array;
1205 for (int i = 0; i < line_count; i++) {
1206 raw_array->set(i, Smi::FromInt(line_ends[i]));
1207 }
1208 }
1209 return array;
1210}
1211
1212template Handle<FixedArray> String::CalculateLineEnds(Isolate* isolate,
1214 bool include_ending_line);
1215template Handle<FixedArray> String::CalculateLineEnds(LocalIsolate* isolate,
1217 bool include_ending_line);
1218
1219bool String::SlowEquals(Tagged<String> other) const {
1220 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(this));
1221 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(other));
1222 return SlowEquals(other, SharedStringAccessGuardIfNeeded::NotNeeded());
1223}
1224
1225bool String::SlowEquals(
1226 Tagged<String> other,
1227 const SharedStringAccessGuardIfNeeded& access_guard) const {
1229 // Fast check: negative check with lengths.
1230 uint32_t len = length();
1231 if (len != other->length()) return false;
1232 if (len == 0) return true;
1233
1234 // Fast check: if at least one ThinString is involved, dereference it/them
1235 // and restart.
1236 if (IsThinString(this) || IsThinString(other)) {
1237 if (IsThinString(other)) other = Cast<ThinString>(other)->actual();
1238 if (IsThinString(this)) {
1239 return Cast<ThinString>(this)->actual()->Equals(other);
1240 } else {
1241 return this->Equals(other);
1242 }
1243 }
1244
1245 // Fast check: if hash code is computed for both strings
1246 // a fast negative check can be performed.
1247 uint32_t this_hash;
1248 uint32_t other_hash;
1249 if (TryGetHash(&this_hash) && other->TryGetHash(&other_hash)) {
1250#ifdef ENABLE_SLOW_DCHECKS
1251 if (v8_flags.enable_slow_asserts) {
1252 if (this_hash != other_hash) {
1253 bool found_difference = false;
1254 for (uint32_t i = 0; i < len; i++) {
1255 if (Get(i) != other->Get(i)) {
1256 found_difference = true;
1257 break;
1258 }
1259 }
1260 DCHECK(found_difference);
1261 }
1262 }
1263#endif
1264 if (this_hash != other_hash) return false;
1265 }
1266
1267 // We know the strings are both non-empty. Compare the first chars
1268 // before we try to flatten the strings.
1269 if (this->Get(0, access_guard) != other->Get(0, access_guard)) return false;
1270
1271 if (IsSeqOneByteString(this) && IsSeqOneByteString(other)) {
1272 const uint8_t* str1 =
1273 Cast<SeqOneByteString>(this)->GetChars(no_gc, access_guard);
1274 const uint8_t* str2 =
1275 Cast<SeqOneByteString>(other)->GetChars(no_gc, access_guard);
1276 return CompareCharsEqual(str1, str2, len);
1277 }
1278
1279 StringComparator comparator;
1280 return comparator.Equals(this, other, access_guard);
1281}
1282
1283// static
1284bool String::SlowEquals(Isolate* isolate, DirectHandle<String> one,
1286 // Fast check: negative check with lengths.
1287 const uint32_t one_length = one->length();
1288 if (one_length != two->length()) return false;
1289 if (one_length == 0) return true;
1290
1291 // Fast check: if at least one ThinString is involved, dereference it/them
1292 // and restart.
1293 if (IsThinString(*one) || IsThinString(*two)) {
1294 if (IsThinString(*one)) {
1295 one = direct_handle(Cast<ThinString>(*one)->actual(), isolate);
1296 }
1297 if (IsThinString(*two)) {
1298 two = direct_handle(Cast<ThinString>(*two)->actual(), isolate);
1299 }
1300 return String::Equals(isolate, one, two);
1301 }
1302
1303 // Fast check: if hash code is computed for both strings
1304 // a fast negative check can be performed.
1305 uint32_t one_hash;
1306 uint32_t two_hash;
1307 if (one->TryGetHash(&one_hash) && two->TryGetHash(&two_hash)) {
1308#ifdef ENABLE_SLOW_DCHECKS
1309 if (v8_flags.enable_slow_asserts) {
1310 if (one_hash != two_hash) {
1311 bool found_difference = false;
1312 for (uint32_t i = 0; i < one_length; i++) {
1313 if (one->Get(i) != two->Get(i)) {
1314 found_difference = true;
1315 break;
1316 }
1317 }
1318 DCHECK(found_difference);
1319 }
1320 }
1321#endif
1322 if (one_hash != two_hash) return false;
1323 }
1324
1325 // We know the strings are both non-empty. Compare the first chars
1326 // before we try to flatten the strings.
1327 if (one->Get(0) != two->Get(0)) return false;
1328
1329 one = String::Flatten(isolate, one);
1330 two = String::Flatten(isolate, two);
1331
1333 String::FlatContent flat1 = one->GetFlatContent(no_gc);
1334 String::FlatContent flat2 = two->GetFlatContent(no_gc);
1335
1336 if (flat1.IsOneByte() && flat2.IsOneByte()) {
1337 return CompareCharsEqual(flat1.ToOneByteVector().begin(),
1338 flat2.ToOneByteVector().begin(), one_length);
1339 } else if (flat1.IsTwoByte() && flat2.IsTwoByte()) {
1340 return CompareCharsEqual(flat1.ToUC16Vector().begin(),
1341 flat2.ToUC16Vector().begin(), one_length);
1342 } else if (flat1.IsOneByte() && flat2.IsTwoByte()) {
1343 return CompareCharsEqual(flat1.ToOneByteVector().begin(),
1344 flat2.ToUC16Vector().begin(), one_length);
1345 } else if (flat1.IsTwoByte() && flat2.IsOneByte()) {
1346 return CompareCharsEqual(flat1.ToUC16Vector().begin(),
1347 flat2.ToOneByteVector().begin(), one_length);
1348 }
1349 UNREACHABLE();
1350}
1351
1352// static
1355 // A few fast case tests before we flatten.
1356 if (x.is_identical_to(y)) {
1357 return ComparisonResult::kEqual;
1358 } else if (y->length() == 0) {
1359 return x->length() == 0 ? ComparisonResult::kEqual
1360 : ComparisonResult::kGreaterThan;
1361 } else if (x->length() == 0) {
1362 return ComparisonResult::kLessThan;
1363 }
1364
1365 int const d = x->Get(0) - y->Get(0);
1366 if (d < 0) {
1367 return ComparisonResult::kLessThan;
1368 } else if (d > 0) {
1369 return ComparisonResult::kGreaterThan;
1370 }
1371
1372 // Slow case.
1373 x = String::Flatten(isolate, x);
1374 y = String::Flatten(isolate, y);
1375
1377 ComparisonResult result = ComparisonResult::kEqual;
1378 uint32_t prefix_length = x->length();
1379 if (y->length() < prefix_length) {
1380 prefix_length = y->length();
1381 result = ComparisonResult::kGreaterThan;
1382 } else if (y->length() > prefix_length) {
1383 result = ComparisonResult::kLessThan;
1384 }
1385 int r;
1386 String::FlatContent x_content = x->GetFlatContent(no_gc);
1387 String::FlatContent y_content = y->GetFlatContent(no_gc);
1388 if (x_content.IsOneByte()) {
1389 base::Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
1390 if (y_content.IsOneByte()) {
1391 base::Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
1392 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1393 } else {
1394 base::Vector<const base::uc16> y_chars = y_content.ToUC16Vector();
1395 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1396 }
1397 } else {
1398 base::Vector<const base::uc16> x_chars = x_content.ToUC16Vector();
1399 if (y_content.IsOneByte()) {
1400 base::Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
1401 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1402 } else {
1403 base::Vector<const base::uc16> y_chars = y_content.ToUC16Vector();
1404 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1405 }
1406 }
1407 if (r < 0) {
1408 result = ComparisonResult::kLessThan;
1409 } else if (r > 0) {
1410 result = ComparisonResult::kGreaterThan;
1411 }
1412 return result;
1413}
1414
1415namespace {
1416
1417uint32_t ToValidIndex(Tagged<String> str, Tagged<Object> number) {
1418 uint32_t index = PositiveNumberToUint32(number);
1419 uint32_t length = str->length();
1420 if (index > length) return length;
1421 return index;
1422}
1423
1424} // namespace
1425
1427 DirectHandle<Object> search,
1429 if (IsNullOrUndefined(*receiver, isolate)) {
1431 isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
1432 isolate->factory()->NewStringFromAsciiChecked(
1433 "String.prototype.indexOf")));
1434 }
1435 DirectHandle<String> receiver_string;
1436 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
1437 Object::ToString(isolate, receiver));
1438
1439 DirectHandle<String> search_string;
1440 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
1441 Object::ToString(isolate, search));
1442
1444 Object::ToInteger(isolate, position));
1445
1446 uint32_t index = ToValidIndex(*receiver_string, *position);
1447 return Smi::FromInt(
1448 String::IndexOf(isolate, receiver_string, search_string, index));
1449}
1450
1451namespace {
1452
1453template <typename T>
1454int SearchString(Isolate* isolate, String::FlatContent receiver_content,
1455 base::Vector<T> pat_vector, int start_index) {
1456 if (receiver_content.IsOneByte()) {
1457 return SearchString(isolate, receiver_content.ToOneByteVector(), pat_vector,
1458 start_index);
1459 }
1460 return SearchString(isolate, receiver_content.ToUC16Vector(), pat_vector,
1461 start_index);
1462}
1463
1464} // namespace
1465
1466int String::IndexOf(Isolate* isolate, DirectHandle<String> receiver,
1467 DirectHandle<String> search, uint32_t start_index) {
1468 DCHECK_LE(start_index, receiver->length());
1469
1470 uint32_t search_length = search->length();
1471 if (search_length == 0) return start_index;
1472
1473 uint32_t receiver_length = receiver->length();
1474 if (start_index + search_length > receiver_length) return -1;
1475
1476 receiver = String::Flatten(isolate, receiver);
1477 search = String::Flatten(isolate, search);
1478
1479 DisallowGarbageCollection no_gc; // ensure vectors stay valid
1480 // Extract flattened substrings of cons strings before getting encoding.
1481 String::FlatContent receiver_content = receiver->GetFlatContent(no_gc);
1482 String::FlatContent search_content = search->GetFlatContent(no_gc);
1483
1484 // dispatch on type of strings
1485 if (search_content.IsOneByte()) {
1486 base::Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1487 return SearchString<const uint8_t>(isolate, receiver_content, pat_vector,
1488 start_index);
1489 }
1490 base::Vector<const base::uc16> pat_vector = search_content.ToUC16Vector();
1491 return SearchString<const base::uc16>(isolate, receiver_content, pat_vector,
1492 start_index);
1493}
1494
1495MaybeDirectHandle<String> String::GetSubstitution(
1496 Isolate* isolate, Match* match, DirectHandle<String> replacement,
1497 uint32_t start_index) {
1498 Factory* factory = isolate->factory();
1499
1500 const int replacement_length = replacement->length();
1501 const int captures_length = match->CaptureCount();
1502
1503 replacement = String::Flatten(isolate, replacement);
1504
1505 DirectHandle<String> dollar_string =
1507 int next_dollar_ix =
1508 String::IndexOf(isolate, replacement, dollar_string, start_index);
1509 if (next_dollar_ix < 0) {
1510 return replacement;
1511 }
1512
1513 IncrementalStringBuilder builder(isolate);
1514
1515 if (next_dollar_ix > 0) {
1516 builder.AppendString(factory->NewSubString(replacement, 0, next_dollar_ix));
1517 }
1518
1519 while (true) {
1520 const int peek_ix = next_dollar_ix + 1;
1521 if (peek_ix >= replacement_length) {
1522 builder.AppendCharacter('$');
1523 return builder.Finish();
1524 }
1525
1526 int continue_from_ix = -1;
1527 const uint16_t peek = replacement->Get(peek_ix);
1528 switch (peek) {
1529 case '$': // $$
1530 builder.AppendCharacter('$');
1531 continue_from_ix = peek_ix + 1;
1532 break;
1533 case '&': // $& - match
1534 builder.AppendString(match->GetMatch());
1535 continue_from_ix = peek_ix + 1;
1536 break;
1537 case '`': // $` - prefix
1538 builder.AppendString(match->GetPrefix());
1539 continue_from_ix = peek_ix + 1;
1540 break;
1541 case '\'': // $' - suffix
1542 builder.AppendString(match->GetSuffix());
1543 continue_from_ix = peek_ix + 1;
1544 break;
1545 case '0':
1546 case '1':
1547 case '2':
1548 case '3':
1549 case '4':
1550 case '5':
1551 case '6':
1552 case '7':
1553 case '8':
1554 case '9': {
1555 // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
1556 int scaled_index = (peek - '0');
1557 int advance = 1;
1558
1559 if (peek_ix + 1 < replacement_length) {
1560 const uint16_t next_peek = replacement->Get(peek_ix + 1);
1561 if (next_peek >= '0' && next_peek <= '9') {
1562 const int new_scaled_index = scaled_index * 10 + (next_peek - '0');
1563 if (new_scaled_index < captures_length) {
1564 scaled_index = new_scaled_index;
1565 advance = 2;
1566 }
1567 }
1568 }
1569
1570 if (scaled_index == 0 || scaled_index >= captures_length) {
1571 builder.AppendCharacter('$');
1572 continue_from_ix = peek_ix;
1573 break;
1574 }
1575
1576 bool capture_exists;
1577 DirectHandle<String> capture;
1579 isolate, capture, match->GetCapture(scaled_index, &capture_exists));
1580 if (capture_exists) builder.AppendString(capture);
1581 continue_from_ix = peek_ix + advance;
1582 break;
1583 }
1584 case '<': { // $<name> - named capture
1585 using CaptureState = String::Match::CaptureState;
1586
1587 if (!match->HasNamedCaptures()) {
1588 builder.AppendCharacter('$');
1589 continue_from_ix = peek_ix;
1590 break;
1591 }
1592
1593 DirectHandle<String> bracket_string =
1595 const int closing_bracket_ix =
1596 String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);
1597
1598 if (closing_bracket_ix == -1) {
1599 // No closing bracket was found, treat '$<' as a string literal.
1600 builder.AppendCharacter('$');
1601 continue_from_ix = peek_ix;
1602 break;
1603 }
1604
1605 DirectHandle<String> capture_name =
1606 factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
1607 DirectHandle<String> capture;
1608 CaptureState capture_state;
1610 isolate, capture,
1611 match->GetNamedCapture(capture_name, &capture_state));
1612
1613 if (capture_state == CaptureState::MATCHED) {
1614 builder.AppendString(capture);
1615 }
1616
1617 continue_from_ix = closing_bracket_ix + 1;
1618 break;
1619 }
1620 default:
1621 builder.AppendCharacter('$');
1622 continue_from_ix = peek_ix;
1623 break;
1624 }
1625
1626 // Go the the next $ in the replacement.
1627 // TODO(jgruber): Single-char lookups could be much more efficient.
1628 DCHECK_NE(continue_from_ix, -1);
1629 next_dollar_ix =
1630 String::IndexOf(isolate, replacement, dollar_string, continue_from_ix);
1631
1632 // Return if there are no more $ characters in the replacement. If we
1633 // haven't reached the end, we need to append the suffix.
1634 if (next_dollar_ix < 0) {
1635 if (continue_from_ix < replacement_length) {
1636 builder.AppendString(factory->NewSubString(
1637 replacement, continue_from_ix, replacement_length));
1638 }
1639 return builder.Finish();
1640 }
1641
1642 // Append substring between the previous and the next $ character.
1643 if (next_dollar_ix > continue_from_ix) {
1644 builder.AppendString(
1645 factory->NewSubString(replacement, continue_from_ix, next_dollar_ix));
1646 }
1647 }
1648
1649 UNREACHABLE();
1650}
1651
1652namespace { // for String.Prototype.lastIndexOf
1653
1654template <typename schar, typename pchar>
1655int StringMatchBackwards(base::Vector<const schar> subject,
1657 int pattern_length = pattern.length();
1658 DCHECK_GE(pattern_length, 1);
1659 DCHECK(idx + pattern_length <= subject.length());
1660
1661 if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
1662 for (int i = 0; i < pattern_length; i++) {
1663 base::uc16 c = pattern[i];
1664 if (c > String::kMaxOneByteCharCode) {
1665 return -1;
1666 }
1667 }
1668 }
1669
1670 pchar pattern_first_char = pattern[0];
1671 for (int i = idx; i >= 0; i--) {
1672 if (subject[i] != pattern_first_char) continue;
1673 int j = 1;
1674 while (j < pattern_length) {
1675 if (pattern[j] != subject[i + j]) {
1676 break;
1677 }
1678 j++;
1679 }
1680 if (j == pattern_length) {
1681 return i;
1682 }
1683 }
1684 return -1;
1685}
1686
1687} // namespace
1688
1689Tagged<Object> String::LastIndexOf(Isolate* isolate,
1691 DirectHandle<Object> search,
1693 if (IsNullOrUndefined(*receiver, isolate)) {
1695 isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
1696 isolate->factory()->NewStringFromAsciiChecked(
1697 "String.prototype.lastIndexOf")));
1698 }
1699 DirectHandle<String> receiver_string;
1700 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
1701 Object::ToString(isolate, receiver));
1702
1703 DirectHandle<String> search_string;
1704 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
1705 Object::ToString(isolate, search));
1706
1708 Object::ToNumber(isolate, position));
1709
1710 uint32_t start_index;
1711
1712 if (IsNaN(*position)) {
1713 start_index = receiver_string->length();
1714 } else {
1716 Object::ToInteger(isolate, position));
1717 start_index = ToValidIndex(*receiver_string, *position);
1718 }
1719
1720 uint32_t pattern_length = search_string->length();
1721 uint32_t receiver_length = receiver_string->length();
1722
1723 if (start_index + pattern_length > receiver_length) {
1724 start_index = receiver_length - pattern_length;
1725 }
1726
1727 if (pattern_length == 0) {
1728 return Smi::FromInt(start_index);
1729 }
1730
1731 receiver_string = String::Flatten(isolate, receiver_string);
1732 search_string = String::Flatten(isolate, search_string);
1733
1734 int last_index = -1;
1735 DisallowGarbageCollection no_gc; // ensure vectors stay valid
1736
1737 String::FlatContent receiver_content = receiver_string->GetFlatContent(no_gc);
1738 String::FlatContent search_content = search_string->GetFlatContent(no_gc);
1739
1740 if (search_content.IsOneByte()) {
1741 base::Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1742 if (receiver_content.IsOneByte()) {
1743 last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1744 pat_vector, start_index);
1745 } else {
1746 last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1747 pat_vector, start_index);
1748 }
1749 } else {
1750 base::Vector<const base::uc16> pat_vector = search_content.ToUC16Vector();
1751 if (receiver_content.IsOneByte()) {
1752 last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1753 pat_vector, start_index);
1754 } else {
1755 last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1756 pat_vector, start_index);
1757 }
1758 }
1759 return Smi::FromInt(last_index);
1760}
1761
1762bool String::HasOneBytePrefix(base::Vector<const char> str) {
1763 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(this));
1764 return IsEqualToImpl<EqualityType::kPrefix>(
1765 str, SharedStringAccessGuardIfNeeded::NotNeeded());
1766}
1767
1768namespace {
1769
1770template <typename Char>
1771bool IsIdentifierVector(base::Vector<Char> vec) {
1772 if (vec.empty()) {
1773 return false;
1774 }
1775 if (!IsIdentifierStart(vec[0])) {
1776 return false;
1777 }
1778 for (size_t i = 1; i < vec.size(); ++i) {
1779 if (!IsIdentifierPart(vec[i])) {
1780 return false;
1781 }
1782 }
1783 return true;
1784}
1785
1786} // namespace
1787
1788// static
1789bool String::IsIdentifier(Isolate* isolate, DirectHandle<String> str) {
1790 str = String::Flatten(isolate, str);
1792 String::FlatContent flat = str->GetFlatContent(no_gc);
1793 return flat.IsOneByte() ? IsIdentifierVector(flat.ToOneByteVector())
1794 : IsIdentifierVector(flat.ToUC16Vector());
1795}
1796
1797namespace {
1798
1799template <typename Char>
1800uint32_t HashString(Tagged<String> string, size_t start, uint32_t length,
1801 uint64_t seed,
1802 const SharedStringAccessGuardIfNeeded& access_guard) {
1804
1805 if (length > String::kMaxHashCalcLength) {
1806 return StringHasher::GetTrivialHash(length);
1807 }
1808
1809 std::unique_ptr<Char[]> buffer;
1810 const Char* chars;
1811
1812 if (IsConsString(string)) {
1813 DCHECK_EQ(0, start);
1814 DCHECK(!string->IsFlat());
1815 buffer.reset(new Char[length]);
1816 String::WriteToFlat(string, buffer.get(), 0, length, access_guard);
1817 chars = buffer.get();
1818 } else {
1819 chars = string->GetDirectStringChars<Char>(no_gc, access_guard) + start;
1820 }
1821
1822 return StringHasher::HashSequentialString<Char>(chars, length, seed);
1823}
1824
1825} // namespace
1826
1827uint32_t String::ComputeAndSetRawHash() {
1828 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(this));
1829 return ComputeAndSetRawHash(SharedStringAccessGuardIfNeeded::NotNeeded());
1830}
1831
1832uint32_t String::ComputeAndSetRawHash(
1833 const SharedStringAccessGuardIfNeeded& access_guard) {
1835 // Should only be called if hash code has not yet been computed.
1836 //
1837 // If in-place internalizable strings are shared, there may be calls to
1838 // ComputeAndSetRawHash in parallel. Since only flat strings are in-place
1839 // internalizable and their contents do not change, the result hash is the
1840 // same. The raw hash field is stored with relaxed ordering.
1841 DCHECK_IMPLIES(!v8_flags.shared_string_table, !HasHashCode());
1842
1843 // Store the hash code in the object.
1844 uint64_t seed = HashSeed(EarlyGetReadOnlyRoots());
1845 size_t start = 0;
1846 Tagged<String> string = this;
1847 StringShape shape(string);
1848 if (shape.IsSliced()) {
1849 Tagged<SlicedString> sliced = Cast<SlicedString>(string);
1850 start = sliced->offset();
1851 string = sliced->parent();
1852 shape = StringShape(string);
1853 }
1854 if (shape.IsCons() && string->IsFlat()) {
1855 string = Cast<ConsString>(string)->first();
1856 shape = StringShape(string);
1857 }
1858 if (shape.IsThin()) {
1859 string = Cast<ThinString>(string)->actual();
1860 shape = StringShape(string);
1861 if (length() == string->length()) {
1862 uint32_t raw_hash = string->RawHash();
1863 DCHECK(IsHashFieldComputed(raw_hash));
1864 set_raw_hash_field(raw_hash);
1865 return raw_hash;
1866 }
1867 }
1868 uint32_t raw_hash_field =
1870 ? HashString<uint8_t>(string, start, length(), seed, access_guard)
1871 : HashString<uint16_t>(string, start, length(), seed, access_guard);
1872 set_raw_hash_field_if_empty(raw_hash_field);
1873 // Check the hash code is there (or a forwarding index if the string was
1874 // internalized/externalized in parallel).
1875 DCHECK(HasHashCode() || HasForwardingIndex(kAcquireLoad));
1876 // Ensure that the hash value of 0 is never computed.
1877 DCHECK_NE(HashBits::decode(raw_hash_field), 0);
1878 return raw_hash_field;
1879}
1880
1881bool String::SlowAsArrayIndex(uint32_t* index) {
1883 uint32_t length = this->length();
1884 if (length <= kMaxCachedArrayIndexLength) {
1885 uint32_t field = EnsureRawHash(); // Force computation of hash code.
1886 if (!IsIntegerIndex(field)) return false;
1887 *index = ArrayIndexValueBits::decode(field);
1888 return true;
1889 }
1890 if (length == 0 || length > kMaxArrayIndexSize) return false;
1891 StringCharacterStream stream(this);
1892 return StringToIndex(&stream, index);
1893}
1894
1895bool String::SlowAsIntegerIndex(size_t* index) {
1897 uint32_t length = this->length();
1898 if (length <= kMaxCachedArrayIndexLength) {
1899 uint32_t field = EnsureRawHash(); // Force computation of hash code.
1900 if (!IsIntegerIndex(field)) return false;
1901 *index = ArrayIndexValueBits::decode(field);
1902 return true;
1903 }
1904 if (length == 0 || length > kMaxIntegerIndexSize) return false;
1905 StringCharacterStream stream(this);
1906 return StringToIndex<StringCharacterStream, size_t, kToIntegerIndex>(&stream,
1907 index);
1908}
1909
1910void String::PrintOn(FILE* file) {
1911 uint32_t length = this->length();
1912 for (uint32_t i = 0; i < length; i++) {
1913 PrintF(file, "%c", Get(i));
1914 }
1915}
1916
1917void String::PrintOn(std::ostream& ostream) {
1918 uint32_t length = this->length();
1919 for (uint32_t i = 0; i < length; i++) {
1920 ostream.put(Get(i));
1921 }
1922}
1923
// Shrinks |string| in place to |new_length| characters and returns it.
//
// Special cases:
//  - new_length == 0 returns the factory's empty_string() instead of
//    modifying |string|.
//  - If |string| is already no longer than |new_length| it is returned
//    unchanged (this function never grows a string).
//
// Otherwise the old and new object sizes are computed with the SizeFor() of
// the matching encoding, the heap is told about the size change (unless the
// object is a large object), the new length is published with a release
// store, and the padding of the shortened object is cleared.
1924Handle<String> SeqString::Truncate(Isolate* isolate, Handle<SeqString> string,
1925 uint32_t new_length) {
1926 if (new_length == 0) return isolate->factory()->empty_string();
1927
1928 int new_size, old_size;
1929 uint32_t old_length = string->length();
1930 if (old_length <= new_length) return string;
1931
 // Object sizes depend on the character width of the sequential string.
1932 if (IsSeqOneByteString(*string)) {
1933 old_size = SeqOneByteString::SizeFor(old_length);
1934 new_size = SeqOneByteString::SizeFor(new_length);
1935 } else {
1936 DCHECK(IsSeqTwoByteString(*string));
1937 old_size = SeqTwoByteString::SizeFor(old_length);
1938 new_size = SeqTwoByteString::SizeFor(new_length);
1939 }
1940
 // Debug-only sanity check: both the object start and its new end must be
 // aligned to kObjectAlignment.
1941#if DEBUG
1942 Address start_of_string = (*string).address();
1943 DCHECK(IsAligned(start_of_string, kObjectAlignment));
1944 DCHECK(IsAligned(start_of_string + new_size, kObjectAlignment));
1945#endif
1946
1947 Heap* heap = isolate->heap();
1948 if (!heap->IsLargeObject(*string)) {
1949 // Sizes are pointer size aligned, so that we can use filler objects
1950 // that are a multiple of pointer size.
1951 // No slot invalidation needed since this method is only used on freshly
1952 // allocated strings.
1953 heap->NotifyObjectSizeChange(*string, old_size, new_size,
1954 ClearRecordedSlots::kNo);
1955 }
1956 // We are storing the new length using release store after creating a filler
1957 // for the left-over space to avoid races with the sweeper thread.
1958 string->set_length(new_length, kReleaseStore);
 // The padding is derived from the (now updated) length, so it must be
 // cleared only after the store above.
1959 string->ClearPadding();
1960
1961 return string;
1962}
1963
// Returns the data/padding byte counts of this sequential string by
// dispatching to the one-byte or two-byte implementation, depending on
// IsSeqOneByteString(this).
1964SeqString::DataAndPaddingSizes SeqString::GetDataAndPaddingSizes() const {
1965 if (IsSeqOneByteString(this)) {
1966 return Cast<SeqOneByteString>(this)->GetDataAndPaddingSizes();
1967 }
1968 return Cast<SeqTwoByteString>(this)->GetDataAndPaddingSizes();
1969}
1970
// Computes the sizes for a one-byte sequential string:
//  - data_size    = sizeof(SeqOneByteString) + length() * kOneByteSize
//  - padding_size = SizeFor(length()) - data_size, i.e. whatever SizeFor()
//    adds on top of the raw data size.
1971SeqString::DataAndPaddingSizes SeqOneByteString::GetDataAndPaddingSizes()
1972 const {
1973 int data_size = sizeof(SeqOneByteString) + length() * kOneByteSize;
1974 int padding_size = SizeFor(length()) - data_size;
1975 return DataAndPaddingSizes{data_size, padding_size};
1976}
1977
// Computes the sizes for a two-byte sequential string:
//  - data_size    = sizeof(SeqTwoByteString) + length() * base::kUC16Size
//  - padding_size = SizeFor(length()) - data_size, i.e. whatever SizeFor()
//    adds on top of the raw data size.
1978SeqString::DataAndPaddingSizes SeqTwoByteString::GetDataAndPaddingSizes()
1979 const {
1980 int data_size = sizeof(SeqTwoByteString) + length() * base::kUC16Size;
1981 int padding_size = SizeFor(length()) - data_size;
1982 return DataAndPaddingSizes{data_size, padding_size};
1983}
1984
// Heap-verification hook, compiled only when VERIFY_HEAP is defined.
// Runs the generic StringVerify() and then checks that
//  - the object really is a sequential string,
//  - its padding is at most kTaggedSize bytes, and
//  - every padding byte following the character data is zero.
1985#ifdef VERIFY_HEAP
1986V8_EXPORT_PRIVATE void SeqString::SeqStringVerify(Isolate* isolate) {
1987 StringVerify(isolate);
1988 CHECK(IsSeqString(this, isolate));
1989 DataAndPaddingSizes sz = GetDataAndPaddingSizes();
 // The padding starts immediately after the data bytes.
1990 auto padding = reinterpret_cast<char*>(address() + sz.data_size);
1991 CHECK(sz.padding_size <= kTaggedSize);
1992 for (int i = 0; i < sz.padding_size; ++i) {
1993 CHECK_EQ(padding[i], 0);
1994 }
1995}
1996#endif // VERIFY_HEAP
1997
// Zero-fills the padding bytes that follow the character data of this
// sequential string. Does nothing when there is no padding. In debug builds
// it also asserts that data size plus padding size equals Size().
1998void SeqString::ClearPadding() {
1999 DataAndPaddingSizes sz = GetDataAndPaddingSizes();
2000 DCHECK_EQ(sz.data_size + sz.padding_size, Size());
2001 if (sz.padding_size == 0) return;
2002 memset(reinterpret_cast<void*>(address() + sz.data_size), 0, sz.padding_size);
2003}
2004
// Returns the code unit at |index| of this cons string.
//
// |index| must be smaller than length() (checked in debug builds only).
// |access_guard| is forwarded to every nested Get() call, so that all reads
// performed on behalf of this lookup use the caller's guard - on the
// flattened fast path as well as on the tree walk.
//
// Fast path: when second() is empty the string is flattened and the
// character is read straight from first(). Otherwise the cons tree is
// descended iteratively: go left while |index| falls inside the left child,
// else subtract the left child's length and go right, until a non-cons string
// is reached.
2005uint16_t ConsString::Get(
2006 uint32_t index, const SharedStringAccessGuardIfNeeded& access_guard) const {
 // |index| is unsigned, so only the upper bound needs checking.
2007 DCHECK_LT(index, this->length());
2008
2009 // Check for a flattened cons string
2010 if (second()->length() == 0) {
2011 Tagged<String> left = first();
2012 return left->Get(index, access_guard);
2013 }
2014
2015 Tagged<String> string = Cast<String>(this);
2016
2017 while (true) {
2018 if (StringShape(string).IsCons()) {
2019 Tagged<ConsString> cons_string = Cast<ConsString>(string);
2020 Tagged<String> left = cons_string->first();
2021 if (left->length() > index) {
2022 string = left;
2023 } else {
 // The target lies in the right child; rebase |index| onto it.
2024 index -= left->length();
2025 string = cons_string->second();
2026 }
2027 } else {
2028 return string->Get(index, access_guard);
2029 }
2030 }
2031
2032 UNREACHABLE();
2033}
2034
// Returns the code unit at |index| by delegating to the string returned by
// actual(), forwarding the caller's |access_guard|.
2035uint16_t ThinString::Get(
2036 uint32_t index, const SharedStringAccessGuardIfNeeded& access_guard) const {
2037 return actual()->Get(index, access_guard);
2038}
2039
// Returns the code unit at |index| of this slice by reading position
// offset() + index of parent(), forwarding the caller's |access_guard|.
2040uint16_t SlicedString::Get(
2041 uint32_t index, const SharedStringAccessGuardIfNeeded& access_guard) const {
2042 return parent()->Get(offset() + index, access_guard);
2043}
2044
// Returns length() multiplied by the per-character size: i::kShortSize when
// the string has a two-byte representation, kCharSize otherwise.
2045int ExternalString::ExternalPayloadSize() const {
2046 int length_multiplier = IsTwoByteRepresentation() ? i::kShortSize : kCharSize;
2047 return length() * length_multiplier;
2048}
2049
// Constructs a reader over |str|: initializes the Relocatable base with
// |isolate|, stores the handle in str_ and caches str->length() in length_.
// In DEBUG builds it additionally asserts that the constructor runs on the
// isolate's own thread.
//
// NOTE(review): listing line 2056 is absent from this rendering; the original
// constructor body may contain one more statement after the #endif - confirm
// against the real file before relying on this description.
2050FlatStringReader::FlatStringReader(Isolate* isolate, DirectHandle<String> str)
2051 : Relocatable(isolate), str_(str), length_(str->length()) {
2052#if DEBUG
2053 // Check that this constructor is called only from the main thread.
2054 DCHECK_EQ(ThreadId::Current(), isolate->thread_id());
2055#endif
2057}
2058
2060 DCHECK(str_->IsFlat());
2062 // This does not actually prevent the vector from being relocated later.
2063 String::FlatContent content = str_->GetFlatContent(no_gc);
2064 DCHECK(content.IsFlat());
2065 is_one_byte_ = content.IsOneByte();
2066 if (is_one_byte_) {
2067 start_ = content.ToOneByteVector().begin();
2068 } else {
2069 start_ = content.ToUC16Vector().begin();
2070 }
2071}
2072
2074 int offset) {
2075 DCHECK(!cons_string.is_null());
2076 root_ = cons_string;
2077 consumed_ = offset;
2078 // Force stack blown condition to trigger restart.
2079 depth_ = 1;
2081 DCHECK(StackBlown());
2082}
2083
2085 DCHECK_NE(depth_, 0);
2086 DCHECK_EQ(0, *offset_out);
2087 bool blew_stack = StackBlown();
2089 // Get the next leaf if there is one.
2090 if (!blew_stack) string = NextLeaf(&blew_stack);
2091 // Restart search from root.
2092 if (blew_stack) {
2093 DCHECK(string.is_null());
2094 string = Search(offset_out);
2095 }
2096 // Ensure future calls return null immediately.
2097 if (string.is_null()) Reset({});
2098 return string;
2099}
2100
2102 Tagged<ConsString> cons_string = root_;
2103 // Reset the stack, pushing the root string.
2104 depth_ = 1;
2105 maximum_depth_ = 1;
2106 frames_[0] = cons_string;
2107 const uint32_t consumed = consumed_;
2108 uint32_t offset = 0;
2109 while (true) {
2110 // Loop until the string is found which contains the target offset.
2111 Tagged<String> string = cons_string->first();
2112 uint32_t length = string->length();
2113 int32_t type;
2114 if (consumed < offset + length) {
2115 // Target offset is in the left branch.
2116 // Keep going if we're still in a ConString.
2117 type = string->map()->instance_type();
2118 if ((type & kStringRepresentationMask) == kConsStringTag) {
2119 cons_string = Cast<ConsString>(string);
2120 PushLeft(cons_string);
2121 continue;
2122 }
2123 // Tell the stack we're done descending.
2125 } else {
2126 // Descend right.
2127 // Update progress through the string.
2128 offset += length;
2129 // Keep going if we're still in a ConString.
2130 string = cons_string->second();
2131 type = string->map()->instance_type();
2132 if ((type & kStringRepresentationMask) == kConsStringTag) {
2133 cons_string = Cast<ConsString>(string);
2134 PushRight(cons_string);
2135 continue;
2136 }
2137 // Need this to be updated for the current string.
2138 length = string->length();
2139 // Account for the possibility of an empty right leaf.
2140 // This happens only if we have asked for an offset outside the string.
2141 if (length == 0) {
2142 // Reset so future operations will return null immediately.
2143 Reset({});
2144 return {};
2145 }
2146 // Tell the stack we're done descending.
2148 // Pop stack so next iteration is in correct place.
2149 Pop();
2150 }
2151 DCHECK_NE(length, 0);
2152 // Adjust return values and exit.
2154 *offset_out = consumed - offset;
2155 return string;
2156 }
2157 UNREACHABLE();
2158}
2159
2161 while (true) {
2162 // Tree traversal complete.
2163 if (depth_ == 0) {
2164 *blew_stack = false;
2165 return {};
2166 }
2167 // We've lost track of higher nodes.
2168 if (StackBlown()) {
2169 *blew_stack = true;
2170 return {};
2171 }
2172 // Go right.
2173 Tagged<ConsString> cons_string = frames_[OffsetForDepth(depth_ - 1)];
2174 Tagged<String> string = cons_string->second();
2175 int32_t type = string->map()->instance_type();
2176 if ((type & kStringRepresentationMask) != kConsStringTag) {
2177 // Pop stack so next iteration is in correct place.
2178 Pop();
2179 uint32_t length = string->length();
2180 // Could be a flattened ConsString.
2181 if (length == 0) continue;
2182 consumed_ += length;
2183 return string;
2184 }
2185 cons_string = Cast<ConsString>(string);
2186 PushRight(cons_string);
2187 // Need to traverse all the way left.
2188 while (true) {
2189 // Continue left.
2190 string = cons_string->first();
2191 type = string->map()->instance_type();
2192 if ((type & kStringRepresentationMask) != kConsStringTag) {
2194 uint32_t length = string->length();
2195 if (length == 0) break; // Skip empty left-hand sides of ConsStrings.
2196 consumed_ += length;
2197 return string;
2198 }
2199 cons_string = Cast<ConsString>(string);
2200 PushLeft(cons_string);
2201 }
2202 }
2203 UNREACHABLE();
2204}
2205
2207 uint32_t start_index, const DisallowGarbageCollection& no_gc) {
2208 DCHECK(IsFlat());
2209 Tagged<String> subject = this;
2210 StringShape shape(subject);
2211 if (IsConsString(subject)) {
2212 subject = Cast<ConsString>(subject)->first();
2213 shape = StringShape(subject);
2214 } else if (IsSlicedString(subject)) {
2215 start_index += Cast<SlicedString>(subject)->offset();
2216 subject = Cast<SlicedString>(subject)->parent();
2217 shape = StringShape(subject);
2218 }
2219 if (IsThinString(subject)) {
2220 subject = Cast<ThinString>(subject)->actual();
2221 shape = StringShape(subject);
2222 }
2223 CHECK_LE(0, start_index);
2224 CHECK_LE(start_index, subject->length());
2225 switch (shape.representation_and_encoding_tag()) {
2227 return reinterpret_cast<const uint8_t*>(
2228 Cast<SeqOneByteString>(subject)->GetChars(no_gc) + start_index);
2230 return reinterpret_cast<const uint8_t*>(
2231 Cast<SeqTwoByteString>(subject)->GetChars(no_gc) + start_index);
2233 return reinterpret_cast<const uint8_t*>(
2234 Cast<ExternalOneByteString>(subject)->GetChars() + start_index);
2236 return reinterpret_cast<const uint8_t*>(
2237 Cast<ExternalTwoByteString>(subject)->GetChars() + start_index);
2238 default:
2239 UNREACHABLE();
2240 }
2241}
2242
2244 Tagged<String>, uint16_t*, uint32_t, uint32_t);
2246 Tagged<String>, uint8_t*, uint32_t, uint32_t);
2248 Tagged<String>, uint16_t*, uint32_t, uint32_t to,
2251 Tagged<String>, uint8_t*, uint32_t, uint32_t,
2254 uint8_t*, Tagged<ConsString>, uint32_t, uint32_t,
2257 uint16_t*, Tagged<ConsString>, uint32_t, uint32_t,
2259
// Compile-time consistency checks only; nothing in this namespace is
// evaluated at run time beyond whatever the Torque macro below expands to.
2260namespace {
2261// Check that the constants defined in src/objects/instance-type.h coincide
2262// with the Torque-definition of string instance types in src/objects/string.tq.
2263
2264DEFINE_TORQUE_GENERATED_STRING_INSTANCE_TYPE()
2265
// Representation bits.
2266static_assert(kStringRepresentationMask == RepresentationBits::kMask);
2267
// Encoding bit: set for one-byte strings, clear for two-byte strings.
2268static_assert(kStringEncodingMask == IsOneByteBit::kMask);
2269static_assert(kTwoByteStringTag == IsOneByteBit::encode(false));
2270static_assert(kOneByteStringTag == IsOneByteBit::encode(true));
2271
// Uncached-external bit.
2272static_assert(kUncachedExternalStringMask == IsUncachedBit::kMask);
2273static_assert(kUncachedExternalStringTag == IsUncachedBit::encode(true));
2274
// Internalization bit: set means "not internalized".
2275static_assert(kIsNotInternalizedMask == IsNotInternalizedBit::kMask);
2276static_assert(kNotInternalizedTag == IsNotInternalizedBit::encode(true));
2277static_assert(kInternalizedTag == IsNotInternalizedBit::encode(false));
2278} // namespace
2279
2280} // namespace internal
2281} // namespace v8
#define one
ThreadLocalTop * top
#define SBXCHECK_LE(lhs, rhs)
Definition check.h:67
SourcePosition pos
static const int kNoPreviousCharacter
Definition unicode.h:102
static unsigned Length(uchar chr, int previous)
static unsigned Encode(char *out, uchar c, int previous, bool replace_invalid=false)
virtual const char * data() const =0
virtual void Unaccount(Isolate *isolate)
virtual const uint16_t * data() const =0
virtual size_t length() const =0
bool IsOneByte() const
Definition api.cc:5620
static constexpr T decode(U value)
Definition bit-field.h:66
static V8_NODISCARD constexpr U update(U previous, T value)
Definition bit-field.h:61
size_t size() const
void reserve(size_t new_capacity)
int length() const
Definition vector.h:64
constexpr bool empty() const
Definition vector.h:73
constexpr size_t size() const
Definition vector.h:70
constexpr T * begin() const
Definition vector.h:96
V8_EXPORT_PRIVATE Tagged< String > Continue(int *offset_out)
Definition string.cc:2084
void Reset(Tagged< ConsString > cons_string, int offset=0)
Definition string.h:1329
void PushRight(Tagged< ConsString > string)
Tagged< String > NextLeaf(bool *blew_stack)
Definition string.cc:2160
Tagged< ConsString > frames_[kStackSize]
Definition string.h:1366
Tagged< String > Search(int *offset_out)
Definition string.cc:2101
V8_EXPORT_PRIVATE void Initialize(Tagged< ConsString > cons_string, int offset)
Definition string.cc:2073
static int OffsetForDepth(int depth)
static const int kStackSize
Definition string.h:1347
Tagged< ConsString > root_
Definition string.h:1367
void PushLeft(Tagged< ConsString > string)
void Init(Address host_address, IsolateForSandbox isolate, Address value)
void InitExternalPointerFieldsDuringExternalization(Tagged< Map > new_map, Isolate *isolate)
Definition string.cc:123
ExternalPointerMember< kExternalStringResourceDataTag > resource_data_
Definition string.h:1206
Handle< String > LookupSingleCharacterStringFromCode(uint16_t code)
HandleType< String > NewSubString(HandleType< T > str, uint32_t begin, uint32_t end)
Definition factory-inl.h:88
void PostGarbageCollection() override
Definition string.cc:2059
DirectHandle< String > str_
Definition string.h:1311
static V8_INLINE bool InYoungGeneration(Tagged< Object > object)
static V8_INLINE bool InWritableSharedSpace(Tagged< HeapObject > object)
static V8_INLINE bool InReadOnlySpace(Tagged< HeapObject > object)
static V8_INLINE bool InAnySharedSpace(Tagged< HeapObject > object)
void set_map(Isolate *isolate, Tagged< Map > value)
Tagged< Map > map() const
void set_map_safe_transition(IsolateT *isolate, Tagged< Map > value, ReleaseStoreTag)
V8_EXPORT_PRIVATE int SizeFromMap(Tagged< Map > map) const
Definition objects.cc:1935
MaybeDirectHandle< String > Finish()
V8_INLINE void AppendCharacter(uint8_t c)
V8_INLINE void AppendString(std::string_view str)
uint32_t raw_hash_field() const
Definition name.h:47
static uint32_t CreateExternalForwardingIndex(uint32_t index)
Definition name-inl.h:146
uint32_t EnsureRawHash()
Definition name-inl.h:175
void set_raw_hash_field(uint32_t hash)
Definition name.h:55
static bool IsInternalizedForwardingIndex(uint32_t raw_hash_field)
Definition name-inl.h:118
static bool IsExternalForwardingIndex(uint32_t raw_hash_field)
Definition name-inl.h:125
static bool IsHashFieldComputed(uint32_t raw_hash_field)
Definition name-inl.h:96
static bool IsNeeded(Tagged< String > str, LocalIsolate *local_isolate)
Definition string-inl.h:76
static SharedStringAccessGuardIfNeeded NotNeeded()
Definition string-inl.h:72
void Reset(Tagged< String > string, int offset=0)
static bool Equals(State *state_1, State *state_2, int to_check)
V8_INLINE bool IsShared() const
Definition string-inl.h:200
V8_INLINE uint32_t encoding_tag() const
Definition string-inl.h:212
V8_INLINE bool IsThin() const
Definition string-inl.h:174
V8_INLINE bool IsInternalized() const
Definition string-inl.h:163
V8_INLINE bool IsIndirect() const
Definition string-inl.h:182
V8_INLINE bool IsCons() const
Definition string-inl.h:170
V8_INLINE bool IsSliced() const
Definition string-inl.h:178
V8_INLINE bool IsExternal() const
Definition string-inl.h:188
V8_INLINE uint32_t representation_and_encoding_tag() const
Definition string-inl.h:216
void Add(const char *format)
base::Vector< const uint8_t > ToOneByteVector() const
Definition string.h:139
base::Vector< const base::uc16 > ToUC16Vector() const
Definition string.h:145
virtual int CaptureCount()=0
virtual DirectHandle< String > GetMatch()=0
virtual MaybeDirectHandle< String > GetNamedCapture(DirectHandle< String > name, CaptureState *state)=0
virtual DirectHandle< String > GetSuffix()=0
virtual bool HasNamedCaptures()=0
virtual MaybeDirectHandle< String > GetCapture(int i, bool *capture_exists)=0
virtual DirectHandle< String > GetPrefix()=0
void StringShortPrint(StringStream *accumulator)
Definition string.cc:604
static void WriteToFlat(Tagged< String > source, SinkCharT *sink, uint32_t start, uint32_t length)
Definition string.cc:772
static int32_t ToArrayIndex(Address addr)
Definition string.cc:648
const uint8_t * AddressOfCharacterAt(uint32_t start_index, const DisallowGarbageCollection &no_gc)
Definition string.cc:2206
bool IsTwoByteRepresentation() const
Definition string-inl.h:368
V8_EXPORT_PRIVATE static V8_INLINE std::optional< FlatContent > TryGetFlatContentFromDirectString(const DisallowGarbageCollection &no_gc, Tagged< String > string, uint32_t offset, uint32_t length, const SharedStringAccessGuardIfNeeded &)
Definition string-inl.h:855
void MakeExternalDuringGC(Isolate *isolate, T *resource)
Definition string.cc:289
void MakeThin(IsolateT *isolate, Tagged< String > canonical)
Definition string.cc:134
const char * PrefixForDebugPrint() const
Definition string.cc:568
uint32_t length() const
Definition string-inl.h:127
V8_EXPORT_PRIVATE FlatContent SlowGetFlatContent(const DisallowGarbageCollection &no_gc, const SharedStringAccessGuardIfNeeded &)
bool IsShared() const
static HandleType< Number > ToNumber(Isolate *isolate, HandleType< String > subject)
Definition string.cc:661
const char * SuffixForDebugPrint() const
Definition string.cc:598
static void WriteToFlat2(SinkCharT *dst, Tagged< ConsString > src, uint32_t src_index, uint32_t length, const SharedStringAccessGuardIfNeeded &aguard, const DisallowGarbageCollection &no_gc)
Definition string.cc:1074
V8_EXPORT_PRIVATE bool MakeExternal(Isolate *isolate, v8::String::ExternalStringResource *resource)
bool IsFlat() const
std::unique_ptr< char[]> ToCString(uint32_t offset, uint32_t length, size_t *length_output=nullptr)
Definition string.cc:711
static const uint32_t kMaxShortPrintLength
Definition string.h:518
void PrintUC16(std::ostream &os, int start=0, int end=-1)
Definition string.cc:621
bool SupportsExternalization(v8::String::Encoding)
Definition string.cc:529
bool MarkForExternalizationDuringGC(Isolate *isolate, T *resource)
V8_INLINE constexpr bool is_null() const
Definition tagged.h:502
static ThreadId Current()
Definition thread-id.h:32
ExternalPointerMember< kExternalStringResourceTag > resource_
Definition string.h:1160
int start
int end
#define ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, dst, call)
Definition isolate.h:284
#define ASSIGN_RETURN_ON_EXCEPTION(isolate, dst, call)
Definition isolate.h:291
#define THROW_NEW_ERROR_RETURN_FAILURE(isolate, call)
Definition isolate.h:294
#define EXPORT_TEMPLATE_DEFINE(export)
OptionalOpIndex index
int32_t offset
TNode< Object > receiver
std::string pattern
double second
ZoneVector< RpoNumber > & result
int y
int x
int position
Definition liveedit.cc:290
int s
Definition mul-fft.cc:297
const int length_
Definition mul-fft.cc:473
int r
Definition mul-fft.cc:298
uint16_t uc16
Definition strings.h:18
V8_INLINE constexpr bool IsInternalizedString(InstanceType instance_type)
V8_INLINE const Operation & Get(const Graph &graph, OpIndex index)
Definition graph.h:1231
const uint32_t kStringEncodingMask
bool IsNaN(Tagged< Object > obj)
PerThreadAssertScopeDebugOnly< false, SAFEPOINTS_ASSERT, HEAP_ALLOCATION_ASSERT > DisallowGarbageCollection
uint32_t PositiveNumberToUint32(Tagged< Object > number)
constexpr intptr_t kObjectAlignment
Definition globals.h:930
bool IsLineTerminatorSequence(base::uc32 c, base::uc32 next)
constexpr int kOneByteSize
Definition globals.h:703
const uint32_t kTwoByteStringTag
void PrintF(const char *format,...)
Definition utils.cc:39
const uint32_t kUncachedExternalStringTag
constexpr int kCharSize
Definition globals.h:396
Tagged(T object) -> Tagged< T >
const uint32_t kUncachedExternalStringMask
BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL int character
const uint32_t kNotInternalizedTag
V8_INLINE DirectHandle< T > direct_handle(Tagged< T > object, Isolate *isolate)
const uint32_t kOneByteStringTag
bool IsNullOrUndefined(Tagged< Object > obj, Isolate *isolate)
Handle< To > UncheckedCast(Handle< From > value)
Definition handles-inl.h:55
bool CompareCharsEqual(const lchar *lhs, const rchar *rhs, size_t chars)
Definition utils.h:509
Handle< T > IndirectHandle
Definition globals.h:1086
int SearchString(Isolate *isolate, base::Vector< const SubjectChar > subject, base::Vector< const PatternChar > pattern, int start_index)
const uint32_t kStringRepresentationMask
void CopyChars(DstType *dst, const SrcType *src, size_t count) V8_NONNULL(1
bool StringToIndex(Stream *stream, index_t *index)
Definition utils-inl.h:59
V8_EXPORT_PRIVATE FlagValues v8_flags
@ ALLOW_NON_DECIMAL_PREFIX
const uint32_t kInternalizedTag
int CompareChars(const lchar *lhs, const rchar *rhs, size_t chars)
Definition utils.h:536
const uint32_t kIsNotInternalizedMask
uint64_t HashSeed(Isolate *isolate)
static constexpr Address kNullAddress
Definition v8-internal.h:53
JSArrayBuffer::IsDetachableBit is_shared
static void CalculateLineEndsImpl(String::LineEndsVector *line_ends, base::Vector< const SourceChar > src, bool include_ending_line)
Definition string.cc:1143
constexpr int kShortSize
Definition globals.h:397
double StringToDouble(const char *str, ConversionFlag flags, double empty_string_val)
T * NewArray(size_t size)
Definition allocation.h:43
template const char * string
BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL int size_t search_length
Tagged< To > Cast(Tagged< From > value, const v8::SourceLocation &loc=INIT_SOURCE_LOCATION_IN_DEBUG)
Definition casting.h:150
static constexpr ReleaseStoreTag kReleaseStore
Definition globals.h:2910
static constexpr AcquireLoadTag kAcquireLoad
Definition globals.h:2908
#define UNREACHABLE()
Definition logging.h:67
#define DCHECK_LE(v1, v2)
Definition logging.h:490
#define CHECK(condition)
Definition logging.h:124
#define CHECK_LE(lhs, rhs)
#define DCHECK_IMPLIES(v1, v2)
Definition logging.h:493
#define DCHECK_NE(v1, v2)
Definition logging.h:486
#define DCHECK_GE(v1, v2)
Definition logging.h:488
#define CHECK_EQ(lhs, rhs)
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_LT(v1, v2)
Definition logging.h:489
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define DCHECK_GT(v1, v2)
Definition logging.h:487
#define USE(...)
Definition macros.h:293
#define V8_EXPORT_PRIVATE
Definition macros.h:460
constexpr bool IsAligned(T value, U alignment)
Definition macros.h:403
std::unordered_map< Address, const SinkCharT * > first_occurrence_
Definition string.cc:1015
bool enabled_
Definition string.cc:1013
#define V8_INLINE
Definition v8config.h:500
#define V8_LIKELY(condition)
Definition v8config.h:661
#define V8_UNLIKELY(condition)
Definition v8config.h:660
std::unique_ptr< ValueMirror > value
wasm::ValueType type