v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
js-segment-iterator.cc
Go to the documentation of this file.
1// Copyright 2018 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_INTL_SUPPORT
6#error Internationalization is expected to be enabled.
7#endif // V8_INTL_SUPPORT
8
10
11#include <map>
12#include <memory>
13#include <string>
14
16#include "src/heap/factory.h"
22#include "unicode/brkiter.h"
23
24namespace v8 {
25namespace internal {
26
30
31// ecma402 #sec-createsegmentiterator
// NOTE(review): this listing skips some numbered lines. The line that opens
// this definition (32) is absent, as are 54, 61 and 63, so the declaration of
// `unicode_string` and the right-hand side of the `segment_iterator`
// initializer are not visible here. Confirm against the original file before
// editing any code in this block.
33 Isolate* isolate, DirectHandle<String> input_string,
34 icu::BreakIterator* incoming_break_iterator,
35 JSSegmenter::Granularity granularity) {
36 // Clone the incoming iterator, both to take ownership of the copy and to
37 // avoid sharing it with Containing and other iterator calls:
38 // icu::BreakIterator keeps the iteration position internally, so a single
39 // instance cannot be shared across multiple calls to
 // JSSegmentIterator::Create and JSSegments::Containing.
40 std::shared_ptr<icu::BreakIterator> break_iterator{
41 incoming_break_iterator->clone()};
42 DCHECK_NOT_NULL(break_iterator);
43 DirectHandle<Map> map(isolate->native_context()->intl_segment_iterator_map(),
44 isolate);
45
46 // 5. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to 0.
47 break_iterator->first();
 // Wrap the clone in a Managed<> object; the literal 0 is the value passed for
 // the estimated-size argument of Managed<>::From.
48 DirectHandle<Managed<icu::BreakIterator>> managed_break_iterator =
49 Managed<icu::BreakIterator>::From(isolate, 0, break_iterator);
50
 // Copy the iterator's text into a separately owned icu::UnicodeString, wrap
 // that copy in a Managed<> object, and point the cloned iterator at the copy.
 // NOTE(review): presumably this keeps the text alive independently of the
 // incoming iterator - confirm. The left-hand side of the Managed<>::From
 // call below (listing line 54) is missing from this extract.
51 std::shared_ptr<icu::UnicodeString> string =
52 std::make_shared<icu::UnicodeString>();
53 break_iterator->getText().getText(*string);
55 Managed<icu::UnicodeString>::From(isolate, 0, string);
56
57 break_iterator->setText(*string);
58
59 // Now all properties are ready, so we can allocate the result object.
60 DirectHandle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
 // NOTE(review): the initializer expression for `segment_iterator` (listing
 // line 63) is missing from this extract.
62 DirectHandle<JSSegmentIterator> segment_iterator =
64
 // Initialize the iterator's fields: zeroed flags, the requested granularity,
 // the managed break iterator, the original input string, and
 // `unicode_string` (presumably the managed ICU string copy created above).
65 segment_iterator->set_flags(0);
66 segment_iterator->set_granularity(granularity);
67 segment_iterator->set_icu_break_iterator(*managed_break_iterator);
68 segment_iterator->set_raw_string(*input_string);
69 segment_iterator->set_unicode_string(*unicode_string);
70
71 return segment_iterator;
72}
73
74// ecma402 #sec-%segmentiteratorprototype%.next
// NOTE(review): this listing skips some numbered lines. Lines 75, 98, 120,
// 129, 131, 135, 139, 142, 145, 148 and 150 are absent, so the opening of the
// signature, the declarations of `segment_data`, `index` and `map`, the
// condition of the ternary in set_segment, the trailing arguments of
// set_segment/set_index, and the start of the call in the else branch are not
// visible here. Confirm against the original file before editing any code in
// this block.
76 Isolate* isolate, DirectHandle<JSSegmentIterator> segment_iterator) {
77 // Sketches of ideas for future performance improvements, roughly in order
78 // of difficulty:
79 // - Add a fast path for grapheme segmentation of one-byte strings that
80 // entirely skips calling into ICU.
81 // - When we enter this function, perform a batch of calls into ICU and
82 // stash away the results, so the next couple of invocations can access
83 // them from a (Torque?) builtin without calling into C++.
84 // - Implement compiler support for escape-analyzing the JSSegmentDataObject
85 // and avoid allocating it when possible.
86
87 // TODO(v8:14681): We StackCheck here to break execution in the event of an
88 // interrupt. Ordinarily in JS loops, this stack check should already be
89 // occurring; however, some loops implemented within CodeStubAssembler and
90 // Torque builtins do not currently implement these checks. Preferable
91 // solutions, which would also benefit other iterators implemented in C++,
 // include:
92 // 1) Performing the stack check in CEntry, which would provide a solution
93 // for all methods implemented in C++.
94 //
95 // 2) Rewriting the loop to include an outer loop, which performs periodic
96 // stack checks every N loop bodies (where N is some arbitrary heuristic
97 // selected to allow short loop counts to run with few interruptions).
 // NOTE(review): the statement at listing line 98 is missing from this
 // extract; presumably it is the stack check described above - confirm.
99
100 Factory* factory = isolate->factory();
101 icu::BreakIterator* icu_break_iterator =
102 segment_iterator->icu_break_iterator()->raw();
103 // 5. Let startIndex be iterator.[[IteratedStringNextSegmentCodeUnitIndex]].
104 int32_t start_index = icu_break_iterator->current();
105 // 6. Let endIndex be ! FindBoundary(segmenter, string, startIndex, after).
106 int32_t end_index = icu_break_iterator->next();
107
108 // 7. If endIndex is not finite, then
109 if (end_index == icu::BreakIterator::DONE) {
110 // a. Return ! CreateIterResultObject(undefined, true).
111 return factory->NewJSIteratorResult(isolate->factory()->undefined_value(),
112 true);
113 }
114
115 // 8. Set iterator.[[IteratedStringNextSegmentCodeUnitIndex]] to endIndex.
 // No explicit store appears for this step: the index is read back from the
 // break iterator via current() on the following call, and the next() call
 // above has presumably already advanced it - confirm against ICU docs.
116
117 // 9. Let segmentData be ! CreateSegmentDataObject(segmenter, string,
118 // startIndex, endIndex).
119
 // The fast path is taken only for grapheme granularity when the segment is
 // exactly one code unit long (start_index == end_index - 1).
121 if (segment_iterator->granularity() == JSSegmenter::Granularity::GRAPHEME &&
122 start_index == end_index - 1) {
123 // Fast path: use cached segment string and skip avoidable handle creations.
124 DirectHandle<String> segment;
125 uint16_t code = segment_iterator->raw_string()->Get(start_index);
 // Code units above Latin1::kMaxChar are looked up through the factory; for
 // the others `segment` stays unset here and the single-character string
 // table is consulted in set_segment below (the selecting condition, listing
 // line 139, is missing - presumably the inverse of this check).
126 if (code > unibrow::Latin1::kMaxChar) {
127 segment = factory->LookupSingleCharacterStringFromCode(code);
128 }
 // A heap number is allocated for the index only when start_index does not
 // fit in a Smi; otherwise Smi::FromInt is used in set_index below.
130 if (!Smi::IsValid(start_index)) index = factory->NewHeapNumber(start_index);
132 isolate->native_context()->intl_segment_data_object_map(), isolate);
133 segment_data = Cast<JSSegmentDataObject>(factory->NewJSObjectFromMap(map));
134 Tagged<JSSegmentDataObject> raw = *segment_data;
136 // We can skip write barriers because {segment_data} is the last object
137 // that was allocated.
138 raw->set_segment(
140 ? Cast<String>(factory->single_character_string_table()->get(code))
141 : *segment,
143 raw->set_index(
144 Smi::IsValid(start_index) ? Smi::FromInt(start_index) : *index,
146 raw->set_input(segment_iterator->raw_string(), SKIP_WRITE_BARRIER);
147 } else {
 // General path. NOTE(review): the opening of this statement (listing lines
 // 148 and 150) is missing; from the visible arguments it presumably assigns
 // the result of a CreateSegmentDataObject call to `segment_data` and
 // returns on exception - confirm.
149 isolate, segment_data,
151 isolate, segment_iterator->granularity(), icu_break_iterator,
152 direct_handle(segment_iterator->raw_string(), isolate),
153 *segment_iterator->unicode_string()->raw(), start_index,
154 end_index));
155 }
156
157 // 10. Return ! CreateIterResultObject(segmentData, false).
158 return factory->NewJSIteratorResult(segment_data, false);
159}
160
161} // namespace internal
162} // namespace v8
static const uint16_t kMaxChar
Definition unicode.h:142
Handle< HeapNumber > NewHeapNumber(double value)
Handle< String > LookupSingleCharacterStringFromCode(uint16_t code)
Handle< JSObject > NewJSObjectFromMap(DirectHandle< Map > map, AllocationType allocation=AllocationType::kYoung, DirectHandle< AllocationSite > allocation_site=DirectHandle< AllocationSite >::null(), NewJSObjectType=NewJSObjectType::kNoAPIWrapper)
Definition factory.cc:3135
DirectHandle< JSIteratorResult > NewJSIteratorResult(DirectHandle< Object > value, bool done)
Definition factory.cc:3569
Handle< String > GranularityAsString(Isolate *isolate) const
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< JSReceiver > Next(Isolate *isolate, DirectHandle< JSSegmentIterator > segment_iterator_holder)
JSSegmenter::Granularity granularity() const
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< JSSegmentIterator > Create(Isolate *isolate, DirectHandle< String > input_string, icu::BreakIterator *icu_break_iterator, JSSegmenter::Granularity granularity)
static Handle< String > GetGranularityString(Isolate *isolate, Granularity granularity)
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< JSSegmentDataObject > CreateSegmentDataObject(Isolate *isolate, JSSegmenter::Granularity granularity, icu::BreakIterator *break_iterator, DirectHandle< String > input_string, const icu::UnicodeString &unicode_string, int32_t start_index, int32_t end_index)
static DirectHandle< Managed< CppType > > From(Isolate *isolate, size_t estimated_size, std::shared_ptr< CppType > shared_ptr, AllocationType allocation_type=AllocationType::kYoung)
Definition managed-inl.h:27
static constexpr Tagged< Smi > FromInt(int value)
Definition smi.h:38
static bool constexpr IsValid(T value)
Definition smi.h:67
#define ASSIGN_RETURN_ON_EXCEPTION(isolate, dst, call)
Definition isolate.h:291
#define STACK_CHECK(isolate, result_value)
Definition isolate.h:3067
std::map< const std::string, const std::string > map
ZoneVector< RpoNumber > & result
@ SKIP_WRITE_BARRIER
Definition objects.h:52
V8_INLINE DirectHandle< T > direct_handle(Tagged< T > object, Isolate *isolate)
Tagged< To > Cast(Tagged< From > value, const v8::SourceLocation &loc=INIT_SOURCE_LOCATION_IN_DEBUG)
Definition casting.h:150
#define DCHECK_NOT_NULL(val)
Definition logging.h:492