v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
js-segments.cc
Go to the documentation of this file.
1// Copyright 2020 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_INTL_SUPPORT
6#error Internationalization is expected to be enabled.
7#endif // V8_INTL_SUPPORT
8
10
11#include <map>
12#include <memory>
13#include <string>
14
16#include "src/heap/factory.h"
23#include "unicode/brkiter.h"
24
25namespace v8 {
26namespace internal {
27
28// ecma402 #sec-createsegmentsobject
// Builds the Segments object for `string`: clones the segmenter's ICU break
// iterator, points the clone at the string's text, and stores the clone, the
// segmenter's granularity and the string on a freshly allocated object.
// NOTE(review): the line that opens this definition (return type and name) is
// absent from this listing; the declaration index for this file lists it as
// JSSegments::Create returning MaybeDirectHandle<JSSegments> -- confirm
// against the real source.
30 Isolate* isolate, DirectHandle<JSSegmenter> segmenter,
31 DirectHandle<String> string) {
   // Work on a clone rather than on the segmenter's own iterator; the clone is
   // held in a shared_ptr so it can be handed to Managed<>::From below.
32 std::shared_ptr<icu::BreakIterator> break_iterator{
33 segmenter->icu_break_iterator()->raw()->clone()};
34 DCHECK_NOT_NULL(break_iterator);
35
   // NOTE(review): the left-hand side of the next statement is missing from
   // this listing. `unicode_string` is read further down and is presumably
   // declared here from the value SetTextToBreakIterator returns -- confirm.
37 Intl::SetTextToBreakIterator(isolate, string, break_iterator.get());
   // Wrap the clone in a Managed<> object; 0 is passed as the estimated size.
38 DirectHandle<Managed<icu::BreakIterator>> managed_break_iterator =
39 Managed<icu::BreakIterator>::From(isolate, 0, std::move(break_iterator));
40
41 // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
42 // 2. Let segments be ! ObjectCreate(%Segments.prototype%, internalSlotsList).
43 DirectHandle<Map> map(isolate->native_context()->intl_segments_map(),
44 isolate);
45 DirectHandle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
46
   // NOTE(review): the declaration of `segments` is missing from this listing;
   // presumably it is `result` viewed as a JSSegments -- confirm.
48 segments->set_flags(0);
49
50 // 3. Set segments.[[SegmentsSegmenter]] to segmenter.
   // The segmenter itself is not stored here; the cloned break iterator and
   // the segmenter's granularity are what get recorded.
51 segments->set_icu_break_iterator(*managed_break_iterator);
52 segments->set_granularity(segmenter->granularity());
53
54 // 4. Set segments.[[SegmentsString]] to string.
   // Both the original String and the `unicode_string` value are kept.
55 segments->set_raw_string(*string);
56 segments->set_unicode_string(*unicode_string);
57
58 // 5. Return segments.
59 return segments;
60}
61
62// ecma402 #sec-%segmentsprototype%.containing
// Looks up the segment that contains code unit index `n_double` of the stored
// string. Yields undefined when the index is below 0 or not below the string
// length; otherwise forwards the enclosing boundaries to
// CreateSegmentDataObject.
// NOTE(review): the line that opens this definition (return type and name) is
// absent from this listing; the declaration index for this file lists it as
// JSSegments::Containing returning MaybeDirectHandle<Object> -- confirm.
64 Isolate* isolate, DirectHandle<JSSegments> segments, double n_double) {
65 // 5. Let len be the length of string.
66 int32_t len = segments->unicode_string()->raw()->length();
67
68 // 7. If n < 0 or n ≥ len, return undefined.
   // NOTE(review): both comparisons are false for NaN, so a NaN would fall
   // through to the cast below; presumably callers pass an already-integral
   // value -- confirm.
69 if (n_double < 0 || n_double >= len) {
70 return isolate->factory()->undefined_value();
71 }
72
   // The guard above leaves 0 <= n_double < len, and len is an int32_t.
73 int32_t n = static_cast<int32_t>(n_double);
74 // n may point to the surrogate tail; adjust it back to the lead.
75 n = segments->unicode_string()->raw()->getChar32Start(n);
76
77 icu::BreakIterator* break_iterator = segments->icu_break_iterator()->raw();
78 // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
   // If n is itself a boundary the segment starts at n; otherwise it starts at
   // the boundary preceding n.
79 int32_t start_index =
80 break_iterator->isBoundary(n) ? n : break_iterator->preceding(n);
81
82 // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
   // following(n) is the last call made on the iterator before it is passed
   // on; the callee passes the same iterator to CurrentSegmentIsWordLike,
   // which reads its rule status.
83 int32_t end_index = break_iterator->following(n);
84
85 // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex,
86 // endIndex).
   // NOTE(review): the line that opens this call is missing from the listing;
   // presumably it is `return CreateSegmentDataObject(` -- confirm.
88 isolate, segments->granularity(), break_iterator,
89 direct_handle(segments->raw_string(), isolate),
90 *(segments->unicode_string()->raw()), start_index, end_index);
91}
92
93namespace {
94
// Returns true when the rule status reported by `break_iterator` falls in one
// of the number, letter, kana or ideographic word-status ranges, and false for
// any other status value.
95bool CurrentSegmentIsWordLike(icu::BreakIterator* break_iterator) {
96 int32_t rule_status = break_iterator->getRuleStatus();
   // Each category is tested as a half-open range [UBRK_WORD_X,
   // UBRK_WORD_X_LIMIT).
97 return (rule_status >= UBRK_WORD_NUMBER &&
98 rule_status < UBRK_WORD_NUMBER_LIMIT) ||
99 (rule_status >= UBRK_WORD_LETTER &&
100 rule_status < UBRK_WORD_LETTER_LIMIT) ||
101 (rule_status >= UBRK_WORD_KANA &&
102 rule_status < UBRK_WORD_KANA_LIMIT) ||
103 (rule_status >= UBRK_WORD_IDEO && rule_status < UBRK_WORD_IDEO_LIMIT);
104}
105
106} // namespace
107
108// ecma402 #sec-createsegmentdataobject
// Creates the segment data object for the code units [start_index, end_index)
// of `unicode_string`, filling in its segment, index and input fields, plus
// is_word_like on the word-granularity path.
// NOTE(review): the line that opens this definition (return type and name) is
// absent from this listing; the declaration index for this file lists it as
// JSSegments::CreateSegmentDataObject returning
// MaybeDirectHandle<JSSegmentDataObject> -- confirm.
110 Isolate* isolate, JSSegmenter::Granularity granularity,
111 icu::BreakIterator* break_iterator, DirectHandle<String> input_string,
112 const icu::UnicodeString& unicode_string, int32_t start_index,
113 int32_t end_index) {
114 Factory* factory = isolate->factory();
115
116 // 1. Let len be the length of string.
117 // 2. Assert: startIndex ≥ 0.
118 DCHECK_GE(start_index, 0);
119 // 3. Assert: endIndex ≤ len.
120 DCHECK_LE(end_index, unicode_string.length());
121 // 4. Assert: startIndex < endIndex.
122 DCHECK_LT(start_index, end_index);
123
124 // 5. Let result be ! ObjectCreate(%ObjectPrototype%).
    // One of two maps from the native context is selected: the "wordlike" map
    // or the plain segment data object map.
    // NOTE(review): the lines that open this statement (the declaration and
    // the condition of the ?: expression) are missing from the listing;
    // presumably the condition tests for word granularity -- confirm.
127 ? isolate->native_context()->intl_segment_data_object_wordlike_map()
128 : isolate->native_context()->intl_segment_data_object_map(),
129 isolate);
    // NOTE(review): the lines declaring `result` are missing from the listing
    // at this point -- confirm against the real source.
132
133 // 6. Let segment be the String value equal to the substring of string
134 // consisting of the code units at indices startIndex (inclusive) through
135 // endIndex (exclusive).
136 DirectHandle<String> segment;
    // NOTE(review): the line that opens the following statement is missing;
    // given its (isolate, dst, call) argument shape it is presumably
    // `ASSIGN_RETURN_ON_EXCEPTION(` -- confirm.
138 isolate, segment,
139 Intl::ToString(isolate, unicode_string, start_index, end_index));
140 DirectHandle<Number> index = factory->NewNumberFromInt(start_index);
141
142 // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
    // The fields are written through setters on `raw` rather than through
    // generic property creation.
    // NOTE(review): the lines declaring `raw` are missing from the listing;
    // presumably it is the object behind `result` -- confirm.
145 raw->set_segment(*segment);
146 // 8. Perform ! CreateDataPropertyOrThrow(result, "index", startIndex).
147 raw->set_index(*index);
148 // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
149 raw->set_input(*input_string);
150
151 // 10. Let granularity be segmenter.[[SegmenterGranularity]].
152 // 11. If granularity is "word", then
    // NOTE(review): the line opening the conditional block that is closed
    // below is missing from the listing; presumably it tests `granularity`
    // for the word case -- confirm.
154 // a. Let isWordLike be a Boolean value indicating whether the segment in
155 // string is "word-like" according to locale segmenter.[[Locale]].
    // Word-likeness is read from the break iterator's rule status.
156 DirectHandle<Boolean> is_word_like =
157 factory->ToBoolean(CurrentSegmentIsWordLike(break_iterator));
158 // b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
159 Cast<JSSegmentDataObjectWithIsWordLike>(raw)->set_is_word_like(
160 *is_word_like);
161 }
162 return result;
163}
164
168
169} // namespace internal
170} // namespace v8
Handle< Boolean > ToBoolean(bool value)
Handle< Number > NewNumberFromInt(int32_t value)
Handle< JSObject > NewJSObjectFromMap(DirectHandle< Map > map, AllocationType allocation=AllocationType::kYoung, DirectHandle< AllocationSite > allocation_site=DirectHandle< AllocationSite >::null(), NewJSObjectType=NewJSObjectType::kNoAPIWrapper)
Definition factory.cc:3135
static V8_WARN_UNUSED_RESULT MaybeHandle< String > ToString(Isolate *isolate, const icu::UnicodeString &string)
static DirectHandle< Managed< icu::UnicodeString > > SetTextToBreakIterator(Isolate *isolate, DirectHandle< String > text, icu::BreakIterator *break_iterator)
static Handle< String > GetGranularityString(Isolate *isolate, Granularity granularity)
JSSegmenter::Granularity granularity() const
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< JSSegmentDataObject > CreateSegmentDataObject(Isolate *isolate, JSSegmenter::Granularity granularity, icu::BreakIterator *break_iterator, DirectHandle< String > input_string, const icu::UnicodeString &unicode_string, int32_t start_index, int32_t end_index)
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< Object > Containing(Isolate *isolate, DirectHandle< JSSegments > segments_holder, double n)
Handle< String > GranularityAsString(Isolate *isolate) const
static V8_WARN_UNUSED_RESULT MaybeDirectHandle< JSSegments > Create(Isolate *isolate, DirectHandle< JSSegmenter > segmenter, DirectHandle< String > string)
static DirectHandle< Managed< CppType > > From(Isolate *isolate, size_t estimated_size, std::shared_ptr< CppType > shared_ptr, AllocationType allocation_type=AllocationType::kYoung)
Definition managed-inl.h:27
#define ASSIGN_RETURN_ON_EXCEPTION(isolate, dst, call)
Definition isolate.h:291
std::map< const std::string, const std::string > map
ZoneVector< RpoNumber > & result
int n
Definition mul-fft.cc:296
V8_INLINE DirectHandle< T > direct_handle(Tagged< T > object, Isolate *isolate)
Tagged< To > Cast(Tagged< From > value, const v8::SourceLocation &loc=INIT_SOURCE_LOCATION_IN_DEBUG)
Definition casting.h:150
#define DCHECK_LE(v1, v2)
Definition logging.h:490
#define DCHECK_NOT_NULL(val)
Definition logging.h:492
#define DCHECK_GE(v1, v2)
Definition logging.h:488
#define DCHECK_LT(v1, v2)
Definition logging.h:489