5#ifndef V8_STRINGS_STRING_SEARCH_H_
6#define V8_STRINGS_STRING_SEARCH_H_
56template <
typename PatternChar,
typename SubjectChar>
63 if (
sizeof(PatternChar) >
sizeof(SubjectChar)) {
71 if (pattern_length == 1) {
86 if (
sizeof(PatternChar) == 1) {
136 SubjectChar char_code) {
137 if (
sizeof(SubjectChar) == 1) {
138 return bad_char_occurrence[
static_cast<int>(char_code)];
140 if (
sizeof(PatternChar) == 1) {
144 return bad_char_occurrence[
static_cast<unsigned int>(char_code)];
149 return bad_char_occurrence[equiv_class];
185template <
typename T,
typename U>
187 return reinterpret_cast<T
>(
188 (
reinterpret_cast<uintptr_t
>(
value) & ~(alignment - 1)));
192 return std::max(
static_cast<uint8_t
>(
character & 0xFF),
198template <
typename PatternChar,
typename SubjectChar>
202 const PatternChar pattern_first_char =
pattern[0];
205 if (
sizeof(SubjectChar) == 2 && pattern_first_char == 0) {
209 for (
int i = index;
i < max_n; ++
i) {
210 if (subject[
i] == 0)
return i;
215 const SubjectChar search_char =
static_cast<SubjectChar
>(pattern_first_char);
219 const SubjectChar* char_pos =
reinterpret_cast<const SubjectChar*
>(
220 memchr(subject.
begin() +
pos, search_byte,
221 (max_n -
pos) *
sizeof(SubjectChar)));
222 if (char_pos ==
nullptr)
return -1;
223 char_pos =
AlignDown(char_pos,
sizeof(SubjectChar));
224 pos =
static_cast<int>(char_pos - subject.
begin());
225 if (subject[
pos] == search_char)
return pos;
226 }
while (++
pos < max_n);
235template <
typename PatternChar,
typename SubjectChar>
240 PatternChar pattern_first_char = search->
pattern_[0];
241 if (
sizeof(PatternChar) >
sizeof(SubjectChar)) {
242 if (exceedsOneByte(pattern_first_char)) {
253template <
typename PatternChar,
typename SubjectChar>
263 }
while (
pos < length);
268template <
typename PatternChar,
typename SubjectChar>
274 int pattern_length =
pattern.length();
276 int n = subject.
length() - pattern_length;
279 if (
i == -1)
return -1;
285 pattern_length - 1)) {
296template <
typename PatternChar,
typename SubjectChar>
301 int subject_length = subject.
length();
302 int pattern_length =
pattern.length();
309 PatternChar last_char =
pattern[pattern_length - 1];
310 int index = start_index;
312 while (index <= subject_length - pattern_length) {
313 int j = pattern_length - 1;
315 while (last_char != (c = subject[index + j])) {
316 int shift = j - CharOccurrence(bad_char_occurence, c);
318 if (index > subject_length - pattern_length) {
322 while (j >= 0 &&
pattern[j] == (c = subject[index + j])) j--;
325 }
else if (j <
start) {
328 index += pattern_length - 1 -
329 CharOccurrence(bad_char_occurence,
330 static_cast<SubjectChar
>(last_char));
332 int gs_shift = good_suffix_shift[j + 1];
333 int bc_occ = CharOccurrence(bad_char_occurence, c);
334 int shift = j - bc_occ;
335 if (gs_shift > shift) {
345template <
typename PatternChar,
typename SubjectChar>
347 int pattern_length = pattern_.length();
348 const PatternChar*
pattern = pattern_.begin();
352 int length = pattern_length -
start;
356 int* shift_table = good_suffix_shift_table();
357 int* suffix_table = this->suffix_table();
360 for (
int i =
start;
i < pattern_length;
i++) {
363 shift_table[pattern_length] = 1;
364 suffix_table[pattern_length] = pattern_length + 1;
366 if (pattern_length <=
start) {
371 PatternChar last_char =
pattern[pattern_length - 1];
372 int suffix = pattern_length + 1;
374 int i = pattern_length;
377 while (suffix <= pattern_length && c !=
pattern[suffix - 1]) {
378 if (shift_table[suffix] == length) {
379 shift_table[suffix] = suffix -
i;
381 suffix = suffix_table[suffix];
383 suffix_table[--
i] = --suffix;
384 if (suffix == pattern_length) {
387 if (shift_table[pattern_length] == length) {
388 shift_table[pattern_length] = pattern_length -
i;
390 suffix_table[--
i] = pattern_length;
393 suffix_table[--
i] = --suffix;
399 if (suffix < pattern_length) {
400 for (
int i =
start;
i <= pattern_length;
i++) {
401 if (shift_table[
i] == length) {
402 shift_table[
i] = suffix -
start;
405 suffix = suffix_table[suffix];
415template <
typename PatternChar,
typename SubjectChar>
420 int subject_length = subject.
length();
421 int pattern_length =
pattern.length();
423 int badness = -pattern_length;
426 PatternChar last_char =
pattern[pattern_length - 1];
427 int last_char_shift =
429 CharOccurrence(char_occurrences,
static_cast<SubjectChar
>(last_char));
431 int index = start_index;
432 while (index <= subject_length - pattern_length) {
433 int j = pattern_length - 1;
435 while (last_char != (subject_char = subject[index + j])) {
436 int bc_occ = CharOccurrence(char_occurrences, subject_char);
437 int shift = j - bc_occ;
439 badness += 1 - shift;
440 if (index > subject_length - pattern_length) {
445 while (j >= 0 &&
pattern[j] == (subject[index + j])) j--;
449 index += last_char_shift;
454 badness += (pattern_length - j) - last_char_shift;
458 return BoyerMooreSearch(search, subject, index);
465template <
typename PatternChar,
typename SubjectChar>
467 int pattern_length = pattern_.length();
469 int* bad_char_occurrence = bad_char_table();
476 int table_size = AlphabetSize();
478 memset(bad_char_occurrence, -1, table_size *
sizeof(*bad_char_occurrence));
480 for (
int i = 0;
i < table_size;
i++) {
481 bad_char_occurrence[
i] =
start - 1;
484 for (
int i =
start;
i < pattern_length - 1;
i++) {
485 PatternChar c = pattern_[
i];
486 int bucket = (
sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
487 bad_char_occurrence[bucket] =
i;
497template <
typename PatternChar,
typename SubjectChar>
502 int pattern_length =
pattern.length();
506 int badness = -10 - (pattern_length << 2);
510 for (
int i = index, n = subject.
length() - pattern_length;
i <= n;
i++) {
514 if (
i == -1)
return -1;
522 }
while (j < pattern_length);
523 if (j == pattern_length) {
529 search->
strategy_ = &BoyerMooreHorspoolSearch;
530 return BoyerMooreHorspoolSearch(search, subject,
i);
540template <
typename SubjectChar,
typename PatternChar>
544 return search.
Search(subject, start_index);
550template <
typename SubjectChar,
typename PatternChar>
552 int subject_length,
const PatternChar* pattern_ptr,
553 int pattern_length,
int start_index) {
constexpr T * begin() const
static const int kUC16AlphabetSize
static const int kBMMaxShift
static const int kLatin1AlphabetSize
static bool IsOneByteString(base::Vector< const uint8_t > string)
static const int kUC16AlphabetSize
static const int kBMMinPatternLength
static const int kBMMaxShift
static bool IsOneByteString(base::Vector< const base::uc16 > string)
static int BoyerMooreSearch(StringSearch< PatternChar, SubjectChar > *search, base::Vector< const SubjectChar > subject, int start_index)
int(*)(StringSearch< PatternChar, SubjectChar > *, base::Vector< const SubjectChar >, int) SearchFunction
static int LinearSearch(StringSearch< PatternChar, SubjectChar > *search, base::Vector< const SubjectChar > subject, int start_index)
static int InitialSearch(StringSearch< PatternChar, SubjectChar > *search, base::Vector< const SubjectChar > subject, int start_index)
void PopulateBoyerMooreTable()
static int AlphabetSize()
static bool exceedsOneByte(uint16_t c)
static int SingleCharSearch(StringSearch< PatternChar, SubjectChar > *search, base::Vector< const SubjectChar > subject, int start_index)
base::Vector< const PatternChar > pattern_
int * good_suffix_shift_table()
int Search(base::Vector< const SubjectChar > subject, int index)
static bool exceedsOneByte(uint8_t c)
StringSearch(Isolate *isolate, base::Vector< const PatternChar > pattern)
static int FailSearch(StringSearch< PatternChar, SubjectChar > *, base::Vector< const SubjectChar >, int)
void PopulateBoyerMooreHorspoolTable()
static int BoyerMooreHorspoolSearch(StringSearch< PatternChar, SubjectChar > *search, base::Vector< const SubjectChar > subject, int start_index)
static int CharOccurrence(int *bad_char_occurrence, SubjectChar char_code)
static const uint32_t kMaxOneByteCharCodeU
bool CharCompare(const PatternChar *pattern, const SubjectChar *subject, int length)
BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL BUILTIN_FP_CALL int character
int FindFirstCharacter(base::Vector< const PatternChar > pattern, base::Vector< const SubjectChar > subject, int index)
T AlignDown(T value, U alignment)
int SearchString(Isolate *isolate, base::Vector< const SubjectChar > subject, base::Vector< const PatternChar > pattern, int start_index)
uint8_t GetHighestValueByte(base::uc16 character)
intptr_t SearchStringRaw(Isolate *isolate, const SubjectChar *subject_ptr, int subject_length, const PatternChar *pattern_ptr, int pattern_length, int start_index)
#define DCHECK_LE(v1, v2)
#define DCHECK_GE(v1, v2)
#define DCHECK_EQ(v1, v2)
#define DCHECK_GT(v1, v2)