v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
string-case.cc
Go to the documentation of this file.
1// Copyright 2016 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
6
7#include "src/base/logging.h"
10#include "src/strings/unicode.h"
11#include "src/utils/utils.h"
12
13namespace v8 {
14namespace internal {
15
16// FastAsciiConvert tries to do character processing on a word_t basis if
17// source and destination strings are properly aligned. Natural alignment of
18// string data depends on kTaggedSize so we define word_t via Tagged_t.
19using word_t = std::make_unsigned_t<Tagged_t>;
20
21constexpr word_t kWordTAllBitsSet = std::numeric_limits<word_t>::max();
24
25#ifdef DEBUG
26template <class CaseMapping>
27void CheckFastAsciiConvert(char* dst, const char* src, uint32_t length) {
28 constexpr bool is_to_lower = CaseMapping::kIsToLower;
29 constexpr char lo = is_to_lower ? 'A' : 'a';
30 constexpr char hi = is_to_lower ? 'Z' : 'z';
31 constexpr int diff = is_to_lower ? ('a' - 'A') : ('A' - 'a');
32 for (uint32_t i = 0; i < length; i++) {
33 if (lo <= src[i] && src[i] <= hi) {
34 DCHECK_EQ(dst[i], src[i] + diff);
35 } else {
36 DCHECK_EQ(src[i], dst[i]);
37 }
38 }
39}
40#endif
41
42// Given a word and two range boundaries returns a word with high bit
43// set in every byte iff the corresponding input byte was strictly in
44// the range (m, n). All the other bits in the result are cleared.
45// This function is only useful when it can be inlined and the
46// boundaries are statically known.
47// Requires: all bytes in the input word and the boundaries must be
48// ASCII (less than 0x7F).
49template <char low, char high>
50static inline word_t AsciiRangeMask(word_t w) {
51 // Use strict inequalities since in edge cases the function could be
52 // further simplified.
53 DCHECK_LT(0, low);
54 DCHECK_LT(low, high);
55 // Has high bit set in every w byte less than n.
56 word_t tmp1 = kOneInEveryByte * (0x7F + high) - w;
57 // Has high bit set in every w byte greater than m.
58 word_t tmp2 = w + kOneInEveryByte * (0x7F - low);
59 return (tmp1 & tmp2 & (kOneInEveryByte * 0x80));
60}
61
62template <class CaseMapping>
63uint32_t FastAsciiCasePrefixLength(const char* src, uint32_t length) {
64 const char* saved_src = src;
66 // We rely on the distance between upper and lower case letters
67 // being a known power of 2.
68 DCHECK_EQ('a' - 'A', 1 << 5);
69 // Boundaries for the range of input characters that require conversion.
70 constexpr bool is_lower = CaseMapping::kIsToLower;
71 constexpr char lo = is_lower ? 'A' - 1 : 'a' - 1;
72 constexpr char hi = is_lower ? 'Z' + 1 : 'z' + 1;
73 const char* const limit = src + length;
74
75 // Only attempt processing one word at a time if src is also aligned.
76 if (IsAligned(reinterpret_cast<Address>(src), sizeof(word_t))) {
77 // Process the prefix of the input that requires no conversion one aligned
78 // (machine) word at a time.
79 while (src <= limit - sizeof(word_t)) {
80 const word_t w = *reinterpret_cast<const word_t*>(src);
81 if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
82 if (AsciiRangeMask<lo, hi>(w) != 0) {
83 return static_cast<int>(src - saved_src);
84 }
85 src += sizeof(word_t);
86 }
87 }
88 // Process the last few bytes of the input (or the whole input if
89 // unaligned access is not supported).
90 for (; src < limit; ++src) {
91 char c = *src;
92 if ((c & kAsciiMask) != 0 || (lo < c && c < hi)) {
93 return static_cast<int>(src - saved_src);
94 }
95 }
96 return length;
97}
98
100 const char* src, uint32_t length);
102 const char* src, uint32_t length);
103
104template <class CaseMapping>
105uint32_t FastAsciiConvert(char* dst, const char* src, uint32_t length) {
106#ifdef DEBUG
107 char* saved_dst = dst;
108#endif
109 const char* saved_src = src;
111 // We rely on the distance between upper and lower case letters
112 // being a known power of 2.
113 DCHECK_EQ('a' - 'A', 1 << 5);
114 // Boundaries for the range of input characters that require conversion.
115 constexpr bool is_lower = CaseMapping::kIsToLower;
116 constexpr char lo = is_lower ? 'A' - 1 : 'a' - 1;
117 constexpr char hi = is_lower ? 'Z' + 1 : 'z' + 1;
118 const char* const limit = src + length;
119
120 // dst is newly allocated and always aligned.
121 // Only attempt processing one word at a time if src and dst are aligned.
122
123 if (IsAligned(reinterpret_cast<Address>(dst), sizeof(word_t)) &&
124 IsAligned(reinterpret_cast<Address>(src), sizeof(word_t))) {
125 // Process the prefix of the input that requires no conversion one aligned
126 // (machine) word at a time.
127 while (src <= limit - sizeof(word_t)) {
128 const word_t w = *reinterpret_cast<const word_t*>(src);
129 if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
130 if (AsciiRangeMask<lo, hi>(w) != 0) break;
131 *reinterpret_cast<word_t*>(dst) = w;
132 src += sizeof(word_t);
133 dst += sizeof(word_t);
134 }
135 // Process the remainder of the input performing conversion when
136 // required one word at a time.
137 while (src <= limit - sizeof(word_t)) {
138 const word_t w = *reinterpret_cast<const word_t*>(src);
139 if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
141 // The mask has high (7th) bit set in every byte that needs
142 // conversion and we know that the distance between cases is
143 // 1 << 5.
144 *reinterpret_cast<word_t*>(dst) = w ^ (m >> 2);
145 src += sizeof(word_t);
146 dst += sizeof(word_t);
147 }
148 }
149 // Process the last few bytes of the input (or the whole input if
150 // unaligned access is not supported).
151 while (src < limit) {
152 char c = *src;
153 if ((c & kAsciiMask) != 0) return static_cast<int>(src - saved_src);
154 if (lo < c && c < hi) {
155 c ^= (1 << 5);
156 }
157 *dst = c;
158 ++src;
159 ++dst;
160 }
161
162#ifdef DEBUG
163 CheckFastAsciiConvert<CaseMapping>(saved_dst, saved_src, length);
164#endif
165
166 return length;
167}
168
169template uint32_t FastAsciiConvert<unibrow::ToLowercase>(char* dst,
170 const char* src,
171 uint32_t length);
172template uint32_t FastAsciiConvert<unibrow::ToUppercase>(char* dst,
173 const char* src,
174 uint32_t length);
175
176} // namespace internal
177} // namespace v8
int m
Definition mul-fft.cc:294
template uint32_t FastAsciiConvert< unibrow::ToLowercase >(char *dst, const char *src, uint32_t length)
constexpr word_t kAsciiMask
template uint32_t FastAsciiCasePrefixLength< unibrow::ToUppercase >(const char *src, uint32_t length)
template uint32_t FastAsciiConvert< unibrow::ToUppercase >(char *dst, const char *src, uint32_t length)
uint32_t FastAsciiConvert(char *dst, const char *src, uint32_t length)
std::make_unsigned_t< Tagged_t > word_t
constexpr word_t kOneInEveryByte
uint32_t FastAsciiCasePrefixLength(const char *src, uint32_t length)
static word_t AsciiRangeMask(word_t w)
template uint32_t FastAsciiCasePrefixLength< unibrow::ToLowercase >(const char *src, uint32_t length)
constexpr word_t kWordTAllBitsSet
#define DCHECK_LT(v1, v2)
Definition logging.h:489
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
constexpr bool IsAligned(T value, U alignment)
Definition macros.h:403