v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
char-predicates-inl.h
Go to the documentation of this file.
1// Copyright 2011 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_STRINGS_CHAR_PREDICATES_INL_H_
6#define V8_STRINGS_CHAR_PREDICATES_INL_H_
7
9// Include the non-inl header before the rest of the headers.
10
11#include "src/base/bounds.h"
12#include "src/utils/utils.h"
13
14namespace v8 {
15namespace internal {
16
17// If c is in 'A'-'Z' or 'a'-'z', return its lower-case.
18// Else, return something outside of 'A'-'Z' and 'a'-'z'.
19// Note: it ignores LOCALE.
20inline constexpr int AsciiAlphaToLower(base::uc32 c) { return c | 0x20; }
21
22inline constexpr bool IsCarriageReturn(base::uc32 c) { return c == 0x000D; }
23
24inline constexpr bool IsLineFeed(base::uc32 c) { return c == 0x000A; }
25
26inline constexpr bool IsAsciiIdentifier(base::uc32 c) {
27 return IsAlphaNumeric(c) || c == '$' || c == '_';
28}
29
30inline constexpr bool IsAlphaNumeric(base::uc32 c) {
31 return base::IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c);
32}
33
34inline constexpr bool IsDecimalDigit(base::uc32 c) {
35 // ECMA-262, 3rd, 7.8.3 (p 16)
36 return base::IsInRange(c, '0', '9');
37}
38
39inline constexpr bool IsHexDigit(base::uc32 c) {
40 // ECMA-262, 3rd, 7.6 (p 15)
41 return IsDecimalDigit(c) || base::IsInRange(AsciiAlphaToLower(c), 'a', 'f');
42}
43
44inline constexpr bool IsOctalDigit(base::uc32 c) {
45 // ECMA-262, 6th, 7.8.3
46 return base::IsInRange(c, '0', '7');
47}
48
49inline constexpr bool IsNonOctalDecimalDigit(base::uc32 c) {
50 return base::IsInRange(c, '8', '9');
51}
52
53inline constexpr bool IsBinaryDigit(base::uc32 c) {
54 // ECMA-262, 6th, 7.8.3
55 return c == '0' || c == '1';
56}
57
58inline constexpr bool IsAscii(base::uc32 c) { return !(c & ~0x7F); }
59
60inline constexpr bool IsAsciiLower(base::uc32 c) {
61 return base::IsInRange(c, 'a', 'z');
62}
63
64inline constexpr bool IsAsciiUpper(base::uc32 c) {
65 return base::IsInRange(c, 'A', 'Z');
66}
67
68inline constexpr base::uc32 ToAsciiUpper(base::uc32 c) {
69 return c & ~(IsAsciiLower(c) << 5);
70}
71
72inline constexpr base::uc32 ToAsciiLower(base::uc32 c) {
73 return c | (IsAsciiUpper(c) << 5);
74}
75
76inline constexpr bool IsRegExpWord(base::uc32 c) {
77 return IsAlphaNumeric(c) || c == '_';
78}
79
80// Constexpr cache table for character flags.
88
89// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
90// ID_Start. Additionally includes '_' and '$'.
91constexpr bool IsOneByteIDStart(base::uc32 c) {
92 return c == 0x0024 || (c >= 0x0041 && c <= 0x005A) || c == 0x005F ||
93 (c >= 0x0061 && c <= 0x007A) || c == 0x00AA || c == 0x00B5 ||
94 c == 0x00BA || (c >= 0x00C0 && c <= 0x00D6) ||
95 (c >= 0x00D8 && c <= 0x00F6) || (c >= 0x00F8 && c <= 0x00FF);
96}
97
98// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
99// ID_Continue. Additionally includes '_' and '$'.
101 return c == 0x0024 || (c >= 0x0030 && c <= 0x0039) || c == 0x005F ||
102 (c >= 0x0041 && c <= 0x005A) || (c >= 0x0061 && c <= 0x007A) ||
103 c == 0x00AA || c == 0x00B5 || c == 0x00B7 || c == 0x00BA ||
104 (c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c <= 0x00F6) ||
105 (c >= 0x00F8 && c <= 0x00FF);
106}
107
109 return c == '\t' || c == '\v' || c == '\f' || c == ' ' || c == u'\xa0';
110}
111
112constexpr uint8_t BuildOneByteCharFlags(base::uc32 c) {
113 uint8_t result = 0;
114 if (IsOneByteIDStart(c) || c == '\\') result |= kIsIdentifierStart;
115 if (IsOneByteIDContinue(c) || c == '\\') result |= kIsIdentifierPart;
116 if (IsOneByteWhitespace(c)) {
118 }
119 if (c == '\r' || c == '\n') {
121 }
122 // Add markers to identify 0x2028 and 0x2029.
123 if (c == static_cast<uint8_t>(0x2028) || c == static_cast<uint8_t>(0x2029)) {
125 }
126 return result;
127}
128const constexpr uint8_t kOneByteCharFlags[256] = {
129#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N),
131#undef BUILD_CHAR_FLAGS
132#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N + 128),
134#undef BUILD_CHAR_FLAGS
135};
136
138 if (!base::IsInRange(c, 0, 255)) return IsIdentifierStartSlow(c);
140 static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierStart));
142}
143
145 if (!base::IsInRange(c, 0, 255)) return IsIdentifierPartSlow(c);
147 static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierPart));
149}
150
152 if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceSlow(c);
154 static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpace));
156}
157
165
167 if (kOneByteCharFlags[static_cast<uint8_t>(c)] & kMaybeLineEnd) {
168 if (c == '\n') return true;
169 if (c == '\r') return next != '\n';
170 return base::IsInRange(static_cast<unsigned int>(c), 0x2028u, 0x2029u);
171 }
172 return false;
173}
174
175} // namespace internal
176
177} // namespace v8
178
179#endif // V8_STRINGS_CHAR_PREDICATES_INL_H_
#define BUILD_CHAR_FLAGS(N)
ZoneVector< RpoNumber > & result
uint32_t uc32
Definition strings.h:19
constexpr bool IsInRange(T value, U lower_limit, U higher_limit)
Definition bounds.h:20
bool IsIdentifierStart(base::uc32 c)
constexpr bool IsHexDigit(base::uc32 c)
constexpr bool IsOneByteIDContinue(base::uc32 c)
bool IsLineTerminatorSequence(base::uc32 c, base::uc32 next)
constexpr bool IsAsciiIdentifier(base::uc32 c)
constexpr bool IsCarriageReturn(base::uc32 c)
constexpr bool IsAsciiLower(base::uc32 c)
constexpr bool IsAscii(base::uc32 c)
bool IsWhiteSpaceOrLineTerminator(base::uc32 c)
constexpr bool IsOctalDigit(base::uc32 c)
constexpr bool IsNonOctalDecimalDigit(base::uc32 c)
bool IsIdentifierPartSlow(base::uc32 c)
bool IsWhiteSpace(base::uc32 c)
constexpr bool IsOneByteIDStart(base::uc32 c)
constexpr uint8_t BuildOneByteCharFlags(base::uc32 c)
constexpr bool IsRegExpWord(base::uc32 c)
constexpr bool IsDecimalDigit(base::uc32 c)
bool IsWhiteSpaceOrLineTerminatorSlow(base::uc32 c)
constexpr base::uc32 ToAsciiUpper(base::uc32 c)
constexpr bool IsAsciiUpper(base::uc32 c)
constexpr bool IsBinaryDigit(base::uc32 c)
constexpr bool IsLineFeed(base::uc32 c)
const constexpr uint8_t kOneByteCharFlags[256]
constexpr int AsciiAlphaToLower(base::uc32 c)
constexpr bool IsAlphaNumeric(base::uc32 c)
bool IsIdentifierStartSlow(base::uc32 c)
bool IsWhiteSpaceSlow(base::uc32 c)
bool IsIdentifierPart(base::uc32 c)
constexpr bool IsOneByteWhitespace(base::uc32 c)
constexpr base::uc32 ToAsciiLower(base::uc32 c)
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define INT_0_TO_127_LIST(V)
Definition utils.h:625