v8
V8 is Google’s open source high-performance JavaScript and WebAssembly engine, written in C++.
Loading...
Searching...
No Matches
regexp-macro-assembler-x64.cc
Go to the documentation of this file.
1// Copyright 2012 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#if V8_TARGET_ARCH_X64
6
8
11#include "src/heap/factory.h"
12#include "src/logging/log.h"
16
17namespace v8 {
18namespace internal {
19
20/*
21 * This assembler uses the following register assignment convention
22 * - rdx : Currently loaded character(s) as Latin1 or UC16. Must be loaded
23 * using LoadCurrentCharacter before using any of the dispatch methods.
24 * Temporarily stores the index of capture start after a matching pass
25 * for a global regexp.
26 * - rdi : Current position in input, as negative offset from end of string.
27 * Please notice that this is the byte offset, not the character
28 * offset! Is always a 32-bit signed (negative) offset, but must be
29 * maintained sign-extended to 64 bits, since it is used as index.
30 * - rsi : End of input (points to byte after last character in input),
31 * so that rsi+rdi points to the current character.
32 * - rbp : Frame pointer. Used to access arguments, local variables and
33 * RegExp registers.
34 * - rsp : Points to tip of C stack.
35 * - rcx : Points to tip of backtrack stack. The backtrack stack contains
36 * only 32-bit values. Most are offsets from some base (e.g., character
37 * positions from end of string or code location from InstructionStream
38 * pointer).
39 * - r8 : InstructionStream object pointer. Used to convert between absolute
40 * and code-object-relative addresses.
41 *
42 * The registers rax, rbx, r9 and r11 are free to use for computations.
43 * If changed to use r12+, they should be saved as callee-save registers.
44 * The macro assembler special register r13 (kRootRegister) isn't special
45 * during execution of RegExp code (it doesn't hold the value assumed when
46 * creating JS code), so Root related macro operations can be used.
47 *
48 * xmm0 - xmm5 are free to use. On Windows, xmm6 - xmm15 are callee-saved and
49 * therefore need to be saved/restored.
50 *
51 * Each call to a C++ method should retain these registers.
52 *
53 * The stack will have the following content, in some order, indexable from the
54 * frame pointer (see, e.g., kDirectCallOffset):
55 * - Address regexp (address of the JSRegExp object; unused in native
56 * code, passed to match signature of interpreter)
57 * - Isolate* isolate (address of the current isolate)
58 * - direct_call (if 1, direct call from JavaScript code, if 0 call
59 * through the runtime system)
60 * - capture array size (may fit multiple sets of matches)
61 * - int* capture_array (int[num_saved_registers_], for output).
62 * - end of input (address of end of string)
63 * - start of input (address of first character in string)
64 * - start index (character index of start)
65 * - String input_string (input string)
66 * - return address
67 * - backup of callee save registers (rbx, possibly rsi and rdi).
68 * - success counter (only useful for global regexp to count matches)
69 * - Offset of location before start of input (effectively character
70 * string start - 1). Used to initialize capture registers to a
71 * non-position.
72 * - At start of string (if 1, we are starting at the start of the
73 * string, otherwise 0)
74 * - register 0 rbp[-n] (Only positions must be stored in the first
75 * - register 1 rbp[-n-8] num_saved_registers_ registers)
76 * - ...
77 *
78 * The first num_saved_registers_ registers are initialized to point to
79 * "character -1" in the string (i.e., char_size() bytes before the first
80 * character of the string). The remaining registers start out uninitialized.
81 *
82 * The argument values must be provided by the calling code by calling the
83 * code's entry address cast to a function pointer with the following signature:
84 * int (*match)(String input_string,
85 * int start_index,
86 * Address start,
87 * Address end,
88 * int* capture_output_array,
89 * int num_capture_registers,
90 * bool direct_call = false,
91 * Isolate* isolate,
92 * Address regexp);
93 */
94
95#define __ ACCESS_MASM((&masm_))
96
98
100 Mode mode,
101 int registers_to_save)
102 : NativeRegExpMacroAssembler(isolate, zone),
103 masm_(isolate, CodeObjectRequired::kYes,
104 NewAssemblerBuffer(kRegExpCodeSize)),
105 no_root_array_scope_(&masm_),
106 code_relative_fixup_positions_(zone),
107 mode_(mode),
// Both register counters start out equal to the requested count.
108 num_registers_(registers_to_save),
109 num_saved_registers_(registers_to_save),
110 entry_label_(),
111 start_label_(),
112 success_label_(),
113 backtrack_label_(),
114 exit_label_() {
// The requested register count must be even.
// NOTE(review): presumably because capture registers come in (start, end)
// pairs -- confirm against the callers.
115 DCHECK_EQ(0, registers_to_save % 2);
116 __ CodeEntry();
// The generated code begins with a jump to entry_label_, which is only bound
// once GetCode() runs; the regexp body itself starts at start_label_.
117 __ jmp(&entry_label_); // We'll write the entry code when we know more.
118 __ bind(&start_label_); // And then continue from here.
119}
120
121RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() {
122 // Unuse labels in case we throw away the assembler without calling GetCode.
123 entry_label_.Unuse();
124 start_label_.Unuse();
125 success_label_.Unuse();
126 backtrack_label_.Unuse();
127 exit_label_.Unuse();
128 check_preempt_label_.Unuse();
129 stack_overflow_label_.Unuse();
130 fallback_label_.Unuse();
131}
132
133int RegExpMacroAssemblerX64::stack_limit_slack_slot_count() {
134 return RegExpStack::kStackLimitSlackSlotCount;
135}
136
137void RegExpMacroAssemblerX64::AdvanceCurrentPosition(int by) {
138 if (by != 0) {
139 __ addq(rdi, Immediate(by * char_size()));
140 }
141}
142
143
144void RegExpMacroAssemblerX64::AdvanceRegister(int reg, int by) {
145 DCHECK_LE(0, reg);
146 DCHECK_GT(num_registers_, reg);
147 if (by != 0) {
148 __ addq(register_location(reg), Immediate(by));
149 }
150}
151
152
153void RegExpMacroAssemblerX64::Backtrack() {
154 CheckPreemption();
155 if (has_backtrack_limit()) {
156 Label next;
157 __ incq(Operand(rbp, kBacktrackCountOffset));
158 __ cmpq(Operand(rbp, kBacktrackCountOffset), Immediate(backtrack_limit()));
159 __ j(not_equal, &next);
160
161 // Backtrack limit exceeded.
162 if (can_fallback()) {
163 __ jmp(&fallback_label_);
164 } else {
165 // Can't fallback, so we treat it as a failed match.
166 Fail();
167 }
168
169 __ bind(&next);
170 }
171 // Pop InstructionStream offset from backtrack stack, add InstructionStream
172 // and jump to location.
173 Pop(rbx);
174 __ addq(rbx, code_object_pointer());
175
176 // TODO(sroettger): This jump needs an endbr64 instruction but the code is
177 // performance sensitive. Needs more thought how to do this in a fast way.
178 __ jmp(rbx, /*notrack=*/true);
179}
180
181
182void RegExpMacroAssemblerX64::Bind(Label* label) {
183 __ bind(label);
184}
185
186
187void RegExpMacroAssemblerX64::CheckCharacter(uint32_t c, Label* on_equal) {
188 __ cmpl(current_character(), Immediate(c));
189 BranchOrBacktrack(equal, on_equal);
190}
191
192void RegExpMacroAssemblerX64::CheckCharacterGT(base::uc16 limit,
193 Label* on_greater) {
194 __ cmpl(current_character(), Immediate(limit));
195 BranchOrBacktrack(greater, on_greater);
196}
197
198void RegExpMacroAssemblerX64::CheckAtStart(int cp_offset, Label* on_at_start) {
199 __ leaq(rax, Operand(rdi, -char_size() + cp_offset * char_size()));
200 __ cmpq(rax, Operand(rbp, kStringStartMinusOneOffset));
201 BranchOrBacktrack(equal, on_at_start);
202}
203
204void RegExpMacroAssemblerX64::CheckNotAtStart(int cp_offset,
205 Label* on_not_at_start) {
206 __ leaq(rax, Operand(rdi, -char_size() + cp_offset * char_size()));
207 __ cmpq(rax, Operand(rbp, kStringStartMinusOneOffset));
208 BranchOrBacktrack(not_equal, on_not_at_start);
209}
210
211void RegExpMacroAssemblerX64::CheckCharacterLT(base::uc16 limit,
212 Label* on_less) {
213 __ cmpl(current_character(), Immediate(limit));
214 BranchOrBacktrack(less, on_less);
215}
216
217void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
218 Label fallthrough;
219 __ cmpl(rdi, Operand(backtrack_stackpointer(), 0));
220 __ j(not_equal, &fallthrough);
221 Drop();
222 BranchOrBacktrack(on_equal);
223 __ bind(&fallthrough);
224}
225
226void RegExpMacroAssemblerX64::CallCFunctionFromIrregexpCode(
227 ExternalReference function, int num_arguments) {
228 // Irregexp code must not set fast_c_call_caller_fp and fast_c_call_caller_pc
229 // since
230 //
231 // 1. it may itself have been called using CallCFunction and nested calls are
232 // unsupported, and
233 // 2. it may itself have been called directly from C where the frame pointer
234 // might not be set (-fomit-frame-pointer), and thus frame iteration would
235 // fail.
236 //
237 // See also: crbug.com/v8/12670#c17.
238 __ CallCFunction(function, num_arguments, SetIsolateDataSlots::kNo);
239}
240
241// Push (pop) caller-saved registers used by irregexp.
242void RegExpMacroAssemblerX64::PushCallerSavedRegisters() {
243#ifndef V8_TARGET_OS_WIN
244 // Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI.
245 __ pushq(rsi);
246 __ pushq(rdi);
247#endif
248 __ pushq(rcx);
249}
250
251void RegExpMacroAssemblerX64::PopCallerSavedRegisters() {
252 __ popq(rcx);
253#ifndef V8_TARGET_OS_WIN
254 __ popq(rdi);
255 __ popq(rsi);
256#endif
257}
258
// Emits code that compares, ignoring case, the capture stored in registers
// start_reg / start_reg + 1 with the input at the current position.
//
// - start_reg:     register holding the capture start; start_reg + 1 holds
//                  the capture end.
// - read_backward: when true, the compared input ends at the current
//                  position rather than starting there.
// - unicode:       selects which C++ comparison helper is called in UC16
//                  mode.
// - on_no_match:   branch target on mismatch (nullptr means backtrack).
//
// A zero-length capture falls through without comparing anything. LATIN1
// subjects are compared by an inline byte loop; UC16 subjects call out to
// C++. On a match, rdi is updated to the position after (or, when reading
// backward, before) the matched text.
259void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
260 int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
261 Label fallthrough;
262 ReadPositionFromRegister(rdx, start_reg); // Offset of start of capture
263 ReadPositionFromRegister(rbx, start_reg + 1); // Offset of end of capture
264 __ subq(rbx, rdx); // Length of capture.
265
266 // -----------------------
267 // rdx = Start offset of capture.
268 // rbx = Length of capture
269
270 // At this point, the capture registers are either both set or both cleared.
271 // If the capture length is zero, then the capture is either empty or cleared.
272 // Fall through in both cases.
// The branch below uses the flags set by the subq above.
273 __ j(equal, &fallthrough);
274
275 // -----------------------
276 // rdx - Start of capture
277 // rbx - length of capture
278 // Check that there are sufficient characters left in the input.
279 if (read_backward) {
// No match if rdi <= (string start - 1) + capture length.
280 __ movl(rax, Operand(rbp, kStringStartMinusOneOffset));
281 __ addl(rax, rbx);
282 __ cmpl(rdi, rax);
283 BranchOrBacktrack(less_equal, on_no_match);
284 } else {
// rdi is a negative offset from the end of input, so a positive sum means
// the capture would extend past the end.
285 __ movl(rax, rdi);
286 __ addl(rax, rbx);
287 BranchOrBacktrack(greater, on_no_match);
288 }
289
290 if (mode_ == LATIN1) {
291 Label loop_increment;
// The loop below jumps to on_no_match directly, so a null target is replaced
// by the backtrack label first.
292 if (on_no_match == nullptr) {
293 on_no_match = &backtrack_label_;
294 }
295
296 __ leaq(r9, Operand(rsi, rdx, times_1, 0));
297 __ leaq(r11, Operand(rsi, rdi, times_1, 0));
298 if (read_backward) {
299 __ subq(r11, rbx); // Offset by length when matching backwards.
300 }
301 __ addq(rbx, r9); // End of capture
302 // ---------------------
303 // r11 - current input character address
304 // r9 - current capture character address
305 // rbx - end of capture
306
307 Label loop;
308 __ bind(&loop);
309 __ movzxbl(rdx, Operand(r9, 0));
310 __ movzxbl(rax, Operand(r11, 0));
311 // al - input character
312 // dl - capture character
313 __ cmpb(rax, rdx);
314 __ j(equal, &loop_increment);
315
316 // Mismatch, try case-insensitive match (converting letters to lower-case).
317 // I.e., if or-ing with 0x20 makes values equal and in range 'a'-'z', it's
318 // a match.
319 __ orq(rax, Immediate(0x20)); // Convert match character to lower-case.
320 __ orq(rdx, Immediate(0x20)); // Convert capture character to lower-case.
321 __ cmpb(rax, rdx);
322 __ j(not_equal, on_no_match); // Definitely not equal.
323 __ subb(rax, Immediate('a'));
324 __ cmpb(rax, Immediate('z' - 'a'));
325 __ j(below_equal, &loop_increment); // In range 'a'-'z'.
326 // Latin-1: Check for values in range [224,254] but not 247.
327 __ subb(rax, Immediate(224 - 'a'));
328 __ cmpb(rax, Immediate(254 - 224));
329 __ j(above, on_no_match); // Weren't Latin-1 letters.
330 __ cmpb(rax, Immediate(247 - 224)); // Check for 247.
331 __ j(equal, on_no_match);
332 __ bind(&loop_increment);
333 // Increment pointers into match and capture strings.
334 __ addq(r11, Immediate(1));
335 __ addq(r9, Immediate(1));
336 // Compare to end of capture, and loop if not done.
337 __ cmpq(r9, rbx);
338 __ j(below, &loop);
339
340 // Compute new value of character position after the matched part.
341 __ movq(rdi, r11);
342 __ subq(rdi, rsi);
343 if (read_backward) {
344 // Subtract match length if we matched backward.
345 __ addq(rdi, register_location(start_reg));
346 __ subq(rdi, register_location(start_reg + 1));
347 }
348 } else {
349 DCHECK(mode_ == UC16);
350 PushCallerSavedRegisters();
351
352 static const int num_arguments = 4;
353 __ PrepareCallCFunction(num_arguments);
354
355 // Put arguments into parameter registers. Parameters are
356 // Address byte_offset1 - Address captured substring's start.
357 // Address byte_offset2 - Address of current character position.
358 // size_t byte_length - length of capture in bytes(!)
359 // Isolate* isolate.
360#ifdef V8_TARGET_OS_WIN
361 DCHECK(rcx == kCArgRegs[0]);
362 DCHECK(rdx == kCArgRegs[1]);
363 // Compute and set byte_offset1 (start of capture).
364 __ leaq(rcx, Operand(rsi, rdx, times_1, 0));
365 // Set byte_offset2.
366 __ leaq(rdx, Operand(rsi, rdi, times_1, 0));
367 if (read_backward) {
368 __ subq(rdx, rbx);
369 }
370#else // AMD64 calling convention
371 DCHECK(rdi == kCArgRegs[0]);
372 DCHECK(rsi == kCArgRegs[1]);
// rdi and rsi are both inputs and argument registers here, so byte_offset2 is
// staged in rax before rdi is overwritten.
373 // Compute byte_offset2 (current position = rsi+rdi).
374 __ leaq(rax, Operand(rsi, rdi, times_1, 0));
375 // Compute and set byte_offset1 (start of capture).
376 __ leaq(rdi, Operand(rsi, rdx, times_1, 0));
377 // Set byte_offset2.
378 __ movq(rsi, rax);
379 if (read_backward) {
380 __ subq(rsi, rbx);
381 }
382#endif // V8_TARGET_OS_WIN
383
384 // Set byte_length.
385 __ movq(kCArgRegs[2], rbx);
386 // Isolate.
387 __ LoadAddress(kCArgRegs[3], ExternalReference::isolate_address(isolate()));
388
389 {
390 AllowExternalCallThatCantCauseGC scope(&masm_);
391 ExternalReference compare =
392 unicode
393 ? ExternalReference::re_case_insensitive_compare_unicode()
394 : ExternalReference::re_case_insensitive_compare_non_unicode();
395 CallCFunctionFromIrregexpCode(compare, num_arguments);
396 }
397
398 // Restore original values before reacting on result value.
399 __ Move(code_object_pointer(), masm_.CodeObject());
400 PopCallerSavedRegisters();
401
402 // Check if function returned non-zero for success or zero for failure.
403 __ testq(rax, rax);
404 BranchOrBacktrack(zero, on_no_match);
405 // On success, advance position by length of capture.
406 // Requires that rbx is callee save (true for both Win64 and AMD64 ABIs).
407 if (read_backward) {
408 __ subq(rdi, rbx);
409 } else {
410 __ addq(rdi, rbx);
411 }
412 }
413 __ bind(&fallthrough);
414}
415
// Emits code that compares the capture stored in registers start_reg /
// start_reg + 1 with the input at the current position, character by
// character and case-sensitively.
//
// - start_reg:     register holding the capture start; start_reg + 1 holds
//                  the capture end.
// - read_backward: when true, the compared input ends at the current
//                  position rather than starting there.
// - on_no_match:   branch target on mismatch, passed to BranchOrBacktrack.
//
// A zero-length capture falls through without comparing anything. On a
// match, rdi is updated to the position after (or, when reading backward,
// before) the matched text.
416void RegExpMacroAssemblerX64::CheckNotBackReference(int start_reg,
417 bool read_backward,
418 Label* on_no_match) {
419 Label fallthrough;
420
421 // Find length of back-referenced capture.
422 ReadPositionFromRegister(rdx, start_reg); // Offset of start of capture
423 ReadPositionFromRegister(rax, start_reg + 1); // Offset of end of capture
424 __ subq(rax, rdx); // Length to check.
425
426 // At this point, the capture registers are either both set or both cleared.
427 // If the capture length is zero, then the capture is either empty or cleared.
428 // Fall through in both cases.
// The branch below uses the flags set by the subq above.
429 __ j(equal, &fallthrough);
430
431 // -----------------------
432 // rdx - Start of capture
433 // rax - length of capture
434 // Check that there are sufficient characters left in the input.
435 if (read_backward) {
// No match if rdi <= (string start - 1) + capture length.
436 __ movl(rbx, Operand(rbp, kStringStartMinusOneOffset));
437 __ addl(rbx, rax);
438 __ cmpl(rdi, rbx);
439 BranchOrBacktrack(less_equal, on_no_match);
440 } else {
// rdi is a negative offset from the end of input, so a positive sum means
// the capture would extend past the end.
441 __ movl(rbx, rdi);
442 __ addl(rbx, rax);
443 BranchOrBacktrack(greater, on_no_match);
444 }
445
446 // Compute pointers to match string and capture string
447 __ leaq(rbx, Operand(rsi, rdi, times_1, 0)); // Start of match.
448 if (read_backward) {
449 __ subq(rbx, rax); // Offset by length when matching backwards.
450 }
451 __ addq(rdx, rsi); // Start of capture.
452 __ leaq(r9, Operand(rdx, rax, times_1, 0)); // End of capture
453
454 // -----------------------
455 // rdx - current capture character address.
456 // rbx - current input character address .
457 // r9 - end of capture (capture length after rdx).
458
459 Label loop;
460 __ bind(&loop);
461 if (mode_ == LATIN1) {
462 __ movzxbl(rax, Operand(rdx, 0));
463 __ cmpb(rax, Operand(rbx, 0));
464 } else {
465 DCHECK(mode_ == UC16);
466 __ movzxwl(rax, Operand(rdx, 0));
467 __ cmpw(rax, Operand(rbx, 0));
468 }
469 BranchOrBacktrack(not_equal, on_no_match);
470 // Increment pointers into capture and match string.
471 __ addq(rbx, Immediate(char_size()));
472 __ addq(rdx, Immediate(char_size()));
473 // Check if we have reached end of match area.
474 __ cmpq(rdx, r9);
475 __ j(below, &loop);
476
477 // Success.
478 // Set current character position to position after match.
479 __ movq(rdi, rbx);
480 __ subq(rdi, rsi);
481 if (read_backward) {
482 // Subtract match length if we matched backward.
483 __ addq(rdi, register_location(start_reg));
484 __ subq(rdi, register_location(start_reg + 1));
485 }
486
487 __ bind(&fallthrough);
488}
489
490
491void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c,
492 Label* on_not_equal) {
493 __ cmpl(current_character(), Immediate(c));
494 BranchOrBacktrack(not_equal, on_not_equal);
495}
496
497
498void RegExpMacroAssemblerX64::CheckCharacterAfterAnd(uint32_t c,
499 uint32_t mask,
500 Label* on_equal) {
501 if (c == 0) {
502 __ testl(current_character(), Immediate(mask));
503 } else {
504 __ Move(rax, mask);
505 __ andq(rax, current_character());
506 __ cmpl(rax, Immediate(c));
507 }
508 BranchOrBacktrack(equal, on_equal);
509}
510
511
512void RegExpMacroAssemblerX64::CheckNotCharacterAfterAnd(uint32_t c,
513 uint32_t mask,
514 Label* on_not_equal) {
515 if (c == 0) {
516 __ testl(current_character(), Immediate(mask));
517 } else {
518 __ Move(rax, mask);
519 __ andq(rax, current_character());
520 __ cmpl(rax, Immediate(c));
521 }
522 BranchOrBacktrack(not_equal, on_not_equal);
523}
524
525void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd(
526 base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
527 DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
528 __ leal(rax, Operand(current_character(), -minus));
529 __ andl(rax, Immediate(mask));
530 __ cmpl(rax, Immediate(c));
531 BranchOrBacktrack(not_equal, on_not_equal);
532}
533
534void RegExpMacroAssemblerX64::CheckCharacterInRange(base::uc16 from,
535 base::uc16 to,
536 Label* on_in_range) {
537 __ leal(rax, Operand(current_character(), -from));
538 __ cmpl(rax, Immediate(to - from));
539 BranchOrBacktrack(below_equal, on_in_range);
540}
541
542void RegExpMacroAssemblerX64::CheckCharacterNotInRange(base::uc16 from,
543 base::uc16 to,
544 Label* on_not_in_range) {
545 __ leal(rax, Operand(current_character(), -from));
546 __ cmpl(rax, Immediate(to - from));
547 BranchOrBacktrack(above, on_not_in_range);
548}
549
550void RegExpMacroAssemblerX64::CallIsCharacterInRangeArray(
551 const ZoneList<CharacterRange>* ranges) {
552 PushCallerSavedRegisters();
553
554 static const int kNumArguments = 2;
555 __ PrepareCallCFunction(kNumArguments);
556
557 __ Move(kCArgRegs[0], current_character());
558 __ Move(kCArgRegs[1], GetOrAddRangeArray(ranges));
559
560 {
561 // We have a frame (set up in GetCode), but the assembler doesn't know.
562 FrameScope scope(&masm_, StackFrame::MANUAL);
563 CallCFunctionFromIrregexpCode(
564 ExternalReference::re_is_character_in_range_array(), kNumArguments);
565 }
566
567 PopCallerSavedRegisters();
568 __ Move(code_object_pointer(), masm_.CodeObject());
569}
570
571bool RegExpMacroAssemblerX64::CheckCharacterInRangeArray(
572 const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
573 CallIsCharacterInRangeArray(ranges);
574 __ testq(rax, rax);
575 BranchOrBacktrack(not_zero, on_in_range);
576 return true;
577}
578
579bool RegExpMacroAssemblerX64::CheckCharacterNotInRangeArray(
580 const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
581 CallIsCharacterInRangeArray(ranges);
582 __ testq(rax, rax);
583 BranchOrBacktrack(zero, on_not_in_range);
584 return true;
585}
586
587void RegExpMacroAssemblerX64::CheckBitInTable(
588 Handle<ByteArray> table,
589 Label* on_bit_set) {
590 __ Move(rax, table);
591 Register index = current_character();
592 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
593 __ movq(rbx, current_character());
594 __ andq(rbx, Immediate(kTableMask));
595 index = rbx;
596 }
597 __ cmpb(FieldOperand(rax, index, times_1, OFFSET_OF_DATA_START(ByteArray)),
598 Immediate(0));
599 BranchOrBacktrack(not_equal, on_bit_set);
600}
601
// Emits a loop that advances the current position until the character at
// cp_offset has a non-zero entry in `table`, or until CheckPosition sends
// control to the exit label.
//
// - cp_offset:          offset from the current position of the character
//                       that is examined.
// - table:              byte table used by the scalar loop.
// - nibble_table_array: table used by the SIMD loop; must be non-null when
//                       the SIMD variant is selected.
// - advance_by:         number of characters the scalar loop advances per
//                       iteration.
//
// When SkipUntilBitInTableUseSimd(advance_by) holds, a 16-byte vector loop
// runs first and falls back to the scalar loop once fewer than a vector's
// worth of characters remain.
602void RegExpMacroAssemblerX64::SkipUntilBitInTable(
603 int cp_offset, Handle<ByteArray> table,
604 Handle<ByteArray> nibble_table_array, int advance_by) {
605 Label cont, scalar_repeat;
606
607 const bool use_simd = SkipUntilBitInTableUseSimd(advance_by);
608 if (use_simd) {
609 DCHECK(!nibble_table_array.is_null());
610 Label simd_repeat, found, scalar;
611 static constexpr int kVectorSize = 16;
612 const int kCharsPerVector = kVectorSize / char_size();
613
614 // Fallback to scalar version if there are less than kCharsPerVector chars
615 // left in the subject.
616 // We subtract 1 because CheckPosition assumes we are reading 1 character
617 // plus cp_offset. So the -1 is the character that is assumed to be
618 // read by default.
619 CheckPosition(cp_offset + kCharsPerVector - 1, &scalar);
620
621 // Load table and mask constants.
622 // For a description of the table layout, check the comment on
623 // BoyerMooreLookahead::GetSkipTable in regexp-compiler.cc.
624 XMMRegister nibble_table = xmm0;
625 __ Move(r11, nibble_table_array);
626 __ Movdqu(nibble_table, FieldOperand(r11, OFFSET_OF_DATA_START(ByteArray)));
627 XMMRegister nibble_mask = xmm1;
628 __ Move(r11, 0x0f0f0f0f'0f0f0f0f);
629 __ movq(nibble_mask, r11);
630 __ Movddup(nibble_mask, nibble_mask);
631 XMMRegister hi_nibble_lookup_mask = xmm2;
632 __ Move(r11, 0x80402010'08040201);
633 __ movq(hi_nibble_lookup_mask, r11);
634 __ Movddup(hi_nibble_lookup_mask, hi_nibble_lookup_mask);
635
636 Bind(&simd_repeat);
637 // Load next characters into vector.
638 XMMRegister input_vec = xmm3;
639 __ Movdqu(input_vec, Operand(rsi, rdi, times_1, cp_offset));
640
641 // Extract low nibbles.
642 // lo_nibbles = input & 0x0f
643 XMMRegister lo_nibbles = xmm4;
644 if (CpuFeatures::IsSupported(AVX)) {
645 __ Andps(lo_nibbles, nibble_mask, input_vec);
646 } else {
// Without AVX the mask is copied into the destination first.
647 __ Movdqa(lo_nibbles, nibble_mask);
648 __ Andps(lo_nibbles, lo_nibbles, input_vec);
649 }
650 // Extract high nibbles.
651 // hi_nibbles = (input >> 4) & 0x0f
652 __ Psrlw(input_vec, uint8_t{4});
653 XMMRegister hi_nibbles = ReassignRegister(input_vec);
654 __ Andps(hi_nibbles, hi_nibbles, nibble_mask);
655
656 // Get rows of nibbles table based on low nibbles.
657 // row = nibble_table[lo_nibbles]
658 XMMRegister row = xmm5;
659 __ Pshufb(row, nibble_table, lo_nibbles);
660
661 // Check if high nibble is set in row.
662 // bitmask = 1 << (hi_nibbles & 0x7)
663 // = hi_nibbles_lookup_mask[hi_nibbles] & 0x7
664 // Note: The hi_nibbles & 0x7 part is implicitly executed, as pshufb sets
665 // the result byte to zero if bit 7 is set in the source byte.
666 XMMRegister bitmask = ReassignRegister(lo_nibbles);
667 __ Pshufb(bitmask, hi_nibble_lookup_mask, hi_nibbles);
668
669 // result = row & bitmask == bitmask
670 XMMRegister result = ReassignRegister(row);
671 __ Andps(result, result, bitmask);
672 __ Pcmpeqb(result, result, bitmask);
673
674 // Check if any bit is set.
675 // Copy the most significant bit of each result byte to r11.
676 __ Pmovmskb(r11, result);
677 __ testl(r11, r11);
678 __ j(not_zero, &found);
679
680 // The maximum lookahead for boyer moore is less than vector size, so we can
681 // ignore advance_by in the vectorized version.
682 AdvanceCurrentPosition(kCharsPerVector);
683 CheckPosition(cp_offset + kCharsPerVector - 1, &scalar);
684 __ jmp(&simd_repeat);
685
686 Bind(&found);
687 // Extract position.
// bsfl leaves the index of the lowest set bit of the mask in r11.
688 __ bsfl(r11, r11);
689 if (mode_ == UC16) {
690 // Make sure that we skip an even number of bytes in 2-byte subjects.
691 // Odd skips can happen if the higher byte produced a match.
692 // False positives should be rare and are no problem in general, as the
693 // following instructions will check for an exact match.
694 __ andl(r11, Immediate(0xfffe));
695 }
696 __ addq(rdi, r11);
697 __ jmp(&cont);
698 Bind(&scalar);
699 }
700
701 // Scalar version.
702 Register table_reg = r9;
703 __ Move(table_reg, table);
704
705 Bind(&scalar_repeat);
706 CheckPosition(cp_offset, &cont);
707 LoadCurrentCharacterUnchecked(cp_offset, 1);
708 Register index = current_character();
// The character is masked into rbx unless it can index the table directly.
709 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
710 index = rbx;
711 __ movq(index, current_character());
712 __ andq(index, Immediate(kTableMask));
713 }
714 __ cmpb(
715 FieldOperand(table_reg, index, times_1, OFFSET_OF_DATA_START(ByteArray)),
716 Immediate(0));
717 __ j(not_equal, &cont);
718 AdvanceCurrentPosition(advance_by);
719 __ jmp(&scalar_repeat);
720
721 __ bind(&cont);
722}
723
724bool RegExpMacroAssemblerX64::SkipUntilBitInTableUseSimd(int advance_by) {
725 // To use the SIMD variant we require SSSE3 as there is no shuffle equivalent
726 // in older extensions.
727 // In addition we only use SIMD instead of the scalar version if we advance by
728 // 1 byte in each iteration. For higher values the scalar version performs
729 // better.
730 return v8_flags.regexp_simd && advance_by * char_size() == 1 &&
731 CpuFeatures::IsSupported(SSSE3);
732}
733
// Emits a specialised test of the current character against the standard
// character class `type`, branching to `on_no_match` (via BranchOrBacktrack)
// when the character is not in the class.
//
// Returns true when code for the class was handled here and false when no
// custom implementation exists for this class/mode combination.
// NOTE(review): on false the caller presumably falls back to the generic
// character class code -- confirm.
734bool RegExpMacroAssemblerX64::CheckSpecialClassRanges(StandardCharacterSet type,
735 Label* on_no_match) {
736 // Range checks (c in min..max) are generally implemented by an unsigned
737 // (c - min) <= (max - min) check, using the sequence:
738 // leal(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))
739 // cmpl(rax, Immediate(max - min))
740 // TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
741 switch (type) {
742 case StandardCharacterSet::kWhitespace:
743 // Match space-characters.
744 if (mode_ == LATIN1) {
745 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
746 Label success;
747 __ cmpl(current_character(), Immediate(' '));
748 __ j(equal, &success, Label::kNear);
749 // Check range 0x09..0x0D.
750 __ leal(rax, Operand(current_character(), -'\t'));
751 __ cmpl(rax, Immediate('\r' - '\t'));
752 __ j(below_equal, &success, Label::kNear);
753 // \u00a0 (NBSP).
// rax still holds (character - '\t'), so the constant is offset to match.
754 __ cmpl(rax, Immediate(0x00A0 - '\t'));
755 BranchOrBacktrack(not_equal, on_no_match);
756 __ bind(&success);
757 return true;
758 }
759 return false;
760 case StandardCharacterSet::kNotWhitespace:
761 // The emitted code for generic character classes is good enough.
762 return false;
763 case StandardCharacterSet::kDigit:
764 // Match ASCII digits ('0'..'9').
765 __ leal(rax, Operand(current_character(), -'0'));
766 __ cmpl(rax, Immediate('9' - '0'));
767 BranchOrBacktrack(above, on_no_match);
768 return true;
769 case StandardCharacterSet::kNotDigit:
770 // Match non ASCII-digits.
771 __ leal(rax, Operand(current_character(), -'0'));
772 __ cmpl(rax, Immediate('9' - '0'));
773 BranchOrBacktrack(below_equal, on_no_match);
774 return true;
775 case StandardCharacterSet::kNotLineTerminator: {
776 // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
777 __ movl(rax, current_character());
778 __ xorl(rax, Immediate(0x01));
779 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
780 __ subl(rax, Immediate(0x0B));
781 __ cmpl(rax, Immediate(0x0C - 0x0B));
782 BranchOrBacktrack(below_equal, on_no_match);
783 if (mode_ == UC16) {
784 // Compare original value to 0x2028 and 0x2029, using the already
785 // computed (current_char ^ 0x01 - 0x0B). I.e., check for
786 // 0x201D (0x2028 - 0x0B) or 0x201E.
787 __ subl(rax, Immediate(0x2028 - 0x0B));
788 __ cmpl(rax, Immediate(0x2029 - 0x2028));
789 BranchOrBacktrack(below_equal, on_no_match);
790 }
791 return true;
792 }
793 case StandardCharacterSet::kLineTerminator: {
794 // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
795 __ movl(rax, current_character());
796 __ xorl(rax, Immediate(0x01));
797 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
798 __ subl(rax, Immediate(0x0B));
799 __ cmpl(rax, Immediate(0x0C - 0x0B));
800 if (mode_ == LATIN1) {
801 BranchOrBacktrack(above, on_no_match);
802 } else {
803 Label done;
804 BranchOrBacktrack(below_equal, &done);
805 // Compare original value to 0x2028 and 0x2029, using the already
806 // computed (current_char ^ 0x01 - 0x0B). I.e., check for
807 // 0x201D (0x2028 - 0x0B) or 0x201E.
808 __ subl(rax, Immediate(0x2028 - 0x0B));
809 __ cmpl(rax, Immediate(0x2029 - 0x2028));
810 BranchOrBacktrack(above, on_no_match);
811 __ bind(&done);
812 }
813 return true;
814 }
815 case StandardCharacterSet::kWord: {
816 if (mode_ != LATIN1) {
817 // Table is 256 entries, so all Latin1 characters can be tested.
// Characters above 'z' are rejected without consulting the table.
818 __ cmpl(current_character(), Immediate('z'));
819 BranchOrBacktrack(above, on_no_match);
820 }
821 __ Move(rbx, ExternalReference::re_word_character_map());
822 DCHECK_EQ(0,
823 word_character_map[0]); // Character '\0' is not a word char.
824 __ testb(Operand(rbx, current_character(), times_1, 0),
825 current_character());
826 BranchOrBacktrack(zero, on_no_match);
827 return true;
828 }
829 case StandardCharacterSet::kNotWord: {
830 Label done;
831 if (mode_ != LATIN1) {
832 // Table is 256 entries, so all Latin1 characters can be tested.
// Characters above 'z' skip the table lookup and are accepted.
833 __ cmpl(current_character(), Immediate('z'));
834 __ j(above, &done);
835 }
836 __ Move(rbx, ExternalReference::re_word_character_map());
837 DCHECK_EQ(0,
838 word_character_map[0]); // Character '\0' is not a word char.
839 __ testb(Operand(rbx, current_character(), times_1, 0),
840 current_character());
841 BranchOrBacktrack(not_zero, on_no_match);
842 if (mode_ != LATIN1) {
843 __ bind(&done);
844 }
845 return true;
846 }
847
848 case StandardCharacterSet::kEverything:
849 // Match any character.
850 return true;
851 }
852}
853
854void RegExpMacroAssemblerX64::BindJumpTarget(Label* label) {
855 Bind(label);
856 // TODO(sroettger): There should be an endbr64 instruction here, but it needs
857 // more thought how to avoid perf regressions.
858}
859
860void RegExpMacroAssemblerX64::Fail() {
861 static_assert(FAILURE == 0); // Return value for failure is zero.
862 if (!global()) {
863 __ Move(rax, FAILURE);
864 }
865 __ jmp(&exit_label_);
866}
867
868void RegExpMacroAssemblerX64::LoadRegExpStackPointerFromMemory(Register dst) {
869 ExternalReference ref =
870 ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
871 __ movq(dst, __ ExternalReferenceAsOperand(ref, dst));
872}
873
874void RegExpMacroAssemblerX64::StoreRegExpStackPointerToMemory(
875 Register src, Register scratch) {
876 ExternalReference ref =
877 ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
878 __ movq(__ ExternalReferenceAsOperand(ref, scratch), src);
879}
880
881void RegExpMacroAssemblerX64::PushRegExpBasePointer(Register stack_pointer,
882 Register scratch) {
883 ExternalReference ref =
884 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
885 __ movq(scratch, __ ExternalReferenceAsOperand(ref, scratch));
886 __ subq(scratch, stack_pointer);
887 __ movq(Operand(rbp, kRegExpStackBasePointerOffset), scratch);
888}
889
890void RegExpMacroAssemblerX64::PopRegExpBasePointer(Register stack_pointer_out,
891 Register scratch) {
892 ExternalReference ref =
893 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
894 __ movq(scratch, Operand(rbp, kRegExpStackBasePointerOffset));
895 __ movq(stack_pointer_out,
896 __ ExternalReferenceAsOperand(ref, stack_pointer_out));
897 __ subq(stack_pointer_out, scratch);
898 StoreRegExpStackPointerToMemory(stack_pointer_out, scratch);
899}
900
901DirectHandle<HeapObject> RegExpMacroAssemblerX64::GetCode(
902 DirectHandle<String> source, RegExpFlags flags) {
903 Label return_rax;
904 // Finalize code - write the entry point code now we know how many registers
905 // we need.
906 __ bind(&entry_label_);
907
908 // Tell the system that we have a stack frame. Because the type is MANUAL, no
909 // physical frame is generated.
910 FrameScope scope(&masm_, StackFrame::MANUAL);
911
912 // Actually emit code to start a new stack frame. This pushes the frame type
913 // marker into the stack slot at kFrameTypeOffset.
914 static_assert(kFrameTypeOffset == -1 * kSystemPointerSize);
915 __ EnterFrame(StackFrame::IRREGEXP);
916
917 // Save parameters and callee-save registers. Order here should correspond
918 // to order of kBackup_ebx etc.
919#ifdef V8_TARGET_OS_WIN
920 // MSVC passes arguments in rcx, rdx, r8, r9, with backing stack slots.
921 // Store register parameters in pre-allocated stack slots.
922 __ movq(Operand(rbp, kInputStringOffset), kCArgRegs[0]);
923 __ movq(Operand(rbp, kStartIndexOffset),
924 kCArgRegs[1]); // Passed as int32 in edx.
925 __ movq(Operand(rbp, kInputStartOffset), kCArgRegs[2]);
926 __ movq(Operand(rbp, kInputEndOffset), kCArgRegs[3]);
927
928 static_assert(kNumCalleeSaveRegisters == 3);
929 static_assert(kBackupRsiOffset == -2 * kSystemPointerSize);
930 static_assert(kBackupRdiOffset == -3 * kSystemPointerSize);
931 static_assert(kBackupRbxOffset == -4 * kSystemPointerSize);
932 __ pushq(rsi);
933 __ pushq(rdi);
934 __ pushq(rbx);
935#else
936 // GCC passes arguments in rdi, rsi, rdx, rcx, r8, r9 (and then on stack).
937 // Push register parameters on stack for reference.
938 static_assert(kInputStringOffset == -2 * kSystemPointerSize);
939 static_assert(kStartIndexOffset == -3 * kSystemPointerSize);
940 static_assert(kInputStartOffset == -4 * kSystemPointerSize);
941 static_assert(kInputEndOffset == -5 * kSystemPointerSize);
942 static_assert(kRegisterOutputOffset == -6 * kSystemPointerSize);
943 static_assert(kNumOutputRegistersOffset == -7 * kSystemPointerSize);
944 __ pushq(kCArgRegs[0]);
945 __ pushq(kCArgRegs[1]);
946 __ pushq(kCArgRegs[2]);
947 __ pushq(kCArgRegs[3]);
948 __ pushq(r8);
949 __ pushq(r9);
950
951 static_assert(kNumCalleeSaveRegisters == 1);
952 static_assert(kBackupRbxOffset == -8 * kSystemPointerSize);
953 __ pushq(rbx);
954#endif
955
956 static_assert(kSuccessfulCapturesOffset ==
957 kLastCalleeSaveRegister - kSystemPointerSize);
958 __ Push(Immediate(0)); // Number of successful matches in a global regexp.
959 static_assert(kStringStartMinusOneOffset ==
960 kSuccessfulCapturesOffset - kSystemPointerSize);
961 __ Push(Immediate(0)); // Make room for "string start - 1" constant.
962 static_assert(kBacktrackCountOffset ==
963 kStringStartMinusOneOffset - kSystemPointerSize);
964 __ Push(Immediate(0)); // The backtrack counter.
965 static_assert(kRegExpStackBasePointerOffset ==
966 kBacktrackCountOffset - kSystemPointerSize);
967 __ Push(Immediate(0)); // The regexp stack base ptr.
968
969 // Initialize backtrack stack pointer. It must not be clobbered from here on.
970 // Note the backtrack_stackpointer is *not* callee-saved.
971 static_assert(backtrack_stackpointer() == rcx);
972 LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
973
974 // Store the regexp base pointer - we'll later restore it / write it to
975 // memory when returning from this irregexp code object.
976 PushRegExpBasePointer(backtrack_stackpointer(), kScratchRegister);
977
978 {
979 // Check if we have space on the stack for registers.
980 Label stack_limit_hit, stack_ok;
981
982 ExternalReference stack_limit =
983 ExternalReference::address_of_jslimit(isolate());
984 __ movq(r9, rsp);
985 __ Move(kScratchRegister, stack_limit);
986 __ subq(r9, Operand(kScratchRegister, 0));
987 Immediate extra_space_for_variables(num_registers_ * kSystemPointerSize);
988
989 // Handle it if the stack pointer is already below the stack limit.
990 __ j(below_equal, &stack_limit_hit);
991 // Check if there is room for the variable number of registers above
992 // the stack limit.
993 __ cmpq(r9, extra_space_for_variables);
994 __ j(above_equal, &stack_ok);
995 // Exit with OutOfMemory exception. There is not enough space on the stack
996 // for our working registers.
997 __ Move(rax, EXCEPTION);
998 __ jmp(&return_rax);
999
1000 __ bind(&stack_limit_hit);
1001 __ Move(code_object_pointer(), masm_.CodeObject());
1002 __ pushq(backtrack_stackpointer());
1003 // CallCheckStackGuardState preserves no registers beside rbp and rsp.
1004 CallCheckStackGuardState(extra_space_for_variables);
1005 __ popq(backtrack_stackpointer());
1006 __ testq(rax, rax);
1007 // If returned value is non-zero, we exit with the returned value as result.
1008 __ j(not_zero, &return_rax);
1009
1010 __ bind(&stack_ok);
1011 }
1012
1013 // Allocate space on stack for registers.
1014 __ AllocateStackSpace(num_registers_ * kSystemPointerSize);
1015 // Load string length.
1016 __ movq(rsi, Operand(rbp, kInputEndOffset));
1017 // Load input position.
1018 __ movq(rdi, Operand(rbp, kInputStartOffset));
1019 // Set up rdi to be negative offset from string end.
1020 __ subq(rdi, rsi);
1021 // Set rax to address of char before start of the string
1022 // (effectively string position -1).
1023 __ movq(rbx, Operand(rbp, kStartIndexOffset));
1024 __ negq(rbx);
1025 __ leaq(rax, Operand(rdi, rbx, CharSizeScaleFactor(), -char_size()));
1026 // Store this value in a local variable, for use when clearing
1027 // position registers.
1028 __ movq(Operand(rbp, kStringStartMinusOneOffset), rax);
1029
1030 // Initialize code object pointer.
1031 __ Move(code_object_pointer(), masm_.CodeObject());
1032
1033 Label load_char_start_regexp; // Execution restarts here for global regexps.
1034 {
1035 Label start_regexp;
1036
1037 // Load newline if index is at start, previous character otherwise.
1038 __ cmpl(Operand(rbp, kStartIndexOffset), Immediate(0));
1039 __ j(not_equal, &load_char_start_regexp, Label::kNear);
1040 __ Move(current_character(), '\n');
1041 __ jmp(&start_regexp, Label::kNear);
1042
1043 // Global regexp restarts matching here.
1044 __ bind(&load_char_start_regexp);
1045 // Load previous char as initial value of current character register.
1046 LoadCurrentCharacterUnchecked(-1, 1);
1047
1048 __ bind(&start_regexp);
1049 }
1050
1051 // Initialize on-stack registers.
1052 if (num_saved_registers_ > 0) {
1053 // Fill saved registers with initial value = start offset - 1
1054 // Fill in stack push order, to avoid accessing across an unwritten
1055 // page (a problem on Windows).
1056 if (num_saved_registers_ > 8) {
1057 __ Move(r9, kRegisterZeroOffset);
1058 Label init_loop;
1059 __ bind(&init_loop);
1060 __ movq(Operand(rbp, r9, times_1, 0), rax);
1061 __ subq(r9, Immediate(kSystemPointerSize));
1062 __ cmpq(r9, Immediate(kRegisterZeroOffset -
1063 num_saved_registers_ * kSystemPointerSize));
1064 __ j(greater, &init_loop);
1065 } else { // Unroll the loop.
1066 for (int i = 0; i < num_saved_registers_; i++) {
1067 __ movq(register_location(i), rax);
1068 }
1069 }
1070 }
1071
1072 __ jmp(&start_label_);
1073
1074 // Exit code:
1075 if (success_label_.is_linked()) {
1076 // Save captures when successful.
1077 __ bind(&success_label_);
1078 if (num_saved_registers_ > 0) {
1079 // copy captures to output
1080 __ movq(rdx, Operand(rbp, kStartIndexOffset));
1081 __ movq(rbx, Operand(rbp, kRegisterOutputOffset));
1082 __ movq(rcx, Operand(rbp, kInputEndOffset));
1083 __ subq(rcx, Operand(rbp, kInputStartOffset));
1084 if (mode_ == UC16) {
1085 __ leaq(rcx, Operand(rcx, rdx, CharSizeScaleFactor(), 0));
1086 } else {
1087 __ addq(rcx, rdx);
1088 }
1089 for (int i = 0; i < num_saved_registers_; i++) {
1090 __ movq(rax, register_location(i));
1091 if (i == 0 && global_with_zero_length_check()) {
1092 // Keep capture start in rdx for the zero-length check later.
1093 __ movq(rdx, rax);
1094 }
1095 __ addq(rax, rcx); // Convert to index from start, not end.
1096 if (mode_ == UC16) {
1097 __ sarq(rax, Immediate(1)); // Convert byte index to character index.
1098 }
1099 __ movl(Operand(rbx, i * kIntSize), rax);
1100 }
1101 }
1102
1103 if (global()) {
1104 // Restart matching if the regular expression is flagged as global.
1105 // Increment success counter.
1106 __ incq(Operand(rbp, kSuccessfulCapturesOffset));
1107 // Capture results have been stored, so the number of remaining global
1108 // output registers is reduced by the number of stored captures.
1109 __ movsxlq(rcx, Operand(rbp, kNumOutputRegistersOffset));
1110 __ subq(rcx, Immediate(num_saved_registers_));
1111 // Check whether we have enough room for another set of capture results.
1112 __ cmpq(rcx, Immediate(num_saved_registers_));
1113 __ j(less, &exit_label_);
1114
1115 __ movq(Operand(rbp, kNumOutputRegistersOffset), rcx);
1116 // Advance the location for output.
1117 __ addq(Operand(rbp, kRegisterOutputOffset),
1118 Immediate(num_saved_registers_ * kIntSize));
1119
1120 // Restore the original regexp stack pointer value (effectively, pop the
1121 // stored base pointer).
1122 PopRegExpBasePointer(backtrack_stackpointer(), kScratchRegister);
1123
1124 Label reload_string_start_minus_one;
1125
1126 if (global_with_zero_length_check()) {
1127 // Special case for zero-length matches.
1128 // rdx: capture start index
1129 __ cmpq(rdi, rdx);
1130 // Not a zero-length match, restart.
1131 __ j(not_equal, &reload_string_start_minus_one);
1132 // rdi (offset from the end) is zero if we already reached the end.
1133 __ testq(rdi, rdi);
1134 __ j(zero, &exit_label_, Label::kNear);
1135 // Advance current position after a zero-length match.
1136 Label advance;
1137 __ bind(&advance);
1138 if (mode_ == UC16) {
1139 __ addq(rdi, Immediate(2));
1140 } else {
1141 __ incq(rdi);
1142 }
1143 if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
1144 }
1145
1146 __ bind(&reload_string_start_minus_one);
1147 // Prepare rax to initialize registers with its value in the next run.
1148 // Must be immediately before the jump to avoid clobbering.
1149 __ movq(rax, Operand(rbp, kStringStartMinusOneOffset));
1150
1151 __ jmp(&load_char_start_regexp);
1152 } else {
1153 __ Move(rax, SUCCESS);
1154 }
1155 }
1156
1157 __ bind(&exit_label_);
1158 if (global()) {
1159 // Return the number of successful captures.
1160 __ movq(rax, Operand(rbp, kSuccessfulCapturesOffset));
1161 }
1162
1163 __ bind(&return_rax);
1164 // Restore the original regexp stack pointer value (effectively, pop the
1165 // stored base pointer).
1166 PopRegExpBasePointer(backtrack_stackpointer(), kScratchRegister);
1167
1168#ifdef V8_TARGET_OS_WIN
1169 // Restore callee save registers.
1170 __ leaq(rsp, Operand(rbp, kLastCalleeSaveRegister));
1171 static_assert(kNumCalleeSaveRegisters == 3);
1172 static_assert(kBackupRsiOffset == -2 * kSystemPointerSize);
1173 static_assert(kBackupRdiOffset == -3 * kSystemPointerSize);
1174 static_assert(kBackupRbxOffset == -4 * kSystemPointerSize);
1175 __ popq(rbx);
1176 __ popq(rdi);
1177 __ popq(rsi);
1178#else
1179 // Restore callee save register.
1180 static_assert(kNumCalleeSaveRegisters == 1);
1181 __ movq(rbx, Operand(rbp, kBackupRbxOffset));
1182#endif
1183
1184 __ LeaveFrame(StackFrame::IRREGEXP);
1185 __ ret(0);
1186
1187 // Backtrack code (branch target for conditional backtracks).
1188 if (backtrack_label_.is_linked()) {
1189 __ bind(&backtrack_label_);
1190 Backtrack();
1191 }
1192
1193 Label exit_with_exception;
1194
1195 // Preempt-code.
1196 if (check_preempt_label_.is_linked()) {
1197 SafeCallTarget(&check_preempt_label_);
1198
1199 __ pushq(rdi);
1200
1201 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), kScratchRegister);
1202
1203 CallCheckStackGuardState();
1204 __ testq(rax, rax);
1205 // If returning non-zero, we should end execution with the given
1206 // result as return value.
1207 __ j(not_zero, &return_rax);
1208
1209 // Restore registers.
1210 __ Move(code_object_pointer(), masm_.CodeObject());
1211 __ popq(rdi);
1212
1213 LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
1214
1215 // String might have moved: Reload esi from frame.
1216 __ movq(rsi, Operand(rbp, kInputEndOffset));
1217 SafeReturn();
1218 }
1219
1220 // Backtrack stack overflow code.
1221 if (stack_overflow_label_.is_linked()) {
1222 SafeCallTarget(&stack_overflow_label_);
1223 // Reached if the backtrack-stack limit has been hit.
1224
1225 PushCallerSavedRegisters();
1226
1227 // Call GrowStack(isolate).
1228
1229 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), kScratchRegister);
1230
1231 static constexpr int kNumArguments = 1;
1232 __ PrepareCallCFunction(kNumArguments);
1233 __ LoadAddress(kCArgRegs[0], ExternalReference::isolate_address(isolate()));
1234
1235 ExternalReference grow_stack = ExternalReference::re_grow_stack();
1236 CallCFunctionFromIrregexpCode(grow_stack, kNumArguments);
1237 // If nullptr is returned, we have failed to grow the stack, and must exit
1238 // with a stack-overflow exception.
1239 __ testq(rax, rax);
1240 __ j(equal, &exit_with_exception);
1241 PopCallerSavedRegisters();
1242 // Otherwise use return value as new stack pointer.
1243 __ movq(backtrack_stackpointer(), rax);
1244 // Restore saved registers and continue.
1245 __ Move(code_object_pointer(), masm_.CodeObject());
1246 SafeReturn();
1247 }
1248
1249 if (exit_with_exception.is_linked()) {
1250 // If any of the code above needed to exit with an exception.
1251 __ bind(&exit_with_exception);
1252 // Exit with Result EXCEPTION(-1) to signal thrown exception.
1253 __ Move(rax, EXCEPTION);
1254 __ jmp(&return_rax);
1255 }
1256
1257 if (fallback_label_.is_linked()) {
1258 __ bind(&fallback_label_);
1259 __ Move(rax, FALLBACK_TO_EXPERIMENTAL);
1260 __ jmp(&return_rax);
1261 }
1262
1263 FixupCodeRelativePositions();
1264
1265 CodeDesc code_desc;
1266 Isolate* isolate = this->isolate();
1267 masm_.GetCode(isolate, &code_desc);
1268 DirectHandle<Code> code =
1269 Factory::CodeBuilder(isolate, code_desc, CodeKind::REGEXP)
1270 .set_self_reference(masm_.CodeObject())
1271 .set_empty_source_position_table()
1272 .Build();
1273 PROFILE(isolate,
1274 RegExpCodeCreateEvent(Cast<AbstractCode>(code), source, flags));
1275 return Cast<HeapObject>(code);
1276}
1277
1278void RegExpMacroAssemblerX64::GoTo(Label* to) { BranchOrBacktrack(to); }
1279
1280void RegExpMacroAssemblerX64::IfRegisterGE(int reg,
1281 int comparand,
1282 Label* if_ge) {
1283 __ cmpq(register_location(reg), Immediate(comparand));
1284 BranchOrBacktrack(greater_equal, if_ge);
1285}
1286
1287
1288void RegExpMacroAssemblerX64::IfRegisterLT(int reg,
1289 int comparand,
1290 Label* if_lt) {
1291 __ cmpq(register_location(reg), Immediate(comparand));
1292 BranchOrBacktrack(less, if_lt);
1293}
1294
1295
1296void RegExpMacroAssemblerX64::IfRegisterEqPos(int reg,
1297 Label* if_eq) {
1298 __ cmpq(rdi, register_location(reg));
1299 BranchOrBacktrack(equal, if_eq);
1300}
1301
1302
1303RegExpMacroAssembler::IrregexpImplementation
1304 RegExpMacroAssemblerX64::Implementation() {
1305 return kX64Implementation;
1306}
1307
1308
1309void RegExpMacroAssemblerX64::PopCurrentPosition() {
1310 Pop(rdi);
1311}
1312
1313
1314void RegExpMacroAssemblerX64::PopRegister(int register_index) {
1315 Pop(rax);
1316 __ movq(register_location(register_index), rax);
1317}
1318
1319
1320void RegExpMacroAssemblerX64::PushBacktrack(Label* label) {
1321 Push(label);
1322 CheckStackLimit();
1323}
1324
1325
1326void RegExpMacroAssemblerX64::PushCurrentPosition() {
1327 Push(rdi);
1328 CheckStackLimit();
1329}
1330
1331
1332void RegExpMacroAssemblerX64::PushRegister(int register_index,
1333 StackCheckFlag check_stack_limit) {
1334 __ movq(rax, register_location(register_index));
1335 Push(rax);
1336 if (check_stack_limit) {
1337 CheckStackLimit();
1338 } else if (V8_UNLIKELY(v8_flags.slow_debug_code)) {
1339 AssertAboveStackLimitMinusSlack();
1340 }
1341}
1342
1343void RegExpMacroAssemblerX64::ReadCurrentPositionFromRegister(int reg) {
1344 __ movq(rdi, register_location(reg));
1345}
1346
1347
1348void RegExpMacroAssemblerX64::ReadPositionFromRegister(Register dst, int reg) {
1349 __ movq(dst, register_location(reg));
1350}
1351
1352// Preserves a position-independent representation of the stack pointer in reg:
1353// reg = top - sp.
1354void RegExpMacroAssemblerX64::WriteStackPointerToRegister(int reg) {
1355 ExternalReference stack_top_address =
1356 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1357 __ movq(rax, __ ExternalReferenceAsOperand(stack_top_address, rax));
1358 __ subq(rax, backtrack_stackpointer());
1359 __ movq(register_location(reg), rax);
1360}
1361
1362void RegExpMacroAssemblerX64::ReadStackPointerFromRegister(int reg) {
1363 ExternalReference stack_top_address =
1364 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1365 __ movq(backtrack_stackpointer(),
1366 __ ExternalReferenceAsOperand(stack_top_address,
1367 backtrack_stackpointer()));
1368 __ subq(backtrack_stackpointer(), register_location(reg));
1369}
1370
1371void RegExpMacroAssemblerX64::SetCurrentPositionFromEnd(int by) {
1372 Label after_position;
1373 __ cmpq(rdi, Immediate(-by * char_size()));
1374 __ j(greater_equal, &after_position, Label::kNear);
1375 __ Move(rdi, -by * char_size());
1376 // On RegExp code entry (where this operation is used), the character before
1377 // the current position is expected to be already loaded.
1378 // We have advanced the position, so it's safe to read backwards.
1379 LoadCurrentCharacterUnchecked(-1, 1);
1380 __ bind(&after_position);
1381}
1382
1383
1384void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) {
1385 DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1386 __ movq(register_location(register_index), Immediate(to));
1387}
1388
1389
1390bool RegExpMacroAssemblerX64::Succeed() {
1391 __ jmp(&success_label_);
1392 return global();
1393}
1394
1395
1396void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg,
1397 int cp_offset) {
1398 if (cp_offset == 0) {
1399 __ movq(register_location(reg), rdi);
1400 } else {
1401 __ leaq(rax, Operand(rdi, cp_offset * char_size()));
1402 __ movq(register_location(reg), rax);
1403 }
1404}
1405
1406
1407void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) {
1408 DCHECK(reg_from <= reg_to);
1409 __ movq(rax, Operand(rbp, kStringStartMinusOneOffset));
1410 for (int reg = reg_from; reg <= reg_to; reg++) {
1411 __ movq(register_location(reg), rax);
1412 }
1413}
1414
1415// Private methods:
1416
1417void RegExpMacroAssemblerX64::CallCheckStackGuardState(Immediate extra_space) {
1418 // This function call preserves no register values. Caller should
1419 // store anything volatile in a C call or overwritten by this function.
1420 static const int num_arguments = 4;
1421 __ PrepareCallCFunction(num_arguments);
1422#ifdef V8_TARGET_OS_WIN
1423 // Fourth argument: Extra space for variables.
1424 __ movq(kCArgRegs[3], extra_space);
1425 // Second argument: InstructionStream of self. (Do this before overwriting
1426 // r8 (kCArgRegs[2])).
1427 __ movq(kCArgRegs[1], code_object_pointer());
1428 // Third argument: RegExp code frame pointer.
1429 __ movq(kCArgRegs[2], rbp);
1430 // First argument: Next address on the stack (will be address of
1431 // return address).
1432 __ leaq(kCArgRegs[0], Operand(rsp, -kSystemPointerSize));
1433#else
1434 // Fourth argument: Extra space for variables.
1435 __ movq(kCArgRegs[3], extra_space);
1436 // Third argument: RegExp code frame pointer.
1437 __ movq(kCArgRegs[2], rbp);
1438 // Second argument: InstructionStream of self.
1439 __ movq(kCArgRegs[1], code_object_pointer());
1440 // First argument: Next address on the stack (will be address of
1441 // return address).
1442 __ leaq(kCArgRegs[0], Operand(rsp, -kSystemPointerSize));
1443#endif
1444 ExternalReference stack_check =
1445 ExternalReference::re_check_stack_guard_state();
1446 CallCFunctionFromIrregexpCode(stack_check, num_arguments);
1447}
1448
1449// Helper function for reading a value out of a stack frame.
1450template <typename T>
1451static T& frame_entry(Address re_frame, int frame_offset) {
1452 return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
1453}
1454
1455
1456template <typename T>
1457static T* frame_entry_address(Address re_frame, int frame_offset) {
1458 return reinterpret_cast<T*>(re_frame + frame_offset);
1459}
1460
1461int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
1462 Address raw_code,
1463 Address re_frame,
1464 uintptr_t extra_space) {
1465 Tagged<InstructionStream> re_code =
1466 Cast<InstructionStream>(Tagged<Object>(raw_code));
1467 return NativeRegExpMacroAssembler::CheckStackGuardState(
1468 frame_entry<Isolate*>(re_frame, kIsolateOffset),
1469 frame_entry<int>(re_frame, kStartIndexOffset),
1470 static_cast<RegExp::CallOrigin>(
1471 frame_entry<int>(re_frame, kDirectCallOffset)),
1472 return_address, re_code,
1473 frame_entry_address<Address>(re_frame, kInputStringOffset),
1474 frame_entry_address<const uint8_t*>(re_frame, kInputStartOffset),
1475 frame_entry_address<const uint8_t*>(re_frame, kInputEndOffset),
1476 extra_space);
1477}
1478
1479Operand RegExpMacroAssemblerX64::register_location(int register_index) {
1480 DCHECK(register_index < (1<<30));
1481 if (num_registers_ <= register_index) {
1482 num_registers_ = register_index + 1;
1483 }
1484 return Operand(rbp,
1485 kRegisterZeroOffset - register_index * kSystemPointerSize);
1486}
1487
1488
1489void RegExpMacroAssemblerX64::CheckPosition(int cp_offset,
1490 Label* on_outside_input) {
1491 if (cp_offset >= 0) {
1492 __ cmpl(rdi, Immediate(-cp_offset * char_size()));
1493 BranchOrBacktrack(greater_equal, on_outside_input);
1494 } else {
1495 __ leaq(rax, Operand(rdi, cp_offset * char_size()));
1496 __ cmpq(rax, Operand(rbp, kStringStartMinusOneOffset));
1497 BranchOrBacktrack(less_equal, on_outside_input);
1498 }
1499}
1500
1501void RegExpMacroAssemblerX64::BranchOrBacktrack(Label* to) {
1502 if (to == nullptr) {
1503 Backtrack();
1504 return;
1505 }
1506 __ jmp(to);
1507}
1508
1509void RegExpMacroAssemblerX64::BranchOrBacktrack(Condition condition,
1510 Label* to) {
1511 __ j(condition, to ? to : &backtrack_label_);
1512}
1513
1514void RegExpMacroAssemblerX64::SafeCall(Label* to) {
1515 __ call(to);
1516}
1517
1518
1519void RegExpMacroAssemblerX64::SafeCallTarget(Label* label) {
1520 __ bind(label);
1521 __ subq(Operand(rsp, 0), code_object_pointer());
1522}
1523
1524
1525void RegExpMacroAssemblerX64::SafeReturn() {
1526 __ addq(Operand(rsp, 0), code_object_pointer());
1527 __ ret(0);
1528}
1529
1530
1531void RegExpMacroAssemblerX64::Push(Register source) {
1532 DCHECK(source != backtrack_stackpointer());
1533 // Notice: This updates flags, unlike normal Push.
1534 __ subq(backtrack_stackpointer(), Immediate(kIntSize));
1535 __ movl(Operand(backtrack_stackpointer(), 0), source);
1536}
1537
1538
1539void RegExpMacroAssemblerX64::Push(Immediate value) {
1540 // Notice: This updates flags, unlike normal Push.
1541 __ subq(backtrack_stackpointer(), Immediate(kIntSize));
1542 __ movl(Operand(backtrack_stackpointer(), 0), value);
1543}
1544
1545
1546void RegExpMacroAssemblerX64::FixupCodeRelativePositions() {
1547 for (int position : code_relative_fixup_positions_) {
1548 // The position succeeds a relative label offset from position.
1549 // Patch the relative offset to be relative to the InstructionStream object
1550 // pointer instead.
1551 int patch_position = position - kIntSize;
1552 int offset = masm_.long_at(patch_position);
1553 masm_.long_at_put(
1554 patch_position,
1555 offset + position + InstructionStream::kHeaderSize - kHeapObjectTag);
1556 }
1557 code_relative_fixup_positions_.Rewind(0);
1558}
1559
1560
1561void RegExpMacroAssemblerX64::Push(Label* backtrack_target) {
1562 __ subq(backtrack_stackpointer(), Immediate(kIntSize));
1563 __ movl(Operand(backtrack_stackpointer(), 0), backtrack_target);
1564 MarkPositionForCodeRelativeFixup();
1565}
1566
1567
1568void RegExpMacroAssemblerX64::Pop(Register target) {
1569 DCHECK(target != backtrack_stackpointer());
1570 __ movsxlq(target, Operand(backtrack_stackpointer(), 0));
1571 // Notice: This updates flags, unlike normal Pop.
1572 __ addq(backtrack_stackpointer(), Immediate(kIntSize));
1573}
1574
1575
1576void RegExpMacroAssemblerX64::Drop() {
1577 __ addq(backtrack_stackpointer(), Immediate(kIntSize));
1578}
1579
1580
1581void RegExpMacroAssemblerX64::CheckPreemption() {
1582 // Check for preemption.
1583 Label no_preempt;
1584 ExternalReference stack_limit =
1585 ExternalReference::address_of_jslimit(isolate());
1586 __ load_rax(stack_limit);
1587 __ cmpq(rsp, rax);
1588 __ j(above, &no_preempt);
1589
1590 SafeCall(&check_preempt_label_);
1591
1592 __ bind(&no_preempt);
1593}
1594
1595
1596void RegExpMacroAssemblerX64::CheckStackLimit() {
1597 Label no_stack_overflow;
1598 ExternalReference stack_limit =
1599 ExternalReference::address_of_regexp_stack_limit_address(isolate());
1600 __ load_rax(stack_limit);
1601 __ cmpq(backtrack_stackpointer(), rax);
1602 __ j(above, &no_stack_overflow);
1603
1604 SafeCall(&stack_overflow_label_);
1605
1606 __ bind(&no_stack_overflow);
1607}
1608
1609void RegExpMacroAssemblerX64::AssertAboveStackLimitMinusSlack() {
1610 DCHECK(v8_flags.slow_debug_code);
1611 Label no_stack_overflow;
1612 ASM_CODE_COMMENT_STRING(&masm_, "AssertAboveStackLimitMinusSlack");
1613 auto l = ExternalReference::address_of_regexp_stack_limit_address(isolate());
1614 __ load_rax(l);
1615 __ subq(rax, Immediate(RegExpStack::kStackLimitSlackSize));
1616 __ cmpq(backtrack_stackpointer(), rax);
1617 __ j(above, &no_stack_overflow);
1618 __ int3();
1619 __ bind(&no_stack_overflow);
1620}
1621
1622void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,
1623 int characters) {
1624 if (mode_ == LATIN1) {
1625 if (characters == 4) {
1626 __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1627 } else if (characters == 2) {
1628 __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1629 } else {
1630 DCHECK_EQ(1, characters);
1631 __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1632 }
1633 } else {
1634 DCHECK(mode_ == UC16);
1635 if (characters == 2) {
1636 __ movl(current_character(),
1637 Operand(rsi, rdi, times_1, cp_offset * sizeof(base::uc16)));
1638 } else {
1639 DCHECK_EQ(1, characters);
1640 __ movzxwl(current_character(),
1641 Operand(rsi, rdi, times_1, cp_offset * sizeof(base::uc16)));
1642 }
1643 }
1644}
1645
1646#undef __
1647
1648} // namespace internal
1649} // namespace v8
1650
1651#endif // V8_TARGET_ARCH_X64
friend Zone
Definition asm-types.cc:195
RegExpMacroAssemblerX64(Isolate *isolate, Zone *zone, Mode mode, int registers_to_save)
#define PROFILE(the_isolate, Call)
Definition code-events.h:59
RecordWriteMode const mode_
const CodeDesc * code_desc
#define ASM_CODE_COMMENT_STRING(asm,...)
Definition assembler.h:618
Label label
Isolate * isolate
int32_t offset
ZoneVector< RpoNumber > & result
LiftoffRegister reg
int position
Definition liveedit.cc:290
uint32_t const mask
MaglevAssembler *const masm_
Address grow_stack(Isolate *isolate, void *current_sp, size_t frame_size, size_t gap, Address current_fp)
constexpr int kIntSize
Definition globals.h:400
Operand FieldOperand(Register object, int offset)
constexpr int kSystemPointerSize
Definition globals.h:410
std::unique_ptr< AssemblerBuffer > NewAssemblerBuffer(int size)
Definition assembler.cc:167
V8_EXPORT_PRIVATE FlagValues v8_flags
Register ReassignRegister(Register &source)
uint32_t compare
#define DCHECK_LE(v1, v2)
Definition logging.h:490
#define DCHECK(condition)
Definition logging.h:482
#define DCHECK_EQ(v1, v2)
Definition logging.h:485
#define DCHECK_GT(v1, v2)
Definition logging.h:487
#define OFFSET_OF_DATA_START(Type)
#define V8_UNLIKELY(condition)
Definition v8config.h:660