LCOV - output.info - js/src/irregexp/RegExpEngine.cpp

LCOV - code coverage report

Current view:	top level - js/src/irregexp - RegExpEngine.cpp (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	923	2229	41.4 %
Date:	2018-08-07 16:42:27	Functions:	0	0	-
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
       2             :  * vim: set ts=8 sts=4 et sw=4 tw=99: */
       3             : 
       4             : // Copyright 2012 the V8 project authors. All rights reserved.
       5             : // Redistribution and use in source and binary forms, with or without
       6             : // modification, are permitted provided that the following conditions are
       7             : // met:
       8             : //
       9             : //     * Redistributions of source code must retain the above copyright
      10             : //       notice, this list of conditions and the following disclaimer.
      11             : //     * Redistributions in binary form must reproduce the above
      12             : //       copyright notice, this list of conditions and the following
      13             : //       disclaimer in the documentation and/or other materials provided
      14             : //       with the distribution.
      15             : //     * Neither the name of Google Inc. nor the names of its
      16             : //       contributors may be used to endorse or promote products derived
      17             : //       from this software without specific prior written permission.
      18             : //
      19             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      20             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      21             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      22             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      23             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      24             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      25             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      26             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      27             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      28             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      29             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30             : 
      31             : #include "irregexp/RegExpEngine.h"
      32             : 
      33             : #include <utility>
      34             : 
      35             : #include "gc/GC.h"
      36             : #include "irregexp/NativeRegExpMacroAssembler.h"
      37             : #include "irregexp/RegExpCharacters.h"
      38             : #include "irregexp/RegExpMacroAssembler.h"
      39             : #include "jit/ExecutableAllocator.h"
      40             : #include "jit/JitCommon.h"
      41             : 
      42             : #include "irregexp/RegExpCharacters-inl.h"
      43             : 
      44             : using namespace js;
      45             : using namespace js::irregexp;
      46             : 
      47             : using mozilla::ArrayLength;
      48             : using mozilla::DebugOnly;
      49             : using mozilla::Maybe;
      50             : 
      51             : #define DEFINE_ACCEPT(Type)                                          \
      52             :     void Type##Node::Accept(NodeVisitor* visitor) {                  \
      53             :         visitor->Visit##Type(this);                                  \
      54             :     }
      55        2628 : FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
      56             : #undef DEFINE_ACCEPT
      57             : 
      58           0 : void LoopChoiceNode::Accept(NodeVisitor* visitor) {
      59         165 :     visitor->VisitLoopChoice(this);
      60         165 : }
      61             : 
      62             : static const int kMaxLookaheadForBoyerMoore = 8;
      63             : 
      64        2793 : RegExpNode::RegExpNode(LifoAlloc* alloc)
      65           0 :   : replacement_(nullptr), trace_count_(0), alloc_(alloc)
      66             : {
      67        2793 :     bm_info_[0] = bm_info_[1] = nullptr;
      68           0 : }
      69             : 
      70             : static const int kMaxOneByteCharCode = 0xff;
      71             : static const int kMaxUtf16CodeUnit = 0xffff;
      72             : 
      73             : static char16_t
      74             : MaximumCharacter(bool latin1)
      75             : {
      76        3932 :     return latin1 ? kMaxOneByteCharCode : kMaxUtf16CodeUnit;
      77             : }
      78             : 
      79             : static void
      80         144 : AddClass(const int* elmv, int elmc,
      81             :          CharacterRangeVector* ranges)
      82             : {
      83           0 :     elmc--;
      84           0 :     MOZ_ASSERT(elmv[elmc] == 0x10000);
      85           0 :     for (int i = 0; i < elmc; i += 2) {
      86         939 :         MOZ_ASSERT(elmv[i] < elmv[i + 1]);
      87           0 :         ranges->append(CharacterRange(elmv[i], elmv[i + 1] - 1));
      88             :     }
      89         144 : }
      90             : 
      91             : void
      92         150 : js::irregexp::AddClassNegated(const int* elmv,
      93             :                               int elmc,
      94             :                               CharacterRangeVector* ranges)
      95             : {
      96           0 :     elmc--;
      97           0 :     MOZ_ASSERT(elmv[elmc] == 0x10000);
      98         150 :     MOZ_ASSERT(elmv[0] != 0x0000);
      99           0 :     MOZ_ASSERT(elmv[elmc-1] != kMaxUtf16CodeUnit);
     100             :     char16_t last = 0x0000;
     101           0 :     for (int i = 0; i < elmc; i += 2) {
     102           0 :         MOZ_ASSERT(last <= elmv[i] - 1);
     103           0 :         MOZ_ASSERT(elmv[i] < elmv[i + 1]);
     104        1084 :         ranges->append(CharacterRange(last, elmv[i] - 1));
     105           0 :         last = elmv[i + 1];
     106             :     }
     107         300 :     ranges->append(CharacterRange(last, kMaxUtf16CodeUnit));
     108         150 : }
     109             : 
     110             : void
     111         371 : CharacterRange::AddClassEscape(LifoAlloc* alloc, char16_t type,
     112             :                                CharacterRangeVector* ranges)
     113             : {
     114           0 :     switch (type) {
     115             :       case 's':
     116          79 :         AddClass(kSpaceRanges, kSpaceRangeCount, ranges);
     117           0 :         break;
     118             :       case 'S':
     119          13 :         AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);
     120           0 :         break;
     121             :       case 'w':
     122          28 :         AddClass(kWordRanges, kWordRangeCount, ranges);
     123           0 :         break;
     124             :       case 'W':
     125           1 :         AddClassNegated(kWordRanges, kWordRangeCount, ranges);
     126           0 :         break;
     127             :       case 'd':
     128          37 :         AddClass(kDigitRanges, kDigitRangeCount, ranges);
     129           0 :         break;
     130             :       case 'D':
     131           0 :         AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);
     132           0 :         break;
     133             :       case '.':
     134         136 :         AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
     135         136 :         break;
     136             :         // This is not a character range as defined by the spec but a
     137             :         // convenient shorthand for a character class that matches any
     138             :         // character.
     139             :       case '*':
     140          77 :         ranges->append(CharacterRange::Everything());
     141          77 :         break;
     142             :         // This is the set of characters matched by the $ and ^ symbols
     143             :         // in multiline mode.
     144             :       case 'n':
     145           0 :         AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
     146           0 :         break;
     147             :       default:
     148           0 :         MOZ_CRASH("Bad character class escape");
     149             :     }
     150         371 : }
     151             : 
     152             : // Add class escape, excluding surrogate pair range.
     153             : void
     154           0 : CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
     155             :                                       CharacterRangeVector* ranges, bool ignore_case)
     156             : {
     157           0 :     switch (type) {
     158             :       case 's':
     159             :       case 'd':
     160           0 :         return AddClassEscape(alloc, type, ranges);
     161             :         break;
     162             :       case 'S':
     163           0 :         AddClassNegated(kSpaceAndSurrogateRanges, kSpaceAndSurrogateRangeCount, ranges);
     164           0 :         break;
     165             :       case 'w':
     166           0 :         if (ignore_case)
     167           0 :             AddClass(kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, ranges);
     168             :         else
     169           0 :             AddClassEscape(alloc, type, ranges);
     170             :         break;
     171             :       case 'W':
     172           0 :         if (ignore_case) {
     173             :             AddClass(kNegatedIgnoreCaseWordAndSurrogateRanges,
     174           0 :                      kNegatedIgnoreCaseWordAndSurrogateRangeCount, ranges);
     175             :         } else {
     176           0 :             AddClassNegated(kWordAndSurrogateRanges, kWordAndSurrogateRangeCount, ranges);
     177             :         }
     178             :         break;
     179             :       case 'D':
     180           0 :         AddClassNegated(kDigitAndSurrogateRanges, kDigitAndSurrogateRangeCount, ranges);
     181           0 :         break;
     182             :       default:
     183           0 :         MOZ_CRASH("Bad type!");
     184             :     }
     185             : }
     186             : 
     187             : static bool
     188           0 : RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode)
     189             : {
     190           0 :     for (size_t i = 0; i < ranges.length(); i++) {
     191             :         // TODO(dcarney): this could be a lot more efficient.
     192           0 :         if (RangeContainsLatin1Equivalents(ranges[i], unicode))
     193             :             return true;
     194             :     }
     195             :     return false;
     196             : }
     197             : 
     198             : static const size_t kEcma262UnCanonicalizeMaxWidth = 4;
     199             : 
     200             : // Returns the number of characters in the equivalence class, omitting those
     201             : // that cannot occur in the source string if it is a one byte string.
     202             : static MOZ_ALWAYS_INLINE int
     203         883 : GetCaseIndependentLetters(char16_t character,
     204             :                           bool latin1_subject,
     205             :                           bool unicode,
     206             :                           const char16_t* choices,
     207             :                           size_t choices_length,
     208             :                           char16_t* letters)
     209             : {
     210           0 :     size_t count = 0;
     211        5298 :     for (size_t i = 0; i < choices_length; i++) {
     212        4415 :         char16_t c = choices[i];
     213             : 
     214             :         // Skip characters that can't appear in one byte strings.
     215        4415 :         if (!unicode && latin1_subject && c > kMaxOneByteCharCode)
     216             :             continue;
     217             : 
     218             :         // Watch for duplicates.
     219             :         bool found = false;
     220        5443 :         for (size_t j = 0; j < count; j++) {
     221        3532 :             if (letters[j] == c) {
     222             :                 found = true;
     223             :                 break;
     224             :             }
     225             :         }
     226        4415 :         if (found)
     227             :             continue;
     228             : 
     229        1397 :         letters[count++] = c;
     230             :     }
     231             : 
     232         883 :     return count;
     233             : }
     234             : 
     235             : static int
     236         883 : GetCaseIndependentLetters(char16_t character,
     237             :                           bool latin1_subject,
     238             :                           bool unicode,
     239             :                           char16_t* letters)
     240             : {
     241         883 :     if (unicode) {
     242             :         const char16_t choices[] = {
     243             :             character,
     244           0 :             unicode::FoldCase(character),
     245           0 :             unicode::ReverseFoldCase1(character),
     246           0 :             unicode::ReverseFoldCase2(character),
     247           0 :             unicode::ReverseFoldCase3(character),
     248           0 :         };
     249           0 :         return GetCaseIndependentLetters(character, latin1_subject, unicode,
     250           0 :                                          choices, ArrayLength(choices), letters);
     251             :     }
     252             : 
     253           0 :     char16_t upper = unicode::ToUpperCase(character);
     254           0 :     unicode::CodepointsWithSameUpperCase others(character);
     255           0 :     char16_t other1 = others.other1();
     256        1766 :     char16_t other2 = others.other2();
     257        1766 :     char16_t other3 = others.other3();
     258             : 
     259             :     // ES 2017 draft 996af87b7072b3c3dd2b1def856c66f456102215 21.2.4.2
     260             :     // step 3.g.
     261             :     // The standard requires that non-ASCII characters cannot have ASCII
     262             :     // character codes in their equivalence class, even though this
     263             :     // situation occurs multiple times in the Unicode tables.
     264             :     static const unsigned kMaxAsciiCharCode = 127;
     265         883 :     if (upper <= kMaxAsciiCharCode) {
     266         883 :         if (character > kMaxAsciiCharCode) {
     267             :             // If Canonicalize(character) == character, all other characters
     268             :             // should be ignored.
     269           0 :             return GetCaseIndependentLetters(character, latin1_subject, unicode,
     270           0 :                                              &character, 1, letters);
     271             :         }
     272             : 
     273           0 :         if (other1 > kMaxAsciiCharCode)
     274           0 :             other1 = character;
     275           0 :         if (other2 > kMaxAsciiCharCode)
     276           1 :             other2 = character;
     277         883 :         if (other3 > kMaxAsciiCharCode)
     278           0 :             other3 = character;
     279             :     }
     280             : 
     281             :     const char16_t choices[] = {
     282             :         character,
     283             :         upper,
     284             :         other1,
     285             :         other2,
     286             :         other3
     287           0 :     };
     288         883 :     return GetCaseIndependentLetters(character, latin1_subject, unicode,
     289         883 :                                      choices, ArrayLength(choices), letters);
     290             : }
     291             : 
     292             : void
     293           0 : CharacterRange::AddCaseEquivalents(bool is_latin1, bool unicode, CharacterRangeVector* ranges)
     294             : {
     295          83 :     char16_t bottom = from();
     296           0 :     char16_t top = to();
     297             : 
     298          83 :     if (is_latin1 && !RangeContainsLatin1Equivalents(*this, unicode)) {
     299           0 :         if (bottom > kMaxOneByteCharCode)
     300             :             return;
     301          57 :         if (top > kMaxOneByteCharCode)
     302           0 :             top = kMaxOneByteCharCode;
     303             :     } else {
     304             :         // Nothing to do for surrogates.
     305          26 :         if (bottom >= unicode::LeadSurrogateMin && top <= unicode::TrailSurrogateMax)
     306             :             return;
     307             :     }
     308             : 
     309           0 :     for (char16_t c = bottom;; c++) {
     310             :         char16_t chars[kEcma262UnCanonicalizeMaxWidth];
     311           0 :         size_t length = GetCaseIndependentLetters(c, is_latin1, unicode, chars);
     312             : 
     313           0 :         for (size_t i = 0; i < length; i++) {
     314        1254 :             char16_t other = chars[i];
     315        1254 :             if (other == c)
     316             :                 continue;
     317             : 
     318             :             // Try to combine with an existing range.
     319             :             bool found = false;
     320           0 :             for (size_t i = 0; i < ranges->length(); i++) {
     321        2094 :                 CharacterRange& range = (*ranges)[i];
     322        4188 :                 if (range.Contains(other)) {
     323             :                     found = true;
     324             :                     break;
     325           0 :                 } else if (other == range.from() - 1) {
     326           0 :                     range.set_from(other);
     327           0 :                     found = true;
     328           0 :                     break;
     329           0 :                 } else if (other == range.to() + 1) {
     330           0 :                     range.set_to(other);
     331         250 :                     found = true;
     332         250 :                     break;
     333             :                 }
     334             :             }
     335             : 
     336         480 :             if (!found)
     337          22 :                 ranges->append(CharacterRange::Singleton(other));
     338             :         }
     339             : 
     340           0 :         if (c == top)
     341             :             break;
     342         691 :     }
     343             : }
     344             : 
     345             : static bool
     346           0 : CompareInverseRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length)
     347             : {
     348           0 :     length--;  // Remove final 0x10000.
     349           0 :     MOZ_ASSERT(special_class[length] == 0x10000);
     350           0 :     MOZ_ASSERT(ranges.length() != 0);
     351           0 :     MOZ_ASSERT(length != 0);
     352         418 :     MOZ_ASSERT(special_class[0] != 0);
     353           0 :     if (ranges.length() != (length >> 1) + 1)
     354             :         return false;
     355          85 :     CharacterRange range = ranges[0];
     356           0 :     if (range.from() != 0)
     357             :         return false;
     358         350 :     for (size_t i = 0; i < length; i += 2) {
     359           0 :         if (special_class[i] != (range.to() + 1))
     360             :             return false;
     361         306 :         range = ranges[(i >> 1) + 1];
     362         153 :         if (special_class[i+1] != range.from())
     363             :             return false;
     364             :     }
     365           0 :     if (range.to() != 0xffff)
     366             :         return false;
     367          44 :     return true;
     368             : }
     369             : 
     370             : static bool
     371           0 : CompareRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length)
     372             : {
     373           0 :     length--;  // Remove final 0x10000.
     374         361 :     MOZ_ASSERT(special_class[length] == 0x10000);
     375           0 :     if (ranges.length() * 2 != length)
     376             :         return false;
     377           0 :     for (size_t i = 0; i < length; i += 2) {
     378         216 :         CharacterRange range = ranges[i >> 1];
     379         108 :         if (range.from() != special_class[i] || range.to() != special_class[i + 1] - 1)
     380             :             return false;
     381             :     }
     382             :     return true;
     383             : }
     384             : 
     385             : bool
     386         201 : RegExpCharacterClass::is_standard(LifoAlloc* alloc)
     387             : {
     388             :     // TODO(lrn): Remove need for this function, by not throwing away information
     389             :     // along the way.
     390           0 :     if (is_negated_)
     391             :         return false;
     392           0 :     if (set_.is_standard())
     393             :         return true;
     394           0 :     if (CompareRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) {
     395          10 :         set_.set_standard_set_type('s');
     396           0 :         return true;
     397             :     }
     398           0 :     if (CompareInverseRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) {
     399           6 :         set_.set_standard_set_type('S');
     400           0 :         return true;
     401             :     }
     402         145 :     if (CompareInverseRanges(set_.ranges(alloc),
     403             :                              kLineTerminatorRanges,
     404             :                              kLineTerminatorRangeCount)) {
     405          82 :         set_.set_standard_set_type('.');
     406           0 :         return true;
     407             :     }
     408         104 :     if (CompareRanges(set_.ranges(alloc),
     409             :                       kLineTerminatorRanges,
     410             :                       kLineTerminatorRangeCount)) {
     411           0 :         set_.set_standard_set_type('n');
     412           0 :         return true;
     413             :     }
     414           0 :     if (CompareRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) {
     415          12 :         set_.set_standard_set_type('w');
     416           0 :         return true;
     417             :     }
     418           0 :     if (CompareInverseRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) {
     419           0 :         set_.set_standard_set_type('W');
     420           0 :         return true;
     421             :     }
     422             :     return false;
     423             : }
     424             : 
     425             : bool
     426           0 : CharacterRange::IsCanonical(const CharacterRangeVector& ranges)
     427             : {
     428         566 :     int n = ranges.length();
     429         566 :     if (n <= 1)
     430             :         return true;
     431             : 
     432           0 :     int max = ranges[0].to();
     433           0 :     for (int i = 1; i < n; i++) {
     434        1816 :         CharacterRange next_range = ranges[i];
     435           0 :         if (next_range.from() <= max + 1)
     436             :             return false;
     437         862 :         max = next_range.to();
     438             :     }
     439             :     return true;
     440             : }
     441             : 
     442             : // Move a number of elements in a zonelist to another position
     443             : // in the same list. Handles overlapping source and target areas.
     444             : static
     445          74 : void MoveRanges(CharacterRangeVector& list, int from, int to, int count)
     446             : {
     447             :     // Ranges are potentially overlapping.
     448           0 :     if (from < to) {
     449         193 :         for (int i = count - 1; i >= 0; i--)
     450           0 :             list[to + i] = list[from + i];
     451             :     } else {
     452          12 :         for (int i = 0; i < count; i++)
     453           0 :             list[to + i] = list[from + i];
     454             :     }
     455          74 : }
     456             : 
     457             : static int
     458         107 : InsertRangeInCanonicalList(CharacterRangeVector& list,
     459             :                            int count,
     460             :                            CharacterRange insert)
     461             : {
     462             :     // Inserts a range into list[0..count[, which must be sorted
     463             :     // by from value and non-overlapping and non-adjacent, using at most
     464             :     // list[0..count] for the result. Returns the number of resulting
     465             :     // canonicalized ranges. Inserting a range may collapse existing ranges into
     466             :     // fewer ranges, so the return value can be anything in the range 1..count+1.
     467           0 :     char16_t from = insert.from();
     468           0 :     char16_t to = insert.to();
     469           0 :     int start_pos = 0;
     470           0 :     int end_pos = count;
     471           0 :     for (int i = count - 1; i >= 0; i--) {
     472         516 :         CharacterRange current = list[i];
     473           0 :         if (current.from() > to + 1) {
     474             :             end_pos = i;
     475           0 :         } else if (current.to() + 1 < from) {
     476          63 :             start_pos = i + 1;
     477          63 :             break;
     478             :         }
     479             :     }
     480             : 
     481             :     // Inserted range overlaps, or is adjacent to, ranges at positions
     482             :     // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
     483             :     // not affected by the insertion.
     484             :     // If start_pos == end_pos, the range must be inserted before start_pos.
     485             :     // if start_pos < end_pos, the entire range from start_pos to end_pos
     486             :     // must be merged with the insert range.
     487             : 
     488           0 :     if (start_pos == end_pos) {
     489             :         // Insert between existing ranges at position start_pos.
     490          83 :         if (start_pos < count) {
     491           0 :             MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
     492             :         }
     493         166 :         list[start_pos] = insert;
     494           0 :         return count + 1;
     495             :     }
     496           0 :     if (start_pos + 1 == end_pos) {
     497             :         // Replace single existing range at position start_pos.
     498           0 :         CharacterRange to_replace = list[start_pos];
     499           0 :         int new_from = Min(to_replace.from(), from);
     500          44 :         int new_to = Max(to_replace.to(), to);
     501          44 :         list[start_pos] = CharacterRange(new_from, new_to);
     502             :         return count;
     503             :     }
     504             :     // Replace a number of existing ranges from start_pos to end_pos - 1.
     505             :     // Move the remaining ranges down.
     506             : 
     507           0 :     int new_from = Min(list[start_pos].from(), from);
     508           0 :     int new_to = Max(list[end_pos - 1].to(), to);
     509           2 :     if (end_pos < count) {
     510           0 :         MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
     511             :     }
     512           4 :     list[start_pos] = CharacterRange(new_from, new_to);
     513           2 :     return count - (end_pos - start_pos) + 1;
     514             : }
     515             : 
     516             : void
     517           0 : CharacterRange::Canonicalize(CharacterRangeVector& character_ranges)
     518             : {
     519          46 :     if (character_ranges.length() <= 1) return;
     520             :     // Check whether ranges are already canonical (increasing, non-overlapping,
     521             :     // non-adjacent).
     522           0 :     int n = character_ranges.length();
     523           0 :     int max = character_ranges[0].to();
     524           0 :     int i = 1;
     525           0 :     while (i < n) {
     526         210 :         CharacterRange current = character_ranges[i];
     527         105 :         if (current.from() <= max + 1) {
     528             :             break;
     529             :         }
     530          59 :         max = current.to();
     531          59 :         i++;
     532             :     }
     533             :     // Canonical until the i'th range. If that's all of them, we are done.
     534          46 :     if (i == n) return;
     535             : 
     536             :     // The ranges at index i and forward are not canonicalized. Make them so by
     537             :     // doing the equivalent of insertion sort (inserting each into the previous
     538             :     // list, in order).
     539             :     // Notice that inserting a range can reduce the number of ranges in the
     540             :     // result due to combining of adjacent and overlapping ranges.
     541          46 :     int read = i;  // Range to insert.
     542           0 :     size_t num_canonical = i;  // Length of canonicalized part of list.
     543             :     do {
     544           0 :         num_canonical = InsertRangeInCanonicalList(character_ranges,
     545             :                                                    num_canonical,
     546           0 :                                                    character_ranges[read]);
     547         107 :         read++;
     548           0 :     } while (read < n);
     549             : 
     550          72 :     while (character_ranges.length() > num_canonical)
     551             :         character_ranges.popBack();
     552             : 
     553          46 :     MOZ_ASSERT(CharacterRange::IsCanonical(character_ranges));
     554             : }
     555             : 
     556             : // -------------------------------------------------------------------
     557             : // SeqRegExpNode
     558             : 
     559             : class VisitMarker
     560             : {
     561             :   public:
     562        2734 :     explicit VisitMarker(NodeInfo* info)
     563           0 :       : info_(info)
     564             :     {
     565           0 :         MOZ_ASSERT(!info->visited);
     566        2734 :         info->visited = true;
     567           0 :     }
     568             :     ~VisitMarker() {
     569        2734 :         info_->visited = false;
     570             :     }
     571             :   private:
     572             :     NodeInfo* info_;
     573             : };
     574             : 
     575             : bool
     576           0 : SeqRegExpNode::FillInBMInfo(int offset,
     577             :                             int budget,
     578             :                             BoyerMooreLookahead* bm,
     579             :                             bool not_at_start)
     580             : {
     581           0 :     if (!bm->CheckOverRecursed())
     582             :         return false;
     583           0 :     if (!on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start))
     584             :         return false;
     585           0 :     if (offset == 0)
     586           0 :         set_bm_info(not_at_start, bm);
     587             :     return true;
     588             : }
     589             : 
     590             : RegExpNode*
     591           0 : SeqRegExpNode::FilterLATIN1(int depth, bool ignore_case, bool unicode)
     592             : {
     593        1949 :     if (info()->replacement_calculated)
     594           0 :         return replacement();
     595             : 
     596        1141 :     if (depth < 0)
     597           0 :         return this;
     598             : 
     599           0 :     MOZ_ASSERT(!info()->visited);
     600        2282 :     VisitMarker marker(info());
     601        2282 :     return FilterSuccessor(depth - 1, ignore_case, unicode);
     602             : }
     603             : 
     604             : RegExpNode*
     605           0 : SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case, bool unicode)
     606             : {
     607           0 :     RegExpNode* next = on_success_->FilterLATIN1(depth - 1, ignore_case, unicode);
     608        2130 :     if (next == nullptr)
     609           0 :         return set_replacement(nullptr);
     610             : 
     611        2130 :     on_success_ = next;
     612        4260 :     return set_replacement(this);
     613             : }
     614             : 
     615             : // -------------------------------------------------------------------
     616             : // ActionNode
     617             : 
     618             : int
     619           0 : ActionNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
     620             : {
     621           0 :     if (budget <= 0)
     622             :         return 0;
     623           0 :     if (action_type_ == POSITIVE_SUBMATCH_SUCCESS)
     624             :         return 0;  // Rewinds input!
     625           0 :     return on_success()->EatsAtLeast(still_to_find,
     626             :                                      budget - 1,
     627        2678 :                                      not_at_start);
     628             : }
     629             : 
     630             : bool
     631         279 : ActionNode::FillInBMInfo(int offset,
     632             :                          int budget,
     633             :                          BoyerMooreLookahead* bm,
     634             :                          bool not_at_start)
     635             : {
     636         279 :     if (!bm->CheckOverRecursed())
     637             :         return false;
     638             : 
     639         279 :     if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
     640         279 :         if (!on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start))
     641             :             return false;
     642             :     }
     643         279 :     SaveBMInfo(bm, not_at_start, offset);
     644             : 
     645             :     return true;
     646             : }
     647             : 
     648             : /* static */ ActionNode*
     649           0 : ActionNode::SetRegister(int reg,
     650             :                         int val,
     651             :                         RegExpNode* on_success)
     652             : {
     653           0 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(SET_REGISTER, on_success);
     654           1 :     result->data_.u_store_register.reg = reg;
     655          31 :     result->data_.u_store_register.value = val;
     656           0 :     return result;
     657             : }
     658             : 
     659             : /* static */ ActionNode*
     660           0 : ActionNode::IncrementRegister(int reg, RegExpNode* on_success)
     661             : {
     662           1 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(INCREMENT_REGISTER, on_success);
     663          31 :     result->data_.u_increment_register.reg = reg;
     664           0 :     return result;
     665             : }
     666             : 
     667             : /* static */ ActionNode*
     668           0 : ActionNode::StorePosition(int reg, bool is_capture, RegExpNode* on_success)
     669             : {
     670           0 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(STORE_POSITION, on_success);
     671           1 :     result->data_.u_position_register.reg = reg;
     672         928 :     result->data_.u_position_register.is_capture = is_capture;
     673           0 :     return result;
     674             : }
     675             : 
     676             : /* static */ ActionNode*
     677           0 : ActionNode::ClearCaptures(Interval range, RegExpNode* on_success)
     678             : {
     679           0 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(CLEAR_CAPTURES, on_success);
     680           1 :     result->data_.u_clear_captures.range_from = range.from();
     681          15 :     result->data_.u_clear_captures.range_to = range.to();
     682           0 :     return result;
     683             : }
     684             : 
     685             : /* static */ ActionNode*
     686           0 : ActionNode::BeginSubmatch(int stack_pointer_reg, int position_reg, RegExpNode* on_success)
     687             : {
     688           0 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(BEGIN_SUBMATCH, on_success);
     689           1 :     result->data_.u_submatch.stack_pointer_register = stack_pointer_reg;
     690          11 :     result->data_.u_submatch.current_position_register = position_reg;
     691           0 :     return result;
     692             : }
     693             : 
     694             : /* static */ ActionNode*
     695           0 : ActionNode::PositiveSubmatchSuccess(int stack_pointer_reg,
     696             :                                     int restore_reg,
     697             :                                     int clear_capture_count,
     698             :                                     int clear_capture_from,
     699             :                                     RegExpNode* on_success)
     700             : {
     701           0 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(POSITIVE_SUBMATCH_SUCCESS, on_success);
     702           0 :     result->data_.u_submatch.stack_pointer_register = stack_pointer_reg;
     703           0 :     result->data_.u_submatch.current_position_register = restore_reg;
     704           1 :     result->data_.u_submatch.clear_register_count = clear_capture_count;
     705          10 :     result->data_.u_submatch.clear_register_from = clear_capture_from;
     706           0 :     return result;
     707             : }
     708             : 
     709             : /* static */ ActionNode*
     710           0 : ActionNode::EmptyMatchCheck(int start_register,
     711             :                             int repetition_register,
     712             :                             int repetition_limit,
     713             :                             RegExpNode* on_success)
     714             : {
     715           0 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(EMPTY_MATCH_CHECK, on_success);
     716           0 :     result->data_.u_empty_match_check.start_register = start_register;
     717           0 :     result->data_.u_empty_match_check.repetition_register = repetition_register;
     718           0 :     result->data_.u_empty_match_check.repetition_limit = repetition_limit;
     719           0 :     return result;
     720             : }
     721             : 
     722             : // -------------------------------------------------------------------
     723             : // TextNode
     724             : 
     725             : int
     726           0 : TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
     727             : {
     728        1745 :     int answer = Length();
     729           0 :     if (answer >= still_to_find)
     730             :         return answer;
     731         400 :     if (budget <= 0)
     732             :         return answer;
     733             : 
     734             :     // We are not at start after this node so we set the last argument to 'true'.
     735           0 :     return answer + on_success()->EatsAtLeast(still_to_find - answer,
     736             :                                               budget - 1,
     737         800 :                                               true);
     738             : }
     739             : 
     740             : int
     741           0 : TextNode::GreedyLoopTextLength()
     742             : {
     743         891 :     TextElement elm = elements()[elements().length() - 1];
     744         297 :     return elm.cp_offset() + elm.length();
     745             : }
     746             : 
     747             : RegExpNode*
     748           0 : TextNode::FilterLATIN1(int depth, bool ignore_case, bool unicode)
     749             : {
     750        1018 :     if (info()->replacement_calculated)
     751           0 :         return replacement();
     752             : 
     753         989 :     if (depth < 0)
     754           0 :         return this;
     755             : 
     756           0 :     MOZ_ASSERT(!info()->visited);
     757           0 :     VisitMarker marker(info());
     758           0 :     int element_count = elements().length();
     759           0 :     for (int i = 0; i < element_count; i++) {
     760           0 :         TextElement elm = elements()[i];
     761           0 :         if (elm.text_type() == TextElement::ATOM) {
     762           0 :             CharacterVector& quarks = const_cast<CharacterVector&>(elm.atom()->data());
     763           0 :             for (size_t j = 0; j < quarks.length(); j++) {
     764       21008 :                 uint16_t c = quarks[j];
     765           0 :                 if (c <= kMaxOneByteCharCode)
     766             :                     continue;
     767           0 :                 if (!ignore_case)
     768           0 :                     return set_replacement(nullptr);
     769             : 
     770             :                 // Here, we need to check for characters whose upper and lower cases
     771             :                 // are outside the Latin-1 range.
     772           0 :                 char16_t converted = ConvertNonLatin1ToLatin1(c, unicode);
     773           0 :                 if (converted == 0) {
     774             :                     // Character is outside Latin-1 completely
     775           0 :                     return set_replacement(nullptr);
     776             :                 }
     777             : 
     778             :                 // Convert quark to Latin-1 in place.
     779           0 :                 quarks[j] = converted;
     780             :             }
     781             :         } else {
     782         190 :             MOZ_ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
     783           0 :             RegExpCharacterClass* cc = elm.char_class();
     784             : 
     785           0 :             CharacterRangeVector& ranges = cc->ranges(alloc());
     786         190 :             if (!CharacterRange::IsCanonical(ranges))
     787          18 :                 CharacterRange::Canonicalize(ranges);
     788             : 
     789             :             // Now they are in order so we only need to look at the first.
     790           0 :             int range_count = ranges.length();
     791           0 :             if (cc->is_negated()) {
     792           0 :                 if (range_count != 0 &&
     793          26 :                     ranges[0].from() == 0 &&
     794           0 :                     ranges[0].to() >= kMaxOneByteCharCode)
     795             :                 {
     796             :                     // This will be handled in a later filter.
     797           0 :                     if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode))
     798           0 :                         continue;
     799           0 :                     return set_replacement(nullptr);
     800             :                 }
     801             :             } else {
     802         354 :                 if (range_count == 0 ||
     803         177 :                     ranges[0].from() > kMaxOneByteCharCode)
     804             :                 {
     805             :                     // This will be handled in a later filter.
     806           0 :                     if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode))
     807             :                         continue;
     808           0 :                     return set_replacement(nullptr);
     809             :                 }
     810             :             }
     811             :         }
     812             :     }
     813         989 :     return FilterSuccessor(depth - 1, ignore_case, unicode);
     814             : }
     815             : 
     816             : void
     817           0 : TextNode::CalculateOffsets()
     818             : {
     819        2156 :     int element_count = elements().length();
     820             : 
     821             :     // Set up the offsets of the elements relative to the start.  This is a fixed
     822             :     // quantity since a TextNode can only contain fixed-width things.
     823           0 :     int cp_offset = 0;
     824           0 :     for (int i = 0; i < element_count; i++) {
     825           0 :         TextElement& elm = elements()[i];
     826        2216 :         elm.set_cp_offset(cp_offset);
     827           0 :         cp_offset += elm.length();
     828             :     }
     829           0 : }
     830             : 
     831           0 : void TextNode::MakeCaseIndependent(bool is_latin1, bool unicode)
     832             : {
     833           0 :     int element_count = elements().length();
     834           0 :     for (int i = 0; i < element_count; i++) {
     835           0 :         TextElement elm = elements()[i];
     836          56 :         if (elm.text_type() == TextElement::CHAR_CLASS) {
     837          30 :             RegExpCharacterClass* cc = elm.char_class();
     838             : 
     839             :             // None of the standard character classes is different in the case
     840             :             // independent case and it slows us down if we don't know that.
     841          30 :             if (cc->is_standard(alloc()))
     842           3 :                 continue;
     843             : 
     844             :             // Similarly, there's nothing to do for the character class
     845             :             // containing all characters except line terminators and surrogates.
     846             :             // This one is added by UnicodeEverythingAtom.
     847          54 :             CharacterRangeVector& ranges = cc->ranges(alloc());
     848          27 :             if (CompareInverseRanges(ranges,
     849             :                                      kLineTerminatorAndSurrogateRanges,
     850             :                                      kLineTerminatorAndSurrogateRangeCount))
     851             :             {
     852             :                 continue;
     853             :             }
     854             : 
     855           0 :             int range_count = ranges.length();
     856         110 :             for (int j = 0; j < range_count; j++)
     857         166 :                 ranges[j].AddCaseEquivalents(is_latin1, unicode, &ranges);
     858             :         }
     859             :     }
     860          56 : }
     861             : 
     862             : // -------------------------------------------------------------------
     863             : // AssertionNode
     864             : 
     865             : int
     866           0 : AssertionNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
     867             : {
     868         322 :     if (budget <= 0)
     869             :         return 0;
     870             : 
     871             :     // If we know we are not at the start and we are asked "how many characters
     872             :     // will you match if you succeed?" then we can answer anything since false
     873             :     // implies false.  So lets just return the max answer (still_to_find) since
     874             :     // that won't prevent us from preloading a lot of characters for the other
     875             :     // branches in the node graph.
     876         322 :     if (assertion_type() == AT_START && not_at_start)
     877             :         return still_to_find;
     878             : 
     879         284 :     return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
     880             : }
     881             : 
     882             : bool
     883           0 : AssertionNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start)
     884             : {
     885          23 :     if (!bm->CheckOverRecursed())
     886             :         return false;
     887             : 
     888             :     // Match the behaviour of EatsAtLeast on this node.
     889          23 :     if (assertion_type() == AT_START && not_at_start)
     890             :         return true;
     891             : 
     892           0 :     if (!on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start))
     893             :         return false;
     894          23 :     SaveBMInfo(bm, not_at_start, offset);
     895             :     return true;
     896             : }
     897             : 
     898             : // -------------------------------------------------------------------
     899             : // BackReferenceNode
     900             : 
     901             : int
     902           0 : BackReferenceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
     903             : {
     904           0 :     if (budget <= 0)
     905             :         return 0;
     906           0 :     return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
     907             : }
     908             : 
     909             : bool
     910           0 : BackReferenceNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start)
     911             : {
     912             :     // Working out the set of characters that a backreference can match is too
     913             :     // hard, so we just say that any character can match.
     914           0 :     bm->SetRest(offset);
     915           0 :     SaveBMInfo(bm, not_at_start, offset);
     916           0 :     return true;
     917             : }
     918             : 
     919             : // -------------------------------------------------------------------
     920             : // ChoiceNode
     921             : 
     922             : int
     923         651 : ChoiceNode::EatsAtLeastHelper(int still_to_find,
     924             :                               int budget,
     925             :                               RegExpNode* ignore_this_node,
     926             :                               bool not_at_start)
     927             : {
     928         651 :     if (budget <= 0)
     929             :         return 0;
     930             : 
     931           0 :     int min = 100;
     932           0 :     size_t choice_count = alternatives().length();
     933           0 :     budget = (budget - 1) / choice_count;
     934           0 :     for (size_t i = 0; i < choice_count; i++) {
     935        4536 :         RegExpNode* node = alternatives()[i].node();
     936           0 :         if (node == ignore_this_node) continue;
     937             :         int node_eats_at_least =
     938           0 :             node->EatsAtLeast(still_to_find, budget, not_at_start);
     939           0 :         if (node_eats_at_least < min)
     940         808 :             min = node_eats_at_least;
     941        1935 :         if (min == 0)
     942             :             return 0;
     943             :     }
     944             :     return min;
     945             : }
     946             : 
     947             : int
     948           0 : ChoiceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
     949             : {
     950         316 :     return EatsAtLeastHelper(still_to_find,
     951             :                              budget,
     952             :                              nullptr,
     953         632 :                              not_at_start);
     954             : }
     955             : 
     956             : void
     957         323 : ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
     958             :                                  RegExpCompiler* compiler,
     959             :                                  int characters_filled_in,
     960             :                                  bool not_at_start)
     961             : {
     962           0 :     not_at_start = (not_at_start || not_at_start_);
     963           0 :     int choice_count = alternatives().length();
     964         323 :     MOZ_ASSERT(choice_count > 0);
     965         969 :     alternatives()[0].node()->GetQuickCheckDetails(details,
     966             :                                                    compiler,
     967             :                                                    characters_filled_in,
     968           0 :                                                    not_at_start);
     969           0 :     for (int i = 1; i < choice_count; i++) {
     970        1910 :         QuickCheckDetails new_details(details->characters());
     971        1910 :         RegExpNode* node = alternatives()[i].node();
     972             :         node->GetQuickCheckDetails(&new_details, compiler,
     973             :                                    characters_filled_in,
     974           0 :                                    not_at_start);
     975             :         // Here we merge the quick match details of the two branches.
     976           0 :         details->Merge(&new_details, characters_filled_in);
     977             :     }
     978         323 : }
     979             : 
     980             : bool
     981          69 : ChoiceNode::FillInBMInfo(int offset,
     982             :                          int budget,
     983             :                          BoyerMooreLookahead* bm,
     984             :                          bool not_at_start)
     985             : {
     986          69 :     if (!bm->CheckOverRecursed())
     987             :         return false;
     988             : 
     989           0 :     const GuardedAlternativeVector& alts = alternatives();
     990           0 :     budget = (budget - 1) / alts.length();
     991           0 :     for (size_t i = 0; i < alts.length(); i++) {
     992           0 :         const GuardedAlternative& alt = alts[i];
     993           0 :         if (alt.guards() != nullptr && alt.guards()->length() != 0) {
     994           3 :             bm->SetRest(offset);  // Give up trying to fill in info.
     995           3 :             SaveBMInfo(bm, not_at_start, offset);
     996             :             return true;
     997             :         }
     998         378 :         if (!alt.node()->FillInBMInfo(offset, budget, bm, not_at_start))
     999             :             return false;
    1000             :     }
    1001          66 :     SaveBMInfo(bm, not_at_start, offset);
    1002             :     return true;
    1003             : }
    1004             : 
    1005             : RegExpNode*
    1006           0 : ChoiceNode::FilterLATIN1(int depth, bool ignore_case, bool unicode)
    1007             : {
    1008           0 :     if (info()->replacement_calculated)
    1009           1 :         return replacement();
    1010           0 :     if (depth < 0)
    1011           0 :         return this;
    1012           0 :     if (info()->visited)
    1013           0 :         return this;
    1014         546 :     VisitMarker marker(info());
    1015           0 :     int choice_count = alternatives().length();
    1016             : 
    1017           0 :     for (int i = 0; i < choice_count; i++) {
    1018           0 :         const GuardedAlternative alternative = alternatives()[i];
    1019           0 :         if (alternative.guards() != nullptr && alternative.guards()->length() != 0) {
    1020          46 :             set_replacement(this);
    1021          23 :             return this;
    1022             :         }
    1023             :     }
    1024             : 
    1025             :     int surviving = 0;
    1026             :     RegExpNode* survivor = nullptr;
    1027        2246 :     for (int i = 0; i < choice_count; i++) {
    1028           0 :         GuardedAlternative alternative = alternatives()[i];
    1029             :         RegExpNode* replacement =
    1030           0 :             alternative.node()->FilterLATIN1(depth - 1, ignore_case, unicode);
    1031           0 :         MOZ_ASSERT(replacement != this);  // No missing EMPTY_MATCH_CHECK.
    1032           0 :         if (replacement != nullptr) {
    1033           0 :             alternatives()[i].set_node(replacement);
    1034         998 :             surviving++;
    1035         998 :             survivor = replacement;
    1036             :         }
    1037             :     }
    1038         250 :     if (surviving < 2)
    1039           0 :         return set_replacement(survivor);
    1040             : 
    1041         500 :     set_replacement(this);
    1042         250 :     if (surviving == choice_count)
    1043             :         return this;
    1044             : 
    1045             :     // Only some of the nodes survived the filtering.  We need to rebuild the
    1046             :     // alternatives list.
    1047           0 :     GuardedAlternativeVector new_alternatives(*alloc());
    1048           0 :     new_alternatives.reserve(surviving);
    1049           0 :     for (int i = 0; i < choice_count; i++) {
    1050             :         RegExpNode* replacement =
    1051           0 :             alternatives()[i].node()->FilterLATIN1(depth - 1, ignore_case, unicode);
    1052           0 :         if (replacement != nullptr) {
    1053           0 :             alternatives()[i].set_node(replacement);
    1054           0 :             new_alternatives.append(alternatives()[i]);
    1055             :         }
    1056             :     }
    1057             : 
    1058           0 :     alternatives_ = std::move(new_alternatives);
    1059           0 :     return this;
    1060             : }
    1061             : 
    1062             : // -------------------------------------------------------------------
    1063             : // NegativeLookaheadChoiceNode
    1064             : 
    1065             : bool
    1066           0 : NegativeLookaheadChoiceNode::FillInBMInfo(int offset,
    1067             :                                           int budget,
    1068             :                                           BoyerMooreLookahead* bm,
    1069             :                                           bool not_at_start)
    1070             : {
    1071           0 :     if (!bm->CheckOverRecursed())
    1072             :         return false;
    1073             : 
    1074           0 :     if (!alternatives()[1].node()->FillInBMInfo(offset, budget - 1, bm, not_at_start))
    1075             :         return false;
    1076           0 :     if (offset == 0)
    1077           0 :         set_bm_info(not_at_start, bm);
    1078             :     return true;
    1079             : }
    1080             : 
    1081             : int
    1082           0 : NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
    1083             : {
    1084           3 :     if (budget <= 0)
    1085             :         return 0;
    1086             : 
    1087             :     // Alternative 0 is the negative lookahead, alternative 1 is what comes
    1088             :     // afterwards.
    1089           6 :     RegExpNode* node = alternatives()[1].node();
    1090           3 :     return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
    1091             : }
    1092             : 
    1093             : void
    1094           0 : NegativeLookaheadChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
    1095             :                                                   RegExpCompiler* compiler,
    1096             :                                                   int filled_in,
    1097             :                                                   bool not_at_start)
    1098             : {
    1099             :     // Alternative 0 is the negative lookahead, alternative 1 is what comes
    1100             :     // afterwards.
    1101           0 :     RegExpNode* node = alternatives()[1].node();
    1102           0 :     return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
    1103             : }
    1104             : 
    1105             : RegExpNode*
    1106           0 : NegativeLookaheadChoiceNode::FilterLATIN1(int depth, bool ignore_case, bool unicode)
    1107             : {
    1108           0 :     if (info()->replacement_calculated)
    1109           0 :         return replacement();
    1110           0 :     if (depth < 0)
    1111           0 :         return this;
    1112           1 :     if (info()->visited)
    1113           0 :         return this;
    1114             : 
    1115           2 :     VisitMarker marker(info());
    1116             : 
    1117             :     // Alternative 0 is the negative lookahead, alternative 1 is what comes
    1118             :     // afterwards.
    1119           2 :     RegExpNode* node = alternatives()[1].node();
    1120           0 :     RegExpNode* replacement = node->FilterLATIN1(depth - 1, ignore_case, unicode);
    1121             : 
    1122           0 :     if (replacement == nullptr)
    1123           0 :         return set_replacement(nullptr);
    1124           0 :     alternatives()[1].set_node(replacement);
    1125             : 
    1126           2 :     RegExpNode* neg_node = alternatives()[0].node();
    1127           1 :     RegExpNode* neg_replacement = neg_node->FilterLATIN1(depth - 1, ignore_case, unicode);
    1128             : 
    1129             :     // If the negative lookahead is always going to fail then
    1130             :     // we don't need to check it.
    1131           1 :     if (neg_replacement == nullptr)
    1132           0 :         return set_replacement(replacement);
    1133             : 
    1134           3 :     alternatives()[0].set_node(neg_replacement);
    1135           2 :     return set_replacement(this);
    1136             : }
    1137             : 
    1138             : // -------------------------------------------------------------------
    1139             : // LoopChoiceNode
    1140             : 
    1141             : void
    1142           0 : GuardedAlternative::AddGuard(LifoAlloc* alloc, Guard* guard)
    1143             : {
    1144           0 :     if (guards_ == nullptr)
    1145           0 :         guards_ = alloc->newInfallible<GuardVector>(*alloc);
    1146          47 :     guards_->append(guard);
    1147          47 : }
    1148             : 
    1149             : void
    1150           0 : LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt)
    1151             : {
    1152           0 :     MOZ_ASSERT(loop_node_ == nullptr);
    1153           0 :     AddAlternative(alt);
    1154         165 :     loop_node_ = alt.node();
    1155         165 : }
    1156             : 
    1157             : 
    1158             : void
    1159           0 : LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt)
    1160             : {
    1161           0 :     MOZ_ASSERT(continue_node_ == nullptr);
    1162           0 :     AddAlternative(alt);
    1163         165 :     continue_node_ = alt.node();
    1164         165 : }
    1165             : 
    1166             : int
    1167           0 : LoopChoiceNode::EatsAtLeast(int still_to_find,  int budget, bool not_at_start)
    1168             : {
    1169         670 :     return EatsAtLeastHelper(still_to_find,
    1170             :                              budget - 1,
    1171             :                              loop_node_,
    1172         670 :                              not_at_start);
    1173             : }
    1174             : 
    1175             : void
    1176         286 : LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
    1177             :                                      RegExpCompiler* compiler,
    1178             :                                      int characters_filled_in,
    1179             :                                      bool not_at_start)
    1180             : {
    1181           0 :     if (body_can_be_zero_length_ || info()->visited)
    1182             :         return;
    1183         384 :     VisitMarker marker(info());
    1184         192 :     return ChoiceNode::GetQuickCheckDetails(details,
    1185             :                                             compiler,
    1186             :                                             characters_filled_in,
    1187         192 :                                             not_at_start);
    1188             : }
    1189             : 
    1190             : bool
    1191          33 : LoopChoiceNode::FillInBMInfo(int offset,
    1192             :                              int budget,
    1193             :                              BoyerMooreLookahead* bm,
    1194             :                              bool not_at_start)
    1195             : {
    1196           0 :     if (body_can_be_zero_length_ || budget <= 0) {
    1197           1 :         bm->SetRest(offset);
    1198           1 :         SaveBMInfo(bm, not_at_start, offset);
    1199             :         return true;
    1200             :     }
    1201           0 :     if (!ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start))
    1202             :         return false;
    1203          32 :     SaveBMInfo(bm, not_at_start, offset);
    1204             :     return true;
    1205             : }
    1206             : 
    1207             : RegExpNode*
    1208           0 : LoopChoiceNode::FilterLATIN1(int depth, bool ignore_case, bool unicode)
    1209             : {
    1210           0 :     if (info()->replacement_calculated)
    1211           1 :         return replacement();
    1212           0 :     if (depth < 0)
    1213           0 :         return this;
    1214         253 :     if (info()->visited)
    1215         115 :         return this;
    1216             : 
    1217             :     {
    1218         414 :         VisitMarker marker(info());
    1219             : 
    1220             :         RegExpNode* continue_replacement =
    1221         138 :             continue_node_->FilterLATIN1(depth - 1, ignore_case, unicode);
    1222             : 
    1223             :         // If we can't continue after the loop then there is no sense in doing the
    1224             :         // loop.
    1225         138 :         if (continue_replacement == nullptr)
    1226           0 :             return set_replacement(nullptr);
    1227             :     }
    1228             : 
    1229         138 :     return ChoiceNode::FilterLATIN1(depth - 1, ignore_case, unicode);
    1230             : }
    1231             : 
    1232             : // -------------------------------------------------------------------
    1233             : // Analysis
    1234             : 
    1235             : void
    1236           0 : Analysis::EnsureAnalyzed(RegExpNode* that)
    1237             : {
    1238        3607 :     if (!CheckRecursionLimit(cx)) {
    1239           0 :         failASCII("Stack overflow");
    1240             :         return;
    1241             :     }
    1242             : 
    1243           0 :     if (that->info()->been_analyzed || that->info()->being_analyzed)
    1244             :         return;
    1245           0 :     that->info()->being_analyzed = true;
    1246           0 :     that->Accept(this);
    1247        2793 :     that->info()->being_analyzed = false;
    1248        2793 :     that->info()->been_analyzed = true;
    1249             : }
    1250             : 
    1251             : void
    1252         150 : Analysis::VisitEnd(EndNode* that)
    1253             : {
    1254             :     // nothing to do
    1255         150 : }
    1256             : 
    1257             : void
    1258           0 : Analysis::VisitText(TextNode* that)
    1259             : {
    1260           0 :     if (ignore_case_)
    1261           0 :         that->MakeCaseIndependent(is_latin1_, unicode_);
    1262           0 :     EnsureAnalyzed(that->on_success());
    1263        1078 :     if (!has_failed()) {
    1264           0 :         that->CalculateOffsets();
    1265             :     }
    1266        1078 : }
    1267             : 
    1268             : void
    1269           0 : Analysis::VisitAction(ActionNode* that)
    1270             : {
    1271        1026 :     RegExpNode* target = that->on_success();
    1272           0 :     EnsureAnalyzed(target);
    1273             : 
    1274        1026 :     if (!has_failed()) {
    1275             :         // If the next node is interested in what it follows then this node
    1276             :         // has to be interested too so it can pass the information on.
    1277           0 :         that->info()->AddFromFollowing(target->info());
    1278             :     }
    1279        1026 : }
    1280             : 
    1281             : void
    1282           0 : Analysis::VisitChoice(ChoiceNode* that)
    1283             : {
    1284           0 :     NodeInfo* info = that->info();
    1285             : 
    1286           0 :     for (size_t i = 0; i < that->alternatives().length(); i++) {
    1287           0 :         RegExpNode* node = that->alternatives()[i].node();
    1288         796 :         EnsureAnalyzed(node);
    1289         796 :         if (has_failed()) return;
    1290             : 
    1291             :         // Anything the following nodes need to know has to be known by
    1292             :         // this node also, so it can pass it on.
    1293        1592 :         info->AddFromFollowing(node->info());
    1294             :     }
    1295             : }
    1296             : 
    1297             : void
    1298           0 : Analysis::VisitLoopChoice(LoopChoiceNode* that)
    1299             : {
    1300           0 :     NodeInfo* info = that->info();
    1301           0 :     for (size_t i = 0; i < that->alternatives().length(); i++) {
    1302           0 :         RegExpNode* node = that->alternatives()[i].node();
    1303           0 :         if (node != that->loop_node()) {
    1304           0 :             EnsureAnalyzed(node);
    1305         165 :             if (has_failed()) return;
    1306         165 :             info->AddFromFollowing(node->info());
    1307             :         }
    1308             :     }
    1309             : 
    1310             :     // Check the loop last since it may need the value of this node
    1311             :     // to get a correct result.
    1312           0 :     EnsureAnalyzed(that->loop_node());
    1313         165 :     if (!has_failed())
    1314         165 :         info->AddFromFollowing(that->loop_node()->info());
    1315             : }
    1316             : 
    1317             : void
    1318           0 : Analysis::VisitBackReference(BackReferenceNode* that)
    1319             : {
    1320           0 :     EnsureAnalyzed(that->on_success());
    1321           0 : }
    1322             : 
    1323             : void
    1324           0 : Analysis::VisitAssertion(AssertionNode* that)
    1325             : {
    1326         228 :     EnsureAnalyzed(that->on_success());
    1327         228 : }
    1328             : 
    1329             : // -------------------------------------------------------------------
    1330             : // Implementation of the Irregexp regular expression engine.
    1331             : //
    1332             : // The Irregexp regular expression engine is intended to be a complete
    1333             : // implementation of ECMAScript regular expressions.  It generates either
    1334             : // bytecodes or native code.
    1335             : 
    1336             : //   The Irregexp regexp engine is structured in three steps.
    1337             : //   1) The parser generates an abstract syntax tree.  See RegExpAST.cpp.
    1338             : //   2) From the AST a node network is created.  The nodes are all
    1339             : //      subclasses of RegExpNode.  The nodes represent states when
    1340             : //      executing a regular expression.  Several optimizations are
    1341             : //      performed on the node network.
    1342             : //   3) From the nodes we generate either byte codes or native code
    1343             : //      that can actually execute the regular expression (perform
    1344             : //      the search).  The code generation step is described in more
    1345             : //      detail below.
    1346             : 
    1347             : // Code generation.
    1348             : //
    1349             : //   The nodes are divided into four main categories.
    1350             : //   * Choice nodes
    1351             : //        These represent places where the regular expression can
    1352             : //        match in more than one way.  For example on entry to an
    1353             : //        alternation (foo|bar) or a repetition (*, +, ? or {}).
    1354             : //   * Action nodes
    1355             : //        These represent places where some action should be
    1356             : //        performed.  Examples include recording the current position
    1357             : //        in the input string to a register (in order to implement
    1358             : //        captures) or other actions on register for example in order
    1359             : //        to implement the counters needed for {} repetitions.
    1360             : //   * Matching nodes
    1361             : //        These attempt to match some element part of the input string.
    1362             : //        Examples of elements include character classes, plain strings
    1363             : //        or back references.
    1364             : //   * End nodes
    1365             : //        These are used to implement the actions required on finding
    1366             : //        a successful match or failing to find a match.
    1367             : //
    1368             : //   The code generated (whether as byte codes or native code) maintains
    1369             : //   some state as it runs.  This consists of the following elements:
    1370             : //
    1371             : //   * The capture registers.  Used for string captures.
    1372             : //   * Other registers.  Used for counters etc.
    1373             : //   * The current position.
    1374             : //   * The stack of backtracking information.  Used when a matching node
    1375             : //     fails to find a match and needs to try an alternative.
    1376             : //
    1377             : // Conceptual regular expression execution model:
    1378             : //
    1379             : //   There is a simple conceptual model of regular expression execution
    1380             : //   which will be presented first.  The actual code generated is a more
    1381             : //   efficient simulation of the simple conceptual model:
    1382             : //
    1383             : //   * Choice nodes are implemented as follows:
    1384             : //     For each choice except the last {
    1385             : //       push current position
    1386             : //       push backtrack code location
    1387             : //       <generate code to test for choice>
    1388             : //       backtrack code location:
    1389             : //       pop current position
    1390             : //     }
    1391             : //     <generate code to test for last choice>
    1392             : //
    1393             : //   * Actions nodes are generated as follows
    1394             : //     <push affected registers on backtrack stack>
    1395             : //     <generate code to perform action>
    1396             : //     push backtrack code location
    1397             : //     <generate code to test for following nodes>
    1398             : //     backtrack code location:
    1399             : //     <pop affected registers to restore their state>
    1400             : //     <pop backtrack location from stack and go to it>
    1401             : //
    1402             : //   * Matching nodes are generated as follows:
    1403             : //     if input string matches at current position
    1404             : //       update current position
    1405             : //       <generate code to test for following nodes>
    1406             : //     else
    1407             : //       <pop backtrack location from stack and go to it>
    1408             : //
    1409             : //   Thus it can be seen that the current position is saved and restored
    1410             : //   by the choice nodes, whereas the registers are saved and restored by
    1411             : //   by the action nodes that manipulate them.
    1412             : //
    1413             : //   The other interesting aspect of this model is that nodes are generated
    1414             : //   at the point where they are needed by a recursive call to Emit().  If
    1415             : //   the node has already been code generated then the Emit() call will
    1416             : //   generate a jump to the previously generated code instead.  In order to
    1417             : //   limit recursion it is possible for the Emit() function to put the node
    1418             : //   on a work list for later generation and instead generate a jump.  The
    1419             : //   destination of the jump is resolved later when the code is generated.
    1420             : //
    1421             : // Actual regular expression code generation.
    1422             : //
    1423             : //   Code generation is actually more complicated than the above.  In order
    1424             : //   to improve the efficiency of the generated code some optimizations are
    1425             : //   performed
    1426             : //
    1427             : //   * Choice nodes have 1-character lookahead.
    1428             : //     A choice node looks at the following character and eliminates some of
    1429             : //     the choices immediately based on that character.  This is not yet
    1430             : //     implemented.
    1431             : //   * Simple greedy loops store reduced backtracking information.
    1432             : //     A quantifier like /.*foo/m will greedily match the whole input.  It will
    1433             : //     then need to backtrack to a point where it can match "foo".  The naive
    1434             : //     implementation of this would push each character position onto the
    1435             : //     backtracking stack, then pop them off one by one.  This would use space
    1436             : //     proportional to the length of the input string.  However since the "."
    1437             : //     can only match in one way and always has a constant length (in this case
    1438             : //     of 1) it suffices to store the current position on the top of the stack
    1439             : //     once.  Matching now becomes merely incrementing the current position and
    1440             : //     backtracking becomes decrementing the current position and checking the
    1441             : //     result against the stored current position.  This is faster and saves
    1442             : //     space.
    1443             : //   * The current state is virtualized.
    1444             : //     This is used to defer expensive operations until it is clear that they
    1445             : //     are needed and to generate code for a node more than once, allowing
    1446             : //     specialized an efficient versions of the code to be created. This is
    1447             : //     explained in the section below.
    1448             : //
    1449             : // Execution state virtualization.
    1450             : //
    1451             : //   Instead of emitting code, nodes that manipulate the state can record their
    1452             : //   manipulation in an object called the Trace.  The Trace object can record a
    1453             : //   current position offset, an optional backtrack code location on the top of
    1454             : //   the virtualized backtrack stack and some register changes.  When a node is
    1455             : //   to be emitted it can flush the Trace or update it.  Flushing the Trace
    1456             : //   will emit code to bring the actual state into line with the virtual state.
    1457             : //   Avoiding flushing the state can postpone some work (e.g. updates of capture
    1458             : //   registers).  Postponing work can save time when executing the regular
    1459             : //   expression since it may be found that the work never has to be done as a
    1460             : //   failure to match can occur.  In addition it is much faster to jump to a
    1461             : //   known backtrack code location than it is to pop an unknown backtrack
    1462             : //   location from the stack and jump there.
    1463             : //
    1464             : //   The virtual state found in the Trace affects code generation.  For example
    1465             : //   the virtual state contains the difference between the actual current
    1466             : //   position and the virtual current position, and matching code needs to use
    1467             : //   this offset to attempt a match in the correct location of the input
    1468             : //   string.  Therefore code generated for a non-trivial trace is specialized
    1469             : //   to that trace.  The code generator therefore has the ability to generate
    1470             : //   code for each node several times.  In order to limit the size of the
    1471             : //   generated code there is an arbitrary limit on how many specialized sets of
    1472             : //   code may be generated for a given node.  If the limit is reached, the
    1473             : //   trace is flushed and a generic version of the code for a node is emitted.
    1474             : //   This is subsequently used for that node.  The code emitted for non-generic
    1475             : //   trace is not recorded in the node and so it cannot currently be reused in
    1476             : //   the event that code generation is requested for an identical trace.
    1477             : 
    1478             : /* static */ TextElement
    1479           0 : TextElement::Atom(RegExpAtom* atom)
    1480             : {
    1481         922 :     return TextElement(ATOM, atom);
    1482             : }
    1483             : 
    1484             : /* static */ TextElement
    1485           0 : TextElement::CharClass(RegExpCharacterClass* char_class)
    1486             : {
    1487         273 :     return TextElement(CHAR_CLASS, char_class);
    1488             : }
    1489             : 
    1490             : int
    1491           0 : TextElement::length() const
    1492             : {
    1493           0 :     switch (text_type()) {
    1494             :       case ATOM:
    1495       10034 :         return atom()->length();
    1496             :       case CHAR_CLASS:
    1497             :         return 1;
    1498             :     }
    1499           0 :     MOZ_CRASH("Bad text type");
    1500             : }
    1501             : 
    1502             : class FrequencyCollator
    1503             : {
    1504             :   public:
    1505           0 :     FrequencyCollator() : total_samples_(0) {
    1506       19221 :         for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
    1507       19072 :             frequencies_[i] = CharacterFrequency(i);
    1508             :         }
    1509             :     }
    1510             : 
    1511             :     void CountCharacter(int character) {
    1512           0 :         int index = (character & RegExpMacroAssembler::kTableMask);
    1513        8700 :         frequencies_[index].Increment();
    1514        4350 :         total_samples_++;
    1515             :     }
    1516             : 
    1517             :     // Does not measure in percent, but rather per-128 (the table size from the
    1518             :     // regexp macro assembler).
    1519           0 :     int Frequency(int in_character) {
    1520        1068 :         MOZ_ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character);
    1521           0 :         if (total_samples_ < 1) return 1;  // Division by zero.
    1522             :         int freq_in_per128 =
    1523        1065 :             (frequencies_[in_character].counter() * 128) / total_samples_;
    1524        1065 :         return freq_in_per128;
    1525             :     }
    1526             : 
    1527             :   private:
    1528             :     class CharacterFrequency {
    1529             :       public:
    1530       19072 :         CharacterFrequency() : counter_(0), character_(-1) { }
    1531             :         explicit CharacterFrequency(int character)
    1532             :           : counter_(0), character_(character)
    1533             :         {}
    1534             : 
    1535        4350 :         void Increment() { counter_++; }
    1536             :         int counter() { return counter_; }
    1537             :         int character() { return character_; }
    1538             : 
    1539             :      private:
    1540             :         int counter_;
    1541             :         int character_;
    1542             :     };
    1543             : 
    1544             :   private:
    1545             :     CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
    1546             :     int total_samples_;
    1547             : };
    1548             : 
    1549         149 : class irregexp::RegExpCompiler
    1550             : {
    1551             :   public:
    1552             :     RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_count,
    1553             :                    bool ignore_case, bool is_latin1, bool match_only, bool unicode);
    1554             : 
    1555             :     int AllocateRegister() {
    1556          53 :         if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
    1557           0 :             reg_exp_too_big_ = true;
    1558             :             return next_register_;
    1559             :         }
    1560          53 :         return next_register_++;
    1561             :     }
    1562             : 
    1563             :     RegExpCode Assemble(JSContext* cx,
    1564             :                         RegExpMacroAssembler* assembler,
    1565             :                         RegExpNode* start,
    1566             :                         int capture_count);
    1567             : 
    1568           0 :     inline void AddWork(RegExpNode* node) {
    1569           0 :         AutoEnterOOMUnsafeRegion oomUnsafe;
    1570           0 :         if (!work_list_.append(node))
    1571           0 :             oomUnsafe.crash("AddWork");
    1572           0 :     }
    1573             : 
    1574             :     static const int kImplementationOffset = 0;
    1575             :     static const int kNumberOfRegistersOffset = 0;
    1576             :     static const int kCodeOffset = 1;
    1577             : 
    1578             :     RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
    1579             :     EndNode* accept() { return accept_; }
    1580             : 
    1581             :     static const int kMaxRecursion = 100;
    1582             :     inline int recursion_depth() { return recursion_depth_; }
    1583        3739 :     inline void IncrementRecursionDepth() { recursion_depth_++; }
    1584           0 :     inline void DecrementRecursionDepth() { recursion_depth_--; }
    1585             : 
    1586           0 :     void SetRegExpTooBig() { reg_exp_too_big_ = true; }
    1587             :     inline bool isRegExpTooBig() { return reg_exp_too_big_; }
    1588             : 
    1589             :     inline bool ignore_case() { return ignore_case_; }
    1590             :     inline bool latin1() { return latin1_; }
    1591             :     inline bool unicode() { return unicode_; }
    1592        1068 :     FrequencyCollator* frequency_collator() { return &frequency_collator_; }
    1593             : 
    1594             :     int current_expansion_factor() { return current_expansion_factor_; }
    1595             :     void set_current_expansion_factor(int value) {
    1596         442 :         current_expansion_factor_ = value;
    1597             :     }
    1598             : 
    1599             :     JSContext* cx() const { return cx_; }
    1600             :     LifoAlloc* alloc() const { return alloc_; }
    1601             : 
    1602             :     static const int kNoRegister = -1;
    1603             : 
    1604             :     bool CheckOverRecursed();
    1605             : 
    1606             :   private:
    1607             :     EndNode* accept_;
    1608             :     int next_register_;
    1609             :     Vector<RegExpNode*, 4, SystemAllocPolicy> work_list_;
    1610             :     int recursion_depth_;
    1611             :     RegExpMacroAssembler* macro_assembler_;
    1612             :     bool ignore_case_;
    1613             :     bool latin1_;
    1614             :     bool match_only_;
    1615             :     bool unicode_;
    1616             :     bool reg_exp_too_big_;
    1617             :     int current_expansion_factor_;
    1618             :     FrequencyCollator frequency_collator_;
    1619             :     JSContext* cx_;
    1620             :     LifoAlloc* alloc_;
    1621             : };
    1622             : 
    1623             : class RecursionCheck
    1624             : {
    1625             :   public:
    1626        3739 :     explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
    1627           0 :         compiler->IncrementRecursionDepth();
    1628             :     }
    1629        7478 :     ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
    1630             : 
    1631             :   private:
    1632             :     RegExpCompiler* compiler_;
    1633             : };
    1634             : 
    1635             : static inline bool
    1636             : IsLatin1Equivalent(char16_t c, RegExpCompiler* compiler)
    1637             : {
    1638       20550 :     if (c <= kMaxOneByteCharCode)
    1639             :         return true;
    1640             : 
    1641           0 :     if (!compiler->ignore_case())
    1642             :         return false;
    1643             : 
    1644           0 :     char16_t converted = ConvertNonLatin1ToLatin1(c, compiler->unicode());
    1645             : 
    1646           0 :     return converted != 0 && converted <= kMaxOneByteCharCode;
    1647             : }
    1648             : 
    1649             : // Attempts to compile the regexp using an Irregexp code generator.  Returns
    1650             : // a fixed array or a null handle depending on whether it succeeded.
    1651           0 : RegExpCompiler::RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_count,
    1652         149 :                                bool ignore_case, bool latin1, bool match_only, bool unicode)
    1653         149 :   : next_register_(2 * (capture_count + 1)),
    1654             :     recursion_depth_(0),
    1655             :     ignore_case_(ignore_case),
    1656             :     latin1_(latin1),
    1657             :     match_only_(match_only),
    1658             :     unicode_(unicode),
    1659             :     reg_exp_too_big_(false),
    1660             :     current_expansion_factor_(1),
    1661             :     frequency_collator_(),
    1662             :     cx_(cx),
    1663           0 :     alloc_(alloc)
    1664             : {
    1665           0 :     accept_ = alloc->newInfallible<EndNode>(alloc, EndNode::ACCEPT);
    1666         149 :     MOZ_ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
    1667         149 : }
    1668             : 
    1669             : RegExpCode
    1670         149 : RegExpCompiler::Assemble(JSContext* cx,
    1671             :                          RegExpMacroAssembler* assembler,
    1672             :                          RegExpNode* start,
    1673             :                          int capture_count)
    1674             : {
    1675         149 :     macro_assembler_ = assembler;
    1676         298 :     macro_assembler_->set_slow_safe(false);
    1677             : 
    1678             :     // The LifoAlloc used by the regexp compiler is infallible and is currently
    1679             :     // expected to crash on OOM. Thus we have to disable the assertions made to
    1680             :     // prevent us from allocating any new chunk in the LifoAlloc. This is needed
    1681             :     // because the jit::MacroAssembler turns these assertions on by default.
    1682           0 :     LifoAlloc::AutoFallibleScope fallibleAllocator(alloc());
    1683             : 
    1684           0 :     jit::Label fail;
    1685           0 :     macro_assembler_->PushBacktrack(&fail);
    1686           0 :     Trace new_trace;
    1687           0 :     start->Emit(this, &new_trace);
    1688         149 :     macro_assembler_->BindBacktrack(&fail);
    1689           0 :     macro_assembler_->Fail();
    1690             : 
    1691         149 :     while (!work_list_.empty())
    1692           0 :         work_list_.popCopy()->Emit(this, &new_trace);
    1693             : 
    1694           0 :     RegExpCode code = macro_assembler_->GenerateCode(cx, match_only_);
    1695         149 :     if (code.empty())
    1696           0 :         return RegExpCode();
    1697             : 
    1698           0 :     if (reg_exp_too_big_) {
    1699           0 :         code.destroy();
    1700           0 :         js::gc::AutoSuppressGC suppress(cx);
    1701           0 :         JS_ReportErrorASCII(cx, "regexp too big");
    1702           0 :         return RegExpCode();
    1703             :     }
    1704             : 
    1705         149 :     return code;
    1706             : }
    1707             : 
    1708             : template <typename CharT>
    1709             : static void
    1710           7 : SampleChars(FrequencyCollator* collator, const CharT* chars, size_t length)
    1711             : {
    1712             :     // Sample some characters from the middle of the string.
    1713             :     static const int kSampleSize = 128;
    1714             : 
    1715           0 :     int chars_sampled = 0;
    1716           0 :     int half_way = (int(length) - kSampleSize) / 2;
    1717        4499 :     for (size_t i = Max(0, half_way);
    1718        4499 :          i < length && chars_sampled < kSampleSize;
    1719             :          i++, chars_sampled++)
    1720             :     {
    1721           0 :         collator->CountCharacter(chars[i]);
    1722             :     }
    1723           7 : }
    1724             : 
    1725             : static bool
    1726             : IsNativeRegExpEnabled(JSContext* cx)
    1727             : {
    1728             : #ifdef JS_CODEGEN_NONE
    1729             :     return false;
    1730             : #else
    1731         298 :     return cx->options().nativeRegExp();
    1732             : #endif
    1733             : }
    1734             : 
    1735             : RegExpCode
    1736         149 : irregexp::CompilePattern(JSContext* cx, HandleRegExpShared shared, RegExpCompileData* data,
    1737             :                          HandleLinearString sample, bool is_global, bool ignore_case,
    1738             :                          bool is_latin1, bool match_only, bool force_bytecode, bool sticky,
    1739             :                          bool unicode, RegExpShared::JitCodeTables& tables)
    1740             : {
    1741           1 :     if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
    1742           0 :         JS_ReportErrorASCII(cx, "regexp too big");
    1743           0 :         return RegExpCode();
    1744             :     }
    1745             : 
    1746           0 :     LifoAlloc& alloc = cx->tempLifoAlloc();
    1747             :     RegExpCompiler compiler(cx, &alloc, data->capture_count, ignore_case, is_latin1, match_only,
    1748         149 :                             unicode);
    1749             : 
    1750             :     // Sample some characters from the middle of the string.
    1751           0 :     if (sample->hasLatin1Chars()) {
    1752         284 :         JS::AutoCheckCannotGC nogc;
    1753           0 :         SampleChars(compiler.frequency_collator(), sample->latin1Chars(nogc), sample->length());
    1754             :     } else {
    1755          14 :         JS::AutoCheckCannotGC nogc;
    1756          28 :         SampleChars(compiler.frequency_collator(), sample->twoByteChars(nogc), sample->length());
    1757             :     }
    1758             : 
    1759             :     // Wrap the body of the regexp in capture #0.
    1760         149 :     RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
    1761             :                                                       0,
    1762             :                                                       &compiler,
    1763           0 :                                                       compiler.accept());
    1764           0 :     RegExpNode* node = captured_body;
    1765           0 :     bool is_end_anchored = data->tree->IsAnchoredAtEnd();
    1766           0 :     bool is_start_anchored = sticky || data->tree->IsAnchoredAtStart();
    1767         149 :     int max_length = data->tree->max_match();
    1768         149 :     if (!is_start_anchored) {
    1769             :         // Add a .*? at the beginning, outside the body capture, unless
    1770             :         // this expression is anchored at the beginning.
    1771             :         RegExpNode* loop_node =
    1772         116 :             RegExpQuantifier::ToNode(0,
    1773             :                                      RegExpTree::kInfinity,
    1774             :                                      false,
    1775         116 :                                      alloc.newInfallible<RegExpCharacterClass>('*'),
    1776             :                                      &compiler,
    1777             :                                      captured_body,
    1778           0 :                                      data->contains_anchor);
    1779             : 
    1780          58 :         if (data->contains_anchor) {
    1781             :             // Unroll loop once, to take care of the case that might start
    1782             :             // at the start of input.
    1783           0 :             ChoiceNode* first_step_node = alloc.newInfallible<ChoiceNode>(&alloc, 2);
    1784             :             RegExpNode* char_class =
    1785           0 :                 alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), loop_node);
    1786           0 :             first_step_node->AddAlternative(GuardedAlternative(captured_body));
    1787          38 :             first_step_node->AddAlternative(GuardedAlternative(char_class));
    1788          19 :             node = first_step_node;
    1789             :         } else {
    1790             :             node = loop_node;
    1791             :         }
    1792             :     }
    1793             : 
    1794           0 :     if (compiler.isRegExpTooBig()) {
    1795             :         // This might erase the over-recurse error, if any.
    1796           0 :         JS_ReportErrorASCII(cx, "regexp too big");
    1797           0 :         return RegExpCode();
    1798             :     }
    1799             : 
    1800         149 :     if (is_latin1) {
    1801         142 :         node = node->FilterLATIN1(RegExpCompiler::kMaxRecursion, ignore_case, unicode);
    1802             :         // Do it again to propagate the new nodes to places where they were not
    1803             :         // put because they had not been calculated yet.
    1804         142 :         if (node != nullptr) {
    1805         142 :             node = node->FilterLATIN1(RegExpCompiler::kMaxRecursion, ignore_case, unicode);
    1806             :         }
    1807             :     }
    1808             : 
    1809         149 :     if (node == nullptr)
    1810           0 :         node = alloc.newInfallible<EndNode>(&alloc, EndNode::BACKTRACK);
    1811             : 
    1812           0 :     Analysis analysis(cx, ignore_case, is_latin1, unicode);
    1813           0 :     analysis.EnsureAnalyzed(node);
    1814           0 :     if (analysis.has_failed()) {
    1815           0 :         JS_ReportErrorASCII(cx, "%s", analysis.errorMessage());
    1816           0 :         return RegExpCode();
    1817             :     }
    1818             : 
    1819             :     // We should not GC when we have a jit::MacroAssembler on the stack. Check
    1820             :     // this here because the static analysis does not understand the
    1821             :     // Maybe<NativeRegExpMacroAssembler> below.
    1822           0 :     JS::AutoCheckCannotGC nogc(cx);
    1823             : 
    1824           0 :     Maybe<jit::JitContext> ctx;
    1825         298 :     Maybe<NativeRegExpMacroAssembler> native_assembler;
    1826         298 :     Maybe<InterpretedRegExpMacroAssembler> interpreted_assembler;
    1827             : 
    1828             :     RegExpMacroAssembler* assembler;
    1829           0 :     if (IsNativeRegExpEnabled(cx) &&
    1830           0 :         !force_bytecode &&
    1831         447 :         jit::CanLikelyAllocateMoreExecutableMemory() &&
    1832         298 :         shared->getSource()->length() < 32 * 1024)
    1833             :     {
    1834             :         NativeRegExpMacroAssembler::Mode mode =
    1835         149 :             is_latin1 ? NativeRegExpMacroAssembler::LATIN1
    1836           0 :                       : NativeRegExpMacroAssembler::CHAR16;
    1837             : 
    1838           0 :         ctx.emplace(cx, (jit::TempAllocator*) nullptr);
    1839         149 :         native_assembler.emplace(cx, &alloc, mode, (data->capture_count + 1) * 2, tables);
    1840           0 :         assembler = native_assembler.ptr();
    1841             :     } else {
    1842           0 :         interpreted_assembler.emplace(cx, &alloc, (data->capture_count + 1) * 2);
    1843           0 :         assembler = interpreted_assembler.ptr();
    1844             :     }
    1845             : 
    1846             :     // Inserted here, instead of in Assembler, because it depends on information
    1847             :     // in the AST that isn't replicated in the Node structure.
    1848             :     static const int kMaxBacksearchLimit = 1024;
    1849         149 :     if (is_end_anchored &&
    1850           0 :         !is_start_anchored &&
    1851             :         max_length < kMaxBacksearchLimit) {
    1852           4 :         assembler->SetCurrentPositionFromEnd(max_length);
    1853             :     }
    1854             : 
    1855         149 :     if (is_global) {
    1856           0 :         assembler->set_global_mode((data->tree->min_match() > 0)
    1857             :                                    ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
    1858           0 :                                    : RegExpMacroAssembler::GLOBAL);
    1859             :     }
    1860             : 
    1861         149 :     return compiler.Assemble(cx, assembler, node, data->capture_count);
    1862             : }
    1863             : 
    1864             : template <typename CharT>
    1865             : RegExpRunStatus
    1866        2017 : irregexp::ExecuteCode(JSContext* cx, jit::JitCode* codeBlock, const CharT* chars, size_t start,
    1867             :                       size_t length, MatchPairs* matches, size_t* endIndex)
    1868             : {
    1869             :     typedef void (*RegExpCodeSignature)(InputOutputData*);
    1870             : 
    1871           0 :     InputOutputData data(chars, chars + length, start, matches, endIndex);
    1872             : 
    1873        2017 :     RegExpCodeSignature function = reinterpret_cast<RegExpCodeSignature>(codeBlock->raw());
    1874             : 
    1875             :     {
    1876        4034 :         JS::AutoSuppressGCAnalysis nogc;
    1877        2017 :         CALL_GENERATED_1(function, &data);
    1878             :     }
    1879             : 
    1880        2017 :     return (RegExpRunStatus) data.result;
    1881             : }
    1882             : 
    1883             : template RegExpRunStatus
    1884             : irregexp::ExecuteCode(JSContext* cx, jit::JitCode* codeBlock, const Latin1Char* chars, size_t start,
    1885             :                       size_t length, MatchPairs* matches, size_t* endIndex);
    1886             : 
    1887             : template RegExpRunStatus
    1888             : irregexp::ExecuteCode(JSContext* cx, jit::JitCode* codeBlock, const char16_t* chars, size_t start,
    1889             :                       size_t length, MatchPairs* matches, size_t* endIndex);
    1890             : 
    1891             : // -------------------------------------------------------------------
    1892             : // Tree to graph conversion
    1893             : 
    1894             : RegExpNode*
    1895         844 : RegExpAtom::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    1896             : {
    1897             :     TextElementVector* elms =
    1898           0 :         compiler->alloc()->newInfallible<TextElementVector>(*compiler->alloc());
    1899         844 :     elms->append(TextElement::Atom(this));
    1900         844 :     return compiler->alloc()->newInfallible<TextNode>(elms, on_success);
    1901             : }
    1902             : 
    1903             : RegExpNode*
    1904           0 : RegExpText::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    1905             : {
    1906          16 :     return compiler->alloc()->newInfallible<TextNode>(&elements_, on_success);
    1907             : }
    1908             : 
    1909             : RegExpNode*
    1910           0 : RegExpCharacterClass::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    1911             : {
    1912         199 :     return compiler->alloc()->newInfallible<TextNode>(this, on_success);
    1913             : }
    1914             : 
    1915             : RegExpNode*
    1916           0 : RegExpDisjunction::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    1917             : {
    1918         112 :     if (!compiler->CheckOverRecursed())
    1919             :         return on_success;
    1920             : 
    1921           0 :     const RegExpTreeVector& alternatives = this->alternatives();
    1922           0 :     size_t length = alternatives.length();
    1923           0 :     ChoiceNode* result = compiler->alloc()->newInfallible<ChoiceNode>(compiler->alloc(), length);
    1924           0 :     for (size_t i = 0; i < length && !compiler->isRegExpTooBig(); i++) {
    1925        1456 :         GuardedAlternative alternative(alternatives[i]->ToNode(compiler, on_success));
    1926         728 :         result->AddAlternative(alternative);
    1927             :     }
    1928             :     return result;
    1929             : }
    1930             : 
    1931             : RegExpNode*
    1932           0 : RegExpQuantifier::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    1933             : {
    1934           0 :     return ToNode(min(),
    1935             :                   max(),
    1936         121 :                   is_greedy(),
    1937             :                   body(),
    1938             :                   compiler,
    1939         121 :                   on_success);
    1940             : }
    1941             : 
    1942             : // Scoped object to keep track of how much we unroll quantifier loops in the
    1943             : // regexp graph generator.
    1944             : class RegExpExpansionLimiter
    1945             : {
    1946             :   public:
    1947             :     static const int kMaxExpansionFactor = 6;
    1948           0 :     RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
    1949           0 :       : compiler_(compiler),
    1950         221 :         saved_expansion_factor_(compiler->current_expansion_factor()),
    1951           0 :         ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor)
    1952             :     {
    1953           0 :         MOZ_ASSERT(factor > 0);
    1954         221 :         if (ok_to_expand_) {
    1955           0 :             if (factor > kMaxExpansionFactor) {
    1956             :                 // Avoid integer overflow of the current expansion factor.
    1957           7 :                 ok_to_expand_ = false;
    1958           0 :                 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
    1959             :             } else {
    1960           0 :                 int new_factor = saved_expansion_factor_ * factor;
    1961         214 :                 ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
    1962         214 :                 compiler->set_current_expansion_factor(new_factor);
    1963             :             }
    1964             :         }
    1965         221 :     }
    1966             : 
    1967             :     ~RegExpExpansionLimiter() {
    1968         442 :         compiler_->set_current_expansion_factor(saved_expansion_factor_);
    1969             :     }
    1970             : 
    1971             :     bool ok_to_expand() { return ok_to_expand_; }
    1972             : 
    1973             :   private:
    1974             :     RegExpCompiler* compiler_;
    1975             :     int saved_expansion_factor_;
    1976             :     bool ok_to_expand_;
    1977             : };
    1978             : 
    1979             : /* static */ RegExpNode*
    1980         222 : RegExpQuantifier::ToNode(int min,
    1981             :                          int max,
    1982             :                          bool is_greedy,
    1983             :                          RegExpTree* body,
    1984             :                          RegExpCompiler* compiler,
    1985             :                          RegExpNode* on_success,
    1986             :                          bool not_at_start /* = false */)
    1987             : {
    1988             :     // x{f, t} becomes this:
    1989             :     //
    1990             :     //             (r++)<-.
    1991             :     //               |     `
    1992             :     //               |     (x)
    1993             :     //               v     ^
    1994             :     //      (r=0)-->(?)---/ [if r < t]
    1995             :     //               |
    1996             :     //   [if r >= f] \----> ...
    1997             :     //
    1998             : 
    1999             :     // 15.10.2.5 RepeatMatcher algorithm.
    2000             :     // The parser has already eliminated the case where max is 0.  In the case
    2001             :     // where max_match is zero the parser has removed the quantifier if min was
    2002             :     // > 0 and removed the atom if min was 0.  See AddQuantifierToAtom.
    2003             : 
    2004             :     // If we know that we cannot match zero length then things are a little
    2005             :     // simpler since we don't need to make the special zero length match check
    2006             :     // from step 2.1.  If the min and max are small we can unroll a little in
    2007             :     // this case.
    2008             :     static const int kMaxUnrolledMinMatches = 3;  // Unroll (foo)+ and (foo){3,}
    2009             :     static const int kMaxUnrolledMaxMatches = 3;  // Unroll (foo)? and (foo){x,3}
    2010             : 
    2011         222 :     if (max == 0)
    2012             :         return on_success;  // This can happen due to recursion.
    2013             : 
    2014         222 :     if (!compiler->CheckOverRecursed())
    2015             :         return on_success;
    2016             : 
    2017           0 :     bool body_can_be_empty = (body->min_match() == 0);
    2018           0 :     int body_start_reg = RegExpCompiler::kNoRegister;
    2019           0 :     Interval capture_registers = body->CaptureRegisters();
    2020         222 :     bool needs_capture_clearing = !capture_registers.is_empty();
    2021           0 :     LifoAlloc* alloc = compiler->alloc();
    2022             : 
    2023           0 :     if (body_can_be_empty) {
    2024           0 :         body_start_reg = compiler->AllocateRegister();
    2025         222 :     } else if (!needs_capture_clearing) {
    2026             :         // Only unroll if there are no captures and the body can't be
    2027             :         // empty.
    2028             :         {
    2029           0 :             RegExpExpansionLimiter limiter(compiler, min + ((max != min) ? 1 : 0));
    2030         207 :             if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
    2031          43 :                 int new_max = (max == kInfinity) ? max : max - min;
    2032             :                 // Recurse once to get the loop or optional matches after the fixed
    2033             :                 // ones.
    2034          43 :                 RegExpNode* answer = ToNode(0, new_max, is_greedy, body, compiler, on_success, true);
    2035             :                 // Unroll the forced matches from 0 to min.  This can cause chains of
    2036             :                 // TextNodes (which the parser does not generate).  These should be
    2037             :                 // combined if it turns out they hinder good code generation.
    2038           0 :                 for (int i = 0; i < min; i++)
    2039          43 :                     answer = body->ToNode(compiler, answer);
    2040          86 :                 return answer;
    2041             :             }
    2042             :         }
    2043           0 :         if (max <= kMaxUnrolledMaxMatches && min == 0) {
    2044           0 :             MOZ_ASSERT(max > 0);  // Due to the 'if' above.
    2045          14 :             RegExpExpansionLimiter limiter(compiler, max);
    2046          14 :             if (limiter.ok_to_expand()) {
    2047             :                 // Unroll the optional matches up to max.
    2048             :                 RegExpNode* answer = on_success;
    2049           0 :                 for (int i = 0; i < max; i++) {
    2050           0 :                     ChoiceNode* alternation = alloc->newInfallible<ChoiceNode>(alloc, 2);
    2051           0 :                     if (is_greedy) {
    2052          28 :                         alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer)));
    2053           0 :                         alternation->AddAlternative(GuardedAlternative(on_success));
    2054             :                     } else {
    2055           0 :                         alternation->AddAlternative(GuardedAlternative(on_success));
    2056           0 :                         alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer)));
    2057             :                     }
    2058          14 :                     answer = alternation;
    2059           0 :                     if (not_at_start) alternation->set_not_at_start();
    2060             :                 }
    2061          28 :                 return answer;
    2062             :             }
    2063             :         }
    2064             :     }
    2065           0 :     bool has_min = min > 0;
    2066         165 :     bool has_max = max < RegExpTree::kInfinity;
    2067           0 :     bool needs_counter = has_min || has_max;
    2068             :     int reg_ctr = needs_counter
    2069           0 :         ? compiler->AllocateRegister()
    2070           0 :         : RegExpCompiler::kNoRegister;
    2071           0 :     LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0);
    2072         165 :     if (not_at_start)
    2073           0 :         center->set_not_at_start();
    2074             :     RegExpNode* loop_return = needs_counter
    2075           0 :         ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
    2076         165 :         : static_cast<RegExpNode*>(center);
    2077         165 :     if (body_can_be_empty) {
    2078             :         // If the body can be empty we need to check if it was and then
    2079             :         // backtrack.
    2080           0 :         loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
    2081             :                                                   reg_ctr,
    2082             :                                                   min,
    2083           0 :                                                   loop_return);
    2084             :     }
    2085         165 :     RegExpNode* body_node = body->ToNode(compiler, loop_return);
    2086         165 :     if (body_can_be_empty) {
    2087             :         // If the body can be empty we need to store the start position
    2088             :         // so we can bail out if it was empty.
    2089           0 :         body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
    2090             :     }
    2091           0 :     if (needs_capture_clearing) {
    2092             :         // Before entering the body of this loop we need to clear captures.
    2093           0 :         body_node = ActionNode::ClearCaptures(capture_registers, body_node);
    2094             :     }
    2095           0 :     GuardedAlternative body_alt(body_node);
    2096           0 :     if (has_max) {
    2097          60 :         Guard* body_guard = alloc->newInfallible<Guard>(reg_ctr, Guard::LT, max);
    2098           0 :         body_alt.AddGuard(alloc, body_guard);
    2099             :     }
    2100           0 :     GuardedAlternative rest_alt(on_success);
    2101           0 :     if (has_min) {
    2102          34 :         Guard* rest_guard = alloc->newInfallible<Guard>(reg_ctr, Guard::GEQ, min);
    2103           0 :         rest_alt.AddGuard(alloc, rest_guard);
    2104             :     }
    2105           0 :     if (is_greedy) {
    2106         101 :         center->AddLoopAlternative(body_alt);
    2107           0 :         center->AddContinueAlternative(rest_alt);
    2108             :     } else {
    2109          64 :         center->AddContinueAlternative(rest_alt);
    2110           0 :         center->AddLoopAlternative(body_alt);
    2111             :     }
    2112         165 :     if (needs_counter)
    2113          62 :         return ActionNode::SetRegister(reg_ctr, 0, center);
    2114             :     return center;
    2115             : }
    2116             : 
    2117             : RegExpNode*
    2118         228 : RegExpAssertion::ToNode(RegExpCompiler* compiler,
    2119             :                         RegExpNode* on_success)
    2120             : {
    2121         228 :     NodeInfo info;
    2122           0 :     LifoAlloc* alloc = compiler->alloc();
    2123             : 
    2124           0 :     switch (assertion_type()) {
    2125             :       case START_OF_LINE:
    2126           0 :         return AssertionNode::AfterNewline(on_success);
    2127             :       case START_OF_INPUT:
    2128           0 :         return AssertionNode::AtStart(on_success);
    2129             :       case BOUNDARY:
    2130           0 :         return AssertionNode::AtBoundary(on_success);
    2131             :       case NON_BOUNDARY:
    2132           0 :         return AssertionNode::AtNonBoundary(on_success);
    2133             :       case END_OF_INPUT:
    2134         114 :         return AssertionNode::AtEnd(on_success);
    2135             :       case END_OF_LINE: {
    2136             :         // Compile $ in multiline regexps as an alternation with a positive
    2137             :         // lookahead in one side and an end-of-input on the other side.
    2138             :         // We need two registers for the lookahead.
    2139           0 :         int stack_pointer_register = compiler->AllocateRegister();
    2140           0 :         int position_register = compiler->AllocateRegister();
    2141             :         // The ChoiceNode to distinguish between a newline and end-of-input.
    2142           0 :         ChoiceNode* result = alloc->newInfallible<ChoiceNode>(alloc, 2);
    2143             :         // Create a newline atom.
    2144           0 :         CharacterRangeVector* newline_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    2145           0 :         CharacterRange::AddClassEscape(alloc, 'n', newline_ranges);
    2146           0 :         RegExpCharacterClass* newline_atom = alloc->newInfallible<RegExpCharacterClass>('n');
    2147             :         TextNode* newline_matcher =
    2148           0 :             alloc->newInfallible<TextNode>(newline_atom,
    2149           0 :                 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
    2150             :                                                     position_register,
    2151             :                                                     0,  // No captures inside.
    2152             :                                                     -1,  // Ignored if no captures.
    2153           0 :                                                     on_success));
    2154             :         // Create an end-of-input matcher.
    2155             :         RegExpNode* end_of_line =
    2156           0 :             ActionNode::BeginSubmatch(stack_pointer_register, position_register, newline_matcher);
    2157             : 
    2158             :         // Add the two alternatives to the ChoiceNode.
    2159           0 :         GuardedAlternative eol_alternative(end_of_line);
    2160           0 :         result->AddAlternative(eol_alternative);
    2161           0 :         GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
    2162           0 :         result->AddAlternative(end_alternative);
    2163             :         return result;
    2164             :       }
    2165             :       case NOT_AFTER_LEAD_SURROGATE:
    2166           0 :         return AssertionNode::NotAfterLeadSurrogate(on_success);
    2167             :       case NOT_IN_SURROGATE_PAIR:
    2168           0 :         return AssertionNode::NotInSurrogatePair(on_success);
    2169             :       default:
    2170           0 :         MOZ_CRASH("Bad assertion type");
    2171             :     }
    2172             :     return on_success;
    2173             : }
    2174             : 
    2175             : RegExpNode*
    2176           0 : RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    2177             : {
    2178           0 :     return compiler->alloc()->newInfallible<BackReferenceNode>(RegExpCapture::StartRegister(index()),
    2179           0 :                                                                RegExpCapture::EndRegister(index()),
    2180           0 :                                                                on_success);
    2181             : }
    2182             : 
    2183             : RegExpNode*
    2184           0 : RegExpEmpty::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    2185             : {
    2186           0 :     return on_success;
    2187             : }
    2188             : 
    2189             : RegExpNode*
    2190           0 : RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    2191             : {
    2192          22 :     int stack_pointer_register = compiler->AllocateRegister();
    2193           0 :     int position_register = compiler->AllocateRegister();
    2194             : 
    2195           0 :     const int registers_per_capture = 2;
    2196          11 :     const int register_of_first_capture = 2;
    2197           0 :     int register_count = capture_count_ * registers_per_capture;
    2198             :     int register_start =
    2199           0 :         register_of_first_capture + capture_from_ * registers_per_capture;
    2200             : 
    2201          11 :     if (!compiler->CheckOverRecursed())
    2202             :         return on_success;
    2203             : 
    2204           0 :     if (is_positive()) {
    2205             :         RegExpNode* bodyNode =
    2206          20 :             body()->ToNode(compiler,
    2207          10 :                            ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
    2208             :                                                                position_register,
    2209             :                                                                register_count,
    2210             :                                                                register_start,
    2211          20 :                                                                on_success));
    2212           0 :         return ActionNode::BeginSubmatch(stack_pointer_register,
    2213             :                                          position_register,
    2214          10 :                                          bodyNode);
    2215             :     }
    2216             : 
    2217             :     // We use a ChoiceNode for a negative lookahead because it has most of
    2218             :     // the characteristics we need.  It has the body of the lookahead as its
    2219             :     // first alternative and the expression after the lookahead of the second
    2220             :     // alternative.  If the first alternative succeeds then the
    2221             :     // NegativeSubmatchSuccess will unwind the stack including everything the
    2222             :     // choice node set up and backtrack.  If the first alternative fails then
    2223             :     // the second alternative is tried, which is exactly the desired result
    2224             :     // for a negative lookahead.  The NegativeLookaheadChoiceNode is a special
    2225             :     // ChoiceNode that knows to ignore the first exit when calculating quick
    2226             :     // checks.
    2227           1 :     LifoAlloc* alloc = compiler->alloc();
    2228             : 
    2229             :     RegExpNode* success =
    2230             :         alloc->newInfallible<NegativeSubmatchSuccess>(alloc,
    2231             :                                                       stack_pointer_register,
    2232             :                                                       position_register,
    2233             :                                                       register_count,
    2234           1 :                                                       register_start);
    2235           2 :     GuardedAlternative body_alt(body()->ToNode(compiler, success));
    2236             : 
    2237             :     ChoiceNode* choice_node =
    2238           0 :         alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success));
    2239             : 
    2240           0 :     return ActionNode::BeginSubmatch(stack_pointer_register,
    2241             :                                      position_register,
    2242           1 :                                      choice_node);
    2243             : }
    2244             : 
    2245             : RegExpNode*
    2246           0 : RegExpCapture::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    2247             : {
    2248         315 :     return ToNode(body(), index(), compiler, on_success);
    2249             : }
    2250             : 
    2251             : /* static */ RegExpNode*
    2252         464 : RegExpCapture::ToNode(RegExpTree* body,
    2253             :                       int index,
    2254             :                       RegExpCompiler* compiler,
    2255             :                       RegExpNode* on_success)
    2256             : {
    2257         464 :     if (!compiler->CheckOverRecursed())
    2258             :         return on_success;
    2259             : 
    2260           0 :     int start_reg = RegExpCapture::StartRegister(index);
    2261           0 :     int end_reg = RegExpCapture::EndRegister(index);
    2262           0 :     RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
    2263         464 :     RegExpNode* body_node = body->ToNode(compiler, store_end);
    2264         464 :     return ActionNode::StorePosition(start_reg, true, body_node);
    2265             : }
    2266             : 
    2267             : RegExpNode*
    2268           0 : RegExpAlternative::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    2269             : {
    2270         196 :     if (!compiler->CheckOverRecursed())
    2271             :         return on_success;
    2272             : 
    2273           0 :     const RegExpTreeVector& children = nodes();
    2274           0 :     RegExpNode* current = on_success;
    2275         813 :     for (int i = children.length() - 1; i >= 0 && !compiler->isRegExpTooBig(); i--)
    2276        1234 :         current = children[i]->ToNode(compiler, current);
    2277             :     return current;
    2278             : }
    2279             : 
    2280             : // -------------------------------------------------------------------
    2281             : // BoyerMooreLookahead
    2282             : 
    2283             : ContainedInLattice
    2284       10480 : irregexp::AddRange(ContainedInLattice containment,
    2285             :                    const int* ranges,
    2286             :                    int ranges_length,
    2287             :                    Interval new_range)
    2288             : {
    2289           0 :     MOZ_ASSERT((ranges_length & 1) == 1);
    2290       10480 :     MOZ_ASSERT(ranges[ranges_length - 1] == kMaxUtf16CodeUnit + 1);
    2291       10480 :     if (containment == kLatticeUnknown) return containment;
    2292             :     bool inside = false;
    2293             :     int last = 0;
    2294       58541 :     for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
    2295             :         // Consider the range from last to ranges[i].
    2296             :         // We haven't got to the new range yet.
    2297       33839 :         if (ranges[i] <= new_range.from())
    2298             :             continue;
    2299             : 
    2300             :         // New range is wholly inside last-ranges[i].  Note that new_range.to() is
    2301             :         // inclusive, but the values in ranges are not.
    2302        9137 :         if (last <= new_range.from() && new_range.to() < ranges[i])
    2303       18198 :             return Combine(containment, inside ? kLatticeIn : kLatticeOut);
    2304             : 
    2305             :         return kLatticeUnknown;
    2306             :     }
    2307             :     return containment;
    2308             : }
    2309             : 
    2310             : void
    2311           0 : BoyerMoorePositionInfo::Set(int character)
    2312             : {
    2313        2480 :     SetInterval(Interval(character, character));
    2314           0 : }
    2315             : 
    2316             : void
    2317           0 : BoyerMoorePositionInfo::SetInterval(const Interval& interval)
    2318             : {
    2319           0 :     s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
    2320        2620 :     if (unicode_ignore_case_)
    2321           0 :         w_ = AddRange(w_, kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, interval);
    2322             :     else
    2323           0 :         w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
    2324           0 :     d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
    2325           0 :     surrogate_ =
    2326           0 :         AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
    2327           0 :     if (interval.to() - interval.from() >= kMapSize - 1) {
    2328           0 :         if (map_count_ != kMapSize) {
    2329           0 :             map_count_ = kMapSize;
    2330        1290 :             for (int i = 0; i < kMapSize; i++)
    2331        2560 :                 map_[i] = true;
    2332             :         }
    2333             :         return;
    2334             :     }
    2335           0 :     MOZ_ASSERT(interval.from() <= interval.to());
    2336           0 :     for (int i = interval.from(); i != interval.to() + 1; i++) {
    2337           0 :         int mod_character = (i & kMask);
    2338           0 :         if (!map_[mod_character]) {
    2339        1871 :             map_count_++;
    2340           0 :             map_[mod_character] = true;
    2341             :         }
    2342        3781 :         if (map_count_ == kMapSize)
    2343             :             return;
    2344             :     }
    2345             : }
    2346             : 
    2347             : void
    2348           0 : BoyerMoorePositionInfo::SetAll()
    2349             : {
    2350           0 :     s_ = w_ = d_ = kLatticeUnknown;
    2351           0 :     if (map_count_ != kMapSize) {
    2352           0 :         map_count_ = kMapSize;
    2353        3096 :         for (int i = 0; i < kMapSize; i++)
    2354           0 :             map_[i] = true;
    2355             :     }
    2356           0 : }
    2357             : 
    2358          59 : BoyerMooreLookahead::BoyerMooreLookahead(LifoAlloc* alloc, size_t length, RegExpCompiler* compiler)
    2359           0 :   : length_(length), compiler_(compiler), bitmaps_(*alloc)
    2360             : {
    2361          59 :     bool unicode_ignore_case = compiler->unicode() && compiler->ignore_case();
    2362           0 :     max_char_ = MaximumCharacter(compiler->latin1());
    2363             : 
    2364           0 :     bitmaps_.reserve(length);
    2365           0 :     for (size_t i = 0; i < length; i++)
    2366         286 :         bitmaps_.append(alloc->newInfallible<BoyerMoorePositionInfo>(alloc, unicode_ignore_case));
    2367          59 : }
    2368             : 
    2369             : // Find the longest range of lookahead that has the fewest number of different
    2370             : // characters that can occur at a given position.  Since we are optimizing two
    2371             : // different parameters at once this is a tradeoff.
    2372           0 : bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
    2373          58 :   int biggest_points = 0;
    2374             :   // If more than 32 characters out of 128 can occur it is unlikely that we can
    2375             :   // be lucky enough to step forwards much of the time.
    2376           0 :   const int kMaxMax = 32;
    2377           0 :   for (int max_number_of_chars = 4;
    2378         232 :        max_number_of_chars < kMaxMax;
    2379           0 :        max_number_of_chars *= 2) {
    2380             :     biggest_points =
    2381           0 :         FindBestInterval(max_number_of_chars, biggest_points, from, to);
    2382             :   }
    2383          58 :   if (biggest_points == 0) return false;
    2384           0 :   return true;
    2385             : }
    2386             : 
    2387             : // Find the highest-points range between 0 and length_ where the character
    2388             : // information is not too vague.  'Too vague' means that there are more than
    2389             : // max_number_of_chars that can occur at this position.  Calculates the number
    2390             : // of points as the product of width-of-the-range and
    2391             : // probability-of-finding-one-of-the-characters, where the probability is
    2392             : // calculated using the frequency distribution of the sample subject string.
    2393             : int
    2394         174 : BoyerMooreLookahead::FindBestInterval(int max_number_of_chars, int old_biggest_points,
    2395             :                                       int* from, int* to)
    2396             : {
    2397           0 :     int biggest_points = old_biggest_points;
    2398             :     static const int kSize = RegExpMacroAssembler::kTableSize;
    2399           0 :     for (int i = 0; i < length_; ) {
    2400         837 :         while (i < length_ && Count(i) > max_number_of_chars) i++;
    2401         217 :         if (i == length_) break;
    2402             :         int remembered_from = i;
    2403             :         bool union_map[kSize];
    2404           0 :         for (int j = 0; j < kSize; j++) union_map[j] = false;
    2405           0 :         while (i < length_ && Count(i) <= max_number_of_chars) {
    2406           0 :             BoyerMoorePositionInfo* map = bitmaps_[i];
    2407       79980 :             for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j);
    2408         620 :             i++;
    2409             :         }
    2410             :         int frequency = 0;
    2411       38550 :         for (int j = 0; j < kSize; j++) {
    2412       19200 :             if (union_map[j]) {
    2413             :                 // Add 1 to the frequency to give a small per-character boost for
    2414             :                 // the cases where our sampling is not good enough and many
    2415             :                 // characters have a frequency of zero.  This means the frequency
    2416             :                 // can theoretically be up to 2*kSize though we treat it mostly as
    2417             :                 // a fraction of kSize.
    2418        2136 :                 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
    2419             :             }
    2420             :         }
    2421             :         // We use the probability of skipping times the distance we are skipping to
    2422             :         // judge the effectiveness of this.  Actually we have a cut-off:  By
    2423             :         // dividing by 2 we switch off the skipping if the probability of skipping
    2424             :         // is less than 50%.  This is because the multibyte mask-and-compare
    2425             :         // skipping in quickcheck is more likely to do well on this case.
    2426         223 :         bool in_quickcheck_range = ((i - remembered_from < 4) ||
    2427          76 :                                     (compiler_->latin1() ? remembered_from <= 4 : remembered_from <= 2));
    2428             :         // Called 'probability' but it is only a rough estimate and can actually
    2429             :         // be outside the 0-kSize range.
    2430           0 :         int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
    2431           0 :         int points = (i - remembered_from) * probability;
    2432           0 :         if (points > biggest_points) {
    2433           0 :             *from = remembered_from;
    2434          53 :             *to = i - 1;
    2435          53 :             biggest_points = points;
    2436             :         }
    2437             :     }
    2438         174 :     return biggest_points;
    2439             : }
    2440             : 
    2441             : // Take all the characters that will not prevent a successful match if they
    2442             : // occur in the subject string in the range between min_lookahead and
    2443             : // max_lookahead (inclusive) measured from the current position.  If the
    2444             : // character at max_lookahead offset is not one of these characters, then we
    2445             : // can safely skip forwards by the number of characters in the range.
    2446          35 : int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
    2447             :                                       int max_lookahead,
    2448             :                                       uint8_t* boolean_skip_table)
    2449             : {
    2450           0 :     const int kSize = RegExpMacroAssembler::kTableSize;
    2451             : 
    2452          35 :     const int kSkipArrayEntry = 0;
    2453           0 :     const int kDontSkipArrayEntry = 1;
    2454             : 
    2455           0 :     for (int i = 0; i < kSize; i++)
    2456        4480 :         boolean_skip_table[i] = kSkipArrayEntry;
    2457           0 :     int skip = max_lookahead + 1 - min_lookahead;
    2458             : 
    2459           0 :     for (int i = max_lookahead; i >= min_lookahead; i--) {
    2460           0 :         BoyerMoorePositionInfo* map = bitmaps_[i];
    2461           0 :         for (int j = 0; j < kSize; j++) {
    2462       21120 :             if (map->at(j))
    2463         581 :                 boolean_skip_table[j] = kDontSkipArrayEntry;
    2464             :         }
    2465             :     }
    2466             : 
    2467          35 :     return skip;
    2468             : }
    2469             : 
    2470             : // See comment on the implementation of GetSkipTable.
    2471             : bool
    2472           0 : BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm)
    2473             : {
    2474           0 :     const int kSize = RegExpMacroAssembler::kTableSize;
    2475             : 
    2476          58 :     int min_lookahead = 0;
    2477           0 :     int max_lookahead = 0;
    2478             : 
    2479          58 :     if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead))
    2480             :         return false;
    2481             : 
    2482           0 :     bool found_single_character = false;
    2483           0 :     int single_character = 0;
    2484           0 :     for (int i = max_lookahead; i >= min_lookahead; i--) {
    2485           0 :         BoyerMoorePositionInfo* map = bitmaps_[i];
    2486         110 :         if (map->map_count() > 1 ||
    2487           8 :             (found_single_character && map->map_count() != 0)) {
    2488             :             found_single_character = false;
    2489             :             break;
    2490             :         }
    2491        2812 :         for (int j = 0; j < kSize; j++) {
    2492        1416 :             if (map->at(j)) {
    2493             :                 found_single_character = true;
    2494             :                 single_character = j;
    2495             :                 break;
    2496             :             }
    2497             :         }
    2498             :     }
    2499             : 
    2500           0 :     int lookahead_width = max_lookahead + 1 - min_lookahead;
    2501             : 
    2502          47 :     if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
    2503             :         // The mask-compare can probably handle this better.
    2504             :         return false;
    2505             :     }
    2506             : 
    2507           0 :     if (found_single_character) {
    2508           0 :         jit::Label cont, again;
    2509           0 :         masm->Bind(&again);
    2510           0 :         masm->LoadCurrentCharacter(max_lookahead, &cont, true);
    2511           0 :         if (max_char_ > kSize) {
    2512           0 :             masm->CheckCharacterAfterAnd(single_character,
    2513             :                                          RegExpMacroAssembler::kTableMask,
    2514           0 :                                          &cont);
    2515             :         } else {
    2516           0 :             masm->CheckCharacter(single_character, &cont);
    2517             :         }
    2518           0 :         masm->AdvanceCurrentPosition(lookahead_width);
    2519           0 :         masm->JumpOrBacktrack(&again);
    2520           0 :         masm->Bind(&cont);
    2521             :         return true;
    2522             :     }
    2523             : 
    2524           0 :     RegExpShared::JitCodeTable boolean_skip_table;
    2525             :     {
    2526           0 :         AutoEnterOOMUnsafeRegion oomUnsafe;
    2527           1 :         boolean_skip_table.reset(static_cast<uint8_t*>(js_malloc(kSize)));
    2528          35 :         if (!boolean_skip_table)
    2529           0 :             oomUnsafe.crash("Table malloc");
    2530             :     }
    2531             : 
    2532          35 :     int skip_distance = GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table.get());
    2533           0 :     MOZ_ASSERT(skip_distance != 0);
    2534             : 
    2535           0 :     jit::Label cont, again;
    2536           0 :     masm->Bind(&again);
    2537           0 :     masm->LoadCurrentCharacter(max_lookahead, &cont, true);
    2538           0 :     masm->CheckBitInTable(std::move(boolean_skip_table), &cont);
    2539           0 :     masm->AdvanceCurrentPosition(skip_distance);
    2540          35 :     masm->JumpOrBacktrack(&again);
    2541          35 :     masm->Bind(&cont);
    2542             : 
    2543             :     return true;
    2544             : }
    2545             : 
    2546             : bool
    2547           0 : RegExpCompiler::CheckOverRecursed()
    2548             : {
    2549        1795 :     if (!CheckRecursionLimitDontReport(cx())) {
    2550             : #ifdef JS_MORE_DETERMINISTIC
    2551             :         // We don't report overrecursion here, but we throw an exception later
    2552             :         // and this still affects differential testing. Mimic ReportOverRecursed
    2553             :         // (the fuzzers check for this particular string).
    2554             :         fprintf(stderr, "ReportOverRecursed called\n");
    2555             : #endif
    2556           0 :         SetRegExpTooBig();
    2557           0 :         return false;
    2558             :     }
    2559             : 
    2560             :     return true;
    2561             : }
    2562             : 
    2563             : bool
    2564           0 : BoyerMooreLookahead::CheckOverRecursed()
    2565             : {
    2566        1580 :     return compiler()->CheckOverRecursed();
    2567             : }
    2568             : 
    2569             : // -------------------------------------------------------------------
    2570             : // Trace
    2571             : 
    2572           0 : bool Trace::DeferredAction::Mentions(int that)
    2573             : {
    2574       12330 :     if (action_type() == ActionNode::CLEAR_CAPTURES) {
    2575          85 :         Interval range = static_cast<DeferredClearCaptures*>(this)->range();
    2576             :         return range.Contains(that);
    2577             :     }
    2578       12245 :     return reg() == that;
    2579             : }
    2580             : 
    2581           0 : bool Trace::mentions_reg(int reg)
    2582             : {
    2583         127 :     for (DeferredAction* action = actions_; action != nullptr; action = action->next()) {
    2584           0 :         if (action->Mentions(reg))
    2585             :             return true;
    2586             :     }
    2587             :     return false;
    2588             : }
    2589             : 
    2590             : bool
    2591           0 : Trace::GetStoredPosition(int reg, int* cp_offset)
    2592             : {
    2593           0 :     MOZ_ASSERT(0 == *cp_offset);
    2594           0 :     for (DeferredAction* action = actions_; action != nullptr; action = action->next()) {
    2595           0 :         if (action->Mentions(reg)) {
    2596           0 :             if (action->action_type() == ActionNode::STORE_POSITION) {
    2597           0 :                 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
    2598           0 :                 return true;
    2599             :             }
    2600             :             return false;
    2601             :         }
    2602             :     }
    2603             :     return false;
    2604             : }
    2605             : 
    2606             : int
    2607           0 : Trace::FindAffectedRegisters(LifoAlloc* alloc, OutSet* affected_registers)
    2608             : {
    2609           0 :     int max_register = RegExpCompiler::kNoRegister;
    2610           0 :     for (DeferredAction* action = actions_; action != nullptr; action = action->next()) {
    2611           0 :         if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
    2612           0 :             Interval range = static_cast<DeferredClearCaptures*>(action)->range();
    2613           0 :             for (int i = range.from(); i <= range.to(); i++)
    2614          60 :                 affected_registers->Set(alloc, i);
    2615           0 :             if (range.to() > max_register) max_register = range.to();
    2616             :         } else {
    2617        3351 :             affected_registers->Set(alloc, action->reg());
    2618        3351 :             if (action->reg() > max_register) max_register = action->reg();
    2619             :         }
    2620             :     }
    2621        1136 :     return max_register;
    2622             : }
    2623             : 
    2624             : void
    2625        1136 : Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
    2626             :                                 int max_register,
    2627             :                                 OutSet& registers_to_pop,
    2628             :                                 OutSet& registers_to_clear)
    2629             : {
    2630           0 :     for (int reg = max_register; reg >= 0; reg--) {
    2631       12956 :         if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
    2632           0 :         else if (registers_to_clear.Get(reg)) {
    2633             :             int clear_to = reg;
    2634           0 :             while (reg > 0 && registers_to_clear.Get(reg - 1))
    2635         897 :                 reg--;
    2636         859 :             assembler->ClearRegisters(reg, clear_to);
    2637             :         }
    2638             :     }
    2639        1136 : }
    2640             : 
    2641             : enum DeferredActionUndoType {
    2642             :     DEFER_IGNORE,
    2643             :     DEFER_RESTORE,
    2644             :     DEFER_CLEAR
    2645             : };
    2646             : 
    2647             : void
    2648        1136 : Trace::PerformDeferredActions(LifoAlloc* alloc,
    2649             :                               RegExpMacroAssembler* assembler,
    2650             :                               int max_register,
    2651             :                               OutSet& affected_registers,
    2652             :                               OutSet* registers_to_pop,
    2653             :                               OutSet* registers_to_clear)
    2654             : {
    2655             :     // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
    2656        1136 :     const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
    2657             : 
    2658             :     // Count pushes performed to force a stack limit check occasionally.
    2659           0 :     int pushes = 0;
    2660             : 
    2661       14989 :     for (int reg = 0; reg <= max_register; reg++) {
    2662       13853 :         if (!affected_registers.Get(reg))
    2663             :             continue;
    2664             : 
    2665             :         // The chronologically first deferred action in the trace
    2666             :         // is used to infer the action needed to restore a register
    2667             :         // to its previous state (or not, if it's safe to ignore it).
    2668           0 :         DeferredActionUndoType undo_action = DEFER_IGNORE;
    2669             : 
    2670           0 :         int value = 0;
    2671           0 :         bool absolute = false;
    2672        3362 :         bool clear = false;
    2673        3362 :         int store_position = -1;
    2674             :         // This is a little tricky because we are scanning the actions in reverse
    2675             :         // historical order (newest first).
    2676       15692 :         for (DeferredAction* action = actions_;
    2677           0 :              action != nullptr;
    2678             :              action = action->next()) {
    2679       12330 :             if (action->Mentions(reg)) {
    2680        3411 :                 switch (action->action_type()) {
    2681             :                   case ActionNode::SET_REGISTER: {
    2682             :                     Trace::DeferredSetRegister* psr =
    2683           0 :                         static_cast<Trace::DeferredSetRegister*>(action);
    2684           0 :                     if (!absolute) {
    2685          39 :                         value += psr->value();
    2686          39 :                         absolute = true;
    2687             :                     }
    2688             :                     // SET_REGISTER is currently only used for newly introduced loop
    2689             :                     // counters. They can have a significant previous value if they
    2690             :                     // occour in a loop. TODO(lrn): Propagate this information, so
    2691             :                     // we can set undo_action to IGNORE if we know there is no value to
    2692             :                     // restore.
    2693           0 :                     undo_action = DEFER_RESTORE;
    2694          39 :                     MOZ_ASSERT(store_position == -1);
    2695          39 :                     MOZ_ASSERT(!clear);
    2696             :                     break;
    2697             :                   }
    2698             :                   case ActionNode::INCREMENT_REGISTER:
    2699          43 :                     if (!absolute) {
    2700           0 :                         value++;
    2701             :                     }
    2702          43 :                     MOZ_ASSERT(store_position == -1);
    2703          43 :                     MOZ_ASSERT(!clear);
    2704             :                     undo_action = DEFER_RESTORE;
    2705             :                     break;
    2706             :                   case ActionNode::STORE_POSITION: {
    2707             :                     Trace::DeferredCapture* pc =
    2708           0 :                         static_cast<Trace::DeferredCapture*>(action);
    2709        3269 :                     if (!clear && store_position == -1) {
    2710        3269 :                         store_position = pc->cp_offset();
    2711             :                     }
    2712             : 
    2713             :                     // For captures we know that stores and clears alternate.
    2714             :                     // Other register, are never cleared, and if the occur
    2715             :                     // inside a loop, they might be assigned more than once.
    2716        3269 :                     if (reg <= 1) {
    2717             :                         // Registers zero and one, aka "capture zero", is
    2718             :                         // always set correctly if we succeed. There is no
    2719             :                         // need to undo a setting on backtrack, because we
    2720             :                         // will set it again or fail.
    2721             :                         undo_action = DEFER_IGNORE;
    2722             :                     } else {
    2723           0 :                         undo_action = pc->is_capture() ? DEFER_CLEAR : DEFER_RESTORE;
    2724             :                     }
    2725        3269 :                     MOZ_ASSERT(!absolute);
    2726        3269 :                     MOZ_ASSERT(value == 0);
    2727             :                     break;
    2728             :                   }
    2729             :                   case ActionNode::CLEAR_CAPTURES: {
    2730             :                     // Since we're scanning in reverse order, if we've already
    2731             :                     // set the position we have to ignore historically earlier
    2732             :                     // clearing operations.
    2733          60 :                     if (store_position == -1) {
    2734           0 :                         clear = true;
    2735             :                     }
    2736           0 :                     undo_action = DEFER_RESTORE;
    2737          60 :                     MOZ_ASSERT(!absolute);
    2738          60 :                     MOZ_ASSERT(value == 0);
    2739             :                     break;
    2740             :                   }
    2741             :                   default:
    2742           0 :                     MOZ_CRASH("Bad action");
    2743             :                 }
    2744             :             }
    2745             :         }
    2746             :         // Prepare for the undo-action (e.g., push if it's going to be popped).
    2747        3362 :         if (undo_action == DEFER_RESTORE) {
    2748           0 :             pushes++;
    2749             :             RegExpMacroAssembler::StackCheckFlag stack_check =
    2750           0 :                 RegExpMacroAssembler::kNoStackLimitCheck;
    2751           0 :             if (pushes == push_limit) {
    2752           0 :                 stack_check = RegExpMacroAssembler::kCheckStackLimit;
    2753           0 :                 pushes = 0;
    2754             :             }
    2755             : 
    2756           0 :             assembler->PushRegister(reg, stack_check);
    2757           0 :             registers_to_pop->Set(alloc, reg);
    2758        3220 :         } else if (undo_action == DEFER_CLEAR) {
    2759        1756 :             registers_to_clear->Set(alloc, reg);
    2760             :         }
    2761             :         // Perform the chronologically last action (or accumulated increment)
    2762             :         // for the register.
    2763           0 :         if (store_position != -1) {
    2764           0 :             assembler->WriteCurrentPositionToRegister(reg, store_position);
    2765           0 :         } else if (clear) {
    2766           0 :             assembler->ClearRegisters(reg, reg);
    2767           0 :         } else if (absolute) {
    2768           0 :             assembler->SetRegister(reg, value);
    2769          43 :         } else if (value != 0) {
    2770          43 :             assembler->AdvanceRegister(reg, value);
    2771             :         }
    2772             :     }
    2773        1136 : }
    2774             : 
    2775             : // This is called as we come into a loop choice node and some other tricky
    2776             : // nodes.  It normalizes the state of the code generator to ensure we can
    2777             : // generate generic code.
    2778           0 : void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor)
    2779             : {
    2780           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2781             : 
    2782           0 :     MOZ_ASSERT(!is_trivial());
    2783             : 
    2784        1246 :     if (actions_ == nullptr && backtrack() == nullptr) {
    2785             :         // Here we just have some deferred cp advances to fix and we are back to
    2786             :         // a normal situation.  We may also have to forget some information gained
    2787             :         // through a quick check that was already performed.
    2788           0 :         if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
    2789             :         // Create a new trivial state and generate the node with that.
    2790         110 :         Trace new_state;
    2791         110 :         successor->Emit(compiler, &new_state);
    2792             :         return;
    2793             :     }
    2794             : 
    2795             :     // Generate deferred actions here along with code to undo them again.
    2796           0 :     OutSet affected_registers;
    2797             : 
    2798        1136 :     if (backtrack() != nullptr) {
    2799             :         // Here we have a concrete backtrack location.  These are set up by choice
    2800             :         // nodes and so they indicate that we have a deferred save of the current
    2801             :         // position which we may need to emit here.
    2802         976 :         assembler->PushCurrentPosition();
    2803             :     }
    2804             : 
    2805           0 :     int max_register = FindAffectedRegisters(compiler->alloc(), &affected_registers);
    2806           0 :     OutSet registers_to_pop;
    2807        1136 :     OutSet registers_to_clear;
    2808        1136 :     PerformDeferredActions(compiler->alloc(),
    2809             :                            assembler,
    2810             :                            max_register,
    2811             :                            affected_registers,
    2812             :                            &registers_to_pop,
    2813           0 :                            &registers_to_clear);
    2814        1136 :     if (cp_offset_ != 0)
    2815        1031 :         assembler->AdvanceCurrentPosition(cp_offset_);
    2816             : 
    2817             :     // Create a new trivial state and generate the node with that.
    2818           0 :     jit::Label undo;
    2819           0 :     assembler->PushBacktrack(&undo);
    2820        1136 :     Trace new_state;
    2821        1136 :     successor->Emit(compiler, &new_state);
    2822             : 
    2823             :     // On backtrack we need to restore state.
    2824        1136 :     assembler->BindBacktrack(&undo);
    2825             :     RestoreAffectedRegisters(assembler,
    2826             :                              max_register,
    2827             :                              registers_to_pop,
    2828           0 :                              registers_to_clear);
    2829        1136 :     if (backtrack() == nullptr) {
    2830           0 :         assembler->Backtrack();
    2831             :     } else {
    2832         976 :         assembler->PopCurrentPosition();
    2833         976 :         assembler->JumpOrBacktrack(backtrack());
    2834             :     }
    2835             : }
    2836             : 
    2837             : void
    2838           0 : Trace::InvalidateCurrentCharacter()
    2839             : {
    2840         129 :     characters_preloaded_ = 0;
    2841           0 : }
    2842             : 
    2843             : void
    2844           0 : Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler)
    2845             : {
    2846        1364 :     MOZ_ASSERT(by > 0);
    2847             :     // We don't have an instruction for shifting the current character register
    2848             :     // down or for using a shifted value for anything so lets just forget that
    2849             :     // we preloaded any characters into it.
    2850        1364 :     characters_preloaded_ = 0;
    2851             :     // Adjust the offsets of the quick check performed information.  This
    2852             :     // information is used to find out what we already determined about the
    2853             :     // characters by means of mask and compare.
    2854           0 :     quick_check_performed_.Advance(by);
    2855           1 :     cp_offset_ += by;
    2856           1 :     if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
    2857           0 :         compiler->SetRegExpTooBig();
    2858           0 :         cp_offset_ = 0;
    2859             :     }
    2860        2728 :     bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
    2861        1364 : }
    2862             : 
    2863             : void
    2864           0 : OutSet::Set(LifoAlloc* alloc, unsigned value)
    2865             : {
    2866        5309 :     if (value < kFirstLimit) {
    2867           0 :         first_ |= (1 << value);
    2868             :     } else {
    2869         304 :         if (remaining_ == nullptr)
    2870           0 :             remaining_ = alloc->newInfallible<RemainingVector>(*alloc);
    2871             : 
    2872        1064 :         for (size_t i = 0; i < remaining().length(); i++) {
    2873         304 :             if (remaining()[i] == value)
    2874             :                 return;
    2875             :         }
    2876         304 :         remaining().append(value);
    2877             :     }
    2878             : }
    2879             : 
    2880             : bool
    2881           0 : OutSet::Get(unsigned value)
    2882             : {
    2883           0 :     if (value < kFirstLimit)
    2884       26400 :         return (first_ & (1 << value)) != 0;
    2885           0 :     if (remaining_ == nullptr)
    2886             :         return false;
    2887       79120 :     for (size_t i = 0; i < remaining().length(); i++) {
    2888       39940 :         if (remaining()[i] == value)
    2889             :             return true;
    2890             :     }
    2891             :     return false;
    2892             : }
    2893             : 
    2894             : // -------------------------------------------------------------------
    2895             : // Graph emitting
    2896             : 
    2897             : void
    2898           0 : NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace)
    2899             : {
    2900           1 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2901             : 
    2902             :     // Omit flushing the trace. We discard the entire stack frame anyway.
    2903             : 
    2904           2 :     if (!label()->bound()) {
    2905             :         // We are completely independent of the trace, since we ignore it,
    2906             :         // so this code can be used as the generic version.
    2907           2 :         assembler->Bind(label());
    2908             :     }
    2909             : 
    2910             :     // Throw away everything on the backtrack stack since the start
    2911             :     // of the negative submatch and restore the character position.
    2912           1 :     assembler->ReadCurrentPositionFromRegister(current_position_register_);
    2913           0 :     assembler->ReadBacktrackStackPointerFromRegister(stack_pointer_register_);
    2914             : 
    2915           1 :     if (clear_capture_count_ > 0) {
    2916             :         // Clear any captures that might have been performed during the success
    2917             :         // of the body of the negative look-ahead.
    2918           0 :         int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
    2919           0 :         assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
    2920             :     }
    2921             : 
    2922             :     // Now that we have unwound the stack we find at the top of the stack the
    2923             :     // backtrack that the BeginSubmatch node got.
    2924           1 :     assembler->Backtrack();
    2925           1 : }
    2926             : 
    2927             : void
    2928           0 : EndNode::Emit(RegExpCompiler* compiler, Trace* trace)
    2929             : {
    2930           0 :     if (!trace->is_trivial()) {
    2931         551 :         trace->Flush(compiler, this);
    2932           0 :         return;
    2933             :     }
    2934           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2935        1102 :     if (!label()->bound()) {
    2936           0 :         assembler->Bind(label());
    2937             :     }
    2938           0 :     switch (action_) {
    2939             :     case ACCEPT:
    2940         551 :         assembler->Succeed();
    2941           0 :         return;
    2942             :     case BACKTRACK:
    2943           0 :         assembler->JumpOrBacktrack(trace->backtrack());
    2944           0 :         return;
    2945             :     case NEGATIVE_SUBMATCH_SUCCESS:
    2946             :         // This case is handled in a different virtual method.
    2947           0 :         MOZ_CRASH("Bad action: NEGATIVE_SUBMATCH_SUCCESS");
    2948             :     }
    2949           0 :     MOZ_CRASH("Bad action");
    2950             : }
    2951             : 
    2952             : // Emit the code to check for a ^ in multiline mode (1-character lookbehind
    2953             : // that matches newline or the start of input).
    2954             : static void
    2955           0 : EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
    2956             : {
    2957           1 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2958             : 
    2959             :     // We will be loading the previous character into the current character
    2960             :     // register.
    2961           1 :     Trace new_trace(*trace);
    2962           0 :     new_trace.InvalidateCurrentCharacter();
    2963             : 
    2964           2 :     jit::Label ok;
    2965           1 :     if (new_trace.cp_offset() == 0) {
    2966             :         // The start of input counts as a newline in this context, so skip to
    2967             :         // ok if we are at the start.
    2968           1 :         assembler->CheckAtStart(&ok);
    2969             :     }
    2970             : 
    2971             :     // We already checked that we are not at the start of input so it must be
    2972             :     // OK to load the previous character.
    2973           0 :     assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), false);
    2974             : 
    2975           1 :     if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) {
    2976             :         // Newline means \n, \r, 0x2028 or 0x2029.
    2977           0 :         if (!compiler->latin1())
    2978           0 :             assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
    2979           0 :         assembler->CheckCharacter('\n', &ok);
    2980           0 :         assembler->CheckNotCharacter('\r', new_trace.backtrack());
    2981             :     }
    2982           0 :     assembler->Bind(&ok);
    2983           1 :     on_success->Emit(compiler, &new_trace);
    2984           1 : }
    2985             : 
    2986             : // Assert that the next character cannot be a part of a surrogate pair.
    2987             : static void
    2988           0 : EmitNotAfterLeadSurrogate(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
    2989             : {
    2990           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2991             : 
    2992             :     // We will be loading the previous character into the current character
    2993             :     // register.
    2994           0 :     Trace new_trace(*trace);
    2995           0 :     new_trace.InvalidateCurrentCharacter();
    2996             : 
    2997           0 :     jit::Label ok;
    2998           0 :     if (new_trace.cp_offset() == 0)
    2999           0 :         assembler->CheckAtStart(&ok);
    3000             : 
    3001             :     // We already checked that we are not at the start of input so it must be
    3002             :     // OK to load the previous character.
    3003           0 :     assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, new_trace.backtrack(), false);
    3004           0 :     assembler->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax,
    3005           0 :                                      new_trace.backtrack());
    3006             : 
    3007           0 :     assembler->Bind(&ok);
    3008           0 :     on_success->Emit(compiler, &new_trace);
    3009           0 : }
    3010             : 
    3011             : // Assert that the next character is not a trail surrogate that has a
    3012             : // corresponding lead surrogate.
    3013             : static void
    3014           0 : EmitNotInSurrogatePair(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
    3015             : {
    3016           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3017             : 
    3018           0 :     jit::Label ok;
    3019           0 :     assembler->CheckPosition(trace->cp_offset(), &ok);
    3020             : 
    3021             :     // We will be loading the next and previous characters into the current
    3022             :     // character register.
    3023           0 :     Trace new_trace(*trace);
    3024           0 :     new_trace.InvalidateCurrentCharacter();
    3025             : 
    3026           0 :     if (new_trace.cp_offset() == 0)
    3027           0 :         assembler->CheckAtStart(&ok);
    3028             : 
    3029             :     // First check if next character is a trail surrogate.
    3030           0 :     assembler->LoadCurrentCharacter(new_trace.cp_offset(), new_trace.backtrack(), false);
    3031             :     assembler->CheckCharacterNotInRange(unicode::TrailSurrogateMin, unicode::TrailSurrogateMax,
    3032           0 :                                         &ok);
    3033             : 
    3034             :     // Next check if previous character is a lead surrogate.
    3035             :     // We already checked that we are not at the start of input so it must be
    3036             :     // OK to load the previous character.
    3037           0 :     assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, new_trace.backtrack(), false);
    3038           0 :     assembler->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax,
    3039           0 :                                      new_trace.backtrack());
    3040             : 
    3041           0 :     assembler->Bind(&ok);
    3042           0 :     on_success->Emit(compiler, &new_trace);
    3043           0 : }
    3044             : 
    3045             : // Check for [0-9A-Z_a-z].
    3046             : static void
    3047           2 : EmitWordCheck(RegExpMacroAssembler* assembler,
    3048             :               jit::Label* word, jit::Label* non_word, bool fall_through_on_word,
    3049             :               bool unicode_ignore_case)
    3050             : {
    3051           0 :     if (!unicode_ignore_case &&
    3052           2 :         assembler->CheckSpecialCharacterClass(fall_through_on_word ? 'w' : 'W',
    3053           2 :                                               fall_through_on_word ? non_word : word))
    3054             :     {
    3055             :         // Optimized implementation available.
    3056             :         return;
    3057             :     }
    3058             : 
    3059           0 :     if (unicode_ignore_case) {
    3060           0 :         assembler->CheckCharacter(0x017F, word);
    3061           0 :         assembler->CheckCharacter(0x212A, word);
    3062             :     }
    3063             : 
    3064           0 :     assembler->CheckCharacterGT('z', non_word);
    3065           0 :     assembler->CheckCharacterLT('0', non_word);
    3066           0 :     assembler->CheckCharacterGT('a' - 1, word);
    3067           0 :     assembler->CheckCharacterLT('9' + 1, word);
    3068           0 :     assembler->CheckCharacterLT('A', non_word);
    3069           0 :     assembler->CheckCharacterLT('Z' + 1, word);
    3070             : 
    3071           0 :     if (fall_through_on_word)
    3072           0 :         assembler->CheckNotCharacter('_', non_word);
    3073             :     else
    3074           0 :         assembler->CheckCharacter('_', word);
    3075             : }
    3076             : 
    3077             : // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
    3078             : void
    3079           0 : AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace)
    3080             : {
    3081           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3082           0 :     Trace::TriBool next_is_word_character = Trace::UNKNOWN;
    3083           0 :     bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
    3084           4 :     BoyerMooreLookahead* lookahead = bm_info(not_at_start);
    3085           0 :     if (lookahead == nullptr) {
    3086             :         int eats_at_least =
    3087           0 :             Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore,
    3088             :                                                         kRecursionBudget,
    3089           2 :                                                         not_at_start));
    3090           0 :         if (eats_at_least >= 1) {
    3091             :             BoyerMooreLookahead* bm =
    3092           0 :                 alloc()->newInfallible<BoyerMooreLookahead>(alloc(), eats_at_least, compiler);
    3093           0 :             FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
    3094           0 :             if (bm->at(0)->is_non_word())
    3095           1 :                 next_is_word_character = Trace::FALSE_VALUE;
    3096           1 :             if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
    3097             :         }
    3098             :     } else {
    3099           0 :         if (lookahead->at(0)->is_non_word())
    3100           0 :             next_is_word_character = Trace::FALSE_VALUE;
    3101           1 :         if (lookahead->at(0)->is_word())
    3102           0 :             next_is_word_character = Trace::TRUE_VALUE;
    3103             :     }
    3104           0 :     bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
    3105           0 :     if (next_is_word_character == Trace::UNKNOWN) {
    3106           0 :         jit::Label before_non_word;
    3107           0 :         jit::Label before_word;
    3108           0 :         if (trace->characters_preloaded() != 1) {
    3109           0 :             assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
    3110             :         }
    3111             :         // Fall through on non-word.
    3112           0 :         EmitWordCheck(assembler, &before_word, &before_non_word, false,
    3113           0 :                       compiler->unicode() && compiler->ignore_case());
    3114             :         // Next character is not a word character.
    3115           0 :         assembler->Bind(&before_non_word);
    3116           0 :         jit::Label ok;
    3117           0 :         BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
    3118           0 :         assembler->JumpOrBacktrack(&ok);
    3119             : 
    3120           0 :         assembler->Bind(&before_word);
    3121           0 :         BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
    3122           0 :         assembler->Bind(&ok);
    3123           2 :     } else if (next_is_word_character == Trace::TRUE_VALUE) {
    3124           0 :         BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
    3125             :     } else {
    3126           1 :         MOZ_ASSERT(next_is_word_character == Trace::FALSE_VALUE);
    3127           0 :         BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
    3128             :     }
    3129           2 : }
    3130             : 
    3131             : void
    3132           2 : AssertionNode::BacktrackIfPrevious(RegExpCompiler* compiler,
    3133             :                                    Trace* trace,
    3134             :                                    AssertionNode::IfPrevious backtrack_if_previous)
    3135             : {
    3136           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3137           2 :     Trace new_trace(*trace);
    3138           0 :     new_trace.InvalidateCurrentCharacter();
    3139             : 
    3140           0 :     jit::Label fall_through, dummy;
    3141             : 
    3142           2 :     jit::Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack() : &fall_through;
    3143           0 :     jit::Label* word     = backtrack_if_previous == kIsNonWord ? &fall_through : new_trace.backtrack();
    3144             : 
    3145           2 :     if (new_trace.cp_offset() == 0) {
    3146             :         // The start of input counts as a non-word character, so the question is
    3147             :         // decided if we are at the start.
    3148           1 :         assembler->CheckAtStart(non_word);
    3149             :     }
    3150             :     // We already checked that we are not at the start of input so it must be
    3151             :     // OK to load the previous character.
    3152           0 :     assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
    3153           2 :     EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord,
    3154           0 :                   compiler->unicode() && compiler->ignore_case());
    3155             : 
    3156           0 :     assembler->Bind(&fall_through);
    3157           2 :     on_success()->Emit(compiler, &new_trace);
    3158           2 : }
    3159             : 
    3160             : void
    3161         116 : AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
    3162             :                                     RegExpCompiler* compiler,
    3163             :                                     int filled_in,
    3164             :                                     bool not_at_start)
    3165             : {
    3166         116 :     if (assertion_type_ == AT_START && not_at_start) {
    3167          72 :         details->set_cannot_match();
    3168             :         return;
    3169             :     }
    3170          44 :     return on_success()->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
    3171             : }
    3172             : 
    3173             : void
    3174           0 : AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace)
    3175             : {
    3176         522 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3177           0 :     switch (assertion_type_) {
    3178             :       case AT_END: {
    3179           0 :         jit::Label ok;
    3180           0 :         assembler->CheckPosition(trace->cp_offset(), &ok);
    3181         408 :         assembler->JumpOrBacktrack(trace->backtrack());
    3182         408 :         assembler->Bind(&ok);
    3183             :         break;
    3184             :       }
    3185             :       case AT_START: {
    3186           0 :         if (trace->at_start() == Trace::FALSE_VALUE) {
    3187           0 :             assembler->JumpOrBacktrack(trace->backtrack());
    3188           0 :             return;
    3189             :         }
    3190           0 :         if (trace->at_start() == Trace::UNKNOWN) {
    3191           0 :             assembler->CheckNotAtStart(trace->backtrack());
    3192           0 :             Trace at_start_trace = *trace;
    3193         111 :             at_start_trace.set_at_start(true);
    3194         111 :             on_success()->Emit(compiler, &at_start_trace);
    3195             :             return;
    3196             :         }
    3197             :       }
    3198             :         break;
    3199             :       case AFTER_NEWLINE:
    3200           1 :         EmitHat(compiler, on_success(), trace);
    3201           1 :         return;
    3202             :       case AT_BOUNDARY:
    3203             :       case AT_NON_BOUNDARY: {
    3204           2 :         EmitBoundaryCheck(compiler, trace);
    3205           2 :         return;
    3206             :       }
    3207             :       case NOT_AFTER_LEAD_SURROGATE:
    3208           0 :         EmitNotAfterLeadSurrogate(compiler, on_success(), trace);
    3209           0 :         return;
    3210             :       case NOT_IN_SURROGATE_PAIR:
    3211           0 :         EmitNotInSurrogatePair(compiler, on_success(), trace);
    3212           0 :         return;
    3213             :     }
    3214         408 :     on_success()->Emit(compiler, trace);
    3215             : }
    3216             : 
    3217             : static bool
    3218             : DeterminedAlready(QuickCheckDetails* quick_check, int offset)
    3219             : {
    3220           0 :     if (quick_check == nullptr)
    3221             :         return false;
    3222           0 :     if (offset >= quick_check->characters())
    3223             :         return false;
    3224       10551 :     return quick_check->positions(offset)->determines_perfectly;
    3225             : }
    3226             : 
    3227             : static void
    3228             : UpdateBoundsCheck(int index, int* checked_up_to)
    3229             : {
    3230       20807 :     if (index > *checked_up_to)
    3231        1090 :         *checked_up_to = index;
    3232             : }
    3233             : 
    3234             : static void
    3235           8 : EmitBoundaryTest(RegExpMacroAssembler* masm,
    3236             :                  int border,
    3237             :                  jit::Label* fall_through,
    3238             :                  jit::Label* above_or_equal,
    3239             :                  jit::Label* below)
    3240             : {
    3241           0 :     if (below != fall_through) {
    3242           0 :         masm->CheckCharacterLT(border, below);
    3243           8 :         if (above_or_equal != fall_through)
    3244           0 :             masm->JumpOrBacktrack(above_or_equal);
    3245             :     } else {
    3246           0 :         masm->CheckCharacterGT(border - 1, above_or_equal);
    3247             :     }
    3248           8 : }
    3249             : 
    3250             : static void
    3251         178 : EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
    3252             :                        int first,
    3253             :                        int last,
    3254             :                        jit::Label* fall_through,
    3255             :                        jit::Label* in_range,
    3256             :                        jit::Label* out_of_range)
    3257             : {
    3258           0 :     if (in_range == fall_through) {
    3259          48 :         if (first == last)
    3260           0 :             masm->CheckNotCharacter(first, out_of_range);
    3261             :         else
    3262           0 :             masm->CheckCharacterNotInRange(first, last, out_of_range);
    3263             :     } else {
    3264         130 :         if (first == last)
    3265           0 :             masm->CheckCharacter(first, in_range);
    3266             :         else
    3267           0 :             masm->CheckCharacterInRange(first, last, in_range);
    3268         130 :         if (out_of_range != fall_through)
    3269           0 :             masm->JumpOrBacktrack(out_of_range);
    3270             :     }
    3271         178 : }
    3272             : 
    3273             : typedef InfallibleVector<int, 4> RangeBoundaryVector;
    3274             : 
    3275             : // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
    3276             : // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
    3277             : static void
    3278          17 : EmitUseLookupTable(RegExpMacroAssembler* masm,
    3279             :                    RangeBoundaryVector& ranges,
    3280             :                    int start_index,
    3281             :                    int end_index,
    3282             :                    int min_char,
    3283             :                    jit::Label* fall_through,
    3284             :                    jit::Label* even_label,
    3285             :                    jit::Label* odd_label)
    3286             : {
    3287             :     static const int kSize = RegExpMacroAssembler::kTableSize;
    3288             :     static const int kMask = RegExpMacroAssembler::kTableMask;
    3289             : 
    3290          51 :     DebugOnly<int> base = (min_char & ~kMask);
    3291             : 
    3292             :     // Assert that everything is on one kTableSize page.
    3293           0 :     for (int i = start_index; i <= end_index; i++)
    3294         316 :         MOZ_ASSERT((ranges[i] & ~kMask) == base);
    3295          17 :     MOZ_ASSERT(start_index == 0 || (ranges[start_index - 1] & ~kMask) <= base);
    3296             : 
    3297             :     char templ[kSize];
    3298             :     jit::Label* on_bit_set;
    3299             :     jit::Label* on_bit_clear;
    3300             :     int bit;
    3301          17 :     if (even_label == fall_through) {
    3302             :         on_bit_set = odd_label;
    3303             :         on_bit_clear = even_label;
    3304             :         bit = 1;
    3305             :     } else {
    3306           0 :         on_bit_set = even_label;
    3307          17 :         on_bit_clear = odd_label;
    3308           0 :         bit = 0;
    3309             :     }
    3310           0 :     for (int i = 0; i < (ranges[start_index] & kMask) && i < kSize; i++)
    3311           0 :         templ[i] = bit;
    3312           0 :     int j = 0;
    3313           0 :     bit ^= 1;
    3314           0 :     for (int i = start_index; i < end_index; i++) {
    3315        2405 :         for (j = (ranges[i] & kMask); j < (ranges[i + 1] & kMask); j++) {
    3316           0 :             templ[j] = bit;
    3317             :         }
    3318           0 :         bit ^= 1;
    3319             :     }
    3320         965 :     for (int i = j; i < kSize; i++) {
    3321         474 :         templ[i] = bit;
    3322             :     }
    3323             : 
    3324             :     // TODO(erikcorry): Cache these.
    3325           0 :     RegExpShared::JitCodeTable ba;
    3326             :     {
    3327           0 :         AutoEnterOOMUnsafeRegion oomUnsafe;
    3328           1 :         ba.reset(static_cast<uint8_t*>(js_malloc(kSize)));
    3329          17 :         if (!ba)
    3330           0 :             oomUnsafe.crash("Table malloc");
    3331             :     }
    3332             : 
    3333        2193 :     for (int i = 0; i < kSize; i++)
    3334           0 :         ba[i] = templ[i];
    3335             : 
    3336           0 :     masm->CheckBitInTable(std::move(ba), on_bit_set);
    3337           0 :     if (on_bit_clear != fall_through)
    3338          17 :         masm->JumpOrBacktrack(on_bit_clear);
    3339          17 : }
    3340             : 
    3341             : static void
    3342          95 : CutOutRange(RegExpMacroAssembler* masm,
    3343             :             RangeBoundaryVector& ranges,
    3344             :             int start_index,
    3345             :             int end_index,
    3346             :             int cut_index,
    3347             :             jit::Label* even_label,
    3348             :             jit::Label* odd_label)
    3349             : {
    3350           0 :     bool odd = (((cut_index - start_index) & 1) == 1);
    3351           0 :     jit::Label* in_range_label = odd ? odd_label : even_label;
    3352           0 :     jit::Label dummy;
    3353           0 :     EmitDoubleBoundaryTest(masm,
    3354         190 :                            ranges[cut_index],
    3355         190 :                            ranges[cut_index + 1] - 1,
    3356             :                            &dummy,
    3357             :                            in_range_label,
    3358          95 :                            &dummy);
    3359          95 :     MOZ_ASSERT(!dummy.used());
    3360             :     // Cut out the single range by rewriting the array.  This creates a new
    3361             :     // range that is a merger of the two ranges on either side of the one we
    3362             :     // are cutting out.  The oddity of the labels is preserved.
    3363           0 :     for (int j = cut_index; j > start_index; j--)
    3364           0 :         ranges[j] = ranges[j - 1];
    3365           0 :     for (int j = cut_index + 1; j < end_index; j++)
    3366         621 :         ranges[j] = ranges[j + 1];
    3367          95 : }
    3368             : 
    3369             : // Unicode case.  Split the search space into kSize spaces that are handled
    3370             : // with recursion.
    3371             : static void
    3372          27 : SplitSearchSpace(RangeBoundaryVector& ranges,
    3373             :                  int start_index,
    3374             :                  int end_index,
    3375             :                  int* new_start_index,
    3376             :                  int* new_end_index,
    3377             :                  int* border)
    3378             : {
    3379             :     static const int kSize = RegExpMacroAssembler::kTableSize;
    3380             :     static const int kMask = RegExpMacroAssembler::kTableMask;
    3381             : 
    3382          54 :     int first = ranges[start_index];
    3383           0 :     int last = ranges[end_index] - 1;
    3384             : 
    3385           0 :     *new_start_index = start_index;
    3386           0 :     *border = (ranges[start_index] & ~kMask) + kSize;
    3387         443 :     while (*new_start_index < end_index) {
    3388           0 :         if (ranges[*new_start_index] > *border)
    3389             :             break;
    3390         208 :         (*new_start_index)++;
    3391             :     }
    3392             :     // new_start_index is the index of the first edge that is beyond the
    3393             :     // current kSize space.
    3394             : 
    3395             :     // For very large search spaces we do a binary chop search of the non-ASCII
    3396             :     // space instead of just going to the end of the current kSize space.  The
    3397             :     // heuristics are complicated a little by the fact that any 128-character
    3398             :     // encoding space can be quickly tested with a table lookup, so we don't
    3399             :     // wish to do binary chop search at a smaller granularity than that.  A
    3400             :     // 128-character space can take up a lot of space in the ranges array if,
    3401             :     // for example, we only want to match every second character (eg. the lower
    3402             :     // case characters on some Unicode pages).
    3403          27 :     int binary_chop_index = (end_index + start_index) / 2;
    3404             :     // The first test ensures that we get to the code that handles the ASCII
    3405             :     // range with a single not-taken branch, speeding up this important
    3406             :     // character range (even non-ASCII charset-based text has spaces and
    3407             :     // punctuation).
    3408           0 :     if (*border - 1 > kMaxOneByteCharCode &&  // ASCII case.
    3409           0 :         end_index - start_index > (*new_start_index - start_index) * 2 &&
    3410           0 :         last - first > kSize * 2 &&
    3411          29 :         binary_chop_index > *new_start_index &&
    3412           0 :         ranges[binary_chop_index] >= first + 2 * kSize)
    3413             :     {
    3414           2 :         int scan_forward_for_section_border = binary_chop_index;;
    3415           0 :         int new_border = (ranges[binary_chop_index] | kMask) + 1;
    3416             : 
    3417           0 :         while (scan_forward_for_section_border < end_index) {
    3418           0 :             if (ranges[scan_forward_for_section_border] > new_border) {
    3419           0 :                 *new_start_index = scan_forward_for_section_border;
    3420           1 :                 *border = new_border;
    3421           0 :                 break;
    3422             :             }
    3423           7 :             scan_forward_for_section_border++;
    3424             :         }
    3425             :     }
    3426             : 
    3427           0 :     MOZ_ASSERT(*new_start_index > start_index);
    3428           0 :     *new_end_index = *new_start_index - 1;
    3429           0 :     if (ranges[*new_end_index] == *border)
    3430           0 :         (*new_end_index)--;
    3431           0 :     if (*border >= ranges[end_index]) {
    3432           0 :         *border = ranges[end_index];
    3433          22 :         *new_start_index = end_index;  // Won't be used.
    3434           0 :         *new_end_index = end_index - 1;
    3435             :     }
    3436          27 : }
    3437             : 
    3438             : // Gets a series of segment boundaries representing a character class.  If the
    3439             : // character is in the range between an even and an odd boundary (counting from
    3440             : // start_index) then go to even_label, otherwise go to odd_label.  We already
    3441             : // know that the character is in the range of min_char to max_char inclusive.
    3442             : // Either label can be nullptr indicating backtracking.  Either label can also be
    3443             : // equal to the fall_through label.
    3444             : static void
    3445         232 : GenerateBranches(RegExpMacroAssembler* masm,
    3446             :                  RangeBoundaryVector& ranges,
    3447             :                  int start_index,
    3448             :                  int end_index,
    3449             :                  char16_t min_char,
    3450             :                  char16_t max_char,
    3451             :                  jit::Label* fall_through,
    3452             :                  jit::Label* even_label,
    3453             :                  jit::Label* odd_label)
    3454             : {
    3455         464 :     int first = ranges[start_index];
    3456           0 :     int last = ranges[end_index] - 1;
    3457             : 
    3458         232 :     MOZ_ASSERT(min_char < first);
    3459             : 
    3460             :     // Just need to test if the character is before or on-or-after
    3461             :     // a particular character.
    3462           0 :     if (start_index == end_index) {
    3463           8 :         EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
    3464           8 :         return;
    3465             :     }
    3466             : 
    3467             :     // Another almost trivial case:  There is one interval in the middle that is
    3468             :     // different from the end intervals.
    3469           0 :     if (start_index + 1 == end_index) {
    3470          83 :         EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label, odd_label);
    3471          83 :         return;
    3472             :     }
    3473             : 
    3474             :     // It's not worth using table lookup if there are very few intervals in the
    3475             :     // character class.
    3476         141 :     if (end_index - start_index <= 6) {
    3477             :         // It is faster to test for individual characters, so we look for those
    3478             :         // first, then try arbitrary ranges in the second round.
    3479             :         static int kNoCutIndex = -1;
    3480           0 :         int cut = kNoCutIndex;
    3481          95 :         for (int i = start_index; i < end_index; i++) {
    3482         804 :             if (ranges[i] == ranges[i + 1] - 1) {
    3483             :                 cut = i;
    3484             :                 break;
    3485             :             }
    3486             :         }
    3487           0 :         if (cut == kNoCutIndex) cut = start_index;
    3488           0 :         CutOutRange(masm, ranges, start_index, end_index, cut, even_label, odd_label);
    3489          95 :         MOZ_ASSERT(end_index - start_index >= 2);
    3490          95 :         GenerateBranches(masm,
    3491             :                          ranges,
    3492             :                          start_index + 1,
    3493             :                          end_index - 1,
    3494             :                          min_char,
    3495             :                          max_char,
    3496             :                          fall_through,
    3497             :                          even_label,
    3498          95 :                          odd_label);
    3499          95 :         return;
    3500             :     }
    3501             : 
    3502             :     // If there are a lot of intervals in the regexp, then we will use tables to
    3503             :     // determine whether the character is inside or outside the character class.
    3504             :     static const int kBits = RegExpMacroAssembler::kTableSizeBits;
    3505             : 
    3506          46 :     if ((max_char >> kBits) == (min_char >> kBits)) {
    3507             :         EmitUseLookupTable(masm,
    3508             :                            ranges,
    3509             :                            start_index,
    3510             :                            end_index,
    3511             :                            min_char,
    3512             :                            fall_through,
    3513             :                            even_label,
    3514          17 :                            odd_label);
    3515          17 :         return;
    3516             :     }
    3517             : 
    3518          29 :     if ((min_char >> kBits) != (first >> kBits)) {
    3519           2 :         masm->CheckCharacterLT(first, odd_label);
    3520             :         GenerateBranches(masm,
    3521             :                          ranges,
    3522             :                          start_index + 1,
    3523             :                          end_index,
    3524             :                          first,
    3525             :                          max_char,
    3526             :                          fall_through,
    3527             :                          odd_label,
    3528           2 :                          even_label);
    3529           2 :         return;
    3530             :     }
    3531             : 
    3532           0 :     int new_start_index = 0;
    3533          27 :     int new_end_index = 0;
    3534          27 :     int border = 0;
    3535             : 
    3536             :     SplitSearchSpace(ranges,
    3537             :                      start_index,
    3538             :                      end_index,
    3539             :                      &new_start_index,
    3540             :                      &new_end_index,
    3541           0 :                      &border);
    3542             : 
    3543           0 :     jit::Label handle_rest;
    3544          27 :     jit::Label* above = &handle_rest;
    3545          27 :     if (border == last + 1) {
    3546             :         // We didn't find any section that started after the limit, so everything
    3547             :         // above the border is one of the terminal labels.
    3548          22 :         above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
    3549          22 :         MOZ_ASSERT(new_end_index == end_index - 1);
    3550             :     }
    3551             : 
    3552           0 :     MOZ_ASSERT(start_index <= new_end_index);
    3553           0 :     MOZ_ASSERT(new_start_index <= end_index);
    3554           0 :     MOZ_ASSERT(start_index < new_start_index);
    3555          27 :     MOZ_ASSERT(new_end_index < end_index);
    3556          27 :     MOZ_ASSERT(new_end_index + 1 == new_start_index ||
    3557             :                (new_end_index + 2 == new_start_index &&
    3558             :                 border == ranges[new_end_index + 1]));
    3559           0 :     MOZ_ASSERT(min_char < border - 1);
    3560           0 :     MOZ_ASSERT(border < max_char);
    3561          54 :     MOZ_ASSERT(ranges[new_end_index] < border);
    3562          76 :     MOZ_ASSERT(border < ranges[new_start_index] ||
    3563             :                (border == ranges[new_start_index] &&
    3564             :                 new_start_index == end_index &&
    3565             :                 new_end_index == end_index - 1 &&
    3566             :                 border == last + 1));
    3567           0 :     MOZ_ASSERT(new_start_index == 0 || border >= ranges[new_start_index - 1]);
    3568             : 
    3569           0 :     masm->CheckCharacterGT(border - 1, above);
    3570          54 :     jit::Label dummy;
    3571          27 :     GenerateBranches(masm,
    3572             :                      ranges,
    3573             :                      start_index,
    3574             :                      new_end_index,
    3575             :                      min_char,
    3576          27 :                      border - 1,
    3577             :                      &dummy,
    3578             :                      even_label,
    3579           0 :                      odd_label);
    3580           0 :     if (handle_rest.used()) {
    3581           0 :         masm->Bind(&handle_rest);
    3582           5 :         bool flip = (new_start_index & 1) != (start_index & 1);
    3583           5 :         GenerateBranches(masm,
    3584             :                          ranges,
    3585             :                          new_start_index,
    3586             :                          end_index,
    3587             :                          border,
    3588             :                          max_char,
    3589             :                          &dummy,
    3590             :                          flip ? odd_label : even_label,
    3591           5 :                          flip ? even_label : odd_label);
    3592             :     }
    3593             : }
    3594             : 
    3595             : static void
    3596         248 : EmitCharClass(LifoAlloc* alloc,
    3597             :               RegExpMacroAssembler* macro_assembler,
    3598             :               RegExpCharacterClass* cc,
    3599             :               bool latin1,
    3600             :               jit::Label* on_failure,
    3601             :               int cp_offset,
    3602             :               bool check_offset,
    3603             :               bool preloaded)
    3604             : {
    3605           0 :     CharacterRangeVector& ranges = cc->ranges(alloc);
    3606         248 :     if (!CharacterRange::IsCanonical(ranges)) {
    3607          28 :         CharacterRange::Canonicalize(ranges);
    3608             :     }
    3609             : 
    3610         248 :     int max_char = MaximumCharacter(latin1);
    3611           0 :     int range_count = ranges.length();
    3612             : 
    3613           0 :     int last_valid_range = range_count - 1;
    3614           0 :     while (last_valid_range >= 0) {
    3615         762 :         CharacterRange& range = ranges[last_valid_range];
    3616         381 :         if (range.from() <= max_char) {
    3617             :             break;
    3618             :         }
    3619         133 :         last_valid_range--;
    3620             :     }
    3621             : 
    3622           1 :     if (last_valid_range < 0) {
    3623           0 :         if (!cc->is_negated()) {
    3624           0 :             macro_assembler->JumpOrBacktrack(on_failure);
    3625             :         }
    3626           0 :         if (check_offset) {
    3627           0 :             macro_assembler->CheckPosition(cp_offset, on_failure);
    3628             :         }
    3629         145 :         return;
    3630             :     }
    3631             : 
    3632           0 :     if (last_valid_range == 0 &&
    3633           0 :         ranges[0].IsEverything(max_char)) {
    3634          77 :         if (cc->is_negated()) {
    3635           0 :             macro_assembler->JumpOrBacktrack(on_failure);
    3636             :         } else {
    3637             :             // This is a common case hit by non-anchored expressions.
    3638          77 :             if (check_offset) {
    3639           5 :                 macro_assembler->CheckPosition(cp_offset, on_failure);
    3640             :             }
    3641             :         }
    3642             :         return;
    3643             :     }
    3644           0 :     if (last_valid_range == 0 &&
    3645         191 :         !cc->is_negated() &&
    3646           1 :         ranges[0].IsEverything(max_char)) {
    3647             :         // This is a common case hit by non-anchored expressions.
    3648           0 :         if (check_offset) {
    3649           0 :             macro_assembler->CheckPosition(cp_offset, on_failure);
    3650             :         }
    3651             :         return;
    3652             :     }
    3653             : 
    3654         171 :     if (!preloaded) {
    3655         159 :         macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
    3656             :     }
    3657             : 
    3658           0 :     if (cc->is_standard(alloc) &&
    3659          71 :         macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
    3660          71 :                                                     on_failure)) {
    3661             :         return;
    3662             :     }
    3663             : 
    3664             :     // A new list with ascending entries.  Each entry is a code unit
    3665             :     // where there is a boundary between code units that are part of
    3666             :     // the class and code units that are not.  Normally we insert an
    3667             :     // entry at zero which goes to the failure label, but if there
    3668             :     // was already one there we fall through for success on that entry.
    3669             :     // Subsequent entries have alternating meaning (success/failure).
    3670             :     RangeBoundaryVector* range_boundaries =
    3671           0 :         alloc->newInfallible<RangeBoundaryVector>(*alloc);
    3672             : 
    3673           0 :     bool zeroth_entry_is_failure = !cc->is_negated();
    3674             : 
    3675           0 :     range_boundaries->reserve(last_valid_range);
    3676           0 :     for (int i = 0; i <= last_valid_range; i++) {
    3677           0 :         CharacterRange& range = ranges[i];
    3678           0 :         if (range.from() == 0) {
    3679           3 :             MOZ_ASSERT(i == 0);
    3680           0 :             zeroth_entry_is_failure = !zeroth_entry_is_failure;
    3681             :         } else {
    3682           0 :             range_boundaries->append(range.from());
    3683             :         }
    3684           0 :         range_boundaries->append(range.to() + 1);
    3685             :     }
    3686           0 :     int end_index = range_boundaries->length() - 1;
    3687         206 :     if ((*range_boundaries)[end_index] > max_char)
    3688           0 :         end_index--;
    3689             : 
    3690         206 :     jit::Label fall_through;
    3691         103 :     GenerateBranches(macro_assembler,
    3692             :                      *range_boundaries,
    3693             :                      0,  // start_index.
    3694             :                      end_index,
    3695             :                      0,  // min_char.
    3696             :                      max_char,
    3697             :                      &fall_through,
    3698             :                      zeroth_entry_is_failure ? &fall_through : on_failure,
    3699         103 :                      zeroth_entry_is_failure ? on_failure : &fall_through);
    3700         103 :     macro_assembler->Bind(&fall_through);
    3701             : }
    3702             : 
    3703             : typedef bool EmitCharacterFunction(RegExpCompiler* compiler,
    3704             :                                    char16_t c,
    3705             :                                    jit::Label* on_failure,
    3706             :                                    int cp_offset,
    3707             :                                    bool check,
    3708             :                                    bool preloaded);
    3709             : 
    3710             : static inline bool
    3711       20551 : EmitSimpleCharacter(RegExpCompiler* compiler,
    3712             :                     char16_t c,
    3713             :                     jit::Label* on_failure,
    3714             :                     int cp_offset,
    3715             :                     bool check,
    3716             :                     bool preloaded)
    3717             : {
    3718           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3719           0 :     bool bound_checked = false;
    3720           0 :     if (!preloaded) {
    3721       20551 :         assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
    3722           0 :         bound_checked = true;
    3723             :     }
    3724       20551 :     assembler->CheckNotCharacter(c, on_failure);
    3725       20551 :     return bound_checked;
    3726             : }
    3727             : 
    3728             : // Emit character for case independent match, when GetCaseIndependentLetters
    3729             : // returns single character.
    3730             : // This is used by the following 2 cases:
    3731             : //   * non-letters (things that don't have case)
    3732             : //   * letters that map across Latin1 and non-Latin1, and non-Latin1 case is
    3733             : //     filtered out because of Latin1 match
    3734             : static inline bool
    3735           9 : EmitAtomSingle(RegExpCompiler* compiler,
    3736             :                char16_t c,
    3737             :                jit::Label* on_failure,
    3738             :                int cp_offset,
    3739             :                bool check,
    3740             :                bool preloaded)
    3741             : {
    3742           9 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    3743           0 :     bool latin1 = compiler->latin1();
    3744             :     char16_t chars[kEcma262UnCanonicalizeMaxWidth];
    3745           9 :     int length = GetCaseIndependentLetters(c, latin1, compiler->unicode(), chars);
    3746           9 :     if (length != 1)
    3747             :         return false;
    3748             : 
    3749           0 :     bool checked = false;
    3750           0 :     if (!preloaded) {
    3751           4 :         macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
    3752           0 :         checked = check;
    3753             :     }
    3754           4 :     macro_assembler->CheckNotCharacter(chars[0], on_failure);
    3755           4 :     return checked;
    3756             : }
    3757             : 
    3758             : static bool
    3759           5 : ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
    3760             :                           bool latin1,
    3761             :                           char16_t c1,
    3762             :                           char16_t c2,
    3763             :                           jit::Label* on_failure)
    3764             : {
    3765           0 :     char16_t char_mask = MaximumCharacter(latin1);
    3766             : 
    3767           0 :     MOZ_ASSERT(c1 != c2);
    3768           0 :     if (c1 > c2) {
    3769           0 :         char16_t tmp = c1;
    3770           5 :         c1 = c2;
    3771           5 :         c2 = tmp;
    3772             :     }
    3773             : 
    3774           0 :     char16_t exor = c1 ^ c2;
    3775             :     // Check whether exor has only one bit set.
    3776           0 :     if (((exor - 1) & exor) == 0) {
    3777             :         // If c1 and c2 differ only by one bit.
    3778           0 :         char16_t mask = char_mask ^ exor;
    3779           5 :         macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
    3780           5 :         return true;
    3781             :     }
    3782             : 
    3783           0 :     char16_t diff = c2 - c1;
    3784           0 :     if (((diff - 1) & diff) == 0 && c1 >= diff) {
    3785             :         // If the characters differ by 2^n but don't differ by one bit then
    3786             :         // subtract the difference from the found character, then do the or
    3787             :         // trick.  We avoid the theoretical case where negative numbers are
    3788             :         // involved in order to simplify code generation.
    3789           0 :         char16_t mask = char_mask ^ diff;
    3790           0 :         macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
    3791             :                                                         diff,
    3792             :                                                         mask,
    3793           0 :                                                         on_failure);
    3794           0 :         return true;
    3795             :     }
    3796             :     return false;
    3797             : }
    3798             : 
    3799             : // Emit character for case independent match, when GetCaseIndependentLetters
    3800             : // returns multiple characters.
    3801             : static inline bool
    3802           9 : EmitAtomMulti(RegExpCompiler* compiler,
    3803             :               char16_t c,
    3804             :               jit::Label* on_failure,
    3805             :               int cp_offset,
    3806             :               bool check,
    3807             :               bool preloaded)
    3808             : {
    3809           9 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    3810           0 :     bool latin1 = compiler->latin1();
    3811             :     char16_t chars[kEcma262UnCanonicalizeMaxWidth];
    3812           9 :     int length = GetCaseIndependentLetters(c, latin1, compiler->unicode(), chars);
    3813           9 :     if (length <= 1) return false;
    3814             :     // We may not need to check against the end of the input string
    3815             :     // if this character lies before a character that matched.
    3816           0 :     if (!preloaded)
    3817           5 :         macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
    3818           0 :     jit::Label ok;
    3819             :     MOZ_ASSERT(kEcma262UnCanonicalizeMaxWidth == 4);
    3820           0 :     switch (length) {
    3821             :       case 2: {
    3822           0 :         if (ShortCutEmitCharacterPair(macro_assembler,
    3823             :                                       latin1,
    3824           5 :                                       chars[0],
    3825           5 :                                       chars[1],
    3826             :                                       on_failure)) {
    3827             :         } else {
    3828           0 :             macro_assembler->CheckCharacter(chars[0], &ok);
    3829           0 :             macro_assembler->CheckNotCharacter(chars[1], on_failure);
    3830           0 :             macro_assembler->Bind(&ok);
    3831             :         }
    3832             :         break;
    3833             :       }
    3834             :       case 4:
    3835           0 :         macro_assembler->CheckCharacter(chars[3], &ok);
    3836             :         MOZ_FALLTHROUGH;
    3837             :       case 3:
    3838           0 :         macro_assembler->CheckCharacter(chars[0], &ok);
    3839           0 :         macro_assembler->CheckCharacter(chars[1], &ok);
    3840           0 :         macro_assembler->CheckNotCharacter(chars[2], on_failure);
    3841           0 :         macro_assembler->Bind(&ok);
    3842           0 :         break;
    3843             :       default:
    3844           0 :         MOZ_CRASH("Bad length");
    3845             :     }
    3846             :     return true;
    3847             : }
    3848             : 
    3849             : // We call this repeatedly to generate code for each pass over the text node.
    3850             : // The passes are in increasing order of difficulty because we hope one
    3851             : // of the first passes will fail in which case we are saved the work of the
    3852             : // later passes.  for example for the case independent regexp /%[asdfghjkl]a/
    3853             : // we will check the '%' in the first pass, the case independent 'a' in the
    3854             : // second pass and the character class in the last pass.
    3855             : //
    3856             : // The passes are done from right to left, so for example to test for /bar/
    3857             : // we will first test for an 'r' with offset 2, then an 'a' with offset 1
    3858             : // and then a 'b' with offset 0.  This means we can avoid the end-of-input
    3859             : // bounds check most of the time.  In the example we only need to check for
    3860             : // end-of-input when loading the putative 'r'.
    3861             : //
    3862             : // A slight complication involves the fact that the first character may already
    3863             : // be fetched into a register by the previous node.  In this case we want to
    3864             : // do the test for that character first.  We do this in separate passes.  The
    3865             : // 'preloaded' argument indicates that we are doing such a 'pass'.  If such a
    3866             : // pass has been performed then subsequent passes will have true in
    3867             : // first_element_checked to indicate that that character does not need to be
    3868             : // checked again.
    3869             : //
    3870             : // In addition to all this we are passed a Trace, which can
    3871             : // contain an AlternativeGeneration object.  In this AlternativeGeneration
    3872             : // object we can see details of any quick check that was already passed in
    3873             : // order to get to the code we are now generating.  The quick check can involve
    3874             : // loading characters, which means we do not need to recheck the bounds
    3875             : // up to the limit the quick check already checked.  In addition the quick
    3876             : // check can have involved a mask and compare operation which may simplify
    3877             : // or obviate the need for further checks at some character positions.
    3878             : void
    3879        4178 : TextNode::TextEmitPass(RegExpCompiler* compiler,
    3880             :                        TextEmitPassType pass,
    3881             :                        bool preloaded,
    3882             :                        Trace* trace,
    3883             :                        bool first_element_checked,
    3884             :                        int* checked_up_to)
    3885             : {
    3886           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3887           0 :     bool latin1 = compiler->latin1();
    3888           0 :     jit::Label* backtrack = trace->backtrack();
    3889           0 :     QuickCheckDetails* quick_check = trace->quick_check_performed();
    3890           0 :     int element_count = elements().length();
    3891           0 :     for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
    3892           0 :         TextElement elm = elements()[i];
    3893           0 :         int cp_offset = trace->cp_offset() + elm.cp_offset();
    3894           0 :         if (elm.text_type() == TextElement::ATOM) {
    3895           0 :             const CharacterVector& quarks = elm.atom()->data();
    3896           0 :             for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
    3897           0 :                 if (first_element_checked && i == 0 && j == 0) continue;
    3898           0 :                 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
    3899       61679 :                 EmitCharacterFunction* emit_function = nullptr;
    3900           0 :                 switch (pass) {
    3901             :                   case NON_LATIN1_MATCH:
    3902           0 :                     MOZ_ASSERT(latin1);
    3903           0 :                     if (!IsLatin1Equivalent(quarks[j], compiler)) {
    3904           0 :                         assembler->JumpOrBacktrack(backtrack);
    3905           0 :                         return;
    3906             :                     }
    3907             :                     break;
    3908             :                   case CASE_SINGLE_CHARACTER_MATCH:
    3909           9 :                     emit_function = &EmitAtomSingle;
    3910           0 :                     break;
    3911             :                   case SIMPLE_CHARACTER_MATCH:
    3912       20551 :                     emit_function = &EmitSimpleCharacter;
    3913           0 :                     break;
    3914             :                   case CASE_MUTLI_CHARACTER_MATCH:
    3915           9 :                     emit_function = &EmitAtomMulti;
    3916           9 :                     break;
    3917             :                   default:
    3918             :                     break;
    3919             :                 }
    3920       61679 :                 if (emit_function != nullptr) {
    3921             :                     // emit_function is a function pointer. Suppress static
    3922             :                     // analysis false positives.
    3923           0 :                     JS::AutoSuppressGCAnalysis suppress;
    3924       61707 :                     bool bound_checked = emit_function(compiler,
    3925       41138 :                                                        quarks[j],
    3926             :                                                        backtrack,
    3927             :                                                        cp_offset + j,
    3928           0 :                                                        *checked_up_to < cp_offset + j,
    3929       20569 :                                                        preloaded);
    3930       20569 :                     if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
    3931             :                 }
    3932             :             }
    3933             :         } else {
    3934           0 :             MOZ_ASSERT(TextElement::CHAR_CLASS == elm.text_type());
    3935           0 :             if (pass == CHARACTER_CLASS_MATCH) {
    3936           0 :                 if (first_element_checked && i == 0) continue;
    3937           0 :                 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
    3938         248 :                 RegExpCharacterClass* cc = elm.char_class();
    3939         496 :                 EmitCharClass(alloc(),
    3940             :                               assembler,
    3941             :                               cc,
    3942             :                               latin1,
    3943             :                               backtrack,
    3944             :                               cp_offset,
    3945         248 :                               *checked_up_to < cp_offset,
    3946         248 :                               preloaded);
    3947             :                 UpdateBoundsCheck(cp_offset, checked_up_to);
    3948             :             }
    3949             :         }
    3950             :     }
    3951             : }
    3952             : 
    3953             : int
    3954           0 : TextNode::Length()
    3955             : {
    3956           0 :     TextElement elm = elements()[elements().length() - 1];
    3957        4473 :     MOZ_ASSERT(elm.cp_offset() >= 0);
    3958        4473 :     return elm.cp_offset() + elm.length();
    3959             : }
    3960             : 
    3961             : bool
    3962           0 : TextNode::SkipPass(int int_pass, bool ignore_case)
    3963             : {
    3964           0 :     TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
    3965           0 :     if (ignore_case)
    3966         252 :         return pass == SIMPLE_CHARACTER_MATCH;
    3967        5340 :     return pass == CASE_SINGLE_CHARACTER_MATCH || pass == CASE_MUTLI_CHARACTER_MATCH;
    3968             : }
    3969             : 
    3970             : // This generates the code to match a text node.  A text node can contain
    3971             : // straight character sequences (possibly to be matched in a case-independent
    3972             : // way) and character classes.  For efficiency we do not do this in a single
    3973             : // pass from left to right.  Instead we pass over the text node several times,
    3974             : // emitting code for some character positions every time.  See the comment on
    3975             : // TextEmitPass for details.
    3976             : void
    3977           0 : TextNode::Emit(RegExpCompiler* compiler, Trace* trace)
    3978             : {
    3979           0 :     LimitResult limit_result = LimitVersions(compiler, trace);
    3980        1392 :     if (limit_result == DONE) return;
    3981           0 :     MOZ_ASSERT(limit_result == CONTINUE);
    3982             : 
    3983        1364 :     if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
    3984           0 :         compiler->SetRegExpTooBig();
    3985             :         return;
    3986             :     }
    3987             : 
    3988           0 :     if (compiler->latin1()) {
    3989        1319 :         int dummy = 0;
    3990        1319 :         TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
    3991             :     }
    3992             : 
    3993           0 :     bool first_elt_done = false;
    3994        1364 :     int bound_checked_to = trace->cp_offset() - 1;
    3995        1364 :     bound_checked_to += trace->bound_checked_up_to();
    3996             : 
    3997             :     // If a character is preloaded into the current character register then
    3998             :     // check that now.
    3999           0 :     if (trace->characters_preloaded() == 1) {
    4000         306 :         for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
    4001         272 :             if (!SkipPass(pass, compiler->ignore_case())) {
    4002             :                 TextEmitPass(compiler,
    4003             :                              static_cast<TextEmitPassType>(pass),
    4004             :                              true,
    4005             :                              trace,
    4006             :                              false,
    4007          74 :                              &bound_checked_to);
    4008             :             }
    4009             :         }
    4010             :         first_elt_done = true;
    4011             :     }
    4012             : 
    4013           0 :     for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
    4014       10912 :         if (!SkipPass(pass, compiler->ignore_case())) {
    4015        2785 :             TextEmitPass(compiler,
    4016             :                          static_cast<TextEmitPassType>(pass),
    4017             :                          false,
    4018             :                          trace,
    4019             :                          first_elt_done,
    4020        2785 :                          &bound_checked_to);
    4021             :         }
    4022             :     }
    4023             : 
    4024           0 :     Trace successor_trace(*trace);
    4025           0 :     successor_trace.set_at_start(false);
    4026           0 :     successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
    4027        2728 :     RecursionCheck rc(compiler);
    4028        1364 :     on_success()->Emit(compiler, &successor_trace);
    4029             : }
    4030             : 
    4031             : void
    4032           0 : LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
    4033             : {
    4034         647 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4035           0 :     if (trace->stop_node() == this) {
    4036             :         int text_length =
    4037         204 :             GreedyLoopTextLengthForAlternative(&alternatives()[0]);
    4038          68 :         MOZ_ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
    4039             :         // Update the counter-based backtracking info on the stack.  This is an
    4040             :         // optimization for greedy loops (see below).
    4041           0 :         MOZ_ASSERT(trace->cp_offset() == text_length);
    4042           0 :         macro_assembler->AdvanceCurrentPosition(text_length);
    4043          68 :         macro_assembler->JumpOrBacktrack(trace->loop_label());
    4044           0 :         return;
    4045             :     }
    4046           0 :     MOZ_ASSERT(trace->stop_node() == nullptr);
    4047           0 :     if (!trace->is_trivial()) {
    4048         270 :         trace->Flush(compiler, this);
    4049           0 :         return;
    4050             :     }
    4051         309 :     ChoiceNode::Emit(compiler, trace);
    4052             : }
    4053             : 
    4054             : /* Code generation for choice nodes.
    4055             :  *
    4056             :  * We generate quick checks that do a mask and compare to eliminate a
    4057             :  * choice.  If the quick check succeeds then it jumps to the continuation to
    4058             :  * do slow checks and check subsequent nodes.  If it fails (the common case)
    4059             :  * it falls through to the next choice.
    4060             :  *
    4061             :  * Here is the desired flow graph.  Nodes directly below each other imply
    4062             :  * fallthrough.  Alternatives 1 and 2 have quick checks.  Alternative
    4063             :  * 3 doesn't have a quick check so we have to call the slow check.
    4064             :  * Nodes are marked Qn for quick checks and Sn for slow checks.  The entire
    4065             :  * regexp continuation is generated directly after the Sn node, up to the
    4066             :  * next JumpOrBacktrack if we decide to reuse some already generated code.  Some
    4067             :  * nodes expect preload_characters to be preloaded into the current
    4068             :  * character register.  R nodes do this preloading.  Vertices are marked
    4069             :  * F for failures and S for success (possible success in the case of quick
    4070             :  * nodes).  L, V, < and > are used as arrow heads.
    4071             :  *
    4072             :  * ----------> R
    4073             :  *             |
    4074             :  *             V
    4075             :  *            Q1 -----> S1
    4076             :  *             |   S   /
    4077             :  *            F|      /
    4078             :  *             |    F/
    4079             :  *             |    /
    4080             :  *             |   R
    4081             :  *             |  /
    4082             :  *             V L
    4083             :  *            Q2 -----> S2
    4084             :  *             |   S   /
    4085             :  *            F|      /
    4086             :  *             |    F/
    4087             :  *             |    /
    4088             :  *             |   R
    4089             :  *             |  /
    4090             :  *             V L
    4091             :  *            S3
    4092             :  *             |
    4093             :  *            F|
    4094             :  *             |
    4095             :  *             R
    4096             :  *             |
    4097             :  * backtrack   V
    4098             :  * <----------Q4
    4099             :  *   \    F    |
    4100             :  *    \        |S
    4101             :  *     \   F   V
    4102             :  *      \-----S4
    4103             :  *
    4104             :  * For greedy loops we reverse our expectation and expect to match rather
    4105             :  * than fail. Therefore we want the loop code to look like this (U is the
    4106             :  * unwind code that steps back in the greedy loop).  The following alternatives
    4107             :  * look the same as above.
    4108             :  *              _____
    4109             :  *             /     \
    4110             :  *             V     |
    4111             :  * ----------> S1    |
    4112             :  *            /|     |
    4113             :  *           / |S    |
    4114             :  *         F/  \_____/
    4115             :  *         /
    4116             :  *        |<-----------
    4117             :  *        |            \
    4118             :  *        V             \
    4119             :  *        Q2 ---> S2     \
    4120             :  *        |  S   /       |
    4121             :  *       F|     /        |
    4122             :  *        |   F/         |
    4123             :  *        |   /          |
    4124             :  *        |  R           |
    4125             :  *        | /            |
    4126             :  *   F    VL             |
    4127             :  * <------U              |
    4128             :  * back   |S             |
    4129             :  *        \______________/
    4130             :  */
    4131             : 
    4132             : // This class is used when generating the alternatives in a choice node.  It
    4133             : // records the way the alternative is being code generated.
    4134        3983 : class irregexp::AlternativeGeneration
    4135             : {
    4136             :   public:
    4137             :     AlternativeGeneration()
    4138        3983 :       : possible_success(),
    4139             :         expects_preload(false),
    4140             :         after(),
    4141       15932 :         quick_check_details()
    4142             :     {}
    4143             : 
    4144             :     jit::Label possible_success;
    4145             :     bool expects_preload;
    4146             :     jit::Label after;
    4147             :     QuickCheckDetails quick_check_details;
    4148             : };
    4149             : 
    4150             : void
    4151          47 : ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
    4152             :                           Guard* guard, Trace* trace)
    4153             : {
    4154           0 :     switch (guard->op()) {
    4155             :       case Guard::LT:
    4156          60 :         MOZ_ASSERT(!trace->mentions_reg(guard->reg()));
    4157           0 :         macro_assembler->IfRegisterGE(guard->reg(),
    4158             :                                       guard->value(),
    4159          60 :                                       trace->backtrack());
    4160           0 :         break;
    4161             :       case Guard::GEQ:
    4162          34 :         MOZ_ASSERT(!trace->mentions_reg(guard->reg()));
    4163           0 :         macro_assembler->IfRegisterLT(guard->reg(),
    4164             :                                       guard->value(),
    4165          34 :                                       trace->backtrack());
    4166           0 :         break;
    4167             :     }
    4168          47 : }
    4169             : 
    4170             : int
    4171           0 : ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least)
    4172             : {
    4173           0 :     int preload_characters = Min(4, eats_at_least);
    4174           0 :     if (compiler->macro_assembler()->CanReadUnaligned()) {
    4175           0 :         bool latin1 = compiler->latin1();
    4176           1 :         if (latin1) {
    4177         354 :             if (preload_characters > 4)
    4178           0 :                 preload_characters = 4;
    4179             :             // We can't preload 3 characters because there is no machine instruction
    4180             :             // to do that.  We can't just load 4 because we could be reading
    4181             :             // beyond the end of the string, which could cause a memory fault.
    4182         354 :             if (preload_characters == 3)
    4183           0 :                 preload_characters = 2;
    4184             :         } else {
    4185          24 :             if (preload_characters > 2)
    4186           4 :                 preload_characters = 2;
    4187             :         }
    4188             :     } else {
    4189           0 :         if (preload_characters > 1)
    4190           0 :             preload_characters = 1;
    4191             :     }
    4192         378 :     return preload_characters;
    4193             : }
    4194             : 
    4195             : RegExpNode*
    4196           0 : TextNode::GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler)
    4197             : {
    4198         254 :     if (elements().length() != 1)
    4199             :         return nullptr;
    4200             : 
    4201         252 :     TextElement elm = elements()[0];
    4202         126 :     if (elm.text_type() != TextElement::CHAR_CLASS)
    4203             :         return nullptr;
    4204             : 
    4205          82 :     RegExpCharacterClass* node = elm.char_class();
    4206           0 :     CharacterRangeVector& ranges = node->ranges(alloc());
    4207             : 
    4208          82 :     if (!CharacterRange::IsCanonical(ranges))
    4209           0 :         CharacterRange::Canonicalize(ranges);
    4210             : 
    4211          82 :     if (node->is_negated())
    4212           0 :         return ranges.length() == 0 ? on_success() : nullptr;
    4213             : 
    4214          81 :     if (ranges.length() != 1)
    4215             :         return nullptr;
    4216             : 
    4217         154 :     uint32_t max_char = MaximumCharacter(compiler->latin1());
    4218         231 :     return ranges[0].IsEverything(max_char) ? on_success() : nullptr;
    4219             : }
    4220             : 
    4221             : // Finds the fixed match length of a sequence of nodes that goes from
    4222             : // this alternative and back to this choice node.  If there are variable
    4223             : // length nodes or other complications in the way then return a sentinel
    4224             : // value indicating that a greedy loop cannot be constructed.
    4225             : int
    4226           0 : ChoiceNode::GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative)
    4227             : {
    4228         446 :     int length = 0;
    4229         446 :     RegExpNode* node = alternative->node();
    4230             :     // Later we will generate code for all these text nodes using recursion
    4231             :     // so we have to limit the max number.
    4232           0 :     int recursion_depth = 0;
    4233         743 :     while (node != this) {
    4234         607 :         if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
    4235             :             return kNodeIsTooComplexForGreedyLoops;
    4236             :         }
    4237         607 :         int node_length = node->GreedyLoopTextLength();
    4238         607 :         if (node_length == kNodeIsTooComplexForGreedyLoops) {
    4239             :             return kNodeIsTooComplexForGreedyLoops;
    4240             :         }
    4241           0 :         length += node_length;
    4242         297 :         SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
    4243         297 :         node = seq_node->on_success();
    4244             :     }
    4245             :     return length;
    4246             : }
    4247             : 
    4248             : // Creates a list of AlternativeGenerations.  If the list has a reasonable
    4249             : // size then it is on the stack, otherwise the excess is on the heap.
    4250             : class AlternativeGenerationList
    4251             : {
    4252             :   public:
    4253         378 :     AlternativeGenerationList(LifoAlloc* alloc, size_t count)
    4254           0 :       : alt_gens_(*alloc)
    4255             :     {
    4256           0 :         alt_gens_.reserve(count);
    4257           0 :         for (size_t i = 0; i < count && i < kAFew; i++)
    4258           0 :             alt_gens_.append(a_few_alt_gens_ + i);
    4259           0 :         for (size_t i = kAFew; i < count; i++) {
    4260           0 :             AutoEnterOOMUnsafeRegion oomUnsafe;
    4261           1 :             AlternativeGeneration* gen = js_new<AlternativeGeneration>();
    4262           0 :             if (!gen)
    4263           0 :                 oomUnsafe.crash("AlternativeGenerationList js_new");
    4264           0 :             alt_gens_.append(gen);
    4265             :         }
    4266           0 :     }
    4267             : 
    4268           0 :     ~AlternativeGenerationList() {
    4269           0 :         for (size_t i = kAFew; i < alt_gens_.length(); i++) {
    4270         406 :             js_delete(alt_gens_[i]);
    4271           0 :             alt_gens_[i] = nullptr;
    4272             :         }
    4273         378 :     }
    4274             : 
    4275             :     AlternativeGeneration* at(int i) {
    4276        6324 :         return alt_gens_[i];
    4277             :     }
    4278             : 
    4279             :   private:
    4280             :     static const size_t kAFew = 10;
    4281             :     InfallibleVector<AlternativeGeneration*, 1> alt_gens_;
    4282             :     AlternativeGeneration a_few_alt_gens_[kAFew];
    4283             : };
    4284             : 
    4285             : void
    4286           0 : ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
    4287             : {
    4288         526 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4289           0 :     size_t choice_count = alternatives().length();
    4290             : #ifdef DEBUG
    4291           0 :     for (size_t i = 0; i < choice_count - 1; i++) {
    4292           0 :         const GuardedAlternative& alternative = alternatives()[i];
    4293           0 :         const GuardVector* guards = alternative.guards();
    4294           0 :         if (guards) {
    4295         240 :             for (size_t j = 0; j < guards->length(); j++)
    4296         160 :                 MOZ_ASSERT(!trace->mentions_reg((*guards)[j]->reg()));
    4297             :         }
    4298             :     }
    4299             : #endif
    4300             : 
    4301           0 :     LimitResult limit_result = LimitVersions(compiler, trace);
    4302         674 :     if (limit_result == DONE) return;
    4303           0 :     MOZ_ASSERT(limit_result == CONTINUE);
    4304             : 
    4305           0 :     int new_flush_budget = trace->flush_budget() / choice_count;
    4306           0 :     if (trace->flush_budget() == 0 && trace->actions() != nullptr) {
    4307           0 :         trace->Flush(compiler, this);
    4308           0 :         return;
    4309             :     }
    4310             : 
    4311           0 :     RecursionCheck rc(compiler);
    4312             : 
    4313           0 :     Trace* current_trace = trace;
    4314             : 
    4315           0 :     int text_length = GreedyLoopTextLengthForAlternative(&alternatives()[0]);
    4316           0 :     bool greedy_loop = false;
    4317           0 :     jit::Label greedy_loop_label;
    4318           0 :     Trace counter_backtrack_trace;
    4319         756 :     counter_backtrack_trace.set_backtrack(&greedy_loop_label);
    4320           0 :     if (not_at_start()) counter_backtrack_trace.set_at_start(false);
    4321             : 
    4322         378 :     if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
    4323             :         // Here we have special handling for greedy loops containing only text nodes
    4324             :         // and other simple nodes.  These are handled by pushing the current
    4325             :         // position on the stack and then incrementing the current position each
    4326             :         // time around the switch.  On backtrack we decrement the current position
    4327             :         // and check it against the pushed value.  This avoids pushing backtrack
    4328             :         // information for each iteration of the loop, which could take up a lot of
    4329             :         // space.
    4330           0 :         greedy_loop = true;
    4331           0 :         MOZ_ASSERT(trace->stop_node() == nullptr);
    4332           0 :         macro_assembler->PushCurrentPosition();
    4333           0 :         current_trace = &counter_backtrack_trace;
    4334           0 :         jit::Label greedy_match_failed;
    4335           0 :         Trace greedy_match_trace;
    4336           0 :         if (not_at_start()) greedy_match_trace.set_at_start(false);
    4337           0 :         greedy_match_trace.set_backtrack(&greedy_match_failed);
    4338           0 :         jit::Label loop_label;
    4339           0 :         macro_assembler->Bind(&loop_label);
    4340           0 :         greedy_match_trace.set_stop_node(this);
    4341           0 :         greedy_match_trace.set_loop_label(&loop_label);
    4342         136 :         alternatives()[0].node()->Emit(compiler, &greedy_match_trace);
    4343          68 :         macro_assembler->Bind(&greedy_match_failed);
    4344             :     }
    4345             : 
    4346         756 :     jit::Label second_choice;  // For use in greedy matches.
    4347           0 :     macro_assembler->Bind(&second_choice);
    4348             : 
    4349           0 :     size_t first_normal_choice = greedy_loop ? 1 : 0;
    4350             : 
    4351           0 :     bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;
    4352         378 :     const int kEatsAtLeastNotYetInitialized = -1;
    4353           0 :     int eats_at_least = kEatsAtLeastNotYetInitialized;
    4354             : 
    4355           0 :     bool skip_was_emitted = false;
    4356             : 
    4357           0 :     if (!greedy_loop && choice_count == 2) {
    4358           0 :         GuardedAlternative alt1 = alternatives()[1];
    4359           0 :         if (!alt1.guards() || alt1.guards()->length() == 0) {
    4360         182 :             RegExpNode* eats_anything_node = alt1.node();
    4361         182 :             if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) == this) {
    4362             :                 // At this point we know that we are at a non-greedy loop that will eat
    4363             :                 // any character one at a time.  Any non-anchored regexp has such a
    4364             :                 // loop prepended to it in order to find where it starts.  We look for
    4365             :                 // a pattern of the form ...abc... where we can look 6 characters ahead
    4366             :                 // and step forwards 3 if the character is not one of abc.  Abc need
    4367             :                 // not be atoms, they can be any reasonably limited character class or
    4368             :                 // small alternation.
    4369           0 :                 MOZ_ASSERT(trace->is_trivial());  // This is the case on LoopChoiceNodes.
    4370           0 :                 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
    4371          58 :                 if (lookahead == nullptr) {
    4372         116 :                     eats_at_least = Min(kMaxLookaheadForBoyerMoore,
    4373             :                                         EatsAtLeast(kMaxLookaheadForBoyerMoore,
    4374             :                                                     kRecursionBudget,
    4375          58 :                                                     not_at_start));
    4376           0 :                     if (eats_at_least >= 1) {
    4377             :                         BoyerMooreLookahead* bm =
    4378           0 :                             alloc()->newInfallible<BoyerMooreLookahead>(alloc(), eats_at_least, compiler);
    4379           0 :                         GuardedAlternative alt0 = alternatives()[0];
    4380          58 :                         alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
    4381          58 :                         skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
    4382             :                     }
    4383             :                 } else {
    4384           0 :                     skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
    4385             :                 }
    4386             :             }
    4387             :         }
    4388             :     }
    4389             : 
    4390           0 :     if (eats_at_least == kEatsAtLeastNotYetInitialized) {
    4391             :         // Save some time by looking at most one machine word ahead.
    4392         320 :         eats_at_least =
    4393           0 :             EatsAtLeast(compiler->latin1() ? 4 : 2, kRecursionBudget, not_at_start);
    4394             :     }
    4395           0 :     int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
    4396             : 
    4397           0 :     bool preload_is_current = !skip_was_emitted &&
    4398         721 :         (current_trace->characters_preloaded() == preload_characters);
    4399           0 :     bool preload_has_checked_bounds = preload_is_current;
    4400             : 
    4401         756 :     AlternativeGenerationList alt_gens(alloc(), choice_count);
    4402             : 
    4403             :     // For now we just call all choices one after the other.  The idea ultimately
    4404             :     // is to use the Dispatch table to try only the relevant ones.
    4405           0 :     for (size_t i = first_normal_choice; i < choice_count; i++) {
    4406           0 :         GuardedAlternative alternative = alternatives()[i];
    4407           0 :         AlternativeGeneration* alt_gen = alt_gens.at(i);
    4408           0 :         alt_gen->quick_check_details.set_characters(preload_characters);
    4409           0 :         const GuardVector* guards = alternative.guards();
    4410        1306 :         Trace new_trace(*current_trace);
    4411           0 :         new_trace.set_characters_preloaded(preload_is_current ?
    4412             :                                            preload_characters :
    4413           0 :                                            0);
    4414        1306 :         if (preload_has_checked_bounds) {
    4415           0 :             new_trace.set_bound_checked_up_to(preload_characters);
    4416             :         }
    4417           0 :         new_trace.quick_check_performed()->Clear();
    4418           0 :         if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
    4419           0 :         alt_gen->expects_preload = preload_is_current;
    4420           0 :         bool generate_full_check_inline = false;
    4421        2611 :         if (try_to_emit_quick_check_for_alternative(i) &&
    4422        1305 :             alternative.node()->EmitQuickCheck(compiler,
    4423             :                                                &new_trace,
    4424             :                                                preload_has_checked_bounds,
    4425             :                                                &alt_gen->possible_success,
    4426             :                                                &alt_gen->quick_check_details,
    4427             :                                                i < choice_count - 1)) {
    4428             :             // Quick check was generated for this choice.
    4429        1084 :             preload_is_current = true;
    4430        1084 :             preload_has_checked_bounds = true;
    4431             :             // On the last choice in the ChoiceNode we generated the quick
    4432             :             // check to fall through on possible success.  So now we need to
    4433             :             // generate the full check inline.
    4434           0 :             if (i == choice_count - 1) {
    4435           0 :                 macro_assembler->Bind(&alt_gen->possible_success);
    4436           0 :                 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
    4437           0 :                 new_trace.set_characters_preloaded(preload_characters);
    4438         448 :                 new_trace.set_bound_checked_up_to(preload_characters);
    4439           0 :                 generate_full_check_inline = true;
    4440             :             }
    4441           0 :         } else if (alt_gen->quick_check_details.cannot_match()) {
    4442          19 :             if (i == choice_count - 1 && !greedy_loop) {
    4443           0 :                 macro_assembler->JumpOrBacktrack(trace->backtrack());
    4444             :             }
    4445          19 :             continue;
    4446             :         } else {
    4447             :             // No quick check was generated.  Put the full code here.
    4448             :             // If this is not the first choice then there could be slow checks from
    4449             :             // previous cases that go here when they fail.  There's no reason to
    4450             :             // insist that they preload characters since the slow check we are about
    4451             :             // to generate probably can't use it.
    4452         203 :             if (i != first_normal_choice) {
    4453         126 :                 alt_gen->expects_preload = false;
    4454             :                 new_trace.InvalidateCurrentCharacter();
    4455             :             }
    4456         203 :             if (i < choice_count - 1) {
    4457          49 :                 new_trace.set_backtrack(&alt_gen->after);
    4458             :             }
    4459             :             generate_full_check_inline = true;
    4460             :         }
    4461           0 :         if (generate_full_check_inline) {
    4462           0 :             if (new_trace.actions() != nullptr)
    4463           0 :                 new_trace.set_flush_budget(new_flush_budget);
    4464           0 :             if (guards) {
    4465          57 :                 for (size_t j = 0; j < guards->length(); j++)
    4466           0 :                     GenerateGuard(macro_assembler, (*guards)[j], &new_trace);
    4467             :             }
    4468         427 :             alternative.node()->Emit(compiler, &new_trace);
    4469           0 :             preload_is_current = false;
    4470             :         }
    4471           0 :         macro_assembler->Bind(&alt_gen->after);
    4472             :     }
    4473         378 :     if (greedy_loop) {
    4474           0 :         macro_assembler->Bind(&greedy_loop_label);
    4475             :         // If we have unwound to the bottom then backtrack.
    4476           0 :         macro_assembler->CheckGreedyLoop(trace->backtrack());
    4477             :         // Otherwise try the second priority at an earlier position.
    4478          68 :         macro_assembler->AdvanceCurrentPosition(-text_length);
    4479          68 :         macro_assembler->JumpOrBacktrack(&second_choice);
    4480             :     }
    4481             : 
    4482             :     // At this point we need to generate slow checks for the alternatives where
    4483             :     // the quick check was inlined.  We can recognize these because the associated
    4484             :     // label was bound.
    4485           0 :     for (size_t i = first_normal_choice; i < choice_count - 1; i++) {
    4486        1856 :         AlternativeGeneration* alt_gen = alt_gens.at(i);
    4487         928 :         Trace new_trace(*current_trace);
    4488             :         // If there are actions to be flushed we have to limit how many times
    4489             :         // they are flushed.  Take the budget of the parent trace and distribute
    4490             :         // it fairly amongst the children.
    4491         928 :         if (new_trace.actions() != nullptr) {
    4492           0 :             new_trace.set_flush_budget(new_flush_budget);
    4493             :         }
    4494           0 :         EmitOutOfLineContinuation(compiler,
    4495             :                                   &new_trace,
    4496        1856 :                                   alternatives()[i],
    4497             :                                   alt_gen,
    4498             :                                   preload_characters,
    4499        2784 :                                   alt_gens.at(i + 1)->expects_preload);
    4500             :     }
    4501             : }
    4502             : 
    4503             : void
    4504         928 : ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
    4505             :                                       Trace* trace,
    4506             :                                       GuardedAlternative alternative,
    4507             :                                       AlternativeGeneration* alt_gen,
    4508             :                                       int preload_characters,
    4509             :                                       bool next_expects_preload)
    4510             : {
    4511        1856 :     if (!alt_gen->possible_success.used())
    4512           0 :         return;
    4513             : 
    4514           0 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4515           0 :     macro_assembler->Bind(&alt_gen->possible_success);
    4516           0 :     Trace out_of_line_trace(*trace);
    4517           0 :     out_of_line_trace.set_characters_preloaded(preload_characters);
    4518           0 :     out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
    4519           0 :     if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
    4520           0 :     const GuardVector* guards = alternative.guards();
    4521           0 :     if (next_expects_preload) {
    4522           0 :         jit::Label reload_current_char;
    4523           0 :         out_of_line_trace.set_backtrack(&reload_current_char);
    4524           0 :         if (guards) {
    4525          84 :             for (size_t j = 0; j < guards->length(); j++)
    4526           0 :                 GenerateGuard(macro_assembler, (*guards)[j], &out_of_line_trace);
    4527             :         }
    4528         778 :         alternative.node()->Emit(compiler, &out_of_line_trace);
    4529         778 :         macro_assembler->Bind(&reload_current_char);
    4530             :         // Reload the current character, since the next quick check expects that.
    4531             :         // We don't need to check bounds here because we only get into this
    4532             :         // code through a quick check which already did the checked load.
    4533         778 :         macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
    4534             :                                               nullptr,
    4535             :                                               false,
    4536        1556 :                                               preload_characters);
    4537           0 :         macro_assembler->JumpOrBacktrack(&(alt_gen->after));
    4538             :     } else {
    4539           0 :         out_of_line_trace.set_backtrack(&(alt_gen->after));
    4540           0 :         if (guards) {
    4541           0 :             for (size_t j = 0; j < guards->length(); j++)
    4542           0 :                 GenerateGuard(macro_assembler, (*guards)[j], &out_of_line_trace);
    4543             :         }
    4544          82 :         alternative.node()->Emit(compiler, &out_of_line_trace);
    4545             :     }
    4546             : }
    4547             : 
    4548             : void
    4549           0 : ActionNode::Emit(RegExpCompiler* compiler, Trace* trace)
    4550             : {
    4551           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    4552           0 :     LimitResult limit_result = LimitVersions(compiler, trace);
    4553        2735 :     if (limit_result == DONE) return;
    4554           0 :     MOZ_ASSERT(limit_result == CONTINUE);
    4555             : 
    4556           0 :     RecursionCheck rc(compiler);
    4557             : 
    4558        1997 :     switch (action_type_) {
    4559             :       case STORE_POSITION: {
    4560             :         Trace::DeferredCapture
    4561             :             new_capture(data_.u_position_register.reg,
    4562           0 :                         data_.u_position_register.is_capture,
    4563           0 :                         trace);
    4564           0 :         Trace new_trace = *trace;
    4565        1848 :         new_trace.add_action(&new_capture);
    4566        1848 :         on_success()->Emit(compiler, &new_trace);
    4567             :         break;
    4568             :       }
    4569             :       case INCREMENT_REGISTER: {
    4570             :         Trace::DeferredIncrementRegister
    4571           0 :             new_increment(data_.u_increment_register.reg);
    4572           0 :         Trace new_trace = *trace;
    4573          43 :         new_trace.add_action(&new_increment);
    4574          43 :         on_success()->Emit(compiler, &new_trace);
    4575             :         break;
    4576             :       }
    4577             :       case SET_REGISTER: {
    4578             :         Trace::DeferredSetRegister
    4579           0 :             new_set(data_.u_store_register.reg, data_.u_store_register.value);
    4580           0 :         Trace new_trace = *trace;
    4581          39 :         new_trace.add_action(&new_set);
    4582          39 :         on_success()->Emit(compiler, &new_trace);
    4583             :         break;
    4584             :       }
    4585             :       case CLEAR_CAPTURES: {
    4586             :         Trace::DeferredClearCaptures
    4587             :             new_capture(Interval(data_.u_clear_captures.range_from,
    4588           0 :                                  data_.u_clear_captures.range_to));
    4589           0 :         Trace new_trace = *trace;
    4590          15 :         new_trace.add_action(&new_capture);
    4591          15 :         on_success()->Emit(compiler, &new_trace);
    4592             :         break;
    4593             :       }
    4594             :       case BEGIN_SUBMATCH:
    4595          22 :         if (!trace->is_trivial()) {
    4596           0 :             trace->Flush(compiler, this);
    4597             :         } else {
    4598           0 :             assembler->WriteCurrentPositionToRegister(data_.u_submatch.current_position_register, 0);
    4599          11 :             assembler->WriteBacktrackStackPointerToRegister(data_.u_submatch.stack_pointer_register);
    4600          11 :             on_success()->Emit(compiler, trace);
    4601             :         }
    4602             :         break;
    4603             :       case EMPTY_MATCH_CHECK: {
    4604           0 :         int start_pos_reg = data_.u_empty_match_check.start_register;
    4605           0 :         int stored_pos = 0;
    4606           0 :         int rep_reg = data_.u_empty_match_check.repetition_register;
    4607           0 :         bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
    4608           0 :         bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
    4609           0 :         if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
    4610             :             // If we know we haven't advanced and there is no minimum we
    4611             :             // can just backtrack immediately.
    4612           0 :             assembler->JumpOrBacktrack(trace->backtrack());
    4613           0 :         } else if (know_dist && stored_pos < trace->cp_offset()) {
    4614             :             // If we know we've advanced we can generate the continuation
    4615             :             // immediately.
    4616           0 :             on_success()->Emit(compiler, trace);
    4617           0 :         } else if (!trace->is_trivial()) {
    4618           0 :             trace->Flush(compiler, this);
    4619             :         } else {
    4620           0 :             jit::Label skip_empty_check;
    4621             :             // If we have a minimum number of repetitions we check the current
    4622             :             // number first and skip the empty check if it's not enough.
    4623           0 :             if (has_minimum) {
    4624           0 :                 int limit = data_.u_empty_match_check.repetition_limit;
    4625           0 :                 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
    4626             :             }
    4627             :             // If the match is empty we bail out, otherwise we fall through
    4628             :             // to the on-success continuation.
    4629           0 :             assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
    4630           0 :                                        trace->backtrack());
    4631           0 :             assembler->Bind(&skip_empty_check);
    4632           0 :             on_success()->Emit(compiler, trace);
    4633             :         }
    4634             :         break;
    4635             :       }
    4636             :       case POSITIVE_SUBMATCH_SUCCESS: {
    4637           0 :         if (!trace->is_trivial()) {
    4638          20 :             trace->Flush(compiler, this);
    4639           0 :             return;
    4640             :         }
    4641           0 :         assembler->ReadCurrentPositionFromRegister(data_.u_submatch.current_position_register);
    4642           0 :         assembler->ReadBacktrackStackPointerFromRegister(data_.u_submatch.stack_pointer_register);
    4643           0 :         int clear_register_count = data_.u_submatch.clear_register_count;
    4644           0 :         if (clear_register_count == 0) {
    4645          10 :             on_success()->Emit(compiler, trace);
    4646           0 :             return;
    4647             :         }
    4648           0 :         int clear_registers_from = data_.u_submatch.clear_register_from;
    4649           0 :         jit::Label clear_registers_backtrack;
    4650           0 :         Trace new_trace = *trace;
    4651           0 :         new_trace.set_backtrack(&clear_registers_backtrack);
    4652           0 :         on_success()->Emit(compiler, &new_trace);
    4653             : 
    4654           0 :         assembler->Bind(&clear_registers_backtrack);
    4655           0 :         int clear_registers_to = clear_registers_from + clear_register_count - 1;
    4656           0 :         assembler->ClearRegisters(clear_registers_from, clear_registers_to);
    4657             : 
    4658           0 :         MOZ_ASSERT(trace->backtrack() == nullptr);
    4659           0 :         assembler->Backtrack();
    4660             :         return;
    4661             :       }
    4662             :       default:
    4663           0 :         MOZ_CRASH("Bad action");
    4664             :     }
    4665             : }
    4666             : 
    4667             : void
    4668           0 : BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace)
    4669             : {
    4670           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    4671           0 :     if (!trace->is_trivial()) {
    4672           0 :         trace->Flush(compiler, this);
    4673           0 :         return;
    4674             :     }
    4675             : 
    4676           0 :     LimitResult limit_result = LimitVersions(compiler, trace);
    4677           0 :     if (limit_result == DONE) return;
    4678           0 :     MOZ_ASSERT(limit_result == CONTINUE);
    4679             : 
    4680           0 :     RecursionCheck rc(compiler);
    4681             : 
    4682           0 :     MOZ_ASSERT(start_reg_ + 1 == end_reg_);
    4683           0 :     if (compiler->ignore_case()) {
    4684           0 :         assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
    4685             :                                                    trace->backtrack(),
    4686           0 :                                                    compiler->unicode());
    4687             :     } else {
    4688           0 :         assembler->CheckNotBackReference(start_reg_, trace->backtrack());
    4689             :     }
    4690           0 :     on_success()->Emit(compiler, trace);
    4691             : }
    4692             : 
    4693             : RegExpNode::LimitResult
    4694        4639 : RegExpNode::LimitVersions(RegExpCompiler* compiler, Trace* trace)
    4695             : {
    4696             :     // If we are generating a greedy loop then don't stop and don't reuse code.
    4697        4639 :     if (trace->stop_node() != nullptr)
    4698             :         return CONTINUE;
    4699             : 
    4700           0 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4701        4571 :     if (trace->is_trivial()) {
    4702        1780 :         if (label()->bound()) {
    4703             :             // We are being asked to generate a generic version, but that's already
    4704             :             // been done so just go to it.
    4705        1012 :             macro_assembler->JumpOrBacktrack(label());
    4706           0 :             return DONE;
    4707             :         }
    4708         384 :         if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
    4709             :             // To avoid too deep recursion we push the node to the work queue and just
    4710             :             // generate a goto here.
    4711           0 :             compiler->AddWork(this);
    4712           0 :             macro_assembler->JumpOrBacktrack(label());
    4713           0 :             return DONE;
    4714             :         }
    4715             :         // Generate generic version of the node and bind the label for later use.
    4716         768 :         macro_assembler->Bind(label());
    4717         384 :         return CONTINUE;
    4718             :     }
    4719             : 
    4720             :     // We are being asked to make a non-generic version.  Keep track of how many
    4721             :     // non-generic versions we generate so as not to overdo it.
    4722           0 :     trace_count_++;
    4723        6968 :     if (trace_count_ < kMaxCopiesCodeGenerated &&
    4724        3287 :         compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
    4725             :         return CONTINUE;
    4726             :     }
    4727             : 
    4728             :     // If we get here code has been generated for this node too many times or
    4729             :     // recursion is too deep.  Time to switch to a generic version.  The code for
    4730             :     // generic versions above can handle deep recursion properly.
    4731         394 :     trace->Flush(compiler, this);
    4732         394 :     return DONE;
    4733             : }
    4734             : 
    4735             : bool
    4736        1305 : RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
    4737             :                            Trace* trace,
    4738             :                            bool preload_has_checked_bounds,
    4739             :                            jit::Label* on_possible_success,
    4740             :                            QuickCheckDetails* details,
    4741             :                            bool fall_through_on_failure)
    4742             : {
    4743           0 :     if (details->characters() == 0) return false;
    4744           0 :     GetQuickCheckDetails(
    4745           0 :                          details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
    4746           0 :     if (details->cannot_match()) return false;
    4747        1185 :     if (!details->Rationalize(compiler->latin1())) return false;
    4748           0 :     MOZ_ASSERT(details->characters() == 1 ||
    4749             :                compiler->macro_assembler()->CanReadUnaligned());
    4750        1084 :     uint32_t mask = details->mask();
    4751           0 :     uint32_t value = details->value();
    4752             : 
    4753           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    4754             : 
    4755        1084 :     if (trace->characters_preloaded() != details->characters()) {
    4756           0 :         assembler->LoadCurrentCharacter(trace->cp_offset(),
    4757             :                                         trace->backtrack(),
    4758         258 :                                         !preload_has_checked_bounds,
    4759         516 :                                         details->characters());
    4760             :     }
    4761             : 
    4762           0 :     bool need_mask = true;
    4763             : 
    4764        1084 :     if (details->characters() == 1) {
    4765             :         // If number of characters preloaded is 1 then we used a byte or 16 bit
    4766             :         // load so the value is already masked down.
    4767          76 :         uint32_t char_mask = MaximumCharacter(compiler->latin1());
    4768          38 :         if ((mask & char_mask) == char_mask) need_mask = false;
    4769             :         mask &= char_mask;
    4770             :     } else {
    4771             :         // For 2-character preloads in LATIN1 mode or 1-character preloads in
    4772             :         // TWO_BYTE mode we also use a 16 bit load with zero extend.
    4773           0 :         if (details->characters() == 2 && compiler->latin1()) {
    4774         149 :             if ((mask & 0xffff) == 0xffff) need_mask = false;
    4775         897 :         } else if (details->characters() == 1 && !compiler->latin1()) {
    4776             :             if ((mask & 0xffff) == 0xffff) need_mask = false;
    4777             :         } else {
    4778         897 :             if (mask == 0xffffffff) need_mask = false;
    4779             :         }
    4780             :     }
    4781             : 
    4782           0 :     if (fall_through_on_failure) {
    4783         860 :         if (need_mask) {
    4784           0 :             assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
    4785             :         } else {
    4786         766 :             assembler->CheckCharacter(value, on_possible_success);
    4787             :         }
    4788             :     } else {
    4789         224 :         if (need_mask) {
    4790           0 :             assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
    4791             :         } else {
    4792         179 :             assembler->CheckNotCharacter(value, trace->backtrack());
    4793             :         }
    4794             :     }
    4795             :     return true;
    4796             : }
    4797             : 
    4798             : bool
    4799         419 : TextNode::FillInBMInfo(int initial_offset,
    4800             :                        int budget,
    4801             :                        BoyerMooreLookahead* bm,
    4802             :                        bool not_at_start)
    4803             : {
    4804         419 :     if (!bm->CheckOverRecursed())
    4805             :         return false;
    4806             : 
    4807         419 :     if (initial_offset >= bm->length())
    4808             :         return true;
    4809             : 
    4810           0 :     int offset = initial_offset;
    4811           0 :     int max_char = bm->max_char();
    4812           0 :     for (size_t i = 0; i < elements().length(); i++) {
    4813           0 :         if (offset >= bm->length()) {
    4814           0 :             if (initial_offset == 0)
    4815           0 :                 set_bm_info(not_at_start, bm);
    4816           0 :             return true;
    4817             :         }
    4818           0 :         TextElement text = elements()[i];
    4819           0 :         if (text.text_type() == TextElement::ATOM) {
    4820           0 :             RegExpAtom* atom = text.atom();
    4821           0 :             for (int j = 0; j < atom->length(); j++, offset++) {
    4822           0 :                 if (offset >= bm->length()) {
    4823         304 :                     if (initial_offset == 0)
    4824         279 :                         set_bm_info(not_at_start, bm);
    4825             :                     return true;
    4826             :                 }
    4827        4942 :                 char16_t character = atom->data()[j];
    4828           0 :                 if (bm->compiler()->ignore_case()) {
    4829             :                     char16_t chars[kEcma262UnCanonicalizeMaxWidth];
    4830           0 :                     int length = GetCaseIndependentLetters(character,
    4831           0 :                                                            bm->max_char() == kMaxOneByteCharCode,
    4832           0 :                                                            bm->compiler()->unicode(),
    4833           0 :                                                            chars);
    4834          27 :                     for (int j = 0; j < length; j++)
    4835           0 :                         bm->Set(offset, chars[j]);
    4836             :                 } else {
    4837        2462 :                     if (character <= max_char) bm->Set(offset, character);
    4838             :                 }
    4839             :             }
    4840             :         } else {
    4841           0 :             MOZ_ASSERT(TextElement::CHAR_CLASS == text.text_type());
    4842           0 :             RegExpCharacterClass* char_class = text.char_class();
    4843         108 :             const CharacterRangeVector& ranges = char_class->ranges(alloc());
    4844          54 :             if (char_class->is_negated()) {
    4845             :                 bm->SetAll(offset);
    4846             :             } else {
    4847           0 :                 for (size_t k = 0; k < ranges.length(); k++) {
    4848         214 :                     const CharacterRange& range = ranges[k];
    4849           0 :                     if (range.from() > max_char)
    4850             :                         continue;
    4851         280 :                     int to = Min(max_char, static_cast<int>(range.to()));
    4852         280 :                     bm->SetInterval(offset, Interval(range.from(), to));
    4853             :                 }
    4854             :             }
    4855          54 :             offset++;
    4856             :         }
    4857             :     }
    4858         114 :     if (offset >= bm->length()) {
    4859          62 :         if (initial_offset == 0) set_bm_info(not_at_start, bm);
    4860             :         return true;
    4861             :     }
    4862         104 :     if (!on_success()->FillInBMInfo(offset,
    4863             :                                     budget - 1,
    4864             :                                     bm,
    4865           0 :                                     true))   // Not at start after a text node.
    4866             :         return false;
    4867          52 :     if (initial_offset == 0)
    4868          22 :         set_bm_info(not_at_start, bm);
    4869             :     return true;
    4870             : }
    4871             : 
    4872             : // -------------------------------------------------------------------
    4873             : // QuickCheckDetails
    4874             : 
    4875             : // Takes the left-most 1-bit and smears it out, setting all bits to its right.
    4876             : static inline uint32_t
    4877             : SmearBitsRight(uint32_t v)
    4878             : {
    4879           0 :     v |= v >> 1;
    4880           0 :     v |= v >> 2;
    4881           0 :     v |= v >> 4;
    4882         506 :     v |= v >> 8;
    4883         506 :     v |= v >> 16;
    4884             :     return v;
    4885             : }
    4886             : 
    4887             : // Here is the meat of GetQuickCheckDetails (see also the comment on the
    4888             : // super-class in the .h file).
    4889             : //
    4890             : // We iterate along the text object, building up for each character a
    4891             : // mask and value that can be used to test for a quick failure to match.
    4892             : // The masks and values for the positions will be combined into a single
    4893             : // machine word for the current character width in order to be used in
    4894             : // generating a quick check.
    4895             : void
    4896        2320 : TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
    4897             :                                RegExpCompiler* compiler,
    4898             :                                int characters_filled_in,
    4899             :                                bool not_at_start)
    4900             : {
    4901           0 :     MOZ_ASSERT(characters_filled_in < details->characters());
    4902        2320 :     int characters = details->characters();
    4903           0 :     int char_mask = MaximumCharacter(compiler->latin1());
    4904             : 
    4905           0 :     for (size_t k = 0; k < elements().length(); k++) {
    4906           0 :         TextElement elm = elements()[k];
    4907           0 :         if (elm.text_type() == TextElement::ATOM) {
    4908        2028 :             const CharacterVector& quarks = elm.atom()->data();
    4909           0 :             for (size_t i = 0; i < (size_t) characters && i < quarks.length(); i++) {
    4910             :                 QuickCheckDetails::Position* pos =
    4911           0 :                     details->positions(characters_filled_in);
    4912        6504 :                 char16_t c = quarks[i];
    4913        6504 :                 if (c > char_mask) {
    4914             :                     // If we expect a non-LATIN1 character from an LATIN1 string,
    4915             :                     // there is no way we can match. Not even case independent
    4916             :                     // matching can turn an LATIN1 character into non-LATIN1 or
    4917             :                     // vice versa.
    4918           0 :                     details->set_cannot_match();
    4919           0 :                     pos->determines_perfectly = false;
    4920           0 :                     return;
    4921             :                 }
    4922           0 :                 if (compiler->ignore_case()) {
    4923             :                     char16_t chars[kEcma262UnCanonicalizeMaxWidth];
    4924           0 :                     size_t length = GetCaseIndependentLetters(c, compiler->latin1(),
    4925           0 :                                                               compiler->unicode(), chars);
    4926          82 :                     MOZ_ASSERT(length != 0);  // Can only happen if c > char_mask (see above).
    4927          82 :                     if (length == 1) {
    4928             :                         // This letter has no case equivalents, so it's nice and simple
    4929             :                         // and the mask-compare will determine definitely whether we have
    4930             :                         // a match at this character position.
    4931           0 :                         pos->mask = char_mask;
    4932          67 :                         pos->value = c;
    4933           0 :                         pos->determines_perfectly = true;
    4934             :                     } else {
    4935           0 :                         uint32_t common_bits = char_mask;
    4936           0 :                         uint32_t bits = chars[0];
    4937           0 :                         for (size_t j = 1; j < length; j++) {
    4938           0 :                             uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
    4939          15 :                             common_bits ^= differing_bits;
    4940          15 :                             bits &= common_bits;
    4941             :                         }
    4942             :                         // If length is 2 and common bits has only one zero in it then
    4943             :                         // our mask and compare instruction will determine definitely
    4944             :                         // whether we have a match at this character position.  Otherwise
    4945             :                         // it can only be an approximate check.
    4946           0 :                         uint32_t one_zero = (common_bits | ~char_mask);
    4947          15 :                         if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
    4948           0 :                             pos->determines_perfectly = true;
    4949             :                         }
    4950          15 :                         pos->mask = common_bits;
    4951          15 :                         pos->value = bits;
    4952             :                     }
    4953             :                 } else {
    4954             :                     // Don't ignore case.  Nice simple case where the mask-compare will
    4955             :                     // determine definitely whether we have a match at this character
    4956             :                     // position.
    4957           0 :                     pos->mask = char_mask;
    4958        6422 :                     pos->value = c;
    4959           0 :                     pos->determines_perfectly = true;
    4960             :                 }
    4961           0 :                 characters_filled_in++;
    4962        6504 :                 MOZ_ASSERT(characters_filled_in <= details->characters());
    4963        6504 :                 if (characters_filled_in == details->characters()) {
    4964             :                     return;
    4965             :                 }
    4966             :             }
    4967             :         } else {
    4968             :             QuickCheckDetails::Position* pos =
    4969           0 :                 details->positions(characters_filled_in);
    4970           0 :             RegExpCharacterClass* tree = elm.char_class();
    4971         590 :             const CharacterRangeVector& ranges = tree->ranges(alloc());
    4972         295 :             if (tree->is_negated()) {
    4973             :                 // A quick check uses multi-character mask and compare.  There is no
    4974             :                 // useful way to incorporate a negative char class into this scheme
    4975             :                 // so we just conservatively create a mask and value that will always
    4976             :                 // succeed.
    4977          47 :                 pos->mask = 0;
    4978          47 :                 pos->value = 0;
    4979             :             } else {
    4980             :                 size_t first_range = 0;
    4981           0 :                 while (ranges[first_range].from() > char_mask) {
    4982           0 :                     first_range++;
    4983           0 :                     if (first_range == ranges.length()) {
    4984           0 :                         details->set_cannot_match();
    4985           0 :                         pos->determines_perfectly = false;
    4986           0 :                         return;
    4987             :                     }
    4988             :                 }
    4989           0 :                 CharacterRange range = ranges[first_range];
    4990           0 :                 char16_t from = range.from();
    4991           0 :                 char16_t to = range.to();
    4992         248 :                 if (to > char_mask) {
    4993           0 :                     to = char_mask;
    4994             :                 }
    4995         248 :                 uint32_t differing_bits = (from ^ to);
    4996             :                 // A mask and compare is only perfect if the differing bits form a
    4997             :                 // number like 00011111 with one single block of trailing 1s.
    4998           0 :                 if ((differing_bits & (differing_bits + 1)) == 0 &&
    4999         145 :                     from + differing_bits == to) {
    5000           0 :                     pos->determines_perfectly = true;
    5001             :                 }
    5002           0 :                 uint32_t common_bits = ~SmearBitsRight(differing_bits);
    5003           0 :                 uint32_t bits = (from & common_bits);
    5004           0 :                 for (size_t i = first_range + 1; i < ranges.length(); i++) {
    5005           0 :                     CharacterRange range = ranges[i];
    5006           0 :                     char16_t from = range.from();
    5007           0 :                     char16_t to = range.to();
    5008         343 :                     if (from > char_mask) continue;
    5009         258 :                     if (to > char_mask) to = char_mask;
    5010             :                     // Here we are combining more ranges into the mask and compare
    5011             :                     // value.  With each new range the mask becomes more sparse and
    5012             :                     // so the chances of a false positive rise.  A character class
    5013             :                     // with multiple ranges is assumed never to be equivalent to a
    5014             :                     // mask and compare operation.
    5015           0 :                     pos->determines_perfectly = false;
    5016           0 :                     uint32_t new_common_bits = (from ^ to);
    5017           0 :                     new_common_bits = ~SmearBitsRight(new_common_bits);
    5018           0 :                     common_bits &= new_common_bits;
    5019           0 :                     bits &= new_common_bits;
    5020           0 :                     uint32_t differing_bits = (from & common_bits) ^ bits;
    5021         258 :                     common_bits ^= differing_bits;
    5022           0 :                     bits &= common_bits;
    5023             :                 }
    5024         248 :                 pos->mask = common_bits;
    5025           0 :                 pos->value = bits;
    5026             :             }
    5027           0 :             characters_filled_in++;
    5028         295 :             MOZ_ASSERT(characters_filled_in <= details->characters());
    5029         295 :             if (characters_filled_in == details->characters()) {
    5030             :                 return;
    5031             :             }
    5032             :         }
    5033             :     }
    5034           0 :     MOZ_ASSERT(characters_filled_in != details->characters());
    5035         327 :     if (!details->cannot_match()) {
    5036         327 :         on_success()-> GetQuickCheckDetails(details,
    5037             :                                             compiler,
    5038             :                                             characters_filled_in,
    5039         327 :                                             true);
    5040             :     }
    5041             : }
    5042             : 
    5043             : void
    5044           0 : QuickCheckDetails::Clear()
    5045             : {
    5046           0 :     for (int i = 0; i < characters_; i++) {
    5047           0 :         positions_[i].mask = 0;
    5048        5006 :         positions_[i].value = 0;
    5049           0 :         positions_[i].determines_perfectly = false;
    5050             :     }
    5051        2564 :     characters_ = 0;
    5052           0 : }
    5053             : 
    5054             : void
    5055           0 : QuickCheckDetails::Advance(int by)
    5056             : {
    5057        1364 :     MOZ_ASSERT(by >= 0);
    5058        1364 :     if (by >= characters_) {
    5059             :         Clear();
    5060             :         return;
    5061             :     }
    5062         466 :     for (int i = 0; i < characters_ - by; i++) {
    5063           0 :         positions_[i] = positions_[by + i];
    5064             :     }
    5065           0 :     for (int i = characters_ - by; i < characters_; i++) {
    5066           0 :         positions_[i].mask = 0;
    5067         146 :         positions_[i].value = 0;
    5068           0 :         positions_[i].determines_perfectly = false;
    5069             :     }
    5070         106 :     characters_ -= by;
    5071             :     // We could change mask_ and value_ here but we would never advance unless
    5072             :     // they had already been used in a check and they won't be used again because
    5073             :     // it would gain us nothing.  So there's no point.
    5074             : }
    5075             : 
    5076             : bool
    5077           0 : QuickCheckDetails::Rationalize(bool is_latin1)
    5078             : {
    5079        1185 :     bool found_useful_op = false;
    5080           0 :     uint32_t char_mask = MaximumCharacter(is_latin1);
    5081             : 
    5082           0 :     mask_ = 0;
    5083           0 :     value_ = 0;
    5084           0 :     int char_shift = 0;
    5085           0 :     for (int i = 0; i < characters_; i++) {
    5086           0 :         Position* pos = &positions_[i];
    5087           0 :         if ((pos->mask & kMaxOneByteCharCode) != 0)
    5088           0 :             found_useful_op = true;
    5089           0 :         mask_ |= (pos->mask & char_mask) << char_shift;
    5090        4173 :         value_ |= (pos->value & char_mask) << char_shift;
    5091           0 :         char_shift += is_latin1 ? 8 : 16;
    5092             :     }
    5093        1185 :     return found_useful_op;
    5094             : }
    5095             : 
    5096           0 : void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index)
    5097             : {
    5098         955 :     MOZ_ASSERT(characters_ == other->characters_);
    5099           0 :     if (other->cannot_match_)
    5100             :         return;
    5101           0 :     if (cannot_match_) {
    5102          53 :         *this = *other;
    5103           0 :         return;
    5104             :     }
    5105           0 :     for (int i = from_index; i < characters_; i++) {
    5106           0 :         QuickCheckDetails::Position* pos = positions(i);
    5107           0 :         QuickCheckDetails::Position* other_pos = other->positions(i);
    5108           0 :         if (pos->mask != other_pos->mask ||
    5109        1125 :             pos->value != other_pos->value ||
    5110         419 :             !other_pos->determines_perfectly) {
    5111             :             // Our mask-compare operation will be approximate unless we have the
    5112             :             // exact same operation on both sides of the alternation.
    5113           0 :             pos->determines_perfectly = false;
    5114             :         }
    5115           0 :         pos->mask &= other_pos->mask;
    5116           0 :         pos->value &= pos->mask;
    5117           0 :         other_pos->value &= pos->mask;
    5118           0 :         char16_t differing_bits = (pos->value ^ other_pos->value);
    5119             :         pos->mask &= ~differing_bits;
    5120             :         pos->value &= pos->mask;
    5121             :     }
    5122             : }

Generated by: LCOV version 1.13-14-ga5dd952