LCOV - code coverage report
Current view: top level - js/src/frontend - TokenStream.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 292 1008 29.0 %
Date: 2018-08-07 16:35:00 Functions: 0 0 -
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
       2             :  * vim: set ts=8 sts=4 et sw=4 tw=99:
       3             :  * This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : // JS lexical scanner.
       8             : 
       9             : #include "frontend/TokenStream.h"
      10             : 
      11             : #include "mozilla/ArrayUtils.h"
      12             : #include "mozilla/Attributes.h"
      13             : #include "mozilla/IntegerTypeTraits.h"
      14             : #include "mozilla/Likely.h"
      15             : #include "mozilla/MemoryChecking.h"
      16             : #include "mozilla/PodOperations.h"
      17             : #include "mozilla/ScopeExit.h"
      18             : #include "mozilla/TextUtils.h"
      19             : 
      20             : #include <ctype.h>
      21             : #include <stdarg.h>
      22             : #include <stdio.h>
      23             : #include <string.h>
      24             : #include <utility>
      25             : 
      26             : #include "jsexn.h"
      27             : #include "jsnum.h"
      28             : 
      29             : #include "frontend/BytecodeCompiler.h"
      30             : #include "frontend/Parser.h"
      31             : #include "frontend/ReservedWords.h"
      32             : #include "js/CharacterEncoding.h"
      33             : #include "js/UniquePtr.h"
      34             : #include "util/StringBuffer.h"
      35             : #include "util/Unicode.h"
      36             : #include "vm/HelperThreads.h"
      37             : #include "vm/JSAtom.h"
      38             : #include "vm/JSContext.h"
      39             : #include "vm/Realm.h"
      40             : 
      41             : using mozilla::ArrayLength;
      42             : using mozilla::AssertedCast;
      43             : using mozilla::IsAscii;
      44             : using mozilla::IsAsciiAlpha;
      45             : using mozilla::IsAsciiDigit;
      46             : using mozilla::MakeScopeExit;
      47             : using mozilla::PodCopy;
      48             : 
      49             : struct ReservedWordInfo
      50             : {
                           // One row of the |reservedWords| table: the spelling of a reserved
                           // word paired with its TokenKind.
      51             :     const char* chars;         // C string with reserved word text
      52             :     js::frontend::TokenKind tokentype;  // token kind stored for that word
      53             : };
      54             : 
      55             : static const ReservedWordInfo reservedWords[] = {
                       // One entry per reserved word, produced by expanding
                       // FOR_EACH_JAVASCRIPT_RESERVED_WORD with the macro below.  Each entry
                       // holds the |js_<word>_str| string and the TokenKind named by |type|;
                       // the |name| macro argument is not used for this table.
      56             : #define RESERVED_WORD_INFO(word, name, type) \
      57             :     {js_##word##_str, js::frontend::type},
      58             :     FOR_EACH_JAVASCRIPT_RESERVED_WORD(RESERVED_WORD_INFO)
      59             : #undef RESERVED_WORD_INFO
      60             : };
      61             : 
      62             : // Returns a ReservedWordInfo for the specified characters, or nullptr if the
      63      493398 : // string is not a reserved word.
                       //
                       // |length| must be non-zero (asserted).  The matching logic itself lives in
                       // the generated header included in the body; this function only supplies
                       // the hooks that header uses and the three exit labels it jumps to.
      64             : template <typename CharT>
      65      493398 : static const ReservedWordInfo*
      66             : FindReservedWord(const CharT* s, size_t length)
      67             : {
      68             :     MOZ_ASSERT(length != 0);
      69             : 
      70             :     size_t i;
      71             :     const ReservedWordInfo* rw;
      72             :     const char* chars;
      73             : 
                           // Hooks for the generated matcher: JSRW_LENGTH and JSRW_AT expose the
                           // input, and the other three macros jump to the labels below, storing
                           // a |reservedWords| index in |i| first where one is needed.
                           // NOTE(review): if the generated code could fall off its end, control
                           // would reach got_match with |i| unset; presumably it always leaves
                           // through one of these macros -- confirm against the generator.
      74             : #define JSRW_LENGTH()           length
      75             : #define JSRW_AT(column)         s[column]
      76             : #define JSRW_GOT_MATCH(index)   i = (index); goto got_match;
      77             : #define JSRW_TEST_GUESS(index)  i = (index); goto test_guess;
      78             : #define JSRW_NO_MATCH()         goto no_match;
      79             : #include "frontend/ReservedWordsGenerated.h"
      80             : #undef JSRW_NO_MATCH
      81             : #undef JSRW_TEST_GUESS
      82             : #undef JSRW_GOT_MATCH
      83             : #undef JSRW_AT
      84       85952 : #undef JSRW_LENGTH
      85             : 
      86             :   got_match:
                           // The entry at |i| is returned without any further comparison.
      87           0 :     return &reservedWords[i];
      88           0 : 
      89           0 :   test_guess:
                           // The entry at |i| is only a candidate: compare all |length| units of
                           // |s| with its text.  The cast makes the table character non-negative
                           // before it is compared with the CharT unit.
                           // NOTE(review): the loop never looks for the candidate's terminating
                           // NUL, so it presumably relies on the generated code guessing only
                           // words whose length equals |length| -- confirm.
      90      164796 :     rw = &reservedWords[i];
      91             :     chars = rw->chars;
      92             :     do {
      93             :         if (*s++ != (unsigned char)(*chars++))
      94             :             goto no_match;
      95             :     } while (--length != 0);
      96             :     return rw;
      97             : 
      98             :   no_match:
      99             :     return nullptr;
     100      148189 : }
     101             : 
     102           0 : static const ReservedWordInfo*
     103           0 : FindReservedWord(JSLinearString* str)
     104           0 : {
                           // Looks |str| up as a reserved word, dispatching to the templated
                           // overload on whether the string has Latin-1 or two-byte characters.
                           // That overload asserts a non-zero length, so |str| is expected to be
                           // non-empty.  |nogc| is the token both character accessors take.
     105      296378 :     JS::AutoCheckCannotGC nogc;
     106             :     return str->hasLatin1Chars()
     107             :            ? FindReservedWord(str->latin1Chars(nogc), str->length())
     108             :            : FindReservedWord(str->twoByteChars(nogc), str->length());
     109             : }
     110        1305 : 
     111             : template <typename CharT>
     112             : static bool
     113             : IsIdentifier(const CharT* chars, size_t length)
     114        1305 : {
                           // Returns true iff |chars[0..length)| is non-empty, its first unit
                           // passes unicode::IsIdentifierStart and every later unit passes
                           // unicode::IsIdentifierPart.  Each unit is converted to char16_t and
                           // tested on its own, so surrogate pairs are not combined here.
     115             :     using namespace js;
     116             : 
     117        2610 :     if (length == 0)
     118             :         return false;
     119             : 
     120           0 :     if (!unicode::IsIdentifierStart(char16_t(*chars)))
     121           0 :         return false;
     122       32080 : 
                           // The pre-increment skips the first unit, which was checked above.
     123             :     const CharT* end = chars + length;
     124             :     while (++chars != end) {
     125             :         if (!unicode::IsIdentifierPart(char16_t(*chars)))
     126             :             return false;
     127             :     }
     128             : 
     129             :     return true;
     130           0 : }
     131             : 
     132             : static uint32_t
     133             : GetSingleCodePoint(const char16_t** p, const char16_t* end)
     134             : {
                           // Reads one code point starting at |*p| and advances |*p| past it.
                           // A lead surrogate directly followed, before |end|, by a trail
                           // surrogate is decoded as a pair and consumes two units.  Anything
                           // else, including an unpaired surrogate, is returned unchanged and
                           // consumes one unit.  |**p| is read unconditionally, so the caller
                           // must ensure |*p < end|.
     135           0 :     using namespace js;
     136           0 : 
     137           0 :     uint32_t codePoint;
     138           0 :     if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(**p)) && *p + 1 < end) {
     139           0 :         char16_t lead = **p;
     140           0 :         char16_t maybeTrail = *(*p + 1);
     141             :         if (unicode::IsTrailSurrogate(maybeTrail)) {
     142             :             *p += 2;
     143             :             return unicode::UTF16Decode(lead, maybeTrail);
     144           0 :         }
     145           0 :     }
     146           0 : 
     147             :     codePoint = **p;
     148             :     (*p)++;
     149             :     return codePoint;
     150           0 : }
     151             : 
     152             : static bool
     153             : IsIdentifierMaybeNonBMP(const char16_t* chars, size_t length)
     154           0 : {
                           // Identifier test for UTF-16 text that may contain surrogate pairs.
     155             :     using namespace js;
     156             : 
                           // First try the per-code-unit check; if it accepts, we are done.
     157           0 :     if (IsIdentifier(chars, length))
     158             :         return true;
     159             : 
     160           0 :     if (length == 0)
     161           0 :         return false;
     162             : 
                           // Otherwise rescan, reading through GetSingleCodePoint so that a
                           // surrogate pair is classified as one code point.
     163             :     const char16_t* p = chars;
     164           0 :     const char16_t* end = chars + length;
     165           0 :     uint32_t codePoint;
     166             : 
     167             :     codePoint = GetSingleCodePoint(&p, end);
     168           0 :     if (!unicode::IsIdentifierStart(codePoint))
     169           0 :         return false;
     170           0 : 
     171             :     while (p < end) {
     172             :         codePoint = GetSingleCodePoint(&p, end);
     173             :         if (!unicode::IsIdentifierPart(codePoint))
     174             :             return false;
     175             :     }
     176             : 
     177             :     return true;
     178             : }
     179             : 
     180             : namespace js {
     181             : 
     182         513 : namespace frontend {
     183             : 
     184           0 : bool
     185           0 : IsIdentifier(JSLinearString* str)
     186           0 : {
                           // Latin-1 strings go through the per-unit check; two-byte strings go
                           // through the variant that also handles surrogate pairs.  The leading
                           // :: selects the file-level helpers rather than the overloads declared
                           // in this namespace.  |str| must be non-null (asserted).
     187           1 :     JS::AutoCheckCannotGC nogc;
     188           0 :     MOZ_ASSERT(str);
     189             :     if (str->hasLatin1Chars())
     190             :         return ::IsIdentifier(str->latin1Chars(nogc), str->length());
     191             :     return ::IsIdentifierMaybeNonBMP(str->twoByteChars(nogc), str->length());
     192         792 : }
     193             : 
     194         792 : bool
     195             : IsIdentifier(const char* chars, size_t length)
     196             : {
                           // Thin wrapper over the file-level template for |const char*| input.
                           // NOTE(review): the template converts each |char| with char16_t(...),
                           // so where |char| is signed a negative value maps to 0xFF80..0xFFFF;
                           // presumably callers pass ASCII only -- confirm.
     197             :     return ::IsIdentifier(chars, length);
     198           0 : }
     199             : 
     200           0 : bool
     201             : IsIdentifier(const char16_t* chars, size_t length)
     202             : {
                           // Per-code-unit check only: unlike the JSLinearString overload, this
                           // calls the file-level template directly and does not fall back to
                           // IsIdentifierMaybeNonBMP, so surrogate pairs are judged unit by unit.
     203             :     return ::IsIdentifier(chars, length);
     204           0 : }
     205             : 
     206           0 : bool
     207           0 : IsKeyword(JSLinearString* str)
     208             : {
                           // True iff |str| is found by FindReservedWord and TokenKindIsKeyword
                           // accepts the token kind of the entry found; false otherwise.
     209             :     if (const ReservedWordInfo* rw = FindReservedWord(str))
     210             :         return TokenKindIsKeyword(rw->tokentype);
     211             : 
     212             :     return false;
     213      148189 : }
     214             : 
     215           0 : TokenKind
     216         670 : ReservedWordTokenKind(PropertyName* str)
     217             : {
                           // Returns the token kind of the reserved word spelled by |str|, or
                           // TokenKind::Name when |str| is not a reserved word.
     218             :     if (const ReservedWordInfo* rw = FindReservedWord(str))
     219             :         return rw->tokentype;
     220             : 
     221             :     return TokenKind::Name;
     222           0 : }
     223             : 
     224           0 : const char*
     225           0 : ReservedWordToCharZ(PropertyName* str)
     226             : {
                           // Returns the C string for the reserved word spelled by |str|, or
                           // nullptr when |str| is not a reserved word.  The string comes from
                           // the TokenKind overload rather than from |rw->chars|.
     227             :     if (const ReservedWordInfo* rw = FindReservedWord(str))
     228             :         return ReservedWordToCharZ(rw->tokentype);
     229             : 
     230             :     return nullptr;
     231           0 : }
     232             : 
     233           0 : const char*
     234           0 : ReservedWordToCharZ(TokenKind tt)
     235             : {
                           // Returns the |js_<word>_str| string for the reserved-word token kind
                           // |tt|.  |tt| must not be TokenKind::Name (asserted).
     236           0 :     MOZ_ASSERT(tt != TokenKind::Name);
     237             :     switch (tt) {
                             // One |case| per reserved word is generated by the macro below.
     238             : #define EMIT_CASE(word, name, type) case type: return js_##word##_str;
     239           0 :       FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
     240             : #undef EMIT_CASE
     241             :       default:
     242             :         MOZ_ASSERT_UNREACHABLE("Not a reserved word PropertyName.");
     243             :     }
                           // Reached only through |default|, if the assertion above does not stop
                           // execution.
     244             :     return nullptr;
     245        4958 : }
     246             : 
     247           0 : PropertyName*
     248        4958 : TokenStreamAnyChars::reservedWordToPropertyName(TokenKind tt) const
     249             : {
                           // Returns the |cx->names()| member for the reserved-word token kind
                           // |tt|.  |tt| must not be TokenKind::Name (asserted).
     250        9916 :     MOZ_ASSERT(tt != TokenKind::Name);
     251             :     switch (tt) {
                             // One |case| per reserved word; here the |name| macro argument
                             // selects the member of |cx->names()| to return.
     252             : #define EMIT_CASE(word, name, type) case type: return cx->names().name;
     253           0 :       FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
     254             : #undef EMIT_CASE
     255             :       default:
     256             :         MOZ_ASSERT_UNREACHABLE("Not a reserved word TokenKind.");
     257             :     }
                           // Reached only through |default|, if the assertion above does not stop
                           // execution.
     258           0 :     return nullptr;
     259           0 : }
     260        3056 : 
     261             : TokenStreamAnyChars::SourceCoords::SourceCoords(JSContext* cx, uint32_t ln, uint32_t col,
     262             :                                                 uint32_t initialLineOffset)
     263             :   : lineStartOffsets_(cx), initialLineNum_(ln), initialColumn_(col), lastLineIndex_(0)
     264             : {
                           // Leaves |lineStartOffsets_| as [initialLineOffset, MAX_PTR]: one known
                           // line start followed by the sentinel.
                           //
                           // The next comment is about the local copy |maxPtr| of MAX_PTR.
                           // NOTE(review): presumably the append binds its argument by reference,
                           // which would need an out-of-class definition of MAX_PTR -- confirm.
     265             :     // This is actually necessary!  Removing it causes compile errors on
     266             :     // GCC and clang.  You could try declaring this:
     267             :     //
     268             :     //   const uint32_t TokenStreamAnyChars::SourceCoords::MAX_PTR;
     269        1528 :     //
     270             :     // which fixes the GCC/clang error, but causes bustage on Windows.  Sigh.
     271             :     //
     272             :     uint32_t maxPtr = MAX_PTR;
     273             : 
     274           0 :     // The first line begins at buffer offset |initialLineOffset|.  MAX_PTR is
     275           0 :     // the sentinel.  The appends cannot fail because |lineStartOffsets_| has
     276           0 :     // statically-allocated elements.
     277           0 :     MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
     278        1528 :     MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
     279             :     lineStartOffsets_.infallibleAppend(initialLineOffset);
     280             :     lineStartOffsets_.infallibleAppend(maxPtr);
     281      212173 : }
     282             : 
     283           0 : MOZ_ALWAYS_INLINE bool
     284      212173 : TokenStreamAnyChars::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
     285             : {
                           // Records that line |lineNum| starts at |lineStartOffset|.  Returns
                           // false only when growing |lineStartOffsets_| fails; the table is
                           // left unchanged in that case.
     286      212173 :     uint32_t lineIndex = lineNumToIndex(lineNum);
     287             :     uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
     288             : 
     289      212179 :     MOZ_ASSERT(lineStartOffsets_[0] <= lineStartOffset &&
     290             :                lineStartOffsets_[sentinelIndex] == MAX_PTR);
     291             : 
     292             :     if (lineIndex == sentinelIndex) {
     293           0 :         // We haven't seen this newline before.  Update lineStartOffsets_
     294      212160 :         // only if lineStartOffsets_.append succeeds, to keep sentinel.
     295             :         // Otherwise return false to tell TokenStream about OOM.
     296             :         uint32_t maxPtr = MAX_PTR;
     297             :         if (!lineStartOffsets_.append(maxPtr)) {
     298             :             static_assert(mozilla::IsSame<decltype(lineStartOffsets_.allocPolicy()),
     299           0 :                                           TempAllocPolicy&>::value,
     300             :                           "this function's caller depends on it reporting an "
     301             :                           "error on failure, as TempAllocPolicy ensures");
     302      212162 :             return false;
     303             :         }
     304             : 
                               // A fresh sentinel now sits at the end, so the old sentinel slot
                               // can hold this line's start offset.
     305             :         lineStartOffsets_[lineIndex] = lineStartOffset;
     306             :     } else {
     307          19 :         // We have seen this newline before (and ungot it).  Do nothing (other
     308             :         // than checking it hasn't mysteriously changed).
     309             :         // This path can be executed after hitting OOM, so check lineIndex.
     310             :         MOZ_ASSERT_IF(lineIndex < sentinelIndex, lineStartOffsets_[lineIndex] == lineStartOffset);
     311             :     }
     312             :     return true;
     313           0 : }
     314             : 
     315           0 : MOZ_ALWAYS_INLINE bool
     316           0 : TokenStreamAnyChars::SourceCoords::fill(const TokenStreamAnyChars::SourceCoords& other)
     317           0 : {
                           // Extends this table with the line starts that |other| has and this
                           // table lacks.  Both tables must begin at the same offset and end with
                           // the MAX_PTR sentinel (asserted).  Returns false if an append fails.
     318             :     MOZ_ASSERT(lineStartOffsets_[0] == other.lineStartOffsets_[0]);
     319           0 :     MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
     320             :     MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
     321             : 
                           // Nothing to copy when this table is already at least as long.
     322           0 :     if (lineStartOffsets_.length() >= other.lineStartOffsets_.length())
     323           0 :         return true;
     324             : 
                           // Replace our sentinel with the offset |other| holds at that index,
                           // then append the remainder of |other|; its last element is its
                           // sentinel, so on success this table ends with MAX_PTR again.
                           // NOTE(review): after a failed append this table no longer ends with
                           // MAX_PTR; presumably callers abandon it on failure -- confirm.
     325           0 :     uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
     326           0 :     lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
     327             : 
     328             :     for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) {
     329             :         if (!lineStartOffsets_.append(other.lineStartOffsets_[i]))
     330             :             return false;
     331             :     }
     332             :     return true;
     333      387078 : }
     334             : 
     335             : MOZ_ALWAYS_INLINE uint32_t
     336             : TokenStreamAnyChars::SourceCoords::lineIndexOf(uint32_t offset) const
     337      387078 : {
                           // Returns the index i for which
                           //   lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1],
                           // as the assertion before the final return states.  Although this
                           // method is const, it updates |lastLineIndex_|, which remembers where
                           // the previous lookup ended up so that nearby lookups are cheap.
     338             :     uint32_t iMin, iMax, iMid;
     339             : 
     340             :     if (lineStartOffsets_[lastLineIndex_] <= offset) {
     341           0 :         // If we reach here, offset is on a line the same as or higher than
     342      222319 :         // last time.  Check first for the +0, +1, +2 cases, because they
     343             :         // typically cover 85--98% of cases.
     344             :         if (offset < lineStartOffsets_[lastLineIndex_ + 1])
     345             :             return lastLineIndex_;      // lineIndex is same as last time
     346           0 : 
     347           0 :         // If we reach here, there must be at least one more entry (plus the
     348       83656 :         // sentinel).  Try it.
     349             :         lastLineIndex_++;
     350             :         if (offset < lineStartOffsets_[lastLineIndex_ + 1])
     351           0 :             return lastLineIndex_;      // lineIndex is one higher than last time
     352           0 : 
     353       21257 :         // The same logic applies here.
     354             :         lastLineIndex_++;
     355             :         if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
     356             :             return lastLineIndex_;      // lineIndex is two higher than last time
     357             :         }
     358           0 : 
     359       43744 :         // No luck.  Oh well, we have a better-than-default starting point for
     360             :         // the binary search.
     361             :         iMin = lastLineIndex_ + 1;
     362             :         MOZ_ASSERT(iMin < lineStartOffsets_.length() - 1);   // -1 due to the sentinel
     363             : 
     364             :     } else {
                               // |offset| lies before the remembered line: search from the start.
     365             :         iMin = 0;
     366             :     }
     367             : 
     368             :     // This is a binary search with deferred detection of equality, which was
     369           0 :     // marginally faster in this case than a standard binary search.
     370           0 :     // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
     371           0 :     // want one before that.
     372           0 :     iMax = lineStartOffsets_.length() - 2;
     373      145500 :     while (iMax > iMin) {
     374             :         iMid = iMin + (iMax - iMin) / 2;
     375             :         if (offset >= lineStartOffsets_[iMid + 1])
     376             :             iMin = iMid + 1;    // offset is above lineStartOffsets_[iMid]
     377           0 :         else
     378           0 :             iMax = iMid;        // offset is below or within lineStartOffsets_[iMid]
     379           0 :     }
     380       59883 :     MOZ_ASSERT(iMax == iMin);
     381             :     MOZ_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
     382             :     lastLineIndex_ = iMin;
     383             :     return iMin;
     384      144125 : }
     385             : 
     386           0 : uint32_t
     387      353006 : TokenStreamAnyChars::SourceCoords::lineNum(uint32_t offset) const
     388             : {
                           // Finds the index of the line containing |offset|, then converts that
                           // index to a line number with lineIndexToNum.
     389             :     uint32_t lineIndex = lineIndexOf(offset);
     390             :     return lineIndexToNum(lineIndex);
     391      184123 : }
     392             : 
     393      184123 : uint32_t
     394             : TokenStreamAnyChars::SourceCoords::columnIndex(uint32_t offset) const
     395             : {
                           // Finds the index of the line containing |offset| and lets
                           // lineIndexAndOffsetToColumn turn the pair into a column.
     396             :     return lineIndexAndOffsetToColumn(lineIndexOf(offset), offset);
     397       26490 : }
     398             : 
     399             : void
     400           0 : TokenStreamAnyChars::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum,
     401           0 :                                                          uint32_t* column) const
     402           0 : {
                           // Computes both results from a single lineIndexOf lookup.  Both
                           // out-pointers are written unconditionally, so neither may be null.
     403       26490 :     uint32_t lineIndex = lineIndexOf(offset);
     404             :     *lineNum = lineIndexToNum(lineIndex);
     405           0 :     *column = lineIndexAndOffsetToColumn(lineIndex, offset);
     406           0 : }
     407        1528 : 
     408             : TokenStreamAnyChars::TokenStreamAnyChars(JSContext* cx, const ReadOnlyCompileOptions& options,
     409             :                                          StrictModeGetter* smg)
     410             :   : srcCoords(cx, options.lineno, options.column, options.scriptSourceOffset),
     411             :     options_(options),
     412        1528 :     tokens(),
     413             :     cursor_(0),
     414             :     lookahead(),
     415             :     lineno(options.lineno),
     416        1528 :     flags(),
     417             :     linebase(0),
     418             :     prevLinebase(size_t(-1)),  // size_t(-1) means no previous line base is saved
     419             :     filename_(options.filename()),
     420           0 :     displayURL_(nullptr),
     421       18336 :     sourceMapURL_(nullptr),
     422             :     cx(cx),
     423             :     mutedErrors(options.mutedErrors()),
     424        1528 :     strictModeGetter(smg)
     425        1528 : {
                           // The starting line, column and source offset all come from |options|.
                           // NOTE(review): |isExprEnding| does not appear in the initializer list
                           // above; the zeroing mentioned below presumably comes from an in-class
                           // initializer -- confirm in the header.
     426        1528 :     // |isExprEnding| was initially zeroed: overwrite the true entries here.
     427           0 :     isExprEnding[size_t(TokenKind::Comma)] = true;
     428           0 :     isExprEnding[size_t(TokenKind::Semi)] = true;
     429           0 :     isExprEnding[size_t(TokenKind::Colon)] = true;
     430           0 :     isExprEnding[size_t(TokenKind::Rp)] = true;
     431             :     isExprEnding[size_t(TokenKind::Rb)] = true;
     432             :     isExprEnding[size_t(TokenKind::Rc)] = true;
     433           0 : }
     434             : 
     435             : template<typename CharT>
     436        4584 : TokenStreamCharsBase<CharT>::TokenStreamCharsBase(JSContext* cx, const CharT* chars, size_t length,
     437           0 :                                                   size_t startOffset)
     438             :   : sourceUnits(chars, length, startOffset),
     439             :     tokenbuf(cx)
                       // The body is empty: |sourceUnits| receives the buffer, its length and the
                       // start offset, and |tokenbuf| receives |cx|; nothing else happens here.
     440           0 : {}
     441             : 
     442             : template<typename CharT, class AnyCharsAccess>
     443        3056 : TokenStreamSpecific<CharT, AnyCharsAccess>::TokenStreamSpecific(JSContext* cx,
     444           0 :                                                                 const ReadOnlyCompileOptions& options,
     445             :                                                                 const CharT* base, size_t length)
     446             :   : TokenStreamChars<CharT, AnyCharsAccess>(cx, base, length, options.scriptSourceOffset)
                       // Only forwards to the base class.  Of |options|, just |scriptSourceOffset|
                       // is used here, passed as the fourth base-constructor argument.
     447           0 : {}
     448             : 
     449             : bool
     450             : TokenStreamAnyChars::checkOptions()
     451           0 : {
                           // Validates the compile options; the only check visible here is on the
                           // starting column.  On failure JSMSG_BAD_COLUMN_NUMBER is reported via
                           // reportErrorNoOffset and false is returned.
                           //
                           // Assuming MaxValue<int32_t>::value is INT32_MAX, the bound below is
                           // 2^30, so columns at or above 2^30 are rejected.
     452           0 :     // Constrain starting columns to half of the range of a signed 32-bit value,
     453           0 :     // to avoid overflow.
     454             :     if (options().column >= mozilla::MaxValue<int32_t>::value / 2 + 1) {
     455             :         reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
     456             :         return false;
     457             :     }
     458             : 
     459             :     return true;
     460             : }
     461             : 
     462             : // Use the fastest available getc.
                       // Preference order: getc_unlocked, then _getc_nolock, then plain getc,
                       // chosen by the HAVE_* configuration macros tested below.
                       // NOTE(review): no use of fast_getc is visible in this part of the file;
                       // confirm it is still needed.
     463             : #if defined(HAVE_GETC_UNLOCKED)
     464             : # define fast_getc getc_unlocked
     465             : #elif defined(HAVE__GETC_NOLOCK)
     466             : # define fast_getc _getc_nolock
     467             : #else
     468             : # define fast_getc getc
     469             : #endif
     470             : 
     471      212174 : MOZ_MUST_USE MOZ_ALWAYS_INLINE bool
     472      212174 : TokenStreamAnyChars::internalUpdateLineInfoForEOL(uint32_t lineStartOffset)
     473      212174 : {
                           // Moves to a new line that starts at |lineStartOffset|: saves the old
                           // line base in |prevLinebase|, increments |lineno|, and records the
                           // new line start in |srcCoords|.  Returns whatever srcCoords.add
                           // returns, i.e. false when the line start could not be recorded.
     474      212174 :     prevLinebase = linebase;
     475             :     linebase = lineStartOffset;
     476             :     lineno++;
     477             :     return srcCoords.add(lineno, linebase);
     478           0 : }
     479             : 
     480          18 : void
     481          18 : TokenStreamAnyChars::undoInternalUpdateLineInfoForEOL()
     482           0 : {
                           // Restores |linebase| from |prevLinebase| and decrements |lineno|.
                           // Only one level of undo exists: |prevLinebase| is reset to the
                           // size_t(-1) marker afterwards.  Nothing is removed from |srcCoords|
                           // here.
     483          18 :     MOZ_ASSERT(prevLinebase != size_t(-1)); // we should never get more than one EOL
     484           0 :     linebase = prevLinebase;
     485             :     prevLinebase = size_t(-1);
     486             :     lineno--;
     487             : }
     488             : 
     489      187931 : MOZ_ALWAYS_INLINE void
     490             : TokenStreamAnyChars::updateFlagsForEOL()
     491             : {
                           // Clears |flags.isDirtyLine|.  Judging by the name, this is meant to
                           // run when an end of line is consumed -- confirm against callers.
     492             :     flags.isDirtyLine = false;
     493             : }
     494             : 
     495             : // This gets a full code point, starting from an already-consumed leading code
     496             : // unit, normalizing EOL sequences to '\n', also updating line/column info as
     497        2694 : // needed.
                       //
                       // NOTE(review): as written, the body consumes the unit itself through
                       // getCodeUnit() and stores that single unit in |*cp| without combining
                       // surrogate pairs, so the description above may be out of date.
                       //
                       // Returns false only when updateLineInfoForEOL fails; |*cp| is not written
                       // in that case.  At end of input it sets |flags.isEOF|, stores EOF in |*cp|
                       // and returns true.
     498             : template<class AnyCharsAccess>
     499           0 : bool
     500             : TokenStreamChars<char16_t, AnyCharsAccess>::getCodePoint(int32_t* cp)
     501           0 : {
     502           0 :     TokenStreamAnyChars& anyChars = anyCharsAccess();
     503           0 : 
     504           0 :     if (MOZ_UNLIKELY(sourceUnits.atEnd())) {
     505             :         anyChars.flags.isEOF = true;
     506             :         *cp = EOF;
     507        5388 :         return true;
     508             :     }
     509             : 
     510             :     int32_t c = sourceUnits.getCodeUnit();
     511        2694 : 
                           // The do/while(false) is a breakable block: every |break| means an
                           // end-of-line unit was read, and the only other way out is the
                           // |return true| at the bottom of the block.
     512             :     do {
     513             :         // Normalize the char16_t if it was a newline.
     514        2669 :         if (MOZ_UNLIKELY(c == '\n'))
     515             :             break;
     516           0 : 
     517           0 :         if (MOZ_UNLIKELY(c == '\r')) {
     518             :             // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
     519             :             if (MOZ_LIKELY(!sourceUnits.atEnd()))
     520             :                 sourceUnits.matchCodeUnit('\n');
     521             : 
     522        2669 :             break;
     523             :         }
     524             : 
     525        2669 :         if (MOZ_UNLIKELY(c == unicode::LINE_SEPARATOR || c == unicode::PARA_SEPARATOR))
     526        2669 :             break;
     527             : 
     528             :         *cp = c;
     529          25 :         return true;
     530             :     } while (false);
     531             : 
                           // An end of line was consumed: update the line info, then report '\n'.
     532          25 :     if (!updateLineInfoForEOL())
     533          25 :         return false;
     534             : 
     535             :     *cp = '\n';
     536             :     return true;
     537             : }
     538          81 : 
     539             : template<class AnyCharsAccess>
     540         162 : bool
     541             : TokenStreamChars<char16_t, AnyCharsAccess>::getNonAsciiCodePoint(char16_t lead, int32_t* codePoint)
     542          81 : {
                           // Completes the code point whose first unit |lead| is the unit most
                           // recently consumed from |sourceUnits| (asserted) and is not ASCII
                           // (asserted), storing the result in |*codePoint|.  LINE SEPARATOR and
                           // PARAGRAPH SEPARATOR are reported as '\n' after the line info has
                           // been updated.  Returns false only when that update fails, in which
                           // case |*codePoint| is deliberately left unusable.
     543             :     MOZ_ASSERT(!isAsciiCodePoint(lead),
     544             :                "ASCII code unit/point must be handled separately");
     545             :     MOZ_ASSERT(lead == sourceUnits.previousCodeUnit(),
     546          81 :                "getNonAsciiCodePoint called incorrectly");
     547             : 
     548             :     // The code point is usually |lead|: overwrite later if needed.
     549             :     *codePoint = lead;
     550           0 : 
     551          81 :     // ECMAScript specifically requires that unpaired UTF-16 surrogates be
     552             :     // treated as the corresponding code point and not as an error.  See
     553             :     // <https://tc39.github.io/ecma262/#sec-ecmascript-language-types-string-type>.
     554           0 :     // Thus this function does not consider any sequence of 16-bit numbers to
     555             :     // be intrinsically in error.
     556           0 : 
     557             :     // Dispense with single-unit code points and lone trailing surrogates.
     558             :     if (MOZ_LIKELY(!unicode::IsLeadSurrogate(lead))) {
     559           0 :         if (MOZ_UNLIKELY(lead == unicode::LINE_SEPARATOR ||
     560             :                          lead == unicode::PARA_SEPARATOR))
     561             :         {
     562           0 :             if (!updateLineInfoForEOL()) {
     563             : #ifdef DEBUG
     564           0 :                 *codePoint = EOF; // sentinel value to hopefully cause errors
     565             : #endif
     566             :                 MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
     567             :                 return false;
     568             :             }
     569             : 
     570             :             *codePoint = '\n';
     571             :         } else {
     572           0 :             MOZ_ASSERT(!SourceUnits::isRawEOLChar(*codePoint));
     573             :         }
     574             : 
     575           0 :         return true;
     576             :     }
     577             : 
     578             :     // Also handle a lead surrogate not paired with a trailing surrogate.
     579             :     if (MOZ_UNLIKELY(sourceUnits.atEnd() ||
     580           0 :                      !unicode::IsTrailSurrogate(sourceUnits.peekCodeUnit())))
     581           0 :     {
     582             :         MOZ_ASSERT(!SourceUnits::isRawEOLChar(*codePoint));
     583             :         return true;
     584             :     }
     585             : 
     586             :     // Otherwise we have a multi-unit code point.
                           // The trail surrogate is consumed here, as part of decoding the pair.
     587             :     *codePoint = unicode::UTF16Decode(lead, sourceUnits.getCodeUnit());
     588             :     MOZ_ASSERT(!SourceUnits::isRawEOLChar(*codePoint));
     589             :     return true;
     590             : }
     591             : 
     592             : template<typename CharT, class AnyCharsAccess>
     593             : void
     594             : GeneralTokenStreamChars<CharT, AnyCharsAccess>::ungetChar(int32_t c)
     595             : {
                           // Puts back the character |c|.  EOF is a no-op.  For any other value
                           // one code unit is ungotten.  When |c| is '\n' the raw unit may be any
                           // raw EOL character (asserted), so it is inspected again and the line
                           // bookkeeping is undone as well.
     596             :     if (c == EOF)
     597             :         return;
     598           0 : 
     599    11195698 :     sourceUnits.ungetCodeUnit();
     600             :     if (c == '\n') {
     601           0 :         int32_t c2 = sourceUnits.peekCodeUnit();
     602             :         MOZ_ASSERT(SourceUnits::isRawEOLChar(c2));
     603             : 
     604             :         // If it's a \r\n sequence, also unget the \r.
     605             :         if (c2 == CharT('\n') && !sourceUnits.atStart())
     606             :             sourceUnits.ungetOptionalCRBeforeLF();
     607         852 : 
     608             :         anyCharsAccess().undoInternalUpdateLineInfoForEOL();
     609         852 :     } else {
     610             :         MOZ_ASSERT(sourceUnits.peekCodeUnit() == c);
     611             :     }
     612         852 : }
     613           0 : 
     614           0 : template<class AnyCharsAccess>
     615           0 : void
     616             : TokenStreamChars<char16_t, AnyCharsAccess>::ungetCodePointIgnoreEOL(uint32_t codePoint)
     617             : {
     618           0 :     MOZ_ASSERT(!sourceUnits.atStart());
     619          18 : 
     620             :     unsigned numUnits = 0;
     621           0 :     char16_t units[2];
     622             :     unicode::UTF16Encode(codePoint, units, &numUnits);
     623        1668 : 
     624             :     MOZ_ASSERT(numUnits == 1 || numUnits == 2);
     625             : 
     626             :     while (numUnits-- > 0)
     627             :         ungetCodeUnit(units[numUnits]);
     628             : }
     629           0 : 
     630             : template<class AnyCharsAccess>
     631           0 : void
     632             : TokenStreamChars<char16_t, AnyCharsAccess>::ungetLineTerminator()
     633             : {
     634      639066 :     sourceUnits.ungetCodeUnit();
     635             : 
     636             :     char16_t last = sourceUnits.peekCodeUnit();
     637             :     MOZ_ASSERT(SourceUnits::isRawEOLChar(last));
     638             : 
     639           0 :     if (last == '\n')
     640             :         sourceUnits.ungetOptionalCRBeforeLF();
     641           0 : 
     642             :     anyCharsAccess().undoInternalUpdateLineInfoForEOL();
     643           0 : }
     644             : 
     645           0 : template<typename CharT>
     646             : size_t
     647           0 : SourceUnits<CharT>::findEOLMax(size_t start, size_t max)
     648             : {
     649           0 :     const CharT* p = codeUnitPtrAt(start);
     650           0 : 
     651           0 :     size_t n = 0;
     652             :     while (true) {
     653             :         if (p >= limit_)
     654             :             break;
     655           0 :         if (n >= max)
     656             :             break;
     657           0 :         n++;
     658             : 
     659           0 :         // This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
     660           0 :         // string and template literals.  These code points do affect line and
     661             :         // column coordinates, even as they encode their literal values.
     662           0 :         if (isRawEOLChar(*p++))
     663           0 :             break;
     664             :     }
     665           0 :     return start + n;
     666           0 : }
     667             : 
     668             : template<typename CharT, class AnyCharsAccess>
     669             : bool
     670             : TokenStreamSpecific<CharT, AnyCharsAccess>::advance(size_t position)
     671             : {
     672             :     const CharT* end = sourceUnits.codeUnitPtrAt(position);
     673             :     while (sourceUnits.addressOfNextCodeUnit() < end) {
     674       11554 :         int32_t c;
     675             :         if (!getCodePoint(&c))
     676             :             return false;
     677           0 :     }
     678           0 : 
     679           0 :     TokenStreamAnyChars& anyChars = anyCharsAccess();
     680             :     Token* cur = const_cast<Token*>(&anyChars.currentToken());
     681             :     cur->pos.begin = sourceUnits.offset();
     682           0 :     MOZ_MAKE_MEM_UNDEFINED(&cur->type, sizeof(cur->type));
     683             :     anyChars.lookahead = 0;
     684             :     return true;
     685           0 : }
     686      160670 : 
     687             : template<typename CharT, class AnyCharsAccess>
     688       11554 : void
     689             : TokenStreamSpecific<CharT, AnyCharsAccess>::seek(const Position& pos)
     690             : {
     691             :     TokenStreamAnyChars& anyChars = anyCharsAccess();
     692             : 
     693           0 :     sourceUnits.setAddressOfNextCodeUnit(pos.buf, /* allowPoisoned = */ true);
     694             :     anyChars.flags = pos.flags;
     695           0 :     anyChars.lineno = pos.lineno;
     696             :     anyChars.linebase = pos.linebase;
     697           0 :     anyChars.prevLinebase = pos.prevLinebase;
     698             :     anyChars.lookahead = pos.lookahead;
     699           0 : 
     700             :     anyChars.tokens[anyChars.cursor()] = pos.currentToken;
     701           0 :     for (unsigned i = 0; i < anyChars.lookahead; i++)
     702             :         anyChars.tokens[anyChars.aheadCursor(1 + i)] = pos.lookaheadTokens[i];
     703           0 : }
     704             : 
     705             : template<typename CharT, class AnyCharsAccess>
     706             : bool
     707             : TokenStreamSpecific<CharT, AnyCharsAccess>::seek(const Position& pos,
     708           0 :                                                  const TokenStreamAnyChars& other)
     709             : {
     710             :     if (!anyCharsAccess().srcCoords.fill(other.srcCoords))
     711           0 :         return false;
     712             : 
     713             :     seek(pos);
     714             :     return true;
     715             : }
     716           0 : 
     717             : template<typename CharT, class AnyCharsAccess>
     718           0 : bool
     719           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::reportStrictModeErrorNumberVA(UniquePtr<JSErrorNotes> notes,
     720             :                                                                           uint32_t offset,
     721           0 :                                                                           bool strictMode,
     722           0 :                                                                           unsigned errorNumber,
     723             :                                                                           va_list* args)
     724             : {
     725           0 :     TokenStreamAnyChars& anyChars = anyCharsAccess();
     726           0 :     if (!strictMode && !anyChars.options().extraWarningsOption)
     727           0 :         return true;
     728             : 
     729           0 :     ErrorMetadata metadata;
     730           0 :     if (!computeErrorMetadata(&metadata, offset))
     731             :         return false;
     732             : 
     733             :     if (strictMode) {
     734             :         ReportCompileError(anyChars.cx, std::move(metadata), std::move(notes), JSREPORT_ERROR, errorNumber,
     735           0 :                            *args);
     736             :         return false;
     737        4816 :     }
     738             : 
     739           0 :     return anyChars.compileWarning(std::move(metadata), std::move(notes), JSREPORT_WARNING | JSREPORT_STRICT,
     740        2408 :                                    errorNumber, *args);
     741           0 : }
     742           0 : 
     743           0 : bool
     744           0 : TokenStreamAnyChars::compileWarning(ErrorMetadata&& metadata, UniquePtr<JSErrorNotes> notes,
     745             :                                     unsigned flags, unsigned errorNumber, va_list args)
     746           0 : {
     747           0 :     if (options().werrorOption) {
     748        7282 :         flags &= ~JSREPORT_WARNING;
     749        2408 :         ReportCompileError(cx, std::move(metadata), std::move(notes), flags, errorNumber, args);
     750             :         return false;
     751             :     }
     752             : 
     753           0 :     return ReportCompileWarning(cx, std::move(metadata), std::move(notes), flags, errorNumber, args);
     754             : }
     755             : 
     756           0 : void
     757             : TokenStreamAnyChars::computeErrorMetadataNoOffset(ErrorMetadata* err)
     758             : {
     759           0 :     err->isMuted = mutedErrors;
     760           0 :     err->filename = filename_;
     761             :     err->lineNumber = 0;
     762             :     err->columnNumber = 0;
     763             : 
     764             :     MOZ_ASSERT(err->lineOfContext == nullptr);
     765           0 : }
     766             : 
     767             : bool
     768             : TokenStreamAnyChars::fillExcludingContext(ErrorMetadata* err, uint32_t offset)
     769             : {
     770             :     err->isMuted = mutedErrors;
     771           0 : 
     772           0 :     // If this TokenStreamAnyChars doesn't have location information, try to
     773             :     // get it from the caller.
     774             :     if (!filename_ && !cx->helperThread()) {
     775           0 :         NonBuiltinFrameIter iter(cx,
     776           0 :                                  FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
     777             :                                  cx->realm()->principals());
     778             :         if (!iter.done() && iter.filename()) {
     779           0 :             err->filename = iter.filename();
     780           0 :             err->lineNumber = iter.computeLine(&err->columnNumber);
     781             :             return false;
     782           0 :         }
     783             :     }
     784             : 
     785           0 :     // Otherwise use this TokenStreamAnyChars's location information.
     786           0 :     err->filename = filename_;
     787             :     srcCoords.lineNumAndColumnIndex(offset, &err->lineNumber, &err->columnNumber);
     788             :     return true;
     789             : }
     790           0 : 
     791             : template<typename CharT, class AnyCharsAccess>
     792             : bool
     793           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::hasTokenizationStarted() const
     794           0 : {
     795           0 :     const TokenStreamAnyChars& anyChars = anyCharsAccess();
     796           0 :     return anyChars.isCurrentTokenType(TokenKind::Eof) && !anyChars.isEOF();
     797             : }
     798             : 
     799           0 : void
     800             : TokenStreamAnyChars::lineAndColumnAt(size_t offset, uint32_t* line, uint32_t* column) const
     801             : {
     802             :     srcCoords.lineNumAndColumnIndex(offset, line, column);
     803           0 : }
     804             : 
     805           0 : template<typename CharT, class AnyCharsAccess>
     806           0 : void
     807           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::currentLineAndColumn(uint32_t* line, uint32_t* column) const
     808           0 : {
     809             :     const TokenStreamAnyChars& anyChars = anyCharsAccess();
     810           0 :     uint32_t offset = anyChars.currentToken().pos.begin;
     811           0 :     anyChars.srcCoords.lineNumAndColumnIndex(offset, line, column);
     812             : }
     813             : 
     814           0 : template<typename CharT, class AnyCharsAccess>
     815             : bool
     816           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::computeErrorMetadata(ErrorMetadata* err,
     817             :                                                                  uint32_t offset)
     818             : {
     819             :     if (offset == NoOffset) {
     820           0 :         anyCharsAccess().computeErrorMetadataNoOffset(err);
     821           0 :         return true;
     822             :     }
     823           0 : 
     824           0 :     // This function's return value isn't a success/failure indication: it
     825           0 :     // returns true if this TokenStream's location information could be used,
     826           0 :     // and it returns false when that information can't be used (and so we
     827           0 :     // can't provide a line of context).
     828             :     if (!anyCharsAccess().fillExcludingContext(err, offset))
     829             :         return true;
     830             : 
     831             :     // Add a line of context from this TokenStream to help with debugging.
     832           0 :     return computeLineOfContext(err, offset);
     833           0 : }
     834           0 : 
     835             : template<typename CharT, class AnyCharsAccess>
     836             : bool
     837             : TokenStreamSpecific<CharT, AnyCharsAccess>::computeLineOfContext(ErrorMetadata* err,
     838             :                                                                  uint32_t offset)
     839           0 : {
     840             :     // This function presumes |err| is filled in *except* for line-of-context
     841           0 :     // fields.  It exists to make |TokenStreamSpecific::computeErrorMetadata|,
     842       13857 :     // above, more readable.
     843             :     TokenStreamAnyChars& anyChars = anyCharsAccess();
     844             : 
     845             :     // We only have line-start information for the current line.  If the error
     846           1 :     // is on a different line, we can't easily provide context.  (This means
     847             :     // any error in a multi-line token, e.g. an unterminated multiline string
     848         408 :     // literal, won't have context.)
     849         408 :     if (err->lineNumber != anyChars.lineno)
     850             :         return true;
     851             : 
     852             :     constexpr size_t windowRadius = ErrorMetadata::lineOfContextRadius;
     853       13041 : 
     854             :     // The window must start within the current line, no earlier than
     855       26082 :     // |windowRadius| characters before |offset|.
     856           0 :     MOZ_ASSERT(offset >= anyChars.linebase);
     857           0 :     size_t windowStart = (offset - anyChars.linebase > windowRadius) ?
     858           0 :                          offset - windowRadius :
     859             :                          anyChars.linebase;
     860             : 
     861             :     // The window must start within the portion of the current line that we
     862           0 :     // actually have in our buffer.
     863             :     if (windowStart < sourceUnits.startOffset())
     864             :         windowStart = sourceUnits.startOffset();
     865           0 : 
     866           0 :     // The window must end within the current line, no later than
     867           0 :     // windowRadius after offset.
     868             :     size_t windowEnd = sourceUnits.findEOLMax(offset, windowRadius);
     869             :     size_t windowLength = windowEnd - windowStart;
     870             :     MOZ_ASSERT(windowLength <= windowRadius * 2);
     871             : 
     872             :     // Create the windowed string, not including the potential line
     873             :     // terminator.
     874           0 :     StringBuffer windowBuf(anyChars.cx);
     875             :     if (!windowBuf.append(codeUnitPtrAt(windowStart), windowLength) ||
     876             :         !windowBuf.append('\0'))
     877             :     {
     878           0 :         return false;
     879             :     }
     880             : 
     881             :     err->lineOfContext.reset(windowBuf.stealChars());
     882             :     if (!err->lineOfContext)
     883           0 :         return false;
     884             : 
     885             :     err->lineLength = windowLength;
     886             :     err->tokenOffset = offset - windowStart;
     887             :     return true;
     888             : }
     889           0 : 
     890             : 
     891             : template<typename CharT, class AnyCharsAccess>
     892             : bool
     893             : TokenStreamSpecific<CharT, AnyCharsAccess>::reportStrictModeError(unsigned errorNumber, ...)
     894             : {
     895           0 :     va_list args;
     896             :     va_start(args, errorNumber);
     897             : 
     898           0 :     TokenStreamAnyChars& anyChars = anyCharsAccess();
     899             :     bool result = reportStrictModeErrorNumberVA(nullptr, anyChars.currentToken().pos.begin,
     900             :                                                 anyChars.strictMode(), errorNumber, &args);
     901             : 
     902           0 :     va_end(args);
     903           0 :     return result;
     904             : }
     905           0 : 
     906             : template<typename CharT, class AnyCharsAccess>
     907             : void
     908             : TokenStreamSpecific<CharT, AnyCharsAccess>::reportError(unsigned errorNumber, ...)
     909           0 : {
     910           0 :     va_list args;
     911             :     va_start(args, errorNumber);
     912             : 
     913             :     TokenStreamAnyChars& anyChars = anyCharsAccess();
     914           0 :     ErrorMetadata metadata;
     915           0 :     if (computeErrorMetadata(&metadata, anyChars.currentToken().pos.begin)) {
     916           0 :         ReportCompileError(anyChars.cx, std::move(metadata), nullptr, JSREPORT_ERROR, errorNumber,
     917             :                            args);
     918             :     }
     919             : 
     920           0 :     va_end(args);
     921           0 : }
     922           0 : 
     923             : void
     924             : TokenStreamAnyChars::reportErrorNoOffset(unsigned errorNumber, ...)
     925             : {
     926             :     va_list args;
     927           0 :     va_start(args, errorNumber);
     928           0 : 
     929             :     reportErrorNoOffsetVA(errorNumber, args);
     930             : 
     931           0 :     va_end(args);
     932           0 : }
     933           0 : 
     934             : void
     935             : TokenStreamAnyChars::reportErrorNoOffsetVA(unsigned errorNumber, va_list args)
     936             : {
     937             :     ErrorMetadata metadata;
     938             :     computeErrorMetadataNoOffset(&metadata);
     939           0 : 
     940             :     ReportCompileError(cx, std::move(metadata), nullptr, JSREPORT_ERROR, errorNumber, args);
     941             : }
     942           0 : 
     943             : template<typename CharT, class AnyCharsAccess>
     944           0 : bool
     945           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::warning(unsigned errorNumber, ...)
     946           0 : {
     947             :     va_list args;
     948           0 :     va_start(args, errorNumber);
     949           0 : 
     950             :     ErrorMetadata metadata;
     951             :     bool result =
     952             :         computeErrorMetadata(&metadata, anyCharsAccess().currentToken().pos.begin) &&
     953             :         anyCharsAccess().compileWarning(std::move(metadata), nullptr, JSREPORT_WARNING, errorNumber,
     954           0 :                                         args);
     955             : 
     956             :     va_end(args);
     957           0 :     return result;
     958             : }
     959           0 : 
     960           0 : template<typename CharT, class AnyCharsAccess>
     961           0 : bool
     962           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::reportExtraWarningErrorNumberVA(UniquePtr<JSErrorNotes> notes,
     963             :                                                                             uint32_t offset,
     964             :                                                                             unsigned errorNumber,
     965             :                                                                             va_list* args)
     966           0 : {
     967           0 :     TokenStreamAnyChars& anyChars = anyCharsAccess();
     968             :     if (!anyChars.options().extraWarningsOption)
     969             :         return true;
     970           0 : 
     971             :     ErrorMetadata metadata;
     972             :     if (!computeErrorMetadata(&metadata, offset))
     973           0 :         return false;
     974             : 
     975           0 :     return anyChars.compileWarning(std::move(metadata), std::move(notes), JSREPORT_STRICT | JSREPORT_WARNING,
     976             :                                    errorNumber, *args);
     977           0 : }
     978           0 : 
     979             : template<typename CharT, class AnyCharsAccess>
     980             : void
     981           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::error(unsigned errorNumber, ...)
     982             : {
     983           0 :     va_list args;
     984           0 :     va_start(args, errorNumber);
     985             : 
     986           0 :     ErrorMetadata metadata;
     987           0 :     if (computeErrorMetadata(&metadata, sourceUnits.offset())) {
     988             :         TokenStreamAnyChars& anyChars = anyCharsAccess();
     989             :         ReportCompileError(anyChars.cx, std::move(metadata), nullptr, JSREPORT_ERROR, errorNumber,
     990             :                            args);
     991           0 :     }
     992             : 
     993             :     va_end(args);
     994           0 : }
     995             : 
     996           0 : template<typename CharT, class AnyCharsAccess>
     997             : void
     998           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::errorAtVA(uint32_t offset, unsigned errorNumber, va_list *args)
     999           0 : {
    1000           0 :     ErrorMetadata metadata;
    1001             :     if (computeErrorMetadata(&metadata, offset)) {
    1002           0 :         TokenStreamAnyChars& anyChars = anyCharsAccess();
    1003           0 :         ReportCompileError(anyChars.cx, std::move(metadata), nullptr, JSREPORT_ERROR, errorNumber,
    1004             :                            *args);
    1005             :     }
    1006             : }
    1007             : 
    1008           1 : 
    1009             : template<typename CharT, class AnyCharsAccess>
    1010             : void
    1011             : TokenStreamSpecific<CharT, AnyCharsAccess>::errorAt(uint32_t offset, unsigned errorNumber, ...)
    1012             : {
    1013           0 :     va_list args;
    1014           0 :     va_start(args, errorNumber);
    1015             : 
    1016             :     errorAtVA(offset, errorNumber, &args);
    1017           0 : 
    1018           0 :     va_end(args);
    1019             : }
    1020             : 
    1021           0 : // We have encountered a '\': check for a Unicode escape sequence after it.
    1022           0 : // Return the length of the escape sequence and the character code point (by
    1023             : // value) if we found a Unicode escape sequence.  Otherwise, return 0.  In both
    1024             : // cases, do not advance along the buffer.
    1025             : template<typename CharT, class AnyCharsAccess>
    1026             : uint32_t
    1027           0 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::matchUnicodeEscape(uint32_t* codePoint)
    1028             : {
    1029             :     MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
    1030           0 : 
    1031             :     int32_t unit = getCodeUnit();
    1032           0 :     if (unit != 'u') {
    1033           0 :         // NOTE: |unit| may be EOF here.
    1034           0 :         ungetCodeUnit(unit);
    1035           0 :         MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
    1036             :         return 0;
    1037             :     }
    1038             : 
    1039           0 :     CharT cp[3];
    1040           0 :     unit = getCodeUnit();
    1041             :     if (JS7_ISHEX(unit) &&
    1042             :         sourceUnits.peekCodeUnits(3, cp) &&
    1043             :         JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]))
    1044           0 :     {
    1045             :         *codePoint = (JS7_UNHEX(unit) << 12) |
    1046           0 :                      (JS7_UNHEX(cp[0]) << 8) |
    1047           0 :                      (JS7_UNHEX(cp[1]) << 4) |
    1048           0 :                      JS7_UNHEX(cp[2]);
    1049           0 :         sourceUnits.skipCodeUnits(3);
    1050             :         return 5;
    1051             :     }
    1052           0 : 
    1053             :     if (unit == '{')
    1054             :         return matchExtendedUnicodeEscape(codePoint);
    1055             : 
    1056             :     // NOTE: |unit| may be EOF here, so this ungets either one or two units.
    1057           0 :     ungetCodeUnit(unit);
    1058             :     ungetCodeUnit('u');
    1059             :     MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
    1060           0 :     return 0;
    1061             : }
    1062           0 : 
    1063             : template<typename CharT, class AnyCharsAccess>
    1064           0 : uint32_t
    1065           0 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::matchExtendedUnicodeEscape(uint32_t* codePoint)
    1066             : {
    1067             :     MOZ_ASSERT(sourceUnits.previousCodeUnit() == '{');
    1068             : 
    1069             :     int32_t unit = getCodeUnit();
    1070             : 
    1071             :     // Skip leading zeroes.
    1072             :     uint32_t leadingZeroes = 0;
    1073           0 :     while (unit == '0') {
    1074             :         leadingZeroes++;
    1075           0 :         unit = getCodeUnit();
    1076           0 :     }
    1077           0 : 
    1078           0 :     size_t i = 0;
    1079             :     uint32_t code = 0;
    1080             :     while (JS7_ISHEX(unit) && i < 6) {
    1081             :         code = (code << 4) | JS7_UNHEX(unit);
    1082             :         unit = getCodeUnit();
    1083           0 :         i++;
    1084           0 :     }
    1085           0 : 
    1086             :     uint32_t gotten =
    1087           0 :         2 + // 'u{'
    1088           0 :         leadingZeroes +
    1089           0 :         i + // significant hexdigits
    1090           0 :         (unit != EOF); // subtract a get if it didn't contribute to length
    1091           0 : 
    1092           0 :     if (unit == '}' && (leadingZeroes > 0 || i > 0) && code <= unicode::NonBMPMax) {
    1093           0 :         *codePoint = code;
    1094             :         return gotten;
    1095             :     }
    1096             : 
    1097             :     sourceUnits.unskipCodeUnits(gotten);
    1098           0 :     MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
    1099           0 :     return 0;
    1100           0 : }
    1101             : 
    1102             : template<typename CharT, class AnyCharsAccess>
    1103             : uint32_t
    1104             : GeneralTokenStreamChars<CharT, AnyCharsAccess>::matchUnicodeEscapeIdStart(uint32_t* codePoint)
    1105           0 : {
    1106             :     uint32_t length = matchUnicodeEscape(codePoint);
    1107             :     if (MOZ_LIKELY(length > 0)) {
    1108           0 :         if (MOZ_LIKELY(unicode::IsIdentifierStart(*codePoint)))
    1109             :             return length;
    1110             : 
    1111             :         sourceUnits.unskipCodeUnits(length);
    1112           0 :     }
    1113           0 : 
    1114           0 :     MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
    1115             :     return 0;
    1116             : }
    1117             : 
    1118             : template<typename CharT, class AnyCharsAccess>
    1119             : bool
    1120           0 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::matchUnicodeEscapeIdent(uint32_t* codePoint)
    1121           0 : {
    1122           0 :     uint32_t length = matchUnicodeEscape(codePoint);
    1123           0 :     if (MOZ_LIKELY(length > 0)) {
    1124             :         if (MOZ_LIKELY(unicode::IsIdentifierPart(*codePoint)))
    1125             :             return true;
    1126             : 
    1127           0 :         sourceUnits.unskipCodeUnits(length);
    1128           0 :     }
    1129           0 : 
    1130             :     MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
    1131             :     return false;
    1132             : }
    1133             : 
    1134           0 : // Helper function which returns true if the first length(q) characters in p are
    1135           0 : // the same as the characters in q.
    1136           0 : template<typename CharT>
    1137           0 : static bool
    1138           0 : CharsMatch(const CharT* p, const char* q)
    1139             : {
    1140           0 :     while (*q) {
    1141             :         if (*p++ != *q++)
    1142             :             return false;
    1143             :     }
    1144             : 
    1145           0 :     return true;
    1146             : }
    1147           0 : 
    1148           0 : template<typename CharT, class AnyCharsAccess>
    1149           0 : bool
    1150           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::getDirectives(bool isMultiline,
    1151             :                                                           bool shouldWarnDeprecated)
    1152             : {
    1153             :     // Match directive comments used in debugging, such as "//# sourceURL" and
    1154             :     // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.
    1155             :     //
    1156             :     // To avoid a crashing bug in IE, several JavaScript transpilers wrap single
    1157           0 :     // line comments containing a source mapping URL inside a multiline
    1158             :     // comment. To avoid potentially expensive lookahead and backtracking, we
    1159           0 :     // only check for this case if we encounter a '#' character.
    1160           0 : 
    1161           0 :     bool res = getDisplayURL(isMultiline, shouldWarnDeprecated) &&
    1162           0 :                getSourceMappingURL(isMultiline, shouldWarnDeprecated);
    1163             :     if (!res)
    1164             :         badToken();
    1165             : 
    1166             :     return res;
    1167             : }
    1168             : 
    1169             : template<>
    1170             : MOZ_MUST_USE bool
    1171             : TokenStreamCharsBase<char16_t>::copyTokenbufTo(JSContext* cx,
    1172             :                                                UniquePtr<char16_t[], JS::FreePolicy>* destination)
    1173       11274 : {
                           // Copy the current contents of |tokenbuf| into a freshly allocated,
                           // null-terminated array stored in |*destination|.  Returns false if the
                           // allocation yields a null pointer, true otherwise.
    1174       11256 :     size_t length = tokenbuf.length();
    1175             : 
                           // One extra element is reserved for the terminating null written below.
    1176             :     *destination = cx->make_pod_array<char16_t>(length + 1);
    1177             :     if (!*destination)
    1178             :         return false;
    1179             : 
    1180             :     PodCopy(destination->get(), tokenbuf.begin(), length);
    1181             :     (*destination)[length] = '\0';
    1182             :     return true;
    1183        5492 : }
    1184             : 
    1185             : template<typename CharT, class AnyCharsAccess>
    1186             : MOZ_MUST_USE bool
    1187             : TokenStreamSpecific<CharT, AnyCharsAccess>::getDirective(bool isMultiline,
    1188             :                                                          bool shouldWarnDeprecated,
    1189             :                                                          const char* directive,
    1190             :                                                          uint8_t directiveLength,
    1191             :                                                          const char* errorMsgPragma,
    1192             :                                                          UniquePtr<char16_t[], JS::FreePolicy>* destination)
    1193             : {
                           // Try to match |directive| (|directiveLength| code units long) at the
                           // current position and, on a match, collect the text following it into
                           // |*destination|.
                           //
                           // Returns true when the directive is absent, when it is present but no
                           // text follows it, and when the text was copied successfully.  Returns
                           // false when |warning|, a |tokenbuf| append, |getCodePoint| or
                           // |copyTokenbufTo| fails.
                           //
                           // |errorMsgPragma| is only used as the argument of the deprecation
                           // warning issued when |shouldWarnDeprecated| is set.
                           //
                           // The fixed capacity 18 equals the length of the longest directive
                           // passed by the callers visible in this file (the sourceMappingURL one).
    1194       10984 :     MOZ_ASSERT(directiveLength <= 18);
    1195       10984 :     char16_t peeked[18];
    1196           0 : 
    1197           0 :     // If there aren't enough characters left, it can't be the desired
    1198             :     // directive.
    1199           0 :     if (!sourceUnits.peekCodeUnits(directiveLength, peeked))
    1200             :         return true;
    1201             : 
    1202             :     // It's also not the desired directive if the characters don't match.
    1203             :     if (!CharsMatch(peeked, directive))
    1204          18 :         return true;
    1205             : 
    1206             :     if (shouldWarnDeprecated) {
    1207          18 :         if (!warning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
    1208             :             return false;
    1209          72 :     }
    1210          18 : 
                           // The directive matched: step past it and start collecting its value.
    1211             :     sourceUnits.skipCodeUnits(directiveLength);
    1212             :     tokenbuf.clear();
    1213          54 : 
                           // Accumulate units until EOF, whitespace, or (inside a multi-line
                           // comment) the comment terminator.
    1214          18 :     do {
    1215          18 :         int32_t unit = peekCodeUnit();
    1216             :         if (unit == EOF)
    1217             :             break;
    1218             : 
    1219             :         if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
    1220       10984 :             if (unicode::IsSpaceOrBOM2(unit))
    1221             :                 break;
    1222             : 
    1223             :             consumeKnownCodeUnit(unit);
    1224             : 
    1225             :             // Debugging directives can occur in both single- and multi-line
    1226             :             // comments. If we're currently inside a multi-line comment, we
    1227       10984 :             // also must recognize multi-line comment terminators.
    1228             :             if (isMultiline && unit == '*' && peekCodeUnit() == '/') {
                                       // Put the star back so the terminator stays unconsumed.
    1229             :                 ungetCodeUnit('*');
    1230             :                 break;
    1231             :             }
    1232           0 : 
    1233             :             if (!tokenbuf.append(unit))
    1234             :                 return false;
    1235             : 
    1236       10966 :             continue;
    1237             :         }
    1238             : 
                               // Non-ASCII lead unit: decode the whole code point before classifying
                               // it.
    1239          18 :         int32_t codePoint;
    1240           0 :         if (!getCodePoint(&codePoint))
    1241             :             return false;
    1242             : 
    1243             :         if (unicode::IsSpaceOrBOM2(codePoint)) {
    1244           0 :             ungetNonAsciiNormalizedCodePoint(codePoint);
    1245           0 :             break;
    1246             :         }
    1247             : 
    1248             :         if (!appendCodePointToTokenbuf(codePoint))
    1249         845 :             return false;
    1250           0 :     } while (true);
    1251             : 
    1252           0 :     if (tokenbuf.empty()) {
    1253             :         // The directive's URL was missing, but comments can contain anything,
    1254             :         // so it isn't an error.
    1255           0 :         return true;
    1256             :     }
    1257             : 
    1258             :     return copyTokenbufTo(anyCharsAccess().cx, destination);
    1259             : }
    1260           0 : 
    1261           0 : template<typename CharT, class AnyCharsAccess>
    1262           0 : bool
    1263           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::getDisplayURL(bool isMultiline,
    1264             :                                                           bool shouldWarnDeprecated)
    1265             : {
    1266           0 :     // Match comments of the form "//# sourceURL=<url>" or
    1267           0 :     // "/\* //# sourceURL=<url> *\/"
    1268             :     //
    1269             :     // Note that while these are labeled "sourceURL" in the source text,
    1270          18 :     // internally we refer to it as a "displayURL" to distinguish what the
    1271             :     // developer would like to refer to the source as from the source's actual
    1272             :     // URL.
    1273             : 
                           // The directive text starts with a space and ends with the equals sign.
                           // The length excludes the null terminator of the array, hence the - 1.
                           // The result of |getDirective| is returned unchanged, and any value
                           // found is stored in |displayURL_|.
    1274             :     static const char sourceURLDirective[] = " sourceURL=";
    1275             :     constexpr uint8_t sourceURLDirectiveLength = ArrayLength(sourceURLDirective) - 1;
    1276          36 :     return getDirective(isMultiline, shouldWarnDeprecated,
    1277             :                         sourceURLDirective, sourceURLDirectiveLength,
    1278             :                         "sourceURL", &anyCharsAccess().displayURL_);
    1279             : }
    1280             : 
    1281           0 : template<typename CharT, class AnyCharsAccess>
    1282             : bool
    1283             : TokenStreamSpecific<CharT, AnyCharsAccess>::getSourceMappingURL(bool isMultiline,
    1284             :                                                                 bool shouldWarnDeprecated)
    1285             : {
    1286             :     // Match comments of the form "//# sourceMappingURL=<url>" or
    1287             :     // "/\* //# sourceMappingURL=<url> *\/"
    1288             : 
                           // The directive text starts with a space and ends with the equals sign.
                           // The length excludes the null terminator of the array, hence the - 1.
                           // The result of |getDirective| is returned unchanged, and any value
                           // found is stored in |sourceMapURL_|.
    1289             :     static const char sourceMappingURLDirective[] = " sourceMappingURL=";
    1290             :     constexpr uint8_t sourceMappingURLDirectiveLength = ArrayLength(sourceMappingURLDirective) - 1;
    1291             :     return getDirective(isMultiline, shouldWarnDeprecated,
    1292             :                         sourceMappingURLDirective, sourceMappingURLDirectiveLength,
    1293        5492 :                         "sourceMappingURL", &anyCharsAccess().sourceMapURL_);
    1294           0 : }
    1295             : 
    1296       16476 : template<typename CharT, class AnyCharsAccess>
    1297             : MOZ_ALWAYS_INLINE Token*
    1298             : GeneralTokenStreamChars<CharT, AnyCharsAccess>::newTokenInternal(TokenKind kind, TokenStart start,
    1299             :                                                                  TokenKind* out)
    1300             : {
                           // Allocate the next Token, give it type |kind| and the source span from
                           // |start| to the current offset of |sourceUnits|, store |kind| in |*out|
                           // as well, and return the token.
    1301        5492 :     MOZ_ASSERT(kind < TokenKind::Limit);
    1302             :     MOZ_ASSERT(kind != TokenKind::Eol,
    1303             :                "TokenKind::Eol should never be used in an actual Token, only "
    1304             :                "returned by peekTokenSameLine()");
    1305             : 
                           // A token was produced on this line, so mark the line dirty.
    1306             :     TokenStreamAnyChars& anyChars = anyCharsAccess();
    1307             :     anyChars.flags.isDirtyLine = true;
    1308        5492 : 
    1309           0 :     Token* token = anyChars.allocateToken();
    1310             : 
    1311       16476 :     *out = token->type = kind;
    1312             :     token->pos = TokenPos(start.offset(), this->sourceUnits.offset());
    1313             :     MOZ_ASSERT(token->pos.begin <= token->pos.end);
    1314             : 
    1315             :     // NOTE: |token->modifier| and |token->modifierException| are set in
    1316      883823 :     //       |newToken()| so that optimized, non-debug code won't do any work
    1317             :     //       to pass a modifier-argument that will never be used.
    1318             : 
    1319      883823 :     return token;
    1320      883823 : }
    1321             : 
    1322             : template<typename CharT, class AnyCharsAccess>
    1323             : MOZ_COLD bool
    1324      883823 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::badToken()
    1325           1 : {
                           // Record that tokenizing failed.  Always returns false, which lets
                           // callers in this file write |return badToken();|.
                           //
    1326             :     // We didn't get a token, so don't set |flags.isDirtyLine|.
    1327      883823 :     anyCharsAccess().flags.hadError = true;
    1328             : 
    1329      883823 :     // Poisoning sourceUnits on error establishes an invariant: once an
    1330     2651461 :     // erroneous token has been seen, sourceUnits will not be consulted again.
    1331      883819 :     // This is true because the parser will deal with the illegal token by
    1332             :     // aborting parsing immediately.
    1333             :     sourceUnits.poisonInDebug();
    1334             : 
    1335             :     return false;
                       // NOTE(review): the semicolon after the closing brace below is an empty
                       // declaration; it is harmless but could be dropped.
    1336             : };
    1337      883819 : 
    1338             : template<>
    1339             : MOZ_MUST_USE bool
    1340             : TokenStreamCharsBase<char16_t>::appendCodePointToTokenbuf(uint32_t codePoint)
    1341             : {
                           // Encode |codePoint| as UTF-16 and append the resulting one or two code
                           // units to |tokenbuf|.  Returns false as soon as an append fails.
    1342           0 :     char16_t units[2];
    1343             :     unsigned numUnits = 0;
    1344             :     unicode::UTF16Encode(codePoint, units, &numUnits);
    1345           0 : 
    1346             :     MOZ_ASSERT(numUnits == 1 || numUnits == 2,
    1347             :                "UTF-16 code points are only encoded in one or two units");
    1348             : 
    1349             :     if (!tokenbuf.append(units[0]))
    1350             :         return false;
    1351           0 : 
    1352             :     if (numUnits == 1)
    1353           0 :         return true;
    1354             : 
                           // Only reached when two units were produced, so units[1] is initialized.
    1355             :     return tokenbuf.append(units[1]);
    1356             : }
    1357             : 
    1358           0 : template<typename CharT, class AnyCharsAccess>
    1359             : bool
    1360             : TokenStreamSpecific<CharT, AnyCharsAccess>::putIdentInTokenbuf(const CharT* identStart)
    1361           0 : {
                           // Re-scan the identifier beginning at |identStart| and rebuild it in
                           // |tokenbuf|, appending for each Unicode escape the code point reported
                           // by |matchUnicodeEscapeIdent|.
                           //
                           // Returns false if an append or |getNonAsciiCodePoint| fails, true
                           // otherwise.  The scope-exit guard below restores the read position on
                           // every exit path.
    1362           0 :     const CharT* const originalAddress = sourceUnits.addressOfNextCodeUnit();
    1363             :     sourceUnits.setAddressOfNextCodeUnit(identStart);
    1364           0 : 
    1365             :     auto restoreNextRawCharAddress =
    1366             :         MakeScopeExit([this, originalAddress]() {
    1367           0 :             this->sourceUnits.setAddressOfNextCodeUnit(originalAddress);
    1368             :         });
    1369             : 
    1370           0 :     tokenbuf.clear();
    1371             :     do {
    1372             :         int32_t unit = getCodeUnit();
    1373           0 :         if (unit == EOF)
    1374             :             break;
    1375             : 
    1376             :         uint32_t codePoint;
    1377             :         if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
                                   // Fast path: a plain ASCII identifier part is appended directly.
    1378           0 :             if (MOZ_LIKELY(unicode::IsIdentifierPart(char16_t(unit)))) {
    1379             :                 if (!tokenbuf.append(unit))
    1380             :                     return false;
    1381           0 : 
    1382             :                 continue;
    1383             :             }
    1384             : 
                                   // Any other ASCII unit ends the identifier unless it begins a
                                   // Unicode escape, in which case |codePoint| receives the result.
    1385           0 :             if (unit != '\\' || !matchUnicodeEscapeIdent(&codePoint))
    1386           0 :                 break;
    1387           0 :         } else {
    1388             :             int32_t cp;
    1389           0 :             if (!getNonAsciiCodePoint(unit, &cp))
    1390           0 :                 return false;
    1391             : 
    1392           0 :             codePoint = AssertedCast<uint32_t>(cp);
    1393             :         }
    1394             : 
    1395             :         if (!unicode::IsIdentifierPart(codePoint)) {
    1396           0 :             if (MOZ_UNLIKELY(codePoint == unicode::LINE_SEPARATOR ||
    1397             :                              codePoint == unicode::PARA_SEPARATOR))
    1398           0 :             {
    1399           0 :                 // |restoreNextRawCharAddress| undoes all gets, but it doesn't
    1400             :                 // revert line/column updates.  The ASCII code path never
    1401             :                 // updates line/column state, so only Unicode separators gotten
    1402           0 :                 // by |getNonAsciiCodePoint| require this.
    1403           0 :                 anyCharsAccess().undoInternalUpdateLineInfoForEOL();
    1404           0 :             }
    1405             :             break;
    1406           0 :         }
    1407           0 : 
    1408           0 :         if (!appendCodePointToTokenbuf(codePoint))
    1409             :             return false;
    1410             :     } while (true);
    1411           0 : 
    1412           0 :     return true;
    1413           0 : }
    1414           0 : 
    1415             : template<typename CharT, class AnyCharsAccess>
    1416             : MOZ_MUST_USE bool
    1417           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::identifierName(TokenStart start,
    1418           0 :                                                            const CharT* identStart,
    1419             :                                                            IdentifierEscapes escaping,
    1420           0 :                                                            Modifier modifier, TokenKind* out)
    1421             : {
                           // Scan the rest of an identifier that begins at |identStart| and create
                           // either a reserved-word token or a name token for it, storing the
                           // resulting kind in |*out|.
                           //
                           // On entry |escaping| may already be SawUnicodeEscape; it is also set
                           // below when an escape is matched inside the loop.
                           //
                           // Returns true on the two success paths and false otherwise.
                           //
    1422             :     // Run the bad-token code for every path out of this function except the
    1423             :     // two success-cases.
    1424             :     auto noteBadToken = MakeScopeExit([this]() {
    1425           0 :         this->badToken();
    1426             :     });
    1427             : 
    1428             :     // We've already consumed an initial code point in the identifier, to *know*
    1429           0 :     // that this is an identifier.  So no need to worry about not consuming any
    1430             :     // code points in the loop below.
    1431             :     int32_t unit;
    1432             :     while (true) {
    1433             :         unit = getCodeUnit();
    1434           0 :         if (unit == EOF)
    1435             :             break;
    1436             : 
    1437             :         if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
    1438             :             if (MOZ_UNLIKELY(!unicode::IsIdentifierPart(static_cast<char16_t>(unit)))) {
    1439             :                 // Handle a Unicode escape -- otherwise it's not part of the
    1440             :                 // identifier.
    1441           0 :                 uint32_t codePoint;
    1442           0 :                 if (unit != '\\' || !matchUnicodeEscapeIdent(&codePoint)) {
    1443     1035626 :                     ungetCodeUnit(unit);
    1444             :                     break;
    1445             :                 }
    1446             : 
    1447     5249898 :                 escaping = IdentifierEscapes::SawUnicodeEscape;
    1448     2624949 :             }
    1449             :         } else {
    1450             :             int32_t codePoint;
    1451             :             if (!getNonAsciiCodePoint(unit, &codePoint))
    1452     5249848 :                 return false;
    1453             : 
    1454     2624949 :             if (!unicode::IsIdentifierPart(uint32_t(codePoint))) {
    1455           0 :                 ungetNonAsciiNormalizedCodePoint(codePoint);
    1456             :                 break;
    1457             :             }
    1458           0 :         }
    1459             :     }
    1460             : 
    1461     2624949 :     const CharT* chars;
    1462             :     size_t length;
    1463      345148 :     if (escaping == IdentifierEscapes::SawUnicodeEscape) {
    1464             :         // Identifiers containing Unicode escapes have to be converted into
    1465           0 :         // tokenbuf before atomizing.
    1466             :         if (!putIdentInTokenbuf(identStart))
    1467             :             return false;
    1468      345198 : 
    1469             :         chars = tokenbuf.begin();
    1470             :         length = tokenbuf.length();
    1471             :     } else {
    1472      345208 :         // Escape-free identifiers can be created directly from sourceUnits.
    1473             :         chars = identStart;
    1474             :         length = sourceUnits.addressOfNextCodeUnit() - identStart;
    1475           0 : 
    1476             :         // Represent reserved words lacking escapes as reserved word tokens.
    1477             :         if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
    1478           0 :             noteBadToken.release();
    1479           0 :             newSimpleToken(rw->tokentype, start, modifier, out);
    1480             :             return true;
    1481             :         }
    1482      345208 :     }
    1483      345208 : 
                           // Every remaining identifier, including one that contained escapes,
                           // becomes a name token.
    1484             :     JSAtom* atom = atomizeChars(anyCharsAccess().cx, chars, length);
    1485             :     if (!atom)
    1486      345208 :         return false;
    1487      102560 : 
    1488      205120 :     noteBadToken.release();
    1489      102560 :     newNameToken(atom->asPropertyName(), start, modifier, out);
    1490             :     return true;
    1491             : }
    1492             : 
    1493      727981 : enum FirstCharKind {
    1494      242689 :     // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
    1495             :     // token that cannot also be a prefix of a longer token.  E.g. ';' has the
    1496             :     // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
    1497      242689 :     // that begin with '+'.
    1498      242689 :     //
    1499      242692 :     // The few token kinds satisfying these properties cover roughly 35--45%
    1500             :     // of the tokens seen in practice.
    1501             :     //
    1502             :     // We represent the 'OneChar' kind with any positive value less than
    1503             :     // TokenKind::Limit.  This representation lets us associate
    1504             :     // each one-char token char16_t with a TokenKind and thus avoid
    1505             :     // a subsequent char16_t-to-TokenKind conversion.
    1506             :     OneChar_Min = 0,
    1507             :     OneChar_Max = size_t(TokenKind::Limit) - 1,
    1508             : 
                           // The remaining kinds start at TokenKind::Limit, so none of them can
                           // collide with a OneChar value.  Each is assigned to ASCII code points
                           // by the firstCharKinds table in this file.
    1509             :     Space = size_t(TokenKind::Limit),
    1510             :     Ident,
    1511             :     Dec,
    1512             :     String,
    1513             :     EOL,
    1514             :     ZeroDigit,
    1515             :     Other,
    1516             : 
                           // Upper bound checked by the static_assert on the table element size.
    1517             :     LastCharKind = Other
    1518             : };
    1519             : 
    1520             : // OneChar: 40,  41,  44,  58,  59,  63,  91,  93,  123, 125, 126:
    1521             : //          '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
    1522             : // Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
    1523             : // Other:   46: '.'   (no Dot kind exists; the table maps it to Other)
    1524             : // Other:   61: '='   (no Equals kind exists; the table maps it to Other)
    1525             : // String:  34, 39, 96: '"', '\'', '`'
    1526             : // Dec:     49..57: '1'..'9'
    1527             : // Other:   43: '+'   (no Plus kind exists; the table maps it to Other)
    1528             : // ZeroDigit:  48: '0'
    1529             : // Space:   9, 11, 12, 32: '\t', '\v', '\f', ' '
    1530             : // EOL:     10, 13: '\n', '\r'
    1531             : //
                       // Short aliases so that each table row fits on one line.  All of them are
                       // undefined again right after the table.
    1532             : #define T_COMMA     size_t(TokenKind::Comma)
    1533             : #define T_COLON     size_t(TokenKind::Colon)
    1534             : #define T_BITNOT    size_t(TokenKind::BitNot)
    1535             : #define T_LP        size_t(TokenKind::Lp)
    1536             : #define T_RP        size_t(TokenKind::Rp)
    1537             : #define T_SEMI      size_t(TokenKind::Semi)
    1538             : #define T_HOOK      size_t(TokenKind::Hook)
    1539             : #define T_LB        size_t(TokenKind::Lb)
    1540             : #define T_RB        size_t(TokenKind::Rb)
    1541             : #define T_LC        size_t(TokenKind::Lc)
    1542             : #define T_RC        size_t(TokenKind::Rc)
    1543             : #define _______     Other
                       // One entry per code point 0..127; the entry at index c is the
                       // FirstCharKind (or the one-char TokenKind) of the character with code c.
    1544             : static const uint8_t firstCharKinds[] = {
    1545             : /*         0        1        2        3        4        5        6        7        8        9    */
    1546             : /*   0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______,   Space,
    1547             : /*  10+ */     EOL,   Space,   Space,     EOL, _______, _______, _______, _______, _______, _______,
    1548             : /*  20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
    1549             : /*  30+ */ _______, _______,   Space, _______,  String, _______,   Ident, _______, _______,  String,
    1550             : /*  40+ */    T_LP,    T_RP, _______, _______, T_COMMA, _______, _______, _______,ZeroDigit,    Dec,
    1551             : /*  50+ */     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec, T_COLON,  T_SEMI,
    1552             : /*  60+ */ _______, _______, _______,  T_HOOK, _______,   Ident,   Ident,   Ident,   Ident,   Ident,
    1553             : /*  70+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
    1554             : /*  80+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
    1555             : /*  90+ */   Ident,    T_LB, _______,    T_RB, _______,   Ident,  String,   Ident,   Ident,   Ident,
    1556             : /* 100+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
    1557             : /* 110+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
    1558             : /* 120+ */   Ident,   Ident,   Ident,    T_LC, _______,    T_RC,T_BITNOT, _______
    1559             : };
    1560             : #undef T_COMMA
    1561             : #undef T_COLON
    1562             : #undef T_BITNOT
    1563             : #undef T_LP
    1564             : #undef T_RP
    1565             : #undef T_SEMI
    1566             : #undef T_HOOK
    1567             : #undef T_LB
    1568             : #undef T_RB
    1569             : #undef T_LC
    1570             : #undef T_RC
    1571             : #undef _______
    1572             : 
                       // Every FirstCharKind value, up to LastCharKind, must fit in one table
                       // element.
    1573             : static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
    1574             :               "Elements of firstCharKinds[] are too small");
    1575             : 
    1576             : template<typename CharT, class AnyCharsAccess>
    1577             : void
    1578             : GeneralTokenStreamChars<CharT, AnyCharsAccess>::consumeRestOfSingleLineComment()
    1579             : {
                           // Read code units until EOF or a raw end-of-line character is seen.
    1580             :     int32_t c;
    1581             :     do {
    1582             :         c = getCodeUnit();
    1583             :     } while (c != EOF && !SourceUnits::isRawEOLChar(c));
    1584             : 
                           // Hand the terminating unit back so that it is not consumed here.
                           // NOTE(review): |c| can be EOF at this point; presumably |ungetCodeUnit|
                           // accepts EOF, as |decimalNumber| relies on the same behaviour -- confirm.
    1585             :     ungetCodeUnit(c);
    1586             : }
    1587       16094 : 
    1588             : template<typename CharT, class AnyCharsAccess>
    1589             : MOZ_MUST_USE bool
    1590           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::decimalNumber(int32_t unit, TokenStart start,
    1591      851635 :                                                           const CharT* numStart,
    1592     1703270 :                                                           Modifier modifier, TokenKind* out)
    1593             : {
                           // Scan a decimal numeric literal and create a number token for it,
                           // storing the resulting kind in |*out|.
                           //
                           // |unit| is the code unit to start examining.  |numStart| points at the
                           // first unit of the literal; the text from there to the current read
                           // position is what gets converted to a double.
                           //
                           // Returns true only on the single success path at the end.
                           //
    1594       16094 :     // Run the bad-token code for every path out of this function except the
    1595       16094 :     // one success-case.
    1596             :     auto noteBadToken = MakeScopeExit([this]() {
    1597             :         this->badToken();
    1598             :     });
    1599        8277 : 
    1600             :     // Consume integral component digits.
    1601             :     while (IsAsciiDigit(unit))
    1602             :         unit = getCodeUnit();
    1603             : 
    1604             :     // Numbers contain no escapes, so we can read directly from |sourceUnits|.
    1605           0 :     double dval;
    1606           0 :     DecimalPoint decimalPoint = NoDecimal;
    1607       24831 :     if (unit != '.' && unit != 'e' && unit != 'E') {
    1608             :         // NOTE: |unit| may be EOF here.
    1609             :         ungetCodeUnit(unit);
    1610       16046 : 
    1611        7769 :         // Most numbers are pure decimal integers without fractional component
    1612             :         // or exponential notation.  Handle that with optimized code.
    1613             :         if (!GetDecimalInteger(anyCharsAccess().cx, numStart, sourceUnits.addressOfNextCodeUnit(),
    1614             :                                &dval))
    1615           0 :         {
    1616        8277 :             return false;
    1617           1 :         }
    1618             :     } else {
    1619             :         // Consume any decimal dot and fractional component.
    1620             :         if (unit == '.') {
    1621           1 :             decimalPoint = HasDecimal;
    1622             :             do {
    1623             :                 unit = getCodeUnit();
    1624             :             } while (IsAsciiDigit(unit));
    1625             :         }
    1626             : 
    1627             :         // Consume any exponential notation.
    1628          55 :         if (unit == 'e' || unit == 'E') {
    1629             :             unit = getCodeUnit();
    1630         130 :             if (unit == '+' || unit == '-')
    1631           0 :                 unit = getCodeUnit();
    1632             : 
    1633             :             // Exponential notation must contain at least one digit.
    1634             :             if (!IsAsciiDigit(unit)) {
    1635             :                 ungetCodeUnit(unit);
    1636          55 :                 error(JSMSG_MISSING_EXPONENT);
    1637           0 :                 return false;
    1638           0 :             }
    1639           0 : 
    1640             :             // Consume exponential digits.
    1641             :             do {
    1642           0 :                 unit = getCodeUnit();
    1643           0 :             } while (IsAsciiDigit(unit));
    1644           0 :         }
    1645           0 : 
                               // |unit| is the first unit after the literal; put it back before
                               // converting the text.
    1646             :         ungetCodeUnit(unit);
    1647             : 
                               // The end pointer reported by |js_strtod| is not needed here.
    1648             :         const CharT* dummy;
    1649           0 :         if (!js_strtod(anyCharsAccess().cx, numStart, sourceUnits.addressOfNextCodeUnit(), &dummy,
    1650           0 :                        &dval))
    1651             :         {
    1652             :            return false;
    1653             :         }
    1654          55 :     }
    1655             : 
    1656             :     // Number followed by IdentifierStart is an error.  (This is the only place
    1657           0 :     // in ECMAScript where token boundary is inadequate to properly separate
    1658             :     // two tokens, necessitating this unaesthetic lookahead.)
                           // On both branches above |unit| has already been put back, so it is
                           // only inspected here, not consumed.
    1659             :     if (unit != EOF) {
    1660             :         if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
    1661             :             if (unicode::IsIdentifierStart(char16_t(unit))) {
    1662             :                 error(JSMSG_IDSTART_AFTER_NUMBER);
    1663             :                 return false;
    1664             :             }
    1665             :         } else {
                                   // Decode the full non-ASCII code point, then immediately put it
                                   // back so the check does not consume it.
    1666             :             int32_t codePoint;
    1667           0 :             if (!getCodePoint(&codePoint))
    1668           0 :                 return false;
    1669           0 : 
    1670           0 :             ungetNonAsciiNormalizedCodePoint(codePoint);
    1671             : 
    1672             :             if (unicode::IsIdentifierStart(uint32_t(codePoint))) {
    1673           0 :                 error(JSMSG_IDSTART_AFTER_NUMBER);
    1674             :                 return false;
    1675             :             }
    1676       16550 :         }
    1677             :     }
    1678             : 
    1679        8275 :     noteBadToken.release();
    1680             :     newNumberToken(dval, decimalPoint, start, modifier, out);
    1681           0 :     return true;
    1682             : }
    1683           0 : 
    1684             : template<typename CharT, class AnyCharsAccess>
    1685           0 : MOZ_MUST_USE bool
    1686           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::regexpLiteral(TokenStart start, TokenKind* out)
    1687             : {
    1688             :     MOZ_ASSERT(sourceUnits.previousCodeUnit() == '/');
    1689             :     tokenbuf.clear();
    1690             : 
    1691        8275 :     auto ProcessNonAsciiCodePoint = [this](CharT lead) {
    1692             :         int32_t codePoint;
    1693             :         if (!this->getNonAsciiCodePoint(lead, &codePoint))
    1694             :             return false;
    1695           0 : 
    1696        8277 :         if (codePoint == '\n') {
    1697           0 :             this->ungetLineTerminator();
    1698             :             this->reportError(JSMSG_UNTERMINATED_REGEXP);
    1699             :             return false;
    1700             :         }
    1701             : 
    1702           0 :         return this->appendCodePointToTokenbuf(codePoint);
    1703             :     };
    1704           0 : 
    1705           0 :     auto ReportUnterminatedRegExp = [this](CharT unit) {
    1706             :         this->ungetCodeUnit(unit);
    1707           0 :         this->error(JSMSG_UNTERMINATED_REGEXP);
    1708             :     };
    1709           0 : 
    1710             :     bool inCharClass = false;
    1711             :     do {
    1712           0 :         int32_t unit = getCodeUnit();
    1713           0 :         if (unit == EOF) {
    1714           0 :             ReportUnterminatedRegExp(unit);
    1715           0 :             return badToken();
    1716             :         }
    1717             : 
    1718           0 :         if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
    1719         296 :             if (unit == '\\')  {
    1720             :                 if (!tokenbuf.append(unit))
    1721             :                     return badToken();
    1722           0 : 
    1723           0 :                 unit = getCodeUnit();
    1724         296 :                 if (unit == EOF) {
    1725             :                     ReportUnterminatedRegExp(unit);
    1726         296 :                     return badToken();
    1727             :                 }
    1728           0 : 
    1729        4508 :                 // Fallthrough only handles ASCII code points, so
    1730           0 :                 // deal with non-ASCII and skip everything else.
    1731           0 :                 if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
    1732             :                     if (!ProcessNonAsciiCodePoint(unit))
    1733             :                         return badToken();
    1734           0 : 
    1735        4508 :                     continue;
    1736         378 :                 }
    1737           0 :             } else if (unit == '[') {
    1738             :                 inCharClass = true;
    1739         756 :             } else if (unit == ']') {
    1740           0 :                 inCharClass = false;
    1741           0 :             } else if (unit == '/' && !inCharClass) {
    1742           0 :                 // For IE compat, allow unescaped / in char classes.
    1743             :                 break;
    1744             :             }
    1745             : 
    1746             :             if (unit == '\r' || unit == '\n') {
    1747           0 :                 ReportUnterminatedRegExp(unit);
    1748           0 :                 return badToken();
    1749           0 :             }
    1750             : 
    1751           0 :             if (!tokenbuf.append(unit))
    1752             :                 return badToken();
    1753           0 :         } else {
    1754             :             if (!ProcessNonAsciiCodePoint(unit))
    1755           0 :                 return badToken();
    1756             :         }
    1757        3833 :     } while (true);
    1758             : 
    1759             :     int32_t unit;
    1760             :     RegExpFlag reflags = NoFlags;
    1761             :     while (true) {
    1762           0 :         RegExpFlag flag;
    1763           0 :         unit = getCodeUnit();
    1764           0 :         if (unit == 'g')
    1765             :             flag = GlobalFlag;
    1766             :         else if (unit == 'i')
    1767        4212 :             flag = IgnoreCaseFlag;
    1768           0 :         else if (unit == 'm')
    1769             :             flag = MultilineFlag;
    1770           0 :         else if (unit == 'y')
    1771           0 :             flag = StickyFlag;
    1772             :         else if (unit == 'u')
    1773             :             flag = UnicodeFlag;
    1774             :         else if (IsAsciiAlpha(unit))
    1775             :             flag = NoFlags;
    1776         296 :         else
    1777         158 :             break;
    1778             : 
    1779         908 :         if ((reflags & flag) || flag == NoFlags) {
    1780         454 :             ungetCodeUnit(unit);
    1781             :             char buf[2] = { char(unit), '\0' };
    1782           0 :             error(JSMSG_BAD_REGEXP_FLAG, buf);
    1783             :             return badToken();
    1784         301 :         }
    1785             : 
    1786         298 :         reflags = RegExpFlag(reflags | flag);
    1787             :     }
    1788           0 :     ungetCodeUnit(unit);
    1789             : 
    1790           0 :     newRegExpToken(reflags, start, out);
    1791             :     return true;
    1792             : }
    1793             : 
    1794             : template<typename CharT, class AnyCharsAccess>
    1795         158 : MOZ_MUST_USE bool
    1796           0 : TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* const ttp,
    1797           0 :                                                              const Modifier modifier)
    1798           0 : {
    1799           0 :     // Assume we'll fail: success cases will overwrite this.
    1800             : #ifdef DEBUG
    1801             :     *ttp = TokenKind::Limit;
    1802           0 : #endif
    1803             :     MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));
    1804           0 : 
    1805             :     // Check if in the middle of a template string. Have to get this out of
    1806           0 :     // the way first.
    1807           0 :     if (MOZ_UNLIKELY(modifier == TemplateTail))
    1808             :         return getStringOrTemplateToken('`', modifier, ttp);
    1809             : 
    1810             :     // This loop runs more than once only when whitespace or comments are
    1811             :     // encountered.
    1812           0 :     do {
    1813             :         int32_t unit = getCodeUnit();
    1814             :         if (MOZ_UNLIKELY(unit == EOF)) {
    1815             :             MOZ_ASSERT(sourceUnits.atEnd());
    1816             :             anyCharsAccess().flags.isEOF = true;
    1817           0 :             TokenStart start(sourceUnits, 0);
    1818             :             newSimpleToken(TokenKind::Eof, start, modifier, ttp);
    1819             :             return true;
    1820             :         }
    1821             : 
    1822             :         if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
    1823      883981 :             // Non-ASCII code points can only be identifiers or whitespace.
    1824        1086 :             // It would be nice to compute these *after* discarding whitespace,
    1825             :             // but IN A WORLD where |unicode::IsSpaceOrBOM2| requires consuming
    1826             :             // a variable number of code points, it's easier to assume it's an
    1827             :             // identifier and maybe do a little wasted work, than to unget and
    1828             :             // compute and reget if whitespace.
    1829           0 :             TokenStart start(sourceUnits, -1);
    1830           0 :             const CharT* identStart = sourceUnits.addressOfNextCodeUnit() - 1;
    1831           0 : 
    1832        2592 :             int32_t codePoint;
    1833             :             if (!getNonAsciiCodePoint(unit, &codePoint))
    1834             :                 return badToken();
    1835             : 
    1836     4480494 :             if (unicode::IsSpaceOrBOM2(codePoint)) {
    1837             :                 if (codePoint == unicode::LINE_SEPARATOR || codePoint == unicode::PARA_SEPARATOR) {
    1838             :                     if (!updateLineInfoForEOL())
    1839             :                         return badToken();
    1840             : 
    1841     2240247 :                     anyCharsAccess().updateFlagsForEOL();
    1842           0 :                 }
    1843           0 : 
    1844             :                 continue;
    1845             :             }
    1846           0 : 
    1847           0 :             static_assert(isAsciiCodePoint('$'),
    1848             :                           "IdentifierStart contains '$', but as "
    1849           0 :                           "!IsUnicodeIDStart('$'), ensure that '$' is never "
    1850             :                           "handled here");
    1851             :             static_assert(isAsciiCodePoint('_'),
    1852           0 :                           "IdentifierStart contains '_', but as "
    1853             :                           "!IsUnicodeIDStart('_'), ensure that '_' is never "
    1854             :                           "handled here");
    1855             : 
    1856             :             if (unicode::IsUnicodeIDStart(uint32_t(codePoint)))
    1857           0 :                 return identifierName(start, identStart, IdentifierEscapes::None, modifier, ttp);
    1858           0 : 
    1859             :             ungetCodePointIgnoreEOL(codePoint);
    1860             :             error(JSMSG_ILLEGAL_CHARACTER);
    1861             :             return badToken();
    1862             :         } // !isAsciiCodePoint(unit)
    1863             : 
    1864             :         // Get the token kind, based on the first char.  The ordering of c1kind
    1865             :         // comparison is based on the frequency of tokens in real code:
    1866             :         // Parsemark (which represents typical JS code on the web) and the
    1867             :         // Unreal demo (which represents asm.js code).
    1868           0 :         //
    1869           0 :         //                  Parsemark   Unreal
    1870             :         //  OneChar         32.9%       39.7%
    1871           0 :         //  Space           25.0%        0.6%
    1872           0 :         //  Ident           19.2%       36.4%
    1873             :         //  Dec              7.2%        5.1%
    1874             :         //  String           7.9%        0.0%
    1875           0 :         //  EOL              1.7%        0.0%
    1876           0 :         //  ZeroDigit        0.4%        4.9%
    1877             :         //  Other            5.7%       13.3%
    1878           0 :         //
     1879           0 :         // The ordering is based mostly on Parsemark frequencies, with Unreal
    1880           0 :         // frequencies used to break close categories (e.g. |Dec| and
    1881             :         // |String|).  |Other| is biggish, but no other token kind is common
    1882             :         // enough for it to be worth adding extra values to FirstCharKind.
    1883             :         FirstCharKind c1kind = FirstCharKind(firstCharKinds[unit]);
    1884             : 
    1885             :         // Look for an unambiguous single-char token.
    1886             :         //
    1887             :         if (c1kind <= OneChar_Max) {
    1888             :             TokenStart start(sourceUnits, -1);
    1889             :             newSimpleToken(TokenKind(c1kind), start, modifier, ttp);
    1890             :             return true;
    1891             :         }
    1892             : 
    1893             :         // Skip over non-EOL whitespace chars.
    1894             :         //
    1895             :         if (c1kind == Space)
    1896             :             continue;
    1897             : 
    1898             :         // Look for an identifier.
    1899             :         //
    1900             :         if (c1kind == Ident) {
    1901             :             TokenStart start(sourceUnits, -1);
    1902     2240247 :             return identifierName(start, sourceUnits.addressOfNextCodeUnit() - 1,
    1903             :                                   IdentifierEscapes::None, modifier, ttp);
    1904             :         }
    1905             : 
    1906     2240247 :         // Look for a decimal number.
    1907      710762 :         //
    1908      710765 :         if (c1kind == Dec) {
    1909             :             TokenStart start(sourceUnits, -1);
    1910             :             const CharT* numStart = sourceUnits.addressOfNextCodeUnit() - 1;
    1911             :             return decimalNumber(unit, start, numStart, modifier, ttp);
    1912             :         }
    1913             : 
    1914           0 :         // Look for a string or a template string.
    1915             :         //
    1916             :         if (c1kind == String)
    1917             :             return getStringOrTemplateToken(static_cast<char>(unit), modifier, ttp);
    1918             : 
    1919           0 :         // Skip over EOL chars, updating line state along the way.
    1920           0 :         //
    1921           0 :         if (c1kind == EOL) {
    1922      345224 :             // If it's a \r\n sequence, consume it as a single EOL.
    1923             :             if (unit == '\r' && !sourceUnits.atEnd())
    1924             :                 sourceUnits.matchCodeUnit('\n');
    1925             : 
    1926             :             if (!updateLineInfoForEOL())
    1927      385122 :                 return badToken();
    1928        9612 : 
    1929        4806 :             anyCharsAccess().updateFlagsForEOL();
    1930           0 :             continue;
    1931             :         }
    1932             : 
    1933             :         // From a '0', look for a hexadecimal, binary, octal, or "noctal" (a
    1934             :         // number starting with '0' that contains '8' or '9' and is treated as
    1935      380316 :         // decimal) number.
    1936           0 :         //
    1937             :         if (c1kind == ZeroDigit) {
    1938             :             TokenStart start(sourceUnits, -1);
    1939             : 
    1940      349322 :             int radix;
    1941             :             const CharT* numStart;
    1942      182640 :             unit = getCodeUnit();
    1943           0 :             if (unit == 'x' || unit == 'X') {
    1944             :                 radix = 16;
    1945           0 :                 unit = getCodeUnit();
    1946           0 :                 if (!JS7_ISHEX(unit)) {
    1947             :                     // NOTE: |unit| may be EOF here.
    1948           0 :                     ungetCodeUnit(unit);
    1949             :                     error(JSMSG_MISSING_HEXDIGITS);
    1950             :                     return badToken();
    1951             :                 }
    1952             : 
    1953             :                 // one past the '0x'
    1954             :                 numStart = sourceUnits.addressOfNextCodeUnit() - 1;
    1955             : 
    1956           0 :                 while (JS7_ISHEX(unit))
    1957        7368 :                     unit = getCodeUnit();
    1958             :             } else if (unit == 'b' || unit == 'B') {
    1959             :                 radix = 2;
    1960             :                 unit = getCodeUnit();
    1961        7368 :                 if (unit != '0' && unit != '1') {
    1962        3684 :                     // NOTE: |unit| may be EOF here.
    1963           0 :                     ungetCodeUnit(unit);
    1964           0 :                     error(JSMSG_MISSING_BINARY_DIGITS);
    1965           0 :                     return badToken();
    1966           0 :                 }
    1967           0 : 
    1968           0 :                 // one past the '0b'
    1969             :                 numStart = sourceUnits.addressOfNextCodeUnit() - 1;
    1970             : 
    1971             :                 while (unit == '0' || unit == '1')
    1972         211 :                     unit = getCodeUnit();
    1973             :             } else if (unit == 'o' || unit == 'O') {
    1974        1898 :                 radix = 8;
    1975         738 :                 unit = getCodeUnit();
    1976           0 :                 if (!JS7_ISOCT(unit)) {
    1977          12 :                     // NOTE: |unit| may be EOF here.
    1978          24 :                     ungetCodeUnit(unit);
    1979          12 :                     error(JSMSG_MISSING_OCTAL_DIGITS);
    1980           0 :                     return badToken();
    1981           0 :                 }
    1982           0 : 
    1983             :                 // one past the '0o'
    1984             :                 numStart = sourceUnits.addressOfNextCodeUnit() - 1;
    1985             : 
    1986          12 :                 while (JS7_ISOCT(unit))
    1987             :                     unit = getCodeUnit();
    1988          36 :             } else if (IsAsciiDigit(unit)) {
    1989          24 :                 radix = 8;
    1990        3461 :                 // one past the '0'
    1991           0 :                 numStart = sourceUnits.addressOfNextCodeUnit() - 1;
    1992           0 : 
    1993           3 :                 do {
    1994           0 :                     // Octal integer literals are not permitted in strict mode
    1995           0 :                     // code.
    1996           0 :                     if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
    1997             :                         return badToken();
    1998             : 
    1999             :                     // Outside strict mode, we permit 08 and 09 as decimal
    2000           0 :                     // numbers, which makes our behaviour a superset of the
    2001             :                     // ECMA numeric grammar. We might not always be so
    2002           0 :                     // permissive, so we warn about it.
    2003           0 :                     if (unit >= '8') {
    2004           0 :                         if (!warning(JSMSG_BAD_OCTAL, unit == '8' ? "08" : "09"))
    2005           0 :                             return badToken();
    2006             : 
    2007           0 :                         // Use the decimal scanner for the rest of the number.
    2008             :                         return decimalNumber(unit, start, numStart, modifier, ttp);
    2009           0 :                     }
    2010             : 
    2011             :                     unit = getCodeUnit();
    2012           0 :                 } while (IsAsciiDigit(unit));
    2013           0 :             } else {
    2014             :                 // '0' not followed by [XxBbOo0-9];  scan as a decimal number.
    2015             :                 numStart = sourceUnits.addressOfNextCodeUnit() - 1;
    2016             : 
    2017             :                 // NOTE: |unit| may be EOF here.  (This is permitted by case #3
    2018             :                 //       in TokenStream.h docs for this function.)
    2019           0 :                 return decimalNumber(unit, start, numStart, modifier, ttp);
    2020           0 :             }
    2021           0 : 
    2022             :             // Check for an identifier-start code point immediately after the
    2023             :             // number.  This must be an error, and somewhat surprisingly, if
    2024           0 :             // a check doesn't happen here, it never will.
    2025             :             if (MOZ_UNLIKELY(unit == EOF)) {
    2026             :                 // Technically this isn't necessary -- ungetting EOF does
    2027           0 :                 // nothing -- but it's conceptually nicer if we consider all
    2028           0 :                 // gets requiring an unget to revert them.
    2029             :                 ungetCodeUnit(unit);
    2030             :             } else if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
    2031        3458 :                 ungetCodeUnit(unit);
    2032             : 
    2033        3458 :                 if (unicode::IsIdentifierStart(char16_t(unit))) {
    2034             :                     error(JSMSG_IDSTART_AFTER_NUMBER);
    2035         226 :                     return badToken();
    2036             :                 }
    2037           0 :             } else {
    2038           0 :                 int32_t codePoint;
    2039           0 :                 if (!getNonAsciiCodePoint(unit, &codePoint))
    2040           0 :                     return badToken();
    2041             : 
    2042             :                 ungetCodePointIgnoreEOL(codePoint);
    2043         226 :                 if (codePoint == unicode::LINE_SEPARATOR || codePoint == unicode::PARA_SEPARATOR)
    2044             :                     anyCharsAccess().undoInternalUpdateLineInfoForEOL();
    2045             : 
    2046         452 :                 if (unicode::IsIdentifierStart(uint32_t(codePoint))) {
    2047             :                     error(JSMSG_IDSTART_AFTER_NUMBER);
    2048             :                     return badToken();
    2049         226 :                 }
    2050             :             }
    2051           0 : 
    2052             :             double dval;
    2053           0 :             const char16_t* dummy;
    2054             :             if (!GetPrefixInteger(anyCharsAccess().cx, numStart,
    2055             :                                   sourceUnits.addressOfNextCodeUnit(), radix, &dummy, &dval))
    2056           0 :             {
    2057           0 :                 return badToken();
    2058             :             }
    2059             : 
    2060             :             newNumberToken(dval, NoDecimal, start, modifier, ttp);
    2061             :             return true;
    2062           0 :         }
    2063             : 
    2064             :         MOZ_ASSERT(c1kind == Other);
    2065             : 
    2066             :         // This handles everything else.  Simple tokens distinguished solely by
    2067             :         // TokenKind should set |simpleKind| and break, to share simple-token
    2068           0 :         // creation code for all such tokens.  All other tokens must be handled
    2069             :         // by returning (or by continuing from the loop enclosing this).
    2070             :         //
    2071           0 :         TokenStart start(sourceUnits, -1);
    2072             :         TokenKind simpleKind;
    2073             : #ifdef DEBUG
    2074         226 :         simpleKind = TokenKind::Limit; // sentinel value for code after switch
    2075           0 : #endif
    2076             :         switch (static_cast<CharT>(unit)) {
    2077             :           case '.':
    2078      162998 :             unit = getCodeUnit();
    2079             :             if (IsAsciiDigit(unit)) {
    2080             :                 return decimalNumber('.', start, sourceUnits.addressOfNextCodeUnit() - 2, modifier,
    2081             :                                      ttp);
    2082             :             }
    2083             : 
    2084             :             if (unit == '.') {
    2085           0 :                 if (matchCodeUnit('.')) {
    2086             :                     simpleKind = TokenKind::TripleDot;
    2087             :                     break;
    2088           1 :                 }
    2089             :             }
    2090           0 : 
    2091             :             // NOTE: |unit| may be EOF here.  A stray '.' at EOF would be an
    2092           1 :             //       error, but subsequent code will handle it.
    2093      168712 :             ungetCodeUnit(unit);
    2094           0 : 
    2095           1 :             simpleKind = TokenKind::Dot;
    2096             :             break;
    2097             : 
    2098           0 :           case '=':
    2099         428 :             if (matchCodeUnit('='))
    2100             :                 simpleKind = matchCodeUnit('=') ? TokenKind::StrictEq : TokenKind::Eq;
    2101             :             else if (matchCodeUnit('>'))
    2102             :                 simpleKind = TokenKind::Arrow;
    2103             :             else
    2104       83916 :                 simpleKind = TokenKind::Assign;
    2105             :             break;
    2106           0 : 
    2107           0 :           case '+':
    2108             :             if (matchCodeUnit('+'))
    2109             :                 simpleKind = TokenKind::Inc;
    2110       35404 :             else
    2111           0 :                 simpleKind = matchCodeUnit('=') ? TokenKind::AddAssign : TokenKind::Add;
    2112           1 :             break;
    2113             : 
    2114             :           case '\\': {
    2115       27356 :             uint32_t codePoint;
    2116             :             if (uint32_t escapeLength = matchUnicodeEscapeIdStart(&codePoint)) {
    2117             :                 return identifierName(start,
    2118             :                                       sourceUnits.addressOfNextCodeUnit() - escapeLength - 1,
    2119           0 :                                       IdentifierEscapes::SawUnicodeEscape, modifier, ttp);
    2120             :             }
    2121             : 
    2122        4924 :             // We could point "into" a mistyped escape, e.g. for "\u{41H}" we
    2123             :             // could point at the 'H'.  But we don't do that now, so the
    2124             :             // character after the '\' isn't necessarily bad, so just point at
    2125             :             // the start of the actually-invalid escape.
    2126             :             ungetCodeUnit('\\');
    2127           0 :             error(JSMSG_BAD_ESCAPE);
    2128           0 :             return badToken();
    2129           0 :           }
    2130           0 : 
    2131             :           case '|':
    2132             :             if (matchCodeUnit('|'))
    2133             :                 simpleKind = TokenKind::Or;
    2134             : #ifdef ENABLE_PIPELINE_OPERATOR
    2135             :             else if (matchCodeUnit('>'))
    2136             :                 simpleKind = TokenKind::Pipeline;
    2137           0 : #endif
    2138           0 :             else
    2139           0 :                 simpleKind = matchCodeUnit('=') ? TokenKind::BitOrAssign : TokenKind::BitOr;
    2140             :             break;
    2141             : 
    2142             :           case '^':
    2143           0 :             simpleKind = matchCodeUnit('=') ? TokenKind::BitXorAssign : TokenKind::BitXor;
    2144             :             break;
    2145             : 
    2146             :           case '&':
    2147             :             if (matchCodeUnit('&'))
    2148             :                 simpleKind = TokenKind::And;
    2149             :             else
    2150         291 :                 simpleKind = matchCodeUnit('=') ? TokenKind::BitAndAssign : TokenKind::BitAnd;
    2151             :             break;
    2152             : 
    2153             :           case '!':
    2154           9 :             if (matchCodeUnit('='))
    2155             :                 simpleKind = matchCodeUnit('=') ? TokenKind::StrictNe : TokenKind::Ne;
    2156             :             else
    2157             :                 simpleKind = TokenKind::Not;
    2158           0 :             break;
    2159             : 
    2160             :           case '<':
    2161           0 :             if (anyCharsAccess().options().allowHTMLComments) {
    2162             :                 // Treat HTML begin-comment as comment-till-end-of-line.
    2163             :                 if (matchCodeUnit('!')) {
    2164             :                     if (matchCodeUnit('-')) {
    2165        6765 :                         if (matchCodeUnit('-')) {
    2166        1697 :                             consumeRestOfSingleLineComment();
    2167             :                             continue;
    2168             :                         }
    2169             :                         ungetCodeUnit('-');
    2170             :                     }
    2171             :                     ungetCodeUnit('!');
    2172        1924 :                 }
    2173             :             }
    2174         962 :             if (matchCodeUnit('<'))
    2175           0 :                 simpleKind = matchCodeUnit('=') ? TokenKind::LshAssign : TokenKind::Lsh;
    2176           0 :             else
    2177           0 :                 simpleKind = matchCodeUnit('=') ? TokenKind::Le : TokenKind::Lt;
    2178           0 :             break;
    2179             : 
    2180           0 :           case '>':
    2181             :             if (matchCodeUnit('>')) {
    2182           0 :                 if (matchCodeUnit('>'))
    2183             :                     simpleKind = matchCodeUnit('=') ? TokenKind::UrshAssign : TokenKind::Ursh;
    2184             :                 else
    2185           0 :                     simpleKind = matchCodeUnit('=') ? TokenKind::RshAssign : TokenKind::Rsh;
    2186           0 :             } else {
    2187             :                 simpleKind = matchCodeUnit('=') ? TokenKind::Ge : TokenKind::Gt;
    2188         923 :             }
    2189             :             break;
    2190             : 
    2191             :           case '*':
    2192           0 :             if (matchCodeUnit('*'))
    2193          26 :                 simpleKind = matchCodeUnit('=') ? TokenKind::PowAssign : TokenKind::Pow;
    2194           7 :             else
    2195             :                 simpleKind = matchCodeUnit('=') ? TokenKind::MulAssign : TokenKind::Mul;
    2196           0 :             break;
    2197             : 
    2198         778 :           case '/':
    2199             :             // Look for a single-line comment.
    2200             :             if (matchCodeUnit('/')) {
    2201             :                 unit = getCodeUnit();
    2202             :                 if (unit == '@' || unit == '#') {
    2203         365 :                     bool shouldWarn = unit == '@';
    2204           0 :                     if (!getDirectives(false, shouldWarn))
    2205             :                         return false;
    2206         360 :                 } else {
    2207             :                     // NOTE: |unit| may be EOF here.
    2208             :                     ungetCodeUnit(unit);
    2209             :                 }
    2210             : 
    2211           0 :                 consumeRestOfSingleLineComment();
    2212       32188 :                 continue;
    2213       16094 :             }
    2214          94 : 
    2215          94 :             // Look for a multi-line comment.
    2216             :             if (matchCodeUnit('*')) {
    2217             :                 TokenStreamAnyChars& anyChars = anyCharsAccess();
    2218           0 :                 unsigned linenoBefore = anyChars.lineno;
    2219             : 
    2220             :                 do {
    2221       16094 :                     int32_t unit = getCodeUnit();
    2222       16094 :                     if (unit == EOF) {
    2223             :                         reportError(JSMSG_UNTERMINATED_COMMENT);
    2224             :                         return badToken();
    2225             :                     }
    2226        5822 : 
    2227       10778 :                     if (unit == '*' && matchCodeUnit('/'))
    2228        5389 :                         break;
    2229             : 
    2230             :                     if (unit == '@' || unit == '#') {
    2231     2470888 :                         bool shouldWarn = unit == '@';
    2232           0 :                         if (!getDirectives(true, shouldWarn))
    2233           0 :                             return badToken();
    2234           0 :                     } else if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
    2235             :                         int32_t codePoint;
    2236             :                         if (!getFullAsciiCodePoint(unit, &codePoint))
    2237     1235444 :                             return badToken();
    2238             :                     } else {
    2239             :                         int32_t codePoint;
    2240           0 :                         if (!getNonAsciiCodePoint(unit, &codePoint))
    2241           0 :                             return badToken();
    2242           0 :                     }
    2243           0 :                 } while (true);
    2244           0 : 
    2245             :                 if (linenoBefore != anyChars.lineno)
    2246     1224577 :                     anyChars.updateFlagsForEOL();
    2247           0 : 
    2248             :                 continue;
    2249             :             }
    2250          81 : 
    2251           0 :             // Look for a regexp.
    2252             :             if (modifier == Operand)
    2253             :                 return regexpLiteral(start, ttp);
    2254             : 
    2255           0 :             simpleKind = matchCodeUnit('=') ? TokenKind::DivAssign : TokenKind::Div;
    2256             :             break;
    2257             : 
    2258             :           case '%':
    2259             :             simpleKind = matchCodeUnit('=') ? TokenKind::ModAssign : TokenKind::Mod;
    2260             :             break;
    2261             : 
    2262           1 :           case '-':
    2263           1 :             if (matchCodeUnit('-')) {
    2264             :                 if (anyCharsAccess().options().allowHTMLComments &&
    2265         138 :                     !anyCharsAccess().flags.isDirtyLine)
    2266             :                 {
    2267             :                     if (matchCodeUnit('>')) {
    2268             :                         consumeRestOfSingleLineComment();
    2269           1 :                         continue;
    2270             :                     }
    2271             :                 }
    2272             : 
    2273           0 :                 simpleKind = TokenKind::Dec;
    2274           0 :             } else {
    2275           0 :                 simpleKind = matchCodeUnit('=') ? TokenKind::SubAssign : TokenKind::Sub;
    2276             :             }
    2277          10 :             break;
    2278           0 : 
    2279           0 :           default:
    2280             :             // We consumed a bad ASCII code point/unit.  Put it back so the
    2281             :             // error location is the bad code point.
    2282             :             ungetCodeUnit(unit);
    2283             :             error(JSMSG_ILLEGAL_CHARACTER);
    2284             :             return badToken();
    2285         975 :         } // switch (static_cast<CharT>(unit))
    2286             : 
    2287             :         MOZ_ASSERT(simpleKind != TokenKind::Limit,
    2288             :                    "switch-statement should have set |simpleKind| before "
    2289             :                    "breaking");
    2290             : 
    2291             :         newSimpleToken(simpleKind, start, modifier, ttp);
    2292           0 :         return true;
    2293           0 :     } while (true);
    2294           0 : }
    2295             : // Scan a string or template literal through |untilChar| (or "${" in a template) and emit its token.
    2296             : template<typename CharT, class AnyCharsAccess>
    2297           0 : bool // true once the token is created; false, after badToken() has run, on any error
    2298             : TokenStreamSpecific<CharT, AnyCharsAccess>::getStringOrTemplateToken(char untilChar, // closing delimiter
    2299             :                                                                      Modifier modifier, // passed through to newAtomToken
    2300             :                                                                      TokenKind* out) // passed through to newAtomToken
    2301           0 : {
    2302      141207 :     MOZ_ASSERT(untilChar == '\'' || untilChar == '"' || untilChar == '`',
    2303             :                "unexpected string/template literal delimiter");
    2304             : 
    2305             :     bool parsingTemplate = (untilChar == '`');
    2306             :     bool templateHead = false; // set when a template is cut short by "${"
    2307             : 
    2308           0 :     TokenStart start(sourceUnits, -1);
    2309             :     tokenbuf.clear();
    2310             : 
    2311             :     // Run the bad-token code for every path out of this function except the
    2312       32080 :     // one success-case.
    2313             :     auto noteBadToken = MakeScopeExit([this]() {
    2314             :         this->badToken();
    2315             :     });
    2316             : 
    2317       32080 :     auto ReportPrematureEndOfLiteral = [this, untilChar](unsigned errnum) {
    2318       32080 :         // Unicode separators aren't end-of-line in template or (as of
    2319             :         // recently) string literals, so this assertion doesn't allow them.
    2320           0 :         MOZ_ASSERT(this->sourceUnits.atEnd() ||
    2321           0 :                    this->sourceUnits.peekCodeUnit() == '\r' ||
    2322             :                    this->sourceUnits.peekCodeUnit() == '\n',
    2323             :                    "must be parked at EOF or EOL to call this function");
    2324             : 
    2325           0 :         // The various errors reported here include language like "in a ''
    2326           0 :         // literal" or similar, with '' being '', "", or `` as appropriate.
    2327           0 :         const char delimiters[] = { untilChar, untilChar, '\0' };
    2328             : 
    2329             :         this->error(errnum, delimiters);
    2330             :         return;
    2331             :     };
    2332     1191732 : 
    2333      564875 :     // We need to detect any of these chars:  " or ', \n (or its
    2334           0 :     // equivalents), \\, EOF.  Because we detect EOL sequences here and
    2335           0 :     // put them back immediately, we can use getCodeUnit().
    2336           0 :     int32_t unit;
    2337             :     while ((unit = getCodeUnit()) != untilChar) {
    2338             :         if (unit == EOF) {
    2339             :             ReportPrematureEndOfLiteral(JSMSG_EOF_BEFORE_END_OF_LITERAL);
    2340           0 :             return false;
    2341             :         }
    2342             : 
    2343             :         // Non-ASCII code points are always directly appended -- even
    2344             :         // U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR that are
    2345           0 :         // ordinarily LineTerminatorSequences.  (They contribute their literal
    2346             :         // values to template and [as of recently] string literals, but they're
    2347             :         // line terminators when computing line/column coordinates.)  Handle
    2348           0 :         // the non-ASCII case early for readability.
    2349           0 :         if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
    2350           0 :             static_assert(mozilla::IsSame<CharT, char16_t>::value,
    2351             :                           "need a getNonAsciiCodePoint that doesn't normalize "
    2352             :                           "LineTerminatorSequences to correctly handle UTF-8");
    2353             : 
    2354           0 :             int32_t codePoint;
    2355           0 :             if (unit == unicode::LINE_SEPARATOR || unit == unicode::PARA_SEPARATOR) {
    2356           0 :                 if (!updateLineInfoForEOL())
    2357         282 :                     return false;
    2358          19 : 
    2359           8 :                 anyCharsAccess().updateFlagsForEOL();
    2360           0 : 
    2361             :                 codePoint = unit;
    2362             :             } else {
    2363             :                 if (!getNonAsciiCodePoint(unit, &codePoint))
    2364             :                     return false;
    2365             :             }
    2366             : 
    2367             :             if (!appendCodePointToTokenbuf(codePoint))
    2368             :                 return false;
    2369        1136 : 
    2370           0 :             continue;
    2371           0 :         }
    2372           0 : 
    2373           0 :         if (unit == '\\') {
    2374           0 :             // When parsing templates, we don't immediately report errors for
    2375             :             // invalid escapes; these are handled by the parser.  We don't
    2376           0 :             // append to tokenbuf in those cases because it won't be read.
    2377           0 :             unit = getCodeUnit();
    2378           0 :             if (unit == EOF) {
    2379           0 :                 ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
    2380           0 :                 return false;
    2381             :             }
    2382           0 : 
    2383           0 :             // Non-ASCII |unit| isn't handled by code after this, so dedicate
    2384             :             // an unlikely special-case to it and then continue.
    2385           0 :             if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
    2386           0 :                 int32_t codePoint;
    2387             :                 if (!getNonAsciiCodePoint(unit, &codePoint))
    2388           0 :                     return false;
    2389           0 : 
    2390           0 :                 // If we consumed U+2028 LINE SEPARATOR or U+2029 PARAGRAPH
    2391           0 :                 // SEPARATOR, they'll be normalized to '\n'.  '\' followed by
    2392           0 :                 // LineContinuation represents no code points, so don't append
    2393             :                 // in this case.
    2394           0 :                 if (codePoint != '\n') {
    2395           0 :                     if (!appendCodePointToTokenbuf(codePoint)) // whole code point, not just its first unit
    2396             :                         return false;
    2397           0 :                 }
    2398           0 : 
    2399             :                 continue;
    2400             :             }
    2401             : 
    2402             :             switch (static_cast<CharT>(unit)) {
    2403           0 :               case 'b': unit = '\b'; break;
    2404           0 :               case 'f': unit = '\f'; break;
    2405             :               case 'n': unit = '\n'; break;
    2406             :               case 'r': unit = '\r'; break;
    2407             :               case 't': unit = '\t'; break;
    2408           0 :               case 'v': unit = '\v'; break;
    2409             : 
    2410           0 :               case '\r':
    2411           0 :                 sourceUnits.matchCodeUnit('\n');
    2412             :                 MOZ_FALLTHROUGH;
    2413           0 :               case '\n': {
    2414           0 :                 // LineContinuation represents no code points.  We're manually
    2415             :                 // consuming a LineTerminatorSequence, so we must manually
    2416           0 :                 // update line/column info.
    2417           0 :                 if (!updateLineInfoForEOL())
    2418             :                     return false;
    2419             : 
    2420           0 :                 continue;
    2421           0 :               }
    2422           0 : 
    2423           0 :               // Unicode character specification.
    2424           0 :               case 'u': {
    2425             :                 int32_t c2 = getCodeUnit();
    2426           0 :                 if (c2 == EOF) {
    2427           0 :                     ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
    2428             :                     return false;
    2429           0 :                 }
    2430           0 : 
    2431             :                 // First handle a delimited Unicode escape, e.g. \u{1F4A9}.
    2432             :                 if (c2 == '{') {
    2433             :                     uint32_t start = sourceUnits.offset() - 3;
    2434             :                     uint32_t code = 0;
    2435             :                     bool first = true;
    2436           0 :                     bool valid = true;
    2437           0 :                     do {
    2438             :                         int32_t u3 = getCodeUnit();
    2439           0 :                         if (u3 == EOF) {
    2440           0 :                             if (parsingTemplate) {
    2441           0 :                                 TokenStreamAnyChars& anyChars = anyCharsAccess();
    2442             :                                 anyChars.setInvalidTemplateEscape(start,
    2443           0 :                                                                   InvalidEscapeType::Unicode);
    2444             :                                 valid = false;
    2445           0 :                                 break;
    2446             :                             }
    2447         568 :                             reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
    2448             :                             return false;
    2449             :                         }
    2450             :                         if (u3 == '}') {
    2451           0 :                             if (first) {
    2452           0 :                                 if (parsingTemplate) {
    2453             :                                     TokenStreamAnyChars& anyChars = anyCharsAccess();
    2454        2272 :                                     anyChars.setInvalidTemplateEscape(start,
    2455        1704 :                                                                       InvalidEscapeType::Unicode);
    2456        1704 :                                     valid = false;
    2457           0 :                                     break;
    2458           0 :                                 }
    2459             :                                 reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
    2460           0 :                                 return false;
    2461           0 :                             }
    2462           0 :                             break;
    2463           0 :                         }
    2464           0 : 
    2465           0 :                         // Beware: |u3| may be a non-ASCII code point here; if
    2466             :                         // so it'll pass into this |if|-block.
    2467           0 :                         if (!JS7_ISHEX(u3)) {
    2468           0 :                             if (parsingTemplate) {
    2469             :                                 // We put the character back so that we read it
    2470           0 :                                 // on the next pass, which matters if it was
    2471             :                                 // '`' or '\'.
    2472             :                                 ungetCodeUnit(u3);
    2473             : 
    2474             :                                 TokenStreamAnyChars& anyChars = anyCharsAccess();
    2475             :                                 anyChars.setInvalidTemplateEscape(start,
    2476           6 :                                                                   InvalidEscapeType::Unicode);
    2477           0 :                                 valid = false;
    2478           2 :                                 break;
    2479             :                             }
    2480           0 :                             reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
    2481           0 :                             return false;
    2482           0 :                         }
    2483           0 : 
    2484           0 :                         code = (code << 4) | JS7_UNHEX(u3);
    2485             :                         if (code > unicode::NonBMPMax) {
    2486           0 :                             if (parsingTemplate) {
    2487           0 :                                 TokenStreamAnyChars& anyChars = anyCharsAccess();
    2488             :                                 anyChars.setInvalidTemplateEscape(start + 3,
    2489           2 :                                                                   InvalidEscapeType::UnicodeOverflow);
    2490             :                                 valid = false;
    2491             :                                 break;
    2492             :                             }
    2493             :                             reportInvalidEscapeError(start + 3, InvalidEscapeType::UnicodeOverflow);
    2494         129 :                             return false;
    2495           1 :                         }
    2496             : 
    2497           7 :                         first = false;
    2498             :                     } while (true);
    2499             : 
    2500             :                     if (!valid)
    2501          14 :                         continue;
    2502           0 : 
    2503           0 :                     MOZ_ASSERT(code <= unicode::NonBMPMax);
    2504           0 :                     if (!appendCodePointToTokenbuf(code))
    2505             :                         return false;
    2506           0 : 
    2507             :                     continue;
    2508           0 :                 } // end of delimited Unicode escape handling
    2509             : 
    2510           0 :                 // Otherwise it must be a fixed-length \uXXXX Unicode escape.
    2511             :                 // If it isn't, this is usually an error -- but if this is a
    2512             :                 // template literal, we must defer error reporting because
    2513           7 :                 // malformed escapes are okay in *tagged* template literals.
    2514           0 :                 CharT cp[3];
    2515           0 :                 if (JS7_ISHEX(c2) &&
    2516           0 :                     sourceUnits.peekCodeUnits(3, cp) &&
    2517             :                     JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]))
    2518           0 :                 {
    2519           0 :                     unit = (JS7_UNHEX(c2) << 12) |
    2520           0 :                            (JS7_UNHEX(cp[0]) << 8) |
    2521           0 :                            (JS7_UNHEX(cp[1]) << 4) |
    2522           0 :                            JS7_UNHEX(cp[2]);
    2523             :                     sourceUnits.skipCodeUnits(3);
    2524             :                 } else {
    2525             :                     // Beware: |c2| may not be an ASCII code point here!
    2526             :                     ungetCodeUnit(c2);
    2527             :                     uint32_t start = sourceUnits.offset() - 2;
    2528           7 :                     if (parsingTemplate) {
    2529             :                         TokenStreamAnyChars& anyChars = anyCharsAccess();
    2530             :                         anyChars.setInvalidTemplateEscape(start, InvalidEscapeType::Unicode);
    2531             :                         continue;
    2532      563860 :                     }
    2533         937 :                     reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
    2534             :                     return false;
    2535           0 :                 }
    2536           0 :                 break;
    2537           0 :               } // case 'u'
    2538             : 
    2539             :               // Hexadecimal character specification.
    2540             :               case 'x': {
    2541           1 :                 CharT cp[2];
    2542           0 :                 if (sourceUnits.peekCodeUnits(2, cp) &&
    2543             :                     JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]))
    2544             :                 {
    2545           0 :                     unit = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
    2546           0 :                     sourceUnits.skipCodeUnits(2);
    2547             :                 } else {
    2548             :                     uint32_t start = sourceUnits.offset() - 2;
    2549           1 :                     if (parsingTemplate) {
    2550             :                         TokenStreamAnyChars& anyChars = anyCharsAccess();
    2551             :                         anyChars.setInvalidTemplateEscape(start, InvalidEscapeType::Hexadecimal);
    2552             :                         continue;
    2553             :                     }
    2554             :                     reportInvalidEscapeError(start, InvalidEscapeType::Hexadecimal);
    2555             :                     return false;
    2556             :                 }
    2557             :                 break;
    2558             :               }
    2559             : 
    2560             :               default: {
    2561             :                 if (!JS7_ISOCT(unit))
    2562             :                     break;
    2563             : 
    2564             :                 // Octal character specification.
    2565             :                 int32_t val = JS7_UNOCT(unit);
    2566             : 
    2567             :                 unit = peekCodeUnit();
    2568             :                 if (MOZ_UNLIKELY(unit == EOF)) {
    2569             :                     ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
    2570             :                     return false;
    2571             :                 }
    2572             : 
    2573             :                 // Strict mode code allows only \0, then a non-digit.
    2574             :                 if (val != 0 || IsAsciiDigit(unit)) {
    2575             :                     TokenStreamAnyChars& anyChars = anyCharsAccess();
    2576             :                     if (parsingTemplate) {
    2577             :                         anyChars.setInvalidTemplateEscape(sourceUnits.offset() - 2,
    2578             :                                                           InvalidEscapeType::Octal);
    2579             :                         continue;
    2580             :                     }
    2581             :                     if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
    2582             :                         return false;
    2583             :                     anyChars.flags.sawOctalEscape = true;
    2584             :                 }
    2585             : 
    2586             :                 if (JS7_ISOCT(unit)) {
    2587             :                     val = 8 * val + JS7_UNOCT(unit);
    2588             :                     consumeKnownCodeUnit(unit);
    2589             : 
    2590             :                     unit = peekCodeUnit();
    2591             :                     if (MOZ_UNLIKELY(unit == EOF)) {
    2592             :                         ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
    2593             :                         return false;
    2594             :                     }
    2595             : 
    2596             :                     if (JS7_ISOCT(unit)) {
    2597             :                         int32_t save = val;
    2598             :                         val = 8 * val + JS7_UNOCT(unit);
    2599             :                         if (val <= 0xFF) // a third octal digit is consumed only if the value still fits in 8 bits
    2600             :                             consumeKnownCodeUnit(unit);
    2601             :                         else
    2602             :                             val = save;
    2603             :                     }
    2604             :                 }
    2605             : 
    2606             :                 unit = char16_t(val);
    2607             :                 break;
    2608             :               } // default
    2609             :             }
    2610             : 
    2611             :             if (!tokenbuf.append(unit))
    2612             :                 return false;
    2613             : 
    2614             :             continue;
    2615             :         } // (unit == '\\')
    2616             : 
    2617             :         if (unit == '\r' || unit == '\n') {
    2618             :             if (!parsingTemplate) {
    2619             :                 // String literals don't allow ASCII line breaks.
    2620             :                 ungetCodeUnit(unit);
    2621             :                 ReportPrematureEndOfLiteral(JSMSG_EOL_BEFORE_END_OF_STRING);
    2622             :                 return false;
    2623             :             }
    2624             : 
    2625             :             if (unit == '\r') {
    2626             :                 unit = '\n';
    2627             : 
    2628             :                 // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
    2629             :                 if (!sourceUnits.atEnd())
    2630             :                     sourceUnits.matchCodeUnit('\n');
    2631             :             }
    2632             : 
    2633             :             if (!updateLineInfoForEOL())
    2634             :                 return false;
    2635             : 
    2636             :             anyCharsAccess().updateFlagsForEOL();
    2637             :         } else if (parsingTemplate && unit == '$' && matchCodeUnit('{')) {
    2638             :             templateHead = true;
    2639             :             break;
    2640             :         }
    2641             : 
    2642             :         if (!tokenbuf.append(unit))
    2643             :             return false;
    2644             :     }
    2645             : 
    2646             :     JSAtom* atom = atomizeChars(anyCharsAccess().cx, tokenbuf.begin(), tokenbuf.length());
    2647             :     if (!atom)
    2648             :         return false;
    2649             : 
    2650             :     noteBadToken.release(); // success: cancel the badToken() scope-exit
    2651             : 
    2652             :     MOZ_ASSERT_IF(!parsingTemplate, !templateHead);
    2653             : 
    2654             :     TokenKind kind = !parsingTemplate
    2655             :                      ? TokenKind::String
    2656             :                      : templateHead
    2657             :                      ? TokenKind::TemplateHead
    2658             :                      : TokenKind::NoSubsTemplate;
    2659             :     newAtomToken(kind, atom, start, modifier, out);
    2660             :     return true;
    2661             : }
    2662             : // Map |tt| to its description string, i.e. the |desc| argument FOR_EACH_TOKEN_KIND supplies for it.
    2663             : const char*
    2664             : TokenKindToDesc(TokenKind tt)
    2665             : {
    2666             :     switch (tt) {
    2667             : #define EMIT_CASE(name, desc) case TokenKind::name: return desc;
    2668             :       FOR_EACH_TOKEN_KIND(EMIT_CASE) // expands to one returning |case| per token kind
    2669             : #undef EMIT_CASE
    2670             :       case TokenKind::Limit:
    2671             :         MOZ_ASSERT_UNREACHABLE("TokenKind::Limit should not be passed.");
    2672             :         break;
    2673             :     }
    2674             : 
    2675             :     return "<bad TokenKind>"; // reached only when no case above returned
    2676             : }
    2677             : // DEBUG-only: map |tt| to "TokenKind::" followed by its enumerator name.
    2678             : #ifdef DEBUG
    2679             : const char*
    2680             : TokenKindToString(TokenKind tt)
    2681             : {
    2682             :     switch (tt) {
    2683             : #define EMIT_CASE(name, desc) case TokenKind::name: return "TokenKind::" #name;
    2684             :       FOR_EACH_TOKEN_KIND(EMIT_CASE) // |desc| is unused here; only |name| is stringified
    2685             : #undef EMIT_CASE
    2686             :       case TokenKind::Limit: break;
    2687             :     }
    2688             : 
    2689             :     return "<bad TokenKind>"; // TokenKind::Limit, or any value no case above returned for
    2690             : }
    2691             : #endif
    2692             : // Explicit instantiations of the char16_t token stream templates for each AnyCharsAccess type listed below.
    2693             : template class frontend::TokenStreamCharsBase<char16_t>;
    2694             : 
    2695             : template class frontend::TokenStreamChars<char16_t, frontend::TokenStreamAnyCharsAccess>;
    2696             : template class frontend::TokenStreamSpecific<char16_t, frontend::TokenStreamAnyCharsAccess>;
    2697             : 
    2698             : template class
    2699             : frontend::TokenStreamChars<char16_t, frontend::ParserAnyCharsAccess<frontend::GeneralParser<frontend::FullParseHandler, char16_t>>>;
    2700             : template class
    2701             : frontend::TokenStreamChars<char16_t, frontend::ParserAnyCharsAccess<frontend::GeneralParser<frontend::SyntaxParseHandler, char16_t>>>;
    2702             : 
    2703             : template class
    2704             : frontend::TokenStreamSpecific<char16_t, frontend::ParserAnyCharsAccess<frontend::GeneralParser<frontend::FullParseHandler, char16_t>>>;
    2705             : template class
    2706             : frontend::TokenStreamSpecific<char16_t, frontend::ParserAnyCharsAccess<frontend::GeneralParser<frontend::SyntaxParseHandler, char16_t>>>;
    2707             : 
    2708             : } // namespace frontend
    2709             : 
    2710             : } // namespace js
    2711             : 
    2712             : // Read one line from |file| into |buf|: stop after '\n', at a '\r' not followed by '\n', at EOF, or at size-1 chars.
    2713             : JS_FRIEND_API(int)
    2714             : js_fgets(char* buf, int size, FILE* file)
    2715             : {
    2716             :     int n, i, c;
    2717             :     bool crflag; // true when the previously stored char was '\r'
    2718             : 
    2719             :     n = size - 1; // reserve one slot for the terminating '\0'
    2720             :     if (n < 0)
    2721             :         return -1; // size <= 0: no room even for the terminator; |buf| is left untouched
    2722             : 
    2723             :     crflag = false;
    2724             :     for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
    2725             :         buf[i] = c;
    2726             :         if (c == '\n') {        // any \n ends a line
    2727             :             i++;                // keep the \n; we know there is room for \0
    2728             :             break;
    2729             :         }
    2730             :         if (crflag) {           // \r not followed by \n ends line at the \r
    2731             :             ungetc(c, file);
    2732             :             break;              // and overwrite c in buf with \0
    2733             :         }
    2734             :         crflag = (c == '\r');
    2735             :     }
    2736             : 
    2737             :     buf[i] = '\0';
    2738             :     return i; // number of chars stored, not counting the '\0'
    2739             : }

Generated by: LCOV version 1.13-14-ga5dd952