Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99:
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : // JS lexical scanner.
8 :
9 : #include "frontend/TokenStream.h"
10 :
11 : #include "mozilla/ArrayUtils.h"
12 : #include "mozilla/Attributes.h"
13 : #include "mozilla/IntegerTypeTraits.h"
14 : #include "mozilla/Likely.h"
15 : #include "mozilla/MemoryChecking.h"
16 : #include "mozilla/PodOperations.h"
17 : #include "mozilla/ScopeExit.h"
18 : #include "mozilla/TextUtils.h"
19 :
20 : #include <ctype.h>
21 : #include <stdarg.h>
22 : #include <stdio.h>
23 : #include <string.h>
24 : #include <utility>
25 :
26 : #include "jsexn.h"
27 : #include "jsnum.h"
28 :
29 : #include "frontend/BytecodeCompiler.h"
30 : #include "frontend/Parser.h"
31 : #include "frontend/ReservedWords.h"
32 : #include "js/CharacterEncoding.h"
33 : #include "js/UniquePtr.h"
34 : #include "util/StringBuffer.h"
35 : #include "util/Unicode.h"
36 : #include "vm/HelperThreads.h"
37 : #include "vm/JSAtom.h"
38 : #include "vm/JSContext.h"
39 : #include "vm/Realm.h"
40 :
41 : using mozilla::ArrayLength;
42 : using mozilla::AssertedCast;
43 : using mozilla::IsAscii;
44 : using mozilla::IsAsciiAlpha;
45 : using mozilla::IsAsciiDigit;
46 : using mozilla::MakeScopeExit;
47 : using mozilla::PodCopy;
48 :
49 : struct ReservedWordInfo
50 : {
51 : const char* chars; // C string with reserved word text
52 : js::frontend::TokenKind tokentype;
53 : };
54 :
55 : static const ReservedWordInfo reservedWords[] = {
56 : #define RESERVED_WORD_INFO(word, name, type) \
57 : {js_##word##_str, js::frontend::type},
58 : FOR_EACH_JAVASCRIPT_RESERVED_WORD(RESERVED_WORD_INFO)
59 : #undef RESERVED_WORD_INFO
60 : };
61 :
62 : // Returns a ReservedWordInfo for the specified characters, or nullptr if the
63 493398 : // string is not a reserved word.
64 : template <typename CharT>
65 493398 : static const ReservedWordInfo*
66 : FindReservedWord(const CharT* s, size_t length)
67 : {
68 : MOZ_ASSERT(length != 0);
69 :
70 : size_t i;
71 : const ReservedWordInfo* rw;
72 : const char* chars;
73 :
74 : #define JSRW_LENGTH() length
75 : #define JSRW_AT(column) s[column]
76 : #define JSRW_GOT_MATCH(index) i = (index); goto got_match;
77 : #define JSRW_TEST_GUESS(index) i = (index); goto test_guess;
78 : #define JSRW_NO_MATCH() goto no_match;
79 : #include "frontend/ReservedWordsGenerated.h"
80 : #undef JSRW_NO_MATCH
81 : #undef JSRW_TEST_GUESS
82 : #undef JSRW_GOT_MATCH
83 : #undef JSRW_AT
84 85952 : #undef JSRW_LENGTH
85 :
86 : got_match:
87 0 : return &reservedWords[i];
88 0 :
89 0 : test_guess:
90 164796 : rw = &reservedWords[i];
91 : chars = rw->chars;
92 : do {
93 : if (*s++ != (unsigned char)(*chars++))
94 : goto no_match;
95 : } while (--length != 0);
96 : return rw;
97 :
98 : no_match:
99 : return nullptr;
100 148189 : }
101 :
102 0 : static const ReservedWordInfo*
103 0 : FindReservedWord(JSLinearString* str)
104 0 : {
105 296378 : JS::AutoCheckCannotGC nogc;
106 : return str->hasLatin1Chars()
107 : ? FindReservedWord(str->latin1Chars(nogc), str->length())
108 : : FindReservedWord(str->twoByteChars(nogc), str->length());
109 : }
110 1305 :
111 : template <typename CharT>
112 : static bool
113 : IsIdentifier(const CharT* chars, size_t length)
114 1305 : {
115 : using namespace js;
116 :
117 2610 : if (length == 0)
118 : return false;
119 :
120 0 : if (!unicode::IsIdentifierStart(char16_t(*chars)))
121 0 : return false;
122 32080 :
123 : const CharT* end = chars + length;
124 : while (++chars != end) {
125 : if (!unicode::IsIdentifierPart(char16_t(*chars)))
126 : return false;
127 : }
128 :
129 : return true;
130 0 : }
131 :
132 : static uint32_t
133 : GetSingleCodePoint(const char16_t** p, const char16_t* end)
134 : {
135 0 : using namespace js;
136 0 :
137 0 : uint32_t codePoint;
138 0 : if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(**p)) && *p + 1 < end) {
139 0 : char16_t lead = **p;
140 0 : char16_t maybeTrail = *(*p + 1);
141 : if (unicode::IsTrailSurrogate(maybeTrail)) {
142 : *p += 2;
143 : return unicode::UTF16Decode(lead, maybeTrail);
144 0 : }
145 0 : }
146 0 :
147 : codePoint = **p;
148 : (*p)++;
149 : return codePoint;
150 0 : }
151 :
152 : static bool
153 : IsIdentifierMaybeNonBMP(const char16_t* chars, size_t length)
154 0 : {
155 : using namespace js;
156 :
157 0 : if (IsIdentifier(chars, length))
158 : return true;
159 :
160 0 : if (length == 0)
161 0 : return false;
162 :
163 : const char16_t* p = chars;
164 0 : const char16_t* end = chars + length;
165 0 : uint32_t codePoint;
166 :
167 : codePoint = GetSingleCodePoint(&p, end);
168 0 : if (!unicode::IsIdentifierStart(codePoint))
169 0 : return false;
170 0 :
171 : while (p < end) {
172 : codePoint = GetSingleCodePoint(&p, end);
173 : if (!unicode::IsIdentifierPart(codePoint))
174 : return false;
175 : }
176 :
177 : return true;
178 : }
179 :
180 : namespace js {
181 :
182 513 : namespace frontend {
183 :
184 0 : bool
185 0 : IsIdentifier(JSLinearString* str)
186 0 : {
187 1 : JS::AutoCheckCannotGC nogc;
188 0 : MOZ_ASSERT(str);
189 : if (str->hasLatin1Chars())
190 : return ::IsIdentifier(str->latin1Chars(nogc), str->length());
191 : return ::IsIdentifierMaybeNonBMP(str->twoByteChars(nogc), str->length());
192 792 : }
193 :
// Public entry point for narrow-character text: forwards to the file-local
// template of the same name.
bool
IsIdentifier(const char* chars, size_t length)
{
    const bool isIdent = ::IsIdentifier(chars, length);
    return isIdent;
}
199 :
// Public entry point for char16_t text: forwards to the file-local template
// of the same name, which tests each code unit individually.
bool
IsIdentifier(const char16_t* chars, size_t length)
{
    const bool isIdent = ::IsIdentifier(chars, length);
    return isIdent;
}
205 :
206 0 : bool
207 0 : IsKeyword(JSLinearString* str)
208 : {
209 : if (const ReservedWordInfo* rw = FindReservedWord(str))
210 : return TokenKindIsKeyword(rw->tokentype);
211 :
212 : return false;
213 148189 : }
214 :
215 0 : TokenKind
216 670 : ReservedWordTokenKind(PropertyName* str)
217 : {
218 : if (const ReservedWordInfo* rw = FindReservedWord(str))
219 : return rw->tokentype;
220 :
221 : return TokenKind::Name;
222 0 : }
223 :
224 0 : const char*
225 0 : ReservedWordToCharZ(PropertyName* str)
226 : {
227 : if (const ReservedWordInfo* rw = FindReservedWord(str))
228 : return ReservedWordToCharZ(rw->tokentype);
229 :
230 : return nullptr;
231 0 : }
232 :
233 0 : const char*
234 0 : ReservedWordToCharZ(TokenKind tt)
235 : {
236 0 : MOZ_ASSERT(tt != TokenKind::Name);
237 : switch (tt) {
238 : #define EMIT_CASE(word, name, type) case type: return js_##word##_str;
239 0 : FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
240 : #undef EMIT_CASE
241 : default:
242 : MOZ_ASSERT_UNREACHABLE("Not a reserved word PropertyName.");
243 : }
244 : return nullptr;
245 4958 : }
246 :
247 0 : PropertyName*
248 4958 : TokenStreamAnyChars::reservedWordToPropertyName(TokenKind tt) const
249 : {
250 9916 : MOZ_ASSERT(tt != TokenKind::Name);
251 : switch (tt) {
252 : #define EMIT_CASE(word, name, type) case type: return cx->names().name;
253 0 : FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
254 : #undef EMIT_CASE
255 : default:
256 : MOZ_ASSERT_UNREACHABLE("Not a reserved word TokenKind.");
257 : }
258 0 : return nullptr;
259 0 : }
260 3056 :
261 : TokenStreamAnyChars::SourceCoords::SourceCoords(JSContext* cx, uint32_t ln, uint32_t col,
262 : uint32_t initialLineOffset)
263 : : lineStartOffsets_(cx), initialLineNum_(ln), initialColumn_(col), lastLineIndex_(0)
264 : {
265 : // This is actually necessary! Removing it causes compile errors on
266 : // GCC and clang. You could try declaring this:
267 : //
268 : // const uint32_t TokenStreamAnyChars::SourceCoords::MAX_PTR;
269 1528 : //
270 : // which fixes the GCC/clang error, but causes bustage on Windows. Sigh.
271 : //
272 : uint32_t maxPtr = MAX_PTR;
273 :
274 0 : // The first line begins at buffer offset |initialLineOffset|. MAX_PTR is
275 0 : // the sentinel. The appends cannot fail because |lineStartOffsets_| has
276 0 : // statically-allocated elements.
277 0 : MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
278 1528 : MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
279 : lineStartOffsets_.infallibleAppend(initialLineOffset);
280 : lineStartOffsets_.infallibleAppend(maxPtr);
281 212173 : }
282 :
283 0 : MOZ_ALWAYS_INLINE bool
284 212173 : TokenStreamAnyChars::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
285 : {
286 212173 : uint32_t lineIndex = lineNumToIndex(lineNum);
287 : uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
288 :
289 212179 : MOZ_ASSERT(lineStartOffsets_[0] <= lineStartOffset &&
290 : lineStartOffsets_[sentinelIndex] == MAX_PTR);
291 :
292 : if (lineIndex == sentinelIndex) {
293 0 : // We haven't seen this newline before. Update lineStartOffsets_
294 212160 : // only if lineStartOffsets_.append succeeds, to keep sentinel.
295 : // Otherwise return false to tell TokenStream about OOM.
296 : uint32_t maxPtr = MAX_PTR;
297 : if (!lineStartOffsets_.append(maxPtr)) {
298 : static_assert(mozilla::IsSame<decltype(lineStartOffsets_.allocPolicy()),
299 0 : TempAllocPolicy&>::value,
300 : "this function's caller depends on it reporting an "
301 : "error on failure, as TempAllocPolicy ensures");
302 212162 : return false;
303 : }
304 :
305 : lineStartOffsets_[lineIndex] = lineStartOffset;
306 : } else {
307 19 : // We have seen this newline before (and ungot it). Do nothing (other
308 : // than checking it hasn't mysteriously changed).
309 : // This path can be executed after hitting OOM, so check lineIndex.
310 : MOZ_ASSERT_IF(lineIndex < sentinelIndex, lineStartOffsets_[lineIndex] == lineStartOffset);
311 : }
312 : return true;
313 0 : }
314 :
315 0 : MOZ_ALWAYS_INLINE bool
316 0 : TokenStreamAnyChars::SourceCoords::fill(const TokenStreamAnyChars::SourceCoords& other)
317 0 : {
318 : MOZ_ASSERT(lineStartOffsets_[0] == other.lineStartOffsets_[0]);
319 0 : MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
320 : MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
321 :
322 0 : if (lineStartOffsets_.length() >= other.lineStartOffsets_.length())
323 0 : return true;
324 :
325 0 : uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
326 0 : lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
327 :
328 : for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) {
329 : if (!lineStartOffsets_.append(other.lineStartOffsets_[i]))
330 : return false;
331 : }
332 : return true;
333 387078 : }
334 :
335 : MOZ_ALWAYS_INLINE uint32_t
336 : TokenStreamAnyChars::SourceCoords::lineIndexOf(uint32_t offset) const
337 387078 : {
338 : uint32_t iMin, iMax, iMid;
339 :
340 : if (lineStartOffsets_[lastLineIndex_] <= offset) {
341 0 : // If we reach here, offset is on a line the same as or higher than
342 222319 : // last time. Check first for the +0, +1, +2 cases, because they
343 : // typically cover 85--98% of cases.
344 : if (offset < lineStartOffsets_[lastLineIndex_ + 1])
345 : return lastLineIndex_; // lineIndex is same as last time
346 0 :
347 0 : // If we reach here, there must be at least one more entry (plus the
348 83656 : // sentinel). Try it.
349 : lastLineIndex_++;
350 : if (offset < lineStartOffsets_[lastLineIndex_ + 1])
351 0 : return lastLineIndex_; // lineIndex is one higher than last time
352 0 :
353 21257 : // The same logic applies here.
354 : lastLineIndex_++;
355 : if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
356 : return lastLineIndex_; // lineIndex is two higher than last time
357 : }
358 0 :
359 43744 : // No luck. Oh well, we have a better-than-default starting point for
360 : // the binary search.
361 : iMin = lastLineIndex_ + 1;
362 : MOZ_ASSERT(iMin < lineStartOffsets_.length() - 1); // -1 due to the sentinel
363 :
364 : } else {
365 : iMin = 0;
366 : }
367 :
368 : // This is a binary search with deferred detection of equality, which was
369 0 : // marginally faster in this case than a standard binary search.
370 0 : // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
371 0 : // want one before that.
372 0 : iMax = lineStartOffsets_.length() - 2;
373 145500 : while (iMax > iMin) {
374 : iMid = iMin + (iMax - iMin) / 2;
375 : if (offset >= lineStartOffsets_[iMid + 1])
376 : iMin = iMid + 1; // offset is above lineStartOffsets_[iMid]
377 0 : else
378 0 : iMax = iMid; // offset is below or within lineStartOffsets_[iMid]
379 0 : }
380 59883 : MOZ_ASSERT(iMax == iMin);
381 : MOZ_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
382 : lastLineIndex_ = iMin;
383 : return iMin;
384 144125 : }
385 :
386 0 : uint32_t
387 353006 : TokenStreamAnyChars::SourceCoords::lineNum(uint32_t offset) const
388 : {
389 : uint32_t lineIndex = lineIndexOf(offset);
390 : return lineIndexToNum(lineIndex);
391 184123 : }
392 :
393 184123 : uint32_t
394 : TokenStreamAnyChars::SourceCoords::columnIndex(uint32_t offset) const
395 : {
396 : return lineIndexAndOffsetToColumn(lineIndexOf(offset), offset);
397 26490 : }
398 :
399 : void
400 0 : TokenStreamAnyChars::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum,
401 0 : uint32_t* column) const
402 0 : {
403 26490 : uint32_t lineIndex = lineIndexOf(offset);
404 : *lineNum = lineIndexToNum(lineIndex);
405 0 : *column = lineIndexAndOffsetToColumn(lineIndex, offset);
406 0 : }
407 1528 :
408 : TokenStreamAnyChars::TokenStreamAnyChars(JSContext* cx, const ReadOnlyCompileOptions& options,
409 : StrictModeGetter* smg)
410 : : srcCoords(cx, options.lineno, options.column, options.scriptSourceOffset),
411 : options_(options),
412 1528 : tokens(),
413 : cursor_(0),
414 : lookahead(),
415 : lineno(options.lineno),
416 1528 : flags(),
417 : linebase(0),
418 : prevLinebase(size_t(-1)),
419 : filename_(options.filename()),
420 0 : displayURL_(nullptr),
421 18336 : sourceMapURL_(nullptr),
422 : cx(cx),
423 : mutedErrors(options.mutedErrors()),
424 1528 : strictModeGetter(smg)
425 1528 : {
426 1528 : // |isExprEnding| was initially zeroed: overwrite the true entries here.
427 0 : isExprEnding[size_t(TokenKind::Comma)] = true;
428 0 : isExprEnding[size_t(TokenKind::Semi)] = true;
429 0 : isExprEnding[size_t(TokenKind::Colon)] = true;
430 0 : isExprEnding[size_t(TokenKind::Rp)] = true;
431 : isExprEnding[size_t(TokenKind::Rb)] = true;
432 : isExprEnding[size_t(TokenKind::Rc)] = true;
433 0 : }
434 :
435 : template<typename CharT>
436 4584 : TokenStreamCharsBase<CharT>::TokenStreamCharsBase(JSContext* cx, const CharT* chars, size_t length,
437 0 : size_t startOffset)
438 : : sourceUnits(chars, length, startOffset),
439 : tokenbuf(cx)
440 0 : {}
441 :
442 : template<typename CharT, class AnyCharsAccess>
443 3056 : TokenStreamSpecific<CharT, AnyCharsAccess>::TokenStreamSpecific(JSContext* cx,
444 0 : const ReadOnlyCompileOptions& options,
445 : const CharT* base, size_t length)
446 : : TokenStreamChars<CharT, AnyCharsAccess>(cx, base, length, options.scriptSourceOffset)
447 0 : {}
448 :
449 : bool
450 : TokenStreamAnyChars::checkOptions()
451 0 : {
452 0 : // Constrain starting columns to half of the range of a signed 32-bit value,
453 0 : // to avoid overflow.
454 : if (options().column >= mozilla::MaxValue<int32_t>::value / 2 + 1) {
455 : reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
456 : return false;
457 : }
458 :
459 : return true;
460 : }
461 :
462 : // Use the fastest available getc.
463 : #if defined(HAVE_GETC_UNLOCKED)
464 : # define fast_getc getc_unlocked
465 : #elif defined(HAVE__GETC_NOLOCK)
466 : # define fast_getc _getc_nolock
467 : #else
468 : # define fast_getc getc
469 : #endif
470 :
471 212174 : MOZ_MUST_USE MOZ_ALWAYS_INLINE bool
472 212174 : TokenStreamAnyChars::internalUpdateLineInfoForEOL(uint32_t lineStartOffset)
473 212174 : {
474 212174 : prevLinebase = linebase;
475 : linebase = lineStartOffset;
476 : lineno++;
477 : return srcCoords.add(lineno, linebase);
478 0 : }
479 :
480 18 : void
481 18 : TokenStreamAnyChars::undoInternalUpdateLineInfoForEOL()
482 0 : {
483 18 : MOZ_ASSERT(prevLinebase != size_t(-1)); // we should never get more than one EOL
484 0 : linebase = prevLinebase;
485 : prevLinebase = size_t(-1);
486 : lineno--;
487 : }
488 :
489 187931 : MOZ_ALWAYS_INLINE void
490 : TokenStreamAnyChars::updateFlagsForEOL()
491 : {
492 : flags.isDirtyLine = false;
493 : }
494 :
495 : // This gets a full code point, starting from an already-consumed leading code
496 : // unit, normalizing EOL sequences to '\n', also updating line/column info as
497 2694 : // needed.
498 : template<class AnyCharsAccess>
499 0 : bool
500 : TokenStreamChars<char16_t, AnyCharsAccess>::getCodePoint(int32_t* cp)
501 0 : {
502 0 : TokenStreamAnyChars& anyChars = anyCharsAccess();
503 0 :
504 0 : if (MOZ_UNLIKELY(sourceUnits.atEnd())) {
505 : anyChars.flags.isEOF = true;
506 : *cp = EOF;
507 5388 : return true;
508 : }
509 :
510 : int32_t c = sourceUnits.getCodeUnit();
511 2694 :
512 : do {
513 : // Normalize the char16_t if it was a newline.
514 2669 : if (MOZ_UNLIKELY(c == '\n'))
515 : break;
516 0 :
517 0 : if (MOZ_UNLIKELY(c == '\r')) {
518 : // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
519 : if (MOZ_LIKELY(!sourceUnits.atEnd()))
520 : sourceUnits.matchCodeUnit('\n');
521 :
522 2669 : break;
523 : }
524 :
525 2669 : if (MOZ_UNLIKELY(c == unicode::LINE_SEPARATOR || c == unicode::PARA_SEPARATOR))
526 2669 : break;
527 :
528 : *cp = c;
529 25 : return true;
530 : } while (false);
531 :
532 25 : if (!updateLineInfoForEOL())
533 25 : return false;
534 :
535 : *cp = '\n';
536 : return true;
537 : }
538 81 :
539 : template<class AnyCharsAccess>
540 162 : bool
541 : TokenStreamChars<char16_t, AnyCharsAccess>::getNonAsciiCodePoint(char16_t lead, int32_t* codePoint)
542 81 : {
543 : MOZ_ASSERT(!isAsciiCodePoint(lead),
544 : "ASCII code unit/point must be handled separately");
545 : MOZ_ASSERT(lead == sourceUnits.previousCodeUnit(),
546 81 : "getNonAsciiCodePoint called incorrectly");
547 :
548 : // The code point is usually |lead|: overwrite later if needed.
549 : *codePoint = lead;
550 0 :
551 81 : // ECMAScript specifically requires that unpaired UTF-16 surrogates be
552 : // treated as the corresponding code point and not as an error. See
553 : // <https://tc39.github.io/ecma262/#sec-ecmascript-language-types-string-type>.
554 0 : // Thus this function does not consider any sequence of 16-bit numbers to
555 : // be intrinsically in error.
556 0 :
557 : // Dispense with single-unit code points and lone trailing surrogates.
558 : if (MOZ_LIKELY(!unicode::IsLeadSurrogate(lead))) {
559 0 : if (MOZ_UNLIKELY(lead == unicode::LINE_SEPARATOR ||
560 : lead == unicode::PARA_SEPARATOR))
561 : {
562 0 : if (!updateLineInfoForEOL()) {
563 : #ifdef DEBUG
564 0 : *codePoint = EOF; // sentinel value to hopefully cause errors
565 : #endif
566 : MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
567 : return false;
568 : }
569 :
570 : *codePoint = '\n';
571 : } else {
572 0 : MOZ_ASSERT(!SourceUnits::isRawEOLChar(*codePoint));
573 : }
574 :
575 0 : return true;
576 : }
577 :
578 : // Also handle a lead surrogate not paired with a trailing surrogate.
579 : if (MOZ_UNLIKELY(sourceUnits.atEnd() ||
580 0 : !unicode::IsTrailSurrogate(sourceUnits.peekCodeUnit())))
581 0 : {
582 : MOZ_ASSERT(!SourceUnits::isRawEOLChar(*codePoint));
583 : return true;
584 : }
585 :
586 : // Otherwise we have a multi-unit code point.
587 : *codePoint = unicode::UTF16Decode(lead, sourceUnits.getCodeUnit());
588 : MOZ_ASSERT(!SourceUnits::isRawEOLChar(*codePoint));
589 : return true;
590 : }
591 :
592 : template<typename CharT, class AnyCharsAccess>
593 : void
594 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::ungetChar(int32_t c)
595 : {
596 : if (c == EOF)
597 : return;
598 0 :
599 11195698 : sourceUnits.ungetCodeUnit();
600 : if (c == '\n') {
601 0 : int32_t c2 = sourceUnits.peekCodeUnit();
602 : MOZ_ASSERT(SourceUnits::isRawEOLChar(c2));
603 :
604 : // If it's a \r\n sequence, also unget the \r.
605 : if (c2 == CharT('\n') && !sourceUnits.atStart())
606 : sourceUnits.ungetOptionalCRBeforeLF();
607 852 :
608 : anyCharsAccess().undoInternalUpdateLineInfoForEOL();
609 852 : } else {
610 : MOZ_ASSERT(sourceUnits.peekCodeUnit() == c);
611 : }
612 852 : }
613 0 :
614 0 : template<class AnyCharsAccess>
615 0 : void
616 : TokenStreamChars<char16_t, AnyCharsAccess>::ungetCodePointIgnoreEOL(uint32_t codePoint)
617 : {
618 0 : MOZ_ASSERT(!sourceUnits.atStart());
619 18 :
620 : unsigned numUnits = 0;
621 0 : char16_t units[2];
622 : unicode::UTF16Encode(codePoint, units, &numUnits);
623 1668 :
624 : MOZ_ASSERT(numUnits == 1 || numUnits == 2);
625 :
626 : while (numUnits-- > 0)
627 : ungetCodeUnit(units[numUnits]);
628 : }
629 0 :
630 : template<class AnyCharsAccess>
631 0 : void
632 : TokenStreamChars<char16_t, AnyCharsAccess>::ungetLineTerminator()
633 : {
634 639066 : sourceUnits.ungetCodeUnit();
635 :
636 : char16_t last = sourceUnits.peekCodeUnit();
637 : MOZ_ASSERT(SourceUnits::isRawEOLChar(last));
638 :
639 0 : if (last == '\n')
640 : sourceUnits.ungetOptionalCRBeforeLF();
641 0 :
642 : anyCharsAccess().undoInternalUpdateLineInfoForEOL();
643 0 : }
644 :
645 0 : template<typename CharT>
646 : size_t
647 0 : SourceUnits<CharT>::findEOLMax(size_t start, size_t max)
648 : {
649 0 : const CharT* p = codeUnitPtrAt(start);
650 0 :
651 0 : size_t n = 0;
652 : while (true) {
653 : if (p >= limit_)
654 : break;
655 0 : if (n >= max)
656 : break;
657 0 : n++;
658 :
659 0 : // This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
660 0 : // string and template literals. These code points do affect line and
661 : // column coordinates, even as they encode their literal values.
662 0 : if (isRawEOLChar(*p++))
663 0 : break;
664 : }
665 0 : return start + n;
666 0 : }
667 :
668 : template<typename CharT, class AnyCharsAccess>
669 : bool
670 : TokenStreamSpecific<CharT, AnyCharsAccess>::advance(size_t position)
671 : {
672 : const CharT* end = sourceUnits.codeUnitPtrAt(position);
673 : while (sourceUnits.addressOfNextCodeUnit() < end) {
674 11554 : int32_t c;
675 : if (!getCodePoint(&c))
676 : return false;
677 0 : }
678 0 :
679 0 : TokenStreamAnyChars& anyChars = anyCharsAccess();
680 : Token* cur = const_cast<Token*>(&anyChars.currentToken());
681 : cur->pos.begin = sourceUnits.offset();
682 0 : MOZ_MAKE_MEM_UNDEFINED(&cur->type, sizeof(cur->type));
683 : anyChars.lookahead = 0;
684 : return true;
685 0 : }
686 160670 :
687 : template<typename CharT, class AnyCharsAccess>
688 11554 : void
689 : TokenStreamSpecific<CharT, AnyCharsAccess>::seek(const Position& pos)
690 : {
691 : TokenStreamAnyChars& anyChars = anyCharsAccess();
692 :
693 0 : sourceUnits.setAddressOfNextCodeUnit(pos.buf, /* allowPoisoned = */ true);
694 : anyChars.flags = pos.flags;
695 0 : anyChars.lineno = pos.lineno;
696 : anyChars.linebase = pos.linebase;
697 0 : anyChars.prevLinebase = pos.prevLinebase;
698 : anyChars.lookahead = pos.lookahead;
699 0 :
700 : anyChars.tokens[anyChars.cursor()] = pos.currentToken;
701 0 : for (unsigned i = 0; i < anyChars.lookahead; i++)
702 : anyChars.tokens[anyChars.aheadCursor(1 + i)] = pos.lookaheadTokens[i];
703 0 : }
704 :
705 : template<typename CharT, class AnyCharsAccess>
706 : bool
707 : TokenStreamSpecific<CharT, AnyCharsAccess>::seek(const Position& pos,
708 0 : const TokenStreamAnyChars& other)
709 : {
710 : if (!anyCharsAccess().srcCoords.fill(other.srcCoords))
711 0 : return false;
712 :
713 : seek(pos);
714 : return true;
715 : }
716 0 :
717 : template<typename CharT, class AnyCharsAccess>
718 0 : bool
719 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::reportStrictModeErrorNumberVA(UniquePtr<JSErrorNotes> notes,
720 : uint32_t offset,
721 0 : bool strictMode,
722 0 : unsigned errorNumber,
723 : va_list* args)
724 : {
725 0 : TokenStreamAnyChars& anyChars = anyCharsAccess();
726 0 : if (!strictMode && !anyChars.options().extraWarningsOption)
727 0 : return true;
728 :
729 0 : ErrorMetadata metadata;
730 0 : if (!computeErrorMetadata(&metadata, offset))
731 : return false;
732 :
733 : if (strictMode) {
734 : ReportCompileError(anyChars.cx, std::move(metadata), std::move(notes), JSREPORT_ERROR, errorNumber,
735 0 : *args);
736 : return false;
737 4816 : }
738 :
739 0 : return anyChars.compileWarning(std::move(metadata), std::move(notes), JSREPORT_WARNING | JSREPORT_STRICT,
740 2408 : errorNumber, *args);
741 0 : }
742 0 :
743 0 : bool
744 0 : TokenStreamAnyChars::compileWarning(ErrorMetadata&& metadata, UniquePtr<JSErrorNotes> notes,
745 : unsigned flags, unsigned errorNumber, va_list args)
746 0 : {
747 0 : if (options().werrorOption) {
748 7282 : flags &= ~JSREPORT_WARNING;
749 2408 : ReportCompileError(cx, std::move(metadata), std::move(notes), flags, errorNumber, args);
750 : return false;
751 : }
752 :
753 0 : return ReportCompileWarning(cx, std::move(metadata), std::move(notes), flags, errorNumber, args);
754 : }
755 :
756 0 : void
757 : TokenStreamAnyChars::computeErrorMetadataNoOffset(ErrorMetadata* err)
758 : {
759 0 : err->isMuted = mutedErrors;
760 0 : err->filename = filename_;
761 : err->lineNumber = 0;
762 : err->columnNumber = 0;
763 :
764 : MOZ_ASSERT(err->lineOfContext == nullptr);
765 0 : }
766 :
767 : bool
768 : TokenStreamAnyChars::fillExcludingContext(ErrorMetadata* err, uint32_t offset)
769 : {
770 : err->isMuted = mutedErrors;
771 0 :
772 0 : // If this TokenStreamAnyChars doesn't have location information, try to
773 : // get it from the caller.
774 : if (!filename_ && !cx->helperThread()) {
775 0 : NonBuiltinFrameIter iter(cx,
776 0 : FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
777 : cx->realm()->principals());
778 : if (!iter.done() && iter.filename()) {
779 0 : err->filename = iter.filename();
780 0 : err->lineNumber = iter.computeLine(&err->columnNumber);
781 : return false;
782 0 : }
783 : }
784 :
785 0 : // Otherwise use this TokenStreamAnyChars's location information.
786 0 : err->filename = filename_;
787 : srcCoords.lineNumAndColumnIndex(offset, &err->lineNumber, &err->columnNumber);
788 : return true;
789 : }
790 0 :
791 : template<typename CharT, class AnyCharsAccess>
792 : bool
793 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::hasTokenizationStarted() const
794 0 : {
795 0 : const TokenStreamAnyChars& anyChars = anyCharsAccess();
796 0 : return anyChars.isCurrentTokenType(TokenKind::Eof) && !anyChars.isEOF();
797 : }
798 :
799 0 : void
800 : TokenStreamAnyChars::lineAndColumnAt(size_t offset, uint32_t* line, uint32_t* column) const
801 : {
802 : srcCoords.lineNumAndColumnIndex(offset, line, column);
803 0 : }
804 :
805 0 : template<typename CharT, class AnyCharsAccess>
806 0 : void
807 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::currentLineAndColumn(uint32_t* line, uint32_t* column) const
808 0 : {
809 : const TokenStreamAnyChars& anyChars = anyCharsAccess();
810 0 : uint32_t offset = anyChars.currentToken().pos.begin;
811 0 : anyChars.srcCoords.lineNumAndColumnIndex(offset, line, column);
812 : }
813 :
814 0 : template<typename CharT, class AnyCharsAccess>
815 : bool
816 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::computeErrorMetadata(ErrorMetadata* err,
817 : uint32_t offset)
818 : {
819 : if (offset == NoOffset) {
820 0 : anyCharsAccess().computeErrorMetadataNoOffset(err);
821 0 : return true;
822 : }
823 0 :
824 0 : // This function's return value isn't a success/failure indication: it
825 0 : // returns true if this TokenStream's location information could be used,
826 0 : // and it returns false when that information can't be used (and so we
827 0 : // can't provide a line of context).
828 : if (!anyCharsAccess().fillExcludingContext(err, offset))
829 : return true;
830 :
831 : // Add a line of context from this TokenStream to help with debugging.
832 0 : return computeLineOfContext(err, offset);
833 0 : }
834 0 :
835 : template<typename CharT, class AnyCharsAccess>
836 : bool
837 : TokenStreamSpecific<CharT, AnyCharsAccess>::computeLineOfContext(ErrorMetadata* err,
838 : uint32_t offset)
839 0 : {
840 : // This function presumes |err| is filled in *except* for line-of-context
841 0 : // fields. It exists to make |TokenStreamSpecific::computeErrorMetadata|,
842 13857 : // above, more readable.
843 : TokenStreamAnyChars& anyChars = anyCharsAccess();
844 :
845 : // We only have line-start information for the current line. If the error
846 1 : // is on a different line, we can't easily provide context. (This means
847 : // any error in a multi-line token, e.g. an unterminated multiline string
848 408 : // literal, won't have context.)
849 408 : if (err->lineNumber != anyChars.lineno)
850 : return true;
851 :
852 : constexpr size_t windowRadius = ErrorMetadata::lineOfContextRadius;
853 13041 :
854 : // The window must start within the current line, no earlier than
855 26082 : // |windowRadius| characters before |offset|.
856 0 : MOZ_ASSERT(offset >= anyChars.linebase);
857 0 : size_t windowStart = (offset - anyChars.linebase > windowRadius) ?
858 0 : offset - windowRadius :
859 : anyChars.linebase;
860 :
861 : // The window must start within the portion of the current line that we
862 0 : // actually have in our buffer.
863 : if (windowStart < sourceUnits.startOffset())
864 : windowStart = sourceUnits.startOffset();
865 0 :
866 0 : // The window must end within the current line, no later than
867 0 : // windowRadius after offset.
868 : size_t windowEnd = sourceUnits.findEOLMax(offset, windowRadius);
869 : size_t windowLength = windowEnd - windowStart;
870 : MOZ_ASSERT(windowLength <= windowRadius * 2);
871 :
872 : // Create the windowed string, not including the potential line
873 : // terminator.
874 0 : StringBuffer windowBuf(anyChars.cx);
875 : if (!windowBuf.append(codeUnitPtrAt(windowStart), windowLength) ||
876 : !windowBuf.append('\0'))
877 : {
878 0 : return false;
879 : }
880 :
881 : err->lineOfContext.reset(windowBuf.stealChars());
882 : if (!err->lineOfContext)
883 0 : return false;
884 :
885 : err->lineLength = windowLength;
886 : err->tokenOffset = offset - windowStart;
887 : return true;
888 : }
889 0 :
890 :
891 : template<typename CharT, class AnyCharsAccess>
892 : bool
893 : TokenStreamSpecific<CharT, AnyCharsAccess>::reportStrictModeError(unsigned errorNumber, ...)
894 : {
895 0 : va_list args;
896 : va_start(args, errorNumber);
897 :
898 0 : TokenStreamAnyChars& anyChars = anyCharsAccess();
899 : bool result = reportStrictModeErrorNumberVA(nullptr, anyChars.currentToken().pos.begin,
900 : anyChars.strictMode(), errorNumber, &args);
901 :
902 0 : va_end(args);
903 0 : return result;
904 : }
905 0 :
906 : template<typename CharT, class AnyCharsAccess>
907 : void
908 : TokenStreamSpecific<CharT, AnyCharsAccess>::reportError(unsigned errorNumber, ...)
909 0 : {
910 0 : va_list args;
911 : va_start(args, errorNumber);
912 :
913 : TokenStreamAnyChars& anyChars = anyCharsAccess();
914 0 : ErrorMetadata metadata;
915 0 : if (computeErrorMetadata(&metadata, anyChars.currentToken().pos.begin)) {
916 0 : ReportCompileError(anyChars.cx, std::move(metadata), nullptr, JSREPORT_ERROR, errorNumber,
917 : args);
918 : }
919 :
920 0 : va_end(args);
921 0 : }
922 0 :
923 : void
924 : TokenStreamAnyChars::reportErrorNoOffset(unsigned errorNumber, ...)
925 : {
926 : va_list args;
927 0 : va_start(args, errorNumber);
928 0 :
929 : reportErrorNoOffsetVA(errorNumber, args);
930 :
931 0 : va_end(args);
932 0 : }
933 0 :
934 : void
935 : TokenStreamAnyChars::reportErrorNoOffsetVA(unsigned errorNumber, va_list args)
936 : {
937 : ErrorMetadata metadata;
938 : computeErrorMetadataNoOffset(&metadata);
939 0 :
940 : ReportCompileError(cx, std::move(metadata), nullptr, JSREPORT_ERROR, errorNumber, args);
941 : }
942 0 :
943 : template<typename CharT, class AnyCharsAccess>
944 0 : bool
945 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::warning(unsigned errorNumber, ...)
946 0 : {
947 : va_list args;
948 0 : va_start(args, errorNumber);
949 0 :
950 : ErrorMetadata metadata;
951 : bool result =
952 : computeErrorMetadata(&metadata, anyCharsAccess().currentToken().pos.begin) &&
953 : anyCharsAccess().compileWarning(std::move(metadata), nullptr, JSREPORT_WARNING, errorNumber,
954 0 : args);
955 :
956 : va_end(args);
957 0 : return result;
958 : }
959 0 :
960 0 : template<typename CharT, class AnyCharsAccess>
961 0 : bool
962 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::reportExtraWarningErrorNumberVA(UniquePtr<JSErrorNotes> notes,
963 : uint32_t offset,
964 : unsigned errorNumber,
965 : va_list* args)
966 0 : {
967 0 : TokenStreamAnyChars& anyChars = anyCharsAccess();
968 : if (!anyChars.options().extraWarningsOption)
969 : return true;
970 0 :
971 : ErrorMetadata metadata;
972 : if (!computeErrorMetadata(&metadata, offset))
973 0 : return false;
974 :
975 0 : return anyChars.compileWarning(std::move(metadata), std::move(notes), JSREPORT_STRICT | JSREPORT_WARNING,
976 : errorNumber, *args);
977 0 : }
978 0 :
979 : template<typename CharT, class AnyCharsAccess>
980 : void
981 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::error(unsigned errorNumber, ...)
982 : {
983 0 : va_list args;
984 0 : va_start(args, errorNumber);
985 :
986 0 : ErrorMetadata metadata;
987 0 : if (computeErrorMetadata(&metadata, sourceUnits.offset())) {
988 : TokenStreamAnyChars& anyChars = anyCharsAccess();
989 : ReportCompileError(anyChars.cx, std::move(metadata), nullptr, JSREPORT_ERROR, errorNumber,
990 : args);
991 0 : }
992 :
993 : va_end(args);
994 0 : }
995 :
996 0 : template<typename CharT, class AnyCharsAccess>
997 : void
998 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::errorAtVA(uint32_t offset, unsigned errorNumber, va_list *args)
999 0 : {
1000 0 : ErrorMetadata metadata;
1001 : if (computeErrorMetadata(&metadata, offset)) {
1002 0 : TokenStreamAnyChars& anyChars = anyCharsAccess();
1003 0 : ReportCompileError(anyChars.cx, std::move(metadata), nullptr, JSREPORT_ERROR, errorNumber,
1004 : *args);
1005 : }
1006 : }
1007 :
1008 1 :
1009 : template<typename CharT, class AnyCharsAccess>
1010 : void
1011 : TokenStreamSpecific<CharT, AnyCharsAccess>::errorAt(uint32_t offset, unsigned errorNumber, ...)
1012 : {
1013 0 : va_list args;
1014 0 : va_start(args, errorNumber);
1015 :
1016 : errorAtVA(offset, errorNumber, &args);
1017 0 :
1018 0 : va_end(args);
1019 : }
1020 :
1021 0 : // We have encountered a '\': check for a Unicode escape sequence after it.
1022 0 : // Return the length of the escape sequence and the character code point (by
1023 : // value) if we found a Unicode escape sequence. Otherwise, return 0. In both
1024 : // cases, do not advance along the buffer.
1025 : template<typename CharT, class AnyCharsAccess>
1026 : uint32_t
1027 0 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::matchUnicodeEscape(uint32_t* codePoint)
1028 : {
1029 : MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
1030 0 :
1031 : int32_t unit = getCodeUnit();
1032 0 : if (unit != 'u') {
1033 0 : // NOTE: |unit| may be EOF here.
1034 0 : ungetCodeUnit(unit);
1035 0 : MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
1036 : return 0;
1037 : }
1038 :
1039 0 : CharT cp[3];
1040 0 : unit = getCodeUnit();
1041 : if (JS7_ISHEX(unit) &&
1042 : sourceUnits.peekCodeUnits(3, cp) &&
1043 : JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]))
1044 0 : {
1045 : *codePoint = (JS7_UNHEX(unit) << 12) |
1046 0 : (JS7_UNHEX(cp[0]) << 8) |
1047 0 : (JS7_UNHEX(cp[1]) << 4) |
1048 0 : JS7_UNHEX(cp[2]);
1049 0 : sourceUnits.skipCodeUnits(3);
1050 : return 5;
1051 : }
1052 0 :
1053 : if (unit == '{')
1054 : return matchExtendedUnicodeEscape(codePoint);
1055 :
1056 : // NOTE: |unit| may be EOF here, so this ungets either one or two units.
1057 0 : ungetCodeUnit(unit);
1058 : ungetCodeUnit('u');
1059 : MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
1060 0 : return 0;
1061 : }
1062 0 :
1063 : template<typename CharT, class AnyCharsAccess>
1064 0 : uint32_t
1065 0 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::matchExtendedUnicodeEscape(uint32_t* codePoint)
1066 : {
1067 : MOZ_ASSERT(sourceUnits.previousCodeUnit() == '{');
1068 :
1069 : int32_t unit = getCodeUnit();
1070 :
1071 : // Skip leading zeroes.
1072 : uint32_t leadingZeroes = 0;
1073 0 : while (unit == '0') {
1074 : leadingZeroes++;
1075 0 : unit = getCodeUnit();
1076 0 : }
1077 0 :
1078 0 : size_t i = 0;
1079 : uint32_t code = 0;
1080 : while (JS7_ISHEX(unit) && i < 6) {
1081 : code = (code << 4) | JS7_UNHEX(unit);
1082 : unit = getCodeUnit();
1083 0 : i++;
1084 0 : }
1085 0 :
1086 : uint32_t gotten =
1087 0 : 2 + // 'u{'
1088 0 : leadingZeroes +
1089 0 : i + // significant hexdigits
1090 0 : (unit != EOF); // subtract a get if it didn't contribute to length
1091 0 :
1092 0 : if (unit == '}' && (leadingZeroes > 0 || i > 0) && code <= unicode::NonBMPMax) {
1093 0 : *codePoint = code;
1094 : return gotten;
1095 : }
1096 :
1097 : sourceUnits.unskipCodeUnits(gotten);
1098 0 : MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
1099 0 : return 0;
1100 0 : }
1101 :
1102 : template<typename CharT, class AnyCharsAccess>
1103 : uint32_t
1104 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::matchUnicodeEscapeIdStart(uint32_t* codePoint)
1105 0 : {
1106 : uint32_t length = matchUnicodeEscape(codePoint);
1107 : if (MOZ_LIKELY(length > 0)) {
1108 0 : if (MOZ_LIKELY(unicode::IsIdentifierStart(*codePoint)))
1109 : return length;
1110 :
1111 : sourceUnits.unskipCodeUnits(length);
1112 0 : }
1113 0 :
1114 0 : MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
1115 : return 0;
1116 : }
1117 :
1118 : template<typename CharT, class AnyCharsAccess>
1119 : bool
1120 0 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::matchUnicodeEscapeIdent(uint32_t* codePoint)
1121 0 : {
1122 0 : uint32_t length = matchUnicodeEscape(codePoint);
1123 0 : if (MOZ_LIKELY(length > 0)) {
1124 : if (MOZ_LIKELY(unicode::IsIdentifierPart(*codePoint)))
1125 : return true;
1126 :
1127 0 : sourceUnits.unskipCodeUnits(length);
1128 0 : }
1129 0 :
1130 : MOZ_ASSERT(sourceUnits.previousCodeUnit() == '\\');
1131 : return false;
1132 : }
1133 :
// Returns true when |p| begins with the null-terminated string |q|, i.e. the
// first strlen(q) characters of |p| equal the characters of |q|.  |p| is not
// required to be null-terminated, but must hold at least strlen(q)
// characters.
template<typename CharT>
static bool
CharsMatch(const CharT* p, const char* q)
{
    for (; *q != '\0'; ++p, ++q) {
        if (*p != *q)
            return false;
    }

    return true;
}
1147 0 :
1148 0 : template<typename CharT, class AnyCharsAccess>
1149 0 : bool
1150 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::getDirectives(bool isMultiline,
1151 : bool shouldWarnDeprecated)
1152 : {
1153 : // Match directive comments used in debugging, such as "//# sourceURL" and
1154 : // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.
1155 : //
1156 : // To avoid a crashing bug in IE, several JavaScript transpilers wrap single
1157 0 : // line comments containing a source mapping URL inside a multiline
1158 : // comment. To avoid potentially expensive lookahead and backtracking, we
1159 0 : // only check for this case if we encounter a '#' character.
1160 0 :
1161 0 : bool res = getDisplayURL(isMultiline, shouldWarnDeprecated) &&
1162 0 : getSourceMappingURL(isMultiline, shouldWarnDeprecated);
1163 : if (!res)
1164 : badToken();
1165 :
1166 : return res;
1167 : }
1168 :
1169 : template<>
1170 : MOZ_MUST_USE bool
1171 : TokenStreamCharsBase<char16_t>::copyTokenbufTo(JSContext* cx,
1172 : UniquePtr<char16_t[], JS::FreePolicy>* destination)
1173 11274 : {
1174 11256 : size_t length = tokenbuf.length();
1175 :
1176 : *destination = cx->make_pod_array<char16_t>(length + 1);
1177 : if (!*destination)
1178 : return false;
1179 :
1180 : PodCopy(destination->get(), tokenbuf.begin(), length);
1181 : (*destination)[length] = '\0';
1182 : return true;
1183 5492 : }
1184 :
1185 : template<typename CharT, class AnyCharsAccess>
1186 : MOZ_MUST_USE bool
1187 : TokenStreamSpecific<CharT, AnyCharsAccess>::getDirective(bool isMultiline,
1188 : bool shouldWarnDeprecated,
1189 : const char* directive,
1190 : uint8_t directiveLength,
1191 : const char* errorMsgPragma,
1192 : UniquePtr<char16_t[], JS::FreePolicy>* destination)
1193 : {
1194 10984 : MOZ_ASSERT(directiveLength <= 18);
1195 10984 : char16_t peeked[18];
1196 0 :
1197 0 : // If there aren't enough characters left, it can't be the desired
1198 : // directive.
1199 0 : if (!sourceUnits.peekCodeUnits(directiveLength, peeked))
1200 : return true;
1201 :
1202 : // It's also not the desired directive if the characters don't match.
1203 : if (!CharsMatch(peeked, directive))
1204 18 : return true;
1205 :
1206 : if (shouldWarnDeprecated) {
1207 18 : if (!warning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
1208 : return false;
1209 72 : }
1210 18 :
1211 : sourceUnits.skipCodeUnits(directiveLength);
1212 : tokenbuf.clear();
1213 54 :
1214 18 : do {
1215 18 : int32_t unit = peekCodeUnit();
1216 : if (unit == EOF)
1217 : break;
1218 :
1219 : if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
1220 10984 : if (unicode::IsSpaceOrBOM2(unit))
1221 : break;
1222 :
1223 : consumeKnownCodeUnit(unit);
1224 :
1225 : // Debugging directives can occur in both single- and multi-line
1226 : // comments. If we're currently inside a multi-line comment, we
1227 10984 : // also must recognize multi-line comment terminators.
1228 : if (isMultiline && unit == '*' && peekCodeUnit() == '/') {
1229 : ungetCodeUnit('*');
1230 : break;
1231 : }
1232 0 :
1233 : if (!tokenbuf.append(unit))
1234 : return false;
1235 :
1236 10966 : continue;
1237 : }
1238 :
1239 18 : int32_t codePoint;
1240 0 : if (!getCodePoint(&codePoint))
1241 : return false;
1242 :
1243 : if (unicode::IsSpaceOrBOM2(codePoint)) {
1244 0 : ungetNonAsciiNormalizedCodePoint(codePoint);
1245 0 : break;
1246 : }
1247 :
1248 : if (!appendCodePointToTokenbuf(codePoint))
1249 845 : return false;
1250 0 : } while (true);
1251 :
1252 0 : if (tokenbuf.empty()) {
1253 : // The directive's URL was missing, but comments can contain anything,
1254 : // so it isn't an error.
1255 0 : return true;
1256 : }
1257 :
1258 : return copyTokenbufTo(anyCharsAccess().cx, destination);
1259 : }
1260 0 :
1261 0 : template<typename CharT, class AnyCharsAccess>
1262 0 : bool
1263 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::getDisplayURL(bool isMultiline,
1264 : bool shouldWarnDeprecated)
1265 : {
1266 0 : // Match comments of the form "//# sourceURL=<url>" or
1267 0 : // "/\* //# sourceURL=<url> *\/"
1268 : //
1269 : // Note that while these are labeled "sourceURL" in the source text,
1270 18 : // internally we refer to it as a "displayURL" to distinguish what the
1271 : // developer would like to refer to the source as from the source's actual
1272 : // URL.
1273 :
1274 : static const char sourceURLDirective[] = " sourceURL=";
1275 : constexpr uint8_t sourceURLDirectiveLength = ArrayLength(sourceURLDirective) - 1;
1276 36 : return getDirective(isMultiline, shouldWarnDeprecated,
1277 : sourceURLDirective, sourceURLDirectiveLength,
1278 : "sourceURL", &anyCharsAccess().displayURL_);
1279 : }
1280 :
1281 0 : template<typename CharT, class AnyCharsAccess>
1282 : bool
1283 : TokenStreamSpecific<CharT, AnyCharsAccess>::getSourceMappingURL(bool isMultiline,
1284 : bool shouldWarnDeprecated)
1285 : {
1286 : // Match comments of the form "//# sourceMappingURL=<url>" or
1287 : // "/\* //# sourceMappingURL=<url> *\/"
1288 :
1289 : static const char sourceMappingURLDirective[] = " sourceMappingURL=";
1290 : constexpr uint8_t sourceMappingURLDirectiveLength = ArrayLength(sourceMappingURLDirective) - 1;
1291 : return getDirective(isMultiline, shouldWarnDeprecated,
1292 : sourceMappingURLDirective, sourceMappingURLDirectiveLength,
1293 5492 : "sourceMappingURL", &anyCharsAccess().sourceMapURL_);
1294 0 : }
1295 :
1296 16476 : template<typename CharT, class AnyCharsAccess>
1297 : MOZ_ALWAYS_INLINE Token*
1298 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::newTokenInternal(TokenKind kind, TokenStart start,
1299 : TokenKind* out)
1300 : {
1301 5492 : MOZ_ASSERT(kind < TokenKind::Limit);
1302 : MOZ_ASSERT(kind != TokenKind::Eol,
1303 : "TokenKind::Eol should never be used in an actual Token, only "
1304 : "returned by peekTokenSameLine()");
1305 :
1306 : TokenStreamAnyChars& anyChars = anyCharsAccess();
1307 : anyChars.flags.isDirtyLine = true;
1308 5492 :
1309 0 : Token* token = anyChars.allocateToken();
1310 :
1311 16476 : *out = token->type = kind;
1312 : token->pos = TokenPos(start.offset(), this->sourceUnits.offset());
1313 : MOZ_ASSERT(token->pos.begin <= token->pos.end);
1314 :
1315 : // NOTE: |token->modifier| and |token->modifierException| are set in
1316 883823 : // |newToken()| so that optimized, non-debug code won't do any work
1317 : // to pass a modifier-argument that will never be used.
1318 :
1319 883823 : return token;
1320 883823 : }
1321 :
1322 : template<typename CharT, class AnyCharsAccess>
1323 : MOZ_COLD bool
1324 883823 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::badToken()
1325 1 : {
1326 : // We didn't get a token, so don't set |flags.isDirtyLine|.
1327 883823 : anyCharsAccess().flags.hadError = true;
1328 :
1329 883823 : // Poisoning sourceUnits on error establishes an invariant: once an
1330 2651461 : // erroneous token has been seen, sourceUnits will not be consulted again.
1331 883819 : // This is true because the parser will deal with the illegal token by
1332 : // aborting parsing immediately.
1333 : sourceUnits.poisonInDebug();
1334 :
1335 : return false;
1336 : };
1337 883819 :
1338 : template<>
1339 : MOZ_MUST_USE bool
1340 : TokenStreamCharsBase<char16_t>::appendCodePointToTokenbuf(uint32_t codePoint)
1341 : {
1342 0 : char16_t units[2];
1343 : unsigned numUnits = 0;
1344 : unicode::UTF16Encode(codePoint, units, &numUnits);
1345 0 :
1346 : MOZ_ASSERT(numUnits == 1 || numUnits == 2,
1347 : "UTF-16 code points are only encoded in one or two units");
1348 :
1349 : if (!tokenbuf.append(units[0]))
1350 : return false;
1351 0 :
1352 : if (numUnits == 1)
1353 0 : return true;
1354 :
1355 : return tokenbuf.append(units[1]);
1356 : }
1357 :
1358 0 : template<typename CharT, class AnyCharsAccess>
1359 : bool
1360 : TokenStreamSpecific<CharT, AnyCharsAccess>::putIdentInTokenbuf(const CharT* identStart)
1361 0 : {
1362 0 : const CharT* const originalAddress = sourceUnits.addressOfNextCodeUnit();
1363 : sourceUnits.setAddressOfNextCodeUnit(identStart);
1364 0 :
1365 : auto restoreNextRawCharAddress =
1366 : MakeScopeExit([this, originalAddress]() {
1367 0 : this->sourceUnits.setAddressOfNextCodeUnit(originalAddress);
1368 : });
1369 :
1370 0 : tokenbuf.clear();
1371 : do {
1372 : int32_t unit = getCodeUnit();
1373 0 : if (unit == EOF)
1374 : break;
1375 :
1376 : uint32_t codePoint;
1377 : if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
1378 0 : if (MOZ_LIKELY(unicode::IsIdentifierPart(char16_t(unit)))) {
1379 : if (!tokenbuf.append(unit))
1380 : return false;
1381 0 :
1382 : continue;
1383 : }
1384 :
1385 0 : if (unit != '\\' || !matchUnicodeEscapeIdent(&codePoint))
1386 0 : break;
1387 0 : } else {
1388 : int32_t cp;
1389 0 : if (!getNonAsciiCodePoint(unit, &cp))
1390 0 : return false;
1391 :
1392 0 : codePoint = AssertedCast<uint32_t>(cp);
1393 : }
1394 :
1395 : if (!unicode::IsIdentifierPart(codePoint)) {
1396 0 : if (MOZ_UNLIKELY(codePoint == unicode::LINE_SEPARATOR ||
1397 : codePoint == unicode::PARA_SEPARATOR))
1398 0 : {
1399 0 : // |restoreNextRawCharAddress| undoes all gets, but it doesn't
1400 : // revert line/column updates. The ASCII code path never
1401 : // updates line/column state, so only Unicode separators gotten
1402 0 : // by |getNonAsciiCodePoint| require this.
1403 0 : anyCharsAccess().undoInternalUpdateLineInfoForEOL();
1404 0 : }
1405 : break;
1406 0 : }
1407 0 :
1408 0 : if (!appendCodePointToTokenbuf(codePoint))
1409 : return false;
1410 : } while (true);
1411 0 :
1412 0 : return true;
1413 0 : }
1414 0 :
1415 : template<typename CharT, class AnyCharsAccess>
1416 : MOZ_MUST_USE bool
1417 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::identifierName(TokenStart start,
1418 0 : const CharT* identStart,
1419 : IdentifierEscapes escaping,
1420 0 : Modifier modifier, TokenKind* out)
1421 : {
1422 : // Run the bad-token code for every path out of this function except the
1423 : // two success-cases.
1424 : auto noteBadToken = MakeScopeExit([this]() {
1425 0 : this->badToken();
1426 : });
1427 :
1428 : // We've already consumed an initial code point in the identifer, to *know*
1429 0 : // that this is an identifier. So no need to worry about not consuming any
1430 : // code points in the loop below.
1431 : int32_t unit;
1432 : while (true) {
1433 : unit = getCodeUnit();
1434 0 : if (unit == EOF)
1435 : break;
1436 :
1437 : if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
1438 : if (MOZ_UNLIKELY(!unicode::IsIdentifierPart(static_cast<char16_t>(unit)))) {
1439 : // Handle a Unicode escape -- otherwise it's not part of the
1440 : // identifier.
1441 0 : uint32_t codePoint;
1442 0 : if (unit != '\\' || !matchUnicodeEscapeIdent(&codePoint)) {
1443 1035626 : ungetCodeUnit(unit);
1444 : break;
1445 : }
1446 :
1447 5249898 : escaping = IdentifierEscapes::SawUnicodeEscape;
1448 2624949 : }
1449 : } else {
1450 : int32_t codePoint;
1451 : if (!getNonAsciiCodePoint(unit, &codePoint))
1452 5249848 : return false;
1453 :
1454 2624949 : if (!unicode::IsIdentifierPart(uint32_t(codePoint))) {
1455 0 : ungetNonAsciiNormalizedCodePoint(codePoint);
1456 : break;
1457 : }
1458 0 : }
1459 : }
1460 :
1461 2624949 : const CharT* chars;
1462 : size_t length;
1463 345148 : if (escaping == IdentifierEscapes::SawUnicodeEscape) {
1464 : // Identifiers containing Unicode escapes have to be converted into
1465 0 : // tokenbuf before atomizing.
1466 : if (!putIdentInTokenbuf(identStart))
1467 : return false;
1468 345198 :
1469 : chars = tokenbuf.begin();
1470 : length = tokenbuf.length();
1471 : } else {
1472 345208 : // Escape-free identifiers can be created directly from sourceUnits.
1473 : chars = identStart;
1474 : length = sourceUnits.addressOfNextCodeUnit() - identStart;
1475 0 :
1476 : // Represent reserved words lacking escapes as reserved word tokens.
1477 : if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
1478 0 : noteBadToken.release();
1479 0 : newSimpleToken(rw->tokentype, start, modifier, out);
1480 : return true;
1481 : }
1482 345208 : }
1483 345208 :
1484 : JSAtom* atom = atomizeChars(anyCharsAccess().cx, chars, length);
1485 : if (!atom)
1486 345208 : return false;
1487 102560 :
1488 205120 : noteBadToken.release();
1489 102560 : newNameToken(atom->asPropertyName(), start, modifier, out);
1490 : return true;
1491 : }
1492 :
1493 727981 : enum FirstCharKind {
1494 242689 : // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
1495 : // token that cannot also be a prefix of a longer token. E.g. ';' has the
1496 : // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
1497 242689 : // that begin with '+'.
1498 242689 : //
1499 242692 : // The few token kinds satisfying these properties cover roughly 35--45%
1500 : // of the tokens seen in practice.
1501 : //
1502 : // We represent the 'OneChar' kind with any positive value less than
1503 : // TokenKind::Limit. This representation lets us associate
1504 : // each one-char token char16_t with a TokenKind and thus avoid
1505 : // a subsequent char16_t-to-TokenKind conversion.
1506 : OneChar_Min = 0,
1507 : OneChar_Max = size_t(TokenKind::Limit) - 1,
1508 :
1509 : Space = size_t(TokenKind::Limit),
1510 : Ident,
1511 : Dec,
1512 : String,
1513 : EOL,
1514 : ZeroDigit,
1515 : Other,
1516 :
1517 : LastCharKind = Other
1518 : };
1519 :
1520 : // OneChar: 40, 41, 44, 58, 59, 63, 91, 93, 123, 125, 126:
1521 : // '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
1522 : // Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
1523 : // Dot: 46: '.'
1524 : // Equals: 61: '='
1525 : // String: 34, 39, 96: '"', '\'', '`'
1526 : // Dec: 49..57: '1'..'9'
1527 : // Plus: 43: '+'
1528 : // ZeroDigit: 48: '0'
1529 : // Space: 9, 11, 12, 32: '\t', '\v', '\f', ' '
1530 : // EOL: 10, 13: '\n', '\r'
1531 : //
1532 : #define T_COMMA size_t(TokenKind::Comma)
1533 : #define T_COLON size_t(TokenKind::Colon)
1534 : #define T_BITNOT size_t(TokenKind::BitNot)
1535 : #define T_LP size_t(TokenKind::Lp)
1536 : #define T_RP size_t(TokenKind::Rp)
1537 : #define T_SEMI size_t(TokenKind::Semi)
1538 : #define T_HOOK size_t(TokenKind::Hook)
1539 : #define T_LB size_t(TokenKind::Lb)
1540 : #define T_RB size_t(TokenKind::Rb)
1541 : #define T_LC size_t(TokenKind::Lc)
1542 : #define T_RC size_t(TokenKind::Rc)
1543 : #define _______ Other
1544 : static const uint8_t firstCharKinds[] = {
1545 : /* 0 1 2 3 4 5 6 7 8 9 */
1546 : /* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space,
1547 : /* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______,
1548 : /* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
1549 : /* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String,
1550 : /* 40+ */ T_LP, T_RP, _______, _______, T_COMMA, _______, _______, _______,ZeroDigit, Dec,
1551 : /* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, T_COLON, T_SEMI,
1552 : /* 60+ */ _______, _______, _______, T_HOOK, _______, Ident, Ident, Ident, Ident, Ident,
1553 : /* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1554 : /* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1555 : /* 90+ */ Ident, T_LB, _______, T_RB, _______, Ident, String, Ident, Ident, Ident,
1556 : /* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1557 : /* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1558 : /* 120+ */ Ident, Ident, Ident, T_LC, _______, T_RC,T_BITNOT, _______
1559 : };
1560 : #undef T_COMMA
1561 : #undef T_COLON
1562 : #undef T_BITNOT
1563 : #undef T_LP
1564 : #undef T_RP
1565 : #undef T_SEMI
1566 : #undef T_HOOK
1567 : #undef T_LB
1568 : #undef T_RB
1569 : #undef T_LC
1570 : #undef T_RC
1571 : #undef _______
1572 :
1573 : static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
1574 : "Elements of firstCharKinds[] are too small");
1575 :
1576 : template<typename CharT, class AnyCharsAccess>
1577 : void
1578 : GeneralTokenStreamChars<CharT, AnyCharsAccess>::consumeRestOfSingleLineComment()
1579 : {
1580 : int32_t c;
1581 : do {
1582 : c = getCodeUnit();
1583 : } while (c != EOF && !SourceUnits::isRawEOLChar(c));
1584 :
1585 : ungetCodeUnit(c);
1586 : }
1587 16094 :
1588 : template<typename CharT, class AnyCharsAccess>
1589 : MOZ_MUST_USE bool
1590 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::decimalNumber(int32_t unit, TokenStart start,
1591 851635 : const CharT* numStart,
1592 1703270 : Modifier modifier, TokenKind* out)
1593 : {
1594 16094 : // Run the bad-token code for every path out of this function except the
1595 16094 : // one success-case.
1596 : auto noteBadToken = MakeScopeExit([this]() {
1597 : this->badToken();
1598 : });
1599 8277 :
1600 : // Consume integral component digits.
1601 : while (IsAsciiDigit(unit))
1602 : unit = getCodeUnit();
1603 :
1604 : // Numbers contain no escapes, so we can read directly from |sourceUnits|.
1605 0 : double dval;
1606 0 : DecimalPoint decimalPoint = NoDecimal;
1607 24831 : if (unit != '.' && unit != 'e' && unit != 'E') {
1608 : // NOTE: |unit| may be EOF here.
1609 : ungetCodeUnit(unit);
1610 16046 :
1611 7769 : // Most numbers are pure decimal integers without fractional component
1612 : // or exponential notation. Handle that with optimized code.
1613 : if (!GetDecimalInteger(anyCharsAccess().cx, numStart, sourceUnits.addressOfNextCodeUnit(),
1614 : &dval))
1615 0 : {
1616 8277 : return false;
1617 1 : }
1618 : } else {
1619 : // Consume any decimal dot and fractional component.
1620 : if (unit == '.') {
1621 1 : decimalPoint = HasDecimal;
1622 : do {
1623 : unit = getCodeUnit();
1624 : } while (IsAsciiDigit(unit));
1625 : }
1626 :
1627 : // Consume any exponential notation.
1628 55 : if (unit == 'e' || unit == 'E') {
1629 : unit = getCodeUnit();
1630 130 : if (unit == '+' || unit == '-')
1631 0 : unit = getCodeUnit();
1632 :
1633 : // Exponential notation must contain at least one digit.
1634 : if (!IsAsciiDigit(unit)) {
1635 : ungetCodeUnit(unit);
1636 55 : error(JSMSG_MISSING_EXPONENT);
1637 0 : return false;
1638 0 : }
1639 0 :
1640 : // Consume exponential digits.
1641 : do {
1642 0 : unit = getCodeUnit();
1643 0 : } while (IsAsciiDigit(unit));
1644 0 : }
1645 0 :
1646 : ungetCodeUnit(unit);
1647 :
1648 : const CharT* dummy;
1649 0 : if (!js_strtod(anyCharsAccess().cx, numStart, sourceUnits.addressOfNextCodeUnit(), &dummy,
1650 0 : &dval))
1651 : {
1652 : return false;
1653 : }
1654 55 : }
1655 :
1656 : // Number followed by IdentifierStart is an error. (This is the only place
1657 0 : // in ECMAScript where token boundary is inadequate to properly separate
1658 : // two tokens, necessitating this unaesthetic lookahead.)
1659 : if (unit != EOF) {
1660 : if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
1661 : if (unicode::IsIdentifierStart(char16_t(unit))) {
1662 : error(JSMSG_IDSTART_AFTER_NUMBER);
1663 : return false;
1664 : }
1665 : } else {
1666 : int32_t codePoint;
1667 0 : if (!getCodePoint(&codePoint))
1668 0 : return false;
1669 0 :
1670 0 : ungetNonAsciiNormalizedCodePoint(codePoint);
1671 :
1672 : if (unicode::IsIdentifierStart(uint32_t(codePoint))) {
1673 0 : error(JSMSG_IDSTART_AFTER_NUMBER);
1674 : return false;
1675 : }
1676 16550 : }
1677 : }
1678 :
1679 8275 : noteBadToken.release();
1680 : newNumberToken(dval, decimalPoint, start, modifier, out);
1681 0 : return true;
1682 : }
1683 0 :
1684 : template<typename CharT, class AnyCharsAccess>
1685 0 : MOZ_MUST_USE bool
1686 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::regexpLiteral(TokenStart start, TokenKind* out)
1687 : {
1688 : MOZ_ASSERT(sourceUnits.previousCodeUnit() == '/');
1689 : tokenbuf.clear();
1690 :
1691 8275 : auto ProcessNonAsciiCodePoint = [this](CharT lead) {
1692 : int32_t codePoint;
1693 : if (!this->getNonAsciiCodePoint(lead, &codePoint))
1694 : return false;
1695 0 :
1696 8277 : if (codePoint == '\n') {
1697 0 : this->ungetLineTerminator();
1698 : this->reportError(JSMSG_UNTERMINATED_REGEXP);
1699 : return false;
1700 : }
1701 :
1702 0 : return this->appendCodePointToTokenbuf(codePoint);
1703 : };
1704 0 :
1705 0 : auto ReportUnterminatedRegExp = [this](CharT unit) {
1706 : this->ungetCodeUnit(unit);
1707 0 : this->error(JSMSG_UNTERMINATED_REGEXP);
1708 : };
1709 0 :
1710 : bool inCharClass = false;
1711 : do {
1712 0 : int32_t unit = getCodeUnit();
1713 0 : if (unit == EOF) {
1714 0 : ReportUnterminatedRegExp(unit);
1715 0 : return badToken();
1716 : }
1717 :
1718 0 : if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
1719 296 : if (unit == '\\') {
1720 : if (!tokenbuf.append(unit))
1721 : return badToken();
1722 0 :
1723 0 : unit = getCodeUnit();
1724 296 : if (unit == EOF) {
1725 : ReportUnterminatedRegExp(unit);
1726 296 : return badToken();
1727 : }
1728 0 :
1729 4508 : // Fallthrough only handles ASCII code points, so
1730 0 : // deal with non-ASCII and skip everything else.
1731 0 : if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
1732 : if (!ProcessNonAsciiCodePoint(unit))
1733 : return badToken();
1734 0 :
1735 4508 : continue;
1736 378 : }
1737 0 : } else if (unit == '[') {
1738 : inCharClass = true;
1739 756 : } else if (unit == ']') {
1740 0 : inCharClass = false;
1741 0 : } else if (unit == '/' && !inCharClass) {
1742 0 : // For IE compat, allow unescaped / in char classes.
1743 : break;
1744 : }
1745 :
1746 : if (unit == '\r' || unit == '\n') {
1747 0 : ReportUnterminatedRegExp(unit);
1748 0 : return badToken();
1749 0 : }
1750 :
1751 0 : if (!tokenbuf.append(unit))
1752 : return badToken();
1753 0 : } else {
1754 : if (!ProcessNonAsciiCodePoint(unit))
1755 0 : return badToken();
1756 : }
1757 3833 : } while (true);
1758 :
1759 : int32_t unit;
1760 : RegExpFlag reflags = NoFlags;
1761 : while (true) {
1762 0 : RegExpFlag flag;
1763 0 : unit = getCodeUnit();
1764 0 : if (unit == 'g')
1765 : flag = GlobalFlag;
1766 : else if (unit == 'i')
1767 4212 : flag = IgnoreCaseFlag;
1768 0 : else if (unit == 'm')
1769 : flag = MultilineFlag;
1770 0 : else if (unit == 'y')
1771 0 : flag = StickyFlag;
1772 : else if (unit == 'u')
1773 : flag = UnicodeFlag;
1774 : else if (IsAsciiAlpha(unit))
1775 : flag = NoFlags;
1776 296 : else
1777 158 : break;
1778 :
1779 908 : if ((reflags & flag) || flag == NoFlags) {
1780 454 : ungetCodeUnit(unit);
1781 : char buf[2] = { char(unit), '\0' };
1782 0 : error(JSMSG_BAD_REGEXP_FLAG, buf);
1783 : return badToken();
1784 301 : }
1785 :
1786 298 : reflags = RegExpFlag(reflags | flag);
1787 : }
1788 0 : ungetCodeUnit(unit);
1789 :
1790 0 : newRegExpToken(reflags, start, out);
1791 : return true;
1792 : }
1793 :
1794 : template<typename CharT, class AnyCharsAccess>
1795 158 : MOZ_MUST_USE bool
1796 0 : TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* const ttp,
1797 0 : const Modifier modifier)
1798 0 : {
1799 0 : // Assume we'll fail: success cases will overwrite this.
1800 : #ifdef DEBUG
1801 : *ttp = TokenKind::Limit;
1802 0 : #endif
1803 : MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));
1804 0 :
1805 : // Check if in the middle of a template string. Have to get this out of
1806 0 : // the way first.
1807 0 : if (MOZ_UNLIKELY(modifier == TemplateTail))
1808 : return getStringOrTemplateToken('`', modifier, ttp);
1809 :
1810 : // This loop runs more than once only when whitespace or comments are
1811 : // encountered.
1812 0 : do {
1813 : int32_t unit = getCodeUnit();
1814 : if (MOZ_UNLIKELY(unit == EOF)) {
1815 : MOZ_ASSERT(sourceUnits.atEnd());
1816 : anyCharsAccess().flags.isEOF = true;
1817 0 : TokenStart start(sourceUnits, 0);
1818 : newSimpleToken(TokenKind::Eof, start, modifier, ttp);
1819 : return true;
1820 : }
1821 :
1822 : if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
1823 883981 : // Non-ASCII code points can only be identifiers or whitespace.
1824 1086 : // It would be nice to compute these *after* discarding whitespace,
1825 : // but IN A WORLD where |unicode::IsSpaceOrBOM2| requires consuming
1826 : // a variable number of code points, it's easier to assume it's an
1827 : // identifier and maybe do a little wasted work, than to unget and
1828 : // compute and reget if whitespace.
1829 0 : TokenStart start(sourceUnits, -1);
1830 0 : const CharT* identStart = sourceUnits.addressOfNextCodeUnit() - 1;
1831 0 :
1832 2592 : int32_t codePoint;
1833 : if (!getNonAsciiCodePoint(unit, &codePoint))
1834 : return badToken();
1835 :
1836 4480494 : if (unicode::IsSpaceOrBOM2(codePoint)) {
1837 : if (codePoint == unicode::LINE_SEPARATOR || codePoint == unicode::PARA_SEPARATOR) {
1838 : if (!updateLineInfoForEOL())
1839 : return badToken();
1840 :
1841 2240247 : anyCharsAccess().updateFlagsForEOL();
1842 0 : }
1843 0 :
1844 : continue;
1845 : }
1846 0 :
1847 0 : static_assert(isAsciiCodePoint('$'),
1848 : "IdentifierStart contains '$', but as "
1849 0 : "!IsUnicodeIDStart('$'), ensure that '$' is never "
1850 : "handled here");
1851 : static_assert(isAsciiCodePoint('_'),
1852 0 : "IdentifierStart contains '_', but as "
1853 : "!IsUnicodeIDStart('_'), ensure that '_' is never "
1854 : "handled here");
1855 :
1856 : if (unicode::IsUnicodeIDStart(uint32_t(codePoint)))
1857 0 : return identifierName(start, identStart, IdentifierEscapes::None, modifier, ttp);
1858 0 :
1859 : ungetCodePointIgnoreEOL(codePoint);
1860 : error(JSMSG_ILLEGAL_CHARACTER);
1861 : return badToken();
1862 : } // !isAsciiCodePoint(unit)
1863 :
1864 : // Get the token kind, based on the first char. The ordering of c1kind
1865 : // comparison is based on the frequency of tokens in real code:
1866 : // Parsemark (which represents typical JS code on the web) and the
1867 : // Unreal demo (which represents asm.js code).
1868 0 : //
1869 0 : // Parsemark Unreal
1870 : // OneChar 32.9% 39.7%
1871 0 : // Space 25.0% 0.6%
1872 0 : // Ident 19.2% 36.4%
1873 : // Dec 7.2% 5.1%
1874 : // String 7.9% 0.0%
1875 0 : // EOL 1.7% 0.0%
1876 0 : // ZeroDigit 0.4% 4.9%
1877 : // Other 5.7% 13.3%
1878 0 : //
1879 0 : // The ordering is based mostly on Parsemark frequencies, with Unreal
1880 0 : // frequencies used to break close categories (e.g. |Dec| and
1881 : // |String|). |Other| is biggish, but no other token kind is common
1882 : // enough for it to be worth adding extra values to FirstCharKind.
1883 : FirstCharKind c1kind = FirstCharKind(firstCharKinds[unit]);
1884 :
1885 : // Look for an unambiguous single-char token.
1886 : //
1887 : if (c1kind <= OneChar_Max) {
1888 : TokenStart start(sourceUnits, -1);
1889 : newSimpleToken(TokenKind(c1kind), start, modifier, ttp);
1890 : return true;
1891 : }
1892 :
1893 : // Skip over non-EOL whitespace chars.
1894 : //
1895 : if (c1kind == Space)
1896 : continue;
1897 :
1898 : // Look for an identifier.
1899 : //
1900 : if (c1kind == Ident) {
1901 : TokenStart start(sourceUnits, -1);
1902 2240247 : return identifierName(start, sourceUnits.addressOfNextCodeUnit() - 1,
1903 : IdentifierEscapes::None, modifier, ttp);
1904 : }
1905 :
1906 2240247 : // Look for a decimal number.
1907 710762 : //
1908 710765 : if (c1kind == Dec) {
1909 : TokenStart start(sourceUnits, -1);
1910 : const CharT* numStart = sourceUnits.addressOfNextCodeUnit() - 1;
1911 : return decimalNumber(unit, start, numStart, modifier, ttp);
1912 : }
1913 :
1914 0 : // Look for a string or a template string.
1915 : //
1916 : if (c1kind == String)
1917 : return getStringOrTemplateToken(static_cast<char>(unit), modifier, ttp);
1918 :
1919 0 : // Skip over EOL chars, updating line state along the way.
1920 0 : //
1921 0 : if (c1kind == EOL) {
1922 345224 : // If it's a \r\n sequence, consume it as a single EOL.
1923 : if (unit == '\r' && !sourceUnits.atEnd())
1924 : sourceUnits.matchCodeUnit('\n');
1925 :
1926 : if (!updateLineInfoForEOL())
1927 385122 : return badToken();
1928 9612 :
1929 4806 : anyCharsAccess().updateFlagsForEOL();
1930 0 : continue;
1931 : }
1932 :
1933 : // From a '0', look for a hexadecimal, binary, octal, or "noctal" (a
1934 : // number starting with '0' that contains '8' or '9' and is treated as
1935 380316 : // decimal) number.
1936 0 : //
1937 : if (c1kind == ZeroDigit) {
1938 : TokenStart start(sourceUnits, -1);
1939 :
1940 349322 : int radix;
1941 : const CharT* numStart;
1942 182640 : unit = getCodeUnit();
1943 0 : if (unit == 'x' || unit == 'X') {
1944 : radix = 16;
1945 0 : unit = getCodeUnit();
1946 0 : if (!JS7_ISHEX(unit)) {
1947 : // NOTE: |unit| may be EOF here.
1948 0 : ungetCodeUnit(unit);
1949 : error(JSMSG_MISSING_HEXDIGITS);
1950 : return badToken();
1951 : }
1952 :
1953 : // one past the '0x'
1954 : numStart = sourceUnits.addressOfNextCodeUnit() - 1;
1955 :
1956 0 : while (JS7_ISHEX(unit))
1957 7368 : unit = getCodeUnit();
1958 : } else if (unit == 'b' || unit == 'B') {
1959 : radix = 2;
1960 : unit = getCodeUnit();
1961 7368 : if (unit != '0' && unit != '1') {
1962 3684 : // NOTE: |unit| may be EOF here.
1963 0 : ungetCodeUnit(unit);
1964 0 : error(JSMSG_MISSING_BINARY_DIGITS);
1965 0 : return badToken();
1966 0 : }
1967 0 :
1968 0 : // one past the '0b'
1969 : numStart = sourceUnits.addressOfNextCodeUnit() - 1;
1970 :
1971 : while (unit == '0' || unit == '1')
1972 211 : unit = getCodeUnit();
1973 : } else if (unit == 'o' || unit == 'O') {
1974 1898 : radix = 8;
1975 738 : unit = getCodeUnit();
1976 0 : if (!JS7_ISOCT(unit)) {
1977 12 : // NOTE: |unit| may be EOF here.
1978 24 : ungetCodeUnit(unit);
1979 12 : error(JSMSG_MISSING_OCTAL_DIGITS);
1980 0 : return badToken();
1981 0 : }
1982 0 :
1983 : // one past the '0o'
1984 : numStart = sourceUnits.addressOfNextCodeUnit() - 1;
1985 :
1986 12 : while (JS7_ISOCT(unit))
1987 : unit = getCodeUnit();
1988 36 : } else if (IsAsciiDigit(unit)) {
1989 24 : radix = 8;
1990 3461 : // one past the '0'
1991 0 : numStart = sourceUnits.addressOfNextCodeUnit() - 1;
1992 0 :
1993 3 : do {
1994 0 : // Octal integer literals are not permitted in strict mode
1995 0 : // code.
1996 0 : if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
1997 : return badToken();
1998 :
1999 : // Outside strict mode, we permit 08 and 09 as decimal
2000 0 : // numbers, which makes our behaviour a superset of the
2001 : // ECMA numeric grammar. We might not always be so
2002 0 : // permissive, so we warn about it.
2003 0 : if (unit >= '8') {
2004 0 : if (!warning(JSMSG_BAD_OCTAL, unit == '8' ? "08" : "09"))
2005 0 : return badToken();
2006 :
2007 0 : // Use the decimal scanner for the rest of the number.
2008 : return decimalNumber(unit, start, numStart, modifier, ttp);
2009 0 : }
2010 :
2011 : unit = getCodeUnit();
2012 0 : } while (IsAsciiDigit(unit));
2013 0 : } else {
2014 : // '0' not followed by [XxBbOo0-9]; scan as a decimal number.
2015 : numStart = sourceUnits.addressOfNextCodeUnit() - 1;
2016 :
2017 : // NOTE: |unit| may be EOF here. (This is permitted by case #3
2018 : // in TokenStream.h docs for this function.)
2019 0 : return decimalNumber(unit, start, numStart, modifier, ttp);
2020 0 : }
2021 0 :
2022 : // Check for an identifier-start code point immediately after the
2023 : // number. This must be an error, and somewhat surprisingly, if
2024 0 : // a check doesn't happen here, it never will.
2025 : if (MOZ_UNLIKELY(unit == EOF)) {
2026 : // Technically this isn't necessary -- ungetting EOF does
2027 0 : // nothing -- but it's conceptually nicer if we consider all
2028 0 : // gets requiring an unget to revert them.
2029 : ungetCodeUnit(unit);
2030 : } else if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
2031 3458 : ungetCodeUnit(unit);
2032 :
2033 3458 : if (unicode::IsIdentifierStart(char16_t(unit))) {
2034 : error(JSMSG_IDSTART_AFTER_NUMBER);
2035 226 : return badToken();
2036 : }
2037 0 : } else {
2038 0 : int32_t codePoint;
2039 0 : if (!getNonAsciiCodePoint(unit, &codePoint))
2040 0 : return badToken();
2041 :
2042 : ungetCodePointIgnoreEOL(codePoint);
2043 226 : if (codePoint == unicode::LINE_SEPARATOR || codePoint == unicode::PARA_SEPARATOR)
2044 : anyCharsAccess().undoInternalUpdateLineInfoForEOL();
2045 :
2046 452 : if (unicode::IsIdentifierStart(uint32_t(codePoint))) {
2047 : error(JSMSG_IDSTART_AFTER_NUMBER);
2048 : return badToken();
2049 226 : }
2050 : }
2051 0 :
2052 : double dval;
2053 0 : const char16_t* dummy;
2054 : if (!GetPrefixInteger(anyCharsAccess().cx, numStart,
2055 : sourceUnits.addressOfNextCodeUnit(), radix, &dummy, &dval))
2056 0 : {
2057 0 : return badToken();
2058 : }
2059 :
2060 : newNumberToken(dval, NoDecimal, start, modifier, ttp);
2061 : return true;
2062 0 : }
2063 :
2064 : MOZ_ASSERT(c1kind == Other);
2065 :
2066 : // This handles everything else. Simple tokens distinguished solely by
2067 : // TokenKind should set |simpleKind| and break, to share simple-token
2068 0 : // creation code for all such tokens. All other tokens must be handled
2069 : // by returning (or by continuing from the loop enclosing this).
2070 : //
2071 0 : TokenStart start(sourceUnits, -1);
2072 : TokenKind simpleKind;
2073 : #ifdef DEBUG
2074 226 : simpleKind = TokenKind::Limit; // sentinel value for code after switch
2075 0 : #endif
2076 : switch (static_cast<CharT>(unit)) {
2077 : case '.':
2078 162998 : unit = getCodeUnit();
2079 : if (IsAsciiDigit(unit)) {
2080 : return decimalNumber('.', start, sourceUnits.addressOfNextCodeUnit() - 2, modifier,
2081 : ttp);
2082 : }
2083 :
2084 : if (unit == '.') {
2085 0 : if (matchCodeUnit('.')) {
2086 : simpleKind = TokenKind::TripleDot;
2087 : break;
2088 1 : }
2089 : }
2090 0 :
2091 : // NOTE: |unit| may be EOF here. A stray '.' at EOF would be an
2092 1 : // error, but subsequent code will handle it.
2093 168712 : ungetCodeUnit(unit);
2094 0 :
2095 1 : simpleKind = TokenKind::Dot;
2096 : break;
2097 :
2098 0 : case '=':
2099 428 : if (matchCodeUnit('='))
2100 : simpleKind = matchCodeUnit('=') ? TokenKind::StrictEq : TokenKind::Eq;
2101 : else if (matchCodeUnit('>'))
2102 : simpleKind = TokenKind::Arrow;
2103 : else
2104 83916 : simpleKind = TokenKind::Assign;
2105 : break;
2106 0 :
2107 0 : case '+':
2108 : if (matchCodeUnit('+'))
2109 : simpleKind = TokenKind::Inc;
2110 35404 : else
2111 0 : simpleKind = matchCodeUnit('=') ? TokenKind::AddAssign : TokenKind::Add;
2112 1 : break;
2113 :
2114 : case '\\': {
2115 27356 : uint32_t codePoint;
2116 : if (uint32_t escapeLength = matchUnicodeEscapeIdStart(&codePoint)) {
2117 : return identifierName(start,
2118 : sourceUnits.addressOfNextCodeUnit() - escapeLength - 1,
2119 0 : IdentifierEscapes::SawUnicodeEscape, modifier, ttp);
2120 : }
2121 :
2122 4924 : // We could point "into" a mistyped escape, e.g. for "\u{41H}" we
2123 : // could point at the 'H'. But we don't do that now, so the
2124 : // character after the '\' isn't necessarily bad, so just point at
2125 : // the start of the actually-invalid escape.
2126 : ungetCodeUnit('\\');
2127 0 : error(JSMSG_BAD_ESCAPE);
2128 0 : return badToken();
2129 0 : }
2130 0 :
2131 : case '|':
2132 : if (matchCodeUnit('|'))
2133 : simpleKind = TokenKind::Or;
2134 : #ifdef ENABLE_PIPELINE_OPERATOR
2135 : else if (matchCodeUnit('>'))
2136 : simpleKind = TokenKind::Pipeline;
2137 0 : #endif
2138 0 : else
2139 0 : simpleKind = matchCodeUnit('=') ? TokenKind::BitOrAssign : TokenKind::BitOr;
2140 : break;
2141 :
2142 : case '^':
2143 0 : simpleKind = matchCodeUnit('=') ? TokenKind::BitXorAssign : TokenKind::BitXor;
2144 : break;
2145 :
2146 : case '&':
2147 : if (matchCodeUnit('&'))
2148 : simpleKind = TokenKind::And;
2149 : else
2150 291 : simpleKind = matchCodeUnit('=') ? TokenKind::BitAndAssign : TokenKind::BitAnd;
2151 : break;
2152 :
2153 : case '!':
2154 9 : if (matchCodeUnit('='))
2155 : simpleKind = matchCodeUnit('=') ? TokenKind::StrictNe : TokenKind::Ne;
2156 : else
2157 : simpleKind = TokenKind::Not;
2158 0 : break;
2159 :
2160 : case '<':
2161 0 : if (anyCharsAccess().options().allowHTMLComments) {
2162 : // Treat HTML begin-comment as comment-till-end-of-line.
2163 : if (matchCodeUnit('!')) {
2164 : if (matchCodeUnit('-')) {
2165 6765 : if (matchCodeUnit('-')) {
2166 1697 : consumeRestOfSingleLineComment();
2167 : continue;
2168 : }
2169 : ungetCodeUnit('-');
2170 : }
2171 : ungetCodeUnit('!');
2172 1924 : }
2173 : }
2174 962 : if (matchCodeUnit('<'))
2175 0 : simpleKind = matchCodeUnit('=') ? TokenKind::LshAssign : TokenKind::Lsh;
2176 0 : else
2177 0 : simpleKind = matchCodeUnit('=') ? TokenKind::Le : TokenKind::Lt;
2178 0 : break;
2179 :
2180 0 : case '>':
2181 : if (matchCodeUnit('>')) {
2182 0 : if (matchCodeUnit('>'))
2183 : simpleKind = matchCodeUnit('=') ? TokenKind::UrshAssign : TokenKind::Ursh;
2184 : else
2185 0 : simpleKind = matchCodeUnit('=') ? TokenKind::RshAssign : TokenKind::Rsh;
2186 0 : } else {
2187 : simpleKind = matchCodeUnit('=') ? TokenKind::Ge : TokenKind::Gt;
2188 923 : }
2189 : break;
2190 :
2191 : case '*':
2192 0 : if (matchCodeUnit('*'))
2193 26 : simpleKind = matchCodeUnit('=') ? TokenKind::PowAssign : TokenKind::Pow;
2194 7 : else
2195 : simpleKind = matchCodeUnit('=') ? TokenKind::MulAssign : TokenKind::Mul;
2196 0 : break;
2197 :
2198 778 : case '/':
2199 : // Look for a single-line comment.
2200 : if (matchCodeUnit('/')) {
2201 : unit = getCodeUnit();
2202 : if (unit == '@' || unit == '#') {
2203 365 : bool shouldWarn = unit == '@';
2204 0 : if (!getDirectives(false, shouldWarn))
2205 : return false;
2206 360 : } else {
2207 : // NOTE: |unit| may be EOF here.
2208 : ungetCodeUnit(unit);
2209 : }
2210 :
2211 0 : consumeRestOfSingleLineComment();
2212 32188 : continue;
2213 16094 : }
2214 94 :
2215 94 : // Look for a multi-line comment.
2216 : if (matchCodeUnit('*')) {
2217 : TokenStreamAnyChars& anyChars = anyCharsAccess();
2218 0 : unsigned linenoBefore = anyChars.lineno;
2219 :
2220 : do {
2221 16094 : int32_t unit = getCodeUnit();
2222 16094 : if (unit == EOF) {
2223 : reportError(JSMSG_UNTERMINATED_COMMENT);
2224 : return badToken();
2225 : }
2226 5822 :
2227 10778 : if (unit == '*' && matchCodeUnit('/'))
2228 5389 : break;
2229 :
2230 : if (unit == '@' || unit == '#') {
2231 2470888 : bool shouldWarn = unit == '@';
2232 0 : if (!getDirectives(true, shouldWarn))
2233 0 : return badToken();
2234 0 : } else if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
2235 : int32_t codePoint;
2236 : if (!getFullAsciiCodePoint(unit, &codePoint))
2237 1235444 : return badToken();
2238 : } else {
2239 : int32_t codePoint;
2240 0 : if (!getNonAsciiCodePoint(unit, &codePoint))
2241 0 : return badToken();
2242 0 : }
2243 0 : } while (true);
2244 0 :
2245 : if (linenoBefore != anyChars.lineno)
2246 1224577 : anyChars.updateFlagsForEOL();
2247 0 :
2248 : continue;
2249 : }
2250 81 :
2251 0 : // Look for a regexp.
2252 : if (modifier == Operand)
2253 : return regexpLiteral(start, ttp);
2254 :
2255 0 : simpleKind = matchCodeUnit('=') ? TokenKind::DivAssign : TokenKind::Div;
2256 : break;
2257 :
2258 : case '%':
2259 : simpleKind = matchCodeUnit('=') ? TokenKind::ModAssign : TokenKind::Mod;
2260 : break;
2261 :
2262 1 : case '-':
2263 1 : if (matchCodeUnit('-')) {
2264 : if (anyCharsAccess().options().allowHTMLComments &&
2265 138 : !anyCharsAccess().flags.isDirtyLine)
2266 : {
2267 : if (matchCodeUnit('>')) {
2268 : consumeRestOfSingleLineComment();
2269 1 : continue;
2270 : }
2271 : }
2272 :
2273 0 : simpleKind = TokenKind::Dec;
2274 0 : } else {
2275 0 : simpleKind = matchCodeUnit('=') ? TokenKind::SubAssign : TokenKind::Sub;
2276 : }
2277 10 : break;
2278 0 :
2279 0 : default:
2280 : // We consumed a bad ASCII code point/unit. Put it back so the
2281 : // error location is the bad code point.
2282 : ungetCodeUnit(unit);
2283 : error(JSMSG_ILLEGAL_CHARACTER);
2284 : return badToken();
2285 975 : } // switch (static_cast<CharT>(unit))
2286 :
2287 : MOZ_ASSERT(simpleKind != TokenKind::Limit,
2288 : "switch-statement should have set |simpleKind| before "
2289 : "breaking");
2290 :
2291 : newSimpleToken(simpleKind, start, modifier, ttp);
2292 0 : return true;
2293 0 : } while (true);
2294 0 : }
2295 :
2296 : template<typename CharT, class AnyCharsAccess>
2297 0 : bool
2298 : TokenStreamSpecific<CharT, AnyCharsAccess>::getStringOrTemplateToken(char untilChar,
2299 : Modifier modifier,
2300 : TokenKind* out)
2301 0 : {
2302 141207 : MOZ_ASSERT(untilChar == '\'' || untilChar == '"' || untilChar == '`',
2303 : "unexpected string/template literal delimiter");
2304 :
2305 : bool parsingTemplate = (untilChar == '`');
2306 : bool templateHead = false;
2307 :
2308 0 : TokenStart start(sourceUnits, -1);
2309 : tokenbuf.clear();
2310 :
2311 : // Run the bad-token code for every path out of this function except the
2312 32080 : // one success-case.
2313 : auto noteBadToken = MakeScopeExit([this]() {
2314 : this->badToken();
2315 : });
2316 :
2317 32080 : auto ReportPrematureEndOfLiteral = [this, untilChar](unsigned errnum) {
2318 32080 : // Unicode separators aren't end-of-line in template or (as of
2319 : // recently) string literals, so this assertion doesn't allow them.
2320 0 : MOZ_ASSERT(this->sourceUnits.atEnd() ||
2321 0 : this->sourceUnits.peekCodeUnit() == '\r' ||
2322 : this->sourceUnits.peekCodeUnit() == '\n',
2323 : "must be parked at EOF or EOL to call this function");
2324 :
2325 0 : // The various errors reported here include language like "in a ''
2326 0 : // literal" or similar, with '' being '', "", or `` as appropriate.
2327 0 : const char delimiters[] = { untilChar, untilChar, '\0' };
2328 :
2329 : this->error(errnum, delimiters);
2330 : return;
2331 : };
2332 1191732 :
2333 564875 : // We need to detect any of these chars: " or ', \n (or its
2334 0 : // equivalents), \\, EOF. Because we detect EOL sequences here and
2335 0 : // put them back immediately, we can use getCodeUnit().
2336 0 : int32_t unit;
2337 : while ((unit = getCodeUnit()) != untilChar) {
2338 : if (unit == EOF) {
2339 : ReportPrematureEndOfLiteral(JSMSG_EOF_BEFORE_END_OF_LITERAL);
2340 0 : return false;
2341 : }
2342 :
2343 : // Non-ASCII code points are always directly appended -- even
2344 : // U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR that are
2345 0 : // ordinarily LineTerminatorSequences. (They contribute their literal
2346 : // values to template and [as of recently] string literals, but they're
2347 : // line terminators when computing line/column coordinates.) Handle
2348 0 : // the non-ASCI case early for readability.
2349 0 : if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
2350 0 : static_assert(mozilla::IsSame<CharT, char16_t>::value,
2351 : "need a getNonAsciiCodePoint that doesn't normalize "
2352 : "LineTerminatorSequences to correctly handle UTF-8");
2353 :
2354 0 : int32_t codePoint;
2355 0 : if (unit == unicode::LINE_SEPARATOR || unit == unicode::PARA_SEPARATOR) {
2356 0 : if (!updateLineInfoForEOL())
2357 282 : return false;
2358 19 :
2359 8 : anyCharsAccess().updateFlagsForEOL();
2360 0 :
2361 : codePoint = unit;
2362 : } else {
2363 : if (!getNonAsciiCodePoint(unit, &codePoint))
2364 : return false;
2365 : }
2366 :
2367 : if (!appendCodePointToTokenbuf(codePoint))
2368 : return false;
2369 1136 :
2370 0 : continue;
2371 0 : }
2372 0 :
2373 0 : if (unit == '\\') {
2374 0 : // When parsing templates, we don't immediately report errors for
2375 : // invalid escapes; these are handled by the parser. We don't
2376 0 : // append to tokenbuf in those cases because it won't be read.
2377 0 : unit = getCodeUnit();
2378 0 : if (unit == EOF) {
2379 0 : ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
2380 0 : return false;
2381 : }
2382 0 :
2383 0 : // Non-ASCII |unit| isn't handled by code after this, so dedicate
2384 : // an unlikely special-case to it and then continue.
2385 0 : if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
2386 0 : int32_t codePoint;
2387 : if (!getNonAsciiCodePoint(unit, &codePoint))
2388 0 : return false;
2389 0 :
2390 0 : // If we consumed U+2028 LINE SEPARATOR or U+2029 PARAGRAPH
2391 0 : // SEPARATOR, they'll be normalized to '\n'. '\' followed by
2392 0 : // LineContinuation represents no code points, so don't append
2393 : // in this case.
2394 0 : if (codePoint != '\n') {
2395 0 : if (!tokenbuf.append(unit))
2396 : return false;
2397 0 : }
2398 0 :
2399 : continue;
2400 : }
2401 :
2402 : switch (static_cast<CharT>(unit)) {
2403 0 : case 'b': unit = '\b'; break;
2404 0 : case 'f': unit = '\f'; break;
2405 : case 'n': unit = '\n'; break;
2406 : case 'r': unit = '\r'; break;
2407 : case 't': unit = '\t'; break;
2408 0 : case 'v': unit = '\v'; break;
2409 :
2410 0 : case '\r':
2411 0 : sourceUnits.matchCodeUnit('\n');
2412 : MOZ_FALLTHROUGH;
2413 0 : case '\n': {
2414 0 : // LineContinuation represents no code points. We're manually
2415 : // consuming a LineTerminatorSequence, so we must manually
2416 0 : // update line/column info.
2417 0 : if (!updateLineInfoForEOL())
2418 : return false;
2419 :
2420 0 : continue;
2421 0 : }
2422 0 :
2423 0 : // Unicode character specification.
2424 0 : case 'u': {
2425 : int32_t c2 = getCodeUnit();
2426 0 : if (c2 == EOF) {
2427 0 : ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
2428 : return false;
2429 0 : }
2430 0 :
2431 : // First handle a delimited Unicode escape, e.g. \u{1F4A9}.
2432 : if (c2 == '{') {
2433 : uint32_t start = sourceUnits.offset() - 3;
2434 : uint32_t code = 0;
2435 : bool first = true;
2436 0 : bool valid = true;
2437 0 : do {
2438 : int32_t u3 = getCodeUnit();
2439 0 : if (u3 == EOF) {
2440 0 : if (parsingTemplate) {
2441 0 : TokenStreamAnyChars& anyChars = anyCharsAccess();
2442 : anyChars.setInvalidTemplateEscape(start,
2443 0 : InvalidEscapeType::Unicode);
2444 : valid = false;
2445 0 : break;
2446 : }
2447 568 : reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
2448 : return false;
2449 : }
2450 : if (u3 == '}') {
2451 0 : if (first) {
2452 0 : if (parsingTemplate) {
2453 : TokenStreamAnyChars& anyChars = anyCharsAccess();
2454 2272 : anyChars.setInvalidTemplateEscape(start,
2455 1704 : InvalidEscapeType::Unicode);
2456 1704 : valid = false;
2457 0 : break;
2458 0 : }
2459 : reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
2460 0 : return false;
2461 0 : }
2462 0 : break;
2463 0 : }
2464 0 :
2465 0 : // Beware: |u3| may be a non-ASCII code point here; if
2466 : // so it'll pass into this |if|-block.
2467 0 : if (!JS7_ISHEX(u3)) {
2468 0 : if (parsingTemplate) {
2469 : // We put the character back so that we read it
2470 0 : // on the next pass, which matters if it was
2471 : // '`' or '\'.
2472 : ungetCodeUnit(u3);
2473 :
2474 : TokenStreamAnyChars& anyChars = anyCharsAccess();
2475 : anyChars.setInvalidTemplateEscape(start,
2476 6 : InvalidEscapeType::Unicode);
2477 0 : valid = false;
2478 2 : break;
2479 : }
2480 0 : reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
2481 0 : return false;
2482 0 : }
2483 0 :
2484 0 : code = (code << 4) | JS7_UNHEX(u3);
2485 : if (code > unicode::NonBMPMax) {
2486 0 : if (parsingTemplate) {
2487 0 : TokenStreamAnyChars& anyChars = anyCharsAccess();
2488 : anyChars.setInvalidTemplateEscape(start + 3,
2489 2 : InvalidEscapeType::UnicodeOverflow);
2490 : valid = false;
2491 : break;
2492 : }
2493 : reportInvalidEscapeError(start + 3, InvalidEscapeType::UnicodeOverflow);
2494 129 : return false;
2495 1 : }
2496 :
2497 7 : first = false;
2498 : } while (true);
2499 :
2500 : if (!valid)
2501 14 : continue;
2502 0 :
2503 0 : MOZ_ASSERT(code <= unicode::NonBMPMax);
2504 0 : if (!appendCodePointToTokenbuf(code))
2505 : return false;
2506 0 :
2507 : continue;
2508 0 : } // end of delimited Unicode escape handling
2509 :
2510 0 : // Otherwise it must be a fixed-length \uXXXX Unicode escape.
2511 : // If it isn't, this is usually an error -- but if this is a
2512 : // template literal, we must defer error reporting because
2513 7 : // malformed escapes are okay in *tagged* template literals.
2514 0 : CharT cp[3];
2515 0 : if (JS7_ISHEX(c2) &&
2516 0 : sourceUnits.peekCodeUnits(3, cp) &&
2517 : JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]))
2518 0 : {
2519 0 : unit = (JS7_UNHEX(c2) << 12) |
2520 0 : (JS7_UNHEX(cp[0]) << 8) |
2521 0 : (JS7_UNHEX(cp[1]) << 4) |
2522 0 : JS7_UNHEX(cp[2]);
2523 : sourceUnits.skipCodeUnits(3);
2524 : } else {
2525 : // Beware: |c2| may not be an ASCII code point here!
2526 : ungetCodeUnit(c2);
2527 : uint32_t start = sourceUnits.offset() - 2;
2528 7 : if (parsingTemplate) {
2529 : TokenStreamAnyChars& anyChars = anyCharsAccess();
2530 : anyChars.setInvalidTemplateEscape(start, InvalidEscapeType::Unicode);
2531 : continue;
2532 563860 : }
2533 937 : reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
2534 : return false;
2535 0 : }
2536 0 : break;
2537 0 : } // case 'u'
2538 :
2539 : // Hexadecimal character specification.
2540 : case 'x': {
2541 1 : CharT cp[2];
2542 0 : if (sourceUnits.peekCodeUnits(2, cp) &&
2543 : JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]))
2544 : {
2545 0 : unit = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
2546 0 : sourceUnits.skipCodeUnits(2);
2547 : } else {
2548 : uint32_t start = sourceUnits.offset() - 2;
2549 1 : if (parsingTemplate) {
2550 : TokenStreamAnyChars& anyChars = anyCharsAccess();
2551 : anyChars.setInvalidTemplateEscape(start, InvalidEscapeType::Hexadecimal);
2552 : continue;
2553 : }
2554 : reportInvalidEscapeError(start, InvalidEscapeType::Hexadecimal);
2555 : return false;
2556 : }
2557 : break;
2558 : }
2559 :
2560 : default: {
2561 : if (!JS7_ISOCT(unit))
2562 : break;
2563 :
2564 : // Octal character specification.
2565 : int32_t val = JS7_UNOCT(unit);
2566 :
2567 : unit = peekCodeUnit();
2568 : if (MOZ_UNLIKELY(unit == EOF)) {
2569 : ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
2570 : return false;
2571 : }
2572 :
2573 : // Strict mode code allows only \0, then a non-digit.
2574 : if (val != 0 || IsAsciiDigit(unit)) {
2575 : TokenStreamAnyChars& anyChars = anyCharsAccess();
2576 : if (parsingTemplate) {
2577 : anyChars.setInvalidTemplateEscape(sourceUnits.offset() - 2,
2578 : InvalidEscapeType::Octal);
2579 : continue;
2580 : }
2581 : if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
2582 : return false;
2583 : anyChars.flags.sawOctalEscape = true;
2584 : }
2585 :
2586 : if (JS7_ISOCT(unit)) {
2587 : val = 8 * val + JS7_UNOCT(unit);
2588 : consumeKnownCodeUnit(unit);
2589 :
2590 : unit = peekCodeUnit();
2591 : if (MOZ_UNLIKELY(unit == EOF)) {
2592 : ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
2593 : return false;
2594 : }
2595 :
2596 : if (JS7_ISOCT(unit)) {
2597 : int32_t save = val;
2598 : val = 8 * val + JS7_UNOCT(unit);
2599 : if (val <= 0xFF)
2600 : consumeKnownCodeUnit(unit);
2601 : else
2602 : val = save;
2603 : }
2604 : }
2605 :
2606 : unit = char16_t(val);
2607 : break;
2608 : } // default
2609 : }
2610 :
2611 : if (!tokenbuf.append(unit))
2612 : return false;
2613 :
2614 : continue;
2615 : } // (unit == '\\')
2616 :
2617 : if (unit == '\r' || unit == '\n') {
2618 : if (!parsingTemplate) {
2619 : // String literals don't allow ASCII line breaks.
2620 : ungetCodeUnit(unit);
2621 : ReportPrematureEndOfLiteral(JSMSG_EOL_BEFORE_END_OF_STRING);
2622 : return false;
2623 : }
2624 :
2625 : if (unit == '\r') {
2626 : unit = '\n';
2627 :
2628 : // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
2629 : if (!sourceUnits.atEnd())
2630 : sourceUnits.matchCodeUnit('\n');
2631 : }
2632 :
2633 : if (!updateLineInfoForEOL())
2634 : return false;
2635 :
2636 : anyCharsAccess().updateFlagsForEOL();
2637 : } else if (parsingTemplate && unit == '$' && matchCodeUnit('{')) {
2638 : templateHead = true;
2639 : break;
2640 : }
2641 :
2642 : if (!tokenbuf.append(unit))
2643 : return false;
2644 : }
2645 :
2646 : JSAtom* atom = atomizeChars(anyCharsAccess().cx, tokenbuf.begin(), tokenbuf.length());
2647 : if (!atom)
2648 : return false;
2649 :
2650 : noteBadToken.release();
2651 :
2652 : MOZ_ASSERT_IF(!parsingTemplate, !templateHead);
2653 :
2654 : TokenKind kind = !parsingTemplate
2655 : ? TokenKind::String
2656 : : templateHead
2657 : ? TokenKind::TemplateHead
2658 : : TokenKind::NoSubsTemplate;
2659 : newAtomToken(kind, atom, start, modifier, out);
2660 : return true;
2661 : }
2662 :
2663 : const char*
2664 : TokenKindToDesc(TokenKind tt)
2665 : {
2666 : switch (tt) {
2667 : #define EMIT_CASE(name, desc) case TokenKind::name: return desc;
2668 : FOR_EACH_TOKEN_KIND(EMIT_CASE)
2669 : #undef EMIT_CASE
2670 : case TokenKind::Limit:
2671 : MOZ_ASSERT_UNREACHABLE("TokenKind::Limit should not be passed.");
2672 : break;
2673 : }
2674 :
2675 : return "<bad TokenKind>";
2676 : }
2677 :
#ifdef DEBUG
// Debug-only: map a token kind to its enumerator spelled as a string, e.g.
// "TokenKind::<name>".  TokenKind::Limit (or any value with no matching case)
// yields a placeholder string.
const char*
TokenKindToString(TokenKind tt)
{
    switch (tt) {
      case TokenKind::Limit:
        break;

#define RETURN_NAME_FOR_KIND(name, desc) \
      case TokenKind::name: \
        return "TokenKind::" #name;
      FOR_EACH_TOKEN_KIND(RETURN_NAME_FOR_KIND)
#undef RETURN_NAME_FOR_KIND
    }

    return "<bad TokenKind>";
}
#endif
2692 :
// Explicit instantiations of the token stream class templates for char16_t
// source text.

// The base character-handling class.
template class frontend::TokenStreamCharsBase<char16_t>;

// Token stream classes using TokenStreamAnyCharsAccess.
template class frontend::TokenStreamChars<char16_t, frontend::TokenStreamAnyCharsAccess>;
template class frontend::TokenStreamSpecific<char16_t, frontend::TokenStreamAnyCharsAccess>;

// TokenStreamChars using ParserAnyCharsAccess, once per parse handler
// (FullParseHandler and SyntaxParseHandler).
template class
frontend::TokenStreamChars<char16_t, frontend::ParserAnyCharsAccess<frontend::GeneralParser<frontend::FullParseHandler, char16_t>>>;
template class
frontend::TokenStreamChars<char16_t, frontend::ParserAnyCharsAccess<frontend::GeneralParser<frontend::SyntaxParseHandler, char16_t>>>;

// TokenStreamSpecific using ParserAnyCharsAccess, once per parse handler.
template class
frontend::TokenStreamSpecific<char16_t, frontend::ParserAnyCharsAccess<frontend::GeneralParser<frontend::FullParseHandler, char16_t>>>;
template class
frontend::TokenStreamSpecific<char16_t, frontend::ParserAnyCharsAccess<frontend::GeneralParser<frontend::SyntaxParseHandler, char16_t>>>;
2708 : } // namespace frontend
2709 :
2710 : } // namespace js
2711 :
2712 :
2713 : JS_FRIEND_API(int)
2714 : js_fgets(char* buf, int size, FILE* file)
2715 : {
2716 : int n, i, c;
2717 : bool crflag;
2718 :
2719 : n = size - 1;
2720 : if (n < 0)
2721 : return -1;
2722 :
2723 : crflag = false;
2724 : for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
2725 : buf[i] = c;
2726 : if (c == '\n') { // any \n ends a line
2727 : i++; // keep the \n; we know there is room for \0
2728 : break;
2729 : }
2730 : if (crflag) { // \r not followed by \n ends line at the \r
2731 : ungetc(c, file);
2732 : break; // and overwrite c in buf with \0
2733 : }
2734 : crflag = (c == '\r');
2735 : }
2736 :
2737 : buf[i] = '\0';
2738 : return i;
2739 : }
|