LCOV - code coverage report
Current view: top level - parser/html - nsHtml5StreamParser.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 1 858 0.1 %
Date: 2018-08-07 16:35:00 Functions: 0 0 -
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* vim: set sw=2 ts=2 et tw=79: */
       3             : /* This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : #include "nsHtml5StreamParser.h"
       8             : 
       9             : #include "mozilla/DebugOnly.h"
      10             : #include "mozilla/Encoding.h"
      11             : #include "nsContentUtils.h"
      12             : #include "nsHtml5Tokenizer.h"
      13             : #include "nsIHttpChannel.h"
      14             : #include "nsHtml5Parser.h"
      15             : #include "nsHtml5TreeBuilder.h"
      16             : #include "nsHtml5AtomTable.h"
      17             : #include "nsHtml5Module.h"
      18             : #include "nsHtml5StreamParserPtr.h"
      19             : #include "nsIDocShell.h"
      20             : #include "nsIScriptError.h"
      21             : #include "mozilla/Preferences.h"
      22             : #include "mozilla/StaticPrefs.h"
      23             : #include "mozilla/SystemGroup.h"
      24             : #include "mozilla/UniquePtrExtensions.h"
      25             : #include "nsHtml5Highlighter.h"
      26             : #include "expat_config.h"
      27             : #include "expat.h"
      28             : #include "nsINestedURI.h"
      29             : #include "nsCharsetSource.h"
      30             : #include "nsIWyciwygChannel.h"
      31             : #include "nsIThreadRetargetableRequest.h"
      32             : #include "nsPrintfCString.h"
      33             : #include "nsNetUtil.h"
      34             : #include "nsXULAppAPI.h"
      35             : #include "mozilla/SchedulerGroup.h"
      36             : #include "nsJSEnvironment.h"
      37             : 
      38             : using namespace mozilla;
      39             : 
      40             : /*
      41             :  * Note that nsHtml5StreamParser implements cycle collecting AddRef and
      42             :  * Release. Therefore, nsHtml5StreamParser must never be refcounted from
      43             :  * the parser thread!
      44             :  *
      45             :  * To work around this limitation, runnables posted by the main thread to the
      46             :  * parser thread hold their reference to the stream parser in an
      47             :  * nsHtml5StreamParserPtr. Upon creation, nsHtml5StreamParserPtr addrefs the
      48             :  * object it holds
      49             :  * just like a regular nsRefPtr. This is OK, since the creation of the
      50             :  * runnable and the nsHtml5StreamParserPtr happens on the main thread.
      51             :  *
      52             :  * When the runnable is done on the parser thread, the destructor of
      53             :  * nsHtml5StreamParserPtr runs there. It doesn't call Release on the held object
      54             :  * directly. Instead, it posts another runnable back to the main thread where
      55             :  * that runnable calls Release on the wrapped object.
      56             :  *
      57             :  * When posting runnables in the other direction, the runnables have to be
      58             :  * created on the main thread when nsHtml5StreamParser is instantiated and
      59             :  * held for the lifetime of the nsHtml5StreamParser. This works, because the
      60             :  * same runnable can be dispatched multiple times and currently runnables
      61             :  * posted from the parser thread to main thread don't need to wrap any
      62             :  * runnable-specific data. (In the other direction, the runnables most notably
      63             :  * wrap the byte data of the stream.)
      64             :  */
      65           0 : NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser)
      66           0 : NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser)
      67             : 
      68           0 : NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser)
      69           0 :   NS_INTERFACE_TABLE(nsHtml5StreamParser, nsICharsetDetectionObserver)
      70           0 :   NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser)
      71           0 : NS_INTERFACE_MAP_END
      72             : 
      73             : NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser)
      74             : 
      75           0 : NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser)
      76           0 :   tmp->DropTimer();
      77           0 :   NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
      78           0 :   NS_IMPL_CYCLE_COLLECTION_UNLINK(mRequest)
      79           0 :   NS_IMPL_CYCLE_COLLECTION_UNLINK(mOwner)
      80           0 :   tmp->mExecutorFlusher = nullptr;
      81           0 :   tmp->mLoadFlusher = nullptr;
      82           0 :   tmp->mExecutor = nullptr;
      83           0 :   NS_IMPL_CYCLE_COLLECTION_UNLINK(mChardet)
      84           0 : NS_IMPL_CYCLE_COLLECTION_UNLINK_END
      85             : 
      86           0 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser)
      87           0 :   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
      88           0 :   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mRequest)
      89           0 :   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mOwner)
      90             :   // hack: count the strongly owned edge wrapped in the runnable
      91           0 :   if (tmp->mExecutorFlusher) {
      92           0 :     NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mExecutorFlusher->mExecutor");
      93           0 :     cb.NoteXPCOMChild(static_cast<nsIContentSink*>(tmp->mExecutor));
      94             :   }
      95             :   // hack: count the strongly owned edge wrapped in the runnable
      96           0 :   if (tmp->mLoadFlusher) {
      97           0 :     NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor");
      98           0 :     cb.NoteXPCOMChild(static_cast<nsIContentSink*>(tmp->mExecutor));
      99             :   }
     100             :   // hack: count self if held by mChardet
     101           0 :   if (tmp->mChardet) {
     102           0 :     NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mChardet->mObserver");
     103           0 :     cb.NoteXPCOMChild(static_cast<nsICharsetDetectionObserver*>(tmp));
     104             :   }
     105           0 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
     106             : 
     107           0 : class nsHtml5ExecutorFlusher : public Runnable
     108             : {
     109             : private:
     110             :   RefPtr<nsHtml5TreeOpExecutor> mExecutor;
     111             : 
     112             : public:
     113           0 :   explicit nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor* aExecutor)
     114           0 :     : Runnable("nsHtml5ExecutorFlusher")
     115           0 :     , mExecutor(aExecutor)
     116             :   {
     117           0 :   }
     118           0 :   NS_IMETHOD Run() override
     119             :   {
     120           0 :     if (!mExecutor->isInList()) {
     121           0 :       mExecutor->RunFlushLoop();
     122             :     }
     123           0 :     return NS_OK;
     124             :   }
     125             : };
     126             : 
     127           0 : class nsHtml5LoadFlusher : public Runnable
     128             : {
     129             : private:
     130             :   RefPtr<nsHtml5TreeOpExecutor> mExecutor;
     131             : 
     132             : public:
     133           0 :   explicit nsHtml5LoadFlusher(nsHtml5TreeOpExecutor* aExecutor)
     134           0 :     : Runnable("nsHtml5LoadFlusher")
     135           0 :     , mExecutor(aExecutor)
     136             :   {
     137           0 :   }
     138           0 :   NS_IMETHOD Run() override
     139             :   {
     140           0 :     mExecutor->FlushSpeculativeLoads();
     141           0 :     return NS_OK;
     142             :   }
     143             : };
     144             : 
     145           0 : nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
     146             :                                          nsHtml5Parser* aOwner,
     147           0 :                                          eParserMode aMode)
     148             :   : mSniffingLength(0)
     149             :   , mBomState(eBomState::BOM_SNIFFING_NOT_STARTED)
     150             :   , mCharsetSource(kCharsetUninitialized)
     151             :   , mEncoding(WINDOWS_1252_ENCODING)
     152             :   , mReparseForbidden(false)
     153             :   , mLastBuffer(nullptr) // Will be filled when starting
     154             :   , mExecutor(aExecutor)
     155             :   , mTreeBuilder(new nsHtml5TreeBuilder(
     156           0 :       (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML)
     157           0 :         ? nullptr
     158           0 :         : mExecutor->GetStage(),
     159           0 :       aMode == NORMAL ? mExecutor->GetStage() : nullptr))
     160           0 :   , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML))
     161             :   , mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex")
     162             :   , mOwner(aOwner)
     163             :   , mLastWasCR(false)
     164             :   , mStreamState(eHtml5StreamState::STREAM_NOT_STARTED)
     165             :   , mSpeculating(false)
     166             :   , mAtEOF(false)
     167             :   , mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex")
     168             :   , mSpeculationFailureCount(0)
     169             :   , mTerminated(false)
     170             :   , mInterrupted(false)
     171             :   , mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex")
     172           0 :   , mEventTarget(nsHtml5Module::GetStreamParserThread()->SerialEventTarget())
     173           0 :   , mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor))
     174           0 :   , mLoadFlusher(new nsHtml5LoadFlusher(aExecutor))
     175             :   , mFeedChardet(false)
     176             :   , mInitialEncodingWasFromParentFrame(false)
     177             :   , mHasHadErrors(false)
     178           0 :   , mFlushTimer(NS_NewTimer())
     179             :   , mFlushTimerMutex("nsHtml5StreamParser mFlushTimerMutex")
     180             :   , mFlushTimerArmed(false)
     181             :   , mFlushTimerEverFired(false)
     182           0 :   , mMode(aMode)
     183             : {
     184           0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
     185           0 :   mFlushTimer->SetTarget(mEventTarget);
     186             : #ifdef DEBUG
     187           0 :   mAtomTable.SetPermittedLookupEventTarget(mEventTarget);
     188             : #endif
     189           0 :   mTokenizer->setInterner(&mAtomTable);
     190           0 :   mTokenizer->setEncodingDeclarationHandler(this);
     191             : 
     192           0 :   if (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML) {
     193             :     nsHtml5Highlighter* highlighter =
     194           0 :       new nsHtml5Highlighter(mExecutor->GetStage());
     195           0 :     mTokenizer->EnableViewSource(highlighter);   // takes ownership
     196           0 :     mTreeBuilder->EnableViewSource(highlighter); // doesn't own
     197             :   }
     198             : 
     199             :   // Chardet instantiation adapted from File.
     200             :   // Chardet is initialized here even if it turns out to be useless
     201             :   // to make the chardet refcount its observer (nsHtml5StreamParser)
     202             :   // on the main thread.
     203           0 :   nsAutoCString detectorName;
     204           0 :   Preferences::GetLocalizedCString("intl.charset.detector", detectorName);
     205           0 :   if (!detectorName.IsEmpty()) {
     206           0 :     nsAutoCString detectorContractID;
     207           0 :     detectorContractID.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE);
     208           0 :     detectorContractID += detectorName;
     209           0 :     if ((mChardet = do_CreateInstance(detectorContractID.get()))) {
     210           0 :       (void)mChardet->Init(this);
     211           0 :       mFeedChardet = true;
     212             :     }
     213             :   }
     214             : 
     215             :   // There's a zeroing operator new for everything else
     216           0 : }
     217             : 
     218           0 : nsHtml5StreamParser::~nsHtml5StreamParser()
     219             : {
     220           0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
     221           0 :   mTokenizer->end();
     222             : #ifdef DEBUG
     223             :   {
     224           0 :     mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex);
     225           0 :     MOZ_ASSERT(!mFlushTimer, "Flush timer was not dropped before dtor!");
     226             :   }
     227           0 :   mRequest = nullptr;
     228           0 :   mObserver = nullptr;
     229           0 :   mUnicodeDecoder = nullptr;
     230           0 :   mSniffingBuffer = nullptr;
     231           0 :   mMetaScanner = nullptr;
     232           0 :   mFirstBuffer = nullptr;
     233           0 :   mExecutor = nullptr;
     234           0 :   mTreeBuilder = nullptr;
     235           0 :   mTokenizer = nullptr;
     236           0 :   mOwner = nullptr;
     237             : #endif
     238           0 : }
     239             : 
     240             : nsresult
     241           0 : nsHtml5StreamParser::GetChannel(nsIChannel** aChannel)
     242             : {
     243           0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
     244           0 :   return mRequest ? CallQueryInterface(mRequest, aChannel)
     245           0 :                   : NS_ERROR_NOT_AVAILABLE;
     246             : }
     247             : 
     248             : NS_IMETHODIMP
     249           0 : nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
     250             : {
     251           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     252           0 :   if (aConf == eBestAnswer || aConf == eSureAnswer) {
     253           0 :     mFeedChardet = false; // just in case
     254             :     auto encoding =
     255           0 :       Encoding::ForLabelNoReplacement(nsDependentCString(aCharset));
     256           0 :     if (!encoding) {
     257             :       return NS_OK;
     258             :     }
     259           0 :     if (HasDecoder()) {
     260           0 :       if (mEncoding == encoding) {
     261           0 :         NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
     262             :                      "Why are we running chardet at all?");
     263           0 :         mCharsetSource = kCharsetFromAutoDetection;
     264           0 :         mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
     265             :       } else {
     266             :         // We've already committed to a decoder. Request a reload from the
     267             :         // docshell.
     268           0 :         mTreeBuilder->NeedsCharsetSwitchTo(
     269           0 :           WrapNotNull(encoding), kCharsetFromAutoDetection, 0);
     270           0 :         FlushTreeOpsAndDisarmTimer();
     271           0 :         Interrupt();
     272             :       }
     273             :     } else {
     274             :       // Got a confident answer from the sniffing buffer. That code will
     275             :       // take care of setting up the decoder.
     276           0 :       mEncoding = WrapNotNull(encoding);
     277           0 :       mCharsetSource = kCharsetFromAutoDetection;
     278           0 :       mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
     279             :     }
     280             :   }
     281             :   return NS_OK;
     282             : }
     283             : 
     284             : void
     285           0 : nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL)
     286             : {
     287           0 :   if (aURL) {
     288           0 :     nsCOMPtr<nsIURI> temp;
     289             :     bool isViewSource;
     290           0 :     aURL->SchemeIs("view-source", &isViewSource);
     291           0 :     if (isViewSource) {
     292           0 :       nsCOMPtr<nsINestedURI> nested = do_QueryInterface(aURL);
     293           0 :       nested->GetInnerURI(getter_AddRefs(temp));
     294             :     } else {
     295           0 :       temp = aURL;
     296             :     }
     297             :     bool isData;
     298           0 :     temp->SchemeIs("data", &isData);
     299           0 :     if (isData) {
     300             :       // Avoid showing potentially huge data: URLs. The three last bytes are
     301             :       // UTF-8 for an ellipsis.
     302           0 :       mViewSourceTitle.AssignLiteral("data:\xE2\x80\xA6");
     303             :     } else {
     304           0 :       nsresult rv = temp->GetSpec(mViewSourceTitle);
     305           0 :       if (NS_FAILED(rv)) {
     306           0 :         mViewSourceTitle.AssignLiteral("\xE2\x80\xA6");
     307             :       }
     308             :     }
     309             :   }
     310           0 : }
     311             : 
     312             : nsresult
     313           0 : nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     314             :   const uint8_t* aFromSegment, // can be null
     315             :   uint32_t aCount,
     316             :   uint32_t* aWriteCount)
     317             : {
     318           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     319           0 :   nsresult rv = NS_OK;
     320           0 :   mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
     321           0 :   if (mSniffingBuffer) {
     322             :     uint32_t writeCount;
     323           0 :     rv = WriteStreamBytes(mSniffingBuffer.get(), mSniffingLength, &writeCount);
     324           0 :     NS_ENSURE_SUCCESS(rv, rv);
     325           0 :     mSniffingBuffer = nullptr;
     326             :   }
     327           0 :   mMetaScanner = nullptr;
     328           0 :   if (aFromSegment) {
     329           0 :     rv = WriteStreamBytes(aFromSegment, aCount, aWriteCount);
     330             :   }
     331             :   return rv;
     332             : }
     333             : 
     334             : nsresult
     335           0 : nsHtml5StreamParser::SetupDecodingFromBom(NotNull<const Encoding*> aEncoding)
     336             : {
     337           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     338           0 :   mEncoding = aEncoding;
     339           0 :   mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
     340           0 :   mCharsetSource = kCharsetFromByteOrderMark;
     341           0 :   mFeedChardet = false;
     342           0 :   mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
     343           0 :   mSniffingBuffer = nullptr;
     344           0 :   mMetaScanner = nullptr;
     345           0 :   mBomState = BOM_SNIFFING_OVER;
     346           0 :   return NS_OK;
     347             : }
     348             : 
     349             : void
     350           0 : nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment,
     351             :                                                  uint32_t aCountToSniffingLimit)
     352             : {
     353             :   // Avoid underspecified heuristic craziness for XHR
     354           0 :   if (mMode == LOAD_AS_DATA) {
     355           0 :     return;
     356             :   }
     357             :   // Make sure there's enough data. Require room for "<title></title>"
     358           0 :   if (mSniffingLength + aCountToSniffingLimit < 30) {
     359             :     return;
     360             :   }
     361             :   // even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1
     362           0 :   bool byteZero[2] = { false, false };
     363           0 :   bool byteNonZero[2] = { false, false };
     364           0 :   uint32_t i = 0;
     365           0 :   if (mSniffingBuffer) {
     366           0 :     for (; i < mSniffingLength; ++i) {
     367           0 :       if (mSniffingBuffer[i]) {
     368           0 :         if (byteNonZero[1 - (i % 2)]) {
     369             :           return;
     370             :         }
     371           0 :         byteNonZero[i % 2] = true;
     372             :       } else {
     373           0 :         if (byteZero[1 - (i % 2)]) {
     374             :           return;
     375             :         }
     376           0 :         byteZero[i % 2] = true;
     377             :       }
     378             :     }
     379             :   }
     380           0 :   if (aFromSegment) {
     381           0 :     for (uint32_t j = 0; j < aCountToSniffingLimit; ++j) {
     382           0 :       if (aFromSegment[j]) {
     383           0 :         if (byteNonZero[1 - ((i + j) % 2)]) {
     384             :           return;
     385             :         }
     386           0 :         byteNonZero[(i + j) % 2] = true;
     387             :       } else {
     388           0 :         if (byteZero[1 - ((i + j) % 2)]) {
     389             :           return;
     390             :         }
     391           0 :         byteZero[(i + j) % 2] = true;
     392             :       }
     393             :     }
     394             :   }
     395             : 
     396           0 :   if (byteNonZero[0]) {
     397           0 :     mEncoding = UTF_16LE_ENCODING;
     398             :   } else {
     399           0 :     mEncoding = UTF_16BE_ENCODING;
     400             :   }
     401           0 :   mCharsetSource = kCharsetFromIrreversibleAutoDetection;
     402           0 :   mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
     403           0 :   mFeedChardet = false;
     404           0 :   mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0);
     405             : }
     406             : 
     407             : void
     408           0 : nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding)
     409             : {
     410           0 :   if (aEncoding) {
     411           0 :     nsDependentString utf16(aEncoding);
     412           0 :     nsAutoCString utf8;
     413           0 :     CopyUTF16toUTF8(utf16, utf8);
     414           0 :     auto encoding = PreferredForInternalEncodingDecl(utf8);
     415           0 :     if (encoding) {
     416           0 :       mEncoding = WrapNotNull(encoding);
     417           0 :       mCharsetSource = kCharsetFromMetaTag; // closest for XML
     418           0 :       return;
     419             :     }
     420             :     // else the page declared an encoding Gecko doesn't support and we'd
     421             :     // end up defaulting to UTF-8 anyway. Might as well fall through here
     422             :     // right away and let the encoding be set to UTF-8 which we'd default to
     423             :     // anyway.
     424             :   }
     425           0 :   mEncoding = UTF_8_ENCODING;           // XML defaults to UTF-8 without a BOM
     426           0 :   mCharsetSource = kCharsetFromMetaTag; // means confident
     427             : }
     428             : 
     429             : // A separate user data struct is used instead of passing the
     430             : // nsHtml5StreamParser instance as user data in order to avoid including
     431             : // expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts.
     432             : // Using a separate user data struct also avoids bloating nsHtml5StreamParser
     433             : // by one pointer.
     434             : struct UserData
     435             : {
     436             :   XML_Parser mExpat;
     437             :   nsHtml5StreamParser* mStreamParser;
     438             : };
     439             : 
     440             : // Using no-namespace handler callbacks to avoid including expat.h in
     441             : // nsHtml5StreamParser.h, since doing so would cause naming conflicts.
     442             : static void
     443           0 : HandleXMLDeclaration(void* aUserData,
     444             :                      const XML_Char* aVersion,
     445             :                      const XML_Char* aEncoding,
     446             :                      int aStandalone)
     447             : {
     448           0 :   UserData* ud = static_cast<UserData*>(aUserData);
     449           0 :   ud->mStreamParser->SetEncodingFromExpat(
     450           0 :     reinterpret_cast<const char16_t*>(aEncoding));
     451           0 :   XML_StopParser(ud->mExpat, false);
     452           0 : }
     453             : 
     454             : static void
     455           0 : HandleStartElement(void* aUserData,
     456             :                    const XML_Char* aName,
     457             :                    const XML_Char** aAtts)
     458             : {
     459           0 :   UserData* ud = static_cast<UserData*>(aUserData);
     460           0 :   XML_StopParser(ud->mExpat, false);
     461           0 : }
     462             : 
     463             : static void
     464           0 : HandleEndElement(void* aUserData, const XML_Char* aName)
     465             : {
     466           0 :   UserData* ud = static_cast<UserData*>(aUserData);
     467           0 :   XML_StopParser(ud->mExpat, false);
     468           0 : }
     469             : 
     470             : static void
     471           0 : HandleComment(void* aUserData, const XML_Char* aName)
     472             : {
     473           0 :   UserData* ud = static_cast<UserData*>(aUserData);
     474           0 :   XML_StopParser(ud->mExpat, false);
     475           0 : }
     476             : 
     477             : static void
     478           0 : HandleProcessingInstruction(void* aUserData,
     479             :                             const XML_Char* aTarget,
     480             :                             const XML_Char* aData)
     481             : {
     482           0 :   UserData* ud = static_cast<UserData*>(aUserData);
     483           0 :   XML_StopParser(ud->mExpat, false);
     484           0 : }
     485             : 
     486             : nsresult
     487           0 : nsHtml5StreamParser::FinalizeSniffing(
     488             :   const uint8_t* aFromSegment, // can be null
     489             :   uint32_t aCount,
     490             :   uint32_t* aWriteCount,
     491             :   uint32_t aCountToSniffingLimit)
     492             : {
     493           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     494           0 :   NS_ASSERTION(mCharsetSource < kCharsetFromParentForced,
     495             :                "Should not finalize sniffing when using forced charset.");
     496           0 :   if (mMode == VIEW_SOURCE_XML) {
     497             :     static const XML_Memory_Handling_Suite memsuite = {
     498             :       (void* (*)(size_t))moz_xmalloc,
     499             :       (void* (*)(void*, size_t))moz_xrealloc,
     500             :       free
     501             :     };
     502             : 
     503             :     static const char16_t kExpatSeparator[] = { 0xFFFF, '\0' };
     504             : 
     505             :     static const char16_t kISO88591[] = { 'I', 'S', 'O', '-', '8', '8',
     506             :                                           '5', '9', '-', '1', '\0' };
     507             : 
     508             :     UserData ud;
     509           0 :     ud.mStreamParser = this;
     510             : 
     511             :     // If we got this far, the stream didn't have a BOM. UTF-16-encoded XML
     512             :     // documents MUST begin with a BOM. We don't support EBCDIC and such.
     513             :     // Thus, at this point, what we have is garbage or something encoded using
     514             :     // a rough ASCII superset. ISO-8859-1 allows us to decode ASCII bytes
     515             :     // without throwing errors when bytes have the most significant bit set
     516             :     // and without triggering expat's unknown encoding code paths. This is
     517             :     // enough to be able to use expat to parse the XML declaration in order
     518             :     // to extract the encoding name from it.
     519           0 :     ud.mExpat = XML_ParserCreate_MM(kISO88591, &memsuite, kExpatSeparator);
     520           0 :     XML_SetXmlDeclHandler(ud.mExpat, HandleXMLDeclaration);
     521           0 :     XML_SetElementHandler(ud.mExpat, HandleStartElement, HandleEndElement);
     522           0 :     XML_SetCommentHandler(ud.mExpat, HandleComment);
     523           0 :     XML_SetProcessingInstructionHandler(ud.mExpat, HandleProcessingInstruction);
     524           0 :     XML_SetUserData(ud.mExpat, static_cast<void*>(&ud));
     525             : 
     526           0 :     XML_Status status = XML_STATUS_OK;
     527             : 
     528             :     // aFromSegment points to the data obtained from the current network
     529             :     // event. mSniffingBuffer (if it exists) contains the data obtained before
     530             :     // the current event. Thus, mSniffingLength bytes of mSniffingBuffer
     531             :     // followed by aCountToSniffingLimit bytes from aFromSegment are the
     532             :     // first 1024 bytes of the file (or the file as a whole if the file is
     533             :     // 1024 bytes long or shorter). Thus, we parse both buffers, but if the
     534             :     // first call succeeds already, we skip parsing the second buffer.
     535           0 :     if (mSniffingBuffer) {
     536           0 :       status = XML_Parse(ud.mExpat,
     537           0 :                          reinterpret_cast<const char*>(mSniffingBuffer.get()),
     538           0 :                          mSniffingLength,
     539           0 :                          false);
     540             :     }
     541           0 :     if (status == XML_STATUS_OK && mCharsetSource < kCharsetFromMetaTag &&
     542             :         aFromSegment) {
     543           0 :       status = XML_Parse(ud.mExpat,
     544             :                          reinterpret_cast<const char*>(aFromSegment),
     545             :                          aCountToSniffingLimit,
     546           0 :                          false);
     547             :     }
     548           0 :     XML_ParserFree(ud.mExpat);
     549             : 
     550           0 :     if (mCharsetSource < kCharsetFromMetaTag) {
     551             :       // Failed to get an encoding from the XML declaration. XML defaults
     552             :       // confidently to UTF-8 in this case.
     553             :       // It is also possible that the document has an XML declaration that is
     554             :       // longer than 1024 bytes, but that case is not worth worrying about.
     555           0 :       mEncoding = UTF_8_ENCODING;
     556           0 :       mCharsetSource = kCharsetFromMetaTag; // means confident
     557             :     }
     558             : 
     559             :     return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     560           0 :       aFromSegment, aCount, aWriteCount);
     561             :   }
     562             : 
     563             :   // meta scan failed.
     564           0 :   if (mCharsetSource >= kCharsetFromHintPrevDoc) {
     565           0 :     mFeedChardet = false;
     566             :     return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     567           0 :       aFromSegment, aCount, aWriteCount);
     568             :   }
     569             :   // Check for BOMless UTF-16 with Basic
     570             :   // Latin content for compat with IE. See bug 631751.
     571           0 :   SniffBOMlessUTF16BasicLatin(aFromSegment, aCountToSniffingLimit);
     572             :   // the charset may have been set now
     573             :   // maybe try chardet now;
     574           0 :   if (mFeedChardet) {
     575             :     bool dontFeed;
     576             :     nsresult rv;
     577           0 :     if (mSniffingBuffer) {
     578           0 :       rv = mChardet->DoIt(
     579           0 :         (const char*)mSniffingBuffer.get(), mSniffingLength, &dontFeed);
     580           0 :       mFeedChardet = !dontFeed;
     581           0 :       NS_ENSURE_SUCCESS(rv, rv);
     582             :     }
     583           0 :     if (mFeedChardet && aFromSegment) {
     584           0 :       rv = mChardet->DoIt((const char*)aFromSegment,
     585             :                           // Avoid buffer boundary-dependent behavior when
     586             :                           // reparsing is forbidden. If reparse is forbidden,
     587             :                           // act as if we only saw the first 1024 bytes.
     588             :                           // When reparsing isn't forbidden, buffer boundaries
     589             :                           // can have an effect on whether the page is loaded
     590             :                           // once or twice. :-(
     591           0 :                           mReparseForbidden ? aCountToSniffingLimit : aCount,
     592           0 :                           &dontFeed);
     593           0 :       mFeedChardet = !dontFeed;
     594           0 :       NS_ENSURE_SUCCESS(rv, rv);
     595             :     }
     596           0 :     if (mFeedChardet && (!aFromSegment || mReparseForbidden)) {
     597             :       // mReparseForbidden is checked so that we get to use the sniffing
     598             :       // buffer with the best guess so far if we aren't allowed to guess
     599             :       // better later.
     600           0 :       mFeedChardet = false;
     601           0 :       rv = mChardet->Done();
     602           0 :       NS_ENSURE_SUCCESS(rv, rv);
     603             :     }
     604             :     // fall thru; callback may have changed charset
     605             :   }
     606           0 :   if (mCharsetSource == kCharsetUninitialized) {
     607             :     // Hopefully this case is never needed, but dealing with it anyway
     608           0 :     mEncoding = WINDOWS_1252_ENCODING;
     609           0 :     mCharsetSource = kCharsetFromFallback;
     610           0 :     mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
     611           0 :   } else if (mMode == LOAD_AS_DATA && mCharsetSource == kCharsetFromFallback) {
     612           0 :     NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
     613           0 :     NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
     614           0 :     NS_ASSERTION(mEncoding == UTF_8_ENCODING, "XHR should default to UTF-8");
     615             :     // Now mark charset source as non-weak to signal that we have a decision
     616           0 :     mCharsetSource = kCharsetFromDocTypeDefault;
     617           0 :     mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
     618             :   }
     619             :   return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     620           0 :     aFromSegment, aCount, aWriteCount);
     621             : }
     622             : 
     623             : nsresult // Sniffs the leading bytes for a BOM, then prescans for a <meta> charset; buffers bytes until decided.
     624           0 : nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment, // bytes of the current segment
     625             :                                       uint32_t aCount, // number of bytes in aFromSegment
     626             :                                       uint32_t* aWriteCount) // out: count of input bytes handled
     627             : {
     628           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     629             :   nsresult rv = NS_OK;
     630             :   uint32_t writeCount;
     631             : 
     632             :   // mEncoding and mCharsetSource potentially have come from channel or higher
     633             :   // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them.
     634             :   // If we don't find a BOM, the previously set values of mEncoding and
     635             :   // mCharsetSource are not modified by the BOM sniffing here.
     636           0 :   for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) { // mBomState is a member, so the state machine resumes across calls
     637           0 :     switch (mBomState) {
     638             :       case BOM_SNIFFING_NOT_STARTED:
     639           0 :         NS_ASSERTION(i == 0, "Bad BOM sniffing state.");
     640           0 :         switch (*aFromSegment) { // same byte as aFromSegment[i], given the i == 0 assertion above
     641             :           case 0xEF:
     642           0 :             mBomState = SEEN_UTF_8_FIRST_BYTE;
     643           0 :             break;
     644             :           case 0xFF:
     645           0 :             mBomState = SEEN_UTF_16_LE_FIRST_BYTE;
     646           0 :             break;
     647             :           case 0xFE:
     648           0 :             mBomState = SEEN_UTF_16_BE_FIRST_BYTE;
     649           0 :             break;
     650             :           default:
     651           0 :             mBomState = BOM_SNIFFING_OVER;
     652           0 :             break;
     653             :         }
     654             :         break;
     655             :       case SEEN_UTF_16_LE_FIRST_BYTE:
     656           0 :         if (aFromSegment[i] == 0xFE) { // FF FE seen: UTF-16LE BOM complete
     657             :           rv = SetupDecodingFromBom(
     658           0 :             UTF_16LE_ENCODING); // upper case is the raw form
     659           0 :           NS_ENSURE_SUCCESS(rv, rv);
     660           0 :           uint32_t count = aCount - (i + 1); // bytes of this segment that follow the BOM
     661           0 :           rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
     662           0 :           NS_ENSURE_SUCCESS(rv, rv);
     663           0 :           *aWriteCount = writeCount + (i + 1); // decoded bytes plus the bytes skipped at the start of this segment
     664           0 :           return rv;
     665             :         }
     666           0 :         mBomState = BOM_SNIFFING_OVER;
     667           0 :         break;
     668             :       case SEEN_UTF_16_BE_FIRST_BYTE:
     669           0 :         if (aFromSegment[i] == 0xFF) { // FE FF seen: UTF-16BE BOM complete
     670             :           rv = SetupDecodingFromBom(
     671           0 :             UTF_16BE_ENCODING); // upper case is the raw form
     672           0 :           NS_ENSURE_SUCCESS(rv, rv);
     673           0 :           uint32_t count = aCount - (i + 1); // bytes of this segment that follow the BOM
     674           0 :           rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
     675           0 :           NS_ENSURE_SUCCESS(rv, rv);
     676           0 :           *aWriteCount = writeCount + (i + 1); // decoded bytes plus the bytes skipped at the start of this segment
     677           0 :           return rv;
     678             :         }
     679           0 :         mBomState = BOM_SNIFFING_OVER;
     680           0 :         break;
     681             :       case SEEN_UTF_8_FIRST_BYTE:
     682           0 :         if (aFromSegment[i] == 0xBB) { // EF BB seen: one more byte needed for the UTF-8 BOM
     683           0 :           mBomState = SEEN_UTF_8_SECOND_BYTE;
     684             :         } else {
     685           0 :           mBomState = BOM_SNIFFING_OVER;
     686             :         }
     687             :         break;
     688             :       case SEEN_UTF_8_SECOND_BYTE:
     689           0 :         if (aFromSegment[i] == 0xBF) { // EF BB BF seen: UTF-8 BOM complete
     690             :           rv =
     691           0 :             SetupDecodingFromBom(UTF_8_ENCODING); // upper case is the raw form
     692           0 :           NS_ENSURE_SUCCESS(rv, rv);
     693           0 :           uint32_t count = aCount - (i + 1); // bytes of this segment that follow the BOM
     694           0 :           rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
     695           0 :           NS_ENSURE_SUCCESS(rv, rv);
     696           0 :           *aWriteCount = writeCount + (i + 1); // decoded bytes plus the bytes skipped at the start of this segment
     697           0 :           return rv;
     698             :         }
     699           0 :         mBomState = BOM_SNIFFING_OVER;
     700           0 :         break;
     701             :       default:
     702           0 :         mBomState = BOM_SNIFFING_OVER;
     703           0 :         break;
     704             :     }
     705             :   }
     706             :   // if we get here, there either was no BOM or the BOM sniffing isn't complete
     707             :   // yet
     708             : 
     709           0 :   MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark,
     710             :              "Should not come here if BOM was found.");
     711           0 :   MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent,
     712             :              "kCharsetFromOtherComponent is for XSLT.");
     713             : 
     714           0 :   if (mBomState == BOM_SNIFFING_OVER && mCharsetSource == kCharsetFromChannel) {
     715             :     // There was no BOM and the charset came from channel. mEncoding
     716             :     // still contains the charset from the channel as set by an
     717             :     // earlier call to SetDocumentCharset(), since we didn't find a BOM and
     718             :     // overwrite mEncoding. (Note that if the user has overridden the charset,
     719             :     // we don't come here but check <meta> for XSS-dangerous charsets first.)
     720           0 :     mFeedChardet = false;
     721           0 :     mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
     722             :     return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     723           0 :       aFromSegment, aCount, aWriteCount);
     724             :   }
     725             : 
     726           0 :   if (!mMetaScanner && // create the meta scanner lazily, only in the modes that prescan below
     727           0 :       (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {
     728           0 :     mMetaScanner = new nsHtml5MetaScanner(mTreeBuilder);
     729             :   }
     730             : 
     731           0 :   if (mSniffingLength + aCount >= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) {
     732             :     // this is the last buffer: with this segment the sniffing limit is reached
     733             :     uint32_t countToSniffingLimit =
     734           0 :       NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength;
     735           0 :     if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
     736             :       nsHtml5ByteReadable readable(aFromSegment,
     737           0 :                                    aFromSegment + countToSniffingLimit); // scan only up to the sniffing limit
     738           0 :       nsAutoCString charset; // NOTE(review): not referenced anywhere in this scope; looks unused
     739           0 :       auto encoding = mMetaScanner->sniff(&readable);
     740             :       // Due to the way nsHtml5Portability reports OOM, ask the tree builder
     741             :       nsresult rv; // shadows the function-level rv
     742           0 :       if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
     743           0 :         MarkAsBroken(rv);
     744           0 :         return rv;
     745             :       }
     746           0 :       if (encoding) {
     747             :         // meta scan successful; honor overrides unless meta is XSS-dangerous
     748           0 :         if ((mCharsetSource == kCharsetFromParentForced ||
     749           0 :              mCharsetSource == kCharsetFromUserForced) &&
     750           0 :             (encoding->IsAsciiCompatible() ||
     751           0 :              encoding == ISO_2022_JP_ENCODING)) {
     752             :           // Honor override
     753             :           return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     754           0 :             aFromSegment, aCount, aWriteCount);
     755             :         }
     756           0 :         mEncoding = WrapNotNull(encoding);
     757           0 :         mCharsetSource = kCharsetFromMetaPrescan;
     758           0 :         mFeedChardet = false;
     759           0 :         mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
     760             :         return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     761           0 :           aFromSegment, aCount, aWriteCount);
     762             :       }
     763             :     }
     764           0 :     if (mCharsetSource == kCharsetFromParentForced ||
     765             :         mCharsetSource == kCharsetFromUserForced) {
     766             :       // meta not found, honor override
     767             :       return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     768           0 :         aFromSegment, aCount, aWriteCount);
     769             :     }
     770             :     return FinalizeSniffing( // no BOM, no usable <meta>, no override: fall back to the remaining heuristics
     771           0 :       aFromSegment, aCount, aWriteCount, countToSniffingLimit);
     772             :   }
     773             : 
     774             :   // not the last buffer: the sniffing limit has not been reached yet
     775           0 :   if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
     776           0 :     nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount); // scan the whole segment
     777           0 :     auto encoding = mMetaScanner->sniff(&readable);
     778             :     // Due to the way nsHtml5Portability reports OOM, ask the tree builder
     779             :     nsresult rv; // shadows the function-level rv
     780           0 :     if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
     781           0 :       MarkAsBroken(rv);
     782           0 :       return rv;
     783             :     }
     784           0 :     if (encoding) {
     785             :       // meta scan successful; honor overrides unless meta is XSS-dangerous
     786           0 :       if ((mCharsetSource == kCharsetFromParentForced ||
     787           0 :            mCharsetSource == kCharsetFromUserForced) &&
     788           0 :           (encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) {
     789             :         // Honor override
     790             :         return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     791           0 :           aFromSegment, aCount, aWriteCount);
     792             :       }
     793           0 :       mEncoding = WrapNotNull(encoding);
     794           0 :       mCharsetSource = kCharsetFromMetaPrescan;
     795           0 :       mFeedChardet = false;
     796           0 :       mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
     797             :       return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     798           0 :         aFromSegment, aCount, aWriteCount);
     799             :     }
     800             :   }
     801             : 
     802           0 :   if (!mSniffingBuffer) { // no decision yet: stash the bytes, allocating the sniffing buffer on first use
     803           0 :     mSniffingBuffer = MakeUniqueFallible<uint8_t[]>(
     804           0 :       NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE);
     805           0 :     if (!mSniffingBuffer) {
     806             :       return NS_ERROR_OUT_OF_MEMORY;
     807             :     }
     808             :   }
     809           0 :   memcpy(&mSniffingBuffer[mSniffingLength], aFromSegment, aCount); // fits: the ">= buffer size" branch above always returns
     810           0 :   mSniffingLength += aCount;
     811           0 :   *aWriteCount = aCount; // every byte of the segment was buffered
     812           0 :   return NS_OK;
     813             : }
     814             : 
     815             : nsresult // Decodes aCount bytes to UTF-16 into the mLastBuffer chain, appending new buffers as each fills up.
     816           0 : nsHtml5StreamParser::WriteStreamBytes(const uint8_t* aFromSegment, // bytes to decode
     817             :                                       uint32_t aCount, // number of bytes in aFromSegment
     818             :                                       uint32_t* aWriteCount) // out: total bytes read by the decoder
     819             : {
     820           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
     821             :   // mLastBuffer should always point to a buffer of the size
     822             :   // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
     823           0 :   if (!mLastBuffer) {
     824           0 :     NS_WARNING("mLastBuffer should not be null!");
     825           0 :     MarkAsBroken(NS_ERROR_NULL_POINTER);
     826           0 :     return NS_ERROR_NULL_POINTER;
     827             :   }
     828           0 :   size_t totalRead = 0;
     829           0 :   auto src = MakeSpan(aFromSegment, aCount);
     830             :   for (;;) { // exits via return: success once the output is not full, or OOM
     831           0 :     auto dst = mLastBuffer->TailAsSpan(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
     832             :     uint32_t result;
     833             :     size_t read;
     834             :     size_t written;
     835             :     bool hadErrors;
     836           0 :     Tie(result, read, written, hadErrors) =
     837           0 :       mUnicodeDecoder->DecodeToUTF16(src, dst, false); // false: presumably "not the last chunk" (DoStopRequest passes true) - confirm
     838           0 :     if (hadErrors && !mHasHadErrors) { // only the first decode error is acted on
     839           0 :       mHasHadErrors = true;
     840           0 :       if (mEncoding == UTF_8_ENCODING) {
     841           0 :         mTreeBuilder->TryToEnableEncodingMenu();
     842             :       }
     843             :     }
     844           0 :     src = src.From(read); // drop the input bytes the decoder has consumed
     845           0 :     totalRead += read;
     846           0 :     mLastBuffer->AdvanceEnd(written);
     847           0 :     if (result == kOutputFull) { // current buffer is full: chain a fresh one and keep decoding
     848             :       RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
     849           0 :         nsHtml5OwningUTF16Buffer::FalliblyCreate(
     850           0 :           NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
     851           0 :       if (!newBuf) {
     852           0 :         MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
     853           0 :         return NS_ERROR_OUT_OF_MEMORY;
     854             :       }
     855           0 :       mLastBuffer = (mLastBuffer->next = newBuf.forget()); // link the new buffer and make it the tail
     856             :     } else {
     857           0 :       MOZ_ASSERT(totalRead == aCount,
     858             :                  "The Unicode decoder consumed the wrong number of bytes.");
     859           0 :       *aWriteCount = totalRead;
     860           0 :       return NS_OK;
     861             :     }
     862             :   }
     863             : }
     864             : 
     865           0 : class MaybeRunCollector : public Runnable // Runnable that calls nsJSContext::MaybeRunNextCollectorSlice() for a docshell.
     866             : {
     867             : public:
     868           0 :   explicit MaybeRunCollector(nsIDocShell* aDocShell) // aDocShell is stored in an nsCOMPtr and handed to the collector call in Run()
     869           0 :     : Runnable("MaybeRunCollector")
     870           0 :     , mDocShell(aDocShell)
     871             :   {
     872           0 :   }
     873             : 
     874           0 :   NS_IMETHOD Run() override // always returns NS_OK
     875             :   {
     876           0 :     nsJSContext::MaybeRunNextCollectorSlice(mDocShell,
     877           0 :                                             JS::gcreason::HTML_PARSER);
     878           0 :     return NS_OK;
     879             :   }
     880             : 
     881             :   nsCOMPtr<nsIDocShell> mDocShell; // docshell passed to MaybeRunNextCollectorSlice()
     882             : };
     883             : 
     884             : nsresult // Main-thread start of the stream: starts tokenizer/tree builder/executor, allocates the first buffer, sets reparse and chardet policy.
     885           0 : nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
     886             : {
     887           0 :   MOZ_RELEASE_ASSERT(STREAM_NOT_STARTED == mStreamState,
     888             :                      "Got OnStartRequest when the stream had already started.");
     889           0 :   MOZ_ASSERT(
     890             :     !mExecutor->HasStarted(),
     891             :     "Got OnStartRequest at the wrong stage in the executor life cycle.");
     892           0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
     893           0 :   if (mObserver) { // forward the notification to the observer first, if there is one
     894           0 :     mObserver->OnStartRequest(aRequest, aContext);
     895             :   }
     896           0 :   mRequest = aRequest;
     897             : 
     898           0 :   mStreamState = STREAM_BEING_READ;
     899             : 
     900           0 :   if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
     901           0 :     mTokenizer->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
     902             :   }
     903             : 
     904             :   // For View Source, the parser should run with scripts "enabled" if a normal
     905             :   // load would have scripts enabled.
     906             :   bool scriptingEnabled =
     907           0 :     mMode == LOAD_AS_DATA ? false : mExecutor->IsScriptEnabled(); // scripting is always off for LOAD_AS_DATA
     908           0 :   mOwner->StartTokenizer(scriptingEnabled);
     909             : 
     910           0 :   bool isSrcdoc = false;
     911           0 :   nsCOMPtr<nsIChannel> channel;
     912           0 :   nsresult rv = GetChannel(getter_AddRefs(channel));
     913           0 :   if (NS_SUCCEEDED(rv)) { // isSrcdoc stays false when no channel can be obtained
     914           0 :     isSrcdoc = NS_IsSrcdocChannel(channel);
     915             :   }
     916           0 :   mTreeBuilder->setIsSrcdocDocument(isSrcdoc);
     917           0 :   mTreeBuilder->setScriptingEnabled(scriptingEnabled);
     918           0 :   mTreeBuilder->SetPreventScriptExecution(
     919           0 :     !((mMode == NORMAL) && scriptingEnabled)); // execution is allowed only for NORMAL loads with scripting enabled
     920           0 :   mTokenizer->start();
     921           0 :   mExecutor->Start();
     922           0 :   mExecutor->StartReadingFromStage();
     923             : 
     924           0 :   if (mMode == PLAIN_TEXT) {
     925           0 :     mTreeBuilder->StartPlainText();
     926           0 :     mTokenizer->StartPlainText();
     927           0 :   } else if (mMode == VIEW_SOURCE_PLAIN) {
     928           0 :     nsAutoString viewSourceTitle;
     929           0 :     CopyUTF8toUTF16(mViewSourceTitle, viewSourceTitle);
     930           0 :     mTreeBuilder->EnsureBufferSpace(viewSourceTitle.Length());
     931           0 :     mTreeBuilder->StartPlainTextViewSource(viewSourceTitle);
     932           0 :     mTokenizer->StartPlainText();
     933             :   }
     934             : 
     935             :   /*
     936             :    * If you move the following line, be very careful not to cause
     937             :    * WillBuildModel to be called before the document has had its
     938             :    * script global object set.
     939             :    */
     940           0 :   rv = mExecutor->WillBuildModel(eDTDMode_unknown);
     941           0 :   NS_ENSURE_SUCCESS(rv, rv);
     942             : 
     943             :   RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
     944           0 :     nsHtml5OwningUTF16Buffer::FalliblyCreate(
     945           0 :       NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
     946           0 :   if (!newBuf) {
     947             :     // marks this stream parser as terminated,
     948             :     // which prevents entry to code paths that
     949             :     // would use mFirstBuffer or mLastBuffer.
     950           0 :     return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
     951             :   }
     952           0 :   NS_ASSERTION(!mFirstBuffer, "How come we have the first buffer set?");
     953           0 :   NS_ASSERTION(!mLastBuffer, "How come we have the last buffer set?");
     954           0 :   mFirstBuffer = mLastBuffer = newBuf; // the new buffer is both head and tail of the buffer chain
     955             : 
     956           0 :   rv = NS_OK;
     957             : 
     958             :   // The line below means that the encoding can end up being wrong if
     959             :   // a view-source URL is loaded without having the encoding hint from a
     960             :   // previous normal load in the history.
     961           0 :   mReparseForbidden = !(mMode == NORMAL || mMode == PLAIN_TEXT);
     962             : 
     963           0 :   mDocGroup = mExecutor->GetDocument()->GetDocGroup();
     964             : 
     965           0 :   nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(mRequest, &rv));
     966           0 :   if (NS_SUCCEEDED(rv)) {
     967             :     // Non-HTTP channels are bogus enough that we let them work with unlabeled
     968             :     // runnables for now. Asserting for HTTP channels only.
     969           0 :     MOZ_ASSERT(mDocGroup || mMode == LOAD_AS_DATA,
     970             :                "How come the doc group is still null?");
     971             : 
     972           0 :     nsAutoCString method;
     973           0 :     Unused << httpChannel->GetRequestMethod(method); // on failure method stays empty, which is treated as non-GET below
     974             :     // XXX does Necko have a way to renavigate POST, etc. without hitting
     975             :     // the network?
     976           0 :     if (!method.EqualsLiteral("GET")) {
     977             :       // This is the old Gecko behavior but the HTML5 spec disagrees.
     978             :       // Don't reparse on POST.
     979           0 :       mReparseForbidden = true;
     980           0 :       mFeedChardet = false; // can't restart anyway
     981             :     }
     982             :   }
     983             : 
     984             :   // Attempt to retarget delivery of data (via OnDataAvailable) to the parser
     985             :   // thread, rather than through the main thread.
     986             :   nsCOMPtr<nsIThreadRetargetableRequest> threadRetargetableRequest =
     987           0 :     do_QueryInterface(mRequest, &rv); // rv is overwritten here; the earlier HTTP QueryInterface result is discarded
     988           0 :   if (threadRetargetableRequest) {
     989           0 :     rv = threadRetargetableRequest->RetargetDeliveryTo(mEventTarget);
     990           0 :     if (NS_SUCCEEDED(rv)) {
     991             :       // Parser thread should be now ready to get data from necko and parse it
     992             :       // and main thread might have a chance to process a collector slice.
     993             :       // We need to do this asynchronously so that necko may continue processing
     994             :       // the request.
     995             :       nsCOMPtr<nsIRunnable> runnable =
     996           0 :         new MaybeRunCollector(mExecutor->GetDocument()->GetDocShell());
     997             :       mozilla::SystemGroup::Dispatch(mozilla::TaskCategory::GarbageCollection,
     998           0 :                                      runnable.forget());
     999             :     }
    1000             :   }
    1001             : 
    1002           0 :   if (NS_FAILED(rv)) { // rv is from RetargetDeliveryTo(), or from the QueryInterface when the request is not retargetable
    1003           0 :     NS_WARNING("Failed to retarget HTML data delivery to the parser thread.");
    1004             :   }
    1005             : 
    1006           0 :   if (mCharsetSource == kCharsetFromParentFrame) {
    1007             :     // Remember this in case chardet overwrites mCharsetSource
    1008           0 :     mInitialEncodingWasFromParentFrame = true;
    1009             :   }
    1010             : 
    1011           0 :   if (mCharsetSource >= kCharsetFromAutoDetection) { // source already ranks at or above auto-detection: don't feed chardet
    1012           0 :     mFeedChardet = false;
    1013             :   }
    1014             : 
    1015           0 :   nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest));
    1016           0 :   if (mCharsetSource < kCharsetFromUtf8OnlyMime && !wyciwygChannel) {
    1017             :     // we aren't ready to commit to an encoding yet
    1018             :     // leave converter uninstantiated for now
    1019             :     return NS_OK;
    1020             :   }
    1021             : 
    1022             :   // We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
    1023             :   // a browsing context. In the latter case, there's no need to remove the
    1024             :   // BOM manually here, because the UTF-8 decoder removes it.
    1025           0 :   mReparseForbidden = true;
    1026           0 :   mFeedChardet = false;
    1027             : 
    1028             :   // Instantiate the converter here to avoid BOM sniffing.
    1029           0 :   mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
    1030           0 :   return NS_OK;
    1031             : }
    1032             : 
    1033             : nsresult // Forwards CheckListenerChain() to mObserver when it is retargetable; NS_OK when there is no observer.
    1034           0 : nsHtml5StreamParser::CheckListenerChain()
    1035             : {
    1036           0 :   NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!");
    1037           0 :   if (!mObserver) {
    1038             :     return NS_OK;
    1039             :   }
    1040             :   nsresult rv;
    1041             :   nsCOMPtr<nsIThreadRetargetableStreamListener> retargetable =
    1042           0 :     do_QueryInterface(mObserver, &rv);
    1043           0 :   if (NS_SUCCEEDED(rv) && retargetable) {
    1044           0 :     rv = retargetable->CheckListenerChain();
    1045             :   }
    1046           0 :   return rv; // the observer's answer, or the QueryInterface result if the call was not forwarded
    1047             : }
    1048             : 
    1049             : void // Parser-thread end of stream: finalizes sniffing if needed, flushes the decoder, then parses what is available.
    1050           0 : nsHtml5StreamParser::DoStopRequest()
    1051             : {
    1052           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1053           0 :   MOZ_RELEASE_ASSERT(STREAM_BEING_READ == mStreamState,
    1054             :                      "Stream ended without being open.");
    1055           0 :   mTokenizerMutex.AssertCurrentThreadOwns(); // the caller must already hold mTokenizerMutex
    1056             : 
    1057           0 :   if (IsTerminated()) {
    1058           0 :     return;
    1059             :   }
    1060             : 
    1061           0 :   mStreamState = STREAM_ENDED;
    1062             : 
    1063           0 :   if (!mUnicodeDecoder) { // no decoder yet: sniffing never completed, so finalize it now with no new data
    1064             :     uint32_t writeCount;
    1065             :     nsresult rv;
    1066           0 :     if (NS_FAILED(rv = FinalizeSniffing(nullptr, 0, &writeCount, 0))) {
    1067           0 :       MarkAsBroken(rv);
    1068           0 :       return;
    1069             :     }
    1070           0 :   } else if (mFeedChardet) {
    1071           0 :     mChardet->Done(); // NOTE(review): result is ignored here, while FinalizeSniffing checks it with NS_ENSURE_SUCCESS
    1072             :   }
    1073             : 
    1074           0 :   MOZ_ASSERT(mUnicodeDecoder,
    1075             :              "Should have a decoder after finalizing sniffing.");
    1076             : 
    1077             :   // mLastBuffer should always point to a buffer of the size
    1078             :   // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
    1079           0 :   if (!mLastBuffer) {
    1080           0 :     NS_WARNING("mLastBuffer should not be null!");
    1081           0 :     MarkAsBroken(NS_ERROR_NULL_POINTER);
    1082           0 :     return;
    1083             :   }
    1084             : 
    1085           0 :   Span<uint8_t> src; // empty span
    1086             :   for (;;) { // keep flushing until the decoder stops reporting a full output buffer
    1087           0 :     auto dst = mLastBuffer->TailAsSpan(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
    1088             :     uint32_t result;
    1089             :     size_t read;
    1090             :     size_t written;
    1091             :     bool hadErrors;
    1092           0 :     Tie(result, read, written, hadErrors) =
    1093           0 :       mUnicodeDecoder->DecodeToUTF16(src, dst, true); // true: presumably marks end of stream so pending output is flushed - confirm
    1094           0 :     if (hadErrors && !mHasHadErrors) { // only the first decode error is acted on
    1095           0 :       mHasHadErrors = true;
    1096           0 :       if (mEncoding == UTF_8_ENCODING) {
    1097           0 :         mTreeBuilder->TryToEnableEncodingMenu();
    1098             :       }
    1099             :     }
    1100           0 :     MOZ_ASSERT(read == 0, "How come an empty span was read form?"); // NOTE(review): "form" in the message looks like a typo for "from"
    1101           0 :     mLastBuffer->AdvanceEnd(written);
    1102           0 :     if (result == kOutputFull) { // current buffer is full: chain a fresh one and flush again
    1103             :       RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
    1104           0 :         nsHtml5OwningUTF16Buffer::FalliblyCreate(
    1105           0 :           NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
    1106           0 :       if (!newBuf) {
    1107           0 :         MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
    1108           0 :         return;
    1109             :       }
    1110           0 :       mLastBuffer = (mLastBuffer->next = newBuf.forget()); // link the new buffer and make it the tail
    1111             :     } else {
    1112             :       break;
    1113             :     }
    1114             :   }
    1115             : 
    1116           0 :   if (IsTerminatedOrInterrupted()) {
    1117             :     return;
    1118             :   }
    1119             : 
    1120           0 :   ParseAvailableData();
    1121             : }
    1122             : 
                       // Runnable that calls nsHtml5StreamParser::DoStopRequest() while holding
                       // the stream parser's tokenizer mutex. OnStopRequest() creates an instance
                       // and dispatches it to mEventTarget.
    1123           0 : class nsHtml5RequestStopper : public Runnable
    1124             : {
    1125             : private:
                         // The stream parser whose DoStopRequest() is invoked by Run().
    1126             :   nsHtml5StreamParserPtr mStreamParser;
    1127             : 
    1128             : public:
                         // aStreamParser: the parser to notify; stored in mStreamParser.
    1129           0 :   explicit nsHtml5RequestStopper(nsHtml5StreamParser* aStreamParser)
    1130           0 :     : Runnable("nsHtml5RequestStopper")
    1131           0 :     , mStreamParser(aStreamParser)
    1132             :   {
    1133           0 :   }
                         // Locks mTokenizerMutex for the duration of the DoStopRequest() call.
                         // Always returns NS_OK.
    1134           0 :   NS_IMETHOD Run() override
    1135             :   {
    1136           0 :     mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
    1137           0 :     mStreamParser->DoStopRequest();
    1138           0 :     return NS_OK;
    1139             :   }
    1140             : };
    1141             : 
                       // Stop notification for the stream being parsed.
                       //
                       // Asserts that aRequest is mRequest and that the caller is on the main
                       // thread, forwards the notification (with aContext and status) to
                       // mObserver when one is set, and then dispatches an nsHtml5RequestStopper
                       // to mEventTarget so that DoStopRequest() runs there.
                       //
                       // Returns NS_OK unconditionally: a failed dispatch only produces a
                       // warning and is not reported to the caller.
    1142             : nsresult
    1143           0 : nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest,
    1144             :                                    nsISupports* aContext,
    1145             :                                    nsresult status)
    1146             : {
    1147           0 :   NS_ASSERTION(mRequest == aRequest, "Got Stop on wrong stream.");
    1148           0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
    1149           0 :   if (mObserver) {
    1150           0 :     mObserver->OnStopRequest(aRequest, aContext, status);
    1151             :   }
    1152           0 :   nsCOMPtr<nsIRunnable> stopper = new nsHtml5RequestStopper(this);
    1153           0 :   if (NS_FAILED(mEventTarget->Dispatch(stopper, nsIThread::DISPATCH_NORMAL))) {
    1154           0 :     NS_WARNING("Dispatching StopRequest event failed.");
    1155             :   }
    1156           0 :   return NS_OK;
    1157             : }
    1158             : 
                       // Feeds aLength bytes starting at aBuffer into the parser.
                       //
                       // Preconditions checked here: called on the parser thread, the stream
                       // state is STREAM_BEING_READ (release assert), and the current thread
                       // owns mTokenizerMutex. Does nothing if the parser is already terminated.
                       //
                       // When a decoder exists (HasDecoder()), the bytes are first offered to
                       // mChardet while mFeedChardet is set and are then passed to
                       // WriteStreamBytes(); otherwise they are passed to SniffStreamBytes().
                       // A failure from either marks the parser as broken and returns.
                       //
                       // Afterwards, unless terminated or interrupted, the buffered data is
                       // parsed and the one-shot flush timer is armed if it is not armed
                       // already and the parser is not speculating.
    1159             : void
    1160           0 : nsHtml5StreamParser::DoDataAvailable(const uint8_t* aBuffer, uint32_t aLength)
    1161             : {
    1162           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1163           0 :   MOZ_RELEASE_ASSERT(STREAM_BEING_READ == mStreamState,
    1164             :                      "DoDataAvailable called when stream not open.");
    1165           0 :   mTokenizerMutex.AssertCurrentThreadOwns();
    1166             : 
    1167           0 :   if (IsTerminated()) {
    1168           0 :     return;
    1169             :   }
    1170             : 
    1171             :   uint32_t writeCount;
    1172             :   nsresult rv;
    1173           0 :   if (HasDecoder()) {
    1174           0 :     if (mFeedChardet) {
                             // The detector reports through dontFeed whether it wants to stop
                             // being fed; mFeedChardet is updated accordingly.
    1175             :       bool dontFeed;
    1176           0 :       mChardet->DoIt((const char*)aBuffer, aLength, &dontFeed);
    1177           0 :       mFeedChardet = !dontFeed;
    1178             :     }
    1179           0 :     rv = WriteStreamBytes(aBuffer, aLength, &writeCount);
    1180             :   } else {
    1181           0 :     rv = SniffStreamBytes(aBuffer, aLength, &writeCount);
    1182             :   }
    1183           0 :   if (NS_FAILED(rv)) {
    1184           0 :     MarkAsBroken(rv);
    1185           0 :     return;
    1186             :   }
    1187           0 :   NS_ASSERTION(writeCount == aLength,
    1188             :                "Wrong number of stream bytes written/sniffed.");
    1189             : 
    1190           0 :   if (IsTerminatedOrInterrupted()) {
    1191             :     return;
    1192             :   }
    1193             : 
    1194           0 :   ParseAvailableData();
    1195             : 
    1196           0 :   if (mFlushTimerArmed || mSpeculating) {
    1197             :     return;
    1198             :   }
    1199             : 
                         // Timer (re)initialization is done under mFlushTimerMutex; the lock is
                         // released before mFlushTimerArmed is set.
    1200             :   {
    1201           0 :     mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex);
    1202           0 :     mFlushTimer->InitWithNamedFuncCallback(
    1203             :       nsHtml5StreamParser::TimerCallback,
    1204             :       static_cast<void*>(this),
                             // NOTE(review): the "initial" delay pref is selected once
                             // mFlushTimerEverFired is true and the "subsequent" delay pref
                             // before that, which reads as inverted relative to the pref
                             // names -- confirm that this is intended.
    1205           0 :       mFlushTimerEverFired ? StaticPrefs::html5_flushtimer_initialdelay()
    1206           2 :                            : StaticPrefs::html5_flushtimer_subsequentdelay(),
    1207             :       nsITimer::TYPE_ONE_SHOT,
    1208           0 :       "nsHtml5StreamParser::DoDataAvailable");
    1209             :   }
    1210           0 :   mFlushTimerArmed = true;
    1211             : }
    1212             : 
                       // Runnable that hands an owned byte buffer to
                       // nsHtml5StreamParser::DoDataAvailable() while holding the stream
                       // parser's tokenizer mutex. OnDataAvailable() creates it on the main
                       // thread and dispatches it to mEventTarget.
    1213           0 : class nsHtml5DataAvailable : public Runnable
    1214             : {
    1215             : private:
                         // The stream parser that receives the data.
    1216             :   nsHtml5StreamParserPtr mStreamParser;
                         // The bytes to deliver; owned by this runnable.
    1217             :   UniquePtr<uint8_t[]> mData;
                         // Number of bytes of mData passed to DoDataAvailable().
    1218             :   uint32_t mLength;
    1219             : 
    1220             : public:
                         // Takes ownership of aData (moved into mData). aLength is the byte
                         // count later passed to DoDataAvailable().
    1221           0 :   nsHtml5DataAvailable(nsHtml5StreamParser* aStreamParser,
    1222             :                        UniquePtr<uint8_t[]> aData,
    1223             :                        uint32_t aLength)
    1224           0 :     : Runnable("nsHtml5DataAvailable")
    1225             :     , mStreamParser(aStreamParser)
    1226           0 :     , mData(std::move(aData))
    1227           0 :     , mLength(aLength)
    1228             :   {
    1229           0 :   }
                         // Locks mTokenizerMutex for the duration of the DoDataAvailable() call.
                         // Always returns NS_OK.
    1230           0 :   NS_IMETHOD Run() override
    1231             :   {
    1232           0 :     mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
    1233           0 :     mStreamParser->DoDataAvailable(mData.get(), mLength);
    1234           0 :     return NS_OK;
    1235             :   }
    1236             : };
    1237             : 
                       // Data notification for the stream being parsed. Handles two call sites:
                       //
                       //  * Main thread: copies up to aLength bytes from aInStream into a
                       //    fallibly allocated buffer and dispatches an nsHtml5DataAvailable
                       //    carrying the bytes actually read (totalRead) to mEventTarget.
                       //  * Any other thread (asserted to be the parser thread): takes
                       //    mTokenizerMutex and feeds the stream's segments straight into the
                       //    parser via ReadSegments() and CopySegmentsToParser().
                       //
                       // aContext and aSourceOffset are not used by this method.
                       //
                       // Returns:
                       //  * the failure code of mExecutor->IsBroken() if the executor is
                       //    already broken;
                       //  * the result of mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY) if the
                       //    copy buffer cannot be allocated;
                       //  * the failure code of Read()/ReadSegments() if reading fails;
                       //  * otherwise a success code. A failed dispatch on the main-thread path
                       //    only produces a warning and is not reported to the caller.
    1238             : nsresult
    1239           0 : nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
    1240             :                                      nsISupports* aContext,
    1241             :                                      nsIInputStream* aInStream,
    1242             :                                      uint64_t aSourceOffset,
    1243             :                                      uint32_t aLength)
    1244             : {
    1245             :   nsresult rv;
    1246           0 :   if (NS_FAILED(rv = mExecutor->IsBroken())) {
    1247             :     return rv;
    1248             :   }
    1249             : 
    1250           0 :   NS_ASSERTION(mRequest == aRequest, "Got data on wrong stream.");
    1251             :   uint32_t totalRead;
    1252             :   // Main thread to parser thread dispatch requires copying to buffer first.
    1253           0 :   if (NS_IsMainThread()) {
    1254           0 :     auto data = MakeUniqueFallible<uint8_t[]>(aLength);
    1255           0 :     if (!data) {
    1256           0 :       return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
    1257             :     }
    1258             :     rv =
    1259           0 :       aInStream->Read(reinterpret_cast<char*>(data.get()), aLength, &totalRead);
    1260           0 :     NS_ENSURE_SUCCESS(rv, rv);
    1261           0 :     NS_ASSERTION(totalRead <= aLength, "Read more bytes than were available?");
    1262             : 
                           // The runnable takes ownership of the buffer and is given totalRead
                           // (not aLength) as the number of valid bytes.
    1263             :     nsCOMPtr<nsIRunnable> dataAvailable =
    1264           0 :       new nsHtml5DataAvailable(this, std::move(data), totalRead);
    1265           0 :     if (NS_FAILED(
    1266             :           mEventTarget->Dispatch(dataAvailable, nsIThread::DISPATCH_NORMAL))) {
    1267           0 :       NS_WARNING("Dispatching DataAvailable event failed.");
    1268             :     }
    1269             :     return rv;
    1270             :   } else {
    1271           0 :     NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1272           0 :     mozilla::MutexAutoLock autoLock(mTokenizerMutex);
    1273             : 
    1274             :     // Read directly from response buffer.
    1275             :     rv =
    1276           0 :       aInStream->ReadSegments(CopySegmentsToParser, this, aLength, &totalRead);
    1277           0 :     if (NS_FAILED(rv)) {
    1278           0 :       NS_WARNING("Failed reading response data to parser");
    1279           0 :       return rv;
    1280             :     }
    1281             :     return NS_OK;
    1282             :   }
    1283             : }
    1284             : 
                       // Segment callback that OnDataAvailable() passes to
                       // nsIInputStream::ReadSegments().
                       //
                       // aClosure is the nsHtml5StreamParser to feed. The aCount bytes at
                       // aFromSegment are forwarded to DoDataAvailable(), and *aWriteCount is
                       // set to aCount, i.e. the whole segment is reported as consumed.
                       // aInStream and aToOffset are not used. Always returns NS_OK.
    1285             : /* static */ nsresult
    1286           0 : nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream* aInStream,
    1287             :                                           void* aClosure,
    1288             :                                           const char* aFromSegment,
    1289             :                                           uint32_t aToOffset,
    1290             :                                           uint32_t aCount,
    1291             :                                           uint32_t* aWriteCount)
    1292             : {
    1293           0 :   nsHtml5StreamParser* parser = static_cast<nsHtml5StreamParser*>(aClosure);
    1294             : 
    1295           0 :   parser->DoDataAvailable((const uint8_t*)aFromSegment, aCount);
    1296             :   // Assume DoDataAvailable consumed all available bytes.
    1297           0 :   *aWriteCount = aCount;
    1298           0 :   return NS_OK;
    1299             : }
    1300             : 
                       // Maps an encoding label from an internal encoding declaration to the
                       // encoding the parser should switch to, if any.
                       //
                       // aEncoding is looked up with Encoding::ForLabel(). The result is then
                       // adjusted: UTF-16BE/UTF-16LE become UTF-8 and x-user-defined becomes
                       // windows-1252, each with a charset complaint sent to the tree builder.
                       //
                       // Returns nullptr when no switch is needed or possible:
                       //  * the label is not recognized (complaint "EncMetaUnsupported"), or
                       //  * the resulting encoding equals the current mEncoding. In that case
                       //    mCharsetSource is raised to kCharsetFromMetaTag and mFeedChardet is
                       //    cleared; a "late meta" complaint is issued first if the previous
                       //    source was below kCharsetFromMetaPrescan.
                       // Otherwise returns the (adjusted) encoding, which differs from
                       // mEncoding.
    1301             : const Encoding*
    1302           0 : nsHtml5StreamParser::PreferredForInternalEncodingDecl(
    1303             :   const nsACString& aEncoding)
    1304             : {
    1305           0 :   const Encoding* newEncoding = Encoding::ForLabel(aEncoding);
    1306           0 :   if (!newEncoding) {
    1307             :     // the encoding name is bogus
    1308           0 :     mTreeBuilder->MaybeComplainAboutCharset(
    1309           0 :       "EncMetaUnsupported", true, mTokenizer->getLineNumber());
    1310           0 :     return nullptr;
    1311             :   }
    1312             : 
    1313           0 :   if (newEncoding == UTF_16BE_ENCODING || newEncoding == UTF_16LE_ENCODING) {
    1314           0 :     mTreeBuilder->MaybeComplainAboutCharset(
    1315           0 :       "EncMetaUtf16", true, mTokenizer->getLineNumber());
    1316           0 :     newEncoding = UTF_8_ENCODING;
    1317             :   }
    1318             : 
    1319           0 :   if (newEncoding == X_USER_DEFINED_ENCODING) {
    1320             :     // WebKit/Blink hack for Indian and Armenian legacy sites
    1321           0 :     mTreeBuilder->MaybeComplainAboutCharset(
    1322           0 :       "EncMetaUserDefined", true, mTokenizer->getLineNumber());
    1323           0 :     newEncoding = WINDOWS_1252_ENCODING;
    1324             :   }
    1325             : 
    1326           0 :   if (newEncoding == mEncoding) {
    1327           0 :     if (mCharsetSource < kCharsetFromMetaPrescan) {
    1328           0 :       if (mInitialEncodingWasFromParentFrame) {
    1329           0 :         mTreeBuilder->MaybeComplainAboutCharset(
    1330           0 :           "EncLateMetaFrame", false, mTokenizer->getLineNumber());
    1331             :       } else {
    1332           0 :         mTreeBuilder->MaybeComplainAboutCharset(
    1333           0 :           "EncLateMeta", false, mTokenizer->getLineNumber());
    1334             :       }
    1335             :     }
    1336           0 :     mCharsetSource = kCharsetFromMetaTag; // become confident
    1337           0 :     mFeedChardet = false;                 // don't feed chardet when confident
    1338           0 :     return nullptr;
    1339             :   }
    1340             : 
    1341             :   return newEncoding;
    1342             : }
    1343             : 
                       // Handles an internal encoding declaration whose label is aEncoding.
                       //
                       // Returns false without requesting anything when:
                       //  * mCharsetSource is already at or above kCharsetFromMetaTag,
                       //  * PreferredForInternalEncodingDecl() yields no encoding to switch to,
                       //  * or mReparseForbidden is set (complaint "EncLateMetaTooLate").
                       //
                       // Otherwise stops feeding chardet, asks the tree builder for a charset
                       // switch to the new encoding with source kCharsetFromMetaTag, flushes
                       // tree ops, interrupts the parser, and returns true.
    1344             : bool
    1345           0 : nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding)
    1346             : {
    1347             :   // This code needs to stay in sync with
    1348             :   // nsHtml5MetaScanner::tryCharset. Unfortunately, the
    1349             :   // trickery with member fields there leads to some copy-paste reuse. :-(
    1350           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1351           0 :   if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to
    1352             :                                                // "confident" in the HTML5 spec
    1353             :     return false;
    1354             :   }
    1355             : 
                         // Convert the label to UTF-8 for PreferredForInternalEncodingDecl().
    1356           0 :   nsString newEncoding16; // Not Auto, because using it to hold nsStringBuffer*
    1357           0 :   aEncoding.ToString(newEncoding16);
    1358           0 :   nsAutoCString newEncoding;
    1359           0 :   CopyUTF16toUTF8(newEncoding16, newEncoding);
    1360             : 
    1361           0 :   auto encoding = PreferredForInternalEncodingDecl(newEncoding);
    1362           0 :   if (!encoding) {
    1363             :     return false;
    1364             :   }
    1365             : 
    1366           0 :   if (mReparseForbidden) {
    1367             :     // This mReparseForbidden check happens after the call to
    1368             :     // PreferredForInternalEncodingDecl so that if that method calls
    1369             :     // MaybeComplainAboutCharset, its charset complaint wins over the one
    1370             :     // below.
    1371           0 :     mTreeBuilder->MaybeComplainAboutCharset(
    1372           0 :       "EncLateMetaTooLate", true, mTokenizer->getLineNumber());
    1373           0 :     return false; // not reparsing even if we wanted to
    1374             :   }
    1375             : 
    1376             :   // Avoid having the chardet ask for another restart after this restart
    1377             :   // request.
    1378           0 :   mFeedChardet = false;
    1379           0 :   mTreeBuilder->NeedsCharsetSwitchTo(
    1380           0 :     WrapNotNull(encoding), kCharsetFromMetaTag, mTokenizer->getLineNumber());
    1381           0 :   FlushTreeOpsAndDisarmTimer();
    1382           0 :   Interrupt();
    1383             :   // the tree op executor will cause the stream parser to terminate
    1384             :   // if the charset switch request is accepted or it'll uninterrupt
    1385             :   // if the request failed. Note that if the restart request fails,
    1386             :   // we don't bother trying to make chardet resume. Might as well
    1387             :   // assume that chardet-requested restarts would fail, too.
    1388           0 :   return true;
    1389             : }
    1390             : 
                       // Cancels the flush timer if it is armed, flushes pending output, and
                       // asks the main thread to run the executor flusher.
                       //
                       // Steps, in order:
                       //  1. If mFlushTimerArmed, cancel mFlushTimer under mFlushTimerMutex and
                       //     clear the flag.
                       //  2. In either view-source mode, flush the tokenizer's view-source
                       //     output.
                       //  3. Flush the tree builder.
                       //  4. Dispatch mExecutorFlusher to the main thread; a failed dispatch
                       //     only produces a warning.
                       //
                       // Asserts that it is called on the parser thread.
    1391             : void
    1392           0 : nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer()
    1393             : {
    1394           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1395           0 :   if (mFlushTimerArmed) {
    1396             :     // avoid calling Cancel if the flush timer isn't armed to avoid acquiring
    1397             :     // a mutex
    1398             :     {
    1399           0 :       mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex);
    1400           0 :       mFlushTimer->Cancel();
    1401             :     }
    1402           0 :     mFlushTimerArmed = false;
    1403             :   }
    1404           0 :   if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
    1405           0 :     mTokenizer->FlushViewSource();
    1406             :   }
    1407           0 :   mTreeBuilder->Flush();
    1408           0 :   nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher);
    1409           0 :   if (NS_FAILED(DispatchToMain(runnable.forget()))) {
    1410           0 :     NS_WARNING("failed to dispatch executor flush event");
    1411             :   }
    1412           0 : }
    1413             : 
                       // Tokenizes the buffered data, starting at mFirstBuffer, until the data
                       // runs out or parsing has to stop.
                       //
                       // Must be called on the parser thread with mTokenizerMutex held. Returns
                       // immediately if the parser is terminated or interrupted, or if it is
                       // speculating while speculation is not enabled.
                       //
                       // Loop behaviour:
                       //  * Current buffer exhausted and it is the last buffer:
                       //    - STREAM_BEING_READ: reset the buffer for reuse (only when not
                       //      speculating), flush speculative loads, dispatch mLoadFlusher to
                       //      the main thread, and return to wait for more data.
                       //    - STREAM_ENDED: run end-of-stream handling at most once (guarded by
                       //      mAtEOF): emit a missing-encoding-declaration complaint if
                       //      applicable, signal EOF to the tokenizer and StreamEnded() to the
                       //      tree builder unless the tree builder is broken, flush tree ops,
                       //      and return.
                       //  * Current buffer exhausted but more buffers follow: advance to the
                       //    next buffer.
                       //  * Otherwise: tokenize the buffer. A broken tree builder or a failure
                       //    to reserve tokenizer buffer space marks the parser as broken. If
                       //    the tree builder reports a script, a new speculation is started.
                       //    The loop ends when the parser is terminated or interrupted.
    1414             : void
    1415           0 : nsHtml5StreamParser::ParseAvailableData()
    1416             : {
    1417           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1418           0 :   mTokenizerMutex.AssertCurrentThreadOwns();
    1419             : 
    1420           0 :   if (IsTerminatedOrInterrupted()) {
    1421             :     return;
    1422             :   }
    1423             : 
    1424           0 :   if (mSpeculating && !IsSpeculationEnabled()) {
    1425             :     return;
    1426             :   }
    1427             : 
    1428             :   for (;;) {
    1429           0 :     if (!mFirstBuffer->hasMore()) {
    1430           0 :       if (mFirstBuffer == mLastBuffer) {
    1431           0 :         switch (mStreamState) {
    1432             :           case STREAM_BEING_READ:
    1433             :             // never release the last buffer.
    1434           0 :             if (!mSpeculating) {
    1435             :               // reuse buffer space if not speculating
    1436           0 :               mFirstBuffer->setStart(0);
    1437           0 :               mFirstBuffer->setEnd(0);
    1438             :             }
    1439           0 :             mTreeBuilder->FlushLoads();
    1440             :             {
    1441             :               // Dispatch this runnable unconditionally, because the loads
    1442             :               // that need flushing may have been flushed earlier even if the
    1443             :               // flush right above here did nothing.
    1444           0 :               nsCOMPtr<nsIRunnable> runnable(mLoadFlusher);
    1445           0 :               if (NS_FAILED(DispatchToMain(runnable.forget()))) {
    1446           0 :                 NS_WARNING("failed to dispatch load flush event");
    1447             :               }
    1448             :             }
    1449           0 :             return; // no more data for now but expecting more
    1450             :           case STREAM_ENDED:
                                 // mAtEOF ensures the end-of-stream handling below runs only
                                 // once per stream end.
    1451           0 :             if (mAtEOF) {
    1452             :               return;
    1453             :             }
    1454           0 :             mAtEOF = true;
    1455           0 :             if (mCharsetSource < kCharsetFromMetaTag) {
    1456           0 :               if (mInitialEncodingWasFromParentFrame) {
    1457             :                 // Unfortunately, this check doesn't take effect for
    1458             :                 // cross-origin frames, so cross-origin ad frames that have
    1459             :                 // no text and only an image or a Flash embed get the more
    1460             :                 // severe message from the next if block. The message is
    1461             :                 // technically accurate, though.
    1462           0 :                 mTreeBuilder->MaybeComplainAboutCharset(
    1463           0 :                   "EncNoDeclarationFrame", false, 0);
    1464           0 :               } else if (mMode == NORMAL) {
    1465           0 :                 mTreeBuilder->MaybeComplainAboutCharset(
    1466           0 :                   "EncNoDeclaration", true, 0);
    1467           0 :               } else if (mMode == PLAIN_TEXT) {
    1468           0 :                 mTreeBuilder->MaybeComplainAboutCharset(
    1469           0 :                   "EncNoDeclarationPlain", true, 0);
    1470             :               }
    1471             :             }
                                 // The tree builder's broken state is checked both before and
                                 // after eof(), so a failure raised while handling EOF is
                                 // also caught.
    1472           0 :             if (NS_SUCCEEDED(mTreeBuilder->IsBroken())) {
    1473           0 :               mTokenizer->eof();
    1474             :               nsresult rv;
    1475           0 :               if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
    1476           0 :                 MarkAsBroken(rv);
    1477             :               } else {
    1478           0 :                 mTreeBuilder->StreamEnded();
    1479           0 :                 if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
    1480           0 :                   mTokenizer->EndViewSource();
    1481             :                 }
    1482             :               }
    1483             :             }
    1484           0 :             FlushTreeOpsAndDisarmTimer();
    1485           0 :             return; // no more data and not expecting more
    1486             :           default:
    1487           0 :             NS_NOTREACHED("It should be impossible to reach this.");
    1488             :             return;
    1489             :         }
    1490             :       }
    1491           0 :       mFirstBuffer = mFirstBuffer->next;
    1492           0 :       continue;
    1493             :     }
    1494             : 
    1495             :     // now we have a non-empty buffer
                           // NOTE(review): adjust() presumably skips a line feed that follows a
                           // carriage return which ended the previous tokenizer run -- confirm
                           // against nsHtml5OwningUTF16Buffer. It can leave the buffer empty,
                           // hence the second hasMore() check.
    1496           0 :     mFirstBuffer->adjust(mLastWasCR);
    1497           0 :     mLastWasCR = false;
    1498           0 :     if (mFirstBuffer->hasMore()) {
    1499           0 :       if (!mTokenizer->EnsureBufferSpace(mFirstBuffer->getLength())) {
    1500           0 :         MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
    1501           0 :         return;
    1502             :       }
    1503           0 :       mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer);
    1504             :       nsresult rv;
    1505           0 :       if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
    1506           0 :         MarkAsBroken(rv);
    1507           0 :         return;
    1508             :       }
    1509             :       // At this point, internalEncodingDeclaration() may have called
    1510             :       // Terminate, but that never happens together with script.
    1511             :       // Can't assert that here, though, because it's possible that the main
    1512             :       // thread has called Terminate() while this thread was parsing.
    1513           0 :       if (mTreeBuilder->HasScript()) {
    1514             :         // HasScript() cannot return true if the tree builder is preventing
    1515             :         // script execution.
    1516           0 :         MOZ_ASSERT(mMode == NORMAL);
                               // Start a speculation under mSpeculationMutex: record the
                               // current buffer position, line number and a tree builder
                               // snapshot, flush what has been built so far, and redirect the
                               // tree builder's ops into the new speculation.
    1517           0 :         mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
    1518             :         nsHtml5Speculation* speculation =
    1519             :           new nsHtml5Speculation(mFirstBuffer,
    1520           0 :                                  mFirstBuffer->getStart(),
    1521           0 :                                  mTokenizer->getLineNumber(),
    1522           0 :                                  mTreeBuilder->newSnapshot());
    1523           0 :         mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(),
    1524           0 :                                           speculation->GetStartLineNumber());
    1525           0 :         FlushTreeOpsAndDisarmTimer();
    1526           0 :         mTreeBuilder->SetOpSink(speculation);
    1527           0 :         mSpeculations.AppendElement(speculation); // adopts the pointer
    1528           0 :         mSpeculating = true;
    1529             :       }
    1530           0 :       if (IsTerminatedOrInterrupted()) {
    1531             :         return;
    1532             :       }
    1533             :     }
    1534             :   }
    1535             : }
    1536             : 
                       // Runnable that resumes parsing: while holding the stream parser's
                       // tokenizer mutex it calls Uninterrupt() and then ParseAvailableData().
    1537           0 : class nsHtml5StreamParserContinuation : public Runnable
    1538             : {
    1539             : private:
                         // The stream parser to resume.
    1540             :   nsHtml5StreamParserPtr mStreamParser;
    1541             : 
    1542             : public:
                         // aStreamParser: the parser to resume; stored in mStreamParser.
    1543           0 :   explicit nsHtml5StreamParserContinuation(nsHtml5StreamParser* aStreamParser)
    1544           0 :     : Runnable("nsHtml5StreamParserContinuation")
    1545           0 :     , mStreamParser(aStreamParser)
    1546             :   {
    1547           0 :   }
                         // Locks mTokenizerMutex, clears the interrupt, and parses whatever data
                         // is buffered. Always returns NS_OK.
    1548           0 :   NS_IMETHOD Run() override
    1549             :   {
    1550           0 :     mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
    1551           0 :     mStreamParser->Uninterrupt();
    1552           0 :     mStreamParser->ParseAvailableData();
    1553           0 :     return NS_OK;
    1554             :   }
    1555             : };
    1556             : 
    1557             : void
    1558           0 : nsHtml5StreamParser::ContinueAfterScripts(nsHtml5Tokenizer* aTokenizer,
    1559             :                                           nsHtml5TreeBuilder* aTreeBuilder,
    1560             :                                           bool aLastWasCR)
    1561             : {
    1562           0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
    1563           0 :   NS_ASSERTION(!(mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML),
    1564             :                "ContinueAfterScripts called in view source mode!");
    1565           0 :   if (NS_FAILED(mExecutor->IsBroken())) {
    1566             :     return;
    1567             :   }
    1568             : #ifdef DEBUG
    1569           0 :   mExecutor->AssertStageEmpty();
    1570             : #endif
    1571           0 :   bool speculationFailed = false;
    1572             :   {
    1573           0 :     mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
    1574           0 :     if (mSpeculations.IsEmpty()) {
    1575           0 :       NS_NOTREACHED("ContinueAfterScripts called without speculations.");
    1576           0 :       return;
    1577             :     }
    1578           0 :     nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
    1579           0 :     if (aLastWasCR || !aTokenizer->isInDataState() ||
    1580           0 :         !aTreeBuilder->snapshotMatches(speculation->GetSnapshot())) {
    1581           0 :       speculationFailed = true;
    1582             :       // We've got a failed speculation :-(
    1583           0 :       MaybeDisableFutureSpeculation();
    1584           0 :       Interrupt(); // Make the parser thread release the tokenizer mutex sooner
    1585             :       // now fall out of the speculationAutoLock into the tokenizerAutoLock
    1586             :       // block
    1587             :     } else {
    1588             :       // We've got a successful speculation!
    1589           0 :       if (mSpeculations.Length() > 1) {
    1590             :         // the first speculation isn't the current speculation, so there's
    1591             :         // no need to bother the parser thread.
    1592           0 :         speculation->FlushToSink(mExecutor);
    1593           0 :         NS_ASSERTION(!mExecutor->IsScriptExecuting(),
    1594             :                      "ParseUntilBlocked() was supposed to ensure we don't come "
    1595             :                      "here when scripts are executing.");
    1596           0 :         NS_ASSERTION(
    1597             :           mExecutor->IsInFlushLoop(),
    1598             :           "How are we here if "
    1599             :           "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
    1600             :           "only caller of this method?");
    1601           0 :         mSpeculations.RemoveElementAt(0);
    1602           0 :         return;
    1603             :       }
    1604             :       // else
    1605           0 :       Interrupt(); // Make the parser thread release the tokenizer mutex sooner
    1606             : 
    1607             :       // now fall through
    1608             :       // the first speculation is the current speculation. Need to
    1609             :       // release the speculation mutex and acquire the tokenizer
    1610             :       // mutex. (Just acquiring the other mutex here would deadlock)
    1611             :     }
    1612             :   }
    1613             :   {
    1614           0 :     mozilla::MutexAutoLock tokenizerAutoLock(mTokenizerMutex);
    1615             : #ifdef DEBUG
    1616             :     {
    1617           0 :       mAtomTable.SetPermittedLookupEventTarget(
    1618           0 :         GetMainThreadSerialEventTarget());
    1619             :     }
    1620             : #endif
    1621             :     // In principle, the speculation mutex should be acquired here,
    1622             :     // but there's no point, because the parser thread only acquires it
    1623             :     // when it has also acquired the tokenizer mutex and we are already
    1624             :     // holding the tokenizer mutex.
    1625           0 :     if (speculationFailed) {
    1626             :       // Rewind the stream
    1627           0 :       mAtEOF = false;
    1628           0 :       nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
    1629           0 :       mFirstBuffer = speculation->GetBuffer();
    1630           0 :       mFirstBuffer->setStart(speculation->GetStart());
    1631           0 :       mTokenizer->setLineNumber(speculation->GetStartLineNumber());
    1632             : 
    1633           0 :       nsContentUtils::ReportToConsole(nsIScriptError::warningFlag,
    1634           0 :                                       NS_LITERAL_CSTRING("DOM Events"),
    1635           0 :                                       mExecutor->GetDocument(),
    1636             :                                       nsContentUtils::eDOM_PROPERTIES,
    1637             :                                       "SpeculationFailed",
    1638             :                                       nullptr,
    1639             :                                       0,
    1640             :                                       nullptr,
    1641             :                                       EmptyString(),
    1642           0 :                                       speculation->GetStartLineNumber());
    1643             : 
    1644           0 :       nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next;
    1645           0 :       while (buffer) {
    1646           0 :         buffer->setStart(0);
    1647           0 :         buffer = buffer->next;
    1648             :       }
    1649             : 
    1650           0 :       mSpeculations.Clear(); // potentially a huge number of destructors
    1651             :                              // run here synchronously on the main thread...
    1652             : 
    1653           0 :       mTreeBuilder->flushCharacters(); // empty the pending buffer
    1654           0 :       mTreeBuilder->ClearOps();        // now get rid of the failed ops
    1655             : 
    1656           0 :       mTreeBuilder->SetOpSink(mExecutor->GetStage());
    1657           0 :       mExecutor->StartReadingFromStage();
    1658           0 :       mSpeculating = false;
    1659             : 
    1660             :       // Copy state over
    1661           0 :       mLastWasCR = aLastWasCR;
    1662           0 :       mTokenizer->loadState(aTokenizer);
    1663           0 :       mTreeBuilder->loadState(aTreeBuilder, &mAtomTable);
    1664             :     } else {
    1665             :       // We've got a successful speculation and at least a moment ago it was
    1666             :       // the current speculation
    1667           0 :       mSpeculations.ElementAt(0)->FlushToSink(mExecutor);
    1668           0 :       NS_ASSERTION(!mExecutor->IsScriptExecuting(),
    1669             :                    "ParseUntilBlocked() was supposed to ensure we don't come "
    1670             :                    "here when scripts are executing.");
    1671           0 :       NS_ASSERTION(
    1672             :         mExecutor->IsInFlushLoop(),
    1673             :         "How are we here if "
    1674             :         "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
    1675             :         "only caller of this method?");
    1676           0 :       mSpeculations.RemoveElementAt(0);
    1677           0 :       if (mSpeculations.IsEmpty()) {
    1678             :         // yes, it was still the only speculation. Now stop speculating
    1679             :         // However, before telling the executor to read from stage, flush
    1680             :         // any pending ops straight to the executor, because otherwise
    1681             :         // they remain unflushed until we get more data from the network.
    1682           0 :         mTreeBuilder->SetOpSink(mExecutor);
    1683           0 :         mTreeBuilder->Flush(true);
    1684           0 :         mTreeBuilder->SetOpSink(mExecutor->GetStage());
    1685           0 :         mExecutor->StartReadingFromStage();
    1686           0 :         mSpeculating = false;
    1687             :       }
    1688             :     }
    1689           0 :     nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this);
    1690           0 :     if (NS_FAILED(mEventTarget->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
    1691           0 :       NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
    1692             :     }
    1693             : // A stream event might run before this event runs, but that's harmless.
    1694             : #ifdef DEBUG
    1695           0 :     mAtomTable.SetPermittedLookupEventTarget(mEventTarget);
    1696             : #endif
    1697             :   }
    1698             : }
    1699             : // Main thread only: dispatches a new nsHtml5StreamParserContinuation for this parser to mEventTarget.
    1700             : void
    1701           0 : nsHtml5StreamParser::ContinueAfterFailedCharsetSwitch()
    1702             : {
    1703           0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
    1704           0 :   nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this);
    1705           0 :   if (NS_FAILED(mEventTarget->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
    1706           0 :     NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
    1707             :   } // A failed dispatch is only warned about; nothing is returned to the caller.
    1708           0 : }
    1709             : // Runnable that cancels and clears the stream parser's mFlushTimer; DropTimer() dispatches it to mEventTarget.
    1710           0 : class nsHtml5TimerKungFu : public Runnable
    1711             : {
    1712             : private:
    1713             :   nsHtml5StreamParserPtr mStreamParser;
    1714             :   // Per the comment in DropTimer(), the pointer above addrefs the parser until this runnable is done.
    1715             : public:
    1716           0 :   explicit nsHtml5TimerKungFu(nsHtml5StreamParser* aStreamParser)
    1717           0 :     : Runnable("nsHtml5TimerKungFu")
    1718           0 :     , mStreamParser(aStreamParser)
    1719             :   {
    1720           0 :   }
    1721           0 :   NS_IMETHOD Run() override
    1722             :   { // Takes mFlushTimerMutex, then cancels and nulls the timer if one is set. Always returns NS_OK.
    1723           0 :     mozilla::MutexAutoLock flushTimerLock(mStreamParser->mFlushTimerMutex);
    1724           0 :     if (mStreamParser->mFlushTimer) {
    1725           0 :       mStreamParser->mFlushTimer->Cancel();
    1726           0 :       mStreamParser->mFlushTimer = nullptr;
    1727             :     }
    1728           0 :     return NS_OK;
    1729             :   }
    1730             : };
    1731             : // Main thread only: if a flush timer exists, dispatches an nsHtml5TimerKungFu to mEventTarget to cancel it.
    1732             : void
    1733           0 : nsHtml5StreamParser::DropTimer()
    1734             : {
    1735           0 :   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
    1736             :   /*
    1737             :    * Simply nulling out the timer wouldn't work, because if the timer is
    1738             :    * armed, it needs to be canceled first. Simply canceling it first wouldn't
    1739             :    * work, because nsTimerImpl::Cancel is not safe for calling from outside
    1740             :    * the thread where nsTimerImpl::Fire would run. It's not safe to
    1741             :    * dispatch a runnable to cancel the timer from the destructor of this
    1742             :    * class, because the timer has a weak (void*) pointer back to this instance
    1743             :    * of the stream parser and having the timer fire before the runnable
    1744             :    * cancels it would make the timer access a deleted object.
    1745             :    *
    1746             :    * This DropTimer method addresses these issues. This method must be called
    1747             :    * on the main thread before the destructor of this class is reached.
    1748             :    * The nsHtml5TimerKungFu object has an nsHtml5StreamParserPtr that
    1749             :    * addrefs this stream parser object to keep it alive until the runnable
    1750             :    * is done.
    1751             :    * The runnable cancels the timer on the parser thread, drops the timer
    1752             :    * and lets nsHtml5StreamParserPtr send a runnable back to the main thread to
    1753             :    * release the stream parser.
    1754             :    */
    1755           0 :   mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex);
    1756           0 :   if (mFlushTimer) {
    1757           0 :     nsCOMPtr<nsIRunnable> event = new nsHtml5TimerKungFu(this);
    1758           0 :     if (NS_FAILED(mEventTarget->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
    1759           0 :       NS_WARNING("Failed to dispatch TimerKungFu event");
    1760             :     }
    1761             :   }
    1762           0 : }
    1763             : // Timer callback: casts aClosure back to nsHtml5StreamParser and calls TimerFlush(); aTimer is unused.
    1764             : // Using a static, because the method name Notify is taken by the chardet
    1765             : // callback.
    1766             : void
    1767           0 : nsHtml5StreamParser::TimerCallback(nsITimer* aTimer, void* aClosure)
    1768             : {
    1769           0 :   (static_cast<nsHtml5StreamParser*>(aClosure))->TimerFlush();
    1770           0 : }
    1771             : // Flush-timer handler (called from TimerCallback): flushes pending ops and, if any, dispatches mExecutorFlusher.
    1772             : void
    1773           0 : nsHtml5StreamParser::TimerFlush()
    1774             : {
    1775           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1776           0 :   mozilla::MutexAutoLock autoLock(mTokenizerMutex);
    1777             :   // mTokenizerMutex stays held until this method returns.
    1778           0 :   NS_ASSERTION(!mSpeculating, "Flush timer fired while speculating.");
    1779             : 
    1780             :   // The timer fired if we got here. No need to cancel it. Mark it as
    1781             :   // not armed, though.
    1782           0 :   mFlushTimerArmed = false;
    1783             :   // Recorded before the early return below, so it is set even if nothing gets flushed.
    1784           0 :   mFlushTimerEverFired = true;
    1785             :   // Skip flushing entirely when the parser is terminated or interrupted.
    1786           0 :   if (IsTerminatedOrInterrupted()) {
    1787           0 :     return;
    1788             :   }
    1789             :   // Either branch dispatches mExecutorFlusher only when its flush call returns true.
    1790           0 :   if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
    1791           0 :     mTreeBuilder->Flush(); // delete useless ops
    1792           0 :     if (mTokenizer->FlushViewSource()) {
    1793           0 :       nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher);
    1794           0 :       if (NS_FAILED(DispatchToMain(runnable.forget()))) {
    1795           0 :         NS_WARNING("failed to dispatch executor flush event");
    1796             :       }
    1797             :     }
    1798             :   } else {
    1799             :     // we aren't speculating and we don't know when new data is
    1800             :     // going to arrive. Send data to the main thread.
    1801           0 :     if (mTreeBuilder->Flush(true)) {
    1802           0 :       nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher);
    1803           0 :       if (NS_FAILED(DispatchToMain(runnable.forget()))) {
    1804           0 :         NS_WARNING("failed to dispatch executor flush event");
    1805             :       }
    1806             :     }
    1807             :   }
    1808             : }
    1809             : // Terminates this parser, passes aRv to the tree builder's MarkAsBroken(), flushes, and dispatches mExecutorFlusher.
    1810             : void
    1811           0 : nsHtml5StreamParser::MarkAsBroken(nsresult aRv)
    1812             : {
    1813           0 :   NS_ASSERTION(IsParserThread(), "Wrong thread!");
    1814           0 :   mTokenizerMutex.AssertCurrentThreadOwns();
    1815             :   // Per the assertions above: parser thread only, with mTokenizerMutex already held by the caller.
    1816           0 :   Terminate();
    1817           0 :   mTreeBuilder->MarkAsBroken(aRv);
    1818           0 :   mozilla::DebugOnly<bool> hadOps = mTreeBuilder->Flush(false);
    1819           0 :   NS_ASSERTION(hadOps, "Should have had the markAsBroken op!");
    1820           0 :   nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher);
    1821           0 :   if (NS_FAILED(DispatchToMain(runnable.forget()))) {
    1822           0 :     NS_WARNING("failed to dispatch executor flush event");
    1823             :   }
    1824           0 : }
    1825             : // Dispatches aRunnable as TaskCategory::Network via mDocGroup if set, else SchedulerGroup::UnlabeledDispatch.
    1826             : nsresult
    1827           0 : nsHtml5StreamParser::DispatchToMain(already_AddRefed<nsIRunnable>&& aRunnable)
    1828             : {
    1829           0 :   if (mDocGroup) {
    1830           0 :     return mDocGroup->Dispatch(TaskCategory::Network, std::move(aRunnable));
    1831             :   }
    1832             :   return SchedulerGroup::UnlabeledDispatch(TaskCategory::Network,
    1833           0 :                                            std::move(aRunnable));
    1834             : }

Generated by: LCOV version 1.13-14-ga5dd952