LCOV - code coverage report
Current view: top level - js/public - ProfilingStack.h (source / functions) Hit Total Coverage
Test: output.info Lines: 28 86 32.6 %
Date: 2018-08-07 16:35:00 Functions: 0 0 -
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
       2             :  * vim: set ts=8 sts=4 et sw=4 tw=99:
       3             :  * This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : #ifndef js_ProfilingStack_h
       8             : #define js_ProfilingStack_h
       9             : 
      10             : #include <algorithm>
      11             : #include <stdint.h>
      12             : 
      13             : #include "jstypes.h"
      14             : 
      15             : #include "js/TypeDecls.h"
      16             : #include "js/Utility.h"
      17             : 
      18             : #ifdef JS_BROKEN_GCC_ATTRIBUTE_WARNING
      19             : #pragma GCC diagnostic push
      20             : #pragma GCC diagnostic ignored "-Wattributes"
      21             : #endif // JS_BROKEN_GCC_ATTRIBUTE_WARNING
      22             : 
      23             : class JS_PUBLIC_API(JSTracer);
      24             : 
      25             : #ifdef JS_BROKEN_GCC_ATTRIBUTE_WARNING
      26             : #pragma GCC diagnostic pop
      27             : #endif // JS_BROKEN_GCC_ATTRIBUTE_WARNING
      28             : 
      29             : class ProfilingStack;
      30             : 
      31             : // This file defines the classes ProfilingStack and ProfilingStackFrame.
      32             : // The ProfilingStack manages an array of ProfilingStackFrames.
      33             : // It keeps track of the "label stack" and the JS interpreter stack.
      34             : // The two stack types are interleaved.
      35             : //
      36             : // Usage:
      37             : //
      38             : //  ProfilingStack* profilingStack = ...;
      39             : //
      40             : //  // For label frames:
      41             : //  profilingStack->pushLabelFrame(...);
      42             : //  // Execute some code. When finished, pop the frame:
      43             : //  profilingStack->pop();
      44             : //
      45             : //  // For JS stack frames:
      46             : //  profilingStack->pushJSFrame(...);
      47             : //  // Execute some code. When finished, pop the frame:
      48             : //  profilingStack->pop();
      49             : //
      50             : //
      51             : // Concurrency considerations
      52             : //
      53             : // A thread's profiling stack (and the frames inside it) is only modified by
      54             : // that thread. However, the profiling stack can be *read* by a different thread,
      55             : // the sampler thread: Whenever the profiler wants to sample a given thread A,
      56             : // the following happens:
      57             : //  (1) Thread A is suspended.
      58             : //  (2) The sampler thread (thread S) reads the ProfilingStack of thread A,
      59             : //      including all ProfilingStackFrames that are currently in that stack
      60             : //      (profilingStack->frames[0..profilingStack->stackSize()]).
      61             : //  (3) Thread A is resumed.
      62             : //
      63             : // Thread suspension is achieved using platform-specific APIs; refer to each
      64             : // platform's Sampler::SuspendAndSampleAndResumeThread implementation in
      65             : // platform-*.cpp for details.
      66             : //
      67             : // When the thread is suspended, the values in profilingStack->stackPointer and in
      68             : // the stack frame range profilingStack->frames[0..profilingStack->stackPointer] need
      69             : // to be in a consistent state, so that thread S does not read partially-
      70             : // constructed stack frames. More specifically, we have two requirements:
      71             : //  (1) When adding a new frame at the top of the stack, its ProfilingStackFrame
      72             : //      data needs to be put in place *before* the stackPointer is incremented,
      73             : //      and the compiler + CPU need to know that this order matters.
      74             : //  (2) When popping a frame from the stack and then preparing the
      75             : //      ProfilingStackFrame data for the next frame that is about to be pushed,
      76             : //      the decrement of the stackPointer in pop() needs to happen *before* the
      77             : //      ProfilingStackFrame for the new frame is being populated, and the
      78             : //      compiler + CPU need to know that this order matters.
      79             : //
      80             : // We can express the relevance of these orderings in multiple ways.
      81             : // Option A is to make stackPointer an atomic with SequentiallyConsistent
      82             : // memory ordering. This would ensure that no writes in thread A would be
      83             : // reordered across any writes to stackPointer, which satisfies requirements
      84             : // (1) and (2) at the same time. Option A is the simplest.
      85             : // Option B is to use ReleaseAcquire memory ordering both for writes to
      86             : // stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores
      87             : // ensure that all writes that happened *before this write in program order* are
      88             : // not reordered to happen after this write. ReleaseAcquire ordering places no
      89             : // requirements on the ordering of writes that happen *after* this write in
      90             : // program order.
      91             : // Using release-stores for writes to stackPointer expresses requirement (1),
      92             : // and using release-stores for writes to the ProfilingStackFrame fields
      93             : // expresses requirement (2).
      94             : //
      95             : // Option B is more complicated than option A, but has much better performance
      96             : // on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching
      97             : // from option A to option B reduced the overhead of pushing+popping a
      98             : // ProfilingStackFrame by 10 nanoseconds.
      99             : // On x86/64, release-stores require no explicit hardware barriers or lock
     100             : // instructions.
     101             : // On ARM/64, option B may be slower than option A, because the compiler will
     102             : // generate hardware barriers for every single release-store instead of just
     103             : // for the writes to stackPointer. However, the actual performance impact of
     104             : // this has not yet been measured on ARM, so we're currently using option B
     105             : // everywhere. This is something that we may want to change in the future once
     106             : // we've done measurements.
     107             : 
     108             : namespace js {
     109             : 
     110             : // A call stack can be specified to the JS engine such that all JS entry/exits
     111             : // to functions push/pop a stack frame to/from the specified stack.
     112             : //
     113             : // For more detailed information, see vm/GeckoProfiler.h.
     114             : //
     115             : class ProfilingStackFrame
     116             : {
     117             :     // A ProfilingStackFrame represents a label frame, an SP marker frame, or a JS frame (see Kind below).
     118             : 
     119             :     // WARNING WARNING WARNING
     120             :     //
     121             :     // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so
     122             :     // that writes to these fields are release-writes, which ensures that
     123             :     // earlier writes in this thread don't get reordered after the writes to
     124             :     // these fields. In particular, the decrement of the stack pointer in
     125             :     // ProfilingStack::pop() is a write that *must* happen before the values in
     126             :     // this ProfilingStackFrame are changed. Otherwise, the sampler thread might
     127             :     // see an inconsistent state where the stack pointer still points to a
     128             :     // ProfilingStackFrame which has already been popped off the stack and whose
     129             :     // fields have now been partially repopulated with new values.
     130             :     // See the "Concurrency considerations" paragraph at the top of this file
     131             :     // for more details.
     132             : 
     133             :     // Descriptive label for this stack frame. Must be a static string! Can be
     134             :     // an empty string, but not a null pointer.
     135             :     mozilla::Atomic<const char*, mozilla::ReleaseAcquire> label_;
     136             : 
     137             :     // An additional descriptive string of this frame which is combined with
     138             :     // |label_| in profiler output. Need not be (and usually isn't) static. Can
     139             :     // be null.
     140             :     mozilla::Atomic<const char*, mozilla::ReleaseAcquire> dynamicString_;
     141             : 
     142             :     // Stack pointer for non-JS stack frames, the script pointer otherwise.
     143             :     mozilla::Atomic<void*, mozilla::ReleaseAcquire> spOrScript;
     144             : 
     145             :     // Line number for non-JS stack frames, the bytecode offset otherwise.
     146             :     mozilla::Atomic<int32_t, mozilla::ReleaseAcquire> lineOrPcOffset;
     147             : 
     148             :     // Bits 0...1 hold the Kind. Bits 2...31 hold the category.
     149             :     mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> kindAndCategory_;
     150             : 
     151             :     static int32_t pcToOffset(JSScript* aScript, jsbytecode* aPc);
     152             : 
     153             :   public:
     154       52830 :     ProfilingStackFrame() = default;
     155           0 :     ProfilingStackFrame& operator=(const ProfilingStackFrame& other)
     156             :     {  // Field-by-field copy: each member is loaded and stored individually, so the copy as a whole is not one atomic operation.
     157           0 :         label_ = other.label();
     158           0 :         dynamicString_ = other.dynamicString();
     159           0 :         void* spScript = other.spOrScript;
     160           0 :         spOrScript = spScript;
     161           0 :         int32_t offset = other.lineOrPcOffset;
     162           0 :         lineOrPcOffset = offset;
     163           0 :         uint32_t kindAndCategory = other.kindAndCategory_;
     164           0 :         kindAndCategory_ = kindAndCategory;
     165           0 :         return *this;
     166             :     }
     167             : 
     168             :     enum class Kind : uint32_t {
     169             :         // A regular label frame. These usually come from AutoProfilerLabel.
     170             :         LABEL = 0,
     171             : 
     172             :         // A special frame indicating the start of a run of JS profiling stack
     173             :         // frames. SP_MARKER frames are ignored, except for the sp field.
     174             :         // These frames are needed to get correct ordering between JS and LABEL
     175             :         // frames because JS frames don't carry sp information.
     176             :         // SP is short for "stack pointer".
     177             :         SP_MARKER = 1,
     178             : 
     179             :         // A normal JS frame.
     180             :         JS_NORMAL = 2,
     181             : 
     182             :         // An interpreter JS frame that has OSR-ed into baseline. JS_NORMAL
     183             :         // frames can be converted to JS_OSR and back. JS_OSR frames are
     184             :         // ignored.
     185             :         JS_OSR = 3,
     186             : 
     187             :         KIND_BITCOUNT = 2,  // Not a frame kind: number of low bits of kindAndCategory_ holding the Kind. Shares the value 2 with JS_NORMAL.
     188             :         KIND_MASK = (1 << KIND_BITCOUNT) - 1  // Not a frame kind: mask for those low bits. Shares the value 3 with JS_OSR.
     189             :     };
     190             : 
     191             :     // Keep these in sync with devtools/client/performance/modules/categories.js
     192             :     enum class Category : uint32_t {
     193             :         IDLE,
     194             :         OTHER,
     195             :         LAYOUT,
     196             :         JS,
     197             :         GCCC,
     198             :         NETWORK,
     199             :         GRAPHICS,
     200             :         DOM,
     201             : 
     202             :         FIRST    = OTHER,  // IDLE (value 0) lies outside the FIRST..LAST range.
     203             :         LAST     = DOM,
     204             :     };
     205             : 
     206             :     static_assert(uint32_t(Category::LAST) <= (UINT32_MAX >> uint32_t(Kind::KIND_BITCOUNT)),
     207             :                   "Too many categories to fit into u32 with two bits reserved for the kind");
     208             : 
     209        7536 :     bool isLabelFrame() const
     210             :     {
     211       20215 :         return kind() == Kind::LABEL;
     212             :     }
     213             : 
     214           0 :     bool isSpMarkerFrame() const
     215             :     {
     216       14914 :         return kind() == Kind::SP_MARKER;
     217             :     }
     218             : 
     219           0 :     bool isJsFrame() const
     220             :     {
     221       14953 :         Kind k = kind();
     222           0 :         return k == Kind::JS_NORMAL || k == Kind::JS_OSR;
     223             :     }
     224             : 
     225             :     void setLabel(const char* aLabel) { label_ = aLabel; }
     226           0 :     const char* label() const { return label_; }
     227             : 
     228           0 :     const char* dynamicString() const { return dynamicString_; }
     229             : 
     230       20215 :     void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp,
     231             :                         uint32_t aLine, Category aCategory)
     232             :     {
     233           0 :         label_ = aLabel;
     234       40429 :         dynamicString_ = aDynamicString;
     235       40430 :         spOrScript = sp;
     236           0 :         lineOrPcOffset = static_cast<int32_t>(aLine);  // Stored as int32_t; line() casts it back to uint32_t.
     237           0 :         kindAndCategory_ = uint32_t(Kind::LABEL) | (uint32_t(aCategory) << uint32_t(Kind::KIND_BITCOUNT));
     238           0 :         MOZ_ASSERT(isLabelFrame());
     239           0 :     }
     240             : 
     241           0 :     void initSpMarkerFrame(void* sp)
     242             :     {
     243       29828 :         label_ = "";
     244           0 :         dynamicString_ = nullptr;
     245       29828 :         spOrScript = sp;
     246           0 :         lineOrPcOffset = 0;
     247           0 :         kindAndCategory_ = uint32_t(Kind::SP_MARKER) | (uint32_t(Category::OTHER) << uint32_t(Kind::KIND_BITCOUNT));
     248           0 :         MOZ_ASSERT(isSpMarkerFrame());
     249           0 :     }
     250             : 
     251           0 :     void initJsFrame(const char* aLabel, const char* aDynamicString, JSScript* aScript,
     252             :                      jsbytecode* aPc)
     253             :     {
     254           0 :         label_ = aLabel;
     255       29828 :         dynamicString_ = aDynamicString;
     256       29828 :         spOrScript = aScript;
     257           0 :         lineOrPcOffset = pcToOffset(aScript, aPc);
     258           0 :         kindAndCategory_ = uint32_t(Kind::JS_NORMAL) | (uint32_t(Category::JS) << uint32_t(Kind::KIND_BITCOUNT));
     259           0 :         MOZ_ASSERT(isJsFrame());
     260           0 :     }
     261             : 
     262           0 :     void setKind(Kind aKind) {  // Overwrites the kind bits while keeping the current category.
     263           0 :         kindAndCategory_ = uint32_t(aKind) | (uint32_t(category()) << uint32_t(Kind::KIND_BITCOUNT));
     264           0 :     }
     265             : 
     266           0 :     Kind kind() const {
     267           0 :         return Kind(kindAndCategory_ & uint32_t(Kind::KIND_MASK));
     268             :     }
     269             : 
     270           0 :     Category category() const {
     271           0 :         return Category(kindAndCategory_ >> uint32_t(Kind::KIND_BITCOUNT));
     272             :     }
     273             : 
     274           0 :     void* stackAddress() const {
     275           0 :         MOZ_ASSERT(!isJsFrame());
     276           0 :         return spOrScript;
     277             :     }
     278             : 
     279             :     JS_PUBLIC_API(JSScript*) script() const;
     280             : 
     281           0 :     uint32_t line() const {
     282           0 :         MOZ_ASSERT(!isJsFrame());
     283           0 :         return static_cast<uint32_t>(lineOrPcOffset);
     284             :     }
     285             : 
     286             :     // Note that the pointer returned might be invalid.
     287          10 :     JSScript* rawScript() const {
     288          10 :         MOZ_ASSERT(isJsFrame());
     289          20 :         void* script = spOrScript;
     290           0 :         return static_cast<JSScript*>(script);
     291             :     }
     292             : 
     293             :     // We can't know the layout of JSScript, so look in vm/GeckoProfiler.cpp.
     294             :     JS_FRIEND_API(jsbytecode*) pc() const;
     295             :     void setPC(jsbytecode* pc);
     296             : 
     297             :     void trace(JSTracer* trc);
     298             : 
     299             :     // The offset of a pc into a script's code can actually be 0, so to
     300             :     // signify a nullptr pc, use a -1 index. This is checked against in
     301             :     // pc() and setPC() to set/get the right pc.
     302             :     static const int32_t NullPCOffset = -1;
     303             : };
     304             : 
     305             : JS_FRIEND_API(void)  // Declaration only; no definition appears in this header.
     306             : SetContextProfilingStack(JSContext* cx, ProfilingStack* profilingStack);
     307             : 
     308             : // GetContextProfilingStack also exists, but it's defined in RootingAPI.h.
     309             : 
     310             : JS_FRIEND_API(void)  // Declaration only; no definition appears in this header.
     311             : EnableContextProfilingStack(JSContext* cx, bool enabled);
     312             : 
     313             : JS_FRIEND_API(void)  // Declaration only. |fn| is a plain function pointer taking a const char* and returning void.
     314             : RegisterContextProfilingEventMarker(JSContext* cx, void (*fn)(const char*));
     315             : 
     316             : } // namespace js
     317             : 
     318             : namespace JS {
     319             : 
     320             : typedef void  // Function-pointer type: void(const char* threadName, void* stackBase).
     321             : (* RegisterThreadCallback)(const char* threadName, void* stackBase);
     322             : 
     323             : typedef void  // Function-pointer type: void(), no arguments.
     324             : (* UnregisterThreadCallback)();
     325             : 
     326             : JS_FRIEND_API(void)  // Declaration only; takes one callback of each of the two types declared above.
     327             : SetProfilingThreadCallbacks(RegisterThreadCallback registerThread,
     328             :                             UnregisterThreadCallback unregisterThread);
     329             : 
     330             : } // namespace JS
     331             : 
     332             : // Each thread has its own ProfilingStack. That thread modifies the ProfilingStack,
     333             : // pushing and popping elements as necessary.
     334             : //
     335             : // The ProfilingStack is also read periodically by the profiler's sampler thread.
     336             : // This happens only when the thread that owns the ProfilingStack is suspended.
     337             : // So there are no genuine parallel accesses.
     338             : //
     339             : // However, it is possible for pushing/popping to be interrupted by a periodic
     340             : // sample. Because of this, we need pushing/popping to be effectively atomic.
     341             : //
     342             : // - When pushing a new frame, we increment the stack pointer -- making the new
     343             : //   frame visible to the sampler thread -- only after the new frame has been
     344             : //   fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so
     345             : //   the increment is a release-store, which ensures that this store is not
     346             : //   reordered before the writes of the frame.
     347             : //
     348             : // - When popping an old frame, the only operation is the decrementing of the
     349             : //   stack pointer, which is obviously atomic.
     350             : //
     351             : class ProfilingStack final
     352             : {
     353             :   public:
     354             :     ProfilingStack()
     355         249 :       : stackPointer(0)
     356             :     {}
     357             : 
     358             :     ~ProfilingStack();
     359             : 
     360       20214 :     void pushLabelFrame(const char* label, const char* dynamicString, void* sp,
     361             :                         uint32_t line, js::ProfilingStackFrame::Category category) {
     362       40429 :         uint32_t oldStackPointer = stackPointer;
     363             : 
     364           0 :         if (MOZ_LIKELY(capacity > oldStackPointer) || MOZ_LIKELY(ensureCapacitySlow()))
     365           0 :             frames[oldStackPointer].initLabelFrame(label, dynamicString, sp, line, category);
     366             : 
     367             :         // This must happen at the end! The compiler will not reorder this
     368             :         // update because stackPointer is Atomic<..., ReleaseAcquire>, so
     369             :         // the writes above will not be reordered below the stackPointer store.
     370             :         // Do the read and the write as two separate statements, in order to
     371             :         // make it clear that we don't need an atomic increment, which would be
     372             :         // more expensive on x86 than the separate operations done here.
     373             :         // This thread is the only one that ever changes the value of
     374             :         // stackPointer. This store runs even if no frame was written above, so stackPointer may exceed capacity.
     375           0 :         stackPointer = oldStackPointer + 1;
     376       20215 :     }
     377             : 
     378       14914 :     void pushSpMarkerFrame(void* sp) {
     379           0 :         uint32_t oldStackPointer = stackPointer;
     380             : 
     381           0 :         if (MOZ_LIKELY(capacity > oldStackPointer) || MOZ_LIKELY(ensureCapacitySlow()))
     382           0 :             frames[oldStackPointer].initSpMarkerFrame(sp);
     383             : 
     384             :         // This must happen at the end, see the comment in pushLabelFrame.
     385           0 :         stackPointer = oldStackPointer + 1;
     386           0 :     }
     387             : 
     388           0 :     void pushJsFrame(const char* label, const char* dynamicString, JSScript* script,
     389             :                      jsbytecode* pc) {
     390       29828 :         uint32_t oldStackPointer = stackPointer;
     391             : 
     392       14914 :         if (MOZ_LIKELY(capacity > oldStackPointer) || MOZ_LIKELY(ensureCapacitySlow()))
     393       29828 :             frames[oldStackPointer].initJsFrame(label, dynamicString, script, pc);
     394             : 
     395             :         // This must happen at the end, see the comment in pushLabelFrame.
     396           0 :         stackPointer = oldStackPointer + 1;
     397           0 :     }
     398             : 
     399           0 :     void pop() {
     400       99965 :         MOZ_ASSERT(stackPointer > 0);
     401             :         // Do the read and the write as two separate statements, in order to
     402             :         // make it clear that we don't need an atomic decrement, which would be
     403             :         // more expensive on x86 than the separate operations done here.
     404             :         // This thread is the only one that ever changes the value of
     405             :         // stackPointer.
     406       99975 :         uint32_t oldStackPointer = stackPointer;
     407       99982 :         stackPointer = oldStackPointer - 1;
     408       49993 :     }
     409             : 
     410          12 :     uint32_t stackSize() const { return std::min(uint32_t(stackPointer), stackCapacity()); }  // Clamped, because stackPointer may exceed capacity.
     411             :     uint32_t stackCapacity() const { return capacity; }
     412             : 
     413             :   private:
     414             :     // Out of line path for expanding the buffer, since otherwise this would get inlined in every
     415             :     // DOM WebIDL call.
     416             :     MOZ_COLD MOZ_MUST_USE bool ensureCapacitySlow();
     417             : 
     418             :     // No copying.
     419             :     ProfilingStack(const ProfilingStack&) = delete;
     420             :     void operator=(const ProfilingStack&) = delete;
     421             : 
     422             :     // No moving either.
     423             :     ProfilingStack(ProfilingStack&&) = delete;
     424             :     void operator=(ProfilingStack&&) = delete;
     425             : 
     426             :     uint32_t capacity = 0;
     427             : 
     428             :   public:
     429             : 
     430             :     // The pointer to the stack frames. This is read from the profiler thread and written from the
     431             :     // current thread.
     432             :     //
     433             :     // This is effectively a unique pointer.
     434             :     mozilla::Atomic<js::ProfilingStackFrame*> frames { nullptr };
     435             : 
     436             :     // This may exceed the capacity, so instead use the stackSize() method to
     437             :     // determine the number of valid frames in |frames|. When this is less
     438             :     // than stackCapacity(), it refers to the first free stackframe past the top
     439             :     // of the in-use stack (i.e. frames[stackPointer - 1] is the top stack
     440             :     // frame).
     441             :     //
     442             :     // WARNING WARNING WARNING
     443             :     //
     444             :     // This is an atomic variable that uses ReleaseAcquire memory ordering.
     445             :     // See the "Concurrency considerations" paragraph at the top of this file
     446             :     // for more details.
     447             :     mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stackPointer;
     448             : };
     449             : 
     450             : namespace js {
     451             : 
     452             : class AutoGeckoProfilerEntry;
     453             : class GeckoProfilerEntryMarker;
     454             : class GeckoProfilerBaselineOSRMarker;
     455             : 
     456             : class GeckoProfilerThread
     457             : {
     458             :     friend class AutoGeckoProfilerEntry;
     459             :     friend class GeckoProfilerEntryMarker;
     460             :     friend class GeckoProfilerBaselineOSRMarker;
     461             : 
     462             :     ProfilingStack*         profilingStack_;
     463             : 
     464             :   public:
     465             :     GeckoProfilerThread();
     466             : 
     467             :     uint32_t stackPointer() { MOZ_ASSERT(installed()); return profilingStack_->stackPointer; }
     468             :     ProfilingStackFrame* stack() { return profilingStack_->frames; }  // NOTE(review): unlike stackPointer(), no installed() assertion before the dereference.
     469             :     ProfilingStack* getProfilingStack() { return profilingStack_; }
     470             : 
     471             :     /* Instrumentation counts as installed once profilingStack_ is non-null. */
     472             :     bool installed() { return profilingStack_ != nullptr; }
     473             : 
     474             :     void setProfilingStack(ProfilingStack* profilingStack);
     475             :     void trace(JSTracer* trc);
     476             : 
     477             :     /*
     478             :      * Functions which are the actual instrumentation to track run information
     479             :      *
     480             :      *   - enter: a function has started to execute
     481             :      *   - updatePC: updates the pc information about where a function
     482             :      *               is currently executing
     483             :      *   - exit: this function has ceased execution, and no further
     484             :      *           entries/exits will be made
     485             :      */
     486             :     bool enter(JSContext* cx, JSScript* script, JSFunction* maybeFun);
     487             :     void exit(JSScript* script, JSFunction* maybeFun);
     488             :     inline void updatePC(JSContext* cx, JSScript* script, jsbytecode* pc);
     489             : };
     490             : 
     491             : } // namespace js
     492             : 
     493             : #endif  /* js_ProfilingStack_h */

Generated by: LCOV version 1.13-14-ga5dd952