Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99:
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #ifndef js_ProfilingStack_h
8 : #define js_ProfilingStack_h
9 :
10 : #include <algorithm>
11 : #include <stdint.h>
12 :
13 : #include "jstypes.h"
14 :
15 : #include "js/TypeDecls.h"
16 : #include "js/Utility.h"
17 :
18 : #ifdef JS_BROKEN_GCC_ATTRIBUTE_WARNING
19 : #pragma GCC diagnostic push
20 : #pragma GCC diagnostic ignored "-Wattributes"
21 : #endif // JS_BROKEN_GCC_ATTRIBUTE_WARNING
22 :
23 : class JS_PUBLIC_API(JSTracer);
24 :
25 : #ifdef JS_BROKEN_GCC_ATTRIBUTE_WARNING
26 : #pragma GCC diagnostic pop
27 : #endif // JS_BROKEN_GCC_ATTRIBUTE_WARNING
28 :
29 : class ProfilingStack;
30 :
31 : // This file defines the classes ProfilingStack and ProfilingStackFrame.
32 : // The ProfilingStack manages an array of ProfilingStackFrames.
33 : // It keeps track of the "label stack" and the JS interpreter stack.
34 : // The two stack types are interleaved.
35 : //
36 : // Usage:
37 : //
38 : // ProfilingStack* profilingStack = ...;
39 : //
40 : // // For label frames:
41 : // profilingStack->pushLabelFrame(...);
42 : // // Execute some code. When finished, pop the frame:
43 : // profilingStack->pop();
44 : //
45 : // // For JS stack frames:
//       profilingStack->pushJsFrame(...);
47 : // // Execute some code. When finished, pop the frame:
48 : // profilingStack->pop();
49 : //
50 : //
51 : // Concurrency considerations
52 : //
53 : // A thread's profiling stack (and the frames inside it) is only modified by
54 : // that thread. However, the profiling stack can be *read* by a different thread,
55 : // the sampler thread: Whenever the profiler wants to sample a given thread A,
56 : // the following happens:
57 : // (1) Thread A is suspended.
58 : // (2) The sampler thread (thread S) reads the ProfilingStack of thread A,
59 : // including all ProfilingStackFrames that are currently in that stack
60 : // (profilingStack->frames[0..profilingStack->stackSize()]).
61 : // (3) Thread A is resumed.
62 : //
63 : // Thread suspension is achieved using platform-specific APIs; refer to each
64 : // platform's Sampler::SuspendAndSampleAndResumeThread implementation in
65 : // platform-*.cpp for details.
66 : //
67 : // When the thread is suspended, the values in profilingStack->stackPointer and in
68 : // the stack frame range profilingStack->frames[0..profilingStack->stackPointer] need
69 : // to be in a consistent state, so that thread S does not read partially-
70 : // constructed stack frames. More specifically, we have two requirements:
71 : // (1) When adding a new frame at the top of the stack, its ProfilingStackFrame
72 : // data needs to be put in place *before* the stackPointer is incremented,
73 : // and the compiler + CPU need to know that this order matters.
// (2) When popping a frame from the stack and then preparing the
//     ProfilingStackFrame data for the next frame that is about to be pushed,
//     the decrement of the stackPointer in pop() needs to happen *before* the
//     ProfilingStackFrame for the new frame is being populated, and the
//     compiler + CPU need to know that this order matters.
79 : //
80 : // We can express the relevance of these orderings in multiple ways.
81 : // Option A is to make stackPointer an atomic with SequentiallyConsistent
82 : // memory ordering. This would ensure that no writes in thread A would be
83 : // reordered across any writes to stackPointer, which satisfies requirements
84 : // (1) and (2) at the same time. Option A is the simplest.
85 : // Option B is to use ReleaseAcquire memory ordering both for writes to
86 : // stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores
87 : // ensure that all writes that happened *before this write in program order* are
88 : // not reordered to happen after this write. ReleaseAcquire ordering places no
89 : // requirements on the ordering of writes that happen *after* this write in
90 : // program order.
91 : // Using release-stores for writes to stackPointer expresses requirement (1),
92 : // and using release-stores for writes to the ProfilingStackFrame fields
93 : // expresses requirement (2).
94 : //
95 : // Option B is more complicated than option A, but has much better performance
96 : // on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching
97 : // from option A to option B reduced the overhead of pushing+popping a
98 : // ProfilingStackFrame by 10 nanoseconds.
99 : // On x86/64, release-stores require no explicit hardware barriers or lock
100 : // instructions.
101 : // On ARM/64, option B may be slower than option A, because the compiler will
102 : // generate hardware barriers for every single release-store instead of just
103 : // for the writes to stackPointer. However, the actual performance impact of
104 : // this has not yet been measured on ARM, so we're currently using option B
105 : // everywhere. This is something that we may want to change in the future once
106 : // we've done measurements.
107 :
108 : namespace js {
109 :
110 : // A call stack can be specified to the JS engine such that all JS entry/exits
111 : // to functions push/pop a stack frame to/from the specified stack.
112 : //
113 : // For more detailed information, see vm/GeckoProfiler.h.
114 : //
class ProfilingStackFrame
{
    // A ProfilingStackFrame represents either a label frame or a JS frame.

    // WARNING WARNING WARNING
    //
    // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so
    // that writes to these fields are release-writes, which ensures that
    // earlier writes in this thread don't get reordered after the writes to
    // these fields. In particular, the decrement of the stack pointer in
    // ProfilingStack::pop() is a write that *must* happen before the values in
    // this ProfilingStackFrame are changed. Otherwise, the sampler thread might
    // see an inconsistent state where the stack pointer still points to a
    // ProfilingStackFrame which has already been popped off the stack and whose
    // fields have now been partially repopulated with new values.
    // See the "Concurrency considerations" paragraph at the top of this file
    // for more details.

    // Descriptive label for this stack frame. Must be a static string! Can be
    // an empty string, but not a null pointer.
    mozilla::Atomic<const char*, mozilla::ReleaseAcquire> label_;

    // An additional descriptive string of this frame which is combined with
    // |label_| in profiler output. Need not be (and usually isn't) static. Can
    // be null.
    mozilla::Atomic<const char*, mozilla::ReleaseAcquire> dynamicString_;

    // Stack pointer for non-JS stack frames, the script pointer otherwise.
    mozilla::Atomic<void*, mozilla::ReleaseAcquire> spOrScript;

    // Line number for non-JS stack frames, the bytecode offset otherwise.
    mozilla::Atomic<int32_t, mozilla::ReleaseAcquire> lineOrPcOffset;

    // Bits 0...1 hold the Kind. Bits 2...31 hold the category.
    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> kindAndCategory_;

    // Translates a bytecode pc within aScript into the int32_t offset stored
    // in lineOrPcOffset. Defined out of line (see vm/GeckoProfiler.cpp) since
    // JSScript's layout is not known here.
    static int32_t pcToOffset(JSScript* aScript, jsbytecode* aPc);

  public:
    ProfilingStackFrame() = default;

    // Field-by-field copy. Each scalar field is read into a local temporary
    // first and then stored, making each atomic load/store an explicit,
    // separate operation; the copy as a whole is not atomic.
    ProfilingStackFrame& operator=(const ProfilingStackFrame& other)
    {
        label_ = other.label();
        dynamicString_ = other.dynamicString();
        void* spScript = other.spOrScript;
        spOrScript = spScript;
        int32_t offset = other.lineOrPcOffset;
        lineOrPcOffset = offset;
        uint32_t kindAndCategory = other.kindAndCategory_;
        kindAndCategory_ = kindAndCategory;
        return *this;
    }

    // The type of frame, stored in the low KIND_BITCOUNT bits of
    // kindAndCategory_.
    enum class Kind : uint32_t {
        // A regular label frame. These usually come from AutoProfilerLabel.
        LABEL = 0,

        // A special frame indicating the start of a run of JS profiling stack
        // frames. SP_MARKER frames are ignored, except for the sp field.
        // These frames are needed to get correct ordering between JS and LABEL
        // frames because JS frames don't carry sp information.
        // SP is short for "stack pointer".
        SP_MARKER = 1,

        // A normal JS frame.
        JS_NORMAL = 2,

        // An interpreter JS frame that has OSR-ed into baseline. JS_NORMAL
        // frames can be converted to JS_OSR and back. JS_OSR frames are
        // ignored.
        JS_OSR = 3,

        // Helpers for packing/unpacking kindAndCategory_, not real kinds.
        KIND_BITCOUNT = 2,
        KIND_MASK = (1 << KIND_BITCOUNT) - 1
    };

    // Keep these in sync with devtools/client/performance/modules/categories.js
    enum class Category : uint32_t {
        IDLE,
        OTHER,
        LAYOUT,
        JS,
        GCCC,
        NETWORK,
        GRAPHICS,
        DOM,

        FIRST = OTHER,
        LAST = DOM,
    };

    static_assert(uint32_t(Category::LAST) <= (UINT32_MAX >> uint32_t(Kind::KIND_BITCOUNT)),
                  "Too many categories to fit into u32 with two bits reserved for the kind");

    bool isLabelFrame() const
    {
        return kind() == Kind::LABEL;
    }

    bool isSpMarkerFrame() const
    {
        return kind() == Kind::SP_MARKER;
    }

    // True for both JS_NORMAL and JS_OSR frames.
    bool isJsFrame() const
    {
        Kind k = kind();
        return k == Kind::JS_NORMAL || k == Kind::JS_OSR;
    }

    void setLabel(const char* aLabel) { label_ = aLabel; }
    const char* label() const { return label_; }

    const char* dynamicString() const { return dynamicString_; }

    // Initializes this frame as a LABEL frame. The stores below are
    // release-stores (see the warning comment above); the caller bumps the
    // stack pointer only after this returns.
    void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp,
                        uint32_t aLine, Category aCategory)
    {
        label_ = aLabel;
        dynamicString_ = aDynamicString;
        spOrScript = sp;
        lineOrPcOffset = static_cast<int32_t>(aLine);
        kindAndCategory_ = uint32_t(Kind::LABEL) | (uint32_t(aCategory) << uint32_t(Kind::KIND_BITCOUNT));
        MOZ_ASSERT(isLabelFrame());
    }

    // Initializes this frame as an SP_MARKER frame; only the sp field is
    // meaningful to readers (see the Kind::SP_MARKER comment).
    void initSpMarkerFrame(void* sp)
    {
        label_ = "";
        dynamicString_ = nullptr;
        spOrScript = sp;
        lineOrPcOffset = 0;
        kindAndCategory_ = uint32_t(Kind::SP_MARKER) | (uint32_t(Category::OTHER) << uint32_t(Kind::KIND_BITCOUNT));
        MOZ_ASSERT(isSpMarkerFrame());
    }

    // Initializes this frame as a JS_NORMAL frame. spOrScript holds the
    // script pointer and lineOrPcOffset the bytecode offset of aPc.
    void initJsFrame(const char* aLabel, const char* aDynamicString, JSScript* aScript,
                     jsbytecode* aPc)
    {
        label_ = aLabel;
        dynamicString_ = aDynamicString;
        spOrScript = aScript;
        lineOrPcOffset = pcToOffset(aScript, aPc);
        kindAndCategory_ = uint32_t(Kind::JS_NORMAL) | (uint32_t(Category::JS) << uint32_t(Kind::KIND_BITCOUNT));
        MOZ_ASSERT(isJsFrame());
    }

    // Replaces the kind while preserving the current category bits.
    void setKind(Kind aKind) {
        kindAndCategory_ = uint32_t(aKind) | (uint32_t(category()) << uint32_t(Kind::KIND_BITCOUNT));
    }

    Kind kind() const {
        return Kind(kindAndCategory_ & uint32_t(Kind::KIND_MASK));
    }

    Category category() const {
        return Category(kindAndCategory_ >> uint32_t(Kind::KIND_BITCOUNT));
    }

    // The stack pointer of a non-JS frame (spOrScript holds a script pointer
    // for JS frames, hence the assert).
    void* stackAddress() const {
        MOZ_ASSERT(!isJsFrame());
        return spOrScript;
    }

    JS_PUBLIC_API(JSScript*) script() const;

    // The line number of a non-JS frame (for JS frames lineOrPcOffset holds a
    // bytecode offset instead).
    uint32_t line() const {
        MOZ_ASSERT(!isJsFrame());
        return static_cast<uint32_t>(lineOrPcOffset);
    }

    // Note that the pointer returned might be invalid.
    JSScript* rawScript() const {
        MOZ_ASSERT(isJsFrame());
        void* script = spOrScript;
        return static_cast<JSScript*>(script);
    }

    // We can't know the layout of JSScript, so look in vm/GeckoProfiler.cpp.
    JS_FRIEND_API(jsbytecode*) pc() const;
    void setPC(jsbytecode* pc);

    void trace(JSTracer* trc);

    // The offset of a pc into a script's code can actually be 0, so to
    // signify a nullptr pc, use a -1 index. This is checked against in
    // pc() and setPC() to set/get the right pc.
    static const int32_t NullPCOffset = -1;
};
304 :
305 : JS_FRIEND_API(void)
306 : SetContextProfilingStack(JSContext* cx, ProfilingStack* profilingStack);
307 :
308 : // GetContextProfilingStack also exists, but it's defined in RootingAPI.h.
309 :
310 : JS_FRIEND_API(void)
311 : EnableContextProfilingStack(JSContext* cx, bool enabled);
312 :
313 : JS_FRIEND_API(void)
314 : RegisterContextProfilingEventMarker(JSContext* cx, void (*fn)(const char*));
315 :
316 : } // namespace js
317 :
318 : namespace JS {
319 :
320 : typedef void
321 : (* RegisterThreadCallback)(const char* threadName, void* stackBase);
322 :
323 : typedef void
324 : (* UnregisterThreadCallback)();
325 :
326 : JS_FRIEND_API(void)
327 : SetProfilingThreadCallbacks(RegisterThreadCallback registerThread,
328 : UnregisterThreadCallback unregisterThread);
329 :
330 : } // namespace JS
331 :
332 : // Each thread has its own ProfilingStack. That thread modifies the ProfilingStack,
333 : // pushing and popping elements as necessary.
334 : //
335 : // The ProfilingStack is also read periodically by the profiler's sampler thread.
336 : // This happens only when the thread that owns the ProfilingStack is suspended.
337 : // So there are no genuine parallel accesses.
338 : //
339 : // However, it is possible for pushing/popping to be interrupted by a periodic
340 : // sample. Because of this, we need pushing/popping to be effectively atomic.
341 : //
342 : // - When pushing a new frame, we increment the stack pointer -- making the new
343 : // frame visible to the sampler thread -- only after the new frame has been
344 : // fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so
345 : // the increment is a release-store, which ensures that this store is not
346 : // reordered before the writes of the frame.
347 : //
348 : // - When popping an old frame, the only operation is the decrementing of the
349 : // stack pointer, which is obviously atomic.
350 : //
class ProfilingStack final
{
  public:
    ProfilingStack()
      : stackPointer(0)
    {}

    // Out of line; frees the frames buffer (see the .cpp file).
    ~ProfilingStack();

    // Pushes a LABEL frame. The frame data is fully written before the
    // stack pointer is incremented, so the sampler thread never observes a
    // partially-constructed frame.
    void pushLabelFrame(const char* label, const char* dynamicString, void* sp,
                        uint32_t line, js::ProfilingStackFrame::Category category) {
        uint32_t oldStackPointer = stackPointer;

        if (MOZ_LIKELY(capacity > oldStackPointer) || MOZ_LIKELY(ensureCapacitySlow()))
            frames[oldStackPointer].initLabelFrame(label, dynamicString, sp, line, category);

        // This must happen at the end! The compiler will not reorder this
        // update because stackPointer is Atomic<..., ReleaseAcquire>, so
        // the writes above will not be reordered below the stackPointer store.
        // Do the read and the write as two separate statements, in order to
        // make it clear that we don't need an atomic increment, which would be
        // more expensive on x86 than the separate operations done here.
        // This thread is the only one that ever changes the value of
        // stackPointer.
        stackPointer = oldStackPointer + 1;
    }

    // Pushes an SP_MARKER frame; see Kind::SP_MARKER for its purpose.
    void pushSpMarkerFrame(void* sp) {
        uint32_t oldStackPointer = stackPointer;

        if (MOZ_LIKELY(capacity > oldStackPointer) || MOZ_LIKELY(ensureCapacitySlow()))
            frames[oldStackPointer].initSpMarkerFrame(sp);

        // This must happen at the end, see the comment in pushLabelFrame.
        stackPointer = oldStackPointer + 1;
    }

    // Pushes a JS_NORMAL frame for the given script/pc.
    void pushJsFrame(const char* label, const char* dynamicString, JSScript* script,
                     jsbytecode* pc) {
        uint32_t oldStackPointer = stackPointer;

        if (MOZ_LIKELY(capacity > oldStackPointer) || MOZ_LIKELY(ensureCapacitySlow()))
            frames[oldStackPointer].initJsFrame(label, dynamicString, script, pc);

        // This must happen at the end, see the comment in pushLabelFrame.
        stackPointer = oldStackPointer + 1;
    }

    // Pops the top frame. Only the stack pointer is touched; the popped
    // frame's data is left in place and may be overwritten by a later push.
    void pop() {
        MOZ_ASSERT(stackPointer > 0);
        // Do the read and the write as two separate statements, in order to
        // make it clear that we don't need an atomic decrement, which would be
        // more expensive on x86 than the separate operations done here.
        // This thread is the only one that ever changes the value of
        // stackPointer.
        uint32_t oldStackPointer = stackPointer;
        stackPointer = oldStackPointer - 1;
    }

    // Number of valid frames; clamped because stackPointer may exceed
    // capacity when pushes were dropped (see the stackPointer comment below).
    uint32_t stackSize() const { return std::min(uint32_t(stackPointer), stackCapacity()); }
    uint32_t stackCapacity() const { return capacity; }

  private:
    // Out of line path for expanding the buffer, since otherwise this would get inlined in every
    // DOM WebIDL call.
    MOZ_COLD MOZ_MUST_USE bool ensureCapacitySlow();

    // No copying.
    ProfilingStack(const ProfilingStack&) = delete;
    void operator=(const ProfilingStack&) = delete;

    // No moving either.
    ProfilingStack(ProfilingStack&&) = delete;
    void operator=(ProfilingStack&&) = delete;

    uint32_t capacity = 0;

  public:

    // The pointer to the stack frames, this is read from the profiler thread and written from the
    // current thread.
    //
    // This is effectively a unique pointer.
    mozilla::Atomic<js::ProfilingStackFrame*> frames { nullptr };

    // This may exceed the capacity, so instead use the stackSize() method to
    // determine the number of valid frames in stackFrames. When this is less
    // than stackCapacity(), it refers to the first free stackframe past the top
    // of the in-use stack (i.e. frames[stackPointer - 1] is the top stack
    // frame).
    //
    // WARNING WARNING WARNING
    //
    // This is an atomic variable that uses ReleaseAcquire memory ordering.
    // See the "Concurrency considerations" paragraph at the top of this file
    // for more details.
    mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stackPointer;
};
449 :
450 : namespace js {
451 :
452 : class AutoGeckoProfilerEntry;
453 : class GeckoProfilerEntryMarker;
454 : class GeckoProfilerBaselineOSRMarker;
455 :
456 : class GeckoProfilerThread
457 : {
458 : friend class AutoGeckoProfilerEntry;
459 : friend class GeckoProfilerEntryMarker;
460 : friend class GeckoProfilerBaselineOSRMarker;
461 :
462 : ProfilingStack* profilingStack_;
463 :
464 : public:
465 : GeckoProfilerThread();
466 :
467 : uint32_t stackPointer() { MOZ_ASSERT(installed()); return profilingStack_->stackPointer; }
468 : ProfilingStackFrame* stack() { return profilingStack_->frames; }
469 : ProfilingStack* getProfilingStack() { return profilingStack_; }
470 :
471 : /* management of whether instrumentation is on or off */
472 : bool installed() { return profilingStack_ != nullptr; }
473 :
474 : void setProfilingStack(ProfilingStack* profilingStack);
475 : void trace(JSTracer* trc);
476 :
477 : /*
478 : * Functions which are the actual instrumentation to track run information
479 : *
480 : * - enter: a function has started to execute
481 : * - updatePC: updates the pc information about where a function
482 : * is currently executing
483 : * - exit: this function has ceased execution, and no further
484 : * entries/exits will be made
485 : */
486 : bool enter(JSContext* cx, JSScript* script, JSFunction* maybeFun);
487 : void exit(JSScript* script, JSFunction* maybeFun);
488 : inline void updatePC(JSContext* cx, JSScript* script, jsbytecode* pc);
489 : };
490 :
491 : } // namespace js
492 :
493 : #endif /* js_ProfilingStack_h */
|