Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : /* Various predicates and operations on IEEE-754 floating point types. */
8 :
9 : #ifndef mozilla_FloatingPoint_h
10 : #define mozilla_FloatingPoint_h
11 :
12 : #include "mozilla/Assertions.h"
13 : #include "mozilla/Attributes.h"
14 : #include "mozilla/Casting.h"
15 : #include "mozilla/MathAlgorithms.h"
16 : #include "mozilla/MemoryChecking.h"
17 : #include "mozilla/Types.h"
18 : #include "mozilla/TypeTraits.h"
19 :
20 : #include <limits>
21 : #include <stdint.h>
22 :
23 : namespace mozilla {
24 :
25 : /*
26 : * It's reasonable to ask why we have this header at all. Don't isnan,
27 : * copysign, the built-in comparison operators, and the like solve these
28 : * problems? Unfortunately, they don't. We've found that various compilers
29 : * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile
30 : * the standard methods in various situations, so we can't use them. Some of
31 : * these compilers even have problems compiling seemingly reasonable bitwise
32 : * algorithms! But with some care we've found algorithms that seem to not
33 : * trigger those compiler bugs.
34 : *
35 : * For the aforementioned reasons, be very wary of making changes to any of
36 : * these algorithms. If you must make changes, keep a careful eye out for
37 : * compiler bustage, particularly PGO-specific bustage.
38 : */
39 :
/**
 * Bit-level layout of a 32-bit IEEE-754 single-precision value, described in
 * terms of an unsigned integer of the same width.
 */
struct FloatTypeTraits
{
  /** Unsigned integer type that holds all the bits of a float. */
  using Bits = uint32_t;

  /** Offset between the stored exponent field and the actual exponent. */
  static constexpr unsigned kExponentBias = 127;
  /** Bit position of the lowest exponent bit. */
  static constexpr unsigned kExponentShift = 23;

  /** Mask for the sign bit (bit 31). */
  static constexpr Bits kSignBit = Bits(1) << 31;
  /** Mask for the eight exponent bits (bits 23..30). */
  static constexpr Bits kExponentBits = Bits(0xFF) << kExponentShift;
  /** Mask for the significand bits (bits 0..22). */
  static constexpr Bits kSignificandBits = (Bits(1) << kExponentShift) - 1;
};
51 :
/**
 * Bit-level layout of a 64-bit IEEE-754 double-precision value, described in
 * terms of an unsigned integer of the same width.
 */
struct DoubleTypeTraits
{
  /** Unsigned integer type that holds all the bits of a double. */
  using Bits = uint64_t;

  /** Offset between the stored exponent field and the actual exponent. */
  static constexpr unsigned kExponentBias = 1023;
  /** Bit position of the lowest exponent bit. */
  static constexpr unsigned kExponentShift = 52;

  /** Mask for the sign bit (bit 63). */
  static constexpr Bits kSignBit = Bits(1) << 63;
  /** Mask for the eleven exponent bits (bits 52..62). */
  static constexpr Bits kExponentBits = Bits(0x7FF) << kExponentShift;
  /** Mask for the significand bits (bits 0..51). */
  static constexpr Bits kSignificandBits = (Bits(1) << kExponentShift) - 1;
};
63 :
64 : template<typename T> struct SelectTrait;
65 : template<> struct SelectTrait<float> : public FloatTypeTraits {};
66 : template<> struct SelectTrait<double> : public DoubleTypeTraits {};
67 :
68 : /*
69 : * This struct contains details regarding the encoding of floating-point
70 : * numbers that can be useful for direct bit manipulation. As of now, the
71 : * template parameter has to be float or double.
72 : *
73 : * The nested typedef |Bits| is the unsigned integral type with the same size
74 : * as T: uint32_t for float and uint64_t for double (static assertions
75 : * double-check these assumptions).
76 : *
77 : * kExponentBias is the offset that is subtracted from the exponent when
78 : * computing the value, i.e. one plus the opposite of the mininum possible
79 : * exponent.
80 : * kExponentShift is the shift that one needs to apply to retrieve the
81 : * exponent component of the value.
82 : *
83 : * kSignBit contains a bits mask. Bit-and-ing with this mask will result in
84 : * obtaining the sign bit.
85 : * kExponentBits contains the mask needed for obtaining the exponent bits and
86 : * kSignificandBits contains the mask needed for obtaining the significand
87 : * bits.
88 : *
89 : * Full details of how floating point number formats are encoded are beyond
90 : * the scope of this comment. For more information, see
91 : * http://en.wikipedia.org/wiki/IEEE_floating_point
92 : * http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers
93 : */
94 : template<typename T>
95 : struct FloatingPoint : public SelectTrait<T>
96 : {
97 : using Base = SelectTrait<T>;
98 : using Bits = typename Base::Bits;
99 :
100 : static_assert((Base::kSignBit & Base::kExponentBits) == 0,
101 : "sign bit shouldn't overlap exponent bits");
102 : static_assert((Base::kSignBit & Base::kSignificandBits) == 0,
103 : "sign bit shouldn't overlap significand bits");
104 : static_assert((Base::kExponentBits & Base::kSignificandBits) == 0,
105 : "exponent bits shouldn't overlap significand bits");
106 :
107 : static_assert((Base::kSignBit | Base::kExponentBits | Base::kSignificandBits) ==
108 : ~Bits(0),
109 : "all bits accounted for");
110 :
111 : /*
112 : * These implementations assume float/double are 32/64-bit single/double
113 : * format number types compatible with the IEEE-754 standard. C++ don't
114 : * require this to be the case. But we required this in implementations of
115 : * these algorithms that preceded this header, so we shouldn't break anything
116 : * if we keep doing so.
117 : */
118 : static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T");
119 : };
120 :
121 : /** Determines whether a float/double is NaN. */
122 : template<typename T>
123 : static MOZ_ALWAYS_INLINE bool
124 0 : IsNaN(T aValue)
125 : {
126 : /*
127 : * A float/double is NaN if all exponent bits are 1 and the significand
128 : * contains at least one non-zero bit.
129 : */
130 : typedef FloatingPoint<T> Traits;
131 : typedef typename Traits::Bits Bits;
132 0 : return (BitwiseCast<Bits>(aValue) & Traits::kExponentBits) == Traits::kExponentBits &&
133 0 : (BitwiseCast<Bits>(aValue) & Traits::kSignificandBits) != 0;
134 : }
135 :
136 : /** Determines whether a float/double is +Infinity or -Infinity. */
137 : template<typename T>
138 : static MOZ_ALWAYS_INLINE bool
139 776 : IsInfinite(T aValue)
140 : {
141 : /* Infinities have all exponent bits set to 1 and an all-0 significand. */
142 : typedef FloatingPoint<T> Traits;
143 : typedef typename Traits::Bits Bits;
144 0 : Bits bits = BitwiseCast<Bits>(aValue);
145 0 : return (bits & ~Traits::kSignBit) == Traits::kExponentBits;
146 : }
147 :
148 : /** Determines whether a float/double is not NaN or infinite. */
149 : template<typename T>
150 : static MOZ_ALWAYS_INLINE bool
151 10448 : IsFinite(T aValue)
152 : {
153 : /*
154 : * NaN and Infinities are the only non-finite floats/doubles, and both have
155 : * all exponent bits set to 1.
156 : */
157 : typedef FloatingPoint<T> Traits;
158 : typedef typename Traits::Bits Bits;
159 0 : Bits bits = BitwiseCast<Bits>(aValue);
160 0 : return (bits & Traits::kExponentBits) != Traits::kExponentBits;
161 : }
162 :
163 : /**
164 : * Determines whether a float/double is negative or -0. It is an error
165 : * to call this method on a float/double which is NaN.
166 : */
167 : template<typename T>
168 : static MOZ_ALWAYS_INLINE bool
169 0 : IsNegative(T aValue)
170 : {
171 0 : MOZ_ASSERT(!IsNaN(aValue), "NaN does not have a sign");
172 :
173 : /* The sign bit is set if the double is negative. */
174 : typedef FloatingPoint<T> Traits;
175 : typedef typename Traits::Bits Bits;
176 0 : Bits bits = BitwiseCast<Bits>(aValue);
177 0 : return (bits & Traits::kSignBit) != 0;
178 : }
179 :
180 : /** Determines whether a float/double represents -0. */
181 : template<typename T>
182 : static MOZ_ALWAYS_INLINE bool
183 7610 : IsNegativeZero(T aValue)
184 : {
185 : /* Only the sign bit is set if the value is -0. */
186 : typedef FloatingPoint<T> Traits;
187 : typedef typename Traits::Bits Bits;
188 0 : Bits bits = BitwiseCast<Bits>(aValue);
189 0 : return bits == Traits::kSignBit;
190 : }
191 :
192 : /** Determines wether a float/double represents +0. */
193 : template<typename T>
194 : static MOZ_ALWAYS_INLINE bool
195 : IsPositiveZero(T aValue)
196 : {
197 : /* All bits are zero if the value is +0. */
198 : typedef FloatingPoint<T> Traits;
199 : typedef typename Traits::Bits Bits;
200 0 : Bits bits = BitwiseCast<Bits>(aValue);
201 0 : return bits == 0;
202 : }
203 :
204 : /**
205 : * Returns 0 if a float/double is NaN or infinite;
206 : * otherwise, the float/double is returned.
207 : */
208 : template<typename T>
209 : static MOZ_ALWAYS_INLINE T
210 0 : ToZeroIfNonfinite(T aValue)
211 : {
212 0 : return IsFinite(aValue) ? aValue : 0;
213 : }
214 :
215 : /**
216 : * Returns the exponent portion of the float/double.
217 : *
218 : * Zero is not special-cased, so ExponentComponent(0.0) is
219 : * -int_fast16_t(Traits::kExponentBias).
220 : */
221 : template<typename T>
222 : static MOZ_ALWAYS_INLINE int_fast16_t
223 : ExponentComponent(T aValue)
224 : {
225 : /*
226 : * The exponent component of a float/double is an unsigned number, biased
227 : * from its actual value. Subtract the bias to retrieve the actual exponent.
228 : */
229 : typedef FloatingPoint<T> Traits;
230 : typedef typename Traits::Bits Bits;
231 0 : Bits bits = BitwiseCast<Bits>(aValue);
232 0 : return int_fast16_t((bits & Traits::kExponentBits) >> Traits::kExponentShift) -
233 0 : int_fast16_t(Traits::kExponentBias);
234 : }
235 :
236 : /** Returns +Infinity. */
237 : template<typename T>
238 : static MOZ_ALWAYS_INLINE T
239 3929 : PositiveInfinity()
240 : {
241 : /*
242 : * Positive infinity has all exponent bits set, sign bit set to 0, and no
243 : * significand.
244 : */
245 : typedef FloatingPoint<T> Traits;
246 0 : return BitwiseCast<T>(Traits::kExponentBits);
247 : }
248 :
249 : /** Returns -Infinity. */
250 : template<typename T>
251 : static MOZ_ALWAYS_INLINE T
252 3930 : NegativeInfinity()
253 : {
254 : /*
255 : * Negative infinity has all exponent bits set, sign bit set to 1, and no
256 : * significand.
257 : */
258 : typedef FloatingPoint<T> Traits;
259 0 : return BitwiseCast<T>(Traits::kSignBit | Traits::kExponentBits);
260 : }
261 :
262 : /**
263 : * Computes the bit pattern for a NaN with the specified sign bit and
264 : * significand bits.
265 : */
266 : template<typename T,
267 : int SignBit,
268 : typename FloatingPoint<T>::Bits Significand>
269 : struct SpecificNaNBits
270 : {
271 : using Traits = FloatingPoint<T>;
272 :
273 : static_assert(SignBit == 0 || SignBit == 1, "bad sign bit");
274 : static_assert((Significand & ~Traits::kSignificandBits) == 0,
275 : "significand must only have significand bits set");
276 : static_assert(Significand & Traits::kSignificandBits,
277 : "significand must be nonzero");
278 :
279 : static constexpr typename Traits::Bits value =
280 : (SignBit * Traits::kSignBit) | Traits::kExponentBits | Significand;
281 : };
282 :
283 : /**
284 : * Constructs a NaN value with the specified sign bit and significand bits.
285 : *
286 : * There is also a variant that returns the value directly. In most cases, the
287 : * two variants should be identical. However, in the specific case of x86
288 : * chips, the behavior differs: returning floating-point values directly is done
289 : * through the x87 stack, and x87 loads and stores turn signaling NaNs into
290 : * quiet NaNs... silently. Returning floating-point values via outparam,
291 : * however, is done entirely within the SSE registers when SSE2 floating-point
292 : * is enabled in the compiler, which has semantics-preserving behavior you would
293 : * expect.
294 : *
295 : * If preserving the distinction between signaling NaNs and quiet NaNs is
296 : * important to you, you should use the outparam version. In all other cases,
297 : * you should use the direct return version.
298 : */
299 : template<typename T>
300 : static MOZ_ALWAYS_INLINE void
301 0 : SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand, T* result)
302 : {
303 : typedef FloatingPoint<T> Traits;
304 0 : MOZ_ASSERT(signbit == 0 || signbit == 1);
305 0 : MOZ_ASSERT((significand & ~Traits::kSignificandBits) == 0);
306 0 : MOZ_ASSERT(significand & Traits::kSignificandBits);
307 :
308 0 : BitwiseCast<T>((signbit ? Traits::kSignBit : 0) |
309 : Traits::kExponentBits |
310 : significand,
311 : result);
312 0 : MOZ_ASSERT(IsNaN(*result));
313 0 : }
314 :
315 : template<typename T>
316 : static MOZ_ALWAYS_INLINE T
317 0 : SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand)
318 : {
319 : T t;
320 0 : SpecificNaN(signbit, significand, &t);
321 0 : return t;
322 : }
323 :
324 : /** Computes the smallest non-zero positive float/double value. */
325 : template<typename T>
326 : static MOZ_ALWAYS_INLINE T
327 : MinNumberValue()
328 : {
329 : typedef FloatingPoint<T> Traits;
330 : typedef typename Traits::Bits Bits;
331 0 : return BitwiseCast<T>(Bits(1));
332 : }
333 :
334 : namespace detail {
335 :
336 : template<typename Float, typename SignedInteger>
337 : inline bool
338 0 : NumberEqualsSignedInteger(Float aValue, SignedInteger* aInteger)
339 : {
340 : static_assert(IsSame<Float, float>::value || IsSame<Float, double>::value,
341 : "Float must be an IEEE-754 floating point type");
342 : static_assert(IsSigned<SignedInteger>::value,
343 : "this algorithm only works for signed types: a different one "
344 : "will be required for unsigned types");
345 : static_assert(sizeof(SignedInteger) >= sizeof(int),
346 : "this function *might* require some finessing for signed types "
347 : "subject to integral promotion before it can be used on them");
348 :
349 : MOZ_MAKE_MEM_UNDEFINED(aInteger, sizeof(*aInteger));
350 :
351 : // NaNs and infinities are not integers.
352 0 : if (!IsFinite(aValue)) {
353 : return false;
354 : }
355 :
356 : // Otherwise do direct comparisons against the minimum/maximum |SignedInteger|
357 : // values that can be encoded in |Float|.
358 :
359 : constexpr SignedInteger MaxIntValue =
360 0 : std::numeric_limits<SignedInteger>::max(); // e.g. INT32_MAX
361 : constexpr SignedInteger MinValue =
362 0 : std::numeric_limits<SignedInteger>::min(); // e.g. INT32_MIN
363 :
364 : static_assert(IsPowerOfTwo(Abs(MinValue)),
365 : "MinValue should be is a small power of two, thus exactly "
366 : "representable in float/double both");
367 :
368 0 : constexpr unsigned SignedIntegerWidth = CHAR_BIT * sizeof(SignedInteger);
369 0 : constexpr unsigned ExponentShift = FloatingPoint<Float>::kExponentShift;
370 :
371 : // Careful! |MaxIntValue| may not be the maximum |SignedInteger| value that
372 : // can be encoded in |Float|. Its |SignedIntegerWidth - 1| bits of precision
373 : // may exceed |Float|'s |ExponentShift + 1| bits of precision. If necessary,
374 : // compute the maximum |SignedInteger| that fits in |Float| from IEEE-754
375 : // first principles. (|MinValue| doesn't have this problem because as a
376 : // [relatively] small power of two it's always representable in |Float|.)
377 :
378 : // Per C++11 [expr.const]p2, unevaluated subexpressions of logical AND/OR and
379 : // conditional expressions *may* contain non-constant expressions, without
380 : // making the enclosing expression not constexpr. MSVC implements this -- but
381 : // it sometimes warns about undefined behavior in unevaluated subexpressions.
382 : // This bites us if we initialize |MaxValue| the obvious way including an
383 : // |uint64_t(1) << (SignedIntegerWidth - 2 - ExponentShift)| subexpression.
384 : // Pull that shift-amount out and give it a not-too-huge value when it's in an
385 : // unevaluated subexpression. 🙄
386 : constexpr unsigned PrecisionExceededShiftAmount =
387 : ExponentShift > SignedIntegerWidth - 1
388 : ? 0
389 0 : : SignedIntegerWidth - 2 - ExponentShift;
390 :
391 : constexpr SignedInteger MaxValue =
392 : ExponentShift > SignedIntegerWidth - 1
393 : ? MaxIntValue
394 : : SignedInteger((uint64_t(1) << (SignedIntegerWidth - 1)) -
395 0 : (uint64_t(1) << PrecisionExceededShiftAmount));
396 :
397 0 : if (static_cast<Float>(MinValue) <= aValue &&
398 : aValue <= static_cast<Float>(MaxValue))
399 : {
400 0 : auto possible = static_cast<SignedInteger>(aValue);
401 0 : if (static_cast<Float>(possible) == aValue) {
402 0 : *aInteger = possible;
403 0 : return true;
404 : }
405 : }
406 :
407 : return false;
408 : }
409 :
410 : template<typename Float, typename SignedInteger>
411 : inline bool
412 0 : NumberIsSignedInteger(Float aValue, SignedInteger* aInteger)
413 : {
414 : static_assert(IsSame<Float, float>::value || IsSame<Float, double>::value,
415 : "Float must be an IEEE-754 floating point type");
416 : static_assert(IsSigned<SignedInteger>::value,
417 : "this algorithm only works for signed types: a different one "
418 : "will be required for unsigned types");
419 : static_assert(sizeof(SignedInteger) >= sizeof(int),
420 : "this function *might* require some finessing for signed types "
421 : "subject to integral promotion before it can be used on them");
422 :
423 : MOZ_MAKE_MEM_UNDEFINED(aInteger, sizeof(*aInteger));
424 :
425 0 : if (IsNegativeZero(aValue)) {
426 : return false;
427 : }
428 :
429 0 : return NumberEqualsSignedInteger(aValue, aInteger);
430 : }
431 :
432 : } // namespace detail
433 :
434 : /**
435 : * If |aValue| is identical to some |int32_t| value, set |*aInt32| to that value
436 : * and return true. Otherwise return false, leaving |*aInt32| in an
437 : * indeterminate state.
438 : *
439 : * This method returns false for negative zero. If you want to consider -0 to
440 : * be 0, use NumberEqualsInt32 below.
441 : */
442 : template<typename T>
443 : static MOZ_ALWAYS_INLINE bool
444 : NumberIsInt32(T aValue, int32_t* aInt32)
445 : {
446 0 : return detail::NumberIsSignedInteger(aValue, aInt32);
447 : }
448 :
449 : /**
450 : * If |aValue| is equal to some int32_t value (where -0 and +0 are considered
451 : * equal), set |*aInt32| to that value and return true. Otherwise return false,
452 : * leaving |*aInt32| in an indeterminate state.
453 : *
454 : * |NumberEqualsInt32(-0.0, ...)| will return true. To test whether a value can
455 : * be losslessly converted to |int32_t| and back, use NumberIsInt32 above.
456 : */
457 : template<typename T>
458 : static MOZ_ALWAYS_INLINE bool
459 : NumberEqualsInt32(T aValue, int32_t* aInt32)
460 : {
461 0 : return detail::NumberEqualsSignedInteger(aValue, aInt32);
462 : }
463 :
464 : /**
465 : * Computes a NaN value. Do not use this method if you depend upon a particular
466 : * NaN value being returned.
467 : */
468 : template<typename T>
469 : static MOZ_ALWAYS_INLINE T
470 0 : UnspecifiedNaN()
471 : {
472 : /*
473 : * If we can use any quiet NaN, we might as well use the all-ones NaN,
474 : * since it's cheap to materialize on common platforms (such as x64, where
475 : * this value can be represented in a 32-bit signed immediate field, allowing
476 : * it to be stored to memory in a single instruction).
477 : */
478 : typedef FloatingPoint<T> Traits;
479 0 : return SpecificNaN<T>(1, Traits::kSignificandBits);
480 : }
481 :
482 : /**
483 : * Compare two doubles for equality, *without* equating -0 to +0, and equating
484 : * any NaN value to any other NaN value. (The normal equality operators equate
485 : * -0 with +0, and they equate NaN to no other value.)
486 : */
487 : template<typename T>
488 : static inline bool
489 0 : NumbersAreIdentical(T aValue1, T aValue2)
490 : {
491 : typedef FloatingPoint<T> Traits;
492 : typedef typename Traits::Bits Bits;
493 0 : if (IsNaN(aValue1)) {
494 0 : return IsNaN(aValue2);
495 : }
496 0 : return BitwiseCast<Bits>(aValue1) == BitwiseCast<Bits>(aValue2);
497 : }
498 :
namespace detail {

/**
 * Supplies the default tolerance for the FuzzyEquals* functions, per
 * floating-point type.  Only float and double are specialized.
 */
template<typename T>
struct FuzzyEqualsEpsilon;

template<>
struct FuzzyEqualsEpsilon<float>
{
  // 2**-17: a number near 1e-5 that is exactly representable in a float.
  static float value()
  {
    constexpr int kDenominator = 1 << 17;
    return 1.0f / kDenominator;
  }
};

template<>
struct FuzzyEqualsEpsilon<double>
{
  // 2**-40: a number near 1e-12 that is exactly representable in a double.
  static double value()
  {
    constexpr long long kDenominator = 1LL << 40;
    return 1.0 / kDenominator;
  }
};

} // namespace detail
519 :
520 : /**
521 : * Compare two floating point values for equality, modulo rounding error. That
522 : * is, the two values are considered equal if they are both not NaN and if they
523 : * are less than or equal to aEpsilon apart. The default value of aEpsilon is
524 : * near 1e-5.
525 : *
526 : * For most scenarios you will want to use FuzzyEqualsMultiplicative instead,
527 : * as it is more reasonable over the entire range of floating point numbers.
528 : * This additive version should only be used if you know the range of the
529 : * numbers you are dealing with is bounded and stays around the same order of
530 : * magnitude.
531 : */
532 : template<typename T>
533 : static MOZ_ALWAYS_INLINE bool
534 : FuzzyEqualsAdditive(T aValue1, T aValue2,
535 : T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value())
536 : {
537 : static_assert(IsFloatingPoint<T>::value, "floating point type required");
538 0 : return Abs(aValue1 - aValue2) <= aEpsilon;
539 : }
540 :
541 : /**
542 : * Compare two floating point values for equality, allowing for rounding error
543 : * relative to the magnitude of the values. That is, the two values are
544 : * considered equal if they are both not NaN and they are less than or equal to
545 : * some aEpsilon apart, where the aEpsilon is scaled by the smaller of the two
546 : * argument values.
547 : *
548 : * In most cases you will want to use this rather than FuzzyEqualsAdditive, as
549 : * this function effectively masks out differences in the bottom few bits of
550 : * the floating point numbers being compared, regardless of what order of
551 : * magnitude those numbers are at.
552 : */
553 : template<typename T>
554 : static MOZ_ALWAYS_INLINE bool
555 0 : FuzzyEqualsMultiplicative(T aValue1, T aValue2,
556 : T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value())
557 : {
558 : static_assert(IsFloatingPoint<T>::value, "floating point type required");
559 : // can't use std::min because of bug 965340
560 0 : T smaller = Abs(aValue1) < Abs(aValue2) ? Abs(aValue1) : Abs(aValue2);
561 0 : return Abs(aValue1 - aValue2) <= aEpsilon * smaller;
562 : }
563 :
564 : /**
565 : * Returns true if |aValue| can be losslessly represented as an IEEE-754 single
566 : * precision number, false otherwise. All NaN values are considered
567 : * representable (even though the bit patterns of double precision NaNs can't
568 : * all be exactly represented in single precision).
569 : */
570 : MOZ_MUST_USE
571 : extern MFBT_API bool
572 : IsFloat32Representable(double aValue);
573 :
574 : } /* namespace mozilla */
575 :
576 : #endif /* mozilla_FloatingPoint_h */
|