OGRE  2.0
Object-Oriented Graphics Rendering Engine
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
OgreMathlibSSE2.h
Go to the documentation of this file.
1 /*
2 -----------------------------------------------------------------------------
3 This source file is part of OGRE
4  (Object-oriented Graphics Rendering Engine)
5 For the latest info, see http://www.ogre3d.org/
6 
7 Copyright (c) 2000-2014 Torus Knot Software Ltd
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 -----------------------------------------------------------------------------
27 */
28 
29 #ifndef __MathlibSSE2_H__
30 #define __MathlibSSE2_H__
31 
32 #if __OGRE_HAVE_SSE
33 
34 #ifndef __Mathlib_H__
35  #error "Don't include this file directly. include Math/Array/OgreMathlib.h"
36 #endif
37 
38 #include "OgrePrerequisites.h"
39 
40 namespace Ogre
41 {
42  class ArrayRadian
43  {
45 
46  public:
47  explicit ArrayRadian ( ArrayReal r ) : mRad( r ) {}
48  //ArrayRadian ( const ArrayDegree& d );
49  ArrayRadian& operator = ( const ArrayReal &f ) { mRad = f; return *this; }
50  ArrayRadian& operator = ( const ArrayRadian &r ) { mRad = r.mRad; return *this; }
51  //ArrayRadian& operator = ( const ArrayDegree& d );
52 
53  //ArrayReal valueDegrees() const; // see bottom of this file
54  ArrayReal valueRadians() const { return mRad; }
55 
56  inline const ArrayRadian& operator + () const;
57  inline ArrayRadian operator + ( const ArrayRadian& r ) const;
58  //inline ArrayRadian operator + ( const ArrayDegree& d ) const;
59  inline ArrayRadian& operator += ( const ArrayRadian& r );
60  //inline ArrayRadian& operator += ( const ArrayDegree& d );
61  inline ArrayRadian operator - () const;
62  inline ArrayRadian operator - ( const ArrayRadian& r ) const;
63  //inline ArrayRadian operator - ( const ArrayDegree& d ) const;
64  inline ArrayRadian& operator -= ( const ArrayRadian& r );
65  //inline ArrayRadian& operator -= ( const ArrayDegree& d );
66  inline ArrayRadian operator * ( ArrayReal f ) const;
67  inline ArrayRadian operator * ( const ArrayRadian& f ) const;
68  inline ArrayRadian& operator *= ( ArrayReal f );
69  inline ArrayRadian operator / ( ArrayReal f ) const;
70  inline ArrayRadian& operator /= ( ArrayReal f );
71 
72  inline ArrayReal operator < ( const ArrayRadian& r ) const;
73  inline ArrayReal operator <= ( const ArrayRadian& r ) const;
74  inline ArrayReal operator == ( const ArrayRadian& r ) const;
75  inline ArrayReal operator != ( const ArrayRadian& r ) const;
76  inline ArrayReal operator >= ( const ArrayRadian& r ) const;
77  inline ArrayReal operator > ( const ArrayRadian& r ) const;
78  };
79 
80  class _OgreExport MathlibSSE2
81  {
82  public:
// Shared constants of MathlibSSE2. Only the declarations are visible here;
// the trailing comment on each line states the intended per-lane contents.
83 static const ArrayReal HALF; //0.5f, 0.5f, 0.5f, 0.5f
84 static const ArrayReal ONE; //1.0f, 1.0f, 1.0f, 1.0f
85 static const ArrayReal THREE; //3.0f, 3.0f, 3.0f, 3.0f
86 static const ArrayReal NEG_ONE; //-1.0f, -1.0f, -1.0f, -1.0f
87 static const ArrayReal PI; //PI, PI, PI, PI
88 static const ArrayReal TWO_PI; //2*PI, 2*PI, 2*PI, 2*PI
89 static const ArrayReal ONE_DIV_2PI; //1 / 2PI, 1 / 2PI, 1 / 2PI, 1 / 2PI
90 static const ArrayReal fEpsilon; //1e-6f, 1e-6f, 1e-6f, 1e-6f
91 static const ArrayReal fSqEpsilon; //1e-12f, 1e-12f, 1e-12f, 1e-12f
92 static const ArrayReal OneMinusEpsilon;//1 - 1e-6f, 1 - 1e-6f, 1 - 1e-6f, 1 - 1e-6f
93 static const ArrayReal fDeg2Rad; //Math::fDeg2Rad, Math::fDeg2Rad, Math::fDeg2Rad, Math::fDeg2Rad
94 static const ArrayReal fRad2Deg; //Math::fRad2Deg, Math::fRad2Deg, Math::fRad2Deg, Math::fRad2Deg
95 static const ArrayReal FLOAT_MIN; //FLT_MIN, FLT_MIN, FLT_MIN, FLT_MIN
96 static const ArrayReal SIGN_MASK; //0x80000000, 0x80000000, 0x80000000, 0x80000000
97 //INFINITE is taken in Windows, INFINITY by C99 (bloody macros). A joke on Infinite Tea
98 static const ArrayReal INFINITEA; //Inf, Inf, Inf, Inf
99 static const ArrayReal MAX_NEG; //Max negative number (x4)
100 static const ArrayReal MAX_POS; //Max positive number (x4)
101 static const ArrayReal LAST_AFFINE_COLUMN;//0, 0, 0, 1
102 
109  static inline ArrayReal Abs4( ArrayReal a )
110  {
111  return _mm_andnot_ps( _mm_set1_ps( -0.0f ), a );
112  }
113 
134  static inline ArrayReal Cmov4( ArrayReal arg1, ArrayReal arg2, ArrayMaskR mask )
135  {
136  assert( _mm_movemask_ps( _mm_cmpeq_ps( arg1, arg1 ) ) == 0x0f &&
137  _mm_movemask_ps( _mm_cmpeq_ps( arg2, arg2 ) ) == 0x0f &&
138  "Passing NaN values to CMov4" );
139 #ifndef NDEBUG
140  ArrayReal newNan1 = _mm_mul_ps( arg1, _mm_setzero_ps() ); //+-Inf * 0 = nan
141  ArrayReal newNan2 = _mm_mul_ps( arg2, _mm_setzero_ps() ); //+-Inf * 0 = nan
142  assert( _mm_movemask_ps( _mm_cmpeq_ps( newNan1, newNan1 ) ) == 0x0f &&
143  _mm_movemask_ps( _mm_cmpeq_ps( newNan2, newNan2 ) ) == 0x0f &&
144  "Passing +/- Infinity values to CMov4" );
145 #endif
146 
147  ArrayReal t = _mm_sub_ps( arg1, arg2 ); // t = arg1 - arg2
148  return _mm_add_ps( arg2, _mm_and_ps( t, mask ) ); // r = arg2 + (t & mask)
149  }
150 
177  #
178  static inline __m128 CmovRobust( __m128 arg1, __m128 arg2, __m128 mask )
179  {
180  return _mm_or_ps( _mm_and_ps( arg1, mask ), _mm_andnot_ps( mask, arg2 ) );
181  }
182  static inline __m128d CmovRobust( __m128d arg1, __m128d arg2, __m128d mask )
183  {
184  return _mm_or_pd( _mm_and_pd( arg1, mask ), _mm_andnot_pd( mask, arg2 ) );
185  }
186  static inline ArrayInt CmovRobust( ArrayInt arg1, ArrayInt arg2, ArrayMaskI mask )
187  {
188  return _mm_or_si128( _mm_and_si128( arg1, mask ), _mm_andnot_si128( mask, arg2 ) );
189  }
190 
195  static inline ArrayReal And( ArrayReal a, ArrayReal b )
196  {
197  return _mm_and_ps( a, b );
198  }
199  static inline ArrayInt And( ArrayInt a, ArrayInt b )
200  {
201  return _mm_and_si128( a, b );
202  }
203 
208  static inline __m128i And( __m128i a, uint32 b )
209  {
210  return _mm_and_si128( a, _mm_set1_epi32( b ) );
211  }
212 
/** Per 32-bit lane test for shared bits between a and b.
@return all ones in lanes where (a & b) != 0, all zeros elsewhere
*/
static inline __m128i TestFlags4( __m128i a, __m128i b )
{
    const __m128i common  = _mm_and_si128( a, b );
    // All ones where no bit is shared...
    const __m128i isEmpty = _mm_cmpeq_epi32( common, _mm_setzero_si128() );
    // ...then inverted: !( (a & b) == 0 ) --> ( (a & b) == 0 ) ^ -1
    const __m128i allOnes = _mm_set1_epi32( -1 );
    return _mm_xor_si128( isEmpty, allOnes );
}
229 
/// Returns a & ~b.
static inline __m128i AndNot( __m128i a, __m128i b )
{
    // _mm_andnot_si128 negates its FIRST operand, hence the swapped order.
    const __m128i cleared = _mm_andnot_si128( b, a );
    return cleared;
}
238 
/// Bitwise OR of two packed-float values: a | b.
static inline __m128 Or( __m128 a, __m128 b )
{
    const __m128 bits = _mm_or_ps( a, b );
    return bits;
}
/// Bitwise OR of two packed-integer values: a | b.
static inline __m128i Or( __m128i a, __m128i b )
{
    const __m128i bits = _mm_or_si128( a, b );
    return bits;
}
251 
/// Lane-wise a < b, returned as the mask produced by _mm_cmplt_ps.
static inline __m128 CompareLess( __m128 a, __m128 b )
{
    const __m128 mask = _mm_cmplt_ps( a, b );
    return mask;
}
260 
/// Lane-wise a <= b, returned as the mask produced by _mm_cmple_ps.
static inline __m128 CompareLessEqual( __m128 a, __m128 b )
{
    const __m128 mask = _mm_cmple_ps( a, b );
    return mask;
}
269 
/// Lane-wise a > b, returned as the mask produced by _mm_cmpgt_ps.
static inline __m128 CompareGreater( __m128 a, __m128 b )
{
    const __m128 mask = _mm_cmpgt_ps( a, b );
    return mask;
}
278 
/// Lane-wise a >= b, returned as the mask produced by _mm_cmpge_ps.
static inline __m128 CompareGreaterEqual( __m128 a, __m128 b )
{
    const __m128 mask = _mm_cmpge_ps( a, b );
    return mask;
}
287 
288  static inline ArrayReal SetAll( Real val )
289  {
290  return _mm_set_ps1( val );
291  }
292 
293  static inline ArrayInt SetAll( uint32 val )
294  {
295  return _mm_set1_epi32( val );
296  }
297 
302  static inline ArrayReal isInfinity( ArrayReal a )
303  {
304  return _mm_cmpeq_ps( a, MathlibSSE2::INFINITEA );
305  }
306 
308  static inline ArrayReal Max( ArrayReal a, ArrayReal b )
309  {
310  return _mm_max_ps( a, b );
311  }
312 
314  static inline ArrayReal Min( ArrayReal a, ArrayReal b )
315  {
316  return _mm_min_ps( a, b );
317  }
318 
323  static inline Real CollapseMin( ArrayReal a )
324  {
325  float r;
326  ArrayReal t0 = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 2, 3, 2, 3 ) );
327  t0 = _mm_min_ps( a, t0 );
328  a = _mm_shuffle_ps( t0, t0, _MM_SHUFFLE( 1, 1, 0, 0 ) );
329  t0 = _mm_min_ps( a, t0 );
330  _mm_store_ss( &r, t0 );
331  return r;
332  }
333 
338  static inline Real CollapseMax( ArrayReal a )
339  {
340  float r;
341  ArrayReal t0 = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 2, 3, 2, 3 ) );
342  t0 = _mm_max_ps( a, t0 );
343  a = _mm_shuffle_ps( t0, t0, _MM_SHUFFLE( 1, 1, 0, 0 ) );
344  t0 = _mm_max_ps( a, t0 );
345  _mm_store_ss( &r, t0 );
346  return r;
347  }
348 
370  static inline ArrayReal Inv4( ArrayReal val )
371  {
372  ArrayReal inv = _mm_rcp_ps( val );
373  ArrayReal twoRcp = _mm_add_ps( inv, inv ); //2 * rcp( f )
374  ArrayReal rightSide= _mm_mul_ps( val, _mm_mul_ps( inv, inv ) ); //f * rcp( f ) * rcp( f )
375  rightSide = _mm_and_ps( rightSide, _mm_cmpneq_ps( val, _mm_setzero_ps() ) ); //Nuke this NaN
376  return _mm_sub_ps( twoRcp, rightSide );
377  }
378 
401  static inline ArrayReal InvNonZero4( ArrayReal val )
402  {
403  ArrayReal inv = _mm_rcp_ps( val );
404  ArrayReal twoRcp = _mm_add_ps( inv, inv ); //2 * rcp( f )
405  ArrayReal rightSide= _mm_mul_ps( val, _mm_mul_ps( inv, inv ) ); //f * rcp( f ) * rcp( f )
406  return _mm_sub_ps( twoRcp, rightSide );
407  }
408 
427  static inline ArrayReal InvSqrt4( ArrayReal f )
428  {
429  ArrayReal invSqrt = _mm_rsqrt_ps( f );
430 
431  ArrayReal halfInvSqrt= _mm_mul_ps( HALF, invSqrt ); //0.5 * rsqrt( f )
432  ArrayReal rightSide = _mm_mul_ps( invSqrt, _mm_mul_ps( f, invSqrt ) ); //f * rsqrt( f ) * rsqrt( f )
433  rightSide = _mm_and_ps( rightSide, _mm_cmpneq_ps( f, _mm_setzero_ps() ) );//Nuke this NaN
434  return _mm_mul_ps( halfInvSqrt, _mm_sub_ps( THREE, rightSide ) ); //halfInvSqrt*(3 - rightSide)
435  }
436 
457  static inline ArrayReal InvSqrtNonZero4( ArrayReal f )
458  {
459  ArrayReal invSqrt = _mm_rsqrt_ps( f );
460 
461  ArrayReal halfInvSqrt= _mm_mul_ps( HALF, invSqrt ); //0.5 * rsqrt( f )
462  ArrayReal rightSide = _mm_mul_ps( invSqrt, _mm_mul_ps( f, invSqrt ) ); //f * rsqrt( f ) * rsqrt( f )
463  return _mm_mul_ps( halfInvSqrt, _mm_sub_ps( THREE, rightSide ) ); //halfInvSqrt*(3 - rightSide)
464  }
465 
// Declaration only. Marked inline, so the body is presumably supplied by
// OgreMathlibSSE2.inl, which this header includes at its end -- TODO confirm.
// NOTE(review): the name suggests a lane-wise modf (one part returned, the
// integral part written to outIntegral); verify against the definition.
474 static inline ArrayReal Modf4( ArrayReal x, ArrayReal &outIntegral );
475 
// Declaration only; marked inline like Modf4.
// NOTE(review): presumably a lane-wise arc cosine of x -- verify.
482 static inline ArrayReal ACos4( ArrayReal x );
483 
// Declaration only and NOT marked inline, so the definition is expected in a
// translation unit that is not visible here.
// NOTE(review): presumably a lane-wise sine of x -- verify.
490 static ArrayReal Sin4( ArrayReal x );
491 
// Declaration only, not inline.
// NOTE(review): presumably a lane-wise cosine of x -- verify.
498 static ArrayReal Cos4( ArrayReal x );
499 
// Declaration only, not inline. Results are delivered through the two
// reference parameters; nothing is returned.
// NOTE(review): presumably writes sin(x) to outSin and cos(x) to outCos -- verify.
509 static void SinCos4( ArrayReal x, ArrayReal &outSin, ArrayReal &outCos );
510  };
511 
// Arithmetic operators for ArrayReal, compiled only when the compiler is
// neither Clang nor GCC. The scalar-mixing and unary-minus variants are
// intentionally left disabled.
#if OGRE_COMPILER != OGRE_COMPILER_CLANG && OGRE_COMPILER != OGRE_COMPILER_GNUC
// inline ArrayReal operator - ( ArrayReal l ) { return _mm_xor_ps( l, MathlibSSE2::SIGN_MASK ); }
// inline ArrayReal operator + ( ArrayReal l, Real r ) { return _mm_add_ps( l, _mm_set1_ps( r ) ); }
// inline ArrayReal operator + ( Real l, ArrayReal r ) { return _mm_add_ps( _mm_set1_ps( l ), r ); }
/// Lane-wise l + r.
inline ArrayReal operator + ( ArrayReal l, ArrayReal r )
{
    return _mm_add_ps( l, r );
}
// inline ArrayReal operator - ( ArrayReal l, Real r ) { return _mm_sub_ps( l, _mm_set1_ps( r ) ); }
// inline ArrayReal operator - ( Real l, ArrayReal r ) { return _mm_sub_ps( _mm_set1_ps( l ), r ); }
/// Lane-wise l - r.
inline ArrayReal operator - ( ArrayReal l, ArrayReal r )
{
    return _mm_sub_ps( l, r );
}
// inline ArrayReal operator * ( ArrayReal l, Real r ) { return _mm_mul_ps( l, _mm_set1_ps( r ) ); }
// inline ArrayReal operator * ( Real l, ArrayReal r ) { return _mm_mul_ps( _mm_set1_ps( l ), r ); }
/// Lane-wise l * r.
inline ArrayReal operator * ( ArrayReal l, ArrayReal r )
{
    return _mm_mul_ps( l, r );
}
// inline ArrayReal operator / ( ArrayReal l, Real r ) { return _mm_div_ps( l, _mm_set1_ps( r ) ); }
// inline ArrayReal operator / ( Real l, ArrayReal r ) { return _mm_div_ps( _mm_set1_ps( l ), r ); }
/// Lane-wise l / r.
inline ArrayReal operator / ( ArrayReal l, ArrayReal r )
{
    return _mm_div_ps( l, r );
}
#endif
527 }
528 
529 #include "OgreMathlibSSE2.inl"
530 
531 #endif
532 #endif
Radian & operator+=(const Radian &r)
Definition: OgreMath.h:67
float Real
Software floating point type.
bool ArrayMaskR
unsigned int uint32
Definition: OgrePlatform.h:420
#define _OgreExport
Definition: OgrePlatform.h:255
bool operator<=(const Radian &r) const
Definition: OgreMath.h:81
Real mRad
Definition: OgreMath.h:51
Real ArrayReal
Radian & operator/=(Real f)
Definition: OgreMath.h:78
Radian ArrayRadian
bool operator>(const Radian &r) const
Definition: OgreMath.h:85
bool operator!=(const Radian &r) const
Definition: OgreMath.h:83
Radian & operator-=(const Radian &r)
Definition: OgreMath.h:72
Radian operator*(Real a, const Radian &b)
Definition: OgreMath.h:782
Radian operator/(Real a, const Radian &b)
Definition: OgreMath.h:787
Radian operator/(Real f) const
Definition: OgreMath.h:77
bool ArrayMaskI
Real valueRadians() const
Definition: OgreMath.h:61
Radian & operator=(const Real &f)
Definition: OgreMath.h:56
bool operator<(const Radian &r) const
Definition: OgreMath.h:80
const Radian & operator+() const
Definition: OgreMath.h:64
bool operator>=(const Radian &r) const
Definition: OgreMath.h:84
Radian & operator*=(Real f)
Definition: OgreMath.h:76
bool operator==(const Radian &r) const
Definition: OgreMath.h:82
Radian operator-() const
Definition: OgreMath.h:69
Radian operator*(Real f) const
Definition: OgreMath.h:74
uint32 ArrayInt