OGRE  2.0
Object-Oriented Graphics Rendering Engine
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
SSE2/Single/OgreArrayMatrixAf4x3.h
Go to the documentation of this file.
1 /*
2 -----------------------------------------------------------------------------
3 This source file is part of OGRE
4  (Object-oriented Graphics Rendering Engine)
5 For the latest info, see http://www.ogre3d.org/
6 
7 Copyright (c) 2000-2014 Torus Knot Software Ltd
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 -----------------------------------------------------------------------------
27 */
28 #ifndef _SSE2_ArrayMatrixAf4x3_H_
29 #define _SSE2_ArrayMatrixAf4x3_H_
30 
31 #ifndef _ArrayMatrixAf4x3_H_
32  #error "Don't include this file directly. include Math/Array/OgreArrayMatrix4.h"
33 #endif
34 
35 #include "OgreMatrix4.h"
36 
37 #include "Math/Array/OgreMathlib.h"
40 
41 namespace Ogre
42 {
43  class SimpleMatrixAf4x3;
44 
69  class _OgreExport ArrayMatrixAf4x3
70  {
71  public:
72  ArrayReal mChunkBase[12];
73 
// Body of the copy constructor ArrayMatrixAf4x3( const ArrayMatrixAf4x3 &copy )
// (the signature line itself is absent from this listing).
// Copies all 12 ArrayReal entries of copy.mChunkBase into this object.
76  {
77  //Using a loop minimizes instruction count (better i-cache)
78  //Doing 4 at a time per iteration maximizes instruction pairing
79  //Unrolling the whole loop is i-cache unfriendly and
80  //becomes unmaintainable (12 lines!?)
    // Three iterations (i = 0, 4, 8), four assignments each, cover indices 0..11.
81  for( size_t i=0; i<12; i+=4 )
82  {
83  mChunkBase[i ] = copy.mChunkBase[i ];
84  mChunkBase[i+1] = copy.mChunkBase[i+1];
85  mChunkBase[i+2] = copy.mChunkBase[i+2];
86  mChunkBase[i+3] = copy.mChunkBase[i+3];
87  }
88  }
89 
/// Sets all packed matrices to the same value as the scalar input matrix.
/// Each of the first 12 values of m._m is broadcast with _mm_set_ps1 into the
/// mChunkBase entry with the same index. m._m[12] .. m._m[15] are not read.
91  void setAll( const Matrix4 &m )
92  {
    // One broadcast per element: mChunkBase[k] holds m._m[k] replicated in every lane.
93  mChunkBase[0] = _mm_set_ps1( m._m[0] );
94  mChunkBase[1] = _mm_set_ps1( m._m[1] );
95  mChunkBase[2] = _mm_set_ps1( m._m[2] );
96  mChunkBase[3] = _mm_set_ps1( m._m[3] );
97  mChunkBase[4] = _mm_set_ps1( m._m[4] );
98  mChunkBase[5] = _mm_set_ps1( m._m[5] );
99  mChunkBase[6] = _mm_set_ps1( m._m[6] );
100  mChunkBase[7] = _mm_set_ps1( m._m[7] );
101  mChunkBase[8] = _mm_set_ps1( m._m[8] );
102  mChunkBase[9] = _mm_set_ps1( m._m[9] );
103  mChunkBase[10] = _mm_set_ps1( m._m[10] );
104  mChunkBase[11] = _mm_set_ps1( m._m[11] );
105  }
106 
// Body of the static factory createAllFromMatrix4( const Matrix4 &m )
// (the signature line itself is absent from this listing).
// Builds a local ArrayMatrixAf4x3, fills it through setAll( m ) and returns it by value.
108  {
109  ArrayMatrixAf4x3 retVal;
110  retVal.setAll( m );
111  return retVal;
112  }
113 
114  // Concatenation
116 
// Member operations declared here without bodies.
// NOTE(review): the definitions are presumably in OgreArrayMatrixAf4x3.inl, which
// this header includes at its end - confirm there before relying on any detail below.
117  inline ArrayVector3 operator * ( const ArrayVector3 &rhs ) const;
118 
    // In-place form taking another ArrayMatrixAf4x3; returns void, so it cannot be chained.
121  FORCEINLINE void operator *= ( const ArrayMatrixAf4x3 &rhs );
122 
    // NOTE(review): by its name, overwrites this matrix from the quaternion q - confirm.
134  inline void fromQuaternion( const ArrayQuaternion &q );
135 
    // NOTE(review): by its name, builds this matrix from position, scale and orientation - confirm.
137  inline void makeTransform( const ArrayVector3 &position, const ArrayVector3 &scale,
138  const ArrayQuaternion &orientation );
139 
    // Both inversion variants take no arguments and modify this object (non-const, void).
144  inline void setToInverse(void);
145 
    // NOTE(review): the name suggests non-invertible inputs become identity - confirm.
149  inline void setToInverseDegeneratesAsIdentity(void);
150 
    // Takes two masks; NOTE(review): exact per-lane semantics are not visible here - confirm.
161  inline void retain( ArrayMaskR orientation, ArrayMaskR scale );
162 
// SoA -> AoS output. All three are const on this object and write through a
// non-const destination pointer, so the parameter is named dst in every overload
// (two of them previously called it src).
// NOTE(review): how many elements dst must provide is not visible here - confirm in the .inl.
167  inline void streamToAoS( Matrix4 * RESTRICT_ALIAS dst ) const;
168  inline void storeToAoS( SimpleMatrixAf4x3 * RESTRICT_ALIAS dst ) const;
169  inline void streamToAoS( SimpleMatrixAf4x3 * RESTRICT_ALIAS dst ) const;
170 
// AoS -> SoA input. These modify this object and only read through src.
// The last overload takes a pointer to pointers instead of a contiguous array.
175  inline void loadFromAoS( const Matrix4 * RESTRICT_ALIAS src );
176  inline void loadFromAoS( const SimpleMatrixAf4x3 * RESTRICT_ALIAS src );
177  inline void loadFromAoS( const SimpleMatrixAf4x3 * * RESTRICT_ALIAS src );
178 
179  static const ArrayMatrixAf4x3 IDENTITY;
180  };
181 
186  class _OgreExport SimpleMatrixAf4x3
187  {
188  public:
189  ArrayReal mChunkBase[3];
190 
// Body of the constructor SimpleMatrixAf4x3( ArrayReal row0, ArrayReal row1, ArrayReal row2 )
// (the signature line itself is absent from this listing).
// Stores the three arguments, in order, in mChunkBase[0..2].
193  {
194  mChunkBase[0] = row0;
195  mChunkBase[1] = row1;
196  mChunkBase[2] = row2;
197  }
198 
/// Loads the first 12 values of src._m, four at a time, into mChunkBase[0..2].
/// Assumes src is aligned (the aligned-load intrinsic _mm_load_ps is used).
/// src._m[12] .. src._m[15] are not read.
200  void load( const Matrix4 &src )
201  {
202  mChunkBase[0] = _mm_load_ps( src._m );
203  mChunkBase[1] = _mm_load_ps( src._m+4 );
204  mChunkBase[2] = _mm_load_ps( src._m+8 );
205  }
206 
/// Writes 16 floats to dst: the three stored chunks (12 floats) followed by 0, 0, 0, 1.
/// Assumes dst is aligned (the aligned-store intrinsic _mm_store_ps is used).
208  void store( Matrix4 *dst ) const
209  {
    // The destination object is addressed as a flat float array.
    // NOTE(review): relies on Matrix4 beginning with its 16 contiguous floats - confirm.
210  float * RESTRICT_ALIAS dstPtr = reinterpret_cast<float*>( dst );
211 
212  _mm_store_ps( dstPtr, mChunkBase[0] );
213  _mm_store_ps( dstPtr + 4, mChunkBase[1] );
214  _mm_store_ps( dstPtr + 8, mChunkBase[2] );
    // Skip past the 12 floats just written and fill the remaining four with 0, 0, 0, 1.
215  dstPtr += 12;
216  *dstPtr++ = 0;
217  *dstPtr++ = 0;
218  *dstPtr++ = 0;
219  *dstPtr++ = 1;
220  }
221 
/// Writes only the first 12 floats of dst from mChunkBase[0..2]; the last four
/// floats of the destination are left untouched.
/// Assumes dst is aligned (the aligned-store intrinsic _mm_store_ps is used).
223  void store4x3( Matrix4 *dst ) const
224  {
    // The destination object is addressed as a flat float array.
225  float * RESTRICT_ALIAS dstPtr = reinterpret_cast<float*>( dst );
226 
227  _mm_store_ps( dstPtr, mChunkBase[0] );
228  _mm_store_ps( dstPtr + 4, mChunkBase[1] );
229  _mm_store_ps( dstPtr + 8, mChunkBase[2] );
230  }
231 
/// Writes the 12 floats held in mChunkBase[0..2] to dst[0..11].
/// Assumes dst is aligned (the aligned-store intrinsic _mm_store_ps is used).
233  void store4x3( float * RESTRICT_ALIAS dst ) const
234  {
235  _mm_store_ps( dst, mChunkBase[0] );
236  _mm_store_ps( dst + 4, mChunkBase[1] );
237  _mm_store_ps( dst + 8, mChunkBase[2] );
238  }
239 
/// Copies our 4x3 contents (12 floats) to dst, using memory write combining when possible.
/// Which store is used is decided at compile time by OGRE_RENDERSYSTEM_API_ALIGN_COMPATIBILITY.
241  void streamTo4x3( float * RESTRICT_ALIAS dst ) const
242  {
243 #ifndef OGRE_RENDERSYSTEM_API_ALIGN_COMPATIBILITY
    // Default path: streaming stores via _mm_stream_ps.
244  _mm_stream_ps( dst, mChunkBase[0] );
245  _mm_stream_ps( dst+4, mChunkBase[1] );
246  _mm_stream_ps( dst+8, mChunkBase[2] );
247 #else
    // Compatibility path: unaligned stores via _mm_storeu_ps, so dst needs no particular alignment.
248  _mm_storeu_ps( dst, mChunkBase[0] );
249  _mm_storeu_ps( dst+4, mChunkBase[1] );
250  _mm_storeu_ps( dst+8, mChunkBase[2] );
251 #endif
252  }
253 
254  static const SimpleMatrixAf4x3 IDENTITY;
255  };
256 
260 }
261 
262 #include "OgreArrayMatrixAf4x3.inl"
263 
264 #endif
void streamTo4x3(float *RESTRICT_ALIAS dst) const
Copies our 4x3 contents using memory write combining when possible.
Class encapsulating a standard 4x4 homogeneous matrix.
Definition: OgreMatrix4.h:79
bool ArrayMaskR
#define _OgreExport
Definition: OgrePlatform.h:255
void store4x3(float *RESTRICT_ALIAS dst) const
Assumes dst is aligned.
Real _m[16]
Definition: OgreMatrix4.h:91
Real ArrayReal
Cache-friendly container of AFFINE 4x4 matrices represented as a SoA array.
Radian operator*(Real a, const Radian &b)
Definition: OgreMath.h:782
#define RESTRICT_ALIAS
Definition: OgrePlatform.h:448
void load(const Matrix4 &src)
Assumes src is aligned.
SimpleMatrixAf4x3(ArrayReal row0, ArrayReal row1, ArrayReal row2)
void setAll(const Matrix4 &m)
Sets all packed matrices to the same value as the scalar input matrix.
Cache-friendly array of 3-dimensional vectors represented as a SoA array.
void store(Matrix4 *dst) const
Assumes dst is aligned.
Simple wrapper to load an AoS 4x3 matrix using SSE.
ArrayMatrixAf4x3(const ArrayMatrixAf4x3 &copy)
#define FORCEINLINE
Definition: OgrePlatform.h:104
static ArrayMatrixAf4x3 createAllFromMatrix4(const Matrix4 &m)
void store4x3(Matrix4 *dst) const
Assumes dst is aligned.
Cache-friendly array of Quaternions represented as a SoA array.