OGRE  2.0
Object-Oriented Graphics Rendering Engine
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
NEON/Single/OgreArrayMatrix4.h
Go to the documentation of this file.
1 /*
2 -----------------------------------------------------------------------------
3 This source file is part of OGRE
4  (Object-oriented Graphics Rendering Engine)
5 For the latest info, see http://www.ogre3d.org/
6 
7 Copyright (c) 2000-2014 Torus Knot Software Ltd
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 -----------------------------------------------------------------------------
27 */
28 #ifndef __NEON_ArrayMatrix4_H__
29 #define __NEON_ArrayMatrix4_H__
30 
31 #ifndef __ArrayMatrix4_H__
32  #error "Don't include this file directly. include Math/Array/OgreArrayMatrix4.h"
33 #endif
34 
35 #include "OgreMatrix4.h"
36 
37 #include "Math/Array/OgreMathlib.h"
40 
41 namespace Ogre
42 {
43  class SimpleMatrix4;
44 
69  class _OgreExport ArrayMatrix4
70  {
71  public:
72  ArrayReal mChunkBase[16];
73 
75  ArrayMatrix4( const ArrayMatrix4 &copy )
76  {
77  //Using a loop minimizes instruction count (better i-cache)
78  //Doing 4 at a time per iteration maximizes instruction pairing
79  //Unrolling the whole loop is i-cache unfriendly and
80  //becomes unmaintainable (16 lines!?)
81  for( size_t i=0; i<16; i+=4 )
82  {
83  mChunkBase[i ] = copy.mChunkBase[i ];
84  mChunkBase[i+1] = copy.mChunkBase[i+1];
85  mChunkBase[i+2] = copy.mChunkBase[i+2];
86  mChunkBase[i+3] = copy.mChunkBase[i+3];
87  }
88  }
89 
90  void getAsMatrix4( Matrix4 &out, size_t index ) const
91  {
92  //Be careful of not writing to these regions or else strict aliasing rule gets broken!!!
93  const Real * RESTRICT_ALIAS aliasedReal = reinterpret_cast<const Real*>( mChunkBase );
94  Real * RESTRICT_ALIAS matrix = reinterpret_cast<Real*>( out._m );
95  for( size_t i=0; i<16; i+=4 )
96  {
97  matrix[i ] = aliasedReal[ARRAY_PACKED_REALS * (i ) + index];
98  matrix[i+1] = aliasedReal[ARRAY_PACKED_REALS * (i+1) + index];
99  matrix[i+2] = aliasedReal[ARRAY_PACKED_REALS * (i+2) + index];
100  matrix[i+3] = aliasedReal[ARRAY_PACKED_REALS * (i+3) + index];
101  }
102  }
103 
106  Matrix4 getAsMatrix4( size_t index ) const
107  {
108  Matrix4 retVal;
109  getAsMatrix4( retVal, index );
110 
111  return retVal;
112  }
113 
114  void setFromMatrix4( const Matrix4 &m, size_t index )
115  {
116  Real * RESTRICT_ALIAS aliasedReal = reinterpret_cast<Real*>( mChunkBase );
117  const Real * RESTRICT_ALIAS matrix = reinterpret_cast<const Real*>( m._m );
118  for( size_t i=0; i<16; i+=4 )
119  {
120  aliasedReal[ARRAY_PACKED_REALS * (i ) + index] = matrix[i ];
121  aliasedReal[ARRAY_PACKED_REALS * (i+1) + index] = matrix[i+1];
122  aliasedReal[ARRAY_PACKED_REALS * (i+2) + index] = matrix[i+2];
123  aliasedReal[ARRAY_PACKED_REALS * (i+3) + index] = matrix[i+3];
124  }
125  }
126 
128  void setAll( const Matrix4 &m )
129  {
130  mChunkBase[0] = vdupq_n_f32( m._m[0] );
131  mChunkBase[1] = vdupq_n_f32( m._m[1] );
132  mChunkBase[2] = vdupq_n_f32( m._m[2] );
133  mChunkBase[3] = vdupq_n_f32( m._m[3] );
134  mChunkBase[4] = vdupq_n_f32( m._m[4] );
135  mChunkBase[5] = vdupq_n_f32( m._m[5] );
136  mChunkBase[6] = vdupq_n_f32( m._m[6] );
137  mChunkBase[7] = vdupq_n_f32( m._m[7] );
138  mChunkBase[8] = vdupq_n_f32( m._m[8] );
139  mChunkBase[9] = vdupq_n_f32( m._m[9] );
140  mChunkBase[10] = vdupq_n_f32( m._m[10] );
141  mChunkBase[11] = vdupq_n_f32( m._m[11] );
142  mChunkBase[12] = vdupq_n_f32( m._m[12] );
143  mChunkBase[13] = vdupq_n_f32( m._m[13] );
144  mChunkBase[14] = vdupq_n_f32( m._m[14] );
145  mChunkBase[15] = vdupq_n_f32( m._m[15] );
146  }
147 
149  {
150  ArrayMatrix4 retVal;
151  retVal.mChunkBase[0] = vdupq_n_f32( m._m[0] );
152  retVal.mChunkBase[1] = vdupq_n_f32( m._m[1] );
153  retVal.mChunkBase[2] = vdupq_n_f32( m._m[2] );
154  retVal.mChunkBase[3] = vdupq_n_f32( m._m[3] );
155  retVal.mChunkBase[4] = vdupq_n_f32( m._m[4] );
156  retVal.mChunkBase[5] = vdupq_n_f32( m._m[5] );
157  retVal.mChunkBase[6] = vdupq_n_f32( m._m[6] );
158  retVal.mChunkBase[7] = vdupq_n_f32( m._m[7] );
159  retVal.mChunkBase[8] = vdupq_n_f32( m._m[8] );
160  retVal.mChunkBase[9] = vdupq_n_f32( m._m[9] );
161  retVal.mChunkBase[10] = vdupq_n_f32( m._m[10] );
162  retVal.mChunkBase[11] = vdupq_n_f32( m._m[11] );
163  retVal.mChunkBase[12] = vdupq_n_f32( m._m[12] );
164  retVal.mChunkBase[13] = vdupq_n_f32( m._m[13] );
165  retVal.mChunkBase[14] = vdupq_n_f32( m._m[14] );
166  retVal.mChunkBase[15] = vdupq_n_f32( m._m[15] );
167  return retVal;
168  }
169 
175  inline ArrayMatrix4& operator = ( const ArrayMatrix4& rkMatrix )
176  {
177  for( size_t i=0; i<16; i+=4 )
178  {
179  mChunkBase[i ] = rkMatrix.mChunkBase[i ];
180  mChunkBase[i+1] = rkMatrix.mChunkBase[i+1];
181  mChunkBase[i+2] = rkMatrix.mChunkBase[i+2];
182  mChunkBase[i+3] = rkMatrix.mChunkBase[i+3];
183  }
184  return *this;
185  }
186 
187  // Concatenation
188  inline friend ArrayMatrix4 operator * ( const ArrayMatrix4 &lhs, const ArrayMatrix4 &rhs );
189 
190  inline ArrayVector3 operator * ( const ArrayVector3 &rhs ) const;
191 
194  inline void operator *= ( const ArrayMatrix4 &rhs );
195 
207  inline void fromQuaternion( const ArrayQuaternion &q );
208 
210  inline void makeTransform( const ArrayVector3 &position, const ArrayVector3 &scale,
211  const ArrayQuaternion &orientation );
212 
217  inline void storeToAoS( Matrix4 * RESTRICT_ALIAS dst ) const;
218 
223  inline void loadFromAoS( const Matrix4 * RESTRICT_ALIAS src );
224  inline void loadFromAoS( const SimpleMatrix4 * RESTRICT_ALIAS src );
225 
227  inline bool isAffine() const;
228 
229  static const ArrayMatrix4 IDENTITY;
230  };
231 
236  class _OgreExport SimpleMatrix4
237  {
238  public:
239  ArrayReal mChunkBase[4];
240 
242  void load( const Matrix4 &src )
243  {
244  mChunkBase[0] = vld1q_f32( src._m );
245  mChunkBase[1] = vld1q_f32( src._m+4 );
246  mChunkBase[2] = vld1q_f32( src._m+8 );
247  mChunkBase[3] = vld1q_f32( src._m+12 );
248  }
249  };
250 
254 }
255 
256 #include "OgreArrayMatrix4.inl"
257 
258 #endif
Class encapsulating a standard 4x4 homogeneous matrix.
Definition: OgreMatrix4.h:79
float Real
Software floating point type.
#define _OgreExport
Definition: OgrePlatform.h:255
Cache-friendly container of 4x4 matrices represented as a SoA array.
ArrayReal mChunkBase[16]
Real _m[16]
Definition: OgreMatrix4.h:91
Real ArrayReal
Simple wrap up to load an AoS matrix 4x4 using NEON.
Radian operator*(Real a, const Radian &b)
Definition: OgreMath.h:782
#define RESTRICT_ALIAS
Definition: OgrePlatform.h:448
ArrayMatrix4(const ArrayMatrix4 &copy)
Cache-friendly array of 3-dimensional vectors represented as a SoA array.
void getAsMatrix4(Matrix4 &out, size_t index) const
void setAll(const Matrix4 &m)
Sets all packed matrices to the same value as the scalar input matrix.
#define ARRAY_PACKED_REALS
void setFromMatrix4(const Matrix4 &m, size_t index)
static ArrayMatrix4 createAllFromMatrix4(const Matrix4 &m)
Cache-friendly array of Quaternion represented as a SoA array.
Matrix4 getAsMatrix4(size_t index) const
STRONGLY prefer using getAsMatrix4( Matrix4 &out, size_t index ) instead.
void load(const Matrix4 &src)
Assumes src is aligned.