OGRE  2.0
Object-Oriented Graphics Rendering Engine
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
OgreArrayConfig.h
Go to the documentation of this file.
1 /*
2 -----------------------------------------------------------------------------
3 This source file is part of OGRE
4  (Object-oriented Graphics Rendering Engine)
5 For the latest info, see http://www.ogre3d.org/
6 
7 Copyright (c) 2000-2014 Torus Knot Software Ltd
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 THE SOFTWARE.
26 -----------------------------------------------------------------------------
27 */
28 #ifndef __OgreArrayConfig_H__
29 #define __OgreArrayConfig_H__
30 
31 #include "OgreConfig.h"
33 
34 #if OGRE_USE_SIMD == 1
35  #if OGRE_CPU == OGRE_CPU_X86
36  //x86/x64 - SSE2
37  #if OGRE_DOUBLE_PRECISION == 1
38  #include <emmintrin.h>
39  #define ARRAY_PACKED_REALS 2 //Number of Reals packed in one ArrayReal for this (double precision, x86) configuration
40  namespace Ogre {
41  typedef __m128d ArrayReal; //SSE2 packed-double vector type; declared by the <emmintrin.h> include just above
42  } //NOTE(review): unlike the single precision branch below, this branch defines no ArrayMaskR, ARRAY_REAL_ZERO, ARRAY_INT_ZERO, OGRE_PREFETCH_* or OGRE_PREFETCH_SLOT_DISTANCE in this header
43  #else
44  #include <xmmintrin.h>
45  #include <emmintrin.h> //SSE Math library still needs SSE2
46  #define ARRAY_PACKED_REALS 4 //Number of Reals packed in one ArrayReal for this (single precision, x86) configuration
47  namespace Ogre {
48  typedef __m128 ArrayReal; //SSE packed-float vector type
49  typedef __m128 ArrayMaskR; //Mask type paired with ArrayReal; same underlying vector type
50 
51  #define ARRAY_REAL_ZERO _mm_setzero_ps() //Expands to a call producing a __m128 with every lane zero
52  #define ARRAY_INT_ZERO _mm_setzero_si128() //Expands to a call producing a __m128i with all bits cleared
53 
54  class ArrayRadian; //Forward declaration only; the class body is not part of this header
55  }
56 
57  #define OGRE_PREFETCH_T0( x ) _mm_prefetch( x, _MM_HINT_T0 ) //Each OGRE_PREFETCH_* forwards the address x to _mm_prefetch with the hint its name carries
58  #define OGRE_PREFETCH_T1( x ) _mm_prefetch( x, _MM_HINT_T1 ) //x is passed through untouched, so it must already have the pointer type _mm_prefetch accepts
59  #define OGRE_PREFETCH_T2( x ) _mm_prefetch( x, _MM_HINT_T2 ) //None of these end in ';' - the call site supplies it
60  #define OGRE_PREFETCH_NTA( x ) _mm_prefetch( x, _MM_HINT_NTA ) //Non-temporal hint variant
61 
62  //Distance (in ArrayMemoryManager's slots) used to keep fetching data. This also
63  //means the memory manager needs to allocate extra memory for them.
64  #define OGRE_PREFETCH_SLOT_DISTANCE (4*ARRAY_PACKED_REALS) //Must be multiple of ARRAY_PACKED_REALS. Parenthesised so it expands as one value next to /, % or other operators
65  #endif
66 
67  namespace Ogre {
68  typedef __m128i ArrayInt; //SSE2 128-bit integer vector type
69  typedef __m128i ArrayMaskI; //Mask type paired with ArrayInt; same underlying vector type
70  }
71  //NOTE(review): everything from here to the ARM branch sits outside the OGRE_DOUBLE_PRECISION test, so these single precision (_ps) helpers are also compiled when doubles are selected
73  #define _mm_madd_ps( a, b, c ) _mm_add_ps( c, _mm_mul_ps( a, b ) ) //r = (a * b) + c, built from a separate multiply and add
74  #define _mm_nmsub_ps( a, b, c ) _mm_sub_ps( c, _mm_mul_ps( a, b ) ) //r = c - (a * b)
76 
78  #define CastRealToInt( x ) _mm_castps_si128( x ) //Reinterprets the __m128 as __m128i; a cast, not a float-to-int conversion
79  #define CastIntToReal( x ) _mm_castsi128_ps( x ) //Reinterprets the __m128i as __m128; a cast, not an int-to-float conversion
80  #define CastArrayToReal( outFloatPtr, arraySimd ) _mm_store_ps( outFloatPtr, arraySimd ) //Stores the four packed floats to outFloatPtr; _mm_store_ps is the aligned store, so outFloatPtr must be 16-byte aligned
82 
83  #elif OGRE_CPU == OGRE_CPU_ARM
84  // ARM - NEON
85  #include <arm_neon.h>
86  #if OGRE_DOUBLE_PRECISION == 1
87  #error Double precision with SIMD on ARM is not supported
88  #else
89  #define ARRAY_PACKED_REALS 4 //Number of Reals packed in one ArrayReal for the ARM NEON configuration
90  namespace Ogre {
91  typedef float32x4_t ArrayReal; //NEON vector of four 32-bit floats
92  typedef uint32x4_t ArrayMaskR; //Mask type paired with ArrayReal; an unsigned 32-bit lane vector, not a float vector as on x86
93 
94  #define ARRAY_REAL_ZERO vdupq_n_f32( 0.0f ) //Expands to a call that broadcasts 0.0f to every lane
95  #define ARRAY_INT_ZERO vdupq_n_u32( 0 ) //Broadcasts 0 to every lane. NOTE(review): result is uint32x4_t, which matches the mask typedefs but not ArrayInt (int32x4_t) - confirm intended use
96 
97  class ArrayRadian; //Forward declaration only; the class body is not part of this header
98  }
99 
100  // Make sure that we have the preload macro. Might not be available with some compilers. Like the x86 prefetch macros, none of the macros below end in ';'.
101  #ifndef __pld
102  #define __pld(x) asm volatile ( "pld [%[addr]]\n" :: [addr] "r" (x) : "cc" ) //32-bit ARM PLD on address x; the call site supplies the ';' so the macro stays safe inside if/else
103  #endif
104 
105  #if defined(__arm64__) || defined(__aarch64__) //__arm64__ is only set by Apple toolchains; __aarch64__ is the ACLE macro for AArch64, where the 32-bit "pld" mnemonic does not exist
106  #define OGRE_PREFETCH_T0( x ) asm volatile ( "prfm pldl1keep, [%[addr]]\n" :: [addr] "r" (x) : "cc" )
107  #define OGRE_PREFETCH_T1( x ) asm volatile ( "prfm pldl2keep, [%[addr]]\n" :: [addr] "r" (x) : "cc" )
108  #define OGRE_PREFETCH_T2( x ) asm volatile ( "prfm pldl3keep, [%[addr]]\n" :: [addr] "r" (x) : "cc" )
109  #define OGRE_PREFETCH_NTA( x ) asm volatile ( "prfm pldl1strm, [%[addr]]\n" :: [addr] "r" (x) : "cc" )
110  #else //32-bit ARM: every hint level maps onto the same __pld
111  #define OGRE_PREFETCH_T0( x ) __pld(x)
112  #define OGRE_PREFETCH_T1( x ) __pld(x)
113  #define OGRE_PREFETCH_T2( x ) __pld(x)
114  #define OGRE_PREFETCH_NTA( x ) __pld(x)
115  #endif
116 
117  //Distance (in ArrayMemoryManager's slots) used to keep fetching data. This also
118  //means the memory manager needs to allocate extra memory for them.
119  #define OGRE_PREFETCH_SLOT_DISTANCE (4*ARRAY_PACKED_REALS) //Must be multiple of ARRAY_PACKED_REALS. Parenthesised so it expands as one value next to /, % or other operators
120  #endif
121 
122  namespace Ogre {
123  typedef int32x4_t ArrayInt; //NEON vector of four signed 32-bit integers
124  typedef uint32x4_t ArrayMaskI; //Mask type paired with ArrayInt; unsigned lanes, same type as ArrayMaskR
125  }
126 
128  #define _mm_madd_ps( a, b, c ) vmlaq_f32( c, a, b ) //r = (a * b) + c; keeps the SSE-style name and argument order, with the accumulator moved first for the NEON call
129  #define _mm_nmsub_ps( a, b, c ) vmlsq_f32( c, a, b ) //r = c - (a * b)
131 
133  //#define CastRealToInt( x ) vreinterpretq_s32_f32( x ) (disabled variant that would reinterpret to int32x4_t)
134  //#define CastIntToReal( x ) vreinterpretq_f32_s32( x ) (disabled variant that would reinterpret to float32x4_t)
135  #define CastRealToInt( x ) ( x ) //Identity in this branch: x is passed through with its type unchanged
136  #define CastIntToReal( x ) ( x ) //Identity in this branch: x is passed through with its type unchanged
137  #define CastArrayToReal( outFloatPtr, arraySimd ) vst1q_f32( outFloatPtr, arraySimd ) //Stores the four float lanes of arraySimd to outFloatPtr
139 
140  #else
141  //Unsupported architecture, tell user to reconfigure. We could silently fallback to C,
142  //but this is very green code, and architecture may be x86 with a rare compiler.
143  #error "Unknown platform or platform not supported for SIMD. Build Ogre without OGRE_USE_SIMD"
144  #endif
145 #else
146  //No SIMD, use C implementation
147  #define ARRAY_PACKED_REALS 1 //Scalar fallback: one Real per "array"
148  namespace Ogre {
149  typedef Real ArrayReal; //Every Array* type collapses to its scalar counterpart in this branch
151  typedef Radian ArrayRadian; //A typedef here, whereas the SIMD branches forward-declare a class of this name
152  typedef uint32 ArrayInt;
153  typedef bool ArrayMaskR; //Masks are plain booleans when there is a single lane
154  typedef bool ArrayMaskI;
155 
156  //Do NOT (repeat: do NOT) change these to static_cast<Ogre::Real>(x) and static_cast<int>(x)
157  //These are not conversions. They're reinterpretations! Here they expand to x unchanged.
158  #define CastIntToReal( x ) (x)
159  #define CastRealToInt( x ) (x)
160 
161  #define ogre_madd( a, b, c ) ( (c) + ( (a) * (b) ) ) //r = (a * b) + c. NOTE(review): named ogre_madd here, while the SIMD branches of this header define _mm_madd_ps/_mm_nmsub_ps instead
162 
163  #define OGRE_PREFETCH_T0( x ) ((void)0) //Prefetching is a no-op in this branch; the argument x is discarded and never evaluated
164  #define OGRE_PREFETCH_T1( x ) ((void)0)
165  #define OGRE_PREFETCH_T2( x ) ((void)0)
166  #define OGRE_PREFETCH_NTA( x ) ((void)0)
167 
168  #define ARRAY_INT_ZERO 0 //NOTE(review): no ARRAY_REAL_ZERO is defined in this branch, unlike the single precision SIMD branches
169 
171  #define CastArrayToReal( outFloatPtr, arraySimd ) (*(outFloatPtr) = arraySimd) //Writes the single value through outFloatPtr
172 
173  //Distance (in ArrayMemoryManager's slots) used to keep fetching data. This also
174  //means the memory manager needs to allocate extra memory for them.
175  #define OGRE_PREFETCH_SLOT_DISTANCE 0 //Must be multiple of ARRAY_PACKED_REALS; zero here, matching the no-op prefetch macros above
176  } //The macros above are preprocessor definitions, so the enclosing namespace does not scope them
177 #endif
178 
179 #endif
float Real
Software floating point type.
bool ArrayMaskR
unsigned int uint32
Definition: OgrePlatform.h:420
Real ArrayReal
Radian ArrayRadian
bool ArrayMaskI
Wrapper class which indicates a given angle value is in Radians.
Definition: OgreMath.h:49
uint32 ArrayInt