00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef EIGEN_PACKET_MATH_ALTIVEC_H
00026 #define EIGEN_PACKET_MATH_ALTIVEC_H
00027
00028 namespace Eigen {
00029
00030 namespace internal {
00031
00032 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
00033 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
00034 #endif
00035
00036 #ifndef EIGEN_HAS_FUSE_CJMADD
00037 #define EIGEN_HAS_FUSE_CJMADD 1
00038 #endif
00039
00040
00041 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
00042 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16
00043 #endif
00044
00045 typedef __vector float Packet4f;
00046 typedef __vector int Packet4i;
00047 typedef __vector unsigned int Packet4ui;
00048 typedef __vector __bool int Packet4bi;
00049 typedef __vector short int Packet8i;
00050 typedef __vector unsigned char Packet16uc;
00051
00052
00053
00054
00055 #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
00056 Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X)
00057
00058 #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
00059 Packet4i p4i_##NAME = vec_splat_s32(X)
00060
00061 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
00062 Packet4f p4f_##NAME = pset1<Packet4f>(X)
00063
00064 #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
00065 Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1<int>(X))
00066
00067 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
00068 Packet4i p4i_##NAME = pset1<Packet4i>(X)
00069
00070 #define DST_CHAN 1
00071 #define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride))
00072
00073
00074 static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 };
00075 static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 };
00076 static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
00077 static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0);
00078 static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7};
00079
00080 static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0);
00081 static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0);
00082 static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1);
00083 static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16);
00084 static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1);
00085 static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0);
00086 static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1);
00087
00088 template<> struct packet_traits<float> : default_packet_traits
00089 {
00090 typedef Packet4f type;
00091 enum {
00092 Vectorizable = 1,
00093 AlignedOnScalar = 1,
00094 size=4,
00095
00096
00097 HasSin = 0,
00098 HasCos = 0,
00099 HasLog = 0,
00100 HasExp = 0,
00101 HasSqrt = 0
00102 };
00103 };
00104 template<> struct packet_traits<int> : default_packet_traits
00105 {
00106 typedef Packet4i type;
00107 enum {
00108
00109 Vectorizable = 1,
00110 AlignedOnScalar = 1,
00111 size=4
00112 };
00113 };
00114
00115 template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}; };
00116 template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
00163
00164 float EIGEN_ALIGN16 af[4];
00165 af[0] = from;
00166 Packet4f vc = vec_ld(0, af);
00167 vc = vec_splat(vc, 0);
00168 return vc;
00169 }
00170
00171 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
00172 int EIGEN_ALIGN16 ai[4];
00173 ai[0] = from;
00174 Packet4i vc = vec_ld(0, ai);
00175 vc = vec_splat(vc, 0);
00176 return vc;
00177 }
00178
00179 template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return vec_add(pset1<Packet4f>(a), p4f_COUNTDOWN); }
00180 template<> EIGEN_STRONG_INLINE Packet4i plset<int>(const int& a) { return vec_add(pset1<Packet4i>(a), p4i_COUNTDOWN); }
00181
00182 template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); }
00183 template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); }
00184
00185 template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); }
00186 template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); }
00187
00188 template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub<Packet4f>(p4f_ZERO, a); }
00189 template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub<Packet4i>(p4i_ZERO, a); }
00190
00191 template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,p4f_ZERO); }
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228 template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
00229 {
00230 Packet4f t, y_0, y_1, res;
00231
00232
00233 y_0 = vec_re(b);
00234
00235
00236 t = vec_nmsub(y_0, b, p4f_ONE);
00237 y_1 = vec_madd(y_0, t, y_0);
00238
00239 res = vec_madd(a, y_1, p4f_ZERO);
00240 return res;
00241 }
00242
00243 template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& , const Packet4i& )
00244 { eigen_assert(false && "packet integer division are not supported by AltiVec");
00245 return pset1<Packet4i>(0);
00246 }
00247
00248
00249 template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); }
00250 template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
00251
00252 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
00253 template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
00254
00255 template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
00256 template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
00257
00258
00259 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
00260 template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
00261
00262 template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
00263 template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
00264
00265 template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
00266 template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
00267
00268 template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
00269 template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
00270
00271 template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
00272 template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); }
00273
00274 template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from)
00275 {
00276 EIGEN_DEBUG_ALIGNED_LOAD
00277
00278 Packet16uc MSQ, LSQ;
00279 Packet16uc mask;
00280 MSQ = vec_ld(0, (unsigned char *)from);
00281 LSQ = vec_ld(15, (unsigned char *)from);
00282 mask = vec_lvsl(0, from);
00283 return (Packet4f) vec_perm(MSQ, LSQ, mask);
00284
00285 }
00286 template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from)
00287 {
00288 EIGEN_DEBUG_ALIGNED_LOAD
00289
00290 Packet16uc MSQ, LSQ;
00291 Packet16uc mask;
00292 MSQ = vec_ld(0, (unsigned char *)from);
00293 LSQ = vec_ld(15, (unsigned char *)from);
00294 mask = vec_lvsl(0, from);
00295 return (Packet4i) vec_perm(MSQ, LSQ, mask);
00296 }
00297
00298 template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
00299 {
00300 Packet4f p;
00301 if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4f>(from);
00302 else p = ploadu<Packet4f>(from);
00303 return vec_perm(p, p, p16uc_DUPLICATE);
00304 }
00305 template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
00306 {
00307 Packet4i p;
00308 if((ptrdiff_t(&from) % 16) == 0) p = pload<Packet4i>(from);
00309 else p = ploadu<Packet4i>(from);
00310 return vec_perm(p, p, p16uc_DUPLICATE);
00311 }
00312
00313 template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
00314 template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); }
00315
00316 template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
00317 {
00318 EIGEN_DEBUG_UNALIGNED_STORE
00319
00320
00321 Packet16uc MSQ, LSQ, edges;
00322 Packet16uc edgeAlign, align;
00323
00324 MSQ = vec_ld(0, (unsigned char *)to);
00325 LSQ = vec_ld(15, (unsigned char *)to);
00326 edgeAlign = vec_lvsl(0, to);
00327 edges=vec_perm(LSQ,MSQ,edgeAlign);
00328 align = vec_lvsr( 0, to );
00329 MSQ = vec_perm(edges,(Packet16uc)from,align);
00330 LSQ = vec_perm((Packet16uc)from,edges,align);
00331 vec_st( LSQ, 15, (unsigned char *)to );
00332 vec_st( MSQ, 0, (unsigned char *)to );
00333 }
00334 template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from)
00335 {
00336 EIGEN_DEBUG_UNALIGNED_STORE
00337
00338
00339 Packet16uc MSQ, LSQ, edges;
00340 Packet16uc edgeAlign, align;
00341
00342 MSQ = vec_ld(0, (unsigned char *)to);
00343 LSQ = vec_ld(15, (unsigned char *)to);
00344 edgeAlign = vec_lvsl(0, to);
00345 edges=vec_perm(LSQ, MSQ, edgeAlign);
00346 align = vec_lvsr( 0, to );
00347 MSQ = vec_perm(edges, (Packet16uc) from, align);
00348 LSQ = vec_perm((Packet16uc) from, edges, align);
00349 vec_st( LSQ, 15, (unsigned char *)to );
00350 vec_st( MSQ, 0, (unsigned char *)to );
00351 }
00352
00353 template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
00354 template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); }
00355
00356 template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
00357 template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; }
00358
00359 template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); }
00360 template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); }
00361
00362 template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
00363 template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
00364
00365 template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
00366 {
00367 Packet4f b, sum;
00368 b = (Packet4f) vec_sld(a, a, 8);
00369 sum = vec_add(a, b);
00370 b = (Packet4f) vec_sld(sum, sum, 4);
00371 sum = vec_add(sum, b);
00372 return pfirst(sum);
00373 }
00374
00375 template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
00376 {
00377 Packet4f v[4], sum[4];
00378
00379
00380
00381
00382 v[0] = vec_mergeh(vecs[0], vecs[2]);
00383 v[1] = vec_mergel(vecs[0], vecs[2]);
00384 v[2] = vec_mergeh(vecs[1], vecs[3]);
00385 v[3] = vec_mergel(vecs[1], vecs[3]);
00386
00387 sum[0] = vec_mergeh(v[0], v[2]);
00388 sum[1] = vec_mergel(v[0], v[2]);
00389 sum[2] = vec_mergeh(v[1], v[3]);
00390 sum[3] = vec_mergel(v[1], v[3]);
00391
00392
00393
00394 sum[0] = vec_add(sum[0], sum[1]);
00395
00396 sum[1] = vec_add(sum[2], sum[3]);
00397
00398 sum[0] = vec_add(sum[0], sum[1]);
00399
00400 return sum[0];
00401 }
00402
00403 template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
00404 {
00405 Packet4i sum;
00406 sum = vec_sums(a, p4i_ZERO);
00407 sum = vec_sld(sum, p4i_ZERO, 12);
00408 return pfirst(sum);
00409 }
00410
00411 template<> EIGEN_STRONG_INLINE Packet4i preduxp<Packet4i>(const Packet4i* vecs)
00412 {
00413 Packet4i v[4], sum[4];
00414
00415
00416
00417
00418 v[0] = vec_mergeh(vecs[0], vecs[2]);
00419 v[1] = vec_mergel(vecs[0], vecs[2]);
00420 v[2] = vec_mergeh(vecs[1], vecs[3]);
00421 v[3] = vec_mergel(vecs[1], vecs[3]);
00422
00423 sum[0] = vec_mergeh(v[0], v[2]);
00424 sum[1] = vec_mergel(v[0], v[2]);
00425 sum[2] = vec_mergeh(v[1], v[3]);
00426 sum[3] = vec_mergel(v[1], v[3]);
00427
00428
00429
00430 sum[0] = vec_add(sum[0], sum[1]);
00431
00432 sum[1] = vec_add(sum[2], sum[3]);
00433
00434 sum[0] = vec_add(sum[0], sum[1]);
00435
00436 return sum[0];
00437 }
00438
00439
00440
00441 template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
00442 {
00443 Packet4f prod;
00444 prod = pmul(a, (Packet4f)vec_sld(a, a, 8));
00445 return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4)));
00446 }
00447
00448 template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
00449 {
00450 EIGEN_ALIGN16 int aux[4];
00451 pstore(aux, a);
00452 return aux[0] * aux[1] * aux[2] * aux[3];
00453 }
00454
00455
00456 template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
00457 {
00458 Packet4f b, res;
00459 b = vec_min(a, vec_sld(a, a, 8));
00460 res = vec_min(b, vec_sld(b, b, 4));
00461 return pfirst(res);
00462 }
00463
00464 template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
00465 {
00466 Packet4i b, res;
00467 b = vec_min(a, vec_sld(a, a, 8));
00468 res = vec_min(b, vec_sld(b, b, 4));
00469 return pfirst(res);
00470 }
00471
00472
00473 template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
00474 {
00475 Packet4f b, res;
00476 b = vec_max(a, vec_sld(a, a, 8));
00477 res = vec_max(b, vec_sld(b, b, 4));
00478 return pfirst(res);
00479 }
00480
00481 template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
00482 {
00483 Packet4i b, res;
00484 b = vec_max(a, vec_sld(a, a, 8));
00485 res = vec_max(b, vec_sld(b, b, 4));
00486 return pfirst(res);
00487 }
00488
00489 template<int Offset>
00490 struct palign_impl<Offset,Packet4f>
00491 {
00492 static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
00493 {
00494 if (Offset!=0)
00495 first = vec_sld(first, second, Offset*4);
00496 }
00497 };
00498
00499 template<int Offset>
00500 struct palign_impl<Offset,Packet4i>
00501 {
00502 static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
00503 {
00504 if (Offset!=0)
00505 first = vec_sld(first, second, Offset*4);
00506 }
00507 };
00508
00509 }
00510
00511 }
00512
00513 #endif // EIGEN_PACKET_MATH_ALTIVEC_H