From 8ada36b01bdac6442ec1d9bedda9b996b138e5bc Mon Sep 17 00:00:00 2001 From: paulopieczarka Date: Tue, 13 Mar 2018 21:29:38 -0300 Subject: [PATCH] Atividade 1. --- atividade2/avx | Bin 0 -> 8816 bytes atividade2/avx.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 atividade2/avx create mode 100644 atividade2/avx.c diff --git a/atividade2/avx b/atividade2/avx new file mode 100644 index 0000000000000000000000000000000000000000..90eb9c5303ee2a840d6620fae27d9363df88b888 GIT binary patch literal 8816 zcmeHMZETy>6~4BUCQXCmmap`K)dL68mbgxvhLi$*osafS<+hlO>Fy9_%U_-ppM$|v1wBI!K4Yad8~z$j}c(2!hm|ux$iyp z>$fgKn)Z()o%h`H+;i^dx!3nzKi(1Ac&TG9&qoX$veDm9W0;DtOCP@a@38U^^`UP^$detKb6T)KA0GEDU2T z5sRk=jQ*j0Vr(=Sn=nSABjMrLU>teLRJea1Wf=qE*f0QcBs7sSM#8bU7)V5;fCooL z;|ey!=1^yAo3X~f*58y*7gB5d>(DAE#+gRHOnx2EXk2;zQ;TEsDeobLT^Ea8Lo@TR z($h;Q-zieiiRNm0rKDKn*dw6U(@R2BfT?^YuLW02L1xukaEb$_fCaa%^A-zkjdT6b zhn4zM zsV^d(ntCxK^(xY-i5E{u-AOui{o-+{zgXoJ14p4t3QcwUD)mF>voFywyzB)S%q8Y_ z>5~_NyMo=p9Xs`@?`1j~&!gdii2hVrT|G?n$&S|=Gc!L!fk!f-n)YZ0RwoNZbToc6 z^8;j_oqMqSeY8ayjht+J>&*1TiZUO;{{T!{yU#=F`Ml~S^+H|^kos<3)ku9WuXd3- zlUI95y_i?WNTvFzqQky}Xt~w?u#bj;5HA0)@3=y^J%@cK6!K2KoQy)UdE}fz{+dU! z3VAz^%o6ftefdk6t25J&?ydIWyJD+MhkRbC(06ahr%8prP4?v!CpK{UpWGndt+2 z%9fp(PV5QYgz!9FHuKz9xt#v}FDOdVe+W&!H`50oH2Lw&9>~Uw{?yjjBKq2D86N3{ zi0HtB!9Bsf!JeMZslK_>Gi@-dSuPqs>YRMzA${^4#N2FGcjv1a4VCM!PM@nJ@oHDO zCT4Df?Y{i3 zDSwy#{eQ{=rZ2ltfIc~^PtR=B9k1wbTuxn|FKq#rhIaOxHtRol_g^Z!=<-5Z5i@bb zbk8g0uK;pULADX<> zjuLX$y^V=-=<7d7zfT*=gFQ6*!H0sz>7#}EKRb<)qdlmw<|5QHvVG^_ zuA>}k#<%qC2v|qpXr(6AF(o-qEhoW}_8Fo~nIyO<(uCB z|1aNT?KNFe{hC*<$*dccyj96&CGCEj+uH8**6->|$5UzVI)9Tt(9oQgTH|Al&Hg}> zfBgy*_pS*vHV2vmwEKTO(T%0Wsa;`2_nPjKxJ#r;;$``GvKHn-be$||KTmMH728*c zQuE|Obo2Zc+s_vqC&hSGKAwv41^IPVj8}`SnwMhy8ljb#4;M}WbpErG3nv3!r`9|d zP7A$i{TAa3^Y0MF_#%<5(yWk7yKvIrb#Kjd;n#=Pdog~!pktq%TsWnaicc3-Bi}=< zJ}z2qYCRX@i-lGaUn1xjYbTdH2^FKXPRqn6xnk5jZ#l5L*xOJ0pA+LhTcH)X&^}0b zz_!2Ad5LiQzDvLR4(yiPPTnteO8m<4JfO6S#q z`~E74OQ`5`H*jweVk*Cq_?2<>0B{d}{dm6EwGQ~A66>y4;%rrrk4oGvGAd5^_w~qC z?0*5A>b1xD3&6dFV)LmC@P*FBLbt{Lp9r_?17*tNZQwL+=Tv*-85DVMfIW!M`)%d? zM%ian{I68ydX*IzJAjvZx7&G@ za^98ome_~;6RBh>Juu+!$79@7s@c4vm-liSFtsO?kwsy47Y%HgXTq43~e_&g1YiAp*EIIP=5fI*J zjSk%~OVitT2xD_-duuRcY~QqLXGgcu9c&GCP!00QvOk%Yjh7FWn#NC*^cZP$c5gKb z1J%8?jfSZ^+&7Gu&1fW?3ZszXhtNU+O(Gs0?21i9OFWJitYkF?6=gn>qQIgTQ_M(m z)EElKBUESSc34GXaU-3KViXD;pizL{mrSZ+{y54rWh!Cjj7Jm6*l4^+F_4#t4u{D>rN)L+!Y>EKkEDNa z6ly9ufiJ}=j7H_``J+QBGKV54gf-LE3{lgA(+~H<4Aq23V*O~tXo`vlLZRTt=nFp< z%Lo<#ukZg=((f;pc7EUIb0z-|#rF`dhit7xZy*(5Y#WmJ{K^0Gcooiq_fOX8EyU{2 z=hT!c(8q>K+Wp&sU&391>p$kyq~$RMaQn+8TDVUsRcu3o?ndm-=U(|r3H*6wI1iHg zZok*u2aN7c?BDCspb4Clr4R>Ak^kDqkN#(%yAS(km48$ms+FmlPX)ileW-*x3bp>Xo2vh4oP0HZOX z_7U;@IivjfUTo%?b#a>HB-rZD_kpbP?>(ANz`5w})F3fyyW_#5wR>iNCKhGcES9+DBMxC>e*OI2>3t%!~ z|Kn8}G?D+m^)9IuKKA-gAIN!DR$YP?`Qn~_&L>oj&H7?}o*3BtJle9r7rj&^%B9PBP! z00;}@5i`O5O*WkUSuR9A*KIX@Rz0{?v!TkgsYS7C^?y>e|9LfW^!~pZ%>ALgjEI(5 Ssrm!zfsD`X(ik6m{r?3sDQzAA literal 0 HcmV?d00001 diff --git a/atividade2/avx.c b/atividade2/avx.c new file mode 100644 index 0000000..f77254d --- /dev/null +++ b/atividade2/avx.c @@ -0,0 +1,54 @@ +/* +Exemplo de código AVX utilizando Intel instrinsics e gcc built-in functions + +Compile usando: +gcc avx.c -o avx -mavx -O3 + + +http://www.songho.ca/misc/sse/sse.html + +https://gcc.gnu.org/onlinedocs/gcc-4.9.2/gcc/X86-Built-in-Functions.html#X86-Built-in-Functions +https://gcc.gnu.org/onlinedocs/gcc-4.9.2/gcc/Vector-Extensions.html#Vector-Extensions +https://software.intel.com/sites/landingpage/IntrinsicsGuide/ +https://msdn.microsoft.com/en-us/library/26td21ds.aspx + +AVX +https://software.intel.com/en-us/articles/introduction-to-intel-advanced-vector-extensions/ + +*/ + +#include +#include // AVX + +__m256 negPixel(float *pixel) +{ + __m256 p = _mm256_load_ps(pixel); + __m256 max = _mm256_set1_ps(255); + return _mm256_sub_ps(max, p); +} + +int main (int argc, char *argv[]) +{ + float *im = (float*)_mm_malloc (sizeof(float) * 64, 32); //aloca um vetor de 32 bytes (256 bits) alinhado em endereços múltiplos de 16 bytes. + float *im2 = (float*)_mm_malloc (sizeof(float) * 64, 32); + + int i = 0; + for (i = 0; i < 64; ++i) { + im[i] = i; + } + + for(i = 0; i <= 56; i+=8) + { + __m256 pixel = negPixel(&im[i]); + _mm256_store_ps(&im2[i], pixel); + } + + for (i = 0; i < 64; ++i) { + printf("%f\t~~~~~~> %f\n", im[i], im2[i]); + } + + _mm_free(im); + _mm_free(im2); + + return 0; +}