From 1d0a78fa65835f24b42a8bbba89e65caef457de0 Mon Sep 17 00:00:00 2001 From: RI2757 Date: Mon, 13 Apr 2015 20:11:59 -0300 Subject: [PATCH] =?UTF-8?q?Exerc=C3=ADcio=201?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sse.c | 72 +++++++++++++++++++++++++---------------------------------- 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/sse.c b/sse.c index 211722f..16ac603 100644 --- a/sse.c +++ b/sse.c @@ -2,7 +2,7 @@ Exemplo de código SSE utilizando Intel instrinsics e gcc built-in functions Compile usando: -gcc sse.c -o sse -msse -msse4.2 -O3 +gcc sse.c -o sse -msse -msse4.2 http://www.songho.ca/misc/sse/sse.html @@ -24,7 +24,6 @@ AVX #include // SSE3 #include // SSE4.1 - #define VECTOR_SIZE 4 typedef float v4sf __attribute__ ((vector_size(sizeof(float)*VECTOR_SIZE))); @@ -34,7 +33,6 @@ typedef union f4vector float f[VECTOR_SIZE]; } f4vector; - void add_intel_intrinsics(float *a, float *b, float *c) { __m128 va = _mm_load_ps (a); @@ -52,60 +50,52 @@ void add_intel_intrinsics(float *a, float *b, float *c) */ } - - -v4sf add_gcc_builtin(v4sf a, v4sf b) -{ - return __builtin_ia32_addps (a, b); -} - - - int main (int argc, char *argv[]) { float *a __attribute__ ((aligned(16))) = (float*)malloc (sizeof(float) * 4); //aloca um vetor de 16bytes (128bits) alinhado em endereços múltiplos de 16bytes. float *b __attribute__ ((aligned(16))) = (float*)malloc (sizeof(float) * 4); - float *c __attribute__ ((aligned(16))) = (float*)malloc (sizeof(float) * 4); - + int i = 0; - + int j = 10; + int x, y, w=2,h=2, c=0; for (i = 0; i < 4; ++i) { - a[i] = i; - b[i] = i; + a[i] = 200+j; + b[i] = 10; + j = j +10; } - - printf("Intel SSE\n"); - - add_intel_intrinsics(a, b, c); + printf("vetor A \n"); for (i = 0; i < 4; ++i) { - printf("%f\n", c[i]); + printf("%.2f\n", a[i]); } - - free(a); - free(b); - free(c); - - printf("\nGCC Built-in Functions\n"); - - v4sf d, e, f; - - d = (v4sf){0, 1, 2, 3}; - e = (v4sf){0, 1, 2, 3}; - f = add_gcc_builtin(d, e); + printf("\nvetor B \n"); for (i = 0; i < 4; ++i) { - printf("%f\n", f[i]); + printf("%.2f\n", b[i]); } - - printf("\nGCC implicity vectorization\n"); - - f = d + e; +for(i=0;i<(h*w);i+=4){ + __m128 img= _mm_load_ps(a); + __m128 intensidade= _mm_load_ps(b); + __m128 r = _mm_add_ps(img,intensidade); + _mm_store_ps(b,r); + c+=4; + } + + printf("\nvetor Alterado\n"); + for(y=0;y