Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 60 additions & 4 deletions stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,16 @@
/* program constitutes acceptance of these licensing restrictions. */
/* 5. Absolutely no warranty is expressed or implied. */
/*-----------------------------------------------------------------------*/
/* When building for GPU offload (OMPGPU) in unified-memory mode
 * (OMPGPU_UNIFIED), declare that this translation unit requires the OpenMP
 * unified_address and unified_shared_memory features. With either macro
 * undefined, no requirement is emitted. */
#ifdef OMPGPU
#ifdef OMPGPU_UNIFIED
#pragma omp requires unified_address
#pragma omp requires unified_shared_memory
#endif
#endif

# include <stdio.h>
# include <unistd.h>
# include <stdlib.h>
# include <math.h>
# include <float.h>
# include <limits.h>
Expand Down Expand Up @@ -91,7 +99,7 @@
* per array.
*/
/* Default element count per array, used only when STREAM_ARRAY_SIZE has not
 * already been defined (for example on the compiler command line).
 * NOTE(review): this diff view shows two variants of the default. The `l`
 * suffix makes the constant a long, so expressions such as
 * STREAM_ARRAY_SIZE+OFFSET are evaluated in at least long. Presumably the
 * suffixed line supersedes the unsuffixed one -- confirm against the commit;
 * an uppercase `L` would be easier to tell apart from the digit 1. */
#ifndef STREAM_ARRAY_SIZE
# define STREAM_ARRAY_SIZE 10000000
# define STREAM_ARRAY_SIZE 10000000l
#endif

/* 2) STREAM runs each kernel "NTIMES" times and reports the *best* result
Expand Down Expand Up @@ -176,9 +184,9 @@
#define STREAM_TYPE double
#endif

/* The three benchmark arrays.
 * NOTE(review): this diff view shows both a fixed-size static array
 * declaration and a pointer declaration for a, b and c; the two cannot
 * coexist in one translation unit. The pointer form matches the calloc()
 * calls in main(), so it is presumably the version that remains -- confirm
 * against the commit. */
static STREAM_TYPE a[STREAM_ARRAY_SIZE+OFFSET],
b[STREAM_ARRAY_SIZE+OFFSET],
c[STREAM_ARRAY_SIZE+OFFSET];
/* Pointer form: NULL until main() assigns the calloc() results. */
static STREAM_TYPE* a = NULL;
static STREAM_TYPE* b = NULL;
static STREAM_TYPE* c = NULL;

/* Timing statistics with four slots each -- presumably one per timed kernel
 * (Copy, Scale, Add, Triad); the code that fills them is outside this view.
 * avgtime and maxtime start at zero; mintime starts at FLT_MAX. */
static double avgtime[4] = {0}, maxtime[4] = {0},
mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
Expand Down Expand Up @@ -214,6 +222,17 @@ main()
STREAM_TYPE scalar;
double t, times[4][NTIMES];

a= calloc((STREAM_ARRAY_SIZE+OFFSET),sizeof(STREAM_TYPE));
b= calloc((STREAM_ARRAY_SIZE+OFFSET),sizeof(STREAM_TYPE));
c= calloc((STREAM_ARRAY_SIZE+OFFSET),sizeof(STREAM_TYPE));

/* GPU build without unified shared memory: map all three arrays to the
 * device once, before any kernel runs.
 * NOTE(review): the intent appears to be that the per-kernel map(tofrom:...)
 * clauses further down then find the data already present instead of
 * transferring it on every launch -- confirm against the OpenMP mapping
 * rules. */
#ifdef OMPGPU
#ifndef OMPGPU_UNIFIED
#pragma omp target enter data map(to:a[0:(STREAM_ARRAY_SIZE+OFFSET)])
#pragma omp target enter data map(to:b[0:(STREAM_ARRAY_SIZE+OFFSET)])
#pragma omp target enter data map(to:c[0:(STREAM_ARRAY_SIZE+OFFSET)])
#endif
#endif
/* --- SETUP --- determine precision and check timing --- */

printf(HLINE);
Expand Down Expand Up @@ -245,6 +264,9 @@ main()

#ifdef _OPENMP
printf(HLINE);
#ifdef OMPGPU
printf ("Using accelerator\n");
#else
#pragma omp parallel
{
#pragma omp master
Expand All @@ -254,17 +276,24 @@ main()
}
}
#endif
#endif

#ifndef OMPGPU
#ifdef _OPENMP
k = 0;
#pragma omp parallel
#pragma omp atomic
k++;
printf ("Number of Threads counted = %i\n",k);
#endif
#endif

/* Initialize the arrays (this happens before the timed sections below). */
#ifdef OMPGPU
#pragma omp target teams distribute parallel for map(tofrom:a[0:(STREAM_ARRAY_SIZE+OFFSET)],b[0:(STREAM_ARRAY_SIZE+OFFSET)],c[0:(STREAM_ARRAY_SIZE+OFFSET)])
#else
#pragma omp parallel for
#endif
for (j=0; j<STREAM_ARRAY_SIZE; j++) {
a[j] = 1.0;
b[j] = 2.0;
Expand All @@ -283,7 +312,11 @@ main()
}

t = mysecond();
#ifdef OMPGPU
#pragma omp target teams distribute parallel for map(tofrom:a[0:(STREAM_ARRAY_SIZE+OFFSET)])
#else
#pragma omp parallel for
#endif
for (j = 0; j < STREAM_ARRAY_SIZE; j++)
a[j] = 2.0E0 * a[j];
t = 1.0E6 * (mysecond() - t);
Expand All @@ -310,7 +343,11 @@ main()
#ifdef TUNED
tuned_STREAM_Copy();
#else
#ifdef OMPGPU
#pragma omp target teams distribute parallel for map(tofrom:a[0:(STREAM_ARRAY_SIZE+OFFSET)],c[0:(STREAM_ARRAY_SIZE+OFFSET)])
#else
#pragma omp parallel for
#endif
for (j=0; j<STREAM_ARRAY_SIZE; j++)
c[j] = a[j];
#endif
Expand All @@ -320,7 +357,11 @@ main()
#ifdef TUNED
tuned_STREAM_Scale(scalar);
#else
#ifdef OMPGPU
#pragma omp target teams distribute parallel for map(tofrom:b[0:(STREAM_ARRAY_SIZE+OFFSET)],c[0:(STREAM_ARRAY_SIZE+OFFSET)])
#else
#pragma omp parallel for
#endif
for (j=0; j<STREAM_ARRAY_SIZE; j++)
b[j] = scalar*c[j];
#endif
Expand All @@ -330,7 +371,11 @@ main()
#ifdef TUNED
tuned_STREAM_Add();
#else
#ifdef OMPGPU
#pragma omp target teams distribute parallel for map(tofrom:a[0:(STREAM_ARRAY_SIZE+OFFSET)],b[0:(STREAM_ARRAY_SIZE+OFFSET)],c[0:(STREAM_ARRAY_SIZE+OFFSET)])
#else
#pragma omp parallel for
#endif
for (j=0; j<STREAM_ARRAY_SIZE; j++)
c[j] = a[j]+b[j];
#endif
Expand All @@ -340,7 +385,11 @@ main()
#ifdef TUNED
tuned_STREAM_Triad(scalar);
#else
#ifdef OMPGPU
#pragma omp target teams distribute parallel for map(tofrom:a[0:(STREAM_ARRAY_SIZE+OFFSET)],b[0:(STREAM_ARRAY_SIZE+OFFSET)],c[0:(STREAM_ARRAY_SIZE+OFFSET)])
#else
#pragma omp parallel for
#endif
for (j=0; j<STREAM_ARRAY_SIZE; j++)
a[j] = b[j]+scalar*c[j];
#endif
Expand Down Expand Up @@ -371,6 +420,13 @@ main()
}
printf(HLINE);

/* GPU build without unified shared memory: end the device mapping of the
 * three arrays with map(from:...), ahead of the checkSTREAMresults() call
 * that follows. */
#ifdef OMPGPU
#ifndef OMPGPU_UNIFIED
#pragma omp target exit data map(from:a[0:(STREAM_ARRAY_SIZE+OFFSET)])
#pragma omp target exit data map(from:b[0:(STREAM_ARRAY_SIZE+OFFSET)])
#pragma omp target exit data map(from:c[0:(STREAM_ARRAY_SIZE+OFFSET)])
#endif
#endif
/* --- Check Results --- */
checkSTREAMresults();
printf(HLINE);
Expand Down