diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
new file mode 100644
index 0000000..a92b491
--- /dev/null
+++ b/.github/workflows/cmake.yml
@@ -0,0 +1,51 @@
+name: cmake
+
+on:
+  push:
+    paths:  # run only when C/Fortran sources, a CMakeLists.txt, or this workflow change
+      - "**.c"
+      - "**.f"
+      - "**/CMakeLists.txt"
+      - ".github/workflows/cmake.yml"
+
+env:
+  CTEST_NO_TESTS_ACTION: error  # ctest fails instead of passing when it finds no tests
+
+
+jobs:
+
+  unix:  # Linux and macOS: every C compiler in the matrix on every OS in the matrix
+    timeout-minutes: 5
+
+    strategy:
+      matrix:
+        cc: [gcc-12, clang]
+        os: [ubuntu-latest, macos-latest]
+
+    runs-on: ${{ matrix.os }}
+
+    env:  # CMake reads CC and FC to choose the C and Fortran compilers
+      CC: ${{ matrix.cc }}
+      FC: gfortran-12
+
+    steps:  # checkout, configure, build, then run the registered tests verbosely
+    - uses: actions/checkout@v4
+
+    - run: cmake -B build
+
+    - run: cmake --build build --parallel
+
+    - run: ctest --test-dir build -V
+
+
+  windows-msvc:  # Visual Studio build of the C benchmark only; -Dfortran=no turns the Fortran option off
+    runs-on: windows-latest
+
+    steps:  # Visual Studio is a multi-config generator, so build and test both name the Release config
+    - uses: actions/checkout@v4
+
+    - run: cmake -G "Visual Studio 17 2022" -B build -Dfortran=no
+
+    - run: cmake --build build --parallel --config Release
+
+    - run: ctest --test-dir build -V -C Release
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..b29ea54
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,69 @@
+cmake_minimum_required(VERSION 3.15)
+
+# Only C is enabled up front; Fortran is added below when requested.
+project(STREAM VERSION 1.0 DESCRIPTION "STREAM benchmark" LANGUAGES C)
+
+# Allow add_test() entries in this directory to be run by ctest.
+enable_testing()
+
+# Configure with -Dfortran=OFF to build and test the C benchmark alone.
+option(fortran "build fortran version" ON)
+
+# A Fortran compiler is looked up only when the Fortran benchmark is wanted.
+if(fortran)
+  enable_language(Fortran)
+endif()
+
+# C compile flags: full optimisation, host-CPU tuning where the compiler offers
+# it, and warnings. The COMPILE_LANGUAGE guard keeps them off Fortran sources.
+# Intel spells the host-tuning switch /QxHost on Windows and -xHost elsewhere.
+if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
+  add_compile_options("$<$<COMPILE_LANGUAGE:C>:-O3;-march=native;-Wall>")
+elseif(CMAKE_C_COMPILER_ID MATCHES "Intel" AND WIN32)
+  add_compile_options("$<$<COMPILE_LANGUAGE:C>:-O3;-Wall;/QxHost>")
+elseif(CMAKE_C_COMPILER_ID MATCHES "Intel")
+  add_compile_options("$<$<COMPILE_LANGUAGE:C>:-O3;-Wall;-xHost>")
+elseif(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
+  add_compile_options("$<$<COMPILE_LANGUAGE:C>:/W3>")
+endif()
+
+# Fortran compile flags, chosen by the Fortran compiler's own ID: it can differ
+# from the C compiler's (e.g. clang with gfortran). If Fortran is not enabled the
+# ID is unset and no branch is taken.
+if(CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
+  add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-O3;-march=native;-Werror=line-truncation;-Wall>")
+elseif(CMAKE_Fortran_COMPILER_ID MATCHES "Intel" AND WIN32)
+  add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-O3;-warn;/QxHost>")
+elseif(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
+  add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-O3;-warn;-xHost>")
+endif()
+
+# Look for OpenMP support; if it is found, link it to the executables.
+# Note that if you are using clang on macOS, you will need to
+# install libomp via Homebrew and then set the following
+# environment variable:
+#   export OpenMP_ROOT=$(brew --prefix)/opt/libomp
+# see https://www.scivision.dev/cmake-openmp/ for more details
+
+find_package(OpenMP COMPONENTS C Fortran)
+
+# --- C benchmark: executable stream_c, run by ctest as STREAM_C
+add_executable(stream_c stream.c)
+if(OpenMP_C_FOUND)
+  target_link_libraries(stream_c PRIVATE OpenMP::OpenMP_C)
+endif()
+add_test(NAME STREAM_C COMMAND stream_c)
+
+# --- Fortran benchmark: executable stream_f, run by ctest as STREAM_Fortran
+if(fortran)
+  add_executable(stream_f stream.f)
+  if(OpenMP_Fortran_FOUND)
+    target_link_libraries(stream_f PRIVATE OpenMP::OpenMP_Fortran)
+  endif()
+  add_test(NAME STREAM_Fortran COMMAND stream_f)
+endif()
+
+# --- out-of-source builds only: write a catch-all .gitignore into the build tree
+if(NOT "${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}")
+  file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/.gitignore" CONTENT "*")
+endif()
diff --git a/Makefile b/Makefile
index 7746d86..518b758 100644
--- a/Makefile
+++ b/Makefile
@@ -6,10 +6,9 @@ FFLAGS = -O2 -fopenmp
 
 all: stream_f.exe stream_c.exe
 
-stream_f.exe: stream.f mysecond.o
-	$(CC) $(CFLAGS) -c mysecond.c
+stream_f.exe: stream.f
 	$(FC) $(FFLAGS) -c stream.f
-	$(FC) $(FFLAGS) stream.o mysecond.o -o stream_f.exe
+	$(FC) $(FFLAGS) stream.o -o stream_f.exe
 
 stream_c.exe: stream.c
 	$(CC) $(CFLAGS) stream.c -o stream_c.exe
diff --git a/mysecond.c b/mysecond.c
deleted file mode 100644
index d206a4a..0000000
--- a/mysecond.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* A gettimeofday routine to give access to the wall
-   clock timer on most UNIX-like systems.
-
-   This version defines two entry points -- with 
-   and without appended underscores, so it *should*
-   automagically link with FORTRAN */
-
-#include <sys/time.h>
-
-double mysecond()
-{
-/* struct timeval { long        tv_sec;
-            long        tv_usec;        };
-
-struct timezone { int   tz_minuteswest;
-             int        tz_dsttime;      };     */
-
-        struct timeval tp;
-        struct timezone tzp;
-        int i;
-
-        i = gettimeofday(&tp,&tzp);
-        return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 );
-}
-
-double mysecond_() {return mysecond();}
-
diff --git a/stream.c b/stream.c
index 9bbd6ce..c5df71b 100644
--- a/stream.c
+++ b/stream.c
@@ -41,12 +41,19 @@
 /*  5. Absolutely no warranty is expressed or implied.                   */
 /*-----------------------------------------------------------------------*/
 # include <stdio.h>
-# include <unistd.h>
 # include <math.h>
 # include <float.h>
-# include <limits.h>
+
+#ifdef _MSC_VER
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#define ssize_t ptrdiff_t
+#define CLOCK_MONOTONIC 0
+#else
 # include <sys/time.h>
+#endif
 
+#include <time.h>
 /*-----------------------------------------------------------------------
  * INSTRUCTIONS:
  *
@@ -62,13 +69,13 @@
  *           Example 1: One Xeon E3 with 8 MB L3 cache
  *               STREAM_ARRAY_SIZE should be >= 4 million, giving
  *               an array size of 30.5 MB and a total memory requirement
- *               of 91.5 MB.  
+ *               of 91.5 MB.
  *           Example 2: Two Xeon E5's with 20 MB L3 cache each (using OpenMP)
  *               STREAM_ARRAY_SIZE should be >= 20 million, giving
  *               an array size of 153 MB and a total memory requirement
- *               of 458 MB.  
+ *               of 458 MB.
  *       (b) The size should be large enough so that the 'timing calibration'
- *           output by the program is at least 20 clock-ticks.  
+ *           output by the program is at least 20 clock-ticks.
  *           Example: most versions of Windows have a 10 millisecond timer
  *               granularity.  20 "ticks" at 10 ms/tic is 200 milliseconds.
  *               If the chip is capable of 10 GB/s, it moves 2 GB in 200 msec.
@@ -77,7 +84,7 @@
  *      Version 5.10 increases the default array size from 2 million
  *          elements to 10 million elements in response to the increasing
  *          size of L3 caches.  The new default size is large enough for caches
- *          up to 20 MB. 
+ *          up to 20 MB.
  *      Version 5.10 changes the loop index variables from "register int"
  *          to "ssize_t", which allows array indices >2^32 (4 billion)
  *          on properly configured 64-bit systems.  Additional compiler options
@@ -113,8 +120,8 @@
 #endif
 
 /*  Users are allowed to modify the "OFFSET" variable, which *may* change the
- *         relative alignment of the arrays (though compilers may change the 
- *         effective offset by making the arrays non-contiguous on some systems). 
+ *         relative alignment of the arrays (though compilers may change the
+ *         effective offset by making the arrays non-contiguous on some systems).
  *      Use of non-zero values for OFFSET can be especially helpful if the
  *         STREAM_ARRAY_SIZE is set to a value close to a large power of 2.
  *      OFFSET can also be set on the compile line without changing the source
@@ -126,7 +133,7 @@
 
 /*
  *	3) Compile the code with optimization.  Many compilers generate
- *       unreasonably bad code before the optimizer tightens things up.  
+ *       unreasonably bad code before the optimizer tightens things up.
  *     If the results are unreasonably good, on the other hand, the
  *       optimizer might be too smart for me!
  *
@@ -137,7 +144,7 @@
  *     To use multiple cores, you need to tell the compiler to obey the OpenMP
  *       directives in the code.  This varies by compiler, but a common example is
  *            gcc -O -fopenmp stream.c -o stream_omp
- *       The environment variable OMP_NUM_THREADS allows runtime control of the 
+ *       The environment variable OMP_NUM_THREADS allows runtime control of the
  *         number of threads/cores used when the resulting "stream_omp" program
  *         is executed.
  *
@@ -146,9 +153,9 @@
  *     to the compile line.
  *     Note that this changes the minimum array sizes required --- see (1) above.
  *
- *     The preprocessor directive "TUNED" does not do much -- it simply causes the 
+ *     The preprocessor directive "TUNED" does not do much -- it simply causes the
  *       code to call separate functions to execute each kernel.  Trivial versions
- *       of these functions are provided, but they are *not* tuned -- they just 
+ *       of these functions are provided, but they are *not* tuned -- they just
  *       provide predefined interfaces to be replaced with tuned code.
  *
  *
@@ -193,7 +200,26 @@ static double	bytes[4] = {
     3 * sizeof(STREAM_TYPE) * STREAM_ARRAY_SIZE
     };
 
-extern double mysecond();
+#ifdef _MSC_VER
+/* MSVC has no clock_gettime(): emulate a monotonic clock; the clock id is ignored */
+int clock_gettime(int dummy, struct timespec *spec)
+{
+   LARGE_INTEGER freq, now;
+   QueryPerformanceFrequency(&freq);   /* counter ticks per second */
+   QueryPerformanceCounter(&now);      /* ticks since an arbitrary start point */
+   spec->tv_sec  = (time_t)(now.QuadPart / freq.QuadPart);
+   spec->tv_nsec = (long)(now.QuadPart % freq.QuadPart * 1000000000 / freq.QuadPart);
+   return 0;
+}
+#endif
+/* Seconds read from a monotonic clock; only differences between calls are meaningful. */
+double mysecond()
+{
+	struct timespec tic;
+	clock_gettime(CLOCK_MONOTONIC, &tic);
+	return tic.tv_sec + tic.tv_nsec * 1e-9;
+}
+
 extern void checkSTREAMresults();
 #ifdef TUNED
 extern void tuned_STREAM_Copy();
@@ -233,19 +259,19 @@ main()
 #endif
 
     printf("Array size = %llu (elements), Offset = %d (elements)\n" , (unsigned long long) STREAM_ARRAY_SIZE, OFFSET);
-    printf("Memory per array = %.1f MiB (= %.1f GiB).\n", 
+    printf("Memory per array = %.1f MiB (= %.1f GiB).\n",
 	BytesPerWord * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.0),
 	BytesPerWord * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.0/1024.0));
     printf("Total memory required = %.1f MiB (= %.1f GiB).\n",
 	(3.0 * BytesPerWord) * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024.),
 	(3.0 * BytesPerWord) * ( (double) STREAM_ARRAY_SIZE / 1024.0/1024./1024.));
     printf("Each kernel will be executed %d times.\n", NTIMES);
-    printf(" The *best* time for each kernel (excluding the first iteration)\n"); 
+    printf(" The *best* time for each kernel (excluding the first iteration)\n");
     printf(" will be used to compute the reported bandwidth.\n");
 
 #ifdef _OPENMP
     printf(HLINE);
-#pragma omp parallel 
+#pragma omp parallel
     {
 #pragma omp master
 	{
@@ -258,7 +284,7 @@ main()
 #ifdef _OPENMP
 	k = 0;
 #pragma omp parallel
-#pragma omp atomic 
+#pragma omp atomic
 		k++;
     printf ("Number of Threads counted = %i\n",k);
 #endif
@@ -273,7 +299,7 @@ main()
 
     printf(HLINE);
 
-    if  ( (quantum = checktick()) >= 1) 
+    if  ( (quantum = checktick()) >= 1)
 	printf("Your clock granularity/precision appears to be "
 	    "%d microseconds.\n", quantum);
     else {
@@ -282,10 +308,11 @@ main()
 	quantum = 1;
     }
 
-    t = mysecond();
+	t = mysecond();
 #pragma omp parallel for
     for (j = 0; j < STREAM_ARRAY_SIZE; j++)
 		a[j] = 2.0E0 * a[j];
+
     t = 1.0E6 * (mysecond() - t);
 
     printf("Each test below will take on the order"
@@ -300,7 +327,7 @@ main()
     printf("For best results, please be sure you know the\n");
     printf("precision of your system timer.\n");
     printf(HLINE);
-    
+
     /*	--- MAIN LOOP --- repeat test cases NTIMES times --- */
 
     scalar = 3.0;
@@ -315,7 +342,7 @@ main()
 	    c[j] = a[j];
 #endif
 	times[0][k] = mysecond() - times[0][k];
-	
+
 	times[1][k] = mysecond();
 #ifdef TUNED
         tuned_STREAM_Scale(scalar);
@@ -325,7 +352,7 @@ main()
 	    b[j] = scalar*c[j];
 #endif
 	times[1][k] = mysecond() - times[1][k];
-	
+
 	times[2][k] = mysecond();
 #ifdef TUNED
         tuned_STREAM_Add();
@@ -335,7 +362,7 @@ main()
 	    c[j] = a[j]+b[j];
 #endif
 	times[2][k] = mysecond() - times[2][k];
-	
+
 	times[3][k] = mysecond();
 #ifdef TUNED
         tuned_STREAM_Triad(scalar);
@@ -358,7 +385,7 @@ main()
 	    maxtime[j] = MAX(maxtime[j], times[j][k]);
 	    }
 	}
-    
+
     printf("Function    Best Rate MB/s  Avg time     Min time     Max time\n");
     for (j=0; j<4; j++) {
 		avgtime[j] = avgtime[j]/(double)(NTIMES-1);
@@ -411,22 +438,6 @@ checktick()
     }
 
 
-
-/* A gettimeofday routine to give access to the wall
-   clock timer on most UNIX-like systems.  */
-
-#include <sys/time.h>
-
-double mysecond()
-{
-        struct timeval tp;
-        struct timezone tzp;
-        int i;
-
-        i = gettimeofday(&tp,&tzp);
-        return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 );
-}
-
 #ifndef abs
 #define abs(a) ((a) >= 0 ? (a) : -(a))
 #endif
@@ -476,7 +487,7 @@ void checkSTREAMresults ()
 		epsilon = 1.e-13;
 	}
 	else {
-		printf("WEIRD: sizeof(STREAM_TYPE) = %lu\n",sizeof(STREAM_TYPE));
+		printf("WEIRD: sizeof(STREAM_TYPE) = %zu\n",sizeof(STREAM_TYPE));
 		epsilon = 1.e-6;
 	}
 
diff --git a/stream.f b/stream.f
index e93e453..6aa0dc0 100644
--- a/stream.f
+++ b/stream.f
@@ -25,7 +25,7 @@
 *         accordance with the STREAM Run Rules must be clearly
 *         labelled whenever they are published.  Examples of
 *         proper labelling include:
-*         "tuned STREAM benchmark results" 
+*         "tuned STREAM benchmark results"
 *         "based on a variant of the STREAM benchmark code"
 *         Other comparable, clear and reasonable labelling is
 *         acceptable.
@@ -48,11 +48,9 @@
 * Most of the content is currently hosted at:
 *          http://www.cs.virginia.edu/stream/
 *
-* BRIEF INSTRUCTIONS: 
+* BRIEF INSTRUCTIONS:
 *       0) See http://www.cs.virginia.edu/stream/ref.html for details
-*       1) STREAM requires a timing function called mysecond().
-*          Several examples are provided in this directory.
-*          "CPU" timers are only allowed for uniprocessor runs.
+*       1) "CPU" timers are only allowed for uniprocessor runs.
 *          "Wall-clock" timers are required for all multiprocessor runs.
 *       2) The STREAM array sizes must be set to size the test.
 *          The value "N" must be chosen so that each of the three
@@ -64,7 +62,7 @@
 *          that makes each array 4x larger than the last-level cache.
 *          The intent is to determine the *best* sustainable bandwidth
 *          available with this simple coding.  Of course, lower values
-*          are usually fairly easy to obtain on cached machines, but 
+*          are usually fairly easy to obtain on cached machines, but
 *          by keeping the test to the *best* results, the answers are
 *          easier to interpret.
 *          You may put the arrays in common or not, at your discretion.
@@ -91,48 +89,51 @@
 *=========================================================================
 *
       PROGRAM stream
-*     IMPLICIT NONE
+
+      use, intrinsic :: iso_fortran_env, only : int64
+      IMPLICIT NONE
 C     .. Parameters ..
-      INTEGER n,offset,ndim,ntimes
+      INTEGER :: n,offset,ndim,ntimes
       PARAMETER (n=20000000,offset=0,ndim=n+offset,ntimes=10)
 C     ..
 C     .. Local Scalars ..
-      DOUBLE PRECISION scalar,t
-      INTEGER j,k,nbpw,quantum
+      DOUBLE PRECISION :: scalar
+      integer(int64) :: t64, tic, toc
+      integer(int64) :: tick_rate
+      INTEGER ::j,k,nbpw
 C     ..
 C     .. Local Arrays ..
-      DOUBLE PRECISION maxtime(4),mintime(4),avgtime(4),
+      DOUBLE PRECISION :: maxtime(4),mintime(4),avgtime(4),
      $                 times(4,ntimes)
-      INTEGER bytes(4)
-      CHARACTER label(4)*11
+      INTEGER :: bytes(4)
+      CHARACTER(11) ::label(4)
 C     ..
-C     .. External Functions ..
-      DOUBLE PRECISION mysecond
-      INTEGER checktick,realsize
-      EXTERNAL mysecond,checktick,realsize
-!$    INTEGER omp_get_num_threads
-!$    EXTERNAL omp_get_num_threads
+
+!$    INTEGER, external :: omp_get_num_threads
 C     ..
 C     .. Intrinsic Functions ..
 C
-      INTRINSIC dble,max,min,nint,sqrt
+      INTRINSIC :: dble,max,min,nint,sqrt
 C     ..
 C     .. Arrays in Common ..
-      DOUBLE PRECISION a(ndim),b(ndim),c(ndim)
-C     ..
-C     .. Common blocks ..
-*     COMMON a,b,c
+      DOUBLE PRECISION, allocatable, dimension(:) :: a, b, c
 C     ..
 C     .. Data statements ..
-      DATA avgtime/4*0.0D0/,mintime/4*1.0D+36/,maxtime/4*0.0D0/
-      DATA label/'Copy:      ','Scale:     ','Add:       ',
-     $     'Triad:     '/
-      DATA bytes/2,2,3,3/
+      avgtime = 0
+      mintime = huge(0)
+      maxtime = 0
+      label = ['Copy:      ','Scale:     ','Add:       ','Triad:     ']
+      bytes = [2,2,3,3]
 C     ..
 
 *       --- SETUP --- determine precision and check timing ---
 
-      nbpw = realsize()
+      allocate(a(ndim), b(ndim), c(ndim))
+
+      call system_clock(COUNT_RATE=tick_rate)
+C     set timing to max precision, typically sub-microsecond
+
+      nbpw = storage_size(a)/8
 
       PRINT *,'----------------------------------------------'
       PRINT *,'STREAM Version $Revision: 5.6 $'
@@ -164,62 +165,66 @@ PROGRAM stream
           b(j) = 0.5D0
           c(j) = 0.0D0
    10 CONTINUE
-      t = mysecond()
+      call system_clock(count=tic)
 !$OMP PARALLEL DO
       DO 20 j = 1,n
           a(j) = 0.5d0*a(j)
    20 CONTINUE
-      t = mysecond() - t
+      call system_clock(count=toc)
+      t64 = toc - tic
       PRINT *,'----------------------------------------------------'
-      quantum = checktick()
-      WRITE (*,FMT=9000)
-     $  'Your clock granularity/precision appears to be ',quantum,
-     $  ' microseconds'
+
+      print '(a,f10.3)','Clock granularity/precision (microseconds):',
+     &   1/dble(tick_rate) * 1e6
       PRINT *,'----------------------------------------------------'
 
 *       --- MAIN LOOP --- repeat test cases NTIMES times ---
       scalar = 0.5d0*a(1)
       DO 70 k = 1,ntimes
 
-          t = mysecond()
-          a(1) = a(1) + t
+          call system_clock(count=tic)
+          a(1) = a(1) + tic
 !$OMP PARALLEL DO
           DO 30 j = 1,n
               c(j) = a(j)
    30     CONTINUE
-          t = mysecond() - t
-          c(n) = c(n) + t
-          times(1,k) = t
+          call system_clock(count=toc)
+          t64 = toc - tic
+          c(n) = c(n) + t64
+          times(1,k) = t64 / dble(tick_rate)
 
-          t = mysecond()
-          c(1) = c(1) + t
+          call system_clock(count=tic)
+          c(1) = c(1) + tic
 !$OMP PARALLEL DO
           DO 40 j = 1,n
               b(j) = scalar*c(j)
    40     CONTINUE
-          t = mysecond() - t
-          b(n) = b(n) + t
-          times(2,k) = t
+          call system_clock(count=toc)
+          t64 = toc - tic
+          b(n) = b(n) + t64
+          times(2,k) = t64 / dble(tick_rate)
 
-          t = mysecond()
-          a(1) = a(1) + t
+          call system_clock(count=tic)
+          a(1) = a(1) + tic
 !$OMP PARALLEL DO
           DO 50 j = 1,n
               c(j) = a(j) + b(j)
    50     CONTINUE
-          t = mysecond() - t
-          c(n) = c(n) + t
-          times(3,k) = t
+          call system_clock(count=toc)
+          t64 = toc - tic
+          c(n) = c(n) + t64
+          times(3,k) = t64 / dble(tick_rate)
 
-          t = mysecond()
-          b(1) = b(1) + t
+          call system_clock(count=tic)
+          b(1) = b(1) + tic
 !$OMP PARALLEL DO
           DO 60 j = 1,n
               a(j) = b(j) + scalar*c(j)
    60     CONTINUE
-          t = mysecond() - t
-          a(n) = a(n) + t
-          times(4,k) = t
+          call system_clock(count=toc)
+          t64 = toc - tic
+          a(n) = a(n) + t64
+          times(4,k) = t64 / dble(tick_rate)
    70 CONTINUE
 
 *       --- SUMMARY ---
@@ -240,171 +245,26 @@ PROGRAM stream
       CALL checksums (a,b,c,n,ntimes)
       PRINT *,'----------------------------------------------------'
 
- 9000 FORMAT (1x,a,i6,a)
  9010 FORMAT (1x,a,i10)
  9020 FORMAT (1x,a,i4,a)
  9030 FORMAT (1x,a,i3,a,a)
  9040 FORMAT ('Function',5x,'Rate (MB/s)  Avg time   Min time  Max time'
      $       )
  9050 FORMAT (a,4 (f10.4,2x))
-      END
-
-*-------------------------------------
-* INTEGER FUNCTION dblesize()
-*
-* A semi-portable way to determine the precision of DOUBLE PRECISION
-* in Fortran.
-* Here used to guess how many bytes of storage a DOUBLE PRECISION
-* number occupies.
-*
-      INTEGER FUNCTION realsize()
-*     IMPLICIT NONE
-
-C     .. Local Scalars ..
-      DOUBLE PRECISION result,test
-      INTEGER j,ndigits
-C     ..
-C     .. Local Arrays ..
-      DOUBLE PRECISION ref(30)
-C     ..
-C     .. External Subroutines ..
-      EXTERNAL confuse
-C     ..
-C     .. Intrinsic Functions ..
-      INTRINSIC abs,acos,log10,sqrt
-C     ..
-
-C       Test #1 - compare single(1.0d0+delta) to 1.0d0
-
-   10 DO 20 j = 1,30
-          ref(j) = 1.0d0 + 10.0d0** (-j)
-   20 CONTINUE
-
-      DO 30 j = 1,30
-          test = ref(j)
-          ndigits = j
-          CALL confuse(test,result)
-          IF (test.EQ.1.0D0) THEN
-              GO TO 40
-          END IF
-   30 CONTINUE
-      GO TO 50
-
-   40 WRITE (*,FMT='(a)')
-     $  '----------------------------------------------'
-      WRITE (*,FMT='(1x,a,i2,a)') 'Double precision appears to have ',
-     $  ndigits,' digits of accuracy'
-      IF (ndigits.LE.8) THEN
-          realsize = 4
-      ELSE
-          realsize = 8
-      END IF
-      WRITE (*,FMT='(1x,a,i1,a)') 'Assuming ',realsize,
-     $  ' bytes per DOUBLE PRECISION word'
-      WRITE (*,FMT='(a)')
-     $  '----------------------------------------------'
-      RETURN
-
-   50 PRINT *,'Hmmmm.  I am unable to determine the size.'
-      PRINT *,'Please enter the number of Bytes per DOUBLE PRECISION',
-     $  ' number : '
-      READ (*,FMT=*) realsize
-      IF (realsize.NE.4 .AND. realsize.NE.8) THEN
-          PRINT *,'Your answer ',realsize,' does not make sense.'
-          PRINT *,'Try again.'
-          PRINT *,'Please enter the number of Bytes per ',
-     $      'DOUBLE PRECISION number : '
-          READ (*,FMT=*) realsize
-      END IF
-      PRINT *,'You have manually entered a size of ',realsize,
-     $  ' bytes per DOUBLE PRECISION number'
-      WRITE (*,FMT='(a)')
-     $  '----------------------------------------------'
-      END
-
-      SUBROUTINE confuse(q,r)
-*     IMPLICIT NONE
-C     .. Scalar Arguments ..
-      DOUBLE PRECISION q,r
-C     ..
-C     .. Intrinsic Functions ..
-      INTRINSIC cos
-C     ..
-      r = cos(q)
-      RETURN
-      END
-
-* A semi-portable way to determine the clock granularity
-* Adapted from a code by John Henning of Digital Equipment Corporation
-*
-      INTEGER FUNCTION checktick()
-*     IMPLICIT NONE
-
-C     .. Parameters ..
-      INTEGER n
-      PARAMETER (n=20)
-C     ..
-C     .. Local Scalars ..
-      DOUBLE PRECISION t1,t2
-      INTEGER i,j,jmin
-C     ..
-C     .. Local Arrays ..
-      DOUBLE PRECISION timesfound(n)
-C     ..
-C     .. External Functions ..
-      DOUBLE PRECISION mysecond
-      EXTERNAL mysecond
-C     ..
-C     .. Intrinsic Functions ..
-      INTRINSIC max,min,nint
-C     ..
-      i = 0
-
-   10 t2 = mysecond()
-      IF (t2.EQ.t1) GO TO 10
-
-      t1 = t2
-      i = i + 1
-      timesfound(i) = t1
-      IF (i.LT.n) GO TO 10
-
-      jmin = 1000000
-      DO 20 i = 2,n
-          j = nint((timesfound(i)-timesfound(i-1))*1d6)
-          jmin = min(jmin,max(j,0))
-   20 CONTINUE
-
-      IF (jmin.GT.0) THEN
-          checktick = jmin
-      ELSE
-          PRINT *,'Your clock granularity appears to be less ',
-     $      'than one microsecond'
-          checktick = 1
-      END IF
-      RETURN
-
-*      PRINT 14, timesfound(1)*1d6
-*      DO 20 i=2,n
-*         PRINT 14, timesfound(i)*1d6,
-*     &       nint((timesfound(i)-timesfound(i-1))*1d6)
-*   14    FORMAT (1X, F18.4, 1X, i8)
-*   20 CONTINUE
-
-      END
-
 
+      contains
 
 
       SUBROUTINE checksums(a,b,c,n,ntimes)
-*     IMPLICIT NONE
+      IMPLICIT NONE
 C     ..
 C     .. Arguments ..
-      DOUBLE PRECISION a(*),b(*),c(*)
-      INTEGER n,ntimes
+      DOUBLE PRECISION, intent(in), dimension(:) :: a, b, c
+      INTEGER, intent(in) :: n, ntimes
 C     ..
 C     .. Local Scalars ..
-      DOUBLE PRECISION aa,bb,cc,scalar,suma,sumb,sumc,epsilon
-      INTEGER k
+      DOUBLE PRECISION :: aa,bb,cc,scalar,suma,sumb,sumc,epsilon
+      INTEGER :: k
 C     ..
 
 C     Repeat the main loop, but with scalars only.
@@ -430,9 +290,9 @@ SUBROUTINE checksums(a,b,c,n,ntimes)
 C     elements, which are modified using the timing results
 C     to confuse aggressive optimizers.
 
-      suma = 0.0d0
-      sumb = 0.0d0
-      sumc = 0.0d0
+      suma = 0
+      sumb = 0
+      sumc = 0
 !$OMP PARALLEL DO REDUCTION(+:suma,sumb,sumc)
       DO 110 j = 2,n-1
           suma = suma + a(j)
@@ -442,21 +302,26 @@ SUBROUTINE checksums(a,b,c,n,ntimes)
 
       epsilon = 1.D-6
 
-      IF (ABS(suma-aa)/suma .GT. epsilon) THEN
+      IF (ABS(suma-aa)/suma > epsilon) THEN
           PRINT *,'Failed Validation on array a()'
           PRINT *,'Target   Sum of a is = ',aa
           PRINT *,'Computed Sum of a is = ',suma
-      ELSEIF (ABS(sumb-bb)/sumb .GT. epsilon) THEN
+          error stop
+      ELSEIF (ABS(sumb-bb)/sumb > epsilon) THEN
           PRINT *,'Failed Validation on array b()'
           PRINT *,'Target   Sum of b is = ',bb
           PRINT *,'Computed Sum of b is = ',sumb
-      ELSEIF (ABS(sumc-cc)/sumc .GT. epsilon) THEN
+          error stop
+      ELSEIF (ABS(sumc-cc)/sumc > epsilon) THEN
           PRINT *,'Failed Validation on array c()'
           PRINT *,'Target   Sum of c is = ',cc
           PRINT *,'Computed Sum of c is = ',sumc
+          error stop
       ELSE
           PRINT *,'Solution Validates!'
       ENDIF
 
-      END
+      END subroutine checksums
+
 
+      END program stream