diff --git a/scripts/mvf_read_benchmark.py b/scripts/mvf_read_benchmark.py index 4c007ca8..021763ab 100755 --- a/scripts/mvf_read_benchmark.py +++ b/scripts/mvf_read_benchmark.py @@ -34,6 +34,9 @@ parser.add_argument('--joint', action='store_true', help='Load vis, weights, flags together') parser.add_argument('--applycal', help='Calibration solutions to apply') parser.add_argument('--workers', type=int, help='Number of dask workers') +parser.add_argument('--blperchunk', type=int, help='Adjust chunks to specified number of baselines per chunk') +parser.add_argument('--chperchunk', type=int, help='Adjust chunks to specified number of channels per chunk') + args = parser.parse_args() logging.basicConfig(level='INFO', format='%(asctime)s [%(levelname)s] %(message)s') @@ -51,7 +54,24 @@ f.select(dumps=np.s_[:args.dumps]) # Trigger creation of the dask graphs, population of sensor cache for applycal etc _ = (f.vis[0, 0, 0], f.weights[0, 0, 0], f.flags[0, 0, 0]) +nblc = min(args.blperchunk if args.blperchunk else f.vis.dataset.chunksize[0], + f.vis.dataset.shape[0]) +nch = min(args.chperchunk if args.chperchunk else f.vis.dataset.chunksize[1], + f.vis.dataset.shape[1]) +cs = f.vis.dataset.chunksize +cs = tuple([nblc, nch, cs[2]]) +f.vis.dataset.rechunk(cs) +f.weights.dataset.rechunk(cs) +f.flags.dataset.rechunk(cs) +csMB = np.prod(tuple([*cs[0:2], args.time])) * (f.vis.dataset.nbytes // f.vis.dataset.size) / 1024.0**2 +visshpGB = f.vis.dataset.nbytes / 1024.0**3 +logging.info(f'Selected visibility chunk size {csMB:.2f} MiB of ' + f'total selection size {visshpGB:.2f} GiB') logging.info('Selection complete') +chunk_sizeB = f.vis.dataset.nbytes // f.vis.dataset.size + \ + f.weights.dataset.nbytes // f.weights.dataset.size + \ + f.flags.dataset.nbytes // f.weights.dataset.size + start = time.time() last_time = start for st in range(0, f.shape[0], args.time): @@ -65,8 +85,8 @@ current_time = time.time() elapsed = current_time - last_time last_time = current_time - size = np.prod(vis.shape) * 10 - logging.info('Loaded %d dumps (%.3f MB/s)', vis.shape[0], size / elapsed / 1e6) -size = np.prod(f.shape) * 10 + size = np.prod(vis.shape) * chunk_sizeB / 1024.**2 + logging.info('Loaded %d dumps (%.3f MiB/s)', vis.shape[0], size / elapsed) +size = np.prod(f.shape) * chunk_sizeB / 1024.**2 elapsed = time.time() - start -logging.info('Loaded %d bytes in %.3f s (%.3f MB/s)', size, elapsed, size / elapsed / 1e6) +logging.info('Loaded %d bytes in %.3f s (%.3f MiB/s)', size, elapsed, size / elapsed)