From fea4b30d07042d234be49b883151f4c58282ef36 Mon Sep 17 00:00:00 2001
From: "Anna M. Matsekh"
Date: Thu, 24 Feb 2022 16:48:38 -0700
Subject: [PATCH 1/2] Initial commit of the automated testing scripts

---
 TestScripts/lpi-deck/lpi_2d_F6_test          | 978 ++++++++++++++++++
 TestScripts/lpi-deck/lpi_2d_F6_test-nx12     | 987 +++++++++++++++++++
 TestScripts/lpi-deck/lpi_2d_F6_test-nx24     | 987 +++++++++++++++++++
 TestScripts/lpi-deck/lpi_2d_F6_test-nx288    | 987 +++++++++++++++++++
 TestScripts/lpi-deck/lpi_2d_F6_test-nx48     | 987 +++++++++++++++++++
 TestScripts/lpi-deck/lpi_2d_F6_test-nx864    | 987 +++++++++++++++++++
 TestScripts/lpi-deck/lpi_2d_F6_test-nx96     | 987 +++++++++++++++++++
 TestScripts/lpi-deck/lpi_2d_F6_test.original | 978 ++++++++++++++++++
 TestScripts/vpic-build.sh                    | 232 +++
 TestScripts/vpic-test.sh                     | 110 +++
 TestScripts/vpic_strong_scaling.sh           |  34 +
 TestScripts/vpic_thread_scaling.sh           |  36 +
 TestScripts/vpic_weak_scaling.sh             |  57 ++
 13 files changed, 8347 insertions(+)
 create mode 100755 TestScripts/lpi-deck/lpi_2d_F6_test
 create mode 100755 TestScripts/lpi-deck/lpi_2d_F6_test-nx12
 create mode 100755 TestScripts/lpi-deck/lpi_2d_F6_test-nx24
 create mode 100755 TestScripts/lpi-deck/lpi_2d_F6_test-nx288
 create mode 100755 TestScripts/lpi-deck/lpi_2d_F6_test-nx48
 create mode 100755 TestScripts/lpi-deck/lpi_2d_F6_test-nx864
 create mode 100755 TestScripts/lpi-deck/lpi_2d_F6_test-nx96
 create mode 100755 TestScripts/lpi-deck/lpi_2d_F6_test.original
 create mode 100755 TestScripts/vpic-build.sh
 create mode 100755 TestScripts/vpic-test.sh
 create mode 100755 TestScripts/vpic_strong_scaling.sh
 create mode 100755 TestScripts/vpic_thread_scaling.sh
 create mode 100755 TestScripts/vpic_weak_scaling.sh

diff --git a/TestScripts/lpi-deck/lpi_2d_F6_test b/TestScripts/lpi-deck/lpi_2d_F6_test
new file mode 100755
index 00000000..033ed8d9
--- /dev/null
+++ b/TestScripts/lpi-deck/lpi_2d_F6_test
@@ -0,0 +1,978 @@
+// Stress Tensor(e_hydro)_4/abs(Charge Density(e_hydro)+ 1.0e-8) - (Momentum Density(e_hydro)_Y/abs(Charge Density(e_hydro)+ 1.0e-8)) * (Current Density(e_hydro)_Y/(Charge Density(e_hydro) + 1.0e-8))
+
+//========================================================================
+//
+// LPI 3D deck - Linearly polarized (in y) plane wave incident from left
+// boundary
+//
+// Adapted from Albright's Lightning 3D LPI deck.
+// B. Albright, X-1-PTA; 28 Jan. 2007
+// Lin Yin, X-1-PTA, 23 Feb 2009, for Cerrillos test
+//
+// Executable creates its own directory structure. Remove the old with:
+//
+// rm -rf rundata ehydro Hhydro Hehydro restart poynting velocity particle field
+//========================================================================
+
+// Employ turnstiles to partially serialize the high-volume file writes.
+// In this case, the restart dumps. Set NUM_TURNSTILES to be the desired
+// number of simultaneous writes.
+#define NUM_TURNSTILES 128
+
+begin_globals {
+ double e0; // peak amplitude of oscillating electric field
+ double omega; // angular freq. of the beam
+ int field_interval; // how often to dump field and hydro
+ int particle_interval; // how often to dump particle data
+ int poynting_interval; // how often to compute poynting flux on boundary
+ int restart_interval; // how often to write restart data
+ int quota_check_interval; // how often to check if quota exceeded
+ double quota_sec; // run quota in sec
+ int rtoggle; // enable save of last 2 restart files for safety
+ int load_particles; // were particles loaded?
+ double topology_x; // domain topology needed to normalize Poynting diagnostic
+ double topology_y;
+ double topology_z;
+ int mobile_ions;
+ int H_present;
+ int He_present;
+
+ // Parameters for 3d Gaussian wave launch
+ double lambda;
+ double waist; // how wide the focused beam is
+ double width;
+ double zcenter; // center of beam at boundary in z
+ double ycenter; // center of beam at boundary in y
+ double xfocus; // how far from boundary to focus
+ double mask; // # gaussian widths from beam center where I is nonzero
+
+ // Poynting diagnostic flags - which output to turn on
+
+ // write_backscatter_only flag: when this flag is nonzero, it means to only compute
+ // poynting data for the lower-x surface. This flag affects both the summed poynting
+ // data as well as the surface data.
+
+ int write_poynting_data; // Whether to write poynting data to file (or just stdout)
+
+ int write_backscatter_only; // nonzero means we only write backscatter diagnostic for fields
+
+ // write_poynting_sum is nonzero if you wish to write a file containing the integrated
+ // poynting data on one or more surfaces. If write_backscatter_only is nonzero, then
+ // the output file will be a time series of double precision numbers containing the
+ // integrated poynting flux at that point in time. If write_backscatter_only is zero,
+ // then for each time step, six double precision numbers are written, containing the
+ // poynting flux through the lower x, upper x, lower y, upper y, lower z, and upper z
+ // surfaces.
+
+ int write_poynting_sum; // nonzero if we wish to write integrated Poynting data
+
+ // write_poynting_faces is probably useless, but I put it here anyway in case it's not.
+ // When this flag is nonzero, it will print out the poynting flux at each of a 2D array
+ // of points on the surface. When this flag is turned on and write_backscatter_only is
+ // nonzero, then only the 2D array of points on the lower-x boundary surface are written
+ // for each time step. When this flag is turned on and write_backscatter_only is
+ // zero, then it will write 2D array data for the lower x, upper x, lower y, upper y,
+ // lower z, upper z surfaces for each time step.
+
+ int write_poynting_faces; // nonzero if we wish to write Poynting data on sim boundaries
+
+ // write_eb_faces is nonzero when we wish to get the raw e and b data at the boundary
+ // (e.g., to be used with a filter to distinguish between SRS and SBS backscatter).
+ // When this flag is on and write_backscatter_only is nonzero, then only the 2D array
+ // of points on the lower-x boundary surface are written for each time step. When
+ // this flag is turned on and write_backscatter_only is zero, then it will write 2D
+ // array data for the lower x, upper x, lower y, upper y, lower z, upper z surfaces for
+ // each time step. When turned on, four files are produced: e1, e2, cb1, cb2. The
+ // values of the quantities printed depend on the face one is considering: for the
+ // x faces, e1 = ey, e2 = ez, cb1 = cby, cb2 = cbz. Similarly for y and z surfaces,
+ // but where 1 and 2 stand for (z,x) and (x,y) coordinates, respectively.
+
+ int write_eb_faces; // nonzero if we wish to write E and B data on sim boundaries
+
+ // Needed for movie files
+ float vthe; // vthe/c
+ float vthi_He; // vthi_He/c
+ float vthi_H; // vthi_H/c
+ int velocity_interval; // how frequently to dump binned velocity space data
+
+ // Dump parameters for standard VPIC output formats
+ DumpParameters fdParams;
+ DumpParameters hedParams;
+ DumpParameters hHdParams;
+ DumpParameters hHedParams;
+ std::vector<DumpParameters *> outputParams;
+};
+
+begin_initialization {
+
+ // System of units
+ double ec = 4.8032e-10; // stat coulomb
+ double c_vac = 2.99792458e10; // cm/sec
+ double m_e = 9.1094e-28; // g
+ double k_b = 1.6022e-12; // erg/eV
+ double mec2 = m_e*c_vac*c_vac/k_b;
+ double mpc2 = mec2*1836.0;
+
+ double cfl_req = 0.98; // How close to Courant should we try to run
+ double damp = 0; // How much radiation damping
+ double iv_thick = 2; // Thickness of impermeable vacuum (in cells)
+
+ // Experimental parameters
+
+ double t_e = 600; // electron temperature, eV
+ double t_i = 150; // ion temperature, eV
+ double n_e_over_n_crit = 0.05; // n_e/n_crit
+ double vacuum_wavelength = 527 * 1e-7; // 0.527 micron light (cm)
+ double laser_intensity = 2.5e15 * 1e7; // in ergs/cm^2 (note: 1 W = 1e7 ergs)
+
+ // Simulation parameters
+
+#if 0
+ // 2048 processors
+ double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm)
+ double Ly = 2*6.0 * 1e-4;
+ double Lz = 2*6.0 * 1e-4;
+ double nx = 8192;
+ double ny = 512;
+ double nz = 512;
+ double topology_x = 64;
+ double topology_y = 4;
+ double topology_z = 8;
+ // single-processor mesh = 128 x 128 x 64
+#endif
+
+#if 0
+ // 14300x4 processors
+ double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm)
+ double Ly = 12.0 * 1e-4;
+ double Lz = 12.0 * 1e-4;
+ double nx = 6292;
+ double ny = 440;
+ double nz = 440;
+ double topology_x = 143;
+ double topology_y = 10;
+ double topology_z = 10;
+ // single-processor mesh = 44 x 44 x 44
+#endif
+
+#if 0
+ // 2*4096 processors
+ double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm)
+ double Ly = 2*6.0 * 1e-4;
+ double Lz = 2*6.0 * 1e-4;
+ double nx = 8192;
+ double ny = 512;
+ double nz = 512;
+ double topology_x = 128;
+ double topology_y = 8;
+ double topology_z = 8;
+ // single-processor mesh = 64 x 64 x 64
+#endif
+
+ // This choice of nx, ny, nz and topology values implies that the
+ // grid is 102 x 42 x 42 cells. The transverse size is slightly smaller than
+ // in the original deck in order to enable more choices for down-sampling/striding
+ // the field and hydro output. (The original had 43 cells in each direction, which
+ // is prime and therefore not strideable with Ben's output format).
+ +# if 0 + // one processor problem, for debugging + Lx /= topology_x; nx /= topology_x; + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 1; + topology_y = 1; + topology_z = 1; +# endif + +# if 0 + // twoprocessor problem, for debugging + Lx /= (topology_x/2); nx /= (topology_x/2); + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 2; + topology_y = 1; + topology_z = 1; +# endif + +// Run a smaller problem for debugging purposes +#if 0 + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 1.20 * 1e-4; + double Lz = 1.20 * 1e-4; + double nx = 6292; + double ny = 44; + double nz = 44; + double topology_x = 143; + double topology_y = 1; + double topology_z = 1; + + // nproc() x 1 x 1 : + double topology = nproc(); + Lx /= 143; Lx *= topology; + nx /= 143; nx *= topology; + topology_x /= 143; topology_x *= topology; + + // single-processor mesh = 44 x 44 x 44 +#endif + + float box_size_x = 120.0* 1e-4; + float box_size_z = 12.0* 1e-4; + + int mobile_ions = 1; // Whether or not to push ions + double nppc = 512; // Ave. number of particles/cell in ea. species + //double nppc = 32; // Ave. number of particles/cell in ea. species + int load_particles = 1; // Flag to turn on/off particle load + int He_present = 1; + int H_present = 1; + double f_He = 1; // Ratio of number density of He to total ion density + double f_H = 1-f_He; // Ratio of number density of H to total ion density + if ( f_He == 1 ) H_present = 0; + if ( f_H == 1 ) He_present = 0; + +// Here _He is N3+ + // Precompute some useful variables. + double A_H = 1; + double Z_H = 1; + double A_He = 14; + double Z_He = 3; + double mic2_H = mpc2*A_H; + double mic2_He = mpc2*A_He; + double mime_H = mic2_H /mec2; + double mime_He = mic2_He/mec2; + + double uthe = sqrt(t_e/mec2); // vthe/c + double uthi_H = sqrt(t_i/mic2_H); // vthi/c for H + double uthi_He = sqrt(t_i/mic2_He); // vthi/c for He + + // Plasma skin deptth in cm + double delta = (vacuum_wavelength / (2*M_PI) ) / sqrt( n_e_over_n_crit ); + + double n_e = c_vac*c_vac*m_e/(4*M_PI*ec*ec*delta*delta); // electron density in cm^-3 + double debye = uthe*delta; // electron Debye length (cm) + double omega = sqrt( 1/n_e_over_n_crit ); // laser beam freq. in wpe + + double nx = 7776; + double ny = 1; + double nz = 549; + +#if 1 + // DEBUG - run on 16 proc. 
+ nx = nx/81; + box_size_x /= 81.; +#endif + + double hx = box_size_x/(delta*nx); // in c/wpe + double hz = box_size_z/(delta*nz); // in c/wpe + double hy = hz; + + double cell_size_x = hx*delta/debye; // Cell size in Debye lengths + double cell_size_z = hz*delta/debye; +//double cell_size_y = hy*delta/debye; + + double Lx = nx*hx; // in c/wpe + double Ly = ny*hy; // in c/wpe + double Lz = nz*hz; // in c/wpe + + double topology_x = nproc(); + double topology_y = 1; + double topology_z = 1; + + double f_number = 6; // f/# of beam + double lambda = vacuum_wavelength/delta; // vacuum wavelength in c/wpe + double waist = f_number*lambda; // width of beam at focus in c/wpe + double xfocus = Lx/2; // in c/wpe + double ycenter = 0; // center of spot in y on lhs boundary + double zcenter = 0; // center of spot in z on lhs boundary + double mask = 1.5; // set drive I=0 outside r>mask*width at lhs boundary + double width = waist*sqrt( 1 + (lambda*xfocus/(M_PI*waist*waist))*(lambda*xfocus/(M_PI*waist*waist))); + + // Peak instantaneous E field in "natural units" + double e0 = sqrt( 2*laser_intensity / (m_e*c_vac*c_vac*c_vac*n_e) ); +//e0 = e0*(waist/width); // at entrance (3D Gaussian) + e0 = e0*sqrt(waist/width); // 2D, at entrance + + double dt = cfl_req*courant_length(Lx,Ly,Lz,nx,ny,nz); // in 1/wpe; n.b. c=1 in nat. units + double nsteps_cycle = trunc_granular(2*M_PI/(dt*omega),1)+1; + dt = 2*M_PI/omega/nsteps_cycle; // nsteps_cycle time steps in one laser cycle + + double wpe1ps = 1e-12*c_vac/delta; + + double t_stop = 100*wpe1ps; // Runtime in 1/wpe + int particle_interval = 0; + int poynting_interval = int(M_PI/((omega+1.5)*dt)); // Num. steps between dumping poynting flux + int field_interval = int(0.01*wpe1ps/dt); // Num. steps between saving field, hydro data + int velocity_interval = int(0.1*wpe1ps/dt); // How frequently to dump binned velocity space data + int restart_interval = 10000000; // Num. steps between restart dumps + int quota_check_interval = 20; + + // Ben: This quota thing gracefully terminates after writing a final restart after + // 11.5 hours; if you want a longer time before shutdown, set this value larger. If + // you want the code to just run all weekend, then set it to very long (2400.*3500, e.g.) + + double quota_sec = 23.7*3600; // Run quota in sec. + + // Turn on integrated backscatter poynting diagnostic - right now there is a bug in this, so we + // only write the integrated backscatter time history on the left face. + + int write_poynting_data = 1; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only = 0; // Nonzero means only write lower x face + int write_poynting_sum = 0; // Whether to write integrated Poynting data + int write_poynting_faces = 0; // Whether to write poynting data on sim boundary faces + int write_eb_faces = 0; // Whether to write e and b field data on sim boundary faces + + double N_e = nppc*nx*ny*nz; // Number of macro electrons in box + double Np_e = Lx*Ly*Lz; // "Number" of "physical" electrons in box (nat. units) + double q_e = -Np_e/N_e; // Charge per macro electron + double N_i = N_e; // Number of macro ions of each species in box + double Np_i = Np_e/(Z_H*f_H+Z_He*f_He); // "Number" of "physical" ions of each sp. in box + double qi_H = Z_H *f_H *Np_i/N_i; // Charge per H macro ion + double qi_He = Z_He*f_He*Np_i/N_i; // Charge per He macro ion + + // Print simulation parameters + + sim_log("***** Simulation parameters *****"); + sim_log("* Processors: "< 25,000 steps. 
+ num_comm_round = 6; + + global->e0 = e0; + global->omega = omega; + global->field_interval = field_interval; + global->particle_interval = particle_interval; + global->poynting_interval = poynting_interval; + global->restart_interval = restart_interval; + global->quota_check_interval = quota_check_interval; + global->quota_sec = quota_sec; + global->rtoggle = 0; + global->load_particles = load_particles; + global->mobile_ions = mobile_ions; + global->H_present = H_present; + global->He_present = He_present; + global->topology_x = topology_x; + global->topology_y = topology_y; + global->topology_z = topology_z; + global->xfocus = xfocus; + global->ycenter = ycenter; + global->zcenter = zcenter; + global->mask = mask; + global->waist = waist; + global->width = width; + global->lambda = lambda; + + global->write_poynting_data = write_poynting_data; + + global->write_poynting_sum = write_poynting_sum; + global->write_poynting_faces = write_poynting_faces; + global->write_eb_faces = write_eb_faces; + global->write_backscatter_only = write_backscatter_only; + + global->vthe = uthe; + global->vthi_H = uthi_H; + global->vthi_He = uthi_He; + global->velocity_interval = velocity_interval; + + // Set up the species + // Allow additional local particles in case of non-uniformity. + + // Set up grid + sim_log("Setting up computational grid."); + grid->dx = hx; + grid->dy = hy; + grid->dz = hz; + grid->dt = dt; + grid->cvac = 1; + grid->eps0 = 1; + + sim_log("Setting up absorbing mesh."); + define_periodic_grid( 0, -0.5*Ly, -0.5*Lz, // Low corner + Lx, 0.5*Ly, 0.5*Lz, // High corner + nx, ny, nz, // Resolution + topology_x, topology_y, topology_z); // Topology + + if ( rank() == 0) { + set_domain_field_bc( BOUNDARY(-1,0,0), absorb_fields ); + } + if ( rank() == nproc() -1) { + set_domain_field_bc( BOUNDARY( 1,0,0), absorb_fields ); + } + + set_domain_field_bc( BOUNDARY( 0,0, 1), absorb_fields ); + set_domain_field_bc( BOUNDARY( 0,0,-1), absorb_fields ); + + sim_log("Setting up species."); + double max_local_np = 2.0*N_e/nproc(); + double max_local_nm = max_local_np/10.0; + species_t * electron = define_species("electron", -1, 1, max_local_np, max_local_nm, 20, 1); + + // Start with two ion species. We have option to go to Xe and Kr gas fills if + // we need a higher ion/electron macroparticle ratio. + + species_t *ion_H, *ion_He; + if ( mobile_ions ) { + if ( H_present ) ion_H = define_species("H", Z_H, mime_H, max_local_np, max_local_nm, 100, 1); + if ( He_present ) ion_He = define_species("He", Z_He, mime_He, max_local_np, max_local_nm, 100, 1); + } + + // From grid/partition.c: used to determine which domains are on edge +# define RANK_TO_INDEX(rank,ix,iy,iz) BEGIN_PRIMITIVE { \ + int _ix, _iy, _iz; \ + _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ + _iy = _ix/int(global->topology_x); /* iy = iy+gpy*iz */ \ + _ix -= _iy*int(global->topology_x); /* ix = ix */ \ + _iz = _iy/int(global->topology_y); /* iz = iz */ \ + _iy -= _iz*int(global->topology_y); /* iy = iy */ \ + (ix) = _ix; \ + (iy) = _iy; \ + (iz) = _iz; \ + } END_PRIMITIVE + + sim_log("Overriding x boundaries to absorb fields."); + int ix, iy, iz; // Domain location in mesh + RANK_TO_INDEX( int(rank()), ix, iy, iz ); + + // Set up Maxwellian reinjection B.C. 
+ sim_log("Setting up Maxwellian reinjection boundary condition."); + + particle_bc_t * maxwellian_reinjection = + define_particle_bc( maxwellian_reflux( species_list, entropy ) ); + set_reflux_temp( maxwellian_reinjection, electron, uthe, uthe ); + if ( mobile_ions ) { + if ( H_present ) set_reflux_temp( maxwellian_reinjection, ion_H, uthi_H, uthi_H ); + if ( He_present ) set_reflux_temp( maxwellian_reinjection, ion_He, uthi_He, uthi_He ); + } + + // Set up materials + sim_log("Setting up materials."); + define_material( "vacuum", 1 ); + define_field_array( NULL, damp ); + + // Paint the simulation volume with materials and boundary conditions +# define iv_region ( x< hx*iv_thick || x>Lx -hx*iv_thick \ + || z<-Lz/2+hz*iv_thick || z>Lz/2-hz*iv_thick ) /* all boundaries are i.v. */ + set_region_bc( iv_region, maxwellian_reinjection, maxwellian_reinjection, maxwellian_reinjection ); + + // Load particles + if ( load_particles ) { + sim_log("Loading particles."); + // Fast load of particles--don't bother fixing artificial domain correlations + double xmin=grid->x0, xmax=grid->x1; + double ymin=grid->y0, ymax=grid->y1; + double zmin=grid->z0, zmax=grid->z1; + repeat( N_e/(topology_x*topology_y*topology_z) ) { + double x = uniform( rng(0), xmin, xmax ); + double y = uniform( rng(0), ymin, ymax ); + double z = uniform( rng(0), zmin, zmax ); + if ( iv_region ) continue; // Particle fell in iv_region. Don't load. + // third to last arg is "weight," a positive number + inject_particle( electron, x, y, z, + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), -q_e, 0, 0 ); + if ( mobile_ions ) { + if ( H_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_H, x, y, z, + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), qi_H, 0, 0 ); + if ( He_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_He, x, y, z, + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), qi_He, 0, 0 ); + } + } + } + + + /*-------------------------------------------------------------------------- + * New dump definition + *------------------------------------------------------------------------*/ + + /*-------------------------------------------------------------------------- + * Set data output format + * + * This option allows the user to specify the data format for an output + * dump. Legal settings are 'band' and 'band_interleave'. Band-interleave + * format is the native storage format for data in VPIC. For field data, + * this looks something like: + * + * ex0 ey0 ez0 div_e_err0 cbx0 ... ex1 ey1 ez1 div_e_err1 cbx1 ... + * + * Banded data format stores all data of a particular state variable as a + * contiguous array, and is easier for ParaView to process efficiently. + * Banded data looks like: + * + * ex0 ex1 ex2 ... exN ey0 ey1 ey2 ... + * + *------------------------------------------------------------------------*/ + sim_log( "Setting up hydro and field diagnostics." 
); + + global->fdParams.format = band; + sim_log ( "Field output format : band" ); + + global->hedParams.format = band; + sim_log ( "Electron hydro output format : band" ); + + global->hHdParams.format = band; + sim_log ( "Hydrogen hydro output format : band" ); + + global->hHedParams.format = band; + sim_log ( "Helium hydro output format : band" ); + + /*-------------------------------------------------------------------------- + * Set stride + * + * This option allows data down-sampling at output. Data are down-sampled + * in each dimension by the stride specified for that dimension. For + * example, to down-sample the x-dimension of the field data by a factor + * of 2, i.e., half as many data will be output, select: + * + * global->fdParams.stride_x = 2; + * + * The following 2-D example shows down-sampling of a 7x7 grid (nx = 7, + * ny = 7. With ghost-cell padding the actual extents of the grid are 9x9. + * Setting the strides in x and y to equal 2 results in an output grid of + * nx = 4, ny = 4, with actual extents 6x6. + * + * G G G G G G G G G + * G X X X X X X X G + * G X X X X X X X G G G G G G G + * G X X X X X X X G G X X X X G + * G X X X X X X X G ==> G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G G G G G G + * G G G G G G G G G + * + * Note that grid extents in each dimension must be evenly divisible by + * the stride for that dimension: + * + * nx = 150; + * global->fdParams.stride_x = 10; // legal -> 150/10 = 15 + * + * global->fdParams.stride_x = 8; // illegal!!! -> 150/8 = 18.75 + *------------------------------------------------------------------------*/ + + // Strides for field and hydro arrays. Note that here we have defined them + // the same for fields and all hydro species; if desired, we could use different + // strides for each. Also note that strides must divide evenly into the number + // of cells in a given domain. 
+ + // Define strides and test that they evenly divide into grid->nx, ny, nz + int stride_x = 1, stride_y = 1, stride_z = 1; + if ( int(grid->nx)%stride_x ) ERROR(("Stride doesn't evenly divide grid->nx.")); + if ( int(grid->ny)%stride_y ) ERROR(("Stride doesn't evenly divide grid->ny.")); + if ( int(grid->nz)%stride_z ) ERROR(("Stride doesn't evenly divide grid->nz.")); + + //---------------------------------------------------------------------- + // Fields + + // relative path to fields data from global header + sprintf(global->fdParams.baseDir, "field"); + + // base file name for fields output + sprintf(global->fdParams.baseFileName, "fields"); + + // set field strides + global->fdParams.stride_x = stride_x; + global->fdParams.stride_y = stride_y; + global->fdParams.stride_z = stride_z; + sim_log ( "Fields x-stride " << global->fdParams.stride_x ); + sim_log ( "Fields y-stride " << global->fdParams.stride_y ); + sim_log ( "Fields z-stride " << global->fdParams.stride_z ); + + // add field parameters to list + global->outputParams.push_back(&global->fdParams); + + //---------------------------------------------------------------------- + // Electron hydro + + // relative path to electron species data from global header + sprintf(global->hedParams.baseDir, "ehydro"); + + // base file name for fields output + sprintf(global->hedParams.baseFileName, "e_hydro"); + + // set electron hydro strides + global->hedParams.stride_x = stride_x; + global->hedParams.stride_y = stride_y; + global->hedParams.stride_z = stride_z; + sim_log ( "Electron species x-stride " << global->hedParams.stride_x ); + sim_log ( "Electron species y-stride " << global->hedParams.stride_y ); + sim_log ( "Electron species z-stride " << global->hedParams.stride_z ); + + // add electron hydro parameters to list + global->outputParams.push_back(&global->hedParams); + + //---------------------------------------------------------------------- + // Hydrogen hydro + + // relative path to electron species data from global header + sprintf(global->hHdParams.baseDir, "Hhydro"); + + // base file name for fields output + sprintf(global->hHdParams.baseFileName, "H_hydro"); + + // set hydrogen hydro strides + global->hHdParams.stride_x = stride_x; + global->hHdParams.stride_y = stride_y; + global->hHdParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHdParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHdParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHdParams.stride_z ); + + // add hydrogen hydro parameters to list + global->outputParams.push_back(&global->hHdParams); + + //---------------------------------------------------------------------- + // Helium hydro + + // relative path to electron species data from global header + sprintf(global->hHedParams.baseDir, "Hehydro"); + + // base file name for fields output + sprintf(global->hHedParams.baseFileName, "He_hydro"); + + // set helium hydro strides + global->hHedParams.stride_x = stride_x; + global->hHedParams.stride_y = stride_y; + global->hHedParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHedParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHedParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHedParams.stride_z ); + + // add helium hydro parameters to list + global->outputParams.push_back(&global->hHedParams); + + /*----------------------------------------------------------------------- + * Set output fields + * + * It is now possible to select which 
state-variables are output on a + * per-dump basis. Variables are selected by passing an or-list of + * state-variables by name. For example, to only output the x-component + * of the electric field and the y-component of the magnetic field, the + * user would call output_variables like: + * + * global->fdParams.output_variables( ex | cby ); + * + * NOTE: OUTPUT VARIABLES ARE ONLY USED FOR THE BANDED FORMAT. IF THE + * FORMAT IS BAND-INTERLEAVE, ALL VARIABLES ARE OUTPUT AND CALLS TO + * 'output_variables' WILL HAVE NO EFFECT. + * + * ALSO: DEFAULT OUTPUT IS NONE! THIS IS DUE TO THE WAY THAT VPIC + * HANDLES GLOBAL VARIABLES IN THE INPUT DECK AND IS UNAVOIDABLE. + * + * For convenience, the output variable 'all' is defined: + * + * global->fdParams.output_variables( all ); + *------------------------------------------------------------------------*/ + /* CUT AND PASTE AS A STARTING POINT + * REMEMBER TO ADD APPROPRIATE GLOBAL DUMPPARAMETERS VARIABLE + + output_variables( all ); + + output_variables( electric | div_e_err | magnetic | div_b_err | + tca | rhob | current | rhof | + emat | nmat | fmat | cmat ); + + output_variables( current_density | charge_density | + momentum_density | ke_density | stress_tensor ); + */ + + //global->fdParams.output_variables( all ); + global->fdParams.output_variables( electric | magnetic ); + + //global->hedParams.output_variables( all ); + //global->hedParams.output_variables( current_density | momentum_density ); + global->hedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHdParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + + /*-------------------------------------------------------------------------- + * Convenience functions for simlog output + *------------------------------------------------------------------------*/ + char varlist[256]; + + create_field_list(varlist, global->fdParams); + sim_log ( "Fields variable list: " << varlist ); + + create_hydro_list(varlist, global->hedParams); + sim_log ( "Electron species variable list: " << varlist ); + + create_hydro_list(varlist, global->hHdParams); + sim_log ( "Ion species variable list: " << varlist ); + + /*------------------------------------------------------------------------*/ + + sim_log("***Finished with user-specified initialization ***"); + + // Upon completion of the initialization, the following occurs: + // - The synchronization error (tang E, norm B) is computed between domains + // and tang E / norm B are synchronized by averaging where discrepancies + // are encountered. + // - The initial divergence error of the magnetic field is computed and + // one pass of cleaning is done (for good measure) + // - The bound charge density necessary to give the simulation an initially + // clean divergence e is computed. 
+ // - The particle momentum is uncentered from u_0 to u_{-1/2} + // - The user diagnostics are called on the initial state + // - The physics loop is started + // + // The physics loop consists of: + // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2} + // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles) + // - User current injection (adjust field(x,y,z).jfx, jfy, jfz) + // - Advance B from B_0 to B_{1/2} + // - Advance E from E_0 to E_1 + // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz) + // - Advance B from B_{1/2} to B_1 + // - (periodically) Divergence clean electric field + // - (periodically) Divergence clean magnetic field + // - (periodically) Synchronize shared tang e and norm b + // - Increment the time step + // - Call user diagnostics + // - (periodically) Print a status message +} + + +begin_diagnostics { + + int mobile_ions=global->mobile_ions, + H_present=global->H_present, + He_present=global->He_present; + + if ( step()%200==0 ) sim_log("Time step: "<x##_interval>0 && remainder(step(),global->x##_interval)==0) + + // Do a mkdir at time t=0 to ensure we have all the directories we need + if ( step()==0 ) { + dump_mkdir("rundata"); + dump_mkdir("fft"); + dump_mkdir("field"); + dump_mkdir("ehydro"); + dump_mkdir("Hhydro"); + dump_mkdir("Hehydro"); + dump_mkdir("restart"); + dump_mkdir("particle"); + dump_mkdir("poynting"); + dump_mkdir("velocity"); + + // Turn off rundata for now + // dump_grid("rundata/grid"); + // dump_materials("rundata/materials"); + // dump_species("rundata/species"); + global_header("global", global->outputParams); + + } + + // Field and hydro data + + if ( should_dump(field) ) { + field_dump( global->fdParams ); + +#if 1 + if ( global->load_particles ) { + hydro_dump( "electron", global->hedParams ); + if ( global->mobile_ions ) { + if ( global->H_present ) hydro_dump( "H", global->hHdParams ); + if ( global->He_present ) hydro_dump( "He", global->hHedParams ); + } + } +#endif + + } + + // Particle dump data +#if 0 + if ( should_dump(particle) && global->load_particles ) { + dump_particles( "electron", "particle/eparticle" ); + if ( global->mobile_ions ) { + if ( global->H_present ) dump_particles( "H", "particle/Hparticle" ); + if ( global->He_present ) dump_particles( "He", "particle/Heparticle" ); + } + } +#endif + + +#define ALLOCATE(A,LEN,TYPE) \ + if ( !((A)=(TYPE *)malloc((size_t)(LEN)*sizeof(TYPE))) ) ERROR(("Cannot allocate.")); + + //--------------------------------------------------------------------------- + + // Restart dump filenames are by default tagged with the current timestep. + // If you do not want to do this add "0" to the dump_restart arguments. One + // reason to not tag the dumps is if you want only one restart dump (the most + // recent one) to be stored at a time to conserve on file space. Note: A + // restart dump should be the _very_ _last_ dump made in a diagnostics + // routine. If not, the diagnostics performed after the restart dump but + // before the next timestep will be missed on a restart. Restart dumps are + // in a binary format. Each rank makes a restart dump. + + // Restarting from restart files is accomplished by running the executable + // with "restart restart" as additional command line arguments. The executable + // must be identical to that used to generate the restart dumps or else + // the behavior may be unpredictable. 
+ + // Note: restart is not currently working with custom boundary conditions + // (such as the reflux condition) and has not been tested with emission + // turned on. + + if ( step()>0 && should_dump(restart) ) { + static const char * restart_fbase[2] = { "restart/restart0", "restart/restart1" }; + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( restart_fbase[global->rtoggle], step() ); + //END_TURNSTILE; + global->rtoggle^=1; + } + + // Shut down simulation when wall clock time exceeds global->quota_sec. + // Note that the mp_elapsed() is guaranteed to return the same value for all + // processors (i.e., elapsed time on proc #0), and therefore the abort will + // be synchronized across processors. + + if ( step()>0 && global->quota_check_interval && (step()%global->quota_check_interval)==0 ) { + if ( uptime() > global->quota_sec ) { + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( "restart/restart", step() ); + //END_TURNSTILE; + + sim_log( "Restart dump restart completed." ); + sim_log( "Allowed runtime exceeded for this job. Terminating." ); + mp_barrier(); // Just to be safe + halt_mp(); + exit(0); + } + } + +} + + +begin_field_injection { + // Inject a light wave from lhs boundary with E aligned along y + // Use scalar diffraction theory for the Gaussian beam source. (This is approximate). + + // For quiet startup (i.e., so that we don't propagate a delta-function noise + // pulse at time t=0) we multiply by a constant phase term exp(i phi) where: + // phi = k*global->xfocus+atan(h) (3d) + + // Inject from the left a field of the form ey = e0 sin( omega t ) + +# define DY ( grid->y0 + (iy-0.5)*grid->dy - global->ycenter ) +# define DZ ( grid->z0 + (iz-1 )*grid->dz - global->zcenter ) +# define R2 ( DY*DY + DZ*DZ ) +# define R2Z ( DZ*DZ ) +# define PHASE ( -global->omega*t + h*R2Z/(global->width*global->width) ) +# define MASK ( R2Z<=pow(global->mask*global->width,2) ? 
1 : 0 ) + + if ( grid->x0==0 ) { // Node is on left boundary + double alpha = grid->cvac*grid->dt/grid->dx; + double emax_coeff = (4/(1+alpha))*global->omega*grid->dt*global->e0; + double prefactor = emax_coeff*sqrt(2/M_PI); + double t = grid->dt*step(); + + // Compute Rayleigh length in c/wpe + double rl = M_PI*global->waist*global->waist/global->lambda; + + double pulse_shape_factor = 1; + float pulse_length = 70; // units of 1/wpe + float sin_t_tau = sin(0.5*t*M_PI/pulse_length); + pulse_shape_factor = ( txfocus/rl; // Distance / Rayleigh length + + // Loop over all Ey values on left edge of this node + for ( int iz=1; iz<=grid->nz+1; ++iz ) + for ( int iy=1; iy<=grid->ny; ++iy ) + field(1,iy,iz).ey += prefactor + * cos(PHASE) +// * exp(-R2/(global->width*global->width)) // 3D + * exp(-R2Z/(global->width*global->width)) + * MASK * pulse_shape_factor; + } +} + + +begin_particle_injection { + // No particle injection for this simulation +} + + +begin_current_injection { + // No current injection for this simulation +} + +begin_particle_collisions { + // No particle collisions for this simulation +} + diff --git a/TestScripts/lpi-deck/lpi_2d_F6_test-nx12 b/TestScripts/lpi-deck/lpi_2d_F6_test-nx12 new file mode 100755 index 00000000..31ee9ff3 --- /dev/null +++ b/TestScripts/lpi-deck/lpi_2d_F6_test-nx12 @@ -0,0 +1,987 @@ +// Stress Tensor(e_hydro)_4/abs(Charge Density(e_hydro)+ 1.0e-8) - (Momentum Density(e_hydro)_Y/abs(Charge Density(e_hydro)+ 1.0e-8)) * (Current Density(e_hydro)_Y/(Charge Density(e_hydro) + 1.0e-8)) + +//======================================================================== +// +// LPI 3D deck - Linearly polarized (in y) plane wave incident from left +// boundary +// +// Adapted from Albright's Lightning 3D LPI deck. +// B. Albright, X-1-PTA; 28 Jan. 2007 +// Lin Yin, X-1-PTA, 23 Feb 2009, for Cerrillos test +// +// Executable creates its own directory structure. Remove the old with: +// +// rm -rf rundata ehydro Hhydro Hehydro restart poynting velocity particle field +//======================================================================== + +// Employ turnstiles to partially serialize the high-volume file writes. +// In this case, the restart dumps. Set NUM_TURNSTILES to be the desired +// number of simultaneous writes. +#define NUM_TURNSTILES 128 + +begin_globals { + double e0; // peak amplitude of oscillating electric field + double omega; // angular freq. of the beam + int field_interval; // how often to dump field and hydro + int particle_interval; // how often to dump particle data + int poynting_interval; // how often to compute poynting flux on boundary + int restart_interval; // how often to write restart data + int quota_check_interval; // how often to check if quote exceeded + double quota_sec; // run quota in sec + int rtoggle; // enable save of last 2 restart files for safety + int load_particles; // were particles loaded? 
+ double topology_x; // domain topology needed to normalize Poynting diagnostic + double topology_y; + double topology_z; + int mobile_ions; + int H_present; + int He_present; + + // Parameters for 3d Gaussian wave launch + double lambda; + double waist; // how wide the focused beam is + double width; + double zcenter; // center of beam at boundary in z + double ycenter; // center of beam at boundary in y + double xfocus; // how far from boundary to focus + double mask; // # gaussian widths from beam center where I is nonzero + + // Ponyting diagnostic flags - which output to turn on + + // write_backscatter_only flag: when this flag is nonzero, it means to only compute + // poynting data for the lower-x surface. This flag affects both the summed poynting + // data as well as the surface data. + + int write_poynting_data; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only; // nonzero means we only write backscatter diagnostic for fields + + // write_poynting_sum is nonzero if you wish to write a file containing the integrated + // poynting data on one or more surfaces. If write_backscatter_only is nonzero, then + // the output file will be a time series of double precision numbers containing the + // integrated poynting flux at that point in time. If write_backscatter_only is zero, + // then for each time step, six double precision numbers are written, containing the + // poynting flux through the lower x, upper x, lower y, upper y, lower z, and upper z + // surfaces. + + int write_poynting_sum; // nonzero if we wish to write integrated Poynting data + + // write_poynting_faces is probably useless, but I put it here anyway in case it's not. + // When this flag is nonzero, it will print out the poynting flux at each of a 2D array + // of points on the surface. When this flag is turned on and write_backscatter_only is + // nonzero, then only the 2D array of points on the lower-x boundary surface are written + // for each time step. When this flag is turned on and write_bacscatter_only is + // zero, then it will write 2D array data for the lower x, upper x, lower y, upper y, + // lower z, upper z surfaces for each time step. + + int write_poynting_faces; // nonzero if we wish to write Poynting data on sim boundaries + + // write_eb_faces is nonzero when we wish to get the raw e and b data at the boundary + // (e.g., to be used with a filter to distinguish between SRS and SBS backscatter). + // When this flag is on and write_backscatter_only is nonzero, then only the 2D array + // of points on the lower-x boundary surface are written for each time step. When + // this flag is turned on and write_backscatteR_only is zero, then it will write 2D + // array data for the lower x, upper x, lower y, upper y, lower z, upper z surfaces for + // each time step. When turned on, four files are produced: e1, e2, cb1, cb2. The + // values of the quantities printed depend on the face one is considering: for the + // x faces, e1 = ey, e2 = ez, cb1 = cby, cb2 = cbz. Similarly for y and z surfaces, + // but where 1 and 2 stand for (z,x) and (x,y) coordinates, respectively. 
+ + int write_eb_faces; // nonzero if we wish to write E and B data on sim boundaries + + // Needed for movie files + float vthe; // vthe/c + float vthi_He; // vthi_He/c + float vthi_H; // vthi_H/c + int velocity_interval; // how frequently to dump binned velocity space data + + // Dump parameters for standard VPIC output formats + DumpParameters fdParams; + DumpParameters hedParams; + DumpParameters hHdParams; + DumpParameters hHedParams; + std::vector outputParams; +}; + +begin_initialization { + + // System of units + double ec = 4.8032e-10; // stat coulomb + double c_vac = 2.99792458e10; // cm/sec + double m_e = 9.1094e-28; // g + double k_b = 1.6022e-12; // erg/eV + double mec2 = m_e*c_vac*c_vac/k_b; + double mpc2 = mec2*1836.0; + + double cfl_req = 0.98; // How close to Courant should we try to run + double damp = 0; // How much radiation damping + double iv_thick = 2; // Thickness of impermeable vacuum (in cells) + + // Experimental parameters + + double t_e = 600; // electron temperature, eV + double t_i = 150; // ion temperature, eV + double n_e_over_n_crit = 0.05; // n_e/n_crit + double vacuum_wavelength = 527 * 1e-7; // third micron light (cm) + double laser_intensity = 2.5e15 * 1e7; // in ergs/cm^2 (note: 1 W = 1e7 ergs) + + // Simulation parameters + +#if 0 + // 2048 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 64; + double topology_y = 4; + double topology_z = 8; + // single-processor mesh = 128 x 128 x 64 +#endif + +#if 0 + // 14300x4 processors + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 12.0 * 1e-4; + double Lz = 12.0 * 1e-4; + double nx = 6292; + double ny = 440; + double nz = 440; + double topology_x = 143; + double topology_y = 10; + double topology_z = 10; + // single-processor mesh = 44 x 44 x 44 +#endif + +#if 0 + // 2*4096 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 128; + double topology_y = 8; + double topology_z = 8; + // single-processor mesh = 64 x 64 x 64 +#endif + + // This choice of nx, ny, nz and topology values imply that the + // grid is 102 x 42 x 42 cells. The transverse size is slightly smaller than + // in the original deck in order to enable more choices for down-sampling/striding + // the field and hydro output. (The original had 43 cells in each direction, which + // is prime and therefore not strideable with Ben's output format). 
+ +# if 0 + // one processor problem, for debugging + Lx /= topology_x; nx /= topology_x; + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 1; + topology_y = 1; + topology_z = 1; +# endif + +# if 0 + // twoprocessor problem, for debugging + Lx /= (topology_x/2); nx /= (topology_x/2); + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 2; + topology_y = 1; + topology_z = 1; +# endif + +// Run a smaller problem for debugging purposes +#if 0 + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 1.20 * 1e-4; + double Lz = 1.20 * 1e-4; + double nx = 6292; + double ny = 44; + double nz = 44; + double topology_x = 143; + double topology_y = 1; + double topology_z = 1; + + // nproc() x 1 x 1 : + double topology = nproc(); + Lx /= 143; Lx *= topology; + nx /= 143; nx *= topology; + topology_x /= 143; topology_x *= topology; + + // single-processor mesh = 44 x 44 x 44 +#endif + + float box_size_x = 120.0* 1e-4; + float box_size_z = 12.0* 1e-4; + + int mobile_ions = 1; // Whether or not to push ions + double nppc = 512; // Ave. number of particles/cell in ea. species + //double nppc = 32; // Ave. number of particles/cell in ea. species + int load_particles = 1; // Flag to turn on/off particle load + int He_present = 1; + int H_present = 1; + double f_He = 1; // Ratio of number density of He to total ion density + double f_H = 1-f_He; // Ratio of number density of H to total ion density + if ( f_He == 1 ) H_present = 0; + if ( f_H == 1 ) He_present = 0; + +// Here _He is N3+ + // Precompute some useful variables. + double A_H = 1; + double Z_H = 1; + double A_He = 14; + double Z_He = 3; + double mic2_H = mpc2*A_H; + double mic2_He = mpc2*A_He; + double mime_H = mic2_H /mec2; + double mime_He = mic2_He/mec2; + + double uthe = sqrt(t_e/mec2); // vthe/c + double uthi_H = sqrt(t_i/mic2_H); // vthi/c for H + double uthi_He = sqrt(t_i/mic2_He); // vthi/c for He + + // Plasma skin deptth in cm + double delta = (vacuum_wavelength / (2*M_PI) ) / sqrt( n_e_over_n_crit ); + + double n_e = c_vac*c_vac*m_e/(4*M_PI*ec*ec*delta*delta); // electron density in cm^-3 + double debye = uthe*delta; // electron Debye length (cm) + double omega = sqrt( 1/n_e_over_n_crit ); // laser beam freq. in wpe + + double nx = 7776; + double ny = 1; + double nz = 549; + +// Weak Scaling Test --matsekh +#if 1 + // nx = 7776 / {9, 27, 81, 162, 324, 648} == {864, 288, 96, 48, 24, 12} + nx = nx/648; + box_size_x /= 648.; +#endif +// End Weak Scaling Test --matsekh + +#if 0 + // DEBUG - run on 16 proc. 
+ nx = nx/81; // default nx=7776/81==96 --matsekh + box_size_x /= 81.; + +#endif + + double hx = box_size_x/(delta*nx); // in c/wpe + double hz = box_size_z/(delta*nz); // in c/wpe + double hy = hz; + + double cell_size_x = hx*delta/debye; // Cell size in Debye lengths + double cell_size_z = hz*delta/debye; +//double cell_size_y = hy*delta/debye; + + double Lx = nx*hx; // in c/wpe + double Ly = ny*hy; // in c/wpe + double Lz = nz*hz; // in c/wpe + + double topology_x = nproc(); + double topology_y = 1; + double topology_z = 1; + + double f_number = 6; // f/# of beam + double lambda = vacuum_wavelength/delta; // vacuum wavelength in c/wpe + double waist = f_number*lambda; // width of beam at focus in c/wpe + double xfocus = Lx/2; // in c/wpe + double ycenter = 0; // center of spot in y on lhs boundary + double zcenter = 0; // center of spot in z on lhs boundary + double mask = 1.5; // set drive I=0 outside r>mask*width at lhs boundary + double width = waist*sqrt( 1 + (lambda*xfocus/(M_PI*waist*waist))*(lambda*xfocus/(M_PI*waist*waist))); + + // Peak instantaneous E field in "natural units" + double e0 = sqrt( 2*laser_intensity / (m_e*c_vac*c_vac*c_vac*n_e) ); +//e0 = e0*(waist/width); // at entrance (3D Gaussian) + e0 = e0*sqrt(waist/width); // 2D, at entrance + + double dt = cfl_req*courant_length(Lx,Ly,Lz,nx,ny,nz); // in 1/wpe; n.b. c=1 in nat. units + double nsteps_cycle = trunc_granular(2*M_PI/(dt*omega),1)+1; + dt = 2*M_PI/omega/nsteps_cycle; // nsteps_cycle time steps in one laser cycle + + double wpe1ps = 1e-12*c_vac/delta; + + double t_stop = 100*wpe1ps; // Runtime in 1/wpe + int particle_interval = 0; + int poynting_interval = int(M_PI/((omega+1.5)*dt)); // Num. steps between dumping poynting flux + int field_interval = int(0.01*wpe1ps/dt); // Num. steps between saving field, hydro data + int velocity_interval = int(0.1*wpe1ps/dt); // How frequently to dump binned velocity space data + int restart_interval = 10000000; // Num. steps between restart dumps + int quota_check_interval = 20; + + // Ben: This quota thing gracefully terminates after writing a final restart after + // 11.5 hours; if you want a longer time before shutdown, set this value larger. If + // you want the code to just run all weekend, then set it to very long (2400.*3500, e.g.) + + double quota_sec = 23.7*3600; // Run quota in sec. + + // Turn on integrated backscatter poynting diagnostic - right now there is a bug in this, so we + // only write the integrated backscatter time history on the left face. + + int write_poynting_data = 1; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only = 0; // Nonzero means only write lower x face + int write_poynting_sum = 0; // Whether to write integrated Poynting data + int write_poynting_faces = 0; // Whether to write poynting data on sim boundary faces + int write_eb_faces = 0; // Whether to write e and b field data on sim boundary faces + + double N_e = nppc*nx*ny*nz; // Number of macro electrons in box + double Np_e = Lx*Ly*Lz; // "Number" of "physical" electrons in box (nat. units) + double q_e = -Np_e/N_e; // Charge per macro electron + double N_i = N_e; // Number of macro ions of each species in box + double Np_i = Np_e/(Z_H*f_H+Z_He*f_He); // "Number" of "physical" ions of each sp. 
in box + double qi_H = Z_H *f_H *Np_i/N_i; // Charge per H macro ion + double qi_He = Z_He*f_He*Np_i/N_i; // Charge per He macro ion + + // Print simulation parameters + + sim_log("***** Simulation parameters *****"); + sim_log("* Processors: "< 25,000 steps. + num_comm_round = 6; + + global->e0 = e0; + global->omega = omega; + global->field_interval = field_interval; + global->particle_interval = particle_interval; + global->poynting_interval = poynting_interval; + global->restart_interval = restart_interval; + global->quota_check_interval = quota_check_interval; + global->quota_sec = quota_sec; + global->rtoggle = 0; + global->load_particles = load_particles; + global->mobile_ions = mobile_ions; + global->H_present = H_present; + global->He_present = He_present; + global->topology_x = topology_x; + global->topology_y = topology_y; + global->topology_z = topology_z; + global->xfocus = xfocus; + global->ycenter = ycenter; + global->zcenter = zcenter; + global->mask = mask; + global->waist = waist; + global->width = width; + global->lambda = lambda; + + global->write_poynting_data = write_poynting_data; + + global->write_poynting_sum = write_poynting_sum; + global->write_poynting_faces = write_poynting_faces; + global->write_eb_faces = write_eb_faces; + global->write_backscatter_only = write_backscatter_only; + + global->vthe = uthe; + global->vthi_H = uthi_H; + global->vthi_He = uthi_He; + global->velocity_interval = velocity_interval; + + // Set up the species + // Allow additional local particles in case of non-uniformity. + + // Set up grid + sim_log("Setting up computational grid."); + grid->dx = hx; + grid->dy = hy; + grid->dz = hz; + grid->dt = dt; + grid->cvac = 1; + grid->eps0 = 1; + + sim_log("Setting up absorbing mesh."); + define_periodic_grid( 0, -0.5*Ly, -0.5*Lz, // Low corner + Lx, 0.5*Ly, 0.5*Lz, // High corner + nx, ny, nz, // Resolution + topology_x, topology_y, topology_z); // Topology + + if ( rank() == 0) { + set_domain_field_bc( BOUNDARY(-1,0,0), absorb_fields ); + } + if ( rank() == nproc() -1) { + set_domain_field_bc( BOUNDARY( 1,0,0), absorb_fields ); + } + + set_domain_field_bc( BOUNDARY( 0,0, 1), absorb_fields ); + set_domain_field_bc( BOUNDARY( 0,0,-1), absorb_fields ); + + sim_log("Setting up species."); + double max_local_np = 2.0*N_e/nproc(); + double max_local_nm = max_local_np/10.0; + species_t * electron = define_species("electron", -1, 1, max_local_np, max_local_nm, 20, 1); + + // Start with two ion species. We have option to go to Xe and Kr gas fills if + // we need a higher ion/electron macroparticle ratio. + + species_t *ion_H, *ion_He; + if ( mobile_ions ) { + if ( H_present ) ion_H = define_species("H", Z_H, mime_H, max_local_np, max_local_nm, 100, 1); + if ( He_present ) ion_He = define_species("He", Z_He, mime_He, max_local_np, max_local_nm, 100, 1); + } + + // From grid/partition.c: used to determine which domains are on edge +# define RANK_TO_INDEX(rank,ix,iy,iz) BEGIN_PRIMITIVE { \ + int _ix, _iy, _iz; \ + _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ + _iy = _ix/int(global->topology_x); /* iy = iy+gpy*iz */ \ + _ix -= _iy*int(global->topology_x); /* ix = ix */ \ + _iz = _iy/int(global->topology_y); /* iz = iz */ \ + _iy -= _iz*int(global->topology_y); /* iy = iy */ \ + (ix) = _ix; \ + (iy) = _iy; \ + (iz) = _iz; \ + } END_PRIMITIVE + + sim_log("Overriding x boundaries to absorb fields."); + int ix, iy, iz; // Domain location in mesh + RANK_TO_INDEX( int(rank()), ix, iy, iz ); + + // Set up Maxwellian reinjection B.C. 
+ sim_log("Setting up Maxwellian reinjection boundary condition."); + + particle_bc_t * maxwellian_reinjection = + define_particle_bc( maxwellian_reflux( species_list, entropy ) ); + set_reflux_temp( maxwellian_reinjection, electron, uthe, uthe ); + if ( mobile_ions ) { + if ( H_present ) set_reflux_temp( maxwellian_reinjection, ion_H, uthi_H, uthi_H ); + if ( He_present ) set_reflux_temp( maxwellian_reinjection, ion_He, uthi_He, uthi_He ); + } + + // Set up materials + sim_log("Setting up materials."); + define_material( "vacuum", 1 ); + define_field_array( NULL, damp ); + + // Paint the simulation volume with materials and boundary conditions +# define iv_region ( x< hx*iv_thick || x>Lx -hx*iv_thick \ + || z<-Lz/2+hz*iv_thick || z>Lz/2-hz*iv_thick ) /* all boundaries are i.v. */ + set_region_bc( iv_region, maxwellian_reinjection, maxwellian_reinjection, maxwellian_reinjection ); + + // Load particles + if ( load_particles ) { + sim_log("Loading particles."); + // Fast load of particles--don't bother fixing artificial domain correlations + double xmin=grid->x0, xmax=grid->x1; + double ymin=grid->y0, ymax=grid->y1; + double zmin=grid->z0, zmax=grid->z1; + repeat( N_e/(topology_x*topology_y*topology_z) ) { + double x = uniform( rng(0), xmin, xmax ); + double y = uniform( rng(0), ymin, ymax ); + double z = uniform( rng(0), zmin, zmax ); + if ( iv_region ) continue; // Particle fell in iv_region. Don't load. + // third to last arg is "weight," a positive number + inject_particle( electron, x, y, z, + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), -q_e, 0, 0 ); + if ( mobile_ions ) { + if ( H_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_H, x, y, z, + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), qi_H, 0, 0 ); + if ( He_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_He, x, y, z, + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), qi_He, 0, 0 ); + } + } + } + + + /*-------------------------------------------------------------------------- + * New dump definition + *------------------------------------------------------------------------*/ + + /*-------------------------------------------------------------------------- + * Set data output format + * + * This option allows the user to specify the data format for an output + * dump. Legal settings are 'band' and 'band_interleave'. Band-interleave + * format is the native storage format for data in VPIC. For field data, + * this looks something like: + * + * ex0 ey0 ez0 div_e_err0 cbx0 ... ex1 ey1 ez1 div_e_err1 cbx1 ... + * + * Banded data format stores all data of a particular state variable as a + * contiguous array, and is easier for ParaView to process efficiently. + * Banded data looks like: + * + * ex0 ex1 ex2 ... exN ey0 ey1 ey2 ... + * + *------------------------------------------------------------------------*/ + sim_log( "Setting up hydro and field diagnostics." 
); + + global->fdParams.format = band; + sim_log ( "Field output format : band" ); + + global->hedParams.format = band; + sim_log ( "Electron hydro output format : band" ); + + global->hHdParams.format = band; + sim_log ( "Hydrogen hydro output format : band" ); + + global->hHedParams.format = band; + sim_log ( "Helium hydro output format : band" ); + + /*-------------------------------------------------------------------------- + * Set stride + * + * This option allows data down-sampling at output. Data are down-sampled + * in each dimension by the stride specified for that dimension. For + * example, to down-sample the x-dimension of the field data by a factor + * of 2, i.e., half as many data will be output, select: + * + * global->fdParams.stride_x = 2; + * + * The following 2-D example shows down-sampling of a 7x7 grid (nx = 7, + * ny = 7. With ghost-cell padding the actual extents of the grid are 9x9. + * Setting the strides in x and y to equal 2 results in an output grid of + * nx = 4, ny = 4, with actual extents 6x6. + * + * G G G G G G G G G + * G X X X X X X X G + * G X X X X X X X G G G G G G G + * G X X X X X X X G G X X X X G + * G X X X X X X X G ==> G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G G G G G G + * G G G G G G G G G + * + * Note that grid extents in each dimension must be evenly divisible by + * the stride for that dimension: + * + * nx = 150; + * global->fdParams.stride_x = 10; // legal -> 150/10 = 15 + * + * global->fdParams.stride_x = 8; // illegal!!! -> 150/8 = 18.75 + *------------------------------------------------------------------------*/ + + // Strides for field and hydro arrays. Note that here we have defined them + // the same for fields and all hydro species; if desired, we could use different + // strides for each. Also note that strides must divide evenly into the number + // of cells in a given domain. 
+ + // Define strides and test that they evenly divide into grid->nx, ny, nz + int stride_x = 1, stride_y = 1, stride_z = 1; + if ( int(grid->nx)%stride_x ) ERROR(("Stride doesn't evenly divide grid->nx.")); + if ( int(grid->ny)%stride_y ) ERROR(("Stride doesn't evenly divide grid->ny.")); + if ( int(grid->nz)%stride_z ) ERROR(("Stride doesn't evenly divide grid->nz.")); + + //---------------------------------------------------------------------- + // Fields + + // relative path to fields data from global header + sprintf(global->fdParams.baseDir, "field"); + + // base file name for fields output + sprintf(global->fdParams.baseFileName, "fields"); + + // set field strides + global->fdParams.stride_x = stride_x; + global->fdParams.stride_y = stride_y; + global->fdParams.stride_z = stride_z; + sim_log ( "Fields x-stride " << global->fdParams.stride_x ); + sim_log ( "Fields y-stride " << global->fdParams.stride_y ); + sim_log ( "Fields z-stride " << global->fdParams.stride_z ); + + // add field parameters to list + global->outputParams.push_back(&global->fdParams); + + //---------------------------------------------------------------------- + // Electron hydro + + // relative path to electron species data from global header + sprintf(global->hedParams.baseDir, "ehydro"); + + // base file name for fields output + sprintf(global->hedParams.baseFileName, "e_hydro"); + + // set electron hydro strides + global->hedParams.stride_x = stride_x; + global->hedParams.stride_y = stride_y; + global->hedParams.stride_z = stride_z; + sim_log ( "Electron species x-stride " << global->hedParams.stride_x ); + sim_log ( "Electron species y-stride " << global->hedParams.stride_y ); + sim_log ( "Electron species z-stride " << global->hedParams.stride_z ); + + // add electron hydro parameters to list + global->outputParams.push_back(&global->hedParams); + + //---------------------------------------------------------------------- + // Hydrogen hydro + + // relative path to electron species data from global header + sprintf(global->hHdParams.baseDir, "Hhydro"); + + // base file name for fields output + sprintf(global->hHdParams.baseFileName, "H_hydro"); + + // set hydrogen hydro strides + global->hHdParams.stride_x = stride_x; + global->hHdParams.stride_y = stride_y; + global->hHdParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHdParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHdParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHdParams.stride_z ); + + // add hydrogen hydro parameters to list + global->outputParams.push_back(&global->hHdParams); + + //---------------------------------------------------------------------- + // Helium hydro + + // relative path to electron species data from global header + sprintf(global->hHedParams.baseDir, "Hehydro"); + + // base file name for fields output + sprintf(global->hHedParams.baseFileName, "He_hydro"); + + // set helium hydro strides + global->hHedParams.stride_x = stride_x; + global->hHedParams.stride_y = stride_y; + global->hHedParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHedParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHedParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHedParams.stride_z ); + + // add helium hydro parameters to list + global->outputParams.push_back(&global->hHedParams); + + /*----------------------------------------------------------------------- + * Set output fields + * + * It is now possible to select which 
state-variables are output on a + * per-dump basis. Variables are selected by passing an or-list of + * state-variables by name. For example, to only output the x-component + * of the electric field and the y-component of the magnetic field, the + * user would call output_variables like: + * + * global->fdParams.output_variables( ex | cby ); + * + * NOTE: OUTPUT VARIABLES ARE ONLY USED FOR THE BANDED FORMAT. IF THE + * FORMAT IS BAND-INTERLEAVE, ALL VARIABLES ARE OUTPUT AND CALLS TO + * 'output_variables' WILL HAVE NO EFFECT. + * + * ALSO: DEFAULT OUTPUT IS NONE! THIS IS DUE TO THE WAY THAT VPIC + * HANDLES GLOBAL VARIABLES IN THE INPUT DECK AND IS UNAVOIDABLE. + * + * For convenience, the output variable 'all' is defined: + * + * global->fdParams.output_variables( all ); + *------------------------------------------------------------------------*/ + /* CUT AND PASTE AS A STARTING POINT + * REMEMBER TO ADD APPROPRIATE GLOBAL DUMPPARAMETERS VARIABLE + + output_variables( all ); + + output_variables( electric | div_e_err | magnetic | div_b_err | + tca | rhob | current | rhof | + emat | nmat | fmat | cmat ); + + output_variables( current_density | charge_density | + momentum_density | ke_density | stress_tensor ); + */ + + //global->fdParams.output_variables( all ); + global->fdParams.output_variables( electric | magnetic ); + + //global->hedParams.output_variables( all ); + //global->hedParams.output_variables( current_density | momentum_density ); + global->hedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHdParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + + /*-------------------------------------------------------------------------- + * Convenience functions for simlog output + *------------------------------------------------------------------------*/ + char varlist[256]; + + create_field_list(varlist, global->fdParams); + sim_log ( "Fields variable list: " << varlist ); + + create_hydro_list(varlist, global->hedParams); + sim_log ( "Electron species variable list: " << varlist ); + + create_hydro_list(varlist, global->hHdParams); + sim_log ( "Ion species variable list: " << varlist ); + + /*------------------------------------------------------------------------*/ + + sim_log("***Finished with user-specified initialization ***"); + + // Upon completion of the initialization, the following occurs: + // - The synchronization error (tang E, norm B) is computed between domains + // and tang E / norm B are synchronized by averaging where discrepancies + // are encountered. + // - The initial divergence error of the magnetic field is computed and + // one pass of cleaning is done (for good measure) + // - The bound charge density necessary to give the simulation an initially + // clean divergence e is computed. 
+  // - The particle momentum is uncentered from u_0 to u_{-1/2}
+  // - The user diagnostics are called on the initial state
+  // - The physics loop is started
+  //
+  // The physics loop consists of:
+  // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2}
+  // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles)
+  // - User current injection (adjust field(x,y,z).jfx, jfy, jfz)
+  // - Advance B from B_0 to B_{1/2}
+  // - Advance E from E_0 to E_1
+  // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz)
+  // - Advance B from B_{1/2} to B_1
+  // - (periodically) Divergence clean electric field
+  // - (periodically) Divergence clean magnetic field
+  // - (periodically) Synchronize shared tang e and norm b
+  // - Increment the time step
+  // - Call user diagnostics
+  // - (periodically) Print a status message
+}
+
+
+begin_diagnostics {
+
+  int mobile_ions=global->mobile_ions,
+      H_present=global->H_present,
+      He_present=global->He_present;
+
+  if ( step()%200==0 ) sim_log("Time step: "<<step());
+
+# define should_dump(x) \
+  (global->x##_interval>0 && remainder(step(),global->x##_interval)==0)
+
+  // Do a mkdir at time t=0 to ensure we have all the directories we need
+  if ( step()==0 ) {
+    dump_mkdir("rundata");
+    dump_mkdir("fft");
+    dump_mkdir("field");
+    dump_mkdir("ehydro");
+    dump_mkdir("Hhydro");
+    dump_mkdir("Hehydro");
+    dump_mkdir("restart");
+    dump_mkdir("particle");
+    dump_mkdir("poynting");
+    dump_mkdir("velocity");
+
+    // Turn off rundata for now
+    // dump_grid("rundata/grid");
+    // dump_materials("rundata/materials");
+    // dump_species("rundata/species");
+    global_header("global", global->outputParams);
+
+  }
+
+  // Field and hydro data
+
+  if ( should_dump(field) ) {
+    field_dump( global->fdParams );
+
+#if 1
+    if ( global->load_particles ) {
+      hydro_dump( "electron", global->hedParams );
+      if ( global->mobile_ions ) {
+        if ( global->H_present ) hydro_dump( "H", global->hHdParams );
+        if ( global->He_present ) hydro_dump( "He", global->hHedParams );
+      }
+    }
+#endif
+
+  }
+
+  // Particle dump data
+#if 0
+  if ( should_dump(particle) && global->load_particles ) {
+    dump_particles( "electron", "particle/eparticle" );
+    if ( global->mobile_ions ) {
+      if ( global->H_present ) dump_particles( "H", "particle/Hparticle" );
+      if ( global->He_present ) dump_particles( "He", "particle/Heparticle" );
+    }
+  }
+#endif
+
+
+#define ALLOCATE(A,LEN,TYPE) \
+  if ( !((A)=(TYPE *)malloc((size_t)(LEN)*sizeof(TYPE))) ) ERROR(("Cannot allocate."));
+
+  //---------------------------------------------------------------------------
+
+  // Restart dump filenames are by default tagged with the current timestep.
+  // If you do not want to do this add "0" to the dump_restart arguments. One
+  // reason to not tag the dumps is if you want only one restart dump (the most
+  // recent one) to be stored at a time to conserve on file space. Note: A
+  // restart dump should be the _very_ _last_ dump made in a diagnostics
+  // routine. If not, the diagnostics performed after the restart dump but
+  // before the next timestep will be missed on a restart. Restart dumps are
+  // in a binary format. Each rank makes a restart dump.
+
+  // Restarting from restart files is accomplished by running the executable
+  // with "restart restart" as additional command line arguments. The executable
+  // must be identical to that used to generate the restart dumps or else
+  // the behavior may be unpredictable.
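+  // Purely as an illustration (the launcher, rank count, and binary name below
+  // are placeholders, not prescribed by this deck), a job originally launched
+  // as
+  //
+  //   mpirun -np 64 ./lpi_2d_F6_test.Linux
+  //
+  // would be resumed from its checkpoint files by appending those two extra
+  // arguments:
+  //
+  //   mpirun -np 64 ./lpi_2d_F6_test.Linux restart restart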
+ + // Note: restart is not currently working with custom boundary conditions + // (such as the reflux condition) and has not been tested with emission + // turned on. + + if ( step()>0 && should_dump(restart) ) { + static const char * restart_fbase[2] = { "restart/restart0", "restart/restart1" }; + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( restart_fbase[global->rtoggle], step() ); + //END_TURNSTILE; + global->rtoggle^=1; + } + + // Shut down simulation when wall clock time exceeds global->quota_sec. + // Note that the mp_elapsed() is guaranteed to return the same value for all + // processors (i.e., elapsed time on proc #0), and therefore the abort will + // be synchronized across processors. + + if ( step()>0 && global->quota_check_interval && (step()%global->quota_check_interval)==0 ) { + if ( uptime() > global->quota_sec ) { + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( "restart/restart", step() ); + //END_TURNSTILE; + + sim_log( "Restart dump restart completed." ); + sim_log( "Allowed runtime exceeded for this job. Terminating." ); + mp_barrier(); // Just to be safe + halt_mp(); + exit(0); + } + } + +} + + +begin_field_injection { + // Inject a light wave from lhs boundary with E aligned along y + // Use scalar diffraction theory for the Gaussian beam source. (This is approximate). + + // For quiet startup (i.e., so that we don't propagate a delta-function noise + // pulse at time t=0) we multiply by a constant phase term exp(i phi) where: + // phi = k*global->xfocus+atan(h) (3d) + + // Inject from the left a field of the form ey = e0 sin( omega t ) + +# define DY ( grid->y0 + (iy-0.5)*grid->dy - global->ycenter ) +# define DZ ( grid->z0 + (iz-1 )*grid->dz - global->zcenter ) +# define R2 ( DY*DY + DZ*DZ ) +# define R2Z ( DZ*DZ ) +# define PHASE ( -global->omega*t + h*R2Z/(global->width*global->width) ) +# define MASK ( R2Z<=pow(global->mask*global->width,2) ? 
1 : 0 )
+
+  if ( grid->x0==0 ) { // Node is on left boundary
+    double alpha = grid->cvac*grid->dt/grid->dx;
+    double emax_coeff = (4/(1+alpha))*global->omega*grid->dt*global->e0;
+    double prefactor = emax_coeff*sqrt(2/M_PI);
+    double t = grid->dt*step();
+
+    // Compute Rayleigh length in c/wpe
+    double rl = M_PI*global->waist*global->waist/global->lambda;
+
+    double pulse_shape_factor = 1;
+    float pulse_length = 70; // units of 1/wpe
+    float sin_t_tau = sin(0.5*t*M_PI/pulse_length);
+    pulse_shape_factor = ( t<pulse_length ? sin_t_tau*sin_t_tau : 1 );
+    double h = global->xfocus/rl; // Distance / Rayleigh length
+
+    // Loop over all Ey values on left edge of this node
+    for ( int iz=1; iz<=grid->nz+1; ++iz )
+      for ( int iy=1; iy<=grid->ny; ++iy )
+        field(1,iy,iz).ey += prefactor
+                             * cos(PHASE)
+//                           * exp(-R2/(global->width*global->width)) // 3D
+                             * exp(-R2Z/(global->width*global->width))
+                             * MASK * pulse_shape_factor;
+  }
+}
+
+
+begin_particle_injection {
+  // No particle injection for this simulation
+}
+
+
+begin_current_injection {
+  // No current injection for this simulation
+}
+
+begin_particle_collisions {
+  // No particle collisions for this simulation
+}
+
diff --git a/TestScripts/lpi-deck/lpi_2d_F6_test-nx24 b/TestScripts/lpi-deck/lpi_2d_F6_test-nx24
new file mode 100755
index 00000000..7475f131
--- /dev/null
+++ b/TestScripts/lpi-deck/lpi_2d_F6_test-nx24
@@ -0,0 +1,987 @@
+// Stress Tensor(e_hydro)_4/abs(Charge Density(e_hydro)+ 1.0e-8) - (Momentum Density(e_hydro)_Y/abs(Charge Density(e_hydro)+ 1.0e-8)) * (Current Density(e_hydro)_Y/(Charge Density(e_hydro) + 1.0e-8))
+
+//========================================================================
+//
+// LPI 3D deck - Linearly polarized (in y) plane wave incident from left
+// boundary
+//
+// Adapted from Albright's Lightning 3D LPI deck.
+// B. Albright, X-1-PTA; 28 Jan. 2007
+// Lin Yin, X-1-PTA, 23 Feb 2009, for Cerrillos test
+//
+// Executable creates its own directory structure. Remove the old with:
+//
+// rm -rf rundata ehydro Hhydro Hehydro restart poynting velocity particle field
+//========================================================================
+
+// Employ turnstiles to partially serialize the high-volume file writes.
+// In this case, the restart dumps. Set NUM_TURNSTILES to be the desired
+// number of simultaneous writes.
+#define NUM_TURNSTILES 128
+
+begin_globals {
+  double e0;                 // peak amplitude of oscillating electric field
+  double omega;              // angular freq. of the beam
+  int field_interval;        // how often to dump field and hydro
+  int particle_interval;     // how often to dump particle data
+  int poynting_interval;     // how often to compute poynting flux on boundary
+  int restart_interval;      // how often to write restart data
+  int quota_check_interval;  // how often to check if quota exceeded
+  double quota_sec;          // run quota in sec
+  int rtoggle;               // enable save of last 2 restart files for safety
+  int load_particles;        // were particles loaded?
+ double topology_x; // domain topology needed to normalize Poynting diagnostic + double topology_y; + double topology_z; + int mobile_ions; + int H_present; + int He_present; + + // Parameters for 3d Gaussian wave launch + double lambda; + double waist; // how wide the focused beam is + double width; + double zcenter; // center of beam at boundary in z + double ycenter; // center of beam at boundary in y + double xfocus; // how far from boundary to focus + double mask; // # gaussian widths from beam center where I is nonzero + + // Ponyting diagnostic flags - which output to turn on + + // write_backscatter_only flag: when this flag is nonzero, it means to only compute + // poynting data for the lower-x surface. This flag affects both the summed poynting + // data as well as the surface data. + + int write_poynting_data; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only; // nonzero means we only write backscatter diagnostic for fields + + // write_poynting_sum is nonzero if you wish to write a file containing the integrated + // poynting data on one or more surfaces. If write_backscatter_only is nonzero, then + // the output file will be a time series of double precision numbers containing the + // integrated poynting flux at that point in time. If write_backscatter_only is zero, + // then for each time step, six double precision numbers are written, containing the + // poynting flux through the lower x, upper x, lower y, upper y, lower z, and upper z + // surfaces. + + int write_poynting_sum; // nonzero if we wish to write integrated Poynting data + + // write_poynting_faces is probably useless, but I put it here anyway in case it's not. + // When this flag is nonzero, it will print out the poynting flux at each of a 2D array + // of points on the surface. When this flag is turned on and write_backscatter_only is + // nonzero, then only the 2D array of points on the lower-x boundary surface are written + // for each time step. When this flag is turned on and write_bacscatter_only is + // zero, then it will write 2D array data for the lower x, upper x, lower y, upper y, + // lower z, upper z surfaces for each time step. + + int write_poynting_faces; // nonzero if we wish to write Poynting data on sim boundaries + + // write_eb_faces is nonzero when we wish to get the raw e and b data at the boundary + // (e.g., to be used with a filter to distinguish between SRS and SBS backscatter). + // When this flag is on and write_backscatter_only is nonzero, then only the 2D array + // of points on the lower-x boundary surface are written for each time step. When + // this flag is turned on and write_backscatteR_only is zero, then it will write 2D + // array data for the lower x, upper x, lower y, upper y, lower z, upper z surfaces for + // each time step. When turned on, four files are produced: e1, e2, cb1, cb2. The + // values of the quantities printed depend on the face one is considering: for the + // x faces, e1 = ey, e2 = ez, cb1 = cby, cb2 = cbz. Similarly for y and z surfaces, + // but where 1 and 2 stand for (z,x) and (x,y) coordinates, respectively. 
+ + int write_eb_faces; // nonzero if we wish to write E and B data on sim boundaries + + // Needed for movie files + float vthe; // vthe/c + float vthi_He; // vthi_He/c + float vthi_H; // vthi_H/c + int velocity_interval; // how frequently to dump binned velocity space data + + // Dump parameters for standard VPIC output formats + DumpParameters fdParams; + DumpParameters hedParams; + DumpParameters hHdParams; + DumpParameters hHedParams; + std::vector outputParams; +}; + +begin_initialization { + + // System of units + double ec = 4.8032e-10; // stat coulomb + double c_vac = 2.99792458e10; // cm/sec + double m_e = 9.1094e-28; // g + double k_b = 1.6022e-12; // erg/eV + double mec2 = m_e*c_vac*c_vac/k_b; + double mpc2 = mec2*1836.0; + + double cfl_req = 0.98; // How close to Courant should we try to run + double damp = 0; // How much radiation damping + double iv_thick = 2; // Thickness of impermeable vacuum (in cells) + + // Experimental parameters + + double t_e = 600; // electron temperature, eV + double t_i = 150; // ion temperature, eV + double n_e_over_n_crit = 0.05; // n_e/n_crit + double vacuum_wavelength = 527 * 1e-7; // third micron light (cm) + double laser_intensity = 2.5e15 * 1e7; // in ergs/cm^2 (note: 1 W = 1e7 ergs) + + // Simulation parameters + +#if 0 + // 2048 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 64; + double topology_y = 4; + double topology_z = 8; + // single-processor mesh = 128 x 128 x 64 +#endif + +#if 0 + // 14300x4 processors + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 12.0 * 1e-4; + double Lz = 12.0 * 1e-4; + double nx = 6292; + double ny = 440; + double nz = 440; + double topology_x = 143; + double topology_y = 10; + double topology_z = 10; + // single-processor mesh = 44 x 44 x 44 +#endif + +#if 0 + // 2*4096 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 128; + double topology_y = 8; + double topology_z = 8; + // single-processor mesh = 64 x 64 x 64 +#endif + + // This choice of nx, ny, nz and topology values imply that the + // grid is 102 x 42 x 42 cells. The transverse size is slightly smaller than + // in the original deck in order to enable more choices for down-sampling/striding + // the field and hydro output. (The original had 43 cells in each direction, which + // is prime and therefore not strideable with Ben's output format). 
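+  // Reference arithmetic for the decomposition chosen further below (the rank
+  // counts here are examples only): with topology_x = nproc() and
+  // topology_y = topology_z = 1, each rank owns an nx/nproc() x ny x nz block
+  // of cells, so nproc() must divide nx evenly. For this deck's nx = 24,
+  // ny = 1, nz = 549 that gives, e.g.,
+  //
+  //   nproc() = 12  ->  2 x 1 x 549 cells per rank
+  //   nproc() = 24  ->  1 x 1 x 549 cells per rank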
+ +# if 0 + // one processor problem, for debugging + Lx /= topology_x; nx /= topology_x; + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 1; + topology_y = 1; + topology_z = 1; +# endif + +# if 0 + // twoprocessor problem, for debugging + Lx /= (topology_x/2); nx /= (topology_x/2); + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 2; + topology_y = 1; + topology_z = 1; +# endif + +// Run a smaller problem for debugging purposes +#if 0 + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 1.20 * 1e-4; + double Lz = 1.20 * 1e-4; + double nx = 6292; + double ny = 44; + double nz = 44; + double topology_x = 143; + double topology_y = 1; + double topology_z = 1; + + // nproc() x 1 x 1 : + double topology = nproc(); + Lx /= 143; Lx *= topology; + nx /= 143; nx *= topology; + topology_x /= 143; topology_x *= topology; + + // single-processor mesh = 44 x 44 x 44 +#endif + + float box_size_x = 120.0* 1e-4; + float box_size_z = 12.0* 1e-4; + + int mobile_ions = 1; // Whether or not to push ions + double nppc = 512; // Ave. number of particles/cell in ea. species + //double nppc = 32; // Ave. number of particles/cell in ea. species + int load_particles = 1; // Flag to turn on/off particle load + int He_present = 1; + int H_present = 1; + double f_He = 1; // Ratio of number density of He to total ion density + double f_H = 1-f_He; // Ratio of number density of H to total ion density + if ( f_He == 1 ) H_present = 0; + if ( f_H == 1 ) He_present = 0; + +// Here _He is N3+ + // Precompute some useful variables. + double A_H = 1; + double Z_H = 1; + double A_He = 14; + double Z_He = 3; + double mic2_H = mpc2*A_H; + double mic2_He = mpc2*A_He; + double mime_H = mic2_H /mec2; + double mime_He = mic2_He/mec2; + + double uthe = sqrt(t_e/mec2); // vthe/c + double uthi_H = sqrt(t_i/mic2_H); // vthi/c for H + double uthi_He = sqrt(t_i/mic2_He); // vthi/c for He + + // Plasma skin deptth in cm + double delta = (vacuum_wavelength / (2*M_PI) ) / sqrt( n_e_over_n_crit ); + + double n_e = c_vac*c_vac*m_e/(4*M_PI*ec*ec*delta*delta); // electron density in cm^-3 + double debye = uthe*delta; // electron Debye length (cm) + double omega = sqrt( 1/n_e_over_n_crit ); // laser beam freq. in wpe + + double nx = 7776; + double ny = 1; + double nz = 549; + +// Weak Scaling Test --matsekh +#if 1 + // nx = 7776 / {9, 27, 81, 162, 324, 648} == {864, 288, 96, 48, 24, 12} + nx = nx/324; + box_size_x /= 324.; +#endif +// End Weak Scaling Test --matsekh + +#if 0 + // DEBUG - run on 16 proc. 
+ nx = nx/81; // default nx=7776/81==96 --matsekh + box_size_x /= 81.; + +#endif + + double hx = box_size_x/(delta*nx); // in c/wpe + double hz = box_size_z/(delta*nz); // in c/wpe + double hy = hz; + + double cell_size_x = hx*delta/debye; // Cell size in Debye lengths + double cell_size_z = hz*delta/debye; +//double cell_size_y = hy*delta/debye; + + double Lx = nx*hx; // in c/wpe + double Ly = ny*hy; // in c/wpe + double Lz = nz*hz; // in c/wpe + + double topology_x = nproc(); + double topology_y = 1; + double topology_z = 1; + + double f_number = 6; // f/# of beam + double lambda = vacuum_wavelength/delta; // vacuum wavelength in c/wpe + double waist = f_number*lambda; // width of beam at focus in c/wpe + double xfocus = Lx/2; // in c/wpe + double ycenter = 0; // center of spot in y on lhs boundary + double zcenter = 0; // center of spot in z on lhs boundary + double mask = 1.5; // set drive I=0 outside r>mask*width at lhs boundary + double width = waist*sqrt( 1 + (lambda*xfocus/(M_PI*waist*waist))*(lambda*xfocus/(M_PI*waist*waist))); + + // Peak instantaneous E field in "natural units" + double e0 = sqrt( 2*laser_intensity / (m_e*c_vac*c_vac*c_vac*n_e) ); +//e0 = e0*(waist/width); // at entrance (3D Gaussian) + e0 = e0*sqrt(waist/width); // 2D, at entrance + + double dt = cfl_req*courant_length(Lx,Ly,Lz,nx,ny,nz); // in 1/wpe; n.b. c=1 in nat. units + double nsteps_cycle = trunc_granular(2*M_PI/(dt*omega),1)+1; + dt = 2*M_PI/omega/nsteps_cycle; // nsteps_cycle time steps in one laser cycle + + double wpe1ps = 1e-12*c_vac/delta; + + double t_stop = 100*wpe1ps; // Runtime in 1/wpe + int particle_interval = 0; + int poynting_interval = int(M_PI/((omega+1.5)*dt)); // Num. steps between dumping poynting flux + int field_interval = int(0.01*wpe1ps/dt); // Num. steps between saving field, hydro data + int velocity_interval = int(0.1*wpe1ps/dt); // How frequently to dump binned velocity space data + int restart_interval = 10000000; // Num. steps between restart dumps + int quota_check_interval = 20; + + // Ben: This quota thing gracefully terminates after writing a final restart after + // 11.5 hours; if you want a longer time before shutdown, set this value larger. If + // you want the code to just run all weekend, then set it to very long (2400.*3500, e.g.) + + double quota_sec = 23.7*3600; // Run quota in sec. + + // Turn on integrated backscatter poynting diagnostic - right now there is a bug in this, so we + // only write the integrated backscatter time history on the left face. + + int write_poynting_data = 1; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only = 0; // Nonzero means only write lower x face + int write_poynting_sum = 0; // Whether to write integrated Poynting data + int write_poynting_faces = 0; // Whether to write poynting data on sim boundary faces + int write_eb_faces = 0; // Whether to write e and b field data on sim boundary faces + + double N_e = nppc*nx*ny*nz; // Number of macro electrons in box + double Np_e = Lx*Ly*Lz; // "Number" of "physical" electrons in box (nat. units) + double q_e = -Np_e/N_e; // Charge per macro electron + double N_i = N_e; // Number of macro ions of each species in box + double Np_i = Np_e/(Z_H*f_H+Z_He*f_He); // "Number" of "physical" ions of each sp. 
in box + double qi_H = Z_H *f_H *Np_i/N_i; // Charge per H macro ion + double qi_He = Z_He*f_He*Np_i/N_i; // Charge per He macro ion + + // Print simulation parameters + + sim_log("***** Simulation parameters *****"); + sim_log("* Processors: "< 25,000 steps. + num_comm_round = 6; + + global->e0 = e0; + global->omega = omega; + global->field_interval = field_interval; + global->particle_interval = particle_interval; + global->poynting_interval = poynting_interval; + global->restart_interval = restart_interval; + global->quota_check_interval = quota_check_interval; + global->quota_sec = quota_sec; + global->rtoggle = 0; + global->load_particles = load_particles; + global->mobile_ions = mobile_ions; + global->H_present = H_present; + global->He_present = He_present; + global->topology_x = topology_x; + global->topology_y = topology_y; + global->topology_z = topology_z; + global->xfocus = xfocus; + global->ycenter = ycenter; + global->zcenter = zcenter; + global->mask = mask; + global->waist = waist; + global->width = width; + global->lambda = lambda; + + global->write_poynting_data = write_poynting_data; + + global->write_poynting_sum = write_poynting_sum; + global->write_poynting_faces = write_poynting_faces; + global->write_eb_faces = write_eb_faces; + global->write_backscatter_only = write_backscatter_only; + + global->vthe = uthe; + global->vthi_H = uthi_H; + global->vthi_He = uthi_He; + global->velocity_interval = velocity_interval; + + // Set up the species + // Allow additional local particles in case of non-uniformity. + + // Set up grid + sim_log("Setting up computational grid."); + grid->dx = hx; + grid->dy = hy; + grid->dz = hz; + grid->dt = dt; + grid->cvac = 1; + grid->eps0 = 1; + + sim_log("Setting up absorbing mesh."); + define_periodic_grid( 0, -0.5*Ly, -0.5*Lz, // Low corner + Lx, 0.5*Ly, 0.5*Lz, // High corner + nx, ny, nz, // Resolution + topology_x, topology_y, topology_z); // Topology + + if ( rank() == 0) { + set_domain_field_bc( BOUNDARY(-1,0,0), absorb_fields ); + } + if ( rank() == nproc() -1) { + set_domain_field_bc( BOUNDARY( 1,0,0), absorb_fields ); + } + + set_domain_field_bc( BOUNDARY( 0,0, 1), absorb_fields ); + set_domain_field_bc( BOUNDARY( 0,0,-1), absorb_fields ); + + sim_log("Setting up species."); + double max_local_np = 2.0*N_e/nproc(); + double max_local_nm = max_local_np/10.0; + species_t * electron = define_species("electron", -1, 1, max_local_np, max_local_nm, 20, 1); + + // Start with two ion species. We have option to go to Xe and Kr gas fills if + // we need a higher ion/electron macroparticle ratio. + + species_t *ion_H, *ion_He; + if ( mobile_ions ) { + if ( H_present ) ion_H = define_species("H", Z_H, mime_H, max_local_np, max_local_nm, 100, 1); + if ( He_present ) ion_He = define_species("He", Z_He, mime_He, max_local_np, max_local_nm, 100, 1); + } + + // From grid/partition.c: used to determine which domains are on edge +# define RANK_TO_INDEX(rank,ix,iy,iz) BEGIN_PRIMITIVE { \ + int _ix, _iy, _iz; \ + _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ + _iy = _ix/int(global->topology_x); /* iy = iy+gpy*iz */ \ + _ix -= _iy*int(global->topology_x); /* ix = ix */ \ + _iz = _iy/int(global->topology_y); /* iz = iz */ \ + _iy -= _iz*int(global->topology_y); /* iy = iy */ \ + (ix) = _ix; \ + (iy) = _iy; \ + (iz) = _iz; \ + } END_PRIMITIVE + + sim_log("Overriding x boundaries to absorb fields."); + int ix, iy, iz; // Domain location in mesh + RANK_TO_INDEX( int(rank()), ix, iy, iz ); + + // Set up Maxwellian reinjection B.C. 
+ sim_log("Setting up Maxwellian reinjection boundary condition."); + + particle_bc_t * maxwellian_reinjection = + define_particle_bc( maxwellian_reflux( species_list, entropy ) ); + set_reflux_temp( maxwellian_reinjection, electron, uthe, uthe ); + if ( mobile_ions ) { + if ( H_present ) set_reflux_temp( maxwellian_reinjection, ion_H, uthi_H, uthi_H ); + if ( He_present ) set_reflux_temp( maxwellian_reinjection, ion_He, uthi_He, uthi_He ); + } + + // Set up materials + sim_log("Setting up materials."); + define_material( "vacuum", 1 ); + define_field_array( NULL, damp ); + + // Paint the simulation volume with materials and boundary conditions +# define iv_region ( x< hx*iv_thick || x>Lx -hx*iv_thick \ + || z<-Lz/2+hz*iv_thick || z>Lz/2-hz*iv_thick ) /* all boundaries are i.v. */ + set_region_bc( iv_region, maxwellian_reinjection, maxwellian_reinjection, maxwellian_reinjection ); + + // Load particles + if ( load_particles ) { + sim_log("Loading particles."); + // Fast load of particles--don't bother fixing artificial domain correlations + double xmin=grid->x0, xmax=grid->x1; + double ymin=grid->y0, ymax=grid->y1; + double zmin=grid->z0, zmax=grid->z1; + repeat( N_e/(topology_x*topology_y*topology_z) ) { + double x = uniform( rng(0), xmin, xmax ); + double y = uniform( rng(0), ymin, ymax ); + double z = uniform( rng(0), zmin, zmax ); + if ( iv_region ) continue; // Particle fell in iv_region. Don't load. + // third to last arg is "weight," a positive number + inject_particle( electron, x, y, z, + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), -q_e, 0, 0 ); + if ( mobile_ions ) { + if ( H_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_H, x, y, z, + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), qi_H, 0, 0 ); + if ( He_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_He, x, y, z, + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), qi_He, 0, 0 ); + } + } + } + + + /*-------------------------------------------------------------------------- + * New dump definition + *------------------------------------------------------------------------*/ + + /*-------------------------------------------------------------------------- + * Set data output format + * + * This option allows the user to specify the data format for an output + * dump. Legal settings are 'band' and 'band_interleave'. Band-interleave + * format is the native storage format for data in VPIC. For field data, + * this looks something like: + * + * ex0 ey0 ez0 div_e_err0 cbx0 ... ex1 ey1 ez1 div_e_err1 cbx1 ... + * + * Banded data format stores all data of a particular state variable as a + * contiguous array, and is easier for ParaView to process efficiently. + * Banded data looks like: + * + * ex0 ex1 ex2 ... exN ey0 ey1 ey2 ... + * + *------------------------------------------------------------------------*/ + sim_log( "Setting up hydro and field diagnostics." 
); + + global->fdParams.format = band; + sim_log ( "Field output format : band" ); + + global->hedParams.format = band; + sim_log ( "Electron hydro output format : band" ); + + global->hHdParams.format = band; + sim_log ( "Hydrogen hydro output format : band" ); + + global->hHedParams.format = band; + sim_log ( "Helium hydro output format : band" ); + + /*-------------------------------------------------------------------------- + * Set stride + * + * This option allows data down-sampling at output. Data are down-sampled + * in each dimension by the stride specified for that dimension. For + * example, to down-sample the x-dimension of the field data by a factor + * of 2, i.e., half as many data will be output, select: + * + * global->fdParams.stride_x = 2; + * + * The following 2-D example shows down-sampling of a 7x7 grid (nx = 7, + * ny = 7. With ghost-cell padding the actual extents of the grid are 9x9. + * Setting the strides in x and y to equal 2 results in an output grid of + * nx = 4, ny = 4, with actual extents 6x6. + * + * G G G G G G G G G + * G X X X X X X X G + * G X X X X X X X G G G G G G G + * G X X X X X X X G G X X X X G + * G X X X X X X X G ==> G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G G G G G G + * G G G G G G G G G + * + * Note that grid extents in each dimension must be evenly divisible by + * the stride for that dimension: + * + * nx = 150; + * global->fdParams.stride_x = 10; // legal -> 150/10 = 15 + * + * global->fdParams.stride_x = 8; // illegal!!! -> 150/8 = 18.75 + *------------------------------------------------------------------------*/ + + // Strides for field and hydro arrays. Note that here we have defined them + // the same for fields and all hydro species; if desired, we could use different + // strides for each. Also note that strides must divide evenly into the number + // of cells in a given domain. 
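+  // A worked example for the mesh actually used in this deck (numbers are for
+  // illustration only, nothing below is reconfigured): with topology_z = 1 each
+  // domain holds all nz = 549 = 3 * 3 * 61 cells in z, so the only legal
+  // z-strides smaller than 61 are 1, 3, and 9, while the legal x-strides are
+  // further limited by the per-rank cell count nx/nproc().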
+ + // Define strides and test that they evenly divide into grid->nx, ny, nz + int stride_x = 1, stride_y = 1, stride_z = 1; + if ( int(grid->nx)%stride_x ) ERROR(("Stride doesn't evenly divide grid->nx.")); + if ( int(grid->ny)%stride_y ) ERROR(("Stride doesn't evenly divide grid->ny.")); + if ( int(grid->nz)%stride_z ) ERROR(("Stride doesn't evenly divide grid->nz.")); + + //---------------------------------------------------------------------- + // Fields + + // relative path to fields data from global header + sprintf(global->fdParams.baseDir, "field"); + + // base file name for fields output + sprintf(global->fdParams.baseFileName, "fields"); + + // set field strides + global->fdParams.stride_x = stride_x; + global->fdParams.stride_y = stride_y; + global->fdParams.stride_z = stride_z; + sim_log ( "Fields x-stride " << global->fdParams.stride_x ); + sim_log ( "Fields y-stride " << global->fdParams.stride_y ); + sim_log ( "Fields z-stride " << global->fdParams.stride_z ); + + // add field parameters to list + global->outputParams.push_back(&global->fdParams); + + //---------------------------------------------------------------------- + // Electron hydro + + // relative path to electron species data from global header + sprintf(global->hedParams.baseDir, "ehydro"); + + // base file name for fields output + sprintf(global->hedParams.baseFileName, "e_hydro"); + + // set electron hydro strides + global->hedParams.stride_x = stride_x; + global->hedParams.stride_y = stride_y; + global->hedParams.stride_z = stride_z; + sim_log ( "Electron species x-stride " << global->hedParams.stride_x ); + sim_log ( "Electron species y-stride " << global->hedParams.stride_y ); + sim_log ( "Electron species z-stride " << global->hedParams.stride_z ); + + // add electron hydro parameters to list + global->outputParams.push_back(&global->hedParams); + + //---------------------------------------------------------------------- + // Hydrogen hydro + + // relative path to electron species data from global header + sprintf(global->hHdParams.baseDir, "Hhydro"); + + // base file name for fields output + sprintf(global->hHdParams.baseFileName, "H_hydro"); + + // set hydrogen hydro strides + global->hHdParams.stride_x = stride_x; + global->hHdParams.stride_y = stride_y; + global->hHdParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHdParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHdParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHdParams.stride_z ); + + // add hydrogen hydro parameters to list + global->outputParams.push_back(&global->hHdParams); + + //---------------------------------------------------------------------- + // Helium hydro + + // relative path to electron species data from global header + sprintf(global->hHedParams.baseDir, "Hehydro"); + + // base file name for fields output + sprintf(global->hHedParams.baseFileName, "He_hydro"); + + // set helium hydro strides + global->hHedParams.stride_x = stride_x; + global->hHedParams.stride_y = stride_y; + global->hHedParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHedParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHedParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHedParams.stride_z ); + + // add helium hydro parameters to list + global->outputParams.push_back(&global->hHedParams); + + /*----------------------------------------------------------------------- + * Set output fields + * + * It is now possible to select which 
state-variables are output on a + * per-dump basis. Variables are selected by passing an or-list of + * state-variables by name. For example, to only output the x-component + * of the electric field and the y-component of the magnetic field, the + * user would call output_variables like: + * + * global->fdParams.output_variables( ex | cby ); + * + * NOTE: OUTPUT VARIABLES ARE ONLY USED FOR THE BANDED FORMAT. IF THE + * FORMAT IS BAND-INTERLEAVE, ALL VARIABLES ARE OUTPUT AND CALLS TO + * 'output_variables' WILL HAVE NO EFFECT. + * + * ALSO: DEFAULT OUTPUT IS NONE! THIS IS DUE TO THE WAY THAT VPIC + * HANDLES GLOBAL VARIABLES IN THE INPUT DECK AND IS UNAVOIDABLE. + * + * For convenience, the output variable 'all' is defined: + * + * global->fdParams.output_variables( all ); + *------------------------------------------------------------------------*/ + /* CUT AND PASTE AS A STARTING POINT + * REMEMBER TO ADD APPROPRIATE GLOBAL DUMPPARAMETERS VARIABLE + + output_variables( all ); + + output_variables( electric | div_e_err | magnetic | div_b_err | + tca | rhob | current | rhof | + emat | nmat | fmat | cmat ); + + output_variables( current_density | charge_density | + momentum_density | ke_density | stress_tensor ); + */ + + //global->fdParams.output_variables( all ); + global->fdParams.output_variables( electric | magnetic ); + + //global->hedParams.output_variables( all ); + //global->hedParams.output_variables( current_density | momentum_density ); + global->hedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHdParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + + /*-------------------------------------------------------------------------- + * Convenience functions for simlog output + *------------------------------------------------------------------------*/ + char varlist[256]; + + create_field_list(varlist, global->fdParams); + sim_log ( "Fields variable list: " << varlist ); + + create_hydro_list(varlist, global->hedParams); + sim_log ( "Electron species variable list: " << varlist ); + + create_hydro_list(varlist, global->hHdParams); + sim_log ( "Ion species variable list: " << varlist ); + + /*------------------------------------------------------------------------*/ + + sim_log("***Finished with user-specified initialization ***"); + + // Upon completion of the initialization, the following occurs: + // - The synchronization error (tang E, norm B) is computed between domains + // and tang E / norm B are synchronized by averaging where discrepancies + // are encountered. + // - The initial divergence error of the magnetic field is computed and + // one pass of cleaning is done (for good measure) + // - The bound charge density necessary to give the simulation an initially + // clean divergence e is computed. 
+ // - The particle momentum is uncentered from u_0 to u_{-1/2} + // - The user diagnostics are called on the initial state + // - The physics loop is started + // + // The physics loop consists of: + // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2} + // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles) + // - User current injection (adjust field(x,y,z).jfx, jfy, jfz) + // - Advance B from B_0 to B_{1/2} + // - Advance E from E_0 to E_1 + // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz) + // - Advance B from B_{1/2} to B_1 + // - (periodically) Divergence clean electric field + // - (periodically) Divergence clean magnetic field + // - (periodically) Synchronize shared tang e and norm b + // - Increment the time step + // - Call user diagnostics + // - (periodically) Print a status message +} + + +begin_diagnostics { + + int mobile_ions=global->mobile_ions, + H_present=global->H_present, + He_present=global->He_present; + + if ( step()%200==0 ) sim_log("Time step: "<x##_interval>0 && remainder(step(),global->x##_interval)==0) + + // Do a mkdir at time t=0 to ensure we have all the directories we need + if ( step()==0 ) { + dump_mkdir("rundata"); + dump_mkdir("fft"); + dump_mkdir("field"); + dump_mkdir("ehydro"); + dump_mkdir("Hhydro"); + dump_mkdir("Hehydro"); + dump_mkdir("restart"); + dump_mkdir("particle"); + dump_mkdir("poynting"); + dump_mkdir("velocity"); + + // Turn off rundata for now + // dump_grid("rundata/grid"); + // dump_materials("rundata/materials"); + // dump_species("rundata/species"); + global_header("global", global->outputParams); + + } + + // Field and hydro data + + if ( should_dump(field) ) { + field_dump( global->fdParams ); + +#if 1 + if ( global->load_particles ) { + hydro_dump( "electron", global->hedParams ); + if ( global->mobile_ions ) { + if ( global->H_present ) hydro_dump( "H", global->hHdParams ); + if ( global->He_present ) hydro_dump( "He", global->hHedParams ); + } + } +#endif + + } + + // Particle dump data +#if 0 + if ( should_dump(particle) && global->load_particles ) { + dump_particles( "electron", "particle/eparticle" ); + if ( global->mobile_ions ) { + if ( global->H_present ) dump_particles( "H", "particle/Hparticle" ); + if ( global->He_present ) dump_particles( "He", "particle/Heparticle" ); + } + } +#endif + + +#define ALLOCATE(A,LEN,TYPE) \ + if ( !((A)=(TYPE *)malloc((size_t)(LEN)*sizeof(TYPE))) ) ERROR(("Cannot allocate.")); + + //--------------------------------------------------------------------------- + + // Restart dump filenames are by default tagged with the current timestep. + // If you do not want to do this add "0" to the dump_restart arguments. One + // reason to not tag the dumps is if you want only one restart dump (the most + // recent one) to be stored at a time to conserve on file space. Note: A + // restart dump should be the _very_ _last_ dump made in a diagnostics + // routine. If not, the diagnostics performed after the restart dump but + // before the next timestep will be missed on a restart. Restart dumps are + // in a binary format. Each rank makes a restart dump. + + // Restarting from restart files is accomplished by running the executable + // with "restart restart" as additional command line arguments. The executable + // must be identical to that used to generate the restart dumps or else + // the behavior may be unpredictable. 
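+  // The restart block that follows alternates the checkpoint target between
+  // restart/restart0 and restart/restart1 via global->rtoggle, so the two most
+  // recent checkpoints are always kept on disk.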
+ + // Note: restart is not currently working with custom boundary conditions + // (such as the reflux condition) and has not been tested with emission + // turned on. + + if ( step()>0 && should_dump(restart) ) { + static const char * restart_fbase[2] = { "restart/restart0", "restart/restart1" }; + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( restart_fbase[global->rtoggle], step() ); + //END_TURNSTILE; + global->rtoggle^=1; + } + + // Shut down simulation when wall clock time exceeds global->quota_sec. + // Note that the mp_elapsed() is guaranteed to return the same value for all + // processors (i.e., elapsed time on proc #0), and therefore the abort will + // be synchronized across processors. + + if ( step()>0 && global->quota_check_interval && (step()%global->quota_check_interval)==0 ) { + if ( uptime() > global->quota_sec ) { + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( "restart/restart", step() ); + //END_TURNSTILE; + + sim_log( "Restart dump restart completed." ); + sim_log( "Allowed runtime exceeded for this job. Terminating." ); + mp_barrier(); // Just to be safe + halt_mp(); + exit(0); + } + } + +} + + +begin_field_injection { + // Inject a light wave from lhs boundary with E aligned along y + // Use scalar diffraction theory for the Gaussian beam source. (This is approximate). + + // For quiet startup (i.e., so that we don't propagate a delta-function noise + // pulse at time t=0) we multiply by a constant phase term exp(i phi) where: + // phi = k*global->xfocus+atan(h) (3d) + + // Inject from the left a field of the form ey = e0 sin( omega t ) + +# define DY ( grid->y0 + (iy-0.5)*grid->dy - global->ycenter ) +# define DZ ( grid->z0 + (iz-1 )*grid->dz - global->zcenter ) +# define R2 ( DY*DY + DZ*DZ ) +# define R2Z ( DZ*DZ ) +# define PHASE ( -global->omega*t + h*R2Z/(global->width*global->width) ) +# define MASK ( R2Z<=pow(global->mask*global->width,2) ? 
1 : 0 ) + + if ( grid->x0==0 ) { // Node is on left boundary + double alpha = grid->cvac*grid->dt/grid->dx; + double emax_coeff = (4/(1+alpha))*global->omega*grid->dt*global->e0; + double prefactor = emax_coeff*sqrt(2/M_PI); + double t = grid->dt*step(); + + // Compute Rayleigh length in c/wpe + double rl = M_PI*global->waist*global->waist/global->lambda; + + double pulse_shape_factor = 1; + float pulse_length = 70; // units of 1/wpe + float sin_t_tau = sin(0.5*t*M_PI/pulse_length); + pulse_shape_factor = ( txfocus/rl; // Distance / Rayleigh length + + // Loop over all Ey values on left edge of this node + for ( int iz=1; iz<=grid->nz+1; ++iz ) + for ( int iy=1; iy<=grid->ny; ++iy ) + field(1,iy,iz).ey += prefactor + * cos(PHASE) +// * exp(-R2/(global->width*global->width)) // 3D + * exp(-R2Z/(global->width*global->width)) + * MASK * pulse_shape_factor; + } +} + + +begin_particle_injection { + // No particle injection for this simulation +} + + +begin_current_injection { + // No current injection for this simulation +} + +begin_particle_collisions { + // No particle collisions for this simulation +} + diff --git a/TestScripts/lpi-deck/lpi_2d_F6_test-nx288 b/TestScripts/lpi-deck/lpi_2d_F6_test-nx288 new file mode 100755 index 00000000..ef919d91 --- /dev/null +++ b/TestScripts/lpi-deck/lpi_2d_F6_test-nx288 @@ -0,0 +1,987 @@ +// Stress Tensor(e_hydro)_4/abs(Charge Density(e_hydro)+ 1.0e-8) - (Momentum Density(e_hydro)_Y/abs(Charge Density(e_hydro)+ 1.0e-8)) * (Current Density(e_hydro)_Y/(Charge Density(e_hydro) + 1.0e-8)) + +//======================================================================== +// +// LPI 3D deck - Linearly polarized (in y) plane wave incident from left +// boundary +// +// Adapted from Albright's Lightning 3D LPI deck. +// B. Albright, X-1-PTA; 28 Jan. 2007 +// Lin Yin, X-1-PTA, 23 Feb 2009, for Cerrillos test +// +// Executable creates its own directory structure. Remove the old with: +// +// rm -rf rundata ehydro Hhydro Hehydro restart poynting velocity particle field +//======================================================================== + +// Employ turnstiles to partially serialize the high-volume file writes. +// In this case, the restart dumps. Set NUM_TURNSTILES to be the desired +// number of simultaneous writes. +#define NUM_TURNSTILES 128 + +begin_globals { + double e0; // peak amplitude of oscillating electric field + double omega; // angular freq. of the beam + int field_interval; // how often to dump field and hydro + int particle_interval; // how often to dump particle data + int poynting_interval; // how often to compute poynting flux on boundary + int restart_interval; // how often to write restart data + int quota_check_interval; // how often to check if quote exceeded + double quota_sec; // run quota in sec + int rtoggle; // enable save of last 2 restart files for safety + int load_particles; // were particles loaded? 
+ double topology_x; // domain topology needed to normalize Poynting diagnostic + double topology_y; + double topology_z; + int mobile_ions; + int H_present; + int He_present; + + // Parameters for 3d Gaussian wave launch + double lambda; + double waist; // how wide the focused beam is + double width; + double zcenter; // center of beam at boundary in z + double ycenter; // center of beam at boundary in y + double xfocus; // how far from boundary to focus + double mask; // # gaussian widths from beam center where I is nonzero + + // Ponyting diagnostic flags - which output to turn on + + // write_backscatter_only flag: when this flag is nonzero, it means to only compute + // poynting data for the lower-x surface. This flag affects both the summed poynting + // data as well as the surface data. + + int write_poynting_data; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only; // nonzero means we only write backscatter diagnostic for fields + + // write_poynting_sum is nonzero if you wish to write a file containing the integrated + // poynting data on one or more surfaces. If write_backscatter_only is nonzero, then + // the output file will be a time series of double precision numbers containing the + // integrated poynting flux at that point in time. If write_backscatter_only is zero, + // then for each time step, six double precision numbers are written, containing the + // poynting flux through the lower x, upper x, lower y, upper y, lower z, and upper z + // surfaces. + + int write_poynting_sum; // nonzero if we wish to write integrated Poynting data + + // write_poynting_faces is probably useless, but I put it here anyway in case it's not. + // When this flag is nonzero, it will print out the poynting flux at each of a 2D array + // of points on the surface. When this flag is turned on and write_backscatter_only is + // nonzero, then only the 2D array of points on the lower-x boundary surface are written + // for each time step. When this flag is turned on and write_bacscatter_only is + // zero, then it will write 2D array data for the lower x, upper x, lower y, upper y, + // lower z, upper z surfaces for each time step. + + int write_poynting_faces; // nonzero if we wish to write Poynting data on sim boundaries + + // write_eb_faces is nonzero when we wish to get the raw e and b data at the boundary + // (e.g., to be used with a filter to distinguish between SRS and SBS backscatter). + // When this flag is on and write_backscatter_only is nonzero, then only the 2D array + // of points on the lower-x boundary surface are written for each time step. When + // this flag is turned on and write_backscatteR_only is zero, then it will write 2D + // array data for the lower x, upper x, lower y, upper y, lower z, upper z surfaces for + // each time step. When turned on, four files are produced: e1, e2, cb1, cb2. The + // values of the quantities printed depend on the face one is considering: for the + // x faces, e1 = ey, e2 = ez, cb1 = cby, cb2 = cbz. Similarly for y and z surfaces, + // but where 1 and 2 stand for (z,x) and (x,y) coordinates, respectively. 
+ + int write_eb_faces; // nonzero if we wish to write E and B data on sim boundaries + + // Needed for movie files + float vthe; // vthe/c + float vthi_He; // vthi_He/c + float vthi_H; // vthi_H/c + int velocity_interval; // how frequently to dump binned velocity space data + + // Dump parameters for standard VPIC output formats + DumpParameters fdParams; + DumpParameters hedParams; + DumpParameters hHdParams; + DumpParameters hHedParams; + std::vector outputParams; +}; + +begin_initialization { + + // System of units + double ec = 4.8032e-10; // stat coulomb + double c_vac = 2.99792458e10; // cm/sec + double m_e = 9.1094e-28; // g + double k_b = 1.6022e-12; // erg/eV + double mec2 = m_e*c_vac*c_vac/k_b; + double mpc2 = mec2*1836.0; + + double cfl_req = 0.98; // How close to Courant should we try to run + double damp = 0; // How much radiation damping + double iv_thick = 2; // Thickness of impermeable vacuum (in cells) + + // Experimental parameters + + double t_e = 600; // electron temperature, eV + double t_i = 150; // ion temperature, eV + double n_e_over_n_crit = 0.05; // n_e/n_crit + double vacuum_wavelength = 527 * 1e-7; // third micron light (cm) + double laser_intensity = 2.5e15 * 1e7; // in ergs/cm^2 (note: 1 W = 1e7 ergs) + + // Simulation parameters + +#if 0 + // 2048 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 64; + double topology_y = 4; + double topology_z = 8; + // single-processor mesh = 128 x 128 x 64 +#endif + +#if 0 + // 14300x4 processors + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 12.0 * 1e-4; + double Lz = 12.0 * 1e-4; + double nx = 6292; + double ny = 440; + double nz = 440; + double topology_x = 143; + double topology_y = 10; + double topology_z = 10; + // single-processor mesh = 44 x 44 x 44 +#endif + +#if 0 + // 2*4096 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 128; + double topology_y = 8; + double topology_z = 8; + // single-processor mesh = 64 x 64 x 64 +#endif + + // This choice of nx, ny, nz and topology values imply that the + // grid is 102 x 42 x 42 cells. The transverse size is slightly smaller than + // in the original deck in order to enable more choices for down-sampling/striding + // the field and hydro output. (The original had 43 cells in each direction, which + // is prime and therefore not strideable with Ben's output format). 
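+  // Spelling out the divisibility point above with a small worked example:
+  // 43 is prime, so a 43-cell dimension only admits strides of 1 and 43,
+  // whereas a 42-cell dimension (42 = 2 * 3 * 7) can be down-sampled with
+  // strides of 2, 3, 6, 7, 14, or 21.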
+ +# if 0 + // one processor problem, for debugging + Lx /= topology_x; nx /= topology_x; + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 1; + topology_y = 1; + topology_z = 1; +# endif + +# if 0 + // twoprocessor problem, for debugging + Lx /= (topology_x/2); nx /= (topology_x/2); + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 2; + topology_y = 1; + topology_z = 1; +# endif + +// Run a smaller problem for debugging purposes +#if 0 + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 1.20 * 1e-4; + double Lz = 1.20 * 1e-4; + double nx = 6292; + double ny = 44; + double nz = 44; + double topology_x = 143; + double topology_y = 1; + double topology_z = 1; + + // nproc() x 1 x 1 : + double topology = nproc(); + Lx /= 143; Lx *= topology; + nx /= 143; nx *= topology; + topology_x /= 143; topology_x *= topology; + + // single-processor mesh = 44 x 44 x 44 +#endif + + float box_size_x = 120.0* 1e-4; + float box_size_z = 12.0* 1e-4; + + int mobile_ions = 1; // Whether or not to push ions + double nppc = 512; // Ave. number of particles/cell in ea. species + //double nppc = 32; // Ave. number of particles/cell in ea. species + int load_particles = 1; // Flag to turn on/off particle load + int He_present = 1; + int H_present = 1; + double f_He = 1; // Ratio of number density of He to total ion density + double f_H = 1-f_He; // Ratio of number density of H to total ion density + if ( f_He == 1 ) H_present = 0; + if ( f_H == 1 ) He_present = 0; + +// Here _He is N3+ + // Precompute some useful variables. + double A_H = 1; + double Z_H = 1; + double A_He = 14; + double Z_He = 3; + double mic2_H = mpc2*A_H; + double mic2_He = mpc2*A_He; + double mime_H = mic2_H /mec2; + double mime_He = mic2_He/mec2; + + double uthe = sqrt(t_e/mec2); // vthe/c + double uthi_H = sqrt(t_i/mic2_H); // vthi/c for H + double uthi_He = sqrt(t_i/mic2_He); // vthi/c for He + + // Plasma skin deptth in cm + double delta = (vacuum_wavelength / (2*M_PI) ) / sqrt( n_e_over_n_crit ); + + double n_e = c_vac*c_vac*m_e/(4*M_PI*ec*ec*delta*delta); // electron density in cm^-3 + double debye = uthe*delta; // electron Debye length (cm) + double omega = sqrt( 1/n_e_over_n_crit ); // laser beam freq. in wpe + + double nx = 7776; + double ny = 1; + double nz = 549; + +// Weak Scaling Test --matsekh +#if 1 + // nx = 7776 / {9, 27, 81, 162, 324, 648} == {864, 288, 96, 48, 24, 12} + nx = nx/27; + box_size_x /= 27.; +#endif +// End Weak Scaling Test --matsekh + +#if 0 + // DEBUG - run on 16 proc. 
+ nx = nx/81; // default nx=7776/81==96 --matsekh + box_size_x /= 81.; + +#endif + + double hx = box_size_x/(delta*nx); // in c/wpe + double hz = box_size_z/(delta*nz); // in c/wpe + double hy = hz; + + double cell_size_x = hx*delta/debye; // Cell size in Debye lengths + double cell_size_z = hz*delta/debye; +//double cell_size_y = hy*delta/debye; + + double Lx = nx*hx; // in c/wpe + double Ly = ny*hy; // in c/wpe + double Lz = nz*hz; // in c/wpe + + double topology_x = nproc(); + double topology_y = 1; + double topology_z = 1; + + double f_number = 6; // f/# of beam + double lambda = vacuum_wavelength/delta; // vacuum wavelength in c/wpe + double waist = f_number*lambda; // width of beam at focus in c/wpe + double xfocus = Lx/2; // in c/wpe + double ycenter = 0; // center of spot in y on lhs boundary + double zcenter = 0; // center of spot in z on lhs boundary + double mask = 1.5; // set drive I=0 outside r>mask*width at lhs boundary + double width = waist*sqrt( 1 + (lambda*xfocus/(M_PI*waist*waist))*(lambda*xfocus/(M_PI*waist*waist))); + + // Peak instantaneous E field in "natural units" + double e0 = sqrt( 2*laser_intensity / (m_e*c_vac*c_vac*c_vac*n_e) ); +//e0 = e0*(waist/width); // at entrance (3D Gaussian) + e0 = e0*sqrt(waist/width); // 2D, at entrance + + double dt = cfl_req*courant_length(Lx,Ly,Lz,nx,ny,nz); // in 1/wpe; n.b. c=1 in nat. units + double nsteps_cycle = trunc_granular(2*M_PI/(dt*omega),1)+1; + dt = 2*M_PI/omega/nsteps_cycle; // nsteps_cycle time steps in one laser cycle + + double wpe1ps = 1e-12*c_vac/delta; + + double t_stop = 100*wpe1ps; // Runtime in 1/wpe + int particle_interval = 0; + int poynting_interval = int(M_PI/((omega+1.5)*dt)); // Num. steps between dumping poynting flux + int field_interval = int(0.01*wpe1ps/dt); // Num. steps between saving field, hydro data + int velocity_interval = int(0.1*wpe1ps/dt); // How frequently to dump binned velocity space data + int restart_interval = 10000000; // Num. steps between restart dumps + int quota_check_interval = 20; + + // Ben: This quota thing gracefully terminates after writing a final restart after + // 11.5 hours; if you want a longer time before shutdown, set this value larger. If + // you want the code to just run all weekend, then set it to very long (2400.*3500, e.g.) + + double quota_sec = 23.7*3600; // Run quota in sec. + + // Turn on integrated backscatter poynting diagnostic - right now there is a bug in this, so we + // only write the integrated backscatter time history on the left face. + + int write_poynting_data = 1; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only = 0; // Nonzero means only write lower x face + int write_poynting_sum = 0; // Whether to write integrated Poynting data + int write_poynting_faces = 0; // Whether to write poynting data on sim boundary faces + int write_eb_faces = 0; // Whether to write e and b field data on sim boundary faces + + double N_e = nppc*nx*ny*nz; // Number of macro electrons in box + double Np_e = Lx*Ly*Lz; // "Number" of "physical" electrons in box (nat. units) + double q_e = -Np_e/N_e; // Charge per macro electron + double N_i = N_e; // Number of macro ions of each species in box + double Np_i = Np_e/(Z_H*f_H+Z_He*f_He); // "Number" of "physical" ions of each sp. 
in box + double qi_H = Z_H *f_H *Np_i/N_i; // Charge per H macro ion + double qi_He = Z_He*f_He*Np_i/N_i; // Charge per He macro ion + + // Print simulation parameters + + sim_log("***** Simulation parameters *****"); + sim_log("* Processors: "< 25,000 steps. + num_comm_round = 6; + + global->e0 = e0; + global->omega = omega; + global->field_interval = field_interval; + global->particle_interval = particle_interval; + global->poynting_interval = poynting_interval; + global->restart_interval = restart_interval; + global->quota_check_interval = quota_check_interval; + global->quota_sec = quota_sec; + global->rtoggle = 0; + global->load_particles = load_particles; + global->mobile_ions = mobile_ions; + global->H_present = H_present; + global->He_present = He_present; + global->topology_x = topology_x; + global->topology_y = topology_y; + global->topology_z = topology_z; + global->xfocus = xfocus; + global->ycenter = ycenter; + global->zcenter = zcenter; + global->mask = mask; + global->waist = waist; + global->width = width; + global->lambda = lambda; + + global->write_poynting_data = write_poynting_data; + + global->write_poynting_sum = write_poynting_sum; + global->write_poynting_faces = write_poynting_faces; + global->write_eb_faces = write_eb_faces; + global->write_backscatter_only = write_backscatter_only; + + global->vthe = uthe; + global->vthi_H = uthi_H; + global->vthi_He = uthi_He; + global->velocity_interval = velocity_interval; + + // Set up the species + // Allow additional local particles in case of non-uniformity. + + // Set up grid + sim_log("Setting up computational grid."); + grid->dx = hx; + grid->dy = hy; + grid->dz = hz; + grid->dt = dt; + grid->cvac = 1; + grid->eps0 = 1; + + sim_log("Setting up absorbing mesh."); + define_periodic_grid( 0, -0.5*Ly, -0.5*Lz, // Low corner + Lx, 0.5*Ly, 0.5*Lz, // High corner + nx, ny, nz, // Resolution + topology_x, topology_y, topology_z); // Topology + + if ( rank() == 0) { + set_domain_field_bc( BOUNDARY(-1,0,0), absorb_fields ); + } + if ( rank() == nproc() -1) { + set_domain_field_bc( BOUNDARY( 1,0,0), absorb_fields ); + } + + set_domain_field_bc( BOUNDARY( 0,0, 1), absorb_fields ); + set_domain_field_bc( BOUNDARY( 0,0,-1), absorb_fields ); + + sim_log("Setting up species."); + double max_local_np = 2.0*N_e/nproc(); + double max_local_nm = max_local_np/10.0; + species_t * electron = define_species("electron", -1, 1, max_local_np, max_local_nm, 20, 1); + + // Start with two ion species. We have option to go to Xe and Kr gas fills if + // we need a higher ion/electron macroparticle ratio. + + species_t *ion_H, *ion_He; + if ( mobile_ions ) { + if ( H_present ) ion_H = define_species("H", Z_H, mime_H, max_local_np, max_local_nm, 100, 1); + if ( He_present ) ion_He = define_species("He", Z_He, mime_He, max_local_np, max_local_nm, 100, 1); + } + + // From grid/partition.c: used to determine which domains are on edge +# define RANK_TO_INDEX(rank,ix,iy,iz) BEGIN_PRIMITIVE { \ + int _ix, _iy, _iz; \ + _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ + _iy = _ix/int(global->topology_x); /* iy = iy+gpy*iz */ \ + _ix -= _iy*int(global->topology_x); /* ix = ix */ \ + _iz = _iy/int(global->topology_y); /* iz = iz */ \ + _iy -= _iz*int(global->topology_y); /* iy = iy */ \ + (ix) = _ix; \ + (iy) = _iy; \ + (iz) = _iz; \ + } END_PRIMITIVE + + sim_log("Overriding x boundaries to absorb fields."); + int ix, iy, iz; // Domain location in mesh + RANK_TO_INDEX( int(rank()), ix, iy, iz ); + + // Set up Maxwellian reinjection B.C. 
+ sim_log("Setting up Maxwellian reinjection boundary condition."); + + particle_bc_t * maxwellian_reinjection = + define_particle_bc( maxwellian_reflux( species_list, entropy ) ); + set_reflux_temp( maxwellian_reinjection, electron, uthe, uthe ); + if ( mobile_ions ) { + if ( H_present ) set_reflux_temp( maxwellian_reinjection, ion_H, uthi_H, uthi_H ); + if ( He_present ) set_reflux_temp( maxwellian_reinjection, ion_He, uthi_He, uthi_He ); + } + + // Set up materials + sim_log("Setting up materials."); + define_material( "vacuum", 1 ); + define_field_array( NULL, damp ); + + // Paint the simulation volume with materials and boundary conditions +# define iv_region ( x< hx*iv_thick || x>Lx -hx*iv_thick \ + || z<-Lz/2+hz*iv_thick || z>Lz/2-hz*iv_thick ) /* all boundaries are i.v. */ + set_region_bc( iv_region, maxwellian_reinjection, maxwellian_reinjection, maxwellian_reinjection ); + + // Load particles + if ( load_particles ) { + sim_log("Loading particles."); + // Fast load of particles--don't bother fixing artificial domain correlations + double xmin=grid->x0, xmax=grid->x1; + double ymin=grid->y0, ymax=grid->y1; + double zmin=grid->z0, zmax=grid->z1; + repeat( N_e/(topology_x*topology_y*topology_z) ) { + double x = uniform( rng(0), xmin, xmax ); + double y = uniform( rng(0), ymin, ymax ); + double z = uniform( rng(0), zmin, zmax ); + if ( iv_region ) continue; // Particle fell in iv_region. Don't load. + // third to last arg is "weight," a positive number + inject_particle( electron, x, y, z, + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), -q_e, 0, 0 ); + if ( mobile_ions ) { + if ( H_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_H, x, y, z, + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), qi_H, 0, 0 ); + if ( He_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_He, x, y, z, + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), qi_He, 0, 0 ); + } + } + } + + + /*-------------------------------------------------------------------------- + * New dump definition + *------------------------------------------------------------------------*/ + + /*-------------------------------------------------------------------------- + * Set data output format + * + * This option allows the user to specify the data format for an output + * dump. Legal settings are 'band' and 'band_interleave'. Band-interleave + * format is the native storage format for data in VPIC. For field data, + * this looks something like: + * + * ex0 ey0 ez0 div_e_err0 cbx0 ... ex1 ey1 ez1 div_e_err1 cbx1 ... + * + * Banded data format stores all data of a particular state variable as a + * contiguous array, and is easier for ParaView to process efficiently. + * Banded data looks like: + * + * ex0 ex1 ex2 ... exN ey0 ey1 ey2 ... + * + *------------------------------------------------------------------------*/ + sim_log( "Setting up hydro and field diagnostics." 
); + + global->fdParams.format = band; + sim_log ( "Field output format : band" ); + + global->hedParams.format = band; + sim_log ( "Electron hydro output format : band" ); + + global->hHdParams.format = band; + sim_log ( "Hydrogen hydro output format : band" ); + + global->hHedParams.format = band; + sim_log ( "Helium hydro output format : band" ); + + /*-------------------------------------------------------------------------- + * Set stride + * + * This option allows data down-sampling at output. Data are down-sampled + * in each dimension by the stride specified for that dimension. For + * example, to down-sample the x-dimension of the field data by a factor + * of 2, i.e., half as many data will be output, select: + * + * global->fdParams.stride_x = 2; + * + * The following 2-D example shows down-sampling of a 7x7 grid (nx = 7, + * ny = 7. With ghost-cell padding the actual extents of the grid are 9x9. + * Setting the strides in x and y to equal 2 results in an output grid of + * nx = 4, ny = 4, with actual extents 6x6. + * + * G G G G G G G G G + * G X X X X X X X G + * G X X X X X X X G G G G G G G + * G X X X X X X X G G X X X X G + * G X X X X X X X G ==> G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G G G G G G + * G G G G G G G G G + * + * Note that grid extents in each dimension must be evenly divisible by + * the stride for that dimension: + * + * nx = 150; + * global->fdParams.stride_x = 10; // legal -> 150/10 = 15 + * + * global->fdParams.stride_x = 8; // illegal!!! -> 150/8 = 18.75 + *------------------------------------------------------------------------*/ + + // Strides for field and hydro arrays. Note that here we have defined them + // the same for fields and all hydro species; if desired, we could use different + // strides for each. Also note that strides must divide evenly into the number + // of cells in a given domain. 
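+
+  // A small sketch of one way to honor that divisibility requirement
+  // automatically (the helper name pick_stride is hypothetical and not part of
+  // this deck): start from a requested stride and fall back until it divides
+  // the local cell count evenly.
+  //
+  //   auto pick_stride = []( int n_local, int requested ) {
+  //     int s = requested > 0 ? requested : 1;
+  //     while ( s > 1 && n_local%s ) s--;   // e.g. pick_stride( 96, 7 ) == 6
+  //     return s;
+  //   };
+  //
+  //   // e.g. stride_x = pick_stride( int(grid->nx), 2 ); // 2 when grid->nx is even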
+ + // Define strides and test that they evenly divide into grid->nx, ny, nz + int stride_x = 1, stride_y = 1, stride_z = 1; + if ( int(grid->nx)%stride_x ) ERROR(("Stride doesn't evenly divide grid->nx.")); + if ( int(grid->ny)%stride_y ) ERROR(("Stride doesn't evenly divide grid->ny.")); + if ( int(grid->nz)%stride_z ) ERROR(("Stride doesn't evenly divide grid->nz.")); + + //---------------------------------------------------------------------- + // Fields + + // relative path to fields data from global header + sprintf(global->fdParams.baseDir, "field"); + + // base file name for fields output + sprintf(global->fdParams.baseFileName, "fields"); + + // set field strides + global->fdParams.stride_x = stride_x; + global->fdParams.stride_y = stride_y; + global->fdParams.stride_z = stride_z; + sim_log ( "Fields x-stride " << global->fdParams.stride_x ); + sim_log ( "Fields y-stride " << global->fdParams.stride_y ); + sim_log ( "Fields z-stride " << global->fdParams.stride_z ); + + // add field parameters to list + global->outputParams.push_back(&global->fdParams); + + //---------------------------------------------------------------------- + // Electron hydro + + // relative path to electron species data from global header + sprintf(global->hedParams.baseDir, "ehydro"); + + // base file name for fields output + sprintf(global->hedParams.baseFileName, "e_hydro"); + + // set electron hydro strides + global->hedParams.stride_x = stride_x; + global->hedParams.stride_y = stride_y; + global->hedParams.stride_z = stride_z; + sim_log ( "Electron species x-stride " << global->hedParams.stride_x ); + sim_log ( "Electron species y-stride " << global->hedParams.stride_y ); + sim_log ( "Electron species z-stride " << global->hedParams.stride_z ); + + // add electron hydro parameters to list + global->outputParams.push_back(&global->hedParams); + + //---------------------------------------------------------------------- + // Hydrogen hydro + + // relative path to electron species data from global header + sprintf(global->hHdParams.baseDir, "Hhydro"); + + // base file name for fields output + sprintf(global->hHdParams.baseFileName, "H_hydro"); + + // set hydrogen hydro strides + global->hHdParams.stride_x = stride_x; + global->hHdParams.stride_y = stride_y; + global->hHdParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHdParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHdParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHdParams.stride_z ); + + // add hydrogen hydro parameters to list + global->outputParams.push_back(&global->hHdParams); + + //---------------------------------------------------------------------- + // Helium hydro + + // relative path to electron species data from global header + sprintf(global->hHedParams.baseDir, "Hehydro"); + + // base file name for fields output + sprintf(global->hHedParams.baseFileName, "He_hydro"); + + // set helium hydro strides + global->hHedParams.stride_x = stride_x; + global->hHedParams.stride_y = stride_y; + global->hHedParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHedParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHedParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHedParams.stride_z ); + + // add helium hydro parameters to list + global->outputParams.push_back(&global->hHedParams); + + /*----------------------------------------------------------------------- + * Set output fields + * + * It is now possible to select which 
state-variables are output on a + * per-dump basis. Variables are selected by passing an or-list of + * state-variables by name. For example, to only output the x-component + * of the electric field and the y-component of the magnetic field, the + * user would call output_variables like: + * + * global->fdParams.output_variables( ex | cby ); + * + * NOTE: OUTPUT VARIABLES ARE ONLY USED FOR THE BANDED FORMAT. IF THE + * FORMAT IS BAND-INTERLEAVE, ALL VARIABLES ARE OUTPUT AND CALLS TO + * 'output_variables' WILL HAVE NO EFFECT. + * + * ALSO: DEFAULT OUTPUT IS NONE! THIS IS DUE TO THE WAY THAT VPIC + * HANDLES GLOBAL VARIABLES IN THE INPUT DECK AND IS UNAVOIDABLE. + * + * For convenience, the output variable 'all' is defined: + * + * global->fdParams.output_variables( all ); + *------------------------------------------------------------------------*/ + /* CUT AND PASTE AS A STARTING POINT + * REMEMBER TO ADD APPROPRIATE GLOBAL DUMPPARAMETERS VARIABLE + + output_variables( all ); + + output_variables( electric | div_e_err | magnetic | div_b_err | + tca | rhob | current | rhof | + emat | nmat | fmat | cmat ); + + output_variables( current_density | charge_density | + momentum_density | ke_density | stress_tensor ); + */ + + //global->fdParams.output_variables( all ); + global->fdParams.output_variables( electric | magnetic ); + + //global->hedParams.output_variables( all ); + //global->hedParams.output_variables( current_density | momentum_density ); + global->hedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHdParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + + /*-------------------------------------------------------------------------- + * Convenience functions for simlog output + *------------------------------------------------------------------------*/ + char varlist[256]; + + create_field_list(varlist, global->fdParams); + sim_log ( "Fields variable list: " << varlist ); + + create_hydro_list(varlist, global->hedParams); + sim_log ( "Electron species variable list: " << varlist ); + + create_hydro_list(varlist, global->hHdParams); + sim_log ( "Ion species variable list: " << varlist ); + + /*------------------------------------------------------------------------*/ + + sim_log("***Finished with user-specified initialization ***"); + + // Upon completion of the initialization, the following occurs: + // - The synchronization error (tang E, norm B) is computed between domains + // and tang E / norm B are synchronized by averaging where discrepancies + // are encountered. + // - The initial divergence error of the magnetic field is computed and + // one pass of cleaning is done (for good measure) + // - The bound charge density necessary to give the simulation an initially + // clean divergence e is computed. 
+ // - The particle momentum is uncentered from u_0 to u_{-1/2} + // - The user diagnostics are called on the initial state + // - The physics loop is started + // + // The physics loop consists of: + // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2} + // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles) + // - User current injection (adjust field(x,y,z).jfx, jfy, jfz) + // - Advance B from B_0 to B_{1/2} + // - Advance E from E_0 to E_1 + // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz) + // - Advance B from B_{1/2} to B_1 + // - (periodically) Divergence clean electric field + // - (periodically) Divergence clean magnetic field + // - (periodically) Synchronize shared tang e and norm b + // - Increment the time step + // - Call user diagnostics + // - (periodically) Print a status message +} + + +begin_diagnostics { + + int mobile_ions=global->mobile_ions, + H_present=global->H_present, + He_present=global->He_present; + + if ( step()%200==0 ) sim_log("Time step: "<x##_interval>0 && remainder(step(),global->x##_interval)==0) + + // Do a mkdir at time t=0 to ensure we have all the directories we need + if ( step()==0 ) { + dump_mkdir("rundata"); + dump_mkdir("fft"); + dump_mkdir("field"); + dump_mkdir("ehydro"); + dump_mkdir("Hhydro"); + dump_mkdir("Hehydro"); + dump_mkdir("restart"); + dump_mkdir("particle"); + dump_mkdir("poynting"); + dump_mkdir("velocity"); + + // Turn off rundata for now + // dump_grid("rundata/grid"); + // dump_materials("rundata/materials"); + // dump_species("rundata/species"); + global_header("global", global->outputParams); + + } + + // Field and hydro data + + if ( should_dump(field) ) { + field_dump( global->fdParams ); + +#if 1 + if ( global->load_particles ) { + hydro_dump( "electron", global->hedParams ); + if ( global->mobile_ions ) { + if ( global->H_present ) hydro_dump( "H", global->hHdParams ); + if ( global->He_present ) hydro_dump( "He", global->hHedParams ); + } + } +#endif + + } + + // Particle dump data +#if 0 + if ( should_dump(particle) && global->load_particles ) { + dump_particles( "electron", "particle/eparticle" ); + if ( global->mobile_ions ) { + if ( global->H_present ) dump_particles( "H", "particle/Hparticle" ); + if ( global->He_present ) dump_particles( "He", "particle/Heparticle" ); + } + } +#endif + + +#define ALLOCATE(A,LEN,TYPE) \ + if ( !((A)=(TYPE *)malloc((size_t)(LEN)*sizeof(TYPE))) ) ERROR(("Cannot allocate.")); + + //--------------------------------------------------------------------------- + + // Restart dump filenames are by default tagged with the current timestep. + // If you do not want to do this add "0" to the dump_restart arguments. One + // reason to not tag the dumps is if you want only one restart dump (the most + // recent one) to be stored at a time to conserve on file space. Note: A + // restart dump should be the _very_ _last_ dump made in a diagnostics + // routine. If not, the diagnostics performed after the restart dump but + // before the next timestep will be missed on a restart. Restart dumps are + // in a binary format. Each rank makes a restart dump. + + // Restarting from restart files is accomplished by running the executable + // with "restart restart" as additional command line arguments. The executable + // must be identical to that used to generate the restart dumps or else + // the behavior may be unpredictable. 
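+
+  // As a concrete example of the above (the MPI launcher, process count, and
+  // binary name are placeholders, not prescribed by this deck):
+  //
+  //   mpirun -np 16 ./lpi_2d_F6_test restart restart
+  //
+  // The checkpoint files themselves are written under restart/ by the code
+  // below (alternating restart0/restart1 via rtoggle, plus a plain "restart"
+  // dump when the run quota is reached).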
+ + // Note: restart is not currently working with custom boundary conditions + // (such as the reflux condition) and has not been tested with emission + // turned on. + + if ( step()>0 && should_dump(restart) ) { + static const char * restart_fbase[2] = { "restart/restart0", "restart/restart1" }; + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( restart_fbase[global->rtoggle], step() ); + //END_TURNSTILE; + global->rtoggle^=1; + } + + // Shut down simulation when wall clock time exceeds global->quota_sec. + // Note that the mp_elapsed() is guaranteed to return the same value for all + // processors (i.e., elapsed time on proc #0), and therefore the abort will + // be synchronized across processors. + + if ( step()>0 && global->quota_check_interval && (step()%global->quota_check_interval)==0 ) { + if ( uptime() > global->quota_sec ) { + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( "restart/restart", step() ); + //END_TURNSTILE; + + sim_log( "Restart dump restart completed." ); + sim_log( "Allowed runtime exceeded for this job. Terminating." ); + mp_barrier(); // Just to be safe + halt_mp(); + exit(0); + } + } + +} + + +begin_field_injection { + // Inject a light wave from lhs boundary with E aligned along y + // Use scalar diffraction theory for the Gaussian beam source. (This is approximate). + + // For quiet startup (i.e., so that we don't propagate a delta-function noise + // pulse at time t=0) we multiply by a constant phase term exp(i phi) where: + // phi = k*global->xfocus+atan(h) (3d) + + // Inject from the left a field of the form ey = e0 sin( omega t ) + +# define DY ( grid->y0 + (iy-0.5)*grid->dy - global->ycenter ) +# define DZ ( grid->z0 + (iz-1 )*grid->dz - global->zcenter ) +# define R2 ( DY*DY + DZ*DZ ) +# define R2Z ( DZ*DZ ) +# define PHASE ( -global->omega*t + h*R2Z/(global->width*global->width) ) +# define MASK ( R2Z<=pow(global->mask*global->width,2) ? 
1 : 0 ) + + if ( grid->x0==0 ) { // Node is on left boundary + double alpha = grid->cvac*grid->dt/grid->dx; + double emax_coeff = (4/(1+alpha))*global->omega*grid->dt*global->e0; + double prefactor = emax_coeff*sqrt(2/M_PI); + double t = grid->dt*step(); + + // Compute Rayleigh length in c/wpe + double rl = M_PI*global->waist*global->waist/global->lambda; + + double pulse_shape_factor = 1; + float pulse_length = 70; // units of 1/wpe + float sin_t_tau = sin(0.5*t*M_PI/pulse_length); + pulse_shape_factor = ( txfocus/rl; // Distance / Rayleigh length + + // Loop over all Ey values on left edge of this node + for ( int iz=1; iz<=grid->nz+1; ++iz ) + for ( int iy=1; iy<=grid->ny; ++iy ) + field(1,iy,iz).ey += prefactor + * cos(PHASE) +// * exp(-R2/(global->width*global->width)) // 3D + * exp(-R2Z/(global->width*global->width)) + * MASK * pulse_shape_factor; + } +} + + +begin_particle_injection { + // No particle injection for this simulation +} + + +begin_current_injection { + // No current injection for this simulation +} + +begin_particle_collisions { + // No particle collisions for this simulation +} + diff --git a/TestScripts/lpi-deck/lpi_2d_F6_test-nx48 b/TestScripts/lpi-deck/lpi_2d_F6_test-nx48 new file mode 100755 index 00000000..fe123c61 --- /dev/null +++ b/TestScripts/lpi-deck/lpi_2d_F6_test-nx48 @@ -0,0 +1,987 @@ +// Stress Tensor(e_hydro)_4/abs(Charge Density(e_hydro)+ 1.0e-8) - (Momentum Density(e_hydro)_Y/abs(Charge Density(e_hydro)+ 1.0e-8)) * (Current Density(e_hydro)_Y/(Charge Density(e_hydro) + 1.0e-8)) + +//======================================================================== +// +// LPI 3D deck - Linearly polarized (in y) plane wave incident from left +// boundary +// +// Adapted from Albright's Lightning 3D LPI deck. +// B. Albright, X-1-PTA; 28 Jan. 2007 +// Lin Yin, X-1-PTA, 23 Feb 2009, for Cerrillos test +// +// Executable creates its own directory structure. Remove the old with: +// +// rm -rf rundata ehydro Hhydro Hehydro restart poynting velocity particle field +//======================================================================== + +// Employ turnstiles to partially serialize the high-volume file writes. +// In this case, the restart dumps. Set NUM_TURNSTILES to be the desired +// number of simultaneous writes. +#define NUM_TURNSTILES 128 + +begin_globals { + double e0; // peak amplitude of oscillating electric field + double omega; // angular freq. of the beam + int field_interval; // how often to dump field and hydro + int particle_interval; // how often to dump particle data + int poynting_interval; // how often to compute poynting flux on boundary + int restart_interval; // how often to write restart data + int quota_check_interval; // how often to check if quote exceeded + double quota_sec; // run quota in sec + int rtoggle; // enable save of last 2 restart files for safety + int load_particles; // were particles loaded? 
+ double topology_x; // domain topology needed to normalize Poynting diagnostic + double topology_y; + double topology_z; + int mobile_ions; + int H_present; + int He_present; + + // Parameters for 3d Gaussian wave launch + double lambda; + double waist; // how wide the focused beam is + double width; + double zcenter; // center of beam at boundary in z + double ycenter; // center of beam at boundary in y + double xfocus; // how far from boundary to focus + double mask; // # gaussian widths from beam center where I is nonzero + + // Ponyting diagnostic flags - which output to turn on + + // write_backscatter_only flag: when this flag is nonzero, it means to only compute + // poynting data for the lower-x surface. This flag affects both the summed poynting + // data as well as the surface data. + + int write_poynting_data; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only; // nonzero means we only write backscatter diagnostic for fields + + // write_poynting_sum is nonzero if you wish to write a file containing the integrated + // poynting data on one or more surfaces. If write_backscatter_only is nonzero, then + // the output file will be a time series of double precision numbers containing the + // integrated poynting flux at that point in time. If write_backscatter_only is zero, + // then for each time step, six double precision numbers are written, containing the + // poynting flux through the lower x, upper x, lower y, upper y, lower z, and upper z + // surfaces. + + int write_poynting_sum; // nonzero if we wish to write integrated Poynting data + + // write_poynting_faces is probably useless, but I put it here anyway in case it's not. + // When this flag is nonzero, it will print out the poynting flux at each of a 2D array + // of points on the surface. When this flag is turned on and write_backscatter_only is + // nonzero, then only the 2D array of points on the lower-x boundary surface are written + // for each time step. When this flag is turned on and write_bacscatter_only is + // zero, then it will write 2D array data for the lower x, upper x, lower y, upper y, + // lower z, upper z surfaces for each time step. + + int write_poynting_faces; // nonzero if we wish to write Poynting data on sim boundaries + + // write_eb_faces is nonzero when we wish to get the raw e and b data at the boundary + // (e.g., to be used with a filter to distinguish between SRS and SBS backscatter). + // When this flag is on and write_backscatter_only is nonzero, then only the 2D array + // of points on the lower-x boundary surface are written for each time step. When + // this flag is turned on and write_backscatteR_only is zero, then it will write 2D + // array data for the lower x, upper x, lower y, upper y, lower z, upper z surfaces for + // each time step. When turned on, four files are produced: e1, e2, cb1, cb2. The + // values of the quantities printed depend on the face one is considering: for the + // x faces, e1 = ey, e2 = ez, cb1 = cby, cb2 = cbz. Similarly for y and z surfaces, + // but where 1 and 2 stand for (z,x) and (x,y) coordinates, respectively. 
+ + int write_eb_faces; // nonzero if we wish to write E and B data on sim boundaries + + // Needed for movie files + float vthe; // vthe/c + float vthi_He; // vthi_He/c + float vthi_H; // vthi_H/c + int velocity_interval; // how frequently to dump binned velocity space data + + // Dump parameters for standard VPIC output formats + DumpParameters fdParams; + DumpParameters hedParams; + DumpParameters hHdParams; + DumpParameters hHedParams; + std::vector outputParams; +}; + +begin_initialization { + + // System of units + double ec = 4.8032e-10; // stat coulomb + double c_vac = 2.99792458e10; // cm/sec + double m_e = 9.1094e-28; // g + double k_b = 1.6022e-12; // erg/eV + double mec2 = m_e*c_vac*c_vac/k_b; + double mpc2 = mec2*1836.0; + + double cfl_req = 0.98; // How close to Courant should we try to run + double damp = 0; // How much radiation damping + double iv_thick = 2; // Thickness of impermeable vacuum (in cells) + + // Experimental parameters + + double t_e = 600; // electron temperature, eV + double t_i = 150; // ion temperature, eV + double n_e_over_n_crit = 0.05; // n_e/n_crit + double vacuum_wavelength = 527 * 1e-7; // third micron light (cm) + double laser_intensity = 2.5e15 * 1e7; // in ergs/cm^2 (note: 1 W = 1e7 ergs) + + // Simulation parameters + +#if 0 + // 2048 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 64; + double topology_y = 4; + double topology_z = 8; + // single-processor mesh = 128 x 128 x 64 +#endif + +#if 0 + // 14300x4 processors + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 12.0 * 1e-4; + double Lz = 12.0 * 1e-4; + double nx = 6292; + double ny = 440; + double nz = 440; + double topology_x = 143; + double topology_y = 10; + double topology_z = 10; + // single-processor mesh = 44 x 44 x 44 +#endif + +#if 0 + // 2*4096 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 128; + double topology_y = 8; + double topology_z = 8; + // single-processor mesh = 64 x 64 x 64 +#endif + + // This choice of nx, ny, nz and topology values imply that the + // grid is 102 x 42 x 42 cells. The transverse size is slightly smaller than + // in the original deck in order to enable more choices for down-sampling/striding + // the field and hydro output. (The original had 43 cells in each direction, which + // is prime and therefore not strideable with Ben's output format). 
+ +# if 0 + // one processor problem, for debugging + Lx /= topology_x; nx /= topology_x; + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 1; + topology_y = 1; + topology_z = 1; +# endif + +# if 0 + // twoprocessor problem, for debugging + Lx /= (topology_x/2); nx /= (topology_x/2); + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 2; + topology_y = 1; + topology_z = 1; +# endif + +// Run a smaller problem for debugging purposes +#if 0 + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 1.20 * 1e-4; + double Lz = 1.20 * 1e-4; + double nx = 6292; + double ny = 44; + double nz = 44; + double topology_x = 143; + double topology_y = 1; + double topology_z = 1; + + // nproc() x 1 x 1 : + double topology = nproc(); + Lx /= 143; Lx *= topology; + nx /= 143; nx *= topology; + topology_x /= 143; topology_x *= topology; + + // single-processor mesh = 44 x 44 x 44 +#endif + + float box_size_x = 120.0* 1e-4; + float box_size_z = 12.0* 1e-4; + + int mobile_ions = 1; // Whether or not to push ions + double nppc = 512; // Ave. number of particles/cell in ea. species + //double nppc = 32; // Ave. number of particles/cell in ea. species + int load_particles = 1; // Flag to turn on/off particle load + int He_present = 1; + int H_present = 1; + double f_He = 1; // Ratio of number density of He to total ion density + double f_H = 1-f_He; // Ratio of number density of H to total ion density + if ( f_He == 1 ) H_present = 0; + if ( f_H == 1 ) He_present = 0; + +// Here _He is N3+ + // Precompute some useful variables. + double A_H = 1; + double Z_H = 1; + double A_He = 14; + double Z_He = 3; + double mic2_H = mpc2*A_H; + double mic2_He = mpc2*A_He; + double mime_H = mic2_H /mec2; + double mime_He = mic2_He/mec2; + + double uthe = sqrt(t_e/mec2); // vthe/c + double uthi_H = sqrt(t_i/mic2_H); // vthi/c for H + double uthi_He = sqrt(t_i/mic2_He); // vthi/c for He + + // Plasma skin deptth in cm + double delta = (vacuum_wavelength / (2*M_PI) ) / sqrt( n_e_over_n_crit ); + + double n_e = c_vac*c_vac*m_e/(4*M_PI*ec*ec*delta*delta); // electron density in cm^-3 + double debye = uthe*delta; // electron Debye length (cm) + double omega = sqrt( 1/n_e_over_n_crit ); // laser beam freq. in wpe + + double nx = 7776; + double ny = 1; + double nz = 549; + +// Weak Scaling Test --matsekh +#if 1 + // nx = 7776 / {9, 27, 81, 162, 324, 648} == {864, 288, 96, 48, 24, 12} + nx = nx/162; + box_size_x /= 162.; +#endif +// End Weak Scaling Test --matsekh + +#if 0 + // DEBUG - run on 16 proc. 
+ nx = nx/81; // default nx=7776/81==96 --matsekh + box_size_x /= 81.; + +#endif + + double hx = box_size_x/(delta*nx); // in c/wpe + double hz = box_size_z/(delta*nz); // in c/wpe + double hy = hz; + + double cell_size_x = hx*delta/debye; // Cell size in Debye lengths + double cell_size_z = hz*delta/debye; +//double cell_size_y = hy*delta/debye; + + double Lx = nx*hx; // in c/wpe + double Ly = ny*hy; // in c/wpe + double Lz = nz*hz; // in c/wpe + + double topology_x = nproc(); + double topology_y = 1; + double topology_z = 1; + + double f_number = 6; // f/# of beam + double lambda = vacuum_wavelength/delta; // vacuum wavelength in c/wpe + double waist = f_number*lambda; // width of beam at focus in c/wpe + double xfocus = Lx/2; // in c/wpe + double ycenter = 0; // center of spot in y on lhs boundary + double zcenter = 0; // center of spot in z on lhs boundary + double mask = 1.5; // set drive I=0 outside r>mask*width at lhs boundary + double width = waist*sqrt( 1 + (lambda*xfocus/(M_PI*waist*waist))*(lambda*xfocus/(M_PI*waist*waist))); + + // Peak instantaneous E field in "natural units" + double e0 = sqrt( 2*laser_intensity / (m_e*c_vac*c_vac*c_vac*n_e) ); +//e0 = e0*(waist/width); // at entrance (3D Gaussian) + e0 = e0*sqrt(waist/width); // 2D, at entrance + + double dt = cfl_req*courant_length(Lx,Ly,Lz,nx,ny,nz); // in 1/wpe; n.b. c=1 in nat. units + double nsteps_cycle = trunc_granular(2*M_PI/(dt*omega),1)+1; + dt = 2*M_PI/omega/nsteps_cycle; // nsteps_cycle time steps in one laser cycle + + double wpe1ps = 1e-12*c_vac/delta; + + double t_stop = 100*wpe1ps; // Runtime in 1/wpe + int particle_interval = 0; + int poynting_interval = int(M_PI/((omega+1.5)*dt)); // Num. steps between dumping poynting flux + int field_interval = int(0.01*wpe1ps/dt); // Num. steps between saving field, hydro data + int velocity_interval = int(0.1*wpe1ps/dt); // How frequently to dump binned velocity space data + int restart_interval = 10000000; // Num. steps between restart dumps + int quota_check_interval = 20; + + // Ben: This quota thing gracefully terminates after writing a final restart after + // 11.5 hours; if you want a longer time before shutdown, set this value larger. If + // you want the code to just run all weekend, then set it to very long (2400.*3500, e.g.) + + double quota_sec = 23.7*3600; // Run quota in sec. + + // Turn on integrated backscatter poynting diagnostic - right now there is a bug in this, so we + // only write the integrated backscatter time history on the left face. + + int write_poynting_data = 1; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only = 0; // Nonzero means only write lower x face + int write_poynting_sum = 0; // Whether to write integrated Poynting data + int write_poynting_faces = 0; // Whether to write poynting data on sim boundary faces + int write_eb_faces = 0; // Whether to write e and b field data on sim boundary faces + + double N_e = nppc*nx*ny*nz; // Number of macro electrons in box + double Np_e = Lx*Ly*Lz; // "Number" of "physical" electrons in box (nat. units) + double q_e = -Np_e/N_e; // Charge per macro electron + double N_i = N_e; // Number of macro ions of each species in box + double Np_i = Np_e/(Z_H*f_H+Z_He*f_He); // "Number" of "physical" ions of each sp. 
in box + double qi_H = Z_H *f_H *Np_i/N_i; // Charge per H macro ion + double qi_He = Z_He*f_He*Np_i/N_i; // Charge per He macro ion + + // Print simulation parameters + + sim_log("***** Simulation parameters *****"); + sim_log("* Processors: "< 25,000 steps. + num_comm_round = 6; + + global->e0 = e0; + global->omega = omega; + global->field_interval = field_interval; + global->particle_interval = particle_interval; + global->poynting_interval = poynting_interval; + global->restart_interval = restart_interval; + global->quota_check_interval = quota_check_interval; + global->quota_sec = quota_sec; + global->rtoggle = 0; + global->load_particles = load_particles; + global->mobile_ions = mobile_ions; + global->H_present = H_present; + global->He_present = He_present; + global->topology_x = topology_x; + global->topology_y = topology_y; + global->topology_z = topology_z; + global->xfocus = xfocus; + global->ycenter = ycenter; + global->zcenter = zcenter; + global->mask = mask; + global->waist = waist; + global->width = width; + global->lambda = lambda; + + global->write_poynting_data = write_poynting_data; + + global->write_poynting_sum = write_poynting_sum; + global->write_poynting_faces = write_poynting_faces; + global->write_eb_faces = write_eb_faces; + global->write_backscatter_only = write_backscatter_only; + + global->vthe = uthe; + global->vthi_H = uthi_H; + global->vthi_He = uthi_He; + global->velocity_interval = velocity_interval; + + // Set up the species + // Allow additional local particles in case of non-uniformity. + + // Set up grid + sim_log("Setting up computational grid."); + grid->dx = hx; + grid->dy = hy; + grid->dz = hz; + grid->dt = dt; + grid->cvac = 1; + grid->eps0 = 1; + + sim_log("Setting up absorbing mesh."); + define_periodic_grid( 0, -0.5*Ly, -0.5*Lz, // Low corner + Lx, 0.5*Ly, 0.5*Lz, // High corner + nx, ny, nz, // Resolution + topology_x, topology_y, topology_z); // Topology + + if ( rank() == 0) { + set_domain_field_bc( BOUNDARY(-1,0,0), absorb_fields ); + } + if ( rank() == nproc() -1) { + set_domain_field_bc( BOUNDARY( 1,0,0), absorb_fields ); + } + + set_domain_field_bc( BOUNDARY( 0,0, 1), absorb_fields ); + set_domain_field_bc( BOUNDARY( 0,0,-1), absorb_fields ); + + sim_log("Setting up species."); + double max_local_np = 2.0*N_e/nproc(); + double max_local_nm = max_local_np/10.0; + species_t * electron = define_species("electron", -1, 1, max_local_np, max_local_nm, 20, 1); + + // Start with two ion species. We have option to go to Xe and Kr gas fills if + // we need a higher ion/electron macroparticle ratio. + + species_t *ion_H, *ion_He; + if ( mobile_ions ) { + if ( H_present ) ion_H = define_species("H", Z_H, mime_H, max_local_np, max_local_nm, 100, 1); + if ( He_present ) ion_He = define_species("He", Z_He, mime_He, max_local_np, max_local_nm, 100, 1); + } + + // From grid/partition.c: used to determine which domains are on edge +# define RANK_TO_INDEX(rank,ix,iy,iz) BEGIN_PRIMITIVE { \ + int _ix, _iy, _iz; \ + _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ + _iy = _ix/int(global->topology_x); /* iy = iy+gpy*iz */ \ + _ix -= _iy*int(global->topology_x); /* ix = ix */ \ + _iz = _iy/int(global->topology_y); /* iz = iz */ \ + _iy -= _iz*int(global->topology_y); /* iy = iy */ \ + (ix) = _ix; \ + (iy) = _iy; \ + (iz) = _iz; \ + } END_PRIMITIVE + + sim_log("Overriding x boundaries to absorb fields."); + int ix, iy, iz; // Domain location in mesh + RANK_TO_INDEX( int(rank()), ix, iy, iz ); + + // Set up Maxwellian reinjection B.C. 
+ sim_log("Setting up Maxwellian reinjection boundary condition."); + + particle_bc_t * maxwellian_reinjection = + define_particle_bc( maxwellian_reflux( species_list, entropy ) ); + set_reflux_temp( maxwellian_reinjection, electron, uthe, uthe ); + if ( mobile_ions ) { + if ( H_present ) set_reflux_temp( maxwellian_reinjection, ion_H, uthi_H, uthi_H ); + if ( He_present ) set_reflux_temp( maxwellian_reinjection, ion_He, uthi_He, uthi_He ); + } + + // Set up materials + sim_log("Setting up materials."); + define_material( "vacuum", 1 ); + define_field_array( NULL, damp ); + + // Paint the simulation volume with materials and boundary conditions +# define iv_region ( x< hx*iv_thick || x>Lx -hx*iv_thick \ + || z<-Lz/2+hz*iv_thick || z>Lz/2-hz*iv_thick ) /* all boundaries are i.v. */ + set_region_bc( iv_region, maxwellian_reinjection, maxwellian_reinjection, maxwellian_reinjection ); + + // Load particles + if ( load_particles ) { + sim_log("Loading particles."); + // Fast load of particles--don't bother fixing artificial domain correlations + double xmin=grid->x0, xmax=grid->x1; + double ymin=grid->y0, ymax=grid->y1; + double zmin=grid->z0, zmax=grid->z1; + repeat( N_e/(topology_x*topology_y*topology_z) ) { + double x = uniform( rng(0), xmin, xmax ); + double y = uniform( rng(0), ymin, ymax ); + double z = uniform( rng(0), zmin, zmax ); + if ( iv_region ) continue; // Particle fell in iv_region. Don't load. + // third to last arg is "weight," a positive number + inject_particle( electron, x, y, z, + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), -q_e, 0, 0 ); + if ( mobile_ions ) { + if ( H_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_H, x, y, z, + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), qi_H, 0, 0 ); + if ( He_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_He, x, y, z, + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), qi_He, 0, 0 ); + } + } + } + + + /*-------------------------------------------------------------------------- + * New dump definition + *------------------------------------------------------------------------*/ + + /*-------------------------------------------------------------------------- + * Set data output format + * + * This option allows the user to specify the data format for an output + * dump. Legal settings are 'band' and 'band_interleave'. Band-interleave + * format is the native storage format for data in VPIC. For field data, + * this looks something like: + * + * ex0 ey0 ez0 div_e_err0 cbx0 ... ex1 ey1 ez1 div_e_err1 cbx1 ... + * + * Banded data format stores all data of a particular state variable as a + * contiguous array, and is easier for ParaView to process efficiently. + * Banded data looks like: + * + * ex0 ex1 ex2 ... exN ey0 ey1 ey2 ... + * + *------------------------------------------------------------------------*/ + sim_log( "Setting up hydro and field diagnostics." 
); + + global->fdParams.format = band; + sim_log ( "Field output format : band" ); + + global->hedParams.format = band; + sim_log ( "Electron hydro output format : band" ); + + global->hHdParams.format = band; + sim_log ( "Hydrogen hydro output format : band" ); + + global->hHedParams.format = band; + sim_log ( "Helium hydro output format : band" ); + + /*-------------------------------------------------------------------------- + * Set stride + * + * This option allows data down-sampling at output. Data are down-sampled + * in each dimension by the stride specified for that dimension. For + * example, to down-sample the x-dimension of the field data by a factor + * of 2, i.e., half as many data will be output, select: + * + * global->fdParams.stride_x = 2; + * + * The following 2-D example shows down-sampling of a 7x7 grid (nx = 7, + * ny = 7. With ghost-cell padding the actual extents of the grid are 9x9. + * Setting the strides in x and y to equal 2 results in an output grid of + * nx = 4, ny = 4, with actual extents 6x6. + * + * G G G G G G G G G + * G X X X X X X X G + * G X X X X X X X G G G G G G G + * G X X X X X X X G G X X X X G + * G X X X X X X X G ==> G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G G G G G G + * G G G G G G G G G + * + * Note that grid extents in each dimension must be evenly divisible by + * the stride for that dimension: + * + * nx = 150; + * global->fdParams.stride_x = 10; // legal -> 150/10 = 15 + * + * global->fdParams.stride_x = 8; // illegal!!! -> 150/8 = 18.75 + *------------------------------------------------------------------------*/ + + // Strides for field and hydro arrays. Note that here we have defined them + // the same for fields and all hydro species; if desired, we could use different + // strides for each. Also note that strides must divide evenly into the number + // of cells in a given domain. 
+ + // Define strides and test that they evenly divide into grid->nx, ny, nz + int stride_x = 1, stride_y = 1, stride_z = 1; + if ( int(grid->nx)%stride_x ) ERROR(("Stride doesn't evenly divide grid->nx.")); + if ( int(grid->ny)%stride_y ) ERROR(("Stride doesn't evenly divide grid->ny.")); + if ( int(grid->nz)%stride_z ) ERROR(("Stride doesn't evenly divide grid->nz.")); + + //---------------------------------------------------------------------- + // Fields + + // relative path to fields data from global header + sprintf(global->fdParams.baseDir, "field"); + + // base file name for fields output + sprintf(global->fdParams.baseFileName, "fields"); + + // set field strides + global->fdParams.stride_x = stride_x; + global->fdParams.stride_y = stride_y; + global->fdParams.stride_z = stride_z; + sim_log ( "Fields x-stride " << global->fdParams.stride_x ); + sim_log ( "Fields y-stride " << global->fdParams.stride_y ); + sim_log ( "Fields z-stride " << global->fdParams.stride_z ); + + // add field parameters to list + global->outputParams.push_back(&global->fdParams); + + //---------------------------------------------------------------------- + // Electron hydro + + // relative path to electron species data from global header + sprintf(global->hedParams.baseDir, "ehydro"); + + // base file name for fields output + sprintf(global->hedParams.baseFileName, "e_hydro"); + + // set electron hydro strides + global->hedParams.stride_x = stride_x; + global->hedParams.stride_y = stride_y; + global->hedParams.stride_z = stride_z; + sim_log ( "Electron species x-stride " << global->hedParams.stride_x ); + sim_log ( "Electron species y-stride " << global->hedParams.stride_y ); + sim_log ( "Electron species z-stride " << global->hedParams.stride_z ); + + // add electron hydro parameters to list + global->outputParams.push_back(&global->hedParams); + + //---------------------------------------------------------------------- + // Hydrogen hydro + + // relative path to electron species data from global header + sprintf(global->hHdParams.baseDir, "Hhydro"); + + // base file name for fields output + sprintf(global->hHdParams.baseFileName, "H_hydro"); + + // set hydrogen hydro strides + global->hHdParams.stride_x = stride_x; + global->hHdParams.stride_y = stride_y; + global->hHdParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHdParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHdParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHdParams.stride_z ); + + // add hydrogen hydro parameters to list + global->outputParams.push_back(&global->hHdParams); + + //---------------------------------------------------------------------- + // Helium hydro + + // relative path to electron species data from global header + sprintf(global->hHedParams.baseDir, "Hehydro"); + + // base file name for fields output + sprintf(global->hHedParams.baseFileName, "He_hydro"); + + // set helium hydro strides + global->hHedParams.stride_x = stride_x; + global->hHedParams.stride_y = stride_y; + global->hHedParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHedParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHedParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHedParams.stride_z ); + + // add helium hydro parameters to list + global->outputParams.push_back(&global->hHedParams); + + /*----------------------------------------------------------------------- + * Set output fields + * + * It is now possible to select which 
state-variables are output on a + * per-dump basis. Variables are selected by passing an or-list of + * state-variables by name. For example, to only output the x-component + * of the electric field and the y-component of the magnetic field, the + * user would call output_variables like: + * + * global->fdParams.output_variables( ex | cby ); + * + * NOTE: OUTPUT VARIABLES ARE ONLY USED FOR THE BANDED FORMAT. IF THE + * FORMAT IS BAND-INTERLEAVE, ALL VARIABLES ARE OUTPUT AND CALLS TO + * 'output_variables' WILL HAVE NO EFFECT. + * + * ALSO: DEFAULT OUTPUT IS NONE! THIS IS DUE TO THE WAY THAT VPIC + * HANDLES GLOBAL VARIABLES IN THE INPUT DECK AND IS UNAVOIDABLE. + * + * For convenience, the output variable 'all' is defined: + * + * global->fdParams.output_variables( all ); + *------------------------------------------------------------------------*/ + /* CUT AND PASTE AS A STARTING POINT + * REMEMBER TO ADD APPROPRIATE GLOBAL DUMPPARAMETERS VARIABLE + + output_variables( all ); + + output_variables( electric | div_e_err | magnetic | div_b_err | + tca | rhob | current | rhof | + emat | nmat | fmat | cmat ); + + output_variables( current_density | charge_density | + momentum_density | ke_density | stress_tensor ); + */ + + //global->fdParams.output_variables( all ); + global->fdParams.output_variables( electric | magnetic ); + + //global->hedParams.output_variables( all ); + //global->hedParams.output_variables( current_density | momentum_density ); + global->hedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHdParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + + /*-------------------------------------------------------------------------- + * Convenience functions for simlog output + *------------------------------------------------------------------------*/ + char varlist[256]; + + create_field_list(varlist, global->fdParams); + sim_log ( "Fields variable list: " << varlist ); + + create_hydro_list(varlist, global->hedParams); + sim_log ( "Electron species variable list: " << varlist ); + + create_hydro_list(varlist, global->hHdParams); + sim_log ( "Ion species variable list: " << varlist ); + + /*------------------------------------------------------------------------*/ + + sim_log("***Finished with user-specified initialization ***"); + + // Upon completion of the initialization, the following occurs: + // - The synchronization error (tang E, norm B) is computed between domains + // and tang E / norm B are synchronized by averaging where discrepancies + // are encountered. + // - The initial divergence error of the magnetic field is computed and + // one pass of cleaning is done (for good measure) + // - The bound charge density necessary to give the simulation an initially + // clean divergence e is computed. 
+ // - The particle momentum is uncentered from u_0 to u_{-1/2} + // - The user diagnostics are called on the initial state + // - The physics loop is started + // + // The physics loop consists of: + // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2} + // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles) + // - User current injection (adjust field(x,y,z).jfx, jfy, jfz) + // - Advance B from B_0 to B_{1/2} + // - Advance E from E_0 to E_1 + // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz) + // - Advance B from B_{1/2} to B_1 + // - (periodically) Divergence clean electric field + // - (periodically) Divergence clean magnetic field + // - (periodically) Synchronize shared tang e and norm b + // - Increment the time step + // - Call user diagnostics + // - (periodically) Print a status message +} + + +begin_diagnostics { + + int mobile_ions=global->mobile_ions, + H_present=global->H_present, + He_present=global->He_present; + + if ( step()%200==0 ) sim_log("Time step: "<x##_interval>0 && remainder(step(),global->x##_interval)==0) + + // Do a mkdir at time t=0 to ensure we have all the directories we need + if ( step()==0 ) { + dump_mkdir("rundata"); + dump_mkdir("fft"); + dump_mkdir("field"); + dump_mkdir("ehydro"); + dump_mkdir("Hhydro"); + dump_mkdir("Hehydro"); + dump_mkdir("restart"); + dump_mkdir("particle"); + dump_mkdir("poynting"); + dump_mkdir("velocity"); + + // Turn off rundata for now + // dump_grid("rundata/grid"); + // dump_materials("rundata/materials"); + // dump_species("rundata/species"); + global_header("global", global->outputParams); + + } + + // Field and hydro data + + if ( should_dump(field) ) { + field_dump( global->fdParams ); + +#if 1 + if ( global->load_particles ) { + hydro_dump( "electron", global->hedParams ); + if ( global->mobile_ions ) { + if ( global->H_present ) hydro_dump( "H", global->hHdParams ); + if ( global->He_present ) hydro_dump( "He", global->hHedParams ); + } + } +#endif + + } + + // Particle dump data +#if 0 + if ( should_dump(particle) && global->load_particles ) { + dump_particles( "electron", "particle/eparticle" ); + if ( global->mobile_ions ) { + if ( global->H_present ) dump_particles( "H", "particle/Hparticle" ); + if ( global->He_present ) dump_particles( "He", "particle/Heparticle" ); + } + } +#endif + + +#define ALLOCATE(A,LEN,TYPE) \ + if ( !((A)=(TYPE *)malloc((size_t)(LEN)*sizeof(TYPE))) ) ERROR(("Cannot allocate.")); + + //--------------------------------------------------------------------------- + + // Restart dump filenames are by default tagged with the current timestep. + // If you do not want to do this add "0" to the dump_restart arguments. One + // reason to not tag the dumps is if you want only one restart dump (the most + // recent one) to be stored at a time to conserve on file space. Note: A + // restart dump should be the _very_ _last_ dump made in a diagnostics + // routine. If not, the diagnostics performed after the restart dump but + // before the next timestep will be missed on a restart. Restart dumps are + // in a binary format. Each rank makes a restart dump. + + // Restarting from restart files is accomplished by running the executable + // with "restart restart" as additional command line arguments. The executable + // must be identical to that used to generate the restart dumps or else + // the behavior may be unpredictable. 
+ + // Note: restart is not currently working with custom boundary conditions + // (such as the reflux condition) and has not been tested with emission + // turned on. + + if ( step()>0 && should_dump(restart) ) { + static const char * restart_fbase[2] = { "restart/restart0", "restart/restart1" }; + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( restart_fbase[global->rtoggle], step() ); + //END_TURNSTILE; + global->rtoggle^=1; + } + + // Shut down simulation when wall clock time exceeds global->quota_sec. + // Note that the mp_elapsed() is guaranteed to return the same value for all + // processors (i.e., elapsed time on proc #0), and therefore the abort will + // be synchronized across processors. + + if ( step()>0 && global->quota_check_interval && (step()%global->quota_check_interval)==0 ) { + if ( uptime() > global->quota_sec ) { + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( "restart/restart", step() ); + //END_TURNSTILE; + + sim_log( "Restart dump restart completed." ); + sim_log( "Allowed runtime exceeded for this job. Terminating." ); + mp_barrier(); // Just to be safe + halt_mp(); + exit(0); + } + } + +} + + +begin_field_injection { + // Inject a light wave from lhs boundary with E aligned along y + // Use scalar diffraction theory for the Gaussian beam source. (This is approximate). + + // For quiet startup (i.e., so that we don't propagate a delta-function noise + // pulse at time t=0) we multiply by a constant phase term exp(i phi) where: + // phi = k*global->xfocus+atan(h) (3d) + + // Inject from the left a field of the form ey = e0 sin( omega t ) + +# define DY ( grid->y0 + (iy-0.5)*grid->dy - global->ycenter ) +# define DZ ( grid->z0 + (iz-1 )*grid->dz - global->zcenter ) +# define R2 ( DY*DY + DZ*DZ ) +# define R2Z ( DZ*DZ ) +# define PHASE ( -global->omega*t + h*R2Z/(global->width*global->width) ) +# define MASK ( R2Z<=pow(global->mask*global->width,2) ? 
1 : 0 )
+
+ if ( grid->x0==0 ) { // Node is on left boundary
+ double alpha = grid->cvac*grid->dt/grid->dx;
+ double emax_coeff = (4/(1+alpha))*global->omega*grid->dt*global->e0;
+ double prefactor = emax_coeff*sqrt(2/M_PI);
+ double t = grid->dt*step();
+
+ // Compute Rayleigh length in c/wpe
+ double rl = M_PI*global->waist*global->waist/global->lambda;
+
+ double pulse_shape_factor = 1;
+ float pulse_length = 70; // units of 1/wpe
+ float sin_t_tau = sin(0.5*t*M_PI/pulse_length);
+ pulse_shape_factor = ( t<pulse_length ? sin_t_tau*sin_t_tau : 1 );
+ float h = global->xfocus/rl; // Distance / Rayleigh length
+
+ // Loop over all Ey values on left edge of this node
+ for ( int iz=1; iz<=grid->nz+1; ++iz )
+ for ( int iy=1; iy<=grid->ny; ++iy )
+ field(1,iy,iz).ey += prefactor
+ * cos(PHASE)
+// * exp(-R2/(global->width*global->width)) // 3D
+ * exp(-R2Z/(global->width*global->width))
+ * MASK * pulse_shape_factor;
+ }
+}
+
+
+begin_particle_injection {
+ // No particle injection for this simulation
+}
+
+
+begin_current_injection {
+ // No current injection for this simulation
+}
+
+begin_particle_collisions {
+ // No particle collisions for this simulation
+}
+
diff --git a/TestScripts/lpi-deck/lpi_2d_F6_test-nx864 b/TestScripts/lpi-deck/lpi_2d_F6_test-nx864
new file mode 100755
index 00000000..254fd57e
--- /dev/null
+++ b/TestScripts/lpi-deck/lpi_2d_F6_test-nx864
@@ -0,0 +1,987 @@
+// Stress Tensor(e_hydro)_4/abs(Charge Density(e_hydro)+ 1.0e-8) - (Momentum Density(e_hydro)_Y/abs(Charge Density(e_hydro)+ 1.0e-8)) * (Current Density(e_hydro)_Y/(Charge Density(e_hydro) + 1.0e-8))
+
+//========================================================================
+//
+// LPI 3D deck - Linearly polarized (in y) plane wave incident from left
+// boundary
+//
+// Adapted from Albright's Lightning 3D LPI deck.
+// B. Albright, X-1-PTA; 28 Jan. 2007
+// Lin Yin, X-1-PTA, 23 Feb 2009, for Cerrillos test
+//
+// Executable creates its own directory structure. Remove the old with:
+//
+// rm -rf rundata ehydro Hhydro Hehydro restart poynting velocity particle field
+//========================================================================
+
+// Employ turnstiles to partially serialize the high-volume file writes.
+// In this case, the restart dumps. Set NUM_TURNSTILES to be the desired
+// number of simultaneous writes.
+#define NUM_TURNSTILES 128
+
+begin_globals {
+ double e0; // peak amplitude of oscillating electric field
+ double omega; // angular freq. of the beam
+ int field_interval; // how often to dump field and hydro
+ int particle_interval; // how often to dump particle data
+ int poynting_interval; // how often to compute poynting flux on boundary
+ int restart_interval; // how often to write restart data
+ int quota_check_interval; // how often to check if quota exceeded
+ double quota_sec; // run quota in sec
+ int rtoggle; // enable save of last 2 restart files for safety
+ int load_particles; // were particles loaded?
+ double topology_x; // domain topology needed to normalize Poynting diagnostic + double topology_y; + double topology_z; + int mobile_ions; + int H_present; + int He_present; + + // Parameters for 3d Gaussian wave launch + double lambda; + double waist; // how wide the focused beam is + double width; + double zcenter; // center of beam at boundary in z + double ycenter; // center of beam at boundary in y + double xfocus; // how far from boundary to focus + double mask; // # gaussian widths from beam center where I is nonzero + + // Ponyting diagnostic flags - which output to turn on + + // write_backscatter_only flag: when this flag is nonzero, it means to only compute + // poynting data for the lower-x surface. This flag affects both the summed poynting + // data as well as the surface data. + + int write_poynting_data; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only; // nonzero means we only write backscatter diagnostic for fields + + // write_poynting_sum is nonzero if you wish to write a file containing the integrated + // poynting data on one or more surfaces. If write_backscatter_only is nonzero, then + // the output file will be a time series of double precision numbers containing the + // integrated poynting flux at that point in time. If write_backscatter_only is zero, + // then for each time step, six double precision numbers are written, containing the + // poynting flux through the lower x, upper x, lower y, upper y, lower z, and upper z + // surfaces. + + int write_poynting_sum; // nonzero if we wish to write integrated Poynting data + + // write_poynting_faces is probably useless, but I put it here anyway in case it's not. + // When this flag is nonzero, it will print out the poynting flux at each of a 2D array + // of points on the surface. When this flag is turned on and write_backscatter_only is + // nonzero, then only the 2D array of points on the lower-x boundary surface are written + // for each time step. When this flag is turned on and write_bacscatter_only is + // zero, then it will write 2D array data for the lower x, upper x, lower y, upper y, + // lower z, upper z surfaces for each time step. + + int write_poynting_faces; // nonzero if we wish to write Poynting data on sim boundaries + + // write_eb_faces is nonzero when we wish to get the raw e and b data at the boundary + // (e.g., to be used with a filter to distinguish between SRS and SBS backscatter). + // When this flag is on and write_backscatter_only is nonzero, then only the 2D array + // of points on the lower-x boundary surface are written for each time step. When + // this flag is turned on and write_backscatteR_only is zero, then it will write 2D + // array data for the lower x, upper x, lower y, upper y, lower z, upper z surfaces for + // each time step. When turned on, four files are produced: e1, e2, cb1, cb2. The + // values of the quantities printed depend on the face one is considering: for the + // x faces, e1 = ey, e2 = ez, cb1 = cby, cb2 = cbz. Similarly for y and z surfaces, + // but where 1 and 2 stand for (z,x) and (x,y) coordinates, respectively. 
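Per the comment above, the integrated Poynting output is a flat stream of doubles: one value per step when write_backscatter_only is nonzero, six per step (lower/upper x, y, z) otherwise. A hypothetical post-processing sketch under those assumptions; the file name and the absence of any header are assumptions, not taken from the deck:

  // Hypothetical reader for the integrated Poynting time series described above.
  // Assumes raw native-endian doubles with no header, 1 or 6 values per step
  // depending on write_backscatter_only; the path is illustrative only.
  #include <cstddef>
  #include <cstdio>
  #include <vector>

  int main( void ) {
    const int backscatter_only = 0;              // assumed flag value
    const int vals_per_step    = backscatter_only ? 1 : 6;
    std::FILE * fp = std::fopen( "poynting/poynting_sum", "rb" );  // illustrative path
    if ( !fp ) { std::perror( "fopen" ); return 1; }
    std::vector<double> rec( vals_per_step );
    long step = 0;
    while ( std::fread( rec.data(), sizeof(double), vals_per_step, fp )
            == static_cast<std::size_t>(vals_per_step) )
      std::printf( "step %ld: lower-x flux %g\n", step++, rec[0] );
    std::fclose( fp );
    return 0;
  }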
+ + int write_eb_faces; // nonzero if we wish to write E and B data on sim boundaries + + // Needed for movie files + float vthe; // vthe/c + float vthi_He; // vthi_He/c + float vthi_H; // vthi_H/c + int velocity_interval; // how frequently to dump binned velocity space data + + // Dump parameters for standard VPIC output formats + DumpParameters fdParams; + DumpParameters hedParams; + DumpParameters hHdParams; + DumpParameters hHedParams; + std::vector outputParams; +}; + +begin_initialization { + + // System of units + double ec = 4.8032e-10; // stat coulomb + double c_vac = 2.99792458e10; // cm/sec + double m_e = 9.1094e-28; // g + double k_b = 1.6022e-12; // erg/eV + double mec2 = m_e*c_vac*c_vac/k_b; + double mpc2 = mec2*1836.0; + + double cfl_req = 0.98; // How close to Courant should we try to run + double damp = 0; // How much radiation damping + double iv_thick = 2; // Thickness of impermeable vacuum (in cells) + + // Experimental parameters + + double t_e = 600; // electron temperature, eV + double t_i = 150; // ion temperature, eV + double n_e_over_n_crit = 0.05; // n_e/n_crit + double vacuum_wavelength = 527 * 1e-7; // third micron light (cm) + double laser_intensity = 2.5e15 * 1e7; // in ergs/cm^2 (note: 1 W = 1e7 ergs) + + // Simulation parameters + +#if 0 + // 2048 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 64; + double topology_y = 4; + double topology_z = 8; + // single-processor mesh = 128 x 128 x 64 +#endif + +#if 0 + // 14300x4 processors + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 12.0 * 1e-4; + double Lz = 12.0 * 1e-4; + double nx = 6292; + double ny = 440; + double nz = 440; + double topology_x = 143; + double topology_y = 10; + double topology_z = 10; + // single-processor mesh = 44 x 44 x 44 +#endif + +#if 0 + // 2*4096 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 128; + double topology_y = 8; + double topology_z = 8; + // single-processor mesh = 64 x 64 x 64 +#endif + + // This choice of nx, ny, nz and topology values imply that the + // grid is 102 x 42 x 42 cells. The transverse size is slightly smaller than + // in the original deck in order to enable more choices for down-sampling/striding + // the field and hydro output. (The original had 43 cells in each direction, which + // is prime and therefore not strideable with Ben's output format). 
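The constants above pin down the unit system; the derived plasma quantities are computed further down in the deck. As a standalone sanity check using the same expressions (no VPIC dependencies):

  // Standalone arithmetic check of the derived quantities used below
  // (electron rest energy, skin depth, electron density, laser frequency in wpe).
  #include <cmath>
  #include <cstdio>

  int main( void ) {
    double ec    = 4.8032e-10;                   // statcoulomb
    double c_vac = 2.99792458e10;                // cm/s
    double m_e   = 9.1094e-28;                   // g
    double k_b   = 1.6022e-12;                   // erg/eV
    double mec2  = m_e*c_vac*c_vac/k_b;          // ~5.11e5 eV
    double n_e_over_n_crit   = 0.05;
    double vacuum_wavelength = 527e-7;           // cm
    // Same expressions as in the deck:
    double delta = ( vacuum_wavelength/(2*M_PI) ) / std::sqrt( n_e_over_n_crit );
    double n_e   = c_vac*c_vac*m_e/(4*M_PI*ec*ec*delta*delta);   // cm^-3
    double omega = std::sqrt( 1/n_e_over_n_crit );               // in wpe
    std::printf( "mec2 = %.4g eV, delta = %.4g cm, n_e = %.4g cm^-3, omega = %.4g wpe\n",
                 mec2, delta, n_e, omega );
    return 0;
  }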
+ +# if 0 + // one processor problem, for debugging + Lx /= topology_x; nx /= topology_x; + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 1; + topology_y = 1; + topology_z = 1; +# endif + +# if 0 + // twoprocessor problem, for debugging + Lx /= (topology_x/2); nx /= (topology_x/2); + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 2; + topology_y = 1; + topology_z = 1; +# endif + +// Run a smaller problem for debugging purposes +#if 0 + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 1.20 * 1e-4; + double Lz = 1.20 * 1e-4; + double nx = 6292; + double ny = 44; + double nz = 44; + double topology_x = 143; + double topology_y = 1; + double topology_z = 1; + + // nproc() x 1 x 1 : + double topology = nproc(); + Lx /= 143; Lx *= topology; + nx /= 143; nx *= topology; + topology_x /= 143; topology_x *= topology; + + // single-processor mesh = 44 x 44 x 44 +#endif + + float box_size_x = 120.0* 1e-4; + float box_size_z = 12.0* 1e-4; + + int mobile_ions = 1; // Whether or not to push ions + double nppc = 512; // Ave. number of particles/cell in ea. species + //double nppc = 32; // Ave. number of particles/cell in ea. species + int load_particles = 1; // Flag to turn on/off particle load + int He_present = 1; + int H_present = 1; + double f_He = 1; // Ratio of number density of He to total ion density + double f_H = 1-f_He; // Ratio of number density of H to total ion density + if ( f_He == 1 ) H_present = 0; + if ( f_H == 1 ) He_present = 0; + +// Here _He is N3+ + // Precompute some useful variables. + double A_H = 1; + double Z_H = 1; + double A_He = 14; + double Z_He = 3; + double mic2_H = mpc2*A_H; + double mic2_He = mpc2*A_He; + double mime_H = mic2_H /mec2; + double mime_He = mic2_He/mec2; + + double uthe = sqrt(t_e/mec2); // vthe/c + double uthi_H = sqrt(t_i/mic2_H); // vthi/c for H + double uthi_He = sqrt(t_i/mic2_He); // vthi/c for He + + // Plasma skin deptth in cm + double delta = (vacuum_wavelength / (2*M_PI) ) / sqrt( n_e_over_n_crit ); + + double n_e = c_vac*c_vac*m_e/(4*M_PI*ec*ec*delta*delta); // electron density in cm^-3 + double debye = uthe*delta; // electron Debye length (cm) + double omega = sqrt( 1/n_e_over_n_crit ); // laser beam freq. in wpe + + double nx = 7776; + double ny = 1; + double nz = 549; + +// Weak Scaling Test --matsekh +#if 1 + // nx = 7776 / {9, 27, 81, 162, 324, 648} == {864, 288, 96, 48, 24, 12} + nx = nx/9; + box_size_x /= 9.; +#endif +// End Weak Scaling Test --matsekh + +#if 0 + // DEBUG - run on 16 proc. 
+ nx = nx/81; // default nx=7776/81==96 --matsekh + box_size_x /= 81.; + +#endif + + double hx = box_size_x/(delta*nx); // in c/wpe + double hz = box_size_z/(delta*nz); // in c/wpe + double hy = hz; + + double cell_size_x = hx*delta/debye; // Cell size in Debye lengths + double cell_size_z = hz*delta/debye; +//double cell_size_y = hy*delta/debye; + + double Lx = nx*hx; // in c/wpe + double Ly = ny*hy; // in c/wpe + double Lz = nz*hz; // in c/wpe + + double topology_x = nproc(); + double topology_y = 1; + double topology_z = 1; + + double f_number = 6; // f/# of beam + double lambda = vacuum_wavelength/delta; // vacuum wavelength in c/wpe + double waist = f_number*lambda; // width of beam at focus in c/wpe + double xfocus = Lx/2; // in c/wpe + double ycenter = 0; // center of spot in y on lhs boundary + double zcenter = 0; // center of spot in z on lhs boundary + double mask = 1.5; // set drive I=0 outside r>mask*width at lhs boundary + double width = waist*sqrt( 1 + (lambda*xfocus/(M_PI*waist*waist))*(lambda*xfocus/(M_PI*waist*waist))); + + // Peak instantaneous E field in "natural units" + double e0 = sqrt( 2*laser_intensity / (m_e*c_vac*c_vac*c_vac*n_e) ); +//e0 = e0*(waist/width); // at entrance (3D Gaussian) + e0 = e0*sqrt(waist/width); // 2D, at entrance + + double dt = cfl_req*courant_length(Lx,Ly,Lz,nx,ny,nz); // in 1/wpe; n.b. c=1 in nat. units + double nsteps_cycle = trunc_granular(2*M_PI/(dt*omega),1)+1; + dt = 2*M_PI/omega/nsteps_cycle; // nsteps_cycle time steps in one laser cycle + + double wpe1ps = 1e-12*c_vac/delta; + + double t_stop = 100*wpe1ps; // Runtime in 1/wpe + int particle_interval = 0; + int poynting_interval = int(M_PI/((omega+1.5)*dt)); // Num. steps between dumping poynting flux + int field_interval = int(0.01*wpe1ps/dt); // Num. steps between saving field, hydro data + int velocity_interval = int(0.1*wpe1ps/dt); // How frequently to dump binned velocity space data + int restart_interval = 10000000; // Num. steps between restart dumps + int quota_check_interval = 20; + + // Ben: This quota thing gracefully terminates after writing a final restart after + // 11.5 hours; if you want a longer time before shutdown, set this value larger. If + // you want the code to just run all weekend, then set it to very long (2400.*3500, e.g.) + + double quota_sec = 23.7*3600; // Run quota in sec. + + // Turn on integrated backscatter poynting diagnostic - right now there is a bug in this, so we + // only write the integrated backscatter time history on the left face. + + int write_poynting_data = 1; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only = 0; // Nonzero means only write lower x face + int write_poynting_sum = 0; // Whether to write integrated Poynting data + int write_poynting_faces = 0; // Whether to write poynting data on sim boundary faces + int write_eb_faces = 0; // Whether to write e and b field data on sim boundary faces + + double N_e = nppc*nx*ny*nz; // Number of macro electrons in box + double Np_e = Lx*Ly*Lz; // "Number" of "physical" electrons in box (nat. units) + double q_e = -Np_e/N_e; // Charge per macro electron + double N_i = N_e; // Number of macro ions of each species in box + double Np_i = Np_e/(Z_H*f_H+Z_He*f_He); // "Number" of "physical" ions of each sp. 
in box + double qi_H = Z_H *f_H *Np_i/N_i; // Charge per H macro ion + double qi_He = Z_He*f_He*Np_i/N_i; // Charge per He macro ion + + // Print simulation parameters + + sim_log("***** Simulation parameters *****"); + sim_log("* Processors: "< 25,000 steps. + num_comm_round = 6; + + global->e0 = e0; + global->omega = omega; + global->field_interval = field_interval; + global->particle_interval = particle_interval; + global->poynting_interval = poynting_interval; + global->restart_interval = restart_interval; + global->quota_check_interval = quota_check_interval; + global->quota_sec = quota_sec; + global->rtoggle = 0; + global->load_particles = load_particles; + global->mobile_ions = mobile_ions; + global->H_present = H_present; + global->He_present = He_present; + global->topology_x = topology_x; + global->topology_y = topology_y; + global->topology_z = topology_z; + global->xfocus = xfocus; + global->ycenter = ycenter; + global->zcenter = zcenter; + global->mask = mask; + global->waist = waist; + global->width = width; + global->lambda = lambda; + + global->write_poynting_data = write_poynting_data; + + global->write_poynting_sum = write_poynting_sum; + global->write_poynting_faces = write_poynting_faces; + global->write_eb_faces = write_eb_faces; + global->write_backscatter_only = write_backscatter_only; + + global->vthe = uthe; + global->vthi_H = uthi_H; + global->vthi_He = uthi_He; + global->velocity_interval = velocity_interval; + + // Set up the species + // Allow additional local particles in case of non-uniformity. + + // Set up grid + sim_log("Setting up computational grid."); + grid->dx = hx; + grid->dy = hy; + grid->dz = hz; + grid->dt = dt; + grid->cvac = 1; + grid->eps0 = 1; + + sim_log("Setting up absorbing mesh."); + define_periodic_grid( 0, -0.5*Ly, -0.5*Lz, // Low corner + Lx, 0.5*Ly, 0.5*Lz, // High corner + nx, ny, nz, // Resolution + topology_x, topology_y, topology_z); // Topology + + if ( rank() == 0) { + set_domain_field_bc( BOUNDARY(-1,0,0), absorb_fields ); + } + if ( rank() == nproc() -1) { + set_domain_field_bc( BOUNDARY( 1,0,0), absorb_fields ); + } + + set_domain_field_bc( BOUNDARY( 0,0, 1), absorb_fields ); + set_domain_field_bc( BOUNDARY( 0,0,-1), absorb_fields ); + + sim_log("Setting up species."); + double max_local_np = 2.0*N_e/nproc(); + double max_local_nm = max_local_np/10.0; + species_t * electron = define_species("electron", -1, 1, max_local_np, max_local_nm, 20, 1); + + // Start with two ion species. We have option to go to Xe and Kr gas fills if + // we need a higher ion/electron macroparticle ratio. + + species_t *ion_H, *ion_He; + if ( mobile_ions ) { + if ( H_present ) ion_H = define_species("H", Z_H, mime_H, max_local_np, max_local_nm, 100, 1); + if ( He_present ) ion_He = define_species("He", Z_He, mime_He, max_local_np, max_local_nm, 100, 1); + } + + // From grid/partition.c: used to determine which domains are on edge +# define RANK_TO_INDEX(rank,ix,iy,iz) BEGIN_PRIMITIVE { \ + int _ix, _iy, _iz; \ + _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ + _iy = _ix/int(global->topology_x); /* iy = iy+gpy*iz */ \ + _ix -= _iy*int(global->topology_x); /* ix = ix */ \ + _iz = _iy/int(global->topology_y); /* iz = iz */ \ + _iy -= _iz*int(global->topology_y); /* iy = iy */ \ + (ix) = _ix; \ + (iy) = _iy; \ + (iz) = _iz; \ + } END_PRIMITIVE + + sim_log("Overriding x boundaries to absorb fields."); + int ix, iy, iz; // Domain location in mesh + RANK_TO_INDEX( int(rank()), ix, iy, iz ); + + // Set up Maxwellian reinjection B.C. 
+ sim_log("Setting up Maxwellian reinjection boundary condition."); + + particle_bc_t * maxwellian_reinjection = + define_particle_bc( maxwellian_reflux( species_list, entropy ) ); + set_reflux_temp( maxwellian_reinjection, electron, uthe, uthe ); + if ( mobile_ions ) { + if ( H_present ) set_reflux_temp( maxwellian_reinjection, ion_H, uthi_H, uthi_H ); + if ( He_present ) set_reflux_temp( maxwellian_reinjection, ion_He, uthi_He, uthi_He ); + } + + // Set up materials + sim_log("Setting up materials."); + define_material( "vacuum", 1 ); + define_field_array( NULL, damp ); + + // Paint the simulation volume with materials and boundary conditions +# define iv_region ( x< hx*iv_thick || x>Lx -hx*iv_thick \ + || z<-Lz/2+hz*iv_thick || z>Lz/2-hz*iv_thick ) /* all boundaries are i.v. */ + set_region_bc( iv_region, maxwellian_reinjection, maxwellian_reinjection, maxwellian_reinjection ); + + // Load particles + if ( load_particles ) { + sim_log("Loading particles."); + // Fast load of particles--don't bother fixing artificial domain correlations + double xmin=grid->x0, xmax=grid->x1; + double ymin=grid->y0, ymax=grid->y1; + double zmin=grid->z0, zmax=grid->z1; + repeat( N_e/(topology_x*topology_y*topology_z) ) { + double x = uniform( rng(0), xmin, xmax ); + double y = uniform( rng(0), ymin, ymax ); + double z = uniform( rng(0), zmin, zmax ); + if ( iv_region ) continue; // Particle fell in iv_region. Don't load. + // third to last arg is "weight," a positive number + inject_particle( electron, x, y, z, + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), -q_e, 0, 0 ); + if ( mobile_ions ) { + if ( H_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_H, x, y, z, + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), qi_H, 0, 0 ); + if ( He_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_He, x, y, z, + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), qi_He, 0, 0 ); + } + } + } + + + /*-------------------------------------------------------------------------- + * New dump definition + *------------------------------------------------------------------------*/ + + /*-------------------------------------------------------------------------- + * Set data output format + * + * This option allows the user to specify the data format for an output + * dump. Legal settings are 'band' and 'band_interleave'. Band-interleave + * format is the native storage format for data in VPIC. For field data, + * this looks something like: + * + * ex0 ey0 ez0 div_e_err0 cbx0 ... ex1 ey1 ez1 div_e_err1 cbx1 ... + * + * Banded data format stores all data of a particular state variable as a + * contiguous array, and is easier for ParaView to process efficiently. + * Banded data looks like: + * + * ex0 ex1 ex2 ... exN ey0 ey1 ey2 ... + * + *------------------------------------------------------------------------*/ + sim_log( "Setting up hydro and field diagnostics." 
); + + global->fdParams.format = band; + sim_log ( "Field output format : band" ); + + global->hedParams.format = band; + sim_log ( "Electron hydro output format : band" ); + + global->hHdParams.format = band; + sim_log ( "Hydrogen hydro output format : band" ); + + global->hHedParams.format = band; + sim_log ( "Helium hydro output format : band" ); + + /*-------------------------------------------------------------------------- + * Set stride + * + * This option allows data down-sampling at output. Data are down-sampled + * in each dimension by the stride specified for that dimension. For + * example, to down-sample the x-dimension of the field data by a factor + * of 2, i.e., half as many data will be output, select: + * + * global->fdParams.stride_x = 2; + * + * The following 2-D example shows down-sampling of a 7x7 grid (nx = 7, + * ny = 7. With ghost-cell padding the actual extents of the grid are 9x9. + * Setting the strides in x and y to equal 2 results in an output grid of + * nx = 4, ny = 4, with actual extents 6x6. + * + * G G G G G G G G G + * G X X X X X X X G + * G X X X X X X X G G G G G G G + * G X X X X X X X G G X X X X G + * G X X X X X X X G ==> G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G G G G G G + * G G G G G G G G G + * + * Note that grid extents in each dimension must be evenly divisible by + * the stride for that dimension: + * + * nx = 150; + * global->fdParams.stride_x = 10; // legal -> 150/10 = 15 + * + * global->fdParams.stride_x = 8; // illegal!!! -> 150/8 = 18.75 + *------------------------------------------------------------------------*/ + + // Strides for field and hydro arrays. Note that here we have defined them + // the same for fields and all hydro species; if desired, we could use different + // strides for each. Also note that strides must divide evenly into the number + // of cells in a given domain. 
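As the comment above requires, a stride is legal only if it divides the per-domain cell count evenly. A small illustrative helper enforcing that rule (the function name is ours, not part of the deck):

  // Illustrative stride check mirroring the divisibility rule described above:
  // a candidate stride is rejected unless it divides the local cell count.
  #include <cstdio>

  static int checked_stride( int n_cells, int stride ) {
    if ( stride <= 0 || n_cells % stride ) {
      std::fprintf( stderr, "stride %d does not evenly divide %d cells; using 1\n",
                    stride, n_cells );
      return 1;
    }
    return stride;
  }

  int main( void ) {
    std::printf( "%d\n", checked_stride( 150, 10 ) );  // legal   -> 10
    std::printf( "%d\n", checked_stride( 150, 8 ) );   // illegal -> falls back to 1
    return 0;
  }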
+ + // Define strides and test that they evenly divide into grid->nx, ny, nz + int stride_x = 1, stride_y = 1, stride_z = 1; + if ( int(grid->nx)%stride_x ) ERROR(("Stride doesn't evenly divide grid->nx.")); + if ( int(grid->ny)%stride_y ) ERROR(("Stride doesn't evenly divide grid->ny.")); + if ( int(grid->nz)%stride_z ) ERROR(("Stride doesn't evenly divide grid->nz.")); + + //---------------------------------------------------------------------- + // Fields + + // relative path to fields data from global header + sprintf(global->fdParams.baseDir, "field"); + + // base file name for fields output + sprintf(global->fdParams.baseFileName, "fields"); + + // set field strides + global->fdParams.stride_x = stride_x; + global->fdParams.stride_y = stride_y; + global->fdParams.stride_z = stride_z; + sim_log ( "Fields x-stride " << global->fdParams.stride_x ); + sim_log ( "Fields y-stride " << global->fdParams.stride_y ); + sim_log ( "Fields z-stride " << global->fdParams.stride_z ); + + // add field parameters to list + global->outputParams.push_back(&global->fdParams); + + //---------------------------------------------------------------------- + // Electron hydro + + // relative path to electron species data from global header + sprintf(global->hedParams.baseDir, "ehydro"); + + // base file name for fields output + sprintf(global->hedParams.baseFileName, "e_hydro"); + + // set electron hydro strides + global->hedParams.stride_x = stride_x; + global->hedParams.stride_y = stride_y; + global->hedParams.stride_z = stride_z; + sim_log ( "Electron species x-stride " << global->hedParams.stride_x ); + sim_log ( "Electron species y-stride " << global->hedParams.stride_y ); + sim_log ( "Electron species z-stride " << global->hedParams.stride_z ); + + // add electron hydro parameters to list + global->outputParams.push_back(&global->hedParams); + + //---------------------------------------------------------------------- + // Hydrogen hydro + + // relative path to electron species data from global header + sprintf(global->hHdParams.baseDir, "Hhydro"); + + // base file name for fields output + sprintf(global->hHdParams.baseFileName, "H_hydro"); + + // set hydrogen hydro strides + global->hHdParams.stride_x = stride_x; + global->hHdParams.stride_y = stride_y; + global->hHdParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHdParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHdParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHdParams.stride_z ); + + // add hydrogen hydro parameters to list + global->outputParams.push_back(&global->hHdParams); + + //---------------------------------------------------------------------- + // Helium hydro + + // relative path to electron species data from global header + sprintf(global->hHedParams.baseDir, "Hehydro"); + + // base file name for fields output + sprintf(global->hHedParams.baseFileName, "He_hydro"); + + // set helium hydro strides + global->hHedParams.stride_x = stride_x; + global->hHedParams.stride_y = stride_y; + global->hHedParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHedParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHedParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHedParams.stride_z ); + + // add helium hydro parameters to list + global->outputParams.push_back(&global->hHedParams); + + /*----------------------------------------------------------------------- + * Set output fields + * + * It is now possible to select which 
state-variables are output on a + * per-dump basis. Variables are selected by passing an or-list of + * state-variables by name. For example, to only output the x-component + * of the electric field and the y-component of the magnetic field, the + * user would call output_variables like: + * + * global->fdParams.output_variables( ex | cby ); + * + * NOTE: OUTPUT VARIABLES ARE ONLY USED FOR THE BANDED FORMAT. IF THE + * FORMAT IS BAND-INTERLEAVE, ALL VARIABLES ARE OUTPUT AND CALLS TO + * 'output_variables' WILL HAVE NO EFFECT. + * + * ALSO: DEFAULT OUTPUT IS NONE! THIS IS DUE TO THE WAY THAT VPIC + * HANDLES GLOBAL VARIABLES IN THE INPUT DECK AND IS UNAVOIDABLE. + * + * For convenience, the output variable 'all' is defined: + * + * global->fdParams.output_variables( all ); + *------------------------------------------------------------------------*/ + /* CUT AND PASTE AS A STARTING POINT + * REMEMBER TO ADD APPROPRIATE GLOBAL DUMPPARAMETERS VARIABLE + + output_variables( all ); + + output_variables( electric | div_e_err | magnetic | div_b_err | + tca | rhob | current | rhof | + emat | nmat | fmat | cmat ); + + output_variables( current_density | charge_density | + momentum_density | ke_density | stress_tensor ); + */ + + //global->fdParams.output_variables( all ); + global->fdParams.output_variables( electric | magnetic ); + + //global->hedParams.output_variables( all ); + //global->hedParams.output_variables( current_density | momentum_density ); + global->hedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHdParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + + /*-------------------------------------------------------------------------- + * Convenience functions for simlog output + *------------------------------------------------------------------------*/ + char varlist[256]; + + create_field_list(varlist, global->fdParams); + sim_log ( "Fields variable list: " << varlist ); + + create_hydro_list(varlist, global->hedParams); + sim_log ( "Electron species variable list: " << varlist ); + + create_hydro_list(varlist, global->hHdParams); + sim_log ( "Ion species variable list: " << varlist ); + + /*------------------------------------------------------------------------*/ + + sim_log("***Finished with user-specified initialization ***"); + + // Upon completion of the initialization, the following occurs: + // - The synchronization error (tang E, norm B) is computed between domains + // and tang E / norm B are synchronized by averaging where discrepancies + // are encountered. + // - The initial divergence error of the magnetic field is computed and + // one pass of cleaning is done (for good measure) + // - The bound charge density necessary to give the simulation an initially + // clean divergence e is computed. 
+ // - The particle momentum is uncentered from u_0 to u_{-1/2} + // - The user diagnostics are called on the initial state + // - The physics loop is started + // + // The physics loop consists of: + // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2} + // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles) + // - User current injection (adjust field(x,y,z).jfx, jfy, jfz) + // - Advance B from B_0 to B_{1/2} + // - Advance E from E_0 to E_1 + // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz) + // - Advance B from B_{1/2} to B_1 + // - (periodically) Divergence clean electric field + // - (periodically) Divergence clean magnetic field + // - (periodically) Synchronize shared tang e and norm b + // - Increment the time step + // - Call user diagnostics + // - (periodically) Print a status message +} + + +begin_diagnostics { + + int mobile_ions=global->mobile_ions, + H_present=global->H_present, + He_present=global->He_present; + + if ( step()%200==0 ) sim_log("Time step: "<x##_interval>0 && remainder(step(),global->x##_interval)==0) + + // Do a mkdir at time t=0 to ensure we have all the directories we need + if ( step()==0 ) { + dump_mkdir("rundata"); + dump_mkdir("fft"); + dump_mkdir("field"); + dump_mkdir("ehydro"); + dump_mkdir("Hhydro"); + dump_mkdir("Hehydro"); + dump_mkdir("restart"); + dump_mkdir("particle"); + dump_mkdir("poynting"); + dump_mkdir("velocity"); + + // Turn off rundata for now + // dump_grid("rundata/grid"); + // dump_materials("rundata/materials"); + // dump_species("rundata/species"); + global_header("global", global->outputParams); + + } + + // Field and hydro data + + if ( should_dump(field) ) { + field_dump( global->fdParams ); + +#if 1 + if ( global->load_particles ) { + hydro_dump( "electron", global->hedParams ); + if ( global->mobile_ions ) { + if ( global->H_present ) hydro_dump( "H", global->hHdParams ); + if ( global->He_present ) hydro_dump( "He", global->hHedParams ); + } + } +#endif + + } + + // Particle dump data +#if 0 + if ( should_dump(particle) && global->load_particles ) { + dump_particles( "electron", "particle/eparticle" ); + if ( global->mobile_ions ) { + if ( global->H_present ) dump_particles( "H", "particle/Hparticle" ); + if ( global->He_present ) dump_particles( "He", "particle/Heparticle" ); + } + } +#endif + + +#define ALLOCATE(A,LEN,TYPE) \ + if ( !((A)=(TYPE *)malloc((size_t)(LEN)*sizeof(TYPE))) ) ERROR(("Cannot allocate.")); + + //--------------------------------------------------------------------------- + + // Restart dump filenames are by default tagged with the current timestep. + // If you do not want to do this add "0" to the dump_restart arguments. One + // reason to not tag the dumps is if you want only one restart dump (the most + // recent one) to be stored at a time to conserve on file space. Note: A + // restart dump should be the _very_ _last_ dump made in a diagnostics + // routine. If not, the diagnostics performed after the restart dump but + // before the next timestep will be missed on a restart. Restart dumps are + // in a binary format. Each rank makes a restart dump. + + // Restarting from restart files is accomplished by running the executable + // with "restart restart" as additional command line arguments. The executable + // must be identical to that used to generate the restart dumps or else + // the behavior may be unpredictable. 
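The restart logic that follows alternates between two checkpoint bases via global->rtoggle, so the previous dump stays intact until the new one completes. A minimal sketch of that double-buffering pattern; write_checkpoint() is a stand-in for VPIC's checkpt(), for illustration only:

  // Sketch of the alternating (double-buffered) restart scheme used below.
  // write_checkpoint() is a placeholder for real checkpoint I/O.
  #include <cstdio>

  static void write_checkpoint( const char * base, int step ) {
    std::printf( "checkpoint -> %s.%d\n", base, step );  // placeholder for real I/O
  }

  int main( void ) {
    static const char * restart_fbase[2] = { "restart/restart0", "restart/restart1" };
    int rtoggle = 0;
    for ( int step = 1000; step <= 4000; step += 1000 ) {
      write_checkpoint( restart_fbase[rtoggle], step );  // newest dump
      rtoggle ^= 1;                                      // keep the previous one intact
    }
    return 0;
  }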
+ + // Note: restart is not currently working with custom boundary conditions + // (such as the reflux condition) and has not been tested with emission + // turned on. + + if ( step()>0 && should_dump(restart) ) { + static const char * restart_fbase[2] = { "restart/restart0", "restart/restart1" }; + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( restart_fbase[global->rtoggle], step() ); + //END_TURNSTILE; + global->rtoggle^=1; + } + + // Shut down simulation when wall clock time exceeds global->quota_sec. + // Note that the mp_elapsed() is guaranteed to return the same value for all + // processors (i.e., elapsed time on proc #0), and therefore the abort will + // be synchronized across processors. + + if ( step()>0 && global->quota_check_interval && (step()%global->quota_check_interval)==0 ) { + if ( uptime() > global->quota_sec ) { + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( "restart/restart", step() ); + //END_TURNSTILE; + + sim_log( "Restart dump restart completed." ); + sim_log( "Allowed runtime exceeded for this job. Terminating." ); + mp_barrier(); // Just to be safe + halt_mp(); + exit(0); + } + } + +} + + +begin_field_injection { + // Inject a light wave from lhs boundary with E aligned along y + // Use scalar diffraction theory for the Gaussian beam source. (This is approximate). + + // For quiet startup (i.e., so that we don't propagate a delta-function noise + // pulse at time t=0) we multiply by a constant phase term exp(i phi) where: + // phi = k*global->xfocus+atan(h) (3d) + + // Inject from the left a field of the form ey = e0 sin( omega t ) + +# define DY ( grid->y0 + (iy-0.5)*grid->dy - global->ycenter ) +# define DZ ( grid->z0 + (iz-1 )*grid->dz - global->zcenter ) +# define R2 ( DY*DY + DZ*DZ ) +# define R2Z ( DZ*DZ ) +# define PHASE ( -global->omega*t + h*R2Z/(global->width*global->width) ) +# define MASK ( R2Z<=pow(global->mask*global->width,2) ? 
1 : 0 )
+
+ if ( grid->x0==0 ) { // Node is on left boundary
+ double alpha = grid->cvac*grid->dt/grid->dx;
+ double emax_coeff = (4/(1+alpha))*global->omega*grid->dt*global->e0;
+ double prefactor = emax_coeff*sqrt(2/M_PI);
+ double t = grid->dt*step();
+
+ // Compute Rayleigh length in c/wpe
+ double rl = M_PI*global->waist*global->waist/global->lambda;
+
+ double pulse_shape_factor = 1;
+ float pulse_length = 70; // units of 1/wpe
+ float sin_t_tau = sin(0.5*t*M_PI/pulse_length);
+ pulse_shape_factor = ( t<pulse_length ? sin_t_tau*sin_t_tau : 1 );
+ float h = global->xfocus/rl; // Distance / Rayleigh length
+
+ // Loop over all Ey values on left edge of this node
+ for ( int iz=1; iz<=grid->nz+1; ++iz )
+ for ( int iy=1; iy<=grid->ny; ++iy )
+ field(1,iy,iz).ey += prefactor
+ * cos(PHASE)
+// * exp(-R2/(global->width*global->width)) // 3D
+ * exp(-R2Z/(global->width*global->width))
+ * MASK * pulse_shape_factor;
+ }
+}
+
+
+begin_particle_injection {
+ // No particle injection for this simulation
+}
+
+
+begin_current_injection {
+ // No current injection for this simulation
+}
+
+begin_particle_collisions {
+ // No particle collisions for this simulation
+}
+
diff --git a/TestScripts/lpi-deck/lpi_2d_F6_test-nx96 b/TestScripts/lpi-deck/lpi_2d_F6_test-nx96
new file mode 100755
index 00000000..59a316c9
--- /dev/null
+++ b/TestScripts/lpi-deck/lpi_2d_F6_test-nx96
@@ -0,0 +1,987 @@
+// Stress Tensor(e_hydro)_4/abs(Charge Density(e_hydro)+ 1.0e-8) - (Momentum Density(e_hydro)_Y/abs(Charge Density(e_hydro)+ 1.0e-8)) * (Current Density(e_hydro)_Y/(Charge Density(e_hydro) + 1.0e-8))
+
+//========================================================================
+//
+// LPI 3D deck - Linearly polarized (in y) plane wave incident from left
+// boundary
+//
+// Adapted from Albright's Lightning 3D LPI deck.
+// B. Albright, X-1-PTA; 28 Jan. 2007
+// Lin Yin, X-1-PTA, 23 Feb 2009, for Cerrillos test
+//
+// Executable creates its own directory structure. Remove the old with:
+//
+// rm -rf rundata ehydro Hhydro Hehydro restart poynting velocity particle field
+//========================================================================
+
+// Employ turnstiles to partially serialize the high-volume file writes.
+// In this case, the restart dumps. Set NUM_TURNSTILES to be the desired
+// number of simultaneous writes.
+#define NUM_TURNSTILES 128
+
+begin_globals {
+ double e0; // peak amplitude of oscillating electric field
+ double omega; // angular freq. of the beam
+ int field_interval; // how often to dump field and hydro
+ int particle_interval; // how often to dump particle data
+ int poynting_interval; // how often to compute poynting flux on boundary
+ int restart_interval; // how often to write restart data
+ int quota_check_interval; // how often to check if quota exceeded
+ double quota_sec; // run quota in sec
+ int rtoggle; // enable save of last 2 restart files for safety
+ int load_particles; // were particles loaded?
+ double topology_x; // domain topology needed to normalize Poynting diagnostic + double topology_y; + double topology_z; + int mobile_ions; + int H_present; + int He_present; + + // Parameters for 3d Gaussian wave launch + double lambda; + double waist; // how wide the focused beam is + double width; + double zcenter; // center of beam at boundary in z + double ycenter; // center of beam at boundary in y + double xfocus; // how far from boundary to focus + double mask; // # gaussian widths from beam center where I is nonzero + + // Ponyting diagnostic flags - which output to turn on + + // write_backscatter_only flag: when this flag is nonzero, it means to only compute + // poynting data for the lower-x surface. This flag affects both the summed poynting + // data as well as the surface data. + + int write_poynting_data; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only; // nonzero means we only write backscatter diagnostic for fields + + // write_poynting_sum is nonzero if you wish to write a file containing the integrated + // poynting data on one or more surfaces. If write_backscatter_only is nonzero, then + // the output file will be a time series of double precision numbers containing the + // integrated poynting flux at that point in time. If write_backscatter_only is zero, + // then for each time step, six double precision numbers are written, containing the + // poynting flux through the lower x, upper x, lower y, upper y, lower z, and upper z + // surfaces. + + int write_poynting_sum; // nonzero if we wish to write integrated Poynting data + + // write_poynting_faces is probably useless, but I put it here anyway in case it's not. + // When this flag is nonzero, it will print out the poynting flux at each of a 2D array + // of points on the surface. When this flag is turned on and write_backscatter_only is + // nonzero, then only the 2D array of points on the lower-x boundary surface are written + // for each time step. When this flag is turned on and write_bacscatter_only is + // zero, then it will write 2D array data for the lower x, upper x, lower y, upper y, + // lower z, upper z surfaces for each time step. + + int write_poynting_faces; // nonzero if we wish to write Poynting data on sim boundaries + + // write_eb_faces is nonzero when we wish to get the raw e and b data at the boundary + // (e.g., to be used with a filter to distinguish between SRS and SBS backscatter). + // When this flag is on and write_backscatter_only is nonzero, then only the 2D array + // of points on the lower-x boundary surface are written for each time step. When + // this flag is turned on and write_backscatteR_only is zero, then it will write 2D + // array data for the lower x, upper x, lower y, upper y, lower z, upper z surfaces for + // each time step. When turned on, four files are produced: e1, e2, cb1, cb2. The + // values of the quantities printed depend on the face one is considering: for the + // x faces, e1 = ey, e2 = ez, cb1 = cby, cb2 = cbz. Similarly for y and z surfaces, + // but where 1 and 2 stand for (z,x) and (x,y) coordinates, respectively. 
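The launch parameters above are tied together below by the Gaussian-beam relation width = waist*sqrt(1 + (lambda*xfocus/(pi*waist^2))^2), and the drive is zeroed outside a radius of mask*width. A standalone arithmetic sketch of that relation; the input numbers are placeholders, not values from a run:

  // Spot size at the launch boundary and the mask radius, using the same
  // Gaussian-beam relation the deck applies below.  Inputs are illustrative
  // placeholders in c/wpe, not values taken from a particular deck variant.
  #include <cmath>
  #include <cstdio>

  int main( void ) {
    double lambda = 1.4;                 // vacuum wavelength, c/wpe (placeholder)
    double waist  = 6*lambda;            // f/6 beam, as in the deck
    double xfocus = 18;                  // boundary-to-focus distance (placeholder)
    double mask   = 1.5;                 // gaussian widths kept nonzero
    double width  = waist*std::sqrt( 1 + std::pow( lambda*xfocus/(M_PI*waist*waist), 2 ) );
    std::printf( "width at boundary = %.3g c/wpe, drive zeroed for r > %.3g c/wpe\n",
                 width, mask*width );
    return 0;
  }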
+ + int write_eb_faces; // nonzero if we wish to write E and B data on sim boundaries + + // Needed for movie files + float vthe; // vthe/c + float vthi_He; // vthi_He/c + float vthi_H; // vthi_H/c + int velocity_interval; // how frequently to dump binned velocity space data + + // Dump parameters for standard VPIC output formats + DumpParameters fdParams; + DumpParameters hedParams; + DumpParameters hHdParams; + DumpParameters hHedParams; + std::vector outputParams; +}; + +begin_initialization { + + // System of units + double ec = 4.8032e-10; // stat coulomb + double c_vac = 2.99792458e10; // cm/sec + double m_e = 9.1094e-28; // g + double k_b = 1.6022e-12; // erg/eV + double mec2 = m_e*c_vac*c_vac/k_b; + double mpc2 = mec2*1836.0; + + double cfl_req = 0.98; // How close to Courant should we try to run + double damp = 0; // How much radiation damping + double iv_thick = 2; // Thickness of impermeable vacuum (in cells) + + // Experimental parameters + + double t_e = 600; // electron temperature, eV + double t_i = 150; // ion temperature, eV + double n_e_over_n_crit = 0.05; // n_e/n_crit + double vacuum_wavelength = 527 * 1e-7; // third micron light (cm) + double laser_intensity = 2.5e15 * 1e7; // in ergs/cm^2 (note: 1 W = 1e7 ergs) + + // Simulation parameters + +#if 0 + // 2048 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 64; + double topology_y = 4; + double topology_z = 8; + // single-processor mesh = 128 x 128 x 64 +#endif + +#if 0 + // 14300x4 processors + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 12.0 * 1e-4; + double Lz = 12.0 * 1e-4; + double nx = 6292; + double ny = 440; + double nz = 440; + double topology_x = 143; + double topology_y = 10; + double topology_z = 10; + // single-processor mesh = 44 x 44 x 44 +#endif + +#if 0 + // 2*4096 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 128; + double topology_y = 8; + double topology_z = 8; + // single-processor mesh = 64 x 64 x 64 +#endif + + // This choice of nx, ny, nz and topology values imply that the + // grid is 102 x 42 x 42 cells. The transverse size is slightly smaller than + // in the original deck in order to enable more choices for down-sampling/striding + // the field and hydro output. (The original had 43 cells in each direction, which + // is prime and therefore not strideable with Ben's output format). 
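The temperatures above enter the deck as normalized thermal spreads u = v_th/c = sqrt(T/mc^2), and the electron Debye length follows as uthe*delta. A standalone check with the same formulas; the skin-depth value here is a placeholder:

  // Standalone check of the normalized thermal spreads used below and the
  // resulting Debye length.  delta is a placeholder; the deck computes it
  // from the wavelength and n_e/n_crit.
  #include <cmath>
  #include <cstdio>

  int main( void ) {
    double mec2    = 5.11e5;                     // eV (electron rest energy)
    double mpc2    = mec2*1836.0;                // eV
    double t_e     = 600, t_i = 150;             // eV, as in the deck
    double A_He    = 14;                         // the "He" species is N3+ here
    double uthe    = std::sqrt( t_e/mec2 );
    double uthi_H  = std::sqrt( t_i/mpc2 );
    double uthi_He = std::sqrt( t_i/(mpc2*A_He) );
    double delta   = 3.8e-5;                     // skin depth in cm (placeholder)
    std::printf( "uthe=%.4f  uthi_H=%.2e  uthi_He=%.2e  debye=%.3g cm\n",
                 uthe, uthi_H, uthi_He, uthe*delta );
    return 0;
  }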
+ +# if 0 + // one processor problem, for debugging + Lx /= topology_x; nx /= topology_x; + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 1; + topology_y = 1; + topology_z = 1; +# endif + +# if 0 + // twoprocessor problem, for debugging + Lx /= (topology_x/2); nx /= (topology_x/2); + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 2; + topology_y = 1; + topology_z = 1; +# endif + +// Run a smaller problem for debugging purposes +#if 0 + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 1.20 * 1e-4; + double Lz = 1.20 * 1e-4; + double nx = 6292; + double ny = 44; + double nz = 44; + double topology_x = 143; + double topology_y = 1; + double topology_z = 1; + + // nproc() x 1 x 1 : + double topology = nproc(); + Lx /= 143; Lx *= topology; + nx /= 143; nx *= topology; + topology_x /= 143; topology_x *= topology; + + // single-processor mesh = 44 x 44 x 44 +#endif + + float box_size_x = 120.0* 1e-4; + float box_size_z = 12.0* 1e-4; + + int mobile_ions = 1; // Whether or not to push ions + double nppc = 512; // Ave. number of particles/cell in ea. species + //double nppc = 32; // Ave. number of particles/cell in ea. species + int load_particles = 1; // Flag to turn on/off particle load + int He_present = 1; + int H_present = 1; + double f_He = 1; // Ratio of number density of He to total ion density + double f_H = 1-f_He; // Ratio of number density of H to total ion density + if ( f_He == 1 ) H_present = 0; + if ( f_H == 1 ) He_present = 0; + +// Here _He is N3+ + // Precompute some useful variables. + double A_H = 1; + double Z_H = 1; + double A_He = 14; + double Z_He = 3; + double mic2_H = mpc2*A_H; + double mic2_He = mpc2*A_He; + double mime_H = mic2_H /mec2; + double mime_He = mic2_He/mec2; + + double uthe = sqrt(t_e/mec2); // vthe/c + double uthi_H = sqrt(t_i/mic2_H); // vthi/c for H + double uthi_He = sqrt(t_i/mic2_He); // vthi/c for He + + // Plasma skin deptth in cm + double delta = (vacuum_wavelength / (2*M_PI) ) / sqrt( n_e_over_n_crit ); + + double n_e = c_vac*c_vac*m_e/(4*M_PI*ec*ec*delta*delta); // electron density in cm^-3 + double debye = uthe*delta; // electron Debye length (cm) + double omega = sqrt( 1/n_e_over_n_crit ); // laser beam freq. in wpe + + double nx = 7776; + double ny = 1; + double nz = 549; + +// Weak Scaling Test --matsekh +#if 1 + // nx = 7776 / {9, 27, 81, 162, 324, 648} == {864, 288, 96, 48, 24, 12} + nx = nx/81; + box_size_x /= 81.; +#endif +// End Weak Scaling Test --matsekh + +#if 0 + // DEBUG - run on 16 proc. 
+ nx = nx/81; // default nx=7776/81==96 --matsekh + box_size_x /= 81.; + +#endif + + double hx = box_size_x/(delta*nx); // in c/wpe + double hz = box_size_z/(delta*nz); // in c/wpe + double hy = hz; + + double cell_size_x = hx*delta/debye; // Cell size in Debye lengths + double cell_size_z = hz*delta/debye; +//double cell_size_y = hy*delta/debye; + + double Lx = nx*hx; // in c/wpe + double Ly = ny*hy; // in c/wpe + double Lz = nz*hz; // in c/wpe + + double topology_x = nproc(); + double topology_y = 1; + double topology_z = 1; + + double f_number = 6; // f/# of beam + double lambda = vacuum_wavelength/delta; // vacuum wavelength in c/wpe + double waist = f_number*lambda; // width of beam at focus in c/wpe + double xfocus = Lx/2; // in c/wpe + double ycenter = 0; // center of spot in y on lhs boundary + double zcenter = 0; // center of spot in z on lhs boundary + double mask = 1.5; // set drive I=0 outside r>mask*width at lhs boundary + double width = waist*sqrt( 1 + (lambda*xfocus/(M_PI*waist*waist))*(lambda*xfocus/(M_PI*waist*waist))); + + // Peak instantaneous E field in "natural units" + double e0 = sqrt( 2*laser_intensity / (m_e*c_vac*c_vac*c_vac*n_e) ); +//e0 = e0*(waist/width); // at entrance (3D Gaussian) + e0 = e0*sqrt(waist/width); // 2D, at entrance + + double dt = cfl_req*courant_length(Lx,Ly,Lz,nx,ny,nz); // in 1/wpe; n.b. c=1 in nat. units + double nsteps_cycle = trunc_granular(2*M_PI/(dt*omega),1)+1; + dt = 2*M_PI/omega/nsteps_cycle; // nsteps_cycle time steps in one laser cycle + + double wpe1ps = 1e-12*c_vac/delta; + + double t_stop = 100*wpe1ps; // Runtime in 1/wpe + int particle_interval = 0; + int poynting_interval = int(M_PI/((omega+1.5)*dt)); // Num. steps between dumping poynting flux + int field_interval = int(0.01*wpe1ps/dt); // Num. steps between saving field, hydro data + int velocity_interval = int(0.1*wpe1ps/dt); // How frequently to dump binned velocity space data + int restart_interval = 10000000; // Num. steps between restart dumps + int quota_check_interval = 20; + + // Ben: This quota thing gracefully terminates after writing a final restart after + // 11.5 hours; if you want a longer time before shutdown, set this value larger. If + // you want the code to just run all weekend, then set it to very long (2400.*3500, e.g.) + + double quota_sec = 23.7*3600; // Run quota in sec. + + // Turn on integrated backscatter poynting diagnostic - right now there is a bug in this, so we + // only write the integrated backscatter time history on the left face. + + int write_poynting_data = 1; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only = 0; // Nonzero means only write lower x face + int write_poynting_sum = 0; // Whether to write integrated Poynting data + int write_poynting_faces = 0; // Whether to write poynting data on sim boundary faces + int write_eb_faces = 0; // Whether to write e and b field data on sim boundary faces + + double N_e = nppc*nx*ny*nz; // Number of macro electrons in box + double Np_e = Lx*Ly*Lz; // "Number" of "physical" electrons in box (nat. units) + double q_e = -Np_e/N_e; // Charge per macro electron + double N_i = N_e; // Number of macro ions of each species in box + double Np_i = Np_e/(Z_H*f_H+Z_He*f_He); // "Number" of "physical" ions of each sp. 
in box + double qi_H = Z_H *f_H *Np_i/N_i; // Charge per H macro ion + double qi_He = Z_He*f_He*Np_i/N_i; // Charge per He macro ion + + // Print simulation parameters + + sim_log("***** Simulation parameters *****"); + sim_log("* Processors: "< 25,000 steps. + num_comm_round = 6; + + global->e0 = e0; + global->omega = omega; + global->field_interval = field_interval; + global->particle_interval = particle_interval; + global->poynting_interval = poynting_interval; + global->restart_interval = restart_interval; + global->quota_check_interval = quota_check_interval; + global->quota_sec = quota_sec; + global->rtoggle = 0; + global->load_particles = load_particles; + global->mobile_ions = mobile_ions; + global->H_present = H_present; + global->He_present = He_present; + global->topology_x = topology_x; + global->topology_y = topology_y; + global->topology_z = topology_z; + global->xfocus = xfocus; + global->ycenter = ycenter; + global->zcenter = zcenter; + global->mask = mask; + global->waist = waist; + global->width = width; + global->lambda = lambda; + + global->write_poynting_data = write_poynting_data; + + global->write_poynting_sum = write_poynting_sum; + global->write_poynting_faces = write_poynting_faces; + global->write_eb_faces = write_eb_faces; + global->write_backscatter_only = write_backscatter_only; + + global->vthe = uthe; + global->vthi_H = uthi_H; + global->vthi_He = uthi_He; + global->velocity_interval = velocity_interval; + + // Set up the species + // Allow additional local particles in case of non-uniformity. + + // Set up grid + sim_log("Setting up computational grid."); + grid->dx = hx; + grid->dy = hy; + grid->dz = hz; + grid->dt = dt; + grid->cvac = 1; + grid->eps0 = 1; + + sim_log("Setting up absorbing mesh."); + define_periodic_grid( 0, -0.5*Ly, -0.5*Lz, // Low corner + Lx, 0.5*Ly, 0.5*Lz, // High corner + nx, ny, nz, // Resolution + topology_x, topology_y, topology_z); // Topology + + if ( rank() == 0) { + set_domain_field_bc( BOUNDARY(-1,0,0), absorb_fields ); + } + if ( rank() == nproc() -1) { + set_domain_field_bc( BOUNDARY( 1,0,0), absorb_fields ); + } + + set_domain_field_bc( BOUNDARY( 0,0, 1), absorb_fields ); + set_domain_field_bc( BOUNDARY( 0,0,-1), absorb_fields ); + + sim_log("Setting up species."); + double max_local_np = 2.0*N_e/nproc(); + double max_local_nm = max_local_np/10.0; + species_t * electron = define_species("electron", -1, 1, max_local_np, max_local_nm, 20, 1); + + // Start with two ion species. We have option to go to Xe and Kr gas fills if + // we need a higher ion/electron macroparticle ratio. + + species_t *ion_H, *ion_He; + if ( mobile_ions ) { + if ( H_present ) ion_H = define_species("H", Z_H, mime_H, max_local_np, max_local_nm, 100, 1); + if ( He_present ) ion_He = define_species("He", Z_He, mime_He, max_local_np, max_local_nm, 100, 1); + } + + // From grid/partition.c: used to determine which domains are on edge +# define RANK_TO_INDEX(rank,ix,iy,iz) BEGIN_PRIMITIVE { \ + int _ix, _iy, _iz; \ + _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ + _iy = _ix/int(global->topology_x); /* iy = iy+gpy*iz */ \ + _ix -= _iy*int(global->topology_x); /* ix = ix */ \ + _iz = _iy/int(global->topology_y); /* iz = iz */ \ + _iy -= _iz*int(global->topology_y); /* iy = iy */ \ + (ix) = _ix; \ + (iy) = _iy; \ + (iz) = _iz; \ + } END_PRIMITIVE + + sim_log("Overriding x boundaries to absorb fields."); + int ix, iy, iz; // Domain location in mesh + RANK_TO_INDEX( int(rank()), ix, iy, iz ); + + // Set up Maxwellian reinjection B.C. 
+ sim_log("Setting up Maxwellian reinjection boundary condition."); + + particle_bc_t * maxwellian_reinjection = + define_particle_bc( maxwellian_reflux( species_list, entropy ) ); + set_reflux_temp( maxwellian_reinjection, electron, uthe, uthe ); + if ( mobile_ions ) { + if ( H_present ) set_reflux_temp( maxwellian_reinjection, ion_H, uthi_H, uthi_H ); + if ( He_present ) set_reflux_temp( maxwellian_reinjection, ion_He, uthi_He, uthi_He ); + } + + // Set up materials + sim_log("Setting up materials."); + define_material( "vacuum", 1 ); + define_field_array( NULL, damp ); + + // Paint the simulation volume with materials and boundary conditions +# define iv_region ( x< hx*iv_thick || x>Lx -hx*iv_thick \ + || z<-Lz/2+hz*iv_thick || z>Lz/2-hz*iv_thick ) /* all boundaries are i.v. */ + set_region_bc( iv_region, maxwellian_reinjection, maxwellian_reinjection, maxwellian_reinjection ); + + // Load particles + if ( load_particles ) { + sim_log("Loading particles."); + // Fast load of particles--don't bother fixing artificial domain correlations + double xmin=grid->x0, xmax=grid->x1; + double ymin=grid->y0, ymax=grid->y1; + double zmin=grid->z0, zmax=grid->z1; + repeat( N_e/(topology_x*topology_y*topology_z) ) { + double x = uniform( rng(0), xmin, xmax ); + double y = uniform( rng(0), ymin, ymax ); + double z = uniform( rng(0), zmin, zmax ); + if ( iv_region ) continue; // Particle fell in iv_region. Don't load. + // third to last arg is "weight," a positive number + inject_particle( electron, x, y, z, + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), -q_e, 0, 0 ); + if ( mobile_ions ) { + if ( H_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_H, x, y, z, + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), qi_H, 0, 0 ); + if ( He_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_He, x, y, z, + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), qi_He, 0, 0 ); + } + } + } + + + /*-------------------------------------------------------------------------- + * New dump definition + *------------------------------------------------------------------------*/ + + /*-------------------------------------------------------------------------- + * Set data output format + * + * This option allows the user to specify the data format for an output + * dump. Legal settings are 'band' and 'band_interleave'. Band-interleave + * format is the native storage format for data in VPIC. For field data, + * this looks something like: + * + * ex0 ey0 ez0 div_e_err0 cbx0 ... ex1 ey1 ez1 div_e_err1 cbx1 ... + * + * Banded data format stores all data of a particular state variable as a + * contiguous array, and is easier for ParaView to process efficiently. + * Banded data looks like: + * + * ex0 ex1 ex2 ... exN ey0 ey1 ey2 ... + * + *------------------------------------------------------------------------*/ + sim_log( "Setting up hydro and field diagnostics." 
); + + global->fdParams.format = band; + sim_log ( "Field output format : band" ); + + global->hedParams.format = band; + sim_log ( "Electron hydro output format : band" ); + + global->hHdParams.format = band; + sim_log ( "Hydrogen hydro output format : band" ); + + global->hHedParams.format = band; + sim_log ( "Helium hydro output format : band" ); + + /*-------------------------------------------------------------------------- + * Set stride + * + * This option allows data down-sampling at output. Data are down-sampled + * in each dimension by the stride specified for that dimension. For + * example, to down-sample the x-dimension of the field data by a factor + * of 2, i.e., half as many data will be output, select: + * + * global->fdParams.stride_x = 2; + * + * The following 2-D example shows down-sampling of a 7x7 grid (nx = 7, + * ny = 7. With ghost-cell padding the actual extents of the grid are 9x9. + * Setting the strides in x and y to equal 2 results in an output grid of + * nx = 4, ny = 4, with actual extents 6x6. + * + * G G G G G G G G G + * G X X X X X X X G + * G X X X X X X X G G G G G G G + * G X X X X X X X G G X X X X G + * G X X X X X X X G ==> G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G G G G G G + * G G G G G G G G G + * + * Note that grid extents in each dimension must be evenly divisible by + * the stride for that dimension: + * + * nx = 150; + * global->fdParams.stride_x = 10; // legal -> 150/10 = 15 + * + * global->fdParams.stride_x = 8; // illegal!!! -> 150/8 = 18.75 + *------------------------------------------------------------------------*/ + + // Strides for field and hydro arrays. Note that here we have defined them + // the same for fields and all hydro species; if desired, we could use different + // strides for each. Also note that strides must divide evenly into the number + // of cells in a given domain. 
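The 7x7, stride-2 example above reduces each dimension to ceil(n/stride) output cells. A small sketch that reproduces only the arithmetic of the comment's examples; it is not VPIC's own bookkeeping:

  // Reproduces the down-sampling arithmetic from the comment above:
  // 7 cells at stride 2 -> 4 output cells, 150 at stride 10 -> 15.
  #include <cstdio>

  static int strided_extent( int n, int stride ) {
    return ( n + stride - 1 )/stride;            // ceil(n/stride)
  }

  int main( void ) {
    std::printf( "7   @ stride 2  -> %d\n", strided_extent( 7, 2 ) );
    std::printf( "150 @ stride 10 -> %d\n", strided_extent( 150, 10 ) );
    return 0;
  }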
+ + // Define strides and test that they evenly divide into grid->nx, ny, nz + int stride_x = 1, stride_y = 1, stride_z = 1; + if ( int(grid->nx)%stride_x ) ERROR(("Stride doesn't evenly divide grid->nx.")); + if ( int(grid->ny)%stride_y ) ERROR(("Stride doesn't evenly divide grid->ny.")); + if ( int(grid->nz)%stride_z ) ERROR(("Stride doesn't evenly divide grid->nz.")); + + //---------------------------------------------------------------------- + // Fields + + // relative path to fields data from global header + sprintf(global->fdParams.baseDir, "field"); + + // base file name for fields output + sprintf(global->fdParams.baseFileName, "fields"); + + // set field strides + global->fdParams.stride_x = stride_x; + global->fdParams.stride_y = stride_y; + global->fdParams.stride_z = stride_z; + sim_log ( "Fields x-stride " << global->fdParams.stride_x ); + sim_log ( "Fields y-stride " << global->fdParams.stride_y ); + sim_log ( "Fields z-stride " << global->fdParams.stride_z ); + + // add field parameters to list + global->outputParams.push_back(&global->fdParams); + + //---------------------------------------------------------------------- + // Electron hydro + + // relative path to electron species data from global header + sprintf(global->hedParams.baseDir, "ehydro"); + + // base file name for fields output + sprintf(global->hedParams.baseFileName, "e_hydro"); + + // set electron hydro strides + global->hedParams.stride_x = stride_x; + global->hedParams.stride_y = stride_y; + global->hedParams.stride_z = stride_z; + sim_log ( "Electron species x-stride " << global->hedParams.stride_x ); + sim_log ( "Electron species y-stride " << global->hedParams.stride_y ); + sim_log ( "Electron species z-stride " << global->hedParams.stride_z ); + + // add electron hydro parameters to list + global->outputParams.push_back(&global->hedParams); + + //---------------------------------------------------------------------- + // Hydrogen hydro + + // relative path to electron species data from global header + sprintf(global->hHdParams.baseDir, "Hhydro"); + + // base file name for fields output + sprintf(global->hHdParams.baseFileName, "H_hydro"); + + // set hydrogen hydro strides + global->hHdParams.stride_x = stride_x; + global->hHdParams.stride_y = stride_y; + global->hHdParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHdParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHdParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHdParams.stride_z ); + + // add hydrogen hydro parameters to list + global->outputParams.push_back(&global->hHdParams); + + //---------------------------------------------------------------------- + // Helium hydro + + // relative path to electron species data from global header + sprintf(global->hHedParams.baseDir, "Hehydro"); + + // base file name for fields output + sprintf(global->hHedParams.baseFileName, "He_hydro"); + + // set helium hydro strides + global->hHedParams.stride_x = stride_x; + global->hHedParams.stride_y = stride_y; + global->hHedParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHedParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHedParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHedParams.stride_z ); + + // add helium hydro parameters to list + global->outputParams.push_back(&global->hHedParams); + + /*----------------------------------------------------------------------- + * Set output fields + * + * It is now possible to select which 
state-variables are output on a + * per-dump basis. Variables are selected by passing an or-list of + * state-variables by name. For example, to only output the x-component + * of the electric field and the y-component of the magnetic field, the + * user would call output_variables like: + * + * global->fdParams.output_variables( ex | cby ); + * + * NOTE: OUTPUT VARIABLES ARE ONLY USED FOR THE BANDED FORMAT. IF THE + * FORMAT IS BAND-INTERLEAVE, ALL VARIABLES ARE OUTPUT AND CALLS TO + * 'output_variables' WILL HAVE NO EFFECT. + * + * ALSO: DEFAULT OUTPUT IS NONE! THIS IS DUE TO THE WAY THAT VPIC + * HANDLES GLOBAL VARIABLES IN THE INPUT DECK AND IS UNAVOIDABLE. + * + * For convenience, the output variable 'all' is defined: + * + * global->fdParams.output_variables( all ); + *------------------------------------------------------------------------*/ + /* CUT AND PASTE AS A STARTING POINT + * REMEMBER TO ADD APPROPRIATE GLOBAL DUMPPARAMETERS VARIABLE + + output_variables( all ); + + output_variables( electric | div_e_err | magnetic | div_b_err | + tca | rhob | current | rhof | + emat | nmat | fmat | cmat ); + + output_variables( current_density | charge_density | + momentum_density | ke_density | stress_tensor ); + */ + + //global->fdParams.output_variables( all ); + global->fdParams.output_variables( electric | magnetic ); + + //global->hedParams.output_variables( all ); + //global->hedParams.output_variables( current_density | momentum_density ); + global->hedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHdParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + + /*-------------------------------------------------------------------------- + * Convenience functions for simlog output + *------------------------------------------------------------------------*/ + char varlist[256]; + + create_field_list(varlist, global->fdParams); + sim_log ( "Fields variable list: " << varlist ); + + create_hydro_list(varlist, global->hedParams); + sim_log ( "Electron species variable list: " << varlist ); + + create_hydro_list(varlist, global->hHdParams); + sim_log ( "Ion species variable list: " << varlist ); + + /*------------------------------------------------------------------------*/ + + sim_log("***Finished with user-specified initialization ***"); + + // Upon completion of the initialization, the following occurs: + // - The synchronization error (tang E, norm B) is computed between domains + // and tang E / norm B are synchronized by averaging where discrepancies + // are encountered. + // - The initial divergence error of the magnetic field is computed and + // one pass of cleaning is done (for good measure) + // - The bound charge density necessary to give the simulation an initially + // clean divergence e is computed. 
+ // - The particle momentum is uncentered from u_0 to u_{-1/2}
+ // - The user diagnostics are called on the initial state
+ // - The physics loop is started
+ //
+ // The physics loop consists of:
+ // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2}
+ // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles)
+ // - User current injection (adjust field(x,y,z).jfx, jfy, jfz)
+ // - Advance B from B_0 to B_{1/2}
+ // - Advance E from E_0 to E_1
+ // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz)
+ // - Advance B from B_{1/2} to B_1
+ // - (periodically) Divergence clean electric field
+ // - (periodically) Divergence clean magnetic field
+ // - (periodically) Synchronize shared tang e and norm b
+ // - Increment the time step
+ // - Call user diagnostics
+ // - (periodically) Print a status message
+}
+
+
+begin_diagnostics {
+
+ int mobile_ions=global->mobile_ions,
+ H_present=global->H_present,
+ He_present=global->He_present;
+
+ if ( step()%200==0 ) sim_log("Time step: "<<step());
+
+# define should_dump(x) (global->x##_interval>0 && remainder(step(),global->x##_interval)==0)
+
+ // Do a mkdir at time t=0 to ensure we have all the directories we need
+ if ( step()==0 ) {
+ dump_mkdir("rundata");
+ dump_mkdir("fft");
+ dump_mkdir("field");
+ dump_mkdir("ehydro");
+ dump_mkdir("Hhydro");
+ dump_mkdir("Hehydro");
+ dump_mkdir("restart");
+ dump_mkdir("particle");
+ dump_mkdir("poynting");
+ dump_mkdir("velocity");
+
+ // Turn off rundata for now
+ // dump_grid("rundata/grid");
+ // dump_materials("rundata/materials");
+ // dump_species("rundata/species");
+ global_header("global", global->outputParams);
+
+ }
+
+ // Field and hydro data
+
+ if ( should_dump(field) ) {
+ field_dump( global->fdParams );
+
+#if 1
+ if ( global->load_particles ) {
+ hydro_dump( "electron", global->hedParams );
+ if ( global->mobile_ions ) {
+ if ( global->H_present ) hydro_dump( "H", global->hHdParams );
+ if ( global->He_present ) hydro_dump( "He", global->hHedParams );
+ }
+ }
+#endif
+
+ }
+
+ // Particle dump data
+#if 0
+ if ( should_dump(particle) && global->load_particles ) {
+ dump_particles( "electron", "particle/eparticle" );
+ if ( global->mobile_ions ) {
+ if ( global->H_present ) dump_particles( "H", "particle/Hparticle" );
+ if ( global->He_present ) dump_particles( "He", "particle/Heparticle" );
+ }
+ }
+#endif
+
+
+#define ALLOCATE(A,LEN,TYPE) \
+ if ( !((A)=(TYPE *)malloc((size_t)(LEN)*sizeof(TYPE))) ) ERROR(("Cannot allocate."));
+
+ //---------------------------------------------------------------------------
+
+ // Restart dump filenames are by default tagged with the current timestep.
+ // If you do not want to do this add "0" to the dump_restart arguments. One
+ // reason to not tag the dumps is if you want only one restart dump (the most
+ // recent one) to be stored at a time to conserve on file space. Note: A
+ // restart dump should be the _very_ _last_ dump made in a diagnostics
+ // routine. If not, the diagnostics performed after the restart dump but
+ // before the next timestep will be missed on a restart. Restart dumps are
+ // in a binary format. Each rank makes a restart dump.
+
+ // Restarting from restart files is accomplished by running the executable
+ // with "restart restart" as additional command line arguments. The executable
+ // must be identical to that used to generate the restart dumps or else
+ // the behavior may be unpredictable.
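Before the restart hunk itself, a brief aside on the rotation policy it implements: the deck alternates restart dumps between two base names via global->rtoggle, so at most two restart sets are kept on disk. The sketch below is illustrative scaffolding only (in the deck the dump is performed by checkpt(), omitted here); it just prints which slot each successive dump would overwrite:

    #include <cstdio>

    // Illustration of the two-slot restart rotation used by the diagnostics
    // below; everything here is stand-in scaffolding except the base names.
    int main(void) {
      const char *restart_fbase[2] = { "restart/restart0", "restart/restart1" };
      int rtoggle = 0;                          // stored in global-> in the deck
      for (int dump = 1; dump <= 4; ++dump) {   // pretend four restart dumps occur
        std::printf("restart dump %d -> %s\n", dump, restart_fbase[rtoggle]);
        rtoggle ^= 1;                           // next dump overwrites the older set
      }
      return 0;
    }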
+ + // Note: restart is not currently working with custom boundary conditions + // (such as the reflux condition) and has not been tested with emission + // turned on. + + if ( step()>0 && should_dump(restart) ) { + static const char * restart_fbase[2] = { "restart/restart0", "restart/restart1" }; + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( restart_fbase[global->rtoggle], step() ); + //END_TURNSTILE; + global->rtoggle^=1; + } + + // Shut down simulation when wall clock time exceeds global->quota_sec. + // Note that the mp_elapsed() is guaranteed to return the same value for all + // processors (i.e., elapsed time on proc #0), and therefore the abort will + // be synchronized across processors. + + if ( step()>0 && global->quota_check_interval && (step()%global->quota_check_interval)==0 ) { + if ( uptime() > global->quota_sec ) { + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( "restart/restart", step() ); + //END_TURNSTILE; + + sim_log( "Restart dump restart completed." ); + sim_log( "Allowed runtime exceeded for this job. Terminating." ); + mp_barrier(); // Just to be safe + halt_mp(); + exit(0); + } + } + +} + + +begin_field_injection { + // Inject a light wave from lhs boundary with E aligned along y + // Use scalar diffraction theory for the Gaussian beam source. (This is approximate). + + // For quiet startup (i.e., so that we don't propagate a delta-function noise + // pulse at time t=0) we multiply by a constant phase term exp(i phi) where: + // phi = k*global->xfocus+atan(h) (3d) + + // Inject from the left a field of the form ey = e0 sin( omega t ) + +# define DY ( grid->y0 + (iy-0.5)*grid->dy - global->ycenter ) +# define DZ ( grid->z0 + (iz-1 )*grid->dz - global->zcenter ) +# define R2 ( DY*DY + DZ*DZ ) +# define R2Z ( DZ*DZ ) +# define PHASE ( -global->omega*t + h*R2Z/(global->width*global->width) ) +# define MASK ( R2Z<=pow(global->mask*global->width,2) ? 
1 : 0 ) + + if ( grid->x0==0 ) { // Node is on left boundary + double alpha = grid->cvac*grid->dt/grid->dx; + double emax_coeff = (4/(1+alpha))*global->omega*grid->dt*global->e0; + double prefactor = emax_coeff*sqrt(2/M_PI); + double t = grid->dt*step(); + + // Compute Rayleigh length in c/wpe + double rl = M_PI*global->waist*global->waist/global->lambda; + + double pulse_shape_factor = 1; + float pulse_length = 70; // units of 1/wpe + float sin_t_tau = sin(0.5*t*M_PI/pulse_length); + pulse_shape_factor = ( txfocus/rl; // Distance / Rayleigh length + + // Loop over all Ey values on left edge of this node + for ( int iz=1; iz<=grid->nz+1; ++iz ) + for ( int iy=1; iy<=grid->ny; ++iy ) + field(1,iy,iz).ey += prefactor + * cos(PHASE) +// * exp(-R2/(global->width*global->width)) // 3D + * exp(-R2Z/(global->width*global->width)) + * MASK * pulse_shape_factor; + } +} + + +begin_particle_injection { + // No particle injection for this simulation +} + + +begin_current_injection { + // No current injection for this simulation +} + +begin_particle_collisions { + // No particle collisions for this simulation +} + diff --git a/TestScripts/lpi-deck/lpi_2d_F6_test.original b/TestScripts/lpi-deck/lpi_2d_F6_test.original new file mode 100755 index 00000000..033ed8d9 --- /dev/null +++ b/TestScripts/lpi-deck/lpi_2d_F6_test.original @@ -0,0 +1,978 @@ +// Stress Tensor(e_hydro)_4/abs(Charge Density(e_hydro)+ 1.0e-8) - (Momentum Density(e_hydro)_Y/abs(Charge Density(e_hydro)+ 1.0e-8)) * (Current Density(e_hydro)_Y/(Charge Density(e_hydro) + 1.0e-8)) + +//======================================================================== +// +// LPI 3D deck - Linearly polarized (in y) plane wave incident from left +// boundary +// +// Adapted from Albright's Lightning 3D LPI deck. +// B. Albright, X-1-PTA; 28 Jan. 2007 +// Lin Yin, X-1-PTA, 23 Feb 2009, for Cerrillos test +// +// Executable creates its own directory structure. Remove the old with: +// +// rm -rf rundata ehydro Hhydro Hehydro restart poynting velocity particle field +//======================================================================== + +// Employ turnstiles to partially serialize the high-volume file writes. +// In this case, the restart dumps. Set NUM_TURNSTILES to be the desired +// number of simultaneous writes. +#define NUM_TURNSTILES 128 + +begin_globals { + double e0; // peak amplitude of oscillating electric field + double omega; // angular freq. of the beam + int field_interval; // how often to dump field and hydro + int particle_interval; // how often to dump particle data + int poynting_interval; // how often to compute poynting flux on boundary + int restart_interval; // how often to write restart data + int quota_check_interval; // how often to check if quote exceeded + double quota_sec; // run quota in sec + int rtoggle; // enable save of last 2 restart files for safety + int load_particles; // were particles loaded? 
+ double topology_x; // domain topology needed to normalize Poynting diagnostic + double topology_y; + double topology_z; + int mobile_ions; + int H_present; + int He_present; + + // Parameters for 3d Gaussian wave launch + double lambda; + double waist; // how wide the focused beam is + double width; + double zcenter; // center of beam at boundary in z + double ycenter; // center of beam at boundary in y + double xfocus; // how far from boundary to focus + double mask; // # gaussian widths from beam center where I is nonzero + + // Ponyting diagnostic flags - which output to turn on + + // write_backscatter_only flag: when this flag is nonzero, it means to only compute + // poynting data for the lower-x surface. This flag affects both the summed poynting + // data as well as the surface data. + + int write_poynting_data; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only; // nonzero means we only write backscatter diagnostic for fields + + // write_poynting_sum is nonzero if you wish to write a file containing the integrated + // poynting data on one or more surfaces. If write_backscatter_only is nonzero, then + // the output file will be a time series of double precision numbers containing the + // integrated poynting flux at that point in time. If write_backscatter_only is zero, + // then for each time step, six double precision numbers are written, containing the + // poynting flux through the lower x, upper x, lower y, upper y, lower z, and upper z + // surfaces. + + int write_poynting_sum; // nonzero if we wish to write integrated Poynting data + + // write_poynting_faces is probably useless, but I put it here anyway in case it's not. + // When this flag is nonzero, it will print out the poynting flux at each of a 2D array + // of points on the surface. When this flag is turned on and write_backscatter_only is + // nonzero, then only the 2D array of points on the lower-x boundary surface are written + // for each time step. When this flag is turned on and write_bacscatter_only is + // zero, then it will write 2D array data for the lower x, upper x, lower y, upper y, + // lower z, upper z surfaces for each time step. + + int write_poynting_faces; // nonzero if we wish to write Poynting data on sim boundaries + + // write_eb_faces is nonzero when we wish to get the raw e and b data at the boundary + // (e.g., to be used with a filter to distinguish between SRS and SBS backscatter). + // When this flag is on and write_backscatter_only is nonzero, then only the 2D array + // of points on the lower-x boundary surface are written for each time step. When + // this flag is turned on and write_backscatteR_only is zero, then it will write 2D + // array data for the lower x, upper x, lower y, upper y, lower z, upper z surfaces for + // each time step. When turned on, four files are produced: e1, e2, cb1, cb2. The + // values of the quantities printed depend on the face one is considering: for the + // x faces, e1 = ey, e2 = ez, cb1 = cby, cb2 = cbz. Similarly for y and z surfaces, + // but where 1 and 2 stand for (z,x) and (x,y) coordinates, respectively. 
+ + int write_eb_faces; // nonzero if we wish to write E and B data on sim boundaries + + // Needed for movie files + float vthe; // vthe/c + float vthi_He; // vthi_He/c + float vthi_H; // vthi_H/c + int velocity_interval; // how frequently to dump binned velocity space data + + // Dump parameters for standard VPIC output formats + DumpParameters fdParams; + DumpParameters hedParams; + DumpParameters hHdParams; + DumpParameters hHedParams; + std::vector outputParams; +}; + +begin_initialization { + + // System of units + double ec = 4.8032e-10; // stat coulomb + double c_vac = 2.99792458e10; // cm/sec + double m_e = 9.1094e-28; // g + double k_b = 1.6022e-12; // erg/eV + double mec2 = m_e*c_vac*c_vac/k_b; + double mpc2 = mec2*1836.0; + + double cfl_req = 0.98; // How close to Courant should we try to run + double damp = 0; // How much radiation damping + double iv_thick = 2; // Thickness of impermeable vacuum (in cells) + + // Experimental parameters + + double t_e = 600; // electron temperature, eV + double t_i = 150; // ion temperature, eV + double n_e_over_n_crit = 0.05; // n_e/n_crit + double vacuum_wavelength = 527 * 1e-7; // third micron light (cm) + double laser_intensity = 2.5e15 * 1e7; // in ergs/cm^2 (note: 1 W = 1e7 ergs) + + // Simulation parameters + +#if 0 + // 2048 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 64; + double topology_y = 4; + double topology_z = 8; + // single-processor mesh = 128 x 128 x 64 +#endif + +#if 0 + // 14300x4 processors + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 12.0 * 1e-4; + double Lz = 12.0 * 1e-4; + double nx = 6292; + double ny = 440; + double nz = 440; + double topology_x = 143; + double topology_y = 10; + double topology_z = 10; + // single-processor mesh = 44 x 44 x 44 +#endif + +#if 0 + // 2*4096 processors + double Lx = 4*35.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 2*6.0 * 1e-4; + double Lz = 2*6.0 * 1e-4; + double nx = 8192; + double ny = 512; + double nz = 512; + double topology_x = 128; + double topology_y = 8; + double topology_z = 8; + // single-processor mesh = 64 x 64 x 64 +#endif + + // This choice of nx, ny, nz and topology values imply that the + // grid is 102 x 42 x 42 cells. The transverse size is slightly smaller than + // in the original deck in order to enable more choices for down-sampling/striding + // the field and hydro output. (The original had 43 cells in each direction, which + // is prime and therefore not strideable with Ben's output format). 
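To make the prime-versus-composite point above concrete, here is a tiny standalone check (illustrative only; 42 and 43 are the per-direction cell counts discussed in the comment) that enumerates which output strides each extent would admit:

    #include <cstdio>

    // Illustrative only: list the strides that divide a per-direction cell
    // count evenly. 42 admits several useful down-sampling strides; 43 is
    // prime, so nothing between 1 and 43 works -- the point made above.
    int main(void) {
      const int counts[] = { 42, 43 };
      for (int n : counts) {
        std::printf("cells = %d, usable strides:", n);
        for (int s = 1; s <= n; ++s)
          if (n % s == 0) std::printf(" %d", s);
        std::printf("\n");
      }
      return 0;
    }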
+ +# if 0 + // one processor problem, for debugging + Lx /= topology_x; nx /= topology_x; + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 1; + topology_y = 1; + topology_z = 1; +# endif + +# if 0 + // twoprocessor problem, for debugging + Lx /= (topology_x/2); nx /= (topology_x/2); + Ly /= topology_y; ny /= topology_y; + Lz /= topology_z; nz /= topology_z; + + topology_x = 2; + topology_y = 1; + topology_z = 1; +# endif + +// Run a smaller problem for debugging purposes +#if 0 + double Lx = 120.0* 1e-4; // In cm (note: 1 micron = 1e-4 cm) + double Ly = 1.20 * 1e-4; + double Lz = 1.20 * 1e-4; + double nx = 6292; + double ny = 44; + double nz = 44; + double topology_x = 143; + double topology_y = 1; + double topology_z = 1; + + // nproc() x 1 x 1 : + double topology = nproc(); + Lx /= 143; Lx *= topology; + nx /= 143; nx *= topology; + topology_x /= 143; topology_x *= topology; + + // single-processor mesh = 44 x 44 x 44 +#endif + + float box_size_x = 120.0* 1e-4; + float box_size_z = 12.0* 1e-4; + + int mobile_ions = 1; // Whether or not to push ions + double nppc = 512; // Ave. number of particles/cell in ea. species + //double nppc = 32; // Ave. number of particles/cell in ea. species + int load_particles = 1; // Flag to turn on/off particle load + int He_present = 1; + int H_present = 1; + double f_He = 1; // Ratio of number density of He to total ion density + double f_H = 1-f_He; // Ratio of number density of H to total ion density + if ( f_He == 1 ) H_present = 0; + if ( f_H == 1 ) He_present = 0; + +// Here _He is N3+ + // Precompute some useful variables. + double A_H = 1; + double Z_H = 1; + double A_He = 14; + double Z_He = 3; + double mic2_H = mpc2*A_H; + double mic2_He = mpc2*A_He; + double mime_H = mic2_H /mec2; + double mime_He = mic2_He/mec2; + + double uthe = sqrt(t_e/mec2); // vthe/c + double uthi_H = sqrt(t_i/mic2_H); // vthi/c for H + double uthi_He = sqrt(t_i/mic2_He); // vthi/c for He + + // Plasma skin deptth in cm + double delta = (vacuum_wavelength / (2*M_PI) ) / sqrt( n_e_over_n_crit ); + + double n_e = c_vac*c_vac*m_e/(4*M_PI*ec*ec*delta*delta); // electron density in cm^-3 + double debye = uthe*delta; // electron Debye length (cm) + double omega = sqrt( 1/n_e_over_n_crit ); // laser beam freq. in wpe + + double nx = 7776; + double ny = 1; + double nz = 549; + +#if 1 + // DEBUG - run on 16 proc. 
+ nx = nx/81; + box_size_x /= 81.; +#endif + + double hx = box_size_x/(delta*nx); // in c/wpe + double hz = box_size_z/(delta*nz); // in c/wpe + double hy = hz; + + double cell_size_x = hx*delta/debye; // Cell size in Debye lengths + double cell_size_z = hz*delta/debye; +//double cell_size_y = hy*delta/debye; + + double Lx = nx*hx; // in c/wpe + double Ly = ny*hy; // in c/wpe + double Lz = nz*hz; // in c/wpe + + double topology_x = nproc(); + double topology_y = 1; + double topology_z = 1; + + double f_number = 6; // f/# of beam + double lambda = vacuum_wavelength/delta; // vacuum wavelength in c/wpe + double waist = f_number*lambda; // width of beam at focus in c/wpe + double xfocus = Lx/2; // in c/wpe + double ycenter = 0; // center of spot in y on lhs boundary + double zcenter = 0; // center of spot in z on lhs boundary + double mask = 1.5; // set drive I=0 outside r>mask*width at lhs boundary + double width = waist*sqrt( 1 + (lambda*xfocus/(M_PI*waist*waist))*(lambda*xfocus/(M_PI*waist*waist))); + + // Peak instantaneous E field in "natural units" + double e0 = sqrt( 2*laser_intensity / (m_e*c_vac*c_vac*c_vac*n_e) ); +//e0 = e0*(waist/width); // at entrance (3D Gaussian) + e0 = e0*sqrt(waist/width); // 2D, at entrance + + double dt = cfl_req*courant_length(Lx,Ly,Lz,nx,ny,nz); // in 1/wpe; n.b. c=1 in nat. units + double nsteps_cycle = trunc_granular(2*M_PI/(dt*omega),1)+1; + dt = 2*M_PI/omega/nsteps_cycle; // nsteps_cycle time steps in one laser cycle + + double wpe1ps = 1e-12*c_vac/delta; + + double t_stop = 100*wpe1ps; // Runtime in 1/wpe + int particle_interval = 0; + int poynting_interval = int(M_PI/((omega+1.5)*dt)); // Num. steps between dumping poynting flux + int field_interval = int(0.01*wpe1ps/dt); // Num. steps between saving field, hydro data + int velocity_interval = int(0.1*wpe1ps/dt); // How frequently to dump binned velocity space data + int restart_interval = 10000000; // Num. steps between restart dumps + int quota_check_interval = 20; + + // Ben: This quota thing gracefully terminates after writing a final restart after + // 11.5 hours; if you want a longer time before shutdown, set this value larger. If + // you want the code to just run all weekend, then set it to very long (2400.*3500, e.g.) + + double quota_sec = 23.7*3600; // Run quota in sec. + + // Turn on integrated backscatter poynting diagnostic - right now there is a bug in this, so we + // only write the integrated backscatter time history on the left face. + + int write_poynting_data = 1; // Whether to write poynting data to file (or just stdout) + + int write_backscatter_only = 0; // Nonzero means only write lower x face + int write_poynting_sum = 0; // Whether to write integrated Poynting data + int write_poynting_faces = 0; // Whether to write poynting data on sim boundary faces + int write_eb_faces = 0; // Whether to write e and b field data on sim boundary faces + + double N_e = nppc*nx*ny*nz; // Number of macro electrons in box + double Np_e = Lx*Ly*Lz; // "Number" of "physical" electrons in box (nat. units) + double q_e = -Np_e/N_e; // Charge per macro electron + double N_i = N_e; // Number of macro ions of each species in box + double Np_i = Np_e/(Z_H*f_H+Z_He*f_He); // "Number" of "physical" ions of each sp. in box + double qi_H = Z_H *f_H *Np_i/N_i; // Charge per H macro ion + double qi_He = Z_He*f_He*Np_i/N_i; // Charge per He macro ion + + // Print simulation parameters + + sim_log("***** Simulation parameters *****"); + sim_log("* Processors: "< 25,000 steps. 
+ num_comm_round = 6; + + global->e0 = e0; + global->omega = omega; + global->field_interval = field_interval; + global->particle_interval = particle_interval; + global->poynting_interval = poynting_interval; + global->restart_interval = restart_interval; + global->quota_check_interval = quota_check_interval; + global->quota_sec = quota_sec; + global->rtoggle = 0; + global->load_particles = load_particles; + global->mobile_ions = mobile_ions; + global->H_present = H_present; + global->He_present = He_present; + global->topology_x = topology_x; + global->topology_y = topology_y; + global->topology_z = topology_z; + global->xfocus = xfocus; + global->ycenter = ycenter; + global->zcenter = zcenter; + global->mask = mask; + global->waist = waist; + global->width = width; + global->lambda = lambda; + + global->write_poynting_data = write_poynting_data; + + global->write_poynting_sum = write_poynting_sum; + global->write_poynting_faces = write_poynting_faces; + global->write_eb_faces = write_eb_faces; + global->write_backscatter_only = write_backscatter_only; + + global->vthe = uthe; + global->vthi_H = uthi_H; + global->vthi_He = uthi_He; + global->velocity_interval = velocity_interval; + + // Set up the species + // Allow additional local particles in case of non-uniformity. + + // Set up grid + sim_log("Setting up computational grid."); + grid->dx = hx; + grid->dy = hy; + grid->dz = hz; + grid->dt = dt; + grid->cvac = 1; + grid->eps0 = 1; + + sim_log("Setting up absorbing mesh."); + define_periodic_grid( 0, -0.5*Ly, -0.5*Lz, // Low corner + Lx, 0.5*Ly, 0.5*Lz, // High corner + nx, ny, nz, // Resolution + topology_x, topology_y, topology_z); // Topology + + if ( rank() == 0) { + set_domain_field_bc( BOUNDARY(-1,0,0), absorb_fields ); + } + if ( rank() == nproc() -1) { + set_domain_field_bc( BOUNDARY( 1,0,0), absorb_fields ); + } + + set_domain_field_bc( BOUNDARY( 0,0, 1), absorb_fields ); + set_domain_field_bc( BOUNDARY( 0,0,-1), absorb_fields ); + + sim_log("Setting up species."); + double max_local_np = 2.0*N_e/nproc(); + double max_local_nm = max_local_np/10.0; + species_t * electron = define_species("electron", -1, 1, max_local_np, max_local_nm, 20, 1); + + // Start with two ion species. We have option to go to Xe and Kr gas fills if + // we need a higher ion/electron macroparticle ratio. + + species_t *ion_H, *ion_He; + if ( mobile_ions ) { + if ( H_present ) ion_H = define_species("H", Z_H, mime_H, max_local_np, max_local_nm, 100, 1); + if ( He_present ) ion_He = define_species("He", Z_He, mime_He, max_local_np, max_local_nm, 100, 1); + } + + // From grid/partition.c: used to determine which domains are on edge +# define RANK_TO_INDEX(rank,ix,iy,iz) BEGIN_PRIMITIVE { \ + int _ix, _iy, _iz; \ + _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ + _iy = _ix/int(global->topology_x); /* iy = iy+gpy*iz */ \ + _ix -= _iy*int(global->topology_x); /* ix = ix */ \ + _iz = _iy/int(global->topology_y); /* iz = iz */ \ + _iy -= _iz*int(global->topology_y); /* iy = iy */ \ + (ix) = _ix; \ + (iy) = _iy; \ + (iz) = _iz; \ + } END_PRIMITIVE + + sim_log("Overriding x boundaries to absorb fields."); + int ix, iy, iz; // Domain location in mesh + RANK_TO_INDEX( int(rank()), ix, iy, iz ); + + // Set up Maxwellian reinjection B.C. 
+ sim_log("Setting up Maxwellian reinjection boundary condition."); + + particle_bc_t * maxwellian_reinjection = + define_particle_bc( maxwellian_reflux( species_list, entropy ) ); + set_reflux_temp( maxwellian_reinjection, electron, uthe, uthe ); + if ( mobile_ions ) { + if ( H_present ) set_reflux_temp( maxwellian_reinjection, ion_H, uthi_H, uthi_H ); + if ( He_present ) set_reflux_temp( maxwellian_reinjection, ion_He, uthi_He, uthi_He ); + } + + // Set up materials + sim_log("Setting up materials."); + define_material( "vacuum", 1 ); + define_field_array( NULL, damp ); + + // Paint the simulation volume with materials and boundary conditions +# define iv_region ( x< hx*iv_thick || x>Lx -hx*iv_thick \ + || z<-Lz/2+hz*iv_thick || z>Lz/2-hz*iv_thick ) /* all boundaries are i.v. */ + set_region_bc( iv_region, maxwellian_reinjection, maxwellian_reinjection, maxwellian_reinjection ); + + // Load particles + if ( load_particles ) { + sim_log("Loading particles."); + // Fast load of particles--don't bother fixing artificial domain correlations + double xmin=grid->x0, xmax=grid->x1; + double ymin=grid->y0, ymax=grid->y1; + double zmin=grid->z0, zmax=grid->z1; + repeat( N_e/(topology_x*topology_y*topology_z) ) { + double x = uniform( rng(0), xmin, xmax ); + double y = uniform( rng(0), ymin, ymax ); + double z = uniform( rng(0), zmin, zmax ); + if ( iv_region ) continue; // Particle fell in iv_region. Don't load. + // third to last arg is "weight," a positive number + inject_particle( electron, x, y, z, + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), + normal( rng(0), 0, uthe), -q_e, 0, 0 ); + if ( mobile_ions ) { + if ( H_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_H, x, y, z, + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), + normal( rng(0), 0, uthi_H), qi_H, 0, 0 ); + if ( He_present ) // Inject an H macroion on top of macroelectron + inject_particle( ion_He, x, y, z, + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), + normal( rng(0), 0, uthi_He), qi_He, 0, 0 ); + } + } + } + + + /*-------------------------------------------------------------------------- + * New dump definition + *------------------------------------------------------------------------*/ + + /*-------------------------------------------------------------------------- + * Set data output format + * + * This option allows the user to specify the data format for an output + * dump. Legal settings are 'band' and 'band_interleave'. Band-interleave + * format is the native storage format for data in VPIC. For field data, + * this looks something like: + * + * ex0 ey0 ez0 div_e_err0 cbx0 ... ex1 ey1 ez1 div_e_err1 cbx1 ... + * + * Banded data format stores all data of a particular state variable as a + * contiguous array, and is easier for ParaView to process efficiently. + * Banded data looks like: + * + * ex0 ex1 ex2 ... exN ey0 ey1 ey2 ... + * + *------------------------------------------------------------------------*/ + sim_log( "Setting up hydro and field diagnostics." 
); + + global->fdParams.format = band; + sim_log ( "Field output format : band" ); + + global->hedParams.format = band; + sim_log ( "Electron hydro output format : band" ); + + global->hHdParams.format = band; + sim_log ( "Hydrogen hydro output format : band" ); + + global->hHedParams.format = band; + sim_log ( "Helium hydro output format : band" ); + + /*-------------------------------------------------------------------------- + * Set stride + * + * This option allows data down-sampling at output. Data are down-sampled + * in each dimension by the stride specified for that dimension. For + * example, to down-sample the x-dimension of the field data by a factor + * of 2, i.e., half as many data will be output, select: + * + * global->fdParams.stride_x = 2; + * + * The following 2-D example shows down-sampling of a 7x7 grid (nx = 7, + * ny = 7. With ghost-cell padding the actual extents of the grid are 9x9. + * Setting the strides in x and y to equal 2 results in an output grid of + * nx = 4, ny = 4, with actual extents 6x6. + * + * G G G G G G G G G + * G X X X X X X X G + * G X X X X X X X G G G G G G G + * G X X X X X X X G G X X X X G + * G X X X X X X X G ==> G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G X X X X G + * G X X X X X X X G G G G G G G + * G G G G G G G G G + * + * Note that grid extents in each dimension must be evenly divisible by + * the stride for that dimension: + * + * nx = 150; + * global->fdParams.stride_x = 10; // legal -> 150/10 = 15 + * + * global->fdParams.stride_x = 8; // illegal!!! -> 150/8 = 18.75 + *------------------------------------------------------------------------*/ + + // Strides for field and hydro arrays. Note that here we have defined them + // the same for fields and all hydro species; if desired, we could use different + // strides for each. Also note that strides must divide evenly into the number + // of cells in a given domain. 
+ + // Define strides and test that they evenly divide into grid->nx, ny, nz + int stride_x = 1, stride_y = 1, stride_z = 1; + if ( int(grid->nx)%stride_x ) ERROR(("Stride doesn't evenly divide grid->nx.")); + if ( int(grid->ny)%stride_y ) ERROR(("Stride doesn't evenly divide grid->ny.")); + if ( int(grid->nz)%stride_z ) ERROR(("Stride doesn't evenly divide grid->nz.")); + + //---------------------------------------------------------------------- + // Fields + + // relative path to fields data from global header + sprintf(global->fdParams.baseDir, "field"); + + // base file name for fields output + sprintf(global->fdParams.baseFileName, "fields"); + + // set field strides + global->fdParams.stride_x = stride_x; + global->fdParams.stride_y = stride_y; + global->fdParams.stride_z = stride_z; + sim_log ( "Fields x-stride " << global->fdParams.stride_x ); + sim_log ( "Fields y-stride " << global->fdParams.stride_y ); + sim_log ( "Fields z-stride " << global->fdParams.stride_z ); + + // add field parameters to list + global->outputParams.push_back(&global->fdParams); + + //---------------------------------------------------------------------- + // Electron hydro + + // relative path to electron species data from global header + sprintf(global->hedParams.baseDir, "ehydro"); + + // base file name for fields output + sprintf(global->hedParams.baseFileName, "e_hydro"); + + // set electron hydro strides + global->hedParams.stride_x = stride_x; + global->hedParams.stride_y = stride_y; + global->hedParams.stride_z = stride_z; + sim_log ( "Electron species x-stride " << global->hedParams.stride_x ); + sim_log ( "Electron species y-stride " << global->hedParams.stride_y ); + sim_log ( "Electron species z-stride " << global->hedParams.stride_z ); + + // add electron hydro parameters to list + global->outputParams.push_back(&global->hedParams); + + //---------------------------------------------------------------------- + // Hydrogen hydro + + // relative path to electron species data from global header + sprintf(global->hHdParams.baseDir, "Hhydro"); + + // base file name for fields output + sprintf(global->hHdParams.baseFileName, "H_hydro"); + + // set hydrogen hydro strides + global->hHdParams.stride_x = stride_x; + global->hHdParams.stride_y = stride_y; + global->hHdParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHdParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHdParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHdParams.stride_z ); + + // add hydrogen hydro parameters to list + global->outputParams.push_back(&global->hHdParams); + + //---------------------------------------------------------------------- + // Helium hydro + + // relative path to electron species data from global header + sprintf(global->hHedParams.baseDir, "Hehydro"); + + // base file name for fields output + sprintf(global->hHedParams.baseFileName, "He_hydro"); + + // set helium hydro strides + global->hHedParams.stride_x = stride_x; + global->hHedParams.stride_y = stride_y; + global->hHedParams.stride_z = stride_z; + sim_log ( "Ion species x-stride " << global->hHedParams.stride_x ); + sim_log ( "Ion species y-stride " << global->hHedParams.stride_y ); + sim_log ( "Ion species z-stride " << global->hHedParams.stride_z ); + + // add helium hydro parameters to list + global->outputParams.push_back(&global->hHedParams); + + /*----------------------------------------------------------------------- + * Set output fields + * + * It is now possible to select which 
state-variables are output on a + * per-dump basis. Variables are selected by passing an or-list of + * state-variables by name. For example, to only output the x-component + * of the electric field and the y-component of the magnetic field, the + * user would call output_variables like: + * + * global->fdParams.output_variables( ex | cby ); + * + * NOTE: OUTPUT VARIABLES ARE ONLY USED FOR THE BANDED FORMAT. IF THE + * FORMAT IS BAND-INTERLEAVE, ALL VARIABLES ARE OUTPUT AND CALLS TO + * 'output_variables' WILL HAVE NO EFFECT. + * + * ALSO: DEFAULT OUTPUT IS NONE! THIS IS DUE TO THE WAY THAT VPIC + * HANDLES GLOBAL VARIABLES IN THE INPUT DECK AND IS UNAVOIDABLE. + * + * For convenience, the output variable 'all' is defined: + * + * global->fdParams.output_variables( all ); + *------------------------------------------------------------------------*/ + /* CUT AND PASTE AS A STARTING POINT + * REMEMBER TO ADD APPROPRIATE GLOBAL DUMPPARAMETERS VARIABLE + + output_variables( all ); + + output_variables( electric | div_e_err | magnetic | div_b_err | + tca | rhob | current | rhof | + emat | nmat | fmat | cmat ); + + output_variables( current_density | charge_density | + momentum_density | ke_density | stress_tensor ); + */ + + //global->fdParams.output_variables( all ); + global->fdParams.output_variables( electric | magnetic ); + + //global->hedParams.output_variables( all ); + //global->hedParams.output_variables( current_density | momentum_density ); + global->hedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHdParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + global->hHedParams.output_variables( current_density | charge_density | + momentum_density | ke_density | + stress_tensor ); + + /*-------------------------------------------------------------------------- + * Convenience functions for simlog output + *------------------------------------------------------------------------*/ + char varlist[256]; + + create_field_list(varlist, global->fdParams); + sim_log ( "Fields variable list: " << varlist ); + + create_hydro_list(varlist, global->hedParams); + sim_log ( "Electron species variable list: " << varlist ); + + create_hydro_list(varlist, global->hHdParams); + sim_log ( "Ion species variable list: " << varlist ); + + /*------------------------------------------------------------------------*/ + + sim_log("***Finished with user-specified initialization ***"); + + // Upon completion of the initialization, the following occurs: + // - The synchronization error (tang E, norm B) is computed between domains + // and tang E / norm B are synchronized by averaging where discrepancies + // are encountered. + // - The initial divergence error of the magnetic field is computed and + // one pass of cleaning is done (for good measure) + // - The bound charge density necessary to give the simulation an initially + // clean divergence e is computed. 
+ // - The particle momentum is uncentered from u_0 to u_{-1/2} + // - The user diagnostics are called on the initial state + // - The physics loop is started + // + // The physics loop consists of: + // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2} + // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles) + // - User current injection (adjust field(x,y,z).jfx, jfy, jfz) + // - Advance B from B_0 to B_{1/2} + // - Advance E from E_0 to E_1 + // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz) + // - Advance B from B_{1/2} to B_1 + // - (periodically) Divergence clean electric field + // - (periodically) Divergence clean magnetic field + // - (periodically) Synchronize shared tang e and norm b + // - Increment the time step + // - Call user diagnostics + // - (periodically) Print a status message +} + + +begin_diagnostics { + + int mobile_ions=global->mobile_ions, + H_present=global->H_present, + He_present=global->He_present; + + if ( step()%200==0 ) sim_log("Time step: "<x##_interval>0 && remainder(step(),global->x##_interval)==0) + + // Do a mkdir at time t=0 to ensure we have all the directories we need + if ( step()==0 ) { + dump_mkdir("rundata"); + dump_mkdir("fft"); + dump_mkdir("field"); + dump_mkdir("ehydro"); + dump_mkdir("Hhydro"); + dump_mkdir("Hehydro"); + dump_mkdir("restart"); + dump_mkdir("particle"); + dump_mkdir("poynting"); + dump_mkdir("velocity"); + + // Turn off rundata for now + // dump_grid("rundata/grid"); + // dump_materials("rundata/materials"); + // dump_species("rundata/species"); + global_header("global", global->outputParams); + + } + + // Field and hydro data + + if ( should_dump(field) ) { + field_dump( global->fdParams ); + +#if 1 + if ( global->load_particles ) { + hydro_dump( "electron", global->hedParams ); + if ( global->mobile_ions ) { + if ( global->H_present ) hydro_dump( "H", global->hHdParams ); + if ( global->He_present ) hydro_dump( "He", global->hHedParams ); + } + } +#endif + + } + + // Particle dump data +#if 0 + if ( should_dump(particle) && global->load_particles ) { + dump_particles( "electron", "particle/eparticle" ); + if ( global->mobile_ions ) { + if ( global->H_present ) dump_particles( "H", "particle/Hparticle" ); + if ( global->He_present ) dump_particles( "He", "particle/Heparticle" ); + } + } +#endif + + +#define ALLOCATE(A,LEN,TYPE) \ + if ( !((A)=(TYPE *)malloc((size_t)(LEN)*sizeof(TYPE))) ) ERROR(("Cannot allocate.")); + + //--------------------------------------------------------------------------- + + // Restart dump filenames are by default tagged with the current timestep. + // If you do not want to do this add "0" to the dump_restart arguments. One + // reason to not tag the dumps is if you want only one restart dump (the most + // recent one) to be stored at a time to conserve on file space. Note: A + // restart dump should be the _very_ _last_ dump made in a diagnostics + // routine. If not, the diagnostics performed after the restart dump but + // before the next timestep will be missed on a restart. Restart dumps are + // in a binary format. Each rank makes a restart dump. + + // Restarting from restart files is accomplished by running the executable + // with "restart restart" as additional command line arguments. The executable + // must be identical to that used to generate the restart dumps or else + // the behavior may be unpredictable. 
+ + // Note: restart is not currently working with custom boundary conditions + // (such as the reflux condition) and has not been tested with emission + // turned on. + + if ( step()>0 && should_dump(restart) ) { + static const char * restart_fbase[2] = { "restart/restart0", "restart/restart1" }; + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( restart_fbase[global->rtoggle], step() ); + //END_TURNSTILE; + global->rtoggle^=1; + } + + // Shut down simulation when wall clock time exceeds global->quota_sec. + // Note that the mp_elapsed() is guaranteed to return the same value for all + // processors (i.e., elapsed time on proc #0), and therefore the abort will + // be synchronized across processors. + + if ( step()>0 && global->quota_check_interval && (step()%global->quota_check_interval)==0 ) { + if ( uptime() > global->quota_sec ) { + + //BEGIN_TURNSTILE(NUM_TURNSTILES); + checkpt( "restart/restart", step() ); + //END_TURNSTILE; + + sim_log( "Restart dump restart completed." ); + sim_log( "Allowed runtime exceeded for this job. Terminating." ); + mp_barrier(); // Just to be safe + halt_mp(); + exit(0); + } + } + +} + + +begin_field_injection { + // Inject a light wave from lhs boundary with E aligned along y + // Use scalar diffraction theory for the Gaussian beam source. (This is approximate). + + // For quiet startup (i.e., so that we don't propagate a delta-function noise + // pulse at time t=0) we multiply by a constant phase term exp(i phi) where: + // phi = k*global->xfocus+atan(h) (3d) + + // Inject from the left a field of the form ey = e0 sin( omega t ) + +# define DY ( grid->y0 + (iy-0.5)*grid->dy - global->ycenter ) +# define DZ ( grid->z0 + (iz-1 )*grid->dz - global->zcenter ) +# define R2 ( DY*DY + DZ*DZ ) +# define R2Z ( DZ*DZ ) +# define PHASE ( -global->omega*t + h*R2Z/(global->width*global->width) ) +# define MASK ( R2Z<=pow(global->mask*global->width,2) ? 
1 : 0 ) + + if ( grid->x0==0 ) { // Node is on left boundary + double alpha = grid->cvac*grid->dt/grid->dx; + double emax_coeff = (4/(1+alpha))*global->omega*grid->dt*global->e0; + double prefactor = emax_coeff*sqrt(2/M_PI); + double t = grid->dt*step(); + + // Compute Rayleigh length in c/wpe + double rl = M_PI*global->waist*global->waist/global->lambda; + + double pulse_shape_factor = 1; + float pulse_length = 70; // units of 1/wpe + float sin_t_tau = sin(0.5*t*M_PI/pulse_length); + pulse_shape_factor = ( txfocus/rl; // Distance / Rayleigh length + + // Loop over all Ey values on left edge of this node + for ( int iz=1; iz<=grid->nz+1; ++iz ) + for ( int iy=1; iy<=grid->ny; ++iy ) + field(1,iy,iz).ey += prefactor + * cos(PHASE) +// * exp(-R2/(global->width*global->width)) // 3D + * exp(-R2Z/(global->width*global->width)) + * MASK * pulse_shape_factor; + } +} + + +begin_particle_injection { + // No particle injection for this simulation +} + + +begin_current_injection { + // No current injection for this simulation +} + +begin_particle_collisions { + // No particle collisions for this simulation +} + diff --git a/TestScripts/vpic-build.sh b/TestScripts/vpic-build.sh new file mode 100755 index 00000000..d5c62d32 --- /dev/null +++ b/TestScripts/vpic-build.sh @@ -0,0 +1,232 @@ +#!/bin/bash + +#SBATCH -N 1 +#SBATCH -t 2:00:00 +#SBATCH --output=vpic-build-log.txt + +src_dir=/your/vpic/master/location/VPIC/vpic-master # please edit accordingly +builds=/your/vpic/build/location/VPIC/test-builds # please edit accordingly +mkdir -p $builds +openmp=$builds/openmp +pthreads=$builds/pthreads + +### Build weak and strong scaling lpi_2d_F6_test input decks (or use your own input deck) ### +function lpi_deck(){ + $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test # "lpi-deck" directory contains strong scaling deck "lpi_2d_F6_test" + $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx12 # "lpi-deck" directory contains weak scaling decks "lpi_2d_F6_test-nx***" + $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx24 + $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx48 + $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx96 + $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx288 + $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx864 +} + +### Configure VPIC cmake ### +function vpic_cmake(){ +cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_INTEGRATED_TESTS=OFF \ + -DUSE_V4_AVX=ON \ + -DUSE_V4_AVX2=ON \ + -DUSE_V8_AVX=ON \ + -DUSE_V8_AVX2=ON \ + -DCMAKE_C_COMPILER=cc \ + -DCMAKE_CXX_COMPILER=CC \ + -DCMAKE_C_FLAGS="-O3 -dynamic -craype-verbose" \ + -DCMAKE_CXX_FLAGS="-O3 -dynamic -craype-verbose" \ + -DCMAKE_EXE_LINKER_FLAGS="-dynamic" \ + -DUSE_PTHREADS=$2 \ + -DUSE_OPENMP=$3 \ + -DUSE_LEGACY_SORT=$4 \ + $1 +} + +# PrgEnv-cray, OpenMP, TSORT +build_dir=$openmp/cray/tsort +mkdir -p $build_dir +cd $build_dir +cmake_pthreads="OFF" +cmake_openmp="ON" +cmake_sort="TSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-cray, OpenMP, LSORT +build_dir=$openmp/cray/lsort +mkdir -p $build_dir +cd $build_dir +cmake_pthreads="OFF" +cmake_openmp="ON" +cmake_sort="LSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-gnu, OpenMP, TSORT +build_dir=$openmp/gnu/tsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-gnu +cmake_pthreads="OFF" +cmake_openmp="ON" +cmake_sort="TSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-gnu, OpenMP, LSORT +build_dir=$openmp/gnu/lsort +mkdir -p 
$build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-gnu +cmake_pthreads="OFF" +cmake_openmp="ON" +cmake_sort="LSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-aocc, OpenMP, TSORT +build_dir=$openmp/aocc/tsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-aocc +cmake_pthreads="OFF" +cmake_openmp="ON" +cmake_sort="TSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-aocc, OpenMP, LSORT +build_dir=$openmp/aocc/lsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-aocc +cmake_pthreads="OFF" +cmake_openmp="ON" +cmake_sort="LSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-intel, OpenMP, TSORT +build_dir=$openmp/intel/tsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-intel +cmake_pthreads="OFF" +cmake_openmp="ON" +cmake_sort="TSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-intel, OpenMP, LSORT +build_dir=$openmp/intel/lsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-intel +cmake_pthreads="OFF" +cmake_openmp="ON" +cmake_sort="LSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-cray, PTHREADS, TSORT +build_dir=$pthreads/cray/tsort +mkdir -p $build_dir +cd $build_dir +cmake_pthreads="ON" +cmake_openmp="OFF" +cmake_sort="TSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-cray, PTHREADS, LSORT +build_dir=$pthreads/cray/lsort +mkdir -p $build_dir +cd $build_dir +cmake_pthreads="ON" +cmake_openmp="OFF" +cmake_sort="LSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-gnu, PTHREADS, TSORT +build_dir=$pthreads/gnu/tsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-gnu +cmake_pthreads="ON" +cmake_openmp="OFF" +cmake_sort="TSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-gnu, PTHREADS, LSORT +build_dir=$pthreads/gnu/lsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-gnu +cmake_pthreads="ON" +cmake_openmp="OFF" +cmake_sort="LSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-aocc, PTHREADS, TSORT +build_dir=$pthreads/aocc/tsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-aocc +cmake_pthreads="ON" +cmake_openmp="OFF" +cmake_sort="TSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-aocc, PTHREADS, LSORT +build_dir=$pthreads/aocc/lsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-aocc +cmake_pthreads="ON" +cmake_openmp="OFF" +cmake_sort="LSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-intel, PTHREADS, TSORT +build_dir=$pthreads/intel/tsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray PrgEnv-intel +cmake_pthreads="ON" +cmake_openmp="OFF" +cmake_sort="TSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +# PrgEnv-intel, PTHREADS, LSORT +build_dir=$pthreads/intel/lsort +mkdir -p $build_dir +cd $build_dir +module swap PrgEnv-cray 
PrgEnv-intel +cmake_pthreads="ON" +cmake_openmp="OFF" +cmake_sort="LSORT" +vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort +make +lpi_deck $build_dir $src_dir + +############################################################################## diff --git a/TestScripts/vpic-test.sh b/TestScripts/vpic-test.sh new file mode 100755 index 00000000..dcb12df9 --- /dev/null +++ b/TestScripts/vpic-test.sh @@ -0,0 +1,110 @@ +#!/bin/bash + +#SBATCH -N 1 +#SBATCH -t 8:00:00 + +master=/your/vpic/master/location/VPIC/vpic-master # please edit accordingly +scripts=$master/NewTestScripts +scratch=/your/vpic/build/and/results/location/VPIC # please edit accordingly +builds=$scratch/test-builds +results=$scratch/NewTestResults + +function vpic_benchmark(){ + + prg_env=$1 + dir_path=$2 + + # set up directory tree + test_dir=$results/$dir_path + mkdir -p $test_dir + cd $test_dir + mkdir -p strong weak thread + build_dir=$builds/$dir_path + + # set up strong scaling test + cd $test_dir/strong + deck=$build_dir/lpi_2d_F6_test.Linux + # launch strong scaling test + sbatch $scripts/vpic_strong_scaling.sh $prg_env $deck + + # set up thread scaling test (const cpu) + cd $test_dir/thread + deck=$build_dir/lpi_2d_F6_test.Linux + # launch thread scaling test (const cpu) + sbatch $scripts/vpic_thread_scaling.sh $prg_env $deck + + # set up weak scaling test + cd $test_dir/weak + deck1=$build_dir/lpi_2d_F6_test-nx864.Linux + deck2=$build_dir/lpi_2d_F6_test-nx288.Linux + deck3=$build_dir/lpi_2d_F6_test-nx96.Linux + deck4=$build_dir/lpi_2d_F6_test-nx48.Linux + deck5=$build_dir/lpi_2d_F6_test-nx24.Linux + deck6=$build_dir/lpi_2d_F6_test-nx12.Linux + # launch weak scaling test + sbatch $scripts/vpic_weak_scaling.sh $prg_env $deck1 $deck2 $deck3 $deck4 $deck5 $deck6 +} + +prog_env="cray" +bench_path=openmp/cray/tsort +vpic_benchmark $prog_env $bench_path + +prog_env="cray" +bench_path=openmp/cray/lsort +vpic_benchmark $prog_env $bench_path + +prog_env="aocc" +bench_path=openmp/aocc/tsort +vpic_benchmark $prog_env $bench_path + +prog_env="aocc" +bench_path=openmp/aocc/lsort +vpic_benchmark $prog_env $bench_path + +prog_env="gnu" +bench_path=openmp/gnu/tsort +vpic_benchmark $prog_env $bench_path + +prog_env="gnu" +bench_path=openmp/gnu/lsort +vpic_benchmark $prog_env $bench_path + +prog_env="intel" +bench_path=openmp/intel/tsort +vpic_benchmark $prog_env $bench_path + +prog_env="intel" +bench_path=openmp/intel/lsort +vpic_benchmark $prog_env $bench_path + +prog_env="cray" +bench_path=pthreads/cray/tsort +vpic_benchmark $prog_env $bench_path + +prog_env="cray" +bench_path=pthreads/cray/lsort +vpic_benchmark $prog_env $bench_path + +prog_env="aocc" +bench_path=pthreads/aocc/tsort +vpic_benchmark $prog_env $bench_path + +prog_env="aocc" +bench_path=pthreads/aocc/lsort +vpic_benchmark $prog_env $bench_path + +prog_env="gnu" +bench_path=pthreads/gnu/tsort +vpic_benchmark $prog_env $bench_path + +prog_env="gnu" +bench_path=pthreads/gnu/lsort +vpic_benchmark $prog_env $bench_path + +prog_env="intel" +bench_path=pthreads/intel/tsort +vpic_benchmark $prog_env $bench_path + +prog_env="intel" +bench_path=pthreads/intel/lsort +vpic_benchmark $prog_env $bench_path diff --git a/TestScripts/vpic_strong_scaling.sh b/TestScripts/vpic_strong_scaling.sh new file mode 100755 index 00000000..660b278a --- /dev/null +++ b/TestScripts/vpic_strong_scaling.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +prog_env=$1 # programming environment +code=$2 # executable + +nn=1 # number of nodes +threads=8 # number of cpu threads +p_scale=2 # scaling constant 
diff --git a/TestScripts/vpic_strong_scaling.sh b/TestScripts/vpic_strong_scaling.sh
new file mode 100755
index 00000000..660b278a
--- /dev/null
+++ b/TestScripts/vpic_strong_scaling.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+prog_env=$1 # programming environment
+code=$2 # executable
+
+nn=1 # number of nodes
+threads=8 # number of cpu threads
+p_scale=2 # scaling constant
+log=./log-strong # log file
+
+if [ $prog_env = "aocc" ]
+then
+  module swap PrgEnv-cray PrgEnv-aocc
+elif [ $prog_env = "gnu" ]
+then
+  module swap PrgEnv-cray PrgEnv-gnu
+elif [ $prog_env = "intel" ]
+then
+  module swap PrgEnv-cray PrgEnv-intel
+fi
+
+module list
+cc --version
+CC --version
+#lscpu
+#env
+
+let np=32\*$nn # number of processes / ranks
+while [ $np -ge 1 ]
+do
+  srun -n $np -c $threads --cpu-bind=cores $code --tpp $threads >> $log
+
+  let np=$np/$p_scale
+done
diff --git a/TestScripts/vpic_thread_scaling.sh b/TestScripts/vpic_thread_scaling.sh
new file mode 100755
index 00000000..4d3a8d31
--- /dev/null
+++ b/TestScripts/vpic_thread_scaling.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+prog_env=$1 # programming environment
+code=$2 # executable
+
+nn=1 # number of nodes
+threads=8 # number of cpu threads
+p_scale=2 # scaling constant
+log=./log-thread # log file
+
+if [ $prog_env = "aocc" ]
+then
+  module swap PrgEnv-cray PrgEnv-aocc
+elif [ $prog_env = "gnu" ]
+then
+  module swap PrgEnv-cray PrgEnv-gnu
+elif [ $prog_env = "intel" ]
+then
+  module swap PrgEnv-cray PrgEnv-intel
+fi
+
+module list
+cc --version
+CC --version
+#lscpu
+#env
+
+let np=32\*$nn # number of processes / ranks
+while [ $np -ge 1 ]
+do
+  srun -n $np -c $threads --cpu-bind=cores $code --tpp $threads >> $log
+
+  let np=$np/$p_scale
+
+  let threads=$threads\*$p_scale
+done
diff --git a/TestScripts/vpic_weak_scaling.sh b/TestScripts/vpic_weak_scaling.sh
new file mode 100755
index 00000000..5b670e62
--- /dev/null
+++ b/TestScripts/vpic_weak_scaling.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+prog_env=$1 # programming environment
+
+nn=1 # number of nodes
+threads=8 # number of cpu threads
+p_scale=2 # scaling constant
+log=./log-weak # log file
+
+let np=32\*$nn # number of processes / ranks
+
+if [ $prog_env = "aocc" ]
+then
+  module swap PrgEnv-cray PrgEnv-aocc
+elif [ $prog_env = "gnu" ]
+then
+  module swap PrgEnv-cray PrgEnv-gnu
+elif [ $prog_env = "intel" ]
+then
+  module swap PrgEnv-cray PrgEnv-intel
+fi
+
+module list
+cc --version
+CC --version
+#lscpu
+#env
+
+function weak_run(){
+
+  srun -n $1 -c $2 --cpu-bind=cores $3 --tpp $2 >> $log
+}
+
+code=$2 # executable
+let np=$np/$p_scale
+weak_run $np $threads $code
+
+code=$3 # executable
+let np=$np/$p_scale
+weak_run $np $threads $code
+
+code=$4 # executable
+let np=$np/$p_scale
+weak_run $np $threads $code
+
+code=$5 # executable
+let np=$np/$p_scale
+weak_run $np $threads $code
+
+code=$6 # executable
+let np=$np/$p_scale
+weak_run $np $threads $code
+
+code=$7 # executable
+let np=$np/$p_scale
+weak_run $np $threads $code
+
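In this version of vpic_weak_scaling.sh, np is halved before the first weak_run call, so with nn=1 and p_scale=2 the six decks are launched on 16, 8, 4, 2, 1 and finally 0 ranks, because bash integer division truncates 1/2 to 0 and the last srun would request zero tasks. The snippet below only reproduces that arithmetic; PATCH 2/2 further down moves the first division so the runs use 32, 16, 8, 4, 2 and 1 ranks instead.

    # Rank counts produced by the loop structure above (nn=1, p_scale=2):
    np=32
    for deck in 1 2 3 4 5 6
    do
      let np=$np/2            # division happens before the deck is run
      echo "deck$deck: np=$np"
    done
    # prints 16, 8, 4, 2, 1, 0

From 6261115d9284ed8815400b6c203234fb9f1793b3 Mon Sep 17 00:00:00 2001
From: "Anna M. Matsekh"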
Date: Wed, 30 Mar 2022 14:53:39 -0600
Subject: [PATCH 2/2] minor bug fixes

---
 TestScripts/vpic-build.sh          |  13 ++-
 TestScripts/vpic-test.sh           | 145 ++++++++++++++---------------
 TestScripts/vpic_strong_scaling.sh |   2 +-
 TestScripts/vpic_thread_scaling.sh |   2 +-
 TestScripts/vpic_weak_scaling.sh   |  34 ++++---
 5 files changed, 96 insertions(+), 100 deletions(-)

diff --git a/TestScripts/vpic-build.sh b/TestScripts/vpic-build.sh
index d5c62d32..f9b2520c 100755
--- a/TestScripts/vpic-build.sh
+++ b/TestScripts/vpic-build.sh
@@ -1,19 +1,19 @@
 #!/bin/bash
 
 #SBATCH -N 1
-#SBATCH -t 2:00:00
+#SBATCH -t 4:00:00
 #SBATCH --output=vpic-build-log.txt
 
-src_dir=/your/vpic/master/location/VPIC/vpic-master # please edit accordingly
-builds=/your/vpic/build/location/VPIC/test-builds # please edit accordingly
+src_dir=/users/matsekh/VPIC/vpic-master
+builds=/lustre/scratch5/.mdt0/matsekh/VPIC/test-builds
 mkdir -p $builds
 openmp=$builds/openmp
 pthreads=$builds/pthreads
 
-### Build weak and strong scaling lpi_2d_F6_test input decks (or use your own input deck) ###
+### Build weak and strong scaling lpi_2d_F6_test input decks ###
 function lpi_deck(){
-    $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test # "lpi-deck" directory contains strong scaling deck "lpi_2d_F6_test"
-    $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx12 # "lpi-deck" directory contains weak scaling decks "lpi_2d_F6_test-nx***"
+    $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test # strong scaling deck
+    $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx12 # weak scaling decks
     $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx24
     $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx48
     $1/bin/vpic $2/lpi-deck/lpi_2d_F6_test-nx96
@@ -229,4 +229,3 @@
 vpic_cmake $src_dir $cmake_pthreads $cmake_openmp $cmake_sort
 make
 lpi_deck $build_dir $src_dir
-##############################################################################
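This hunk replaces the "please edit accordingly" placeholders with absolute, user-specific paths, so the script now only runs unmodified for that one account and file system. A sketch of an alternative, with hypothetical variable names (VPIC_SRC_DIR and VPIC_BUILD_DIR are not used anywhere by VPIC or by these scripts), that keeps the paths overridable while still defaulting to something sensible:

    # Sketch: overridable locations with defaults (hypothetical variable names).
    src_dir=${VPIC_SRC_DIR:-$HOME/VPIC/vpic-master}
    builds=${VPIC_BUILD_DIR:-$HOME/VPIC/test-builds}
    mkdir -p $builds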
diff --git a/TestScripts/vpic-test.sh b/TestScripts/vpic-test.sh
index dcb12df9..56301ebb 100755
--- a/TestScripts/vpic-test.sh
+++ b/TestScripts/vpic-test.sh
@@ -1,11 +1,8 @@
 #!/bin/bash
 
-#SBATCH -N 1
-#SBATCH -t 8:00:00
-
-master=/your/vpic/master/location/VPIC/vpic-master # please edit accordingly
-scripts=$master/NewTestScripts
-scratch=/your/vpic/build/and/results/location/VPIC # please edit accordingly
+master=$HOME/VPIC/vpic-master
+scripts=$master/NewTestScripts
+scratch=/lustre/scratch5/.mdt0/matsekh/VPIC
 builds=$scratch/test-builds
 results=$scratch/NewTestResults
@@ -18,20 +15,22 @@ function vpic_benchmark(){
 
   prg_env=$1
   dir_path=$2
 
   # set up directory tree
   test_dir=$results/$dir_path
   mkdir -p $test_dir
   cd $test_dir
-  mkdir -p strong weak thread
+  mkdir -p strong
+  mkdir -p thread
+  mkdir -p weak
   build_dir=$builds/$dir_path
 
   # set up strong scaling test
   cd $test_dir/strong
   deck=$build_dir/lpi_2d_F6_test.Linux
   # launch strong scaling test
-  sbatch $scripts/vpic_strong_scaling.sh $prg_env $deck
+  sbatch -N 1 -t 10:00:00 $scripts/vpic_strong_scaling.sh $prg_env $deck
 
   # set up thread scaling test (const cpu)
   cd $test_dir/thread
   deck=$build_dir/lpi_2d_F6_test.Linux
   # launch thread scaling test (const cpu)
-  sbatch $scripts/vpic_thread_scaling.sh $prg_env $deck
+  sbatch -N 1 -t 10:00:00 $scripts/vpic_thread_scaling.sh $prg_env $deck
 
   # set up weak scaling test
   cd $test_dir/weak
@@ -42,69 +41,69 @@ function vpic_benchmark(){
   deck5=$build_dir/lpi_2d_F6_test-nx24.Linux
   deck6=$build_dir/lpi_2d_F6_test-nx12.Linux
   # launch weak scaling test
-  sbatch $scripts/vpic_weak_scaling.sh $prg_env $deck1 $deck2 $deck3 $deck4 $deck5 $deck6
+  sbatch -N 1 -t 10:00:00 $scripts/vpic_weak_scaling.sh $prg_env $deck1 $deck2 $deck3 $deck4 $deck5 $deck6
 }
 
-prog_env="cray"
-bench_path=openmp/cray/tsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="cray"
-bench_path=openmp/cray/lsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="aocc"
-bench_path=openmp/aocc/tsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="aocc"
-bench_path=openmp/aocc/lsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="gnu"
-bench_path=openmp/gnu/tsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="gnu"
-bench_path=openmp/gnu/lsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="intel"
-bench_path=openmp/intel/tsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="intel"
-bench_path=openmp/intel/lsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="cray"
-bench_path=pthreads/cray/tsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="cray"
-bench_path=pthreads/cray/lsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="aocc"
-bench_path=pthreads/aocc/tsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="aocc"
-bench_path=pthreads/aocc/lsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="gnu"
-bench_path=pthreads/gnu/tsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="gnu"
-bench_path=pthreads/gnu/lsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="intel"
-bench_path=pthreads/intel/tsort
-vpic_benchmark $prog_env $bench_path
-
-prog_env="intel"
-bench_path=pthreads/intel/lsort
-vpic_benchmark $prog_env $bench_path
+ prog_env="cray"
+ bench_path=openmp/cray/tsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="cray"
+ bench_path=openmp/cray/lsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="aocc"
+ bench_path=openmp/aocc/tsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="aocc"
+ bench_path=openmp/aocc/lsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="gnu"
+ bench_path=openmp/gnu/tsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="gnu"
+ bench_path=openmp/gnu/lsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="intel"
+ bench_path=openmp/intel/tsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="intel"
+ bench_path=openmp/intel/lsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="cray"
+ bench_path=pthreads/cray/tsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="cray"
+ bench_path=pthreads/cray/lsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="aocc"
+ bench_path=pthreads/aocc/tsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="aocc"
+ bench_path=pthreads/aocc/lsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="gnu"
+ bench_path=pthreads/gnu/tsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="gnu"
+ bench_path=pthreads/gnu/lsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="intel"
+ bench_path=pthreads/intel/tsort
+ vpic_benchmark $prog_env $bench_path
+
+ prog_env="intel"
+ bench_path=pthreads/intel/lsort
+ vpic_benchmark $prog_env $bench_path
diff --git a/TestScripts/vpic_strong_scaling.sh b/TestScripts/vpic_strong_scaling.sh
index 660b278a..84e7f92a 100755
--- a/TestScripts/vpic_strong_scaling.sh
+++ b/TestScripts/vpic_strong_scaling.sh
@@ -22,7 +22,7 @@ fi
 module list
 cc --version
 CC --version
-#lscpu
+lscpu
 #env
 
 let np=32\*$nn # number of processes / ranks
diff --git a/TestScripts/vpic_thread_scaling.sh b/TestScripts/vpic_thread_scaling.sh
index 4d3a8d31..57c15b94 100755
--- a/TestScripts/vpic_thread_scaling.sh
+++ b/TestScripts/vpic_thread_scaling.sh
@@ -22,7 +22,7 @@ fi
 module list
 cc --version
 CC --version
-#lscpu
+lscpu
 #env
 
 let np=32\*$nn # number of processes / ranks
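For reference, vpic_thread_scaling.sh (fixed just above to run lscpu) keeps the total CPU count per run constant: it starts at 32 ranks with 8 threads each and, after every srun, halves the rank count while doubling the thread count. The snippet below is only a sketch that prints the schedule the script traverses with nn=1 and p_scale=2; it does not launch anything.

    np=32; threads=8
    while [ $np -ge 1 ]
    do
      echo "srun -n $np -c $threads  # np*threads = $((np*threads))"
      let np=$np/2
      let threads=$threads\*2
    done
    # -> (32,8) (16,16) (8,32) (4,64) (2,128) (1,256); np*threads stays at 256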
diff --git a/TestScripts/vpic_weak_scaling.sh b/TestScripts/vpic_weak_scaling.sh
index 5b670e62..ce98a69e 100755
--- a/TestScripts/vpic_weak_scaling.sh
+++ b/TestScripts/vpic_weak_scaling.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash
 
 prog_env=$1 # programming environment
 
@@ -6,8 +6,7 @@ nn=1 # number of nodes
 threads=8 # number of cpu threads
 p_scale=2 # scaling constant
 log=./log-weak # log file
-
-let np=32\*$nn # number of processes / ranks
+let np=32\*$nn # number of processes / ranks
 
 if [ $prog_env = "aocc" ]
 then
@@ -22,8 +21,8 @@ fi
 
 module list
 cc --version
-CC --version
-#lscpu
+CC --version
+lscpu
 #env
 
 function weak_run(){
@@ -31,27 +30,26 @@ function weak_run(){
   srun -n $1 -c $2 --cpu-bind=cores $3 --tpp $2 >> $log
 }
 
-code=$2 # executable
-let np=$np/$p_scale
-weak_run $np $threads $code
+code=$2 # executable
+weak_run $np $threads $code # np==$nn
 
-code=$3 # executable
-let np=$np/$p_scale
+code=$3 # executable
+let np=$np/$p_scale # np==$nn/2
 weak_run $np $threads $code
 
-code=$4 # executable
-let np=$np/$p_scale
+code=$4 # executable
+let np=$np/$p_scale # np==$nn/4
 weak_run $np $threads $code
 
-code=$5 # executable
-let np=$np/$p_scale
+code=$5 # executable
+let np=$np/$p_scale # np==$nn/8
 weak_run $np $threads $code
 
-code=$6 # executable
-let np=$np/$p_scale
+code=$6 # executable
+let np=$np/$p_scale # np==$nn/16
 weak_run $np $threads $code
 
-code=$7 # executable
-let np=$np/$p_scale
+code=$7 # executable
+let np=$np/$p_scale # np==$nn/32
 weak_run $np $threads $code
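With this fix the first deck runs before np is divided, so with nn=1 the six decks map to 32, 16, 8, 4, 2 and 1 ranks; the in-line comments np==$nn, np==$nn/2 and so on are presumably meant to be read as np == 32*$nn, 16*$nn, ..., 1*$nn. The six near-identical code=$N blocks could also be driven directly from the argument list. A sketch, not part of the patch, assuming the weak_run function and the np, threads and p_scale variables defined in this script:

    # Sketch: iterate over whatever decks are passed after the prog_env argument.
    shift                        # drop $1 (prog_env); "$@" is now the list of decks
    for code in "$@"
    do
      weak_run $np $threads $code
      let np=$np/$p_scale        # halve the rank count before the next, smaller deck
    done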