Cleaned up the implementation and docstrings of the cube_file_methods functions using ChatGPT

alexvakimov · alexvakimov · commit dee30190f149 · 2026-03-29T16:39:13.000-04:00
diff --git a/src/libra_py/cube_file_methods.py b/src/libra_py/cube_file_methods.py
@@ -33,211 +33,245 @@
 import util.libutil as comn
 import libra_py.packages.cp2k.methods as CP2K_methods
 
-
 def read_cube(filename: str):
     """
-    This function reads the wavefunction from a cube file and stores it in
-    a 1D numpy array
-    Args:
-        filename (string): the name of the .cube file to read
-    Returns:
-        isovalues (numpy.array) : the 1D array of the wavefunctions for all the points on the grid
+    Read scalar field data (e.g., wavefunction or electron density) from a Gaussian
+    .cube file and return it as a flattened 1D NumPy array.
+
+    This function parses the cube file header to determine where the volumetric
+    data begins, then reads all grid values and stores them in a single array.
+    The ordering of values follows the convention used in the cube file
+    (typically z fastest, then y, then x).
+
+    Parameters
+    ----------
+    filename : str
+        Path to the .cube file.
+
+    Returns
+    -------
+    isovalues : numpy.ndarray
+        1D array containing the scalar field values on the grid.
+
+    Notes
+    -----
+    - The number of atoms is read from the third line of the file. Its absolute
+      value is used to handle Gaussian cube files where this number may be negative.
+    - The function skips:
+        * 2 comment lines
+        * 1 line with number of atoms and origin
+        * 3 lines defining the grid
+        * `natoms` lines with atomic coordinates
+    - Some cube file variants include an extra line before the data block; this
+      is handled automatically.
+    - The output is flattened; reshaping into a 3D grid must be done separately
+      using grid dimensions from the header if needed.
     """
 
-    f = open(filename, 'r')
-    lines = f.readlines()
-    f.close()
-    # The absolute value is for Gaussian since it might return a negative number for number of atoms
+    with open(filename, 'r') as f:
+        lines = f.readlines()
+
+    # Number of atoms (may be negative in some Gaussian cube files)
     natoms = abs(int(lines[2].split()[0]))
-    # We skip a few lines in the cube files, that go as follows:
-    # 2 lines - comments
-    # 1 line  - the number of atoms, etc.
-    # 3 lines - the grid spacing and number of grid points in each dimensions
 
-    nstart = natoms + 2 + 1 + 3        # the index of the first line containing wfc data
-    # For Gaussian cube files
+    # Index of the first line containing volumetric data
+    nstart = natoms + 2 + 1 + 3
+
+    # Handle cube files with an extra line before data (format variation)
     if len(lines[nstart].split()) < 6:
         nstart += 1
 
-    nlines = len(lines)          # the total number of lines
-
     isovalues = []
-    for i in range(nstart, nlines):
-        tmp = lines[i].split()
-        ncols = len(tmp)
-
-        for j in range(ncols):
-            isovalues.append(float(tmp[j]))
-
-    isovalues = np.array(isovalues)
-    # data = np.loadtxt(filename,skiprows=n)
-
-    return isovalues
+    for line in lines[nstart:]:
+        for val in line.split():
+            isovalues.append(float(val))
 
+    return np.array(isovalues)
+    
 
 def grid_volume(filename: str):
     """
-    This function reads the wavefunction from a cube file and calculate
-    the grid volum using the X-, Y- and Z-axis of the volumetric region
-    which are placed in the 4th, 5th and 6th line of the cube file structure
-
-    Args:
-
-        filename (string): The name of the .cube file to read.
-
-    Returns:
-
-        dv (float): The grid volume in Bohr^3.
-
+    Compute the volume element (voxel volume) of a grid cell from a Gaussian
+    .cube file.
+
+    The cube file defines the volumetric grid using three lattice vectors
+    (one per axis), given in lines 4–6 of the file. Each vector corresponds
+    to the spacing and direction of the grid along x, y, and z. The volume
+    of a single grid cell is the absolute value of the determinant of these
+    three vectors.
+
+    Parameters
+    ----------
+    filename : str
+        Path to the .cube file.
+
+    Returns
+    -------
+    dv : float
+        Volume of a single grid cell (voxel) in Bohr³.
+
+    Notes
+    -----
+    - Lines 4–6 of the cube file contain:
+        * number of grid points along each axis (first column)
+        * corresponding lattice vector components (remaining columns)
+    - Only the vector components are used here.
+    - The total grid volume would be `dv * Nx * Ny * Nz`, where Nx, Ny, Nz
+      are the number of grid points along each axis.
     """
 
-    f = open(filename, 'r')
-    lines = f.readlines()
-    f.close()
+    with open(filename, 'r') as f:
+        lines = f.readlines()
 
-    # We use the 3rd, 4th and 5th row in the lines to obtain
-    # the axes of the parallelpiped into a numpy array (Voxel).
-    axis_1 = [float(lines[3].split()[1]), float(lines[3].split()[2]), float(lines[3].split()[3])]
-    axis_2 = [float(lines[4].split()[1]), float(lines[4].split()[2]), float(lines[4].split()[3])]
-    axis_3 = [float(lines[5].split()[1]), float(lines[5].split()[2]), float(lines[5].split()[3])]
-    vol_element = np.array([axis_1, axis_2, axis_3])
-    # Then we calculate the determinant of Voxel to obtain the volume.
-    dv = np.absolute(np.linalg.det(vol_element))
+    # Extract lattice vectors (skip the first column: number of grid points)
+    axis_1 = [float(x) for x in lines[3].split()[1:4]]
+    axis_2 = [float(x) for x in lines[4].split()[1:4]]
+    axis_3 = [float(x) for x in lines[5].split()[1:4]]
+
+    # Form the voxel matrix and compute its volume
+    voxel = np.array([axis_1, axis_2, axis_3])
+    dv = abs(np.linalg.det(voxel))
 
     return dv
 
 
 def read_volumetric_data(filename: str):
     """
-    This function reads the volumetric data in a format used for plotting
-    of the data. The difference between 'read_cube' function is that it will
-    show the data in a 3D array which has the shape as the number of grid points
-    for each of the X-, Y-, and Z-axis in the 4th to 6th line of the cube file.
-    This function will return the grid points in each axis, the coordinates of the
-    structure and the spacing vector which is used to plot the isosurfaces of the
-    molecular orbitals.
-
-    Args:
-
-        filename (string): The name of the .cube file.
-
-    Returns:
-
-        coordinates (2D numpy array): The coordinates of the molecule structure in
-                                      the same format shown in the .cube file.
-
-        x_grid, y_grid, z_grid (3D numpy array): Containing the grid points for each of the X-,
-                                     Y-, and Z- axis.
-
-        wave_fun (3D numpy array): The volumetric data in a 3D numpy array format.
-
-        spacing_vector (numpy 1D array): The spacing vector used for plotting the isosurfaces.
-
+    Read volumetric data from a Gaussian .cube file and return it in a
+    structured 3D form suitable for visualization and analysis.
+
+    This function parses the cube file header to extract grid dimensions,
+    lattice vectors, and atomic coordinates, then reshapes the volumetric
+    data into a 3D array. It also constructs real-space grid coordinates
+    corresponding to each voxel.
+
+    Parameters
+    ----------
+    filename : str
+        Path to the .cube file.
+
+    Returns
+    -------
+    coordinates : numpy.ndarray
+        Array of shape (natoms, 5) containing atomic data as read from the
+        cube file (atomic number, charge, x, y, z). Values are returned as strings.
+
+    x_grid, y_grid, z_grid : numpy.ndarray
+        3D arrays of shape (nx, ny, nz) containing the Cartesian coordinates
+        of each grid point.
+
+    wave_fun : numpy.ndarray
+        3D array of shape (nx, ny, nz) containing the volumetric data
+        (e.g., wavefunction or electron density).
+
+    spacing_vector : numpy.ndarray
+        Vector representing the effective grid spacing (sum of the three
+        lattice vectors). Often used in visualization routines.
+
+    Notes
+    -----
+    - The cube file structure is:
+        * 2 comment lines
+        * 1 line: number of atoms and origin
+        * 3 lines: grid size and lattice vectors
+        * natoms lines: atomic coordinates
+        * remaining lines: volumetric data
+    - Grid vectors are given in Bohr; no unit conversion is applied.
+    - The volumetric data is assumed to be ordered with z varying fastest,
+      followed by y, then x (standard cube convention).
     """
 
-    f = open(filename, 'r')
-    lines = f.readlines()
-    f.close()
+    with open(filename, 'r') as f:
+        lines = f.readlines()
 
-    # The number of atoms in the 3rd line
+    # Number of atoms
     natoms = int(lines[2].split()[0])
 
-    # The number of voxels defined for each axis obtained from
-    # the first elements of the 4th, 5th, and 6th line of the cube files
+    # Grid dimensions
     nx = int(lines[3].split()[0])
     ny = int(lines[4].split()[0])
     nz = int(lines[5].split()[0])
 
-    # The three vectors below are the same vectors which are present in lines 4th to 6th
-    # which are used to create the 3D numpy array of the grid points (x, y, z) used to plot
-    # the isosurfaces.
-    # Here we use the same unit as is used in the .cube file structure which is Bohr
-    # with no need for unit conversion. This makes the plotting easier.
-    axis_1 = np.array([float(lines[3].split()[1]), float(lines[3].split()[2]), float(lines[3].split()[3])])
-    axis_2 = np.array([float(lines[4].split()[1]), float(lines[4].split()[2]), float(lines[4].split()[3])])
-    axis_3 = np.array([float(lines[5].split()[1]), float(lines[5].split()[2]), float(lines[5].split()[3])])
-
-    # The spacing vector. This will be used in the 'marching_cubes_lewiner'
-    # function to define the vertices and faces for 'plot_trisurf' function.
-    # This vector is obtained from the sum of the three axis in the cube file as above.
+    # Lattice vectors
+    axis_1 = np.array([float(x) for x in lines[3].split()[1:4]])
+    axis_2 = np.array([float(x) for x in lines[4].split()[1:4]])
+    axis_3 = np.array([float(x) for x in lines[5].split()[1:4]])
+
+    # Effective spacing vector (useful for visualization libraries)
     spacing_vector = axis_1 + axis_2 + axis_3
 
-    # First we read all the isovalues into a 1D list 'isovals'.
+    # Read volumetric data (flattened)
+    data_start = natoms + 6
     isovals = []
+    for line in lines[data_start:]:
+        isovals.extend(float(val) for val in line.split())
+
+    # Reshape into 3D array (cube convention)
+    wave_fun = np.array(isovals).reshape((nx, ny, nz))
 
-    # Starting from the line which the volumetric data starts which is the (natoms+3+2+1+1)th line.
-    for i in range(natoms + 3 + 2 + 1, len(lines)):
-        for j in range(0, len(lines[i].split())):
-            isovals.append(float(lines[i].split()[j]))
-
-    # Define the volumetric numpy array
-    wave_fun = np.zeros((nx, ny, nz))
-
-    # Setting up the counters to append the isovalues in a 3D numpy array
-    c = 0
-    c1 = 0
-    c2 = 0
-
-    for i in range(0, len(isovals)):
-        if c2 != nx:
-            wave_fun[c2][c1][c] = isovals[i]
-            c = c + 1
-            if c % nz == 0:
-                c = 0
-                c1 = c1 + 1
-                if c1 % ny == 0:
-                    c1 = 0
-                    c2 = c2 + 1
-
-    # Now define the x, y, and z 3D arrays to store the grids
-    # which is then used for plotting the isosurfaces.
+    # Construct coordinate grids
     x_grid = np.zeros((nx, ny, nz))
     y_grid = np.zeros((nx, ny, nz))
     z_grid = np.zeros((nx, ny, nz))
 
-    # Defining each element of the grid points.
-    for i in range(0, nx):
-        for j in range(0, ny):
-            for k in range(0, nz):
-                x_grid[i][j][k] = axis_1[0] * i + axis_2[0] * j + axis_3[0] * k
-                y_grid[i][j][k] = axis_1[1] * i + axis_2[1] * j + axis_3[1] * k
-                z_grid[i][j][k] = axis_1[2] * i + axis_2[2] * j + axis_3[2] * k
+    for i in range(nx):
+        for j in range(ny):
+            for k in range(nz):
+                r = i * axis_1 + j * axis_2 + k * axis_3
+                x_grid[i, j, k] = r[0]
+                y_grid[i, j, k] = r[1]
+                z_grid[i, j, k] = r[2]
 
-    # For plottin the atoms in the molecule we have to read the
-    # coordinates in xyz format which starts from the 7th line.
-    coordinates = []
-    for i in range(6, natoms + 6):
-        coordinates.append(lines[i].split())
-
-    coordinates = np.array(coordinates)
+    # Atomic coordinates (raw format from file)
+    coordinates = np.array([lines[i].split() for i in range(6, natoms + 6)])
 
     return coordinates, x_grid, y_grid, z_grid, wave_fun, spacing_vector
-
+    
 
 def integrate_cube(cube_A, cube_B, grid_volume):
     """
-    This function calculates the element-wise multiplication of two numpy arrays
-    and sums their product. Then, it will multiply the sum by 'dv' element to
-    compute the integral of two wavefunction represented as .cube files.
-    Args:
-        cube_A, cube_B (numpy array): The elements of the cube files in a 1D array
-                            obtained from the 'read_cube' function.
-        grid_volume (float): The volume of the voxel obtained from grid_volume function.
-    Returns:
-        integral (float): The integration between two wavefunction in the .cube files.
+    Compute the numerical integral of the product of two scalar fields
+    defined on the same volumetric grid (e.g., wavefunctions from .cube files).
+
+    The integral is approximated as a discrete sum over all grid points:
+        ∫ A(r) B(r) dV ≈ Σ_i A_i * B_i * dv
+    where dv is the volume of a single grid cell (voxel).
+
+    Parameters
+    ----------
+    cube_A, cube_B : numpy.ndarray
+        1D arrays containing the volumetric data (e.g., wavefunctions or
+        densities) sampled on the same grid. These are typically obtained
+        from a cube file reader (e.g., `read_cube`).
+        Both arrays must have the same shape and ordering.
+
+    grid_volume : float
+        Volume of a single grid cell (voxel), typically computed using
+        `grid_volume`. Units are usually Bohr³.
+
+    Returns
+    -------
+    integral : float
+        Numerical approximation of the integral ∫ A(r) B(r) dV.
+
+    Notes
+    -----
+    - This operation corresponds to an overlap integral if A and B are
+      wavefunctions defined on the same grid.
+    - No normalization or unit conversion is performed.
+    - The accuracy depends on the grid resolution and spacing.
     """
-    # Compute the element-wise multiplication of the two cube files
-    # which were previously obtained in 1D numpy arrays and store
-    # them into another 1D numpy array.
-    product = np.multiply(cube_A, cube_B)
 
-    # Compute the summation of the above matrix
-    summation = product.sum()
-    integral = summation * grid_volume
+    # Element-wise product
+    product = cube_A * cube_B
+
+    # Discrete integration
+    integral = product.sum() * grid_volume
 
     return integral
 
 
+
 def plot_cubes(params):
     """
     This function plots the cubes for selected energy levels using VMD.