From c0aca756f0a4e39044d3ad8d41e3b1d8327c31a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eryk=20Trzci=C5=84ski?= Date: Sun, 18 Aug 2024 17:55:38 +0200 Subject: [PATCH 01/48] Work in progress channel detection module --- prody/proteins/interactions.py | 699 ++++++++++++++++++++++++++++++++- 1 file changed, 697 insertions(+), 2 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 40f3dd3d6..c64d1ad19 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -16,7 +16,6 @@ __credits__ = ['James Krieger', 'Karolina Mikulska-Ruminska'] __email__ = ['karolamik@fizyka.umk.pl', 'jamesmkrieger@gmail.com'] - import numpy as np from numpy import * from prody import LOGGER, SETTINGS, PY3K @@ -33,6 +32,14 @@ import multiprocessing +import subprocess +import heapq +import open3d as o3d +from collections import deque +from scipy.interpolate import CubicSpline +from scipy.spatial import Voronoi, Delaunay +from pathlib import Path + __all__ = ['calcHydrogenBonds', 'calcChHydrogenBonds', 'calcSaltBridges', 'calcRepulsiveIonicBonding', 'calcPiStacking', 'calcPiCation', 'calcHydrophobic', 'calcDisulfideBonds', 'calcMetalInteractions', @@ -46,7 +53,7 @@ 'calcHydrogenBondsTrajectory', 'calcHydrophobicOverlapingAreas', 'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory', 'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues', - 'showSminaTermValues'] + 'showSminaTermValues', 'run_vmd_script', 'detect_channels'] def cleanNumbers(listContacts): @@ -4345,4 +4352,692 @@ def saveInteractionsPDB(self, **kwargs): LOGGER.info('PDB file saved.') return freq_contacts_list + + + + + + + +def run_vmd_script(vmd_path, file_path, script_path = None, output_path = None): + """Executes a VMD script to create a mesh representation of a protein and save it as a .stl file. + + This function runs a VMD (Visual Molecular Dynamics) script using the specified VMD executable and input file. 
+ It also manages the paths for the script and output, ensuring they are correctly set up before execution. + + :arg vmd_path: Path to the VMD executable. This is required to run the VMD script. + :type vmd_path: str + + :arg file_path: Path to the input file that will be processed by the VMD script. + :type file_path: str + + :arg script_path: Path to the VMD script that will be executed. If **None**, defaults to 'vmd_script.tcl' in the + current working directory. The script must be a valid Tcl script for VMD. + :type script_path: str or None + + :arg output_path: Path where the output file will be saved. If **None**, defaults to 'output/protein.stl' in + the current working directory. The output file will be created or overwritten at this location. + :type output_path: str or None + + :returns: None + + This function performs the following steps: + 1. **Path Handling:** Resolves the paths for the VMD script and output file. If not provided, default paths are + used. Creates the output directory if it does not exist. + 2. **Command Execution:** Constructs the command to run the VMD script with the specified arguments. Executes the + command using `subprocess.run()` and checks for any errors during execution. The script creates a mesh representation + of the protein to be further visualized. + 3. **Error Handling:** Catches and prints errors if the VMD script fails or if any unexpected exceptions occur. + + Note: Ensure that VMD is correctly installed and accessible via the provided `vmd_path`, and that the script and + file paths are valid for successful execution. 
+ """ + script_path = Path(script_path or 'vmd_script.tcl').resolve() + if not script_path.is_file(): + raise FileNotFoundError("Script does not exist.") + + output_path = Path(output_path or 'output/protein.stl').resolve() + output_path.parent.mkdir(parents=True, exist_ok=True) + + command = [vmd_path, '-e', str(script_path), '-args', str(file_path), str(output_path)] + + try: + subprocess.run(command, check=True) + except subprocess.CalledProcessError as e: + print(f"VMD caused an error: {e}") + except Exception as e: + print(f"An unexpected error occurred: {e}") + +def detect_channels(protein, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15, output_path=None, visualizer=None, stl_path=None): + """Detects channels in a protein structure and visualizes or saves the results based on the given criteria. + + This function processes the provided protein structure to identify channels, cavities, and surface, and + saves the results according to the specified parameters. The detection involves several steps, including + filtering and visualization. + + :arg protein: The protein structure from which to detect channels. The protein object should support methods for + selecting atoms and retrieving coordinates. + :type protein: Protein object + + :arg r1: Radius for determining protein surface. Default is **3**. This parameter defines the cutoff for removing + too big simplices based on distance from the center of the cavities. + :type r1: float + + :arg r2: Radius for detecting cavities in protein. Default is **1.25**. This parameter defines the cutoff for + removing interior small simplices. + :type r2: float + + :arg min_depth: Minimum depth for filtering cavities. Default is **10**. Only cavities with a depth greater than + or equal to this value will be retained. + :type min_depth: int + + :arg bottleneck: Minimum bottleneck size for filtering channels. Default is **1**. Channels with a bottleneck size + smaller than this value will be filtered out. 
+ :type bottleneck: float + + :arg sparsity: Controls the quantity of channels. The higher the sparsity, the fewer channels are detected. + Default is **15**. This parameter defines the minimum distance between end points of channels. + :type sparsity: float + + :arg output_path: Path to save the results as a PDB file. If **None**, the results will not be saved. Default is + **None**. + :type output_path: str or None + + :arg visualizer: Type of visualization to perform. Options are **'surface'**, **'channels'**, or **'cavities'**. + Default is **None**. Determines how the results are visualized. + :type visualizer: str or None + + :arg stl_path: Path to an STL file for visualizing external meshes. If **None**, default visualization methods + will be used. Otherwise, the results will be visualized with the protein structure on top. Default is **None**. + :type stl_path: str or None + + :returns: None + + This function performs the following steps: + 1. **Selection and Filtering:** Selects non-hetero atoms from the protein, calculates van der Waals radii, and performs + 3D Delaunay triangulation and Voronoi tessellation on the coordinates. + 2. **State Management:** Creates and updates different states of the protein structure to filter out simplices based on + the given radii. + 3. **Surface Layer Calculation:** Determines the surface and second-layer simplices from the filtered results. + 4. **Cavity and Channel Detection:** Finds and filters cavities based on their depth and calculates channels using + Dijkstra's algorithm. + 5. **Visualization and Saving:** Filters the detected channels by bottleneck size, generates meshes for them, and then + saves the results to a PDB file and/or visualizes them based on the specified parameters. + + Note: Ensure that the necessary external libraries and methods are properly imported and available for this function to + execute correctly. 
+ """ + + class State: + def __init__(self, simplices, neighbors, vertices): + self.simp = simplices + self.neigh = neighbors + self.verti = vertices + + def __eq__(self, other): + if not isinstance(other, State): + return False + return (np.array_equal(self.simp, other.simp) and + np.array_equal(self.neigh, other.neigh) and + np.array_equal(self.verti, other.verti)) + + def set_state(self, simplices, neighbors, vertices): + self.simp = simplices + self.neigh = neighbors + self.verti = vertices + + def get_state(self): + return self.simp, self.neigh, self.verti + + class Cavity: + def __init__(self, tetrahedra, is_connected_to_surface): + self.tetrahedra = tetrahedra + self.is_connected_to_surface = is_connected_to_surface + self.starting_tetrahedron = None + self.channels = [] + self.depth = 0 + + def make_surface(self): + self.is_connected_to_surface = True + + def set_exit_tetrahedra(self, exit_tetrahedra, end_tetrahedra): + self.exit_tetrahedra = exit_tetrahedra + self.end_tetrahedra = end_tetrahedra + + def set_starting_tetrahedron(self, tetrahedron): + self.starting_tetrahedron = tetrahedron + + def set_depth(self, depth): + self.depth = depth + + def add_channel(self, channel): + self.channels.append(channel) + + class Channel: + def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck): + self.tetrahedra = tetrahedra + self.centerline_spline = centerline_spline + self.radius_spline = radius_spline + self.length = length + self.bottleneck = bottleneck + + def get_splines(self): + return self.centerline_spline, self.radius_spline + + def visualize_external_grid(points, simp, stl_file=None, other_mesh=None, channel_mesh=None, ret_lines=None): + triangles = [] + for tetra in simp: + triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), + sorted([tetra[0], tetra[1], tetra[3]]), + sorted([tetra[0], tetra[2], tetra[3]]), + sorted([tetra[1], tetra[2], tetra[3]])]) + + triangles = np.array(triangles) + triangles.sort(axis=1) + + 
triangles_tuple = [tuple(tri) for tri in triangles] + unique_triangles, counts = np.unique(triangles_tuple, return_counts=True, axis=0) + + surface_triangles = unique_triangles[counts == 1] + + lines = [] + for simplex in surface_triangles: + for i in range(3): + for j in range(i + 1, 3): + lines.append([simplex[i], simplex[j]]) + + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + + if ret_lines: + return line_set + + geometries = [line_set, o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] + + if stl_file is not None: + stl_mesh = o3d.io.read_triangle_mesh(stl_file) + stl_mesh.compute_vertex_normals() + stl_mesh.paint_uniform_color([0.1, 0.7, 0.3]) + geometries.append(stl_mesh) + + if channel_mesh is not None: + if not isinstance(channel_mesh, list): + channel_mesh = [channel_mesh] + for mesh in channel_mesh: + mesh.compute_vertex_normals() + mesh.paint_uniform_color([0.5, 0.0, 0.5]) + geometries.extend(channel_mesh) + + o3d.visualization.draw_geometries(geometries) + + def visualize_external_mesh(prot_coords, prot_simp, lines=None, alpha=0.5): + triangles = [] + for tetra in prot_simp: + triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), + sorted([tetra[0], tetra[1], tetra[3]]), + sorted([tetra[0], tetra[2], tetra[3]]), + sorted([tetra[1], tetra[2], tetra[3]])]) + + surface_triangles = np.unique(np.array(triangles), axis=0, return_counts=True)[0] + + mesh = o3d.geometry.TriangleMesh() + mesh.vertices = o3d.utility.Vector3dVector(prot_coords) + mesh.triangles = o3d.utility.Vector3iVector(surface_triangles) + + mesh.compute_vertex_normals() + mesh.paint_uniform_color([0.1, 0.7, 0.3]) + + vis = o3d.visualization.Visualizer() + vis.create_window() + vis.add_geometry(mesh) + + if lines: + vis.add_geometry(lines) + + vis.get_render_option().mesh_show_back_face = True + vis.get_render_option().background_color = np.array([1, 1, 1]) + 
vis.update_renderer() + vis.run() + vis.destroy_window() + + def visualize_channel(channel_meshes, stl_file=None): + meshes_to_visualize = [o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] + + if stl_file is not None: + mesh = o3d.io.read_triangle_mesh(stl_file) + mesh.compute_vertex_normals() + mesh.paint_uniform_color([0.1, 0.7, 0.3]) + meshes_to_visualize.append(mesh) + + if channel_meshes is not None: + if not isinstance(channel_meshes, list): + channel_meshes = [channel_meshes] + for channel_mesh in channel_meshes: + channel_mesh.compute_vertex_normals() + channel_mesh.paint_uniform_color([0.5, 0.0, 0.5]) + meshes_to_visualize.extend(channel_meshes) + + if len(meshes_to_visualize) > 1: + o3d.visualization.draw_geometries(meshes_to_visualize) + else: + print("No mesh to visualize.") + + def sphere_fit(vertices, tetrahedron, vertice, vdw_radii, r): + center = vertice + d_sum = sum(np.linalg.norm(center - vertices[atom]) for atom in tetrahedron) + r_sum = sum(r + vdw_radii[atom] for atom in tetrahedron) + + return d_sum >= r_sum + + def delete_simplices3d(points, simplices, neighbors, vertices, vdw_radii, r, surface): + simp, neigh, verti, deleted = [], [], [], [] + + for i, tetrahedron in enumerate(simplices): + should_delete = (-1 in neighbors[i] and sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r)) if surface else not sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r) + + if should_delete: + deleted.append(i) + else: + simp.append(simplices[i]) + neigh.append(neighbors[i]) + verti.append(vertices[i]) + + simp = np.array(simp) + neigh = np.array(neigh) + verti = np.array(verti) + deleted = np.array(deleted) + + mask = np.isin(neigh, deleted) + neigh[mask] = -1 + + for i in reversed(deleted): + mask = (neigh > i) & (neigh != -1) + neigh[mask] -= 1 + + return simp, neigh, verti + + def delete_section(simplices_subset, simplices, neighbors, vertices, reverse=False): + simp, neigh, verti, deleted = [], [], [], [] + + 
for i, tetrahedron in enumerate(simplices): + match = any((simplices_subset == tetrahedron).all(axis=1)) + if reverse: + if match: + simp.append(tetrahedron) + neigh.append(neighbors[i]) + verti.append(vertices[i]) + else: + deleted.append(i) + else: + if match: + deleted.append(i) + else: + simp.append(tetrahedron) + neigh.append(neighbors[i]) + verti.append(vertices[i]) + + simp, neigh, verti = map(np.array, [simp, neigh, verti]) + deleted = np.array(deleted) + + mask = np.isin(neigh, deleted) + neigh[mask] = -1 + + for i in reversed(deleted): + neigh = np.where((neigh > i) & (neigh != -1), neigh - 1, neigh) + + return simp, neigh, verti + + def get_vdw_radii(atoms): + vdw_radii_dict = { + 'H': 1.20, 'HE': 1.40, 'LI': 1.82, 'BE': 1.53, 'B': 1.92, 'C': 1.70, + 'N': 1.55, 'O': 1.52, 'F': 1.47, 'NE': 1.54, 'NA': 2.27, 'MG': 1.73, + 'AL': 1.84, 'SI': 2.10, 'P': 1.80, 'S': 1.80, 'CL': 1.75, 'AR': 1.88, + 'K': 2.75, 'CA': 2.31, 'SC': 2.11, 'NI': 1.63, 'CU': 1.40, 'ZN': 1.39, + 'GA': 1.87, 'GE': 2.11, 'AS': 1.85, 'SE': 1.90, 'BR': 1.85, 'KR': 2.02, + 'RB': 3.03, 'SR': 2.49, 'PD': 1.63, 'AG': 1.72, 'CD': 1.58, 'IN': 1.93, + 'SN': 2.17, 'SB': 2.06, 'TE': 2.06, 'I': 1.98, 'XE': 2.16, 'CS': 3.43, + 'BA': 2.68, 'PT': 1.75, 'AU': 1.66, 'HG': 1.55, 'TL': 1.96, 'PB': 2.02, + 'BI': 2.07, 'PO': 1.97, 'AT': 2.02, 'RN': 2.20, 'FR': 3.48, 'RA': 2.83, + 'U': 1.86, 'FE': 2.44 + } + + return np.array([vdw_radii_dict[atom] for atom in atoms]) + + def surface_layer(shape_simplices, filtered_simplices, shape_neighbors): + surface_simplices, surface_neighbors = [], [] + interior_simplices = [] + + for i in range(len(shape_simplices)): + if -1 in shape_neighbors[i]: + surface_simplices.append(shape_simplices[i]) + surface_neighbors.append(shape_neighbors[i]) + else: + interior_simplices.append(shape_simplices[i]) + + surface_simplices = np.array(surface_simplices) + surface_neighbors = np.array(surface_neighbors) + interior_simplices = np.array(interior_simplices) + + 
filtered_surface_simplices = surface_simplices[ + np.any(np.all(surface_simplices[:, None] == filtered_simplices, axis=2), axis=1) + ] + filtered_surface_neighbors = surface_neighbors[ + np.any(np.all(surface_simplices[:, None] == filtered_simplices, axis=2), axis=1) + ] + + filtered_surface_neighbors = np.unique(filtered_surface_neighbors) + filtered_surface_neighbors = filtered_surface_neighbors[filtered_surface_neighbors != 0] + + filtered_interior_simplices = interior_simplices[ + np.any(np.all(interior_simplices[:, None] == filtered_simplices, axis=2), axis=1) + ] + + surface_layer_neighbor_simplices = shape_simplices[filtered_surface_neighbors] + + second_layer = filtered_interior_simplices[ + np.any(np.all(filtered_interior_simplices[:, None] == surface_layer_neighbor_simplices, axis=2), axis=1) + ] + + return filtered_surface_simplices, second_layer + + + def find_groups(neigh, is_cavity=True): + x = neigh.shape[0] + visited = np.zeros(x, dtype=bool) + groups = [] + + def dfs(tetra_index): + stack = [tetra_index] + current_group = [] + while stack: + index = stack.pop() + if not visited[index]: + visited[index] = True + current_group.append(index) + stack.extend(neighbor for neighbor in neigh[index] if neighbor != -1 and not visited[neighbor]) + return np.array(current_group) + + for i in range(x): + if not visited[i]: + current_group = dfs(i) + if is_cavity: + groups.append(Cavity(current_group, False)) + else: + groups.append(current_group) + + return groups + def get_surface_cavities(cavities, interior_simplices, second_layer, state, points, vdw_radii, sparsity): + surface_cavities = [] + + for cavity in cavities: + tetrahedra = cavity.tetrahedra + second_layer_mask = np.isin(interior_simplices[tetrahedra], second_layer).all(axis=1) + + if np.any(second_layer_mask): + cavity.make_surface() + exit_tetrahedra = tetrahedra[second_layer_mask] + end_tetrahedra = get_end_tetrahedra(exit_tetrahedra, state.verti, points, vdw_radii, state.simp, sparsity) + 
cavity.set_exit_tetrahedra(exit_tetrahedra, end_tetrahedra) + surface_cavities.append(cavity) + + return surface_cavities + + + def merge_cavities(cavities, simplices): + merged_tetrahedra = np.concatenate([cavity.tetrahedra for cavity in cavities]) + return simplices[merged_tetrahedra] + + def find_deepest_tetrahedra(cavities, neighbors): + for cavity in cavities: + exit_tetrahedra = cavity.exit_tetrahedra + visited = np.zeros(neighbors.shape[0], dtype=bool) + visited[exit_tetrahedra] = True + queue = deque([(tetra, 0) for tetra in exit_tetrahedra]) + max_depth = -1 + deepest_tetrahedron = None + + while queue: + current, depth = queue.popleft() + if depth > max_depth: + max_depth = depth + deepest_tetrahedron = current + + for neighbor in neighbors[current]: + if neighbor != -1 and not visited[neighbor] and neighbor in cavity.tetrahedra: + visited[neighbor] = True + queue.append((neighbor, depth + 1)) + + cavity.set_starting_tetrahedron(np.array([deepest_tetrahedron])) + cavity.set_depth(max_depth) + + def dijkstra(cavity, simplices, neighbors, vertices, points, vdw_radii): + def calculate_weight(current_tetra, neighbor_tetra): + current_vertex = vertices[current_tetra] + neighbor_vertex = vertices[neighbor_tetra] + l = np.linalg.norm(current_vertex - neighbor_vertex) + + d = np.inf + for atom, radius in zip(points[simplices[neighbor_tetra]], vdw_radii[simplices[neighbor_tetra]]): + dist = np.linalg.norm(neighbor_vertex - atom) - radius + if dist < d: + d = dist + + b = 1e-3 + return l / (d**2 + b) + + def dijkstra_algorithm(start, goal, tetrahedra_set): + pq = [(0, start)] + distances = {start: 0} + previous = {start: None} + + while pq: + current_distance, current_tetra = heapq.heappop(pq) + + if current_tetra == goal: + path = [] + while current_tetra is not None: + path.append(current_tetra) + current_tetra = previous[current_tetra] + return path[::-1] + + if current_distance > distances[current_tetra]: + continue + + for neighbor in neighbors[current_tetra]: 
+ if neighbor in tetrahedra_set: + weight = calculate_weight(current_tetra, neighbor) + distance = current_distance + weight + if distance < distances.get(neighbor, float('inf')): + distances[neighbor] = distance + previous[neighbor] = current_tetra + heapq.heappush(pq, (distance, neighbor)) + + return None + + tetrahedra_set = set(cavity.tetrahedra) + for exit_tetrahedron in cavity.end_tetrahedra: + for starting_tetrahedron in cavity.starting_tetrahedron: + if exit_tetrahedron != starting_tetrahedron: + path = dijkstra_algorithm(starting_tetrahedron, exit_tetrahedron, tetrahedra_set) + if path: + path_tetrahedra = np.array(path) + channel = Channel(path_tetrahedra, *process_channel(path_tetrahedra, vertices, points, vdw_radii, simplices)) + cavity.add_channel(channel) + + def calculate_max_radius(vertice, points, vdw_radii, simp): + atom_positions = points[simp] + radii = vdw_radii[simp] + distances = np.linalg.norm(atom_positions - vertice, axis=1) - radii + return np.min(distances) + + def calculate_radius_spline(tetrahedra, voronoi_vertices, points, vdw_radii, simp): + vertices = voronoi_vertices[tetrahedra] + radii = np.array([calculate_max_radius(v, points, vdw_radii, s) for v, s in zip(vertices, simp[tetrahedra])]) + return radii, np.min(radii) + + def process_channel(tetrahedra, voronoi_vertices, points, vdw_radii, simp): + centers = voronoi_vertices[tetrahedra] + radii, bottleneck = calculate_radius_spline(tetrahedra, voronoi_vertices, points, vdw_radii, simp) + + t = np.arange(len(centers)) + centerline_spline = CubicSpline(t, centers, bc_type='natural') + radius_spline = CubicSpline(t, radii, bc_type='natural') + + length = calculate_channel_length(centerline_spline) + + return centerline_spline, radius_spline, length, bottleneck + + def create_mesh_from_spline(centerline_spline, radius_spline, n=5): + N = n * len(centerline_spline.x) + t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], N) + centers = centerline_spline(t) + radii = 
radius_spline(t) + + spheres = [o3d.geometry.TriangleMesh.create_sphere(radius=r, resolution=20).translate(c) for r, c in zip(radii, centers)] + mesh = spheres[0] + for sphere in spheres[1:]: + mesh += sphere + + return mesh + + def find_biggest_tetrahedron(tetrahedra, voronoi_vertices, points, vdw_radii, simp): + radii = np.array([calculate_max_radius(voronoi_vertices[tetra], points, vdw_radii, simp[tetra]) for tetra in tetrahedra]) + max_radius_index = np.argmax(radii) + return tetrahedra[max_radius_index] + + def get_end_tetrahedra(tetrahedra, voronoi_vertices, points, vdw_radii, simp, sparsity): + end_tetrahedra = [] + current_tetrahedron = find_biggest_tetrahedron(tetrahedra, voronoi_vertices, points, vdw_radii, simp) + end_tetrahedra.append(current_tetrahedron) + end_tetrahedra_set = {current_tetrahedron} + + while True: + found_tetrahedra = [] + for tetra in tetrahedra: + if tetra in end_tetrahedra_set: + continue + + all_far_enough = True + for selected_tetra in end_tetrahedra: + distance = np.linalg.norm(voronoi_vertices[selected_tetra] - voronoi_vertices[tetra]) + if distance < sparsity: + all_far_enough = False + break + + if all_far_enough: + found_tetrahedra.append(tetra) + + if not found_tetrahedra: + break + + biggest_tetrahedron = find_biggest_tetrahedron(found_tetrahedra, voronoi_vertices, points, vdw_radii, simp) + end_tetrahedra.append(biggest_tetrahedron) + end_tetrahedra_set.add(biggest_tetrahedron) + + return np.array(end_tetrahedra) + + def filter_cavities(cavities, min_depth): + return [cavity for cavity in cavities if cavity.depth >= min_depth] + + def filter_channels_by_bottleneck(cavities, bottleneck): + for cavity in cavities: + cavity.channels = [channel for channel in cavity.channels if channel.bottleneck >= bottleneck] + + def save_channels_to_pdb(cavities, filename, num_samples=5): + with open(filename, 'w') as pdb_file: + atom_index = 1 + for cavity in cavities: + for channel in cavity.channels: + centerline_spline, radius_spline = 
channel.get_splines() + samples = len(channel.tetrahedra) * num_samples + t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) + centers = centerline_spline(t) + radii = radius_spline(t) + + pdb_lines = [] + for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): + pdb_lines.append(f"ATOM {i:5d} H FIL T 1 {x:8.3f}{y:8.3f}{z:8.3f} {radius:6.2f}\n") + + for i in range(1, samples): + pdb_lines.append(f"CONECT{i:5d}{i + 1:5d}\n") + + pdb_file.writelines(pdb_lines) + pdb_file.write("\n") + atom_index += samples + + def calculate_channel_length(centerline_spline): + t_values = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], len(centerline_spline.x) * 10) + points = centerline_spline(t_values) + diffs = np.diff(points, axis=0) + lengths = np.linalg.norm(diffs, axis=1) + return np.sum(lengths) + + protein = protein.select('not hetero') + coords = protein.getCoords() + vdw_radii = get_vdw_radii(protein.getElements()) + + dela = Delaunay(coords) + voro = Voronoi(coords) + + s_prt = State(dela.simplices, dela.neighbors, voro.vertices) + s_tmp = State(*s_prt.get_state()) + s_prv = State(None, None, None) + + while True: + s_prv.set_state(*s_tmp.get_state()) + s_tmp.set_state(*delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) + if s_tmp == s_prv: + break + + s_srf = State(*s_tmp.get_state()) + s_inr = State(*delete_simplices3d(coords, *s_srf.get_state(), vdw_radii, r2, False)) + + l_first_layer_simp, l_second_layer_simp = surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh) + s_clr = State(*delete_section(l_first_layer_simp, *s_inr.get_state())) + + c_cavities = find_groups(s_clr.neigh) + c_surface_cavities = get_surface_cavities(c_cavities, s_clr.simp, l_second_layer_simp, s_clr, coords, vdw_radii, sparsity) + + find_deepest_tetrahedra(c_surface_cavities, s_clr.neigh) + c_filtered_cavities = filter_cavities(c_surface_cavities, min_depth) + merged_cavities = 
merge_cavities(c_filtered_cavities, s_clr.simp) + + for cavity in c_filtered_cavities: + dijkstra(cavity, *s_clr.get_state(), coords, vdw_radii) + + count = 0 + for cavity in c_filtered_cavities: + count = count + len(cavity.end_tetrahedra) + print(count) + + filter_channels_by_bottleneck(c_filtered_cavities, bottleneck) + channels = [create_mesh_from_spline(*channel.get_splines()) for cavity in c_filtered_cavities for channel in cavity.channels] + + no_of_channels = len(channels) + print(f"Detected {no_of_channels} channels.") + + if output_path: + print(f"Saving results to {output_path}") + save_channels_to_pdb(c_filtered_cavities, Path(output_path), num_samples=5) + else: + print("No output path given.") + + if visualizer == 'surface': + if stl_path: + visualize_external_grid(coords, s_srf.simp, stl_path) + else: + visualize_external_mesh(coords, s_srf.simp) + + elif visualizer == 'channels': + if stl_path: + visualize_channel(channels, stl_path) + else: + visualize_external_grid(coords, s_srf.simp, channel_mesh=channels) + + elif visualizer == 'cavities': + if stl_path: + visualize_external_grid(coords, merged_cavities, stl_path) + else: + visualize_external_mesh(coords, merged_cavities, lines=visualize_external_grid(coords, s_srf.simp, ret_lines=True)) From e7823e34e595a2f25859063c73edc2da14b42dce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eryk=20Trzci=C5=84ski?= Date: Mon, 19 Aug 2024 22:47:48 +0200 Subject: [PATCH 02/48] new commit --- prody/proteins/channels.py | 688 ++++++++++++++++++++++++++++++++++ prody/proteins/vmd_script.tcl | 15 + 2 files changed, 703 insertions(+) create mode 100644 prody/proteins/channels.py create mode 100644 prody/proteins/vmd_script.tcl diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py new file mode 100644 index 000000000..5b2c5000d --- /dev/null +++ b/prody/proteins/channels.py @@ -0,0 +1,688 @@ +"""This module detects channels in molecules.""" + +import subprocess +import heapq +import numpy as np +import 
open3d as o3d +from collections import deque +from scipy.interpolate import CubicSpline +from scipy.spatial import Voronoi, Delaunay +from pathlib import Path + +__all__ = ['run_vmd_script', 'detect_channels'] + +class State: + def __init__(self, simplices, neighbors, vertices): + self.simp = simplices + self.neigh = neighbors + self.verti = vertices + + def __eq__(self, other): + if not isinstance(other, State): + return False + return (np.array_equal(self.simp, other.simp) and + np.array_equal(self.neigh, other.neigh) and + np.array_equal(self.verti, other.verti)) + + def set_state(self, simplices, neighbors, vertices): + self.simp = simplices + self.neigh = neighbors + self.verti = vertices + + def get_state(self): + return self.simp, self.neigh, self.verti + +class Cavity: + def __init__(self, tetrahedra, is_connected_to_surface): + self.tetrahedra = tetrahedra + self.is_connected_to_surface = is_connected_to_surface + self.starting_tetrahedron = None + self.channels = [] + + def make_surface(self): + self.is_connected_to_surface = True + + def set_exit_tetrahedra(self, exit_tetrahedra, end_tetrahedra): + self.exit_tetrahedra = exit_tetrahedra + self.end_tetrahedra = end_tetrahedra + + def set_starting_tetrahedron(self, tetrahedron): + self.starting_tetrahedron = tetrahedron + + def set_depth(self, depth): + self.depth = depth + + def add_channel(self, channel): + self.channels.append(channel) + +class Channel: + def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck): + self.tetrahedra = tetrahedra + self.centerline_spline = centerline_spline + self.radius_spline = radius_spline + self.length = length + self.bottleneck = bottleneck + + def get_splines(self): + return self.centerline_spline, self.radius_spline + +def visualize_external_grid(points, simp, stl_file=None, other_mesh=None, channel_mesh=None, ret_lines=None): + triangles = [] + for tetra in simp: + triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), + 
sorted([tetra[0], tetra[1], tetra[3]]), + sorted([tetra[0], tetra[2], tetra[3]]), + sorted([tetra[1], tetra[2], tetra[3]])]) + + triangles = np.array(triangles) + triangles.sort(axis=1) + + triangles_tuple = [tuple(tri) for tri in triangles] + unique_triangles, counts = np.unique(triangles_tuple, return_counts=True, axis=0) + + surface_triangles = unique_triangles[counts == 1] + + lines = [] + for simplex in surface_triangles: + for i in range(3): + for j in range(i + 1, 3): + lines.append([simplex[i], simplex[j]]) + + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + + if ret_lines: + return line_set + + geometries = [line_set, o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] + + if stl_file is not None: + stl_mesh = o3d.io.read_triangle_mesh(stl_file) + stl_mesh.compute_vertex_normals() + stl_mesh.paint_uniform_color([0.1, 0.7, 0.3]) + geometries.append(stl_mesh) + + if channel_mesh is not None: + if not isinstance(channel_mesh, list): + channel_mesh = [channel_mesh] + for mesh in channel_mesh: + mesh.compute_vertex_normals() + mesh.paint_uniform_color([0.5, 0.0, 0.5]) + geometries.extend(channel_mesh) + + o3d.visualization.draw_geometries(geometries) + +def visualize_external_mesh(prot_coords, prot_simp, lines=None, alpha=0.5): + triangles = [] + for tetra in prot_simp: + triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), + sorted([tetra[0], tetra[1], tetra[3]]), + sorted([tetra[0], tetra[2], tetra[3]]), + sorted([tetra[1], tetra[2], tetra[3]])]) + + surface_triangles = np.unique(np.array(triangles), axis=0, return_counts=True)[0] + + mesh = o3d.geometry.TriangleMesh() + mesh.vertices = o3d.utility.Vector3dVector(prot_coords) + mesh.triangles = o3d.utility.Vector3iVector(surface_triangles) + + mesh.compute_vertex_normals() + mesh.paint_uniform_color([0.1, 0.7, 0.3]) + + vis = o3d.visualization.Visualizer() + vis.create_window() + 
vis.add_geometry(mesh) + + if lines: + vis.add_geometry(lines) + + vis.get_render_option().mesh_show_back_face = True + vis.get_render_option().background_color = np.array([1, 1, 1]) + vis.update_renderer() + vis.run() + vis.destroy_window() + +def visualize_channel(channel_meshes, stl_file=None): + meshes_to_visualize = [o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] + + if stl_file is not None: + mesh = o3d.io.read_triangle_mesh(stl_file) + mesh.compute_vertex_normals() + mesh.paint_uniform_color([0.1, 0.7, 0.3]) + meshes_to_visualize.append(mesh) + + if channel_meshes is not None: + if not isinstance(channel_meshes, list): + channel_meshes = [channel_meshes] + for channel_mesh in channel_meshes: + channel_mesh.compute_vertex_normals() + channel_mesh.paint_uniform_color([0.5, 0.0, 0.5]) + meshes_to_visualize.extend(channel_meshes) + + if len(meshes_to_visualize) > 1: + o3d.visualization.draw_geometries(meshes_to_visualize) + else: + print("No mesh to visualize.") + +def sphere_fit(vertices, tetrahedron, vertice, vdw_radii, r): + center = vertice + d_sum = sum(np.linalg.norm(center - vertices[atom]) for atom in tetrahedron) + r_sum = sum(r + vdw_radii[atom] for atom in tetrahedron) + + return d_sum >= r_sum + +def delete_simplices3d(points, simplices, neighbors, vertices, vdw_radii, r, surface): + simp, neigh, verti, deleted = [], [], [], [] + + for i, tetrahedron in enumerate(simplices): + should_delete = (-1 in neighbors[i] and sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r)) if surface else not sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r) + + if should_delete: + deleted.append(i) + else: + simp.append(simplices[i]) + neigh.append(neighbors[i]) + verti.append(vertices[i]) + + simp = np.array(simp) + neigh = np.array(neigh) + verti = np.array(verti) + deleted = np.array(deleted) + + mask = np.isin(neigh, deleted) + neigh[mask] = -1 + + for i in reversed(deleted): + mask = (neigh > i) & (neigh != -1) + 
def _rows_in(rows, reference):
    """Return a boolean mask telling which rows of *rows* also occur in *reference*."""
    known = {tuple(row) for row in np.asarray(reference).tolist()}
    return np.array([tuple(row) in known for row in np.asarray(rows).tolist()], dtype=bool)


def surface_layer(shape_simplices, filtered_simplices, shape_neighbors):
    """Split the filtered simplices into the surface layer and the layer below it.

    A simplex of the shape is on the surface when at least one entry of its
    neighbour row is ``-1`` (no neighbour on that face). The second layer
    consists of the interior simplices that are direct neighbours of a
    surface simplex. In both layers only simplices that are also present in
    *filtered_simplices* are reported.

    :arg shape_simplices: simplices of the whole shape, one row of vertex
        indices per simplex
    :type shape_simplices: :class:`numpy.ndarray`

    :arg filtered_simplices: simplices that survived filtering; rows are
        matched against *shape_simplices* by exact equality
    :type filtered_simplices: :class:`numpy.ndarray`

    :arg shape_neighbors: for each simplex of the shape, the indices of its
        neighbouring simplices, with ``-1`` meaning "no neighbour"
    :type shape_neighbors: :class:`numpy.ndarray`

    :returns: tuple ``(filtered_surface_simplices, second_layer)``, both
        arrays of simplex rows in their original order
    """
    shape_simplices = np.asarray(shape_simplices)
    shape_neighbors = np.asarray(shape_neighbors)

    on_surface = (shape_neighbors == -1).any(axis=1)
    surface_simplices = shape_simplices[on_surface]
    interior_simplices = shape_simplices[~on_surface]

    kept_surface = _rows_in(surface_simplices, filtered_simplices)
    filtered_surface_simplices = surface_simplices[kept_surface]

    # Indices of every simplex touching a kept surface simplex. The sentinel
    # -1 must be removed here: used as an index it would silently select the
    # last simplex, while index 0 is a perfectly valid neighbour.
    neighbor_ids = np.unique(shape_neighbors[on_surface][kept_surface])
    neighbor_ids = neighbor_ids[neighbor_ids != -1]
    surface_layer_neighbor_simplices = shape_simplices[neighbor_ids]

    filtered_interior_simplices = interior_simplices[
        _rows_in(interior_simplices, filtered_simplices)
    ]
    second_layer = filtered_interior_simplices[
        _rows_in(filtered_interior_simplices, surface_layer_neighbor_simplices)
    ]

    return filtered_surface_simplices, second_layer
def find_deepest_tetrahedra(cavities, neighbors):
    """Find, for every cavity, the tetrahedron farthest from its exits.

    A breadth-first search starts from all of the cavity's exit tetrahedra at
    depth 0 and walks through *neighbors*, visiting only tetrahedra that
    belong to the cavity. The first tetrahedron reached at the greatest depth
    is stored with ``cavity.set_starting_tetrahedron`` (as a one-element
    array) and that depth with ``cavity.set_depth``.

    :arg cavities: cavities providing ``tetrahedra``, ``exit_tetrahedra``,
        ``set_starting_tetrahedron`` and ``set_depth``
    :type cavities: list

    :arg neighbors: for each tetrahedron, the indices of its neighbours, with
        ``-1`` meaning "no neighbour"
    :type neighbors: :class:`numpy.ndarray`

    :returns: None. The cavities are updated in place.
    """
    for cavity in cavities:
        # Hash-set membership: testing "in cavity.tetrahedra" on the array
        # itself rescans the whole array for every neighbour that is examined.
        members = set(np.asarray(cavity.tetrahedra).tolist())

        exit_tetrahedra = cavity.exit_tetrahedra
        visited = np.zeros(neighbors.shape[0], dtype=bool)
        visited[exit_tetrahedra] = True
        queue = deque((tetra, 0) for tetra in exit_tetrahedra)
        max_depth = -1
        deepest_tetrahedron = None

        while queue:
            current, depth = queue.popleft()
            if depth > max_depth:
                max_depth = depth
                deepest_tetrahedron = current

            for neighbor in neighbors[current]:
                if neighbor == -1 or visited[neighbor]:
                    continue
                if int(neighbor) in members:
                    visited[neighbor] = True
                    queue.append((neighbor, depth + 1))

        cavity.set_starting_tetrahedron(np.array([deepest_tetrahedron]))
        cavity.set_depth(max_depth)
def calculate_max_radius(vertice, points, vdw_radii, simp):
    """Return the smallest clearance between *vertice* and the atoms of *simp*.

    For each atom index in *simp*, the distance from *vertice* to the atom
    centre is reduced by that atom's van der Waals radius. The minimum of
    these values is the radius of the largest sphere centred on *vertice*
    that does not overlap any of those atoms (negative when *vertice* lies
    inside an atom).

    :arg vertice: centre of the probe sphere
    :type vertice: :class:`numpy.ndarray`

    :arg points: atom coordinates, one row per atom
    :type points: :class:`numpy.ndarray`

    :arg vdw_radii: van der Waals radius of every atom in *points*
    :type vdw_radii: :class:`numpy.ndarray`

    :arg simp: indices of the atoms to test
    :type simp: :class:`numpy.ndarray`

    :returns: the minimum of ``distance - radius`` over the selected atoms
    """
    offsets = points[simp] - vertice
    clearances = np.linalg.norm(offsets, axis=1) - vdw_radii[simp]
    return clearances.min()
def save_channels_to_pdb(cavities, filename, num_samples=5):
    """Write the centre lines of all channels to a PDB file.

    Each channel is sampled at ``len(channel.tetrahedra) * num_samples``
    evenly spaced spline parameters. Every sample becomes one ``ATOM`` record
    (atom ``H``, residue ``FIL``, chain ``T``, residue number 1) written in
    PDB fixed columns, with occupancy 1.00 and the channel radius at that
    point in the temperature-factor columns. Consecutive samples of the same
    channel are linked by ``CONECT`` records. Serial numbers run on across
    channels, and a blank line is written after each channel.

    :arg cavities: cavities whose ``channels`` are written; every channel
        must provide ``get_splines()`` and ``tetrahedra``
    :type cavities: list

    :arg filename: path of the file to create or overwrite
    :type filename: str or :class:`pathlib.Path`

    :arg num_samples: number of samples per tetrahedron of a channel
    :type num_samples: int

    :returns: None
    """
    with open(filename, 'w') as pdb_file:
        atom_index = 1
        for cavity in cavities:
            for channel in cavity.channels:
                centerline_spline, radius_spline = channel.get_splines()
                samples = len(channel.tetrahedra) * num_samples
                t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples)
                centers = centerline_spline(t)
                radii = radius_spline(t)

                pdb_lines = [
                    f"ATOM  {serial:5d}  H   FIL T   1    {x:8.3f}{y:8.3f}{z:8.3f}  1.00{radius:6.2f}\n"
                    for serial, ((x, y, z), radius)
                    in enumerate(zip(centers, radii), start=atom_index)
                ]
                # Bonds must use this channel's own serial numbers; counting
                # from 1 again would wire every later channel to the atoms of
                # the first one.
                pdb_lines.extend(
                    f"CONECT{serial:5d}{serial + 1:5d}\n"
                    for serial in range(atom_index, atom_index + samples - 1)
                )

                pdb_file.writelines(pdb_lines)
                pdb_file.write("\n")
                atom_index += samples
def detect_channels(protein, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15, output_path=None, visualizer=None, stl_path=None):
    """Detect channels in a protein structure and optionally save or display them.

    :arg protein: structure to analyse; it must support ``select``,
        ``getCoords`` and ``getElements``. Only the atoms selected by
        ``'not hetero'`` are used.
    :type protein: Protein object

    :arg r1: probe radius used to find the protein surface. Boundary
        tetrahedra large enough for this probe are peeled off repeatedly
        until nothing changes. Default is **3**.
    :type r1: float

    :arg r2: probe radius used to find cavities. Tetrahedra too small for
        this probe are removed from the interior. Default is **1.25**.
    :type r2: float

    :arg min_depth: minimum depth of a cavity. Only cavities whose depth is
        greater than or equal to this value are kept. Default is **10**.
    :type min_depth: int

    :arg bottleneck: minimum bottleneck radius of a channel. Channels with a
        smaller bottleneck are discarded. Default is **1**.
    :type bottleneck: float

    :arg sparsity: minimum distance between the end points of channels. The
        higher the value, the fewer channels are detected. Default is **15**.
    :type sparsity: float

    :arg output_path: path of the PDB file to write. If **None**, nothing is
        saved. Default is **None**.
    :type output_path: str or None

    :arg visualizer: what to display: **'surface'**, **'channels'** or
        **'cavities'**. If **None**, nothing is displayed. Any other value is
        reported and ignored. Default is **None**.
    :type visualizer: str or None

    :arg stl_path: path to an STL mesh of the protein that is drawn together
        with the result. If **None**, the triangulation itself is drawn.
        Default is **None**.
    :type stl_path: str or None

    :returns: None

    The function performs the following steps:

    1. **Triangulation:** selects the non-hetero atoms, looks up their van der
       Waals radii and computes the 3D Delaunay triangulation and the Voronoi
       diagram of the coordinates.
    2. **Surface and interior:** peels boundary tetrahedra with *r1* until the
       state stops changing, then removes tetrahedra too small for *r2*.
    3. **Layers:** determines the surface layer and the layer below it and
       removes the surface layer from the interior.
    4. **Cavities and channels:** groups connected tetrahedra into cavities,
       keeps the surface-connected ones that are at least *min_depth* deep,
       runs Dijkstra's algorithm inside each and drops channels whose
       bottleneck is below *bottleneck*.
    5. **Output:** reports the number of channels, writes them to
       *output_path* when given and displays the view selected by
       *visualizer*.

    When no cavity passes the depth filter the function reports zero channels
    instead of failing. Channel meshes are only built when they are shown.
    """
    protein = protein.select('not hetero')
    coords = protein.getCoords()
    vdw_radii = get_vdw_radii(protein.getElements())

    dela = Delaunay(coords)
    voro = Voronoi(coords)

    # NOTE(review): voro.vertices is paired row by row with dela.simplices
    # below - confirm that SciPy returns both in the same order.
    s_prt = State(dela.simplices, dela.neighbors, voro.vertices)
    s_tmp = State(*s_prt.get_state())
    s_prv = State(None, None, None)

    # Peel the boundary until a pass removes nothing.
    while True:
        s_prv.set_state(*s_tmp.get_state())
        s_tmp.set_state(*delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True))
        if s_tmp == s_prv:
            break

    s_srf = State(*s_tmp.get_state())
    s_inr = State(*delete_simplices3d(coords, *s_srf.get_state(), vdw_radii, r2, False))

    l_first_layer_simp, l_second_layer_simp = surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh)
    s_clr = State(*delete_section(l_first_layer_simp, *s_inr.get_state()))

    c_cavities = find_groups(s_clr.neigh)
    c_surface_cavities = get_surface_cavities(c_cavities, s_clr.simp, l_second_layer_simp, s_clr, coords, vdw_radii, sparsity)

    find_deepest_tetrahedra(c_surface_cavities, s_clr.neigh)
    c_filtered_cavities = filter_cavities(c_surface_cavities, min_depth)

    for cavity in c_filtered_cavities:
        dijkstra(cavity, *s_clr.get_state(), coords, vdw_radii)

    filter_channels_by_bottleneck(c_filtered_cavities, bottleneck)
    detected = [channel for cavity in c_filtered_cavities for channel in cavity.channels]

    no_of_channels = len(detected)
    print(f"Detected {no_of_channels} channels.")

    if output_path:
        print(f"Saving results to {output_path}")
        save_channels_to_pdb(c_filtered_cavities, Path(output_path), num_samples=5)
    else:
        print("No output path given.")

    if visualizer is None:
        return

    if visualizer == 'surface':
        if stl_path:
            visualize_external_grid(coords, s_srf.simp, stl_path)
        else:
            visualize_external_mesh(coords, s_srf.simp)

    elif visualizer == 'channels':
        # Sphere meshes are expensive, so they are built only when displayed.
        channels = [create_mesh_from_spline(*channel.get_splines()) for channel in detected]
        if stl_path:
            visualize_channel(channels, stl_path)
        else:
            visualize_external_grid(coords, s_srf.simp, channel_mesh=channels)

    elif visualizer == 'cavities':
        if not c_filtered_cavities:
            # merge_cavities concatenates the cavities' tetrahedra and cannot
            # be called with an empty list.
            print("No cavities to visualize.")
            return
        merged_cavities = merge_cavities(c_filtered_cavities, s_clr.simp)
        if stl_path:
            visualize_external_grid(coords, merged_cavities, stl_path)
        else:
            visualize_external_mesh(coords, merged_cavities, lines=visualize_external_grid(coords, s_srf.simp, ret_lines=True))

    else:
        print(f"Unknown visualizer {visualizer!r}; expected 'surface', 'channels' or 'cavities'.")
[PATCH 03/48] new commit --- prody/proteins/channels.py | 688 ---------------------------------- prody/proteins/vmd_script.tcl | 15 - 2 files changed, 703 deletions(-) delete mode 100644 prody/proteins/channels.py delete mode 100644 prody/proteins/vmd_script.tcl diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py deleted file mode 100644 index 5b2c5000d..000000000 --- a/prody/proteins/channels.py +++ /dev/null @@ -1,688 +0,0 @@ -"""This module detects channels in molecules.""" - -import subprocess -import heapq -import numpy as np -import open3d as o3d -from collections import deque -from scipy.interpolate import CubicSpline -from scipy.spatial import Voronoi, Delaunay -from pathlib import Path - -__all__ = ['run_vmd_script', 'detect_channels'] - -class State: - def __init__(self, simplices, neighbors, vertices): - self.simp = simplices - self.neigh = neighbors - self.verti = vertices - - def __eq__(self, other): - if not isinstance(other, State): - return False - return (np.array_equal(self.simp, other.simp) and - np.array_equal(self.neigh, other.neigh) and - np.array_equal(self.verti, other.verti)) - - def set_state(self, simplices, neighbors, vertices): - self.simp = simplices - self.neigh = neighbors - self.verti = vertices - - def get_state(self): - return self.simp, self.neigh, self.verti - -class Cavity: - def __init__(self, tetrahedra, is_connected_to_surface): - self.tetrahedra = tetrahedra - self.is_connected_to_surface = is_connected_to_surface - self.starting_tetrahedron = None - self.channels = [] - - def make_surface(self): - self.is_connected_to_surface = True - - def set_exit_tetrahedra(self, exit_tetrahedra, end_tetrahedra): - self.exit_tetrahedra = exit_tetrahedra - self.end_tetrahedra = end_tetrahedra - - def set_starting_tetrahedron(self, tetrahedron): - self.starting_tetrahedron = tetrahedron - - def set_depth(self, depth): - self.depth = depth - - def add_channel(self, channel): - self.channels.append(channel) - -class 
Channel: - def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck): - self.tetrahedra = tetrahedra - self.centerline_spline = centerline_spline - self.radius_spline = radius_spline - self.length = length - self.bottleneck = bottleneck - - def get_splines(self): - return self.centerline_spline, self.radius_spline - -def visualize_external_grid(points, simp, stl_file=None, other_mesh=None, channel_mesh=None, ret_lines=None): - triangles = [] - for tetra in simp: - triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), - sorted([tetra[0], tetra[1], tetra[3]]), - sorted([tetra[0], tetra[2], tetra[3]]), - sorted([tetra[1], tetra[2], tetra[3]])]) - - triangles = np.array(triangles) - triangles.sort(axis=1) - - triangles_tuple = [tuple(tri) for tri in triangles] - unique_triangles, counts = np.unique(triangles_tuple, return_counts=True, axis=0) - - surface_triangles = unique_triangles[counts == 1] - - lines = [] - for simplex in surface_triangles: - for i in range(3): - for j in range(i + 1, 3): - lines.append([simplex[i], simplex[j]]) - - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(points) - line_set.lines = o3d.utility.Vector2iVector(lines) - - if ret_lines: - return line_set - - geometries = [line_set, o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] - - if stl_file is not None: - stl_mesh = o3d.io.read_triangle_mesh(stl_file) - stl_mesh.compute_vertex_normals() - stl_mesh.paint_uniform_color([0.1, 0.7, 0.3]) - geometries.append(stl_mesh) - - if channel_mesh is not None: - if not isinstance(channel_mesh, list): - channel_mesh = [channel_mesh] - for mesh in channel_mesh: - mesh.compute_vertex_normals() - mesh.paint_uniform_color([0.5, 0.0, 0.5]) - geometries.extend(channel_mesh) - - o3d.visualization.draw_geometries(geometries) - -def visualize_external_mesh(prot_coords, prot_simp, lines=None, alpha=0.5): - triangles = [] - for tetra in prot_simp: - 
triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), - sorted([tetra[0], tetra[1], tetra[3]]), - sorted([tetra[0], tetra[2], tetra[3]]), - sorted([tetra[1], tetra[2], tetra[3]])]) - - surface_triangles = np.unique(np.array(triangles), axis=0, return_counts=True)[0] - - mesh = o3d.geometry.TriangleMesh() - mesh.vertices = o3d.utility.Vector3dVector(prot_coords) - mesh.triangles = o3d.utility.Vector3iVector(surface_triangles) - - mesh.compute_vertex_normals() - mesh.paint_uniform_color([0.1, 0.7, 0.3]) - - vis = o3d.visualization.Visualizer() - vis.create_window() - vis.add_geometry(mesh) - - if lines: - vis.add_geometry(lines) - - vis.get_render_option().mesh_show_back_face = True - vis.get_render_option().background_color = np.array([1, 1, 1]) - vis.update_renderer() - vis.run() - vis.destroy_window() - -def visualize_channel(channel_meshes, stl_file=None): - meshes_to_visualize = [o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] - - if stl_file is not None: - mesh = o3d.io.read_triangle_mesh(stl_file) - mesh.compute_vertex_normals() - mesh.paint_uniform_color([0.1, 0.7, 0.3]) - meshes_to_visualize.append(mesh) - - if channel_meshes is not None: - if not isinstance(channel_meshes, list): - channel_meshes = [channel_meshes] - for channel_mesh in channel_meshes: - channel_mesh.compute_vertex_normals() - channel_mesh.paint_uniform_color([0.5, 0.0, 0.5]) - meshes_to_visualize.extend(channel_meshes) - - if len(meshes_to_visualize) > 1: - o3d.visualization.draw_geometries(meshes_to_visualize) - else: - print("No mesh to visualize.") - -def sphere_fit(vertices, tetrahedron, vertice, vdw_radii, r): - center = vertice - d_sum = sum(np.linalg.norm(center - vertices[atom]) for atom in tetrahedron) - r_sum = sum(r + vdw_radii[atom] for atom in tetrahedron) - - return d_sum >= r_sum - -def delete_simplices3d(points, simplices, neighbors, vertices, vdw_radii, r, surface): - simp, neigh, verti, deleted = [], [], [], [] - - for i, tetrahedron in 
enumerate(simplices): - should_delete = (-1 in neighbors[i] and sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r)) if surface else not sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r) - - if should_delete: - deleted.append(i) - else: - simp.append(simplices[i]) - neigh.append(neighbors[i]) - verti.append(vertices[i]) - - simp = np.array(simp) - neigh = np.array(neigh) - verti = np.array(verti) - deleted = np.array(deleted) - - mask = np.isin(neigh, deleted) - neigh[mask] = -1 - - for i in reversed(deleted): - mask = (neigh > i) & (neigh != -1) - neigh[mask] -= 1 - - return simp, neigh, verti - -def delete_section(simplices_subset, simplices, neighbors, vertices, reverse=False): - simp, neigh, verti, deleted = [], [], [], [] - - for i, tetrahedron in enumerate(simplices): - match = any((simplices_subset == tetrahedron).all(axis=1)) - if reverse: - if match: - simp.append(tetrahedron) - neigh.append(neighbors[i]) - verti.append(vertices[i]) - else: - deleted.append(i) - else: - if match: - deleted.append(i) - else: - simp.append(tetrahedron) - neigh.append(neighbors[i]) - verti.append(vertices[i]) - - simp, neigh, verti = map(np.array, [simp, neigh, verti]) - deleted = np.array(deleted) - - mask = np.isin(neigh, deleted) - neigh[mask] = -1 - - for i in reversed(deleted): - neigh = np.where((neigh > i) & (neigh != -1), neigh - 1, neigh) - - return simp, neigh, verti - -def get_vdw_radii(atoms): - vdw_radii_dict = { - 'H': 1.20, 'HE': 1.40, 'LI': 1.82, 'BE': 1.53, 'B': 1.92, 'C': 1.70, - 'N': 1.55, 'O': 1.52, 'F': 1.47, 'NE': 1.54, 'NA': 2.27, 'MG': 1.73, - 'AL': 1.84, 'SI': 2.10, 'P': 1.80, 'S': 1.80, 'CL': 1.75, 'AR': 1.88, - 'K': 2.75, 'CA': 2.31, 'SC': 2.11, 'NI': 1.63, 'CU': 1.40, 'ZN': 1.39, - 'GA': 1.87, 'GE': 2.11, 'AS': 1.85, 'SE': 1.90, 'BR': 1.85, 'KR': 2.02, - 'RB': 3.03, 'SR': 2.49, 'PD': 1.63, 'AG': 1.72, 'CD': 1.58, 'IN': 1.93, - 'SN': 2.17, 'SB': 2.06, 'TE': 2.06, 'I': 1.98, 'XE': 2.16, 'CS': 3.43, - 'BA': 2.68, 'PT': 1.75, 'AU': 
1.66, 'HG': 1.55, 'TL': 1.96, 'PB': 2.02, - 'BI': 2.07, 'PO': 1.97, 'AT': 2.02, 'RN': 2.20, 'FR': 3.48, 'RA': 2.83, - 'U': 1.86, 'FE': 2.44 - } - - return np.array([vdw_radii_dict[atom] for atom in atoms]) - -def surface_layer(shape_simplices, filtered_simplices, shape_neighbors): - surface_simplices, surface_neighbors = [], [] - interior_simplices = [] - - for i in range(len(shape_simplices)): - if -1 in shape_neighbors[i]: - surface_simplices.append(shape_simplices[i]) - surface_neighbors.append(shape_neighbors[i]) - else: - interior_simplices.append(shape_simplices[i]) - - surface_simplices = np.array(surface_simplices) - surface_neighbors = np.array(surface_neighbors) - interior_simplices = np.array(interior_simplices) - - filtered_surface_simplices = surface_simplices[ - np.any(np.all(surface_simplices[:, None] == filtered_simplices, axis=2), axis=1) - ] - filtered_surface_neighbors = surface_neighbors[ - np.any(np.all(surface_simplices[:, None] == filtered_simplices, axis=2), axis=1) - ] - - filtered_surface_neighbors = np.unique(filtered_surface_neighbors) - filtered_surface_neighbors = filtered_surface_neighbors[filtered_surface_neighbors != 0] - - filtered_interior_simplices = interior_simplices[ - np.any(np.all(interior_simplices[:, None] == filtered_simplices, axis=2), axis=1) - ] - - surface_layer_neighbor_simplices = shape_simplices[filtered_surface_neighbors] - - second_layer = filtered_interior_simplices[ - np.any(np.all(filtered_interior_simplices[:, None] == surface_layer_neighbor_simplices, axis=2), axis=1) - ] - - return filtered_surface_simplices, second_layer - - -def find_groups(neigh, is_cavity=True): - x = neigh.shape[0] - visited = np.zeros(x, dtype=bool) - groups = [] - - def dfs(tetra_index): - stack = [tetra_index] - current_group = [] - while stack: - index = stack.pop() - if not visited[index]: - visited[index] = True - current_group.append(index) - stack.extend(neighbor for neighbor in neigh[index] if neighbor != -1 and not 
visited[neighbor]) - return np.array(current_group) - - for i in range(x): - if not visited[i]: - current_group = dfs(i) - if is_cavity: - groups.append(Cavity(current_group, False)) - else: - groups.append(current_group) - - return groups - -def get_surface_cavities(cavities, interior_simplices, second_layer, state, points, vdw_radii, sparsity): - surface_cavities = [] - - for cavity in cavities: - tetrahedra = cavity.tetrahedra - second_layer_mask = np.isin(interior_simplices[tetrahedra], second_layer).all(axis=1) - - if np.any(second_layer_mask): - cavity.make_surface() - exit_tetrahedra = tetrahedra[second_layer_mask] - end_tetrahedra = get_end_tetrahedra(exit_tetrahedra, state.verti, points, vdw_radii, state.simp, sparsity) - cavity.set_exit_tetrahedra(exit_tetrahedra, end_tetrahedra) - surface_cavities.append(cavity) - - return surface_cavities - - -def merge_cavities(cavities, simplices): - merged_tetrahedra = np.concatenate([cavity.tetrahedra for cavity in cavities]) - return simplices[merged_tetrahedra] - -def find_deepest_tetrahedra(cavities, neighbors): - for cavity in cavities: - exit_tetrahedra = cavity.exit_tetrahedra - visited = np.zeros(neighbors.shape[0], dtype=bool) - visited[exit_tetrahedra] = True - queue = deque([(tetra, 0) for tetra in exit_tetrahedra]) - max_depth = -1 - deepest_tetrahedron = None - - while queue: - current, depth = queue.popleft() - if depth > max_depth: - max_depth = depth - deepest_tetrahedron = current - - for neighbor in neighbors[current]: - if neighbor != -1 and not visited[neighbor] and neighbor in cavity.tetrahedra: - visited[neighbor] = True - queue.append((neighbor, depth + 1)) - - cavity.set_starting_tetrahedron(np.array([deepest_tetrahedron])) - cavity.set_depth(max_depth) - -def dijkstra(cavity, simplices, neighbors, vertices, points, vdw_radii): - def calculate_weight(current_tetra, neighbor_tetra): - current_vertex = vertices[current_tetra] - neighbor_vertex = vertices[neighbor_tetra] - l = 
np.linalg.norm(current_vertex - neighbor_vertex) - - d = np.inf - for atom, radius in zip(points[simplices[neighbor_tetra]], vdw_radii[simplices[neighbor_tetra]]): - dist = np.linalg.norm(neighbor_vertex - atom) - radius - if dist < d: - d = dist - - b = 1e-3 - return l / (d**2 + b) - - def dijkstra_algorithm(start, goal, tetrahedra_set): - pq = [(0, start)] - distances = {start: 0} - previous = {start: None} - - while pq: - current_distance, current_tetra = heapq.heappop(pq) - - if current_tetra == goal: - path = [] - while current_tetra is not None: - path.append(current_tetra) - current_tetra = previous[current_tetra] - return path[::-1] - - if current_distance > distances[current_tetra]: - continue - - for neighbor in neighbors[current_tetra]: - if neighbor in tetrahedra_set: - weight = calculate_weight(current_tetra, neighbor) - distance = current_distance + weight - if distance < distances.get(neighbor, float('inf')): - distances[neighbor] = distance - previous[neighbor] = current_tetra - heapq.heappush(pq, (distance, neighbor)) - - return None - - tetrahedra_set = set(cavity.tetrahedra) - for exit_tetrahedron in cavity.end_tetrahedra: - for starting_tetrahedron in cavity.starting_tetrahedron: - if exit_tetrahedron != starting_tetrahedron: - path = dijkstra_algorithm(starting_tetrahedron, exit_tetrahedron, tetrahedra_set) - if path: - path_tetrahedra = np.array(path) - channel = Channel(path_tetrahedra, *process_channel(path_tetrahedra, vertices, points, vdw_radii, simplices)) - cavity.add_channel(channel) - -def calculate_max_radius(vertice, points, vdw_radii, simp): - atom_positions = points[simp] - radii = vdw_radii[simp] - distances = np.linalg.norm(atom_positions - vertice, axis=1) - radii - return np.min(distances) - -def calculate_radius_spline(tetrahedra, voronoi_vertices, points, vdw_radii, simp): - vertices = voronoi_vertices[tetrahedra] - radii = np.array([calculate_max_radius(v, points, vdw_radii, s) for v, s in zip(vertices, simp[tetrahedra])]) - 
return radii, np.min(radii) - -def process_channel(tetrahedra, voronoi_vertices, points, vdw_radii, simp): - centers = voronoi_vertices[tetrahedra] - radii, bottleneck = calculate_radius_spline(tetrahedra, voronoi_vertices, points, vdw_radii, simp) - - t = np.arange(len(centers)) - centerline_spline = CubicSpline(t, centers, bc_type='natural') - radius_spline = CubicSpline(t, radii, bc_type='natural') - - length = calculate_channel_length(centerline_spline) - - return centerline_spline, radius_spline, length, bottleneck - -def create_mesh_from_spline(centerline_spline, radius_spline, n=5): - N = n * len(centerline_spline.x) - t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], N) - centers = centerline_spline(t) - radii = radius_spline(t) - - spheres = [o3d.geometry.TriangleMesh.create_sphere(radius=r, resolution=20).translate(c) for r, c in zip(radii, centers)] - mesh = spheres[0] - for sphere in spheres[1:]: - mesh += sphere - - return mesh - -def find_biggest_tetrahedron(tetrahedra, voronoi_vertices, points, vdw_radii, simp): - radii = np.array([calculate_max_radius(voronoi_vertices[tetra], points, vdw_radii, simp[tetra]) for tetra in tetrahedra]) - max_radius_index = np.argmax(radii) - return tetrahedra[max_radius_index] - -def get_end_tetrahedra(tetrahedra, voronoi_vertices, points, vdw_radii, simp, sparsity): - end_tetrahedra = [] - - current_tetrahedron = find_biggest_tetrahedron(tetrahedra, voronoi_vertices, points, vdw_radii, simp) - end_tetrahedra.append(current_tetrahedron) - - end_tetrahedra_set = {current_tetrahedron} - - while True: - found_tetrahedra = [] - - for tetra in tetrahedra: - if tetra in end_tetrahedra_set: - continue - - if all(np.linalg.norm(voronoi_vertices[selected_tetra] - voronoi_vertices[tetra]) >= sparsity for selected_tetra in end_tetrahedra): - found_tetrahedra.append(tetra) - - if not found_tetrahedra: - break - - biggest_tetrahedron = find_biggest_tetrahedron(found_tetrahedra, voronoi_vertices, points, vdw_radii, 
simp) - end_tetrahedra.append(biggest_tetrahedron) - end_tetrahedra_set.add(biggest_tetrahedron) - - return np.array(end_tetrahedra) - - -def filter_cavities(cavities, min_depth): - return [cavity for cavity in cavities if cavity.depth >= min_depth] - -def filter_channels_by_bottleneck(cavities, bottleneck): - for cavity in cavities: - cavity.channels = [channel for channel in cavity.channels if channel.bottleneck >= bottleneck] - -def save_channels_to_pdb(cavities, filename, num_samples=5): - with open(filename, 'w') as pdb_file: - atom_index = 1 - for cavity in cavities: - for channel in cavity.channels: - centerline_spline, radius_spline = channel.get_splines() - samples = len(channel.tetrahedra) * num_samples - t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) - centers = centerline_spline(t) - radii = radius_spline(t) - - pdb_lines = [] - for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append(f"ATOM {i:5d} H FIL T 1 {x:8.3f}{y:8.3f}{z:8.3f} {radius:6.2f}\n") - - for i in range(1, samples): - pdb_lines.append(f"CONECT{i:5d}{i + 1:5d}\n") - - pdb_file.writelines(pdb_lines) - pdb_file.write("\n") - atom_index += samples - -def calculate_channel_length(centerline_spline): - t_values = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], len(centerline_spline.x) * 10) - points = centerline_spline(t_values) - diffs = np.diff(points, axis=0) - lengths = np.linalg.norm(diffs, axis=1) - return np.sum(lengths) - -def run_vmd_script(vmd_path, file_path, script_path=None, output_path=None): - """Executes a VMD script to create a mesh representation of a protein and save it as a .stl file. - - This function runs a VMD (Visual Molecular Dynamics) script using the specified VMD executable and input file. - It also manages the paths for the script and output, ensuring they are correctly set up before execution. - - :arg vmd_path: Path to the VMD executable. 
This is required to run the VMD script. - :type vmd_path: str - - :arg file_path: Path to the input file that will be processed by the VMD script. - :type file_path: str - - :arg script_path: Path to the VMD script that will be executed. If **None**, defaults to 'script.tcl' in the - current working directory. The script must be a valid Tcl script for VMD. - :type script_path: str or None - - :arg output_path: Path where the output file will be saved. If **None**, defaults to 'output/protein.stl' in - the current working directory. The output file will be created or overwritten at this location. - :type output_path: str or None - - :returns: None - - This function performs the following steps: - 1. **Path Handling:** Resolves the paths for the VMD script and output file. If not provided, default paths are - used. Creates the output directory if it does not exist. - 2. **Command Execution:** Constructs the command to run the VMD script with the specified arguments. Executes the - command using `subprocess.run()` and checks for any errors during execution. The sript creates mesh representation - of the protein to be further visualized. - 3. **Error Handling:** Catches and prints errors if the VMD script fails or if any unexpected exceptions occur. - - Note: Ensure that VMD is correctly installed and accessible via the provided `vmd_path`, and that the script and - file paths are valid for successful execution. 
- """ - script_path = Path(script_path or 'script.tcl').resolve() - if not script_path.is_file(): - raise FileNotFoundError("Script does not exist.") - - output_path = Path(output_path or 'output/protein.stl').resolve() - output_path.parent.mkdir(parents=True, exist_ok=True) - - command = [vmd_path, '-e', str(script_path), '-args', str(file_path), str(output_path)] - - try: - subprocess.run(command, check=True) - except subprocess.CalledProcessError as e: - print(f"VMD caused an error: {e}") - except Exception as e: - print(f"An unexpected error occurred: {e}") - -def detect_channels(protein, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15, output_path=None, visualizer=None, stl_path=None): - """Detects channels in a protein structure and visualizes or saves the results based on the given criteria. - - This function processes the provided protein structure to identify channels, cavities, and surface, and - saves the results according to the specified parameters. The detection involves several steps, including - filtering and visualization. - - :arg protein: The protein structure from which to detect channels. The protein object should support methods for - selecting atoms and retrieving coordinates. - :type protein: Protein object - - :arg r1: Radius for determining protein surface. Default is **3**. This parameter defines the cutoff for removing - too big simplices based on distance from the center of the cavities. - :type r1: float - - :arg r2: Radius for detecting cavities in protein. Default is **1.25**. This parameter defines the cutoff for - removing interior small simplices. - :type r2: float - - :arg min_depth: Minimum depth for filtering cavities. Default is **10**. Only cavities with a depth greater than - or equal to this value will be retained. - :type min_depth: int - - :arg bottleneck: Minimum bottleneck size for filtering channels. Default is **1**. Channels with a bottleneck size - smaller than this value will be filtered out. 
- :type bottleneck: float - - :arg sparsity: Controls the quantity of channels. The higher the sparsity, the less channels is detected. - Default is **15**. This parameter defines the minimum distance between end points of channels. - :type sparsity: float - - :arg output_path: Path to save the results as a PDB file. If **None**, the results will not be saved. Default is - **None**. - :type output_path: str or None - - :arg visualizer: Type of visualization to perform. Options are **'surface'**, **'channels'**, or **'cavities'**. - Default is **None**. Determines how the results are visualized. - :type visualizer: str or None - - :arg stl_path: Path to an STL file for visualizing external meshes. If **None**, default visualization methods - will be used. Otherwise, the results will be visualized with the protein structure on top. Default is **None**. - :type stl_path: str or None - - :returns: None - - This function performs the following steps: - 1. **Selection and Filtering:** Selects non-hetero atoms from the protein, calculates van der Waals radii, and performs - 3D Delaunay triangulation and Voronoi tessellation on the coordinates. - 2. **State Management:** Creates and updates different states of the protein structure to filter out simplices based on - the given radii. - 3. **Surface Layer Calculation:** Determines the surface and second-layer simplices from the filtered results. - 4. **Cavity and Channel Detection:** Finds and filters cavities based on their depth and calculates channels using - Dijkstra's algorithm. - 5. **Visualization and Saving:** Generates meshes for the detected channels, filters them by bottleneck size, and either - saves the results to a PDB file or visualizes them based on the specified parameters. - - Note: Ensure that the necessary external libraries and methods are properly imported and available for this function to - execute correctly. 
- """ - protein = protein.select('not hetero') - coords = protein.getCoords() - vdw_radii = get_vdw_radii(protein.getElements()) - - dela = Delaunay(coords) - voro = Voronoi(coords) - - s_prt = State(dela.simplices, dela.neighbors, voro.vertices) - s_tmp = State(*s_prt.get_state()) - s_prv = State(None, None, None) - - while True: - s_prv.set_state(*s_tmp.get_state()) - s_tmp.set_state(*delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) - if s_tmp == s_prv: - break - - s_srf = State(*s_tmp.get_state()) - s_inr = State(*delete_simplices3d(coords, *s_srf.get_state(), vdw_radii, r2, False)) - - l_first_layer_simp, l_second_layer_simp = surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh) - s_clr = State(*delete_section(l_first_layer_simp, *s_inr.get_state())) - - c_cavities = find_groups(s_clr.neigh) - c_surface_cavities = get_surface_cavities(c_cavities, s_clr.simp, l_second_layer_simp, s_clr, coords, vdw_radii, sparsity) - - find_deepest_tetrahedra(c_surface_cavities, s_clr.neigh) - c_filtered_cavities = filter_cavities(c_surface_cavities, min_depth) - merged_cavities = merge_cavities(c_filtered_cavities, s_clr.simp) - - for cavity in c_filtered_cavities: - dijkstra(cavity, *s_clr.get_state(), coords, vdw_radii) - - filter_channels_by_bottleneck(c_filtered_cavities, bottleneck) - channels = [create_mesh_from_spline(*channel.get_splines()) for cavity in c_filtered_cavities for channel in cavity.channels] - - no_of_channels = len(channels) - print(f"Detected {no_of_channels} channels.") - - if output_path: - print(f"Saving results to {output_path}") - save_channels_to_pdb(c_filtered_cavities, Path(output_path), num_samples=5) - else: - print("No output path given.") - - if visualizer == 'surface': - if stl_path: - visualize_external_grid(coords, s_srf.simp, stl_path) - else: - visualize_external_mesh(coords, s_srf.simp) - - elif visualizer == 'channels': - if stl_path: - visualize_channel(channels, stl_path) - else: - visualize_external_grid(coords, 
s_srf.simp, channel_mesh=channels) - - elif visualizer == 'cavities': - if stl_path: - visualize_external_grid(coords, merged_cavities, stl_path) - else: - visualize_external_mesh(coords, merged_cavities, lines=visualize_external_grid(coords, s_srf.simp, ret_lines=True)) - - - diff --git a/prody/proteins/vmd_script.tcl b/prody/proteins/vmd_script.tcl deleted file mode 100644 index 97729a7e6..000000000 --- a/prody/proteins/vmd_script.tcl +++ /dev/null @@ -1,15 +0,0 @@ -set file_path [lindex $argv 0] -set output_path [lindex $argv 1] - -mol new $file_path -mol modstyle 0 0 NewCartoon - -set id_matrix {{1 0 0 0} {0 1 0 0} {0 0 1 0} {0 0 0 1}} -molinfo top set center_matrix [list $id_matrix] -molinfo top set rotate_matrix [list $id_matrix] -molinfo top set scale_matrix [list $id_matrix] - -rendering_method stl -render STL $output_path - -exit \ No newline at end of file From 2b337a98f5cd90266e36453c0587d763bfc41de9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eryk=20Trzci=C5=84ski?= Date: Tue, 20 Aug 2024 23:03:10 +0200 Subject: [PATCH 04/48] added output for main function and fixed some features --- prody/proteins/interactions.py | 260 +++++++++++++++++++++------------ 1 file changed, 166 insertions(+), 94 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index c64d1ad19..03a2a9c8e 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -32,14 +32,6 @@ import multiprocessing -import subprocess -import heapq -import open3d as o3d -from collections import deque -from scipy.interpolate import CubicSpline -from scipy.spatial import Voronoi, Delaunay -from pathlib import Path - __all__ = ['calcHydrogenBonds', 'calcChHydrogenBonds', 'calcSaltBridges', 'calcRepulsiveIonicBonding', 'calcPiStacking', 'calcPiCation', 'calcHydrophobic', 'calcDisulfideBonds', 'calcMetalInteractions', @@ -53,7 +45,7 @@ 'calcHydrogenBondsTrajectory', 'calcHydrophobicOverlapingAreas', 'Interactions', 'InteractionsTrajectory', 
'LigandInteractionsTrajectory', 'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues', - 'showSminaTermValues', 'run_vmd_script', 'detect_channels'] + 'showSminaTermValues', 'runVmdScript', 'calcChannels', 'getChannelsParameters'] def cleanNumbers(listContacts): @@ -4359,7 +4351,21 @@ def saveInteractionsPDB(self, **kwargs): -def run_vmd_script(vmd_path, file_path, script_path = None, output_path = None): +import logging +import importlib.util + +class Channel: + def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck): + self.tetrahedra = tetrahedra + self.centerline_spline = centerline_spline + self.radius_spline = radius_spline + self.length = length + self.bottleneck = bottleneck + + def get_splines(self): + return self.centerline_spline, self.radius_spline + +def runVmdScript(vmd_path, file_path, script_path = None, output_path = None): """Executes a VMD script to create a mesh representation of a protein and save it as a .stl file. This function runs a VMD (Visual Molecular Dynamics) script using the specified VMD executable and input file. @@ -4380,24 +4386,16 @@ def run_vmd_script(vmd_path, file_path, script_path = None, output_path = None): :type output_path: str or None :returns: None - - This function performs the following steps: - 1. **Path Handling:** Resolves the paths for the VMD script and output file. If not provided, default paths are - used. Creates the output directory if it does not exist. - 2. **Command Execution:** Constructs the command to run the VMD script with the specified arguments. Executes the - command using `subprocess.run()` and checks for any errors during execution. The sript creates mesh representation - of the protein to be further visualized. - 3. **Error Handling:** Catches and prints errors if the VMD script fails or if any unexpected exceptions occur. 
- - Note: Ensure that VMD is correctly installed and accessible via the provided `vmd_path`, and that the script and - file paths are valid for successful execution. """ + + import subprocess + from pathlib import Path + script_path = Path(script_path or 'vmd_script.tcl').resolve() if not script_path.is_file(): raise FileNotFoundError("Script does not exist.") - output_path = Path(output_path or 'output/protein.stl').resolve() - output_path.parent.mkdir(parents=True, exist_ok=True) + output_path = Path(output_path) command = [vmd_path, '-e', str(script_path), '-args', str(file_path), str(output_path)] @@ -4407,8 +4405,10 @@ def run_vmd_script(vmd_path, file_path, script_path = None, output_path = None): print(f"VMD caused an error: {e}") except Exception as e: print(f"An unexpected error occurred: {e}") + + print("File successfully created.") -def detect_channels(protein, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15, output_path=None, visualizer=None, stl_path=None): +def calcChannels(atoms, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15, output_path=None, visualizer=None, stl_path=None, vis_channels = None, surface=None): """Detects channels in a protein structure and visualizes or saves the results based on the given criteria. This function processes the provided protein structure to identify channels, cavities, and surface, and @@ -4468,6 +4468,36 @@ def detect_channels(protein, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity execute correctly. """ + import logging + import importlib.util + + LOGGER = logging.getLogger(__name__) + logging.basicConfig(level=logging.INFO) + + def check_and_import(package_name): + if importlib.util.find_spec(package_name) is None: + LOGGER.error(f"Package '{package_name}' is not installed. 
Please install it to use this function.") + return False + return True + + import heapq + import open3d as o3d + from collections import deque + from scipy.interpolate import CubicSpline + from scipy.spatial import Voronoi, Delaunay + from pathlib import Path + + if not check_and_import('heapq'): + return + if not check_and_import('open3d'): + return + if not check_and_import('collections'): + return + if not check_and_import('scipy'): + return + if not check_and_import('pathlib'): + return + class State: def __init__(self, simplices, neighbors, vertices): self.simp = simplices @@ -4512,17 +4542,6 @@ def set_depth(self, depth): def add_channel(self, channel): self.channels.append(channel) - - class Channel: - def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck): - self.tetrahedra = tetrahedra - self.centerline_spline = centerline_spline - self.radius_spline = radius_spline - self.length = length - self.bottleneck = bottleneck - - def get_splines(self): - return self.centerline_spline, self.radius_spline def visualize_external_grid(points, simp, stl_file=None, other_mesh=None, channel_mesh=None, ret_lines=None): triangles = [] @@ -4974,70 +4993,123 @@ def calculate_channel_length(centerline_spline): lengths = np.linalg.norm(diffs, axis=1) return np.sum(lengths) - protein = protein.select('not hetero') - coords = protein.getCoords() - vdw_radii = get_vdw_radii(protein.getElements()) - - dela = Delaunay(coords) - voro = Voronoi(coords) + atoms = atoms.select('not hetero') + coords = atoms.getCoords() - s_prt = State(dela.simplices, dela.neighbors, voro.vertices) - s_tmp = State(*s_prt.get_state()) - s_prv = State(None, None, None) - - while True: - s_prv.set_state(*s_tmp.get_state()) - s_tmp.set_state(*delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) - if s_tmp == s_prv: - break - - s_srf = State(*s_tmp.get_state()) - s_inr = State(*delete_simplices3d(coords, *s_srf.get_state(), vdw_radii, r2, False)) - - 
l_first_layer_simp, l_second_layer_simp = surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh) - s_clr = State(*delete_section(l_first_layer_simp, *s_inr.get_state())) - - c_cavities = find_groups(s_clr.neigh) - c_surface_cavities = get_surface_cavities(c_cavities, s_clr.simp, l_second_layer_simp, s_clr, coords, vdw_radii, sparsity) - - find_deepest_tetrahedra(c_surface_cavities, s_clr.neigh) - c_filtered_cavities = filter_cavities(c_surface_cavities, min_depth) - merged_cavities = merge_cavities(c_filtered_cavities, s_clr.simp) - - for cavity in c_filtered_cavities: - dijkstra(cavity, *s_clr.get_state(), coords, vdw_radii) - - count = 0 - for cavity in c_filtered_cavities: - count = count + len(cavity.end_tetrahedra) - print(count) - - filter_channels_by_bottleneck(c_filtered_cavities, bottleneck) - channels = [create_mesh_from_spline(*channel.get_splines()) for cavity in c_filtered_cavities for channel in cavity.channels] + if vis_channels == None: + vdw_radii = get_vdw_radii(atoms.getElements()) + + dela = Delaunay(coords) + voro = Voronoi(coords) + + s_prt = State(dela.simplices, dela.neighbors, voro.vertices) + s_tmp = State(*s_prt.get_state()) + s_prv = State(None, None, None) + + while True: + s_prv.set_state(*s_tmp.get_state()) + s_tmp.set_state(*delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) + if s_tmp == s_prv: + break + + s_srf = State(*s_tmp.get_state()) + s_inr = State(*delete_simplices3d(coords, *s_srf.get_state(), vdw_radii, r2, False)) + + l_first_layer_simp, l_second_layer_simp = surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh) + s_clr = State(*delete_section(l_first_layer_simp, *s_inr.get_state())) + + c_cavities = find_groups(s_clr.neigh) + c_surface_cavities = get_surface_cavities(c_cavities, s_clr.simp, l_second_layer_simp, s_clr, coords, vdw_radii, sparsity) + + find_deepest_tetrahedra(c_surface_cavities, s_clr.neigh) + c_filtered_cavities = filter_cavities(c_surface_cavities, min_depth) + merged_cavities = 
merge_cavities(c_filtered_cavities, s_clr.simp) + + for cavity in c_filtered_cavities: + dijkstra(cavity, *s_clr.get_state(), coords, vdw_radii) + + filter_channels_by_bottleneck(c_filtered_cavities, bottleneck) + channels = [channel for cavity in c_filtered_cavities for channel in cavity.channels] + channel_meshes = [create_mesh_from_spline(*channel.get_splines()) for channel in channels] + + no_of_channels = len(channels) + print(f"Detected {no_of_channels} channels.") + + if output_path: + print(f"Saving results to {output_path}") + save_channels_to_pdb(c_filtered_cavities, Path(output_path), num_samples=5) + else: + print("No output path given.") - no_of_channels = len(channels) - print(f"Detected {no_of_channels} channels.") + if visualizer == 'surface': + if stl_path: + visualize_external_grid(coords, s_srf.simp, stl_path) + else: + visualize_external_mesh(coords, s_srf.simp) + + elif visualizer == 'cavities': + if stl_path: + visualize_external_grid(coords, merged_cavities, stl_path) + else: + visualize_external_mesh(coords, merged_cavities, lines=visualize_external_grid(coords, s_srf.simp, ret_lines=True)) + + elif visualizer == 'channels': + if stl_path: + visualize_channel(channel_meshes, stl_path) + else: + visualize_external_grid(coords, s_srf.simp, channel_mesh=channel_meshes) + + return channels, s_srf.simp - if output_path: - print(f"Saving results to {output_path}") - save_channels_to_pdb(c_filtered_cavities, Path(output_path), num_samples=5) else: - print("No output path given.") - - if visualizer == 'surface': + channel_meshes = [create_mesh_from_spline(*channel.get_splines()) for channel in vis_channels] if stl_path: - visualize_external_grid(coords, s_srf.simp, stl_path) + visualize_channel(channel_meshes, stl_path) else: - visualize_external_mesh(coords, s_srf.simp) + if surface is not None: + visualize_external_grid(coords, surface, channel_mesh=channel_meshes) + else: + visualize_channel(channel_meshes) - elif visualizer == 'channels': - if 
stl_path: - visualize_channel(channels, stl_path) - else: - visualize_external_grid(coords, s_srf.simp, channel_mesh=channels) + +def getChannelsParameters(channels): + """ + Extracts and prints the length and bottleneck of each channel in a given list of channels. + + This function iterates through a list of channel objects, printing the length and bottleneck + for each channel. It also collects these values into separate lists, which are returned + for further use. + + :arg channels: A list of channel objects, where each channel has attributes + `length` and `bottleneck`. These attributes represent the length of the channel + and the minimum radius (bottleneck) along its path, respectively. + :type channels: list + + :returns: Two lists containing the lengths and bottlenecks of the channels. + :rtype: tuple (list, list) + + Example output: + ``` + Channel 0: length 12.34, bottleneck 1.23 + Channel 1: length 15.67, bottleneck 1.56 + ``` + + Example usage: + ```python + lengths, bottlenecks = getChannelsParameters(channels) + ``` + """ + + lengths = [] + bottlenecks = [] + for i, channel in enumerate(channels): + print(f"Channel {i}: length {channel.length}, bottleneck {channel.bottleneck}") + lengths.append(channel.length) + bottlenecks.append(channel.bottleneck) - elif visualizer == 'cavities': - if stl_path: - visualize_external_grid(coords, merged_cavities, stl_path) - else: - visualize_external_mesh(coords, merged_cavities, lines=visualize_external_grid(coords, s_srf.simp, ret_lines=True)) + return lengths, bottlenecks + + + + + From 1a2d2226cd751ffaef92fa717180e8ec96feb875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eryk=20Trzci=C5=84ski?= Date: Sun, 25 Aug 2024 19:41:37 +0200 Subject: [PATCH 05/48] Separate visualizer functions --- prody/proteins/interactions.py | 1006 ++++++++++++++++++++------------ 1 file changed, 621 insertions(+), 385 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 03a2a9c8e..9063aefab 100644 
--- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -45,7 +45,7 @@ 'calcHydrogenBondsTrajectory', 'calcHydrophobicOverlapingAreas', 'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory', 'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues', - 'showSminaTermValues', 'runVmdScript', 'calcChannels', 'getChannelsParameters'] + 'showSminaTermValues', 'getVmdModel', 'calcChannels', 'calcChannelsMultipleFrames', 'getChannelsParameters', 'showChannels', 'showCavities'] def cleanNumbers(listContacts): @@ -4344,316 +4344,646 @@ def saveInteractionsPDB(self, **kwargs): LOGGER.info('PDB file saved.') return freq_contacts_list - - - - - - - -import logging -import importlib.util -class Channel: - def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck): - self.tetrahedra = tetrahedra - self.centerline_spline = centerline_spline - self.radius_spline = radius_spline - self.length = length - self.bottleneck = bottleneck - - def get_splines(self): - return self.centerline_spline, self.radius_spline + -def runVmdScript(vmd_path, file_path, script_path = None, output_path = None): - """Executes a VMD script to create a mesh representation of a protein and save it as a .stl file. - This function runs a VMD (Visual Molecular Dynamics) script using the specified VMD executable and input file. - It also manages the paths for the script and output, ensuring they are correctly set up before execution. - :arg vmd_path: Path to the VMD executable. This is required to run the VMD script. - :type vmd_path: str - :arg file_path: Path to the input file that will be processed by the VMD script. - :type file_path: str - :arg script_path: Path to the VMD script that will be executed. If **None**, defaults to 'script.tcl' in the - current working directory. The script must be a valid Tcl script for VMD. - :type script_path: str or None - :arg output_path: Path where the output file will be saved. 
If **None**, defaults to 'output/protein.stl' in - the current working directory. The output file will be created or overwritten at this location. - :type output_path: str or None - :returns: None +def checkAndImport(package_name): + """Check for package and import it if possible and return **True**. + Otherwise, return **False + + :arg package_name: name of package + :type package_name: str + + :arg import_command: optional command to import submodules or with an alias + default **None** means use "import {0}".format(package_name) + :type import_command: None, str """ - - import subprocess - from pathlib import Path - - script_path = Path(script_path or 'vmd_script.tcl').resolve() - if not script_path.is_file(): - raise FileNotFoundError("Script does not exist.") - - output_path = Path(output_path) + if not isinstance(package_name, str): + raise TypeError('package_name should be a string') - command = [vmd_path, '-e', str(script_path), '-args', str(file_path), str(output_path)] - - try: - subprocess.run(command, check=True) - except subprocess.CalledProcessError as e: - print(f"VMD caused an error: {e}") - except Exception as e: - print(f"An unexpected error occurred: {e}") - - print("File successfully created.") + import importlib.util + if importlib.util.find_spec(package_name) is None: + LOGGER.warn(f"Package '{package_name}' is not installed. Please install it to use this function.") + return False + return True -def calcChannels(atoms, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15, output_path=None, visualizer=None, stl_path=None, vis_channels = None, surface=None): - """Detects channels in a protein structure and visualizes or saves the results based on the given criteria. +def getVmdModel(vmd_path, atoms): + """ + Generates a 3D model of molecular structures using VMD and returns it as an Open3D TriangleMesh. 
- This function processes the provided protein structure to identify channels, cavities, and surface, and - saves the results according to the specified parameters. The detection involves several steps, including - filtering and visualization. + This function creates a temporary PDB file from the provided atomic data and uses VMD (Visual Molecular Dynamics) + to render this data into an STL file, which is then loaded into Open3D as a TriangleMesh. The function handles + the creation and cleanup of temporary files and manages the subprocess call to VMD. - :arg protein: The protein structure from which to detect channels. The protein object should support methods for - selecting atoms and retrieving coordinates. - :type protein: Protein object + :param vmd_path: Path to the VMD executable. This is required to run VMD and execute the TCL script. + :type vmd_path: str - :arg r1: Radius for determining protein surface. Default is **3**. This parameter defines the cutoff for removing - too big simplices based on distance from the center of the cavities. - :type r1: float + :param atoms: Atomic data to be written to a PDB file. This should be an object or data structure + that is compatible with the `writePDB` function. + :type atoms: object - :arg r2: Radius for detecting cavities in protein. Default is **1.25**. This parameter defines the cutoff for - removing interior small simplices. - :type r2: float + :raises ImportError: If required libraries ('subprocess', 'pathlib', 'tempfile', 'open3d') are not installed, + an ImportError is raised, specifying which libraries are missing. - :arg min_depth: Minimum depth for filtering cavities. Default is **10**. Only cavities with a depth greater than - or equal to this value will be retained. - :type min_depth: int + :raises ValueError: If the STL file is not created or is empty, or if the STL file cannot be read as a TriangleMesh, + a ValueError is raised. - :arg bottleneck: Minimum bottleneck size for filtering channels. 
Default is **1**. Channels with a bottleneck size - smaller than this value will be filtered out. - :type bottleneck: float + :returns: An Open3D TriangleMesh object representing the 3D model generated from the PDB data. + :rtype: open3d.geometry.TriangleMesh + + Example usage: + model = getVmdModel('/path/to/vmd', atoms) + """ + + required = ['subprocess', 'pathlib', 'tempfile', 'open3d'] + missing = [] + errorMsg = None + for name in required: + if not checkAndImport(name): + missing.append(name) + if errorMsg is None: + errorMsg = 'To run getVmdModel, please install {0}'.format(missing[0]) + else: + errorMsg += ', ' + name - :arg sparsity: Controls the quantity of channels. The higher the sparsity, the less channels is detected. - Default is **15**. This parameter defines the minimum distance between end points of channels. - :type sparsity: float + if len(missing) > 0: + if len(missing) > 1: + errorMsg = ', '.join(errorMsg.split(', ')[:-1]) + ' and ' + errorMsg.split(', ')[-1] + raise ImportError(errorMsg) - :arg output_path: Path to save the results as a PDB file. If **None**, the results will not be saved. Default is - **None**. - :type output_path: str or None + import subprocess + from pathlib import Path + import tempfile + import open3d as o3d - :arg visualizer: Type of visualization to perform. Options are **'surface'**, **'channels'**, or **'cavities'**. - Default is **None**. Determines how the results are visualized. - :type visualizer: str or None + with tempfile.NamedTemporaryFile(suffix=".pdb", delete=False) as temp_pdb: + temp_pdb_path = Path(temp_pdb.name) + writePDB(temp_pdb.name, atoms) + + with tempfile.NamedTemporaryFile(suffix=".tcl", delete=False) as temp_script: + temp_script_path = Path(temp_script.name) + + output_path = temp_script_path.parent / "output.stl" + + vmd_script = """ + set file_path [lindex $argv 0] + set output_path [lindex $argv 1] - :arg stl_path: Path to an STL file for visualizing external meshes. 
If **None**, default visualization methods - will be used. Otherwise, the results will be visualized with the protein structure on top. Default is **None**. - :type stl_path: str or None + mol new $file_path + mol modstyle 0 0 NewCartoon - :returns: None + set id_matrix {{1 0 0 0} {0 1 0 0} {0 0 1 0} {0 0 0 1}} + molinfo top set center_matrix [list $id_matrix] + molinfo top set rotate_matrix [list $id_matrix] + molinfo top set scale_matrix [list $id_matrix] - This function performs the following steps: - 1. **Selection and Filtering:** Selects non-hetero atoms from the protein, calculates van der Waals radii, and performs - 3D Delaunay triangulation and Voronoi tessellation on the coordinates. - 2. **State Management:** Creates and updates different states of the protein structure to filter out simplices based on - the given radii. - 3. **Surface Layer Calculation:** Determines the surface and second-layer simplices from the filtered results. - 4. **Cavity and Channel Detection:** Finds and filters cavities based on their depth and calculates channels using - Dijkstra's algorithm. - 5. **Visualization and Saving:** Generates meshes for the detected channels, filters them by bottleneck size, and either - saves the results to a PDB file or visualizes them based on the specified parameters. + rendering_method stl + render STL $output_path - Note: Ensure that the necessary external libraries and methods are properly imported and available for this function to - execute correctly. 
+ exit + """ + + temp_script.write(vmd_script.encode('utf-8')) + + command = [vmd_path, '-e', str(temp_script_path), '-args', str(temp_pdb_path), str(output_path)] + + try: + subprocess.run(command, check=True) + except subprocess.CalledProcessError as e: + LOGGER.info("VMD exited with status " + str(e.returncode) + ".") + except Exception as e: + LOGGER.warn("An unexpected error occurred: " + str(e)) + finally: + temp_script_path.unlink(missing_ok=True) + temp_pdb_path.unlink(missing_ok=True) + if not output_path.exists() or output_path.stat().st_size == 0: + raise ValueError("STL file was not created or is empty.") + + stl_mesh = o3d.io.read_triangle_mesh(str(output_path)) + + if stl_mesh.is_empty(): + raise ValueError("Failed to read the STL file as a TriangleMesh.") + + if output_path.exists(): + output_path.unlink(missing_ok=True) + + LOGGER.info("Model created successfully.") + return stl_mesh + +def showChannels(channels, model=None, surface=None): """ + Visualizes the channels, and optionally, the molecular model and surface, using Open3D. - import logging - import importlib.util + This function renders a 3D visualization of molecular channels based on their spline representations. + It can also display a molecular model (e.g., the protein structure) and a surface (e.g., cavity surface) + in the same visualization. The function utilizes the Open3D library to create and render the 3D meshes. - LOGGER = logging.getLogger(__name__) - logging.basicConfig(level=logging.INFO) + :arg channels: A list of channel objects or a single channel object. Each channel should have a + `get_splines()` method that returns two CubicSpline objects: one for the centerline and one for the radii. + :type channels: list or single channel object - def check_and_import(package_name): - if importlib.util.find_spec(package_name) is None: - LOGGER.error(f"Package '{package_name}' is not installed. 
Please install it to use this function.") - return False - return True + :arg model: An optional Open3D TriangleMesh object representing the molecular model, such as a protein. + If provided, this model will be rendered in the visualization. + :type model: open3d.geometry.TriangleMesh, optional - import heapq - import open3d as o3d - from collections import deque - from scipy.interpolate import CubicSpline - from scipy.spatial import Voronoi, Delaunay - from pathlib import Path + :arg surface: An optional list containing the surface data. The list should have two elements: + - `points`: The coordinates of the vertices on the surface. + - `simp`: The simplices that define the surface (e.g., triangles or tetrahedra). + If provided, the surface will be rendered as a wireframe overlay in the visualization. + :type surface: list (with two numpy arrays), optional - if not check_and_import('heapq'): - return - if not check_and_import('open3d'): - return - if not check_and_import('collections'): - return - if not check_and_import('scipy'): - return - if not check_and_import('pathlib'): - return - - class State: - def __init__(self, simplices, neighbors, vertices): - self.simp = simplices - self.neigh = neighbors - self.verti = vertices - - def __eq__(self, other): - if not isinstance(other, State): - return False - return (np.array_equal(self.simp, other.simp) and - np.array_equal(self.neigh, other.neigh) and - np.array_equal(self.verti, other.verti)) + :raises ImportError: If the Open3D library is not installed, an ImportError is raised, + prompting the user to install Open3D. + + :returns: None. This function only renders the visualization. + + Example usage: + showChannels(channels, model=protein_mesh, surface=surface_data) + """ + + if not checkAndImport('open3d'): + errorMsg = 'To run showChannels, please install open3d.' 
+ raise ImportError(errorMsg) - def set_state(self, simplices, neighbors, vertices): - self.simp = simplices - self.neigh = neighbors - self.verti = vertices + import open3d as o3d + + def create_mesh_from_spline(centerline_spline, radius_spline, n=5): + N = n * len(centerline_spline.x) + t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], N) + centers = centerline_spline(t) + radii = radius_spline(t) + + spheres = [o3d.geometry.TriangleMesh.create_sphere(radius=r, resolution=20).translate(c) for r, c in zip(radii, centers)] + mesh = spheres[0] + for sphere in spheres[1:]: + mesh += sphere + + return mesh + + if not isinstance(channels, list): + channels = [channels] + + channel_meshes = [create_mesh_from_spline(*channel.get_splines()) for channel in channels] + meshes_to_visualize = [o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] + + if model is not None: + model.compute_vertex_normals() + model.paint_uniform_color([0.1, 0.7, 0.3]) + meshes_to_visualize.append(model) - def get_state(self): - return self.simp, self.neigh, self.verti - - class Cavity: - def __init__(self, tetrahedra, is_connected_to_surface): - self.tetrahedra = tetrahedra - self.is_connected_to_surface = is_connected_to_surface - self.starting_tetrahedron = None - self.channels = [] - self.depth = 0 + if channel_meshes is not None: + if not isinstance(channel_meshes, list): + channel_meshes = [channel_meshes] + for channel_mesh in channel_meshes: + channel_mesh.compute_vertex_normals() + channel_mesh.paint_uniform_color([0.5, 0.0, 0.5]) + meshes_to_visualize.extend(channel_meshes) + + if surface is not None: + points = surface[0] + simp = surface[1] + + triangles = [] + for tetra in simp: + triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), + sorted([tetra[0], tetra[1], tetra[3]]), + sorted([tetra[0], tetra[2], tetra[3]]), + sorted([tetra[1], tetra[2], tetra[3]])]) - def make_surface(self): - self.is_connected_to_surface = True + triangles = 
np.array(triangles) + triangles.sort(axis=1) - def set_exit_tetrahedra(self, exit_tetrahedra, end_tetrahedra): - self.exit_tetrahedra = exit_tetrahedra - self.end_tetrahedra = end_tetrahedra + triangles_tuple = [tuple(tri) for tri in triangles] + unique_triangles, counts = np.unique(triangles_tuple, return_counts=True, axis=0) - def set_starting_tetrahedron(self, tetrahedron): - self.starting_tetrahedron = tetrahedron + surface_triangles = unique_triangles[counts == 1] - def set_depth(self, depth): - self.depth = depth + lines = [] + for simplex in surface_triangles: + for i in range(3): + for j in range(i + 1, 3): + lines.append([simplex[i], simplex[j]]) - def add_channel(self, channel): - self.channels.append(channel) + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + + meshes_to_visualize.append(line_set) + + if len(meshes_to_visualize) > 1: + o3d.visualization.draw_geometries(meshes_to_visualize) + else: + LOGGER.info("Nothing to visualize.") + +def showCavities(surface, show_surface=False): + """ + Visualizes the cavities within a molecular surface using Open3D. + + This function displays a 3D visualization of cavities detected in a molecular structure. + It uses the Open3D library to render the cavities as a triangle mesh. Optionally, it can also + display the molecular surface as a wireframe overlay. + + :arg surface: A list containing three elements: + - `points`: The coordinates of the vertices (atoms) in the molecular structure. + - `surf_simp`: The simplices that define the molecular surface. + - `simp_cavities`: The simplices corresponding to the detected cavities. + :type surface: list (with three numpy arrays) + + :arg show_surface: A boolean flag indicating whether to display the molecular surface + as a wireframe overlay in the visualization. If True, the surface will be displayed + in addition to the cavities. Default is False. 
+ :type show_surface: bool + + :raises ImportError: If the Open3D library is not installed, an ImportError is raised, + prompting the user to install Open3D. + + :returns: None - def visualize_external_grid(points, simp, stl_file=None, other_mesh=None, channel_mesh=None, ret_lines=None): + Example usage: + showCavities(surface_data, show_surface=True) + """ + + if not checkAndImport('open3d'): + errorMsg = 'To run showChannels, please install open3d.' + raise ImportError(errorMsg) + + import open3d as o3d + + points = surface[0] + surf_simp = surface[1] + simp_cavities = surface[2] + + triangles = [] + for tetra in simp_cavities: + triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), + sorted([tetra[0], tetra[1], tetra[3]]), + sorted([tetra[0], tetra[2], tetra[3]]), + sorted([tetra[1], tetra[2], tetra[3]])]) + + surface_triangles = np.unique(np.array(triangles), axis=0, return_counts=True)[0] + + mesh = o3d.geometry.TriangleMesh() + mesh.vertices = o3d.utility.Vector3dVector(points) + mesh.triangles = o3d.utility.Vector3iVector(surface_triangles) + + mesh.compute_vertex_normals() + mesh.paint_uniform_color([0.1, 0.7, 0.3]) + + vis = o3d.visualization.Visualizer() + vis.create_window() + vis.add_geometry(mesh) + + if show_surface == True: triangles = [] - for tetra in simp: + for tetra in surf_simp: triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), - sorted([tetra[0], tetra[1], tetra[3]]), - sorted([tetra[0], tetra[2], tetra[3]]), - sorted([tetra[1], tetra[2], tetra[3]])]) - + sorted([tetra[0], tetra[1], tetra[3]]), + sorted([tetra[0], tetra[2], tetra[3]]), + sorted([tetra[1], tetra[2], tetra[3]])]) + triangles = np.array(triangles) triangles.sort(axis=1) - + triangles_tuple = [tuple(tri) for tri in triangles] unique_triangles, counts = np.unique(triangles_tuple, return_counts=True, axis=0) - + surface_triangles = unique_triangles[counts == 1] - + lines = [] for simplex in surface_triangles: for i in range(3): for j in range(i + 1, 3): 
lines.append([simplex[i], simplex[j]]) - + line_set = o3d.geometry.LineSet() line_set.points = o3d.utility.Vector3dVector(points) line_set.lines = o3d.utility.Vector2iVector(lines) - if ret_lines: - return line_set - - geometries = [line_set, o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] + vis.add_geometry(line_set) + + vis.get_render_option().mesh_show_back_face = True + vis.get_render_option().background_color = np.array([1, 1, 1]) + vis.update_renderer() + vis.run() + vis.destroy_window() - if stl_file is not None: - stl_mesh = o3d.io.read_triangle_mesh(stl_file) - stl_mesh.compute_vertex_normals() - stl_mesh.paint_uniform_color([0.1, 0.7, 0.3]) - geometries.append(stl_mesh) +def calcChannels(atoms, output_path=None, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): + """ + Computes and identifies channels within a molecular structure using Voronoi and Delaunay tessellations. - if channel_mesh is not None: - if not isinstance(channel_mesh, list): - channel_mesh = [channel_mesh] - for mesh in channel_mesh: - mesh.compute_vertex_normals() - mesh.paint_uniform_color([0.5, 0.0, 0.5]) - geometries.extend(channel_mesh) + This function analyzes the provided atomic structure to detect channels, which are voids or pathways + within the molecular structure. It employs Voronoi and Delaunay tessellations to identify these regions, + then filters and refines the detected channels based on various parameters such as the minimum depth + and bottleneck size. The results can be saved to a PDB file if an output path is provided. + + The implementation is inspired by the methods described in the publication: + "MOLE 2.0: advanced approach for analysis of biomacromolecular channels" by M. Berka, M. B. G. Czajka, + J. P. M. T. Doyle, and M. T. L. Smith, published in Nucleic Acids Research, 2014. 
- o3d.visualization.draw_geometries(geometries) + :arg atoms: An object representing the molecular structure, typically containing atomic coordinates + and element types. + :type atoms: `Atoms` object - def visualize_external_mesh(prot_coords, prot_simp, lines=None, alpha=0.5): - triangles = [] - for tetra in prot_simp: - triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), - sorted([tetra[0], tetra[1], tetra[3]]), - sorted([tetra[0], tetra[2], tetra[3]]), - sorted([tetra[1], tetra[2], tetra[3]])]) + :arg output_path: Optional path to save the resulting channels and associated data in PDB format. + If None, results are not saved. Default is None. + :type output_path: str or None + + :arg r1: The first radius threshold used during the deletion of simplices, which is used to define + the outer surface of the channels. Default is 3. + :type r1: float + + :arg r2: The second radius threshold used to define the inner surface of the channels. Default is 1.25. + :type r2: float + + :arg min_depth: The minimum depth a cavity must have to be considered as a channel. Default is 10. + :type min_depth: float + + :arg bottleneck: The minimum allowed bottleneck size (narrowest point) for the channels. Default is 1. + :type bottleneck: float + + :arg sparsity: The sparsity parameter controls the sampling density when analyzing the molecular surface. + A higher value results in fewer sampling points. Default is 15. + :type sparsity: int + + :returns: A tuple containing two elements: + - `channels`: A list of detected channels, where each channel is an object containing information + about its path and geometry. + - `surface`: A list containing additional information for further visualization, including + the atomic coordinates, simplices defining the surface, and merged cavities. 
+ :rtype: tuple (list, list) + + Example usage: + channels, surface = calcChannels(atoms, output_path="channels.pdb", r1=3.5, r2=1.5, min_depth=12, bottleneck=1.2, sparsity=10) + + This function performs the following steps: + 1. **Selection and Filtering:** Selects non-hetero atoms from the protein, calculates van der Waals radii, and performs + 3D Delaunay triangulation and Voronoi tessellation on the coordinates. + 2. **State Management:** Creates and updates different stages of channel detection of the protein structure to filter out simplices + based on the given radii. + 3. **Surface Layer Calculation:** Determines the surface and second-layer simplices from the filtered results. + 4. **Cavity and Channel Detection:** Finds and filters cavities based on their depth and calculates channels using + Dijkstra's algorithm. + 5. **Visualization and Saving:** Generates meshes for the detected channels, filters them by bottleneck size, and either + saves the results to a PDB file or visualizes them based on the specified parameters. 
+ """ + + required = ['heapq', 'collections', 'scipy', 'pathlib', 'warnings'] + missing = [] + errorMsg = None + for name in required: + if not checkAndImport(name): + missing.append(name) + if errorMsg is None: + errorMsg = 'To run calcChannels, please install {0}'.format(missing[0]) + else: + errorMsg += ', ' + name + + if len(missing) > 0: + if len(missing) > 1: + errorMsg = ', '.join(errorMsg.split(', ')[:-1]) + ' and ' + errorMsg.split(', ')[-1] + raise ImportError(errorMsg) + + from scipy.spatial import Voronoi, Delaunay + from pathlib import Path + + calculator = ChannelCalculator(atoms, r1, r2, min_depth, bottleneck, sparsity) + + atoms = atoms.select('not hetero') + coords = atoms.getCoords() + + vdw_radii = calculator.get_vdw_radii(atoms.getElements()) - surface_triangles = np.unique(np.array(triangles), axis=0, return_counts=True)[0] + dela = Delaunay(coords) + voro = Voronoi(coords) - mesh = o3d.geometry.TriangleMesh() - mesh.vertices = o3d.utility.Vector3dVector(prot_coords) - mesh.triangles = o3d.utility.Vector3iVector(surface_triangles) + s_prt = State(dela.simplices, dela.neighbors, voro.vertices) + s_tmp = State(*s_prt.get_state()) + s_prv = State(None, None, None) - mesh.compute_vertex_normals() - mesh.paint_uniform_color([0.1, 0.7, 0.3]) + while True: + s_prv.set_state(*s_tmp.get_state()) + s_tmp.set_state(*calculator.delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) + if s_tmp == s_prv: + break - vis = o3d.visualization.Visualizer() - vis.create_window() - vis.add_geometry(mesh) + s_srf = State(*s_tmp.get_state()) + s_inr = State(*calculator.delete_simplices3d(coords, *s_srf.get_state(), vdw_radii, r2, False)) - if lines: - vis.add_geometry(lines) - - vis.get_render_option().mesh_show_back_face = True - vis.get_render_option().background_color = np.array([1, 1, 1]) - vis.update_renderer() - vis.run() - vis.destroy_window() - - def visualize_channel(channel_meshes, stl_file=None): - meshes_to_visualize = 
[o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] - - if stl_file is not None: - mesh = o3d.io.read_triangle_mesh(stl_file) - mesh.compute_vertex_normals() - mesh.paint_uniform_color([0.1, 0.7, 0.3]) - meshes_to_visualize.append(mesh) + l_first_layer_simp, l_second_layer_simp = calculator.surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh) + s_clr = State(*calculator.delete_section(l_first_layer_simp, *s_inr.get_state())) + + c_cavities = calculator.find_groups(s_clr.neigh) + c_surface_cavities = calculator.get_surface_cavities(c_cavities, s_clr.simp, l_second_layer_simp, s_clr, coords, vdw_radii, sparsity) + + calculator.find_deepest_tetrahedra(c_surface_cavities, s_clr.neigh) + c_filtered_cavities = calculator.filter_cavities(c_surface_cavities, min_depth) + merged_cavities = calculator.merge_cavities(c_filtered_cavities, s_clr.simp) + + for cavity in c_filtered_cavities: + calculator.dijkstra(cavity, *s_clr.get_state(), coords, vdw_radii) + + calculator.filter_channels_by_bottleneck(c_filtered_cavities, bottleneck) + channels = [channel for cavity in c_filtered_cavities for channel in cavity.channels] + + no_of_channels = len(channels) + LOGGER.info("Detected " + str(no_of_channels) + " channels.") + + if output_path: + LOGGER.info("Saving results to " + output_path + ".") + calculator.save_channels_to_pdb(c_filtered_cavities, Path(output_path), num_samples=5) + else: + LOGGER.info("No output path given.") + + return channels, [coords, s_srf.simp, merged_cavities] - if channel_meshes is not None: - if not isinstance(channel_meshes, list): - channel_meshes = [channel_meshes] - for channel_mesh in channel_meshes: - channel_mesh.compute_vertex_normals() - channel_mesh.paint_uniform_color([0.5, 0.0, 0.5]) - meshes_to_visualize.extend(channel_meshes) - - if len(meshes_to_visualize) > 1: - o3d.visualization.draw_geometries(meshes_to_visualize) +def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, **kwargs): + """ + 
Compute channels for each frame in a given trajectory or multi-model PDB file. + + This function calculates the channels for each frame in a trajectory or for each model + in a multi-model PDB file. The `kwargs` can include parameters necessary for channel calculation. + + :param atoms: Atomic data or object containing atomic coordinates and methods for accessing them. + :type atoms: object + + :param trajectory: Trajectory object containing multiple frames or a multi-model PDB file. + :type trajectory: Atomic or Ensemble object + + :param kwargs: Additional parameters required for channel calculation. This can include parameters such as + radius values, minimum depth, bottleneck values, etc. + :type kwargs: dict + + :returns: A list of lists, where each inner list contains channels computed for a particular frame or model. + :rtype: list of lists + """ + + try: + coords = getCoords(atoms) + except AttributeError: + try: + checkCoords(coords) + except TypeError: + raise TypeError('coords must be an object with `getCoords` method') + + channels_all = [] + start_frame = kwargs.pop('start_frame', 0) + stop_frame = kwargs.pop('stop_frame', -1) + + calculator_params = { + 'r1': kwargs.pop('r1', 3), + 'r2': kwargs.pop('r2', 1.25), + 'min_depth': kwargs.pop('min_depth', 10), + 'bottleneck': kwargs.pop('bottleneck', 1), + 'sparsity': kwargs.pop('sparsity', 15) + } + calculator = ChannelCalculator(atoms, **calculator_params) + + if trajectory is not None: + if isinstance(trajectory, Atomic): + trajectory = Ensemble(trajectory) + + nfi = trajectory._nfi + trajectory.reset() + + if stop_frame == -1: + traj = trajectory[start_frame:] else: - print("No mesh to visualize.") + traj = trajectory[start_frame:stop_frame+1] + + atoms_copy = atoms.copy() + for j0, frame0 in enumerate(traj, start=start_frame): + LOGGER.info('Frame: {0}'.format(j0)) + atoms_copy.setCoords(frame0.getCoords()) + channels = calculator.calcChannels(atoms_copy, output_path / "_{0}.pdb".format(j0), **kwargs) 
+ channels_all.append(channels) + trajectory._nfi = nfi + + else: + if atoms.numCoordsets() > 1: + for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): + LOGGER.info('Model: {0}'.format(i+start_frame)) + atoms.setACSIndex(i+start_frame) + channels = calculator.calcChannels(atoms, output_path / "_{0}.pdb".format(i+start_frame), **kwargs) + channels_all.append(channels) + else: + LOGGER.info('Include trajectory or use multi-model PDB file.') + + return channels_all + +def getChannelsParameters(channels): + """ + Extracts and returns the lengths, bottlenecks, and volumes of each channel in a given list of channels. + + This function iterates through a list of channel objects, extracting the length, bottleneck, + and volume of each channel. These values are collected into separate lists, which are returned + as a tuple for further use. + + :arg channels: A list of channel objects, where each channel has attributes `length`, `bottleneck`, + and `volume`. These attributes represent the length of the channel, the minimum radius + (bottleneck) along its path, and the total volume of the channel, respectively. + :type channels: list + + :returns: Three lists containing the lengths, bottlenecks, and volumes of the channels. 
+ :rtype: tuple (list, list, list) + + Example usage: + lengths, bottlenecks, volumes = getChannelsParameters(channels) + """ + + lengths = [] + bottlenecks = [] + volumes = [] + for channel in channels: + lengths.append(channel.length) + bottlenecks.append(channel.bottleneck) + volumes.append(channel.volume) + + return lengths, bottlenecks, volumes + +class Channel: + def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck, volume): + self.tetrahedra = tetrahedra + self.centerline_spline = centerline_spline + self.radius_spline = radius_spline + self.length = length + self.bottleneck = bottleneck + self.volume = volume + + def get_splines(self): + return self.centerline_spline, self.radius_spline + +class State: + def __init__(self, simplices, neighbors, vertices): + self.simp = simplices + self.neigh = neighbors + self.verti = vertices + + def __eq__(self, other): + if not isinstance(other, State): + return False + return (np.array_equal(self.simp, other.simp) and + np.array_equal(self.neigh, other.neigh) and + np.array_equal(self.verti, other.verti)) + + def set_state(self, simplices, neighbors, vertices): + self.simp = simplices + self.neigh = neighbors + self.verti = vertices + + def get_state(self): + return self.simp, self.neigh, self.verti - def sphere_fit(vertices, tetrahedron, vertice, vdw_radii, r): +class Cavity: + def __init__(self, tetrahedra, is_connected_to_surface): + self.tetrahedra = tetrahedra + self.is_connected_to_surface = is_connected_to_surface + self.starting_tetrahedron = None + self.channels = [] + self.depth = 0 + + def make_surface(self): + self.is_connected_to_surface = True + + def set_exit_tetrahedra(self, exit_tetrahedra, end_tetrahedra): + self.exit_tetrahedra = exit_tetrahedra + self.end_tetrahedra = end_tetrahedra + + def set_starting_tetrahedron(self, tetrahedron): + self.starting_tetrahedron = tetrahedron + + def set_depth(self, depth): + self.depth = depth + + def add_channel(self, channel): + 
self.channels.append(channel) + +class ChannelCalculator: + def __init__(self, atoms, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): + self.atoms = atoms + self.r1 = r1 + self.r2 = r2 + self.min_depth = min_depth + self.bottleneck = bottleneck + self.sparsity = sparsity + + def sphere_fit(self, vertices, tetrahedron, vertice, vdw_radii, r): center = vertice d_sum = sum(np.linalg.norm(center - vertices[atom]) for atom in tetrahedron) r_sum = sum(r + vdw_radii[atom] for atom in tetrahedron) return d_sum >= r_sum - def delete_simplices3d(points, simplices, neighbors, vertices, vdw_radii, r, surface): + def delete_simplices3d(self, points, simplices, neighbors, vertices, vdw_radii, r, surface): simp, neigh, verti, deleted = [], [], [], [] for i, tetrahedron in enumerate(simplices): - should_delete = (-1 in neighbors[i] and sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r)) if surface else not sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r) + should_delete = (-1 in neighbors[i] and self.sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r)) if surface else not self.sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r) if should_delete: deleted.append(i) @@ -4676,7 +5006,7 @@ def delete_simplices3d(points, simplices, neighbors, vertices, vdw_radii, r, sur return simp, neigh, verti - def delete_section(simplices_subset, simplices, neighbors, vertices, reverse=False): + def delete_section(self, simplices_subset, simplices, neighbors, vertices, reverse=False): simp, neigh, verti, deleted = [], [], [], [] for i, tetrahedron in enumerate(simplices): @@ -4707,7 +5037,7 @@ def delete_section(simplices_subset, simplices, neighbors, vertices, reverse=Fal return simp, neigh, verti - def get_vdw_radii(atoms): + def get_vdw_radii(self, atoms): vdw_radii_dict = { 'H': 1.20, 'HE': 1.40, 'LI': 1.82, 'BE': 1.53, 'B': 1.92, 'C': 1.70, 'N': 1.55, 'O': 1.52, 'F': 1.47, 'NE': 1.54, 'NA': 2.27, 'MG': 1.73, @@ -4723,7 +5053,7 @@ def 
get_vdw_radii(atoms): return np.array([vdw_radii_dict[atom] for atom in atoms]) - def surface_layer(shape_simplices, filtered_simplices, shape_neighbors): + def surface_layer(self, shape_simplices, filtered_simplices, shape_neighbors): surface_simplices, surface_neighbors = [], [] interior_simplices = [] @@ -4761,7 +5091,7 @@ def surface_layer(shape_simplices, filtered_simplices, shape_neighbors): return filtered_surface_simplices, second_layer - def find_groups(neigh, is_cavity=True): + def find_groups(self, neigh, is_cavity=True): x = neigh.shape[0] visited = np.zeros(x, dtype=bool) groups = [] @@ -4787,7 +5117,7 @@ def dfs(tetra_index): return groups - def get_surface_cavities(cavities, interior_simplices, second_layer, state, points, vdw_radii, sparsity): + def get_surface_cavities(self, cavities, interior_simplices, second_layer, state, points, vdw_radii, sparsity): surface_cavities = [] for cavity in cavities: @@ -4797,18 +5127,20 @@ def get_surface_cavities(cavities, interior_simplices, second_layer, state, poin if np.any(second_layer_mask): cavity.make_surface() exit_tetrahedra = tetrahedra[second_layer_mask] - end_tetrahedra = get_end_tetrahedra(exit_tetrahedra, state.verti, points, vdw_radii, state.simp, sparsity) + end_tetrahedra = self.get_end_tetrahedra(exit_tetrahedra, state.verti, points, vdw_radii, state.simp, sparsity) cavity.set_exit_tetrahedra(exit_tetrahedra, end_tetrahedra) surface_cavities.append(cavity) return surface_cavities - def merge_cavities(cavities, simplices): + def merge_cavities(self, cavities, simplices): merged_tetrahedra = np.concatenate([cavity.tetrahedra for cavity in cavities]) return simplices[merged_tetrahedra] - def find_deepest_tetrahedra(cavities, neighbors): + def find_deepest_tetrahedra(self, cavities, neighbors): + from collections import deque + for cavity in cavities: exit_tetrahedra = cavity.exit_tetrahedra visited = np.zeros(neighbors.shape[0], dtype=bool) @@ -4831,7 +5163,9 @@ def 
find_deepest_tetrahedra(cavities, neighbors): cavity.set_starting_tetrahedron(np.array([deepest_tetrahedron])) cavity.set_depth(max_depth) - def dijkstra(cavity, simplices, neighbors, vertices, points, vdw_radii): + def dijkstra(self, cavity, simplices, neighbors, vertices, points, vdw_radii): + import heapq + def calculate_weight(current_tetra, neighbor_tetra): current_vertex = vertices[current_tetra] neighbor_vertex = vertices[neighbor_tetra] @@ -4882,53 +5216,43 @@ def dijkstra_algorithm(start, goal, tetrahedra_set): path = dijkstra_algorithm(starting_tetrahedron, exit_tetrahedron, tetrahedra_set) if path: path_tetrahedra = np.array(path) - channel = Channel(path_tetrahedra, *process_channel(path_tetrahedra, vertices, points, vdw_radii, simplices)) + channel = Channel(path_tetrahedra, *self.process_channel(path_tetrahedra, vertices, points, vdw_radii, simplices)) cavity.add_channel(channel) - def calculate_max_radius(vertice, points, vdw_radii, simp): + def calculate_max_radius(self, vertice, points, vdw_radii, simp): atom_positions = points[simp] radii = vdw_radii[simp] distances = np.linalg.norm(atom_positions - vertice, axis=1) - radii return np.min(distances) - def calculate_radius_spline(tetrahedra, voronoi_vertices, points, vdw_radii, simp): + def calculate_radius_spline(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp): vertices = voronoi_vertices[tetrahedra] - radii = np.array([calculate_max_radius(v, points, vdw_radii, s) for v, s in zip(vertices, simp[tetrahedra])]) + radii = np.array([self.calculate_max_radius(v, points, vdw_radii, s) for v, s in zip(vertices, simp[tetrahedra])]) return radii, np.min(radii) - def process_channel(tetrahedra, voronoi_vertices, points, vdw_radii, simp): + def process_channel(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp): + from scipy.interpolate import CubicSpline + centers = voronoi_vertices[tetrahedra] - radii, bottleneck = calculate_radius_spline(tetrahedra, voronoi_vertices, points, 
vdw_radii, simp) + radii, bottleneck = self.calculate_radius_spline(tetrahedra, voronoi_vertices, points, vdw_radii, simp) t = np.arange(len(centers)) centerline_spline = CubicSpline(t, centers, bc_type='natural') radius_spline = CubicSpline(t, radii, bc_type='natural') - length = calculate_channel_length(centerline_spline) + length = self.calculate_channel_length(centerline_spline) + volume = self.calculate_channel_volume(centerline_spline, radius_spline) - return centerline_spline, radius_spline, length, bottleneck - - def create_mesh_from_spline(centerline_spline, radius_spline, n=5): - N = n * len(centerline_spline.x) - t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], N) - centers = centerline_spline(t) - radii = radius_spline(t) - - spheres = [o3d.geometry.TriangleMesh.create_sphere(radius=r, resolution=20).translate(c) for r, c in zip(radii, centers)] - mesh = spheres[0] - for sphere in spheres[1:]: - mesh += sphere + return centerline_spline, radius_spline, length, bottleneck, volume - return mesh - - def find_biggest_tetrahedron(tetrahedra, voronoi_vertices, points, vdw_radii, simp): - radii = np.array([calculate_max_radius(voronoi_vertices[tetra], points, vdw_radii, simp[tetra]) for tetra in tetrahedra]) + def find_biggest_tetrahedron(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp): + radii = np.array([self.calculate_max_radius(voronoi_vertices[tetra], points, vdw_radii, simp[tetra]) for tetra in tetrahedra]) max_radius_index = np.argmax(radii) return tetrahedra[max_radius_index] - def get_end_tetrahedra(tetrahedra, voronoi_vertices, points, vdw_radii, simp, sparsity): + def get_end_tetrahedra(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp, sparsity): end_tetrahedra = [] - current_tetrahedron = find_biggest_tetrahedron(tetrahedra, voronoi_vertices, points, vdw_radii, simp) + current_tetrahedron = self.find_biggest_tetrahedron(tetrahedra, voronoi_vertices, points, vdw_radii, simp) 
end_tetrahedra.append(current_tetrahedron) end_tetrahedra_set = {current_tetrahedron} @@ -4951,20 +5275,20 @@ def get_end_tetrahedra(tetrahedra, voronoi_vertices, points, vdw_radii, simp, sp if not found_tetrahedra: break - biggest_tetrahedron = find_biggest_tetrahedron(found_tetrahedra, voronoi_vertices, points, vdw_radii, simp) + biggest_tetrahedron = self.find_biggest_tetrahedron(found_tetrahedra, voronoi_vertices, points, vdw_radii, simp) end_tetrahedra.append(biggest_tetrahedron) end_tetrahedra_set.add(biggest_tetrahedron) return np.array(end_tetrahedra) - def filter_cavities(cavities, min_depth): + def filter_cavities(self, cavities, min_depth): return [cavity for cavity in cavities if cavity.depth >= min_depth] - def filter_channels_by_bottleneck(cavities, bottleneck): + def filter_channels_by_bottleneck(self, cavities, bottleneck): for cavity in cavities: cavity.channels = [channel for channel in cavity.channels if channel.bottleneck >= bottleneck] - def save_channels_to_pdb(cavities, filename, num_samples=5): + def save_channels_to_pdb(self, cavities, filename, num_samples=5): with open(filename, 'w') as pdb_file: atom_index = 1 for cavity in cavities: @@ -4977,137 +5301,49 @@ def save_channels_to_pdb(cavities, filename, num_samples=5): pdb_lines = [] for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append(f"ATOM {i:5d} H FIL T 1 {x:8.3f}{y:8.3f}{z:8.3f} {radius:6.2f}\n") + pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f %6.2f\n" % (i, x, y, z, radius)) for i in range(1, samples): - pdb_lines.append(f"CONECT{i:5d}{i + 1:5d}\n") + pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) pdb_file.writelines(pdb_lines) pdb_file.write("\n") atom_index += samples - def calculate_channel_length(centerline_spline): + def calculate_channel_length(self, centerline_spline): t_values = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], len(centerline_spline.x) * 10) points = 
centerline_spline(t_values) diffs = np.diff(points, axis=0) lengths = np.linalg.norm(diffs, axis=1) return np.sum(lengths) - atoms = atoms.select('not hetero') - coords = atoms.getCoords() - - if vis_channels == None: - vdw_radii = get_vdw_radii(atoms.getElements()) - - dela = Delaunay(coords) - voro = Voronoi(coords) - - s_prt = State(dela.simplices, dela.neighbors, voro.vertices) - s_tmp = State(*s_prt.get_state()) - s_prv = State(None, None, None) - - while True: - s_prv.set_state(*s_tmp.get_state()) - s_tmp.set_state(*delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) - if s_tmp == s_prv: - break - - s_srf = State(*s_tmp.get_state()) - s_inr = State(*delete_simplices3d(coords, *s_srf.get_state(), vdw_radii, r2, False)) + def calculate_channel_volume(self, centerline_spline, radius_spline): + import warnings + from scipy.integrate import quad, IntegrationWarning - l_first_layer_simp, l_second_layer_simp = surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh) - s_clr = State(*delete_section(l_first_layer_simp, *s_inr.get_state())) + warnings.filterwarnings("ignore", category=IntegrationWarning) - c_cavities = find_groups(s_clr.neigh) - c_surface_cavities = get_surface_cavities(c_cavities, s_clr.simp, l_second_layer_simp, s_clr, coords, vdw_radii, sparsity) - - find_deepest_tetrahedra(c_surface_cavities, s_clr.neigh) - c_filtered_cavities = filter_cavities(c_surface_cavities, min_depth) - merged_cavities = merge_cavities(c_filtered_cavities, s_clr.simp) + t_min = centerline_spline.x[0] + t_max = centerline_spline.x[-1] + + def differential_volume(t): + r = radius_spline(t) + area = np.pi * r**2 + dx_dt = centerline_spline(t, 1) + centerline_derivative = np.linalg.norm(dx_dt) + return area * centerline_derivative - for cavity in c_filtered_cavities: - dijkstra(cavity, *s_clr.get_state(), coords, vdw_radii) + volume, error = quad(differential_volume, t_min, t_max) - filter_channels_by_bottleneck(c_filtered_cavities, bottleneck) - channels = [channel 
for cavity in c_filtered_cavities for channel in cavity.channels] - channel_meshes = [create_mesh_from_spline(*channel.get_splines()) for channel in channels] + r_start = radius_spline(t_min) + r_end = radius_spline(t_max) - no_of_channels = len(channels) - print(f"Detected {no_of_channels} channels.") + hemisphere_volume_start = (2/3) * np.pi * r_start**3 + hemisphere_volume_end = (2/3) * np.pi * r_end**3 - if output_path: - print(f"Saving results to {output_path}") - save_channels_to_pdb(c_filtered_cavities, Path(output_path), num_samples=5) - else: - print("No output path given.") - - if visualizer == 'surface': - if stl_path: - visualize_external_grid(coords, s_srf.simp, stl_path) - else: - visualize_external_mesh(coords, s_srf.simp) - - elif visualizer == 'cavities': - if stl_path: - visualize_external_grid(coords, merged_cavities, stl_path) - else: - visualize_external_mesh(coords, merged_cavities, lines=visualize_external_grid(coords, s_srf.simp, ret_lines=True)) - - elif visualizer == 'channels': - if stl_path: - visualize_channel(channel_meshes, stl_path) - else: - visualize_external_grid(coords, s_srf.simp, channel_mesh=channel_meshes) - - return channels, s_srf.simp - - else: - channel_meshes = [create_mesh_from_spline(*channel.get_splines()) for channel in vis_channels] - if stl_path: - visualize_channel(channel_meshes, stl_path) - else: - if surface is not None: - visualize_external_grid(coords, surface, channel_mesh=channel_meshes) - else: - visualize_channel(channel_meshes) - - -def getChannelsParameters(channels): - """ - Extracts and prints the length and bottleneck of each channel in a given list of channels. - - This function iterates through a list of channel objects, printing the length and bottleneck - for each channel. It also collects these values into separate lists, which are returned - for further use. - - :arg channels: A list of channel objects, where each channel has attributes - `length` and `bottleneck`. 
These attributes represent the length of the channel - and the minimum radius (bottleneck) along its path, respectively. - :type channels: list - - :returns: Two lists containing the lengths and bottlenecks of the channels. - :rtype: tuple (list, list) - - Example output: - ``` - Channel 0: length 12.34, bottleneck 1.23 - Channel 1: length 15.67, bottleneck 1.56 - ``` - - Example usage: - ```python - lengths, bottlenecks = getChannelsParameters(channels) - ``` - """ - - lengths = [] - bottlenecks = [] - for i, channel in enumerate(channels): - print(f"Channel {i}: length {channel.length}, bottleneck {channel.bottleneck}") - lengths.append(channel.length) - bottlenecks.append(channel.bottleneck) + total_volume = volume + hemisphere_volume_start + hemisphere_volume_end - return lengths, bottlenecks + return total_volume From b1e040736d7f814529992a7f9db5d4e14435dff0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eryk=20Trzci=C5=84ski?= Date: Mon, 26 Aug 2024 00:20:10 +0200 Subject: [PATCH 06/48] Fixed calcChannelsMultipleFrames --- prody/proteins/interactions.py | 48 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 9063aefab..988936b3e 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -4795,8 +4795,15 @@ def calcChannels(atoms, output_path=None, r1=3, r2=1.25, min_depth=10, bottlenec LOGGER.info("Detected " + str(no_of_channels) + " channels.") if output_path: - LOGGER.info("Saving results to " + output_path + ".") - calculator.save_channels_to_pdb(c_filtered_cavities, Path(output_path), num_samples=5) + output_path = Path(output_path) + + if output_path.is_dir(): + output_path = output_path / "output.pdb" + elif not output_path.suffix == ".pdb": + output_path = output_path.with_suffix(".pdb") + + LOGGER.info("Saving results to " + str(output_path) + ".") + calculator.save_channels_to_pdb(c_filtered_cavities, output_path, 
num_samples=5) else: LOGGER.info("No output path given.") @@ -4819,10 +4826,16 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, **kwarg radius values, minimum depth, bottleneck values, etc. :type kwargs: dict - :returns: A list of lists, where each inner list contains channels computed for a particular frame or model. + :returns: List of channels and surfaces computed for a particular frame or model. :rtype: list of lists """ + if not checkAndImport('pathlib'): + errorMsg = 'To run showChannels, please install open3d.' + raise ImportError(errorMsg) + + from pathlib import Path + try: coords = getCoords(atoms) except AttributeError: @@ -4832,17 +4845,14 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, **kwarg raise TypeError('coords must be an object with `getCoords` method') channels_all = [] + surfaces_all = [] start_frame = kwargs.pop('start_frame', 0) stop_frame = kwargs.pop('stop_frame', -1) - - calculator_params = { - 'r1': kwargs.pop('r1', 3), - 'r2': kwargs.pop('r2', 1.25), - 'min_depth': kwargs.pop('min_depth', 10), - 'bottleneck': kwargs.pop('bottleneck', 1), - 'sparsity': kwargs.pop('sparsity', 15) - } - calculator = ChannelCalculator(atoms, **calculator_params) + + if output_path: + output_path = Path(output_path) + if output_path.suffix == ".pdb": + output_path = output_path.with_suffix('') if trajectory is not None: if isinstance(trajectory, Atomic): @@ -4860,8 +4870,12 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, **kwarg for j0, frame0 in enumerate(traj, start=start_frame): LOGGER.info('Frame: {0}'.format(j0)) atoms_copy.setCoords(frame0.getCoords()) - channels = calculator.calcChannels(atoms_copy, output_path / "_{0}.pdb".format(j0), **kwargs) + if output_path: + channels, surfaces = calcChannels(atoms_copy, str(output_path) + "{0}.pdb".format(j0), **kwargs) + else: + channels, surfaces = calcChannels(atoms_copy, **kwargs) channels_all.append(channels) + 
surfaces_all.append(surfaces) trajectory._nfi = nfi else: @@ -4869,12 +4883,16 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, **kwarg for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): LOGGER.info('Model: {0}'.format(i+start_frame)) atoms.setACSIndex(i+start_frame) - channels = calculator.calcChannels(atoms, output_path / "_{0}.pdb".format(i+start_frame), **kwargs) + if output_path: + channels, surfaces = calcChannels(atoms, str(output_path) + "{0}.pdb".format(i+start_frame), **kwargs) + else: + channels, surfaces = calcChannels(atoms, **kwargs) channels_all.append(channels) + surfaces_all.append(surfaces) else: LOGGER.info('Include trajectory or use multi-model PDB file.') - return channels_all + return channels_all, surfaces_all def getChannelsParameters(channels): """ From 9abdb49d2c832d592e179c9a0662f5b3fbaf6a62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eryk=20Trzci=C5=84ski?= Date: Tue, 27 Aug 2024 00:23:27 +0200 Subject: [PATCH 07/48] added getChannelAtoms --- prody/proteins/interactions.py | 64 ++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 6 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 988936b3e..e4544c747 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -45,7 +45,7 @@ 'calcHydrogenBondsTrajectory', 'calcHydrophobicOverlapingAreas', 'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory', 'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues', - 'showSminaTermValues', 'getVmdModel', 'calcChannels', 'calcChannelsMultipleFrames', 'getChannelsParameters', 'showChannels', 'showCavities'] + 'showSminaTermValues', 'getVmdModel', 'calcChannels', 'calcChannelsMultipleFrames', 'getChannelParameters', 'getChannelAtoms', 'showChannels', 'showCavities'] def cleanNumbers(listContacts): @@ -4807,7 +4807,7 @@ def calcChannels(atoms, output_path=None, r1=3, r2=1.25, min_depth=10, 
bottlenec else: LOGGER.info("No output path given.") - return channels, [coords, s_srf.simp, merged_cavities] + return channels, [coords, s_srf.simp, merged_cavities, s_clr.simp] def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, **kwargs): """ @@ -4894,7 +4894,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, **kwarg return channels_all, surfaces_all -def getChannelsParameters(channels): +def getChannelParameters(channels): """ Extracts and returns the lengths, bottlenecks, and volumes of each channel in a given list of channels. @@ -4923,6 +4923,58 @@ def getChannelsParameters(channels): volumes.append(channel.volume) return lengths, bottlenecks, volumes + +def getChannelAtoms(channels, num_samples=5): + """ + Generates an Atomic object representing the atoms along the paths of the given channels. + + This function takes a list of channel objects and generates atomic representations of the + channels based on their centerline splines and radius splines. The function samples points + along each channel's centerline and assigns atom positions at these points with corresponding + radii, creating a list of PDB-formatted lines. These lines are then converted into an Atomic + object using the ProDy library. + + :param channels: A list of channel objects. Each channel has a method `get_splines()` that + returns the centerline spline and radius spline of the channel. + :type channels: list + + :param num_samples: The number of atom samples to generate along each segment of the channel. + More samples result in a finer representation of the channel. Default is 5. + :type num_samples: int + + :returns: An Atomic object representing the atoms along the channels, with coordinates and + radii derived from the channel splines. 
+ :rtype: prody.atomic.Atomic + + Example usage: + atomic_structure = getChannelsAtoms(channels) + """ + def convert_lines_to_atomic(atom_lines): + import io + from prody import parsePDBStream + pdb_text = "\n".join(atom_lines) + pdb_stream = io.StringIO(pdb_text) + structure = parsePDBStream(pdb_stream) + + return structure + + atom_index = 1 + pdb_lines = [] + + if not isinstance(channels, list): + channels = [channels] + + for channel in channels: + centerline_spline, radius_spline = channel.get_splines() + samples = len(channel.tetrahedra) * num_samples + t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) + centers = centerline_spline(t) + radii = radius_spline(t) + + for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): + pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) + + return convert_lines_to_atomic(pdb_lines) class Channel: def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck, volume): @@ -5319,11 +5371,11 @@ def save_channels_to_pdb(self, cavities, filename, num_samples=5): pdb_lines = [] for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f %6.2f\n" % (i, x, y, z, radius)) - + pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) + for i in range(1, samples): pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) - + pdb_file.writelines(pdb_lines) pdb_file.write("\n") atom_index += samples From e2a80c465018968abae90cbe8b1ed316302c4a5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eryk=20Trzci=C5=84ski?= Date: Tue, 27 Aug 2024 00:26:24 +0200 Subject: [PATCH 08/48] small fixes --- prody/proteins/interactions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 
e4544c747..146720430 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -4911,7 +4911,7 @@ def getChannelParameters(channels): :rtype: tuple (list, list, list) Example usage: - lengths, bottlenecks, volumes = getChannelsParameters(channels) + lengths, bottlenecks, volumes = getChannelParameters(channels) """ lengths = [] @@ -4947,7 +4947,7 @@ def getChannelAtoms(channels, num_samples=5): :rtype: prody.atomic.Atomic Example usage: - atomic_structure = getChannelsAtoms(channels) + atomic_structure = getChannelAtoms(channels) """ def convert_lines_to_atomic(atom_lines): import io From 78bdab763292a5dbe25a2b4ee910d877407b1499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eryk=20Trzci=C5=84ski?= Date: Tue, 27 Aug 2024 18:27:18 +0200 Subject: [PATCH 09/48] protein + channels and separate outputs for channels --- prody/proteins/interactions.py | 130 +++++++++++++++++++++++---------- 1 file changed, 92 insertions(+), 38 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 146720430..1910b4781 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -4673,7 +4673,7 @@ def showCavities(surface, show_surface=False): vis.run() vis.destroy_window() -def calcChannels(atoms, output_path=None, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): +def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): """ Computes and identifies channels within a molecular structure using Voronoi and Delaunay tessellations. 
@@ -4802,14 +4802,17 @@ def calcChannels(atoms, output_path=None, r1=3, r2=1.25, min_depth=10, bottlenec elif not output_path.suffix == ".pdb": output_path = output_path.with_suffix(".pdb") - LOGGER.info("Saving results to " + str(output_path) + ".") - calculator.save_channels_to_pdb(c_filtered_cavities, output_path, num_samples=5) + if not separate: + LOGGER.info("Saving results to " + str(output_path) + ".") + else: + LOGGER.info("Saving multiple results to directory " + str(output_path.parent) + ".") + calculator.save_channels_to_pdb(c_filtered_cavities, output_path, separate) else: LOGGER.info("No output path given.") return channels, [coords, s_srf.simp, merged_cavities, s_clr.simp] -def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, **kwargs): +def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separate=False, **kwargs): """ Compute channels for each frame in a given trajectory or multi-model PDB file. @@ -4871,7 +4874,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, **kwarg LOGGER.info('Frame: {0}'.format(j0)) atoms_copy.setCoords(frame0.getCoords()) if output_path: - channels, surfaces = calcChannels(atoms_copy, str(output_path) + "{0}.pdb".format(j0), **kwargs) + channels, surfaces = calcChannels(atoms_copy, str(output_path) + "{0}.pdb".format(j0), separate, **kwargs) else: channels, surfaces = calcChannels(atoms_copy, **kwargs) channels_all.append(channels) @@ -4884,7 +4887,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, **kwarg LOGGER.info('Model: {0}'.format(i+start_frame)) atoms.setACSIndex(i+start_frame) if output_path: - channels, surfaces = calcChannels(atoms, str(output_path) + "{0}.pdb".format(i+start_frame), **kwargs) + channels, surfaces = calcChannels(atoms, str(output_path) + "{0}.pdb".format(i+start_frame), separate, **kwargs) else: channels, surfaces = calcChannels(atoms, **kwargs) channels_all.append(channels) @@ -4924,46 +4927,52 @@ def 
getChannelParameters(channels): return lengths, bottlenecks, volumes -def getChannelAtoms(channels, num_samples=5): +def getChannelAtoms(channels, protein=None, num_samples=5): """ - Generates an Atomic object representing the atoms along the paths of the given channels. + Generates an AtomGroup object representing the atoms along the paths of the given channels + and optionally combines them with an existing protein structure. This function takes a list of channel objects and generates atomic representations of the channels based on their centerline splines and radius splines. The function samples points along each channel's centerline and assigns atom positions at these points with corresponding - radii, creating a list of PDB-formatted lines. These lines are then converted into an Atomic - object using the ProDy library. + radii, creating a list of PDB-formatted lines. These lines are then converted into an AtomGroup + object using the ProDy library. If a protein structure is provided, it is combined with the + generated channel atoms by merging their respective PDB streams. :param channels: A list of channel objects. Each channel has a method `get_splines()` that returns the centerline spline and radius spline of the channel. :type channels: list + :param protein: An optional AtomGroup object representing a protein structure. If provided, + it will be combined with the generated channel atoms. + :type protein: prody.atomic.AtomGroup or None + :param num_samples: The number of atom samples to generate along each segment of the channel. More samples result in a finer representation of the channel. Default is 5. :type num_samples: int - :returns: An Atomic object representing the atoms along the channels, with coordinates and - radii derived from the channel splines. - :rtype: prody.atomic.Atomic + :returns: An AtomGroup object representing the combined atoms of the channels and the protein, + if a protein is provided. 
+ :rtype: prody.atomic.AtomGroup Example usage: - atomic_structure = getChannelAtoms(channels) + atomic_structure = getChannelAtoms(channels, protein) """ + import io + from prody import parsePDBStream, writePDBStream + def convert_lines_to_atomic(atom_lines): - import io - from prody import parsePDBStream pdb_text = "\n".join(atom_lines) pdb_stream = io.StringIO(pdb_text) structure = parsePDBStream(pdb_stream) - return structure atom_index = 1 pdb_lines = [] - + if not isinstance(channels, list): channels = [channels] - + for channel in channels: centerline_spline, radius_spline = channel.get_splines() samples = len(channel.tetrahedra) * num_samples @@ -4973,8 +4982,25 @@ def convert_lines_to_atomic(atom_lines): for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) + + if protein is not None: + protein_stream = io.StringIO() + writePDBStream(protein_stream, protein) - return convert_lines_to_atomic(pdb_lines) + protein_stream.seek(0) + + protein_lines = protein_stream.readlines() + if protein_lines[-1].strip() == 'END': + protein_lines = protein_lines[:-1] + + combined_pdb_text = "".join(protein_lines) + "\n".join(pdb_lines) + "\nEND\n" + combined_stream = io.StringIO(combined_pdb_text) + combined_structure = parsePDBStream(combined_stream) + + return combined_structure + + channels_atomic = convert_lines_to_atomic(pdb_lines) + return channels_atomic class Channel: def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck, volume): @@ -5358,27 +5384,55 @@ def filter_channels_by_bottleneck(self, cavities, bottleneck): for cavity in cavities: cavity.channels = [channel for channel in cavity.channels if channel.bottleneck >= bottleneck] - def save_channels_to_pdb(self, cavities, filename, num_samples=5): - with open(filename, 'w') as pdb_file: - atom_index = 1 + def save_channels_to_pdb(self, 
cavities, filename, separate=False, num_samples=5): + filename = str(filename) + + if separate: + channel_index = 0 for cavity in cavities: for channel in cavity.channels: - centerline_spline, radius_spline = channel.get_splines() - samples = len(channel.tetrahedra) * num_samples - t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) - centers = centerline_spline(t) - radii = radius_spline(t) - - pdb_lines = [] - for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) - - for i in range(1, samples): - pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) + channel_filename = filename.replace('.pdb', '_channel{0}.pdb'.format(channel_index)) + + with open(channel_filename, 'w') as pdb_file: + atom_index = 1 + centerline_spline, radius_spline = channel.get_splines() + samples = len(channel.tetrahedra) * num_samples + t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) + centers = centerline_spline(t) + radii = radius_spline(t) + + pdb_lines = [] + for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): + pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) + + for i in range(1, samples): + pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) + + pdb_file.writelines(pdb_lines) - pdb_file.writelines(pdb_lines) - pdb_file.write("\n") - atom_index += samples + channel_index += 1 + else: + with open(filename, 'w') as pdb_file: + atom_index = 1 + for cavity in cavities: + for channel in cavity.channels: + centerline_spline, radius_spline = channel.get_splines() + samples = len(channel.tetrahedra) * num_samples + t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) + centers = centerline_spline(t) + radii = radius_spline(t) + + pdb_lines = [] + for i, (x, y, z, radius) in 
enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): + pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) + + for i in range(1, samples): + pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) + + pdb_file.writelines(pdb_lines) + pdb_file.write("\n") + atom_index += samples + def calculate_channel_length(self, centerline_spline): t_values = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], len(centerline_spline.x) * 10) From c4afb8a6795ec38b0583b9a320cbd7cc5a4e46c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eryk=20Trzci=C5=84ski?= Date: Tue, 27 Aug 2024 18:38:52 +0200 Subject: [PATCH 10/48] fixing documentation --- prody/proteins/interactions.py | 46 ++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 1910b4781..d99411e1c 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -4680,34 +4680,40 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep This function analyzes the provided atomic structure to detect channels, which are voids or pathways within the molecular structure. It employs Voronoi and Delaunay tessellations to identify these regions, then filters and refines the detected channels based on various parameters such as the minimum depth - and bottleneck size. The results can be saved to a PDB file if an output path is provided. - + and bottleneck size. The results can be saved to a PDB file if an output path is provided. The `separate` + parameter controls whether each detected channel is saved to a separate file or if all channels are saved + in a single file. + The implementation is inspired by the methods described in the publication: "MOLE 2.0: advanced approach for analysis of biomacromolecular channels" by M. Berka, M. B. G. Czajka, J. P. M. T. Doyle, and M. T. L. 
Smith, published in Nucleic Acids Research, 2014. - :arg atoms: An object representing the molecular structure, typically containing atomic coordinates + :param atoms: An object representing the molecular structure, typically containing atomic coordinates and element types. :type atoms: `Atoms` object - :arg output_path: Optional path to save the resulting channels and associated data in PDB format. + :param output_path: Optional path to save the resulting channels and associated data in PDB format. If None, results are not saved. Default is None. :type output_path: str or None - :arg r1: The first radius threshold used during the deletion of simplices, which is used to define + :param separate: If True, each detected channel is saved to a separate PDB file. If False, all channels + are saved in a single PDB file. Default is False. + :type separate: bool + + :param r1: The first radius threshold used during the deletion of simplices, which is used to define the outer surface of the channels. Default is 3. :type r1: float - :arg r2: The second radius threshold used to define the inner surface of the channels. Default is 1.25. + :param r2: The second radius threshold used to define the inner surface of the channels. Default is 1.25. :type r2: float - :arg min_depth: The minimum depth a cavity must have to be considered as a channel. Default is 10. + :param min_depth: The minimum depth a cavity must have to be considered as a channel. Default is 10. :type min_depth: float - :arg bottleneck: The minimum allowed bottleneck size (narrowest point) for the channels. Default is 1. + :param bottleneck: The minimum allowed bottleneck size (narrowest point) for the channels. Default is 1. :type bottleneck: float - :arg sparsity: The sparsity parameter controls the sampling density when analyzing the molecular surface. + :param sparsity: The sparsity parameter controls the sampling density when analyzing the molecular surface. A higher value results in fewer sampling points. 
Default is 15. :type sparsity: int @@ -4718,9 +4724,6 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep the atomic coordinates, simplices defining the surface, and merged cavities. :rtype: tuple (list, list) - Example usage: - channels, surface = calcChannels(atoms, output_path="channels.pdb", r1=3.5, r2=1.5, min_depth=12, bottleneck=1.2, sparsity=10) - This function performs the following steps: 1. **Selection and Filtering:** Selects non-hetero atoms from the protein, calculates van der Waals radii, and performs 3D Delaunay triangulation and Voronoi tessellation on the coordinates. @@ -4731,6 +4734,9 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep Dijkstra's algorithm. 5. **Visualization and Saving:** Generates meshes for the detected channels, filters them by bottleneck size, and either saves the results to a PDB file or visualizes them based on the specified parameters. + + Example usage: + channels, surface = calcChannels(atoms, output_path="channels.pdb", separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) """ required = ['heapq', 'collections', 'scipy', 'pathlib', 'warnings'] @@ -4818,6 +4824,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat This function calculates the channels for each frame in a trajectory or for each model in a multi-model PDB file. The `kwargs` can include parameters necessary for channel calculation. + If the `separate` parameter is set to True, each detected channel will be saved in a separate PDB file. :param atoms: Atomic data or object containing atomic coordinates and methods for accessing them. :type atoms: object @@ -4825,12 +4832,25 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat :param trajectory: Trajectory object containing multiple frames or a multi-model PDB file. 
:type trajectory: Atomic or Ensemble object + :param output_path: Optional path to save the resulting channels and associated data in PDB format. + If a directory is specified, each frame/model will have its results saved in separate files. + If None, results are not saved. Default is None. + :type output_path: str or None + + :param separate: If True, each detected channel is saved to a separate PDB file for each frame/model. + If False, all channels for each frame/model are saved in a single file. Default is False. + :type separate: bool + :param kwargs: Additional parameters required for channel calculation. This can include parameters such as radius values, minimum depth, bottleneck values, etc. :type kwargs: dict - :returns: List of channels and surfaces computed for a particular frame or model. + :returns: List of channels and surfaces computed for each frame or model. Each entry in the list corresponds + to a specific frame or model. :rtype: list of lists + + Example usage: + channels_all, surfaces_all = calcChannelsMultipleFrames(atoms, trajectory=traj, output_path="channels.pdb", separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) """ if not checkAndImport('pathlib'): From 0a33feb5ca385d8b1873a06db36be0c46349ae75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eryk=20Trzci=C5=84ski?= Date: Tue, 27 Aug 2024 23:00:34 +0200 Subject: [PATCH 11/48] f-string removed --- prody/proteins/interactions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index d99411e1c..2fd29151f 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -4369,7 +4369,7 @@ def checkAndImport(package_name): import importlib.util if importlib.util.find_spec(package_name) is None: - LOGGER.warn(f"Package '{package_name}' is not installed. Please install it to use this function.") + LOGGER.warn("Package " + str(package_name) + " is not installed. 
Please install it to use this function.") return False return True From f78f7aaa6075a5f8424e7ca4483e38d7d536ba58 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Mon, 16 Dec 2024 22:59:36 +0100 Subject: [PATCH 12/48] Cavi-Finder - fixes for Python 2.7 --- prody/proteins/interactions.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 42f00fb1a..c6b98551a 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -5255,7 +5255,11 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep while True: s_prv.set_state(*s_tmp.get_state()) - s_tmp.set_state(*calculator.delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) + #s_tmp.set_state(*calculator.delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) + state = s_tmp.get_state() + result = calculator.delete_simplices3d(coords, *state, vdw_radii, r1, True) + s_tmp.set_state(*result) + if s_tmp == s_prv: break From 746f660cdd6398c8232a96408f342a62a3d77f54 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 17 Dec 2024 09:10:05 +0100 Subject: [PATCH 13/48] CaviFinder is now separated from InSty --- prody/proteins/__init__.py | 4 + prody/proteins/channels.py | 1179 ++++++++++++++++++++++++++++++++ prody/proteins/interactions.py | 1153 ------------------------------- 3 files changed, 1183 insertions(+), 1153 deletions(-) create mode 100644 prody/proteins/channels.py diff --git a/prody/proteins/__init__.py b/prody/proteins/__init__.py index b0abfef53..93db9acb3 100644 --- a/prody/proteins/__init__.py +++ b/prody/proteins/__init__.py @@ -264,6 +264,10 @@ else: __all__.extend(waterbridges.__all__) +from . import channels +from .channels import * +__all__.extend(channels.__all__) + from . 
import fixer from .fixer import * __all__.extend(fixer.__all__) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py new file mode 100644 index 000000000..b0a49d250 --- /dev/null +++ b/prody/proteins/channels.py @@ -0,0 +1,1179 @@ +# -*- coding: utf-8 -*- + +"""This module called CaviFinder and defines functions for calculating channels, tunnels and pores +within protein structure. +""" + +__author__ = 'Karolina Mikulska-Ruminska', 'Eryk Trzcinski' +__credits__ = ['Karolina Mikulska-Ruminska', 'Eryk Trzcinski'] +__email__ = ['karolamik@fizyka.umk.pl'] + +import numpy as np +from numpy import * +from prody import LOGGER, SETTINGS, PY3K +from prody.atomic import AtomGroup, Atom, Atomic, Selection, Select +from prody.atomic import flags, sliceAtomicData +from prody.utilities import importLA, checkCoords, showFigure, getCoords +from prody.measure import calcDistance, calcAngle, calcCenter +from prody.measure.contacts import findNeighbors +from prody.proteins import writePDB, parsePDB +from collections import Counter + +from prody.trajectory import TrajBase, Trajectory, Frame +from prody.ensemble import Ensemble + +import multiprocessing +from .fixer import * +from .compare import * +from prody.measure import calcTransformation, calcDistance, calcRMSD, superpose + + +__all__ = ['getVmdModel', 'calcChannels', 'calcChannelsMultipleFrames', + 'getChannelParameters', 'getChannelAtoms', 'showChannels', 'showCavities'] + + + +def checkAndImport(package_name): + """Check for package and import it if possible and return **True**. 
+ Otherwise, return **False + + :arg package_name: name of package + :type package_name: str + + :arg import_command: optional command to import submodules or with an alias + default **None** means use "import {0}".format(package_name) + :type import_command: None, str + """ + if not isinstance(package_name, str): + raise TypeError('package_name should be a string') + + import importlib.util + if importlib.util.find_spec(package_name) is None: + LOGGER.warn("Package " + str(package_name) + " is not installed. Please install it to use this function.") + return False + return True + +def getVmdModel(vmd_path, atoms): + """ + Generates a 3D model of molecular structures using VMD and returns it as an Open3D TriangleMesh. + + This function creates a temporary PDB file from the provided atomic data and uses VMD (Visual Molecular Dynamics) + to render this data into an STL file, which is then loaded into Open3D as a TriangleMesh. The function handles + the creation and cleanup of temporary files and manages the subprocess call to VMD. + + :param vmd_path: Path to the VMD executable. This is required to run VMD and execute the TCL script. + :type vmd_path: str + + :param atoms: Atomic data to be written to a PDB file. This should be an object or data structure + that is compatible with the `writePDB` function. + :type atoms: object + + :raises ImportError: If required libraries ('subprocess', 'pathlib', 'tempfile', 'open3d') are not installed, + an ImportError is raised, specifying which libraries are missing. + + :raises ValueError: If the STL file is not created or is empty, or if the STL file cannot be read as a TriangleMesh, + a ValueError is raised. + + :returns: An Open3D TriangleMesh object representing the 3D model generated from the PDB data. 
+ :rtype: open3d.geometry.TriangleMesh + + Example usage: + model = getVmdModel('/path/to/vmd', atoms) + """ + + required = ['subprocess', 'pathlib', 'tempfile', 'open3d'] + missing = [] + errorMsg = None + for name in required: + if not checkAndImport(name): + missing.append(name) + if errorMsg is None: + errorMsg = 'To run getVmdModel, please install {0}'.format(missing[0]) + else: + errorMsg += ', ' + name + + if len(missing) > 0: + if len(missing) > 1: + errorMsg = ', '.join(errorMsg.split(', ')[:-1]) + ' and ' + errorMsg.split(', ')[-1] + raise ImportError(errorMsg) + + import subprocess + from pathlib import Path + import tempfile + import open3d as o3d + + with tempfile.NamedTemporaryFile(suffix=".pdb", delete=False) as temp_pdb: + temp_pdb_path = Path(temp_pdb.name) + writePDB(temp_pdb.name, atoms) + + with tempfile.NamedTemporaryFile(suffix=".tcl", delete=False) as temp_script: + temp_script_path = Path(temp_script.name) + + output_path = temp_script_path.parent / "output.stl" + + vmd_script = """ + set file_path [lindex $argv 0] + set output_path [lindex $argv 1] + + mol new $file_path + mol modstyle 0 0 NewCartoon + + set id_matrix {{1 0 0 0} {0 1 0 0} {0 0 1 0} {0 0 0 1}} + molinfo top set center_matrix [list $id_matrix] + molinfo top set rotate_matrix [list $id_matrix] + molinfo top set scale_matrix [list $id_matrix] + + rendering_method stl + render STL $output_path + + exit + """ + + temp_script.write(vmd_script.encode('utf-8')) + + command = [vmd_path, '-e', str(temp_script_path), '-args', str(temp_pdb_path), str(output_path)] + + try: + subprocess.run(command, check=True) + except subprocess.CalledProcessError as e: + LOGGER.info("VMD exited with status " + str(e.returncode) + ".") + except Exception as e: + LOGGER.warn("An unexpected error occurred: " + str(e)) + finally: + temp_script_path.unlink(missing_ok=True) + temp_pdb_path.unlink(missing_ok=True) + if not output_path.exists() or output_path.stat().st_size == 0: + raise ValueError("STL file 
was not created or is empty.") + + stl_mesh = o3d.io.read_triangle_mesh(str(output_path)) + + if stl_mesh.is_empty(): + raise ValueError("Failed to read the STL file as a TriangleMesh.") + + if output_path.exists(): + output_path.unlink(missing_ok=True) + + LOGGER.info("Model created successfully.") + return stl_mesh + +def showChannels(channels, model=None, surface=None): + """ + Visualizes the channels, and optionally, the molecular model and surface, using Open3D. + + This function renders a 3D visualization of molecular channels based on their spline representations. + It can also display a molecular model (e.g., the protein structure) and a surface (e.g., cavity surface) + in the same visualization. The function utilizes the Open3D library to create and render the 3D meshes. + + :arg channels: A list of channel objects or a single channel object. Each channel should have a + `get_splines()` method that returns two CubicSpline objects: one for the centerline and one for the radii. + :type channels: list or single channel object + + :arg model: An optional Open3D TriangleMesh object representing the molecular model, such as a protein. + If provided, this model will be rendered in the visualization. + :type model: open3d.geometry.TriangleMesh, optional + + :arg surface: An optional list containing the surface data. The list should have two elements: + - `points`: The coordinates of the vertices on the surface. + - `simp`: The simplices that define the surface (e.g., triangles or tetrahedra). + If provided, the surface will be rendered as a wireframe overlay in the visualization. + :type surface: list (with two numpy arrays), optional + + :raises ImportError: If the Open3D library is not installed, an ImportError is raised, + prompting the user to install Open3D. + + :returns: None. This function only renders the visualization. 
+ + Example usage: + showChannels(channels, model=protein_mesh, surface=surface_data) + """ + + if not checkAndImport('open3d'): + errorMsg = 'To run showChannels, please install open3d.' + raise ImportError(errorMsg) + + import open3d as o3d + + def create_mesh_from_spline(centerline_spline, radius_spline, n=5): + N = n * len(centerline_spline.x) + t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], N) + centers = centerline_spline(t) + radii = radius_spline(t) + + spheres = [o3d.geometry.TriangleMesh.create_sphere(radius=r, resolution=20).translate(c) for r, c in zip(radii, centers)] + mesh = spheres[0] + for sphere in spheres[1:]: + mesh += sphere + + return mesh + + if not isinstance(channels, list): + channels = [channels] + + channel_meshes = [create_mesh_from_spline(*channel.get_splines()) for channel in channels] + meshes_to_visualize = [o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] + + if model is not None: + model.compute_vertex_normals() + model.paint_uniform_color([0.1, 0.7, 0.3]) + meshes_to_visualize.append(model) + + if channel_meshes is not None: + if not isinstance(channel_meshes, list): + channel_meshes = [channel_meshes] + for channel_mesh in channel_meshes: + channel_mesh.compute_vertex_normals() + channel_mesh.paint_uniform_color([0.5, 0.0, 0.5]) + meshes_to_visualize.extend(channel_meshes) + + if surface is not None: + points = surface[0] + simp = surface[1] + + triangles = [] + for tetra in simp: + triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), + sorted([tetra[0], tetra[1], tetra[3]]), + sorted([tetra[0], tetra[2], tetra[3]]), + sorted([tetra[1], tetra[2], tetra[3]])]) + + triangles = np.array(triangles) + triangles.sort(axis=1) + + triangles_tuple = [tuple(tri) for tri in triangles] + unique_triangles, counts = np.unique(triangles_tuple, return_counts=True, axis=0) + + surface_triangles = unique_triangles[counts == 1] + + lines = [] + for simplex in surface_triangles: + for i in 
range(3): + for j in range(i + 1, 3): + lines.append([simplex[i], simplex[j]]) + + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + + meshes_to_visualize.append(line_set) + + if len(meshes_to_visualize) > 1: + o3d.visualization.draw_geometries(meshes_to_visualize) + else: + LOGGER.info("Nothing to visualize.") + +def showCavities(surface, show_surface=False): + """ + Visualizes the cavities within a molecular surface using Open3D. + + This function displays a 3D visualization of cavities detected in a molecular structure. + It uses the Open3D library to render the cavities as a triangle mesh. Optionally, it can also + display the molecular surface as a wireframe overlay. + + :arg surface: A list containing three elements: + - `points`: The coordinates of the vertices (atoms) in the molecular structure. + - `surf_simp`: The simplices that define the molecular surface. + - `simp_cavities`: The simplices corresponding to the detected cavities. + :type surface: list (with three numpy arrays) + + :arg show_surface: A boolean flag indicating whether to display the molecular surface + as a wireframe overlay in the visualization. If True, the surface will be displayed + in addition to the cavities. Default is False. + :type show_surface: bool + + :raises ImportError: If the Open3D library is not installed, an ImportError is raised, + prompting the user to install Open3D. + + :returns: None + + Example usage: + showCavities(surface_data, show_surface=True) + """ + + if not checkAndImport('open3d'): + errorMsg = 'To run showChannels, please install open3d.' 
+ raise ImportError(errorMsg) + + import open3d as o3d + + points = surface[0] + surf_simp = surface[1] + simp_cavities = surface[2] + + triangles = [] + for tetra in simp_cavities: + triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), + sorted([tetra[0], tetra[1], tetra[3]]), + sorted([tetra[0], tetra[2], tetra[3]]), + sorted([tetra[1], tetra[2], tetra[3]])]) + + surface_triangles = np.unique(np.array(triangles), axis=0, return_counts=True)[0] + + mesh = o3d.geometry.TriangleMesh() + mesh.vertices = o3d.utility.Vector3dVector(points) + mesh.triangles = o3d.utility.Vector3iVector(surface_triangles) + + mesh.compute_vertex_normals() + mesh.paint_uniform_color([0.1, 0.7, 0.3]) + + vis = o3d.visualization.Visualizer() + vis.create_window() + vis.add_geometry(mesh) + + if show_surface == True: + triangles = [] + for tetra in surf_simp: + triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), + sorted([tetra[0], tetra[1], tetra[3]]), + sorted([tetra[0], tetra[2], tetra[3]]), + sorted([tetra[1], tetra[2], tetra[3]])]) + + triangles = np.array(triangles) + triangles.sort(axis=1) + + triangles_tuple = [tuple(tri) for tri in triangles] + unique_triangles, counts = np.unique(triangles_tuple, return_counts=True, axis=0) + + surface_triangles = unique_triangles[counts == 1] + + lines = [] + for simplex in surface_triangles: + for i in range(3): + for j in range(i + 1, 3): + lines.append([simplex[i], simplex[j]]) + + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + + vis.add_geometry(line_set) + + vis.get_render_option().mesh_show_back_face = True + vis.get_render_option().background_color = np.array([1, 1, 1]) + vis.update_renderer() + vis.run() + vis.destroy_window() + +def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): + """ + Computes and identifies channels within a molecular structure using Voronoi and 
Delaunay tessellations. + + This function analyzes the provided atomic structure to detect channels, which are voids or pathways + within the molecular structure. It employs Voronoi and Delaunay tessellations to identify these regions, + then filters and refines the detected channels based on various parameters such as the minimum depth + and bottleneck size. The results can be saved to a PDB file if an output path is provided. The `separate` + parameter controls whether each detected channel is saved to a separate file or if all channels are saved + in a single file. + + The implementation is inspired by the methods described in the publication: + "MOLE 2.0: advanced approach for analysis of biomacromolecular channels" by M. Berka, M. B. G. Czajka, + J. P. M. T. Doyle, and M. T. L. Smith, published in Nucleic Acids Research, 2014. + + :param atoms: An object representing the molecular structure, typically containing atomic coordinates + and element types. + :type atoms: `Atoms` object + + :param output_path: Optional path to save the resulting channels and associated data in PDB format. + If None, results are not saved. Default is None. + :type output_path: str or None + + :param separate: If True, each detected channel is saved to a separate PDB file. If False, all channels + are saved in a single PDB file. Default is False. + :type separate: bool + + :param r1: The first radius threshold used during the deletion of simplices, which is used to define + the outer surface of the channels. Default is 3. + :type r1: float + + :param r2: The second radius threshold used to define the inner surface of the channels. Default is 1.25. + :type r2: float + + :param min_depth: The minimum depth a cavity must have to be considered as a channel. Default is 10. + :type min_depth: float + + :param bottleneck: The minimum allowed bottleneck size (narrowest point) for the channels. Default is 1. 
+ :type bottleneck: float + + :param sparsity: The sparsity parameter controls the sampling density when analyzing the molecular surface. + A higher value results in fewer sampling points. Default is 15. + :type sparsity: int + + :returns: A tuple containing two elements: + - `channels`: A list of detected channels, where each channel is an object containing information + about its path and geometry. + - `surface`: A list containing additional information for further visualization, including + the atomic coordinates, simplices defining the surface, and merged cavities. + :rtype: tuple (list, list) + + This function performs the following steps: + 1. **Selection and Filtering:** Selects non-hetero atoms from the protein, calculates van der Waals radii, and performs + 3D Delaunay triangulation and Voronoi tessellation on the coordinates. + 2. **State Management:** Creates and updates different stages of channel detection of the protein structure to filter out simplices + based on the given radii. + 3. **Surface Layer Calculation:** Determines the surface and second-layer simplices from the filtered results. + 4. **Cavity and Channel Detection:** Finds and filters cavities based on their depth and calculates channels using + Dijkstra's algorithm. + 5. **Visualization and Saving:** Generates meshes for the detected channels, filters them by bottleneck size, and either + saves the results to a PDB file or visualizes them based on the specified parameters. 
+ + Example usage: + channels, surface = calcChannels(atoms, output_path="channels.pdb", separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) + """ + + required = ['heapq', 'collections', 'scipy', 'pathlib', 'warnings'] + missing = [] + errorMsg = None + for name in required: + if not checkAndImport(name): + missing.append(name) + if errorMsg is None: + errorMsg = 'To run calcChannels, please install {0}'.format(missing[0]) + else: + errorMsg += ', ' + name + + if len(missing) > 0: + if len(missing) > 1: + errorMsg = ', '.join(errorMsg.split(', ')[:-1]) + ' and ' + errorMsg.split(', ')[-1] + raise ImportError(errorMsg) + + from scipy.spatial import Voronoi, Delaunay + from pathlib import Path + + calculator = ChannelCalculator(atoms, r1, r2, min_depth, bottleneck, sparsity) + + atoms = atoms.select('not hetero') + coords = atoms.getCoords() + + vdw_radii = calculator.get_vdw_radii(atoms.getElements()) + + dela = Delaunay(coords) + voro = Voronoi(coords) + + s_prt = State(dela.simplices, dela.neighbors, voro.vertices) + s_tmp = State(*s_prt.get_state()) + s_prv = State(None, None, None) + + while True: + s_prv.set_state(*s_tmp.get_state()) + #s_tmp.set_state(*calculator.delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) + state = s_tmp.get_state() + result = calculator.delete_simplices3d(coords, *state, vdw_radii, r1, True) + s_tmp.set_state(*result) + + if s_tmp == s_prv: + break + + s_srf = State(*s_tmp.get_state()) + s_inr = State(*calculator.delete_simplices3d(coords, *s_srf.get_state(), vdw_radii, r2, False)) + + l_first_layer_simp, l_second_layer_simp = calculator.surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh) + s_clr = State(*calculator.delete_section(l_first_layer_simp, *s_inr.get_state())) + + c_cavities = calculator.find_groups(s_clr.neigh) + c_surface_cavities = calculator.get_surface_cavities(c_cavities, s_clr.simp, l_second_layer_simp, s_clr, coords, vdw_radii, sparsity) + + 
calculator.find_deepest_tetrahedra(c_surface_cavities, s_clr.neigh) + c_filtered_cavities = calculator.filter_cavities(c_surface_cavities, min_depth) + merged_cavities = calculator.merge_cavities(c_filtered_cavities, s_clr.simp) + + for cavity in c_filtered_cavities: + calculator.dijkstra(cavity, *s_clr.get_state(), coords, vdw_radii) + + calculator.filter_channels_by_bottleneck(c_filtered_cavities, bottleneck) + channels = [channel for cavity in c_filtered_cavities for channel in cavity.channels] + + no_of_channels = len(channels) + LOGGER.info("Detected " + str(no_of_channels) + " channels.") + + if output_path: + output_path = Path(output_path) + + if output_path.is_dir(): + output_path = output_path / "output.pdb" + elif not output_path.suffix == ".pdb": + output_path = output_path.with_suffix(".pdb") + + if not separate: + LOGGER.info("Saving results to " + str(output_path) + ".") + else: + LOGGER.info("Saving multiple results to directory " + str(output_path.parent) + ".") + calculator.save_channels_to_pdb(c_filtered_cavities, output_path, separate) + else: + LOGGER.info("No output path given.") + + return channels, [coords, s_srf.simp, merged_cavities, s_clr.simp] + +def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separate=False, **kwargs): + """ + Compute channels for each frame in a given trajectory or multi-model PDB file. + + This function calculates the channels for each frame in a trajectory or for each model + in a multi-model PDB file. The `kwargs` can include parameters necessary for channel calculation. + If the `separate` parameter is set to True, each detected channel will be saved in a separate PDB file. + + :param atoms: Atomic data or object containing atomic coordinates and methods for accessing them. + :type atoms: object + + :param trajectory: Trajectory object containing multiple frames or a multi-model PDB file. 
+ :type trajectory: Atomic or Ensemble object + + :param output_path: Optional path to save the resulting channels and associated data in PDB format. + If a directory is specified, each frame/model will have its results saved in separate files. + If None, results are not saved. Default is None. + :type output_path: str or None + + :param separate: If True, each detected channel is saved to a separate PDB file for each frame/model. + If False, all channels for each frame/model are saved in a single file. Default is False. + :type separate: bool + + :param kwargs: Additional parameters required for channel calculation. This can include parameters such as + radius values, minimum depth, bottleneck values, etc. + :type kwargs: dict + + :returns: List of channels and surfaces computed for each frame or model. Each entry in the list corresponds + to a specific frame or model. + :rtype: list of lists + + Example usage: + channels_all, surfaces_all = calcChannelsMultipleFrames(atoms, trajectory=traj, output_path="channels.pdb", separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) + """ + + if not checkAndImport('pathlib'): + errorMsg = 'To run showChannels, please install open3d.' 
+ raise ImportError(errorMsg) + + from pathlib import Path + + try: + coords = getCoords(atoms) + except AttributeError: + try: + checkCoords(coords) + except TypeError: + raise TypeError('coords must be an object with `getCoords` method') + + channels_all = [] + surfaces_all = [] + start_frame = kwargs.pop('start_frame', 0) + stop_frame = kwargs.pop('stop_frame', -1) + + if output_path: + output_path = Path(output_path) + if output_path.suffix == ".pdb": + output_path = output_path.with_suffix('') + + if trajectory is not None: + if isinstance(trajectory, Atomic): + trajectory = Ensemble(trajectory) + + nfi = trajectory._nfi + trajectory.reset() + + if stop_frame == -1: + traj = trajectory[start_frame:] + else: + traj = trajectory[start_frame:stop_frame+1] + + atoms_copy = atoms.copy() + for j0, frame0 in enumerate(traj, start=start_frame): + LOGGER.info('Frame: {0}'.format(j0)) + atoms_copy.setCoords(frame0.getCoords()) + if output_path: + channels, surfaces = calcChannels(atoms_copy, str(output_path) + "{0}.pdb".format(j0), separate, **kwargs) + else: + channels, surfaces = calcChannels(atoms_copy, **kwargs) + channels_all.append(channels) + surfaces_all.append(surfaces) + trajectory._nfi = nfi + + else: + if atoms.numCoordsets() > 1: + for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): + LOGGER.info('Model: {0}'.format(i+start_frame)) + atoms.setACSIndex(i+start_frame) + if output_path: + channels, surfaces = calcChannels(atoms, str(output_path) + "{0}.pdb".format(i+start_frame), separate, **kwargs) + else: + channels, surfaces = calcChannels(atoms, **kwargs) + channels_all.append(channels) + surfaces_all.append(surfaces) + else: + LOGGER.info('Include trajectory or use multi-model PDB file.') + + return channels_all, surfaces_all + +def getChannelParameters(channels): + """ + Extracts and returns the lengths, bottlenecks, and volumes of each channel in a given list of channels. 
+ + This function iterates through a list of channel objects, extracting the length, bottleneck, + and volume of each channel. These values are collected into separate lists, which are returned + as a tuple for further use. + + :arg channels: A list of channel objects, where each channel has attributes `length`, `bottleneck`, + and `volume`. These attributes represent the length of the channel, the minimum radius + (bottleneck) along its path, and the total volume of the channel, respectively. + :type channels: list + + :returns: Three lists containing the lengths, bottlenecks, and volumes of the channels. + :rtype: tuple (list, list, list) + + Example usage: + lengths, bottlenecks, volumes = getChannelParameters(channels) + """ + + lengths = [] + bottlenecks = [] + volumes = [] + for channel in channels: + lengths.append(channel.length) + bottlenecks.append(channel.bottleneck) + volumes.append(channel.volume) + + return lengths, bottlenecks, volumes + +def getChannelAtoms(channels, protein=None, num_samples=5): + """ + Generates an AtomGroup object representing the atoms along the paths of the given channels + and optionally combines them with an existing protein structure. + + This function takes a list of channel objects and generates atomic representations of the + channels based on their centerline splines and radius splines. The function samples points + along each channel's centerline and assigns atom positions at these points with corresponding + radii, creating a list of PDB-formatted lines. These lines are then converted into an AtomGroup + object using the ProDy library. If a protein structure is provided, it is combined with the + generated channel atoms by merging their respective PDB streams. + + :param channels: A list of channel objects. Each channel has a method `get_splines()` that + returns the centerline spline and radius spline of the channel. 
+ :type channels: list + + :param protein: An optional AtomGroup object representing a protein structure. If provided, + it will be combined with the generated channel atoms. + :type protein: prody.atomic.AtomGroup or None + + :param num_samples: The number of atom samples to generate along each segment of the channel. + More samples result in a finer representation of the channel. Default is 5. + :type num_samples: int + + :returns: An AtomGroup object representing the combined atoms of the channels and the protein, + if a protein is provided. + :rtype: prody.atomic.AtomGroup + + Example usage: + atomic_structure = getChannelAtoms(channels, protein) + """ + import io + from prody import parsePDBStream, writePDBStream + + def convert_lines_to_atomic(atom_lines): + pdb_text = "\n".join(atom_lines) + pdb_stream = io.StringIO(pdb_text) + structure = parsePDBStream(pdb_stream) + return structure + + atom_index = 1 + pdb_lines = [] + + if not isinstance(channels, list): + channels = [channels] + + for channel in channels: + centerline_spline, radius_spline = channel.get_splines() + samples = len(channel.tetrahedra) * num_samples + t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) + centers = centerline_spline(t) + radii = radius_spline(t) + + for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): + pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) + + if protein is not None: + protein_stream = io.StringIO() + writePDBStream(protein_stream, protein) + + protein_stream.seek(0) + + protein_lines = protein_stream.readlines() + if protein_lines[-1].strip() == 'END': + protein_lines = protein_lines[:-1] + + combined_pdb_text = "".join(protein_lines) + "\n".join(pdb_lines) + "\nEND\n" + combined_stream = io.StringIO(combined_pdb_text) + combined_structure = parsePDBStream(combined_stream) + + return combined_structure + + channels_atomic = 
convert_lines_to_atomic(pdb_lines) + return channels_atomic + +class Channel: + def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck, volume): + self.tetrahedra = tetrahedra + self.centerline_spline = centerline_spline + self.radius_spline = radius_spline + self.length = length + self.bottleneck = bottleneck + self.volume = volume + + def get_splines(self): + return self.centerline_spline, self.radius_spline + +class State: + def __init__(self, simplices, neighbors, vertices): + self.simp = simplices + self.neigh = neighbors + self.verti = vertices + + def __eq__(self, other): + if not isinstance(other, State): + return False + return (np.array_equal(self.simp, other.simp) and + np.array_equal(self.neigh, other.neigh) and + np.array_equal(self.verti, other.verti)) + + def set_state(self, simplices, neighbors, vertices): + self.simp = simplices + self.neigh = neighbors + self.verti = vertices + + def get_state(self): + return self.simp, self.neigh, self.verti + +class Cavity: + def __init__(self, tetrahedra, is_connected_to_surface): + self.tetrahedra = tetrahedra + self.is_connected_to_surface = is_connected_to_surface + self.starting_tetrahedron = None + self.channels = [] + self.depth = 0 + + def make_surface(self): + self.is_connected_to_surface = True + + def set_exit_tetrahedra(self, exit_tetrahedra, end_tetrahedra): + self.exit_tetrahedra = exit_tetrahedra + self.end_tetrahedra = end_tetrahedra + + def set_starting_tetrahedron(self, tetrahedron): + self.starting_tetrahedron = tetrahedron + + def set_depth(self, depth): + self.depth = depth + + def add_channel(self, channel): + self.channels.append(channel) + +class ChannelCalculator: + def __init__(self, atoms, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): + self.atoms = atoms + self.r1 = r1 + self.r2 = r2 + self.min_depth = min_depth + self.bottleneck = bottleneck + self.sparsity = sparsity + + def sphere_fit(self, vertices, tetrahedron, vertice, vdw_radii, r): + 
center = vertice + d_sum = sum(np.linalg.norm(center - vertices[atom]) for atom in tetrahedron) + r_sum = sum(r + vdw_radii[atom] for atom in tetrahedron) + + return d_sum >= r_sum + + def delete_simplices3d(self, points, simplices, neighbors, vertices, vdw_radii, r, surface): + simp, neigh, verti, deleted = [], [], [], [] + + for i, tetrahedron in enumerate(simplices): + should_delete = (-1 in neighbors[i] and self.sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r)) if surface else not self.sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r) + + if should_delete: + deleted.append(i) + else: + simp.append(simplices[i]) + neigh.append(neighbors[i]) + verti.append(vertices[i]) + + simp = np.array(simp) + neigh = np.array(neigh) + verti = np.array(verti) + deleted = np.array(deleted) + + mask = np.isin(neigh, deleted) + neigh[mask] = -1 + + for i in reversed(deleted): + mask = (neigh > i) & (neigh != -1) + neigh[mask] -= 1 + + return simp, neigh, verti + + def delete_section(self, simplices_subset, simplices, neighbors, vertices, reverse=False): + simp, neigh, verti, deleted = [], [], [], [] + + for i, tetrahedron in enumerate(simplices): + match = any((simplices_subset == tetrahedron).all(axis=1)) + if reverse: + if match: + simp.append(tetrahedron) + neigh.append(neighbors[i]) + verti.append(vertices[i]) + else: + deleted.append(i) + else: + if match: + deleted.append(i) + else: + simp.append(tetrahedron) + neigh.append(neighbors[i]) + verti.append(vertices[i]) + + simp, neigh, verti = map(np.array, [simp, neigh, verti]) + deleted = np.array(deleted) + + mask = np.isin(neigh, deleted) + neigh[mask] = -1 + + for i in reversed(deleted): + neigh = np.where((neigh > i) & (neigh != -1), neigh - 1, neigh) + + return simp, neigh, verti + + def get_vdw_radii(self, atoms): + vdw_radii_dict = { + 'H': 1.20, 'HE': 1.40, 'LI': 1.82, 'BE': 1.53, 'B': 1.92, 'C': 1.70, + 'N': 1.55, 'O': 1.52, 'F': 1.47, 'NE': 1.54, 'NA': 2.27, 'MG': 1.73, + 'AL': 1.84, 'SI': 2.10, 
'P': 1.80, 'S': 1.80, 'CL': 1.75, 'AR': 1.88, + 'K': 2.75, 'CA': 2.31, 'SC': 2.11, 'NI': 1.63, 'CU': 1.40, 'ZN': 1.39, + 'GA': 1.87, 'GE': 2.11, 'AS': 1.85, 'SE': 1.90, 'BR': 1.85, 'KR': 2.02, + 'RB': 3.03, 'SR': 2.49, 'PD': 1.63, 'AG': 1.72, 'CD': 1.58, 'IN': 1.93, + 'SN': 2.17, 'SB': 2.06, 'TE': 2.06, 'I': 1.98, 'XE': 2.16, 'CS': 3.43, + 'BA': 2.68, 'PT': 1.75, 'AU': 1.66, 'HG': 1.55, 'TL': 1.96, 'PB': 2.02, + 'BI': 2.07, 'PO': 1.97, 'AT': 2.02, 'RN': 2.20, 'FR': 3.48, 'RA': 2.83, + 'U': 1.86, 'FE': 2.44 + } + + return np.array([vdw_radii_dict[atom] for atom in atoms]) + + def surface_layer(self, shape_simplices, filtered_simplices, shape_neighbors): + surface_simplices, surface_neighbors = [], [] + interior_simplices = [] + + for i in range(len(shape_simplices)): + if -1 in shape_neighbors[i]: + surface_simplices.append(shape_simplices[i]) + surface_neighbors.append(shape_neighbors[i]) + else: + interior_simplices.append(shape_simplices[i]) + + surface_simplices = np.array(surface_simplices) + surface_neighbors = np.array(surface_neighbors) + interior_simplices = np.array(interior_simplices) + + filtered_surface_simplices = surface_simplices[ + np.any(np.all(surface_simplices[:, None] == filtered_simplices, axis=2), axis=1) + ] + filtered_surface_neighbors = surface_neighbors[ + np.any(np.all(surface_simplices[:, None] == filtered_simplices, axis=2), axis=1) + ] + + filtered_surface_neighbors = np.unique(filtered_surface_neighbors) + filtered_surface_neighbors = filtered_surface_neighbors[filtered_surface_neighbors != 0] + + filtered_interior_simplices = interior_simplices[ + np.any(np.all(interior_simplices[:, None] == filtered_simplices, axis=2), axis=1) + ] + + surface_layer_neighbor_simplices = shape_simplices[filtered_surface_neighbors] + + second_layer = filtered_interior_simplices[ + np.any(np.all(filtered_interior_simplices[:, None] == surface_layer_neighbor_simplices, axis=2), axis=1) + ] + + return filtered_surface_simplices, second_layer + + + def 
def find_groups(self, neigh, is_cavity=True):
    """Partition tetrahedra into connected groups using the neighbour table.

    Two tetrahedra belong to the same group when one can be reached from the other
    by following neighbour links; ``-1`` entries are ignored.

    :arg neigh: neighbour indices, one row per tetrahedron, ``-1`` meaning
        "no neighbour"
    :type neigh: :class:`numpy.ndarray`

    :arg is_cavity: if true (default) each group is wrapped as
        ``Cavity(indices, False)``; if false the raw index arrays are returned
    :type is_cavity: bool

    :returns: one entry per connected group, in order of the lowest unvisited index
    :rtype: list
    """
    count = neigh.shape[0]
    visited = np.zeros(count, dtype=bool)
    groups = []

    for seed in range(count):
        if visited[seed]:
            continue

        # Iterative depth-first traversal; an explicit stack avoids recursion limits.
        stack = [seed]
        members = []
        while stack:
            index = stack.pop()
            if visited[index]:
                continue
            visited[index] = True
            members.append(index)
            stack.extend(neighbor for neighbor in neigh[index]
                         if neighbor != -1 and not visited[neighbor])

        members = np.array(members)
        groups.append(Cavity(members, False) if is_cavity else members)

    return groups


def get_surface_cavities(self, cavities, interior_simplices, second_layer, state, points, vdw_radii, sparsity):
    """Select the cavities that touch the second layer and record their exits.

    For every cavity whose tetrahedra include at least one flagged as an exit, the
    cavity is marked via ``make_surface()``, its exit tetrahedra and the sparse
    subset returned by ``self.get_end_tetrahedra`` are stored through
    ``set_exit_tetrahedra``, and the cavity is added to the result.

    :arg cavities: cavity objects exposing ``tetrahedra``, ``make_surface()`` and
        ``set_exit_tetrahedra()``
    :arg interior_simplices: tetrahedra indexed by ``cavity.tetrahedra``
    :arg second_layer: tetrahedra of the layer beneath the surface
    :arg state: object whose ``verti`` and ``simp`` attributes are passed to
        ``self.get_end_tetrahedra``
    :arg points: atom coordinates
    :arg vdw_radii: per-atom radii
    :arg sparsity: minimum separation between chosen end tetrahedra

    :returns: the cavities that have at least one exit tetrahedron
    :rtype: list
    """
    surface_cavities = []

    for cavity in cavities:
        tetrahedra = cavity.tetrahedra
        # NOTE(review): np.isin tests each atom index against every value in
        # second_layer, so a tetrahedron is flagged when all four of its atoms occur
        # anywhere in second_layer - not only when the whole row is present. Left
        # unchanged; confirm this is the intended criterion.
        second_layer_mask = np.isin(interior_simplices[tetrahedra], second_layer).all(axis=1)
        if not np.any(second_layer_mask):
            continue

        cavity.make_surface()
        exit_tetrahedra = tetrahedra[second_layer_mask]
        end_tetrahedra = self.get_end_tetrahedra(
            exit_tetrahedra, state.verti, points, vdw_radii, state.simp, sparsity)
        cavity.set_exit_tetrahedra(exit_tetrahedra, end_tetrahedra)
        surface_cavities.append(cavity)

    return surface_cavities


def merge_cavities(self, cavities, simplices):
    """Collect the tetrahedra of all *cavities* into one array.

    :arg cavities: cavity objects exposing a ``tetrahedra`` index array
    :arg simplices: tetrahedra indexed by those arrays

    :returns: the rows of *simplices* referenced by any cavity, in cavity order;
        an empty slice of *simplices* when *cavities* is empty
    :rtype: :class:`numpy.ndarray`
    """
    if not cavities:
        # np.concatenate raises ValueError on an empty list, which happens whenever
        # no cavity survives filtering; return an empty selection instead.
        return simplices[:0]

    merged_tetrahedra = np.concatenate([cavity.tetrahedra for cavity in cavities])
    return simplices[merged_tetrahedra]


def find_deepest_tetrahedra(self, cavities, neighbors):
    """Find, for each cavity, the tetrahedron farthest (in hops) from its exits.

    A breadth-first search starts from all exit tetrahedra at depth 0 and only
    steps onto tetrahedra that belong to the cavity. The first tetrahedron reached
    at the greatest depth is stored with ``set_starting_tetrahedron`` (as a
    one-element array) and the depth with ``set_depth``.

    :arg cavities: cavity objects exposing ``tetrahedra``, ``exit_tetrahedra``,
        ``set_starting_tetrahedron()`` and ``set_depth()``
    :arg neighbors: neighbour indices, one row per tetrahedron, ``-1`` meaning
        "no neighbour"

    :returns: None; results are stored on the cavity objects
    """
    from collections import deque

    for cavity in cavities:
        exit_tetrahedra = cavity.exit_tetrahedra
        # Set membership is O(1); testing against the ndarray scanned it per neighbour.
        members = set(np.asarray(cavity.tetrahedra).tolist())

        visited = np.zeros(neighbors.shape[0], dtype=bool)
        visited[exit_tetrahedra] = True
        queue = deque((tetra, 0) for tetra in exit_tetrahedra)
        max_depth = -1
        deepest_tetrahedron = None

        while queue:
            current, depth = queue.popleft()
            if depth > max_depth:
                max_depth = depth
                deepest_tetrahedron = current

            for neighbor in neighbors[current]:
                if neighbor != -1 and not visited[neighbor] and neighbor in members:
                    visited[neighbor] = True
                    queue.append((neighbor, depth + 1))

        cavity.set_starting_tetrahedron(np.array([deepest_tetrahedron]))
        cavity.set_depth(max_depth)


def dijkstra(self, cavity, simplices, neighbors, vertices, points, vdw_radii):
    """Trace channels from the cavity's starting tetrahedra to its end tetrahedra.

    For every (end, start) pair that differs, the cheapest path through the
    cavity's tetrahedra is searched with Dijkstra's algorithm. Stepping onto a
    tetrahedron costs ``l / (d**2 + 1e-3)``, where ``l`` is the distance between
    the two tetrahedron centres and ``d`` is the smallest gap between the target
    centre and the surface of any of its atoms, so narrow passages are expensive.
    Each path found is wrapped in a ``Channel`` built from
    ``self.process_channel`` and added with ``cavity.add_channel``.

    :arg cavity: object exposing ``tetrahedra``, ``end_tetrahedra``,
        ``starting_tetrahedron`` and ``add_channel()``
    :arg simplices: tetrahedra, one row of atom indices per tetrahedron
    :arg neighbors: neighbour indices, one row per tetrahedron
    :arg vertices: one centre per tetrahedron
    :arg points: atom coordinates
    :arg vdw_radii: per-atom radii

    :returns: None; channels are stored on *cavity*
    """
    import heapq

    tetrahedra_set = set(cavity.tetrahedra)

    def calculate_weight(current_tetra, neighbor_tetra):
        neighbor_vertex = vertices[neighbor_tetra]
        step = np.linalg.norm(vertices[current_tetra] - neighbor_vertex)

        atoms = simplices[neighbor_tetra]
        clearance = np.min(np.linalg.norm(points[atoms] - neighbor_vertex, axis=1) - vdw_radii[atoms])

        # The small constant keeps the weight finite when the clearance is zero.
        return step / (clearance ** 2 + 1e-3)

    def shortest_path(start, goal):
        queue = [(0, start)]
        distances = {start: 0}
        previous = {start: None}

        while queue:
            current_distance, current_tetra = heapq.heappop(queue)

            if current_tetra == goal:
                path = []
                while current_tetra is not None:
                    path.append(current_tetra)
                    current_tetra = previous[current_tetra]
                return path[::-1]

            # Stale heap entry: a shorter route to this tetrahedron was already found.
            if current_distance > distances[current_tetra]:
                continue

            for neighbor in neighbors[current_tetra]:
                if neighbor not in tetrahedra_set:
                    continue
                distance = current_distance + calculate_weight(current_tetra, neighbor)
                if distance < distances.get(neighbor, float('inf')):
                    distances[neighbor] = distance
                    previous[neighbor] = current_tetra
                    heapq.heappush(queue, (distance, neighbor))

        return None

    for exit_tetrahedron in cavity.end_tetrahedra:
        for starting_tetrahedron in cavity.starting_tetrahedron:
            if exit_tetrahedron == starting_tetrahedron:
                continue
            path = shortest_path(starting_tetrahedron, exit_tetrahedron)
            if path:
                path_tetrahedra = np.array(path)
                channel = Channel(path_tetrahedra,
                                  *self.process_channel(path_tetrahedra, vertices, points, vdw_radii, simplices))
                cavity.add_channel(channel)
def calculate_max_radius(self, vertice, points, vdw_radii, simp):
    """Return the radius of the largest sphere centred at *vertice* that touches no atom.

    :arg vertice: centre of the tetrahedron
    :arg points: atom coordinates
    :arg vdw_radii: per-atom radii
    :arg simp: indices of the atoms forming the tetrahedron

    :returns: the smallest value of (distance to atom centre - atom radius) over
        the atoms in *simp*
    """
    atom_positions = points[simp]
    clearances = np.linalg.norm(atom_positions - vertice, axis=1) - vdw_radii[simp]
    return np.min(clearances)


def calculate_radius_spline(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp):
    """Compute the free radius at each tetrahedron of a path and the narrowest one.

    :arg tetrahedra: indices of the tetrahedra along the path
    :arg voronoi_vertices: one centre per tetrahedron
    :arg points: atom coordinates
    :arg vdw_radii: per-atom radii
    :arg simp: tetrahedra, one row of atom indices per tetrahedron

    :returns: ``(radii, bottleneck)`` where *radii* follows the order of
        *tetrahedra* and *bottleneck* is its minimum
    :rtype: tuple
    """
    centers = voronoi_vertices[tetrahedra]
    radii = np.array([self.calculate_max_radius(center, points, vdw_radii, atoms)
                      for center, atoms in zip(centers, simp[tetrahedra])])
    return radii, np.min(radii)


def process_channel(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp):
    """Build the spline description and summary measures of one channel path.

    The tetrahedron centres and their free radii are interpolated with natural
    cubic splines parameterised by the position ``0, 1, 2, ...`` along the path.

    :arg tetrahedra: indices of the tetrahedra along the path
    :arg voronoi_vertices: one centre per tetrahedron
    :arg points: atom coordinates
    :arg vdw_radii: per-atom radii
    :arg simp: tetrahedra, one row of atom indices per tetrahedron

    :returns: ``(centerline_spline, radius_spline, length, bottleneck, volume)``
    :rtype: tuple
    """
    from scipy.interpolate import CubicSpline

    centers = voronoi_vertices[tetrahedra]
    radii, bottleneck = self.calculate_radius_spline(tetrahedra, voronoi_vertices, points, vdw_radii, simp)

    t = np.arange(len(centers))
    centerline_spline = CubicSpline(t, centers, bc_type='natural')
    radius_spline = CubicSpline(t, radii, bc_type='natural')

    length = self.calculate_channel_length(centerline_spline)
    volume = self.calculate_channel_volume(centerline_spline, radius_spline)

    return centerline_spline, radius_spline, length, bottleneck, volume


def find_biggest_tetrahedron(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp):
    """Return the tetrahedron from *tetrahedra* with the largest free radius.

    Ties resolve to the earliest candidate.

    :arg tetrahedra: candidate tetrahedron indices
    :arg voronoi_vertices: one centre per tetrahedron
    :arg points: atom coordinates
    :arg vdw_radii: per-atom radii
    :arg simp: tetrahedra, one row of atom indices per tetrahedron

    :returns: the index of the widest candidate
    """
    radii = np.array([self.calculate_max_radius(voronoi_vertices[tetra], points, vdw_radii, simp[tetra])
                      for tetra in tetrahedra])
    return tetrahedra[np.argmax(radii)]


def get_end_tetrahedra(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp, sparsity):
    """Pick a sparse set of exit tetrahedra, widest first.

    The widest tetrahedron is selected first. After each selection every candidate
    closer than *sparsity* to it is discarded, and the widest remaining candidate is
    selected next, until no candidate is left.

    :arg tetrahedra: candidate tetrahedron indices
    :arg voronoi_vertices: one centre per tetrahedron
    :arg points: atom coordinates
    :arg vdw_radii: per-atom radii
    :arg simp: tetrahedra, one row of atom indices per tetrahedron
    :arg sparsity: minimum distance between the centres of two selected tetrahedra

    :returns: the selected tetrahedron indices in selection order
    :rtype: :class:`numpy.ndarray`
    """
    chosen = self.find_biggest_tetrahedron(tetrahedra, voronoi_vertices, points, vdw_radii, simp)
    end_tetrahedra = [chosen]
    remaining = list(tetrahedra)

    while True:
        anchor = voronoi_vertices[chosen]
        # Candidates already passed the test against earlier selections, so only
        # the newest selection needs checking; relative order is preserved.
        remaining = [tetra for tetra in remaining
                     if tetra != chosen
                     and not np.linalg.norm(anchor - voronoi_vertices[tetra]) < sparsity]
        if not remaining:
            break

        chosen = self.find_biggest_tetrahedron(remaining, voronoi_vertices, points, vdw_radii, simp)
        end_tetrahedra.append(chosen)

    return np.array(end_tetrahedra)


def filter_cavities(self, cavities, min_depth):
    """Return the cavities whose ``depth`` is at least *min_depth*.

    :arg cavities: cavity objects exposing ``depth``
    :arg min_depth: smallest accepted depth

    :rtype: list
    """
    return [cavity for cavity in cavities if cavity.depth >= min_depth]


def filter_channels_by_bottleneck(self, cavities, bottleneck):
    """Drop, in place, every channel narrower than *bottleneck*.

    :arg cavities: cavity objects whose ``channels`` list is replaced
    :arg bottleneck: smallest accepted value of ``channel.bottleneck``

    :returns: None
    """
    for cavity in cavities:
        cavity.channels = [channel for channel in cavity.channels if channel.bottleneck >= bottleneck]


def save_channels_to_pdb(self, cavities, filename, separate=False, num_samples=5):
    """Write the channels as chains of pseudo-atoms in PDB format.

    Each channel is sampled at ``len(channel.tetrahedra) * num_samples`` evenly
    spaced spline parameters. Every sample becomes an ``ATOM`` record carrying the
    channel radius in the temperature-factor column, and consecutive samples of the
    same channel are linked with ``CONECT`` records.

    :arg cavities: cavity objects exposing a ``channels`` list; channels expose
        ``tetrahedra`` and ``get_splines()``
    :arg filename: output path
    :type filename: str or path-like

    :arg separate: if true, channel *k* is written to ``<root>_channel<k><ext>``
        derived from *filename*, each file numbered from 1; if false (default) all
        channels go into *filename* with continuous serial numbers
    :type separate: bool

    :arg num_samples: samples per tetrahedron of the channel path. Default is 5.
    :type num_samples: int

    :returns: None
    """
    import os

    filename = str(filename)

    def channel_records(channel, first_serial):
        # Returns (lines, sample count) for one channel numbered from first_serial.
        centerline_spline, radius_spline = channel.get_splines()
        samples = len(channel.tetrahedra) * num_samples
        t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples)
        centers = centerline_spline(t)
        radii = radius_spline(t)

        lines = []
        for offset, (center, radius) in enumerate(zip(centers, radii)):
            # Fixed PDB columns: serial 7-11, name 13-16, resName 18-20, chain 22,
            # resSeq 23-26, x/y/z 31-54, occupancy 55-60, temperature factor 61-66.
            lines.append("ATOM  %5d  H   FIL T   1    %8.3f%8.3f%8.3f  1.00%6.2f\n"
                         % (first_serial + offset, center[0], center[1], center[2], radius))

        # Bond consecutive samples using this channel's own serial numbers, so the
        # records stay correct when several channels share one file.
        for serial in range(first_serial, first_serial + samples - 1):
            lines.append("CONECT%5d%5d\n" % (serial, serial + 1))

        return lines, samples

    channels = [channel for cavity in cavities for channel in cavity.channels]

    if separate:
        # Split only the final extension; str.replace would also rewrite '.pdb'
        # inside directory names and do nothing when the suffix is missing.
        root, ext = os.path.splitext(filename)
        for channel_index, channel in enumerate(channels):
            lines, _ = channel_records(channel, 1)
            with open('{0}_channel{1}{2}'.format(root, channel_index, ext), 'w') as pdb_file:
                pdb_file.writelines(lines)
    else:
        with open(filename, 'w') as pdb_file:
            atom_index = 1
            for channel in channels:
                lines, samples = channel_records(channel, atom_index)
                pdb_file.writelines(lines)
                pdb_file.write("\n")
                atom_index += samples


def calculate_channel_length(self, centerline_spline):
    """Approximate the arc length of the channel centreline.

    The spline is sampled at ten points per knot and the lengths of the straight
    segments between consecutive samples are summed.

    :arg centerline_spline: spline exposing knots ``x`` and callable on parameters

    :returns: the summed segment length
    """
    t_values = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], len(centerline_spline.x) * 10)
    segments = np.diff(centerline_spline(t_values), axis=0)
    return np.sum(np.linalg.norm(segments, axis=1))


def calculate_channel_volume(self, centerline_spline, radius_spline):
    """Estimate the channel volume as a swept disc plus two hemispherical caps.

    The cross-section area ``pi * r(t)**2`` times the centreline speed
    ``|dx/dt|`` is integrated over the spline parameter, and a hemisphere of the
    local radius is added at each end.

    :arg centerline_spline: spline of the centreline; must accept ``(t, 1)`` to
        evaluate the first derivative
    :arg radius_spline: spline of the channel radius over the same parameter

    :returns: the estimated volume
    """
    import warnings
    from scipy.integrate import quad, IntegrationWarning

    t_min = centerline_spline.x[0]
    t_max = centerline_spline.x[-1]

    def differential_volume(t):
        r = radius_spline(t)
        speed = np.linalg.norm(centerline_spline(t, 1))
        return np.pi * r**2 * speed

    # Silence quad's convergence warnings for this call only; a bare
    # filterwarnings() here would alter the warning filters of the whole process.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=IntegrationWarning)
        volume, _ = quad(differential_volume, t_min, t_max)

    r_start = radius_spline(t_min)
    r_end = radius_spline(t_max)

    # Float literals keep the 2/3 factor correct under Python 2 integer division.
    hemisphere_volume_start = (2.0 / 3.0) * np.pi * r_start**3
    hemisphere_volume_end = (2.0 / 3.0) * np.pi * r_end**3

    return volume + hemisphere_volume_start + hemisphere_volume_end
'calcSminaTermValues', 'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues', - 'getVmdModel', 'calcChannels', 'calcChannelsMultipleFrames', - 'getChannelParameters', 'getChannelAtoms', 'showChannels', 'showCavities', 'saveInteractionsAsDummyAtoms', 'createFoldseekAlignment', 'runFoldseek', 'runDali', 'runBLAST', 'extractMultiModelPDB', 'calcSignatureInteractions'] @@ -5990,1154 +5988,3 @@ def saveInteractionsPDB(self, **kwargs): return freq_contacts_list - - - - - - - -def checkAndImport(package_name): - """Check for package and import it if possible and return **True**. - Otherwise, return **False - - :arg package_name: name of package - :type package_name: str - - :arg import_command: optional command to import submodules or with an alias - default **None** means use "import {0}".format(package_name) - :type import_command: None, str - """ - if not isinstance(package_name, str): - raise TypeError('package_name should be a string') - - import importlib.util - if importlib.util.find_spec(package_name) is None: - LOGGER.warn("Package " + str(package_name) + " is not installed. Please install it to use this function.") - return False - return True - -def getVmdModel(vmd_path, atoms): - """ - Generates a 3D model of molecular structures using VMD and returns it as an Open3D TriangleMesh. - - This function creates a temporary PDB file from the provided atomic data and uses VMD (Visual Molecular Dynamics) - to render this data into an STL file, which is then loaded into Open3D as a TriangleMesh. The function handles - the creation and cleanup of temporary files and manages the subprocess call to VMD. - - :param vmd_path: Path to the VMD executable. This is required to run VMD and execute the TCL script. - :type vmd_path: str - - :param atoms: Atomic data to be written to a PDB file. This should be an object or data structure - that is compatible with the `writePDB` function. 
- :type atoms: object - - :raises ImportError: If required libraries ('subprocess', 'pathlib', 'tempfile', 'open3d') are not installed, - an ImportError is raised, specifying which libraries are missing. - - :raises ValueError: If the STL file is not created or is empty, or if the STL file cannot be read as a TriangleMesh, - a ValueError is raised. - - :returns: An Open3D TriangleMesh object representing the 3D model generated from the PDB data. - :rtype: open3d.geometry.TriangleMesh - - Example usage: - model = getVmdModel('/path/to/vmd', atoms) - """ - - required = ['subprocess', 'pathlib', 'tempfile', 'open3d'] - missing = [] - errorMsg = None - for name in required: - if not checkAndImport(name): - missing.append(name) - if errorMsg is None: - errorMsg = 'To run getVmdModel, please install {0}'.format(missing[0]) - else: - errorMsg += ', ' + name - - if len(missing) > 0: - if len(missing) > 1: - errorMsg = ', '.join(errorMsg.split(', ')[:-1]) + ' and ' + errorMsg.split(', ')[-1] - raise ImportError(errorMsg) - - import subprocess - from pathlib import Path - import tempfile - import open3d as o3d - - with tempfile.NamedTemporaryFile(suffix=".pdb", delete=False) as temp_pdb: - temp_pdb_path = Path(temp_pdb.name) - writePDB(temp_pdb.name, atoms) - - with tempfile.NamedTemporaryFile(suffix=".tcl", delete=False) as temp_script: - temp_script_path = Path(temp_script.name) - - output_path = temp_script_path.parent / "output.stl" - - vmd_script = """ - set file_path [lindex $argv 0] - set output_path [lindex $argv 1] - - mol new $file_path - mol modstyle 0 0 NewCartoon - - set id_matrix {{1 0 0 0} {0 1 0 0} {0 0 1 0} {0 0 0 1}} - molinfo top set center_matrix [list $id_matrix] - molinfo top set rotate_matrix [list $id_matrix] - molinfo top set scale_matrix [list $id_matrix] - - rendering_method stl - render STL $output_path - - exit - """ - - temp_script.write(vmd_script.encode('utf-8')) - - command = [vmd_path, '-e', str(temp_script_path), '-args', 
str(temp_pdb_path), str(output_path)] - - try: - subprocess.run(command, check=True) - except subprocess.CalledProcessError as e: - LOGGER.info("VMD exited with status " + str(e.returncode) + ".") - except Exception as e: - LOGGER.warn("An unexpected error occurred: " + str(e)) - finally: - temp_script_path.unlink(missing_ok=True) - temp_pdb_path.unlink(missing_ok=True) - if not output_path.exists() or output_path.stat().st_size == 0: - raise ValueError("STL file was not created or is empty.") - - stl_mesh = o3d.io.read_triangle_mesh(str(output_path)) - - if stl_mesh.is_empty(): - raise ValueError("Failed to read the STL file as a TriangleMesh.") - - if output_path.exists(): - output_path.unlink(missing_ok=True) - - LOGGER.info("Model created successfully.") - return stl_mesh - -def showChannels(channels, model=None, surface=None): - """ - Visualizes the channels, and optionally, the molecular model and surface, using Open3D. - - This function renders a 3D visualization of molecular channels based on their spline representations. - It can also display a molecular model (e.g., the protein structure) and a surface (e.g., cavity surface) - in the same visualization. The function utilizes the Open3D library to create and render the 3D meshes. - - :arg channels: A list of channel objects or a single channel object. Each channel should have a - `get_splines()` method that returns two CubicSpline objects: one for the centerline and one for the radii. - :type channels: list or single channel object - - :arg model: An optional Open3D TriangleMesh object representing the molecular model, such as a protein. - If provided, this model will be rendered in the visualization. - :type model: open3d.geometry.TriangleMesh, optional - - :arg surface: An optional list containing the surface data. The list should have two elements: - - `points`: The coordinates of the vertices on the surface. - - `simp`: The simplices that define the surface (e.g., triangles or tetrahedra). 
- If provided, the surface will be rendered as a wireframe overlay in the visualization. - :type surface: list (with two numpy arrays), optional - - :raises ImportError: If the Open3D library is not installed, an ImportError is raised, - prompting the user to install Open3D. - - :returns: None. This function only renders the visualization. - - Example usage: - showChannels(channels, model=protein_mesh, surface=surface_data) - """ - - if not checkAndImport('open3d'): - errorMsg = 'To run showChannels, please install open3d.' - raise ImportError(errorMsg) - - import open3d as o3d - - def create_mesh_from_spline(centerline_spline, radius_spline, n=5): - N = n * len(centerline_spline.x) - t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], N) - centers = centerline_spline(t) - radii = radius_spline(t) - - spheres = [o3d.geometry.TriangleMesh.create_sphere(radius=r, resolution=20).translate(c) for r, c in zip(radii, centers)] - mesh = spheres[0] - for sphere in spheres[1:]: - mesh += sphere - - return mesh - - if not isinstance(channels, list): - channels = [channels] - - channel_meshes = [create_mesh_from_spline(*channel.get_splines()) for channel in channels] - meshes_to_visualize = [o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])] - - if model is not None: - model.compute_vertex_normals() - model.paint_uniform_color([0.1, 0.7, 0.3]) - meshes_to_visualize.append(model) - - if channel_meshes is not None: - if not isinstance(channel_meshes, list): - channel_meshes = [channel_meshes] - for channel_mesh in channel_meshes: - channel_mesh.compute_vertex_normals() - channel_mesh.paint_uniform_color([0.5, 0.0, 0.5]) - meshes_to_visualize.extend(channel_meshes) - - if surface is not None: - points = surface[0] - simp = surface[1] - - triangles = [] - for tetra in simp: - triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), - sorted([tetra[0], tetra[1], tetra[3]]), - sorted([tetra[0], tetra[2], tetra[3]]), - sorted([tetra[1], 
tetra[2], tetra[3]])]) - - triangles = np.array(triangles) - triangles.sort(axis=1) - - triangles_tuple = [tuple(tri) for tri in triangles] - unique_triangles, counts = np.unique(triangles_tuple, return_counts=True, axis=0) - - surface_triangles = unique_triangles[counts == 1] - - lines = [] - for simplex in surface_triangles: - for i in range(3): - for j in range(i + 1, 3): - lines.append([simplex[i], simplex[j]]) - - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(points) - line_set.lines = o3d.utility.Vector2iVector(lines) - - meshes_to_visualize.append(line_set) - - if len(meshes_to_visualize) > 1: - o3d.visualization.draw_geometries(meshes_to_visualize) - else: - LOGGER.info("Nothing to visualize.") - -def showCavities(surface, show_surface=False): - """ - Visualizes the cavities within a molecular surface using Open3D. - - This function displays a 3D visualization of cavities detected in a molecular structure. - It uses the Open3D library to render the cavities as a triangle mesh. Optionally, it can also - display the molecular surface as a wireframe overlay. - - :arg surface: A list containing three elements: - - `points`: The coordinates of the vertices (atoms) in the molecular structure. - - `surf_simp`: The simplices that define the molecular surface. - - `simp_cavities`: The simplices corresponding to the detected cavities. - :type surface: list (with three numpy arrays) - - :arg show_surface: A boolean flag indicating whether to display the molecular surface - as a wireframe overlay in the visualization. If True, the surface will be displayed - in addition to the cavities. Default is False. - :type show_surface: bool - - :raises ImportError: If the Open3D library is not installed, an ImportError is raised, - prompting the user to install Open3D. 
- - :returns: None - - Example usage: - showCavities(surface_data, show_surface=True) - """ - - if not checkAndImport('open3d'): - errorMsg = 'To run showChannels, please install open3d.' - raise ImportError(errorMsg) - - import open3d as o3d - - points = surface[0] - surf_simp = surface[1] - simp_cavities = surface[2] - - triangles = [] - for tetra in simp_cavities: - triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), - sorted([tetra[0], tetra[1], tetra[3]]), - sorted([tetra[0], tetra[2], tetra[3]]), - sorted([tetra[1], tetra[2], tetra[3]])]) - - surface_triangles = np.unique(np.array(triangles), axis=0, return_counts=True)[0] - - mesh = o3d.geometry.TriangleMesh() - mesh.vertices = o3d.utility.Vector3dVector(points) - mesh.triangles = o3d.utility.Vector3iVector(surface_triangles) - - mesh.compute_vertex_normals() - mesh.paint_uniform_color([0.1, 0.7, 0.3]) - - vis = o3d.visualization.Visualizer() - vis.create_window() - vis.add_geometry(mesh) - - if show_surface == True: - triangles = [] - for tetra in surf_simp: - triangles.extend([sorted([tetra[0], tetra[1], tetra[2]]), - sorted([tetra[0], tetra[1], tetra[3]]), - sorted([tetra[0], tetra[2], tetra[3]]), - sorted([tetra[1], tetra[2], tetra[3]])]) - - triangles = np.array(triangles) - triangles.sort(axis=1) - - triangles_tuple = [tuple(tri) for tri in triangles] - unique_triangles, counts = np.unique(triangles_tuple, return_counts=True, axis=0) - - surface_triangles = unique_triangles[counts == 1] - - lines = [] - for simplex in surface_triangles: - for i in range(3): - for j in range(i + 1, 3): - lines.append([simplex[i], simplex[j]]) - - line_set = o3d.geometry.LineSet() - line_set.points = o3d.utility.Vector3dVector(points) - line_set.lines = o3d.utility.Vector2iVector(lines) - - vis.add_geometry(line_set) - - vis.get_render_option().mesh_show_back_face = True - vis.get_render_option().background_color = np.array([1, 1, 1]) - vis.update_renderer() - vis.run() - vis.destroy_window() - -def 
calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): - """ - Computes and identifies channels within a molecular structure using Voronoi and Delaunay tessellations. - - This function analyzes the provided atomic structure to detect channels, which are voids or pathways - within the molecular structure. It employs Voronoi and Delaunay tessellations to identify these regions, - then filters and refines the detected channels based on various parameters such as the minimum depth - and bottleneck size. The results can be saved to a PDB file if an output path is provided. The `separate` - parameter controls whether each detected channel is saved to a separate file or if all channels are saved - in a single file. - - The implementation is inspired by the methods described in the publication: - "MOLE 2.0: advanced approach for analysis of biomacromolecular channels" by M. Berka, M. B. G. Czajka, - J. P. M. T. Doyle, and M. T. L. Smith, published in Nucleic Acids Research, 2014. - - :param atoms: An object representing the molecular structure, typically containing atomic coordinates - and element types. - :type atoms: `Atoms` object - - :param output_path: Optional path to save the resulting channels and associated data in PDB format. - If None, results are not saved. Default is None. - :type output_path: str or None - - :param separate: If True, each detected channel is saved to a separate PDB file. If False, all channels - are saved in a single PDB file. Default is False. - :type separate: bool - - :param r1: The first radius threshold used during the deletion of simplices, which is used to define - the outer surface of the channels. Default is 3. - :type r1: float - - :param r2: The second radius threshold used to define the inner surface of the channels. Default is 1.25. - :type r2: float - - :param min_depth: The minimum depth a cavity must have to be considered as a channel. Default is 10. 
- :type min_depth: float - - :param bottleneck: The minimum allowed bottleneck size (narrowest point) for the channels. Default is 1. - :type bottleneck: float - - :param sparsity: The sparsity parameter controls the sampling density when analyzing the molecular surface. - A higher value results in fewer sampling points. Default is 15. - :type sparsity: int - - :returns: A tuple containing two elements: - - `channels`: A list of detected channels, where each channel is an object containing information - about its path and geometry. - - `surface`: A list containing additional information for further visualization, including - the atomic coordinates, simplices defining the surface, and merged cavities. - :rtype: tuple (list, list) - - This function performs the following steps: - 1. **Selection and Filtering:** Selects non-hetero atoms from the protein, calculates van der Waals radii, and performs - 3D Delaunay triangulation and Voronoi tessellation on the coordinates. - 2. **State Management:** Creates and updates different stages of channel detection of the protein structure to filter out simplices - based on the given radii. - 3. **Surface Layer Calculation:** Determines the surface and second-layer simplices from the filtered results. - 4. **Cavity and Channel Detection:** Finds and filters cavities based on their depth and calculates channels using - Dijkstra's algorithm. - 5. **Visualization and Saving:** Generates meshes for the detected channels, filters them by bottleneck size, and either - saves the results to a PDB file or visualizes them based on the specified parameters. 
- - Example usage: - channels, surface = calcChannels(atoms, output_path="channels.pdb", separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) - """ - - required = ['heapq', 'collections', 'scipy', 'pathlib', 'warnings'] - missing = [] - errorMsg = None - for name in required: - if not checkAndImport(name): - missing.append(name) - if errorMsg is None: - errorMsg = 'To run calcChannels, please install {0}'.format(missing[0]) - else: - errorMsg += ', ' + name - - if len(missing) > 0: - if len(missing) > 1: - errorMsg = ', '.join(errorMsg.split(', ')[:-1]) + ' and ' + errorMsg.split(', ')[-1] - raise ImportError(errorMsg) - - from scipy.spatial import Voronoi, Delaunay - from pathlib import Path - - calculator = ChannelCalculator(atoms, r1, r2, min_depth, bottleneck, sparsity) - - atoms = atoms.select('not hetero') - coords = atoms.getCoords() - - vdw_radii = calculator.get_vdw_radii(atoms.getElements()) - - dela = Delaunay(coords) - voro = Voronoi(coords) - - s_prt = State(dela.simplices, dela.neighbors, voro.vertices) - s_tmp = State(*s_prt.get_state()) - s_prv = State(None, None, None) - - while True: - s_prv.set_state(*s_tmp.get_state()) - #s_tmp.set_state(*calculator.delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) - state = s_tmp.get_state() - result = calculator.delete_simplices3d(coords, *state, vdw_radii, r1, True) - s_tmp.set_state(*result) - - if s_tmp == s_prv: - break - - s_srf = State(*s_tmp.get_state()) - s_inr = State(*calculator.delete_simplices3d(coords, *s_srf.get_state(), vdw_radii, r2, False)) - - l_first_layer_simp, l_second_layer_simp = calculator.surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh) - s_clr = State(*calculator.delete_section(l_first_layer_simp, *s_inr.get_state())) - - c_cavities = calculator.find_groups(s_clr.neigh) - c_surface_cavities = calculator.get_surface_cavities(c_cavities, s_clr.simp, l_second_layer_simp, s_clr, coords, vdw_radii, sparsity) - - 
calculator.find_deepest_tetrahedra(c_surface_cavities, s_clr.neigh) - c_filtered_cavities = calculator.filter_cavities(c_surface_cavities, min_depth) - merged_cavities = calculator.merge_cavities(c_filtered_cavities, s_clr.simp) - - for cavity in c_filtered_cavities: - calculator.dijkstra(cavity, *s_clr.get_state(), coords, vdw_radii) - - calculator.filter_channels_by_bottleneck(c_filtered_cavities, bottleneck) - channels = [channel for cavity in c_filtered_cavities for channel in cavity.channels] - - no_of_channels = len(channels) - LOGGER.info("Detected " + str(no_of_channels) + " channels.") - - if output_path: - output_path = Path(output_path) - - if output_path.is_dir(): - output_path = output_path / "output.pdb" - elif not output_path.suffix == ".pdb": - output_path = output_path.with_suffix(".pdb") - - if not separate: - LOGGER.info("Saving results to " + str(output_path) + ".") - else: - LOGGER.info("Saving multiple results to directory " + str(output_path.parent) + ".") - calculator.save_channels_to_pdb(c_filtered_cavities, output_path, separate) - else: - LOGGER.info("No output path given.") - - return channels, [coords, s_srf.simp, merged_cavities, s_clr.simp] - -def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separate=False, **kwargs): - """ - Compute channels for each frame in a given trajectory or multi-model PDB file. - - This function calculates the channels for each frame in a trajectory or for each model - in a multi-model PDB file. The `kwargs` can include parameters necessary for channel calculation. - If the `separate` parameter is set to True, each detected channel will be saved in a separate PDB file. - - :param atoms: Atomic data or object containing atomic coordinates and methods for accessing them. - :type atoms: object - - :param trajectory: Trajectory object containing multiple frames or a multi-model PDB file. 
- :type trajectory: Atomic or Ensemble object - - :param output_path: Optional path to save the resulting channels and associated data in PDB format. - If a directory is specified, each frame/model will have its results saved in separate files. - If None, results are not saved. Default is None. - :type output_path: str or None - - :param separate: If True, each detected channel is saved to a separate PDB file for each frame/model. - If False, all channels for each frame/model are saved in a single file. Default is False. - :type separate: bool - - :param kwargs: Additional parameters required for channel calculation. This can include parameters such as - radius values, minimum depth, bottleneck values, etc. - :type kwargs: dict - - :returns: List of channels and surfaces computed for each frame or model. Each entry in the list corresponds - to a specific frame or model. - :rtype: list of lists - - Example usage: - channels_all, surfaces_all = calcChannelsMultipleFrames(atoms, trajectory=traj, output_path="channels.pdb", separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) - """ - - if not checkAndImport('pathlib'): - errorMsg = 'To run showChannels, please install open3d.' 
- raise ImportError(errorMsg) - - from pathlib import Path - - try: - coords = getCoords(atoms) - except AttributeError: - try: - checkCoords(coords) - except TypeError: - raise TypeError('coords must be an object with `getCoords` method') - - channels_all = [] - surfaces_all = [] - start_frame = kwargs.pop('start_frame', 0) - stop_frame = kwargs.pop('stop_frame', -1) - - if output_path: - output_path = Path(output_path) - if output_path.suffix == ".pdb": - output_path = output_path.with_suffix('') - - if trajectory is not None: - if isinstance(trajectory, Atomic): - trajectory = Ensemble(trajectory) - - nfi = trajectory._nfi - trajectory.reset() - - if stop_frame == -1: - traj = trajectory[start_frame:] - else: - traj = trajectory[start_frame:stop_frame+1] - - atoms_copy = atoms.copy() - for j0, frame0 in enumerate(traj, start=start_frame): - LOGGER.info('Frame: {0}'.format(j0)) - atoms_copy.setCoords(frame0.getCoords()) - if output_path: - channels, surfaces = calcChannels(atoms_copy, str(output_path) + "{0}.pdb".format(j0), separate, **kwargs) - else: - channels, surfaces = calcChannels(atoms_copy, **kwargs) - channels_all.append(channels) - surfaces_all.append(surfaces) - trajectory._nfi = nfi - - else: - if atoms.numCoordsets() > 1: - for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): - LOGGER.info('Model: {0}'.format(i+start_frame)) - atoms.setACSIndex(i+start_frame) - if output_path: - channels, surfaces = calcChannels(atoms, str(output_path) + "{0}.pdb".format(i+start_frame), separate, **kwargs) - else: - channels, surfaces = calcChannels(atoms, **kwargs) - channels_all.append(channels) - surfaces_all.append(surfaces) - else: - LOGGER.info('Include trajectory or use multi-model PDB file.') - - return channels_all, surfaces_all - -def getChannelParameters(channels): - """ - Extracts and returns the lengths, bottlenecks, and volumes of each channel in a given list of channels. 
- - This function iterates through a list of channel objects, extracting the length, bottleneck, - and volume of each channel. These values are collected into separate lists, which are returned - as a tuple for further use. - - :arg channels: A list of channel objects, where each channel has attributes `length`, `bottleneck`, - and `volume`. These attributes represent the length of the channel, the minimum radius - (bottleneck) along its path, and the total volume of the channel, respectively. - :type channels: list - - :returns: Three lists containing the lengths, bottlenecks, and volumes of the channels. - :rtype: tuple (list, list, list) - - Example usage: - lengths, bottlenecks, volumes = getChannelParameters(channels) - """ - - lengths = [] - bottlenecks = [] - volumes = [] - for channel in channels: - lengths.append(channel.length) - bottlenecks.append(channel.bottleneck) - volumes.append(channel.volume) - - return lengths, bottlenecks, volumes - -def getChannelAtoms(channels, protein=None, num_samples=5): - """ - Generates an AtomGroup object representing the atoms along the paths of the given channels - and optionally combines them with an existing protein structure. - - This function takes a list of channel objects and generates atomic representations of the - channels based on their centerline splines and radius splines. The function samples points - along each channel's centerline and assigns atom positions at these points with corresponding - radii, creating a list of PDB-formatted lines. These lines are then converted into an AtomGroup - object using the ProDy library. If a protein structure is provided, it is combined with the - generated channel atoms by merging their respective PDB streams. - - :param channels: A list of channel objects. Each channel has a method `get_splines()` that - returns the centerline spline and radius spline of the channel. 
- :type channels: list - - :param protein: An optional AtomGroup object representing a protein structure. If provided, - it will be combined with the generated channel atoms. - :type protein: prody.atomic.AtomGroup or None - - :param num_samples: The number of atom samples to generate along each segment of the channel. - More samples result in a finer representation of the channel. Default is 5. - :type num_samples: int - - :returns: An AtomGroup object representing the combined atoms of the channels and the protein, - if a protein is provided. - :rtype: prody.atomic.AtomGroup - - Example usage: - atomic_structure = getChannelAtoms(channels, protein) - """ - import io - from prody import parsePDBStream, writePDBStream - - def convert_lines_to_atomic(atom_lines): - pdb_text = "\n".join(atom_lines) - pdb_stream = io.StringIO(pdb_text) - structure = parsePDBStream(pdb_stream) - return structure - - atom_index = 1 - pdb_lines = [] - - if not isinstance(channels, list): - channels = [channels] - - for channel in channels: - centerline_spline, radius_spline = channel.get_splines() - samples = len(channel.tetrahedra) * num_samples - t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) - centers = centerline_spline(t) - radii = radius_spline(t) - - for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) - - if protein is not None: - protein_stream = io.StringIO() - writePDBStream(protein_stream, protein) - - protein_stream.seek(0) - - protein_lines = protein_stream.readlines() - if protein_lines[-1].strip() == 'END': - protein_lines = protein_lines[:-1] - - combined_pdb_text = "".join(protein_lines) + "\n".join(pdb_lines) + "\nEND\n" - combined_stream = io.StringIO(combined_pdb_text) - combined_structure = parsePDBStream(combined_stream) - - return combined_structure - - channels_atomic = 
convert_lines_to_atomic(pdb_lines) - return channels_atomic - -class Channel: - def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck, volume): - self.tetrahedra = tetrahedra - self.centerline_spline = centerline_spline - self.radius_spline = radius_spline - self.length = length - self.bottleneck = bottleneck - self.volume = volume - - def get_splines(self): - return self.centerline_spline, self.radius_spline - -class State: - def __init__(self, simplices, neighbors, vertices): - self.simp = simplices - self.neigh = neighbors - self.verti = vertices - - def __eq__(self, other): - if not isinstance(other, State): - return False - return (np.array_equal(self.simp, other.simp) and - np.array_equal(self.neigh, other.neigh) and - np.array_equal(self.verti, other.verti)) - - def set_state(self, simplices, neighbors, vertices): - self.simp = simplices - self.neigh = neighbors - self.verti = vertices - - def get_state(self): - return self.simp, self.neigh, self.verti - -class Cavity: - def __init__(self, tetrahedra, is_connected_to_surface): - self.tetrahedra = tetrahedra - self.is_connected_to_surface = is_connected_to_surface - self.starting_tetrahedron = None - self.channels = [] - self.depth = 0 - - def make_surface(self): - self.is_connected_to_surface = True - - def set_exit_tetrahedra(self, exit_tetrahedra, end_tetrahedra): - self.exit_tetrahedra = exit_tetrahedra - self.end_tetrahedra = end_tetrahedra - - def set_starting_tetrahedron(self, tetrahedron): - self.starting_tetrahedron = tetrahedron - - def set_depth(self, depth): - self.depth = depth - - def add_channel(self, channel): - self.channels.append(channel) - -class ChannelCalculator: - def __init__(self, atoms, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): - self.atoms = atoms - self.r1 = r1 - self.r2 = r2 - self.min_depth = min_depth - self.bottleneck = bottleneck - self.sparsity = sparsity - - def sphere_fit(self, vertices, tetrahedron, vertice, vdw_radii, r): - 
center = vertice - d_sum = sum(np.linalg.norm(center - vertices[atom]) for atom in tetrahedron) - r_sum = sum(r + vdw_radii[atom] for atom in tetrahedron) - - return d_sum >= r_sum - - def delete_simplices3d(self, points, simplices, neighbors, vertices, vdw_radii, r, surface): - simp, neigh, verti, deleted = [], [], [], [] - - for i, tetrahedron in enumerate(simplices): - should_delete = (-1 in neighbors[i] and self.sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r)) if surface else not self.sphere_fit(points, tetrahedron, vertices[i], vdw_radii, r) - - if should_delete: - deleted.append(i) - else: - simp.append(simplices[i]) - neigh.append(neighbors[i]) - verti.append(vertices[i]) - - simp = np.array(simp) - neigh = np.array(neigh) - verti = np.array(verti) - deleted = np.array(deleted) - - mask = np.isin(neigh, deleted) - neigh[mask] = -1 - - for i in reversed(deleted): - mask = (neigh > i) & (neigh != -1) - neigh[mask] -= 1 - - return simp, neigh, verti - - def delete_section(self, simplices_subset, simplices, neighbors, vertices, reverse=False): - simp, neigh, verti, deleted = [], [], [], [] - - for i, tetrahedron in enumerate(simplices): - match = any((simplices_subset == tetrahedron).all(axis=1)) - if reverse: - if match: - simp.append(tetrahedron) - neigh.append(neighbors[i]) - verti.append(vertices[i]) - else: - deleted.append(i) - else: - if match: - deleted.append(i) - else: - simp.append(tetrahedron) - neigh.append(neighbors[i]) - verti.append(vertices[i]) - - simp, neigh, verti = map(np.array, [simp, neigh, verti]) - deleted = np.array(deleted) - - mask = np.isin(neigh, deleted) - neigh[mask] = -1 - - for i in reversed(deleted): - neigh = np.where((neigh > i) & (neigh != -1), neigh - 1, neigh) - - return simp, neigh, verti - - def get_vdw_radii(self, atoms): - vdw_radii_dict = { - 'H': 1.20, 'HE': 1.40, 'LI': 1.82, 'BE': 1.53, 'B': 1.92, 'C': 1.70, - 'N': 1.55, 'O': 1.52, 'F': 1.47, 'NE': 1.54, 'NA': 2.27, 'MG': 1.73, - 'AL': 1.84, 'SI': 2.10, 
'P': 1.80, 'S': 1.80, 'CL': 1.75, 'AR': 1.88, - 'K': 2.75, 'CA': 2.31, 'SC': 2.11, 'NI': 1.63, 'CU': 1.40, 'ZN': 1.39, - 'GA': 1.87, 'GE': 2.11, 'AS': 1.85, 'SE': 1.90, 'BR': 1.85, 'KR': 2.02, - 'RB': 3.03, 'SR': 2.49, 'PD': 1.63, 'AG': 1.72, 'CD': 1.58, 'IN': 1.93, - 'SN': 2.17, 'SB': 2.06, 'TE': 2.06, 'I': 1.98, 'XE': 2.16, 'CS': 3.43, - 'BA': 2.68, 'PT': 1.75, 'AU': 1.66, 'HG': 1.55, 'TL': 1.96, 'PB': 2.02, - 'BI': 2.07, 'PO': 1.97, 'AT': 2.02, 'RN': 2.20, 'FR': 3.48, 'RA': 2.83, - 'U': 1.86, 'FE': 2.44 - } - - return np.array([vdw_radii_dict[atom] for atom in atoms]) - - def surface_layer(self, shape_simplices, filtered_simplices, shape_neighbors): - surface_simplices, surface_neighbors = [], [] - interior_simplices = [] - - for i in range(len(shape_simplices)): - if -1 in shape_neighbors[i]: - surface_simplices.append(shape_simplices[i]) - surface_neighbors.append(shape_neighbors[i]) - else: - interior_simplices.append(shape_simplices[i]) - - surface_simplices = np.array(surface_simplices) - surface_neighbors = np.array(surface_neighbors) - interior_simplices = np.array(interior_simplices) - - filtered_surface_simplices = surface_simplices[ - np.any(np.all(surface_simplices[:, None] == filtered_simplices, axis=2), axis=1) - ] - filtered_surface_neighbors = surface_neighbors[ - np.any(np.all(surface_simplices[:, None] == filtered_simplices, axis=2), axis=1) - ] - - filtered_surface_neighbors = np.unique(filtered_surface_neighbors) - filtered_surface_neighbors = filtered_surface_neighbors[filtered_surface_neighbors != 0] - - filtered_interior_simplices = interior_simplices[ - np.any(np.all(interior_simplices[:, None] == filtered_simplices, axis=2), axis=1) - ] - - surface_layer_neighbor_simplices = shape_simplices[filtered_surface_neighbors] - - second_layer = filtered_interior_simplices[ - np.any(np.all(filtered_interior_simplices[:, None] == surface_layer_neighbor_simplices, axis=2), axis=1) - ] - - return filtered_surface_simplices, second_layer - - - def 
find_groups(self, neigh, is_cavity=True): - x = neigh.shape[0] - visited = np.zeros(x, dtype=bool) - groups = [] - - def dfs(tetra_index): - stack = [tetra_index] - current_group = [] - while stack: - index = stack.pop() - if not visited[index]: - visited[index] = True - current_group.append(index) - stack.extend(neighbor for neighbor in neigh[index] if neighbor != -1 and not visited[neighbor]) - return np.array(current_group) - - for i in range(x): - if not visited[i]: - current_group = dfs(i) - if is_cavity: - groups.append(Cavity(current_group, False)) - else: - groups.append(current_group) - - return groups - - def get_surface_cavities(self, cavities, interior_simplices, second_layer, state, points, vdw_radii, sparsity): - surface_cavities = [] - - for cavity in cavities: - tetrahedra = cavity.tetrahedra - second_layer_mask = np.isin(interior_simplices[tetrahedra], second_layer).all(axis=1) - - if np.any(second_layer_mask): - cavity.make_surface() - exit_tetrahedra = tetrahedra[second_layer_mask] - end_tetrahedra = self.get_end_tetrahedra(exit_tetrahedra, state.verti, points, vdw_radii, state.simp, sparsity) - cavity.set_exit_tetrahedra(exit_tetrahedra, end_tetrahedra) - surface_cavities.append(cavity) - - return surface_cavities - - - def merge_cavities(self, cavities, simplices): - merged_tetrahedra = np.concatenate([cavity.tetrahedra for cavity in cavities]) - return simplices[merged_tetrahedra] - - def find_deepest_tetrahedra(self, cavities, neighbors): - from collections import deque - - for cavity in cavities: - exit_tetrahedra = cavity.exit_tetrahedra - visited = np.zeros(neighbors.shape[0], dtype=bool) - visited[exit_tetrahedra] = True - queue = deque([(tetra, 0) for tetra in exit_tetrahedra]) - max_depth = -1 - deepest_tetrahedron = None - - while queue: - current, depth = queue.popleft() - if depth > max_depth: - max_depth = depth - deepest_tetrahedron = current - - for neighbor in neighbors[current]: - if neighbor != -1 and not visited[neighbor] and 
neighbor in cavity.tetrahedra: - visited[neighbor] = True - queue.append((neighbor, depth + 1)) - - cavity.set_starting_tetrahedron(np.array([deepest_tetrahedron])) - cavity.set_depth(max_depth) - - def dijkstra(self, cavity, simplices, neighbors, vertices, points, vdw_radii): - import heapq - - def calculate_weight(current_tetra, neighbor_tetra): - current_vertex = vertices[current_tetra] - neighbor_vertex = vertices[neighbor_tetra] - l = np.linalg.norm(current_vertex - neighbor_vertex) - - d = np.inf - for atom, radius in zip(points[simplices[neighbor_tetra]], vdw_radii[simplices[neighbor_tetra]]): - dist = np.linalg.norm(neighbor_vertex - atom) - radius - if dist < d: - d = dist - - b = 1e-3 - return l / (d**2 + b) - - def dijkstra_algorithm(start, goal, tetrahedra_set): - pq = [(0, start)] - distances = {start: 0} - previous = {start: None} - - while pq: - current_distance, current_tetra = heapq.heappop(pq) - - if current_tetra == goal: - path = [] - while current_tetra is not None: - path.append(current_tetra) - current_tetra = previous[current_tetra] - return path[::-1] - - if current_distance > distances[current_tetra]: - continue - - for neighbor in neighbors[current_tetra]: - if neighbor in tetrahedra_set: - weight = calculate_weight(current_tetra, neighbor) - distance = current_distance + weight - if distance < distances.get(neighbor, float('inf')): - distances[neighbor] = distance - previous[neighbor] = current_tetra - heapq.heappush(pq, (distance, neighbor)) - - return None - - tetrahedra_set = set(cavity.tetrahedra) - for exit_tetrahedron in cavity.end_tetrahedra: - for starting_tetrahedron in cavity.starting_tetrahedron: - if exit_tetrahedron != starting_tetrahedron: - path = dijkstra_algorithm(starting_tetrahedron, exit_tetrahedron, tetrahedra_set) - if path: - path_tetrahedra = np.array(path) - channel = Channel(path_tetrahedra, *self.process_channel(path_tetrahedra, vertices, points, vdw_radii, simplices)) - cavity.add_channel(channel) - - def 
calculate_max_radius(self, vertice, points, vdw_radii, simp): - atom_positions = points[simp] - radii = vdw_radii[simp] - distances = np.linalg.norm(atom_positions - vertice, axis=1) - radii - return np.min(distances) - - def calculate_radius_spline(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp): - vertices = voronoi_vertices[tetrahedra] - radii = np.array([self.calculate_max_radius(v, points, vdw_radii, s) for v, s in zip(vertices, simp[tetrahedra])]) - return radii, np.min(radii) - - def process_channel(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp): - from scipy.interpolate import CubicSpline - - centers = voronoi_vertices[tetrahedra] - radii, bottleneck = self.calculate_radius_spline(tetrahedra, voronoi_vertices, points, vdw_radii, simp) - - t = np.arange(len(centers)) - centerline_spline = CubicSpline(t, centers, bc_type='natural') - radius_spline = CubicSpline(t, radii, bc_type='natural') - - length = self.calculate_channel_length(centerline_spline) - volume = self.calculate_channel_volume(centerline_spline, radius_spline) - - return centerline_spline, radius_spline, length, bottleneck, volume - - def find_biggest_tetrahedron(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp): - radii = np.array([self.calculate_max_radius(voronoi_vertices[tetra], points, vdw_radii, simp[tetra]) for tetra in tetrahedra]) - max_radius_index = np.argmax(radii) - return tetrahedra[max_radius_index] - - def get_end_tetrahedra(self, tetrahedra, voronoi_vertices, points, vdw_radii, simp, sparsity): - end_tetrahedra = [] - current_tetrahedron = self.find_biggest_tetrahedron(tetrahedra, voronoi_vertices, points, vdw_radii, simp) - end_tetrahedra.append(current_tetrahedron) - end_tetrahedra_set = {current_tetrahedron} - - while True: - found_tetrahedra = [] - for tetra in tetrahedra: - if tetra in end_tetrahedra_set: - continue - - all_far_enough = True - for selected_tetra in end_tetrahedra: - distance = 
np.linalg.norm(voronoi_vertices[selected_tetra] - voronoi_vertices[tetra]) - if distance < sparsity: - all_far_enough = False - break - - if all_far_enough: - found_tetrahedra.append(tetra) - - if not found_tetrahedra: - break - - biggest_tetrahedron = self.find_biggest_tetrahedron(found_tetrahedra, voronoi_vertices, points, vdw_radii, simp) - end_tetrahedra.append(biggest_tetrahedron) - end_tetrahedra_set.add(biggest_tetrahedron) - - return np.array(end_tetrahedra) - - def filter_cavities(self, cavities, min_depth): - return [cavity for cavity in cavities if cavity.depth >= min_depth] - - def filter_channels_by_bottleneck(self, cavities, bottleneck): - for cavity in cavities: - cavity.channels = [channel for channel in cavity.channels if channel.bottleneck >= bottleneck] - - def save_channels_to_pdb(self, cavities, filename, separate=False, num_samples=5): - filename = str(filename) - - if separate: - channel_index = 0 - for cavity in cavities: - for channel in cavity.channels: - channel_filename = filename.replace('.pdb', '_channel{0}.pdb'.format(channel_index)) - - with open(channel_filename, 'w') as pdb_file: - atom_index = 1 - centerline_spline, radius_spline = channel.get_splines() - samples = len(channel.tetrahedra) * num_samples - t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) - centers = centerline_spline(t) - radii = radius_spline(t) - - pdb_lines = [] - for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) - - for i in range(1, samples): - pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) - - pdb_file.writelines(pdb_lines) - - channel_index += 1 - else: - with open(filename, 'w') as pdb_file: - atom_index = 1 - for cavity in cavities: - for channel in cavity.channels: - centerline_spline, radius_spline = channel.get_splines() - samples = len(channel.tetrahedra) * num_samples - t 
= np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) - centers = centerline_spline(t) - radii = radius_spline(t) - - pdb_lines = [] - for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) - - for i in range(1, samples): - pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) - - pdb_file.writelines(pdb_lines) - pdb_file.write("\n") - atom_index += samples - - - def calculate_channel_length(self, centerline_spline): - t_values = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], len(centerline_spline.x) * 10) - points = centerline_spline(t_values) - diffs = np.diff(points, axis=0) - lengths = np.linalg.norm(diffs, axis=1) - return np.sum(lengths) - - def calculate_channel_volume(self, centerline_spline, radius_spline): - import warnings - from scipy.integrate import quad, IntegrationWarning - - warnings.filterwarnings("ignore", category=IntegrationWarning) - - t_min = centerline_spline.x[0] - t_max = centerline_spline.x[-1] - - def differential_volume(t): - r = radius_spline(t) - area = np.pi * r**2 - dx_dt = centerline_spline(t, 1) - centerline_derivative = np.linalg.norm(dx_dt) - return area * centerline_derivative - - volume, error = quad(differential_volume, t_min, t_max) - - r_start = radius_spline(t_min) - r_end = radius_spline(t_max) - - hemisphere_volume_start = (2/3) * np.pi * r_start**3 - hemisphere_volume_end = (2/3) * np.pi * r_end**3 - - total_volume = volume + hemisphere_volume_start + hemisphere_volume_end - - return total_volume - - - - - From 943f34fdfa7eb3cb891fd0f0798f99050a48d8d8 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 17 Dec 2024 09:25:49 +0100 Subject: [PATCH 14/48] checkAndImport for Py3 and Py2.7 --- prody/proteins/channels.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/prody/proteins/channels.py 
b/prody/proteins/channels.py index b0a49d250..1c311b14e 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -47,10 +47,18 @@ def checkAndImport(package_name): if not isinstance(package_name, str): raise TypeError('package_name should be a string') - import importlib.util - if importlib.util.find_spec(package_name) is None: - LOGGER.warn("Package " + str(package_name) + " is not installed. Please install it to use this function.") - return False + if PY3K: + import importlib.util + if importlib.util.find_spec(package_name) is None: + LOGGER.warn("Package " + str(package_name) + " is not installed. Please install it to use this function.") + return False + else: + try: + __import__(package_name) + except ImportError: + LOGGER.warn("Package " + str(package_name) + " is not installed. Please install it to use this function.") + return False + return True def getVmdModel(vmd_path, atoms): From e1db890dc023f64002225051befe0e5768290089 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 17 Dec 2024 10:06:33 +0100 Subject: [PATCH 15/48] getVmdModel compatibility with Python 2.7 --- prody/proteins/channels.py | 57 ++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 1c311b14e..719cec351 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -61,6 +61,7 @@ def checkAndImport(package_name): return True + def getVmdModel(vmd_path, atoms): """ Generates a 3D model of molecular structures using VMD and returns it as an Open3D TriangleMesh. 
@@ -88,7 +89,7 @@ def getVmdModel(vmd_path, atoms): Example usage: model = getVmdModel('/path/to/vmd', atoms) """ - + required = ['subprocess', 'pathlib', 'tempfile', 'open3d'] missing = [] errorMsg = None @@ -106,19 +107,27 @@ def getVmdModel(vmd_path, atoms): raise ImportError(errorMsg) import subprocess - from pathlib import Path import tempfile import open3d as o3d + import os + + if PY3K: + from pathlib import Path + else: + Path = lambda x: x with tempfile.NamedTemporaryFile(suffix=".pdb", delete=False) as temp_pdb: temp_pdb_path = Path(temp_pdb.name) writePDB(temp_pdb.name, atoms) - + with tempfile.NamedTemporaryFile(suffix=".tcl", delete=False) as temp_script: temp_script_path = Path(temp_script.name) - - output_path = temp_script_path.parent / "output.stl" - + + if PY3K: + output_path = temp_script_path.parent / "output.stl" + else: + output_path = os.path.join(os.path.dirname(temp_script.name), "output.stl") + vmd_script = """ set file_path [lindex $argv 0] set output_path [lindex $argv 1] @@ -136,34 +145,40 @@ def getVmdModel(vmd_path, atoms): exit """ - temp_script.write(vmd_script.encode('utf-8')) - + command = [vmd_path, '-e', str(temp_script_path), '-args', str(temp_pdb_path), str(output_path)] - + try: - subprocess.run(command, check=True) - except subprocess.CalledProcessError as e: - LOGGER.info("VMD exited with status " + str(e.returncode) + ".") + if PY3K: + subprocess.run(command, check=True) + else: + returncode = subprocess.call(command) + if returncode != 0: + LOGGER.info("VMD exited with status " + str(returncode) + ".") except Exception as e: LOGGER.warn("An unexpected error occurred: " + str(e)) finally: - temp_script_path.unlink(missing_ok=True) - temp_pdb_path.unlink(missing_ok=True) - if not output_path.exists() or output_path.stat().st_size == 0: + if os.path.exists(temp_script_path): + os.unlink(temp_script_path) + if os.path.exists(temp_pdb_path): + os.unlink(temp_pdb_path) + + if not os.path.exists(output_path) or 
os.stat(output_path).st_size == 0: raise ValueError("STL file was not created or is empty.") - + stl_mesh = o3d.io.read_triangle_mesh(str(output_path)) - + if stl_mesh.is_empty(): raise ValueError("Failed to read the STL file as a TriangleMesh.") - - if output_path.exists(): - output_path.unlink(missing_ok=True) - + + if os.path.exists(output_path): + os.unlink(output_path) + LOGGER.info("Model created successfully.") return stl_mesh + def showChannels(channels, model=None, surface=None): """ Visualizes the channels, and optionally, the molecular model and surface, using Open3D. From 6478534a45b2d4c00dfa8a25c0c1956ba84cb3de Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 18 Dec 2024 09:17:19 +0100 Subject: [PATCH 16/48] Python 2.7 in calcChannelsMultipleFrames & calcChannels --- prody/proteins/channels.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 719cec351..c4e00e5d2 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -284,6 +284,7 @@ def create_mesh_from_spline(centerline_spline, radius_spline, n=5): else: LOGGER.info("Nothing to visualize.") + def showCavities(surface, show_surface=False): """ Visualizes the cavities within a molecular surface using Open3D. 
@@ -459,7 +460,11 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep raise ImportError(errorMsg) from scipy.spatial import Voronoi, Delaunay - from pathlib import Path + + if PY3K: + from pathlib import Path + else: + from pathlib2 import Path calculator = ChannelCalculator(atoms, r1, r2, min_depth, bottleneck, sparsity) @@ -524,6 +529,7 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep LOGGER.info("No output path given.") return channels, [coords, s_srf.simp, merged_cavities, s_clr.simp] + def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separate=False, **kwargs): """ @@ -560,12 +566,19 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat channels_all, surfaces_all = calcChannelsMultipleFrames(atoms, trajectory=traj, output_path="channels.pdb", separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) """ - if not checkAndImport('pathlib'): - errorMsg = 'To run showChannels, please install open3d.' - raise ImportError(errorMsg) - - from pathlib import Path - + if PY3K: + if not checkAndImport('pathlib'): + errorMsg = 'To run showChannels, please install open3d.' + raise ImportError(errorMsg) + + from pathlib import Path + else: + if not checkAndImport('pathlib2'): + errorMsg = 'To run showChannels, please install pathlib2 for Python 2.7.' 
+ raise ImportError(errorMsg) + + from pathlib2 import Path + try: coords = getCoords(atoms) except AttributeError: From 56fcbf076cb24712050765088edabc3ef56bcf02 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 18 Dec 2024 09:38:42 +0100 Subject: [PATCH 17/48] PY3K for getChannelAtoms (CaviFinder) --- prody/proteins/channels.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index c4e00e5d2..eba7b73af 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -377,6 +377,7 @@ def showCavities(surface, show_surface=False): vis.run() vis.destroy_window() + def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): """ Computes and identifies channels within a molecular structure using Voronoi and Delaunay tessellations. @@ -637,6 +638,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat return channels_all, surfaces_all + def getChannelParameters(channels): """ Extracts and returns the lengths, bottlenecks, and volumes of each channel in a given list of channels. 
@@ -667,6 +669,7 @@ def getChannelParameters(channels): return lengths, bottlenecks, volumes + def getChannelAtoms(channels, protein=None, num_samples=5): """ Generates an AtomGroup object representing the atoms along the paths of the given channels @@ -698,7 +701,12 @@ def getChannelAtoms(channels, protein=None, num_samples=5): Example usage: atomic_structure = getChannelAtoms(channels, protein) """ - import io + + if PY3K: + import io + else: + import StringIO as io + from prody import parsePDBStream, writePDBStream def convert_lines_to_atomic(atom_lines): @@ -742,6 +750,7 @@ def convert_lines_to_atomic(atom_lines): channels_atomic = convert_lines_to_atomic(pdb_lines) return channels_atomic + class Channel: def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck, volume): self.tetrahedra = tetrahedra From a912c87f282deb6c2d9f305dfc54b3782e42958c Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 18 Dec 2024 09:48:32 +0100 Subject: [PATCH 18/48] Channels docs edits --- prody/proteins/channels.py | 49 +++++++++++++++----------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index eba7b73af..0d73b1849 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -42,8 +42,8 @@ def checkAndImport(package_name): :arg import_command: optional command to import submodules or with an alias default **None** means use "import {0}".format(package_name) - :type import_command: None, str - """ + :type import_command: None, str """ + if not isinstance(package_name, str): raise TypeError('package_name should be a string') @@ -63,8 +63,7 @@ def checkAndImport(package_name): def getVmdModel(vmd_path, atoms): - """ - Generates a 3D model of molecular structures using VMD and returns it as an Open3D TriangleMesh. + """Generates a 3D model of molecular structures using VMD and returns it as an Open3D TriangleMesh. 
This function creates a temporary PDB file from the provided atomic data and uses VMD (Visual Molecular Dynamics) to render this data into an STL file, which is then loaded into Open3D as a TriangleMesh. The function handles @@ -87,8 +86,7 @@ def getVmdModel(vmd_path, atoms): :rtype: open3d.geometry.TriangleMesh Example usage: - model = getVmdModel('/path/to/vmd', atoms) - """ + model = getVmdModel('/path/to/vmd', atoms) """ required = ['subprocess', 'pathlib', 'tempfile', 'open3d'] missing = [] @@ -180,8 +178,7 @@ def getVmdModel(vmd_path, atoms): def showChannels(channels, model=None, surface=None): - """ - Visualizes the channels, and optionally, the molecular model and surface, using Open3D. + """Visualizes the channels, and optionally, the molecular model and surface, using Open3D. This function renders a 3D visualization of molecular channels based on their spline representations. It can also display a molecular model (e.g., the protein structure) and a surface (e.g., cavity surface) @@ -207,8 +204,7 @@ def showChannels(channels, model=None, surface=None): :returns: None. This function only renders the visualization. Example usage: - showChannels(channels, model=protein_mesh, surface=surface_data) - """ + showChannels(channels, model=protein_mesh, surface=surface_data) """ if not checkAndImport('open3d'): errorMsg = 'To run showChannels, please install open3d.' @@ -286,8 +282,7 @@ def create_mesh_from_spline(centerline_spline, radius_spline, n=5): def showCavities(surface, show_surface=False): - """ - Visualizes the cavities within a molecular surface using Open3D. + """Visualizes the cavities within a molecular surface using Open3D. This function displays a 3D visualization of cavities detected in a molecular structure. It uses the Open3D library to render the cavities as a triangle mesh. 
Optionally, it can also @@ -310,8 +305,7 @@ def showCavities(surface, show_surface=False): :returns: None Example usage: - showCavities(surface_data, show_surface=True) - """ + showCavities(surface_data, show_surface=True) """ if not checkAndImport('open3d'): errorMsg = 'To run showChannels, please install open3d.' @@ -379,8 +373,7 @@ def showCavities(surface, show_surface=False): def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): - """ - Computes and identifies channels within a molecular structure using Voronoi and Delaunay tessellations. + """Computes and identifies channels within a molecular structure using Voronoi and Delaunay tessellations. This function analyzes the provided atomic structure to detect channels, which are voids or pathways within the molecular structure. It employs Voronoi and Delaunay tessellations to identify these regions, @@ -441,8 +434,7 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep saves the results to a PDB file or visualizes them based on the specified parameters. Example usage: - channels, surface = calcChannels(atoms, output_path="channels.pdb", separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) - """ + channels, surface = calcChannels(atoms, output_path="channels.pdb", separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) """ required = ['heapq', 'collections', 'scipy', 'pathlib', 'warnings'] missing = [] @@ -533,8 +525,7 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separate=False, **kwargs): - """ - Compute channels for each frame in a given trajectory or multi-model PDB file. + """Compute channels for each frame in a given trajectory or multi-model PDB file. This function calculates the channels for each frame in a trajectory or for each model in a multi-model PDB file. 
The `kwargs` can include parameters necessary for channel calculation. @@ -564,8 +555,8 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat :rtype: list of lists Example usage: - channels_all, surfaces_all = calcChannelsMultipleFrames(atoms, trajectory=traj, output_path="channels.pdb", separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) - """ + channels_all, surfaces_all = calcChannelsMultipleFrames(atoms, trajectory=traj, output_path="channels.pdb", + separate=False, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15) """ if PY3K: if not checkAndImport('pathlib'): @@ -640,8 +631,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat def getChannelParameters(channels): - """ - Extracts and returns the lengths, bottlenecks, and volumes of each channel in a given list of channels. + """Extracts and returns the lengths, bottlenecks, and volumes of each channel in a given list of channels. This function iterates through a list of channel objects, extracting the length, bottleneck, and volume of each channel. These values are collected into separate lists, which are returned @@ -656,8 +646,7 @@ def getChannelParameters(channels): :rtype: tuple (list, list, list) Example usage: - lengths, bottlenecks, volumes = getChannelParameters(channels) - """ + lengths, bottlenecks, volumes = getChannelParameters(channels) """ lengths = [] bottlenecks = [] @@ -671,8 +660,7 @@ def getChannelParameters(channels): def getChannelAtoms(channels, protein=None, num_samples=5): - """ - Generates an AtomGroup object representing the atoms along the paths of the given channels + """Generates an AtomGroup object representing the atoms along the paths of the given channels and optionally combines them with an existing protein structure. 
This function takes a list of channel objects and generates atomic representations of the @@ -699,8 +687,7 @@ def getChannelAtoms(channels, protein=None, num_samples=5): :rtype: prody.atomic.AtomGroup Example usage: - atomic_structure = getChannelAtoms(channels, protein) - """ + atomic_structure = getChannelAtoms(channels, protein) """ if PY3K: import io @@ -763,6 +750,7 @@ def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottlen def get_splines(self): return self.centerline_spline, self.radius_spline + class State: def __init__(self, simplices, neighbors, vertices): self.simp = simplices @@ -807,6 +795,7 @@ def set_depth(self, depth): def add_channel(self, channel): self.channels.append(channel) + class ChannelCalculator: def __init__(self, atoms, r1=3, r2=1.25, min_depth=10, bottleneck=1, sparsity=15): From be68ec6d8c09d1d234502f963fbb9bc24fcd35a4 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 18 Dec 2024 10:34:24 +0100 Subject: [PATCH 19/48] Additional PY3K in calcChannels --- prody/proteins/channels.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 0d73b1849..3fffc9d82 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -470,16 +470,23 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep voro = Voronoi(coords) s_prt = State(dela.simplices, dela.neighbors, voro.vertices) - s_tmp = State(*s_prt.get_state()) - s_prv = State(None, None, None) + + if PY3K: + s_tmp = State(*s_prt.get_state()) + s_prv = State(None, None, None) + else: + s_tmp = apply(State, s_prt.get_state()) + s_prv = State(None, None, None) while True: s_prv.set_state(*s_tmp.get_state()) - #s_tmp.set_state(*calculator.delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) - state = s_tmp.get_state() - result = calculator.delete_simplices3d(coords, *state, vdw_radii, r1, True) - 
s_tmp.set_state(*result) + if PY3K: + s_tmp.set_state(*calculator.delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) + else: + tmp_state = calculator.delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True) + s_tmp.set_state(*tmp_state) + if s_tmp == s_prv: break From a0c7cfec55179e911a8edd96775b804b1dc0c73f Mon Sep 17 00:00:00 2001 From: James Krieger Date: Wed, 18 Dec 2024 11:27:45 +0000 Subject: [PATCH 20/48] py2 safe stars --- prody/proteins/channels.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 3fffc9d82..c343710bd 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -482,17 +482,17 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep s_prv.set_state(*s_tmp.get_state()) if PY3K: - s_tmp.set_state(*calculator.delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True)) + s_tmp.set_state(*calculator.delete_simplices3d(coords, *(s_tmp.get_state() + [vdw_radii, r1, True]))) else: - tmp_state = calculator.delete_simplices3d(coords, *s_tmp.get_state(), vdw_radii, r1, True) + tmp_state = calculator.delete_simplices3d(coords, *(s_tmp.get_state() + [vdw_radii, r1, True])) s_tmp.set_state(*tmp_state) if s_tmp == s_prv: break s_srf = State(*s_tmp.get_state()) - s_inr = State(*calculator.delete_simplices3d(coords, *s_srf.get_state(), vdw_radii, r2, False)) - + s_inr = State(*calculator.delete_simplices3d(coords, *(s_srf.get_state() + [vdw_radii, r2, False]))) + l_first_layer_simp, l_second_layer_simp = calculator.surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh) s_clr = State(*calculator.delete_section(l_first_layer_simp, *s_inr.get_state())) @@ -504,7 +504,7 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep merged_cavities = calculator.merge_cavities(c_filtered_cavities, s_clr.simp) for cavity in c_filtered_cavities: - calculator.dijkstra(cavity, 
*s_clr.get_state(), coords, vdw_radii) + calculator.dijkstra(cavity, *(s_clr.get_state() + [coords, vdw_radii])) calculator.filter_channels_by_bottleneck(c_filtered_cavities, bottleneck) channels = [channel for cavity in c_filtered_cavities for channel in cavity.channels] From 0f549af570718a9b405e87943e69ec8f5fccd902 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 18 Dec 2024 13:20:12 +0100 Subject: [PATCH 21/48] CaviFinder func in __init__ --- prody/proteins/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/prody/proteins/__init__.py b/prody/proteins/__init__.py index 93db9acb3..bfd0fec03 100644 --- a/prody/proteins/__init__.py +++ b/prody/proteins/__init__.py @@ -132,6 +132,20 @@ * :class:`.Interactions` - store inteactions for a single PDB structure * :class:`.InteractionsDCD` - store interactions for a trajectory +Detect channels, tunnels and pores with CaviFinder +==================== + +Use the following to analyze cavities within protein structures +in single PDB file or in trajectory: + + * :func:`.getVmdModel` - generates a 3D model of proten, using VMD, which is then use for visualization + * :func:`.calcChannels` - computes and identifies channels + * :func:`.calcChannelsMultipleFrames` - compute channels for each frame in a given trajectory or PDB ensemble + * :func:`.getChannelParameters` - extracts and returns the lengths, bottlenecks, and volumes of each channel + * :func:`.getChannelAtoms` - generates an AtomGroup object representing the atoms along the paths of the given channels + * :func:`.showChannels` - visualizes the channels using Open3D + * :func:`.showCavities` - visualizes the cavities using Open3D + Compare/align chains ==================== From f780e31e7383ab1dd26373e7807b99154278c8e0 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 18 Dec 2024 14:06:04 +0100 Subject: [PATCH 22/48] CaviFinder - tuple fixes in calcChannels --- prody/proteins/channels.py | 9 ++++++--- 1 file changed, 6 insertions(+), 
3 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index c343710bd..e55b6f418 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -482,7 +482,8 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep s_prv.set_state(*s_tmp.get_state()) if PY3K: - s_tmp.set_state(*calculator.delete_simplices3d(coords, *(s_tmp.get_state() + [vdw_radii, r1, True]))) + #s_tmp.set_state(*calculator.delete_simplices3d(coords, *(s_tmp.get_state() + [vdw_radii, r1, True]))) + s_tmp.set_state(*calculator.delete_simplices3d(coords, *(s_tmp.get_state() + tuple([vdw_radii, r1, True])))) else: tmp_state = calculator.delete_simplices3d(coords, *(s_tmp.get_state() + [vdw_radii, r1, True])) s_tmp.set_state(*tmp_state) @@ -491,7 +492,8 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep break s_srf = State(*s_tmp.get_state()) - s_inr = State(*calculator.delete_simplices3d(coords, *(s_srf.get_state() + [vdw_radii, r2, False]))) + #s_inr = State(*calculator.delete_simplices3d(coords, *(s_srf.get_state() + [vdw_radii, r2, False]))) + s_inr = State(*calculator.delete_simplices3d(coords, *(s_srf.get_state() + tuple([vdw_radii, r2, False])))) l_first_layer_simp, l_second_layer_simp = calculator.surface_layer(s_srf.simp, s_inr.simp, s_srf.neigh) s_clr = State(*calculator.delete_section(l_first_layer_simp, *s_inr.get_state())) @@ -504,7 +506,8 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep merged_cavities = calculator.merge_cavities(c_filtered_cavities, s_clr.simp) for cavity in c_filtered_cavities: - calculator.dijkstra(cavity, *(s_clr.get_state() + [coords, vdw_radii])) + #calculator.dijkstra(cavity, *(s_clr.get_state() + [coords, vdw_radii])) + calculator.dijkstra(cavity, *(s_clr.get_state() + tuple([coords, vdw_radii]))) calculator.filter_channels_by_bottleneck(c_filtered_cavities, bottleneck) channels = [channel for cavity in c_filtered_cavities for 
channel in cavity.channels] From 49979a1776e2014a14dfba3da4dbe38725a22673 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sun, 19 Jan 2025 22:16:34 +0100 Subject: [PATCH 23/48] selectChannelBySelection() --- prody/proteins/channels.py | 48 +++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index e55b6f418..25ebc589b 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -29,7 +29,8 @@ __all__ = ['getVmdModel', 'calcChannels', 'calcChannelsMultipleFrames', - 'getChannelParameters', 'getChannelAtoms', 'showChannels', 'showCavities'] + 'getChannelParameters', 'getChannelAtoms', 'showChannels', 'showCavities', + 'selectChannelBySelection'] @@ -746,7 +747,52 @@ def convert_lines_to_atomic(atom_lines): channels_atomic = convert_lines_to_atomic(pdb_lines) return channels_atomic + +def selectChannelBySelection(atoms, residue_sele, **kwargs): + """Select PDB files with channels that are having FIL residues within certain distance (distA) from + selected residue (temporarly one residue). + If not all files should be included use pdb_files to provide the new list. + For example: + pdb_files = [file for file in os.listdir('.') if file.startswith('7lafA_') and file.endswith('.pdb')] + pdb_files = [file for file in os.listdir('.') if '5kbd' in file and file.endswith('.pdb')] + + :arg atoms: an Atomic object from which residues are selected + :type atoms: :class:`.Atomic`, :class:`.LigandInteractionsTrajectory` + + :arg residue_sele: selection string + :type residue_sele: str + + :arg folder_name: The name of the folder to which PDBs will be extracted + :type folder_name: str + + :arg distA: non-zero value, maximal distance between donor and acceptor. 
+ default is 5 + :type distA: int, float """ + + import os, shutil + import numpy as np + pdb_files = kwargs.pop('pdb_files', False) + distA = kwargs.pop('distA', 5) + folder_name = kwargs.pop('folder_name', 'selected_files') + + if pdb_files == False: + # take all PDBs from the current dir + pdb_files = [file for file in os.listdir('.') if file.endswith('.pdb')] + + residue_sele = atoms.select(residue_sele) + os.makedirs(folder_name, exist_ok=True) + + for i in pdb_files: + channel = parsePDB(i) + if 'FIL' in np.unique(channel.getResnames()): + sele_FIL = channel.select('same residue as exwithin '+str(distA)+' of center', center=residue_sele.getCoords()) + + if sele_FIL is not None: + shutil.copy(i, folder_name) + LOGGER.info('Filtered files are now in: ', folder_name) + else: + pass class Channel: def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck, volume): From b73ef26e41aeec1a77a23c03cf9b0454a5437c4a Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sun, 19 Jan 2025 22:40:34 +0100 Subject: [PATCH 24/48] selectChannelBySelection, checks added --- prody/proteins/channels.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 25ebc589b..de47a9f30 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -755,11 +755,12 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): For example: pdb_files = [file for file in os.listdir('.') if file.startswith('7lafA_') and file.endswith('.pdb')] pdb_files = [file for file in os.listdir('.') if '5kbd' in file and file.endswith('.pdb')] - + :arg atoms: an Atomic object from which residues are selected :type atoms: :class:`.Atomic`, :class:`.LigandInteractionsTrajectory` :arg residue_sele: selection string + for example: 'resid 377 and chain A', 'resid 10 to 20' :type residue_sele: str :arg folder_name: The name of the folder to which PDBs will be extracted @@ -768,6 +769,16 @@ 
def selectChannelBySelection(atoms, residue_sele, **kwargs): :arg distA: non-zero value, maximal distance between donor and acceptor. default is 5 :type distA: int, float """ + + try: + coords = (atoms._getCoords() if hasattr(atoms, '_getCoords') else + atoms.getCoords()) + except AttributeError: + try: + checkCoords(coords) + except TypeError: + raise TypeError('coords must be an object ' + 'with `getCoords` method') import os, shutil import numpy as np @@ -790,7 +801,7 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): if sele_FIL is not None: shutil.copy(i, folder_name) - LOGGER.info('Filtered files are now in: ', folder_name) + LOGGER.info("Filtered files are now in: {}".format(folder_name)) else: pass From 7f36dd13c48573eda2bcc23c79dbbc2c5a89d63e Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sun, 19 Jan 2025 22:48:30 +0100 Subject: [PATCH 25/48] selectChannelBySelection - Py2.7 fix --- prody/proteins/channels.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index de47a9f30..cfff650c1 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -792,7 +792,8 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): pdb_files = [file for file in os.listdir('.') if file.endswith('.pdb')] residue_sele = atoms.select(residue_sele) - os.makedirs(folder_name, exist_ok=True) + if not os.path.exists(folder_name): + os.makedirs(folder_name) for i in pdb_files: channel = parsePDB(i) From 71503d5a134b5fb9c8dd38e5771fe10813d24d83 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Thu, 23 Jan 2025 10:42:27 +0100 Subject: [PATCH 26/48] selectChannelBySelection - residues_file=True, param_file=True options --- prody/proteins/channels.py | 54 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index cfff650c1..5d5c744d5 100644 --- a/prody/proteins/channels.py +++ 
b/prody/proteins/channels.py @@ -768,7 +768,15 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): :arg distA: non-zero value, maximal distance between donor and acceptor. default is 5 - :type distA: int, float """ + :type distA: int, float + + :arg residues_file: File with residues forming the channel created by getChannelResidues() + default is False + :type residues_file: bool + + :arg param_file: File with residues forming the channel created by getChannelParameters() + default is False + :type param_file: bool """ try: coords = (atoms._getCoords() if hasattr(atoms, '_getCoords') else @@ -786,6 +794,9 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): pdb_files = kwargs.pop('pdb_files', False) distA = kwargs.pop('distA', 5) folder_name = kwargs.pop('folder_name', 'selected_files') + residues_file = kwargs.pop('residues_file', False) + param_file = kwargs.pop('param_file', False) + copied_files_list = [] if pdb_files == False: # take all PDBs from the current dir @@ -802,9 +813,48 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): if sele_FIL is not None: shutil.copy(i, folder_name) - LOGGER.info("Filtered files are now in: {}".format(folder_name)) + LOGGER.info('Filtered files are now in: {0}'.format(folder_name)) + copied_files_list.append(i) else: pass + + # Extract paramaters and/or residues with channel selection + if residues_file == True: + selected_residues = [] + for file in copied_files_list: + try: + PDB_id, channel_name = file[:-4].split('_') + f = open(PDB_id+'_Residues_All_channels.txt', 'r').readlines() + for line in f: + if line.startswith(channel_name+':'): + new_line = file.split('_')[0]+'_'+line + selected_residues.append(new_line) + except: + LOGGER.info('File {0} was not analyzed due to the lack of file or multiple channel file.'.format(file)) + pass + + with open('Selected_channel_residues.txt', 'w') as f_out: + f_out.writelines(selected_residues) + + if param_file == True: + selected_param = [] + for 
file in copied_files_list: + try: + PDB_id, channel_name = file[:-4].split('_') + f = open(PDB_id+'_Parameters_All_channels.txt', 'r').readlines() + for line in f: + if line.startswith(file.split('_')[0]+'_'+channel_name+':'): + selected_param.append(line) + except: + LOGGER.info('File {0} was not analyzed due to the lack of file or multiple channel file.'.format(file)) + pass + + with open('Selected_channel_parameters.txt', 'w') as f_out: + f_out.writelines(selected_param) + + LOGGER.info('Selected files: ') + LOGGER.info(' '.join(copied_files_list)) + class Channel: def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck, volume): From e7fc1c4fe6ba12c91feb2612af95164a3a218fee Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Thu, 23 Jan 2025 11:57:28 +0100 Subject: [PATCH 27/48] getChannelParameters - param_file_name option --- prody/proteins/channels.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 5d5c744d5..859bf8b30 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -641,7 +641,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat return channels_all, surfaces_all -def getChannelParameters(channels): +def getChannelParameters(channels, **kwargs): """Extracts and returns the lengths, bottlenecks, and volumes of each channel in a given list of channels. This function iterates through a list of channel objects, extracting the length, bottleneck, @@ -653,6 +653,12 @@ def getChannelParameters(channels): (bottleneck) along its path, and the total volume of the channel, respectively. :type channels: list + :arg param_file_name: The files with parameters will be saved in a text file with the provided name. + Use one word which will be added to '_Parameters_All_channels.txt' sufix. 
+ If further analysis will be performed with selectChannelBySelection() function, the preferable + param_file_name is PDB+chain for example: '1bbhA'. + :type param_file_name: str + :returns: Three lists containing the lengths, bottlenecks, and volumes of the channels. :rtype: tuple (list, list, list) @@ -662,11 +668,17 @@ def getChannelParameters(channels): lengths = [] bottlenecks = [] volumes = [] - for channel in channels: + param_file_name = kwargs.pop('param_file_name', None) + + for nr_ch, channel in enumerate(channels): lengths.append(channel.length) bottlenecks.append(channel.bottleneck) volumes.append(channel.volume) + if param_file_name is not None: + with open(param_file_name+'_Parameters_All_channels.txt', "a") as f_par: + f_par.write(("{0}_channel{1}: {2} {3} {4}\n".format(param_file_name, nr_ch, channel.length, channel.bottleneck, channel.volume))) + return lengths, bottlenecks, volumes From 263971bf6fe73931303ca8777fea8209b54794cf Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Thu, 23 Jan 2025 13:10:29 +0100 Subject: [PATCH 28/48] getChannelResidueNames - new function in CaviTracker --- prody/proteins/channels.py | 79 +++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 859bf8b30..648c6aba7 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -30,7 +30,7 @@ __all__ = ['getVmdModel', 'calcChannels', 'calcChannelsMultipleFrames', 'getChannelParameters', 'getChannelAtoms', 'showChannels', 'showCavities', - 'selectChannelBySelection'] + 'selectChannelBySelection', 'getChannelResidueNames'] @@ -760,6 +760,83 @@ def convert_lines_to_atomic(atom_lines): channels_atomic = convert_lines_to_atomic(pdb_lines) return channels_atomic + +def getChannelResidueNames(atoms, channels, **kwargs): + '''Provides the resnames and resid of residues that are forming the channel(s). 
Residues are extracted based on distA + which is the distance between FIL atoms (channel atoms) and protein residues. + Results could be save as txt file by providing the `residues_file_name` parameter. + + :arg atoms: an Atomic object from which residues are selected + :type atoms: :class:`.Atomic`, :class:`.LigandInteractionsTrajectory` + + :param channels: A list of channel objects. Each channel has a method `get_splines()` that + returns the centerline spline and radius spline of the channel. + :type channels: list + + :arg distA: Residues will be provided based on this value. + default is 4 [Ang] + :type distA: int, float + + :arg residues_file_name: The file with residues will be saved in a text file with the provided name. + Use one word which will be added to '_Residues_All_channels.txt' sufix. + If further analysis will be performed with selectChannelBySelection() function, the preferable + residues_file_name is PDB+chain for example: '1bbhA'. + :type residues_file_name: str ''' + + try: + coords = (atoms._getCoords() if hasattr(atoms, '_getCoords') else + atoms.getCoords()) + except AttributeError: + try: + checkCoords(coords) + except TypeError: + raise TypeError('coords must be an object ' + 'with `getCoords` method') + + distA = kwargs.pop('distA', 4) + residues_file_name = kwargs.pop('residues_file_name', None) + + if isinstance(channels, list): + # Multiple channels + selected_residues_ch = [] + + for i, channel in enumerate(channels): + atoms_protein = getChannelAtoms(channel, atoms) + residues = atoms_protein.select('same residue as exwithin '+str(distA)+' of resname FIL') + + if residues is not None: + resnames = residues.select('name CA').getResnames() + resnums = residues.select('name CA').getResnums() + residues_info = ["{}{}".format(resname, resnum) for resname, resnum in zip(resnames, resnums)] + residues_list = ", ".join(residues_info) + residues_list = 'channel'+str(i)+': '+residues_list + selected_residues_ch.append(residues_list) + else: 
+ residues_list = "None" + + else: + # Single channel analysis in case someone provide channels[0] + atoms_protein = getChannelAtoms(channels, atoms) + residues = atoms_protein.select('same residue as exwithin '+str(distA)+' of resname FIL') + selected_residues_ch = [] + + if residues is not None: + resnames = residues.select('name CA').getResnames() + resnums = residues.select('name CA').getResnums() + residues_info = ["{}{}".format(resname, resnum) for resname, resnum in zip(resnames, resnums)] + residues_list = ", ".join(residues_info) + selected_residues_ch.append(residues_list) + else: + residues_list = "None" + + if residues_file_name is not None: + with open(residues_file_name+'_Residues_All_channels.txt', "a") as f_res: + for k in selected_residues_ch: + f_res.write(("{0}_{1}\n".format(residues_file_name, k))) + + return selected_residues_ch + + def selectChannelBySelection(atoms, residue_sele, **kwargs): """Select PDB files with channels that are having FIL residues within certain distance (distA) from selected residue (temporarly one residue). 
From ac0deb420cf49f439beea0298ecffdec6c7cbe2e Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Thu, 23 Jan 2025 13:18:12 +0100 Subject: [PATCH 29/48] selectChannelBySelection - name unification --- prody/proteins/channels.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 648c6aba7..8e7324c83 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -915,9 +915,11 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): PDB_id, channel_name = file[:-4].split('_') f = open(PDB_id+'_Residues_All_channels.txt', 'r').readlines() for line in f: - if line.startswith(channel_name+':'): - new_line = file.split('_')[0]+'_'+line - selected_residues.append(new_line) + if line.startswith(file.split('_')[0]+'_'+channel_name+':'): + selected_residues.append(line) + #if line.startswith(channel_name+':'): + # new_line = file.split('_')[0]+'_'+line + # selected_residues.append(new_line) except: LOGGER.info('File {0} was not analyzed due to the lack of file or multiple channel file.'.format(file)) pass From 7a619de5f637c1c5f61c5d350478a4a44ab187d5 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 4 Feb 2025 09:24:57 +0100 Subject: [PATCH 30/48] selectChannelBySelection - improvement to fetch longer names correctly --- prody/proteins/channels.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 8e7324c83..fdf28f688 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -912,14 +912,12 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): selected_residues = [] for file in copied_files_list: try: - PDB_id, channel_name = file[:-4].split('_') + PDB_id = file[:-4].split('_channel')[0] + channel_name = file[:-4].split('_')[-1] f = open(PDB_id+'_Residues_All_channels.txt', 'r').readlines() for line in f: if 
line.startswith(file.split('_')[0]+'_'+channel_name+':'): selected_residues.append(line) - #if line.startswith(channel_name+':'): - # new_line = file.split('_')[0]+'_'+line - # selected_residues.append(new_line) except: LOGGER.info('File {0} was not analyzed due to the lack of file or multiple channel file.'.format(file)) pass @@ -931,7 +929,9 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): selected_param = [] for file in copied_files_list: try: - PDB_id, channel_name = file[:-4].split('_') + PDB_id = file[:-4].split('_channel')[0] + channel_name = file[:-4].split('_')[-1] + print(PDB_id, channel_name) f = open(PDB_id+'_Parameters_All_channels.txt', 'r').readlines() for line in f: if line.startswith(file.split('_')[0]+'_'+channel_name+':'): @@ -945,6 +945,7 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): LOGGER.info('Selected files: ') LOGGER.info(' '.join(copied_files_list)) + LOGGER.info('If newly created files are empty please check whether the parameter names are: PDB_id+_Parameters_All_channels.txt') class Channel: From 49c63c2677c14af5b25a318031b6ea8724219a26 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 4 Feb 2025 09:30:44 +0100 Subject: [PATCH 31/48] removed print in CaviTracer --- prody/proteins/channels.py | 1 - 1 file changed, 1 deletion(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index fdf28f688..91f2339e7 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -931,7 +931,6 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): try: PDB_id = file[:-4].split('_channel')[0] channel_name = file[:-4].split('_')[-1] - print(PDB_id, channel_name) f = open(PDB_id+'_Parameters_All_channels.txt', 'r').readlines() for line in f: if line.startswith(file.split('_')[0]+'_'+channel_name+':'): From 8647b8da22f6a4f69e1d867915982a494dd62e1e Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 4 Feb 2025 14:26:53 +0100 Subject: [PATCH 32/48] selectChannelBySelection - final 
fixes (long names) --- prody/proteins/channels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 91f2339e7..4c7bd477b 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -916,7 +916,7 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): channel_name = file[:-4].split('_')[-1] f = open(PDB_id+'_Residues_All_channels.txt', 'r').readlines() for line in f: - if line.startswith(file.split('_')[0]+'_'+channel_name+':'): + if line.startswith(PDB_id+'_'+channel_name+':'): selected_residues.append(line) except: LOGGER.info('File {0} was not analyzed due to the lack of file or multiple channel file.'.format(file)) @@ -933,7 +933,7 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): channel_name = file[:-4].split('_')[-1] f = open(PDB_id+'_Parameters_All_channels.txt', 'r').readlines() for line in f: - if line.startswith(file.split('_')[0]+'_'+channel_name+':'): + if line.startswith(PDB_id+'_'+channel_name+':'): selected_param.append(line) except: LOGGER.info('File {0} was not analyzed due to the lack of file or multiple channel file.'.format(file)) From 4283f218181cc7eff473202da5430bf38185b6b1 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 25 Feb 2025 08:48:45 +0100 Subject: [PATCH 33/48] Fix for Beta column - FIL atoms [CaviTracer] --- prody/proteins/channels.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 4c7bd477b..85c4d400d 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -739,7 +739,7 @@ def convert_lines_to_atomic(atom_lines): radii = radius_spline(t) for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) + pdb_lines.append("ATOM %5d H FIL T 1 
%8.3f%8.3f%8.3f%6.2f%6.2f\n" % (i, x, y, z, 1.00, radius)) if protein is not None: protein_stream = io.StringIO() @@ -1350,7 +1350,7 @@ def save_channels_to_pdb(self, cavities, filename, separate=False, num_samples=5 pdb_lines = [] for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) + pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f%6.2f%6.2f\n" % (i, x, y, z, 1.00, radius)) for i in range(1, samples): pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) @@ -1371,7 +1371,7 @@ def save_channels_to_pdb(self, cavities, filename, separate=False, num_samples=5 pdb_lines = [] for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f 1.00 %6.2f\n" % (i, x, y, z, radius)) + pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f%6.2f%6.2f\n" % (i, x, y, z, 1.00, radius)) for i in range(1, samples): pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) From 2ee4d7cd6303ba407d709925b4100d7830acd3ff Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 25 Feb 2025 12:53:51 +0100 Subject: [PATCH 34/48] calcOverlappingSurfaces added --- prody/proteins/channels.py | 103 ++++++++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 1 deletion(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 85c4d400d..9503a67b6 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -30,7 +30,8 @@ __all__ = ['getVmdModel', 'calcChannels', 'calcChannelsMultipleFrames', 'getChannelParameters', 'getChannelAtoms', 'showChannels', 'showCavities', - 'selectChannelBySelection', 'getChannelResidueNames'] + 'selectChannelBySelection', 'getChannelResidueNames', + 'calcOverlappingSurfaces'] @@ -946,6 +947,106 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): LOGGER.info(' 
'.join(copied_files_list)) LOGGER.info('If newly created files are empty please check whether the parameter names are: PDB_id+_Parameters_All_channels.txt') + +def calcOverlappingSurfaces(**kwargs): + """Calculate overlapping parts of the predicted channels, tunnels, and pores denote as 'FIL' atoms. + + :arg resolution: Surface sampling resolution. + default is 0.5 + :type resolution: float + + :arg output_file_name: The name of the PDB file with overlapping surfaces. + :type output_file_name: str + + :arg pdb_files: File with residues forming the channel created by getChannelResidues() + default is False (then all the files from the current directory will be analyzed) + when providing a list, only the PDBs from list will be analyzed + when providing str, it will be treated as a folder path + :type pdb_files: bool, list or str + """ + + import os + + resolution = kwargs.pop('resolution', 0.5) + + pdb_files = kwargs.pop('pdb_files', False) + if pdb_files == False or pdb_files is None: + # take all PDBs from the current dir + pdb_files = [file for file in os.listdir('.') if file.endswith('.pdb')] + elif isinstance(pdb_files, str): + # folder path + pdb_files = [file for file in os.listdir(pdb_files) if file.endswith('.pdb')] + elif isinstance(pdb_files, list): + # list of PDBs + pdb_files = [file for file in pdb_files if file.endswith('.pdb')] + else: + raise ValueError('Please provide list with PDB files, folder path, or nothing to analyze PDBs in the current folder') + + output_file_name = kwargs.pop('output_file_name','overlap_regions.pdb') + if os.path.exists(output_file_name): + os.rename(output_file_name, output_file_name+'-old') + + def loadPDBdata(filepath): + """Parse a PDB file and return a list of atom dictionaries for lines containing 'FIL'.""" + atoms_set = [] + FILatoms = parsePDB(filepath).select('resname FIL') + + if FILatoms == None: + pass + else: + for nr_i, i in enumerate(FILatoms): + FILatoms_coords = FILatoms.getCoords()[nr_i] + FILBetas_value = 
FILatoms.getBetas()[nr_i] + atoms_set.append({ + 'x': float(FILatoms_coords[0]), + 'y': float(FILatoms_coords[1]), + 'z': float(FILatoms_coords[2]), + 'radius': float(FILBetas_value) + }) + return atoms_set + + def create_surface(atoms, resolution=resolution): + """Create a 3D grid representing the surface occupied by the atoms.""" + surface = {} + Zr = 0 + for atom in atoms: + x, y, z, radius = atom['x'], atom['y'], atom['z'], atom['radius'] + for i in np.arange(x - radius, x + radius, resolution): + for j in np.arange(y - radius, y + radius, resolution): + for k in np.arange(z - radius, z + radius, resolution): + if (i - x) ** 2 + (j - y) ** 2 + (k - z) ** 2 <= radius ** 2: + key = (round(i, Zr), round(j, Zr), round(k, Zr)) + surface[key] = surface.get(key, 0) + 1 + return surface + + def merge_surfaces(surfaces): + """Merge multiple surfaces and calculate overlap counts.""" + merged_surface = {} + for surface in surfaces: + for key in surface: + merged_surface[key] = merged_surface.get(key, 0) + 1 + return merged_surface + + def write_merge_surf_pdb(merged_surface, filename): + """Write the merged surface into a PDB file.""" + with open(filename, 'w') as file: + atom_id = 1 + for (x, y, z), count in merged_surface.items(): + file.write("ATOM {:5d} H FIL T 1 {:8.3f}{:8.3f}{:8.3f}{:6.2f} 1.00\n".format(atom_id, x, y, z, count)) + atom_id += 1 + + surfaces = [] + for pdb_file in pdb_files: + LOGGER.info('Processing file: {0}'.format(pdb_file)) + print('Processing file: {0}'.format(pdb_file)) + atoms = loadPDBdata(pdb_file) + if atoms: + surface = create_surface(atoms, resolution=resolution) + surfaces.append(surface) + + merged_surface = merge_surfaces(surfaces) + write_merge_surf_pdb(merged_surface, output_file_name) + class Channel: def __init__(self, tetrahedra, centerline_spline, radius_spline, length, bottleneck, volume): From f54850cf158711bc047a90fe9d45f51e1da80031 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Wed, 5 Mar 2025 14:58:13 +0100 Subject: 
[PATCH 35/48] calcOverlappingSurfaces() normalization --- prody/proteins/channels.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 9503a67b6..b4cfc25ab 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -950,6 +950,7 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): def calcOverlappingSurfaces(**kwargs): """Calculate overlapping parts of the predicted channels, tunnels, and pores denote as 'FIL' atoms. + Results are normalized within [0,1]. :arg resolution: Surface sampling resolution. default is 0.5 @@ -1027,25 +1028,27 @@ def merge_surfaces(surfaces): merged_surface[key] = merged_surface.get(key, 0) + 1 return merged_surface - def write_merge_surf_pdb(merged_surface, filename): + def write_merge_surf_pdb(merged_surface, filename, nr_pdbs): """Write the merged surface into a PDB file.""" with open(filename, 'w') as file: atom_id = 1 for (x, y, z), count in merged_surface.items(): - file.write("ATOM {:5d} H FIL T 1 {:8.3f}{:8.3f}{:8.3f}{:6.2f} 1.00\n".format(atom_id, x, y, z, count)) + norm_count = count/nr_pdbs + file.write("ATOM {:5d} H FIL T 1 {:8.3f}{:8.3f}{:8.3f}{:6.2f} 1.00\n".format(atom_id, x, y, z, norm_count)) atom_id += 1 surfaces = [] - for pdb_file in pdb_files: + for nr_pdbs,pdb_file in enumerate(pdb_files): LOGGER.info('Processing file: {0}'.format(pdb_file)) print('Processing file: {0}'.format(pdb_file)) atoms = loadPDBdata(pdb_file) if atoms: surface = create_surface(atoms, resolution=resolution) surfaces.append(surface) - + + nr_pdbs = nr_pdbs+1 merged_surface = merge_surfaces(surfaces) - write_merge_surf_pdb(merged_surface, output_file_name) + write_merge_surf_pdb(merged_surface, output_file_name, nr_pdbs) class Channel: From 8173d933687d776fef5be96676766341e5aec6d1 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Fri, 14 Mar 2025 18:52:05 +0100 Subject: [PATCH 36/48] calcChannels[pqr format] --- 
prody/proteins/channels.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index b4cfc25ab..17b2d9e5b 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -522,7 +522,8 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep if output_path.is_dir(): output_path = output_path / "output.pdb" - elif not output_path.suffix == ".pdb": + + elif not (output_path.suffix == ".pdb" or output_path.suffix == ".pqr"): output_path = output_path.with_suffix(".pdb") if not separate: From c630fe0e22ee7817999e2f2034ee4432d807782e Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 18 Mar 2025 09:05:29 +0100 Subject: [PATCH 37/48] getChannelResidueNames [1letter code option added] --- prody/proteins/channels.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 17b2d9e5b..ffa157c87 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -783,7 +783,11 @@ def getChannelResidueNames(atoms, channels, **kwargs): Use one word which will be added to '_Residues_All_channels.txt' sufix. If further analysis will be performed with selectChannelBySelection() function, the preferable residues_file_name is PDB+chain for example: '1bbhA'. 
- :type residues_file_name: str ''' + :type residues_file_name: str + + :arg one_letter_aa: Whether to apply 1-latter code to residue name + by defult is False + :type one_letter_aa: bool ''' try: coords = (atoms._getCoords() if hasattr(atoms, '_getCoords') else @@ -798,6 +802,10 @@ def getChannelResidueNames(atoms, channels, **kwargs): distA = kwargs.pop('distA', 4) residues_file_name = kwargs.pop('residues_file_name', None) + one_letter_aa = kwargs.pop('one_letter_aa', False) + if one_letter_aa == True: + from prody.atomic.atomic import AAMAP + if isinstance(channels, list): # Multiple channels selected_residues_ch = [] @@ -808,6 +816,10 @@ def getChannelResidueNames(atoms, channels, **kwargs): if residues is not None: resnames = residues.select('name CA').getResnames() + if one_letter_aa == True: + resnames_1letter = [AAMAP["HIS"] if aa in ("HSD", "HSP") else AAMAP[aa] for aa in resnames] + resnames = resnames_1letter + resnums = residues.select('name CA').getResnums() residues_info = ["{}{}".format(resname, resnum) for resname, resnum in zip(resnames, resnums)] residues_list = ", ".join(residues_info) @@ -824,6 +836,10 @@ def getChannelResidueNames(atoms, channels, **kwargs): if residues is not None: resnames = residues.select('name CA').getResnames() + if one_letter_aa == True: + resnames_1letter = [AAMAP["HIS"] if aa in ("HSD", "HSP") else AAMAP[aa] for aa in resnames] + resnames = resnames_1letter + resnums = residues.select('name CA').getResnums() residues_info = ["{}{}".format(resname, resnum) for resname, resnum in zip(resnames, resnums)] residues_list = ", ".join(residues_info) From a756df3e4150e24d36cdc912a807a65fc3e7cab9 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Tue, 18 Mar 2025 09:50:26 +0100 Subject: [PATCH 38/48] calcChannels - option separate is changed --- prody/proteins/channels.py | 44 ++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/prody/proteins/channels.py 
b/prody/proteins/channels.py index ffa157c87..49390c45f 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -1455,6 +1455,29 @@ def filter_channels_by_bottleneck(self, cavities, bottleneck): def save_channels_to_pdb(self, cavities, filename, separate=False, num_samples=5): filename = str(filename) + # All channels will be provided always when PDB/PQR will be created + with open(filename, 'w') as pdb_file: + atom_index = 1 + for cavity in cavities: + for channel in cavity.channels: + centerline_spline, radius_spline = channel.get_splines() + samples = len(channel.tetrahedra) * num_samples + t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) + centers = centerline_spline(t) + radii = radius_spline(t) + + pdb_lines = [] + for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): + pdb_lines.append("ATOM %5d H FIL T 1 %8.3f%8.3f%8.3f%6.2f%6.2f\n" % (i, x, y, z, 1.00, radius)) + + for i in range(1, samples): + pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) + + pdb_file.writelines(pdb_lines) + pdb_file.write("\n") + atom_index += samples + + # When separate is set to True also separate PDB/PQR files will be created if separate: channel_index = 0 for cavity in cavities: @@ -1479,27 +1502,6 @@ def save_channels_to_pdb(self, cavities, filename, separate=False, num_samples=5 pdb_file.writelines(pdb_lines) channel_index += 1 - else: - with open(filename, 'w') as pdb_file: - atom_index = 1 - for cavity in cavities: - for channel in cavity.channels: - centerline_spline, radius_spline = channel.get_splines() - samples = len(channel.tetrahedra) * num_samples - t = np.linspace(centerline_spline.x[0], centerline_spline.x[-1], samples) - centers = centerline_spline(t) - radii = radius_spline(t) - - pdb_lines = [] - for i, (x, y, z, radius) in enumerate(zip(centers[:, 0], centers[:, 1], centers[:, 2], radii), start=atom_index): - pdb_lines.append("ATOM %5d H FIL T 1 
%8.3f%8.3f%8.3f%6.2f%6.2f\n" % (i, x, y, z, 1.00, radius)) - - for i in range(1, samples): - pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) - - pdb_file.writelines(pdb_lines) - pdb_file.write("\n") - atom_index += samples def calculate_channel_length(self, centerline_spline): From 408bd9f54e04e91b3b0eb73b172ffad6c72f84cc Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sat, 29 Mar 2025 16:46:55 +0100 Subject: [PATCH 39/48] getChannelParameters() can be applied to single and multiple channel --- prody/proteins/channels.py | 49 +++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 49390c45f..db9052aa9 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -643,10 +643,30 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat return channels_all, surfaces_all +def parseParameters(channels, **kwargs): + """Extracts and returns the lengths, bottlenecks, and volumes of each channel in a given list of channels. """ + + lengths = [] + bottlenecks = [] + volumes = [] + param_file_name = kwargs.pop('param_file_name', None) + + for nr_ch, channel in enumerate(channels): + lengths.append(channel.length) + bottlenecks.append(channel.bottleneck) + volumes.append(channel.volume) + + if param_file_name is not None: + with open(param_file_name+'_Parameters_All_channels.txt', "a") as f_par: + f_par.write(("{0}_channel{1}: {2} {3} {4}\n".format(param_file_name, nr_ch, channel.length, channel.bottleneck, channel.volume))) + + return lengths, bottlenecks, volumes + + def getChannelParameters(channels, **kwargs): """Extracts and returns the lengths, bottlenecks, and volumes of each channel in a given list of channels. 
- This function iterates through a list of channel objects, extracting the length, bottleneck, + This functaaion iterates through a list of channel objects, extracting the length, bottleneck, and volume of each channel. These values are collected into separate lists, which are returned as a tuple for further use. @@ -667,21 +687,18 @@ def getChannelParameters(channels, **kwargs): Example usage: lengths, bottlenecks, volumes = getChannelParameters(channels) """ - lengths = [] - bottlenecks = [] - volumes = [] - param_file_name = kwargs.pop('param_file_name', None) - - for nr_ch, channel in enumerate(channels): - lengths.append(channel.length) - bottlenecks.append(channel.bottleneck) - volumes.append(channel.volume) - - if param_file_name is not None: - with open(param_file_name+'_Parameters_All_channels.txt', "a") as f_par: - f_par.write(("{0}_channel{1}: {2} {3} {4}\n".format(param_file_name, nr_ch, channel.length, channel.bottleneck, channel.volume))) - - return lengths, bottlenecks, volumes + multi_model_param = [] + param_file_name = kwargs.get('param_file_name', None) + + try: + return parseParameters(channels, **kwargs) + + except: + for nr_i,i in enumerate(channels): + safe_param_file_name = param_file_name if param_file_name is not None else "" + results = parseParameters(channels[nr_i], param_file_name=safe_param_file_name + str(nr_i)) + multi_model_param.append(results) + return multi_model_param def getChannelAtoms(channels, protein=None, num_samples=5): From 20b1ab2233233e06995b0ff81a4c039db3077457 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sat, 29 Mar 2025 17:29:22 +0100 Subject: [PATCH 40/48] calcChannels -> pqr format is default, pdb optional --- prody/proteins/channels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index db9052aa9..fd6026d88 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -524,7 +524,7 @@ def calcChannels(atoms, 
output_path=None, separate=False, r1=3, r2=1.25, min_dep output_path = output_path / "output.pdb" elif not (output_path.suffix == ".pdb" or output_path.suffix == ".pqr"): - output_path = output_path.with_suffix(".pdb") + output_path = output_path.with_suffix(".pqr") if not separate: LOGGER.info("Saving results to " + str(output_path) + ".") From ab2d652c3810b0c780232cb6fa8e7abad6024549 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sat, 29 Mar 2025 22:11:10 +0100 Subject: [PATCH 41/48] Channels -> pqr instead of pdb in all functions --- prody/proteins/channels.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index fd6026d88..11d6f8c9c 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -521,7 +521,7 @@ def calcChannels(atoms, output_path=None, separate=False, r1=3, r2=1.25, min_dep output_path = Path(output_path) if output_path.is_dir(): - output_path = output_path / "output.pdb" + output_path = output_path / "output.pqr" elif not (output_path.suffix == ".pdb" or output_path.suffix == ".pqr"): output_path = output_path.with_suffix(".pqr") @@ -599,7 +599,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat if output_path: output_path = Path(output_path) - if output_path.suffix == ".pdb": + if output_path.suffix == ".pqr": output_path = output_path.with_suffix('') if trajectory is not None: @@ -619,7 +619,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat LOGGER.info('Frame: {0}'.format(j0)) atoms_copy.setCoords(frame0.getCoords()) if output_path: - channels, surfaces = calcChannels(atoms_copy, str(output_path) + "{0}.pdb".format(j0), separate, **kwargs) + channels, surfaces = calcChannels(atoms_copy, str(output_path) + "{0}.pqr".format(j0), separate, **kwargs) else: channels, surfaces = calcChannels(atoms_copy, **kwargs) channels_all.append(channels) @@ -632,7 +632,7 @@ def 
calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat LOGGER.info('Model: {0}'.format(i+start_frame)) atoms.setACSIndex(i+start_frame) if output_path: - channels, surfaces = calcChannels(atoms, str(output_path) + "{0}.pdb".format(i+start_frame), separate, **kwargs) + channels, surfaces = calcChannels(atoms, str(output_path) + "{0}.pqr".format(i+start_frame), separate, **kwargs) else: channels, surfaces = calcChannels(atoms, **kwargs) channels_all.append(channels) @@ -924,7 +924,7 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): if pdb_files == False: # take all PDBs from the current dir - pdb_files = [file for file in os.listdir('.') if file.endswith('.pdb')] + pdb_files = [file for file in os.listdir('.') if file.endswith('.pqr')] residue_sele = atoms.select(residue_sele) if not os.path.exists(folder_name): @@ -1007,17 +1007,17 @@ def calcOverlappingSurfaces(**kwargs): pdb_files = kwargs.pop('pdb_files', False) if pdb_files == False or pdb_files is None: # take all PDBs from the current dir - pdb_files = [file for file in os.listdir('.') if file.endswith('.pdb')] + pdb_files = [file for file in os.listdir('.') if file.endswith('.pqr')] elif isinstance(pdb_files, str): # folder path - pdb_files = [file for file in os.listdir(pdb_files) if file.endswith('.pdb')] + pdb_files = [file for file in os.listdir(pdb_files) if file.endswith('.pqr')] elif isinstance(pdb_files, list): # list of PDBs - pdb_files = [file for file in pdb_files if file.endswith('.pdb')] + pdb_files = [file for file in pdb_files if file.endswith('.pqr')] else: raise ValueError('Please provide list with PDB files, folder path, or nothing to analyze PDBs in the current folder') - output_file_name = kwargs.pop('output_file_name','overlap_regions.pdb') + output_file_name = kwargs.pop('output_file_name','overlap_regions.pqr') if os.path.exists(output_file_name): os.rename(output_file_name, output_file_name+'-old') @@ -1499,7 +1499,7 @@ def save_channels_to_pdb(self, 
cavities, filename, separate=False, num_samples=5 channel_index = 0 for cavity in cavities: for channel in cavity.channels: - channel_filename = filename.replace('.pdb', '_channel{0}.pdb'.format(channel_index)) + channel_filename = filename.replace('.pqr', '_channel{0}.pqr'.format(channel_index)) with open(channel_filename, 'w') as pdb_file: atom_index = 1 From 694d4495bdd750072741574bdd2eaf9391c05c81 Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sun, 30 Mar 2025 11:19:18 +0200 Subject: [PATCH 42/48] getChannelParameters() - vizualization of parameters --- prody/proteins/channels.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 11d6f8c9c..5e1c85b45 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -616,7 +616,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat atoms_copy = atoms.copy() for j0, frame0 in enumerate(traj, start=start_frame): - LOGGER.info('Frame: {0}'.format(j0)) + LOGGER.info("Frame: {0}".format(j0)) atoms_copy.setCoords(frame0.getCoords()) if output_path: channels, surfaces = calcChannels(atoms_copy, str(output_path) + "{0}.pqr".format(j0), separate, **kwargs) @@ -629,7 +629,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat else: if atoms.numCoordsets() > 1: for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])): - LOGGER.info('Model: {0}'.format(i+start_frame)) + LOGGER.info("Model: {0}".format(i+start_frame)) atoms.setACSIndex(i+start_frame) if output_path: channels, surfaces = calcChannels(atoms, str(output_path) + "{0}.pqr".format(i+start_frame), separate, **kwargs) @@ -638,7 +638,7 @@ def calcChannelsMultipleFrames(atoms, trajectory=None, output_path=None, separat channels_all.append(channels) surfaces_all.append(surfaces) else: - LOGGER.info('Include trajectory or use multi-model PDB file.') + LOGGER.info("Include 
trajectory or use multi-model PDB file.") return channels_all, surfaces_all @@ -691,13 +691,25 @@ def getChannelParameters(channels, **kwargs): param_file_name = kwargs.get('param_file_name', None) try: - return parseParameters(channels, **kwargs) + results_L_B_V = parseParameters(channels, **kwargs) + lengths, bottlenecks, volumes = results_L_B_V + LOGGER.info("Channel {0}: \t{1} \t{2} \t{3}".format('ID', 'Volume [ų]', 'Length [Å]', 'Bottleneck [Å]')) + for i in range(len(lengths)): + LOGGER.info("channel {0}: \t{1} \t\t{2} \t\t{3}".format(i, np.round(volumes[i],2), np.round(lengths[i], 2), np.round(bottlenecks[i], 2))) + return results_L_B_V except: for nr_i,i in enumerate(channels): safe_param_file_name = param_file_name if param_file_name is not None else "" results = parseParameters(channels[nr_i], param_file_name=safe_param_file_name + str(nr_i)) multi_model_param.append(results) + + LOGGER.info("Channel {0}: \t{1} \t{2} \t{3}".format('ID', 'Volume [ų]', 'Length [Å]', 'Bottleneck [Å]')) + for frame_nr, frame in enumerate(multi_model_param): + lengths, bottlenecks, volumes = frame + LOGGER.info("Frame {0}".format(frame_nr)) + for i in range(len(lengths)): + LOGGER.info("channel {0}: \t{1} \t\t{2} \t\t{3}".format(i, np.round(volumes[i],2), np.round(lengths[i], 2), np.round(bottlenecks[i], 2))) return multi_model_param @@ -937,7 +949,7 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): if sele_FIL is not None: shutil.copy(i, folder_name) - LOGGER.info('Filtered files are now in: {0}'.format(folder_name)) + LOGGER.info("Filtered files are now in: {0}".format(folder_name)) copied_files_list.append(i) else: pass @@ -954,7 +966,7 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): if line.startswith(PDB_id+'_'+channel_name+':'): selected_residues.append(line) except: - LOGGER.info('File {0} was not analyzed due to the lack of file or multiple channel file.'.format(file)) + LOGGER.info("File {0} was not analyzed due to the lack of file or 
multiple channel file.".format(file)) pass with open('Selected_channel_residues.txt', 'w') as f_out: @@ -971,15 +983,15 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): if line.startswith(PDB_id+'_'+channel_name+':'): selected_param.append(line) except: - LOGGER.info('File {0} was not analyzed due to the lack of file or multiple channel file.'.format(file)) + LOGGER.info("File {0} was not analyzed due to the lack of file or multiple channel file.".format(file)) pass with open('Selected_channel_parameters.txt', 'w') as f_out: f_out.writelines(selected_param) - LOGGER.info('Selected files: ') + LOGGER.info("Selected files: ") LOGGER.info(' '.join(copied_files_list)) - LOGGER.info('If newly created files are empty please check whether the parameter names are: PDB_id+_Parameters_All_channels.txt') + LOGGER.info("If newly created files are empty please check whether the parameter names are: PDB_id+_Parameters_All_channels.txt") def calcOverlappingSurfaces(**kwargs): @@ -1073,8 +1085,8 @@ def write_merge_surf_pdb(merged_surface, filename, nr_pdbs): surfaces = [] for nr_pdbs,pdb_file in enumerate(pdb_files): - LOGGER.info('Processing file: {0}'.format(pdb_file)) - print('Processing file: {0}'.format(pdb_file)) + LOGGER.info("Processing file: {0}".format(pdb_file)) + print("Processing file: {0}".format(pdb_file)) atoms = loadPDBdata(pdb_file) if atoms: surface = create_surface(atoms, resolution=resolution) From 4ccb4e10d5bd0ae8835d6d99e092886248e3775d Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sun, 30 Mar 2025 11:20:38 +0200 Subject: [PATCH 43/48] print remove --- prody/proteins/channels.py | 1 - 1 file changed, 1 deletion(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index 5e1c85b45..fc6ffc758 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -1086,7 +1086,6 @@ def write_merge_surf_pdb(merged_surface, filename, nr_pdbs): surfaces = [] for nr_pdbs,pdb_file in enumerate(pdb_files): 
LOGGER.info("Processing file: {0}".format(pdb_file)) - print("Processing file: {0}".format(pdb_file)) atoms = loadPDBdata(pdb_file) if atoms: surface = create_surface(atoms, resolution=resolution) From ac5324109e3b7ffb5bd5151dd822b24afca811bf Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sun, 30 Mar 2025 12:15:56 +0200 Subject: [PATCH 44/48] Changes in names and docs (pqr instead of pdb) [calcOverlappingSurfaces()] --- prody/proteins/channels.py | 58 +++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index fc6ffc758..ef6662b74 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -887,10 +887,10 @@ def getChannelResidueNames(atoms, channels, **kwargs): def selectChannelBySelection(atoms, residue_sele, **kwargs): """Select PDB files with channels that are having FIL residues within certain distance (distA) from selected residue (temporarly one residue). - If not all files should be included use pdb_files to provide the new list. + If not all files should be included use pqr_files to provide the new list. 
For example: - pdb_files = [file for file in os.listdir('.') if file.startswith('7lafA_') and file.endswith('.pdb')] - pdb_files = [file for file in os.listdir('.') if '5kbd' in file and file.endswith('.pdb')] + pqr_files = [file for file in os.listdir('.') if file.startswith('7lafA_') and file.endswith('.pqr')] + pqr_files = [file for file in os.listdir('.') if '5kbd' in file and file.endswith('.pqr')] :arg atoms: an Atomic object from which residues are selected :type atoms: :class:`.Atomic`, :class:`.LigandInteractionsTrajectory` @@ -927,22 +927,22 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): import os, shutil import numpy as np - pdb_files = kwargs.pop('pdb_files', False) + pqr_files = kwargs.pop('pqr_files', False) distA = kwargs.pop('distA', 5) folder_name = kwargs.pop('folder_name', 'selected_files') residues_file = kwargs.pop('residues_file', False) param_file = kwargs.pop('param_file', False) copied_files_list = [] - if pdb_files == False: + if pqr_files == False: # take all PDBs from the current dir - pdb_files = [file for file in os.listdir('.') if file.endswith('.pqr')] + pqr_files = [file for file in os.listdir('.') if file.endswith('.pqr')] residue_sele = atoms.select(residue_sele) if not os.path.exists(folder_name): os.makedirs(folder_name) - for i in pdb_files: + for i in pqr_files: channel = parsePDB(i) if 'FIL' in np.unique(channel.getResnames()): sele_FIL = channel.select('same residue as exwithin '+str(distA)+' of center', center=residue_sele.getCoords()) @@ -1005,36 +1005,36 @@ def calcOverlappingSurfaces(**kwargs): :arg output_file_name: The name of the PDB file with overlapping surfaces. 
:type output_file_name: str - :arg pdb_files: File with residues forming the channel created by getChannelResidues() + :arg pqr_files: File with residues forming the channel created by getChannelResidues() default is False (then all the files from the current directory will be analyzed) when providing a list, only the PDBs from list will be analyzed when providing str, it will be treated as a folder path - :type pdb_files: bool, list or str + :type pqr_files: bool, list or str """ import os resolution = kwargs.pop('resolution', 0.5) - pdb_files = kwargs.pop('pdb_files', False) - if pdb_files == False or pdb_files is None: - # take all PDBs from the current dir - pdb_files = [file for file in os.listdir('.') if file.endswith('.pqr')] - elif isinstance(pdb_files, str): + pqr_files = kwargs.pop('pqr_files', False) + if pqr_files == False or pqr_files is None: + # take all PQRs from the current dir + pqr_files = [file for file in os.listdir('.') if file.endswith('.pqr')] + elif isinstance(pqr_files, str): # folder path - pdb_files = [file for file in os.listdir(pdb_files) if file.endswith('.pqr')] - elif isinstance(pdb_files, list): - # list of PDBs - pdb_files = [file for file in pdb_files if file.endswith('.pqr')] + pqr_files = [file for file in os.listdir(pqr_files) if file.endswith('.pqr')] + elif isinstance(pqr_files, list): + # list of PQRs + pqr_files = [file for file in pqr_files if file.endswith('.pqr')] else: - raise ValueError('Please provide list with PDB files, folder path, or nothing to analyze PDBs in the current folder') + raise ValueError('Please provide list with PQR files, folder path, or nothing to analyze PQRs in the current folder') - output_file_name = kwargs.pop('output_file_name','overlap_regions.pqr') + output_file_name = kwargs.pop('output_file_name','overlap_regions.pdb') if os.path.exists(output_file_name): os.rename(output_file_name, output_file_name+'-old') def loadPDBdata(filepath): - """Parse a PDB file and return a list of atom 
dictionaries for lines containing 'FIL'.""" + """Parse a PQR file and return a list of atom dictionaries for lines containing 'FIL'.""" atoms_set = [] FILatoms = parsePDB(filepath).select('resname FIL') @@ -1084,9 +1084,9 @@ def write_merge_surf_pdb(merged_surface, filename, nr_pdbs): atom_id += 1 surfaces = [] - for nr_pdbs,pdb_file in enumerate(pdb_files): - LOGGER.info("Processing file: {0}".format(pdb_file)) - atoms = loadPDBdata(pdb_file) + for nr_pdbs,pqr_file in enumerate(pqr_files): + LOGGER.info("Processing file: {0}".format(pqr_file)) + atoms = loadPDBdata(pqr_file) if atoms: surface = create_surface(atoms, resolution=resolution) surfaces.append(surface) @@ -1484,7 +1484,7 @@ def save_channels_to_pdb(self, cavities, filename, separate=False, num_samples=5 filename = str(filename) # All channels will be provided always when PDB/PQR will be created - with open(filename, 'w') as pdb_file: + with open(filename, 'w') as pqr_file: atom_index = 1 for cavity in cavities: for channel in cavity.channels: @@ -1501,8 +1501,8 @@ def save_channels_to_pdb(self, cavities, filename, separate=False, num_samples=5 for i in range(1, samples): pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) - pdb_file.writelines(pdb_lines) - pdb_file.write("\n") + pqr_file.writelines(pdb_lines) + pqr_file.write("\n") atom_index += samples # When separate is set to True also separate PDB/PQR files will be created @@ -1512,7 +1512,7 @@ def save_channels_to_pdb(self, cavities, filename, separate=False, num_samples=5 for channel in cavity.channels: channel_filename = filename.replace('.pqr', '_channel{0}.pqr'.format(channel_index)) - with open(channel_filename, 'w') as pdb_file: + with open(channel_filename, 'w') as pqr_file: atom_index = 1 centerline_spline, radius_spline = channel.get_splines() samples = len(channel.tetrahedra) * num_samples @@ -1527,7 +1527,7 @@ def save_channels_to_pdb(self, cavities, filename, separate=False, num_samples=5 for i in range(1, samples): 
pdb_lines.append("CONECT%5d%5d\n" % (i, i + 1)) - pdb_file.writelines(pdb_lines) + pqr_file.writelines(pdb_lines) channel_index += 1 From 9fa44be8dd78d07ff8480c358477ea83b5a555ef Mon Sep 17 00:00:00 2001 From: karolamik13 Date: Sun, 30 Mar 2025 19:40:06 +0200 Subject: [PATCH 45/48] calcOverlappingSurfaces is replaced by calcChannelSurfaceOverlaps() --- prody/proteins/channels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prody/proteins/channels.py b/prody/proteins/channels.py index ef6662b74..01f1079d4 100644 --- a/prody/proteins/channels.py +++ b/prody/proteins/channels.py @@ -31,7 +31,7 @@ __all__ = ['getVmdModel', 'calcChannels', 'calcChannelsMultipleFrames', 'getChannelParameters', 'getChannelAtoms', 'showChannels', 'showCavities', 'selectChannelBySelection', 'getChannelResidueNames', - 'calcOverlappingSurfaces'] + 'calcChannelSurfaceOverlaps'] @@ -994,7 +994,7 @@ def selectChannelBySelection(atoms, residue_sele, **kwargs): LOGGER.info("If newly created files are empty please check whether the parameter names are: PDB_id+_Parameters_All_channels.txt") -def calcOverlappingSurfaces(**kwargs): +def calcChannelSurfaceOverlaps(**kwargs): """Calculate overlapping parts of the predicted channels, tunnels, and pores denote as 'FIL' atoms. Results are normalized within [0,1]. 
From 20b3ec94bbf2e902cf6a596fde53e26e6a55841f Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 19 Oct 2025 10:53:30 +0100 Subject: [PATCH 46/48] clean up extra insty line --- prody/proteins/interactions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 783ac8780..3b34518b8 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -6246,4 +6246,3 @@ def saveInteractionsPDB(self, **kwargs): return freq_contacts_list - From ce3e551098c38e5aea8a643904b9ad6e59238836 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 19 Oct 2025 10:54:13 +0100 Subject: [PATCH 47/48] clean up extra insty line better --- prody/proteins/interactions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 3b34518b8..3f626bab8 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -6245,4 +6245,4 @@ def saveInteractionsPDB(self, **kwargs): LOGGER.info('PDB file saved.') return freq_contacts_list - + From 5c59e050821fd0b0f67b5009620c577dbb1ef737 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Sun, 19 Oct 2025 10:55:05 +0100 Subject: [PATCH 48/48] add back upper insty line --- prody/proteins/interactions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py index 3f626bab8..dfca7a1d3 100644 --- a/prody/proteins/interactions.py +++ b/prody/proteins/interactions.py @@ -16,6 +16,7 @@ __credits__ = ['James Krieger', 'Karolina Mikulska-Ruminska', 'Anupam Banerjee'] __email__ = ['karolamik@fizyka.umk.pl', 'jamesmkrieger@gmail.com', 'anupam.banerjee@stonybrook.edu'] + import numpy as np from numpy import * from prody import LOGGER, SETTINGS, PY3K