diff --git a/.vscode/settings.json b/.vscode/settings.json index 6f3a291..f673a71 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,3 @@ { - "liveServer.settings.port": 5501 + "liveServer.settings.port": 5502 } \ No newline at end of file diff --git a/cli/cluster_search.py b/cli/cluster_search.py index 51abff3..35d6793 100644 --- a/cli/cluster_search.py +++ b/cli/cluster_search.py @@ -47,6 +47,8 @@ def __init__(self): self.d3js_json = None self.db = None # databse of motif and matrix self.treshold_img = False + self.gibbs_results = None + self.kld_df = None def generate_unique_random_ids(self, count: int) -> list: """ @@ -96,7 +98,7 @@ def parse_gibbs_output(self, gibbs_path: str, n_clusters: int) -> pd.DataFrame: for c_file in os.listdir(res_path): if f'{n_clusters}g.ds' in c_file: file_path = os.path.join(res_path, c_file) - df = pd.read_csv(file_path, sep='\s+') + df = pd.read_csv(file_path, sep="\s+") # output_path = f'data/sampledata_701014/res_{n_clusters}g.csv' # df.to_csv(output_path, index=False) logging.info(f"Data parsed for No clusters {n_clusters}") @@ -376,7 +378,49 @@ def compute_correlations( self.console.log( f"Cluster Search Preprocess completed in {elapsed_time:.2f} seconds." ) - + def read_KLD_file(self,file_path): + """ + Read & Prase Gibbs gibbs.KLDvsClusters file. + Args: + file_path (str): The path to the Gibbs gibbs.KLDvsClusters file. + Returns: + list: A list of tuples containing the cluster number and KLD value. + """ + try: + # Initialize an empty dictionary to store data + data_dict = {'cluster': []} + + with open(file_path, 'r') as file: + lines = file.readlines() + for line in lines[1:]: # Skip the header line + parts = line.strip().split('\t') + cluster_number = int(parts[0]) + kld_values = [float(value) for value in parts[1:]] + + # Ensure the dictionary has enough group columns + for i in range(1, len(kld_values) + 1): + group_key = f'group{i}' + if group_key not in data_dict: + data_dict[group_key] = [] + + # Populate the row data + data_dict['cluster'].append(cluster_number) + for i, kld_value in enumerate(kld_values, start=1): + data_dict[f'group{i}'].append(kld_value if kld_value > 0 else 0) + + # Fill remaining groups with zeros if necessary + for j in range(len(kld_values) + 1, len(data_dict) - 1): + data_dict[f'group{j}'].append(0) + + # Convert the dictionary to a DataFrame + df = pd.DataFrame(data_dict) + df.loc[:, 'total'] = df.iloc[:, 1:].sum(axis=1) + df.reset_index(drop=True, inplace=True) + # print(df) + return df + except Exception as e: + print(f"Error reading file: {e}") + sys.exit(1) def compute_correlations_v2( self, db: pd.DataFrame, @@ -397,7 +441,10 @@ def compute_correlations_v2( #Update to self self.threshold = threshold self.treshold_img = treshold_img - + self.gibbs_results = gibbs_results + self.kld_df = self.read_KLD_file( + os.path.join(self.gibbs_results, 'images', 'gibbs.KLDvsClusters.tab') + ) gibbs_result_matrix = os.path.join(gibbs_results, "matrices") should_process = False if os.path.exists(gibbs_result_matrix) and any(".mat" in file for file in os.listdir(gibbs_result_matrix)): @@ -1151,7 +1198,7 @@ def insert_script_png_json(self, script_data_path, img_fallback_path, div_id): return script_template.render(script_data_path=script_data_path, img_fallback_path=img_fallback_path, div_id=div_id) - def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_presented_img): + def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_presented_img,kld_clust_group_kld): if str(self.species).lower() == "human": hla_name = f"HLA-{hla_name}" @@ -1160,7 +1207,7 @@ def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_present template = Template('''
-

{{ hla_name }} PCC = {{corr}}

+

Best matched allotype is {{ hla_name }} with PCC = {{corr}} and KLD= {{ kld }}

@@ -1208,7 +1255,7 @@ def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_present
''') - return template.render(hla_name=hla_name, corr=corr, best_cluster_img=best_cluster_img, naturally_presented_img=naturally_presented_img) + return template.render(hla_name=hla_name, corr=corr, best_cluster_img=best_cluster_img, naturally_presented_img=naturally_presented_img,kld= kld_clust_group_kld) def make_datatable(self, correlation_dict): df = pd.DataFrame(correlation_dict.items(), @@ -1219,9 +1266,26 @@ def make_datatable(self, correlation_dict): df['HLA'] = df['HLA'].apply( lambda x: x.split('/')[-1].replace('.txt', '')) df['Correlation'] = df['Correlation'].apply(lambda x: round(x, 2)) + #Add KLD from kld_clust_group_kld + if self.kld_df is not None: + try: + df['KLD'] = df['Cluster'].apply( + lambda x: self.kld_df.loc[ + self.kld_df['cluster'] == int(str(x).split('of')[-1]), + f'group{str(x).split("of")[0]}' + ].values[0] if f'group{str(x).split("of")[0]}' in self.kld_df.columns else None + ) + except KeyError as e: + # Handle the case where the key is not found + self.console.log( + f"KeyError: The key KLD group was not found in the Corr DataFrame." + ) + df['KLD'] = 'NA' + else: + df['KLD'] = 'NA' df = df.sort_values(by='Correlation', ascending=False) df = df.reset_index(drop=True) - df = df[['Cluster', 'HLA', 'Correlation']] + df = df[['Cluster', 'HLA', 'Correlation', 'KLD']] return df def process_correlation_data(self, df=None): @@ -1540,11 +1604,37 @@ def _create_carousel_for_cluster(self, carousel_id, cluster_num, group_data): total_slides = len(group_nums) # Start building the carousel HTML + # Convert cluster_num to words for display + def number_to_words(n): + words = { + 1: "One", 2: "Two", 3: "Three", 4: "Four", 5: "Five", 6: "Six", + 7: "Seven", 8: "Eight", 9: "Nine", 10: "Ten" + } + return words.get(n, str(n)) + + cluster_num_word = number_to_words(int(cluster_num)) + # KLD = self.kld_df[] + if self.kld_df is not None: + kld_clust_df = self.kld_df[self.kld_df['cluster'] == cluster_num] + if not kld_clust_df.empty: + kld = round(kld_clust_df['total'].values[0], 2) + else: + kld = None + # Determine singular/plural for group(s) + group_label = "group" if len(group_nums) == 1 else "groups" + group_range = f"{min(group_nums)}" if len(group_nums) == 1 else f"{min(group_nums)} to {max(group_nums)}" + cluster_label = f"{cluster_num_word} cluster output" + carousel_html = f"""
-

Cluster {cluster_num}

-
""" @@ -1742,6 +1850,13 @@ def generate_html_layout(self, correlation_dict, db, gibbs_out, immunolyser=Fals $(targetId).carousel(parseInt(slideIndex)); }); }); + + document.addEventListener('DOMContentLoaded', function () { + var popoverTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="popover"]')); + popoverTriggerList.forEach(function (popoverTriggerEl) { + new bootstrap.Popover(popoverTriggerEl); + }); + }); """ body_end_1 = Template("""
@@ -1800,6 +1915,7 @@ def generate_html_layout(self, correlation_dict, db, gibbs_out, immunolyser=Fals noImgDivs.forEach(div => { div.innerHTML = placeholderSVG; // Add placeholder to each "no-img" div }); + }); diff --git a/cli/html_config.py b/cli/html_config.py index 9b26a1c..cd78fee 100644 --- a/cli/html_config.py +++ b/cli/html_config.py @@ -192,6 +192,7 @@