diff --git a/.vscode/settings.json b/.vscode/settings.json
index 6f3a291..f673a71 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,3 +1,3 @@
{
- "liveServer.settings.port": 5501
+ "liveServer.settings.port": 5502
}
\ No newline at end of file
diff --git a/cli/cluster_search.py b/cli/cluster_search.py
index 51abff3..35d6793 100644
--- a/cli/cluster_search.py
+++ b/cli/cluster_search.py
@@ -47,6 +47,8 @@ def __init__(self):
self.d3js_json = None
self.db = None # databse of motif and matrix
self.treshold_img = False
+ self.gibbs_results = None
+ self.kld_df = None
def generate_unique_random_ids(self, count: int) -> list:
"""
@@ -96,7 +98,7 @@ def parse_gibbs_output(self, gibbs_path: str, n_clusters: int) -> pd.DataFrame:
for c_file in os.listdir(res_path):
if f'{n_clusters}g.ds' in c_file:
file_path = os.path.join(res_path, c_file)
- df = pd.read_csv(file_path, sep='\s+')
+ df = pd.read_csv(file_path, sep="\s+")
# output_path = f'data/sampledata_701014/res_{n_clusters}g.csv'
# df.to_csv(output_path, index=False)
logging.info(f"Data parsed for No clusters {n_clusters}")
@@ -376,7 +378,49 @@ def compute_correlations(
self.console.log(
f"Cluster Search Preprocess completed in {elapsed_time:.2f} seconds."
)
-
+ def read_KLD_file(self,file_path):
+ """
+        Read & parse the Gibbs gibbs.KLDvsClusters file.
+ Args:
+ file_path (str): The path to the Gibbs gibbs.KLDvsClusters file.
+        Returns:
+            pd.DataFrame: One row per cluster count, with a 'cluster' column, per-group KLD columns ('group1'..'groupN'), and a 'total' column (row-wise sum).
+ """
+ try:
+ # Initialize an empty dictionary to store data
+ data_dict = {'cluster': []}
+
+ with open(file_path, 'r') as file:
+ lines = file.readlines()
+ for line in lines[1:]: # Skip the header line
+ parts = line.strip().split('\t')
+ cluster_number = int(parts[0])
+ kld_values = [float(value) for value in parts[1:]]
+
+ # Ensure the dictionary has enough group columns
+ for i in range(1, len(kld_values) + 1):
+ group_key = f'group{i}'
+ if group_key not in data_dict:
+ data_dict[group_key] = []
+
+ # Populate the row data
+ data_dict['cluster'].append(cluster_number)
+ for i, kld_value in enumerate(kld_values, start=1):
+ data_dict[f'group{i}'].append(kld_value if kld_value > 0 else 0)
+
+                # Fill remaining groups with zeros if necessary. NOTE(review): the stop value len(data_dict)-1 skips the last group column, which can leave columns of unequal length if the .tab file is not rectangular -- confirm, or use len(data_dict) as the stop.
+ for j in range(len(kld_values) + 1, len(data_dict) - 1):
+ data_dict[f'group{j}'].append(0)
+
+ # Convert the dictionary to a DataFrame
+ df = pd.DataFrame(data_dict)
+ df.loc[:, 'total'] = df.iloc[:, 1:].sum(axis=1)
+ df.reset_index(drop=True, inplace=True)
+ # print(df)
+ return df
+ except Exception as e:
+ print(f"Error reading file: {e}")
+ sys.exit(1)
def compute_correlations_v2(
self,
db: pd.DataFrame,
@@ -397,7 +441,10 @@ def compute_correlations_v2(
#Update to self
self.threshold = threshold
self.treshold_img = treshold_img
-
+ self.gibbs_results = gibbs_results
+ self.kld_df = self.read_KLD_file(
+ os.path.join(self.gibbs_results, 'images', 'gibbs.KLDvsClusters.tab')
+ )
gibbs_result_matrix = os.path.join(gibbs_results, "matrices")
should_process = False
if os.path.exists(gibbs_result_matrix) and any(".mat" in file for file in os.listdir(gibbs_result_matrix)):
@@ -1151,7 +1198,7 @@ def insert_script_png_json(self, script_data_path, img_fallback_path, div_id):
return script_template.render(script_data_path=script_data_path, img_fallback_path=img_fallback_path, div_id=div_id)
- def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_presented_img):
+ def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_presented_img,kld_clust_group_kld):
if str(self.species).lower() == "human":
hla_name = f"HLA-{hla_name}"
@@ -1160,7 +1207,7 @@ def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_present
template = Template('''
-
{{ hla_name }} PCC = {{corr}}
+ Best matched allotype is {{ hla_name }} with PCC = {{corr}} and KLD= {{ kld }}
@@ -1208,7 +1255,7 @@ def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_present
''')
- return template.render(hla_name=hla_name, corr=corr, best_cluster_img=best_cluster_img, naturally_presented_img=naturally_presented_img)
+ return template.render(hla_name=hla_name, corr=corr, best_cluster_img=best_cluster_img, naturally_presented_img=naturally_presented_img,kld= kld_clust_group_kld)
def make_datatable(self, correlation_dict):
df = pd.DataFrame(correlation_dict.items(),
@@ -1219,9 +1266,26 @@ def make_datatable(self, correlation_dict):
df['HLA'] = df['HLA'].apply(
lambda x: x.split('/')[-1].replace('.txt', ''))
df['Correlation'] = df['Correlation'].apply(lambda x: round(x, 2))
+        # Add a per-row KLD column looked up from the parsed KLD table (self.kld_df)
+ if self.kld_df is not None:
+ try:
+ df['KLD'] = df['Cluster'].apply(
+ lambda x: self.kld_df.loc[
+ self.kld_df['cluster'] == int(str(x).split('of')[-1]),
+ f'group{str(x).split("of")[0]}'
+ ].values[0] if f'group{str(x).split("of")[0]}' in self.kld_df.columns else None
+ )
+ except KeyError as e:
+ # Handle the case where the key is not found
+ self.console.log(
+ f"KeyError: The key KLD group was not found in the Corr DataFrame."
+ )
+ df['KLD'] = 'NA'
+ else:
+ df['KLD'] = 'NA'
df = df.sort_values(by='Correlation', ascending=False)
df = df.reset_index(drop=True)
- df = df[['Cluster', 'HLA', 'Correlation']]
+ df = df[['Cluster', 'HLA', 'Correlation', 'KLD']]
return df
def process_correlation_data(self, df=None):
@@ -1540,11 +1604,37 @@ def _create_carousel_for_cluster(self, carousel_id, cluster_num, group_data):
total_slides = len(group_nums)
# Start building the carousel HTML
+ # Convert cluster_num to words for display
+ def number_to_words(n):
+ words = {
+ 1: "One", 2: "Two", 3: "Three", 4: "Four", 5: "Five", 6: "Six",
+ 7: "Seven", 8: "Eight", 9: "Nine", 10: "Ten"
+ }
+ return words.get(n, str(n))
+
+ cluster_num_word = number_to_words(int(cluster_num))
+        # Look up this cluster's total KLD from the parsed KLD table
+ if self.kld_df is not None:
+ kld_clust_df = self.kld_df[self.kld_df['cluster'] == cluster_num]
+ if not kld_clust_df.empty:
+ kld = round(kld_clust_df['total'].values[0], 2)
+ else:
+ kld = None
+ # Determine singular/plural for group(s)
+ group_label = "group" if len(group_nums) == 1 else "groups"
+ group_range = f"{min(group_nums)}" if len(group_nums) == 1 else f"{min(group_nums)} to {max(group_nums)}"
+ cluster_label = f"{cluster_num_word} cluster output"
+
carousel_html = f"""
-
Cluster {cluster_num}
-
+
+
{cluster_label} ({group_range} {group_label}) (KLD = {kld})
+
+
+
+
+
"""
# Add indicators
@@ -1560,7 +1650,18 @@ def _create_carousel_for_cluster(self, carousel_id, cluster_num, group_data):
"""
-
+ # Add KLD score if available
+
+
+ if self.kld_df is not None:
+ kld_clust_df = self.kld_df[self.kld_df['cluster'] == cluster_num]
+ if not kld_clust_df.empty:
+ kld = kld_clust_df['total'].values[0]
+ self.console.log(f"-------KLD Results for {cluster_num} clusters-------", style="bold green")
+ self.console.log(f"Total KLD score for cluster {cluster_num}: {kld}", style="bold yellow")
+ else:
+ kld_clust_df = None
+
# Add carousel items
for i, group_num in enumerate(group_nums):
active_class = "active" if i == 0 else ""
@@ -1582,10 +1683,17 @@ def _create_carousel_for_cluster(self, carousel_id, cluster_num, group_data):
if nat_img and hasattr(self, '_outfolder'):
nat_img = str(nat_img).replace(f"{self._outfolder}/", '')
-
+            # Find the per-group KLD value from this cluster's KLD row, if the column exists
+ kld_clust_group_kld = kld_clust_df[f'group{group_num}'].values[0] if kld_clust_df is not None and f'group{group_num}' in kld_clust_df.columns else None
+
+ self.console.log(f"Group {group_num} KLD score for cluster {cluster_num}: {kld_clust_group_kld}", style="bold yellow")
+ # Generate the HLA section for this cluster
+ kld_clust_group_kld = round(kld_clust_group_kld, 2) if isinstance(
+ kld_clust_group_kld, (int, float)) else kld_clust_group_kld
+
# Generate the HLA section for this cluster
hla_section = self.render_hla_section(
- hla_name, correlation_formatted, gibbs_img, nat_img)
+ hla_name, correlation_formatted, gibbs_img, nat_img,kld_clust_group_kld)
# Create carousel item with the HLA section
carousel_html += f"""
@@ -1599,16 +1707,16 @@ def _create_carousel_for_cluster(self, carousel_id, cluster_num, group_data):
# Add carousel controls
carousel_html += f"""
-
-
-
- Previous
-
-
-
- Next
-
+
+
+ Previous
+
+
+
+ Next
+
+
"""
@@ -1742,6 +1850,13 @@ def generate_html_layout(self, correlation_dict, db, gibbs_out, immunolyser=Fals
$(targetId).carousel(parseInt(slideIndex));
});
});
+
+ document.addEventListener('DOMContentLoaded', function () {
+ var popoverTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="popover"]'));
+ popoverTriggerList.forEach(function (popoverTriggerEl) {
+ new bootstrap.Popover(popoverTriggerEl);
+ });
+ });
"""
body_end_1 = Template("""
@@ -1800,6 +1915,7 @@ def generate_html_layout(self, correlation_dict, db, gibbs_out, immunolyser=Fals
noImgDivs.forEach(div => {
div.innerHTML = placeholderSVG; // Add placeholder to each "no-img" div
});
+
});
diff --git a/cli/html_config.py b/cli/html_config.py
index 9b26a1c..cd78fee 100644
--- a/cli/html_config.py
+++ b/cli/html_config.py
@@ -192,6 +192,7 @@