Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"liveServer.settings.port": 5501
"liveServer.settings.port": 5502
}
158 changes: 137 additions & 21 deletions cli/cluster_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def __init__(self):
self.d3js_json = None
self.db = None # database of motif and matrix
self.treshold_img = False
self.gibbs_results = None
self.kld_df = None

def generate_unique_random_ids(self, count: int) -> list:
"""
Expand Down Expand Up @@ -96,7 +98,7 @@ def parse_gibbs_output(self, gibbs_path: str, n_clusters: int) -> pd.DataFrame:
for c_file in os.listdir(res_path):
if f'{n_clusters}g.ds' in c_file:
file_path = os.path.join(res_path, c_file)
df = pd.read_csv(file_path, sep='\s+')
df = pd.read_csv(file_path, sep="\s+")
# output_path = f'data/sampledata_701014/res_{n_clusters}g.csv'
# df.to_csv(output_path, index=False)
logging.info(f"Data parsed for No clusters {n_clusters}")
Expand Down Expand Up @@ -376,7 +378,49 @@ def compute_correlations(
self.console.log(
f"Cluster Search Preprocess completed in {elapsed_time:.2f} seconds."
)

def read_KLD_file(self, file_path):
    """
    Read and parse a Gibbs ``gibbs.KLDvsClusters`` tab-separated file.

    The first line is treated as a header and skipped. Every following
    non-blank line must hold an integer cluster number followed by one KLD
    value per group. KLD values that are not greater than zero are stored
    as 0. Rows with fewer groups than the widest row are padded with 0 so
    that every column has the same length.

    Args:
        file_path (str): The path to the Gibbs gibbs.KLDvsClusters file.
    Returns:
        pd.DataFrame: One row per parsed line, with the columns ``cluster``,
        ``group1`` .. ``groupN`` and ``total`` (row-wise sum of the group
        columns).

    Note:
        Any error raised while reading or parsing is printed and the process
        is terminated with exit status 1.
    """
    try:
        cluster_numbers = []
        kld_rows = []

        with open(file_path, 'r') as file:
            next(file, None)  # Skip the header line
            for line in file:
                stripped = line.strip()
                if not stripped:
                    # Blank (e.g. trailing) lines carry no data.
                    continue
                parts = stripped.split('\t')
                cluster_numbers.append(int(parts[0]))
                kld_rows.append([
                    kld_value if kld_value > 0 else 0
                    for kld_value in (float(value) for value in parts[1:])
                ])

        # Pad against the widest row only after the whole file is read, so
        # short rows are filled regardless of where they appear in the file.
        n_groups = max((len(row) for row in kld_rows), default=0)
        data_dict = {'cluster': cluster_numbers}
        for idx in range(n_groups):
            data_dict[f'group{idx + 1}'] = [
                row[idx] if idx < len(row) else 0 for row in kld_rows
            ]

        # Convert the dictionary to a DataFrame
        df = pd.DataFrame(data_dict)
        df['total'] = df.iloc[:, 1:].sum(axis=1)
        return df
    except Exception as e:
        print(f"Error reading file: {e}")
        sys.exit(1)
def compute_correlations_v2(
self,
db: pd.DataFrame,
Expand All @@ -397,7 +441,10 @@ def compute_correlations_v2(
#Update to self
self.threshold = threshold
self.treshold_img = treshold_img

self.gibbs_results = gibbs_results
self.kld_df = self.read_KLD_file(
os.path.join(self.gibbs_results, 'images', 'gibbs.KLDvsClusters.tab')
)
gibbs_result_matrix = os.path.join(gibbs_results, "matrices")
should_process = False
if os.path.exists(gibbs_result_matrix) and any(".mat" in file for file in os.listdir(gibbs_result_matrix)):
Expand Down Expand Up @@ -1151,7 +1198,7 @@ def insert_script_png_json(self, script_data_path, img_fallback_path, div_id):

return script_template.render(script_data_path=script_data_path, img_fallback_path=img_fallback_path, div_id=div_id)

def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_presented_img):
def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_presented_img,kld_clust_group_kld):

if str(self.species).lower() == "human":
hla_name = f"HLA-{hla_name}"
Expand All @@ -1160,7 +1207,7 @@ def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_present
template = Template('''
<div class="row" style="border: 2px solid #007bff;">
<div class="row">
<h3 style="text-align: center;">{{ hla_name }} PCC = {{corr}}</h3>
<h4 style="text-align: center;"> Best matched allotype is {{ hla_name }} with PCC = {{corr}} and KLD= {{ kld }} </h4>
</div>
<div class="row">
<div class="col">
Expand Down Expand Up @@ -1208,7 +1255,7 @@ def render_hla_section(self, hla_name, corr, best_cluster_img, naturally_present
</div>
</div>
''')
return template.render(hla_name=hla_name, corr=corr, best_cluster_img=best_cluster_img, naturally_presented_img=naturally_presented_img)
return template.render(hla_name=hla_name, corr=corr, best_cluster_img=best_cluster_img, naturally_presented_img=naturally_presented_img,kld= kld_clust_group_kld)

def make_datatable(self, correlation_dict):
df = pd.DataFrame(correlation_dict.items(),
Expand All @@ -1219,9 +1266,26 @@ def make_datatable(self, correlation_dict):
df['HLA'] = df['HLA'].apply(
lambda x: x.split('/')[-1].replace('.txt', ''))
df['Correlation'] = df['Correlation'].apply(lambda x: round(x, 2))
# Add the per-group KLD value looked up in self.kld_df (parsed 'NofM' cluster label -> column groupN, row cluster == M)
if self.kld_df is not None:
try:
df['KLD'] = df['Cluster'].apply(
lambda x: self.kld_df.loc[
self.kld_df['cluster'] == int(str(x).split('of')[-1]),
f'group{str(x).split("of")[0]}'
].values[0] if f'group{str(x).split("of")[0]}' in self.kld_df.columns else None
)
except KeyError as e:
# Handle the case where the key is not found
self.console.log(
f"KeyError: The key KLD group was not found in the Corr DataFrame."
)
df['KLD'] = 'NA'
else:
df['KLD'] = 'NA'
df = df.sort_values(by='Correlation', ascending=False)
df = df.reset_index(drop=True)
df = df[['Cluster', 'HLA', 'Correlation']]
df = df[['Cluster', 'HLA', 'Correlation', 'KLD']]
return df

def process_correlation_data(self, df=None):
Expand Down Expand Up @@ -1540,11 +1604,37 @@ def _create_carousel_for_cluster(self, carousel_id, cluster_num, group_data):
total_slides = len(group_nums)

# Start building the carousel HTML
# Convert cluster_num to words for display
def number_to_words(n):
words = {
1: "One", 2: "Two", 3: "Three", 4: "Four", 5: "Five", 6: "Six",
7: "Seven", 8: "Eight", 9: "Nine", 10: "Ten"
}
return words.get(n, str(n))

cluster_num_word = number_to_words(int(cluster_num))
# KLD = self.kld_df[]
if self.kld_df is not None:
kld_clust_df = self.kld_df[self.kld_df['cluster'] == cluster_num]
if not kld_clust_df.empty:
kld = round(kld_clust_df['total'].values[0], 2)
else:
kld = None
# Determine singular/plural for group(s)
group_label = "group" if len(group_nums) == 1 else "groups"
group_range = f"{min(group_nums)}" if len(group_nums) == 1 else f"{min(group_nums)} to {max(group_nums)}"
cluster_label = f"{cluster_num_word} cluster output"

carousel_html = f"""
<div class="row mt-4 mb-4">
<div class="col-12">
<h2 class="text-center">Cluster {cluster_num}</h2>
<div id="{carousel_id}" class="carousel slide" data-ride="carousel" data-interval="false">
<div class="d-flex justify-content-between align-items-center">
<h2 class="text-center flex-grow-1">{cluster_label} ({group_range} {group_label}) (KLD = {kld})</h2>
<span class="d-inline-block" tabindex="0" data-bs-toggle="popover" data-bs-trigger="hover focus" data-bs-content="Use the left and right arrows to navigate through the cluster presentations.">
<i class="bi bi-info-circle" style="font-size: 1.5rem; color: #0d6efd; cursor: pointer;"></i>
</span>
</div>
<div id="{carousel_id}" class="carousel slide" data-ride="carousel" data-interval="false">
"""

# Add indicators
Expand All @@ -1560,7 +1650,18 @@ def _create_carousel_for_cluster(self, carousel_id, cluster_num, group_data):
</ol>
<div class="carousel-inner">
"""

# Add KLD score if available


if self.kld_df is not None:
kld_clust_df = self.kld_df[self.kld_df['cluster'] == cluster_num]
if not kld_clust_df.empty:
kld = kld_clust_df['total'].values[0]
self.console.log(f"-------KLD Results for {cluster_num} clusters-------", style="bold green")
self.console.log(f"Total KLD score for cluster {cluster_num}: {kld}", style="bold yellow")
else:
kld_clust_df = None

# Add carousel items
for i, group_num in enumerate(group_nums):
active_class = "active" if i == 0 else ""
Expand All @@ -1582,10 +1683,17 @@ def _create_carousel_for_cluster(self, carousel_id, cluster_num, group_data):

if nat_img and hasattr(self, '_outfolder'):
nat_img = str(nat_img).replace(f"{self._outfolder}/", '')

#find KLD for group
kld_clust_group_kld = kld_clust_df[f'group{group_num}'].values[0] if kld_clust_df is not None and f'group{group_num}' in kld_clust_df.columns else None

self.console.log(f"Group {group_num} KLD score for cluster {cluster_num}: {kld_clust_group_kld}", style="bold yellow")
# Round numeric KLD values to two decimals for display; non-numeric values (e.g. None) are passed through unchanged
kld_clust_group_kld = round(kld_clust_group_kld, 2) if isinstance(
kld_clust_group_kld, (int, float)) else kld_clust_group_kld

# Generate the HLA section for this cluster
hla_section = self.render_hla_section(
hla_name, correlation_formatted, gibbs_img, nat_img)
hla_name, correlation_formatted, gibbs_img, nat_img,kld_clust_group_kld)

# Create carousel item with the HLA section
carousel_html += f"""
Expand All @@ -1599,16 +1707,16 @@ def _create_carousel_for_cluster(self, carousel_id, cluster_num, group_data):

# Add carousel controls
carousel_html += f"""
</div>
<a class="carousel-control-prev" href="#{carousel_id}" role="button" data-slide="prev">
<span class="carousel-control-prev-icon" aria-hidden="true"></span>
<span class="sr-only">Previous</span>
</a>
<a class="carousel-control-next" href="#{carousel_id}" role="button" data-slide="next">
<span class="carousel-control-next-icon" aria-hidden="true"></span>
<span class="sr-only">Next</span>
</a>
</div>
<a class="carousel-control-prev" href="#{carousel_id}" role="button" data-slide="prev" style="filter: invert(36%) sepia(97%) saturate(7477%) hue-rotate(202deg) brightness(97%) contrast(101%);">
<span class="carousel-control-prev-icon" aria-hidden="true"></span>
<span class="sr-only text-dark">Previous</span>
</a>
<a class="carousel-control-next" href="#{carousel_id}" role="button" data-slide="next" style="filter: invert(36%) sepia(97%) saturate(7477%) hue-rotate(202deg) brightness(97%) contrast(101%);">
<span class="carousel-control-next-icon" aria-hidden="true"></span>
<span class="sr-only text-dark">Next</span>
</a>
</div>
</div>
</div>
"""
Expand Down Expand Up @@ -1742,6 +1850,13 @@ def generate_html_layout(self, correlation_dict, db, gibbs_out, immunolyser=Fals
$(targetId).carousel(parseInt(slideIndex));
});
});

document.addEventListener('DOMContentLoaded', function () {
var popoverTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="popover"]'));
popoverTriggerList.forEach(function (popoverTriggerEl) {
new bootstrap.Popover(popoverTriggerEl);
});
});
"""
body_end_1 = Template("""
</div>
Expand Down Expand Up @@ -1800,6 +1915,7 @@ def generate_html_layout(self, correlation_dict, db, gibbs_out, immunolyser=Fals
noImgDivs.forEach(div => {
div.innerHTML = placeholderSVG; // Add placeholder to each "no-img" div
});

});
</script>

Expand Down
7 changes: 7 additions & 0 deletions cli/html_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@
</div>
</footer>
<script>

document.addEventListener('DOMContentLoaded', function() {
const downloadLinks = document.querySelectorAll('.dropdown-item');

Expand All @@ -218,6 +219,12 @@
`;

document.getElementById('no-img').innerHTML = placeholderSVG;


var popoverTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="popover"]'));
popoverTriggerList.forEach(function (popoverTriggerEl) {
new bootstrap.Popover(popoverTriggerEl);
});
</script>

<script>
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name="HLA-PEPCLUST",
version="0.1.0-dev",
version="1.1.0-dev",
author="Sanjay Krishna",
author_email="sanjay.sondekoppagopalakrishna@mail.com",
packages=find_packages(),
Expand Down