Skip to content

Error when increasing features to N=3 on sample data for hdbscan #1

@sr248

Description

@sr248

I got an error when I changed the code in quick_start.ipynb as follows:

'''Apply HEADSS to evaluate the chosen data'''
merge = HEADSS.headss_merge(df = data, N = 3, split_columns = ['x', 'y', 'z'], merge = True,
                      cluster_columns=['x','y','z'], min_cluster_size = 10, 
                      min_samples = 10, cluster_method = 'leaf', allow_single_cluster = False,
                 total_threshold = 0.1, overlap_threshold = 0.5, minimum_members = 10) 

# clustering result
merged_df = merge.members_df

While debugging, it seems that the register df DataFrame holds no data in headss_merge.describe_clusters(). Here is the error:

{
	"name": "ValueError",
	"message": "max() iterable argument is empty",
	"stack": "---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[24], line 2
      1 '''Apply HEADSS to evaluate the chosen data'''
----> 2 merge = HEADSS.headss_merge(df = data, N = 3, split_columns = ['x', 'y', 'z'], merge = True,
      3                       cluster_columns=['x','y','z'], min_cluster_size = 10, 
      4                       min_samples = 10, cluster_method = 'leaf', allow_single_cluster = False,
      5                  total_threshold = 0.1, overlap_threshold = 0.5, minimum_members = 10) 
      7 # clustering result
      8 merged_df = merge.members_df

File /work/HEADSS/code/HEADSS.py:698, in headss_merge.__init__(self, N, df, split_columns, cluster_columns, df_clustered, stitch_regions, min_cluster_size, min_samples, cluster_method, allow_single_cluster, merge, total_threshold, overlap_threshold, minimum_members)
    696 self.overlap_threshold = overlap_threshold
    697 print('check1')
--> 698 self.members_df = self.mergeClusters()

File /work/HEADSS/code/HEADSS.py:851, in headss_merge.mergeClusters(self)
    849 df = self.members_df
    850 print('check2')
--> 851 cluster_info = self.describe_clusters()
    852 matches = self.find_overlapping_clusters(cluster_info = cluster_info[:])
    853 cluster_merges = self.check_cluster_merge(matches = matches)

File /work/HEADSS/code/HEADSS.py:716, in headss_merge.describe_clusters(self, group_col)
    714 groups = df[group_col].unique()
    715 print('groups:', groups)
--> 716 cluster_info = np.zeros(int(max(groups)+1), dtype = object)
    717 print('cluster_info:', cluster_info)
    718 for index, group in enumerate(groups):

ValueError: max() iterable argument is empty"
}

Could you please check on this? Thank you!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions