From 8994f56d448424e8970a751380af237b82c9ce3b Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)"
Date: Mon, 20 Oct 2014 18:49:44 +0200
Subject: [PATCH 1/4] add both my changes and Dario's

---
 scimes/scimes.py | 192 +++++++++++++++++++++++++++++------------------
 1 file changed, 120 insertions(+), 72 deletions(-)

diff --git a/scimes/scimes.py b/scimes/scimes.py
index 0b5c4e4..810532f 100644
--- a/scimes/scimes.py
+++ b/scimes/scimes.py
@@ -8,6 +8,7 @@
 
 from astrodendro import Dendrogram, ppv_catalog
 from astropy import units as u
+from astropy.stats import median_absolute_deviation as mad
 
 from sklearn import metrics
 from sklearn.cluster import spectral_clustering
@@ -19,12 +20,12 @@
 
 
-def mat_smooth(Mat):
+def mat_smooth(Mat, scalpar = 0):
 
     #Evaluate sigma
     nonmax = Mat != Mat.max()
-    M = Mat[nonmax]
+    M = Mat#[nonmax]
 
     Mlin = np.sort(M.ravel())
     Mdiff = np.ediff1d(Mlin)
@@ -32,27 +33,24 @@ def mat_smooth(Mat):
     Mup = [0]+Mdiff.tolist()
     Mdn = Mdiff.tolist()+[0]
 
-    ind_up = np.argsort(Mup)[::-1]
-    ind_dn = np.argsort(Mdn)[::-1]
+    if scalpar == 0:
+
+        Mdiff0 = Mdiff[Mdiff != 0]
 
-    # Search for the highest distance
-    # between the values
-    Mlsup = Mlin[ind_up]
-    Mlsdn = Mlin[ind_dn]
+        # Find outliers
+        outls = Mdiff0[Mdiff0 > np.mean(Mdiff0)+4*np.std(Mdiff0)]
+        sd_dist = (Mlin[Mup.index(outls[0])]+Mlin[Mdn.index(outls[0])])/2
+
+        print "-- Estimated scaling parameter =", sd_dist
 
-    # Take the lower value larger distance
-    Mlsup_sel = np.sort(Mlsup[0:5])
-    Mlsdn_sel = np.sort(Mlsdn[0:5])
-
-    sd_dist = (Mlsup[0]+Mlsdn[0])/2.
+    else:
 
-    print "-- Estimated smoothing parameter =", sd_dist
-
+        print "-- Using provided scaling parameter =", scalpar
+        sd_dist = scalpar
+
     NM = np.exp(-(Mat**2)/(sd_dist**2))
     NM[range(NM.shape[0]), range(NM.shape[1])] = 0
 
-    stop()
-
     return NM
@@ -68,7 +66,6 @@ def aff_matrix(allleavidx, dictparents, dictprops):
     volumes = dictprops['volumes']
     luminosities = dictprops['luminosities']
 
-    # Let's save one for loop
     n2 = num**2
@@ -99,10 +96,10 @@
 
 
             # Finding the common parents
             aux_commons = list(set(ipars).intersection(set(jpars)))
+            commons = [x for x in lpars if x in aux_commons]
-
             pi_idx = commons[0]
-
+
             # Volume
             wij = volumes[pi_idx]
             WAs[0,imat,jmat] = wij
@@ -112,7 +109,6 @@
             wij = luminosities[pi_idx]
             WAs[1,imat,jmat] = wij
             WAs[1,jmat,imat] = wij
-
 
     return WAs
@@ -155,22 +151,36 @@ def guessk(Mat, thresh = 0.2):
 
 
 
+def get_idx(struct):
+
+    leav_list_idx = []
+    sort_leav = struct.sorted_leaves()
+
+    for l in sort_leav:
+        leav_list_idx.append(l.idx)
+
+    return leav_list_idx
+
+
+
+
 def clust_cleaning(dendro, allleavidx, allclusters, t_brs_idx):
 
     # Find the lowest level parent common for all clusters
     # to get the cores_idx
 
     pars_lev = []
+    pars_idx = []
     for leaf in allleavidx:
-        pars_lev.append(dendro[leaf].parent.level)
-
+        pars_lev.append(dendro[leaf].parent.height)
+        pars_idx.append(dendro[leaf].parent.idx)
+
     pars_lev = np.asarray(pars_lev)
+    pars_idx = np.asarray(pars_idx)
     allclusters = np.asarray(allclusters)
 
     cores_idx = []
-    doubt_idx = []
-    doubt_clust = []
 
     for cluster in set(allclusters):
@@ -180,59 +190,70 @@ def clust_cleaning(dendro, allleavidx, allclusters, t_brs_idx):
 
         # Leaves in that cluster
         clust_leaves_idx = np.asarray(allleavidx)[clust_idx]
 
-        # Height of leaf parents into that cluster
+        # Height of leaf parents in that cluster:
+        # the parent with the lowest height is supposed
+        # to be the parent common to all leaves
+        # and becomes the core candidate
+
+        clust_pars_idx = np.asarray(pars_idx[clust_idx])
         clust_pars_lev = pars_lev[clust_idx]
-        clust_pars_lev = np.sort(clust_pars_lev)
-        clust_pars_lev = clust_pars_lev.tolist()
-
-        index = clust_pars_lev.index(clust_pars_lev[0])
-        clust_leaves_idx = clust_leaves_idx.tolist()
+        clust_pars_lev = np.argsort(clust_pars_lev)
+        ord_pars_idx = clust_pars_idx[clust_pars_lev]
+        ord_pars_idx = ord_pars_idx.tolist()
+
+        t_brs_idx = np.asarray(t_brs_idx)
+        core_candidate = dendro[ord_pars_idx[0]]
 
-        # To avoid "two leaves" branches as cores
-        if len(dendro[clust_leaves_idx[index]].parent.sorted_leaves()) >= len(clust_leaves_idx) \
-           or t_brs_idx[t_brs_idx == dendro[clust_leaves_idx[index]].parent.idx].size > 0:
-            core_candidate = dendro[clust_leaves_idx[index]].parent
-        else:
-            core_candidate = dendro[clust_leaves_idx[index]].parent.parent
-
-
+
         # Checking cluster cores.
         # A cluster core is a branch that contains only
         # leaves of that given core. Otherwise move to the upper level
         # and check again.
 
-        size_core_candidate = len(core_candidate.sorted_leaves())
-        size_cluster = len(clust_leaves_idx)
 
-        count = 0
-        while size_core_candidate > size_cluster:
+        # First check if the core candidate contains all leaves of the
+        # cluster, otherwise go one level down
+
+        leav_core = get_idx(core_candidate)
+        leav_cluster = clust_leaves_idx
 
-            if count >= len(clust_pars_lev)-1:
-                print 'Unassignable cluster', cluster
-                break
-
-            index = clust_pars_lev.index(clust_pars_lev[count+1])
-            core_candidate = dendro[clust_leaves_idx[index]].parent
+        fst_check = list(set(leav_cluster) - set(leav_core))
+
+        if len(fst_check)>0 and \
+           t_brs_idx[t_brs_idx == core_candidate.idx].size == 0:
+            core_candidate = core_candidate.parent
+            leav_core = get_idx(core_candidate)
+
 
-            size_core_candidate = len(core_candidate.sorted_leaves())
-
-            count = count + 1
+        # Difference between the two lists: leaves that
+        # are in the core but not in the cluster
+        diff_leav = list(set(leav_core) - set(leav_cluster))
+
+        count = 1
+        while len(diff_leav) > 0:
+
+            core_candidate = dendro[leav_cluster[count]].parent
+            leav_core = get_idx(core_candidate)
+            new_cluster = leav_cluster[count:-1]
+
+            if len(new_cluster) == 0:
+                print 'Unassignable cluster', cluster
+                break
+
+            diff_leav = list(set(leav_core) - set(new_cluster))
+            count = count+1
 
         else:
             cores_idx.append(core_candidate.idx)
-
 
     return cores_idx
 
 
-def cloudstering(dendrogram, catalog, criteria, user_k):
-
-    # Listing all branches at dendrogram's trunk
-    # and get useful information
-
+def cloudstering(dendrogram, catalog, criteria, user_k, user_ams, user_scalpars):
+
     trunk = dendrogram.trunk
     branches = []
@@ -275,10 +296,8 @@ def cloudstering(dendrogram, catalog, criteria, user_k):
 
     dict_parents = dict(zip(all_leav_names,all_parents))
 
-
-
     # Retriving needed properties from the catalog
-    volumes = np.pi*catalog['radius_pc'].data**2*catalog['sigv_kms'].data
+    volumes = catalog['volume'].data
     luminosities = catalog['luminosity'].data
 
     t_volume = sum(volumes[trunk_brs_idx])
@@ -289,15 +308,28 @@ def cloudstering(dendrogram, catalog, criteria, user_k):
 
     volumes.append(t_volume)
     luminosities.append(t_luminosity)
-
-
+
     dict_props = {'volumes':volumes, 'luminosities':luminosities}
 
-    # Generating affinity matrices
-    AMs = aff_matrix(all_leav_idx, dict_parents, dict_props)
-
+    # Generating affinity matrices if not provided
+    if user_ams == None:
+        AMs = aff_matrix(all_leav_idx, dict_parents, dict_props)
+    else:
+        AMs = user_ams
+
+
+    # Check whether the affinity matrix scaling parameters
+    # are provided by the user; if so use them, otherwise
+    # calculate them
+    if user_scalpars == None:
+        scpars = np.zeros(len(criteria))
+    else:
+        scpars = user_scalpars
+
+
     print "- Start spectral clustering"
 
     # Selecting the criteria and merging the matrices
@@ -306,9 +338,9 @@
         print "-- Smoothing ", cr, " matrix"
 
         if criteria.index(cr) == 0:
-            AM = mat_smooth(AMs[cr,:,:])
+            AM = mat_smooth(AMs[cr,:,:], scalpar = scpars[cr])
         else:
-            AM = AM*mat_smooth(AMs[cr,:,:])
+            AM = AM*mat_smooth(AMs[cr,:,:], scalpar = scpars[cr])
 
     # Showing the final affinity matrix
@@ -352,11 +384,10 @@
 
         print '-- Not necessary to cluster'
         all_clusters = np.zeros(len(leaves), dtype = np.int32)
 
-
-
+
     clust_branches = clust_cleaning(dendrogram, all_leav_idx, all_clusters, trunk_brs_idx)
 
-    return clust_branches
+    return clust_branches, AMs
@@ -387,22 +418,38 @@ class SpectralCloudstering:
         it will be guessed automatically through the eigenvalues
         of the unsmoothed affinity matrix
 
+    user_ams: numpy array
+        User-provided affinity matrix. If this is not
+        furnished, it is automatically generated through the
+        volume and/or luminosity criteria
+
+    user_scalpars: float
+        User-defined scaling parameter(s). If these are not
+        furnished, the scaling parameters are automatically estimated.
+
+
     Return
     -------
 
     clusters: list
         The dendrogram branch indexes corresponding to the
-        identified clusters
+        identified clusters
+
+    affmats: numpy array
+        The affinity matrices calculated by the algorithm
 
     """
 
-    def __init__(self, dendrogram, catalog, cl_volume = True, cl_luminosity=True, user_k = None):
+    def __init__(self, dendrogram, catalog, cl_volume = True, cl_luminosity=True, \
+                 user_k = None, user_ams = None, user_scalpars = None):
 
         self.dendrogram = dendrogram
         self.catalog = catalog
         self.cl_volume = cl_volume
         self.cl_luminosity = cl_luminosity
         self.user_k = user_k or 0
+        self.user_ams = user_ams
+        self.user_scalpars = user_scalpars
 
         # Clustering criteria chosen
         self.criteria = []
@@ -412,4 +459,5 @@ def __init__(self, dendrogram, catalog, cl_volume = True, cl_luminosity=True, us
             self.criteria.append(1)
 
-        self.clusters = cloudstering(self.dendrogram, self.catalog, self.criteria, self.user_k)
+        self.clusters, self.affmats = cloudstering(self.dendrogram, self.catalog, self.criteria, \
+                                                   self.user_k, self.user_ams, self.user_scalpars)
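
A usage sketch for the API change above, not part of the patch itself: after PATCH 1/4, SpectralCloudstering returns the affinity matrices it computes and accepts them back on later runs, together with user-chosen scaling parameters. This assumes a dendrogram `d` and a catalog `cat` already built (as in the script in the next patch); the scaling values below are placeholders, not recommended numbers:

    from scimes import SpectralCloudstering

    # First run: affinity matrices are built internally and returned
    gmcs = SpectralCloudstering(d, cat)
    clusters, affmats = gmcs.clusters, gmcs.affmats

    # Later runs: reuse the matrices and pin the scaling parameters,
    # skipping the expensive aff_matrix() step. One value per criterion
    # (volume, luminosity); the numbers here are placeholders.
    gmcs2 = SpectralCloudstering(d, cat, user_ams=affmats,
                                 user_scalpars=[100., 100.])

`user_k` can still be passed to fix the number of clusters instead of estimating it from the eigenvalues of the unsmoothed affinity matrix.
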
From 5408e9f8239f8518ce809e4663e7859f841d9347 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)"
Date: Tue, 21 Oct 2014 16:45:36 +0200
Subject: [PATCH 2/4] adam's version of orion2scimes

---
 scimes/orion2scimes.py | 263 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 263 insertions(+)
 create mode 100644 scimes/orion2scimes.py

diff --git a/scimes/orion2scimes.py b/scimes/orion2scimes.py
new file mode 100644
index 0000000..056bd77
--- /dev/null
+++ b/scimes/orion2scimes.py
@@ -0,0 +1,263 @@
+import warnings
+import os.path
+
+import numpy as np
+import math
+import aplpy
+
+from astrodendro import Dendrogram, ppv_catalog
+from astropy import units as u
+from astropy.io import fits
+from astropy import wcs
+from astropy.table.table import Column
+from astropy.table import Table
+
+from sklearn.cluster import spectral_clustering
+from skimage.measure import regionprops
+
+from scimes import SpectralCloudstering
+
+from datetime import datetime
+
+from pdb import set_trace as stop
+
+
+
+def showdendro(dendro, cores_idx):
+
+    # For the random colors
+    r = lambda: random.randint(0,255)
+
+    p = dendro.plotter()
+
+    fig = plt.figure(figsize=(20, 12))
+    ax = fig.add_subplot(111)
+
+    ax.set_yscale('log')
+
+    cols = []
+
+
+    # Plot the whole tree
+    p.plot_tree(ax, color='black')
+
+    for i in range(len(cores_idx)):
+
+        col = '#%02X%02X%02X' % (r(),r(),r())
+        cols.append(col)
+        p.plot_tree(ax, structure=cores_idx[i], color=cols[i], lw=3)
+
+    ax.set_title("Final clustering configuration")
+
+    ax.set_xlabel("Structure")
+    ax.set_ylabel("Flux")
+
+
+    return
+
+
+
+def make_asgn(dendro, data_file, cores_idx = [], tag = '_', collapse = True):
+
+    data = fits.open(data_file)[0]
+
+    # Making the assignment cube
+    if len(data.shape) == 3:
+        asgn = np.zeros(data.shape, dtype = int32)
+
+    if len(data.shape) == 4:
+        asgn = np.zeros((data.shape[1],data.shape[2],data.shape[3]), dtype = int32)
+
+    for i in cores_idx:
+        asgn[where(d[i].get_mask(shape = asgn.shape))] = i
+
+
+    # Write the fits file
+    asgn = fits.PrimaryHDU(asgn.astype('short'), data.header)
+
+    os.system("rm -rf "+data_file+'_asgn_'+tag+'.fits')
+    print "Write "+data_file+'_asgn_'+tag+'.fits'
+    asgn.writeto(data_file+'_asgn_'+tag+'.fits')
+
+    # Collapsed version of the asgn cube
+    if collapse:
+
+        asgn_map = np.amax(asgn.data, axis = 0)
+
+        matshow(asgn_map, origin = "lower")
+        colorbar()
+
+
+    return
+
+
+
+
+
+path = '/Users/adam/work/scimes/'
+#path = '/Volumes/Zeruel_data/ORION/'
+#path = '/Users/Dario/Documents/dendrograms/'
+filename = path+'orion'
+
+do_make = True
+do_catalog = True
+do_load = True
+
+
+if do_make:
+
+    # Make dendrogram
+    do_load = False
+
+    print 'Make dendrogram from the full cube'
+    hdu = fits.open(path+'orion.fits')[0]
+    data = hdu.data
+    hd = hdu.header
+
+    if data.ndim==4:
+        data = data[0,:,:,:]
+
+
+    # Calculating pixels per beam
+    bmaj = hd.get('BMAJ')
+    bmin = hd.get('BMIN')
+    cdelt1 = abs(hd.get('CDELT1'))
+    cdelt2 = abs(hd.get('CDELT2'))
+
+    if bmaj != None and bmin != None:
+        ppbeam = abs((bmaj*bmin)/(cdelt1*cdelt2)*2*math.pi/(8*math.log(2)))
+    else:
+        ppbeam = 5
+
+    # Getting a very rough estimate of the rms
+    free1 = data[22:24,:,:]
+    free2 = data[75:79,:,:]
+    freeT = np.concatenate((free1,free2), axis=0)
+    rms = np.std(freeT[np.isfinite(freeT)])
+
+    #rms = 0.25
+    #ppbeam = 5
+
+    d = Dendrogram.compute(data, min_value=2*rms, \
+                           min_delta=2*rms, min_npix=3*ppbeam, verbose = 1)
+
+    d.save_to(filename+'_dendrogram.fits')
+
+
+if do_load:
+
+    # Load data and dendrogram
+    hdu = fits.open(filename+'.fits')[0]
+    data = hdu.data
+    hd = hdu.header
+
+    if size(shape(data))==4:
+        data = data[0,:,:,:]
+
+    print 'Load dendrogram file: '+filename
+    d = Dendrogram.load_from(filename+'_dendrogram.fits')
+
+    print 'Load catalog file: '+filename
+    cat = Table.read(filename+'_catalog.fits')
+
+    #stop()
+
+
+
+
+if do_catalog:
+
+    # Making the catalog
+    print "Making a simple catalog of dendrogram structures"
+    metadata = {}
+    metadata['data_unit'] = u.Jy #This should be Kelvin!
+
+    cat = ppv_catalog(d, metadata)
+
+    # Centre position
+    xcens = cat['x_cen'].data
+    ycens = cat['y_cen'].data
+    vcens = cat['v_cen'].data
+
+    hdm = fits.open(filename+'_distmap.fits')[0]
+    distmap = hdm.data
+
+    w = wcs.WCS(hd)
+    xint, yint, vint = np.meshgrid(np.arange(data.shape[2]),\
+                                   np.arange(data.shape[1]),\
+                                   np.arange(data.shape[0]),\
+                                   indexing='ij')
+
+    ra, dec, vel = w.all_pix2world(xint,yint,vint,0)
+    raxis = ra[:,0,0]
+    daxis = dec[0,:,0]
+    vaxis = vel[0,0,:]
+
+
+    if hd.get('CTYPE3') == 'M/S':
+        vaxis = vaxis/1000.
+
+    # Pixel (voxel) size
+    dtor = math.pi/180.
+
+    deltax_pc = np.zeros(len(cat['radius'].data))
+    deltay_pc = np.zeros(len(cat['radius'].data))
+
+    for i in range(len(cat['radius'].data)):
+
+        dist = distmap[ycens[i],xcens[i]]
+
+        deltax_pc[i] = abs(raxis[1]-raxis[0])*math.cos(dtor*np.mean(daxis))*dtor*dist
+        deltay_pc[i] = abs(daxis[1]-daxis[0])*dtor*dist
+
+    deltav_kms = abs(vaxis[1]-vaxis[0])
+
+    # Physical constants
+    mh = 1.673534*10**(-24)    # hydrogen mass CGS
+    ms = 1.98900*10**33        # solar mass CGS
+    pc = 3.0857*10**18         # parsec CGS
+    xco = 2*10**20
+
+
+    # Radius [pc]
+    rads = 1.91*np.sqrt(deltax_pc*deltay_pc)*cat['radius'].data
+
+    # Velocity dispersion [km/s]
+    sigvs = deltav_kms*cat['v_rms'].data
+
+    # Luminosity and luminosity mass
+    luminosities = cat['flux'].data*deltav_kms*deltax_pc*deltay_pc
+    mlums = luminosities*(xco*(2.*mh)*(1.36)*(pc*pc)/ms)
+
+    # Virial mass
+    mvirs = 1040*sigvs**2*rads
+
+    # Volume
+    volumes = np.pi*rads**2*sigvs
+
+
+    # Update the catalog
+    cat.add_column(Column(name='radius_pc', data=rads))#, units=u.pc))
+    cat.add_column(Column(name='sigv_kms', data=sigvs))#, units=u.km/u.s))
+    cat.add_column(Column(name='luminosity', data=luminosities))#, units=u.K*u.km/u.s*u.pc**2))
+    cat.add_column(Column(name='volume', data=volumes))
+    cat.add_column(Column(name='mlum', data=mlums))#, units=u.msolMass))
+    cat.add_column(Column(name='mvir', data=mvirs))#, units=u.msolMass))
+
+    # Save the catalog
+    os.system('rm -rf '+filename+'_catalog.fits')
+    cat.write(filename+'_catalog.fits')
+
+# Added by Adam Oct 14 - may be wrong
+#d=data
+
+# Calling the clustering procedure
+gmcs = SpectralCloudstering(d, cat)
+
+# Make the asgn cube
+make_asgn(d, filename+'.fits', cores_idx = gmcs.clusters)
+
+# Show the clustered dendrogram
+showdendro(d, gmcs.clusters)
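
A quick sanity check of the pixels-per-beam expression used in the script above, as a standalone sketch. The Gaussian beam area is 2*pi*BMAJ*BMIN/(8*ln 2), which the script divides by the pixel area CDELT1*CDELT2. The header values here are hypothetical placeholders, not taken from the Orion cube:

    import math

    # Hypothetical header values, in degrees (placeholders)
    bmaj = bmin = 0.002        # beam FWHM
    cdelt1 = cdelt2 = 0.001    # pixel size

    # Same expression as in orion2scimes.py
    ppbeam = abs((bmaj*bmin)/(cdelt1*cdelt2)*2*math.pi/(8*math.log(2)))
    print(ppbeam)              # ~4.53 pixels per beam

With a beam FWHM twice the pixel size, about 4.5 pixels fall inside the beam, so min_npix=3*ppbeam in Dendrogram.compute requires structures larger than roughly three beams.
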
From 39baa4e974e92176573ebdb1cbbc26f35a05f576 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)"
Date: Tue, 21 Oct 2014 20:19:48 +0200
Subject: [PATCH 3/4] bug fixes: imports primarily

---
 scimes/orion2scimes.py | 17 ++++++++++-------
 scimes/scimes.py       |  2 +-
 scimes/spectral.py     | 14 +++++++-------
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/scimes/orion2scimes.py b/scimes/orion2scimes.py
index 056bd77..8335049 100644
--- a/scimes/orion2scimes.py
+++ b/scimes/orion2scimes.py
@@ -4,6 +4,9 @@
 import numpy as np
 import math
 import aplpy
+import random
+
+from matplotlib import pyplot as plt
 
 from astrodendro import Dendrogram, ppv_catalog
 from astropy import units as u
@@ -64,13 +67,13 @@ def make_asgn(dendro, data_file, cores_idx = [], tag = '_', collapse = True):
 
     # Making the assignment cube
     if len(data.shape) == 3:
-        asgn = np.zeros(data.shape, dtype = int32)
+        asgn = np.zeros(data.shape, dtype = np.int32)
 
     if len(data.shape) == 4:
-        asgn = np.zeros((data.shape[1],data.shape[2],data.shape[3]), dtype = int32)
+        asgn = np.zeros((data.shape[1],data.shape[2],data.shape[3]), dtype = np.int32)
 
     for i in cores_idx:
-        asgn[where(d[i].get_mask(shape = asgn.shape))] = i
+        asgn[np.where(d[i].get_mask(shape = asgn.shape))] = i
 
 
     # Write the fits file
@@ -85,8 +88,8 @@ def make_asgn(dendro, data_file, cores_idx = [], tag = '_', collapse = True):
 
         asgn_map = np.amax(asgn.data, axis = 0)
 
-        matshow(asgn_map, origin = "lower")
-        colorbar()
+        plt.matshow(asgn_map, origin = "lower")
+        plt.colorbar()
 
 
     return
@@ -95,10 +98,10 @@ def make_asgn(dendro, data_file, cores_idx = [], tag = '_', collapse = True):
 
 
 
-path = '/Users/adam/work/scimes/'
+path = './'
 #path = '/Volumes/Zeruel_data/ORION/'
 #path = '/Users/Dario/Documents/dendrograms/'
-filename = path+'orion'
+filename = os.path.join(path, 'orion')
 
 do_make = True
 do_catalog = True
diff --git a/scimes/scimes.py b/scimes/scimes.py
index 7fcfb35..45bad41 100644
--- a/scimes/scimes.py
+++ b/scimes/scimes.py
@@ -11,7 +11,7 @@
 from astropy.stats import median_absolute_deviation as mad
 
 from sklearn import metrics
-from sklearn.cluster import spectral_clustering
+from spectral import spectral_clustering
 from skimage.measure import regionprops
 
 from datetime import datetime
diff --git a/scimes/spectral.py b/scimes/spectral.py
index 7c19a0d..f2d23c8 100644
--- a/scimes/spectral.py
+++ b/scimes/spectral.py
@@ -9,13 +9,13 @@
 
 import numpy as np
 
-from ..base import BaseEstimator, ClusterMixin
-from ..utils import check_random_state, as_float_array, deprecated
-from ..utils.extmath import norm
-from ..metrics.pairwise import pairwise_kernels
-from ..neighbors import kneighbors_graph
-from ..manifold import spectral_embedding
-from .k_means_ import k_means
+from sklearn.base import BaseEstimator, ClusterMixin
+from sklearn.utils import check_random_state, as_float_array, deprecated
+from sklearn.utils.extmath import norm
+from sklearn.metrics.pairwise import pairwise_kernels
+from sklearn.neighbors import kneighbors_graph
+from sklearn.manifold import spectral_embedding
+from sklearn.cluster.k_means_ import k_means
 
 
 def discretize(vectors, copy=True, max_svd_restarts=30, n_iter_max=20,

From af4bf93b8ee493b14cb1c5267ad150f170fc5732 Mon Sep 17 00:00:00 2001
From: "Adam Ginsburg (keflavich)"
Date: Tue, 21 Oct 2014 20:40:30 +0200
Subject: [PATCH 4/4] remove comment

---
 scimes/orion2scimes.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/scimes/orion2scimes.py b/scimes/orion2scimes.py
index 8335049..75e7ba0 100644
--- a/scimes/orion2scimes.py
+++ b/scimes/orion2scimes.py
@@ -253,9 +253,6 @@ def make_asgn(dendro, data_file, cores_idx = [], tag = '_', collapse = True):
 
     os.system('rm -rf '+filename+'_catalog.fits')
     cat.write(filename+'_catalog.fits')
 
-# Added by Adam Oct 14 - may be wrong
-#d=data
-
 # Calling the clustering procedure
 gmcs = SpectralCloudstering(d, cat)
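
Taken together, the four patches leave the workflow below; a minimal end-to-end sketch, assuming the dendrogram and catalog files written by orion2scimes.py already exist in the working directory (the file names follow the script, with path = './'):

    from astropy.table import Table
    from astrodendro import Dendrogram
    from scimes import SpectralCloudstering

    # Products written by orion2scimes.py
    d = Dendrogram.load_from('orion_dendrogram.fits')
    cat = Table.read('orion_catalog.fits')

    # Cluster on the default volume + luminosity criteria
    gmcs = SpectralCloudstering(d, cat)

    print(gmcs.clusters)         # dendrogram branch indexes of the clusters
    print(gmcs.affmats.shape)    # stacked affinity matrices, one per criterion
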