From 84c9a8e3c4e995c5cf9253e5fec53ab58b10ac13 Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Tue, 7 Dec 2021 14:13:50 -0800 Subject: [PATCH 01/16] changes for bias --- pastis/io/read/all_data.py | 47 +++++++++++++++++-- pastis/optimization/counts.py | 26 +++++----- pastis/optimization/pastis_algorithms.py | 33 ++++++++----- pastis/optimization/piecewise_whole_genome.py | 16 +++---- pastis/script/pastis-poisson | 2 + 5 files changed, 89 insertions(+), 35 deletions(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index 6b440586e..e35336c65 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -9,7 +9,14 @@ def _get_lengths(lengths): """Load chromosome lengths from file, or reformat lengths object. """ - + if lengths is not None: + if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)) and len(lengths) == 1: + lengths = lengths[0] + if isinstance(lengths, str) and os.path.exists(lengths): + lengths = load_lengths(lengths) + lengths = np.array(lengths).astype(int) + return lengths +""" if isinstance(lengths, str) and os.path.exists(lengths): lengths = load_lengths(lengths) elif lengths is not None and (isinstance(lengths, list) or isinstance(lengths, np.ndarray)): @@ -17,6 +24,7 @@ def _get_lengths(lengths): lengths = load_lengths(lengths[0]) lengths = np.array(lengths).astype(int) return lengths +""" def _get_chrom(chrom, lengths): @@ -24,6 +32,16 @@ def _get_chrom(chrom, lengths): """ lengths = _get_lengths(lengths) + if chrom is not None: + if (isinstance(chrom, list) or isinstance(chrom, np.ndarray)) and len(chrom) == 1: + chrom = chrom[0] + if isinstance(chrom, str) and os.path.exists(chrom): + chrom = np.genfromtxt(chrom, dtype='str') + chrom = np.array(chrom).reshape(-1) + else: + chrom = np.array(['num%d' % i for i in range(1, len(lengths) + 1)]) + return chrom +""" if isinstance(chrom, str) and os.path.exists(chrom): chrom = np.array(np.genfromtxt(chrom, dtype='str')).reshape(-1) elif chrom is not None and (isinstance(chrom, list) or isinstance(chrom, np.ndarray)): @@ -33,6 +51,7 @@ def _get_chrom(chrom, lengths): else: chrom = np.array(['num%d' % i for i in range(1, len(lengths) + 1)]) return chrom +""" def _get_counts(counts, lengths): @@ -60,8 +79,27 @@ def _get_counts(counts, lengths): return output +def _get_bias(bias): + """Load bias from file, or reformat bias object. + """ + if bias is not None: + if (isinstance(bias, list) or isinstance(bias, np.ndarray)) and len(bias) == 1: + bias = bias[0] + if isinstance(bias, str): + if os.path.exists(bias): + if bias.endswith(".npy"): + bias = np.load(bias) + else: + bias = np.loadtxt(bias) + else: + raise ValueError("Path to bias vector does not exist.") + bias = np.array(bias).astype(float) + return bias + + def load_data(counts, lengths_full, ploidy, chrom_full=None, - chrom_subset=None, exclude_zeros=False, struct_true=None): + chrom_subset=None, exclude_zeros=False, struct_true=None, + bias=None): """Load all input data from files, and/or reformat data objects. If files are provided, load data from files. Also reformats data objects. @@ -81,6 +119,8 @@ def load_data(counts, lengths_full, ploidy, chrom_full=None, chrom_subset : list of str, optional Label for each chromosome to be excised from the full data; labels of chromosomes for which inference should be performed. + bias : str, optional + The path to the bias vector to normalize the counts with. Returns ------- @@ -106,6 +146,7 @@ def load_data(counts, lengths_full, ploidy, chrom_full=None, lengths_full = _get_lengths(lengths_full) chrom_full = _get_chrom(chrom_full, lengths_full) counts = _get_counts(counts, lengths_full) + bias = _get_bias(bias) if struct_true is not None and isinstance(struct_true, str): struct_true = np.loadtxt(struct_true) @@ -115,4 +156,4 @@ def load_data(counts, lengths_full, ploidy, chrom_full=None, chrom_full=chrom_full, chrom_subset=chrom_subset, exclude_zeros=exclude_zeros, struct_true=struct_true) - return counts, lengths_subset, chrom_subset, lengths_full, chrom_full, struct_true + return counts, lengths_subset, chrom_subset, lengths_full, chrom_full, struct_true, bias diff --git a/pastis/optimization/counts.py b/pastis/optimization/counts.py index be531dc2d..1676c7fbd 100644 --- a/pastis/optimization/counts.py +++ b/pastis/optimization/counts.py @@ -295,7 +295,8 @@ def check_counts(counts, lengths, ploidy, exclude_zeros=False, def preprocess_counts(counts_raw, lengths, ploidy, multiscale_factor, normalize, filter_threshold, beta=None, fullres_torm=None, excluded_counts=None, mixture_coefs=None, - exclude_zeros=False, input_weight=None, verbose=True): + exclude_zeros=False, input_weight=None, verbose=True, + bias=None): """Check counts, reformat, reduce resolution, filter, and compute bias. Preprocessing options include reducing resolution, computing bias (if @@ -329,6 +330,8 @@ def preprocess_counts(counts_raw, lengths, ploidy, multiscale_factor, normalize, therefore be removed. There should be one array per counts matrix. excluded_counts : {"inter", "intra"}, optional Whether to exclude inter- or intra-chromosomal counts from optimization. + bias: str, optional + The path to the bias vector to normalize the counts data with. Returns ------- @@ -344,7 +347,7 @@ def preprocess_counts(counts_raw, lengths, ploidy, multiscale_factor, normalize, counts_raw, lengths=lengths, ploidy=ploidy, multiscale_factor=multiscale_factor, normalize=normalize, filter_threshold=filter_threshold, exclude_zeros=exclude_zeros, - verbose=verbose) + verbose=verbose, bias=bias) lengths_lowres = decrease_lengths_res(lengths, multiscale_factor) @@ -389,7 +392,7 @@ def _percent_nan_beads(counts): def _prep_counts(counts_list, lengths, ploidy=1, multiscale_factor=1, normalize=True, filter_threshold=0.04, exclude_zeros=True, - verbose=True): + verbose=True, bias=None): """Copy counts, check matrix, reduce resolution, filter, and compute bias. """ @@ -504,15 +507,16 @@ def _prep_counts(counts_list, lengths, ploidy=1, multiscale_factor=1, counts_dict[counts_type] = counts # Optionally normalize counts - bias = None if normalize: - if verbose: - print('COMPUTING BIAS: all counts together', flush=True) - bias = ICE_normalization( - ambiguate_counts( - list(counts_dict.values()), lengths=lengths_lowres, - ploidy=ploidy, exclude_zeros=True), - max_iter=300, output_bias=True)[1].flatten() + # If we have to calculate the bias + if bias is None: + if verbose: + print('COMPUTING BIAS: all counts together', flush=True) + bias = ICE_normalization( + ambiguate_counts( + list(counts_dict.values()), lengths=lengths_lowres, + ploidy=ploidy, exclude_zeros=True), + max_iter=300, output_bias=True)[1].flatten() # In each counts matrix, zero out counts for which bias is NaN for counts_type, counts in counts_dict.items(): initial_zero_beads = find_beads_to_remove( diff --git a/pastis/optimization/pastis_algorithms.py b/pastis/optimization/pastis_algorithms.py index 48a1cd8c9..7d73893a4 100644 --- a/pastis/optimization/pastis_algorithms.py +++ b/pastis/optimization/pastis_algorithms.py @@ -58,7 +58,7 @@ def _infer_draft(counts_raw, lengths, ploidy, outdir=None, alpha=None, seed=0, struct_draft_fullres=None, callback_freq=None, callback_function=None, reorienter=None, alpha_true=None, struct_true=None, input_weight=None, exclude_zeros=False, - null=False, mixture_coefs=None, verbose=True): + null=False, mixture_coefs=None, verbose=True, bias=None): """Infer draft 3D structures with PASTIS via Poisson model. """ @@ -92,7 +92,8 @@ def _infer_draft(counts_raw, lengths, ploidy, outdir=None, alpha=None, seed=0, counts_raw=counts_raw, lengths=lengths, ploidy=ploidy, normalize=normalize, filter_threshold=filter_threshold, multiscale_factor=1, exclude_zeros=exclude_zeros, beta=beta, - input_weight=input_weight, verbose=False, mixture_coefs=mixture_coefs) + input_weight=input_weight, verbose=False, mixture_coefs=mixture_coefs, + bias=bias) beta = [c.beta for c in counts if c.sum() != 0] alpha_ = alpha @@ -117,7 +118,7 @@ def _infer_draft(counts_raw, lengths, ploidy, outdir=None, alpha=None, seed=0, reorienter=reorienter, alpha_true=alpha_true, struct_true=struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, null=null, mixture_coefs=mixture_coefs, - verbose=verbose) + verbose=verbose, bias=bias) if not infer_var_fullres['converged']: return struct_draft_fullres, alpha_, beta_, hsc_r, False if alpha is not None: @@ -176,7 +177,7 @@ def _infer_draft(counts_raw, lengths, ploidy, outdir=None, alpha=None, seed=0, reorienter=reorienter, alpha_true=alpha_true, struct_true=struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, null=null, - mixture_coefs=mixture_coefs, verbose=verbose) + mixture_coefs=mixture_coefs, verbose=verbose, bias=bias) if not infer_var_lowres['converged']: return struct_draft_fullres, alpha_, beta_, hsc_r, False hsc_r = distance_between_homologs( @@ -205,7 +206,8 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0, struct_draft_fullres=None, draft=False, simple_diploid=False, callback_freq=None, callback_function=None, reorienter=None, alpha_true=None, struct_true=None, input_weight=None, - exclude_zeros=False, null=False, mixture_coefs=None, verbose=True): + exclude_zeros=False, null=False, mixture_coefs=None, verbose=True, + bias=None): """Infer 3D structures with PASTIS via Poisson model. Optimize 3D structure from Hi-C contact counts data for diploid @@ -298,6 +300,8 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0, For diploid organisms: whether this optimization is inferring a "simple diploid" structure in which homologs are assumed to be identical and completely overlapping with one another. + bias: str, optional + The path to the bias vector to normalize the counts data with. Returns ------- @@ -364,7 +368,7 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0, reorienter=reorienter, alpha_true=alpha_true, struct_true=struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, null=null, mixture_coefs=mixture_coefs, - verbose=verbose) + verbose=verbose, bias=bias) if not draft_converged: return None, {'alpha': alpha_, 'beta': beta_, 'seed': seed, 'converged': draft_converged} @@ -408,7 +412,7 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0, filter_threshold=filter_threshold, multiscale_factor=multiscale_factor, exclude_zeros=exclude_zeros, beta=beta_, input_weight=input_weight, verbose=verbose, fullres_torm=fullres_torm, - excluded_counts=excluded_counts, mixture_coefs=mixture_coefs) + excluded_counts=excluded_counts, mixture_coefs=mixture_coefs, bias=bias) if verbose: print('BETA: %s' % ', '.join( ['%s=%.3g' % (c.ambiguity, c.beta) for c in counts if c.sum() != 0]), @@ -578,7 +582,7 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0, callback_freq=callback_freq, reorienter=reorienter, alpha_true=alpha_true, struct_true=struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, - null=null, mixture_coefs=mixture_coefs, verbose=verbose) + null=null, mixture_coefs=mixture_coefs, verbose=verbose, bias=bias) if not infer_var['converged']: return struct_, infer_var if reorienter is not None and reorienter.reorient: @@ -605,7 +609,7 @@ def pastis_poisson(counts, lengths, ploidy, outdir='', chromosomes=None, piecewise_step1_accuracy=1, alpha_true=None, struct_true=None, init='mds', input_weight=None, exclude_zeros=False, null=False, mixture_coefs=None, - verbose=True): + verbose=True, bias=None): """Infer 3D structures with PASTIS via Poisson model. Infer 3D structure from Hi-C contact counts data for haploid or diploid @@ -681,6 +685,8 @@ def pastis_poisson(counts, lengths, ploidy, outdir='', chromosomes=None, homolog-separating constraint specificying the expected mean inter- homolog count for each chromosome, scaled by beta and biases. If not supplied, `mhs_k` will be estimated from the counts data. + bias : str, optional + The path to the bias vector to normalize the counts with Returns ------- @@ -706,10 +712,11 @@ def pastis_poisson(counts, lengths, ploidy, outdir='', chromosomes=None, callback_freq = {'print': print_freq, 'history': history_freq, 'save': save_freq} - counts, lengths_subset, chrom_subset, lengths_full, chrom_full, struct_true = load_data( + counts, lengths_subset, chrom_subset, lengths_full, chrom_full, struct_true, bias = load_data( counts=counts, lengths_full=lengths_full, ploidy=ploidy, chrom_full=chrom_full, chrom_subset=chrom_subset, - exclude_zeros=exclude_zeros, struct_true=struct_true) + exclude_zeros=exclude_zeros, struct_true=struct_true, + bias=bias) outdir = _output_subdir( outdir=outdir, chrom_full=chrom_full, chrom_subset=chrom_subset, @@ -731,7 +738,7 @@ def pastis_poisson(counts, lengths, ploidy, outdir='', chromosomes=None, callback_function=callback_function, callback_freq=callback_freq, alpha_true=alpha_true, struct_true=struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, - null=null, mixture_coefs=mixture_coefs, verbose=verbose) + null=null, mixture_coefs=mixture_coefs, verbose=verbose, bias=bias) else: from .piecewise_whole_genome import infer_piecewise @@ -757,7 +764,7 @@ def pastis_poisson(counts, lengths, ploidy, outdir='', chromosomes=None, piecewise_step1_accuracy=piecewise_step1_accuracy, alpha_true=alpha_true, struct_true=struct_true, init=init, input_weight=input_weight, exclude_zeros=exclude_zeros, null=null, - mixture_coefs=mixture_coefs, verbose=verbose) + mixture_coefs=mixture_coefs, verbose=verbose, bias=bias) if verbose: if infer_var['converged']: diff --git a/pastis/optimization/piecewise_whole_genome.py b/pastis/optimization/piecewise_whole_genome.py index 4d89a0e23..f92d89d7f 100644 --- a/pastis/optimization/piecewise_whole_genome.py +++ b/pastis/optimization/piecewise_whole_genome.py @@ -410,7 +410,7 @@ def infer_piecewise(counts_raw, outdir, lengths, ploidy, chromosomes, alpha, piecewise_step1_accuracy=1, alpha_true=None, struct_true=None, init='msd', input_weight=None, exclude_zeros=False, null=False, - mixture_coefs=None, verbose=True): + mixture_coefs=None, verbose=True, bias=None): """Infer whole genome 3D structures piecewise, first inferring chromosomes. """ @@ -457,7 +457,7 @@ def infer_piecewise(counts_raw, outdir, lengths, ploidy, chromosomes, alpha, callback_function=callback_function, alpha_true=alpha_true, struct_true=struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, null=null, mixture_coefs=mixture_coefs, - verbose=verbose) + verbose=verbose, bias=bias) if not draft_converged: return None, {'alpha': alpha_, 'beta': beta_, 'seed': seed, 'converged': draft_converged} @@ -473,7 +473,7 @@ def infer_piecewise(counts_raw, outdir, lengths, ploidy, chromosomes, alpha, normalize=normalize, filter_threshold=filter_threshold, multiscale_factor=1, exclude_zeros=exclude_zeros, beta=beta, input_weight=input_weight, verbose=verbose, - mixture_coefs=mixture_coefs) + mixture_coefs=mixture_coefs, bias=bias) else: fullres_torm_for_multiscale = None @@ -519,7 +519,7 @@ def infer_piecewise(counts_raw, outdir, lengths, ploidy, chromosomes, alpha, callback_freq=callback_freq, alpha_true=alpha_true, struct_true=struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, - null=null, mixture_coefs=mixture_coefs, verbose=verbose) + null=null, mixture_coefs=mixture_coefs, verbose=verbose, bias=bias) if not infer_var['converged']: return struct_, infer_var @@ -599,7 +599,7 @@ def infer_piecewise(counts_raw, outdir, lengths, ploidy, chromosomes, alpha, callback_freq=callback_freq, alpha_true=alpha_true, struct_true=chrom_struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, null=null, - mixture_coefs=mixture_coefs, verbose=verbose) + mixture_coefs=mixture_coefs, verbose=verbose, bias=bias) if not infer_var['converged']: return struct_, infer_var @@ -630,7 +630,7 @@ def infer_piecewise(counts_raw, outdir, lengths, ploidy, chromosomes, alpha, 'save': 0}, alpha_true=alpha_true, struct_true=struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, - null=null, mixture_coefs=mixture_coefs, verbose=False) + null=null, mixture_coefs=mixture_coefs, verbose=False, bias=bias) # Optionally rotate & translate previously inferred chromosomes if piecewise_opt_orient: @@ -669,7 +669,7 @@ def infer_piecewise(counts_raw, outdir, lengths, ploidy, chromosomes, alpha, callback_freq=callback_freq, reorienter=reorienter, alpha_true=alpha_true, struct_true=struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, - null=null, mixture_coefs=mixture_coefs, verbose=verbose) + null=null, mixture_coefs=mixture_coefs, verbose=verbose, bias=bias) if not infer_var['converged']: return struct_, infer_var @@ -699,6 +699,6 @@ def infer_piecewise(counts_raw, outdir, lengths, ploidy, chromosomes, alpha, callback_function=callback_function, callback_freq=callback_freq, alpha_true=alpha_true, struct_true=struct_true, input_weight=input_weight, exclude_zeros=exclude_zeros, - null=null, mixture_coefs=mixture_coefs, verbose=verbose) + null=null, mixture_coefs=mixture_coefs, verbose=verbose, bias=bias) return struct_, infer_var diff --git a/pastis/script/pastis-poisson b/pastis/script/pastis-poisson index 7017bb6db..071e0403f 100644 --- a/pastis/script/pastis-poisson +++ b/pastis/script/pastis-poisson @@ -51,6 +51,8 @@ parser.add_argument("--multiscale_rounds", default=1, type=int, " should be inferred during multiscale optimization." " Values of 1 or 0 disable multiscale" " optimization.") +parser.add_argument("--bias", default=None, type=str, + help="Path to the bias vector for normalization.") # Optimization convergence parser.add_argument("--max_iter", default=30000, type=int, From 4abb32078aae98c717ba9b2fc144f4b7e8b7da08 Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sun, 2 Jan 2022 23:54:29 -0800 Subject: [PATCH 02/16] debugging --- pastis/optimization/counts.py | 21 +++++++++++---------- pastis/optimization/mds.py | 5 +++++ pastis/optimization/pastis_algorithms.py | 3 +++ pastis/optimization/utils.py | 2 ++ 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/pastis/optimization/counts.py b/pastis/optimization/counts.py index 1676c7fbd..69bc0f81d 100644 --- a/pastis/optimization/counts.py +++ b/pastis/optimization/counts.py @@ -505,18 +505,19 @@ def _prep_counts(counts_list, lengths, ploidy=1, multiscale_factor=1, individual_counts_torms = individual_counts_torms | torm counts = sparse.coo_matrix(counts) counts_dict[counts_type] = counts - + + verbose=True # Optionally normalize counts - if normalize: + if normalize and bias is None: # If we have to calculate the bias - if bias is None: - if verbose: - print('COMPUTING BIAS: all counts together', flush=True) - bias = ICE_normalization( - ambiguate_counts( - list(counts_dict.values()), lengths=lengths_lowres, - ploidy=ploidy, exclude_zeros=True), - max_iter=300, output_bias=True)[1].flatten() + if verbose: + print('COMPUTING BIAS: all counts together', flush=True) + bias = ICE_normalization( + ambiguate_counts( + list(counts_dict.values()), lengths=lengths_lowres, + ploidy=ploidy, exclude_zeros=True), + max_iter=300, output_bias=True)[1].flatten() + if bias is not None: # In each counts matrix, zero out counts for which bias is NaN for counts_type, counts in counts_dict.items(): initial_zero_beads = find_beads_to_remove( diff --git a/pastis/optimization/mds.py b/pastis/optimization/mds.py index 1d4446b2e..98e799064 100644 --- a/pastis/optimization/mds.py +++ b/pastis/optimization/mds.py @@ -128,17 +128,22 @@ def estimate_X(counts, alpha=-3., beta=1., ini=None, """ n = counts.shape[0] + print('in here') random_state = check_random_state(random_state) if ini is None or ini == "random": + print('what') ini = 1 - 2 * random_state.rand(n * 3) if not precompute_distances or precompute_distances == "auto": + print('lol!') distances = compute_wish_distances(counts, alpha=alpha, beta=beta, bias=bias) else: if bias is not None: counts /= bias counts /= bias.T + print('ohh') + np.save("gesine_counts2.npy", counts) distances = counts results = optimize.fmin_l_bfgs_b( MDS_obj, ini.flatten(), diff --git a/pastis/optimization/pastis_algorithms.py b/pastis/optimization/pastis_algorithms.py index 7d73893a4..4defbffc7 100644 --- a/pastis/optimization/pastis_algorithms.py +++ b/pastis/optimization/pastis_algorithms.py @@ -422,6 +422,9 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0, else: print('ALPHA: %.3g' % alpha_, flush=True) + #import scipy + #scipy.sparse.save_npz("gesine_counts2.npz", counts[0].tocoo()) + # INITIALIZATION random_state = np.random.RandomState(seed) random_state = check_random_state(random_state) diff --git a/pastis/optimization/utils.py b/pastis/optimization/utils.py index efc4bf63f..6cea63d02 100644 --- a/pastis/optimization/utils.py +++ b/pastis/optimization/utils.py @@ -162,8 +162,10 @@ def compute_wish_distances(counts, alpha=-3., beta=1., bias=None): if not sparse.isspmatrix_coo(counts): counts = counts.tocoo() if bias is not None: + print('bias') bias = bias.flatten() counts.data /= bias[counts.row] * bias[counts.col] + sparse.save_npz("gesine_counts3.npz", counts) wish_distances = counts / beta wish_distances.data[wish_distances.data != 0] **= 1. / alpha return wish_distances From eb394dc528d1ccdf094d9fd862bdf49f7d06bd5d Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Thu, 6 Jan 2022 12:13:33 -0800 Subject: [PATCH 03/16] debugging --- pastis/optimization/counts.py | 2 +- pastis/optimization/mds.py | 11 +++++++++-- pastis/optimization/pastis_algorithms.py | 8 ++++++++ pastis/optimization/utils.py | 10 ++++++++-- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/pastis/optimization/counts.py b/pastis/optimization/counts.py index 69bc0f81d..8fd1c30ea 100644 --- a/pastis/optimization/counts.py +++ b/pastis/optimization/counts.py @@ -538,7 +538,7 @@ def _prep_counts(counts_list, lengths, ploidy=1, multiscale_factor=1, print(' removing %d additional beads from %s' % (torm.sum() - initial_zero_beads, counts_type), flush=True) - + #bias[np.isnan(bias)] = 1 output_counts = check_counts( list(counts_dict.values()), lengths=lengths_lowres, ploidy=ploidy, exclude_zeros=exclude_zeros) diff --git a/pastis/optimization/mds.py b/pastis/optimization/mds.py index 98e799064..569d3a63d 100644 --- a/pastis/optimization/mds.py +++ b/pastis/optimization/mds.py @@ -129,6 +129,7 @@ def estimate_X(counts, alpha=-3., beta=1., ini=None, """ n = counts.shape[0] print('in here') + random_state = check_random_state(random_state) if ini is None or ini == "random": @@ -138,13 +139,19 @@ def estimate_X(counts, alpha=-3., beta=1., ini=None, print('lol!') distances = compute_wish_distances(counts, alpha=alpha, beta=beta, bias=bias) + #np.save("gesine_wish_distance", distances) + #np.save("nelle_wish_distance", distances) + print('saved') else: if bias is not None: + print('used bias here') counts /= bias counts /= bias.T - print('ohh') - np.save("gesine_counts2.npy", counts) + #print('ohh') + #np.save("gesine_counts2.npy", counts) distances = counts + #distances = sparse.load_npz("nelle_wd_sparse.npz") + #distances = sparse.load_npz("gesine_wd_sparse.npz") results = optimize.fmin_l_bfgs_b( MDS_obj, ini.flatten(), MDS_gradient, diff --git a/pastis/optimization/pastis_algorithms.py b/pastis/optimization/pastis_algorithms.py index 4defbffc7..40f885b26 100644 --- a/pastis/optimization/pastis_algorithms.py +++ b/pastis/optimization/pastis_algorithms.py @@ -442,6 +442,14 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0, alpha=alpha_init if alpha_ is None else alpha_, bias=bias, multiscale_factor=multiscale_factor, reorienter=reorienter, mixture_coefs=mixture_coefs, verbose=verbose) + #np.save("gesine_init_nelle_wd", struct_init) + #print("DONE THE SAVE!") + #print(np.sum(np.isnan(struct_init))) + #print('saved') + #np.save("gesine_init_struct_seed1", struct_init) + #print("LOADED IT!!!") + #struct_init = np.loadtxt("./data/exp/exp_MDS_01_structure.txt") + #struct_init[np.isnan(struct_init)] = 0 # HOMOLOG-SEPARATING CONSTRAINT if ploidy == 1 and (hsc_lambda > 0 or mhs_lambda > 0): diff --git a/pastis/optimization/utils.py b/pastis/optimization/utils.py index 6cea63d02..249435490 100644 --- a/pastis/optimization/utils.py +++ b/pastis/optimization/utils.py @@ -162,12 +162,18 @@ def compute_wish_distances(counts, alpha=-3., beta=1., bias=None): if not sparse.isspmatrix_coo(counts): counts = counts.tocoo() if bias is not None: - print('bias') + print('done it here') + #print('bias') bias = bias.flatten() + #bias[np.isnan(bias)] = 1.0 counts.data /= bias[counts.row] * bias[counts.col] - sparse.save_npz("gesine_counts3.npz", counts) + #sparse.save_npz("gesine_counts3.npz", counts) wish_distances = counts / beta wish_distances.data[wish_distances.data != 0] **= 1. / alpha + #sparse.save_npz("gesine_wd_sparse.npz", wish_distances) + #print("SAVED THEM!") + #return sparse.load_npz("nelle_wd_sparse.npz") + #return sparse.load_npz("gesine_wd_sparse.npz") return wish_distances else: wish_distances = counts.copy() / beta From 403eea4dc0acae034ea2eea0861488fab08c0afc Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sun, 9 Jan 2022 23:30:41 -0800 Subject: [PATCH 04/16] debugging --- pastis/optimization/initialization.py | 1 + pastis/optimization/mds.py | 66 +++++++++++++++++++++++++-- pastis/optimization/utils.py | 7 +++ 3 files changed, 69 insertions(+), 5 deletions(-) diff --git a/pastis/optimization/initialization.py b/pastis/optimization/initialization.py index b2984e77d..b62b8d6b7 100644 --- a/pastis/optimization/initialization.py +++ b/pastis/optimization/initialization.py @@ -42,6 +42,7 @@ def _initialize_struct_mds(counts, lengths, ploidy, alpha, bias, random_state, struct = struct.reshape(-1, 3) torm = find_beads_to_remove(counts, struct.shape[0]) + print(torm) struct[torm] = np.nan return struct diff --git a/pastis/optimization/mds.py b/pastis/optimization/mds.py index 569d3a63d..acc15b539 100644 --- a/pastis/optimization/mds.py +++ b/pastis/optimization/mds.py @@ -79,6 +79,37 @@ def estimate_X(counts, alpha=-3., beta=1., ini=None, random_state=None, type="MDS2", factr=1e12, maxiter=10000): + + """ + counts, + alpha=-3., + beta=1., + ini=None, + verbose=0, + use_zero_entries=False, + precompute_distances=False, + bias=None, + random_state=None, + type="MDS2", + factr=1e12, + maxiter=10000 + + + ua_counts._counts.astype(float), + alpha=-3. if alpha is None else alpha, + beta=ua_beta, + #ini=None + #verbose=False, + #use_zero_entries=False, + #precompute_distances='auto', + bias=(np.tile(bias, ploidy) if bias is not None else bias), + random_state=random_state, + #type="MDS2", + #factr=1e12, + #maxiter=10000, + + """ + """ Estimating the structure from contact count data using MDS/NMDS @@ -127,24 +158,49 @@ def estimate_X(counts, alpha=-3., beta=1., ini=None, The structure as an ndarray of shape (n, 3). """ + + #print(counts) + #print(alpha) + #print(beta) + #print(ini) + #print(verbose) + #print(use_zero_entries) + #print(bias) + #print(random_state) + #print(factr) + #print(maxiter) + + """ + + -3.0 + 1.0 + RandomState(MT19937) + + + -3.0 + 1.0 + RandomState(MT19937) + """ + + n = counts.shape[0] - print('in here') + #print('in here') random_state = check_random_state(random_state) if ini is None or ini == "random": - print('what') + #print('what') ini = 1 - 2 * random_state.rand(n * 3) if not precompute_distances or precompute_distances == "auto": - print('lol!') + #print('lol!') distances = compute_wish_distances(counts, alpha=alpha, beta=beta, bias=bias) #np.save("gesine_wish_distance", distances) #np.save("nelle_wish_distance", distances) - print('saved') + #print('saved') else: if bias is not None: - print('used bias here') + #print('used bias here') counts /= bias counts /= bias.T #print('ohh') diff --git a/pastis/optimization/utils.py b/pastis/optimization/utils.py index 249435490..2aaf38b37 100644 --- a/pastis/optimization/utils.py +++ b/pastis/optimization/utils.py @@ -155,6 +155,7 @@ def compute_wish_distances(counts, alpha=-3., beta=1., bias=None): ------- wish_distances """ + print("WISHING") if beta == 0: raise ValueError("beta cannot be equal to 0.") counts = counts.copy() @@ -174,6 +175,12 @@ def compute_wish_distances(counts, alpha=-3., beta=1., bias=None): #print("SAVED THEM!") #return sparse.load_npz("nelle_wd_sparse.npz") #return sparse.load_npz("gesine_wd_sparse.npz") + + #sparse.save_npz("nelle_wd_nonorm_bias_bias_triu.npz", wish_distances) + #sparse.save_npz("nelle_wd_nonorm_bias_bias.npz", wish_distances) + #sparse.save_npz("nelle_wd_norm_bias_none_triu.npz", wish_distances) + sparse.save_npz("nelle_wd_norm_bias_none.npz", wish_distances) + print("WE HAVE INDEED SAVED THEM") return wish_distances else: wish_distances = counts.copy() / beta From aabf95444a6414409da7b3f11569c79dfe140518 Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sun, 16 Jan 2022 19:40:49 -0800 Subject: [PATCH 05/16] temp --- pastis/optimization/counts.py | 1 - pastis/optimization/initialization.py | 4 +-- pastis/optimization/mds.py | 35 --------------------------- pastis/optimization/utils.py | 17 +------------ 4 files changed, 3 insertions(+), 54 deletions(-) diff --git a/pastis/optimization/counts.py b/pastis/optimization/counts.py index 8fd1c30ea..e60bf3673 100644 --- a/pastis/optimization/counts.py +++ b/pastis/optimization/counts.py @@ -538,7 +538,6 @@ def _prep_counts(counts_list, lengths, ploidy=1, multiscale_factor=1, print(' removing %d additional beads from %s' % (torm.sum() - initial_zero_beads, counts_type), flush=True) - #bias[np.isnan(bias)] = 1 output_counts = check_counts( list(counts_dict.values()), lengths=lengths_lowres, ploidy=ploidy, exclude_zeros=exclude_zeros) diff --git a/pastis/optimization/initialization.py b/pastis/optimization/initialization.py index b62b8d6b7..dca257e53 100644 --- a/pastis/optimization/initialization.py +++ b/pastis/optimization/initialization.py @@ -31,7 +31,7 @@ def _initialize_struct_mds(counts, lengths, ploidy, alpha, bias, random_state, ua_beta = ua_counts.beta if ua_beta is not None: ua_beta *= multiscale_factor ** 2 - + struct = estimate_X( ua_counts._counts.astype(float), alpha=-3. if alpha is None else alpha, beta=ua_beta, verbose=False, @@ -42,7 +42,7 @@ def _initialize_struct_mds(counts, lengths, ploidy, alpha, bias, random_state, struct = struct.reshape(-1, 3) torm = find_beads_to_remove(counts, struct.shape[0]) - print(torm) + struct[torm] = np.nan return struct diff --git a/pastis/optimization/mds.py b/pastis/optimization/mds.py index acc15b539..b84f459a6 100644 --- a/pastis/optimization/mds.py +++ b/pastis/optimization/mds.py @@ -159,55 +159,20 @@ def estimate_X(counts, alpha=-3., beta=1., ini=None, """ - #print(counts) - #print(alpha) - #print(beta) - #print(ini) - #print(verbose) - #print(use_zero_entries) - #print(bias) - #print(random_state) - #print(factr) - #print(maxiter) - - """ - - -3.0 - 1.0 - RandomState(MT19937) - - - -3.0 - 1.0 - RandomState(MT19937) - """ - - n = counts.shape[0] - #print('in here') random_state = check_random_state(random_state) if ini is None or ini == "random": - #print('what') ini = 1 - 2 * random_state.rand(n * 3) if not precompute_distances or precompute_distances == "auto": - #print('lol!') distances = compute_wish_distances(counts, alpha=alpha, beta=beta, bias=bias) - #np.save("gesine_wish_distance", distances) - #np.save("nelle_wish_distance", distances) - #print('saved') else: if bias is not None: - #print('used bias here') counts /= bias counts /= bias.T - #print('ohh') - #np.save("gesine_counts2.npy", counts) distances = counts - #distances = sparse.load_npz("nelle_wd_sparse.npz") - #distances = sparse.load_npz("gesine_wd_sparse.npz") results = optimize.fmin_l_bfgs_b( MDS_obj, ini.flatten(), MDS_gradient, diff --git a/pastis/optimization/utils.py b/pastis/optimization/utils.py index 2aaf38b37..bc4a27202 100644 --- a/pastis/optimization/utils.py +++ b/pastis/optimization/utils.py @@ -155,7 +155,7 @@ def compute_wish_distances(counts, alpha=-3., beta=1., bias=None): ------- wish_distances """ - print("WISHING") + #print("WISHING") if beta == 0: raise ValueError("beta cannot be equal to 0.") counts = counts.copy() @@ -163,27 +163,12 @@ def compute_wish_distances(counts, alpha=-3., beta=1., bias=None): if not sparse.isspmatrix_coo(counts): counts = counts.tocoo() if bias is not None: - print('done it here') - #print('bias') bias = bias.flatten() - #bias[np.isnan(bias)] = 1.0 counts.data /= bias[counts.row] * bias[counts.col] - #sparse.save_npz("gesine_counts3.npz", counts) wish_distances = counts / beta wish_distances.data[wish_distances.data != 0] **= 1. / alpha - #sparse.save_npz("gesine_wd_sparse.npz", wish_distances) - #print("SAVED THEM!") - #return sparse.load_npz("nelle_wd_sparse.npz") - #return sparse.load_npz("gesine_wd_sparse.npz") - - #sparse.save_npz("nelle_wd_nonorm_bias_bias_triu.npz", wish_distances) - #sparse.save_npz("nelle_wd_nonorm_bias_bias.npz", wish_distances) - #sparse.save_npz("nelle_wd_norm_bias_none_triu.npz", wish_distances) - sparse.save_npz("nelle_wd_norm_bias_none.npz", wish_distances) - print("WE HAVE INDEED SAVED THEM") return wish_distances else: wish_distances = counts.copy() / beta wish_distances[wish_distances != 0] **= 1. / alpha - return wish_distances From 45a6247170aa39b9969e38209f94c29b0765487c Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Thu, 10 Feb 2022 13:38:41 -0800 Subject: [PATCH 06/16] ready to be reviewed --- pastis/io/read/all_data.py | 45 ++++++++++-------------- pastis/optimization/mds.py | 32 ----------------- pastis/optimization/pastis_algorithms.py | 11 ------ pastis/optimization/utils.py | 2 +- 4 files changed, 19 insertions(+), 71 deletions(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index e35336c65..565ac51d8 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -9,22 +9,18 @@ def _get_lengths(lengths): """Load chromosome lengths from file, or reformat lengths object. """ + if lengths is not None: - if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)) and len(lengths) == 1: + if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)) \ + and len(lengths) == 1: lengths = lengths[0] - if isinstance(lengths, str) and os.path.exists(lengths): - lengths = load_lengths(lengths) + if isinstance(lengths, str): + if os.path.exists(lengths): + lengths = load_lengths(lengths) + else: + raise ValueError("Path to lengths does not exist.") lengths = np.array(lengths).astype(int) return lengths -""" - if isinstance(lengths, str) and os.path.exists(lengths): - lengths = load_lengths(lengths) - elif lengths is not None and (isinstance(lengths, list) or isinstance(lengths, np.ndarray)): - if len(lengths) == 1 and isinstance(lengths[0], str) and os.path.exists(lengths[0]): - lengths = load_lengths(lengths[0]) - lengths = np.array(lengths).astype(int) - return lengths -""" def _get_chrom(chrom, lengths): @@ -33,25 +29,18 @@ def _get_chrom(chrom, lengths): lengths = _get_lengths(lengths) if chrom is not None: - if (isinstance(chrom, list) or isinstance(chrom, np.ndarray)) and len(chrom) == 1: + if (isinstance(chrom, list) or isinstance(chrom, np.ndarray)) \ + and len(chrom) == 1: chrom = chrom[0] - if isinstance(chrom, str) and os.path.exists(chrom): - chrom = np.genfromtxt(chrom, dtype='str') + if isinstance(chrom, str): + if os.path.exists(chrom): + chrom = np.genfromtxt(chrom, dtype='str') + else: + raise ValueError("Path to chrom does not exist.") chrom = np.array(chrom).reshape(-1) else: chrom = np.array(['num%d' % i for i in range(1, len(lengths) + 1)]) return chrom -""" - if isinstance(chrom, str) and os.path.exists(chrom): - chrom = np.array(np.genfromtxt(chrom, dtype='str')).reshape(-1) - elif chrom is not None and (isinstance(chrom, list) or isinstance(chrom, np.ndarray)): - if len(chrom) == 1 and isinstance(chrom[0], str) and os.path.exists(chrom[0]): - chrom = np.array(np.genfromtxt(chrom[0], dtype='str')).reshape(-1) - chrom = np.array(chrom) - else: - chrom = np.array(['num%d' % i for i in range(1, len(lengths) + 1)]) - return chrom -""" def _get_counts(counts, lengths): @@ -82,8 +71,10 @@ def _get_counts(counts, lengths): def _get_bias(bias): """Load bias from file, or reformat bias object. """ + if bias is not None: - if (isinstance(bias, list) or isinstance(bias, np.ndarray)) and len(bias) == 1: + if (isinstance(bias, list) or isinstance(bias, np.ndarray)) \ + and len(bias) == 1: bias = bias[0] if isinstance(bias, str): if os.path.exists(bias): diff --git a/pastis/optimization/mds.py b/pastis/optimization/mds.py index b84f459a6..3b399c6fa 100644 --- a/pastis/optimization/mds.py +++ b/pastis/optimization/mds.py @@ -79,37 +79,6 @@ def estimate_X(counts, alpha=-3., beta=1., ini=None, random_state=None, type="MDS2", factr=1e12, maxiter=10000): - - """ - counts, - alpha=-3., - beta=1., - ini=None, - verbose=0, - use_zero_entries=False, - precompute_distances=False, - bias=None, - random_state=None, - type="MDS2", - factr=1e12, - maxiter=10000 - - - ua_counts._counts.astype(float), - alpha=-3. if alpha is None else alpha, - beta=ua_beta, - #ini=None - #verbose=False, - #use_zero_entries=False, - #precompute_distances='auto', - bias=(np.tile(bias, ploidy) if bias is not None else bias), - random_state=random_state, - #type="MDS2", - #factr=1e12, - #maxiter=10000, - - """ - """ Estimating the structure from contact count data using MDS/NMDS @@ -161,7 +130,6 @@ def estimate_X(counts, alpha=-3., beta=1., ini=None, n = counts.shape[0] - random_state = check_random_state(random_state) if ini is None or ini == "random": ini = 1 - 2 * random_state.rand(n * 3) diff --git a/pastis/optimization/pastis_algorithms.py b/pastis/optimization/pastis_algorithms.py index 40f885b26..7d73893a4 100644 --- a/pastis/optimization/pastis_algorithms.py +++ b/pastis/optimization/pastis_algorithms.py @@ -422,9 +422,6 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0, else: print('ALPHA: %.3g' % alpha_, flush=True) - #import scipy - #scipy.sparse.save_npz("gesine_counts2.npz", counts[0].tocoo()) - # INITIALIZATION random_state = np.random.RandomState(seed) random_state = check_random_state(random_state) @@ -442,14 +439,6 @@ def infer(counts_raw, lengths, ploidy, outdir='', alpha=None, seed=0, alpha=alpha_init if alpha_ is None else alpha_, bias=bias, multiscale_factor=multiscale_factor, reorienter=reorienter, mixture_coefs=mixture_coefs, verbose=verbose) - #np.save("gesine_init_nelle_wd", struct_init) - #print("DONE THE SAVE!") - #print(np.sum(np.isnan(struct_init))) - #print('saved') - #np.save("gesine_init_struct_seed1", struct_init) - #print("LOADED IT!!!") - #struct_init = np.loadtxt("./data/exp/exp_MDS_01_structure.txt") - #struct_init[np.isnan(struct_init)] = 0 # HOMOLOG-SEPARATING CONSTRAINT if ploidy == 1 and (hsc_lambda > 0 or mhs_lambda > 0): diff --git a/pastis/optimization/utils.py b/pastis/optimization/utils.py index bc4a27202..0bee9c916 100644 --- a/pastis/optimization/utils.py +++ b/pastis/optimization/utils.py @@ -155,7 +155,7 @@ def compute_wish_distances(counts, alpha=-3., beta=1., bias=None): ------- wish_distances """ - #print("WISHING") + if beta == 0: raise ValueError("beta cannot be equal to 0.") counts = counts.copy() From 2504223a0aea05b655fe9fad0fd730442762e520 Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sat, 12 Feb 2022 17:13:45 -0800 Subject: [PATCH 07/16] fixed bug --- pastis/io/read/all_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index 565ac51d8..b9129863f 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -12,14 +12,14 @@ def _get_lengths(lengths): if lengths is not None: if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)) \ - and len(lengths) == 1: + and len(lengths) == 1: lengths = lengths[0] if isinstance(lengths, str): if os.path.exists(lengths): lengths = load_lengths(lengths) else: raise ValueError("Path to lengths does not exist.") - lengths = np.array(lengths).astype(int) + lengths = np.array([lengths]).astype(int) return lengths From 8d5409840d134eba3102165e87adb517604fe14c Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sat, 12 Feb 2022 17:41:09 -0800 Subject: [PATCH 08/16] fixed locally? --- pastis/io/read/all_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index b9129863f..f5a7061c7 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -12,7 +12,7 @@ def _get_lengths(lengths): if lengths is not None: if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)) \ - and len(lengths) == 1: + and len(lengths) == 1: lengths = lengths[0] if isinstance(lengths, str): if os.path.exists(lengths): From aa983cd8bb31f2701d4c228a2f033bbe04a4b15f Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sat, 12 Feb 2022 18:09:37 -0800 Subject: [PATCH 09/16] debugging --- pastis/io/read/all_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index f5a7061c7..565ac51d8 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -19,7 +19,7 @@ def _get_lengths(lengths): lengths = load_lengths(lengths) else: raise ValueError("Path to lengths does not exist.") - lengths = np.array([lengths]).astype(int) + lengths = np.array(lengths).astype(int) return lengths From d88ee37165de21954abd37a1d6722c68bf9c214f Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sat, 12 Feb 2022 18:12:03 -0800 Subject: [PATCH 10/16] negative binomial fails randomly --- pastis/io/read/all_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index 565ac51d8..0452d72d9 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -18,7 +18,7 @@ def _get_lengths(lengths): if os.path.exists(lengths): lengths = load_lengths(lengths) else: - raise ValueError("Path to lengths does not exist.") + raise ValueError("Path to lengths file does not exist.") lengths = np.array(lengths).astype(int) return lengths From 524ccc81fb9e7a0cbdf268c35a3c4672f4a7f135 Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sat, 12 Feb 2022 18:17:55 -0800 Subject: [PATCH 11/16] negative biniomal --- pastis/io/read/all_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index 0452d72d9..565ac51d8 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -18,7 +18,7 @@ def _get_lengths(lengths): if os.path.exists(lengths): lengths = load_lengths(lengths) else: - raise ValueError("Path to lengths file does not exist.") + raise ValueError("Path to lengths does not exist.") lengths = np.array(lengths).astype(int) return lengths From 281e5dd13fecdb0f28396ec267933f6e1c8707a5 Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sun, 13 Feb 2022 12:25:52 -0800 Subject: [PATCH 12/16] what is going on --- pastis/io/read/all_data.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index 565ac51d8..28662a191 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -10,6 +10,14 @@ def _get_lengths(lengths): """Load chromosome lengths from file, or reformat lengths object. """ + if isinstance(lengths, str) and os.path.exists(lengths): + lengths = load_lengths(lengths) + elif lengths is not None and (isinstance(lengths, list) or isinstance(lengths, np.ndarray)): + if len(lengths) == 1 and isinstance(lengths[0], str) and os.path.exists(lengths[0]): + lengths = load_lengths(lengths[0]) + lengths = np.array(lengths).astype(int) + return lengths + if lengths is not None: if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)) \ and len(lengths) == 1: @@ -19,7 +27,7 @@ def _get_lengths(lengths): lengths = load_lengths(lengths) else: raise ValueError("Path to lengths does not exist.") - lengths = np.array(lengths).astype(int) + lengths = np.array(lengths).astype(int) return lengths From 73b42d1305571b34068399dd88cd09fbe77a3c59 Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sun, 13 Feb 2022 12:30:04 -0800 Subject: [PATCH 13/16] small changed to if statement --- pastis/io/read/all_data.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index 28662a191..feaa8674a 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -10,6 +10,7 @@ def _get_lengths(lengths): """Load chromosome lengths from file, or reformat lengths object. """ + """ if isinstance(lengths, str) and os.path.exists(lengths): lengths = load_lengths(lengths) elif lengths is not None and (isinstance(lengths, list) or isinstance(lengths, np.ndarray)): @@ -17,10 +18,11 @@ def _get_lengths(lengths): lengths = load_lengths(lengths[0]) lengths = np.array(lengths).astype(int) return lengths + """ if lengths is not None: - if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)) \ - and len(lengths) == 1: + if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)): + if len(lengths) == 1: lengths = lengths[0] if isinstance(lengths, str): if os.path.exists(lengths): From c3c146bf19dc304509e09e42f6a53d8a614b8d68 Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sun, 13 Feb 2022 12:57:15 -0800 Subject: [PATCH 14/16] added isinstance int --- pastis/io/read/all_data.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index feaa8674a..479cc53d1 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -21,14 +21,16 @@ def _get_lengths(lengths): """ if lengths is not None: - if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)): - if len(lengths) == 1: + if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)) \ + and len(lengths) == 1: lengths = lengths[0] if isinstance(lengths, str): if os.path.exists(lengths): lengths = load_lengths(lengths) else: raise ValueError("Path to lengths does not exist.") + if isinstance(lengths, int): + lengths = [lengths] lengths = np.array(lengths).astype(int) return lengths From 9bfdb2c1889e6d25734b2cc62e67ffc3eed8ce46 Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sun, 13 Feb 2022 13:27:11 -0800 Subject: [PATCH 15/16] fixed bug with one element numpy array --- pastis/io/read/all_data.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index 479cc53d1..f2c922a35 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -10,16 +10,6 @@ def _get_lengths(lengths): """Load chromosome lengths from file, or reformat lengths object. """ - """ - if isinstance(lengths, str) and os.path.exists(lengths): - lengths = load_lengths(lengths) - elif lengths is not None and (isinstance(lengths, list) or isinstance(lengths, np.ndarray)): - if len(lengths) == 1 and isinstance(lengths[0], str) and os.path.exists(lengths[0]): - lengths = load_lengths(lengths[0]) - lengths = np.array(lengths).astype(int) - return lengths - """ - if lengths is not None: if (isinstance(lengths, list) or isinstance(lengths, np.ndarray)) \ and len(lengths) == 1: @@ -29,6 +19,8 @@ def _get_lengths(lengths): lengths = load_lengths(lengths) else: raise ValueError("Path to lengths does not exist.") + if isinstance(lengths, np.int64): + lengths = lengths.item() if isinstance(lengths, int): lengths = [lengths] lengths = np.array(lengths).astype(int) @@ -49,6 +41,8 @@ def _get_chrom(chrom, lengths): chrom = np.genfromtxt(chrom, dtype='str') else: raise ValueError("Path to chrom does not exist.") + if isinstance(chrom, np.ndarray): + chrom = [chrom].item() chrom = np.array(chrom).reshape(-1) else: chrom = np.array(['num%d' % i for i in range(1, len(lengths) + 1)]) @@ -96,6 +90,10 @@ def _get_bias(bias): bias = np.loadtxt(bias) else: raise ValueError("Path to bias vector does not exist.") + if isinstance(bias, np.int64): + bias = bias.item() + if isinstance(bias, int): + bias = [bias] bias = np.array(bias).astype(float) return bias @@ -147,6 +145,8 @@ def load_data(counts, lengths_full, ploidy, chrom_full=None, """ lengths_full = _get_lengths(lengths_full) + print("YEP") + print(lengths_full) chrom_full = _get_chrom(chrom_full, lengths_full) counts = _get_counts(counts, lengths_full) bias = _get_bias(bias) From 4fad1032569e03a815aca257ebf10b0df5a9c0f8 Mon Sep 17 00:00:00 2001 From: Mozes Jacobs Date: Sun, 13 Feb 2022 13:27:39 -0800 Subject: [PATCH 16/16] removed print statement --- pastis/io/read/all_data.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pastis/io/read/all_data.py b/pastis/io/read/all_data.py index f2c922a35..5f5ace5a8 100644 --- a/pastis/io/read/all_data.py +++ b/pastis/io/read/all_data.py @@ -145,8 +145,6 @@ def load_data(counts, lengths_full, ploidy, chrom_full=None, """ lengths_full = _get_lengths(lengths_full) - print("YEP") - print(lengths_full) chrom_full = _get_chrom(chrom_full, lengths_full) counts = _get_counts(counts, lengths_full) bias = _get_bias(bias)