From c832a2e95e413b102acb278c74c6501a01f30157 Mon Sep 17 00:00:00 2001 From: Philipp Date: Sun, 28 Sep 2025 22:24:42 -0400 Subject: [PATCH 1/3] Add arithmetic function to combine matrices --- deeptools/computeMatrixOperations.py | 161 ++++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 1 deletion(-) diff --git a/deeptools/computeMatrixOperations.py b/deeptools/computeMatrixOperations.py index 0224f00a39..ccfc194f32 100755 --- a/deeptools/computeMatrixOperations.py +++ b/deeptools/computeMatrixOperations.py @@ -49,6 +49,9 @@ def parse_arguments(): or computeMatrixOperations dataRange -h +or + computeMatrixOperationsExtended arithmetic -h + """, epilog='example usages:\n' 'computeMatrixOperations subset -m input.mat.gz -o output.mat.gz --group "group 1" "group 2" --samples "sample 3" "sample 10"\n\n' @@ -137,6 +140,14 @@ def parse_arguments(): help='Returns the min, max, median, 10th and 90th percentile of the matrix values per sample.', usage='Example usage:\n computeMatrixOperations dataRange -m input.mat.gz\n\n') + # arithmetic + subparsers.add_parser( + 'arithmetic', + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + parents=[arithmeticArgs()], + help='Perform arithmetic operations between two matrices with identical dimensions.', + usage='Example usage:\n computeMatrixOperationsExtended arithmetic -m input1.mat.gz input2.mat.gz -o output.mat.gz --operation ratio --pseudocount 1\n\n') + parser.add_argument('--version', action='version', version='%(prog)s {}'.format(version('deeptools'))) @@ -250,6 +261,43 @@ def filterValuesArgs(): return parser +def arithmeticArgs(): + parser = argparse.ArgumentParser(add_help=False) + required = parser.add_argument_group('Required arguments') + + required.add_argument('--matrixFile', '-m', + help='Two matrix files from the computeMatrix tool.', + nargs=2, + required=True) + + required.add_argument('--outFileName', '-o', + help='Output file name', + required=True) + + optional = parser.add_argument_group('Optional arguments') + + optional.add_argument('--operation', + choices=['ratio', 'log2ratio', 'subtract', 'add', 'divide', 'multiply'], + default='ratio', + help='Arithmetic operation to perform. ratio=(data1+pseudocount)/(data2+pseudocount), ' + 'log2ratio=log2((data1+pseudocount)/(data2+pseudocount)), ' + 'subtract=data1-data2, add=data1+data2, divide=data1/data2, ' + 'multiply=data1*data2 (Default: %(default)s)') + + optional.add_argument('--pseudocount', + type=float, + default=1.0, + help='Pseudocount to add to avoid division by zero. ' + 'Only used for ratio and log2ratio operations. ' + 'Set to 0 to disable. (Default: %(default)s)') + + optional.add_argument('--no-pseudocount', + action='store_true', + help='Disable pseudocount (equivalent to --pseudocount 0)') + + return parser + + def sortArgs(): parser = argparse.ArgumentParser(add_help=False) required = parser.add_argument_group('Required arguments') @@ -688,6 +736,109 @@ def loadGTF(line, fp, fname, labels, regions, transcriptID, transcript_id_design regions[labelIdx][name] = len(regions[labelIdx]) +def arithmeticMatrices(args): + """ + Perform arithmetic operations between two matrices. + Both matrices must have identical dimensions. + """ + # Load both matrices + hm1 = heatmapper.heatmapper() + hm2 = heatmapper.heatmapper() + + hm1.read_matrix_file(args.matrixFile[0]) + hm2.read_matrix_file(args.matrixFile[1]) + + # Check that matrices have the same dimensions + if hm1.matrix.matrix.shape != hm2.matrix.matrix.shape: + sys.exit("Error: Matrices have different dimensions. " + "Matrix 1: {}, Matrix 2: {}\n".format( + hm1.matrix.matrix.shape, hm2.matrix.matrix.shape)) + + # Check that sample numbers and boundaries match + if len(hm1.matrix.sample_labels) != len(hm2.matrix.sample_labels): + sys.exit("Error: Matrices have different numbers of samples. " + "Matrix 1: {}, Matrix 2: {}\n".format( + len(hm1.matrix.sample_labels), len(hm2.matrix.sample_labels))) + + if hm1.matrix.sample_boundaries != hm2.matrix.sample_boundaries: + sys.exit("Error: Matrices have different sample boundaries.\n") + + # Check that group boundaries match + if hm1.matrix.group_boundaries != hm2.matrix.group_boundaries: + sys.exit("Error: Matrices have different group boundaries.\n") + + # Determine pseudocount + pseudocount = 0.0 if args.no_pseudocount else args.pseudocount + + # Perform the arithmetic operation + data1 = hm1.matrix.matrix + data2 = hm2.matrix.matrix + + # Handle NaN values by preserving them in the output + nan_mask = np.isnan(data1) | np.isnan(data2) + + if args.operation == 'ratio': + if pseudocount == 0: + # Avoid division by zero + with np.errstate(divide='ignore', invalid='ignore'): + result = data1 / data2 + else: + result = (data1 + pseudocount) / (data2 + pseudocount) + + elif args.operation == 'log2ratio': + if pseudocount == 0: + # Avoid division by zero and log of zero/negative + with np.errstate(divide='ignore', invalid='ignore'): + result = np.log2(data1 / data2) + else: + with np.errstate(invalid='ignore'): + result = np.log2((data1 + pseudocount) / (data2 + pseudocount)) + + elif args.operation == 'subtract': + result = data1 - data2 + + elif args.operation == 'add': + result = data1 + data2 + + elif args.operation == 'divide': + with np.errstate(divide='ignore', invalid='ignore'): + result = data1 / data2 + + elif args.operation == 'multiply': + result = data1 * data2 + + else: + sys.exit("Error: Unknown operation '{}'\n".format(args.operation)) + + # Preserve NaN values from original matrices + result[nan_mask] = np.nan + + # Replace the matrix in hm1 with the result + hm1.matrix.matrix = result + + # Update the sample labels to indicate the operation + operation_label = args.operation + if args.operation in ['ratio', 'log2ratio'] and pseudocount != 0: + operation_label += "_pc{}".format(pseudocount) + + # Keep the original sample labels but could optionally modify them + # to indicate the operation performed + + # Save the result + hm1.save_matrix(args.outFileName) + + # Print summary + print("Arithmetic operation completed:") + print(" Operation: {}".format(args.operation)) + if args.operation in ['ratio', 'log2ratio']: + print(" Pseudocount: {}".format(pseudocount)) + print(" Matrix 1: {}".format(args.matrixFile[0])) + print(" Matrix 2: {}".format(args.matrixFile[1])) + print(" Output: {}".format(args.outFileName)) + print(" Matrix dimensions: {}".format(result.shape)) + print(" Number of NaN values: {}".format(np.sum(np.isnan(result)))) + + def sortMatrix(hm, regionsFileName, transcriptID, transcript_id_designator, verbose=True): """ Iterate through the files noted by regionsFileName and sort hm accordingly @@ -790,8 +941,10 @@ def main(args=None): if args is None: if len(sys.argv) == 1: args = ["-h"] - if len(sys.argv) == 2: + elif len(sys.argv) == 2: args = [sys.argv[1], "-h"] + else: + args = sys.argv[1:] # Pass all arguments except the script name args = parse_arguments().parse_args(args) @@ -848,5 +1001,11 @@ def main(args=None): elif args.command == 'relabel': relabelMatrix(hm, args) hm.save_matrix(args.outFileName) + elif args.command == 'arithmetic': + arithmeticMatrices(args) else: sys.exit("Unknown command {0}!\n".format(args.command)) + + +if __name__ == "__main__": + main() From 79afd61fc7cf16dc7c51fe5ac52c0902951d4fda Mon Sep 17 00:00:00 2001 From: Philipp Date: Sun, 28 Sep 2025 22:30:02 -0400 Subject: [PATCH 2/3] Update computeMatrixOperations.py --- deeptools/computeMatrixOperations.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/deeptools/computeMatrixOperations.py b/deeptools/computeMatrixOperations.py index ccfc194f32..682636018f 100755 --- a/deeptools/computeMatrixOperations.py +++ b/deeptools/computeMatrixOperations.py @@ -941,10 +941,8 @@ def main(args=None): if args is None: if len(sys.argv) == 1: args = ["-h"] - elif len(sys.argv) == 2: + if len(sys.argv) == 2: args = [sys.argv[1], "-h"] - else: - args = sys.argv[1:] # Pass all arguments except the script name args = parse_arguments().parse_args(args) @@ -1005,7 +1003,3 @@ def main(args=None): arithmeticMatrices(args) else: sys.exit("Unknown command {0}!\n".format(args.command)) - - -if __name__ == "__main__": - main() From 370070961465cbc1c330611637bd0026023fbc8b Mon Sep 17 00:00:00 2001 From: Philipp Date: Thu, 2 Oct 2025 10:53:05 -0400 Subject: [PATCH 3/3] Adding "mean" function --- deeptools/computeMatrixOperations.py | 94 ++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/deeptools/computeMatrixOperations.py b/deeptools/computeMatrixOperations.py index 682636018f..7c864e0443 100755 --- a/deeptools/computeMatrixOperations.py +++ b/deeptools/computeMatrixOperations.py @@ -52,6 +52,9 @@ def parse_arguments(): or computeMatrixOperationsExtended arithmetic -h +or + computeMatrixOperationsExtended mean -h + """, epilog='example usages:\n' 'computeMatrixOperations subset -m input.mat.gz -o output.mat.gz --group "group 1" "group 2" --samples "sample 3" "sample 10"\n\n' @@ -148,6 +151,14 @@ def parse_arguments(): help='Perform arithmetic operations between two matrices with identical dimensions.', usage='Example usage:\n computeMatrixOperationsExtended arithmetic -m input1.mat.gz input2.mat.gz -o output.mat.gz --operation ratio --pseudocount 1\n\n') + # mean + subparsers.add_parser( + 'mean', + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + parents=[meanArgs()], + help='Calculate element-wise mean across two or more matrices with identical dimensions.', + usage='Example usage:\n computeMatrixOperationsExtended mean -m input1.mat.gz input2.mat.gz input3.mat.gz -o output.mat.gz\n\n') + parser.add_argument('--version', action='version', version='%(prog)s {}'.format(version('deeptools'))) @@ -298,6 +309,22 @@ def arithmeticArgs(): return parser +def meanArgs(): + parser = argparse.ArgumentParser(add_help=False) + required = parser.add_argument_group('Required arguments') + + required.add_argument('--matrixFile', '-m', + help='Two or more matrix files from the computeMatrix tool.', + nargs='+', + required=True) + + required.add_argument('--outFileName', '-o', + help='Output file name', + required=True) + + return parser + + def sortArgs(): parser = argparse.ArgumentParser(add_help=False) required = parser.add_argument_group('Required arguments') @@ -839,6 +866,71 @@ def arithmeticMatrices(args): print(" Number of NaN values: {}".format(np.sum(np.isnan(result)))) +def meanMatrices(args): + """ + Calculate the element-wise mean across two or more matrices. + All matrices must have identical dimensions. + """ + if len(args.matrixFile) < 2: + sys.exit("Error: At least two matrix files are required for mean calculation.\n") + + # Load all matrices + matrices = [] + for matrix_file in args.matrixFile: + hm = heatmapper.heatmapper() + hm.read_matrix_file(matrix_file) + matrices.append(hm) + + # Use the first matrix as reference + ref_matrix = matrices[0] + ref_shape = ref_matrix.matrix.matrix.shape + + # Check that all matrices have the same dimensions + for idx, hm in enumerate(matrices[1:], start=1): + if hm.matrix.matrix.shape != ref_shape: + sys.exit("Error: Matrix {} has different dimensions. " + "Expected: {}, Got: {}\n".format( + idx + 1, ref_shape, hm.matrix.matrix.shape)) + + # Check that sample numbers and boundaries match + if len(hm.matrix.sample_labels) != len(ref_matrix.matrix.sample_labels): + sys.exit("Error: Matrix {} has different number of samples. " + "Expected: {}, Got: {}\n".format( + idx + 1, len(ref_matrix.matrix.sample_labels), + len(hm.matrix.sample_labels))) + + if hm.matrix.sample_boundaries != ref_matrix.matrix.sample_boundaries: + sys.exit("Error: Matrix {} has different sample boundaries.\n".format(idx + 1)) + + # Check that group boundaries match + if hm.matrix.group_boundaries != ref_matrix.matrix.group_boundaries: + sys.exit("Error: Matrix {} has different group boundaries.\n".format(idx + 1)) + + # Stack all matrices and compute the mean using nanmean to handle NaN values + matrix_stack = np.array([hm.matrix.matrix for hm in matrices]) + + # Use nanmean to ignore NaN values when computing the mean + # If all values at a position are NaN, the result will be NaN + with np.errstate(invalid='ignore'): + result = np.nanmean(matrix_stack, axis=0) + + # Replace the matrix in the reference with the mean + ref_matrix.matrix.matrix = result + + # Save the result + ref_matrix.save_matrix(args.outFileName) + + # Print summary + print("Mean calculation completed:") + print(" Number of matrices: {}".format(len(matrices))) + print(" Input files:") + for i, fname in enumerate(args.matrixFile, start=1): + print(" {}: {}".format(i, fname)) + print(" Output: {}".format(args.outFileName)) + print(" Matrix dimensions: {}".format(result.shape)) + print(" Number of NaN values: {}".format(np.sum(np.isnan(result)))) + + def sortMatrix(hm, regionsFileName, transcriptID, transcript_id_designator, verbose=True): """ Iterate through the files noted by regionsFileName and sort hm accordingly @@ -1001,5 +1093,7 @@ def main(args=None): hm.save_matrix(args.outFileName) elif args.command == 'arithmetic': arithmeticMatrices(args) + elif args.command == 'mean': + meanMatrices(args) else: sys.exit("Unknown command {0}!\n".format(args.command))