From 47b3e66cc6f60b57228bda7ae2448612402c1006 Mon Sep 17 00:00:00 2001 From: sposadac Date: Tue, 17 Jun 2025 19:14:24 +0200 Subject: [PATCH 1/2] fix: standardize color file path handling in scanpy export Update `exportScanpyOneFieldColor` to store only the color filename in the config, aligning with how other files are handled. This prevents incorrect absolute path resolution when loading the config. - Improve file writing with context manager - Clean up redundant code in `exportScanpyColors` --- src/cbPyLib/cellbrowser/cellbrowser.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/cbPyLib/cellbrowser/cellbrowser.py b/src/cbPyLib/cellbrowser/cellbrowser.py index 780f4471..f0a002b2 100755 --- a/src/cbPyLib/cellbrowser/cellbrowser.py +++ b/src/cbPyLib/cellbrowser/cellbrowser.py @@ -5210,9 +5210,9 @@ def check_nonnegative_integers(X): def exportScanpyOneFieldColor(fieldName, fieldValues, colors, outDir, configData): "write a single color file, for one field" - outFname = join(outDir, fieldName+"_colors.tsv") + outFname = join(outDir, fieldName + "_colors.tsv") logging.info("Writing colors of field %s to %s" % (fieldName, outFname)) - + # Debugging: print lengths and contents #print(f"Field name: {fieldName}") #print(f"Field values (length {len(fieldValues)}): {fieldValues}") @@ -5224,28 +5224,24 @@ def exportScanpyOneFieldColor(fieldName, fieldValues, colors, outDir, configData # Handle mismatch: you can either raise an exception or handle it gracefully return - ofh = open(outFname, "w") - ofh.write("#val color\n") - for val, color in zip(fieldValues, colors): - ofh.write("%s\t%s\n" % (val, color)) - ofh.close() + with open(outFname, "w") as ofh: + ofh.write("#val\tcolor\n") + for val, color in zip(fieldValues, colors): + ofh.write("%s\t%s\n" % (val, color)) + if "colors" not in configData: configData["colors"] = {} - configData["colors"][fieldName] = outFname + configData["colors"][fieldName] = fieldName + "_colors.tsv" def exportScanpyColors(adata, outDir, configData): " create one tsv with the colors per color definition in adata " for fieldName in adata.obs.keys(): - colorKey = fieldName+"_colors" - #if colorKey in adata.uns: - #outFname = exportScanpyOneFieldColor(fieldName, adata.obs[fieldName].values.categories, adata.uns[colorKey], outDir, configData) + colorKey = fieldName + "_colors" if colorKey in adata.uns and adata.uns[colorKey] is not None and len(adata.uns[colorKey]) > 0: fieldValues = adata.obs[fieldName].values.categories - colors = adata.uns[colorKey] - # Check if colors is a list/array and has a non-zero length + colors = adata.uns[colorKey] if colors is not None and len(colors) > 0: - fieldValues = adata.obs[fieldName].values.categories - outFname = exportScanpyOneFieldColor(fieldName, fieldValues, colors, outDir, configData) + exportScanpyOneFieldColor(fieldName, fieldValues, colors, outDir, configData) else: logging.warning("Skipping %s because colors are not available or empty." % fieldName) return configData From f42bb710efbfba106d72bdad79f9a50fe66f7ff4 Mon Sep 17 00:00:00 2001 From: sposadac Date: Tue, 17 Jun 2025 19:19:58 +0200 Subject: [PATCH 2/2] fix: simplify fallback logic for marker gene computation Update conditional logic to ensure fallback computation of marker genes only occurs when `markerField` is missing and markers are not explicitly skipped. Previously, the presence of `clusterField` would trigger marker computation even when `markerField` existed, potentially overwriting existing results unnecessarily. - Prevents redundant recomputation of markers - Ensures fallback only happens when markers are actually missing - Improves consistency and avoids unexpected behavior when `clusterField` is provided --- src/cbPyLib/cellbrowser/cellbrowser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cbPyLib/cellbrowser/cellbrowser.py b/src/cbPyLib/cellbrowser/cellbrowser.py index f0a002b2..7ef4d1ce 100755 --- a/src/cbPyLib/cellbrowser/cellbrowser.py +++ b/src/cbPyLib/cellbrowser/cellbrowser.py @@ -5335,7 +5335,7 @@ def scanpyToCellbrowser(adata, path, datasetName, metaFields=None, clusterField= logging.warn("No valid embeddings were found in anndata.obsm but at least one array of coordinates is usually required. Keys obsm: %s" % (coordFields)) ##Check for cluster markers - if (markerField not in adata.uns or clusterField is not None) and not skipMarkers: + if markerField not in adata.uns and not skipMarkers: logging.warn("Couldnt find list of cluster marker genes in the h5ad file in adata.uns with the key '%s'. " "In the future, from Python, try running sc.tl.rank_genes_groups(adata) to " "create the cluster annotation and write the h5ad file then." % markerField)