diff --git a/.gitignore b/.gitignore index 5124c9a..73e89e8 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ results/ testing/ testing* *.pyc +.nf-test* +nf-test diff --git a/.gitpod.yml b/.gitpod.yml index 25488dc..f816db2 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,9 +1,15 @@ -image: nfcore/gitpod:latest +image: nfcore/gitpod:dev tasks: - name: Update Nextflow and setup pre-commit command: | pre-commit install --install-hooks nextflow self-update + - name: conda update + command: | + conda update --all --yes + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/CHANGELOG.md b/CHANGELOG.md index 0780dbd..eddc6d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ Initial release of nf-core/scscape, created with the [nf-core](https://nf-co.re/ ### `Added` +- pipeline-level nf-tests by @sateeshperi [#] + ### `Fixed` ### `Dependencies` diff --git a/Samples.csv b/Samples.csv deleted file mode 100644 index 06ede41..0000000 --- a/Samples.csv +++ /dev/null @@ -1,3 +0,0 @@ -id,data_directory,mt_cc_rm_genes -samp_1,/Users/rgrindle/Desktop/mdibl/wd/scRNA-seq/test_pipe/Samp1,test_mito.csv -samp_2,/Users/rgrindle/Desktop/mdibl/wd/scRNA-seq/test_pipe/Samp2,test_mito.csv diff --git a/auxillaryGeneFiles/danioRerio.aux.features.csv b/assets/auxillaryGeneFiles/danioRerio.aux.features.csv similarity index 100% rename from auxillaryGeneFiles/danioRerio.aux.features.csv rename to assets/auxillaryGeneFiles/danioRerio.aux.features.csv diff --git a/auxillaryGeneFiles/musMusculus.aux.features.csv b/assets/auxillaryGeneFiles/musMusculus.aux.features.csv similarity index 100% rename from auxillaryGeneFiles/musMusculus.aux.features.csv rename to assets/auxillaryGeneFiles/musMusculus.aux.features.csv diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 13f1c7d..65171f1 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,5 +1,5 @@ 
-id,data_directory,mt_cc_rm_genes -0dpa,assets/subset_test_data/0dpa/,auxillaryGeneFiles/danioRerio.aux.features.csv -1dpa,assets/subset_test_data/1dpa/,auxillaryGeneFiles/danioRerio.aux.features.csv -2dpa,assets/subset_test_data/2dpa/,auxillaryGeneFiles/danioRerio.aux.features.csv -4dpa,assets/subset_test_data/4dpa/,auxillaryGeneFiles/danioRerio.aux.features.csv +id,barcodes,features,matrix,mt_cc_rm_genes +0dpa,/assets/subset_test_data/0dpa/barcodes.tsv.gz,/assets/subset_test_data/0dpa/features.tsv.gz,/assets/subset_test_data/0dpa/matrix.mtx.gz,/assets/auxillaryGeneFiles/danioRerio.aux.features.csv +1dpa,/assets/subset_test_data/1dpa/barcodes.tsv.gz,/assets/subset_test_data/1dpa/features.tsv.gz,/assets/subset_test_data/1dpa/matrix.mtx.gz,/assets/auxillaryGeneFiles/danioRerio.aux.features.csv +2dpa,/assets/subset_test_data/2dpa/barcodes.tsv.gz,/assets/subset_test_data/2dpa/features.tsv.gz,/assets/subset_test_data/2dpa/matrix.mtx.gz,/assets/auxillaryGeneFiles/danioRerio.aux.features.csv +4dpa,/assets/subset_test_data/4dpa/barcodes.tsv.gz,/assets/subset_test_data/4dpa/features.tsv.gz,/assets/subset_test_data/4dpa/matrix.mtx.gz,/assets/auxillaryGeneFiles/danioRerio.aux.features.csv diff --git a/assets/schema_input.json b/assets/schema_input.json index 2f65d67..ff03e02 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -11,15 +11,23 @@ "type": "string", "meta": ["id"] }, - "data_directory": { + "barcodes": { "type": "string", - "format": "directory-path" + "format": "file-path" + }, + "features": { + "type": "string", + "format": "file-path" + }, + "matrix": { + "type": "string", + "format": "file-path" }, "mt_cc_rm_genes": { "type": "string", "format": "file-path" } }, - "required": ["id", "data_directory"] + "required": ["id", "barcodes", "features", "matrix", "mt_cc_rm_genes"] } } diff --git a/conf/test.config b/conf/test.config index 70de1ef..3bd8aa2 100644 --- a/conf/test.config +++ b/conf/test.config @@ -14,40 +14,30 @@ params { 
config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' - + sample_sheet = "${projectDir}/assets/samplesheet.csv" segmentation_sheet = "${projectDir}/assets/segmentation.csv" - gene_identifier = "gene_name" - min_cells = 3 - min_features = 200 + gene_identifier = "gene_name" + min_cells = 3 + min_features = 200 nfeature_lower = 10 nfeature_upper = 0 - ncount_lower = 10 - ncount_upper = 0 - max_mito_pct = 10 - - vars_2_regress = "nCount_RNA,nFeature_RNA,percent.mt,S.Score,G2M.Score" - - features_2_scale = "VF" - scale_method = "SCT" - - pcMax = null - + ncount_lower = 10 + ncount_upper = 0 + max_mito_pct = 10 + + vars_2_regress = "nCount_RNA,nFeature_RNA,percent.mt,S.Score,G2M.Score" + features_2_scale = "VF" + scale_method = "SCT" + pcMax = null integration_method = "Harmony" - - resolutions = "0.05,0.1,0.3,0.5,0.7,0.9,1.2,1.5" - - makeLoupe = true - eula_agreement = "Agree" + resolutions = "0.05,0.1,0.3,0.5,0.7,0.9,1.2,1.5" + makeLoupe = true + eula_agreement = "Agree" // Limit resources so that this can run on GitHub Actions max_cpus = 4 max_memory = '10.GB' max_time = '6.h' - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - sample_sheet = "${projectDir}/assets/samplesheet.csv" - } diff --git a/modules/local/gzip/main.nf b/modules/local/gzip/main.nf index 21aed16..9a1130a 100644 --- a/modules/local/gzip/main.nf +++ b/modules/local/gzip/main.nf @@ -9,22 +9,20 @@ process GZIP { 'quay.io/biocontainers/gzip:1.11' }" input: - tuple val(meta), path(sample_files) + tuple val(meta), path(file) output: - tuple val(meta), path (sample_files), emit: zip - path("versions.yml"), emit: versions + tuple val(meta), path ("*.zip"), optional: true, emit: zip + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when script: """ - for file in 
${sample_files}/*; do - if [[ \$file != *".gz"* ]]; then - gzip \$file + if [[ $file != *".gz"* ]]; then + gzip $file fi - done cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..70180b0 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,22 @@ +config { + // location for all nf-tests + testsDir "." + + // nf-test directory including temporary files for each test + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" + + // location of an optional nextflow.config file specific for executing tests + configFile "tests/nextflow.config" + + // run all tests with the test profile defined in the main nextflow.config + profile "test" + + // List of filenames or patterns that should trigger a full test run + triggers 'nextflow.config', 'nf-test.config', 'conf/test.config', 'conf/test_full.config' + + // load the necessary plugins + plugins { + load "nft-bam@0.3.0" + load "nft-utils@0.0.2" + } +} diff --git a/segmentation.csv b/segmentation.csv deleted file mode 100644 index 522fc39..0000000 --- a/segmentation.csv +++ /dev/null @@ -1,5 +0,0 @@ -id,early,late,all -0dpa,true,false,true -1dpa,true,false,true -2dpa,false,true,true -4dpa,false,true,true diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 0000000..25b9fab --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,16 @@ +**/fastqc-status-check-*{pdf,svg} +**/fastqc_adapter_content_plot.{pdf,png,svg} +**/fastqc_overrepresented_sequences_plot*{pdf,svg} +**/fastqc_per_base_*_plot*{pdf,png,svg} +**/fastqc_per_sequence_*{pdf,svg} +**/fastqc_sequence_length_distribution_plot.{pdf,png,svg} +**/fastqc_sequence_{counts,duplication_levels}_plot*{pdf,svg} +**/fastqc_top_overrepresented_sequences_table*{pdf,png,svg,txt} +**/multiqc_data.json +**/multiqc_general_stats.txt +**/multiqc_plots +**/multiqc_report.html +**/multiqc_samtools_flagstat.txt +**/multiqc_samtools_stats.txt +**/multiqc_software_versions.txt +**/multiqc_sources.txt 
diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 0000000..e7fe805 --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,39 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "../main.nf" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, true, ['pipeline_info/*.{html,json,txt}'], null) + + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, false, null, 'tests/.nftignore') + + assertAll( + { assert workflow.success}, + { assert snapshot( + + // Number of successful tasks + workflow.trace.succeeded().size(), + + // All stable path names, with a relative path + getRelativePath(stable_name, outputDir), + + // All files with stable contents + stable_path + ).match() } + ) + } + + } + +} diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..5bd53e1 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,38 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ + + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + sample_sheet = "${projectDir}/assets/samplesheet.csv" + segmentation_sheet = "${projectDir}/assets/segmentation.csv" + gene_identifier = "gene_name" + min_cells = 3 + min_features = 200 + + nfeature_lower = 10 + nfeature_upper = 0 + ncount_lower = 10 + ncount_upper = 0 + max_mito_pct = 10 + + vars_2_regress = "nCount_RNA,nFeature_RNA,percent.mt,S.Score,G2M.Score" + features_2_scale = "VF" + scale_method = "SCT" + pcMax = null + integration_method = "Harmony" + resolutions = 
"0.05,0.1,0.3,0.5,0.7,0.9,1.2,1.5" + makeLoupe = true + eula_agreement = "Agree" + + // Limit resources so that this can run on GitHub Actions + max_cpus = 8 + max_memory = 12.GB + max_time = 6.h + +} diff --git a/workflows/scscape.nf b/workflows/scscape.nf index cc51e60..8eb7dd0 100644 --- a/workflows/scscape.nf +++ b/workflows/scscape.nf @@ -79,6 +79,7 @@ workflow SCSCAPE { ch_validation_log = Channel.empty() ch_samples = Channel.fromList(samplesheetToList(params.sample_sheet, "./assets/schema_input.json")) + ch_samples.dump(tag: "ch_samples") ch_gzip = GZIP(ch_samples.map { [ it[0], it[1]] }) ch_gzip.zip @@ -87,7 +88,8 @@ workflow SCSCAPE { .set {ch_samples_compressed} - ch_contrasts_file = Channel.from(file(params.segmentation_sheet)) + ch_contrasts_file = Channel.value(file(params.segmentation_sheet)) + ch_contrasts_file.splitCsv ( header:true, sep:(params.segmentation_sheet.endsWith('tsv') ? '\t' : ',')) .flatMap().filter { !(it.toString().toUpperCase().contains("FALSE")) } .map { it -> @@ -104,7 +106,9 @@ workflow SCSCAPE { .map { it.reverse() } .set { ch_contrasts } - ch_contrasts.join(ch_samples_compressed).flatMap() + ch_contrasts.dump(tag: "ch_contrasts") + + ch_contrasts.join(ch_samples).flatMap() .map { it -> if ( it instanceof LinkedHashMap ){ group_ls = new ArrayList() @@ -145,7 +149,7 @@ workflow SCSCAPE { ch_init_rds.rds.join(ch_updated_meta).set { ch_init_rds_meta } ch_validation_log.mix(ch_init_rds.log).set{ ch_validation_log } } else { - + ch_init_rds = MAKE_SEURAT ( ch_updated_meta.map { [ it[0] , it[1] ] } .map{ meta, data -> @@ -170,7 +174,7 @@ workflow SCSCAPE { ch_init_rds.rds.join(ch_updated_meta).set { ch_init_rds_meta } ch_validation_log.mix(ch_init_rds.log).set{ ch_validation_log } } - + ch_normalized_qc = NORMALIZE_QC ( ch_init_rds_meta.map { [it[0], it[1]] } .map{ meta, data ->