diff --git a/CHANGELOG.md b/CHANGELOG.md index d42fcdde3..d5c45983f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Credits +Special thanks to the following for their contributions to the release: + +- [Elad Herzog](https://github.com/EladH1) + ### Enhancements and fixes - [PR #1608](https://github.com/nf-core/rnaseq/pull/1608) - Bump version after release 3.21.0 +- [PR #1617](https://github.com/nf-core/rnaseq/pull/1617) - Update bbmap/bbsplit module ## [[3.21.0](https://github.com/nf-core/rnaseq/releases/tag/3.21.0)] - 2025-09-18 diff --git a/modules.json b/modules.json index 8d8669e9a..674b309c8 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "bbmap/bbsplit": { "branch": "master", - "git_sha": "8864afe586537bf562eac7b83349c26207f3cb4d", + "git_sha": "fa1477128e0ff24675c8f56d6c334fcc323025c4", "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"] }, "bedtools/genomecov": { diff --git a/modules/nf-core/bbmap/bbsplit/main.nf b/modules/nf-core/bbmap/bbsplit/main.nf index da4e0a1b2..2deebbec7 100644 --- a/modules/nf-core/bbmap/bbsplit/main.nf +++ b/modules/nf-core/bbmap/bbsplit/main.nf @@ -10,13 +10,13 @@ process BBMAP_BBSPLIT { input: tuple val(meta), path(reads) - path index + path index, name: 'input_index' path primary_ref tuple val(other_ref_names), path(other_ref_paths) val only_build_index output: - path "bbsplit" , optional:true, emit: index + path "bbsplit_index" , optional:true, emit: index tuple val(meta), path('*primary*fastq.gz'), optional:true, emit: primary_fastq tuple val(meta), path('*fastq.gz') , optional:true, emit: all_fastq tuple val(meta), path('*txt') , optional:true, emit: stats @@ -46,16 +46,17 @@ process BBMAP_BBSPLIT { def fastq_out='' def index_files='' def refstats_cmd='' + def use_index = index ? true : false if (only_build_index) { if (primary_ref && other_ref_names && other_ref_paths) { - index_files = 'ref_primary=' +primary_ref + ' ' + other_refs.join(' ') + ' path=bbsplit' + index_files = 'ref_primary=' +primary_ref + ' ' + other_refs.join(' ') + ' path=bbsplit_build' } else { log.error 'ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files.' } } else { if (index) { - index_files = "path=$index" + index_files = "path=index_writable" } else if (primary_ref && other_ref_names && other_ref_paths) { index_files = "ref_primary=${primary_ref} ${other_refs.join(' ')}" } else { @@ -67,15 +68,18 @@ process BBMAP_BBSPLIT { } """ - # When we stage in the index files the time stamps get disturbed, which - # bbsplit doesn't like. Fix the time stamps in its summaries. This needs to - # be done via Java to match what bbmap does + # If using a pre-built index, copy it to avoid modifying input files in place, + # then fix timestamps. When we stage in the index files the time stamps get + # disturbed, which bbsplit doesn't like. Fix the time stamps in its summaries. + # This needs to be done via Java to match what bbmap does. + if [ "$use_index" == "true" ]; then + cp -rL input_index index_writable - if [ $index ]; then - for summary_file in \$(find $index/ref/genome -name summary.txt); do - src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') + for summary_file in \$(find index_writable/ref/genome -name summary.txt); do + # Extract the path from summary.txt and update it to point to index_writable + src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/ref/|index_writable/ref/|') mod=\$(echo "System.out.println(java.nio.file.Files.getLastModifiedTime(java.nio.file.Paths.get(\\"\$src\\")).toMillis());" | jshell -J-Djdk.lang.Process.launchMechanism=vfork -) - sed "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} + sed -e 's|bbsplit_index/ref|index_writable/ref|' -e "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} done fi @@ -92,11 +96,15 @@ process BBMAP_BBSPLIT { # Summary files will have an absolute path that will make the index # impossible to use in other processes- we can fix that + if [ -d bbsplit_build/ref/genome ]; then + for summary_file in \$(find bbsplit_build/ref/genome -name summary.txt); do + src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit_build|bbsplit_index|') + sed "s|^source.*|source\\t\$src|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} + done - for summary_file in \$(find bbsplit/ref/genome -name summary.txt); do - src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') - sed "s|^source.*|source\\t\$src|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} - done + # Atomically rename the completed index + mv bbsplit_build bbsplit_index + fi cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -110,11 +118,14 @@ process BBMAP_BBSPLIT { other_ref_names.eachWithIndex { name, index -> other_refs += "echo '' | gzip > ${prefix}_${name}.fastq.gz" } + def will_build_index = only_build_index || (!index && primary_ref && other_ref_names && other_ref_paths) """ - if [ ! -d bbsplit ]; then - mkdir bbsplit + # Create index directory if building an index (either only_build_index or on-the-fly) + if [ "${will_build_index}" == "true" ]; then + mkdir -p bbsplit_index fi + # Only create output files if splitting (not just building index) if ! (${only_build_index}); then echo '' | gzip > ${prefix}_primary.fastq.gz ${other_refs} diff --git a/modules/nf-core/bbmap/bbsplit/meta.yml b/modules/nf-core/bbmap/bbsplit/meta.yml index 081aeb2b3..66f87cce0 100644 --- a/modules/nf-core/bbmap/bbsplit/meta.yml +++ b/modules/nf-core/bbmap/bbsplit/meta.yml @@ -25,31 +25,33 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. - - - index: - type: directory - description: Directory to place generated index - pattern: "*" - - - primary_ref: - type: file - description: Path to the primary reference - pattern: "*" + ontologies: [] + - index: + type: directory + description: Directory to place generated index + pattern: "*" + - primary_ref: + type: file + description: Path to the primary reference + pattern: "*" + ontologies: [] - - other_ref_names: type: list description: List of other reference ids apart from the primary - other_ref_paths: type: list description: Path to other references paths corresponding to "other_ref_names" - - - only_build_index: - type: string - description: true = only build index; false = mapping + - only_build_index: + type: string + description: true = only build index; false = mapping output: - - index: - - bbsplit: - type: directory - description: Directory with index files - pattern: "bbsplit" - - primary_fastq: - - meta: + index: + - bbsplit_index: + type: directory + description: Directory with index files + pattern: "bbsplit_index" + primary_fastq: + - - meta: type: map description: | Groovy Map containing sample information @@ -58,8 +60,10 @@ output: type: file description: Output reads that map to the primary reference pattern: "*primary*fastq.gz" - - all_fastq: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + all_fastq: + - - meta: type: map description: | Groovy Map containing sample information @@ -68,8 +72,10 @@ output: type: file description: All reads mapping to any of the references pattern: "*fastq.gz" - - stats: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + stats: + - - meta: type: map description: | Groovy Map containing sample information @@ -78,8 +84,9 @@ output: type: file description: Tab-delimited text file containing mapping statistics pattern: "*.txt" - - log: - - meta: + ontologies: [] + log: + - - meta: type: map description: | Groovy Map containing sample information @@ -88,14 +95,19 @@ output: type: file description: Log file pattern: "*.log" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@joseespinosa" - "@drpatelh" + - "@pinin4fjords" maintainers: - "@joseespinosa" - "@drpatelh" + - "@pinin4fjords" diff --git a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test index 0674d247f..2f942f39c 100644 --- a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test +++ b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test @@ -3,8 +3,12 @@ nextflow_process { name "Test Process BBMAP_BBSPLIT" script "../main.nf" process "BBMAP_BBSPLIT" + tag "modules" + tag "modules_nfcore" + tag "bbmap" + tag "bbmap/bbsplit" - test("sarscov2_se_fastq_fasta_chr22_fasta - index") { + test("sarscov2_se_fastq_fasta_chr22_fasta - build index") { when { process { @@ -31,7 +35,7 @@ nextflow_process { } } - test("sarscov2_se_fastq_fasta_chr22_fasta - index - stub") { + test("sarscov2_se_fastq_fasta_chr22_fasta - build index - stub") { options "-stub" @@ -52,13 +56,12 @@ nextflow_process { then { assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert process.success } ) } } - test("sarscov2_se_fastq_fasta_chr22_fasta") { + test("sarscov2_se_fastq_fasta_chr22_fasta - split with prebuilt index") { setup { @@ -87,58 +90,27 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) input[1] = BBMAP_BBSPLIT_INDEX.out.index - input[2] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)) - input[3] = Channel.of([ - [ 'human' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', checkIfExists: true) - ]) + input[2] = [] + input[3] = [[], []] input[4] = false """ } } then { - def filesToExclude = [ - "info.txt", - "reflist.txt", - "scaffolds.txt.gz", - "summary.txt" - ] - - def outputFiles = [] - def outputDirectory = new File(process.out.index[0]) - outputDirectory.eachFileRecurse { file -> - if (file.isFile()){ - outputFiles << file - } - } - - def filesExist = filesToExclude.every { excludeName -> - outputFiles.any { file -> - file.getName().endsWith(excludeName) && file.exists() - } - } - - def filteredFiles = outputFiles - .findAll { file -> - !filesToExclude.any { excludeName -> - file.getName().endsWith(excludeName) - } - }.sort{ it.getName()} - assertAll( { assert process.success }, { assert path(process.out.log[0][1]).text.contains("If you wish to regenerate the index") }, - { assert filesExist : "One or more files to exclude do not exist" }, { assert snapshot( - filteredFiles, + process.out.primary_fastq, + process.out.stats, process.out.versions ).match()} ) } } - test("sarscov2_se_fastq_fasta_chr22_fasta - stub") { + test("sarscov2_se_fastq_fasta_chr22_fasta - split with prebuilt index - stub") { options "-stub" @@ -169,11 +141,8 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) input[1] = BBMAP_BBSPLIT_INDEX.out.index - input[2] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)) - input[3] = Channel.of([ - [ 'human' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', checkIfExists: true) - ]) + input[2] = [] + input[3] = [[], []] input[4] = false """ } @@ -181,8 +150,7 @@ nextflow_process { then { assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert process.success } ) } } diff --git a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap index 656d3ed05..7e739a663 100644 --- a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap +++ b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap @@ -1,12 +1,23 @@ { - "sarscov2_se_fastq_fasta_chr22_fasta": { + "sarscov2_se_fastq_fasta_chr22_fasta - split with prebuilt index": { "content": [ [ - "chr1.chrom.gz:md5,8fec4c63ec642613ad10adf4cc2e6ade", - "chr1_index_k13_c13_b1.block:md5,385913c1e84b77dc7bf36288ee1c8706", - "chr1_index_k13_c13_b1.block2.gz:md5,2556b45206835a0ff7078d683b5fd6e2", - "merged_ref_9222711925172838098.fa.gz:md5,983cef447fb28394b88a5b49b3579f0c", - "namelist.txt:md5,45e7a4cdc7a11a39ada56844ca3a1e30" + [ + { + "id": "test", + "single_end": true + }, + "test_primary.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats.txt:md5,2cbf69b72e5f4f8508306b54e8fe2861" + ] ], [ "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" @@ -14,11 +25,11 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.10.0" }, - "timestamp": "2025-03-12T18:57:32.958846" + "timestamp": "2025-11-07T12:36:23.511852" }, - "sarscov2_se_fastq_fasta_chr22_fasta - index": { + "sarscov2_se_fastq_fasta_chr22_fasta - build index": { "content": [ [ "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" @@ -26,174 +37,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-03-12T18:57:12.155636" - }, - "sarscov2_se_fastq_fasta_chr22_fasta - index - stub": { - "content": [ - { - "0": [ - [ - - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - [ - { - - }, - "null.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "5": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ], - "all_fastq": [ - - ], - "index": [ - [ - - ] - ], - "log": [ - [ - { - - }, - "null.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "primary_fastq": [ - - ], - "stats": [ - - ], - "versions": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-03-12T18:57:17.177351" - }, - "sarscov2_se_fastq_fasta_chr22_fasta - stub": { - "content": [ - { - "0": [ - [ - - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": true - }, - [ - "test_human.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": true - }, - "test.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "4": [ - [ - { - "id": "test", - "single_end": true - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "5": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ], - "all_fastq": [ - [ - { - "id": "test", - "single_end": true - }, - [ - "test_human.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ] - ], - "index": [ - [ - - ] - ], - "log": [ - [ - { - "id": "test", - "single_end": true - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "primary_fastq": [ - [ - { - "id": "test", - "single_end": true - }, - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "stats": [ - [ - { - "id": "test", - "single_end": true - }, - "test.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.10.0" }, - "timestamp": "2025-03-12T18:57:42.233549" + "timestamp": "2025-11-07T11:05:45.517832" } } \ No newline at end of file