From 952202ce753affba5cd332a0a7fce96afb02cf7c Mon Sep 17 00:00:00 2001 From: EladH1 Date: Tue, 4 Nov 2025 11:36:56 +0100 Subject: [PATCH 01/11] The previous script had issue of staging -> modified the summary_file . --- modules/nf-core/bbmap/bbsplit/main.nf | 33 ++++++--------------------- 1 file changed, 7 insertions(+), 26 deletions(-) mode change 100644 => 100755 modules/nf-core/bbmap/bbsplit/main.nf diff --git a/modules/nf-core/bbmap/bbsplit/main.nf b/modules/nf-core/bbmap/bbsplit/main.nf old mode 100644 new mode 100755 index da4e0a1b2..87f7e14d5 --- a/modules/nf-core/bbmap/bbsplit/main.nf +++ b/modules/nf-core/bbmap/bbsplit/main.nf @@ -2,6 +2,9 @@ process BBMAP_BBSPLIT { tag "$meta.id" label 'process_high' label 'error_retry' + + // Force copy mode to prevent index modifications + stageInMode 'copy' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -38,8 +41,8 @@ process BBMAP_BBSPLIT { } def other_refs = [] - other_ref_names.eachWithIndex { name, index -> - other_refs << "ref_${name}=${other_ref_paths[index]}" + other_ref_names.eachWithIndex { name, idx -> + other_refs << "ref_${name}=${other_ref_paths[idx]}" } def fastq_in='' @@ -66,21 +69,7 @@ process BBMAP_BBSPLIT { refstats_cmd = 'refstats=' + prefix + '.stats.txt' } """ - - # When we stage in the index files the time stamps get disturbed, which - # bbsplit doesn't like. Fix the time stamps in its summaries. This needs to - # be done via Java to match what bbmap does - - if [ $index ]; then - for summary_file in \$(find $index/ref/genome -name summary.txt); do - src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') - mod=\$(echo "System.out.println(java.nio.file.Files.getLastModifiedTime(java.nio.file.Paths.get(\\"\$src\\")).toMillis());" | jshell -J-Djdk.lang.Process.launchMechanism=vfork -) - sed "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} - done - fi - # Run BBSplit - bbsplit.sh \\ -Xmx${avail_mem}M \\ $index_files \\ @@ -90,14 +79,6 @@ process BBMAP_BBSPLIT { $refstats_cmd \\ $args 2>| >(tee ${prefix}.log >&2) - # Summary files will have an absolute path that will make the index - # impossible to use in other processes- we can fix that - - for summary_file in \$(find bbsplit/ref/genome -name summary.txt); do - src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') - sed "s|^source.*|source\\t\$src|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} - done - cat <<-END_VERSIONS > versions.yml "${task.process}": bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") @@ -107,7 +88,7 @@ process BBMAP_BBSPLIT { stub: def prefix = task.ext.prefix ?: "${meta.id}" def other_refs = '' - other_ref_names.eachWithIndex { name, index -> + other_ref_names.eachWithIndex { name, _idx -> other_refs += "echo '' | gzip > ${prefix}_${name}.fastq.gz" } """ @@ -128,4 +109,4 @@ process BBMAP_BBSPLIT { bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") END_VERSIONS """ -} +} \ No newline at end of file From d9a288433ab308de7b00e0d130b29adf5d534744 Mon Sep 17 00:00:00 2001 From: EladH1 Date: Wed, 5 Nov 2025 11:12:59 +0100 Subject: [PATCH 02/11] bbsplit patch lint --- modules.json | 3 +- .../nf-core/bbmap/bbsplit/bbmap-bbsplit.diff | 97 +++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 modules/nf-core/bbmap/bbsplit/bbmap-bbsplit.diff diff --git a/modules.json b/modules.json index aa20e9320..69270ed31 100644 --- a/modules.json +++ b/modules.json @@ -8,7 +8,8 @@ "bbmap/bbsplit": { "branch": "master", "git_sha": "8864afe586537bf562eac7b83349c26207f3cb4d", - "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"] + "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"], + "patch": "modules/nf-core/bbmap/bbsplit/bbmap-bbsplit.diff" }, "bedtools/genomecov": { "branch": "master", diff --git a/modules/nf-core/bbmap/bbsplit/bbmap-bbsplit.diff b/modules/nf-core/bbmap/bbsplit/bbmap-bbsplit.diff new file mode 100644 index 000000000..93e8ab49e --- /dev/null +++ b/modules/nf-core/bbmap/bbsplit/bbmap-bbsplit.diff @@ -0,0 +1,97 @@ +Changes in component 'nf-core/bbmap/bbsplit' +'modules/nf-core/bbmap/bbsplit/meta.yml' is unchanged +'modules/nf-core/bbmap/bbsplit/nextflow.config' was created +'modules/nf-core/bbmap/bbsplit/environment.yml' is unchanged +Changes in 'bbmap/bbsplit/main.nf': +--- modules/nf-core/bbmap/bbsplit/main.nf ++++ modules/nf-core/bbmap/bbsplit/main.nf +@@ -2,6 +2,9 @@ + tag "$meta.id" + label 'process_high' + label 'error_retry' ++ ++ // Force copy mode to prevent index modifications ++ stageInMode 'copy' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +@@ -38,8 +41,8 @@ + } + + def other_refs = [] +- other_ref_names.eachWithIndex { name, index -> +- other_refs << "ref_${name}=${other_ref_paths[index]}" ++ other_ref_names.eachWithIndex { name, idx -> ++ other_refs << "ref_${name}=${other_ref_paths[idx]}" + } + + def fastq_in='' +@@ -66,21 +69,7 @@ + refstats_cmd = 'refstats=' + prefix + '.stats.txt' + } + """ +- +- # When we stage in the index files the time stamps get disturbed, which +- # bbsplit doesn't like. Fix the time stamps in its summaries. This needs to +- # be done via Java to match what bbmap does +- +- if [ $index ]; then +- for summary_file in \$(find $index/ref/genome -name summary.txt); do +- src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') +- mod=\$(echo "System.out.println(java.nio.file.Files.getLastModifiedTime(java.nio.file.Paths.get(\\"\$src\\")).toMillis());" | jshell -J-Djdk.lang.Process.launchMechanism=vfork -) +- sed "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} +- done +- fi +- + # Run BBSplit +- + bbsplit.sh \\ + -Xmx${avail_mem}M \\ + $index_files \\ +@@ -89,14 +78,6 @@ + $fastq_out \\ + $refstats_cmd \\ + $args 2>| >(tee ${prefix}.log >&2) +- +- # Summary files will have an absolute path that will make the index +- # impossible to use in other processes- we can fix that +- +- for summary_file in \$(find bbsplit/ref/genome -name summary.txt); do +- src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') +- sed "s|^source.*|source\\t\$src|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} +- done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +@@ -107,7 +88,7 @@ + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def other_refs = '' +- other_ref_names.eachWithIndex { name, index -> ++ other_ref_names.eachWithIndex { name, _idx -> + other_refs += "echo '' | gzip > ${prefix}_${name}.fastq.gz" + } + """ +@@ -128,4 +109,4 @@ + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ +-} ++} +Changes in 'bbmap/bbsplit/tests/main.nf.test': +--- modules/nf-core/bbmap/bbsplit/tests/main.nf.test ++++ modules/nf-core/bbmap/bbsplit/tests/main.nf.test +@@ -3,10 +3,6 @@ + name "Test Process BBMAP_BBSPLIT" + script "../main.nf" + process "BBMAP_BBSPLIT" +- tag "modules" +- tag "modules_nfcore" +- tag "bbmap" +- tag "bbmap/bbsplit" + + test("sarscov2_se_fastq_fasta_chr22_fasta - index") { + + +'modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap' is unchanged +************************************************************ From 4819736fbf2c6cc4cb4e396f9e0315793f57e979 Mon Sep 17 00:00:00 2001 From: EladH1 Date: Wed, 5 Nov 2025 11:16:32 +0100 Subject: [PATCH 03/11] linting --- .github/workflows/linting.yml | 14 +++++++------- .github/workflows/linting_comment.yml | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8b0f88c36..30e66026f 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,12 +11,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - - name: Set up Python 3.13 - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - name: Set up Python 3.14 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" - name: Install pre-commit run: pip install pre-commit @@ -28,14 +28,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.13" + python-version: "3.14" architecture: "x64" - name: read .nf-core.yml diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index d43797d9d..e6e9bc269 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2 + uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} From 8204ad975b3fd69dcee1198061f7b5b5a87dff7b Mon Sep 17 00:00:00 2001 From: EladH1 Date: Wed, 5 Nov 2025 11:46:55 +0100 Subject: [PATCH 04/11] bbsplit linting2 --- .../nf-core/bbmap/bbsplit/bbmap-bbsplit.diff | 97 ------------------- .../nf-core/bbmap/bbsplit/tests/main.nf.test | 4 + 2 files changed, 4 insertions(+), 97 deletions(-) delete mode 100644 modules/nf-core/bbmap/bbsplit/bbmap-bbsplit.diff diff --git a/modules/nf-core/bbmap/bbsplit/bbmap-bbsplit.diff b/modules/nf-core/bbmap/bbsplit/bbmap-bbsplit.diff deleted file mode 100644 index 93e8ab49e..000000000 --- a/modules/nf-core/bbmap/bbsplit/bbmap-bbsplit.diff +++ /dev/null @@ -1,97 +0,0 @@ -Changes in component 'nf-core/bbmap/bbsplit' -'modules/nf-core/bbmap/bbsplit/meta.yml' is unchanged -'modules/nf-core/bbmap/bbsplit/nextflow.config' was created -'modules/nf-core/bbmap/bbsplit/environment.yml' is unchanged -Changes in 'bbmap/bbsplit/main.nf': ---- modules/nf-core/bbmap/bbsplit/main.nf -+++ modules/nf-core/bbmap/bbsplit/main.nf -@@ -2,6 +2,9 @@ - tag "$meta.id" - label 'process_high' - label 'error_retry' -+ -+ // Force copy mode to prevent index modifications -+ stageInMode 'copy' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? -@@ -38,8 +41,8 @@ - } - - def other_refs = [] -- other_ref_names.eachWithIndex { name, index -> -- other_refs << "ref_${name}=${other_ref_paths[index]}" -+ other_ref_names.eachWithIndex { name, idx -> -+ other_refs << "ref_${name}=${other_ref_paths[idx]}" - } - - def fastq_in='' -@@ -66,21 +69,7 @@ - refstats_cmd = 'refstats=' + prefix + '.stats.txt' - } - """ -- -- # When we stage in the index files the time stamps get disturbed, which -- # bbsplit doesn't like. Fix the time stamps in its summaries. This needs to -- # be done via Java to match what bbmap does -- -- if [ $index ]; then -- for summary_file in \$(find $index/ref/genome -name summary.txt); do -- src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') -- mod=\$(echo "System.out.println(java.nio.file.Files.getLastModifiedTime(java.nio.file.Paths.get(\\"\$src\\")).toMillis());" | jshell -J-Djdk.lang.Process.launchMechanism=vfork -) -- sed "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} -- done -- fi -- - # Run BBSplit -- - bbsplit.sh \\ - -Xmx${avail_mem}M \\ - $index_files \\ -@@ -89,14 +78,6 @@ - $fastq_out \\ - $refstats_cmd \\ - $args 2>| >(tee ${prefix}.log >&2) -- -- # Summary files will have an absolute path that will make the index -- # impossible to use in other processes- we can fix that -- -- for summary_file in \$(find bbsplit/ref/genome -name summary.txt); do -- src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') -- sed "s|^source.*|source\\t\$src|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} -- done - - cat <<-END_VERSIONS > versions.yml - "${task.process}": -@@ -107,7 +88,7 @@ - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def other_refs = '' -- other_ref_names.eachWithIndex { name, index -> -+ other_ref_names.eachWithIndex { name, _idx -> - other_refs += "echo '' | gzip > ${prefix}_${name}.fastq.gz" - } - """ -@@ -128,4 +109,4 @@ - bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") - END_VERSIONS - """ --} -+} -Changes in 'bbmap/bbsplit/tests/main.nf.test': ---- modules/nf-core/bbmap/bbsplit/tests/main.nf.test -+++ modules/nf-core/bbmap/bbsplit/tests/main.nf.test -@@ -3,10 +3,6 @@ - name "Test Process BBMAP_BBSPLIT" - script "../main.nf" - process "BBMAP_BBSPLIT" -- tag "modules" -- tag "modules_nfcore" -- tag "bbmap" -- tag "bbmap/bbsplit" - - test("sarscov2_se_fastq_fasta_chr22_fasta - index") { - - -'modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap' is unchanged -************************************************************ diff --git a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test index 0674d247f..1f260d5f6 100644 --- a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test +++ b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test @@ -3,6 +3,10 @@ nextflow_process { name "Test Process BBMAP_BBSPLIT" script "../main.nf" process "BBMAP_BBSPLIT" + tag "modules" + tag "modules_nfcore" + tag "bbmap" + tag "bbmap/bbsplit" test("sarscov2_se_fastq_fasta_chr22_fasta - index") { From 75b7c47afcc1a6445fcd5f1f407612a4b1bff614 Mon Sep 17 00:00:00 2001 From: EladH1 Date: Wed, 5 Nov 2025 11:58:52 +0100 Subject: [PATCH 05/11] lint pipe --- modules.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules.json b/modules.json index 69270ed31..aa20e9320 100644 --- a/modules.json +++ b/modules.json @@ -8,8 +8,7 @@ "bbmap/bbsplit": { "branch": "master", "git_sha": "8864afe586537bf562eac7b83349c26207f3cb4d", - "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"], - "patch": "modules/nf-core/bbmap/bbsplit/bbmap-bbsplit.diff" + "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"] }, "bedtools/genomecov": { "branch": "master", From 628d041b6cbf436d2c72419ee7f3469502bb7fe9 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 13 Nov 2025 12:45:22 +0000 Subject: [PATCH 06/11] Revert bbsplit staging changes to allow proper module update --- .github/workflows/linting.yml | 14 ++++---- .github/workflows/linting_comment.yml | 2 +- modules/nf-core/bbmap/bbsplit/main.nf | 33 +++++++++++++++---- .../nf-core/bbmap/bbsplit/tests/main.nf.test | 4 --- 4 files changed, 34 insertions(+), 19 deletions(-) mode change 100755 => 100644 modules/nf-core/bbmap/bbsplit/main.nf diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 30e66026f..8b0f88c36 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,12 +11,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python 3.14 - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 + - name: Set up Python 3.13 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.14" + python-version: "3.13" - name: Install pre-commit run: pip install pre-commit @@ -28,14 +28,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: - python-version: "3.14" + python-version: "3.13" architecture: "x64" - name: read .nf-core.yml diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index e6e9bc269..d43797d9d 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2 + uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/modules/nf-core/bbmap/bbsplit/main.nf b/modules/nf-core/bbmap/bbsplit/main.nf old mode 100755 new mode 100644 index 87f7e14d5..da4e0a1b2 --- a/modules/nf-core/bbmap/bbsplit/main.nf +++ b/modules/nf-core/bbmap/bbsplit/main.nf @@ -2,9 +2,6 @@ process BBMAP_BBSPLIT { tag "$meta.id" label 'process_high' label 'error_retry' - - // Force copy mode to prevent index modifications - stageInMode 'copy' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -41,8 +38,8 @@ process BBMAP_BBSPLIT { } def other_refs = [] - other_ref_names.eachWithIndex { name, idx -> - other_refs << "ref_${name}=${other_ref_paths[idx]}" + other_ref_names.eachWithIndex { name, index -> + other_refs << "ref_${name}=${other_ref_paths[index]}" } def fastq_in='' @@ -69,7 +66,21 @@ process BBMAP_BBSPLIT { refstats_cmd = 'refstats=' + prefix + '.stats.txt' } """ + + # When we stage in the index files the time stamps get disturbed, which + # bbsplit doesn't like. Fix the time stamps in its summaries. This needs to + # be done via Java to match what bbmap does + + if [ $index ]; then + for summary_file in \$(find $index/ref/genome -name summary.txt); do + src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') + mod=\$(echo "System.out.println(java.nio.file.Files.getLastModifiedTime(java.nio.file.Paths.get(\\"\$src\\")).toMillis());" | jshell -J-Djdk.lang.Process.launchMechanism=vfork -) + sed "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} + done + fi + # Run BBSplit + bbsplit.sh \\ -Xmx${avail_mem}M \\ $index_files \\ @@ -79,6 +90,14 @@ process BBMAP_BBSPLIT { $refstats_cmd \\ $args 2>| >(tee ${prefix}.log >&2) + # Summary files will have an absolute path that will make the index + # impossible to use in other processes- we can fix that + + for summary_file in \$(find bbsplit/ref/genome -name summary.txt); do + src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') + sed "s|^source.*|source\\t\$src|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} + done + cat <<-END_VERSIONS > versions.yml "${task.process}": bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") @@ -88,7 +107,7 @@ process BBMAP_BBSPLIT { stub: def prefix = task.ext.prefix ?: "${meta.id}" def other_refs = '' - other_ref_names.eachWithIndex { name, _idx -> + other_ref_names.eachWithIndex { name, index -> other_refs += "echo '' | gzip > ${prefix}_${name}.fastq.gz" } """ @@ -109,4 +128,4 @@ process BBMAP_BBSPLIT { bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test index 1f260d5f6..0674d247f 100644 --- a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test +++ b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test @@ -3,10 +3,6 @@ nextflow_process { name "Test Process BBMAP_BBSPLIT" script "../main.nf" process "BBMAP_BBSPLIT" - tag "modules" - tag "modules_nfcore" - tag "bbmap" - tag "bbmap/bbsplit" test("sarscov2_se_fastq_fasta_chr22_fasta - index") { From 439396c6b00305ae41726f24bdfc149d431bb813 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 13 Nov 2025 12:53:13 +0000 Subject: [PATCH 07/11] Update bbmap/bbsplit module to latest version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This updates the bbmap/bbsplit module to address staging issues reported in the original PR. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- modules.json | 2 +- modules/nf-core/bbmap/bbsplit/main.nf | 45 ++-- modules/nf-core/bbmap/bbsplit/meta.yml | 70 +++--- .../nf-core/bbmap/bbsplit/tests/main.nf.test | 64 ++---- .../bbmap/bbsplit/tests/main.nf.test.snap | 199 ++---------------- 5 files changed, 108 insertions(+), 272 deletions(-) diff --git a/modules.json b/modules.json index aa20e9320..920c05cd7 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "bbmap/bbsplit": { "branch": "master", - "git_sha": "8864afe586537bf562eac7b83349c26207f3cb4d", + "git_sha": "b5114f2bb0aa49281e1292d0c8b1433e368c0ed3", "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"] }, "bedtools/genomecov": { diff --git a/modules/nf-core/bbmap/bbsplit/main.nf b/modules/nf-core/bbmap/bbsplit/main.nf index da4e0a1b2..2deebbec7 100644 --- a/modules/nf-core/bbmap/bbsplit/main.nf +++ b/modules/nf-core/bbmap/bbsplit/main.nf @@ -10,13 +10,13 @@ process BBMAP_BBSPLIT { input: tuple val(meta), path(reads) - path index + path index, name: 'input_index' path primary_ref tuple val(other_ref_names), path(other_ref_paths) val only_build_index output: - path "bbsplit" , optional:true, emit: index + path "bbsplit_index" , optional:true, emit: index tuple val(meta), path('*primary*fastq.gz'), optional:true, emit: primary_fastq tuple val(meta), path('*fastq.gz') , optional:true, emit: all_fastq tuple val(meta), path('*txt') , optional:true, emit: stats @@ -46,16 +46,17 @@ process BBMAP_BBSPLIT { def fastq_out='' def index_files='' def refstats_cmd='' + def use_index = index ? true : false if (only_build_index) { if (primary_ref && other_ref_names && other_ref_paths) { - index_files = 'ref_primary=' +primary_ref + ' ' + other_refs.join(' ') + ' path=bbsplit' + index_files = 'ref_primary=' +primary_ref + ' ' + other_refs.join(' ') + ' path=bbsplit_build' } else { log.error 'ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files.' } } else { if (index) { - index_files = "path=$index" + index_files = "path=index_writable" } else if (primary_ref && other_ref_names && other_ref_paths) { index_files = "ref_primary=${primary_ref} ${other_refs.join(' ')}" } else { @@ -67,15 +68,18 @@ process BBMAP_BBSPLIT { } """ - # When we stage in the index files the time stamps get disturbed, which - # bbsplit doesn't like. Fix the time stamps in its summaries. This needs to - # be done via Java to match what bbmap does + # If using a pre-built index, copy it to avoid modifying input files in place, + # then fix timestamps. When we stage in the index files the time stamps get + # disturbed, which bbsplit doesn't like. Fix the time stamps in its summaries. + # This needs to be done via Java to match what bbmap does. + if [ "$use_index" == "true" ]; then + cp -rL input_index index_writable - if [ $index ]; then - for summary_file in \$(find $index/ref/genome -name summary.txt); do - src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') + for summary_file in \$(find index_writable/ref/genome -name summary.txt); do + # Extract the path from summary.txt and update it to point to index_writable + src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/ref/|index_writable/ref/|') mod=\$(echo "System.out.println(java.nio.file.Files.getLastModifiedTime(java.nio.file.Paths.get(\\"\$src\\")).toMillis());" | jshell -J-Djdk.lang.Process.launchMechanism=vfork -) - sed "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} + sed -e 's|bbsplit_index/ref|index_writable/ref|' -e "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} done fi @@ -92,11 +96,15 @@ process BBMAP_BBSPLIT { # Summary files will have an absolute path that will make the index # impossible to use in other processes- we can fix that + if [ -d bbsplit_build/ref/genome ]; then + for summary_file in \$(find bbsplit_build/ref/genome -name summary.txt); do + src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit_build|bbsplit_index|') + sed "s|^source.*|source\\t\$src|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} + done - for summary_file in \$(find bbsplit/ref/genome -name summary.txt); do - src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|') - sed "s|^source.*|source\\t\$src|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file} - done + # Atomically rename the completed index + mv bbsplit_build bbsplit_index + fi cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -110,11 +118,14 @@ process BBMAP_BBSPLIT { other_ref_names.eachWithIndex { name, index -> other_refs += "echo '' | gzip > ${prefix}_${name}.fastq.gz" } + def will_build_index = only_build_index || (!index && primary_ref && other_ref_names && other_ref_paths) """ - if [ ! -d bbsplit ]; then - mkdir bbsplit + # Create index directory if building an index (either only_build_index or on-the-fly) + if [ "${will_build_index}" == "true" ]; then + mkdir -p bbsplit_index fi + # Only create output files if splitting (not just building index) if ! (${only_build_index}); then echo '' | gzip > ${prefix}_primary.fastq.gz ${other_refs} diff --git a/modules/nf-core/bbmap/bbsplit/meta.yml b/modules/nf-core/bbmap/bbsplit/meta.yml index 081aeb2b3..b496849ad 100644 --- a/modules/nf-core/bbmap/bbsplit/meta.yml +++ b/modules/nf-core/bbmap/bbsplit/meta.yml @@ -25,31 +25,33 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. - - - index: - type: directory - description: Directory to place generated index - pattern: "*" - - - primary_ref: - type: file - description: Path to the primary reference - pattern: "*" + ontologies: [] + - index: + type: directory + description: Directory to place generated index + pattern: "*" + - primary_ref: + type: file + description: Path to the primary reference + pattern: "*" + ontologies: [] - - other_ref_names: type: list description: List of other reference ids apart from the primary - other_ref_paths: type: list description: Path to other references paths corresponding to "other_ref_names" - - - only_build_index: - type: string - description: true = only build index; false = mapping + - only_build_index: + type: string + description: true = only build index; false = mapping output: - - index: - - bbsplit: - type: directory - description: Directory with index files - pattern: "bbsplit" - - primary_fastq: - - meta: + index: + - bbsplit_index: + type: directory + description: Directory with index files + pattern: "bbsplit_index" + primary_fastq: + - - meta: type: map description: | Groovy Map containing sample information @@ -58,8 +60,10 @@ output: type: file description: Output reads that map to the primary reference pattern: "*primary*fastq.gz" - - all_fastq: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + all_fastq: + - - meta: type: map description: | Groovy Map containing sample information @@ -68,8 +72,10 @@ output: type: file description: All reads mapping to any of the references pattern: "*fastq.gz" - - stats: - - meta: + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + stats: + - - meta: type: map description: | Groovy Map containing sample information @@ -78,8 +84,9 @@ output: type: file description: Tab-delimited text file containing mapping statistics pattern: "*.txt" - - log: - - meta: + ontologies: [] + log: + - - meta: type: map description: | Groovy Map containing sample information @@ -88,14 +95,19 @@ output: type: file description: Log file pattern: "*.log" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@joseespinosa" - "@drpatelh" + - "@pinin4fjords" maintainers: - "@joseespinosa" - "@drpatelh" + - "@pinin4fjords" \ No newline at end of file diff --git a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test index 0674d247f..2f942f39c 100644 --- a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test +++ b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test @@ -3,8 +3,12 @@ nextflow_process { name "Test Process BBMAP_BBSPLIT" script "../main.nf" process "BBMAP_BBSPLIT" + tag "modules" + tag "modules_nfcore" + tag "bbmap" + tag "bbmap/bbsplit" - test("sarscov2_se_fastq_fasta_chr22_fasta - index") { + test("sarscov2_se_fastq_fasta_chr22_fasta - build index") { when { process { @@ -31,7 +35,7 @@ nextflow_process { } } - test("sarscov2_se_fastq_fasta_chr22_fasta - index - stub") { + test("sarscov2_se_fastq_fasta_chr22_fasta - build index - stub") { options "-stub" @@ -52,13 +56,12 @@ nextflow_process { then { assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert process.success } ) } } - test("sarscov2_se_fastq_fasta_chr22_fasta") { + test("sarscov2_se_fastq_fasta_chr22_fasta - split with prebuilt index") { setup { @@ -87,58 +90,27 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) input[1] = BBMAP_BBSPLIT_INDEX.out.index - input[2] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)) - input[3] = Channel.of([ - [ 'human' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', checkIfExists: true) - ]) + input[2] = [] + input[3] = [[], []] input[4] = false """ } } then { - def filesToExclude = [ - "info.txt", - "reflist.txt", - "scaffolds.txt.gz", - "summary.txt" - ] - - def outputFiles = [] - def outputDirectory = new File(process.out.index[0]) - outputDirectory.eachFileRecurse { file -> - if (file.isFile()){ - outputFiles << file - } - } - - def filesExist = filesToExclude.every { excludeName -> - outputFiles.any { file -> - file.getName().endsWith(excludeName) && file.exists() - } - } - - def filteredFiles = outputFiles - .findAll { file -> - !filesToExclude.any { excludeName -> - file.getName().endsWith(excludeName) - } - }.sort{ it.getName()} - assertAll( { assert process.success }, { assert path(process.out.log[0][1]).text.contains("If you wish to regenerate the index") }, - { assert filesExist : "One or more files to exclude do not exist" }, { assert snapshot( - filteredFiles, + process.out.primary_fastq, + process.out.stats, process.out.versions ).match()} ) } } - test("sarscov2_se_fastq_fasta_chr22_fasta - stub") { + test("sarscov2_se_fastq_fasta_chr22_fasta - split with prebuilt index - stub") { options "-stub" @@ -169,11 +141,8 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) input[1] = BBMAP_BBSPLIT_INDEX.out.index - input[2] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)) - input[3] = Channel.of([ - [ 'human' ], // meta map - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', checkIfExists: true) - ]) + input[2] = [] + input[3] = [[], []] input[4] = false """ } @@ -181,8 +150,7 @@ nextflow_process { then { assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } + { assert process.success } ) } } diff --git a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap index 656d3ed05..7e739a663 100644 --- a/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap +++ b/modules/nf-core/bbmap/bbsplit/tests/main.nf.test.snap @@ -1,12 +1,23 @@ { - "sarscov2_se_fastq_fasta_chr22_fasta": { + "sarscov2_se_fastq_fasta_chr22_fasta - split with prebuilt index": { "content": [ [ - "chr1.chrom.gz:md5,8fec4c63ec642613ad10adf4cc2e6ade", - "chr1_index_k13_c13_b1.block:md5,385913c1e84b77dc7bf36288ee1c8706", - "chr1_index_k13_c13_b1.block2.gz:md5,2556b45206835a0ff7078d683b5fd6e2", - "merged_ref_9222711925172838098.fa.gz:md5,983cef447fb28394b88a5b49b3579f0c", - "namelist.txt:md5,45e7a4cdc7a11a39ada56844ca3a1e30" + [ + { + "id": "test", + "single_end": true + }, + "test_primary.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats.txt:md5,2cbf69b72e5f4f8508306b54e8fe2861" + ] ], [ "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" @@ -14,11 +25,11 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.10.0" }, - "timestamp": "2025-03-12T18:57:32.958846" + "timestamp": "2025-11-07T12:36:23.511852" }, - "sarscov2_se_fastq_fasta_chr22_fasta - index": { + "sarscov2_se_fastq_fasta_chr22_fasta - build index": { "content": [ [ "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" @@ -26,174 +37,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-03-12T18:57:12.155636" - }, - "sarscov2_se_fastq_fasta_chr22_fasta - index - stub": { - "content": [ - { - "0": [ - [ - - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - [ - { - - }, - "null.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "5": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ], - "all_fastq": [ - - ], - "index": [ - [ - - ] - ], - "log": [ - [ - { - - }, - "null.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "primary_fastq": [ - - ], - "stats": [ - - ], - "versions": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" - }, - "timestamp": "2025-03-12T18:57:17.177351" - }, - "sarscov2_se_fastq_fasta_chr22_fasta - stub": { - "content": [ - { - "0": [ - [ - - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": true - }, - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": true - }, - [ - "test_human.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ] - ], - "3": [ - [ - { - "id": "test", - "single_end": true - }, - "test.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "4": [ - [ - { - "id": "test", - "single_end": true - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "5": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ], - "all_fastq": [ - [ - { - "id": "test", - "single_end": true - }, - [ - "test_human.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ] - ], - "index": [ - [ - - ] - ], - "log": [ - [ - { - "id": "test", - "single_end": true - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "primary_fastq": [ - [ - { - "id": "test", - "single_end": true - }, - "test_primary.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "stats": [ - [ - { - "id": "test", - "single_end": true - }, - "test.stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,b75d21d7649b5a512e7b54dd15cd3eba" - ] - } - ], - "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.10.0" }, - "timestamp": "2025-03-12T18:57:42.233549" + "timestamp": "2025-11-07T11:05:45.517832" } } \ No newline at end of file From 411ac4fcef1074b0865a74c7d4b9680473d4baaf Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 13 Nov 2025 12:56:05 +0000 Subject: [PATCH 08/11] Update CHANGELOG.md to credit Elad Herzog for PR #1617 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d42fcdde3..d5c45983f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Credits +Special thanks to the following for their contributions to the release: + +- [Elad Herzog](https://github.com/EladH1) + ### Enhancements and fixes - [PR #1608](https://github.com/nf-core/rnaseq/pull/1608) - Bump version after release 3.21.0 +- [PR #1617](https://github.com/nf-core/rnaseq/pull/1617) - Update bbmap/bbsplit module ## [[3.21.0](https://github.com/nf-core/rnaseq/releases/tag/3.21.0)] - 2025-09-18 From 8a5e38b5e2e9c184617b9e838dad2b7f8d5ca629 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 13 Nov 2025 14:52:10 +0000 Subject: [PATCH 09/11] Fix maintainers list formatting in meta.yml --- modules/nf-core/bbmap/bbsplit/meta.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/nf-core/bbmap/bbsplit/meta.yml b/modules/nf-core/bbmap/bbsplit/meta.yml index b496849ad..3075bcd99 100644 --- a/modules/nf-core/bbmap/bbsplit/meta.yml +++ b/modules/nf-core/bbmap/bbsplit/meta.yml @@ -110,4 +110,5 @@ authors: maintainers: - "@joseespinosa" - "@drpatelh" - - "@pinin4fjords" \ No newline at end of file + - "@pinin4fjords" + From d2d9f87a91b6ea2ab47d25fe025243d773dd4223 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 13 Nov 2025 14:54:43 +0000 Subject: [PATCH 10/11] Remove trailing whitespace in meta.yml --- modules/nf-core/bbmap/bbsplit/meta.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/nf-core/bbmap/bbsplit/meta.yml b/modules/nf-core/bbmap/bbsplit/meta.yml index 3075bcd99..66f87cce0 100644 --- a/modules/nf-core/bbmap/bbsplit/meta.yml +++ b/modules/nf-core/bbmap/bbsplit/meta.yml @@ -111,4 +111,3 @@ maintainers: - "@joseespinosa" - "@drpatelh" - "@pinin4fjords" - From 2e2c75c95052479978a04156557e07f5d2fe9bec Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Thu, 13 Nov 2025 15:10:52 +0000 Subject: [PATCH 11/11] Update git_sha for bbmap/bbsplit module --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 920c05cd7..a62301a56 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,7 @@ "nf-core": { "bbmap/bbsplit": { "branch": "master", - "git_sha": "b5114f2bb0aa49281e1292d0c8b1433e368c0ed3", + "git_sha": "fa1477128e0ff24675c8f56d6c334fcc323025c4", "installed_by": ["fastq_qc_trim_filter_setstrandedness", "modules"] }, "bedtools/genomecov": {