nf-core · chaochaowong · Oct 23, 2025 · Oct 23, 2025 · Oct 23, 2025 · Oct 23, 2025
diff --git a/README.md b/README.md
@@ -388,6 +388,8 @@ The earth sciences folder contain subfolders for different data formats encounte
     - genome3.fasta: Reference fasta based on chr19:45760000-45770300
     - genome_motifs.txt: TF motifs used for cellranger-atac
     - genome.NC_012920_1.gb: Contains mtDNA reference genome in Genbank format
+    - GRCh38_chr22.fasta: GRCh38 reference fasta based on chr22
+    - GRCh38_chr22.fasta.fai: index file for 'GRCh38_chr22.fasta'
     - transcriptome.fasta: Reference transcriptome based on `genome.fasta`
     - gff3: Encode GFF3 file downsampled based on reference position
     - gtf: Encode GTF file downsampled based on reference position, `genome_minimal.gtf` is a minimal version containing only the standard fields
@@ -609,6 +611,8 @@ The earth sciences folder contain subfolders for different data formats encounte
       - NA03697B2_downsampled.pbmm2.repeats.bai: associated index to NA03697B2_downsampled.pbmm2.repeats.bam
       - NA037562_downsampled.pbmm2.repeats.bam: subsample of puretarget pacbio reads from the [public pacbio dataset](https://downloads.pacbcloud.com/public/dataset/PureTargetRE/Coriell/PBMM2-BAM-Input-For-IGV-And-TRGT/) aligned to genome3.fasta
       - NA037562_downsampled.pbmm2.repeats.bai: associated index to NA037562_downsampled.pbmm2.repeats.bam
+      - SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.bam: downsampled AML cell line sample containing 1000 sorted, pbmm2-aligned, SNV-phased reads scattered across chr22; made for testing the 'hificnv' and 'pb-cpg-tools' modules.
+      - SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.bam.csi: index file of 'SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.bam'
     - bed:
       - alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.bed: first set of gene models generated by TAMA collapse
       - alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.2.bed: first set of gene models generated by TAMA collapse
@@ -628,6 +632,8 @@ The earth sciences folder contain subfolders for different data formats encounte
       - FAM_snvs_annotated_ranked.vcf.gz: VCF file from HG002, only with ch16 generated from deepvariant and GLnexus
       - FAM.ped: ped file associated with HG002
       - peddy.sites: peddy standard hg38 sites downsampled to only chr16
+      - SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.vcf.gz: VCF file associated with 'SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.bam'
+      - SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.vcf.gz.tbi: Index file associated with 'SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.vcf.gz'
 
   - popgen:
     - plink_simulated.bed: case-control simulated variants dataset in PLINK binary format

diff --git a/data/genomics/homo_sapiens/genome/GRCh38_chr22.fasta b/data/genomics/homo_sapiens/genome/GRCh38_chr22.fasta
diff --git a/data/genomics/homo_sapiens/genome/GRCh38_chr22.fasta.fai b/data/genomics/homo_sapiens/genome/GRCh38_chr22.fasta.fai
@@ -0,0 +1 @@
+chr22	50818468	7	100	101
diff --git a/...cs/homo_sapiens/pacbio/bam/SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.bam b/...cs/homo_sapiens/pacbio/bam/SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.bam
diff --git a/...omo_sapiens/pacbio/bam/SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.bam.csi b/...omo_sapiens/pacbio/bam/SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.bam.csi
diff --git a/...homo_sapiens/pacbio/vcf/SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.vcf.gz b/...homo_sapiens/pacbio/vcf/SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.vcf.gz
diff --git a/..._sapiens/pacbio/vcf/SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.vcf.gz.tbi b/..._sapiens/pacbio/vcf/SCRI_KT5028_GRCh38_downsampled_on_chr22_pbmm2_snv_hiphased.vcf.gz.tbi