From 49fc41ebdba302cfab1628a399aeda1706f171be Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 18 Nov 2025 13:28:19 +1100 Subject: [PATCH 1/5] Minor `NvvmArch` test fixes. - Improve a few comments. - Combine three `nvvm_arch_target_feature_format_*` tests into one. - Avoid some unnecessary local variables. - Rename `nvvm_arch_all_target_features_includes_lower_capabilities`, and order things more sensibly within it. - Remove `target_feature_synthesis_supports_conditional_compilation_patterns` and `target_feature_synthesis_enables_correct_cfg_patterns` and `nvvm_arch_a_suffix_includes_all_available_instructions`. They are all testing the same things as `nvvm_arch_all_target_features`. (A couple of things from them were moved into `nvvm_arch_all_target_features`, to preserve test coverage.) This all just makes things more streamlined and avoids repetition, to get ready for subsequent changes. --- crates/nvvm/src/lib.rs | 233 +++++++++++++---------------------------- 1 file changed, 71 insertions(+), 162 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 70f81740..cba1ae3d 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -747,34 +747,24 @@ mod tests { } #[test] - fn nvvm_arch_target_feature_format_base_variants() { + fn nvvm_arch_target_feature() { use crate::NvvmArch; - // Test base variants format + // Test baseline features assert_eq!(NvvmArch::Compute35.target_feature(), "compute_35"); assert_eq!(NvvmArch::Compute61.target_feature(), "compute_61"); assert_eq!(NvvmArch::Compute90.target_feature(), "compute_90"); assert_eq!(NvvmArch::Compute100.target_feature(), "compute_100"); assert_eq!(NvvmArch::Compute120.target_feature(), "compute_120"); - } - #[test] - fn nvvm_arch_target_feature_format_family_variants() { - use crate::NvvmArch; - - // Test family ('f') variants format + // Test family-specific ('f') features assert_eq!(NvvmArch::Compute100f.target_feature(), "compute_100f"); 
assert_eq!(NvvmArch::Compute101f.target_feature(), "compute_101f"); assert_eq!(NvvmArch::Compute103f.target_feature(), "compute_103f"); assert_eq!(NvvmArch::Compute120f.target_feature(), "compute_120f"); assert_eq!(NvvmArch::Compute121f.target_feature(), "compute_121f"); - } - #[test] - fn nvvm_arch_target_feature_format_architecture_variants() { - use crate::NvvmArch; - - // Test architecture ('a') variants format + // Test architecture-specific ('a') features assert_eq!(NvvmArch::Compute90a.target_feature(), "compute_90a"); assert_eq!(NvvmArch::Compute100a.target_feature(), "compute_100a"); assert_eq!(NvvmArch::Compute101a.target_feature(), "compute_101a"); @@ -784,24 +774,55 @@ mod tests { } #[test] - fn nvvm_arch_all_target_features_includes_lower_capabilities() { + fn nvvm_arch_all_target_features() { use crate::NvvmArch; // Compute35 only includes itself - let compute35_features = NvvmArch::Compute35.all_target_features(); - assert_eq!(compute35_features, vec!["compute_35"]); + assert_eq!( + NvvmArch::Compute35.all_target_features(), + vec!["compute_35"] + ); // Compute50 includes all lower base capabilities - let compute50_features = NvvmArch::Compute50.all_target_features(); assert_eq!( - compute50_features, - vec!["compute_35", "compute_37", "compute_50"] + NvvmArch::Compute50.all_target_features(), + vec!["compute_35", "compute_37", "compute_50"], ); // Compute61 includes all lower base capabilities - let compute61_features = NvvmArch::Compute61.all_target_features(); assert_eq!( - compute61_features, + NvvmArch::Compute61.all_target_features(), + vec![ + "compute_35", + "compute_37", + "compute_50", + "compute_52", + "compute_53", + "compute_60", + "compute_61", + ] + ); + + // Compute70 includes all lower base capabilities + assert_eq!( + NvvmArch::Compute70.all_target_features(), + vec![ + "compute_35", + "compute_37", + "compute_50", + "compute_52", + "compute_53", + "compute_60", + "compute_61", + "compute_62", + "compute_70", + ] + ); + + // 
Compute90 includes lower base capabilities + let compute90_features = NvvmArch::Compute90.all_target_features(); + assert_eq!( + compute90_features, vec![ "compute_35", "compute_37", @@ -809,7 +830,16 @@ mod tests { "compute_52", "compute_53", "compute_60", - "compute_61" + "compute_61", + "compute_62", + "compute_70", + "compute_72", + "compute_75", + "compute_80", + "compute_86", + "compute_87", + "compute_89", + "compute_90", ] ); @@ -836,6 +866,23 @@ mod tests { // Should include itself assert!(compute100a_features.contains(&"compute_100a".to_string())); + // Test 'f' variant with 100f + let compute100f_features = NvvmArch::Compute100f.all_target_features(); + assert!(compute100f_features.contains(&"compute_100".to_string())); // Same version base + assert!(compute100f_features.contains(&"compute_101".to_string())); // Higher minor + assert!(compute100f_features.contains(&"compute_103".to_string())); // Higher minor + assert!(compute100f_features.contains(&"compute_100f".to_string())); // Self + assert!(!compute100f_features.contains(&"compute_101f".to_string())); // No other 'f' variants + assert!(!compute100f_features.contains(&"compute_90".to_string())); // Different major + + // Test 'f' variant with 101f + let compute101f_features = NvvmArch::Compute101f.all_target_features(); + assert!(!compute101f_features.contains(&"compute_100".to_string())); // Lower minor NOT included + assert!(compute101f_features.contains(&"compute_101".to_string())); // Same version base + assert!(compute101f_features.contains(&"compute_103".to_string())); // Higher minor included + assert!(compute101f_features.contains(&"compute_101f".to_string())); // Self + assert!(!compute101f_features.contains(&"compute_101a".to_string())); // No 'a' variants + // Test compute101a let compute101a_features = NvvmArch::Compute101a.all_target_features(); // Should include all base up to 101 @@ -860,112 +907,6 @@ mod tests { // Should NOT include different major versions 
assert!(!compute120f_features.contains(&"compute_100".to_string())); assert!(!compute120f_features.contains(&"compute_90".to_string())); - - // Test 'f' variant with 100f - let compute100f_features = NvvmArch::Compute100f.all_target_features(); - assert!(compute100f_features.contains(&"compute_100".to_string())); // Same version base - assert!(compute100f_features.contains(&"compute_101".to_string())); // Higher minor - assert!(compute100f_features.contains(&"compute_103".to_string())); // Higher minor - assert!(compute100f_features.contains(&"compute_100f".to_string())); // Self - assert!(!compute100f_features.contains(&"compute_101f".to_string())); // No other 'f' variants - assert!(!compute100f_features.contains(&"compute_90".to_string())); // Different major - - // Test 'f' variant with 101f - let compute101f_features = NvvmArch::Compute101f.all_target_features(); - assert!(!compute101f_features.contains(&"compute_100".to_string())); // Lower minor NOT included - assert!(compute101f_features.contains(&"compute_101".to_string())); // Same version base - assert!(compute101f_features.contains(&"compute_103".to_string())); // Higher minor included - assert!(compute101f_features.contains(&"compute_101f".to_string())); // Self - assert!(!compute101f_features.contains(&"compute_101a".to_string())); // No 'a' variants - - // Compute90 includes lower base capabilities - let compute90_features = NvvmArch::Compute90.all_target_features(); - assert_eq!( - compute90_features, - vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - "compute_72", - "compute_75", - "compute_80", - "compute_86", - "compute_87", - "compute_89", - "compute_90" - ] - ); - } - - #[test] - fn target_feature_synthesis_supports_conditional_compilation_patterns() { - use crate::NvvmArch; - - // When targeting Compute61, should enable all lower capabilities - let features = 
NvvmArch::Compute61.all_target_features(); - - // Should enable compute_60 (for f64 atomics) - assert!(features.contains(&"compute_60".to_string())); - - // Should enable compute_50 (for 64-bit integer atomics) - assert!(features.contains(&"compute_50".to_string())); - - // Should enable compute_35 (baseline) - assert!(features.contains(&"compute_35".to_string())); - - // Should enable the target itself - assert!(features.contains(&"compute_61".to_string())); - - // Should NOT enable higher capabilities - assert!(!features.contains(&"compute_62".to_string())); - assert!(!features.contains(&"compute_70".to_string())); - } - - #[test] - fn target_feature_synthesis_enables_correct_cfg_patterns() { - use crate::NvvmArch; - - // Test that targeting Compute70 enables appropriate cfg patterns - let features = NvvmArch::Compute70.all_target_features(); - - // These should all be true for compute_70 target - let expected_enabled = [ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - ]; - - for feature in expected_enabled { - assert!( - features.contains(&feature.to_string()), - "Compute70 should enable {} for cfg(target_feature = \"{}\")", - feature, - feature - ); - } - - // These should NOT be enabled for compute_70 target - let expected_disabled = ["compute_72", "compute_75", "compute_80", "compute_90"]; - - for feature in expected_disabled { - assert!( - !features.contains(&feature.to_string()), - "Compute70 should NOT enable {}", - feature - ); - } } #[test] @@ -1097,7 +1038,7 @@ mod tests { NvvmArch::Compute120 ); - // Floating-point variants return base + // Family-specific variants return base assert_eq!( NvvmArch::Compute120f.base_architecture(), NvvmArch::Compute120 @@ -1154,38 +1095,6 @@ mod tests { assert!(compute120f_variants.contains(&NvvmArch::Compute120a)); } - #[test] - fn nvvm_arch_a_suffix_includes_all_available_instructions() { - use crate::NvvmArch; - - // 
Test that 'a' suffix variants include all available instructions for the architecture - // While they only RUN on exact CC, they enable all base and family features during compilation - - // Test Compute90a - let features = NvvmArch::Compute90a.all_target_features(); - assert!(features.contains(&"compute_90a".to_string())); // Includes itself - assert!(features.contains(&"compute_90".to_string())); // Includes base - assert!(features.contains(&"compute_80".to_string())); // Includes lower versions - assert!(!features.contains(&"compute_100".to_string())); // Does NOT include higher versions - - // Test Compute100a - let features = NvvmArch::Compute100a.all_target_features(); - assert!(features.contains(&"compute_100a".to_string())); // Includes itself - assert!(features.contains(&"compute_100".to_string())); // Includes base - assert!(features.contains(&"compute_100f".to_string())); // Includes family variant - assert!(features.contains(&"compute_90".to_string())); // Includes lower base versions - assert!(!features.contains(&"compute_90a".to_string())); // Does NOT include other 'a' variants - assert!(!features.contains(&"compute_101f".to_string())); // Does NOT include higher minor family variants - - // Test Compute120a - let features = NvvmArch::Compute120a.all_target_features(); - assert!(features.contains(&"compute_120a".to_string())); // Includes itself - assert!(features.contains(&"compute_120".to_string())); // Includes base - assert!(features.contains(&"compute_120f".to_string())); // Includes family variant (same minor) - assert!(features.contains(&"compute_100".to_string())); // Includes lower base versions - assert!(!features.contains(&"compute_121f".to_string())); // Does NOT include higher minor family variants - } - #[test] fn nvvm_arch_variants_for_capability() { use crate::NvvmArch; From 6020b60dfcab1d24a5208b5189e3c6b110069dc0 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 18 Nov 2025 13:50:01 +1100 Subject: [PATCH 2/5] Use `Vec` 
literals when testing `all_target_features`. Currently some `all_target_features` results are compared against a `Vec` literal, while some are only tested with a smattering of `contains`/`!contains` calls. This commit changes the latter to use `Vec` literals. This is more thorough, easier to read, and demonstrates two existing bugs with `all_target_features`: - The values for the 'f' suffix ones are badly wrong. - The sort order is lexicographic, which puts `compute_100` before `compute_90`. The next commit will fix these bugs. --- crates/nvvm/src/lib.rs | 210 +++++++++++++++++++++++++++++------------ 1 file changed, 150 insertions(+), 60 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index cba1ae3d..70732742 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -843,70 +843,160 @@ mod tests { ] ); - // Test 'a' variant - includes all available instructions for the architecture - // This means: all base variants up to same version, all 'f' variants with same major and <= minor, plus itself - let compute90a_features = NvvmArch::Compute90a.all_target_features(); - // Should include all base up to 90 - assert!(compute90a_features.contains(&"compute_35".to_string())); - assert!(compute90a_features.contains(&"compute_90".to_string())); - // Should include the 'a' variant itself - assert!(compute90a_features.contains(&"compute_90a".to_string())); - // Should NOT include any 'f' variants (90 has no 'f' variants) - - // Test compute100a - should include base variants, and 100f - let compute100a_features = NvvmArch::Compute100a.all_target_features(); - // Should include all base up to 100 - assert!(compute100a_features.contains(&"compute_90".to_string())); - assert!(compute100a_features.contains(&"compute_100".to_string())); - // Should include 100f (same major, <= minor) - assert!(compute100a_features.contains(&"compute_100f".to_string())); - // Should NOT include 101f or 103f (higher minor) - 
assert!(!compute100a_features.contains(&"compute_101f".to_string())); - assert!(!compute100a_features.contains(&"compute_103f".to_string())); - // Should include itself - assert!(compute100a_features.contains(&"compute_100a".to_string())); + // Test 'a' variant - includes all available instructions for the architecture. + // This means: all base variants up to same version, no 'f' variants (90 has none), and the + // 'a' variant. + assert_eq!( + NvvmArch::Compute90a.all_target_features(), + vec![ + "compute_35", + "compute_37", + "compute_50", + "compute_52", + "compute_53", + "compute_60", + "compute_61", + "compute_62", + "compute_70", + "compute_72", + "compute_75", + "compute_80", + "compute_86", + "compute_87", + "compute_89", + "compute_90", + "compute_90a", + ] + ); + + // Test compute100a - should include base variants up to 100, and 100f, and itself, + // but NOT 101f or 103f (higher minor). + assert_eq!( + NvvmArch::Compute100a.all_target_features(), + vec![ + "compute_100", + "compute_100a", + "compute_100f", + "compute_35", + "compute_37", + "compute_50", + "compute_52", + "compute_53", + "compute_60", + "compute_61", + "compute_62", + "compute_70", + "compute_72", + "compute_75", + "compute_80", + "compute_86", + "compute_87", + "compute_89", + "compute_90", + ] + ); // Test 'f' variant with 100f - let compute100f_features = NvvmArch::Compute100f.all_target_features(); - assert!(compute100f_features.contains(&"compute_100".to_string())); // Same version base - assert!(compute100f_features.contains(&"compute_101".to_string())); // Higher minor - assert!(compute100f_features.contains(&"compute_103".to_string())); // Higher minor - assert!(compute100f_features.contains(&"compute_100f".to_string())); // Self - assert!(!compute100f_features.contains(&"compute_101f".to_string())); // No other 'f' variants - assert!(!compute100f_features.contains(&"compute_90".to_string())); // Different major + assert_eq!( + NvvmArch::Compute100f.all_target_features(), + // 
FIXME: this is wrong + vec!["compute_100", "compute_100f", "compute_101", "compute_103"] + ); + + // Test compute101a - should include base variants up to 101, and 100f and 101f, and + // itself, but not 103f (higher minor) + assert_eq!( + NvvmArch::Compute101a.all_target_features(), + vec![ + "compute_100", + "compute_100f", + "compute_101", + "compute_101a", + "compute_101f", + "compute_35", + "compute_37", + "compute_50", + "compute_52", + "compute_53", + "compute_60", + "compute_61", + "compute_62", + "compute_70", + "compute_72", + "compute_75", + "compute_80", + "compute_86", + "compute_87", + "compute_89", + "compute_90", + ] + ); // Test 'f' variant with 101f - let compute101f_features = NvvmArch::Compute101f.all_target_features(); - assert!(!compute101f_features.contains(&"compute_100".to_string())); // Lower minor NOT included - assert!(compute101f_features.contains(&"compute_101".to_string())); // Same version base - assert!(compute101f_features.contains(&"compute_103".to_string())); // Higher minor included - assert!(compute101f_features.contains(&"compute_101f".to_string())); // Self - assert!(!compute101f_features.contains(&"compute_101a".to_string())); // No 'a' variants - - // Test compute101a - let compute101a_features = NvvmArch::Compute101a.all_target_features(); - // Should include all base up to 101 - assert!(compute101a_features.contains(&"compute_100".to_string())); - assert!(compute101a_features.contains(&"compute_101".to_string())); - // Should include 100f and 101f (same major, <= minor) - assert!(compute101a_features.contains(&"compute_100f".to_string())); - assert!(compute101a_features.contains(&"compute_101f".to_string())); - // Should NOT include 103f (higher minor) - assert!(!compute101a_features.contains(&"compute_103f".to_string())); - // Should include itself - assert!(compute101a_features.contains(&"compute_101a".to_string())); - - // Test 'f' variant - includes same major version with >= minor - let compute120f_features = 
NvvmArch::Compute120f.all_target_features(); - assert!(compute120f_features.contains(&"compute_120".to_string())); - assert!(compute120f_features.contains(&"compute_121".to_string())); // Higher minor included - assert!(compute120f_features.contains(&"compute_120f".to_string())); // Self included - assert!(!compute120f_features.contains(&"compute_120a".to_string())); // No 'a' variants - assert!(!compute120f_features.contains(&"compute_121f".to_string())); // No other 'f' variants - assert!(!compute120f_features.contains(&"compute_121a".to_string())); // No 'a' variants - // Should NOT include different major versions - assert!(!compute120f_features.contains(&"compute_100".to_string())); - assert!(!compute120f_features.contains(&"compute_90".to_string())); + assert_eq!( + NvvmArch::Compute101f.all_target_features(), + vec!["compute_101", "compute_101f", "compute_103"], + ); + + assert_eq!( + NvvmArch::Compute120.all_target_features(), + vec![ + "compute_100", + "compute_101", + "compute_103", + "compute_120", + "compute_35", + "compute_37", + "compute_50", + "compute_52", + "compute_53", + "compute_60", + "compute_61", + "compute_62", + "compute_70", + "compute_72", + "compute_75", + "compute_80", + "compute_86", + "compute_87", + "compute_89", + "compute_90", + ] + ); + + assert_eq!( + NvvmArch::Compute120f.all_target_features(), + // FIXME: this is wrong + vec!["compute_120", "compute_120f", "compute_121"] + ); + + assert_eq!( + NvvmArch::Compute120a.all_target_features(), + vec![ + "compute_100", + "compute_101", + "compute_103", + "compute_120", + "compute_120a", + "compute_120f", + "compute_35", + "compute_37", + "compute_50", + "compute_52", + "compute_53", + "compute_60", + "compute_61", + "compute_62", + "compute_70", + "compute_72", + "compute_75", + "compute_80", + "compute_86", + "compute_87", + "compute_89", + "compute_90", + ] + ); } #[test] From 5512f026954f420c174d75fdd3d602ef42f56a9b Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 18 
Nov 2025 14:06:44 +1100 Subject: [PATCH 3/5] Fix `NvvmArch::all_target_features` bugs. It now does a single filter pass over the enum variants, which is simpler and fixes the sorting issue and the incorrect 'f' suffix results. I removed some comments in the `nvvm_arch_all_target_features` test, because they were low-value. There are now better comments within `all_target_features` that explain what's happening. I also removed the comment about PTX forward-compatibility. It was correct but confusing. This function answers the question "what features are available if I'm targeting a particular NvvmArch?" (backwards compatibility). That comment explained "what GPU CCs will this run on?" (forward compatibility). Also update the relevant section in the guide, where the 'f' details were incorrect. And make the terminology more consistent. --- crates/nvvm/src/lib.rs | 207 ++++++++++++++---------- guide/src/guide/compute_capabilities.md | 26 +-- 2 files changed, 134 insertions(+), 99 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 70732742..c386b482 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -430,73 +430,62 @@ impl NvvmArch { } } - /// Get all target features up to and including this architecture. + /// Gets all target features up to and including this architecture. This effectively answers + /// the question "for a given compilation target, what architectural features can be used?" /// - /// # PTX Forward-Compatibility Rules (per NVIDIA documentation): + /// # Examples /// - /// - **No suffix** (compute_XX): PTX is forward-compatible across all future architectures. - /// Example: compute_70 runs on CC 7.0, 8.x, 9.x, 10.x, 12.x, and all future GPUs. 
+ /// ``` + /// # use nvvm::NvvmArch; + /// let features = NvvmArch::Compute53.all_target_features(); + /// assert_eq!( + /// features, + /// vec!["compute_35", "compute_37", "compute_50", "compute_52", "compute_53"] + /// ); + /// ``` /// - /// - **Family-specific 'f' suffix** (compute_XXf): Forward-compatible within the same major - /// version family. Supports devices with same major CC and equal or higher minor CC. - /// Example: compute_100f runs on CC 10.0, 10.3, and future 10.x devices, but NOT on 11.x. - /// - /// - **Architecture-specific 'a' suffix** (compute_XXa): The code only runs on GPUs of that - /// specific CC and no others. No forward or backward compatibility whatsoever. - /// These features are primarily related to Tensor Core programming. - /// Example: compute_100a ONLY runs on CC 10.0, not on 10.3, 10.1, 9.0, or any other version. + /// # External resources /// /// For more details on family and architecture-specific features, see: /// pub fn all_target_features(&self) -> Vec { - let mut features: Vec = if self.is_architecture_variant() { - // 'a' variants: include all available instructions for the architecture - // This means: all base variants up to same version, all 'f' variants with same major and <= minor, plus itself - let base_features: Vec = NvvmArch::iter() - .filter(|arch| { - arch.is_base_variant() && arch.capability_value() <= self.capability_value() - }) - .map(|arch| arch.target_feature()) - .collect(); - - let family_features: Vec = NvvmArch::iter() - .filter(|arch| { - arch.is_family_variant() - && arch.major_version() == self.major_version() - && arch.minor_version() <= self.minor_version() - }) - .map(|arch| arch.target_feature()) - .collect(); + // All lower-or-equal baseline features are included. 
+ let included_baseline = |arch: &NvvmArch| { + arch.is_base_variant() && arch.capability_value() <= self.capability_value() + }; - base_features - .into_iter() - .chain(family_features) - .chain(std::iter::once(self.target_feature())) + // All lower-or-equal-with-same-major-version family features are included. + let included_family = |arch: &NvvmArch| { + arch.is_family_variant() + && arch.major_version() == self.major_version() + && arch.minor_version() <= self.minor_version() + }; + + if self.is_architecture_variant() { + // Architecture-specific ('a' suffix) features include: + // - all lower-or-equal baseline features + // - all lower-or-equal-with-same-major-version family features + // - itself + NvvmArch::iter() + .filter(|arch| included_baseline(arch) || included_family(arch) || arch == self) + .map(|arch| arch.target_feature()) .collect() } else if self.is_family_variant() { - // 'f' variants: same major version with equal or higher minor version + // Family-specific ('f' suffix) features include: + // - all lower-or-equal baseline features + // - all lower-or-equal-with-same-major-version family features NvvmArch::iter() - .filter(|arch| { - // Include base variants with same major and >= minor version - arch.is_base_variant() - && arch.major_version() == self.major_version() - && arch.minor_version() >= self.minor_version() - }) + .filter(|arch| included_baseline(arch) || included_family(arch)) .map(|arch| arch.target_feature()) - .chain(std::iter::once(self.target_feature())) // Add the 'f' variant itself .collect() } else { - // Base variants: all base architectures from lower or equal versions + // Baseline (no suffix) features include: + // - all lower-or-equal baseline features NvvmArch::iter() - .filter(|arch| { - arch.is_base_variant() && arch.capability_value() <= self.capability_value() - }) + .filter(included_baseline) .map(|arch| arch.target_feature()) .collect() - }; - - features.sort(); - features + } } /// Create an iterator over all 
architectures from Compute35 up to and including this one @@ -777,19 +766,16 @@ mod tests { fn nvvm_arch_all_target_features() { use crate::NvvmArch; - // Compute35 only includes itself assert_eq!( NvvmArch::Compute35.all_target_features(), vec!["compute_35"] ); - // Compute50 includes all lower base capabilities assert_eq!( NvvmArch::Compute50.all_target_features(), vec!["compute_35", "compute_37", "compute_50"], ); - // Compute61 includes all lower base capabilities assert_eq!( NvvmArch::Compute61.all_target_features(), vec![ @@ -803,7 +789,6 @@ mod tests { ] ); - // Compute70 includes all lower base capabilities assert_eq!( NvvmArch::Compute70.all_target_features(), vec![ @@ -819,7 +804,6 @@ mod tests { ] ); - // Compute90 includes lower base capabilities let compute90_features = NvvmArch::Compute90.all_target_features(); assert_eq!( compute90_features, @@ -843,9 +827,6 @@ mod tests { ] ); - // Test 'a' variant - includes all available instructions for the architecture. - // This means: all base variants up to same version, no 'f' variants (90 has none), and the - // 'a' variant. assert_eq!( NvvmArch::Compute90a.all_target_features(), vec![ @@ -869,14 +850,9 @@ mod tests { ] ); - // Test compute100a - should include base variants up to 100, and 100f, and itself, - // but NOT 101f or 103f (higher minor). 
assert_eq!( NvvmArch::Compute100a.all_target_features(), vec![ - "compute_100", - "compute_100a", - "compute_100f", "compute_35", "compute_37", "compute_50", @@ -893,26 +869,39 @@ mod tests { "compute_87", "compute_89", "compute_90", + "compute_100", + "compute_100f", + "compute_100a", ] ); - // Test 'f' variant with 100f assert_eq!( NvvmArch::Compute100f.all_target_features(), - // FIXME: this is wrong - vec!["compute_100", "compute_100f", "compute_101", "compute_103"] + vec![ + "compute_35", + "compute_37", + "compute_50", + "compute_52", + "compute_53", + "compute_60", + "compute_61", + "compute_62", + "compute_70", + "compute_72", + "compute_75", + "compute_80", + "compute_86", + "compute_87", + "compute_89", + "compute_90", + "compute_100", + "compute_100f", + ] ); - // Test compute101a - should include base variants up to 101, and 100f and 101f, and - // itself, but not 103f (higher minor) assert_eq!( NvvmArch::Compute101a.all_target_features(), vec![ - "compute_100", - "compute_100f", - "compute_101", - "compute_101a", - "compute_101f", "compute_35", "compute_37", "compute_50", @@ -929,22 +918,43 @@ mod tests { "compute_87", "compute_89", "compute_90", + "compute_100", + "compute_100f", + "compute_101", + "compute_101f", + "compute_101a", ] ); - // Test 'f' variant with 101f assert_eq!( NvvmArch::Compute101f.all_target_features(), - vec!["compute_101", "compute_101f", "compute_103"], + vec![ + "compute_35", + "compute_37", + "compute_50", + "compute_52", + "compute_53", + "compute_60", + "compute_61", + "compute_62", + "compute_70", + "compute_72", + "compute_75", + "compute_80", + "compute_86", + "compute_87", + "compute_89", + "compute_90", + "compute_100", + "compute_100f", + "compute_101", + "compute_101f", + ] ); assert_eq!( NvvmArch::Compute120.all_target_features(), vec![ - "compute_100", - "compute_101", - "compute_103", - "compute_120", "compute_35", "compute_37", "compute_50", @@ -961,24 +971,43 @@ mod tests { "compute_87", "compute_89", 
"compute_90", + "compute_100", + "compute_101", + "compute_103", + "compute_120", ] ); assert_eq!( NvvmArch::Compute120f.all_target_features(), - // FIXME: this is wrong - vec!["compute_120", "compute_120f", "compute_121"] - ); - - assert_eq!( - NvvmArch::Compute120a.all_target_features(), vec![ + "compute_35", + "compute_37", + "compute_50", + "compute_52", + "compute_53", + "compute_60", + "compute_61", + "compute_62", + "compute_70", + "compute_72", + "compute_75", + "compute_80", + "compute_86", + "compute_87", + "compute_89", + "compute_90", "compute_100", "compute_101", "compute_103", "compute_120", - "compute_120a", "compute_120f", + ] + ); + + assert_eq!( + NvvmArch::Compute120a.all_target_features(), + vec![ "compute_35", "compute_37", "compute_50", @@ -995,6 +1024,12 @@ mod tests { "compute_87", "compute_89", "compute_90", + "compute_100", + "compute_101", + "compute_103", + "compute_120", + "compute_120f", + "compute_120a", ] ); } diff --git a/guide/src/guide/compute_capabilities.md b/guide/src/guide/compute_capabilities.md index 432522c7..617169fb 100644 --- a/guide/src/guide/compute_capabilities.md +++ b/guide/src/guide/compute_capabilities.md @@ -74,9 +74,9 @@ CudaBuilder::new("kernels") .unwrap(); // In your kernel code: -#[cfg(target_feature = "compute_60")] // ✓ Pass (older compute capability) -#[cfg(target_feature = "compute_70")] // ✓ Pass (current compute capability) -#[cfg(target_feature = "compute_80")] // ✗ Fail (newer compute capability) +#[cfg(target_feature = "compute_60")] // ✓ Pass (lower base variant) +#[cfg(target_feature = "compute_70")] // ✓ Pass (this base variant) +#[cfg(target_feature = "compute_80")] // ✗ Fail (higher base variant) ``` ### Family Suffix ('f') @@ -99,13 +99,13 @@ CudaBuilder::new("kernels") .unwrap(); // In your kernel code: -#[cfg(target_feature = "compute_100")] // ✗ Fail (10.0 < 10.1) -#[cfg(target_feature = "compute_101")] // ✓ Pass (equal major, equal minor) -#[cfg(target_feature = "compute_103")] // ✓ Pass 
(equal major, greater minor) +#[cfg(target_feature = "compute_90")] // ✓ Pass (lower base variant) +#[cfg(target_feature = "compute_100")] // ✓ Pass (lower base variant) +#[cfg(target_feature = "compute_100f")] // ✓ Pass (lower 'f' variant) +#[cfg(target_feature = "compute_101")] // ✓ Pass (this base variant) #[cfg(target_feature = "compute_101f")] // ✓ Pass (the 'f' variant itself) -#[cfg(target_feature = "compute_100f")] // ✗ Fail (other 'f' variant) -#[cfg(target_feature = "compute_90")] // ✗ Fail (different major) -#[cfg(target_feature = "compute_110")] // ✗ Fail (different major) +#[cfg(target_feature = "compute_103")] // ✗ Fail (higher base variant) +#[cfg(target_feature = "compute_110")] // ✗ Fail (higher base variant) ``` ### Architecture Suffix ('a') @@ -130,12 +130,12 @@ CudaBuilder::new("kernels") .unwrap(); // In your kernel code: -#[cfg(target_feature = "compute_100a")] // ✓ Pass (the 'a' variant itself) -#[cfg(target_feature = "compute_100")] // ✓ Pass (base variant) #[cfg(target_feature = "compute_90")] // ✓ Pass (lower base variant) +#[cfg(target_feature = "compute_100")] // ✓ Pass (base variant) #[cfg(target_feature = "compute_100f")] // ✓ Pass (family variant with same major/minor) -#[cfg(target_feature = "compute_101f")] // ✗ Fail (family variant with higher minor) -#[cfg(target_feature = "compute_110")] // ✗ Fail (higher major version) +#[cfg(target_feature = "compute_100a")] // ✓ Pass (the 'a' variant itself) +#[cfg(target_feature = "compute_101f")] // ✗ Fail (higher family variant) +#[cfg(target_feature = "compute_110")] // ✗ Fail (higher base variant) ``` Note: While the 'a' variant enables all these features during compilation (allowing you to use all available instructions), the generated PTX code will still only run on the exact GPU architecture specified. From 6d28b86460a64cbac3a963379bc8d9aaad299f67 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 18 Nov 2025 15:18:43 +1100 Subject: [PATCH 4/5] Streamline more tests. 
Several of them sort or inspect their vectors unnecessarily. We can just do `Vec` literal equality tests. --- crates/nvvm/src/lib.rs | 44 +++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index c386b482..53cbdef1 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -1193,31 +1193,20 @@ mod tests { assert_eq!(compute80_variants, vec![NvvmArch::Compute80]); // Architecture with architecture and base variants - let mut compute90_variants = NvvmArch::Compute90.get_variants(); - compute90_variants.sort_by_key(|v| format!("{:?}", v)); assert_eq!( - compute90_variants, + NvvmArch::Compute90.get_variants(), vec![NvvmArch::Compute90, NvvmArch::Compute90a] ); // Architecture with all three variants - let mut compute120_variants = NvvmArch::Compute120.get_variants(); - compute120_variants.sort_by_key(|v| format!("{:?}", v)); - assert_eq!( - compute120_variants, - vec![ - NvvmArch::Compute120, - NvvmArch::Compute120a, - NvvmArch::Compute120f - ] - ); - - // Getting variants from a variant returns all variants - let compute120f_variants = NvvmArch::Compute120f.get_variants(); - assert_eq!(compute120f_variants.len(), 3); - assert!(compute120f_variants.contains(&NvvmArch::Compute120)); - assert!(compute120f_variants.contains(&NvvmArch::Compute120f)); - assert!(compute120f_variants.contains(&NvvmArch::Compute120a)); + let expected120 = vec![ + NvvmArch::Compute120, + NvvmArch::Compute120f, + NvvmArch::Compute120a, + ]; + assert_eq!(NvvmArch::Compute120.get_variants(), expected120); + assert_eq!(NvvmArch::Compute120f.get_variants(), expected120); + assert_eq!(NvvmArch::Compute120a.get_variants(), expected120); } #[test] @@ -1225,23 +1214,22 @@ mod tests { use crate::NvvmArch; // Capability with single variant - let compute75_variants = NvvmArch::variants_for_capability(75); - assert_eq!(compute75_variants, vec![NvvmArch::Compute75]); + assert_eq!( + 
NvvmArch::variants_for_capability(75), + vec![NvvmArch::Compute75] + ); // Capability with multiple variants - let mut compute101_variants = NvvmArch::variants_for_capability(101); - compute101_variants.sort_by_key(|v| format!("{:?}", v)); assert_eq!( - compute101_variants, + NvvmArch::variants_for_capability(101), vec![ NvvmArch::Compute101, + NvvmArch::Compute101f, NvvmArch::Compute101a, - NvvmArch::Compute101f ] ); // Non-existent capability - let compute999_variants = NvvmArch::variants_for_capability(999); - assert!(compute999_variants.is_empty()); + assert!(NvvmArch::variants_for_capability(999).is_empty()); } } From 2b9cf81c24737bc62a867b57eb99e742a0930bed Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 18 Nov 2025 15:26:58 +1100 Subject: [PATCH 5/5] Streamline `use` items. It's standard to have `use super::*;` in a test module, and this lets us remove a bunch of existing `use` items. --- crates/nvvm/src/lib.rs | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 53cbdef1..2a00534b 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -689,12 +689,11 @@ impl NvvmProgram { #[cfg(test)] mod tests { + use super::*; use std::str::FromStr; #[test] fn nvvm_arch_capability_value() { - use crate::NvvmArch; - assert_eq!(NvvmArch::Compute35.capability_value(), 35); assert_eq!(NvvmArch::Compute37.capability_value(), 37); assert_eq!(NvvmArch::Compute50.capability_value(), 50); @@ -715,8 +714,6 @@ mod tests { #[test] fn nvvm_arch_major_minor_version() { - use crate::NvvmArch; - // Test major/minor version extraction assert_eq!(NvvmArch::Compute35.major_version(), 3); assert_eq!(NvvmArch::Compute35.minor_version(), 5); @@ -737,8 +734,6 @@ mod tests { #[test] fn nvvm_arch_target_feature() { - use crate::NvvmArch; - // Test baseline features assert_eq!(NvvmArch::Compute35.target_feature(), "compute_35"); assert_eq!(NvvmArch::Compute61.target_feature(), 
"compute_61"); @@ -764,8 +759,6 @@ mod tests { #[test] fn nvvm_arch_all_target_features() { - use crate::NvvmArch; - assert_eq!( NvvmArch::Compute35.all_target_features(), vec!["compute_35"] @@ -1036,8 +1029,6 @@ mod tests { #[test] fn nvvm_arch_iter_up_to_includes_only_lower_or_equal() { - use crate::NvvmArch; - // Compute35 only includes itself let archs: Vec<_> = NvvmArch::Compute35.iter_up_to().collect(); assert_eq!(archs, vec![NvvmArch::Compute35]); @@ -1076,8 +1067,8 @@ mod tests { #[test] fn options_parse_correctly() { - use crate::NvvmArch::*; - use crate::NvvmOption::{self, *}; + use NvvmArch::*; + use NvvmOption::{self, *}; let opts = vec![ "-g", @@ -1132,8 +1123,6 @@ mod tests { #[test] fn nvvm_arch_variant_checks() { - use crate::NvvmArch; - // Base variants assert!(NvvmArch::Compute90.is_base_variant()); assert!(NvvmArch::Compute120.is_base_variant()); @@ -1154,8 +1143,6 @@ mod tests { #[test] fn nvvm_arch_base_architecture() { - use crate::NvvmArch; - // Base variants return themselves assert_eq!(NvvmArch::Compute90.base_architecture(), NvvmArch::Compute90); assert_eq!( @@ -1186,8 +1173,6 @@ mod tests { #[test] fn nvvm_arch_get_variants() { - use crate::NvvmArch; - // Architecture with only base variant let compute80_variants = NvvmArch::Compute80.get_variants(); assert_eq!(compute80_variants, vec![NvvmArch::Compute80]); @@ -1211,8 +1196,6 @@ mod tests { #[test] fn nvvm_arch_variants_for_capability() { - use crate::NvvmArch; - // Capability with single variant assert_eq!( NvvmArch::variants_for_capability(75),