From 49a7321b5be1784f961a38d7fdede8d06a7873da Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 18 Nov 2025 09:09:11 +1100 Subject: [PATCH 1/7] Simplify `NvvmArch::fmt`. --- crates/nvvm/src/lib.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index d3f34c4e..6424f1d6 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -339,15 +339,7 @@ pub enum NvvmArch { impl Display for NvvmArch { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let raw = format!("{self:?}").to_ascii_lowercase(); - // Handle architectures with suffixes (e.g., Compute90a -> compute_90a) - if let Some(pos) = raw.find(|c: char| c.is_ascii_digit()) { - let (prefix, rest) = raw.split_at(pos); - write!(f, "{prefix}_{rest}") - } else { - // Fallback for unexpected format - f.write_str(&raw) - } + f.write_str(&self.target_feature()) } } From 42b4ea58e4185bcbb1ced7c6ad8fe28c8e81c3c1 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 20 Nov 2025 11:27:07 +1100 Subject: [PATCH 2/7] Clarify `all_target_features`. In particular, the backward- vs. forward-compatibility idea. --- crates/nvvm/src/lib.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 6424f1d6..81f8ec77 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -472,8 +472,15 @@ impl NvvmArch { } } - /// Gets all target features up to and including this architecture. This effectively answers + /// Gets all target features supported by this compilation target. This effectively answers /// the question "for a given compilation target, what architectural features can be used?" + /// E.g. the "compute_90" compilation target includes features from "compute_80" and earlier. + /// This set of features does not change over time. + /// + /// Note that this is different to the question "for a given compilation target, what devices + /// can the generated PTX code run on?" E.g. PTX code compiled for the "compute_90" compilation + /// target can run on devices with compute capability 9.0 and later. This set of devices will + /// expand over time, as new devices are released. /// /// # Examples /// From f29f2c8a8ec4baa66e2a580207964d03c571b8c9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 20 Nov 2025 11:38:37 +1100 Subject: [PATCH 3/7] Make `NvvmArch::all_target_features` return `Self` instead of `String`. This is simpler, and the result can be easily converted to `String` if necessary. Also remove lots of `NvvmArch::` prefixes in the tests. --- crates/nvvm/src/lib.rs | 569 +++++++++++---------------- crates/rustc_codegen_nvvm/src/lib.rs | 8 +- 2 files changed, 234 insertions(+), 343 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 81f8ec77..41d47ef6 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -485,11 +485,11 @@ impl NvvmArch { /// # Examples /// /// ``` - /// # use nvvm::NvvmArch; - /// let features = NvvmArch::Compute53.all_target_features(); + /// use nvvm::NvvmArch::*; + /// let features = Compute53.all_target_features(); /// assert_eq!( /// features, - /// vec!["compute_35", "compute_37", "compute_50", "compute_52", "compute_53"] + /// vec![Compute35, Compute37, Compute50, Compute52, Compute53] /// ); /// ``` /// @@ -497,7 +497,7 @@ impl NvvmArch { /// /// For more details on family and architecture-specific features, see: /// - pub fn all_target_features(&self) -> Vec { + pub fn all_target_features(&self) -> Vec { // All lower-or-equal baseline features are included. let included_baseline = |arch: &NvvmArch| { arch.is_base_variant() && arch.capability_value() <= self.capability_value() @@ -517,7 +517,6 @@ impl NvvmArch { // - itself NvvmArch::iter() .filter(|arch| included_baseline(arch) || included_family(arch) || arch == self) - .map(|arch| arch.target_feature()) .collect() } else if self.is_family_variant() { // Family-specific ('f' suffix) features include: @@ -525,15 +524,11 @@ impl NvvmArch { // - all lower-or-equal-with-same-major-version family features NvvmArch::iter() .filter(|arch| included_baseline(arch) || included_family(arch)) - .map(|arch| arch.target_feature()) .collect() } else { // Baseline (no suffix) features include: // - all lower-or-equal baseline features - NvvmArch::iter() - .filter(included_baseline) - .map(|arch| arch.target_feature()) - .collect() + NvvmArch::iter().filter(included_baseline).collect() } } @@ -740,338 +735,277 @@ impl NvvmProgram { mod tests { use super::*; use std::str::FromStr; + use NvvmArch::*; #[test] fn nvvm_arch_capability_value() { - assert_eq!(NvvmArch::Compute35.capability_value(), 35); - assert_eq!(NvvmArch::Compute37.capability_value(), 37); - assert_eq!(NvvmArch::Compute50.capability_value(), 50); - assert_eq!(NvvmArch::Compute52.capability_value(), 52); - assert_eq!(NvvmArch::Compute53.capability_value(), 53); - assert_eq!(NvvmArch::Compute60.capability_value(), 60); - assert_eq!(NvvmArch::Compute61.capability_value(), 61); - assert_eq!(NvvmArch::Compute62.capability_value(), 62); - assert_eq!(NvvmArch::Compute70.capability_value(), 70); - assert_eq!(NvvmArch::Compute72.capability_value(), 72); - assert_eq!(NvvmArch::Compute75.capability_value(), 75); - assert_eq!(NvvmArch::Compute80.capability_value(), 80); - assert_eq!(NvvmArch::Compute86.capability_value(), 86); - assert_eq!(NvvmArch::Compute87.capability_value(), 87); - assert_eq!(NvvmArch::Compute89.capability_value(), 89); - assert_eq!(NvvmArch::Compute90.capability_value(), 90); + assert_eq!(Compute35.capability_value(), 35); + assert_eq!(Compute37.capability_value(), 37); + assert_eq!(Compute50.capability_value(), 50); + assert_eq!(Compute52.capability_value(), 52); + assert_eq!(Compute53.capability_value(), 53); + assert_eq!(Compute60.capability_value(), 60); + assert_eq!(Compute61.capability_value(), 61); + assert_eq!(Compute62.capability_value(), 62); + assert_eq!(Compute70.capability_value(), 70); + assert_eq!(Compute72.capability_value(), 72); + assert_eq!(Compute75.capability_value(), 75); + assert_eq!(Compute80.capability_value(), 80); + assert_eq!(Compute86.capability_value(), 86); + assert_eq!(Compute87.capability_value(), 87); + assert_eq!(Compute89.capability_value(), 89); + assert_eq!(Compute90.capability_value(), 90); } #[test] fn nvvm_arch_major_minor_version() { // Test major/minor version extraction - assert_eq!(NvvmArch::Compute35.major_version(), 3); - assert_eq!(NvvmArch::Compute35.minor_version(), 5); + assert_eq!(Compute35.major_version(), 3); + assert_eq!(Compute35.minor_version(), 5); - assert_eq!(NvvmArch::Compute70.major_version(), 7); - assert_eq!(NvvmArch::Compute70.minor_version(), 0); + assert_eq!(Compute70.major_version(), 7); + assert_eq!(Compute70.minor_version(), 0); - assert_eq!(NvvmArch::Compute121.major_version(), 12); - assert_eq!(NvvmArch::Compute121.minor_version(), 1); + assert_eq!(Compute121.major_version(), 12); + assert_eq!(Compute121.minor_version(), 1); // Suffixes don't affect version numbers - assert_eq!(NvvmArch::Compute100f.major_version(), 10); - assert_eq!(NvvmArch::Compute100f.minor_version(), 0); + assert_eq!(Compute100f.major_version(), 10); + assert_eq!(Compute100f.minor_version(), 0); - assert_eq!(NvvmArch::Compute90a.major_version(), 9); - assert_eq!(NvvmArch::Compute90a.minor_version(), 0); + assert_eq!(Compute90a.major_version(), 9); + assert_eq!(Compute90a.minor_version(), 0); } #[test] fn nvvm_arch_target_feature() { // Test baseline features - assert_eq!(NvvmArch::Compute35.target_feature(), "compute_35"); - assert_eq!(NvvmArch::Compute61.target_feature(), "compute_61"); - assert_eq!(NvvmArch::Compute90.target_feature(), "compute_90"); - assert_eq!(NvvmArch::Compute100.target_feature(), "compute_100"); - assert_eq!(NvvmArch::Compute120.target_feature(), "compute_120"); + assert_eq!(Compute35.target_feature(), "compute_35"); + assert_eq!(Compute61.target_feature(), "compute_61"); + assert_eq!(Compute90.target_feature(), "compute_90"); + assert_eq!(Compute100.target_feature(), "compute_100"); + assert_eq!(Compute120.target_feature(), "compute_120"); // Test family-specfic ('f') features - assert_eq!(NvvmArch::Compute100f.target_feature(), "compute_100f"); - assert_eq!(NvvmArch::Compute101f.target_feature(), "compute_101f"); - assert_eq!(NvvmArch::Compute103f.target_feature(), "compute_103f"); - assert_eq!(NvvmArch::Compute120f.target_feature(), "compute_120f"); - assert_eq!(NvvmArch::Compute121f.target_feature(), "compute_121f"); + assert_eq!(Compute100f.target_feature(), "compute_100f"); + assert_eq!(Compute101f.target_feature(), "compute_101f"); + assert_eq!(Compute103f.target_feature(), "compute_103f"); + assert_eq!(Compute120f.target_feature(), "compute_120f"); + assert_eq!(Compute121f.target_feature(), "compute_121f"); // Test architecture-specific ('a') features - assert_eq!(NvvmArch::Compute90a.target_feature(), "compute_90a"); - assert_eq!(NvvmArch::Compute100a.target_feature(), "compute_100a"); - assert_eq!(NvvmArch::Compute101a.target_feature(), "compute_101a"); - assert_eq!(NvvmArch::Compute103a.target_feature(), "compute_103a"); - assert_eq!(NvvmArch::Compute120a.target_feature(), "compute_120a"); - assert_eq!(NvvmArch::Compute121a.target_feature(), "compute_121a"); + assert_eq!(Compute90a.target_feature(), "compute_90a"); + assert_eq!(Compute100a.target_feature(), "compute_100a"); + assert_eq!(Compute101a.target_feature(), "compute_101a"); + assert_eq!(Compute103a.target_feature(), "compute_103a"); + assert_eq!(Compute120a.target_feature(), "compute_120a"); + assert_eq!(Compute121a.target_feature(), "compute_121a"); } #[test] fn nvvm_arch_all_target_features() { - assert_eq!( - NvvmArch::Compute35.all_target_features(), - vec!["compute_35"] - ); + assert_eq!(Compute35.all_target_features(), vec![Compute35]); assert_eq!( - NvvmArch::Compute50.all_target_features(), - vec!["compute_35", "compute_37", "compute_50"], + Compute50.all_target_features(), + vec![Compute35, Compute37, Compute50], ); assert_eq!( - NvvmArch::Compute61.all_target_features(), - vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - ] + Compute61.all_target_features(), + vec![Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61] ); assert_eq!( - NvvmArch::Compute70.all_target_features(), + Compute70.all_target_features(), vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", + Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61, + Compute62, Compute70, ] ); - let compute90_features = NvvmArch::Compute90.all_target_features(); + let compute90_features = Compute90.all_target_features(); assert_eq!( compute90_features, vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - "compute_72", - "compute_75", - "compute_80", - "compute_86", - "compute_87", - "compute_89", - "compute_90", + Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61, + Compute62, Compute70, Compute72, Compute75, Compute80, Compute86, Compute87, + Compute89, Compute90, ] ); assert_eq!( - NvvmArch::Compute90a.all_target_features(), + Compute90a.all_target_features(), vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - "compute_72", - "compute_75", - "compute_80", - "compute_86", - "compute_87", - "compute_89", - "compute_90", - "compute_90a", + Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61, + Compute62, Compute70, Compute72, Compute75, Compute80, Compute86, Compute87, + Compute89, Compute90, Compute90a, ] ); assert_eq!( - NvvmArch::Compute100a.all_target_features(), + Compute100a.all_target_features(), vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - "compute_72", - "compute_75", - "compute_80", - "compute_86", - "compute_87", - "compute_89", - "compute_90", - "compute_100", - "compute_100f", - "compute_100a", + Compute35, + Compute37, + Compute50, + Compute52, + Compute53, + Compute60, + Compute61, + Compute62, + Compute70, + Compute72, + Compute75, + Compute80, + Compute86, + Compute87, + Compute89, + Compute90, + Compute100, + Compute100f, + Compute100a, ] ); assert_eq!( - NvvmArch::Compute100f.all_target_features(), + Compute100f.all_target_features(), vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - "compute_72", - "compute_75", - "compute_80", - "compute_86", - "compute_87", - "compute_89", - "compute_90", - "compute_100", - "compute_100f", + Compute35, + Compute37, + Compute50, + Compute52, + Compute53, + Compute60, + Compute61, + Compute62, + Compute70, + Compute72, + Compute75, + Compute80, + Compute86, + Compute87, + Compute89, + Compute90, + Compute100, + Compute100f, ] ); assert_eq!( - NvvmArch::Compute101a.all_target_features(), + Compute101a.all_target_features(), vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - "compute_72", - "compute_75", - "compute_80", - "compute_86", - "compute_87", - "compute_89", - "compute_90", - "compute_100", - "compute_100f", - "compute_101", - "compute_101f", - "compute_101a", + Compute35, + Compute37, + Compute50, + Compute52, + Compute53, + Compute60, + Compute61, + Compute62, + Compute70, + Compute72, + Compute75, + Compute80, + Compute86, + Compute87, + Compute89, + Compute90, + Compute100, + Compute100f, + Compute101, + Compute101f, + Compute101a, ] ); assert_eq!( - NvvmArch::Compute101f.all_target_features(), + Compute101f.all_target_features(), vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - "compute_72", - "compute_75", - "compute_80", - "compute_86", - "compute_87", - "compute_89", - "compute_90", - "compute_100", - "compute_100f", - "compute_101", - "compute_101f", + Compute35, + Compute37, + Compute50, + Compute52, + Compute53, + Compute60, + Compute61, + Compute62, + Compute70, + Compute72, + Compute75, + Compute80, + Compute86, + Compute87, + Compute89, + Compute90, + Compute100, + Compute100f, + Compute101, + Compute101f, ] ); assert_eq!( - NvvmArch::Compute120.all_target_features(), + Compute120.all_target_features(), vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - "compute_72", - "compute_75", - "compute_80", - "compute_86", - "compute_87", - "compute_89", - "compute_90", - "compute_100", - "compute_101", - "compute_103", - "compute_120", + Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61, + Compute62, Compute70, Compute72, Compute75, Compute80, Compute86, Compute87, + Compute89, Compute90, Compute100, Compute101, Compute103, Compute120, ] ); assert_eq!( - NvvmArch::Compute120f.all_target_features(), + Compute120f.all_target_features(), vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - "compute_72", - "compute_75", - "compute_80", - "compute_86", - "compute_87", - "compute_89", - "compute_90", - "compute_100", - "compute_101", - "compute_103", - "compute_120", - "compute_120f", + Compute35, + Compute37, + Compute50, + Compute52, + Compute53, + Compute60, + Compute61, + Compute62, + Compute70, + Compute72, + Compute75, + Compute80, + Compute86, + Compute87, + Compute89, + Compute90, + Compute100, + Compute101, + Compute103, + Compute120, + Compute120f, ] ); assert_eq!( - NvvmArch::Compute120a.all_target_features(), + Compute120a.all_target_features(), vec![ - "compute_35", - "compute_37", - "compute_50", - "compute_52", - "compute_53", - "compute_60", - "compute_61", - "compute_62", - "compute_70", - "compute_72", - "compute_75", - "compute_80", - "compute_86", - "compute_87", - "compute_89", - "compute_90", - "compute_100", - "compute_101", - "compute_103", - "compute_120", - "compute_120f", - "compute_120a", + Compute35, + Compute37, + Compute50, + Compute52, + Compute53, + Compute60, + Compute61, + Compute62, + Compute70, + Compute72, + Compute75, + Compute80, + Compute86, + Compute87, + Compute89, + Compute90, + Compute100, + Compute101, + Compute103, + Compute120, + Compute120f, + Compute120a, ] ); } @@ -1079,44 +1013,26 @@ mod tests { #[test] fn nvvm_arch_iter_up_to_includes_only_lower_or_equal() { // Compute35 only includes itself - let archs: Vec<_> = NvvmArch::Compute35.iter_up_to().collect(); - assert_eq!(archs, vec![NvvmArch::Compute35]); + let archs: Vec<_> = Compute35.iter_up_to().collect(); + assert_eq!(archs, vec![Compute35]); // Compute52 includes all up to 52 - let archs: Vec<_> = NvvmArch::Compute52.iter_up_to().collect(); - assert_eq!( - archs, - vec![ - NvvmArch::Compute35, - NvvmArch::Compute37, - NvvmArch::Compute50, - NvvmArch::Compute52, - ] - ); + let archs: Vec<_> = Compute52.iter_up_to().collect(); + assert_eq!(archs, vec![Compute35, Compute37, Compute50, Compute52,]); // Compute75 includes all up to 75 - let archs: Vec<_> = NvvmArch::Compute75.iter_up_to().collect(); + let archs: Vec<_> = Compute75.iter_up_to().collect(); assert_eq!( archs, vec![ - NvvmArch::Compute35, - NvvmArch::Compute37, - NvvmArch::Compute50, - NvvmArch::Compute52, - NvvmArch::Compute53, - NvvmArch::Compute60, - NvvmArch::Compute61, - NvvmArch::Compute62, - NvvmArch::Compute70, - NvvmArch::Compute72, - NvvmArch::Compute75, + Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61, + Compute62, Compute70, Compute72, Compute75, ] ); } #[test] fn options_parse_correctly() { - use NvvmArch::*; use NvvmOption::{self, *}; let ok = |opt, val| assert_eq!(NvvmOption::from_str(opt), Ok(val)); @@ -1171,92 +1087,63 @@ mod tests { #[test] fn nvvm_arch_variant_checks() { // Base variants - assert!(NvvmArch::Compute90.is_base_variant()); - assert!(NvvmArch::Compute120.is_base_variant()); - assert!(!NvvmArch::Compute90.is_family_variant()); - assert!(!NvvmArch::Compute90.is_architecture_variant()); + assert!(Compute90.is_base_variant()); + assert!(Compute120.is_base_variant()); + assert!(!Compute90.is_family_variant()); + assert!(!Compute90.is_architecture_variant()); // Family-specific variants - assert!(NvvmArch::Compute120f.is_family_variant()); - assert!(!NvvmArch::Compute120f.is_base_variant()); - assert!(!NvvmArch::Compute120f.is_architecture_variant()); + assert!(Compute120f.is_family_variant()); + assert!(!Compute120f.is_base_variant()); + assert!(!Compute120f.is_architecture_variant()); // Architecture-specific variants - assert!(NvvmArch::Compute90a.is_architecture_variant()); - assert!(NvvmArch::Compute120a.is_architecture_variant()); - assert!(!NvvmArch::Compute90a.is_base_variant()); - assert!(!NvvmArch::Compute90a.is_family_variant()); + assert!(Compute90a.is_architecture_variant()); + assert!(Compute120a.is_architecture_variant()); + assert!(!Compute90a.is_base_variant()); + assert!(!Compute90a.is_family_variant()); } #[test] fn nvvm_arch_base_architecture() { // Base variants return themselves - assert_eq!(NvvmArch::Compute90.base_architecture(), NvvmArch::Compute90); - assert_eq!( - NvvmArch::Compute120.base_architecture(), - NvvmArch::Compute120 - ); + assert_eq!(Compute90.base_architecture(), Compute90); + assert_eq!(Compute120.base_architecture(), Compute120); // Family-specific variants return base - assert_eq!( - NvvmArch::Compute120f.base_architecture(), - NvvmArch::Compute120 - ); - assert_eq!( - NvvmArch::Compute101f.base_architecture(), - NvvmArch::Compute101 - ); + assert_eq!(Compute120f.base_architecture(), Compute120); + assert_eq!(Compute101f.base_architecture(), Compute101); // Architecture variants return base - assert_eq!( - NvvmArch::Compute90a.base_architecture(), - NvvmArch::Compute90 - ); - assert_eq!( - NvvmArch::Compute120a.base_architecture(), - NvvmArch::Compute120 - ); + assert_eq!(Compute90a.base_architecture(), Compute90); + assert_eq!(Compute120a.base_architecture(), Compute120); } #[test] fn nvvm_arch_get_variants() { // Architecture with only base variant - let compute80_variants = NvvmArch::Compute80.get_variants(); - assert_eq!(compute80_variants, vec![NvvmArch::Compute80]); + let compute80_variants = Compute80.get_variants(); + assert_eq!(compute80_variants, vec![Compute80]); // Architecture with architecture and base variants - assert_eq!( - NvvmArch::Compute90.get_variants(), - vec![NvvmArch::Compute90, NvvmArch::Compute90a] - ); + assert_eq!(Compute90.get_variants(), vec![Compute90, Compute90a]); // Architecture with all three variants - let expected120 = vec![ - NvvmArch::Compute120, - NvvmArch::Compute120f, - NvvmArch::Compute120a, - ]; - assert_eq!(NvvmArch::Compute120.get_variants(), expected120); - assert_eq!(NvvmArch::Compute120f.get_variants(), expected120); - assert_eq!(NvvmArch::Compute120a.get_variants(), expected120); + let expected120 = vec![Compute120, Compute120f, Compute120a]; + assert_eq!(Compute120.get_variants(), expected120); + assert_eq!(Compute120f.get_variants(), expected120); + assert_eq!(Compute120a.get_variants(), expected120); } #[test] fn nvvm_arch_variants_for_capability() { // Capability with single variant - assert_eq!( - NvvmArch::variants_for_capability(75), - vec![NvvmArch::Compute75] - ); + assert_eq!(NvvmArch::variants_for_capability(75), vec![Compute75]); // Capability with multiple variants assert_eq!( NvvmArch::variants_for_capability(101), - vec![ - NvvmArch::Compute101, - NvvmArch::Compute101f, - NvvmArch::Compute101a, - ] + vec![Compute101, Compute101f, Compute101a,] ); // Non-existent capability diff --git a/crates/rustc_codegen_nvvm/src/lib.rs b/crates/rustc_codegen_nvvm/src/lib.rs index dc2665d7..b1271b65 100644 --- a/crates/rustc_codegen_nvvm/src/lib.rs +++ b/crates/rustc_codegen_nvvm/src/lib.rs @@ -146,7 +146,11 @@ impl CodegenBackend for NvvmCodegenBackend { for opt in &args.nvvm_options { if let ::nvvm::NvvmOption::Arch(arch) = opt { // Add all features up to and including the current architecture - features.extend(arch.all_target_features()); + features.extend( + arch.all_target_features() + .into_iter() + .map(|feature| feature.target_feature()), + ); break; } } @@ -234,7 +238,7 @@ impl CodegenBackend for NvvmCodegenBackend { target_features.extend( backend_features .iter() - .map(|f| rustc_span::Symbol::intern(f)), + .map(|f| rustc_span::Symbol::intern(&f.target_feature())), ); break; } From 32fecaaf0b99bef01aca0405518a810d40de96a9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 18 Nov 2025 09:03:16 +1100 Subject: [PATCH 4/7] Use `&'static str` instead of `String` for target features. It's a more precise type -- these are static values. Also it avoids unnecessary allocations. --- crates/nvvm/src/lib.rs | 68 ++++++++++++++-------------- crates/rustc_codegen_nvvm/src/lib.rs | 4 +- tests/compiletests/src/main.rs | 6 +-- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 41d47ef6..0f859138 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -339,7 +339,7 @@ pub enum NvvmArch { impl Display for NvvmArch { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(&self.target_feature()) + f.write_str(self.target_feature()) } } @@ -435,40 +435,40 @@ impl NvvmArch { } /// Get the target feature string (e.g., "compute_35" for Compute35, "compute_90a" for Compute90a) - pub fn target_feature(&self) -> String { + pub fn target_feature(&self) -> &'static str { match self { - Self::Compute35 => "compute_35".to_string(), - Self::Compute37 => "compute_37".to_string(), - Self::Compute50 => "compute_50".to_string(), - Self::Compute52 => "compute_52".to_string(), - Self::Compute53 => "compute_53".to_string(), - Self::Compute60 => "compute_60".to_string(), - Self::Compute61 => "compute_61".to_string(), - Self::Compute62 => "compute_62".to_string(), - Self::Compute70 => "compute_70".to_string(), - Self::Compute72 => "compute_72".to_string(), - Self::Compute75 => "compute_75".to_string(), - Self::Compute80 => "compute_80".to_string(), - Self::Compute86 => "compute_86".to_string(), - Self::Compute87 => "compute_87".to_string(), - Self::Compute89 => "compute_89".to_string(), - Self::Compute90 => "compute_90".to_string(), - Self::Compute90a => "compute_90a".to_string(), - Self::Compute100 => "compute_100".to_string(), - Self::Compute100f => "compute_100f".to_string(), - Self::Compute100a => "compute_100a".to_string(), - Self::Compute101 => "compute_101".to_string(), - Self::Compute101f => "compute_101f".to_string(), - Self::Compute101a => "compute_101a".to_string(), - Self::Compute103 => "compute_103".to_string(), - Self::Compute103f => "compute_103f".to_string(), - Self::Compute103a => "compute_103a".to_string(), - Self::Compute120 => "compute_120".to_string(), - Self::Compute120f => "compute_120f".to_string(), - Self::Compute120a => "compute_120a".to_string(), - Self::Compute121 => "compute_121".to_string(), - Self::Compute121f => "compute_121f".to_string(), - Self::Compute121a => "compute_121a".to_string(), + Self::Compute35 => "compute_35", + Self::Compute37 => "compute_37", + Self::Compute50 => "compute_50", + Self::Compute52 => "compute_52", + Self::Compute53 => "compute_53", + Self::Compute60 => "compute_60", + Self::Compute61 => "compute_61", + Self::Compute62 => "compute_62", + Self::Compute70 => "compute_70", + Self::Compute72 => "compute_72", + Self::Compute75 => "compute_75", + Self::Compute80 => "compute_80", + Self::Compute86 => "compute_86", + Self::Compute87 => "compute_87", + Self::Compute89 => "compute_89", + Self::Compute90 => "compute_90", + Self::Compute90a => "compute_90a", + Self::Compute100 => "compute_100", + Self::Compute100f => "compute_100f", + Self::Compute100a => "compute_100a", + Self::Compute101 => "compute_101", + Self::Compute101f => "compute_101f", + Self::Compute101a => "compute_101a", + Self::Compute103 => "compute_103", + Self::Compute103f => "compute_103f", + Self::Compute103a => "compute_103a", + Self::Compute120 => "compute_120", + Self::Compute120f => "compute_120f", + Self::Compute120a => "compute_120a", + Self::Compute121 => "compute_121", + Self::Compute121f => "compute_121f", + Self::Compute121a => "compute_121a", } } diff --git a/crates/rustc_codegen_nvvm/src/lib.rs b/crates/rustc_codegen_nvvm/src/lib.rs index b1271b65..851724b6 100644 --- a/crates/rustc_codegen_nvvm/src/lib.rs +++ b/crates/rustc_codegen_nvvm/src/lib.rs @@ -149,7 +149,7 @@ impl CodegenBackend for NvvmCodegenBackend { features.extend( arch.all_target_features() .into_iter() - .map(|feature| feature.target_feature()), + .map(|s| s.target_feature().to_string()), ); break; } @@ -238,7 +238,7 @@ impl CodegenBackend for NvvmCodegenBackend { target_features.extend( backend_features .iter() - .map(|f| rustc_span::Symbol::intern(&f.target_feature())), + .map(|f| rustc_span::Symbol::intern(f.target_feature())), ); break; } diff --git a/tests/compiletests/src/main.rs b/tests/compiletests/src/main.rs index e797f11b..a4614c4e 100644 --- a/tests/compiletests/src/main.rs +++ b/tests/compiletests/src/main.rs @@ -149,7 +149,7 @@ impl Runner { // which offer `// only-S` and `// ignore-S` for any stage ID `S`. let stage_id = if variation.name == "default" { // Use the architecture name as the stage ID. - arch.clone() + arch.to_string() } else { // Include the variation name in the stage ID. format!("{}-{}", arch, variation.name) @@ -161,7 +161,7 @@ impl Runner { &self.deps_target_dir, &self.codegen_backend_path, CUDA_TARGET, - &arch, + arch, ); let mut flags = test_rustc_flags( &self.codegen_backend_path, @@ -174,7 +174,7 @@ impl Runner { .deps_target_dir .join(DepKind::ProcMacro.target_dir_suffix(CUDA_TARGET)), ], - &arch, + arch, ); flags += variation.extra_flags; From d2bd8ac76c4a8e201bc6bd8d37e21cbf6262dc05 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 25 Nov 2025 14:33:27 +1100 Subject: [PATCH 5/7] Simplify `NvvmArch::is_base_variant`. --- crates/nvvm/src/lib.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 0f859138..b5c4143d 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -540,12 +540,9 @@ impl NvvmArch { /// Check if this architecture is a base variant (no suffix) pub fn is_base_variant(&self) -> bool { - let feature = self.target_feature(); - // A base variant doesn't end with any letter suffix - !feature - .chars() - .last() - .is_some_and(|c| c.is_ascii_alphabetic()) + !self + .target_feature() + .ends_with(|c| char::is_ascii_alphabetic(&c)) } /// Check if this architecture is a family-specific variant (f suffix) From 0ee976e2f833e563edb1392bffb148841f7fc15e Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 25 Nov 2025 15:23:46 +1100 Subject: [PATCH 6/7] Remove `NvvmArch::iter_up_to`. This is just a slightly less powerful version of `all_target_features`, one that always includes all the 'a' and 'f' variants for each level. It doesn't seem worth having. --- crates/nvvm/src/lib.rs | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index b5c4143d..0654cd93 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -532,12 +532,6 @@ impl NvvmArch { } } - /// Create an iterator over all architectures from Compute35 up to and including this one - pub fn iter_up_to(&self) -> impl Iterator { - let current = self.capability_value(); - NvvmArch::iter().filter(move |arch| arch.capability_value() <= current) - } - /// Check if this architecture is a base variant (no suffix) pub fn is_base_variant(&self) -> bool { !self @@ -1007,27 +1001,6 @@ mod tests { ); } - #[test] - fn nvvm_arch_iter_up_to_includes_only_lower_or_equal() { - // Compute35 only includes itself - let archs: Vec<_> = Compute35.iter_up_to().collect(); - assert_eq!(archs, vec![Compute35]); - - // Compute52 includes all up to 52 - let archs: Vec<_> = Compute52.iter_up_to().collect(); - assert_eq!(archs, vec![Compute35, Compute37, Compute50, Compute52,]); - - // Compute75 includes all up to 75 - let archs: Vec<_> = Compute75.iter_up_to().collect(); - assert_eq!( - archs, - vec![ - Compute35, Compute37, Compute50, Compute52, Compute53, Compute60, Compute61, - Compute62, Compute70, Compute72, Compute75, - ] - ); - } - #[test] fn options_parse_correctly() { use NvvmOption::{self, *}; From 8e78cd90cd213ded255457afb9a4687f9a1dca07 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 26 Nov 2025 12:43:55 +1100 Subject: [PATCH 7/7] Improve some comments. --- crates/nvvm/src/lib.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index 0654cd93..c89aab93 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -21,7 +21,7 @@ pub fn ir_version() -> (i32, i32) { let mut minor_ir = MaybeUninit::uninit(); let mut major_dbg = MaybeUninit::uninit(); let mut minor_dbg = MaybeUninit::uninit(); - // according to the docs this cant fail + // according to the docs this can't fail let _ = nvvm_sys::nvvmIRVersion( major_ir.as_mut_ptr(), minor_ir.as_mut_ptr(), @@ -39,7 +39,7 @@ pub fn dbg_version() -> (i32, i32) { let mut minor_ir = MaybeUninit::uninit(); let mut major_dbg = MaybeUninit::uninit(); let mut minor_dbg = MaybeUninit::uninit(); - // according to the docs this cant fail + // according to the docs this can't fail let _ = nvvm_sys::nvvmIRVersion( major_ir.as_mut_ptr(), minor_ir.as_mut_ptr(), @@ -55,12 +55,17 @@ pub fn nvvm_version() -> (i32, i32) { unsafe { let mut major = MaybeUninit::uninit(); let mut minor = MaybeUninit::uninit(); - // according to the docs this cant fail + // according to the docs this can't fail let _ = nvvm_sys::nvvmVersion(major.as_mut_ptr(), minor.as_mut_ptr()); (major.assume_init(), minor.assume_init()) } } +/// Rust version of `nvvmResult`. +/// - `NVVM_SUCCESS` isn't covered because this type only covers the error cases, due to Rust +/// having `Result` where the success case is separate from the error cases. +/// - `NVVM_ERROR_INVALID_PROGRAM` isn't covered because it's not possible to get an invalid +/// program handle through this safe api. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum NvvmError { /// The NVVM compiler ran out of memory. @@ -79,8 +84,6 @@ pub enum NvvmError { /// Compilation failed because of bad IR or other reasons. Getting the compiler /// log should yield more info. CompilationError, - // InvalidProgram isnt handled because its not possible - // to get an invalid program handle through this safe api } impl Display for NvvmError {