Skip to content

Commit 6ffb344

Browse files
committed
Emulate 128 bit ops
I thought we could use the fallback code from compiler-builtins, but apparently not: most things were broken. I had AI generate our implementations; I did not write them myself. This generates a lot of code, so perhaps it would be better to generate a fn and then have everything call it? I added an example and confirmed that it works and passes.
1 parent 2891f7d commit 6ffb344

File tree

14 files changed

+1131
-443
lines changed

14 files changed

+1131
-443
lines changed

.github/workflows/ci_windows.yml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,13 @@ jobs:
8888
- name: Build all bindings
8989
run: cargo build --all-features -p cust_raw
9090

91-
- name: Build workspace
92-
run: cargo build --workspace --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex0*" --exclude "cudnn*" --exclude "sha2*"
91+
- name: Build
92+
run: cargo build --workspace --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex*" --exclude "cudnn*" --exclude "sha2*" --exclude "i128*"
9393

9494
- name: Clippy
9595
env:
9696
RUSTFLAGS: -Dwarnings
97-
run: cargo clippy --workspace --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex0*" --exclude "cudnn*" --exclude "sha2*"
97+
run: cargo clippy --workspace --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex*" --exclude "cudnn*" --exclude "sha2*" --exclude "i128*"
9898

9999
# Very limited testing because we can only run tests that don't rely on having a CUDA GPU.
100100
- name: Test
@@ -103,8 +103,7 @@ jobs:
103103
- name: Check documentation
104104
env:
105105
RUSTDOCFLAGS: -Dwarnings
106-
run: cargo doc --workspace --all-features --document-private-items --no-deps --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex0*" --exclude "cudnn*" --exclude "sha2*" --exclude "cust_raw"
107-
108-
# Disabled due to dll issues, someone with Windows knowledge needed
106+
run: cargo doc --workspace --all-features --document-private-items --no-deps --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex*" --exclude "cudnn*" --exclude "sha2*" --exclude "i128*" --exclude "cust_raw"
107+
# Disabled due to dll issues, someone with Windows knowledge needed
109108
# - name: Compiletest
110109
# run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_75,compute_90

Cargo.lock

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ members = [
1616
"examples/cuda/path_tracer/kernels",
1717
"examples/cuda/sha2_crates_io",
1818
"examples/cuda/sha2_crates_io/kernels",
19+
"examples/cuda/i128_demo",
20+
"examples/cuda/i128_demo/kernels",
1921

2022
"examples/optix/*",
2123
"tests/compiletests",
-2.11 KB
Binary file not shown.

crates/rustc_codegen_nvvm/libintrinsics.ll

Lines changed: 0 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -148,113 +148,6 @@ start:
148148
}
149149
declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) #0
150150

151-
; This is a bit weird, we need to use functions defined in rust crates (compiler_builtins)
152-
; as intrinsics in the codegen, but we can't directly use their name, otherwise we will have
153-
; really odd and incorrect behavior in the crate they're defined in. So we need to make a wrapper for them that is opaque
154-
; to the codegen, which is what this is doing.
155-
156-
define {<2 x i64>, i1} @__nvvm_i128_addo(<2 x i64>, <2 x i64>) #0 {
157-
start:
158-
%2 = call {<2 x i64>, i1} @__rust_i128_addo(<2 x i64> %0, <2 x i64> %1)
159-
ret {<2 x i64>, i1} %2
160-
}
161-
declare {<2 x i64>, i1} @__rust_i128_addo(<2 x i64>, <2 x i64>) #0
162-
163-
define {<2 x i64>, i1} @__nvvm_u128_addo(<2 x i64>, <2 x i64>) #0 {
164-
start:
165-
%2 = call {<2 x i64>, i1} @__rust_u128_addo(<2 x i64> %0, <2 x i64> %1)
166-
ret {<2 x i64>, i1} %2
167-
}
168-
declare {<2 x i64>, i1} @__rust_u128_addo(<2 x i64>, <2 x i64>) #0
169-
170-
define {<2 x i64>, i1} @__nvvm_i128_subo(<2 x i64>, <2 x i64>) #0 {
171-
start:
172-
%2 = call {<2 x i64>, i1} @__rust_i128_subo(<2 x i64> %0, <2 x i64> %1)
173-
ret {<2 x i64>, i1} %2
174-
}
175-
declare {<2 x i64>, i1} @__rust_i128_subo(<2 x i64>, <2 x i64>) #0
176-
177-
define {<2 x i64>, i1} @__nvvm_u128_subo(<2 x i64>, <2 x i64>) #0 {
178-
start:
179-
%2 = call {<2 x i64>, i1} @__rust_u128_subo(<2 x i64> %0, <2 x i64> %1)
180-
ret {<2 x i64>, i1} %2
181-
}
182-
declare {<2 x i64>, i1} @__rust_u128_subo(<2 x i64>, <2 x i64>) #0
183-
184-
define {<2 x i64>, i1} @__nvvm_i128_mulo(<2 x i64>, <2 x i64>) #0 {
185-
start:
186-
%2 = call {<2 x i64>, i1} @__rust_i128_mulo(<2 x i64> %0, <2 x i64> %1)
187-
ret {<2 x i64>, i1} %2
188-
}
189-
declare {<2 x i64>, i1} @__rust_i128_mulo(<2 x i64>, <2 x i64>) #0
190-
191-
define {<2 x i64>, i1} @__nvvm_u128_mulo(<2 x i64>, <2 x i64>) #0 {
192-
start:
193-
%2 = call {<2 x i64>, i1} @__rust_u128_mulo(<2 x i64> %0, <2 x i64> %1)
194-
ret {<2 x i64>, i1} %2
195-
}
196-
declare {<2 x i64>, i1} @__rust_u128_mulo(<2 x i64>, <2 x i64>) #0
197-
198-
; Division operations from compiler-builtins
199-
define <2 x i64> @__nvvm_divti3(<2 x i64>, <2 x i64>) #0 {
200-
start:
201-
%2 = call <2 x i64> @__divti3(<2 x i64> %0, <2 x i64> %1)
202-
ret <2 x i64> %2
203-
}
204-
declare <2 x i64> @__divti3(<2 x i64>, <2 x i64>) #0
205-
206-
define <2 x i64> @__nvvm_udivti3(<2 x i64>, <2 x i64>) #0 {
207-
start:
208-
%2 = call <2 x i64> @__udivti3(<2 x i64> %0, <2 x i64> %1)
209-
ret <2 x i64> %2
210-
}
211-
declare <2 x i64> @__udivti3(<2 x i64>, <2 x i64>) #0
212-
213-
; Remainder operations from compiler-builtins
214-
define <2 x i64> @__nvvm_modti3(<2 x i64>, <2 x i64>) #0 {
215-
start:
216-
%2 = call <2 x i64> @__modti3(<2 x i64> %0, <2 x i64> %1)
217-
ret <2 x i64> %2
218-
}
219-
declare <2 x i64> @__modti3(<2 x i64>, <2 x i64>) #0
220-
221-
define <2 x i64> @__nvvm_umodti3(<2 x i64>, <2 x i64>) #0 {
222-
start:
223-
%2 = call <2 x i64> @__umodti3(<2 x i64> %0, <2 x i64> %1)
224-
ret <2 x i64> %2
225-
}
226-
declare <2 x i64> @__umodti3(<2 x i64>, <2 x i64>) #0
227-
228-
; Multiplication from compiler-builtins
229-
define <2 x i64> @__nvvm_multi3(<2 x i64>, <2 x i64>) #0 {
230-
start:
231-
%2 = call <2 x i64> @__multi3(<2 x i64> %0, <2 x i64> %1)
232-
ret <2 x i64> %2
233-
}
234-
declare <2 x i64> @__multi3(<2 x i64>, <2 x i64>) #0
235-
236-
; Shift operations from compiler-builtins
237-
define <2 x i64> @__nvvm_ashlti3(<2 x i64>, i32) #0 {
238-
start:
239-
%2 = call <2 x i64> @__ashlti3(<2 x i64> %0, i32 %1)
240-
ret <2 x i64> %2
241-
}
242-
declare <2 x i64> @__ashlti3(<2 x i64>, i32) #0
243-
244-
define <2 x i64> @__nvvm_ashrti3(<2 x i64>, i32) #0 {
245-
start:
246-
%2 = call <2 x i64> @__ashrti3(<2 x i64> %0, i32 %1)
247-
ret <2 x i64> %2
248-
}
249-
declare <2 x i64> @__ashrti3(<2 x i64>, i32) #0
250-
251-
define <2 x i64> @__nvvm_lshrti3(<2 x i64>, i32) #0 {
252-
start:
253-
%2 = call <2 x i64> @__lshrti3(<2 x i64> %0, i32 %1)
254-
ret <2 x i64> %2
255-
}
256-
declare <2 x i64> @__lshrti3(<2 x i64>, i32) #0
257-
258151
; Required because we need to explicitly generate { i32, i1 } for the following intrinsics
259152
; except rustc will not generate them (it will make { i32, i8 }) which libnvvm rejects.
260153

0 commit comments

Comments
 (0)