Skip to content

Commit 9ed71cb

Browse files
authored
Merge pull request #1 from KernelTuner/dev
Rewrite core structure
2 parents 8026c7f + 46d598c commit 9ed71cb

40 files changed

+6966
-4111
lines changed

.github/workflows/cmake-action.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
name: CMake
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
cuda-version:
7+
required: true
8+
type: string
9+
10+
env:
11+
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
12+
BUILD_TYPE: Debug
13+
14+
jobs:
15+
build:
16+
# The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac.
17+
# You can convert this to a matrix build if you need cross-platform coverage.
18+
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
19+
runs-on: ubuntu-latest
20+
21+
steps:
22+
- uses: Jimver/cuda-toolkit@v0.2.11
23+
id: cuda-toolkit
24+
with:
25+
method: network
26+
sub-packages: '["nvcc"]'
27+
cuda: ${{ inputs.cuda-version }}
28+
29+
- uses: actions/checkout@v3
30+
with:
31+
submodules: 'true'
32+
33+
- name: Configure CMake
34+
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
35+
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
36+
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DKERNEL_FLOAT_BUILD_TEST=1 -DKERNEL_FLOAT_BUILD_EXAMPLE=1
37+
38+
- name: Build
39+
# Build your program with the given configuration
40+
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
41+
42+
- name: Test
43+
working-directory: ${{github.workspace}}/build
44+
# Run the test executable produced by the build directly (ctest is not invoked in this step).
45+
# See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
46+
run: ./tests/kernel_float_tests --durations=yes --success --verbosity=high ~[GPU]
47+

.github/workflows/cmake.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
name: CMake
2+
3+
on:
4+
push:
5+
pull_request:
6+
branches: [ "main" ]
7+
8+
env:
9+
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
10+
BUILD_TYPE: Debug
11+
12+
jobs:
13+
build-cuda:
14+
uses: ./.github/workflows/cmake-action.yml
15+
with:
16+
cuda-version: "12.2.0"
17+
18+
build-cuda-11-7:
19+
needs: build-cuda
20+
uses: ./.github/workflows/cmake-action.yml
21+
with:
22+
cuda-version: "11.7.0"
23+
24+
build-cuda-12-0:
25+
needs: build-cuda
26+
uses: ./.github/workflows/cmake-action.yml
27+
with:
28+
cuda-version: "12.0.0"

combine.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,24 @@
22
import subprocess
33
from datetime import datetime
44

5+
license_boilerplate = """/*
6+
* Kernel Float: Header-only library for vector types and reduced precision floating-point math.
7+
*
8+
* Licensed under the Apache License, Version 2.0 (the "License");
9+
* you may not use this file except in compliance with the License.
10+
* You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
"""
22+
523
directory = "include/kernel_float"
624
contents = dict()
725

@@ -28,7 +46,8 @@
2846
except Exception as e:
2947
print(f"warning: {e}")
3048

31-
output = "\n".join([
49+
output = license_boilerplate
50+
output += "\n".join([
3251
"//" + "=" * 80,
3352
"// this file has been auto-generated, do not modify its contents!",
3453
f"// date: {date}",

docs/api.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@ API Reference
33
.. toctree::
44
api/types.rst
55
api/primitives.rst
6+
api/generation.rst
67
api/unary_operators.rst
78
api/binary_operators.rst
89
api/reductions.rst
9-
api/shuffling.rst
1010
api/mathematical.rst
11+
api/conditional.rst
12+

docs/build_api.py

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -65,51 +65,51 @@ def build_index_page(groups):
6565

6666
return filename
6767

68-
aliases = []
69-
for ty in ["vec", "float", "double", "half", "bfloat16x", ""]:
70-
if ty != "vec":
71-
aliases.append(f"{ty}X")
72-
68+
aliases = ["scalar", "vec"]
69+
for ty in ["vec"]:
7370
for i in range(2, 8 + 1):
7471
aliases.append(f"{ty}{i}")
7572

7673
groups = {
7774
"Types": [
7875
("vector", "vector", "struct"),
79-
("Aliases", [
80-
"unaligned_vec",
81-
"vec",
82-
] + aliases,
83-
"typedef"),
76+
("Aliases", aliases, "typedef"),
8477
],
8578
"Primitives": [
86-
("range", "range()"),
87-
("range", "range(F)"),
8879
"map",
8980
"reduce",
9081
"zip",
9182
"zip_common",
9283
"cast",
9384
"broadcast",
94-
"resize",
95-
"for_each",
96-
],
97-
"Shuffling": [
85+
"convert",
86+
"make_vec",
87+
"into_vector",
9888
"concat",
99-
"swizzle",
100-
"first",
101-
"last",
102-
"reversed",
103-
"rotate_left",
104-
"rotate_right",
89+
"select",
90+
"for_each",
10591
],
106-
"Unary Operators": [
92+
"Generation": [
93+
"range",
94+
"range_like",
95+
"each_index",
10796
"fill",
10897
"fill_like",
10998
"zeros",
11099
"zeros_like",
111100
"ones",
112101
"ones_like",
102+
],
103+
"Shuffling": [
104+
# "concat",
105+
# "swizzle",
106+
# "first",
107+
# "last",
108+
# "reversed",
109+
# "rotate_left",
110+
# "rotate_right",
111+
],
112+
"Unary Operators": [
113113
"negate",
114114
"bit_not",
115115
"logical_not",
@@ -135,21 +135,21 @@ def build_index_page(groups):
135135
("min", "min(L&&, R&&)"),
136136
"nextafter",
137137
"modf",
138-
"pow",
138+
("pow", "pow(L&&, R&&)"),
139139
"remainder",
140140
#"rhypot",
141141
],
142142
"Reductions": [
143143
"sum",
144-
("max", "max(V&&)"),
145-
("min", "min(V&&)"),
144+
("max", "max(const V&)"),
145+
("min", "min(const V&)"),
146146
"product",
147147
"all",
148148
"any",
149149
"count",
150150
],
151151
"Mathematical": [
152-
"abs",
152+
("abs", "abs(const V&)"),
153153
"acos",
154154
"acosh",
155155
"asin",
@@ -166,22 +166,22 @@ def build_index_page(groups):
166166
"erfcinv",
167167
"erfcx",
168168
"erfinv",
169-
"exp",
169+
("exp", "exp(const V&)"),
170170
"exp10",
171171
"exp2",
172172
"fabs",
173173
"floor",
174174
"ilogb",
175175
"lgamma",
176-
"log",
176+
("log", "log(const V&)"),
177177
"log10",
178178
"logb",
179179
"nearbyint",
180180
"normcdf",
181181
"rcbrt",
182182
"sin",
183183
"sinh",
184-
"sqrt",
184+
("sqrt", "sqrt(const V&)"),
185185
"tan",
186186
"tanh",
187187
"tgamma",
@@ -193,6 +193,11 @@ def build_index_page(groups):
193193
"isinf",
194194
"isnan",
195195
],
196+
"Conditional": [
197+
("where", "where(const C&, const L&, const R&)"),
198+
("where", "where(const C&, const L&)"),
199+
("where", "where(const C&)"),
200+
]
196201
}
197202

198203
build_index_page(groups)

examples/vector_add/main.cu

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
#include <vector>
55

66
#include "kernel_float.h"
7-
namespace kf = kernel_float;
8-
9-
using x = kf::half;
7+
using namespace kernel_float::prelude;
108

119
void cuda_check(cudaError_t code) {
1210
if (code != cudaSuccess) {
@@ -15,11 +13,7 @@ void cuda_check(cudaError_t code) {
1513
}
1614

1715
template<int N>
18-
__global__ void my_kernel(
19-
int length,
20-
const kf::unaligned_vec<__half, N>* input,
21-
double constant,
22-
kf::unaligned_vec<float, N>* output) {
16+
__global__ void my_kernel(int length, const khalf<N>* input, double constant, kfloat<N>* output) {
2317
int i = blockIdx.x * blockDim.x + threadIdx.x;
2418

2519
if (i * N < length) {
@@ -30,24 +24,24 @@ __global__ void my_kernel(
3024
template<int items_per_thread>
3125
void run_kernel(int n) {
3226
double constant = 1.0;
33-
std::vector<__half> input(n);
27+
std::vector<half> input(n);
3428
std::vector<float> output_expected;
3529
std::vector<float> output_result;
3630

3731
// Generate input data
3832
for (int i = 0; i < n; i++) {
39-
input[i] = __half(i);
33+
input[i] = half(i);
4034
output_expected[i] = float(i + constant);
4135
}
4236

4337
// Allocate device memory
44-
kf::unaligned_vec<__half, items_per_thread>* input_dev;
45-
kf::unaligned_vec<float, items_per_thread>* output_dev;
46-
cuda_check(cudaMalloc(&input_dev, sizeof(__half) * n));
38+
khalf<items_per_thread>* input_dev;
39+
kfloat<items_per_thread>* output_dev;
40+
cuda_check(cudaMalloc(&input_dev, sizeof(half) * n));
4741
cuda_check(cudaMalloc(&output_dev, sizeof(float) * n));
4842

4943
// Copy device memory
50-
cuda_check(cudaMemcpy(input_dev, input.data(), sizeof(__half) * n, cudaMemcpyDefault));
44+
cuda_check(cudaMemcpy(input_dev, input.data(), sizeof(half) * n, cudaMemcpyDefault));
5145

5246
// Launch kernel!
5347
int block_size = 256;

include/kernel_float.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
#ifndef KERNEL_FLOAT_H
22
#define KERNEL_FLOAT_H
33

4+
#include "kernel_float/base.h"
45
#include "kernel_float/bf16.h"
56
#include "kernel_float/binops.h"
6-
#include "kernel_float/cast.h"
7+
#include "kernel_float/conversion.h"
78
#include "kernel_float/fp16.h"
8-
#include "kernel_float/fp8.h"
9-
#include "kernel_float/interface.h"
109
#include "kernel_float/iterate.h"
1110
#include "kernel_float/macros.h"
1211
#include "kernel_float/meta.h"
12+
#include "kernel_float/prelude.h"
1313
#include "kernel_float/reduce.h"
14-
#include "kernel_float/storage.h"
15-
#include "kernel_float/swizzle.h"
14+
#include "kernel_float/triops.h"
1615
#include "kernel_float/unops.h"
16+
#include "kernel_float/vector.h"
1717

18-
#endif
18+
#endif

0 commit comments

Comments
 (0)