Skip to content
Open
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
70ef83a
define basic intrinsics
vchuravy Feb 4, 2025
02f3124
Fix docstrings
christiangnrd Sep 16, 2025
1103574
localmemory
christiangnrd Sep 16, 2025
8f40452
Move default barrier implementation to KernelIntrinsics
christiangnrd Sep 18, 2025
471dc58
Implement remaining intrinsics for POCL
christiangnrd Sep 18, 2025
a8a14a1
Add basic tests
christiangnrd Sep 18, 2025
96d9ffc
Int32 -> Int
christiangnrd Oct 1, 2025
685a7d4
Format
christiangnrd Oct 2, 2025
a54fb5d
Launch interface
christiangnrd Oct 22, 2025
5a6cd9b
Docs
christiangnrd Oct 22, 2025
faa5213
New stuff docs
christiangnrd Oct 22, 2025
6baa445
Fix
christiangnrd Oct 22, 2025
1953e8d
Temp adaptation to test in-progress interface with CUDA.jl
christiangnrd Oct 22, 2025
ba52bbb
Better launch interface
christiangnrd Nov 4, 2025
1fb74a0
Only keep common compiler kwarg
christiangnrd Nov 4, 2025
36c67ce
Fixup tests
christiangnrd Nov 5, 2025
0636279
No `backend` in macro kwargs
christiangnrd Nov 5, 2025
48c6b8b
Tests
christiangnrd Nov 5, 2025
cc240c6
Fixes
christiangnrd Nov 5, 2025
f41d1e0
More fix
christiangnrd Nov 5, 2025
8e97be1
Print
christiangnrd Nov 5, 2025
bf37826
tweak
christiangnrd Nov 5, 2025
cae65fe
Fix
christiangnrd Nov 5, 2025
8861658
Ugh
christiangnrd Nov 5, 2025
86c18fc
Format
christiangnrd Nov 5, 2025
0a6f172
Format
christiangnrd Nov 5, 2025
2371f88
drbh
christiangnrd Nov 5, 2025
0a45913
`_print` docstring
christiangnrd Nov 6, 2025
ff3b077
Test all launch size options
christiangnrd Nov 6, 2025
0732ae0
Fix backend
christiangnrd Nov 6, 2025
7d6be1b
Format
christiangnrd Nov 6, 2025
04f21ec
More consistent docstrings
christiangnrd Nov 6, 2025
d068b80
Qualify `map`
christiangnrd Nov 6, 2025
2fe4502
`KIKernel` -> `Kernel`
christiangnrd Nov 6, 2025
aaf20a7
Remove redundant parameter
christiangnrd Nov 6, 2025
e2941fb
`kiconvert` -> `argconvert`
christiangnrd Nov 6, 2025
5262c9e
Remove old definition
christiangnrd Nov 6, 2025
ca11482
Unbreak ABI
christiangnrd Nov 6, 2025
d6f3e66
Readd old definition
christiangnrd Nov 7, 2025
1400c9f
Naming
christiangnrd Nov 7, 2025
055a81f
Reword
christiangnrd Nov 7, 2025
913f8c8
Docstrings
christiangnrd Nov 7, 2025
298e807
Rename `kikernel`
christiangnrd Nov 7, 2025
42f17d6
Renaming
christiangnrd Nov 7, 2025
da98ac8
Update `unsafe_indices` examples to use KI directly
christiangnrd Nov 12, 2025
8510dfe
Format
christiangnrd Nov 12, 2025
89b57e9
Format
christiangnrd Nov 12, 2025
ec246c7
Format
christiangnrd Nov 12, 2025
5620ba2
Add kernel launch test
christiangnrd Nov 12, 2025
b0013f1
Synchronize
christiangnrd Nov 13, 2025
da1d96d
don't export KI and fix POCL.synchronize
vchuravy Nov 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 34 additions & 36 deletions src/KernelAbstractions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,15 @@ function unsafe_free! end

unsafe_free!(::AbstractArray) = return

"""
Abstract type for all KernelAbstractions backends.
"""
abstract type Backend end

include("intrinsics.jl")
import .KernelIntrinsics
export KernelIntrinsics

###
# Kernel language
# - @localmem
Expand Down Expand Up @@ -239,11 +248,8 @@ end
Declare storage that is local to a workgroup.
"""
macro localmem(T, dims)
# Stay in sync with CUDAnative
id = gensym("static_shmem")

return quote
$SharedMemory($(esc(T)), Val($(esc(dims))), Val($(QuoteNode(id))))
$SharedMemory($(esc(T)), Val($(esc(dims))))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is technically an ABI break, which I had avoided so far.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is ca11482 sufficient or do we still need to require backends to take in a third unused argument on the KI side?

end
end

Expand Down Expand Up @@ -460,13 +466,27 @@ end
# Internal kernel functions
###

function __index_Local_Linear end
function __index_Group_Linear end
function __index_Global_Linear end
@inline function __index_Local_Linear(ctx)
return KernelIntrinsics.get_local_id().x
end

@inline function __index_Group_Linear(ctx)
return KernelIntrinsics.get_group_id().x
end

function __index_Local_Cartesian end
function __index_Group_Cartesian end
function __index_Global_Cartesian end
@inline function __index_Global_Linear(ctx)
return KernelIntrinsics.get_global_id().x
end

@inline function __index_Local_Cartesian(ctx)
return @inbounds workitems(__iterspace(ctx))[KernelIntrinsics.get_local_id().x]
end
@inline function __index_Group_Cartesian(ctx)
return @inbounds blocks(__iterspace(ctx))[KernelIntrinsics.get_group_id().x]
end
@inline function __index_Global_Cartesian(ctx)
return @inbounds expand(__iterspace(ctx), KernelIntrinsics.get_group_id().x, KernelIntrinsics.get_local_id().x)
end

@inline __index_Local_NTuple(ctx, I...) = Tuple(__index_Local_Cartesian(ctx, I...))
@inline __index_Group_NTuple(ctx, I...) = Tuple(__index_Group_Cartesian(ctx, I...))
Expand All @@ -482,11 +502,6 @@ constify(arg) = adapt(ConstAdaptor(), arg)
# Backend hierarchy
###

"""

Abstract type for all KernelAbstractions backends.
"""
abstract type Backend end

"""
Abstract type for all GPU based KernelAbstractions backends.
Expand Down Expand Up @@ -796,29 +811,12 @@ include("macros.jl")
###

function Scratchpad end
function SharedMemory end

function __synchronize()
error("@synchronize used outside kernel or not captured")
end

@generated function __print(items...)
str = ""
args = []
# SharedMemory(t::Type{T}, dims::Val{Dims}, id::Val{Id}) where {T, Dims, Id} = KernelIntrinsics.localmemory(t, dims, id)
SharedMemory(t::Type{T}, dims::Val{Dims}) where {T, Dims} = KernelIntrinsics.localmemory(t, dims)

for i in 1:length(items)
item = :(items[$i])
T = items[i]
if T <: Val
item = QuoteNode(T.parameters[1])
end
push!(args, item)
end
__synchronize() = KernelIntrinsics.barrier()

return quote
print($(args...))
end
end
__print(args...) = KernelIntrinsics._print(args...)

# Utils
__size(args::Tuple) = Tuple{args...}
Expand Down
Loading
Loading