Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions 13_BitonicSort/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Pull in the shared `common` CMake module. RES is set to the full path of the
# included file, or to NOTFOUND if the include failed.
include(common RESULT_VARIABLE RES)
if(NOT RES)
# Stop configuration with a hint about where the module is expected to live.
message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

# Create this example's executable project. The first four arguments are passed
# empty and the fifth forwards the precompiled-header target variable.
# NOTE(review): presumably this call defines EXECUTABLE_NAME, which the
# resource-embedding section below reads — confirm against common.cmake.
nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")

# When NBL_EMBED_BUILTIN_RESOURCES is enabled, register every file found under
# app_resources/ as a builtin resource, generate the resource target
# (<EXECUTABLE_NAME>_builtinResourceData) and link it into the executable.
if(NBL_EMBED_BUILTIN_RESOURCES)
    set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
    set(RESOURCE_DIR "app_resources")

    # Absolute locations handed to the resource generator: where to search for
    # the resources and where generated sources/headers are written (binary dir).
    get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
    get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
    get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)

    # Collect resource paths relative to the resource directory.
    # CONFIGURE_DEPENDS makes the build system re-evaluate the glob at build
    # time and re-run CMake when files are added or removed; without it a new
    # shader would silently be left out of the embedded resources until someone
    # re-configured by hand.
    file(GLOB_RECURSE BUILTIN_RESOURCE_FILES
        RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}"
        CONFIGURE_DEPENDS
        "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*"
    )
    foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
        LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
    endforeach()

    ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")

    LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
endif()
87 changes: 87 additions & 0 deletions 13_BitonicSort/app_resources/bitonic_sort_shader.comp.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include "common.hlsl"
#include "nbl/builtin/hlsl/workgroup/basic.hlsl"
#include "nbl/builtin/hlsl/workgroup/bitonic_sort.hlsl"

// Push constants for this dispatch; PushConstantData is declared in common.hlsl.
[[vk::push_constant]] PushConstantData pushConstants;

using namespace nbl::hlsl;

// Compile-time configuration of the workgroup bitonic sort, built from the
// ElementsPerThreadLog2 / WorkgroupSizeLog2 constants in common.hlsl, with
// uint32_t for both type parameters and less<uint32_t> as the comparator.
// NOTE(review): the two uint32_t parameters are presumably the key and value
// types, in that order — confirm against bitonic_sort.hlsl.
using BitonicSortConfig = workgroup::bitonic_sort::bitonic_sort_config<ElementsPerThreadLog2, WorkgroupSizeLog2, uint32_t, uint32_t, less<uint32_t> >;

// Workgroup size as reported by the sort configuration.
NBL_CONSTEXPR uint32_t WorkgroupSize = BitonicSortConfig::WorkgroupSize;

// Groupshared scratch array of 2 * WorkgroupSize uint32_t slots; it is read
// and written through SharedMemoryAccessor.
groupshared uint32_t sharedmem[2 * WorkgroupSize];

// Defines glsl::gl_WorkGroupSize() for this shader: a one-dimensional
// workgroup of BitonicSortConfig::WorkgroupSize invocations.
uint32_t3 glsl::gl_WorkGroupSize() { return uint32_t3(uint32_t(BitonicSortConfig::WorkgroupSize), 1, 1); }

// Accessor over the `sharedmem` groupshared array. It provides indexed
// get/set plus a barrier, and is the shared-memory accessor handed to the
// workgroup bitonic sort in main().
struct SharedMemoryAccessor
{
    // Synchronizes the workgroup by calling glsl::barrier().
    void workgroupExecutionAndMemoryBarrier()
    {
        glsl::barrier();
    }

    // Reads sharedmem[slot] into `result`.
    template <typename AccessType, typename IndexType>
    void get(IndexType slot, NBL_REF_ARG(AccessType) result)
    {
        result = sharedmem[slot];
    }

    // Writes `data` to sharedmem[slot].
    template <typename AccessType, typename IndexType>
    void set(IndexType slot, AccessType data)
    {
        sharedmem[slot] = data;
    }
};

// Accessor for a buffer reached through a 64-bit device address. Element
// `index` of type AccessType is located at address + index * sizeof(AccessType)
// and is read/written with vk::RawBufferLoad / vk::RawBufferStore.
struct Accessor
{
    // Builds an accessor whose element 0 lives at `address`.
    static Accessor create(const uint64_t address)
    {
        Accessor accessor;
        accessor.address = address;
        return accessor;
    }

    // Loads element `index` into `value`.
    template <typename AccessType, typename IndexType>
    void get(const IndexType index, NBL_REF_ARG(AccessType) value)
    {
        // Widen the index before scaling so the byte offset is computed in
        // 64 bits; a 32-bit index times the element size would otherwise wrap
        // for offsets of 4 GiB and above before being added to the address.
        const uint64_t byteOffset = uint64_t(index) * uint64_t(sizeof(AccessType));
        value = vk::RawBufferLoad<AccessType>(address + byteOffset);
    }

    // Stores `value` into element `index`.
    template <typename AccessType, typename IndexType>
    void set(const IndexType index, const AccessType value)
    {
        // Same 64-bit offset computation as in get().
        const uint64_t byteOffset = uint64_t(index) * uint64_t(sizeof(AccessType));
        vk::RawBufferStore<AccessType>(address + byteOffset, value);
    }

    // Device address of the first element.
    uint64_t address;
};
Comment on lines +37 to +59

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are ready-made BDA accessors you can use, AFAIK.


// Compute entry point: one workgroup of BitonicSortConfig::WorkgroupSize
// invocations. Each invocation loads two adjacent keys from the buffer at
// pushConstants.deviceBufferAddress, runs the workgroup bitonic sort on them
// (with the elements' original indices passed as values), and writes the two
// resulting keys back to the same buffer slots.
[numthreads(BitonicSortConfig::WorkgroupSize, 1, 1)]
[shader("compute")]
void main()
{
    const uint32_t invocation = glsl::gl_LocalInvocationID().x;

    // Invocation i owns the adjacent element pair [2*i] and [2*i + 1].
    const uint32_t firstIdx = invocation * 2;
    const uint32_t secondIdx = firstIdx + 1;

    Accessor buffer = Accessor::create(pushConstants.deviceBufferAddress);
    SharedMemoryAccessor scratch;

    // Fetch this invocation's two keys from the buffer.
    uint32_t firstKey;
    uint32_t secondKey;
    buffer.get(firstIdx, firstKey);
    buffer.get(secondIdx, secondKey);

    // The values passed alongside the keys are the elements' original indices.
    uint32_t firstVal = firstIdx;
    uint32_t secondVal = secondIdx;

    workgroup::BitonicSort<BitonicSortConfig>::template __call<Accessor, SharedMemoryAccessor>(buffer, scratch, firstKey, secondKey, firstVal, secondVal);

    // Only the keys are written back; the values are not stored.
    buffer.set(firstIdx, firstKey);
    buffer.set(secondIdx, secondKey);
}
13 changes: 13 additions & 0 deletions 13_BitonicSort/app_resources/common.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef _BITONIC_SORT_COMMON_INCLUDED_
#define _BITONIC_SORT_COMMON_INCLUDED_
#include "nbl/builtin/hlsl/cpp_compat.hlsl"

// Push-constant block of the bitonic sort example.
// NOTE(review): this header includes cpp_compat.hlsl, so the struct is
// presumably shared with the host code — confirm before changing its layout.
struct PushConstantData
{
// 64-bit device address of the buffer holding the elements to sort; the
// compute shader uses it as the base address for its raw buffer loads/stores.
uint64_t deviceBufferAddress;
};

// Sizing of the sort, expressed as base-2 logarithms.
NBL_CONSTEXPR uint32_t WorkgroupSizeLog2 = 7; // 128 threads
NBL_CONSTEXPR uint32_t ElementsPerThreadLog2 = 1; // 2 elements per thread
// Total element count: 2^(WorkgroupSizeLog2 + ElementsPerThreadLog2) = 2^8 = 256.
NBL_CONSTEXPR uint32_t elementCount = uint32_t(1) << (WorkgroupSizeLog2 + ElementsPerThreadLog2);
#endif
28 changes: 28 additions & 0 deletions 13_BitonicSort/config.json.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"enableParallelBuild": true,
"threadsPerBuildProcess" : 2,
"isExecuted": false,
"scriptPath": "",
"cmake": {
"configurations": [ "Release", "Debug", "RelWithDebInfo" ],
"buildModes": [],
"requiredOptions": []
},
"profiles": [
{
"backend": "vulkan", // should be none
"platform": "windows",
"buildModes": [],
"runConfiguration": "Release", // we also need to run in Debug and RWDI because this is a foundational example
"gpuArchitectures": []
}
],
"dependencies": [],
"data": [
{
"dependencies": [],
"command": [""],
"outputs": []
}
]
}
Loading