diff --git a/31_HLSLPathTracer/CMakeLists.txt b/31_HLSLPathTracer/CMakeLists.txt new file mode 100644 index 000000000..07b0fd396 --- /dev/null +++ b/31_HLSLPathTracer/CMakeLists.txt @@ -0,0 +1,37 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +if(NBL_BUILD_IMGUI) + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + list(APPEND NBL_LIBRARIES + imtestengine + "${NBL_EXT_IMGUI_UI_LIB}" + ) + + nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + + if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + endif() +endif() + + diff --git a/31_HLSLPathTracer/app_resources/glsl/common.glsl b/31_HLSLPathTracer/app_resources/glsl/common.glsl new file mode 100644 index 000000000..6b6e96710 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/glsl/common.glsl @@ -0,0 +1,837 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. 
+// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +// firefly and variance reduction techniques +//#define KILL_DIFFUSE_SPECULAR_PATHS +//#define VISUALIZE_HIGH_VARIANCE + +// debug +//#define NEE_ONLY + +layout(set = 2, binding = 0) uniform sampler2D envMap; +layout(set = 2, binding = 1) uniform usamplerBuffer sampleSequence; +layout(set = 2, binding = 2) uniform usampler2D scramblebuf; + +layout(set=0, binding=0, rgba16f) uniform image2D outImage; + +#ifndef _NBL_GLSL_WORKGROUP_SIZE_ +#define _NBL_GLSL_WORKGROUP_SIZE_ 512 +layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in; +#endif + +ivec2 getCoordinates() { + ivec2 imageSize = imageSize(outImage); + return ivec2(gl_GlobalInvocationID.x % imageSize.x, gl_GlobalInvocationID.x / imageSize.x); +} + +vec2 getTexCoords() { + ivec2 imageSize = imageSize(outImage); + ivec2 iCoords = getCoordinates(); + return vec2(float(iCoords.x) / imageSize.x, 1.0 - float(iCoords.y) / imageSize.y); +} + + +#include +#include +#include +#ifdef PERSISTENT_WORKGROUPS +#include +#endif + +#include + +layout(push_constant, row_major) uniform constants +{ + mat4 invMVP; + int sampleCount; + int depth; +} PTPushConstant; + +#define INVALID_ID_16BIT 0xffffu +struct Sphere +{ + vec3 position; + float radius2; + uint bsdfLightIDs; +}; + +Sphere Sphere_Sphere(in vec3 position, in float radius, in uint bsdfID, in uint lightID) +{ + Sphere sphere; + sphere.position = position; + sphere.radius2 = radius*radius; + sphere.bsdfLightIDs = bitfieldInsert(bsdfID,lightID,16,16); + return sphere; +} + +// return intersection distance if found, nbl_glsl_FLT_NAN otherwise +float Sphere_intersect(in Sphere sphere, in vec3 origin, in vec3 direction) +{ + vec3 relOrigin = origin-sphere.position; + float relOriginLen2 = dot(relOrigin,relOrigin); + const float radius2 = sphere.radius2; + + float dirDotRelOrigin = dot(direction,relOrigin); + float det = 
radius2-relOriginLen2+dirDotRelOrigin*dirDotRelOrigin; + + // do some speculative math here + float detsqrt = sqrt(det); + return -dirDotRelOrigin+(relOriginLen2>radius2 ? (-detsqrt):detsqrt); +} + +vec3 Sphere_getNormal(in Sphere sphere, in vec3 position) +{ + const float radiusRcp = inversesqrt(sphere.radius2); + return (position-sphere.position)*radiusRcp; +} + +float Sphere_getSolidAngle_impl(in float cosThetaMax) +{ + return 2.0*nbl_glsl_PI*(1.0-cosThetaMax); +} +float Sphere_getSolidAngle(in Sphere sphere, in vec3 origin) +{ + float cosThetaMax = sqrt(1.0-sphere.radius2/nbl_glsl_lengthSq(sphere.position-origin)); + return Sphere_getSolidAngle_impl(cosThetaMax); +} + + +Sphere spheres[SPHERE_COUNT] = { + Sphere_Sphere(vec3(0.0,-100.5,-1.0),100.0,0u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(2.0,0.0,-1.0),0.5,1u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(0.0,0.0,-1.0),0.5,2u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(-2.0,0.0,-1.0),0.5,3u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(2.0,0.0,1.0),0.5,4u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(0.0,0.0,1.0),0.5,4u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(-2.0,0.0,1.0),0.5,5u,INVALID_ID_16BIT), + Sphere_Sphere(vec3(0.5,1.0,0.5),0.5,6u,INVALID_ID_16BIT) +#if SPHERE_COUNT>8 + ,Sphere_Sphere(vec3(-1.5,1.5,0.0),0.3,INVALID_ID_16BIT,0u) +#endif +}; + + +struct Triangle +{ + vec3 vertex0; + uint bsdfLightIDs; + vec3 vertex1; + uint padding0; + vec3 vertex2; + uint padding1; +}; + +Triangle Triangle_Triangle(in mat3 vertices, in uint bsdfID, in uint lightID) +{ + Triangle tri; + tri.vertex0 = vertices[0]; + tri.vertex1 = vertices[1]; + tri.vertex2 = vertices[2]; + // + tri.bsdfLightIDs = bitfieldInsert(bsdfID, lightID, 16, 16); + return tri; +} + +// return intersection distance if found, nbl_glsl_FLT_NAN otherwise +float Triangle_intersect(in Triangle tri, in vec3 origin, in vec3 direction) +{ + const vec3 edges[2] = vec3[2](tri.vertex1-tri.vertex0,tri.vertex2-tri.vertex0); + + const vec3 h = cross(direction,edges[1]); + const float a 
= dot(edges[0],h); + + const vec3 relOrigin = origin-tri.vertex0; + + const float u = dot(relOrigin,h)/a; + + const vec3 q = cross(relOrigin,edges[0]); + const float v = dot(direction,q)/a; + + const float t = dot(edges[1],q)/a; + + return t>0.f&&u>=0.f&&v>=0.f&&(u+v)<=1.f ? t:nbl_glsl_FLT_NAN; +} + +vec3 Triangle_getNormalTimesArea_impl(in mat2x3 edges) +{ + return cross(edges[0],edges[1])*0.5; +} +vec3 Triangle_getNormalTimesArea(in Triangle tri) +{ + return Triangle_getNormalTimesArea_impl(mat2x3(tri.vertex1-tri.vertex0,tri.vertex2-tri.vertex0)); +} + + + +struct Rectangle +{ + vec3 offset; + uint bsdfLightIDs; + vec3 edge0; + uint padding0; + vec3 edge1; + uint padding1; +}; + +Rectangle Rectangle_Rectangle(in vec3 offset, in vec3 edge0, in vec3 edge1, in uint bsdfID, in uint lightID) +{ + Rectangle rect; + rect.offset = offset; + rect.edge0 = edge0; + rect.edge1 = edge1; + // + rect.bsdfLightIDs = bitfieldInsert(bsdfID, lightID, 16, 16); + return rect; +} + +void Rectangle_getNormalBasis(in Rectangle rect, out mat3 basis, out vec2 extents) +{ + extents = vec2(length(rect.edge0), length(rect.edge1)); + basis[0] = rect.edge0/extents[0]; + basis[1] = rect.edge1/extents[1]; + basis[2] = normalize(cross(basis[0],basis[1])); +} + +// return intersection distance if found, nbl_glsl_FLT_NAN otherwise +float Rectangle_intersect(in Rectangle rect, in vec3 origin, in vec3 direction) +{ + const vec3 h = cross(direction,rect.edge1); + const float a = dot(rect.edge0,h); + + const vec3 relOrigin = origin-rect.offset; + + const float u = dot(relOrigin,h)/a; + + const vec3 q = cross(relOrigin,rect.edge0); + const float v = dot(direction,q)/a; + + const float t = dot(rect.edge1,q)/a; + + const bool intersection = t>0.f&&u>=0.f&&v>=0.f&&u<=1.f&&v<=1.f; + return intersection ? 
t:nbl_glsl_FLT_NAN; +} + +vec3 Rectangle_getNormalTimesArea(in Rectangle rect) +{ + return cross(rect.edge0,rect.edge1); +} + + + +#define DIFFUSE_OP 0u +#define CONDUCTOR_OP 1u +#define DIELECTRIC_OP 2u +#define OP_BITS_OFFSET 0 +#define OP_BITS_SIZE 2 +struct BSDFNode +{ + uvec4 data[2]; +}; + +uint BSDFNode_getType(in BSDFNode node) +{ + return bitfieldExtract(node.data[0].w,OP_BITS_OFFSET,OP_BITS_SIZE); +} +bool BSDFNode_isBSDF(in BSDFNode node) +{ + return BSDFNode_getType(node)==DIELECTRIC_OP; +} +bool BSDFNode_isNotDiffuse(in BSDFNode node) +{ + return BSDFNode_getType(node)!=DIFFUSE_OP; +} +float BSDFNode_getRoughness(in BSDFNode node) +{ + return uintBitsToFloat(node.data[1].w); +} +vec3 BSDFNode_getRealEta(in BSDFNode node) +{ + return uintBitsToFloat(node.data[0].rgb); +} +vec3 BSDFNode_getImaginaryEta(in BSDFNode node) +{ + return uintBitsToFloat(node.data[1].rgb); +} +mat2x3 BSDFNode_getEta(in BSDFNode node) +{ + return mat2x3(BSDFNode_getRealEta(node),BSDFNode_getImaginaryEta(node)); +} +#include +vec3 BSDFNode_getReflectance(in BSDFNode node, in float VdotH) +{ + const vec3 albedoOrRealIoR = uintBitsToFloat(node.data[0].rgb); + if (BSDFNode_isNotDiffuse(node)) + return nbl_glsl_fresnel_conductor(albedoOrRealIoR, BSDFNode_getImaginaryEta(node), VdotH); + else + return albedoOrRealIoR; +} + +float BSDFNode_getNEEProb(in BSDFNode bsdf) +{ + const float alpha = BSDFNode_isNotDiffuse(bsdf) ? 
BSDFNode_getRoughness(bsdf):1.0; + return min(8.0*alpha,1.0); +} + +#include +#include +float getLuma(in vec3 col) +{ + return dot(transpose(nbl_glsl_scRGBtoXYZ)[1],col); +} + +#define BSDF_COUNT 7 +BSDFNode bsdfs[BSDF_COUNT] = { + {{uvec4(floatBitsToUint(vec3(0.8,0.8,0.8)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(0.8,0.4,0.4)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(0.4,0.8,0.4)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(1.02,1.02,1.3)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,1.0,2.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(1.02,1.3,1.02)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,2.0,1.0,0.0))}}, + {{uvec4(floatBitsToUint(vec3(1.02,1.3,1.02)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,2.0,1.0,0.15))}}, + {{uvec4(floatBitsToUint(vec3(1.4,1.45,1.5)),DIELECTRIC_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0625))}} +}; + + +struct Light +{ + vec3 radiance; + uint objectID; +}; + +vec3 Light_getRadiance(in Light light) +{ + return light.radiance; +} +uint Light_getObjectID(in Light light) +{ + return light.objectID; +} + + +#define LIGHT_COUNT 1 +float scene_getLightChoicePdf(in Light light) +{ + return 1.0/float(LIGHT_COUNT); +} + + +#define LIGHT_COUNT 1 +Light lights[LIGHT_COUNT] = +{ + { + vec3(30.0,25.0,15.0), +#ifdef POLYGON_METHOD + 0u +#else + 8u +#endif + } +}; + + + +#define ANY_HIT_FLAG (-2147483648) +#define DEPTH_BITS_COUNT 8 +#define DEPTH_BITS_OFFSET (31-DEPTH_BITS_COUNT) +struct ImmutableRay_t +{ + vec3 origin; + vec3 direction; +#if POLYGON_METHOD==2 + vec3 normalAtOrigin; + bool wasBSDFAtOrigin; +#endif +}; +struct MutableRay_t +{ + float intersectionT; + uint objectID; + /* irrelevant here + uint triangleID; + vec2 barycentrics; + */ +}; +struct Payload_t +{ + vec3 accumulation; + float otherTechniqueHeuristic; + vec3 throughput; + #ifdef KILL_DIFFUSE_SPECULAR_PATHS + bool hasDiffuse; + #endif +}; + +struct Ray_t +{ + ImmutableRay_t 
_immutable; + MutableRay_t _mutable; + Payload_t _payload; +}; + + +#define INTERSECTION_ERROR_BOUND_LOG2 (-8.0) +float getTolerance_common(in uint depth) +{ + float depthRcp = 1.0/float(depth); + return INTERSECTION_ERROR_BOUND_LOG2;// *depthRcp*depthRcp; +} +float getStartTolerance(in uint depth) +{ + return exp2(getTolerance_common(depth)); +} +float getEndTolerance(in uint depth) +{ + return 1.0-exp2(getTolerance_common(depth)+1.0); +} + + +vec2 SampleSphericalMap(vec3 v) +{ + vec2 uv = vec2(atan(v.z, v.x), asin(v.y)); + uv *= nbl_glsl_RECIPROCAL_PI*0.5; + uv += 0.5; + return uv; +} + +void missProgram(in ImmutableRay_t _immutable, inout Payload_t _payload) +{ + vec3 finalContribution = _payload.throughput; + // #define USE_ENVMAP +#ifdef USE_ENVMAP + vec2 uv = SampleSphericalMap(_immutable.direction); + finalContribution *= textureLod(envMap, uv, 0.0).rgb; +#else + const vec3 kConstantEnvLightRadiance = vec3(0.15, 0.21, 0.3); + finalContribution *= kConstantEnvLightRadiance; + _payload.accumulation += finalContribution; +#endif +} + +#include +#include +#include +#include +#include +#include +#include +nbl_glsl_LightSample nbl_glsl_bsdf_cos_generate(in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in vec3 u, in BSDFNode bsdf, in float monochromeEta, out nbl_glsl_AnisotropicMicrofacetCache _cache) +{ + const float a = BSDFNode_getRoughness(bsdf); + const mat2x3 ior = BSDFNode_getEta(bsdf); + + // fresnel stuff for dielectrics + float orientedEta, rcpOrientedEta; + const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); + + nbl_glsl_LightSample smpl; + nbl_glsl_AnisotropicMicrofacetCache dummy; + switch (BSDFNode_getType(bsdf)) + { + case DIFFUSE_OP: + smpl = nbl_glsl_oren_nayar_cos_generate(interaction,u.xy,a*a); + break; + case CONDUCTOR_OP: + smpl = nbl_glsl_ggx_cos_generate(interaction,u.xy,a,a,_cache); + break; + default: + smpl = 
nbl_glsl_ggx_dielectric_cos_generate(interaction,u,a,a,monochromeEta,_cache); + break; + } + return smpl; +} + +vec3 nbl_glsl_bsdf_cos_remainder_and_pdf(out float pdf, in nbl_glsl_LightSample _sample, in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in BSDFNode bsdf, in float monochromeEta, in nbl_glsl_AnisotropicMicrofacetCache _cache) +{ + // are V and L on opposite sides of the surface? + const bool transmitted = nbl_glsl_isTransmissionPath(interaction.isotropic.NdotV,_sample.NdotL); + + // is the BSDF or BRDF, if it is then we make the dot products `abs` before `max(,0.0)` + const bool transmissive = BSDFNode_isBSDF(bsdf); + const float clampedNdotL = nbl_glsl_conditionalAbsOrMax(transmissive,_sample.NdotL,0.0); + const float clampedNdotV = nbl_glsl_conditionalAbsOrMax(transmissive,interaction.isotropic.NdotV,0.0); + + vec3 remainder; + + const float minimumProjVectorLen = 0.00000001; + if (clampedNdotV>minimumProjVectorLen && clampedNdotL>minimumProjVectorLen) + { + // fresnel stuff for conductors (but reflectance also doubles as albedo) + const mat2x3 ior = BSDFNode_getEta(bsdf); + const vec3 reflectance = BSDFNode_getReflectance(bsdf,_cache.isotropic.VdotH); + + // fresnel stuff for dielectrics + float orientedEta, rcpOrientedEta; + const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); + + // + const float VdotL = dot(interaction.isotropic.V.dir,_sample.L); + + // + const float a = max(BSDFNode_getRoughness(bsdf),0.0001); // TODO: @Crisspl 0-roughness still doesn't work! Also Beckmann has a weird dark rim instead as fresnel!? 
+ const float a2 = a*a; + + // TODO: refactor into Material Compiler-esque thing + switch (BSDFNode_getType(bsdf)) + { + case DIFFUSE_OP: + remainder = reflectance*nbl_glsl_oren_nayar_cos_remainder_and_pdf_wo_clamps(pdf,a*a,VdotL,clampedNdotL,clampedNdotV); + break; + case CONDUCTOR_OP: + remainder = nbl_glsl_ggx_cos_remainder_and_pdf_wo_clamps(pdf,nbl_glsl_ggx_trowbridge_reitz(a2,_cache.isotropic.NdotH2),clampedNdotL,_sample.NdotL2,clampedNdotV,interaction.isotropic.NdotV_squared,reflectance,a2); + break; + default: + remainder = vec3(nbl_glsl_ggx_dielectric_cos_remainder_and_pdf(pdf, _sample, interaction.isotropic, _cache.isotropic, monochromeEta, a*a)); + break; + } + } + else + remainder = vec3(0.0); + return remainder; +} + +layout (constant_id = 0) const int MAX_DEPTH_LOG2 = 4; +layout (constant_id = 1) const int MAX_SAMPLES_LOG2 = 10; + + +#include + +mat2x3 rand3d(in uint protoDimension, in uint _sample, inout nbl_glsl_xoroshiro64star_state_t scramble_state) +{ + mat2x3 retval; + uint address = bitfieldInsert(protoDimension,_sample,MAX_DEPTH_LOG2,MAX_SAMPLES_LOG2); + for (int i=0; i<2u; i++) + { + uvec3 seqVal = texelFetch(sampleSequence,int(address)+i).xyz; + seqVal ^= uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state)); + retval[i] = vec3(seqVal)*uintBitsToFloat(0x2f800004u); + } + return retval; +} + + +void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction); +int traceRay(inout float intersectionT, in vec3 origin, in vec3 direction) +{ + const bool anyHit = intersectionT!=nbl_glsl_FLT_MAX; + + int objectID = -1; + for (int i=0; i0.0 && tnbl_glsl_FLT_MIN; + // but if we allowed non-watertight transmitters (single water surface), it would make sense just to apply this line by itself + nbl_glsl_AnisotropicMicrofacetCache _cache; + validPath = validPath && nbl_glsl_calcAnisotropicMicrofacetCache(_cache, interaction, nee_sample, 
monochromeEta); + if (lightPdflumaContributionThreshold && traceRay(t,intersection+nee_sample.L*t*getStartTolerance(depth),nee_sample.L)==-1) + ray._payload.accumulation += neeContrib; + }} + } +#if NEE_ONLY + return false; +#endif + // sample BSDF + float bsdfPdf; vec3 bsdfSampleL; + { + nbl_glsl_AnisotropicMicrofacetCache _cache; + nbl_glsl_LightSample bsdf_sample = nbl_glsl_bsdf_cos_generate(interaction,epsilon[1],bsdf,monochromeEta,_cache); + // the value of the bsdf divided by the probability of the sample being generated + throughput *= nbl_glsl_bsdf_cos_remainder_and_pdf(bsdfPdf,bsdf_sample,interaction,bsdf,monochromeEta,_cache); + // + bsdfSampleL = bsdf_sample.L; + } + + // additional threshold + const float lumaThroughputThreshold = lumaContributionThreshold; + if (bsdfPdf>bsdfPdfThreshold && getLuma(throughput)>lumaThroughputThreshold) + { + ray._payload.throughput = throughput; + ray._payload.otherTechniqueHeuristic = neeProbability/bsdfPdf; // numerically stable, don't touch + ray._payload.otherTechniqueHeuristic *= ray._payload.otherTechniqueHeuristic; + + // trace new ray + ray._immutable.origin = intersection+bsdfSampleL*(1.0/*kSceneSize*/)*getStartTolerance(depth); + ray._immutable.direction = bsdfSampleL; + #if POLYGON_METHOD==2 + ray._immutable.normalAtOrigin = interaction.isotropic.N; + ray._immutable.wasBSDFAtOrigin = isBSDF; + #endif + return true; + } + } + return false; +} + +void main() +{ + const ivec2 imageExtents = imageSize(outImage); + +#ifdef PERSISTENT_WORKGROUPS + uint virtualThreadIndex; + for (uint virtualThreadBase = gl_WorkGroupID.x * _NBL_GLSL_WORKGROUP_SIZE_; virtualThreadBase < 1920*1080; virtualThreadBase += gl_NumWorkGroups.x * _NBL_GLSL_WORKGROUP_SIZE_) // not sure why 1280*720 doesn't cover draw surface + { + virtualThreadIndex = virtualThreadBase + gl_LocalInvocationIndex.x; + const ivec2 coords = ivec2(nbl_glsl_morton_decode2d32b(virtualThreadIndex)); +#else + const ivec2 coords = getCoordinates(); +#endif + + vec2 
texCoord = vec2(coords) / vec2(imageExtents); + texCoord.y = 1.0 - texCoord.y; + + if (false == (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageExtents,coords)))) { +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + if (((PTPushConstant.depth-1)>>MAX_DEPTH_LOG2)>0 || ((PTPushConstant.sampleCount-1)>>MAX_SAMPLES_LOG2)>0) + { + vec4 pixelCol = vec4(1.0,0.0,0.0,1.0); + imageStore(outImage, coords, pixelCol); +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg; + const vec2 pixOffsetParam = vec2(1.0)/vec2(textureSize(scramblebuf,0)); + + + const mat4 invMVP = PTPushConstant.invMVP; + + vec4 NDC = vec4(texCoord*vec2(2.0,-2.0)+vec2(-1.0,1.0),0.0,1.0); + vec3 camPos; + { + vec4 tmp = invMVP*NDC; + camPos = tmp.xyz/tmp.w; + NDC.z = 1.0; + } + + vec3 color = vec3(0.0); + float meanLumaSquared = 0.0; + // TODO: if we collapse the nested for loop, then all GPUs will get `PTPushConstant.depth` factor speedup, not just NV with separate PC + for (int i=0; i5.0) + color = vec3(1.0,0.0,0.0); + #endif + + vec4 pixelCol = vec4(color, 1.0); + imageStore(outImage, coords, pixelCol); + +#ifdef PERSISTENT_WORKGROUPS + } +#endif +} +/** TODO: Improving Rendering + +Now: +- Always MIS (path correlated reuse) +- Test MIS alpha (roughness) scheme + +Many Lights: +- Path Guiding +- Light Importance Lists/Classification +- Spatio-Temporal Reservoir Sampling + +Indirect Light: +- Bidirectional Path Tracing +- Uniform Path Sampling / Vertex Connection and Merging / Path Space Regularization + +Animations: +- A-SVGF / BMFR +**/ \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/glsl/litByRectangle.comp b/31_HLSLPathTracer/app_resources/glsl/litByRectangle.comp new file mode 100644 index 000000000..d898655c4 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/glsl/litByRectangle.comp @@ -0,0 +1,182 @@ +// Copyright (C) 
2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#version 430 core +#extension GL_GOOGLE_include_directive : require + +#define SPHERE_COUNT 8 +#define POLYGON_METHOD 1 // 0 area sampling, 1 solid angle sampling, 2 approximate projected solid angle sampling +#include "app_resources/glsl/common.glsl" + +#define RECTANGLE_COUNT 1 +const vec3 edge0 = normalize(vec3(2,0,-1)); +const vec3 edge1 = normalize(vec3(2,-5,4)); +Rectangle rectangles[RECTANGLE_COUNT] = { + Rectangle_Rectangle(vec3(-3.8,0.35,1.3),edge0*7.0,edge1*0.1,INVALID_ID_16BIT,0u) +}; + + +void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction) +{ + for (int i=0; i0.0 && t +#include +#include + +float nbl_glsl_light_deferred_pdf(in Light light, in Ray_t ray) +{ + const Rectangle rect = rectangles[Light_getObjectID(light)]; + + const ImmutableRay_t _immutable = ray._immutable; + const vec3 L = _immutable.direction; +#if POLYGON_METHOD==0 + const float dist = ray._mutable.intersectionT; + return dist*dist/abs(dot(Rectangle_getNormalTimesArea(rect),L)); +#else + #ifdef TRIANGLE_REFERENCE + const mat3 sphericalVertices[2] = + { + nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset,rect.offset+rect.edge0,rect.offset+rect.edge1),_immutable.origin), + nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset+rect.edge1,rect.offset+rect.edge0,rect.offset+rect.edge0+rect.edge1),_immutable.origin) + }; + float solidAngle[2]; + vec3 cos_vertices[2],sin_vertices[2]; + float cos_a[2],cos_c[2],csc_b[2],csc_c[2]; + for (uint i=0u; i<2u; i++) + solidAngle[i] = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i]); + const float rectSolidAngle = solidAngle[0]+solidAngle[1]; + #if POLYGON_METHOD==1 + return 1.f/rectSolidAngle; + #elif POLYGON_METHOD==2 + // TODO: figure out what 
breaks for a directly visible light under MIS + if (rectSolidAngle > nbl_glsl_FLT_MIN) + { + const vec2 bary = nbl_glsl_barycentric_reconstructBarycentrics(L*ray._mutable.intersectionT+_immutable.origin-rect.offset,mat2x3(rect.edge0,rect.edge1)); + const uint i = bary.x>=0.f&&bary.y>=0.f&&(bary.x+bary.y)<=1.f ? 0u:1u; + + float pdf = nbl_glsl_sampling_probProjectedSphericalTriangleSample(solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],_immutable.normalAtOrigin,_immutable.wasBSDFAtOrigin,L); + pdf *= solidAngle[i]/rectSolidAngle; + return pdf; + } + else + return nbl_glsl_FLT_INF; + #endif + #else + float pdf; + mat3 rectNormalBasis; + vec2 rectExtents; + Rectangle_getNormalBasis(rect, rectNormalBasis, rectExtents); + vec3 sphR0 = nbl_glsl_shapes_getSphericalRectangle(_immutable.origin, rect.offset, rectNormalBasis); + float solidAngle = nbl_glsl_shapes_SolidAngleOfRectangle(sphR0, rectExtents); + if (solidAngle > nbl_glsl_FLT_MIN) + { + #if POLYGON_METHOD==1 + pdf = 1.f/solidAngle; + #else + #error + #endif + } + else + pdf = nbl_glsl_FLT_INF; + return pdf; + #endif +#endif +} + +vec3 nbl_glsl_light_generate_and_pdf(out float pdf, out float newRayMaxT, in vec3 origin, in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in bool isBSDF, in vec3 xi, in uint objectID) +{ + const Rectangle rect = rectangles[objectID]; + const vec3 N = Rectangle_getNormalTimesArea(rect); + + const vec3 origin2origin = rect.offset-origin; +#if POLYGON_METHOD==0 + vec3 L = origin2origin+rect.edge0*xi.x+rect.edge1*xi.y; // TODO: refactor + + const float distanceSq = dot(L,L); + const float rcpDistance = inversesqrt(distanceSq); + L *= rcpDistance; + + pdf = distanceSq/abs(dot(N,L)); + newRayMaxT = 1.0/rcpDistance; + return L; +#else + #ifdef TRIANGLE_REFERENCE + const mat3 sphericalVertices[2] = + { + nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset,rect.offset+rect.edge0,rect.offset+rect.edge1),origin), + 
nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset+rect.edge1,rect.offset+rect.edge0,rect.offset+rect.edge0+rect.edge1),origin) + }; + float solidAngle[2]; + vec3 cos_vertices[2],sin_vertices[2]; + float cos_a[2],cos_c[2],csc_b[2],csc_c[2]; + for (uint i=0u; i<2u; i++) + solidAngle[i] = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i]); + vec3 L = vec3(0.f,0.f,0.f); + const float rectangleSolidAngle = solidAngle[0]+solidAngle[1]; + if (rectangleSolidAngle > nbl_glsl_FLT_MIN) + { + float rcpTriangleChoiceProb; + const uint i = nbl_glsl_partitionRandVariable(solidAngle[0]/rectangleSolidAngle,xi.z,rcpTriangleChoiceProb) ? 1u:0u; + #if POLYGON_METHOD==1 + L = nbl_glsl_sampling_generateSphericalTriangleSample(solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],xi.xy); + pdf = 1.f/rectangleSolidAngle; + #elif POLYGON_METHOD==2 + float rcpPdf; + L = nbl_glsl_sampling_generateProjectedSphericalTriangleSample(rcpPdf,solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],interaction.isotropic.N,isBSDF,xi.xy); + pdf = 1.f/(rcpPdf*rcpTriangleChoiceProb); + #endif + } + else + pdf = nbl_glsl_FLT_INF; + #else + mat3 rectNormalBasis; + vec2 rectExtents; + Rectangle_getNormalBasis(rect, rectNormalBasis, rectExtents); + vec3 sphR0 = nbl_glsl_shapes_getSphericalRectangle(origin, rect.offset, rectNormalBasis); + vec3 L = vec3(0.f,0.f,0.f); + float solidAngle; + vec2 sphUv = nbl_glsl_sampling_generateSphericalRectangleSample(sphR0, rectExtents, xi.xy, solidAngle); + if (solidAngle > nbl_glsl_FLT_MIN) + { + #if POLYGON_METHOD==1 + vec3 sph_sample = sphUv[0] * rect.edge0 + sphUv[1] * rect.edge1 + rect.offset; + L = normalize(sph_sample - origin); + pdf = 1.f/solidAngle; + #else + #error + #endif + } + else + pdf = nbl_glsl_FLT_INF; + #endif + newRayMaxT = dot(N,origin2origin)/dot(N,L); + return L; +#endif +} + 
+ +uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) +{ + if (objectID0.0) + { + const float rcpDistance = inversesqrt(distanceSQ); + Z *= rcpDistance; + + const float cosThetaMax = sqrt(cosThetaMax2); + const float cosTheta = mix(1.0,cosThetaMax,xi.x); + + vec3 L = Z*cosTheta; + + const float cosTheta2 = cosTheta*cosTheta; + const float sinTheta = sqrt(1.0-cosTheta2); + float sinPhi,cosPhi; + nbl_glsl_sincos(2.0*nbl_glsl_PI*xi.y-nbl_glsl_PI,sinPhi,cosPhi); + mat2x3 XY = nbl_glsl_frisvad(Z); + + L += (XY[0]*cosPhi+XY[1]*sinPhi)*sinTheta; + + newRayMaxT = (cosTheta-sqrt(cosTheta2-cosThetaMax2))/rcpDistance; + pdf = 1.0/Sphere_getSolidAngle_impl(cosThetaMax); + return L; + } + pdf = 0.0; + return vec3(0.0,0.0,0.0); +} + +uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) +{ + Sphere sphere = spheres[objectID]; + normal = Sphere_getNormal(sphere,intersection); + return sphere.bsdfLightIDs; +} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/glsl/litByTriangle.comp b/31_HLSLPathTracer/app_resources/glsl/litByTriangle.comp new file mode 100644 index 000000000..36fe522f2 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/glsl/litByTriangle.comp @@ -0,0 +1,105 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#version 430 core +#extension GL_GOOGLE_include_directive : require + +#define SPHERE_COUNT 8 +#define POLYGON_METHOD 1 // 0 area sampling, 1 solid angle sampling, 2 approximate projected solid angle sampling +#include "app_resources/glsl/common.glsl" + +#define TRIANGLE_COUNT 1 +Triangle triangles[TRIANGLE_COUNT] = { + Triangle_Triangle(mat3(vec3(-1.8,0.35,0.3),vec3(-1.2,0.35,0.0),vec3(-1.5,0.8,-0.3))*10.0,INVALID_ID_16BIT,0u) +}; + +void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction) +{ + for (int i=0; i0.0 && t +float nbl_glsl_light_deferred_pdf(in Light light, in Ray_t ray) +{ + const Triangle tri = triangles[Light_getObjectID(light)]; + + const vec3 L = ray._immutable.direction; +#if POLYGON_METHOD==0 + const float dist = ray._mutable.intersectionT; + return dist*dist/abs(dot(Triangle_getNormalTimesArea(tri),L)); +#else + const ImmutableRay_t _immutable = ray._immutable; + const mat3 sphericalVertices = nbl_glsl_shapes_getSphericalTriangle(mat3(tri.vertex0,tri.vertex1,tri.vertex2),_immutable.origin); + #if POLYGON_METHOD==1 + const float rcpProb = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices); + // if `rcpProb` is NAN then the triangle's solid angle was close to 0.0 + return rcpProb>nbl_glsl_FLT_MIN ? (1.0/rcpProb):nbl_glsl_FLT_MAX; + #elif POLYGON_METHOD==2 + const float pdf = nbl_glsl_sampling_probProjectedSphericalTriangleSample(sphericalVertices,_immutable.normalAtOrigin,_immutable.wasBSDFAtOrigin,L); + // if `pdf` is NAN then the triangle's projected solid angle was close to 0.0, if its close to INF then the triangle was very small + return pdfnbl_glsl_FLT_MIN ? 
(1.0/rcpPdf):0.0; + + const vec3 N = Triangle_getNormalTimesArea(tri); + newRayMaxT = dot(N,tri.vertex0-origin)/dot(N,L); + return L; +#endif +} + + +uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) +{ + if (objectID +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ + +template // TODO make type T Spectrum +struct Payload +{ + using this_t = Payload; + using scalar_type = T; + using vector3_type = vector; + + vector3_type accumulation; + scalar_type otherTechniqueHeuristic; + vector3_type throughput; + // #ifdef KILL_DIFFUSE_SPECULAR_PATHS + // bool hasDiffuse; + // #endif +}; + +enum ProceduralShapeType : uint16_t +{ + PST_NONE = 0, + PST_SPHERE, + PST_TRIANGLE, + PST_RECTANGLE +}; + +struct ObjectID +{ + static ObjectID create(uint32_t id, uint32_t mode, ProceduralShapeType shapeType) + { + ObjectID retval; + retval.id = id; + retval.mode = mode; + retval.shapeType = shapeType; + return retval; + } + + uint32_t id; + uint32_t mode; + ProceduralShapeType shapeType; +}; + +template +struct Ray +{ + using this_t = Ray; + using scalar_type = T; + using vector3_type = vector; + + // immutable + vector3_type origin; + vector3_type direction; + + // polygon method == PPM_APPROX_PROJECTED_SOLID_ANGLE + vector3_type normalAtOrigin; + bool wasBSDFAtOrigin; + + // mutable + scalar_type intersectionT; + ObjectID objectID; + + Payload payload; +}; + +template +struct Light +{ + using spectral_type = Spectrum; + + NBL_CONSTEXPR_STATIC_INLINE uint32_t INVALID_ID = 0xffffu; + + static Light create(NBL_CONST_REF_ARG(spectral_type) radiance, uint32_t objId, uint32_t mode, ProceduralShapeType shapeType) + { + Light retval; + retval.radiance = radiance; + retval.objectID = ObjectID::create(objId, mode, shapeType); + return retval; + } + + static Light create(NBL_CONST_REF_ARG(spectral_type) radiance, NBL_CONST_REF_ARG(ObjectID) 
objectID) + { + Light retval; + retval.radiance = radiance; + retval.objectID = objectID; + return retval; + } + + spectral_type radiance; + ObjectID objectID; +}; + +template +struct BxDFNode +{ + using spectral_type = Spectrum; + using params_type = bxdf::SBxDFCreationParams; + + NBL_CONSTEXPR_STATIC_INLINE uint32_t INVALID_ID = 0xffffu; + + // for diffuse bxdfs + static BxDFNode create(uint32_t materialType, bool isAniso, NBL_CONST_REF_ARG(float32_t2) A, NBL_CONST_REF_ARG(spectral_type) albedo) + { + BxDFNode retval; + retval.albedo = albedo; + retval.materialType = materialType; + retval.params.is_aniso = isAniso; + retval.params.A = hlsl::max(A, (float32_t2)1e-4); + retval.params.ior0 = (spectral_type)1.0; + retval.params.ior1 = (spectral_type)1.0; + return retval; + } + + // for conductor + dielectric + static BxDFNode create(uint32_t materialType, bool isAniso, NBL_CONST_REF_ARG(float32_t2) A, NBL_CONST_REF_ARG(spectral_type) ior0, NBL_CONST_REF_ARG(spectral_type) ior1) + { + BxDFNode retval; + retval.albedo = (spectral_type)1.0; + retval.materialType = materialType; + retval.params.is_aniso = isAniso; + retval.params.A = hlsl::max(A, (float32_t2)1e-4); + retval.params.ior0 = ior0; + retval.params.ior1 = ior1; + return retval; + } + + spectral_type albedo; + uint32_t materialType; + params_type params; +}; + +template +struct Tolerance +{ + NBL_CONSTEXPR_STATIC_INLINE float INTERSECTION_ERROR_BOUND_LOG2 = -8.0; + + static T __common(uint32_t depth) + { + float depthRcp = 1.0 / float(depth); + return INTERSECTION_ERROR_BOUND_LOG2; + } + + static T getStart(uint32_t depth) + { + return nbl::hlsl::exp2(__common(depth)); + } + + static T getEnd(uint32_t depth) + { + return 1.0 - nbl::hlsl::exp2(__common(depth) + 1.0); + } +}; + +enum PTPolygonMethod : uint16_t +{ + PPM_AREA, + PPM_SOLID_ANGLE, + PPM_APPROX_PROJECTED_SOLID_ANGLE +}; + +enum IntersectMode : uint32_t +{ + IM_RAY_QUERY, + IM_RAY_TRACING, + IM_PROCEDURAL +}; + +template +struct Shape; + +template<> 
+struct Shape +{ + static Shape create(NBL_CONST_REF_ARG(float32_t3) position, float32_t radius2, uint32_t bsdfLightIDs) + { + Shape retval; + retval.position = position; + retval.radius2 = radius2; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(float32_t3) position, float32_t radius, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(position, radius * radius, bsdfLightIDs); + } + + // return intersection distance if found, nan otherwise + float intersect(NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(float32_t3) direction) + { + float32_t3 relOrigin = origin - position; + float relOriginLen2 = hlsl::dot(relOrigin, relOrigin); + + float dirDotRelOrigin = hlsl::dot(direction, relOrigin); + float det = radius2 - relOriginLen2 + dirDotRelOrigin * dirDotRelOrigin; + + // do some speculative math here + float detsqrt = hlsl::sqrt(det); + return -dirDotRelOrigin + (relOriginLen2 > radius2 ? 
(-detsqrt) : detsqrt); + } + + float32_t3 getNormal(NBL_CONST_REF_ARG(float32_t3) hitPosition) + { + const float radiusRcp = hlsl::rsqrt(radius2); + return (hitPosition - position) * radiusRcp; + } + + float getSolidAngle(NBL_CONST_REF_ARG(float32_t3) origin) + { + float32_t3 dist = position - origin; + float cosThetaMax = hlsl::sqrt(1.0 - radius2 / hlsl::dot(dist, dist)); + return 2.0 * numbers::pi * (1.0 - cosThetaMax); + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 5; + + float32_t3 position; + float32_t radius2; + uint32_t bsdfLightIDs; +}; + +template<> +struct Shape +{ + static Shape create(NBL_CONST_REF_ARG(float32_t3) vertex0, NBL_CONST_REF_ARG(float32_t3) vertex1, NBL_CONST_REF_ARG(float32_t3) vertex2, uint32_t bsdfLightIDs) + { + Shape retval; + retval.vertex0 = vertex0; + retval.vertex1 = vertex1; + retval.vertex2 = vertex2; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(float32_t3) vertex0, NBL_CONST_REF_ARG(float32_t3) vertex1, NBL_CONST_REF_ARG(float32_t3) vertex2, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(vertex0, vertex1, vertex2, bsdfLightIDs); + } + + float intersect(NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(float32_t3) direction) + { + const float32_t3 edges[2] = { vertex1 - vertex0, vertex2 - vertex0 }; + + const float32_t3 h = hlsl::cross(direction, edges[1]); + const float a = hlsl::dot(edges[0], h); + + const float32_t3 relOrigin = origin - vertex0; + + const float u = hlsl::dot(relOrigin, h) / a; + + const float32_t3 q = hlsl::cross(relOrigin, edges[0]); + const float v = hlsl::dot(direction, q) / a; + + const float t = hlsl::dot(edges[1], q) / a; + + const bool intersection = t > 0.f && u >= 0.f && v >= 0.f && (u + v) <= 1.f; + return intersection ? 
t : bit_cast(numeric_limits::infinity); + } + + float32_t3 getNormalTimesArea() + { + const float32_t3 edges[2] = { vertex1 - vertex0, vertex2 - vertex0 }; + return hlsl::cross(edges[0], edges[1]) * 0.5f; + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 10; + + float32_t3 vertex0; + float32_t3 vertex1; + float32_t3 vertex2; + uint32_t bsdfLightIDs; +}; + +template<> +struct Shape +{ + static Shape create(NBL_CONST_REF_ARG(float32_t3) offset, NBL_CONST_REF_ARG(float32_t3) edge0, NBL_CONST_REF_ARG(float32_t3) edge1, uint32_t bsdfLightIDs) + { + Shape retval; + retval.offset = offset; + retval.edge0 = edge0; + retval.edge1 = edge1; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(float32_t3) offset, NBL_CONST_REF_ARG(float32_t3) edge0, NBL_CONST_REF_ARG(float32_t3) edge1, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(offset, edge0, edge1, bsdfLightIDs); + } + + float intersect(NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(float32_t3) direction) + { + const float32_t3 h = hlsl::cross(direction, edge1); + const float a = hlsl::dot(edge0, h); + + const float32_t3 relOrigin = origin - offset; + + const float u = hlsl::dot(relOrigin,h)/a; + + const float32_t3 q = hlsl::cross(relOrigin, edge0); + const float v = hlsl::dot(direction, q) / a; + + const float t = hlsl::dot(edge1, q) / a; + + const bool intersection = t > 0.f && u >= 0.f && v >= 0.f && u <= 1.f && v <= 1.f; + return intersection ? 
t : bit_cast(numeric_limits::infinity); + } + + float32_t3 getNormalTimesArea() + { + return hlsl::cross(edge0, edge1); + } + + void getNormalBasis(NBL_REF_ARG(float32_t3x3) basis, NBL_REF_ARG(float32_t2) extents) + { + extents = float32_t2(nbl::hlsl::length(edge0), nbl::hlsl::length(edge1)); + basis[0] = edge0 / extents[0]; + basis[1] = edge1 / extents[1]; + basis[2] = normalize(cross(basis[0],basis[1])); + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 10; + + float32_t3 offset; + float32_t3 edge0; + float32_t3 edge1; + uint32_t bsdfLightIDs; +}; + +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl b/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl new file mode 100644 index 000000000..e59fdc2c3 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl @@ -0,0 +1,88 @@ +#ifndef _NBL_HLSL_EXT_INTERSECTOR_INCLUDED_ +#define _NBL_HLSL_EXT_INTERSECTOR_INCLUDED_ + +#include "common.hlsl" +#include "scene.hlsl" +#include + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace Intersector +{ + +template +struct Comprehensive +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + + using light_type = Light; + using bxdfnode_type = BxdfNode; + using scene_type = Scene; + + static ObjectID traceRay(NBL_REF_ARG(ray_type) ray, NBL_CONST_REF_ARG(scene_type) scene) + { + ObjectID objectID; + objectID.id = -1; + + // procedural shapes + for (int i = 0; i < scene.sphereCount; i++) + { + float t = scene.spheres[i].intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + { + ray.intersectionT = t; + objectID.id = i; + objectID.mode = IM_PROCEDURAL; + objectID.shapeType = PST_SPHERE; + } + } + for (int i = 0; i < scene.triangleCount; i++) + { + float t = scene.triangles[i].intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if
(closerIntersection) + { + ray.intersectionT = t; + objectID.id = i; + objectID.mode = IM_PROCEDURAL; + objectID.shapeType = PST_TRIANGLE; + } + } + for (int i = 0; i < scene.rectangleCount; i++) + { + float t = scene.rectangles[i].intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + { + ray.intersectionT = t; + objectID.id = i; + objectID.mode = IM_PROCEDURAL; + objectID.shapeType = PST_RECTANGLE; // i indexes scene.rectangles, so the hit must be tagged as a rectangle + } + } + + // TODO: trace AS + + return objectID; + } +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl b/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl new file mode 100644 index 000000000..4e2fdc5a0 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl @@ -0,0 +1,205 @@ +#ifndef _NBL_HLSL_EXT_MATERIAL_SYSTEM_INCLUDED_ +#define _NBL_HLSL_EXT_MATERIAL_SYSTEM_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace MaterialSystem +{ + +enum MaterialType : uint32_t // enum class?
+{ + DIFFUSE, + CONDUCTOR, + DIELECTRIC +}; + +template +struct MaterialParams +{ + using this_t = MaterialParams; + using sample_type = typename DiffuseBxDF::sample_type; + using anisotropic_interaction_type = typename DiffuseBxDF::anisotropic_interaction_type; + using isotropic_interaction_type = typename anisotropic_interaction_type::isotropic_interaction_type; + using anisocache_type = typename ConductorBxDF::anisocache_type; + using isocache_type = typename anisocache_type::isocache_type; + + using diffuse_params_type = typename DiffuseBxDF::params_isotropic_t; + using conductor_params_type = typename ConductorBxDF::params_isotropic_t; + using dielectric_params_type = typename DielectricBxDF::params_isotropic_t; + + // we're only doing isotropic for this example + static this_t create(sample_type _sample, isotropic_interaction_type _interaction, isocache_type _cache, bxdf::BxDFClampMode _clamp) + { + this_t retval; + retval._Sample = _sample; + retval.interaction = _interaction; + retval.cache = _cache; + retval.clampMode = _clamp; + return retval; + } + + diffuse_params_type getDiffuseParams() + { + return diffuse_params_type::create(_Sample, interaction, clampMode); + } + + conductor_params_type getConductorParams() + { + return conductor_params_type::create(_Sample, interaction, cache, clampMode); + } + + dielectric_params_type getDielectricParams() + { + return dielectric_params_type::create(_Sample, interaction, cache, clampMode); + } + + sample_type _Sample; + isotropic_interaction_type interaction; + isocache_type cache; + bxdf::BxDFClampMode clampMode; +}; + +template // NOTE: these bxdfs should match the ones in Scene BxDFNode +struct System +{ + using this_t = System; + using scalar_type = typename DiffuseBxDF::scalar_type; // types should be same across all 3 bxdfs + using vector2_type = vector; + using vector3_type = vector; + using measure_type = typename DiffuseBxDF::spectral_type; + using sample_type = typename DiffuseBxDF::sample_type; + using 
ray_dir_info_type = typename sample_type::ray_dir_info_type; + using quotient_pdf_type = typename DiffuseBxDF::quotient_pdf_type; + using anisotropic_interaction_type = typename DiffuseBxDF::anisotropic_interaction_type; + using isotropic_interaction_type = typename anisotropic_interaction_type::isotropic_interaction_type; + using anisocache_type = typename ConductorBxDF::anisocache_type; + using isocache_type = typename anisocache_type::isocache_type; + using params_t = MaterialParams; + using create_params_t = bxdf::SBxDFCreationParams; + + using diffuse_op_type = DiffuseBxDF; + using conductor_op_type = ConductorBxDF; + using dielectric_op_type = DielectricBxDF; + + static this_t create(NBL_CONST_REF_ARG(create_params_t) diffuseParams, NBL_CONST_REF_ARG(create_params_t) conductorParams, NBL_CONST_REF_ARG(create_params_t) dielectricParams) + { + this_t retval; + retval.diffuseBxDF = diffuse_op_type::create(diffuseParams); + retval.conductorBxDF = conductor_op_type::create(conductorParams); + retval.dielectricBxDF = dielectric_op_type::create(dielectricParams); + return retval; + } + + measure_type eval(uint32_t material, NBL_CONST_REF_ARG(create_params_t) cparams, NBL_CONST_REF_ARG(params_t) params) + { + switch(material) + { + case MaterialType::DIFFUSE: + { + diffuseBxDF.init(cparams); + return (measure_type)diffuseBxDF.eval(params.getDiffuseParams()); + } + break; + case MaterialType::CONDUCTOR: + { + conductorBxDF.init(cparams); + return conductorBxDF.eval(params.getConductorParams()); + } + break; + case MaterialType::DIELECTRIC: + { + dielectricBxDF.init(cparams); + return dielectricBxDF.eval(params.getDielectricParams()); + } + break; + default: + return (measure_type)0.0; + } + } + + sample_type generate(uint32_t material, NBL_CONST_REF_ARG(create_params_t) cparams, NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction, NBL_CONST_REF_ARG(vector3_type) u, NBL_REF_ARG(anisocache_type) _cache) + { + switch(material) + { + case MaterialType::DIFFUSE: + 
{ + diffuseBxDF.init(cparams); + return diffuseBxDF.generate(interaction, u.xy); + } + break; + case MaterialType::CONDUCTOR: + { + conductorBxDF.init(cparams); + return conductorBxDF.generate(interaction, u.xy, _cache); + } + break; + case MaterialType::DIELECTRIC: + { + dielectricBxDF.init(cparams); + return dielectricBxDF.generate(interaction, u, _cache); + } + break; + default: + { + ray_dir_info_type L; + L.direction = (vector3_type)0; + return sample_type::create(L, 0, (vector3_type)0); + } + } + + ray_dir_info_type L; + L.direction = (vector3_type)0; + return sample_type::create(L, 0, (vector3_type)0); + } + + quotient_pdf_type quotient_and_pdf(uint32_t material, NBL_CONST_REF_ARG(create_params_t) cparams, NBL_CONST_REF_ARG(params_t) params) + { + const float minimumProjVectorLen = 0.00000001; + if (params.interaction.getNdotV() > minimumProjVectorLen && params._Sample.getNdotL() > minimumProjVectorLen) + { + switch(material) + { + case MaterialType::DIFFUSE: + { + diffuseBxDF.init(cparams); + return diffuseBxDF.quotient_and_pdf(params.getDiffuseParams()); + } + break; + case MaterialType::CONDUCTOR: + { + conductorBxDF.init(cparams); + return conductorBxDF.quotient_and_pdf(params.getConductorParams()); + } + break; + case MaterialType::DIELECTRIC: + { + dielectricBxDF.init(cparams); + return dielectricBxDF.quotient_and_pdf(params.getDielectricParams()); + } + break; + default: + return quotient_pdf_type::create((measure_type)0.0, 0.0); + } + } + return quotient_pdf_type::create((measure_type)0.0, 0.0); + } + + DiffuseBxDF diffuseBxDF; + ConductorBxDF conductorBxDF; + DielectricBxDF dielectricBxDF; +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl new file mode 100644 index 000000000..ac74b1abf --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl @@ -0,0 +1,446 @@ +#ifndef 
_NBL_HLSL_EXT_NEXT_EVENT_ESTIMATOR_INCLUDED_ +#define _NBL_HLSL_EXT_NEXT_EVENT_ESTIMATOR_INCLUDED_ + +#include "common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace NextEventEstimator +{ + +template +struct ShapeSampling; + +template +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) sphere) + { + ShapeSampling retval; + retval.sphere = sphere; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + return 1.0 / sphere.getSolidAngle(ray.origin); + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + float32_t3 Z = sphere.position - origin; + const float distanceSQ = hlsl::dot(Z,Z); + const float cosThetaMax2 = 1.0 - sphere.radius2 / distanceSQ; + if (cosThetaMax2 > 0.0) + { + const float rcpDistance = 1.0 / hlsl::sqrt(distanceSQ); + Z *= rcpDistance; + + const float cosThetaMax = hlsl::sqrt(cosThetaMax2); + const float cosTheta = hlsl::mix(1.0, cosThetaMax, xi.x); + + float32_t3 L = Z * cosTheta; + + const float cosTheta2 = cosTheta * cosTheta; + const float sinTheta = hlsl::sqrt(1.0 - cosTheta2); + float sinPhi, cosPhi; + math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); + float32_t3 X, Y; + math::frisvad(Z, X, Y); + + L += (X * cosPhi + Y * sinPhi) * sinTheta; + + newRayMaxT = (cosTheta - hlsl::sqrt(cosTheta2 - cosThetaMax2)) / rcpDistance; + pdf = 1.0 / (2.0 * numbers::pi * (1.0 - cosThetaMax)); + return L; + } + pdf = 0.0; + return float32_t3(0.0,0.0,0.0); + } + + Shape sphere; +}; + +template<> +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const float dist = ray.intersectionT; + const float32_t3 L = 
ray.direction; + return dist * dist / hlsl::abs(hlsl::dot(tri.getNormalTimesArea(), L)); + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + const float32_t3 edge0 = tri.vertex1 - tri.vertex0; + const float32_t3 edge1 = tri.vertex2 - tri.vertex0; + const float sqrtU = hlsl::sqrt(xi.x); + float32_t3 pnt = tri.vertex0 + edge0 * (1.0 - sqrtU) + edge1 * sqrtU * xi.y; + float32_t3 L = pnt - origin; + + const float distanceSq = hlsl::dot(L,L); + const float rcpDistance = 1.0 / hlsl::sqrt(distanceSq); + L *= rcpDistance; + + pdf = distanceSq / hlsl::abs(hlsl::dot(hlsl::cross(edge0, edge1) * 0.5f, L)); + newRayMaxT = 1.0 / rcpDistance; + return L; + } + + Shape tri; +}; + +template<> +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, ray.origin); + const float rcpProb = st.solidAngleOfTriangle(); + // if `rcpProb` is NAN then the triangle's solid angle was close to 0.0 + return rcpProb > numeric_limits::min ? (1.0 / rcpProb) : numeric_limits::max; + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + float rcpPdf; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, origin); + sampling::SphericalTriangle sst = sampling::SphericalTriangle::create(st); + + const float32_t3 L = sst.generate(rcpPdf, xi.xy); + + pdf = rcpPdf > numeric_limits::min ? 
(1.0 / rcpPdf) : numeric_limits::max; + + const float32_t3 N = tri.getNormalTimesArea(); + newRayMaxT = hlsl::dot(N, tri.vertex0 - origin) / hlsl::dot(N, L); + return L; + } + + Shape tri; +}; + +template<> +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const float32_t3 L = ray.direction; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, ray.origin); + sampling::ProjectedSphericalTriangle pst = sampling::ProjectedSphericalTriangle::create(st); + const float pdf = pst.pdf(ray.normalAtOrigin, ray.wasBSDFAtOrigin, L); + // if `pdf` is NAN then the triangle's projected solid angle was close to 0.0, if its close to INF then the triangle was very small + return pdf < numeric_limits::max ? pdf : numeric_limits::max; + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + float rcpPdf; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, origin); + sampling::ProjectedSphericalTriangle sst = sampling::ProjectedSphericalTriangle::create(st); + + const float32_t3 L = sst.generate(rcpPdf, interaction.isotropic.N, isBSDF, xi.xy); + + pdf = rcpPdf > numeric_limits::min ? 
(1.0 / rcpPdf) : numeric_limits::max; + + const float32_t3 N = tri.getNormalTimesArea(); + newRayMaxT = hlsl::dot(N, tri.vertex0 - origin) / hlsl::dot(N, L); + return L; + } + + Shape tri; +}; + +template<> +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) rect) + { + ShapeSampling retval; + retval.rect = rect; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const float dist = ray.intersectionT; + const float32_t3 L = ray.direction; + return dist * dist / hlsl::abs(hlsl::dot(rect.getNormalTimesArea(), L)); + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + const float32_t3 N = rect.getNormalTimesArea(); + const float32_t3 origin2origin = rect.offset - origin; + + float32_t3 L = origin2origin + rect.edge0 * xi.x + rect.edge1 * xi.y; + const float distSq = hlsl::dot(L, L); + const float rcpDist = 1.0 / hlsl::sqrt(distSq); + L *= rcpDist; + pdf = distSq / hlsl::abs(hlsl::dot(N, L)); + newRayMaxT = 1.0 / rcpDist; + return L; + } + + Shape rect; +}; + +template<> +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) rect) + { + ShapeSampling retval; + retval.rect = rect; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + float pdf; + float32_t3x3 rectNormalBasis; + float32_t2 rectExtents; + rect.getNormalBasis(rectNormalBasis, rectExtents); + shapes::SphericalRectangle sphR0 = shapes::SphericalRectangle::create(ray.origin, rect.offset, rectNormalBasis); + float solidAngle = sphR0.solidAngleOfRectangle(rectExtents); + if (solidAngle > numeric_limits::min) + pdf = 1.f / solidAngle; + else + pdf = bit_cast(numeric_limits::infinity); + return pdf; + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, 
NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + const float32_t3 N = rect.getNormalTimesArea(); + const float32_t3 origin2origin = rect.offset - origin; + + float32_t3x3 rectNormalBasis; + float32_t2 rectExtents; + rect.getNormalBasis(rectNormalBasis, rectExtents); + shapes::SphericalRectangle sphR0 = shapes::SphericalRectangle::create(origin, rect.offset, rectNormalBasis); + float32_t3 L = (float32_t3)0.0; + float solidAngle = sphR0.solidAngleOfRectangle(rectExtents); + + sampling::SphericalRectangle ssph = sampling::SphericalRectangle::create(sphR0); + float32_t2 sphUv = ssph.generate(rectExtents, xi.xy, solidAngle); + if (solidAngle > numeric_limits::min) + { + float32_t3 sph_sample = sphUv[0] * rect.edge0 + sphUv[1] * rect.edge1 + rect.offset; + L = sph_sample - origin; + L = hlsl::mix(nbl::hlsl::normalize(L), (float32_t3)0.0, hlsl::abs(L) > (float32_t3)numeric_limits::min); // TODO? sometimes L is vec3(0), find cause + pdf = 1.f / solidAngle; + } + else + pdf = bit_cast(numeric_limits::infinity); + + newRayMaxT = hlsl::dot(N, origin2origin) / hlsl::dot(N, L); + return L; + } + + Shape rect; +}; + +// PPM_APPROX_PROJECTED_SOLID_ANGLE not available for PST_TRIANGLE + + +template +struct Estimator; + +template +struct Estimator +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using light_type = typename Scene::light_type; + using spectral_type = typename light_type::spectral_type; + using interaction_type = Aniso; + using quotient_pdf_type = sampling::quotient_and_pdf; + using sample_type = LightSample; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + + // affected by https://github.com/microsoft/DirectXShaderCompiler/issues/7007 + // NBL_CONSTEXPR_STATIC_INLINE PTPolygonMethod PolygonMethod = PPM; + enum : uint16_t { PolygonMethod = PPM }; + + static spectral_type 
deferredEvalAndPdf(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(ray_type) ray) + { + pdf = 1.0 / scene.lightCount; + const light_type light = scene.lights[lightID]; + const Shape sphere = scene.spheres[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(sphere); + pdf *= sampling.template deferredPdf(ray); + + return light.radiance; + } + + static sample_type generate_and_quotient_and_pdf(NBL_REF_ARG(quotient_pdf_type) quotient_pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(interaction_type) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi, uint32_t depth) + { + const light_type light = scene.lights[lightID]; + const Shape sphere = scene.spheres[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(sphere); + + scalar_type pdf; + const vector3_type sampleL = sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, isBSDF, xi); + const vector3_type V = interaction.isotropic.V.getDirection(); + const scalar_type VdotL = nbl::hlsl::dot(V, sampleL); + ray_dir_info_type rayL; + rayL.direction = sampleL; + sample_type L = sample_type::create(rayL,VdotL,interaction.T,interaction.B,interaction.isotropic.N); + + newRayMaxT *= Tolerance::getEnd(depth); + pdf *= 1.0 / scalar_type(scene.lightCount); + spectral_type quo = light.radiance / pdf; + quotient_pdf = quotient_pdf_type::create(quo, pdf); + + return L; + } +}; + +template +struct Estimator +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using light_type = typename Scene::light_type; + using spectral_type = typename light_type::spectral_type; + using interaction_type = Aniso; + using quotient_pdf_type = sampling::quotient_and_pdf; + using sample_type = LightSample; + using ray_dir_info_type = 
typename sample_type::ray_dir_info_type; + + // NBL_CONSTEXPR_STATIC_INLINE PTPolygonMethod PolygonMethod = PPM; + enum : uint16_t { PolygonMethod = PPM }; + + static spectral_type deferredEvalAndPdf(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(ray_type) ray) + { + pdf = 1.0 / scene.lightCount; + const light_type light = scene.lights[lightID]; + const Shape tri = scene.triangles[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(tri); + pdf *= sampling.template deferredPdf(ray); + + return light.radiance; + } + + static sample_type generate_and_quotient_and_pdf(NBL_REF_ARG(quotient_pdf_type) quotient_pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(interaction_type) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi, uint32_t depth) + { + const light_type light = scene.lights[lightID]; + const Shape tri = scene.triangles[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(tri); + + scalar_type pdf; + const vector3_type sampleL = sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, isBSDF, xi); + const vector3_type V = interaction.isotropic.V.getDirection(); + const scalar_type VdotL = nbl::hlsl::dot(V, sampleL); + ray_dir_info_type rayL; + rayL.direction = sampleL; + sample_type L = sample_type::create(rayL,VdotL,interaction.T,interaction.B,interaction.isotropic.N); + + newRayMaxT *= Tolerance::getEnd(depth); + pdf *= 1.0 / scalar_type(scene.lightCount); + spectral_type quo = light.radiance / pdf; + quotient_pdf = quotient_pdf_type::create(quo, pdf); + + return L; + } +}; + +template +struct Estimator +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using light_type = typename Scene::light_type; + using spectral_type = typename 
light_type::spectral_type; + using interaction_type = Aniso; + using quotient_pdf_type = sampling::quotient_and_pdf; + using sample_type = LightSample; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + + // NBL_CONSTEXPR_STATIC_INLINE PTPolygonMethod PolygonMethod = PPM; + enum : uint16_t { PolygonMethod = PPM }; + + static spectral_type deferredEvalAndPdf(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(ray_type) ray) + { + pdf = 1.0 / scene.lightCount; + const light_type light = scene.lights[lightID]; + const Shape rect = scene.rectangles[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(rect); + pdf *= sampling.template deferredPdf(ray); + + return light.radiance; + } + + static sample_type generate_and_quotient_and_pdf(NBL_REF_ARG(quotient_pdf_type) quotient_pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(interaction_type) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi, uint32_t depth) + { + const light_type light = scene.lights[lightID]; + const Shape rect = scene.rectangles[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(rect); + + scalar_type pdf; + const vector3_type sampleL = sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, isBSDF, xi); + const vector3_type V = interaction.isotropic.V.getDirection(); + const scalar_type VdotL = nbl::hlsl::dot(V, sampleL); + ray_dir_info_type rayL; + rayL.direction = sampleL; + sample_type L = sample_type::create(rayL,VdotL,interaction.T,interaction.B,interaction.isotropic.N); + + newRayMaxT *= Tolerance::getEnd(depth); + pdf *= 1.0 / scalar_type(scene.lightCount); + spectral_type quo = light.radiance / pdf; + quotient_pdf = quotient_pdf_type::create(quo, pdf); + + return L; + } +}; + +} +} +} +} + +#endif diff --git 
a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl new file mode 100644 index 000000000..add1eb8a9 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -0,0 +1,320 @@ +#ifndef _NBL_HLSL_EXT_PATHTRACER_INCLUDED_ +#define _NBL_HLSL_EXT_PATHTRACER_INCLUDED_ + +#include +#include +#include +#include + +#include "rand_gen.hlsl" +#include "ray_gen.hlsl" +#include "intersector.hlsl" +#include "material_system.hlsl" +#include "next_event_estimator.hlsl" +#include "scene.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace PathTracer +{ + +template +struct PathTracerCreationParams +{ + // rng gen + uint32_t2 rngState; + + // ray gen + vector pixOffsetParam; + vector camPos; + vector NDC; + matrix invMVP; + + // mat + BxDFCreation diffuseParams; + BxDFCreation conductorParams; + BxDFCreation dielectricParams; +}; + +template +struct Unidirectional +{ + using this_t = Unidirectional; + using randgen_type = RandGen; + using raygen_type = RayGen; + using intersector_type = Intersector; + using material_system_type = MaterialSystem; + using nee_type = NextEventEstimator; + + using scalar_type = typename MaterialSystem::scalar_type; + using vector3_type = vector; + using measure_type = typename MaterialSystem::measure_type; + using sample_type = typename NextEventEstimator::sample_type; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + using ray_type = typename RayGen::ray_type; + using light_type = Light; + using bxdfnode_type = BxDFNode; + using anisotropic_interaction_type = typename MaterialSystem::anisotropic_interaction_type; + using isotropic_interaction_type = typename anisotropic_interaction_type::isotropic_interaction_type; + using anisocache_type = typename MaterialSystem::anisocache_type; + using isocache_type = typename anisocache_type::isocache_type; + using quotient_pdf_type = typename NextEventEstimator::quotient_pdf_type; + using params_type 
= typename MaterialSystem::params_t; + using create_params_type = typename MaterialSystem::create_params_t; + using scene_type = Scene; + + using diffuse_op_type = typename MaterialSystem::diffuse_op_type; + using conductor_op_type = typename MaterialSystem::conductor_op_type; + using dielectric_op_type = typename MaterialSystem::dielectric_op_type; + + static this_t create(NBL_CONST_REF_ARG(PathTracerCreationParams) params) + { + this_t retval; + retval.randGen = randgen_type::create(params.rngState); + retval.rayGen = raygen_type::create(params.pixOffsetParam, params.camPos, params.NDC, params.invMVP); + retval.materialSystem = material_system_type::create(params.diffuseParams, params.conductorParams, params.dielectricParams); + return retval; + } + + vector3_type rand3d(uint32_t protoDimension, uint32_t _sample, uint32_t i) + { + uint32_t address = glsl::bitfieldInsert(protoDimension, _sample, MAX_DEPTH_LOG2, MAX_SAMPLES_LOG2); + uint32_t3 seqVal = sampleSequence[address + i].xyz; + seqVal ^= randGen(); + return vector3_type(seqVal) * bit_cast(0x2f800004u); + } + + scalar_type getLuma(NBL_CONST_REF_ARG(vector3_type) col) + { + return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); + } + + // TODO: probably will only work with procedural shapes, do the other ones + bool closestHitProgram(uint32_t depth, uint32_t _sample, NBL_REF_ARG(ray_type) ray, NBL_CONST_REF_ARG(scene_type) scene) + { + const ObjectID objectID = ray.objectID; + const vector3_type intersection = ray.origin + ray.direction * ray.intersectionT; + + uint32_t bsdfLightIDs; + anisotropic_interaction_type interaction; + isotropic_interaction_type iso_interaction; + uint32_t mode = objectID.mode; + switch (mode) + { + // TODO + case IM_RAY_QUERY: + case IM_RAY_TRACING: + break; + case IM_PROCEDURAL: + { + bsdfLightIDs = scene.getBsdfLightIDs(objectID); + vector3_type N = scene.getNormal(objectID, intersection); + N = nbl::hlsl::normalize(N); + ray_dir_info_type V; + V.direction = 
-ray.direction; + isotropic_interaction_type iso_interaction = isotropic_interaction_type::create(V, N); + interaction = anisotropic_interaction_type::create(iso_interaction); + } + break; + default: + break; + } + + vector3_type throughput = ray.payload.throughput; + + // emissive + const uint32_t lightID = glsl::bitfieldExtract(bsdfLightIDs, 16, 16); + if (lightID != light_type::INVALID_ID) + { + float _pdf; + ray.payload.accumulation += nee.deferredEvalAndPdf(_pdf, scene, lightID, ray) * throughput / (1.0 + _pdf * _pdf * ray.payload.otherTechniqueHeuristic); + } + + const uint32_t bsdfID = glsl::bitfieldExtract(bsdfLightIDs, 0, 16); + if (bsdfID == bxdfnode_type::INVALID_ID) + return false; + + bxdfnode_type bxdf = scene.bxdfs[bsdfID]; + + // TODO: ifdef kill diffuse specular paths + + const bool isBSDF = (bxdf.materialType == ext::MaterialSystem::MaterialType::DIFFUSE) ? bxdf::traits::type == bxdf::BT_BSDF : + (bxdf.materialType == ext::MaterialSystem::MaterialType::CONDUCTOR) ? bxdf::traits::type == bxdf::BT_BSDF : + bxdf::traits::type == bxdf::BT_BSDF; + + vector3_type eps0 = rand3d(depth, _sample, 0u); + vector3_type eps1 = rand3d(depth, _sample, 1u); + + // thresholds + const scalar_type bxdfPdfThreshold = 0.0001; + const scalar_type lumaContributionThreshold = getLuma(colorspace::eotf::sRGB((vector3_type)1.0 / 255.0)); // OETF smallest perceptible value + const vector3_type throughputCIE_Y = hlsl::transpose(colorspace::sRGBtoXYZ)[1] * throughput; // TODO: this only works if spectral_type is dim 3 + const measure_type eta = bxdf.params.ior0 / bxdf.params.ior1; // assume it's real, not imaginary? + const scalar_type monochromeEta = hlsl::dot(throughputCIE_Y, eta) / (throughputCIE_Y.r + throughputCIE_Y.g + throughputCIE_Y.b); // TODO: imaginary eta? 
+ + // sample lights + const scalar_type neeProbability = 1.0; // BSDFNode_getNEEProb(bsdf); + scalar_type rcpChoiceProb; + if (!math::partitionRandVariable(neeProbability, eps0.z, rcpChoiceProb) && depth < 2u) + { + // pick a light uniformly at random: scale the [0,1] ratio by lightCount BEFORE truncating to an integer (truncating first collapses the ratio to 0 almost always), then clamp because the ratio can round to exactly 1.0 + uint32_t randLightID = min(uint32_t(float32_t(randGen().x) / numeric_limits::max * scene.lightCount), scene.lightCount - 1u); + quotient_pdf_type neeContrib_pdf; + scalar_type t; + sample_type nee_sample = nee.generate_and_quotient_and_pdf( + neeContrib_pdf, t, + scene, randLightID, intersection, interaction, + isBSDF, eps0, depth + ); + + // We don't allow non watertight transmitters in this renderer + bool validPath = nee_sample.getNdotL() > numeric_limits::min; + // but if we allowed non-watertight transmitters (single water surface), it would make sense just to apply this line by itself + bxdf::fresnel::OrientedEtas orientedEta = bxdf::fresnel::OrientedEtas::create(interaction.getNdotV(), monochromeEta); + anisocache_type _cache = anisocache_type::template create(interaction, nee_sample, orientedEta); + validPath = validPath && _cache.getNdotH() >= 0.0; + bxdf.params.eta = monochromeEta; + + if (neeContrib_pdf.pdf < numeric_limits::max) + { + if (nbl::hlsl::any(isnan(nee_sample.getL().getDirection()))) + ray.payload.accumulation += vector3_type(1000.f, 0.f, 0.f); + else if (nbl::hlsl::all((vector3_type)69.f == nee_sample.getL().getDirection())) + ray.payload.accumulation += vector3_type(0.f, 1000.f, 0.f); + else if (validPath) + { + bxdf::BxDFClampMode _clamp; + _clamp = (bxdf.materialType == ext::MaterialSystem::MaterialType::DIELECTRIC) ? 
bxdf::BxDFClampMode::BCM_ABS : bxdf::BxDFClampMode::BCM_MAX; + // example only uses isotropic bxdfs + params_type params = params_type::create(nee_sample, interaction.isotropic, _cache.iso_cache, _clamp); + + quotient_pdf_type bsdf_quotient_pdf = materialSystem.quotient_and_pdf(bxdf.materialType, bxdf.params, params); + neeContrib_pdf.quotient *= bxdf.albedo * throughput * bsdf_quotient_pdf.quotient; + const scalar_type otherGenOverChoice = bsdf_quotient_pdf.pdf * rcpChoiceProb; + // ratio of the competing technique (BSDF sampling) pdf to the pdf of the technique that produced this sample (light sampling); dividing by the BSDF pdf itself would cancel it out and make the MIS weight a constant + const scalar_type otherGenOverLightAndChoice = otherGenOverChoice / neeContrib_pdf.pdf; + neeContrib_pdf.quotient *= otherGenOverChoice / (1.f + otherGenOverLightAndChoice * otherGenOverLightAndChoice); // MIS weight, power heuristic (exponent 2) + + // TODO: ifdef NEE only + // neeContrib_pdf.quotient *= otherGenOverChoice; + + ray_type nee_ray; + nee_ray.origin = intersection + nee_sample.getL().getDirection() * t * Tolerance::getStart(depth); + nee_ray.direction = nee_sample.getL().getDirection(); + nee_ray.intersectionT = t; + if (bsdf_quotient_pdf.pdf < numeric_limits::max && getLuma(neeContrib_pdf.quotient) > lumaContributionThreshold && intersector_type::traceRay(nee_ray, scene).id == -1) + ray.payload.accumulation += neeContrib_pdf.quotient; + } + } + } + + // return false; // NEE only + + // sample BSDF + scalar_type bxdfPdf; + vector3_type bxdfSample; + { + anisocache_type _cache; + sample_type bsdf_sample = materialSystem.generate(bxdf.materialType, bxdf.params, interaction, eps1, _cache); + + bxdf::BxDFClampMode _clamp; + _clamp = (bxdf.materialType == ext::MaterialSystem::MaterialType::DIELECTRIC) ? 
bxdf::BxDFClampMode::BCM_ABS : bxdf::BxDFClampMode::BCM_MAX; + // example only uses isotropic bxdfs + params_type params = params_type::create(bsdf_sample, interaction.isotropic, _cache.iso_cache, _clamp); + + // the value of the bsdf divided by the probability of the sample being generated + quotient_pdf_type bsdf_quotient_pdf = materialSystem.quotient_and_pdf(bxdf.materialType, bxdf.params, params); + throughput *= bxdf.albedo * bsdf_quotient_pdf.quotient; + bxdfPdf = bsdf_quotient_pdf.pdf; + bxdfSample = bsdf_sample.getL().getDirection(); + } + + // additional threshold + const float lumaThroughputThreshold = lumaContributionThreshold; + if (bxdfPdf > bxdfPdfThreshold && getLuma(throughput) > lumaThroughputThreshold) + { + ray.payload.throughput = throughput; + scalar_type otherTechniqueHeuristic = neeProbability / bxdfPdf; // numerically stable, don't touch + ray.payload.otherTechniqueHeuristic = otherTechniqueHeuristic * otherTechniqueHeuristic; + + // trace new ray + ray.origin = intersection + bxdfSample * (1.0/*kSceneSize*/) * Tolerance::getStart(depth); + ray.direction = bxdfSample; + if ((PTPolygonMethod)nee_type::PolygonMethod == PPM_APPROX_PROJECTED_SOLID_ANGLE) + { + ray.normalAtOrigin = interaction.getN(); + ray.wasBSDFAtOrigin = isBSDF; + } + return true; + } + + return false; + } + + void missProgram(NBL_REF_ARG(ray_type) ray) + { + vector3_type finalContribution = ray.payload.throughput; + // #ifdef USE_ENVMAP + // vec2 uv = SampleSphericalMap(_immutable.direction); + // finalContribution *= textureLod(envMap, uv, 0.0).rgb; + // #else + const vector3_type kConstantEnvLightRadiance = vector3_type(0.15, 0.21, 0.3); // TODO: match spectral_type + finalContribution *= kConstantEnvLightRadiance; + ray.payload.accumulation += finalContribution; + // #endif + } + + // Li + measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) + { + measure_type Li = (measure_type)0.0; + scalar_type meanLumaSq = 0.0; + for 
(uint32_t i = 0; i < numSamples; i++) + { + vector3_type uvw = rand3d(0u, i, 0u); // the third argument is an element offset added to the sampleSequence address inside rand3d, so it must be a small index and not a random value; rand3d already XORs the fetched value with randGen() + ray_type ray = rayGen.generate(uvw); + + // bounces + bool hit = true; + bool rayAlive = true; + for (int d = 1; (d <= depth) && hit && rayAlive; d += 2) + { + ray.intersectionT = numeric_limits::max; + ray.objectID = intersector_type::traceRay(ray, scene); + + hit = ray.objectID.id != -1; + if (hit) + rayAlive = closestHitProgram(1, i, ray, scene); + } + if (!hit) + missProgram(ray); + + measure_type accumulation = ray.payload.accumulation; + scalar_type rcpSampleSize = 1.0 / (i + 1); + Li += (accumulation - Li) * rcpSampleSize; + + // TODO: visualize high variance + + // TODO: russian roulette early exit? + } + + return Li; + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_SAMPLES_LOG2 = 10u; + + randgen_type randGen; + raygen_type rayGen; + material_system_type materialSystem; + nee_type nee; +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl new file mode 100644 index 000000000..22695657c --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl @@ -0,0 +1,19 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(fragment) + +// vertex shader is provided by the fullScreenTriangle extension +#include +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +// binding 0 set 0 +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +{ + return float32_t4(texture.Sample(samplerState, vxAttr.uv).rgb, 1.0f); +} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl new file mode 100644 index 000000000..4f5302fea --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl @@ -0,0 +1,38 @@ +#ifndef _NBL_HLSL_EXT_RANDGEN_INCLUDED_ +#define _NBL_HLSL_EXT_RANDGEN_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace RandGen +{ + +template +struct Uniform3D +{ + using rng_type = RNG; + + static Uniform3D create(uint32_t2 seed) + { + Uniform3D retval; + retval.rng = rng_type::construct(seed); + return retval; + } + + uint32_t3 operator()() + { + return uint32_t3(rng(), rng(), rng()); + } + + rng_type rng; +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/ray_gen.hlsl b/31_HLSLPathTracer/app_resources/hlsl/ray_gen.hlsl new file mode 100644 index 000000000..0759b1cd3 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/ray_gen.hlsl @@ -0,0 +1,82 @@ +#ifndef _NBL_HLSL_EXT_RAYGEN_INCLUDED_ +#define _NBL_HLSL_EXT_RAYGEN_INCLUDED_ + +#include + +#include "common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace RayGen +{ + +template +struct Basic +{ + using this_t = Basic; + using ray_type = Ray; + using scalar_type = typename Ray::scalar_type; + using vector3_type = typename Ray::vector3_type; + + using vector2_type = vector; 
+ using vector4_type = vector; + using matrix4x4_type = matrix; + + static this_t create(NBL_CONST_REF_ARG(vector2_type) pixOffsetParam, NBL_CONST_REF_ARG(vector3_type) camPos, NBL_CONST_REF_ARG(vector4_type) NDC, NBL_CONST_REF_ARG(matrix4x4_type) invMVP) + { + this_t retval; + retval.pixOffsetParam = pixOffsetParam; + retval.camPos = camPos; + retval.NDC = NDC; + retval.invMVP = invMVP; + return retval; + } + + ray_type generate(NBL_CONST_REF_ARG(vector3_type) randVec) + { + ray_type ray; + ray.origin = camPos; + + vector4_type tmp = NDC; + // apply stochastic reconstruction filter + const float gaussianFilterCutoff = 2.5; + const float truncation = nbl::hlsl::exp(-0.5 * gaussianFilterCutoff * gaussianFilterCutoff); + vector2_type remappedRand = randVec.xy; + remappedRand.x *= 1.0 - truncation; + remappedRand.x += truncation; + tmp.xy += pixOffsetParam * nbl::hlsl::boxMullerTransform(remappedRand, 1.5); + // for depth of field we could do another stochastic point-pick + tmp = nbl::hlsl::mul(invMVP, tmp); + ray.direction = nbl::hlsl::normalize(tmp.xyz / tmp.w - camPos); + + // #if POLYGON_METHOD==2 + // ray._immutable.normalAtOrigin = vec3(0.0,0.0,0.0); + // ray._immutable.wasBSDFAtOrigin = false; + // #endif + + ray.payload.accumulation = (vector3_type)0.0; + ray.payload.otherTechniqueHeuristic = 0.0; // needed for direct eye-light paths + ray.payload.throughput = (vector3_type)1.0; + // #ifdef KILL_DIFFUSE_SPECULAR_PATHS + // ray._payload.hasDiffuse = false; + // #endif + + return ray; + } + + vector2_type pixOffsetParam; + vector3_type camPos; + vector4_type NDC; + matrix4x4_type invMVP; +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl new file mode 100644 index 000000000..a40eb3dd0 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -0,0 +1,226 @@ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include 
"nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/random/pcg.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" +#ifdef PERSISTENT_WORKGROUPS +#include "nbl/builtin/hlsl/math/morton.hlsl" +#endif + +#include "nbl/builtin/hlsl/bxdf/reflection.hlsl" +#include "nbl/builtin/hlsl/bxdf/transmission.hlsl" + +// add these defines (one at a time) using -D argument to dxc +// #define SPHERE_LIGHT +// #define TRIANGLE_LIGHT +// #define RECTANGLE_LIGHT + +#ifdef SPHERE_LIGHT +#define SPHERE_COUNT 9 +#define TRIANGLE_COUNT 0 +#define RECTANGLE_COUNT 0 +#endif + +#ifdef TRIANGLE_LIGHT +#define TRIANGLE_COUNT 1 +#define SPHERE_COUNT 8 +#define RECTANGLE_COUNT 0 +#endif + +#ifdef RECTANGLE_LIGHT +#define RECTANGLE_COUNT 1 +#define SPHERE_COUNT 8 +#define TRIANGLE_COUNT 0 +#endif + +#define LIGHT_COUNT 1 +#define BXDF_COUNT 7 + +#include "render_common.hlsl" +#include "pathtracer.hlsl" + +using namespace nbl; +using namespace hlsl; + +NBL_CONSTEXPR uint32_t WorkgroupSize = 512; +NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4; +NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10; + +#ifdef SPHERE_LIGHT +NBL_CONSTEXPR ext::ProceduralShapeType LIGHT_TYPE = ext::PST_SPHERE; +#endif +#ifdef TRIANGLE_LIGHT +NBL_CONSTEXPR ext::ProceduralShapeType LIGHT_TYPE = ext::PST_TRIANGLE; +#endif +#ifdef RECTANGLE_LIGHT +NBL_CONSTEXPR ext::ProceduralShapeType LIGHT_TYPE = ext::PST_RECTANGLE; +#endif + +NBL_CONSTEXPR ext::PTPolygonMethod POLYGON_METHOD = ext::PPM_SOLID_ANGLE; + +int32_t2 getCoordinates() +{ + uint32_t width, height; + outImage.GetDimensions(width, height); + return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); +} + +float32_t2 getTexCoords() +{ + uint32_t width, height; + outImage.GetDimensions(width, height); + int32_t2 iCoords = getCoordinates(); + return float32_t2(float(iCoords.x) / width, 1.0 - float(iCoords.y) / height); +} + +using ray_dir_info_t = bxdf::ray_dir_info::SBasic; +using iso_interaction = 
bxdf::surface_interactions::SIsotropic; +using aniso_interaction = bxdf::surface_interactions::SAnisotropic; +using sample_t = bxdf::SLightSample; +using iso_cache = bxdf::SIsotropicMicrofacetCache; +using aniso_cache = bxdf::SAnisotropicMicrofacetCache; +using quotient_pdf_t = sampling::quotient_and_pdf; +using spectral_t = vector; +using create_params_t = bxdf::SBxDFCreationParams; + +using diffuse_bxdf_type = bxdf::reflection::SOrenNayarBxDF; +using conductor_bxdf_type = bxdf::reflection::SGGXBxDF; +using dielectric_bxdf_type = bxdf::transmission::SGGXDielectricBxDF; + +using ray_type = ext::Ray; +using light_type = ext::Light; +using bxdfnode_type = ext::BxDFNode; +using scene_type = ext::Scene; +using randgen_type = ext::RandGen::Uniform3D; +using raygen_type = ext::RayGen::Basic; +using intersector_type = ext::Intersector::Comprehensive; +using material_system_type = ext::MaterialSystem::System; +using nee_type = ext::NextEventEstimator::Estimator; +using pathtracer_type = ext::PathTracer::Unidirectional; + +static const ext::Shape spheres[SPHERE_COUNT] = { + ext::Shape::create(float3(0.0, -100.5, -1.0), 100.0, 0u, light_type::INVALID_ID), + ext::Shape::create(float3(2.0, 0.0, -1.0), 0.5, 1u, light_type::INVALID_ID), + ext::Shape::create(float3(0.0, 0.0, -1.0), 0.5, 2u, light_type::INVALID_ID), + ext::Shape::create(float3(-2.0, 0.0, -1.0), 0.5, 3u, light_type::INVALID_ID), + ext::Shape::create(float3(2.0, 0.0, 1.0), 0.5, 4u, light_type::INVALID_ID), + ext::Shape::create(float3(0.0, 0.0, 1.0), 0.5, 4u, light_type::INVALID_ID), + ext::Shape::create(float3(-2.0, 0.0, 1.0), 0.5, 5u, light_type::INVALID_ID), + ext::Shape::create(float3(0.5, 1.0, 0.5), 0.5, 6u, light_type::INVALID_ID) +#ifdef SPHERE_LIGHT + ,ext::Shape::create(float3(-1.5, 1.5, 0.0), 0.3, bxdfnode_type::INVALID_ID, 0u) +#endif +}; + +#ifdef TRIANGLE_LIGHT +static const ext::Shape triangles[TRIANGLE_COUNT] = { + ext::Shape::create(float3(-1.8,0.35,0.3) * 10.0, float3(-1.2,0.35,0.0) * 10.0, 
float3(-1.5,0.8,-0.3) * 10.0, bxdfnode_type::INVALID_ID, 0u) +}; +#else +static const ext::Shape triangles[1]; +#endif + +#ifdef RECTANGLE_LIGHT +static const ext::Shape rectangles[RECTANGLE_COUNT] = { + ext::Shape::create(float3(-3.8,0.35,1.3), normalize(float3(2,0,-1))*7.0, normalize(float3(2,-5,4))*0.1, bxdfnode_type::INVALID_ID, 0u) +}; +#else +static const ext::Shape rectangles[1]; +#endif + +static const light_type lights[LIGHT_COUNT] = { + light_type::create(spectral_t(30.0,25.0,15.0), +#ifdef SPHERE_LIGHT + 8u, +#else + 0u, +#endif + ext::IntersectMode::IM_PROCEDURAL, LIGHT_TYPE) +}; + +static const bxdfnode_type bxdfs[BXDF_COUNT] = { + bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.8,0.8)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.4,0.4)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.4,0.8,0.4)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1.02,1.02,1.3), spectral_t(1.0,1.0,2.0)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1.02,1.3,1.02), spectral_t(1.0,2.0,1.0)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0.15,0.15), spectral_t(1.02,1.3,1.02), spectral_t(1.0,2.0,1.0)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIELECTRIC, false, float2(0.0625,0.0625), spectral_t(1,1,1), spectral_t(0.71,0.69,0.67)) +}; + +static const ext::Scene scene = ext::Scene::create( + spheres, triangles, rectangles, + SPHERE_COUNT, TRIANGLE_COUNT, RECTANGLE_COUNT, + lights, LIGHT_COUNT, bxdfs, BXDF_COUNT +); + +[numthreads(WorkgroupSize, 1, 1)] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + uint32_t width, height; + outImage.GetDimensions(width, height); +#ifdef PERSISTENT_WORKGROUPS + uint32_t virtualThreadIndex; + [loop] + for 
(uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * WorkgroupSize; virtualThreadBase < 1920*1080; virtualThreadBase += glsl::gl_NumWorkGroups().x * WorkgroupSize) // not sure why 1280*720 doesn't cover draw surface + { + virtualThreadIndex = virtualThreadBase + glsl::gl_LocalInvocationIndex().x; + const int32_t2 coords = (int32_t2)math::Morton::decode2d(virtualThreadIndex); +#else + const int32_t2 coords = getCoordinates(); +#endif + float32_t2 texCoord = float32_t2(coords) / float32_t2(width, height); + texCoord.y = 1.0 - texCoord.y; + + // skip invocations that fall outside the image: a pixel is valid only when 0 <= coords < (width, height) on both axes + if (!(all((int32_t2)0 <= coords) && all(coords < int32_t2(width, height)))) { +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + if (((pc.depth - 1) >> MAX_DEPTH_LOG2) > 0 || ((pc.sampleCount - 1) >> MAX_SAMPLES_LOG2) > 0) + { + float32_t4 pixelCol = float32_t4(1.0,0.0,0.0,1.0); + outImage[coords] = pixelCol; +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + int flatIdx = glsl::gl_GlobalInvocationID().y * glsl::gl_NumWorkGroups().x * WorkgroupSize + glsl::gl_GlobalInvocationID().x; + + // set up path tracer + ext::PathTracer::PathTracerCreationParams ptCreateParams; + ptCreateParams.rngState = scramblebuf[coords].rg; + + uint2 scrambleDim; + scramblebuf.GetDimensions(scrambleDim.x, scrambleDim.y); + ptCreateParams.pixOffsetParam = (float2)1.0 / float2(scrambleDim); + + float4 NDC = float4(texCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); + { + float4 tmp = mul(pc.invMVP, NDC); + ptCreateParams.camPos = tmp.xyz / tmp.w; + NDC.z = 1.0; + } + + ptCreateParams.NDC = NDC; + ptCreateParams.invMVP = pc.invMVP; + + ptCreateParams.diffuseParams = bxdfs[0].params; + ptCreateParams.conductorParams = bxdfs[3].params; + ptCreateParams.dielectricParams = bxdfs[6].params; + + pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); + + float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); + float32_t4 pixCol = float32_t4(color, 1.0); + 
outImage[coords] = pixCol; + +#ifdef PERSISTENT_WORKGROUPS + } +#endif +} diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl new file mode 100644 index 000000000..5e5cf89da --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -0,0 +1,23 @@ +#ifndef _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ + +struct SPushConstants +{ + float32_t4x4 invMVP; + int sampleCount; + int depth; +}; + +[[vk::push_constant]] SPushConstants pc; + +[[vk::combinedImageSampler]][[vk::binding(0, 2)]] Texture2D envMap; // unused +[[vk::combinedImageSampler]][[vk::binding(0, 2)]] SamplerState envSampler; + +[[vk::binding(1, 2)]] Buffer sampleSequence; + +[[vk::combinedImageSampler]][[vk::binding(2, 2)]] Texture2D scramblebuf; // read texel-wise (scramblebuf[coords]) by render.comp.hlsl to seed the per-pixel RNG state +[[vk::combinedImageSampler]][[vk::binding(2, 2)]] SamplerState scrambleSampler; + +[[vk::image_format("rgba16f")]][[vk::binding(0, 0)]] RWTexture2D outImage; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/scene.hlsl b/31_HLSLPathTracer/app_resources/hlsl/scene.hlsl new file mode 100644 index 000000000..40fb01057 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/scene.hlsl @@ -0,0 +1,111 @@ +#ifndef _NBL_HLSL_EXT_PATHTRACING_SCENE_INCLUDED_ +#define _NBL_HLSL_EXT_PATHTRACING_SCENE_INCLUDED_ + +#include "common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ + +template +struct Scene +{ + using light_type = Light; + using bxdfnode_type = BxdfNode; + using this_t = Scene; + + // NBL_CONSTEXPR_STATIC_INLINE uint32_t maxSphereCount = 25; + // NBL_CONSTEXPR_STATIC_INLINE uint32_t maxTriangleCount = 12; + // NBL_CONSTEXPR_STATIC_INLINE uint32_t maxRectangleCount = 12; + +#if SPHERE_COUNT < 1 +#define SCENE_SPHERE_COUNT 1 +#else +#define SCENE_SPHERE_COUNT SPHERE_COUNT +#endif + +#if TRIANGLE_COUNT < 1 +#define SCENE_TRIANGLE_COUNT 1 +#else +#define SCENE_TRIANGLE_COUNT 
TRIANGLE_COUNT +#endif + +#if RECTANGLE_COUNT < 1 +#define SCENE_RECTANGLE_COUNT 1 +#else +#define SCENE_RECTANGLE_COUNT RECTANGLE_COUNT +#endif + + Shape spheres[SCENE_SPHERE_COUNT]; + Shape triangles[SCENE_TRIANGLE_COUNT]; + Shape rectangles[SCENE_RECTANGLE_COUNT]; + + uint32_t sphereCount; + uint32_t triangleCount; + uint32_t rectangleCount; + + // NBL_CONSTEXPR_STATIC_INLINE uint32_t maxLightCount = 4; + + light_type lights[LIGHT_COUNT]; + uint32_t lightCount; + + // NBL_CONSTEXPR_STATIC_INLINE uint32_t maxBxdfCount = 16; + + bxdfnode_type bxdfs[BXDF_COUNT]; + uint32_t bxdfCount; + + // AS ases; + + static this_t create( + NBL_CONST_REF_ARG(Shape) spheres[SCENE_SPHERE_COUNT], + NBL_CONST_REF_ARG(Shape) triangles[SCENE_TRIANGLE_COUNT], + NBL_CONST_REF_ARG(Shape) rectangles[SCENE_RECTANGLE_COUNT], + uint32_t sphereCount, uint32_t triangleCount, uint32_t rectangleCount, + NBL_CONST_REF_ARG(light_type) lights[LIGHT_COUNT], uint32_t lightCount, + NBL_CONST_REF_ARG(bxdfnode_type) bxdfs[BXDF_COUNT], uint32_t bxdfCount) + { + this_t retval; + retval.spheres = spheres; + retval.triangles = triangles; + retval.rectangles = rectangles; + retval.sphereCount = sphereCount; + retval.triangleCount = triangleCount; + retval.rectangleCount = rectangleCount; + + retval.lights = lights; + retval.lightCount = lightCount; + + retval.bxdfs = bxdfs; + retval.bxdfCount = bxdfCount; + return retval; + } + +#undef SCENE_SPHERE_COUNT +#undef SCENE_TRIANGLE_COUNT +#undef SCENE_RECTANGLE_COUNT + + // TODO: get these to work with AS types as well + uint32_t getBsdfLightIDs(NBL_CONST_REF_ARG(ObjectID) objectID) + { + return (objectID.shapeType == PST_SPHERE) ? spheres[objectID.id].bsdfLightIDs : + (objectID.shapeType == PST_TRIANGLE) ? triangles[objectID.id].bsdfLightIDs : + (objectID.shapeType == PST_RECTANGLE) ? 
rectangles[objectID.id].bsdfLightIDs : -1; + } + + float32_t3 getNormal(NBL_CONST_REF_ARG(ObjectID) objectID, NBL_CONST_REF_ARG(float32_t3) intersection) + { + return (objectID.shapeType == PST_SPHERE) ? spheres[objectID.id].getNormal(intersection) : + (objectID.shapeType == PST_TRIANGLE) ? triangles[objectID.id].getNormalTimesArea() : + (objectID.shapeType == PST_RECTANGLE) ? rectangles[objectID.id].getNormalTimesArea() : + (float32_t3)0.0; + } +}; + +} +} +} + +#endif diff --git a/31_HLSLPathTracer/config.json.template b/31_HLSLPathTracer/config.json.template new file mode 100644 index 000000000..24adf54fb --- /dev/null +++ b/31_HLSLPathTracer/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} diff --git a/31_HLSLPathTracer/include/nbl/this_example/common.hpp b/31_HLSLPathTracer/include/nbl/this_example/common.hpp new file mode 100644 index 000000000..db051bb3e --- /dev/null +++ b/31_HLSLPathTracer/include/nbl/this_example/common.hpp @@ -0,0 +1,17 @@ +#ifndef __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ +#define __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ + +#include + +// common api +#include "nbl/examples/common/SimpleWindowedApplication.hpp" +#include "nbl/examples/examples.hpp" +#include "nbl/examples/cameras/CCamera.hpp" +#include "nbl/examples/common/CEventCallback.hpp" + +// example's own headers +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file diff --git 
a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp new file mode 100644 index 000000000..2e139af8d --- /dev/null +++ b/31_HLSLPathTracer/main.cpp @@ -0,0 +1,1425 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/this_example/common.hpp" +#include "nbl/asset/interchange/IImageAssetHandlerBase.h" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "nbl/builtin/hlsl/surface_transform.h" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace nbl::examples; + +struct PTPushConstant { + matrix4SIMD invMVP; + int sampleCount; + int depth; +}; + +// TODO: Add a QueryPool for timestamping once its ready +// TODO: Do buffer creation using assConv +class HLSLComputePathtracer final : public SimpleWindowedApplication, public BuiltinResourcesApplication +{ + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + using clock_t = std::chrono::steady_clock; + + enum E_LIGHT_GEOMETRY : uint8_t + { + ELG_SPHERE, + ELG_TRIANGLE, + ELG_RECTANGLE, + ELG_COUNT + }; + + enum E_RENDER_MODE : uint8_t + { + ERM_GLSL, + ERM_HLSL, + // ERM_CHECKERED, + ERM_COUNT + }; + + constexpr static inline uint32_t2 WindowDimensions = { 1280, 720 }; + constexpr static inline uint32_t MaxFramesInFlight = 5; + constexpr static inline clock_t::duration DisplayImageDuration = std::chrono::milliseconds(900); + constexpr static inline uint32_t DefaultWorkGroupSize = 512u; + constexpr static inline uint32_t MaxDescriptorCount = 256u; + constexpr static inline uint32_t MaxDepthLog2 = 4u; // 5 + constexpr static inline uint32_t MaxSamplesLog2 = 10u; // 18 + constexpr static inline uint32_t MaxBufferDimensions = 3u << MaxDepthLog2; + constexpr static inline 
uint32_t MaxBufferSamples = 1u << MaxSamplesLog2; + constexpr static inline uint8_t MaxUITextureCount = 1u; + static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; + static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; + static inline std::array PTGLSLShaderPaths = { "app_resources/glsl/litBySphere.comp", "app_resources/glsl/litByTriangle.comp", "app_resources/glsl/litByRectangle.comp" }; + static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; + static inline std::array PTHLSLShaderVariants = { "SPHERE_LIGHT", "TRIANGLE_LIGHT", "RECTANGLE_LIGHT" }; + static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; + + const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { + "ELG_SPHERE", + "ELG_TRIANGLE", + "ELG_RECTANGLE" + }; + + const char* shaderTypes[E_RENDER_MODE::ERM_COUNT] = { + "ERM_GLSL", + "ERM_HLSL" + }; + + public: + inline HLSLComputePathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline bool isComputeOnly() const override { return false; } + + inline core::vector getSurfaces() const override + { + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WindowDimensions.x; + params.height = WindowDimensions.y; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "ComputeShaderPathtracer"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), 
smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } + + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Init systems + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + + if (!m_semaphore) + return logFail("Failed to create semaphore!"); + } + + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; + { + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, 
dependencies); + renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } + + // image upload utils + { + m_scratchSemaphore = m_device->createSemaphore(0); + if (!m_scratchSemaphore) + return logFail("Could not create Scratch Semaphore"); + m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); + // we don't want to overcomplicate the example with multi-queue + m_intendedSubmit.queue = getGraphicsQueue(); + // wait for nothing before upload + m_intendedSubmit.waitSemaphores = {}; + m_intendedSubmit.waitSemaphores = {}; + // fill later + m_intendedSubmit.scratchCommandBuffers = {}; + m_intendedSubmit.scratchSemaphore = { + .semaphore = m_scratchSemaphore.get(), + .value = 0, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + } + + // Create command pool and buffers + { + auto gQueue = getGraphicsQueue(); + m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); + + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) + return logFail("Couldn't create Command Buffer!"); + } + + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = m_device->createSampler(samplerParams); + + // Create descriptors and pipeline for the pathtracer + { + auto convertDSLayoutCPU2GPU = [&](smart_refctd_ptr cpuLayout) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + 
params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuLayout.get(),1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuLayout = reservation.getGPUObjects().front().value; + if (!gpuLayout) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuLayout; + }; + auto convertDSCPU2GPU = [&](smart_refctd_ptr cpuDS) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuDS.get(), 1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuDS = reservation.getGPUObjects().front().value; + if (!gpuDS) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuDS; + }; + + std::array descriptorSet0Bindings = {}; + std::array descriptorSet3Bindings = {}; + std::array presentDescriptorSetBindings; + + descriptorSet0Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = 
ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[1] = { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[2] = { + .binding = 2u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + presentDescriptorSetBindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = &defaultSampler + }; + + auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); + auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); + + auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); + auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); + auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); + + auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); + auto cpuDescriptorSet2 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); + + m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); + m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); + + smart_refctd_ptr presentDSPool; + { + const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; + const uint32_t setCounts[] = { 1u }; + 
presentDSPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + } + m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); + + // Create Shaders + auto loadAndCompileGLSLShader = [&](const std::string& pathToShader, bool persistentWorkGroups = false) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.workingDirectory = localInputCWD; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + auto source = smart_refctd_ptr_static_cast(assets[0]); + // The down-cast should not fail! + assert(source); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CGLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; // should be compute + options.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#endif + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const IShaderCompiler::SMacroDefinition persistentDefine = { "PERSISTENT_WORKGROUPS", "1" }; + if (persistentWorkGroups) + options.preprocessorOptions.extraDefines = { &persistentDefine, &persistentDefine + 1 }; + + source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + + // this time we skip the use of the asset converter since 
the ICPUShader->IGPUShader path is quick and simple + auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); + if (!shader) + { + m_logger->log("GLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + return shader; + }; + + auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.workingDirectory = localInputCWD; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + auto source = smart_refctd_ptr_static_cast(assets[0]); + // The down-cast should not fail! + assert(source); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#endif + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const IShaderCompiler::SMacroDefinition defines[2] = { {defineMacro, ""}, { "PERSISTENT_WORKGROUPS", "1" } }; + if (!defineMacro.empty() && persistentWorkGroups) + options.preprocessorOptions.extraDefines = { defines, defines + 2 }; + else if (!defineMacro.empty() && !persistentWorkGroups) + 
options.preprocessorOptions.extraDefines = { defines, defines + 1 }; + + source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + + auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); + if (!shader) + { + m_logger->log("HLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + return shader; + }; + + // Create compute pipelines + { + for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(PTPushConstant) + }; + auto ptPipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0), + nullptr, + core::smart_refctd_ptr(gpuDescriptorSetLayout2), + nullptr + ); + if (!ptPipelineLayout) { + return logFail("Failed to create Pathtracing pipeline layout"); + } + + { + auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index]); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = ptPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTGLSLPipelines.data() + index)) + return logFail("Failed to create GLSL compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index]); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = ptPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, 
m_PTHLSLPipelines.data() + index)) + return logFail("Failed to create HLSL compute pipeline!\n"); + } + + // persistent wg pipelines + { + auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index], true); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = ptPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTGLSLPersistentWGPipelines.data() + index)) + return logFail("Failed to create GLSL PersistentWG compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = ptPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + index)) + return logFail("Failed to create HLSL PersistentWG compute pipeline!\n"); + } + } + } + + // Create graphics pipeline + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileHLSLShader(PresentShaderPath); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main" + }; + + auto presentLayout = 
m_device->createPipelineLayout( + {}, + core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + + } + } + + // load CPUImages and convert to GPUImages + smart_refctd_ptr envMap, scrambleMap; + { + auto convertImgCPU2GPU = [&](std::span cpuImgs) { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + std::array commandBufferInfo = { cmdbuf }; + core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); + imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); + + auto converter = CAssetConverter::create({ .device = m_device.get() }); + // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. + struct SInputs final : CAssetConverter::SInputs + { + // we also need to override this to have concurrent sharing + inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override + { + if (familyIndices.size() > 1) + return familyIndices; + return {}; + } + + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } + + std::vector familyIndices; + } inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + { + const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; + inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; + } 
+ // scratch command buffers for asset converter transfer commands + SIntendedSubmitInfo transfer = { + .queue = queue, + .waitSemaphores = {}, + .prevCommandBuffers = {}, + .scratchCommandBuffers = commandBufferInfo, + .scratchSemaphore = { + .semaphore = imgFillSemaphore.get(), + .value = 0, + // because of layout transitions + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } + }; + // as per the `SIntendedSubmitInfo` one commandbuffer must be begun + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the + // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing + CAssetConverter::SConvertParams params = {}; + params.transfer = &transfer; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = cpuImgs; + // assert that we don't need to provide patches + assert(cpuImgs[0]->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuImgs = reservation.getGPUObjects(); + for (auto& gpuImg : gpuImgs) { + if (!gpuImg) { + m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); + std::exit(-1); + } + } + + // and launch the conversions + m_api->startCapture(); + auto result = reservation.convert(params); + m_api->endCapture(); + if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { + m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); + std::exit(-1); + } + + envMap = gpuImgs[0].value; + scrambleMap = gpuImgs[1].value; + }; + + smart_refctd_ptr envMapCPU, scrambleMapCPU; + { + IAssetLoader::SAssetLoadParams lp; + lp.workingDirectory = this->sharedInputCWD; + SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); + if 
(bundle.getContents().empty()) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + + envMapCPU = IAsset::castDown(bundle.getContents()[0]); + if (!envMapCPU) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + }; + { + asset::ICPUImage::SCreationParams info; + info.format = asset::E_FORMAT::EF_R32G32_UINT; + info.type = asset::ICPUImage::ET_2D; + auto extent = envMapCPU->getCreationParameters().extent; + info.extent.width = extent.width; + info.extent.height = extent.height; + info.extent.depth = 1u; + info.mipLevels = 1u; + info.arrayLayers = 1u; + info.samples = asset::ICPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + info.flags = static_cast(0u); + info.usage = asset::IImage::EUF_TRANSFER_SRC_BIT | asset::IImage::EUF_SAMPLED_BIT; + + scrambleMapCPU = ICPUImage::create(std::move(info)); + const uint32_t texelFormatByteSize = getTexelOrBlockBytesize(scrambleMapCPU->getCreationParameters().format); + const uint32_t texelBufferSize = scrambleMapCPU->getImageDataSizeInBytes(); + auto texelBuffer = ICPUBuffer::create({ texelBufferSize }); + + core::RandomSampler rng(0xbadc0ffeu); + auto out = reinterpret_cast(texelBuffer->getPointer()); + for (auto index = 0u; index < texelBufferSize / 4; index++) { + out[index] = rng.nextSample(); + } + + auto regions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = regions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = 1u; + region.bufferOffset = 0u; + region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(extent.width, texelFormatByteSize); + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = scrambleMapCPU->getCreationParameters().extent; + + scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); + } + 
+ std::array cpuImgs = { envMapCPU.get(), scrambleMapCPU.get()}; + convertImgCPU2GPU(cpuImgs); + } + + // create views for textures + { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = width; + imgInfo.extent.height = height; + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = IGPUImage::ESCF_1_BIT; + imgInfo.flags = static_cast(0u); + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + return image; + }; + auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr + { + auto format = img->getCreationParameters().format; + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(img); + imgViewInfo.format = format; + imgViewInfo.viewType = IGPUImageView::ET_2D; + imgViewInfo.flags = static_cast(0u); + imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.subresourceRange.levelCount = 1u; + + return m_device->createImageView(std::move(imgViewInfo)); + }; + + auto params = envMap->getCreationParameters(); + auto extent = params.extent; + envMap->setObjectDebugName("Env Map"); + m_envMapView = createHDRIImageView(envMap); + m_envMapView->setObjectDebugName("Env Map View"); + scrambleMap->setObjectDebugName("Scramble Map"); + m_scrambleView = 
createHDRIImageView(scrambleMap); + m_scrambleView->setObjectDebugName("Scramble Map View"); + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); + outImg->setObjectDebugName("Output Image"); + m_outImgView = createHDRIImageView(outImg); + m_outImgView->setObjectDebugName("Output Image View"); + } + + // create sequence buffer view + { + // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` + auto createBufferFromCacheFile = [this]( + system::path filename, + size_t bufferSize, + void *data, + smart_refctd_ptr& buffer + ) -> std::pair, bool> + { + ISystem::future_t> owenSamplerFileFuture; + ISystem::future_t owenSamplerFileReadFuture; + size_t owenSamplerFileBytesRead; + + m_system->createFile(owenSamplerFileFuture, localOutputCWD / filename, IFile::ECF_READ); + smart_refctd_ptr owenSamplerFile; + + if (owenSamplerFileFuture.wait()) + { + owenSamplerFileFuture.acquire().move_into(owenSamplerFile); + if (!owenSamplerFile) + return { nullptr, false }; + + owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, bufferSize); + if (owenSamplerFileReadFuture.wait()) + { + owenSamplerFileReadFuture.acquire().move_into(owenSamplerFileBytesRead); + + if (owenSamplerFileBytesRead < bufferSize) + { + buffer = asset::ICPUBuffer::create({ sizeof(uint32_t) * bufferSize }); + return { owenSamplerFile, false }; + } + + buffer = asset::ICPUBuffer::create({ { sizeof(uint32_t) * bufferSize }, data }); + } + } + + return { owenSamplerFile, true }; + }; + auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) + { + ISystem::future_t owenSamplerFileWriteFuture; + size_t owenSamplerFileBytesWritten; + + file->write(owenSamplerFileWriteFuture, data, 0, bufferSize); + if (owenSamplerFileWriteFuture.wait()) + owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); + }; + + constexpr size_t bufferSize = MaxBufferDimensions * MaxBufferSamples; + 
std::array data = {}; + smart_refctd_ptr sampleSeq; + + auto cacheBufferResult = createBufferFromCacheFile(sharedOutputCWD/OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); + if (!cacheBufferResult.second) + { + core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); + + ICPUBuffer::SCreationParams params = {}; + params.size = MaxBufferDimensions*MaxBufferSamples*sizeof(uint32_t); + sampleSeq = ICPUBuffer::create(std::move(params)); + + auto out = reinterpret_cast(sampleSeq->getPointer()); + for (auto dim = 0u; dim < MaxBufferDimensions; dim++) + for (uint32_t i = 0; i < MaxBufferSamples; i++) + { + out[i * MaxBufferDimensions + dim] = sampler.sample(dim, i); + } + if (cacheBufferResult.first) + writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); + } + + IGPUBuffer::SCreationParams params = {}; + params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT; + params.size = sampleSeq->getSize(); + + // we don't want to overcomplicate the example with multi-queue + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; + m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo, 1 }; + + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + m_api->startCapture(); + auto bufferFuture = m_utils->createFilledDeviceLocalBufferOnDedMem( + m_intendedSubmit, + std::move(params), + sampleSeq->getPointer() + ); + m_api->endCapture(); + bufferFuture.wait(); + auto buffer = bufferFuture.get(); + + m_sequenceBufferView = m_device->createBufferView({ 0u, buffer->get()->getSize(), *buffer }, asset::E_FORMAT::EF_R32G32B32_UINT); + m_sequenceBufferView->setObjectDebugName("Sequence Buffer"); + } + + // Update Descriptors + { + ISampler::SParams samplerParams0 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + 
ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_FLOAT_OPAQUE_BLACK, + ISampler::ETF_LINEAR, + ISampler::ETF_LINEAR, + ISampler::ESMM_LINEAR, + 0u, + false, + ECO_ALWAYS + }; + auto sampler0 = m_device->createSampler(samplerParams0); + ISampler::SParams samplerParams1 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_INT_OPAQUE_BLACK, + ISampler::ETF_NEAREST, + ISampler::ETF_NEAREST, + ISampler::ESMM_NEAREST, + 0u, + false, + ECO_ALWAYS + }; + auto sampler1 = m_device->createSampler(samplerParams1); + + std::array writeDSInfos = {}; + writeDSInfos[0].desc = m_outImgView; + writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[1].desc = m_envMapView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; + writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[2].desc = m_sequenceBufferView; + writeDSInfos[3].desc = m_scrambleView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; + writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; + writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[4].desc = m_outImgView; + writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + std::array writeDescriptorSets = {}; + writeDescriptorSets[0] = { + .dstSet = m_descriptorSet0.get(), + .binding = 0, + .arrayElement = 
0u, + .count = 1u, + .info = &writeDSInfos[0] + }; + writeDescriptorSets[1] = { + .dstSet = m_descriptorSet2.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[1] + }; + writeDescriptorSets[2] = { + .dstSet = m_descriptorSet2.get(), + .binding = 1, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[2] + }; + writeDescriptorSets[3] = { + .dstSet = m_descriptorSet2.get(), + .binding = 2, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[3] + }; + writeDescriptorSets[4] = { + .dstSet = m_presentDescriptorSet.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[4] + }; + + m_device->updateDescriptorSets(writeDescriptorSets, {}); + } + + // Create ui descriptors + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + } + + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); + + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + + nbl::ext::imgui::UI::SCreationParameters params; + + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + 
params.transfer = getTransferUpQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); + + // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + const auto& params = m_ui.manager->getCreationParameters(); + + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + + m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); + + m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); + } + } + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); + + m_camera.setProjectionMatrix([&]() + { + static matrix4SIMD projection; + + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + + return projection; + }()); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); + + ImGui::SameLine(); + + ImGui::Text("Camera"); + + ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate 
speed", &rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); + ImGui::Combo("Shader", &PTPipeline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); + ImGui::Combo("Render Mode", &renderMode, shaderTypes, E_RENDER_MODE::ERM_COUNT); + ImGui::SliderInt("SPP", &spp, 1, MaxBufferSamples); + ImGui::SliderInt("Depth", &depth, 1, MaxBufferDimensions / 3); + ImGui::Checkbox("Persistent WorkGroups", &usePersistentWorkGroups); + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + + ImGui::End(); + } + ); + + // Set Camera + { + core::vectorSIMDf cameraPosition(0, 5, -10); + matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + core::radians(60.0f), + WindowDimensions.x / WindowDimensions.y, + 0.01f, + 500.0f + ); + m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); + } + + m_winMgr->setWindowSize(m_window.get(), WindowDimensions.x, WindowDimensions.y); + m_surface->recreateSwapchain(); + m_winMgr->show(m_window.get()); + m_oracle.reportBeginFrameRecord(); + m_camera.mapKeysToWASD(); + + return true; + } + + bool updateGUIDescriptorSet() + { + // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout + static std::array descriptorInfo; + static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; + + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); + + for (uint32_t i = 0; i < descriptorInfo.size(); ++i) + { + writes[i].dstSet = m_ui.descriptorSet.get(); + writes[i].binding = 0u; + writes[i].arrayElement = i; + writes[i].count = 1u; + } + writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + 
nbl::ext::imgui::UI::FontAtlasTexId; + + return m_device->updateDescriptorSets(writes, {}); + } + + inline void workLoopBody() override + { + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] + if (m_realFrameIx >= framesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + m_api->startCapture(); + + // CPU events + update(); + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[resourceIx].get(); + + if (!keepRunning()) + return; + + // render whole scene to offline frame buffer & submit + { + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + // disregard surface/swapchain transformation for now + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + PTPushConstant pc; + viewProjectionMatrix.getInverseTransform(pc.invMVP); + pc.sampleCount = spp; + pc.depth = depth; + + // safe to proceed + // upload buffer data + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier 
imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // cube envmap handle + { + IGPUComputePipeline* pipeline; + if (usePersistentWorkGroups) + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); + else + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc); + if (usePersistentWorkGroups) + { + uint32_t dispatchSize = m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize); + cmdbuf->dispatch(dispatchSize, 1u, 1u); + } + else + cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask 
= PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // TODO: tone mapping and stuff + } + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = WindowDimensions.x; + viewport.height = WindowDimensions.y; + } + cmdbuf->setViewport(0u, 1u, &viewport); + + + VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; + cmdbuf->setScissor(defaultScisors); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + auto scRes = static_cast(m_surface->getSwapchainResources()); + + // Upload m_outImg to swapchain + UI + { + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearColor, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); + 
ext::FullScreenTriangle::recordDrawCall(cmdbuf); + + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cmdbuf->bindGraphicsPipeline(uiPipeline); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + m_ui.manager->render(cmdbuf, waitInfo); + + cmdbuf->endRenderPass(); + } + + cmdbuf->end(); + { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } + }; + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + updateGUIDescriptorSet(); + + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + m_realFrameIx--; + } + } + + m_window->setCaption("[Nabla Engine] HLSL Compute Path Tracer"); + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + m_api->endCapture(); + } + + inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + + inline void update() + { + m_camera.setMoveSpeed(moveSpeed); + m_camera.setRotateSpeed(rotateSpeed); + + static std::chrono::microseconds previousEventTimestamp{}; + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + 
m_oracle.reportEndFrameRecord(); + const auto timestamp = m_oracle.getNextPresentationTimeStamp(); + m_oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; + + m_camera.beginInputProcessing(nextPresentationTimestamp); + { + const auto& io = ImGui::GetIO(); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (!io.WantCaptureMouse) + m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.mouse.emplace_back(e); + + if (e.type == nbl::ui::SMouseEvent::EET_SCROLL) + gcIndex = std::clamp(int16_t(gcIndex) + int16_t(core::sign(e.scrollEvent.verticalScroll)), int64_t(0), int64_t(ELG_COUNT - (uint8_t)1u)); + } + }, m_logger.get()); + + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + if (!io.WantCaptureKeyboard) + m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.keyboard.emplace_back(e); + } + }, m_logger.get()); + } + m_camera.endInputProcessing(nextPresentationTimestamp); + + const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), 
m_window->getY()); + + const ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; + + m_ui.manager->update(params); + } + + private: + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + + // gpu resources + smart_refctd_ptr m_cmdPool; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPersistentWGPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; + smart_refctd_ptr m_presentPipeline; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; + + core::smart_refctd_ptr m_guiDescriptorSetPool; + + // system resources + core::smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + + // pathtracer resources + smart_refctd_ptr m_envMapView, m_scrambleView; + smart_refctd_ptr m_sequenceBufferView; + smart_refctd_ptr m_outImgView; + + // sync + smart_refctd_ptr m_semaphore; + + // image upload resources + smart_refctd_ptr m_scratchSemaphore; + SIntendedSubmitInfo m_intendedSubmit; + + struct C_UI + { + nbl::core::smart_refctd_ptr manager; + + struct + { + core::smart_refctd_ptr gui, scene; + } samplers; + + core::smart_refctd_ptr descriptorSet; + } m_ui; + + Camera m_camera; + + video::CDumbPresentationOracle m_oracle; + + uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + + float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + 
float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + int PTPipeline = E_LIGHT_GEOMETRY::ELG_SPHERE; + int renderMode = E_RENDER_MODE::ERM_HLSL; + int spp = 32; + int depth = 3; + bool usePersistentWorkGroups = false; + + bool m_firstFrame = true; + IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; +}; + +NBL_MAIN_FUNC(HLSLComputePathtracer) diff --git a/31_HLSLPathTracer/pipeline.groovy b/31_HLSLPathTracer/pipeline.groovy new file mode 100644 index 000000000..955e77cec --- /dev/null +++ b/31_HLSLPathTracer/pipeline.groovy @@ -0,0 +1,50 @@ +import org.DevshGraphicsProgramming.Agent +import org.DevshGraphicsProgramming.BuilderInfo +import org.DevshGraphicsProgramming.IBuilder + +class CHLSLPathTracerBuilder extends IBuilder +{ + public CHLSLPathTracerBuilder(Agent _agent, _info) + { + super(_agent, _info) + } + + @Override + public boolean prepare(Map axisMapping) + { + return true + } + + @Override + public boolean build(Map axisMapping) + { + IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") + IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") + + def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) + def nameOfConfig = getNameOfConfig(config) + + agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") + + return true + } + + @Override + public boolean test(Map axisMapping) + { + return true + } + + @Override + public boolean install(Map axisMapping) + { + return true + } +} + +def create(Agent _agent, _info) +{ + return new CHLSLPathTracerBuilder(_agent, _info) +} + +return this diff --git a/CMakeLists.txt b/CMakeLists.txt index f8ce94f93..9179ba584 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,6 +66,8 @@ if(NBL_BUILD_EXAMPLES) # Showcase compute pathtracing add_subdirectory(30_ComputeShaderPathTracer) + 
add_subdirectory(31_HLSLPathTracer EXCLUDE_FROM_ALL) + add_subdirectory(38_EXRSplit) # if (NBL_BUILD_MITSUBA_LOADER AND NBL_BUILD_OPTIX) # add_subdirectory(39_DenoiserTonemapper)