Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions sycl/source/detail/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,34 @@ const std::array<std::pair<std::string, backend>, 8> &getSyclBeMap() {
{"*", backend::all}}};
return SyclBeMap;
}
namespace {

/// Converts the raw textual value of the cache tracing option into a level.
///
/// \param ValStr raw option value; may be null.
/// \return 0 when \p ValStr is null; the parsed number when it is in the
///         range [0, 7]; 1 (the legacy fallback) when the text is not a
///         number, cannot be represented, or is greater than 7.
unsigned int parseLevel(const char *ValStr) {
  // Largest value accepted as-is; anything above it takes the legacy path.
  constexpr unsigned long MaxLevel = 7;
  // If the value is not null and not a valid level, it is considered to
  // enable disk cache tracing only. This is the legacy behavior.
  constexpr unsigned int LegacyLevel = 1;

  if (!ValStr)
    return 0;

  unsigned long Parsed = 0;
  try {
    Parsed = std::stoul(ValStr);
  } catch (...) {
    // Covers both non-numeric text and values too large for unsigned long.
    return LegacyLevel;
  }

  // Range-check in the full width returned by std::stoul. Narrowing to
  // unsigned int first would let large inputs (e.g. 2^32 + 2 where
  // unsigned long is 64-bit) wrap around to a small, seemingly valid level.
  if (Parsed > MaxLevel)
    return LegacyLevel;

  return static_cast<unsigned int>(Parsed);
}

} // namespace

// Cached trace level, computed from the value returned by
// BaseT::getRawValue() when this static member is initialized.
unsigned int SYCLConfigTrace::Level =
    parseLevel(SYCLConfigTrace::BaseT::getRawValue());

// Re-reads the raw option value and overwrites the cached level with the
// freshly parsed result.
void SYCLConfigTrace::reset() {
  Level = parseLevel(BaseT::getRawValue());
}

} // namespace detail
} // namespace _V1
Expand Down
47 changes: 7 additions & 40 deletions sycl/source/detail/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -709,52 +709,19 @@ template <> class SYCLConfig<SYCL_JIT_AMDGCN_PTX_TARGET_FEATURES> {
// tracing of the corresponding caches. If the input value is not null and
// not a valid number, the disk cache tracing will be enabled (deprecated
// behavior). The default value is 0 and no tracing is enabled.
template <> class SYCLConfig<SYCL_CACHE_TRACE> {
class SYCLConfigTrace {
using BaseT = SYCLConfigBase<SYCL_CACHE_TRACE>;
enum TraceBitmask { DiskCache = 1, InMemCache = 2, KernelCompiler = 4 };

public:
static unsigned int get() { return getCachedValue(); }
static void reset() { (void)getCachedValue(true); }
static bool isTraceDiskCache() {
return getCachedValue() & TraceBitmask::DiskCache;
}
static bool isTraceInMemCache() {
return getCachedValue() & TraceBitmask::InMemCache;
}
static bool isTraceKernelCompiler() {
return getCachedValue() & TraceBitmask::KernelCompiler;
}
static unsigned int get() { return Level; }
static void reset();
static bool isTraceDiskCache() { return Level & DiskCache; }
static bool isTraceInMemCache() { return Level & InMemCache; }
static bool isTraceKernelCompiler() { return Level & KernelCompiler; }

private:
static unsigned int getCachedValue(bool ResetCache = false) {
const auto Parser = []() {
const char *ValStr = BaseT::getRawValue();
int intVal = 0;

if (ValStr) {
try {
intVal = std::stoi(ValStr);
} catch (...) {
// If the value is not null and not a number, it is considered
// to enable disk cache tracing. This is the legacy behavior.
intVal = 1;
}
}

// Legacy behavior.
if (intVal > 7)
intVal = 1;

return intVal;
};

static unsigned int Level = Parser();
if (ResetCache)
Level = Parser();

return Level;
}
static unsigned int Level;
};

// SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD accepts an integer that specifies
Expand Down
16 changes: 0 additions & 16 deletions sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,22 +263,6 @@ context_impl::get_backend_info<info::device::backend_version>() const {
}
#endif

/// \return a mutable reference to the MContext member.
ur_context_handle_t &context_impl::getHandleRef() {
  return MContext;
}

/// \return a read-only reference to the MContext member.
const ur_context_handle_t &context_impl::getHandleRef() const {
  return MContext;
}

/// \return a reference to the MKernelProgramCache member. The reference is
/// non-const even though this accessor is a const member function.
KernelProgramCache &context_impl::getKernelProgramCache() const {
  KernelProgramCache &Cache = MKernelProgramCache;
  return Cache;
}

/// Checks by address whether \p Device is one of the entries in MDevices.
///
/// \return true if an entry of MDevices points at \p Device, false otherwise.
bool context_impl::hasDevice(const detail::device_impl &Device) const {
  const detail::device_impl *const Wanted = &Device;
  for (device_impl *Candidate : MDevices) {
    // Identity comparison: only the very same object counts as a match.
    if (Candidate == Wanted)
      return true;
  }
  return false;
}

device_impl *
context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const {
for (device_impl *D : MDevices)
Expand Down
15 changes: 11 additions & 4 deletions sycl/source/detail/context_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
/// reference will be invalid if context_impl was destroyed.
///
/// \return an instance of raw UR context handle.
ur_context_handle_t &getHandleRef();
ur_context_handle_t &getHandleRef() { return MContext; }

/// Gets the underlying context object (if any) without reference count
/// modification.
Expand All @@ -126,7 +126,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
/// reference will be invalid if context_impl was destroyed.
///
/// \return an instance of raw UR context handle.
const ur_context_handle_t &getHandleRef() const;
const ur_context_handle_t &getHandleRef() const { return MContext; }

devices_range getDevices() const { return MDevices; }

Expand All @@ -151,10 +151,17 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
return {MCachedLibPrograms, MCachedLibProgramsMutex};
}

KernelProgramCache &getKernelProgramCache() const;
KernelProgramCache &getKernelProgramCache() const {
return MKernelProgramCache;
}

/// Returns true if and only if context contains the given device.
bool hasDevice(const detail::device_impl &Device) const;
bool hasDevice(const detail::device_impl &Device) const {
for (device_impl *D : MDevices)
if (D == &Device)
return true;
return false;
}

/// Returns true if and only if the device can be used within this context.
/// For OpenCL this is currently equivalent to hasDevice, for other backends
Expand Down
21 changes: 0 additions & 21 deletions sycl/source/detail/device_kernel_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,27 +74,6 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded(
assert(Info == *this);
}

/// Runs the assertInitialized() check and then hands out the
/// MFastKernelSubcache member by reference.
FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
  assertInitialized();
  FastKernelSubcacheT &Subcache = MFastKernelSubcache;
  return Subcache;
}
/// Runs the assertInitialized() check and then reports the value of the
/// MUsesAssert member.
bool DeviceKernelInfo::usesAssert() {
  assertInitialized();

  return MUsesAssert;
}
/// Runs the assertInitialized() check and then returns the
/// MImplicitLocalArgPos member by const reference.
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
  assertInitialized();

  return MImplicitLocalArgPos;
}

/// \return true when KernelSize is non-zero, which this class treats as the
/// marker that compile-time information has been set.
bool DeviceKernelInfo::isCompileTimeInfoSet() const {
  return KernelSize != 0;
}

/// Debug-only sanity check that MInitialized has been set before the data is
/// used. The body is empty when __INTEL_PREVIEW_BREAKING_CHANGES is defined.
void DeviceKernelInfo::assertInitialized() {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// The MInitialized flag only exists in this configuration, so the check is
// guarded by the same macro.
assert(MInitialized.load() && "Data needs to be initialized before use");
#endif
}

} // namespace detail
} // namespace _V1
} // namespace sycl
25 changes: 20 additions & 5 deletions sycl/source/detail/device_kernel_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,28 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
#endif
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);

FastKernelSubcacheT &getKernelSubcache();
bool usesAssert();
const std::optional<int> &getImplicitLocalArgPos();
FastKernelSubcacheT &getKernelSubcache() {
assertInitialized();
return MFastKernelSubcache;
}

bool usesAssert() const {
assertInitialized();
return MUsesAssert;
}

std::optional<int> getImplicitLocalArgPos() const {
assertInitialized();
return MImplicitLocalArgPos;
}

private:
void assertInitialized();
bool isCompileTimeInfoSet() const;
void assertInitialized() const {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
assert(MInitialized.load() && "Data needs to be initialized before use");
#endif
}
bool isCompileTimeInfoSet() const { return KernelSize != 0; }

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
std::atomic<bool> MInitialized = false;
Expand Down
55 changes: 21 additions & 34 deletions sycl/source/detail/global_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,20 +105,7 @@ void GlobalHandler::TraceEventXPTI(const char *Message) {
#endif
}

GlobalHandler *&GlobalHandler::getInstancePtr() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if we keep this method in cpp file and move all others to the hpp? What will be the impact on performance?
My personal opinion is that code with a static variable declared inside the method looks clearer and isolated.

Copy link
Contributor Author

@lslusarczyk lslusarczyk Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if we keep this method in cpp file and move all others to the hpp? What will be the impact on performance?

I see getInstancePtr is used only in the .cpp file, so it should also be fast. But this function is no longer needed; I removed it in favor of using the variable directly.

My personal opinion is that code with a static variable declared inside the method looks clearer and isolated.

You are right. However, on the critical path we should avoid this pattern, as it is slower than a global variable defined outside the function. See this SO answer. I worked on another project where we indeed observed how much the static-variable-inside-a-method pattern can degrade performance.

static GlobalHandler *RTGlobalObjHandler = new GlobalHandler();
return RTGlobalObjHandler;
}

GlobalHandler &GlobalHandler::instance() {
GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
assert(RTGlobalObjHandler && "Handler must not be deallocated earlier");
return *RTGlobalObjHandler;
}

bool GlobalHandler::isInstanceAlive() {
return GlobalHandler::getInstancePtr();
}
GlobalHandler *GlobalHandler::RTGlobalObjHandler = new GlobalHandler();

template <typename T, typename... Types>
T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types &&...Args) {
Expand Down Expand Up @@ -331,8 +318,7 @@ void GlobalHandler::drainThreadPool() {
// 2) when process is being terminated
void shutdown_early(bool CanJoinThreads = true) {
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
if (!Handler)
if (!GlobalHandler::RTGlobalObjHandler)
return;

#if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32)
Expand All @@ -342,26 +328,26 @@ void shutdown_early(bool CanJoinThreads = true) {
#endif

// Now that we are shutting down, we will no longer defer MemObj releases.
Handler->endDeferredRelease();
GlobalHandler::RTGlobalObjHandler->endDeferredRelease();

// Ensure neither host task is working so that no default context is accessed
// upon its release
Handler->prepareSchedulerToRelease(true);
GlobalHandler::RTGlobalObjHandler->prepareSchedulerToRelease(true);

if (Handler->MHostTaskThreadPool.Inst) {
Handler->MHostTaskThreadPool.Inst->finishAndWait(CanJoinThreads);
Handler->MHostTaskThreadPool.Inst.reset(nullptr);
if (GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst) {
GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst->finishAndWait(
CanJoinThreads);
GlobalHandler::RTGlobalObjHandler->MHostTaskThreadPool.Inst.reset(nullptr);
}

// This releases OUR reference to the default context, but
// others may yet have refs
Handler->releaseDefaultContexts();
GlobalHandler::RTGlobalObjHandler->releaseDefaultContexts();
}

void shutdown_late() {
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
if (!Handler)
if (!GlobalHandler::RTGlobalObjHandler)
return;

#if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32)
Expand All @@ -371,26 +357,27 @@ void shutdown_late() {
#endif

// First, release resources, that may access adapters.
Handler->MPlatformCache.Inst.reset(nullptr);
Handler->MScheduler.Inst.reset(nullptr);
Handler->MProgramManager.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->MPlatformCache.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->MScheduler.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->MProgramManager.Inst.reset(nullptr);

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
// Kernel cache, which is part of device kernel info,
// stores handles to the adapter, so clear it before releasing adapters.
Handler->MDeviceKernelInfoStorage.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->MDeviceKernelInfoStorage.Inst.reset(
nullptr);
#endif

// Clear the adapters and reset the instance if it was there.
Handler->unloadAdapters();
if (Handler->MAdapters.Inst)
Handler->MAdapters.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->unloadAdapters();
if (GlobalHandler::RTGlobalObjHandler->MAdapters.Inst)
GlobalHandler::RTGlobalObjHandler->MAdapters.Inst.reset(nullptr);

Handler->MXPTIRegistry.Inst.reset(nullptr);
GlobalHandler::RTGlobalObjHandler->MXPTIRegistry.Inst.reset(nullptr);

// Release the rest of global resources.
delete Handler;
Handler = nullptr;
delete GlobalHandler::RTGlobalObjHandler;
GlobalHandler::RTGlobalObjHandler = nullptr;
}

#ifdef _WIN32
Expand Down
25 changes: 12 additions & 13 deletions sycl/source/detail/global_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,11 @@ class DeviceKernelInfo;
/// construction or destruction is generated anyway.
class GlobalHandler {
public:
/// \return a reference to a GlobalHandler singleton instance. Memory for
/// storing objects is allocated on first call. The reference is valid as long
/// as runtime library is loaded (i.e. until `DllMain` or
static bool isInstanceAlive() { return RTGlobalObjHandler != nullptr; }
/// \return a reference to a GlobalHandler singleton instance. The reference
/// is valid as long as runtime library is loaded (i.e. until `DllMain` or
/// `__attribute__((destructor))` is called).
static GlobalHandler &instance();

/// \return true if the instance has not been deallocated yet.
static bool isInstanceAlive();
static GlobalHandler &instance() { return *RTGlobalObjHandler; }

GlobalHandler(const GlobalHandler &) = delete;
GlobalHandler(GlobalHandler &&) = delete;
Expand Down Expand Up @@ -96,19 +93,18 @@ class GlobalHandler {
void attachScheduler(Scheduler *Scheduler);

private:
// Constructor and destructor are declared out-of-line to allow incomplete
// types as template arguments to unique_ptr.
GlobalHandler();
~GlobalHandler();

bool OkToDefer = true;

friend void shutdown_early(bool);
friend void shutdown_late();
friend class ObjectUsageCounter;
static GlobalHandler *&getInstancePtr();
static SpinLock MSyclGlobalHandlerProtector;

// Constructor and destructor are declared out-of-line to allow incomplete
// types as template arguments to unique_ptr.
GlobalHandler();
~GlobalHandler();

template <typename T> struct InstWithLock {
std::unique_ptr<T> Inst;
SpinLock Lock;
Expand All @@ -135,7 +131,10 @@ class GlobalHandler {
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
InstWithLock<std::deque<DeviceKernelInfo>> MDeviceKernelInfoStorage;
#endif

static GlobalHandler *RTGlobalObjHandler;
};

} // namespace detail
} // namespace _V1
} // namespace sycl
12 changes: 12 additions & 0 deletions sycl/source/detail/kernel_program_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@
namespace sycl {
inline namespace _V1 {
namespace detail {

/// Writes one in-memory kernel cache trace line to std::cerr.
///
/// The line contains the calling thread's id, the \p IsFastKernelCache flag
/// (printed as 0 or 1), the kernel name and finally \p Msg.
void KernelProgramCache::traceKernelImpl(const char *Msg,
                                         KernelNameStrRefT KernelName,
                                         bool IsFastKernelCache) {
  // Assemble the "[IsFastCache: ...][Key:{Name = ...}]: " part piece by piece.
  // NOTE(review): KernelName.data() is consumed as a C string here; confirm
  // that the referenced name is always null-terminated.
  std::string Identifier("[IsFastCache: ");
  Identifier += std::to_string(IsFastKernelCache);
  Identifier += "][Key:{Name = ";
  Identifier += KernelName.data();
  Identifier += "}]: ";

  std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id()
            << "][Kernel Cache]" << Identifier << Msg << std::endl;
}

adapter_impl &KernelProgramCache::getAdapter() {
return MParentContext.getAdapter();
}
Expand Down
Loading
Loading