Skip to content

Commit a272143

Browse files
committed
optimize enqueueImpKernel
1 parent 66b3b53 commit a272143

16 files changed

+177
-186
lines changed

sycl/source/detail/config.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,35 @@ const std::array<std::pair<std::string, backend>, 8> &getSyclBeMap() {
180180
{"*", backend::all}}};
181181
return SyclBeMap;
182182
}
183+
namespace {
184+
185+
unsigned int parseLevel(const char *ValStr) {
186+
unsigned int intVal = 0;
187+
188+
if (ValStr) {
189+
try {
190+
intVal = std::stoul(ValStr);
191+
} catch (...) {
192+
// If the value is not null and not a number, it is considered
193+
// to enable disk cache tracing. This is the legacy behavior.
194+
intVal = 1;
195+
}
196+
}
197+
198+
// Legacy behavior.
199+
if (intVal > 7)
200+
intVal = 1;
201+
202+
return intVal;
203+
};
204+
205+
}
206+
207+
void SYCLConfigTrace::reset() {
208+
Level = parseLevel(BaseT::getRawValue());
209+
}
210+
211+
unsigned int SYCLConfigTrace::Level = parseLevel(SYCLConfigTrace::BaseT::getRawValue());
183212

184213
} // namespace detail
185214
} // namespace _V1

sycl/source/detail/config.hpp

Lines changed: 8 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -709,54 +709,28 @@ template <> class SYCLConfig<SYCL_JIT_AMDGCN_PTX_TARGET_FEATURES> {
709709
// tracing of the corresponding caches. If the input value is not null and
710710
// not a valid number, the disk cache tracing will be enabled (depreciated
711711
// behavior). The default value is 0 and no tracing is enabled.
712-
template <> class SYCLConfig<SYCL_CACHE_TRACE> {
712+
class SYCLConfigTrace {
713713
using BaseT = SYCLConfigBase<SYCL_CACHE_TRACE>;
714714
enum TraceBitmask { DiskCache = 1, InMemCache = 2, KernelCompiler = 4 };
715715

716716
public:
717-
static unsigned int get() { return getCachedValue(); }
718-
static void reset() { (void)getCachedValue(true); }
717+
static unsigned int get() { return Level; }
718+
static void reset();
719719
static bool isTraceDiskCache() {
720-
return getCachedValue() & TraceBitmask::DiskCache;
720+
return Level & DiskCache;
721721
}
722722
static bool isTraceInMemCache() {
723-
return getCachedValue() & TraceBitmask::InMemCache;
723+
return Level & InMemCache;
724724
}
725725
static bool isTraceKernelCompiler() {
726-
return getCachedValue() & TraceBitmask::KernelCompiler;
726+
return Level & KernelCompiler;
727727
}
728728

729729
private:
730-
static unsigned int getCachedValue(bool ResetCache = false) {
731-
const auto Parser = []() {
732-
const char *ValStr = BaseT::getRawValue();
733-
int intVal = 0;
734-
735-
if (ValStr) {
736-
try {
737-
intVal = std::stoi(ValStr);
738-
} catch (...) {
739-
// If the value is not null and not a number, it is considered
740-
// to enable disk cache tracing. This is the legacy behavior.
741-
intVal = 1;
742-
}
743-
}
744-
745-
// Legacy behavior.
746-
if (intVal > 7)
747-
intVal = 1;
748-
749-
return intVal;
750-
};
751-
752-
static unsigned int Level = Parser();
753-
if (ResetCache)
754-
Level = Parser();
755-
756-
return Level;
757-
}
730+
static unsigned int Level;
758731
};
759732

733+
760734
// SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD accepts an integer that specifies
761735
// the maximum size of the in-memory Program cache.
762736
// Cache eviction is performed when the cache size exceeds the threshold.

sycl/source/detail/context_impl.cpp

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -263,22 +263,6 @@ context_impl::get_backend_info<info::device::backend_version>() const {
263263
}
264264
#endif
265265

266-
ur_context_handle_t &context_impl::getHandleRef() { return MContext; }
267-
const ur_context_handle_t &context_impl::getHandleRef() const {
268-
return MContext;
269-
}
270-
271-
KernelProgramCache &context_impl::getKernelProgramCache() const {
272-
return MKernelProgramCache;
273-
}
274-
275-
bool context_impl::hasDevice(const detail::device_impl &Device) const {
276-
for (device_impl *D : MDevices)
277-
if (D == &Device)
278-
return true;
279-
return false;
280-
}
281-
282266
device_impl *
283267
context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const {
284268
for (device_impl *D : MDevices)

sycl/source/detail/context_impl.hpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
116116
/// reference will be invalid if context_impl was destroyed.
117117
///
118118
/// \return an instance of raw UR context handle.
119-
ur_context_handle_t &getHandleRef();
119+
ur_context_handle_t &getHandleRef() { return MContext; }
120120

121121
/// Gets the underlying context object (if any) without reference count
122122
/// modification.
@@ -126,7 +126,9 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
126126
/// reference will be invalid if context_impl was destroyed.
127127
///
128128
/// \return an instance of raw UR context handle.
129-
const ur_context_handle_t &getHandleRef() const;
129+
const ur_context_handle_t &getHandleRef() const {
130+
return MContext;
131+
}
130132

131133
devices_range getDevices() const { return MDevices; }
132134

@@ -151,10 +153,17 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
151153
return {MCachedLibPrograms, MCachedLibProgramsMutex};
152154
}
153155

154-
KernelProgramCache &getKernelProgramCache() const;
156+
KernelProgramCache &getKernelProgramCache() const {
157+
return MKernelProgramCache;
158+
}
155159

156160
/// Returns true if and only if context contains the given device.
157-
bool hasDevice(const detail::device_impl &Device) const;
161+
bool hasDevice(const detail::device_impl &Device) const {
162+
for (device_impl *D : MDevices)
163+
if (D == &Device)
164+
return true;
165+
return false;
166+
}
158167

159168
/// Returns true if and only if the device can be used within this context.
160169
/// For OpenCL this is currently equivalent to hasDevice, for other backends

sycl/source/detail/device_kernel_info.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -74,27 +74,6 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded(
7474
assert(Info == *this);
7575
}
7676

77-
FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
78-
assertInitialized();
79-
return MFastKernelSubcache;
80-
}
81-
bool DeviceKernelInfo::usesAssert() {
82-
assertInitialized();
83-
return MUsesAssert;
84-
}
85-
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
86-
assertInitialized();
87-
return MImplicitLocalArgPos;
88-
}
89-
90-
bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; }
91-
92-
void DeviceKernelInfo::assertInitialized() {
93-
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
94-
assert(MInitialized.load() && "Data needs to be initialized before use");
95-
#endif
96-
}
97-
9877
} // namespace detail
9978
} // namespace _V1
10079
} // namespace sycl

sycl/source/detail/device_kernel_info.hpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,28 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
107107
#endif
108108
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);
109109

110-
FastKernelSubcacheT &getKernelSubcache();
111-
bool usesAssert();
112-
const std::optional<int> &getImplicitLocalArgPos();
110+
FastKernelSubcacheT &getKernelSubcache() {
111+
assertInitialized();
112+
return MFastKernelSubcache;
113+
}
114+
115+
bool usesAssert() const {
116+
assertInitialized();
117+
return MUsesAssert;
118+
}
119+
120+
const std::optional<int> &getImplicitLocalArgPos() {
121+
assertInitialized();
122+
return MImplicitLocalArgPos;
123+
}
113124

114125
private:
115-
void assertInitialized();
116-
bool isCompileTimeInfoSet() const;
126+
void assertInitialized() const {
127+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
128+
assert(MInitialized.load() && "Data needs to be initialized before use");
129+
#endif
130+
}
131+
bool isCompileTimeInfoSet() const { return KernelSize != 0; }
117132

118133
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
119134
std::atomic<bool> MInitialized = false;

sycl/source/detail/global_handler.cpp

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,7 @@ void GlobalHandler::TraceEventXPTI(const char *Message) {
105105
#endif
106106
}
107107

108-
GlobalHandler *&GlobalHandler::getInstancePtr() {
109-
static GlobalHandler *RTGlobalObjHandler = new GlobalHandler();
110-
return RTGlobalObjHandler;
111-
}
112-
113-
GlobalHandler &GlobalHandler::instance() {
114-
GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
115-
assert(RTGlobalObjHandler && "Handler must not be deallocated earlier");
116-
return *RTGlobalObjHandler;
117-
}
118-
119-
bool GlobalHandler::isInstanceAlive() {
120-
return GlobalHandler::getInstancePtr();
121-
}
108+
GlobalHandler *GlobalHandler::RTGlobalObjHandler = new GlobalHandler();
122109

123110
template <typename T, typename... Types>
124111
T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types &&...Args) {

sycl/source/detail/global_handler.hpp

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,20 @@ class DeviceKernelInfo;
4848
/// construction or destruction is generated anyway.
4949
class GlobalHandler {
5050
public:
51-
/// \return a reference to a GlobalHandler singleton instance. Memory for
52-
/// storing objects is allocated on first call. The reference is valid as long
51+
52+
static bool isInstanceAlive() {
53+
return RTGlobalObjHandler != nullptr;
54+
}
55+
/// \return a reference to a GlobalHandler singleton instance. The reference is valid as long
5356
/// as runtime library is loaded (i.e. untill `DllMain` or
5457
/// `__attribute__((destructor))` is called).
55-
static GlobalHandler &instance();
58+
static GlobalHandler &instance() {
59+
return *RTGlobalObjHandler;
60+
}
5661

57-
/// \return true if the instance has not been deallocated yet.
58-
static bool isInstanceAlive();
62+
static GlobalHandler *&getInstancePtr() {
63+
return RTGlobalObjHandler;
64+
}
5965

6066
GlobalHandler(const GlobalHandler &) = delete;
6167
GlobalHandler(GlobalHandler &&) = delete;
@@ -95,20 +101,19 @@ class GlobalHandler {
95101
// For testing purposes only
96102
void attachScheduler(Scheduler *Scheduler);
97103

104+
// Constructor and destructor are declared out-of-line to allow incomplete
105+
// types as template arguments to unique_ptr.
106+
GlobalHandler();
107+
~GlobalHandler();
108+
98109
private:
99110
bool OkToDefer = true;
100111

101112
friend void shutdown_early(bool);
102113
friend void shutdown_late();
103114
friend class ObjectUsageCounter;
104-
static GlobalHandler *&getInstancePtr();
105115
static SpinLock MSyclGlobalHandlerProtector;
106116

107-
// Constructor and destructor are declared out-of-line to allow incomplete
108-
// types as template arguments to unique_ptr.
109-
GlobalHandler();
110-
~GlobalHandler();
111-
112117
template <typename T> struct InstWithLock {
113118
std::unique_ptr<T> Inst;
114119
SpinLock Lock;
@@ -135,7 +140,10 @@ class GlobalHandler {
135140
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
136141
InstWithLock<std::deque<DeviceKernelInfo>> MDeviceKernelInfoStorage;
137142
#endif
143+
144+
static GlobalHandler *RTGlobalObjHandler;
138145
};
146+
139147
} // namespace detail
140148
} // namespace _V1
141149
} // namespace sycl

sycl/source/detail/kernel_program_cache.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,18 @@
1212
namespace sycl {
1313
inline namespace _V1 {
1414
namespace detail {
15+
16+
void KernelProgramCache::traceKernelImpl(const char * Msg,
17+
KernelNameStrRefT KernelName,
18+
bool IsFastKernelCache) {
19+
std::string Identifier =
20+
"[IsFastCache: " + std::to_string(IsFastKernelCache) +
21+
"][Key:{Name = " + KernelName.data() + "}]: ";
22+
23+
std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id()
24+
<< "][Kernel Cache]" << Identifier << Msg << std::endl;
25+
}
26+
1527
adapter_impl &KernelProgramCache::getAdapter() {
1628
return MParentContext.getAdapter();
1729
}

sycl/source/detail/kernel_program_cache.hpp

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ class KernelProgramCache {
331331
template <typename MsgType>
332332
static inline void traceProgram(const MsgType &Msg,
333333
const ProgramCacheKeyT &CacheKey) {
334-
if (!SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache())
334+
if (!SYCLConfigTrace::isTraceInMemCache())
335335
return;
336336

337337
int ImageId = CacheKey.first.second;
@@ -361,23 +361,18 @@ class KernelProgramCache {
361361
<< "][Program Cache]" << Identifier << Msg << std::endl;
362362
}
363363

364-
// Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is
365-
// set.
366-
template <typename MsgType>
367-
static inline void traceKernel(const MsgType &Msg,
364+
static void traceKernelImpl(const char * Msg,
368365
KernelNameStrRefT KernelName,
369-
bool IsFastKernelCache = false) {
370-
if (!SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache())
371-
return;
366+
bool IsFastKernelCache);
372367

373-
std::string Identifier =
374-
"[IsFastCache: " + std::to_string(IsFastKernelCache) +
375-
"][Key:{Name = " + KernelName.data() + "}]: ";
376-
377-
std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id()
378-
<< "][Kernel Cache]" << Identifier << Msg << std::endl;
368+
// Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is
369+
// set.
370+
static void traceKernel(const char * Msg, KernelNameStrRefT KernelName, bool isFastKernelCache = false) {
371+
if (__builtin_expect(SYCLConfigTrace::isTraceInMemCache(), false))
372+
traceKernelImpl(Msg, KernelName, isFastKernelCache);
379373
}
380374

375+
381376
Locked<ProgramCache> acquireCachedPrograms() {
382377
return {MCachedPrograms, MProgramCacheMutex};
383378
}
@@ -513,7 +508,7 @@ class KernelProgramCache {
513508
auto LockedCacheKP = acquireKernelsPerProgramCache();
514509
// List kernels that are to be removed from the cache, if tracing is
515510
// enabled.
516-
if (SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache()) {
511+
if (SYCLConfigTrace::isTraceInMemCache()) {
517512
for (const auto &Kernel : LockedCacheKP.get()[NativePrg])
518513
traceKernel("Kernel evicted.", Kernel.first);
519514
}

0 commit comments

Comments
 (0)