Skip to content

Commit 9af80f1

Browse files
committed
[SYCL] optimize enqueueImpKernel
1 parent 66b3b53 commit 9af80f1

16 files changed

+164
-198
lines changed

sycl/source/detail/config.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,34 @@ const std::array<std::pair<std::string, backend>, 8> &getSyclBeMap() {
180180
{"*", backend::all}}};
181181
return SyclBeMap;
182182
}
183+
namespace {
184+
185+
/// Converts the raw trace configuration string into a trace level.
///
/// \param ValStr raw configuration value; may be null.
/// \return 0 when \p ValStr is null; the number extracted by std::stoul when
/// it does not exceed 7; 1 otherwise (std::stoul could not convert the text,
/// or the converted number is greater than 7).
unsigned int parseLevel(const char *ValStr) {
  // Largest level that is passed through unchanged.
  constexpr unsigned long MaxLevel = 7;

  // Keep the value in the type std::stoul returns. Narrowing to unsigned int
  // before the range check would let large inputs (e.g. "4294967298" where
  // unsigned long is 64 bits wide) wrap around to a small number and bypass
  // the fallback below.
  unsigned long Parsed = 0;

  if (ValStr) {
    try {
      Parsed = std::stoul(ValStr);
    } catch (...) {
      // If the value is not null and not a number, it is considered
      // to enable disk cache tracing. This is the legacy behavior.
      Parsed = 1;
    }
  }

  // Legacy behavior.
  if (Parsed > MaxLevel)
    Parsed = 1;

  // Safe narrowing: Parsed is at most MaxLevel at this point.
  return static_cast<unsigned int>(Parsed);
}
204+
205+
} // namespace
206+
207+
void SYCLConfigTrace::reset() { Level = parseLevel(BaseT::getRawValue()); }
208+
209+
unsigned int SYCLConfigTrace::Level =
210+
parseLevel(SYCLConfigTrace::BaseT::getRawValue());
183211

184212
} // namespace detail
185213
} // namespace _V1

sycl/source/detail/config.hpp

Lines changed: 7 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -709,52 +709,19 @@ template <> class SYCLConfig<SYCL_JIT_AMDGCN_PTX_TARGET_FEATURES> {
709709
// tracing of the corresponding caches. If the input value is not null and
710710
// not a valid number, the disk cache tracing will be enabled (deprecated
711711
// behavior). The default value is 0 and no tracing is enabled.
712-
template <> class SYCLConfig<SYCL_CACHE_TRACE> {
712+
class SYCLConfigTrace {
713713
using BaseT = SYCLConfigBase<SYCL_CACHE_TRACE>;
714714
enum TraceBitmask { DiskCache = 1, InMemCache = 2, KernelCompiler = 4 };
715715

716716
public:
717-
static unsigned int get() { return getCachedValue(); }
718-
static void reset() { (void)getCachedValue(true); }
719-
static bool isTraceDiskCache() {
720-
return getCachedValue() & TraceBitmask::DiskCache;
721-
}
722-
static bool isTraceInMemCache() {
723-
return getCachedValue() & TraceBitmask::InMemCache;
724-
}
725-
static bool isTraceKernelCompiler() {
726-
return getCachedValue() & TraceBitmask::KernelCompiler;
727-
}
717+
static unsigned int get() { return Level; }
718+
static void reset();
719+
static bool isTraceDiskCache() { return Level & DiskCache; }
720+
static bool isTraceInMemCache() { return Level & InMemCache; }
721+
static bool isTraceKernelCompiler() { return Level & KernelCompiler; }
728722

729723
private:
730-
static unsigned int getCachedValue(bool ResetCache = false) {
731-
const auto Parser = []() {
732-
const char *ValStr = BaseT::getRawValue();
733-
int intVal = 0;
734-
735-
if (ValStr) {
736-
try {
737-
intVal = std::stoi(ValStr);
738-
} catch (...) {
739-
// If the value is not null and not a number, it is considered
740-
// to enable disk cache tracing. This is the legacy behavior.
741-
intVal = 1;
742-
}
743-
}
744-
745-
// Legacy behavior.
746-
if (intVal > 7)
747-
intVal = 1;
748-
749-
return intVal;
750-
};
751-
752-
static unsigned int Level = Parser();
753-
if (ResetCache)
754-
Level = Parser();
755-
756-
return Level;
757-
}
724+
static unsigned int Level;
758725
};
759726

760727
// SYCL_IN_MEM_CACHE_EVICTION_THRESHOLD accepts an integer that specifies

sycl/source/detail/context_impl.cpp

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -263,22 +263,6 @@ context_impl::get_backend_info<info::device::backend_version>() const {
263263
}
264264
#endif
265265

266-
ur_context_handle_t &context_impl::getHandleRef() { return MContext; }
267-
const ur_context_handle_t &context_impl::getHandleRef() const {
268-
return MContext;
269-
}
270-
271-
KernelProgramCache &context_impl::getKernelProgramCache() const {
272-
return MKernelProgramCache;
273-
}
274-
275-
bool context_impl::hasDevice(const detail::device_impl &Device) const {
276-
for (device_impl *D : MDevices)
277-
if (D == &Device)
278-
return true;
279-
return false;
280-
}
281-
282266
device_impl *
283267
context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const {
284268
for (device_impl *D : MDevices)

sycl/source/detail/context_impl.hpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
116116
/// reference will be invalid if context_impl was destroyed.
117117
///
118118
/// \return an instance of raw UR context handle.
119-
ur_context_handle_t &getHandleRef();
119+
ur_context_handle_t &getHandleRef() { return MContext; }
120120

121121
/// Gets the underlying context object (if any) without reference count
122122
/// modification.
@@ -126,7 +126,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
126126
/// reference will be invalid if context_impl was destroyed.
127127
///
128128
/// \return an instance of raw UR context handle.
129-
const ur_context_handle_t &getHandleRef() const;
129+
const ur_context_handle_t &getHandleRef() const { return MContext; }
130130

131131
devices_range getDevices() const { return MDevices; }
132132

@@ -151,10 +151,17 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
151151
return {MCachedLibPrograms, MCachedLibProgramsMutex};
152152
}
153153

154-
KernelProgramCache &getKernelProgramCache() const;
154+
KernelProgramCache &getKernelProgramCache() const {
155+
return MKernelProgramCache;
156+
}
155157

156158
/// Returns true if and only if context contains the given device.
157-
bool hasDevice(const detail::device_impl &Device) const;
159+
bool hasDevice(const detail::device_impl &Device) const {
  // Membership is decided by address: the context holds the given device
  // only if one of the stored pointers refers to this very object.
  const detail::device_impl *const Target = &Device;
  bool Found = false;
  for (device_impl *Candidate : MDevices) {
    if (Candidate == Target) {
      Found = true;
      break;
    }
  }
  return Found;
}
158165

159166
/// Returns true if and only if the device can be used within this context.
160167
/// For OpenCL this is currently equivalent to hasDevice, for other backends

sycl/source/detail/device_kernel_info.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -74,27 +74,6 @@ void DeviceKernelInfo::setCompileTimeInfoIfNeeded(
7474
assert(Info == *this);
7575
}
7676

77-
FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() {
78-
assertInitialized();
79-
return MFastKernelSubcache;
80-
}
81-
bool DeviceKernelInfo::usesAssert() {
82-
assertInitialized();
83-
return MUsesAssert;
84-
}
85-
const std::optional<int> &DeviceKernelInfo::getImplicitLocalArgPos() {
86-
assertInitialized();
87-
return MImplicitLocalArgPos;
88-
}
89-
90-
bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; }
91-
92-
void DeviceKernelInfo::assertInitialized() {
93-
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
94-
assert(MInitialized.load() && "Data needs to be initialized before use");
95-
#endif
96-
}
97-
9877
} // namespace detail
9978
} // namespace _V1
10079
} // namespace sycl

sycl/source/detail/device_kernel_info.hpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,28 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy {
107107
#endif
108108
void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info);
109109

110-
FastKernelSubcacheT &getKernelSubcache();
111-
bool usesAssert();
112-
const std::optional<int> &getImplicitLocalArgPos();
110+
FastKernelSubcacheT &getKernelSubcache() {
111+
assertInitialized();
112+
return MFastKernelSubcache;
113+
}
114+
115+
bool usesAssert() const {
116+
assertInitialized();
117+
return MUsesAssert;
118+
}
119+
120+
const std::optional<int> &getImplicitLocalArgPos() {
121+
assertInitialized();
122+
return MImplicitLocalArgPos;
123+
}
113124

114125
private:
115-
void assertInitialized();
116-
bool isCompileTimeInfoSet() const;
126+
void assertInitialized() const {
127+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
128+
assert(MInitialized.load() && "Data needs to be initialized before use");
129+
#endif
130+
}
131+
bool isCompileTimeInfoSet() const { return KernelSize != 0; }
117132

118133
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
119134
std::atomic<bool> MInitialized = false;

sycl/source/detail/global_handler.cpp

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,7 @@ void GlobalHandler::TraceEventXPTI(const char *Message) {
105105
#endif
106106
}
107107

108-
GlobalHandler *&GlobalHandler::getInstancePtr() {
109-
static GlobalHandler *RTGlobalObjHandler = new GlobalHandler();
110-
return RTGlobalObjHandler;
111-
}
112-
113-
GlobalHandler &GlobalHandler::instance() {
114-
GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
115-
assert(RTGlobalObjHandler && "Handler must not be deallocated earlier");
116-
return *RTGlobalObjHandler;
117-
}
118-
119-
bool GlobalHandler::isInstanceAlive() {
120-
return GlobalHandler::getInstancePtr();
121-
}
108+
GlobalHandler *GlobalHandler::RTGlobalObjHandler = new GlobalHandler();
122109

123110
template <typename T, typename... Types>
124111
T &GlobalHandler::getOrCreate(InstWithLock<T> &IWL, Types &&...Args) {

sycl/source/detail/global_handler.hpp

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,13 @@ class DeviceKernelInfo;
4848
/// construction or destruction is generated anyway.
4949
class GlobalHandler {
5050
public:
51-
/// \return a reference to a GlobalHandler singleton instance. Memory for
52-
/// storing objects is allocated on first call. The reference is valid as long
53-
/// as runtime library is loaded (i.e. untill `DllMain` or
51+
static bool isInstanceAlive() { return RTGlobalObjHandler != nullptr; }
52+
/// \return a reference to a GlobalHandler singleton instance. The reference
53+
/// is valid as long as runtime library is loaded (i.e. until `DllMain` or
5454
/// `__attribute__((destructor))` is called).
55-
static GlobalHandler &instance();
55+
static GlobalHandler &instance() { return *RTGlobalObjHandler; }
5656

57-
/// \return true if the instance has not been deallocated yet.
58-
static bool isInstanceAlive();
57+
static GlobalHandler *&getInstancePtr() { return RTGlobalObjHandler; }
5958

6059
GlobalHandler(const GlobalHandler &) = delete;
6160
GlobalHandler(GlobalHandler &&) = delete;
@@ -95,20 +94,19 @@ class GlobalHandler {
9594
// For testing purposes only
9695
void attachScheduler(Scheduler *Scheduler);
9796

97+
// Constructor and destructor are declared out-of-line to allow incomplete
98+
// types as template arguments to unique_ptr.
99+
GlobalHandler();
100+
~GlobalHandler();
101+
98102
private:
99103
bool OkToDefer = true;
100104

101105
friend void shutdown_early(bool);
102106
friend void shutdown_late();
103107
friend class ObjectUsageCounter;
104-
static GlobalHandler *&getInstancePtr();
105108
static SpinLock MSyclGlobalHandlerProtector;
106109

107-
// Constructor and destructor are declared out-of-line to allow incomplete
108-
// types as template arguments to unique_ptr.
109-
GlobalHandler();
110-
~GlobalHandler();
111-
112110
template <typename T> struct InstWithLock {
113111
std::unique_ptr<T> Inst;
114112
SpinLock Lock;
@@ -135,7 +133,10 @@ class GlobalHandler {
135133
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
136134
InstWithLock<std::deque<DeviceKernelInfo>> MDeviceKernelInfoStorage;
137135
#endif
136+
137+
static GlobalHandler *RTGlobalObjHandler;
138138
};
139+
139140
} // namespace detail
140141
} // namespace _V1
141142
} // namespace sycl

sycl/source/detail/kernel_program_cache.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,18 @@
1212
namespace sycl {
1313
inline namespace _V1 {
1414
namespace detail {
15+
16+
void KernelProgramCache::traceKernelImpl(const char *Msg,
17+
KernelNameStrRefT KernelName,
18+
bool IsFastKernelCache) {
19+
std::string Identifier =
20+
"[IsFastCache: " + std::to_string(IsFastKernelCache) +
21+
"][Key:{Name = " + KernelName.data() + "}]: ";
22+
23+
std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id()
24+
<< "][Kernel Cache]" << Identifier << Msg << std::endl;
25+
}
26+
1527
adapter_impl &KernelProgramCache::getAdapter() {
1628
return MParentContext.getAdapter();
1729
}

sycl/source/detail/kernel_program_cache.hpp

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ class KernelProgramCache {
331331
template <typename MsgType>
332332
static inline void traceProgram(const MsgType &Msg,
333333
const ProgramCacheKeyT &CacheKey) {
334-
if (!SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache())
334+
if (!SYCLConfigTrace::isTraceInMemCache())
335335
return;
336336

337337
int ImageId = CacheKey.first.second;
@@ -361,21 +361,15 @@ class KernelProgramCache {
361361
<< "][Program Cache]" << Identifier << Msg << std::endl;
362362
}
363363

364+
static void traceKernelImpl(const char *Msg, KernelNameStrRefT KernelName,
365+
bool IsFastKernelCache);
366+
364367
// Sends message to std::cerr stream when SYCL_CACHE_TRACE environment variable is
365368
// set.
366-
template <typename MsgType>
367-
static inline void traceKernel(const MsgType &Msg,
368-
KernelNameStrRefT KernelName,
369-
bool IsFastKernelCache = false) {
370-
if (!SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache())
371-
return;
372-
373-
std::string Identifier =
374-
"[IsFastCache: " + std::to_string(IsFastKernelCache) +
375-
"][Key:{Name = " + KernelName.data() + "}]: ";
376-
377-
std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id()
378-
<< "][Kernel Cache]" << Identifier << Msg << std::endl;
369+
static void traceKernel(const char *Msg, KernelNameStrRefT KernelName,
370+
bool isFastKernelCache = false) {
371+
if (__builtin_expect(SYCLConfigTrace::isTraceInMemCache(), false))
372+
traceKernelImpl(Msg, KernelName, isFastKernelCache);
379373
}
380374

381375
Locked<ProgramCache> acquireCachedPrograms() {
@@ -513,7 +507,7 @@ class KernelProgramCache {
513507
auto LockedCacheKP = acquireKernelsPerProgramCache();
514508
// List kernels that are to be removed from the cache, if tracing is
515509
// enabled.
516-
if (SYCLConfig<SYCL_CACHE_TRACE>::isTraceInMemCache()) {
510+
if (SYCLConfigTrace::isTraceInMemCache()) {
517511
for (const auto &Kernel : LockedCacheKP.get()[NativePrg])
518512
traceKernel("Kernel evicted.", Kernel.first);
519513
}

0 commit comments

Comments
 (0)