@@ -8,6 +8,7 @@ import kotlinx.atomicfu.*
88import kotlinx.coroutines.internal.*
99import kotlinx.coroutines.intrinsics.*
1010import kotlinx.coroutines.selects.*
11+ import kotlin.concurrent.Volatile
1112import kotlin.contracts.*
1213import kotlin.coroutines.*
1314import kotlin.coroutines.intrinsics.*
@@ -205,10 +206,115 @@ private class LazyStandaloneCoroutine(
205206}
206207
207208// Used by withContext when context changes, but dispatcher stays the same
208- internal expect class UndispatchedCoroutine <in T >(
209+ internal class UndispatchedCoroutine <in T >(
209210 context : CoroutineContext ,
210211 uCont : Continuation <T >
211- ) : ScopeCoroutine<T>
212+ ) : ScopeCoroutine<T>(if (context[UndispatchedMarker ] == null) context + UndispatchedMarker else context, uCont) {
213+
214+ /* *
215+ * The state of [ThreadContextElement]s associated with the current undispatched coroutine.
216+ * It is stored in a thread local because this coroutine can be used concurrently in suspend-resume race scenario.
217+ * See the following boiled-down example with inlined `withContinuationContext` body:
218+ * ```
219+ * val state = saveThreadContext(ctx)
220+ * try {
221+ * invokeSmthWithThisCoroutineAsCompletion() // Completion implies that 'afterResume' will be called
222+ * // COROUTINE_SUSPENDED is returned
223+ * } finally {
224+ * thisCoroutine().clearThreadContext() // Concurrently the "smth" could've been already resumed on a different thread
225+ * // and it also calls saveThreadContext and clearThreadContext
226+ * }
227+ * ```
228+ *
229+ * Usage note:
230+ *
231+ * This part of the code is performance-sensitive.
232+ * It is a well-established pattern to wrap various activities into system-specific undispatched
233+ * `withContext` for the sake of logging, MDC, tracing etc., meaning that there exist thousands of
234+ * undispatched coroutines.
235+ * Each access to [CommonThreadLocal] on JVM leaves a footprint in the corresponding Thread's `ThreadLocalMap`
236+ * that is cleared automatically as soon as the associated thread-local (-> UndispatchedCoroutine) is garbage collected.
237+ * When such coroutines are promoted to old generation, `ThreadLocalMap`s become bloated and arbitrary accesses to thread locals
238+ * start to consume a significant amount of CPU because these maps are open-addressed and cleaned up incrementally on each access.
239+ * (You can read more about this effect as "GC nepotism").
240+ *
241+ * To avoid that, we attempt to narrow down the lifetime of this thread local as much as possible:
242+ * - It's never accessed when we are sure there are no thread context elements
243+ * - It's cleaned up via [CommonThreadLocal.remove] as soon as the coroutine is suspended or finished.
244+ */
245+ private val threadStateToRecover = commonThreadLocal<Pair <CoroutineContext , Any ?>? > (Symbol (" UndispatchedCoroutine" ))
246+
247+ /*
248+ * Indicates that a coroutine has at least one thread context element associated with it
249+ * and that 'threadStateToRecover' is going to be set in case of dispatching in order to preserve them.
250+ * Better than nullable thread-local for easier debugging.
251+ *
252+ * It is used as a performance optimization to avoid 'threadStateToRecover' initialization
253+ * (note: tl.get() initializes thread local),
254+ * and is prone to false-positives as it is never reset: otherwise
255+ * it may lead to logical data races between suspension points where
256+ * a coroutine is still being suspended in one thread while already being resumed
257+ * in another.
258+ */
259+ @Volatile
260+ private var threadLocalIsSet = false
261+
262+ init {
263+ /*
264+ * This is a hack for a very specific case in #2930 until #3253 is implemented.
265+ * 'ThreadLocalStressTest' covers this change properly.
266+ *
267+ * The scenario this change covers is the following:
268+ * 1) The coroutine is being started as a plain, non-kotlinx.coroutines-related suspend function,
269+ * e.g. `suspend fun main` or, more importantly, Ktor `SuspendFunGun`, that is invoking
270+ * `withContext(tlElement)` which creates `UndispatchedCoroutine`.
271+ * 2) It (original continuation) is then not wrapped into `DispatchedContinuation` via `intercept()`
272+ * and goes neither through `DC.run` nor through `resumeUndispatchedWith` that both
273+ * do thread context element tracking.
274+ * 3) So thread locals never got a chance to get properly set up via `saveThreadContext`,
275+ * but when `withContext` finishes, it attempts to recover thread locals in its `afterResume`.
276+ *
277+ * Here we detect precisely this situation and properly set up the context to recover later.
278+ *
279+ */
280+ if (uCont.context[ContinuationInterceptor ] !is CoroutineDispatcher ) { // only when the outer continuation has no CoroutineDispatcher as its interceptor
281+ /*
282+ * We cannot just "read" the elements as there is no such API,
283+ * so we update-restore it immediately and use the intermediate value
284+ * as the initial state, leveraging the fact that thread context element
285+ * is idempotent and such situations are increasingly rare.
286+ */
287+ val values = updateThreadContext(context, null ) // update the thread context to obtain the state that was in place before
288+ restoreThreadContext(context, values) // immediately undo that update
289+ saveThreadContext(context, values) // keep the captured state so that afterResume can restore it
290+ }
291+ }
292+
293+ fun saveThreadContext (context : CoroutineContext , oldValue : Any? ) { // Remembers (context, oldValue) in the thread-local so that afterResume can restore it
294+ threadLocalIsSet = true // Raise the flag before writing: it records that the thread-local is touched at all
295+ threadStateToRecover.set(context to oldValue) // stored as a Pair, destructured again in afterResume
296+ }
297+
298+ fun clearThreadContext (): Boolean { // Drops the thread-local entry; returns false only if the flag is set and the thread-local reads null here
299+ return ! (threadLocalIsSet && threadStateToRecover.get() == null ).also { // `.also` binds before `!`, so the block runs on the un-negated value
300+ threadStateToRecover.remove() // runs unconditionally, after the condition above has been evaluated
301+ }
302+ }
303+
304+ override fun afterResume (state : Any? ) { // Restores any saved thread context state, then resumes uCont with the recovered result
305+ if (threadLocalIsSet) { // the thread-local is not read at all unless saveThreadContext has been called
306+ threadStateToRecover.get()?.let { (ctx, value) -> // null when nothing is stored for the current thread
307+ restoreThreadContext(ctx, value)
308+ }
309+ threadStateToRecover.remove() // release the entry whether or not a value was found
310+ }
311+ // Resume undispatched: update the context for uCont but stay on the same dispatcher
312+ val result = recoverResult(state, uCont)
313+ withContinuationContext(uCont, null ) {
314+ uCont.resumeWith(result)
315+ }
316+ }
317+ }
212318
213319private const val UNDECIDED = 0
214320private const val SUSPENDED = 1
0 commit comments