diff --git a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt index 89d900134bc..e1386c531d9 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt @@ -47,30 +47,9 @@ if(NOT Threads_FOUND OR NOT CMAKE_USE_PTHREADS_INIT) message(FATAL_ERROR "pthread compatible library not found") endif() -# Add echion -set(ECHION_COMMIT - "43432c5c0a89617b06533215a15d0d6ffbbfd02b" # https://github.com/P403n1x87/echion/commit/43432c5c0a89617b06533215a15d0d6ffbbfd02b - CACHE STRING "Commit hash of echion to use") -FetchContent_Declare( - echion - GIT_REPOSITORY "https://github.com/P403n1x87/echion.git" - GIT_TAG ${ECHION_COMMIT}) -FetchContent_GetProperties(echion) - -if(NOT echion_POPULATED) - FetchContent_Populate(echion) -endif() - # Specify the target C-extension that we want to build -add_library( - ${EXTENSION_NAME} SHARED - ${echion_SOURCE_DIR}/echion/frame.cc - ${echion_SOURCE_DIR}/echion/render.cc - ${echion_SOURCE_DIR}/echion/danger.cc - src/sampler.cpp - src/stack_renderer.cpp - src/stack_v2.cpp - src/thread_span_links.cpp) +add_library(${EXTENSION_NAME} SHARED src/echion/danger.cc src/echion/frame.cc src/sampler.cpp src/stack_renderer.cpp + src/stack_v2.cpp src/thread_span_links.cpp) # Add common config add_ddup_config(${EXTENSION_NAME}) @@ -82,7 +61,6 @@ add_cppcheck_target( ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include/util ${CMAKE_CURRENT_SOURCE_DIR}/.. - ${echion_SOURCE_DIR} SRC ${CMAKE_CURRENT_SOURCE_DIR}/src) @@ -93,8 +71,8 @@ target_compile_definitions(${EXTENSION_NAME} PRIVATE UNWIND_NATIVE_DISABLE) target_include_directories( ${EXTENSION_NAME} PRIVATE .. 
# include dd_wrapper from the root in order to make its paths transparent in the code include) -target_include_directories(${EXTENSION_NAME} SYSTEM PRIVATE ${echion_SOURCE_DIR} ${Python3_INCLUDE_DIRS} - include/vendored include/util) +target_include_directories(${EXTENSION_NAME} SYSTEM PRIVATE ${Python3_INCLUDE_DIRS} echion include/vendored + include/util) # Echion sources need to be given the current platform if(APPLE) diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/.clang-format b/ddtrace/internal/datadog/profiling/stack_v2/echion/.clang-format new file mode 100644 index 00000000000..3d941098cbb --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/.clang-format @@ -0,0 +1,110 @@ +Language: Cpp +BasedOnStyle: Google +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterStruct: true + AfterUnion: true + BeforeCatch: true + BeforeElse: true + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 100 
+CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: true +IndentPPDirectives: None +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 4 +UseTab: Never diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/cache.h 
b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/cache.h new file mode 100644 index 00000000000..36778292dfd --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/cache.h @@ -0,0 +1,60 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#include +#include +#include +#include + +#include + +#define CACHE_MAX_ENTRIES 2048 + +template +class LRUCache +{ +public: + LRUCache(size_t capacity) : capacity(capacity) {} + + Result> lookup(const K& k); + + void store(const K& k, std::unique_ptr v); + +private: + size_t capacity; + std::list>> items; + std::unordered_map>>::iterator> index; +}; + +template +void LRUCache::store(const K& k, std::unique_ptr v) +{ + // Check if cache is full + if (items.size() >= capacity) + { + index.erase(items.back().first); + items.pop_back(); + } + + // Insert the new item at front of the list + items.emplace_front(k, std::move(v)); + + // Insert in the map + index[k] = items.begin(); +} + +template +Result> LRUCache::lookup(const K& k) +{ + auto itr = index.find(k); + if (itr == index.end()) + return ErrorKind::LookupError; + + // Move to the front of the list + items.splice(items.begin(), items, itr->second); + + return std::reference_wrapper(*(itr->second->second.get())); +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/config.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/config.h new file mode 100644 index 00000000000..5aa058e1c30 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/config.h @@ -0,0 +1,69 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +// Sampling interval +inline unsigned int interval = 1000; + +// CPU Time mode +inline int cpu = 0; + +// For cpu time mode, Echion only unwinds threads that're running by default. 
+// Set this to false to unwind all threads. +inline bool ignore_non_running_threads = true; + +// Maximum number of frames to unwind +inline unsigned int max_frames = 2048; + +// ---------------------------------------------------------------------------- +static PyObject* set_interval(PyObject* Py_UNUSED(m), PyObject* args) +{ + unsigned int new_interval; + if (!PyArg_ParseTuple(args, "I", &new_interval)) + return NULL; + + interval = new_interval; + + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +inline void _set_cpu(int new_cpu) +{ + cpu = new_cpu; +} + +// ---------------------------------------------------------------------------- +inline void _set_ignore_non_running_threads(bool new_ignore_non_running_threads) +{ + ignore_non_running_threads = new_ignore_non_running_threads; +} + +// ---------------------------------------------------------------------------- +static PyObject* set_cpu(PyObject* Py_UNUSED(m), PyObject* args) +{ + int new_cpu; + if (!PyArg_ParseTuple(args, "p", &new_cpu)) + return NULL; + + _set_cpu(new_cpu); + + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +static PyObject* set_max_frames(PyObject* Py_UNUSED(m), PyObject* args) +{ + unsigned int new_max_frames; + if (!PyArg_ParseTuple(args, "I", &new_max_frames)) + return NULL; + + max_frames = new_max_frames; + + Py_RETURN_NONE; +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/cpython/tasks.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/cpython/tasks.h new file mode 100644 index 00000000000..51632d0cd6c --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/cpython/tasks.h @@ -0,0 +1,293 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#if PY_VERSION_HEX >= 0x030b0000 +#include + +#define Py_BUILD_CORE +#if PY_VERSION_HEX >= 0x030d0000 +#include +#else +#include +#include +#endif // PY_VERSION_HEX >= 0x030d0000 +#else +#include +#include +#endif + +#include + +#include + +extern "C" { + +typedef enum +{ + STATE_PENDING, + STATE_CANCELLED, + STATE_FINISHED +} fut_state; + +#if PY_VERSION_HEX >= 0x030d0000 +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_exception_tb; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + PyObject* prefix##_cancel_msg; \ + PyObject* prefix##_cancelled_exc; \ + fut_state prefix##_state; \ + /* These bitfields need to be at the end of the struct \ + so that these and bitfields from TaskObj are contiguous. \ + */ \ + unsigned prefix##_log_tb : 1; \ + unsigned prefix##_blocking : 1; + +#elif PY_VERSION_HEX >= 0x030b0000 +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_exception_tb; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + PyObject* prefix##_cancel_msg; \ + fut_state prefix##_state; \ + int prefix##_log_tb; \ + int prefix##_blocking; \ + PyObject* dict; \ + PyObject* prefix##_weakreflist; \ + PyObject* prefix##_cancelled_exc; + +#elif PY_VERSION_HEX >= 0x030a0000 +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_exception_tb; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + PyObject* prefix##_cancel_msg; \ + 
fut_state prefix##_state; \ + int prefix##_log_tb; \ + int prefix##_blocking; \ + PyObject* dict; \ + PyObject* prefix##_weakreflist; \ + _PyErr_StackItem prefix##_cancelled_exc_state; + +#elif PY_VERSION_HEX >= 0x03090000 +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + PyObject* prefix##_cancel_msg; \ + fut_state prefix##_state; \ + int prefix##_log_tb; \ + int prefix##_blocking; \ + PyObject* dict; \ + PyObject* prefix##_weakreflist; \ + _PyErr_StackItem prefix##_cancelled_exc_state; + +#else +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + fut_state prefix##_state; \ + int prefix##_log_tb; \ + int prefix##_blocking; \ + PyObject* dict; \ + PyObject* prefix##_weakreflist; +#endif + +typedef struct +{ + FutureObj_HEAD(future) +} FutureObj; + +#if PY_VERSION_HEX >= 0x030d0000 +typedef struct +{ + FutureObj_HEAD(task) unsigned task_must_cancel : 1; + unsigned task_log_destroy_pending : 1; + int task_num_cancels_requested; + PyObject* task_fut_waiter; + PyObject* task_coro; + PyObject* task_name; + PyObject* task_context; +} TaskObj; + +#elif PY_VERSION_HEX >= 0x030a0000 +typedef struct +{ + FutureObj_HEAD(task) PyObject* task_fut_waiter; + PyObject* task_coro; + PyObject* task_name; + PyObject* task_context; + int task_must_cancel; + int task_log_destroy_pending; + int task_num_cancels_requested; +} TaskObj; + +#else +typedef struct +{ + FutureObj_HEAD(task) PyObject* task_fut_waiter; + PyObject* task_coro; + PyObject* task_name; + PyObject* task_context; + int task_must_cancel; + int task_log_destroy_pending; +} 
TaskObj; +#endif + +// ---- cr_await ---- + +#if PY_VERSION_HEX >= 0x030c0000 +#define RESUME_QUICK INSTRUMENTED_RESUME +#endif + +#if PY_VERSION_HEX >= 0x030b0000 +inline PyObject* PyGen_yf(PyGenObject* gen, PyObject* frame_addr) +{ + PyObject* yf = NULL; + + if (gen->gi_frame_state < FRAME_CLEARED) + { + if (gen->gi_frame_state == FRAME_CREATED) + return NULL; + + _PyInterpreterFrame frame; + if (copy_type(frame_addr, frame)) + return NULL; + + _Py_CODEUNIT next; +#if PY_VERSION_HEX >= 0x030d0000 + if (copy_type(frame.instr_ptr, next)) +#else + if (copy_type(frame.prev_instr + 1, next)) +#endif + return NULL; + if (!(_Py_OPCODE(next) == RESUME || _Py_OPCODE(next) == RESUME_QUICK) || + _Py_OPARG(next) < 2) + return NULL; + + if (frame.stacktop < 1 || frame.stacktop > (1 << 20)) + return NULL; + + auto localsplus = std::make_unique(frame.stacktop); + if (copy_generic(frame.localsplus, localsplus.get(), frame.stacktop * sizeof(PyObject*))) + return NULL; + + yf = localsplus[frame.stacktop - 1]; + } + + return yf; +} + +#elif PY_VERSION_HEX >= 0x030a0000 +inline PyObject* PyGen_yf(PyGenObject* Py_UNUSED(gen), PyObject* frame_addr) +{ + PyObject* yf = NULL; + PyFrameObject* f = (PyFrameObject*)frame_addr; + + if (f) + { + PyFrameObject frame; + if (copy_type(f, frame)) + return NULL; + + if (frame.f_lasti < 0) + return NULL; + + PyCodeObject code; + if (copy_type(frame.f_code, code)) + return NULL; + + Py_ssize_t s = 0; + auto c = pybytes_to_bytes_and_size(code.co_code, &s); + if (c == nullptr) + return NULL; + + if (c[(frame.f_lasti + 1) * sizeof(_Py_CODEUNIT)] != YIELD_FROM) + return NULL; + + ssize_t nvalues = frame.f_stackdepth; + if (nvalues < 1 || nvalues > (1 << 20)) + return NULL; + + auto stack = std::make_unique(nvalues); + + if (copy_generic(frame.f_valuestack, stack.get(), nvalues * sizeof(PyObject*))) + return NULL; + + yf = stack[nvalues - 1]; + } + + return yf; +} + +#else +inline PyObject* PyGen_yf(PyGenObject* Py_UNUSED(gen), PyObject* frame_addr) +{ 
+ PyObject* yf = NULL; + PyFrameObject* f = (PyFrameObject*)frame_addr; + + if (frame_addr == NULL) + return NULL; + + PyFrameObject frame; + if (copy_type(f, frame)) + return NULL; + + if (frame.f_stacktop) + { + if (frame.f_lasti < 0) + return NULL; + + PyCodeObject code; + if (copy_type(frame.f_code, code)) + return NULL; + + Py_ssize_t s = 0; + auto c = pybytes_to_bytes_and_size(code.co_code, &s); + if (c == nullptr) + return NULL; + + if (c[f->f_lasti + sizeof(_Py_CODEUNIT)] != YIELD_FROM) + return NULL; + + auto stacktop = std::make_unique(); + if (copy_generic(frame.f_stacktop - 1, stacktop.get(), sizeof(PyObject*))) + return NULL; + + yf = *stacktop; + } + + return yf; +} +#endif +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/danger.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/danger.h new file mode 100644 index 00000000000..13cb06401aa --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/danger.h @@ -0,0 +1,99 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined PL_DARWIN +#include + +#include +#include +#include +#include +#include +#endif + +int init_segv_catcher(); + +#if defined PL_LINUX +ssize_t safe_memcpy_wrapper(pid_t, const struct iovec* __dstvec, unsigned long int __dstiovcnt, + const struct iovec* __srcvec, unsigned long int __srciovcnt, + unsigned long int); +#elif defined PL_DARWIN +kern_return_t safe_memcpy_wrapper(vm_map_read_t target_task, mach_vm_address_t address, + mach_vm_size_t size, mach_vm_address_t data, + mach_vm_size_t* outsize); +#endif + +struct ThreadAltStack +{ +private: + inline static constexpr size_t kAltStackSize = 1 << 20; // 1 MiB + +public: + void* mem = nullptr; + size_t size = 0; + bool ready = false; + + int ensure_installed() + { + if (ready) + { + return 0; + } + + // If an altstack is already present, keep it. 
+ stack_t cur{}; + if (sigaltstack(nullptr, &cur) == 0 && !(cur.ss_flags & SS_DISABLE)) + { + ready = true; + return 0; + } + + void* stack_mem = mmap(nullptr, kAltStackSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (stack_mem == MAP_FAILED) + { + std::cerr << "Failed to allocate alt stack. Memory copying may not work." + << " Error: " << strerror(errno) << std::endl; + return -1; + } + + stack_t ss{}; + ss.ss_sp = stack_mem; + ss.ss_size = kAltStackSize; + ss.ss_flags = 0; + if (sigaltstack(&ss, nullptr) != 0) + { + std::cerr << "Failed to set alt stack. Memory copying may not work." + << " Error: " << strerror(errno) << std::endl; + return -1; + } + + this->mem = stack_mem; + this->size = kAltStackSize; + this->ready = true; + + return 0; + } + + ~ThreadAltStack() + { + if (!ready) + { + return; + } + + // Optional cleanup: disable and free. Safe at thread exit. + stack_t disable{}; + disable.ss_flags = SS_DISABLE; + (void)sigaltstack(&disable, nullptr); + munmap(mem, size); + } +}; diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/errors.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/errors.h new file mode 100644 index 00000000000..711ec683e96 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/errors.h @@ -0,0 +1,256 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#include +#include + +enum class ErrorKind +{ + Undefined, + LookupError, + PyBytesError, + BytecodeError, + FrameError, + MirrorError, + PyLongError, + PyUnicodeError, + UnwindError, + StackChunkError, + GenInfoError, + TaskInfoError, + TaskInfoGeneratorError, + ThreadInfoError, + CpuTimeError, + LocationError, + RendererError, +}; + +template +class [[nodiscard]] Result +{ +public: + // Factories + static Result ok(const T& v) + { + return Result(v); + } + static Result ok(T&& v) + { + return Result(std::move(v)); + } + static Result error(ErrorKind e) noexcept + { + return Result(e); + } + + // Constructors + Result(const T& v) noexcept(std::is_nothrow_copy_constructible::value) : success_(true) + { + ::new (static_cast(std::addressof(value_))) T(v); + } + + Result(T&& v) noexcept(std::is_nothrow_move_constructible::value) : success_(true) + { + ::new (static_cast(std::addressof(value_))) T(std::move(v)); + } + + Result(ErrorKind e) noexcept : success_(false) + { + error_ = e; + } + + // Destructor + ~Result() + { + reset(); + } + + // Copy ctor + Result(const Result& other) noexcept(std::is_nothrow_copy_constructible::value) + : success_(other.success_) + { + if (success_) + { + ::new (static_cast(std::addressof(value_))) T(other.value_); + } + else + { + error_ = other.error_; + } + } + + // Move ctor + Result(Result&& other) noexcept(std::is_nothrow_move_constructible::value) + : success_(other.success_) + { + if (success_) + { + ::new (static_cast(std::addressof(value_))) T(std::move(other.value_)); + } + else + { + error_ = other.error_; + } + } + + // Copy assignment + Result& operator=(const Result& other) noexcept(std::is_nothrow_copy_constructible::value && + std::is_nothrow_copy_assignable::value) + { + if (this == &other) + return *this; + + if (success_ && other.success_) + { + value_ = other.value_; + } + else if (success_ && !other.success_) + { + value_.~T(); + success_ = false; + error_ = other.error_; + } + else if (!success_ 
&& other.success_) + { + ::new (static_cast(std::addressof(value_))) T(other.value_); + success_ = true; + } + else + { // both errors + error_ = other.error_; + } + return *this; + } + + // Move assignment + Result& operator=(Result&& other) noexcept(std::is_nothrow_move_constructible::value && + std::is_nothrow_move_assignable::value) + { + if (this == &other) + return *this; + + if (success_ && other.success_) + { + value_ = std::move(other.value_); + } + else if (success_ && !other.success_) + { + value_.~T(); + success_ = false; + error_ = other.error_; + } + else if (!success_ && other.success_) + { + ::new (static_cast(std::addressof(value_))) T(std::move(other.value_)); + success_ = true; + } + else + { // both errors + error_ = other.error_; + } + return *this; + } + + // Observers + explicit operator bool() const noexcept + { + return success_; + } + + T& operator*() & + { + return value_; + } + const T& operator*() const& + { + return value_; + } + T&& operator*() && + { + return std::move(value_); + } + + T* operator->() + { + return std::addressof(value_); + } + const T* operator->() const + { + return std::addressof(value_); + } + + bool has_value() const noexcept + { + return success_; + } + + // If in error, returns default_value + template + T value_or(U&& default_value) const + { + return success_ ? value_ : static_cast(std::forward(default_value)); + } + + // Returns ErrorKind::Undefined when holding a value + ErrorKind error() const noexcept + { + return success_ ? 
ErrorKind::Undefined : error_; + } + +private: + // Active member is tracked by success_ + union + { + ErrorKind error_; + T value_; + }; + bool success_; + + void reset() noexcept + { + if (success_) + { + value_.~T(); + } + } +}; + +// Specialization for void +template <> +class [[nodiscard]] Result +{ +public: + static Result ok() noexcept + { + return Result(true, ErrorKind::Undefined); + } + static Result error(ErrorKind e) noexcept + { + return Result(false, e); + } + Result(ErrorKind e) noexcept : success_(false), error_(e) {} + + explicit operator bool() const noexcept + { + return success_; + } + bool has_value() const noexcept + { + return success_; + } + + // Returns ErrorKind::Undefined when success + ErrorKind error() const noexcept + { + return success_ ? ErrorKind::Undefined : error_; + } + +private: + bool success_; + ErrorKind error_; + + explicit Result(bool s, ErrorKind e) noexcept : success_(s), error_(e) {} +}; diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/frame.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/frame.h new file mode 100644 index 00000000000..9fa9cdb92af --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/frame.h @@ -0,0 +1,97 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include +#if defined __GNUC__ && defined HAVE_STD_ATOMIC +#undef HAVE_STD_ATOMIC +#endif +#if PY_VERSION_HEX >= 0x030c0000 +// https://github.com/python/cpython/issues/108216#issuecomment-1696565797 +#undef _PyGC_FINALIZED +#endif +#include +#if PY_VERSION_HEX >= 0x030d0000 +#define Py_BUILD_CORE +#include +#endif // PY_VERSION_HEX >= 0x030d0000 +#if PY_VERSION_HEX >= 0x030b0000 +#define Py_BUILD_CORE +#include +#endif + +#include +#include +#include +#include + +#include +#include +#if PY_VERSION_HEX >= 0x030b0000 +#include +#endif // PY_VERSION_HEX >= 0x030b0000 +#include +#include + +// ---------------------------------------------------------------------------- +class Frame +{ +public: + using Ref = std::reference_wrapper; + using Ptr = std::unique_ptr; + using Key = uintptr_t; + + // ------------------------------------------------------------------------ + Key cache_key = 0; + StringTable::Key filename = 0; + StringTable::Key name = 0; + + struct _location + { + int line = 0; + int line_end = 0; + int column = 0; + int column_end = 0; + } location; + +#if PY_VERSION_HEX >= 0x030b0000 + bool is_entry = false; +#endif + + // ------------------------------------------------------------------------ + Frame(StringTable::Key filename, StringTable::Key name) : filename(filename), name(name) {} + Frame(StringTable::Key name) : name(name) {}; + Frame(PyObject* frame); + [[nodiscard]] static Result create(PyCodeObject* code, int lasti); + +#if PY_VERSION_HEX >= 0x030b0000 + [[nodiscard]] static Result> read( + _PyInterpreterFrame* frame_addr, _PyInterpreterFrame** prev_addr); +#else + [[nodiscard]] static Result> read(PyObject* frame_addr, + PyObject** prev_addr); +#endif + + [[nodiscard]] static Result> get(PyCodeObject* code_addr, + int lasti); + static Frame& get(PyObject* frame); + static Frame& get(StringTable::Key name); + +private: + [[nodiscard]] Result inline infer_location(PyCodeObject* code, int lasti); + static 
inline Key key(PyCodeObject* code, int lasti); + static inline Key key(PyObject* frame); +}; + +inline auto INVALID_FRAME = Frame(StringTable::INVALID); +inline auto UNKNOWN_FRAME = Frame(StringTable::UNKNOWN); +inline auto C_FRAME = Frame(StringTable::C_FRAME); + +// We make this a raw pointer to prevent its destruction on exit, since we +// control the lifetime of the cache. +inline LRUCache* frame_cache = nullptr; +void init_frame_cache(size_t capacity); +void reset_frame_cache(); diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/greenlets.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/greenlets.h new file mode 100644 index 00000000000..06dc32ac73a --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/greenlets.h @@ -0,0 +1,96 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2025 Gabriele N. Tornetta . + +#pragma once + +#include +#define Py_BUILD_CORE + + +#include +#include + + +#define FRAME_NOT_SET Py_False // Sentinel for frame cell + + +class GreenletInfo +{ +public: + typedef std::unique_ptr Ptr; + typedef std::reference_wrapper Ref; + typedef uintptr_t ID; + + ID greenlet_id = 0; + StringTable::Key name; + PyObject* frame = NULL; + + GreenletInfo(ID id, PyObject* frame, StringTable::Key name) + : greenlet_id(id), name(name), frame(frame) + { + } + + int unwind(PyObject*, PyThreadState*, FrameStack&); +}; + +// ---------------------------------------------------------------------------- + +inline int GreenletInfo::unwind(PyObject* frame, PyThreadState* tstate, FrameStack& stack) +{ + PyObject* frame_addr = NULL; +#if PY_VERSION_HEX >= 0x030d0000 + frame_addr = + frame == Py_None + ? 
reinterpret_cast(tstate->current_frame) + : reinterpret_cast(reinterpret_cast(frame)->f_frame); +#elif PY_VERSION_HEX >= 0x030b0000 + if (frame == Py_None) + { + _PyCFrame cframe; + _PyCFrame* cframe_addr = tstate->cframe; + if (copy_type(cframe_addr, cframe)) + // TODO: Invalid frame + return 0; + + frame_addr = reinterpret_cast(cframe.current_frame); + } + else + { + frame_addr = reinterpret_cast(reinterpret_cast(frame)->f_frame); + } + +#else // Python < 3.11 + frame_addr = frame == Py_None ? reinterpret_cast(tstate->frame) : frame; +#endif + auto count = unwind_frame(frame_addr, stack); + + stack.push_back(Frame::get(name)); + + return count + 1; // We add an extra count for the frame with the greenlet + // name. +} + +// ---------------------------------------------------------------------------- + +// We make this a reference to a heap-allocated object so that we can avoid +// the destruction on exit. We are in charge of cleaning up the object. Note +// that the object will leak, but this is not a problem. +inline std::unordered_map& greenlet_info_map = + *(new std::unordered_map()); + +// maps greenlets to their parent +inline std::unordered_map& greenlet_parent_map = + *(new std::unordered_map()); + +// maps threads to any currently active greenlets +inline std::unordered_map& greenlet_thread_map = + *(new std::unordered_map()); + +inline std::mutex greenlet_info_map_lock; + +// ---------------------------------------------------------------------------- + +inline std::vector> current_greenlets; + +// ---------------------------------------------------------------------------- diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/interp.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/interp.h new file mode 100644 index 00000000000..160fcea6440 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/interp.h @@ -0,0 +1,56 @@ +// This file is part of "echion" which is released under MIT. 
+// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#if PY_VERSION_HEX >= 0x03090000 +#define Py_BUILD_CORE +#if defined __GNUC__ && defined HAVE_STD_ATOMIC +#undef HAVE_STD_ATOMIC +#endif +#include +#endif + +#include + +#include +#include + + +class InterpreterInfo +{ +public: + int64_t id = 0; + void* tstate_head = NULL; + void* next = NULL; +}; + +static void for_each_interp(std::function callback) +{ + InterpreterInfo interpreter_info = {0}; + + for (char* interp_addr = reinterpret_cast(runtime->interpreters.head); + interp_addr != NULL; interp_addr = reinterpret_cast(interpreter_info.next)) + { + if (copy_type(interp_addr + offsetof(PyInterpreterState, id), interpreter_info.id)) + continue; + +#if PY_VERSION_HEX >= 0x030b0000 + if (copy_type(interp_addr + offsetof(PyInterpreterState, threads.head), + interpreter_info.tstate_head)) +#else + if (copy_type(interp_addr + offsetof(PyInterpreterState, tstate_head), + interpreter_info.tstate_head)) +#endif + continue; + + if (copy_type(interp_addr + offsetof(PyInterpreterState, next), interpreter_info.next)) + continue; + + callback(interpreter_info); + }; +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/long.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/long.h new file mode 100644 index 00000000000..2971b81f31e --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/long.h @@ -0,0 +1,73 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . +#pragma once + +#include +#if PY_VERSION_HEX >= 0x030c0000 +#include +// Note: Even if use the right PYLONG_BITS_IN_DIGIT that is specified in the +// Python we use to build echion, it can be different from the Python that is +// used to run the program. 
+#if PYLONG_BITS_IN_DIGIT == 30 +typedef uint32_t digit; +#elif PYLONG_BITS_IN_DIGIT == 15 +typedef unsigned short digit; +#else +#error "Unsupported PYLONG_BITS_IN_DIGIT" +#endif // PYLONG_BITS_IN_DIGIT +#endif // PY_VERSION_HEX >= 0x030c0000 + +#include +#include + +constexpr Py_ssize_t MAX_DIGITS = 128; + +// ---------------------------------------------------------------------------- +#if PY_VERSION_HEX >= 0x030c0000 +[[nodiscard]] static Result pylong_to_llong(PyObject* long_addr) +{ + // Only used to extract a task-id on Python 3.12, omits overflow checks + PyLongObject long_obj; + long long ret = 0; + + if (copy_type(long_addr, long_obj)) + return ErrorKind::PyLongError; + + if (!PyLong_CheckExact(&long_obj)) + return ErrorKind::PyLongError; + + if (_PyLong_IsCompact(&long_obj)) + { + ret = static_cast(_PyLong_CompactValue(&long_obj)); + } + else + { + // If we're here, then we need to iterate over the digits + // We might overflow, but we don't care for now + int sign = _PyLong_NonCompactSign(&long_obj); + Py_ssize_t i = _PyLong_DigitCount(&long_obj); + + if (i > MAX_DIGITS) + { + return ErrorKind::PyLongError; + } + + // Copy over the digits as ob_digit is allocated dynamically with + // PyObject_Malloc. + digit digits[MAX_DIGITS]; + if (copy_generic(long_obj.long_value.ob_digit, digits, i * sizeof(digit))) + { + return ErrorKind::PyLongError; + } + while (--i >= 0) + { + ret <<= PyLong_SHIFT; + ret |= digits[i]; + } + ret *= sign; + } + + return ret; +} +#endif diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/mirrors.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/mirrors.h new file mode 100644 index 00000000000..854e5748c59 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/mirrors.h @@ -0,0 +1,207 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#include "echion/errors.h" +#define PY_SSIZE_T_CLEAN +#include +#include +#include + +#include + +#if PY_VERSION_HEX >= 0x030b0000 +#define Py_BUILD_CORE +#if defined __GNUC__ && defined HAVE_STD_ATOMIC +#undef HAVE_STD_ATOMIC +#endif +#include +#else +typedef struct +{ + Py_hash_t me_hash; + PyObject* me_key; + PyObject* me_value; /* This field is only meaningful for combined tables */ +} PyDictKeyEntry; + +typedef Py_ssize_t (*dict_lookup_func)(PyDictObject* mp, PyObject* key, Py_hash_t hash, + PyObject** value_addr); + +/* See dictobject.c for actual layout of DictKeysObject */ +typedef struct _dictkeysobject +{ + Py_ssize_t dk_refcnt; + + /* Size of the hash table (dk_indices). It must be a power of 2. */ + Py_ssize_t dk_size; + + dict_lookup_func dk_lookup; + + /* Number of usable entries in dk_entries. */ + Py_ssize_t dk_usable; + + /* Number of used entries in dk_entries. */ + Py_ssize_t dk_nentries; + + char dk_indices[]; /* char is required to avoid strict aliasing. 
*/ + +} PyDictKeysObject; + +typedef PyObject* PyDictValues; +#endif + +#include + +#include + +class MirrorObject +{ +protected: + MirrorObject(std::unique_ptr data) : data(std::move(data)) {} + + std::unique_ptr data = nullptr; +}; + +// ---------------------------------------------------------------------------- +class MirrorDict : public MirrorObject +{ +public: + [[nodiscard]] static inline Result create(PyObject* dict_addr); + + [[nodiscard]] PyObject* get_item(PyObject* key) + { + return PyDict_GetItem(reinterpret_cast(&dict), key); + } + +private: + MirrorDict(PyDictObject dict, std::unique_ptr data) + : MirrorObject(std::move(data)), dict(dict) + { + } + PyDictObject dict; +}; + +[[nodiscard]] inline Result MirrorDict::create(PyObject* dict_addr) +{ + PyDictObject dict; + + if (copy_type(dict_addr, dict)) + { + return ErrorKind::MirrorError; + } + + PyDictKeysObject keys; + if (copy_type(dict.ma_keys, keys)) + { + return ErrorKind::MirrorError; + } + + // Compute the full dictionary data size +#if PY_VERSION_HEX >= 0x030b0000 + size_t entry_size = + keys.dk_kind == DICT_KEYS_UNICODE ? sizeof(PyDictUnicodeEntry) : sizeof(PyDictKeyEntry); + size_t keys_size = sizeof(PyDictKeysObject) + (1 << keys.dk_log2_index_bytes) + + (keys.dk_nentries * entry_size); +#else + size_t entry_size = sizeof(PyDictKeyEntry); + size_t keys_size = sizeof(PyDictKeysObject) + (keys.dk_size * sizeof(Py_ssize_t)) + + (keys.dk_nentries * entry_size); +#endif + size_t values_size = dict.ma_values != NULL ? 
keys.dk_nentries * sizeof(PyObject*) : 0;
+
+    // Allocate the buffer (data_size over-reserves: keys_size already includes the entries)
+    ssize_t data_size = keys_size + (keys.dk_nentries * entry_size) + values_size;
+    if (data_size < 0 || data_size > (1 << 20))
+    {
+        return ErrorKind::MirrorError;
+    }
+
+    auto data = std::make_unique<char[]>(data_size);
+
+    // Copy the key data and update the pointer
+    if (copy_generic(dict.ma_keys, data.get(), keys_size))
+    {
+        return ErrorKind::MirrorError;
+    }
+
+    dict.ma_keys = reinterpret_cast<PyDictKeysObject*>(data.get());
+
+    if (dict.ma_values != NULL)
+    {
+        // Copy the value data and update the pointer
+        char* values_addr = data.get() + keys_size;
+        if (copy_generic(dict.ma_values, values_addr, values_size))  // dest must be values_addr, not keys_size
+        {
+            return ErrorKind::MirrorError;
+        }
+
+        dict.ma_values = reinterpret_cast<PyDictValues*>(values_addr);
+    }
+
+    return MirrorDict(dict, std::move(data));
+}
+
+// ----------------------------------------------------------------------------
+class MirrorSet : public MirrorObject
+{
+public:
+    [[nodiscard]] inline static Result<MirrorSet> create(PyObject*);
+    [[nodiscard]] Result<std::unordered_set<PyObject*>> as_unordered_set();
+
+private:
+    MirrorSet(size_t size, PySetObject set, std::unique_ptr<char[]> data)
+        : MirrorObject(std::move(data)), size(size), set(set)
+    {
+    }
+
+    size_t size;
+    PySetObject set;
+};
+
+[[nodiscard]] inline Result<MirrorSet> MirrorSet::create(PyObject* set_addr)
+{
+    PySetObject set;
+
+    if (copy_type(set_addr, set))
+    {
+        return ErrorKind::MirrorError;
+    }
+
+    auto size = set.mask + 1;
+    ssize_t table_size = size * sizeof(setentry);
+    if (table_size < 0 || table_size > (1 << 20))
+    {
+        return ErrorKind::MirrorError;
+    }
+
+    auto data = std::make_unique<char[]>(table_size);
+    if (copy_generic(set.table, data.get(), table_size))
+    {
+        return ErrorKind::MirrorError;
+    }
+
+    set.table = reinterpret_cast<setentry*>(data.get());
+
+    return MirrorSet(size, set, std::move(data));
+}
+
+[[nodiscard]] inline Result<std::unordered_set<PyObject*>> MirrorSet::as_unordered_set()
+{
+    if (data == nullptr)
+    {
+        return ErrorKind::MirrorError;
+    }
+
+    std::unordered_set<PyObject*> uset;
+
+    for (size_t i = 0; i < size; i++)
+    
{ + auto entry = set.table[i]; + if (entry.key != NULL) + uset.insert(entry.key); + } + + return uset; +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/mojo.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/mojo.h new file mode 100644 index 00000000000..f4d0cc1b4a8 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/mojo.h @@ -0,0 +1,35 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#define MOJO_VERSION 3 + +enum MojoEvent +{ + MOJO_RESERVED, + MOJO_METADATA, + MOJO_STACK, + MOJO_FRAME, + MOJO_FRAME_INVALID, + MOJO_FRAME_REF, + MOJO_FRAME_KERNEL, + MOJO_GC, + MOJO_IDLE, + MOJO_METRIC_TIME, + MOJO_METRIC_MEMORY, + MOJO_STRING, + MOJO_STRING_REF, + MOJO_MAX, +}; + +#if defined __arm__ +using mojo_int_t = long; +using mojo_uint_t = unsigned long; +using mojo_ref_t = unsigned long; +#else +using mojo_int_t = long long; +using mojo_uint_t = unsigned long long; +using mojo_ref_t = unsigned long long; +#endif diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/render.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/render.h new file mode 100644 index 00000000000..2824b59ce17 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/render.h @@ -0,0 +1,222 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +// Forward declaration +class Frame; + +enum MetricType +{ + Time, + Memory +}; + +class RendererInterface +{ +public: + [[nodiscard]] virtual Result open() = 0; + virtual void close() = 0; + virtual void header() = 0; + virtual void metadata(const std::string& label, const std::string& value) = 0; + // If a renderer has its own caching mechanism for frames, this can be used + // to store frame information. 
+ virtual void frame(mojo_ref_t key, mojo_ref_t filename, mojo_ref_t name, mojo_int_t line, + mojo_int_t line_end, mojo_int_t column, mojo_int_t column_end) = 0; + // Refers to the frame stored using above function + virtual void frame_ref(mojo_ref_t key) = 0; + virtual void frame_kernel(const std::string& scope) = 0; + // Simlar to frame/frame_ref functions, helpers for string tables + virtual void string(mojo_ref_t key, const std::string& value) = 0; + virtual void string_ref(mojo_ref_t key) = 0; + + virtual void render_message(std::string_view msg) = 0; + virtual void render_thread_begin(PyThreadState* tstate, std::string_view name, + microsecond_t cpu_time, uintptr_t thread_id, + unsigned long native_id) = 0; + virtual void render_task_begin(std::string task_name, bool on_cpu) = 0; + virtual void render_stack_begin(long long pid, long long iid, + const std::string& thread_name) = 0; + virtual void render_frame(Frame& frame) = 0; + virtual void render_cpu_time(uint64_t cpu_time) = 0; + virtual void render_stack_end(MetricType metric_type, uint64_t delta) = 0; + + // The validity of the interface is a two-step process + // 1. If the RendererInterface has been destroyed, obviously it's invalid + // 2. There might be state behind RendererInterface, and the lifetime of that + // state alone may be insufficient to know its usability. is_valid + // should return false in such cases. 
+    virtual bool is_valid() = 0;
+    virtual ~RendererInterface() = default;
+};
+
+class NullRenderer : public RendererInterface
+{
+public:
+    bool is_valid() override
+    {
+        return true;
+    }
+    void header() override {}
+    void metadata(const std::string&, const std::string&) override {}
+    void frame(mojo_ref_t, mojo_ref_t, mojo_ref_t, mojo_int_t, mojo_int_t, mojo_int_t,
+               mojo_int_t) override
+    {
+    }
+    void frame_ref(mojo_ref_t) override {}
+    void frame_kernel(const std::string&) override {}
+
+    void string(mojo_ref_t, const std::string&) override {}
+    void string_ref(mojo_ref_t) override {}
+    void render_message(std::string_view) override {}
+    void render_thread_begin(PyThreadState*, std::string_view, microsecond_t, uintptr_t,
+                             unsigned long) override
+    {
+    }
+    void render_task_begin(std::string, bool) override {}
+    void render_stack_begin(long long, long long, const std::string&) override {}
+    void render_frame(Frame&) override {}
+    void render_cpu_time(uint64_t) override {}
+    void render_stack_end(MetricType, uint64_t) override {}
+
+    Result<void> open() override
+    {
+        return Result<void>::ok();
+    }
+    void close() override {}
+};
+
+class Renderer
+{
+private:
+    std::shared_ptr<RendererInterface> defaultRenderer = std::make_shared<NullRenderer>();
+    std::weak_ptr<RendererInterface> currentRenderer;
+
+    std::shared_ptr<RendererInterface> getActiveRenderer()
+    {
+        if (auto renderer = currentRenderer.lock())
+        {
+            if (renderer->is_valid())
+            {
+                return renderer;
+            }
+        }
+
+        return defaultRenderer;  // never null: fall back to the no-op renderer (nullptr would crash callers)
+    }
+
+    Renderer() = default;
+    ~Renderer() = default;
+
+public:
+    Renderer(const Renderer&) = delete;
+    Renderer& operator=(const Renderer&) = delete;
+
+    static Renderer& get()
+    {
+        static Renderer instance;
+        return instance;
+    }
+
+    void set_renderer(std::shared_ptr<RendererInterface> renderer)
+    {
+        currentRenderer = renderer;
+    }
+
+    void header()
+    {
+        getActiveRenderer()->header();
+    }
+
+    void metadata(const std::string& label, const std::string& value)
+    {
+        getActiveRenderer()->metadata(label, value);
+    }
+
+    void string(mojo_ref_t key, const std::string& value)
+    {
+        
getActiveRenderer()->string(key, value); + } + + void frame(mojo_ref_t key, mojo_ref_t filename, mojo_ref_t name, mojo_int_t line, + mojo_int_t line_end, mojo_int_t column, mojo_int_t column_end) + { + getActiveRenderer()->frame(key, filename, name, line, line_end, column, column_end); + } + + void frame_ref(mojo_ref_t key) + { + getActiveRenderer()->frame_ref(key); + } + + void frame_kernel(const std::string& scope) + { + getActiveRenderer()->frame_kernel(scope); + } + + void string(mojo_ref_t key, const char* value) + { + getActiveRenderer()->string(key, value); + } + + void string_ref(mojo_ref_t key) + { + getActiveRenderer()->string_ref(key); + } + + void render_message(std::string_view msg) + { + getActiveRenderer()->render_message(msg); + } + + [[nodiscard]] Result open() + { + return getActiveRenderer()->open(); + } + + void close() + { + getActiveRenderer()->close(); + } + + void render_thread_begin(PyThreadState* tstate, std::string_view name, microsecond_t cpu_time, + uintptr_t thread_id, unsigned long native_id) + { + getActiveRenderer()->render_thread_begin(tstate, name, cpu_time, thread_id, native_id); + } + + void render_task_begin(std::string task_name, bool on_cpu) + { + getActiveRenderer()->render_task_begin(task_name, on_cpu); + } + + void render_stack_begin(long long pid, long long iid, const std::string& thread_name) + { + getActiveRenderer()->render_stack_begin(pid, iid, thread_name); + } + + void render_frame(Frame& frame) + { + getActiveRenderer()->render_frame(frame); + } + + void render_cpu_time(uint64_t cpu_time) + { + getActiveRenderer()->render_cpu_time(cpu_time); + } + + void render_stack_end(MetricType metric_type, uint64_t delta) + { + getActiveRenderer()->render_stack_end(metric_type, delta); + } +}; diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/stack_chunk.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/stack_chunk.h new file mode 100644 index 00000000000..9987674b077 --- /dev/null +++ 
b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/stack_chunk.h @@ -0,0 +1,109 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#define PY_SSIZE_T_CLEAN +#include +#if defined __GNUC__ && defined HAVE_STD_ATOMIC +#undef HAVE_STD_ATOMIC +#endif +#define Py_BUILD_CORE +#include + +#include +#include + +#include +#include + + +// ---------------------------------------------------------------------------- +class StackChunk +{ +public: + StackChunk() {} + + [[nodiscard]] inline Result update(_PyStackChunk* chunk_addr); + inline void* resolve(void* frame_addr); + inline bool is_valid() const; + +private: + void* origin = NULL; + std::vector data; + size_t data_capacity = 0; + std::unique_ptr previous = nullptr; +}; + +// ---------------------------------------------------------------------------- +Result StackChunk::update(_PyStackChunk* chunk_addr) +{ + _PyStackChunk chunk; + + if (copy_type(chunk_addr, chunk)) + { + return ErrorKind::StackChunkError; + } + + origin = chunk_addr; + // if data_capacity is not enough, reallocate. 
+ if (chunk.size > data_capacity) + { + data_capacity = std::max(chunk.size, data_capacity); + data.resize(data_capacity); + } + + // Copy the data up until the size of the chunk + if (copy_generic(chunk_addr, data.data(), chunk.size)) + { + return ErrorKind::StackChunkError; + } + + if (chunk.previous != NULL) + { + if (previous == nullptr) + previous = std::make_unique(); + + auto update_success = previous->update(reinterpret_cast<_PyStackChunk*>(chunk.previous)); + if (!update_success) + { + previous = nullptr; + } + } + + return Result::ok(); +} + +// ---------------------------------------------------------------------------- +void* StackChunk::resolve(void* address) +{ + // If data is not properly initialized, simply return the address + if (!is_valid()) + { + return address; + } + + _PyStackChunk* chunk = reinterpret_cast<_PyStackChunk*>(data.data()); + + // Check if this chunk contains the address + if (address >= origin && address < reinterpret_cast(origin) + chunk->size) + return reinterpret_cast(chunk) + + (reinterpret_cast(address) - reinterpret_cast(origin)); + + if (previous) + return previous->resolve(address); + + return address; +} + +// ---------------------------------------------------------------------------- +bool StackChunk::is_valid() const +{ + return data_capacity > 0 && data.size() > 0 && data.size() >= sizeof(_PyStackChunk) && + data.data() != nullptr && origin != nullptr; +} + +// ---------------------------------------------------------------------------- + +inline std::unique_ptr stack_chunk = nullptr; diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/stacks.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/stacks.h new file mode 100644 index 00000000000..d5a3b93385b --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/stacks.h @@ -0,0 +1,280 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#include +#include +#include +#include + +#include +#include +#include +#if PY_VERSION_HEX >= 0x030b0000 +#include "echion/stack_chunk.h" +#endif // PY_VERSION_HEX >= 0x030b0000 +#include + +// ---------------------------------------------------------------------------- + +class FrameStack : public std::deque +{ +public: + using Ptr = std::unique_ptr; + using Key = Frame::Key; + + // ------------------------------------------------------------------------ + Key key() + { + Key h = 0; + + for (auto it = this->begin(); it != this->end(); ++it) + h = rotl(h) ^ (*it).get().cache_key; + + return h; + } + + // ------------------------------------------------------------------------ + void render() + { + for (auto it = this->rbegin(); it != this->rend(); ++it) + { +#if PY_VERSION_HEX >= 0x030c0000 + if ((*it).get().is_entry) + // This is a shim frame so we skip it. + continue; +#endif + Renderer::get().render_frame((*it).get()); + } + } + +private: + // ------------------------------------------------------------------------ + static inline Frame::Key rotl(Key key) + { + return (key << 1) | (key >> (CHAR_BIT * sizeof(key) - 1)); + } +}; + +// ---------------------------------------------------------------------------- + +inline FrameStack python_stack; +inline FrameStack interleaved_stack; + +// ---------------------------------------------------------------------------- +static size_t unwind_frame(PyObject* frame_addr, FrameStack& stack) +{ + std::unordered_set seen_frames; // Used to detect cycles in the stack + int count = 0; + + PyObject* current_frame_addr = frame_addr; + while (current_frame_addr != NULL && stack.size() < max_frames) + { + if (seen_frames.find(current_frame_addr) != seen_frames.end()) + break; + + seen_frames.insert(current_frame_addr); + +#if PY_VERSION_HEX >= 0x030b0000 + auto maybe_frame = + Frame::read(reinterpret_cast<_PyInterpreterFrame*>(current_frame_addr), + 
reinterpret_cast<_PyInterpreterFrame**>(¤t_frame_addr)); +#else + auto maybe_frame = Frame::read(current_frame_addr, ¤t_frame_addr); +#endif + if (!maybe_frame) + { + break; + } + + if (maybe_frame->get().name == StringTable::C_FRAME) + { + continue; + } + + stack.push_back(*maybe_frame); + count++; + } + + return count; +} + +// ---------------------------------------------------------------------------- +static size_t unwind_frame_unsafe(PyObject* frame, FrameStack& stack) +{ + std::unordered_set seen_frames; // Used to detect cycles in the stack + int count = 0; + + PyObject* current_frame = frame; + while (current_frame != NULL && stack.size() < max_frames) + { + if (seen_frames.find(current_frame) != seen_frames.end()) + break; + +#if PY_VERSION_HEX >= 0x030d0000 + // See the comment in unwind_frame() + while (current_frame != NULL) + { + if (reinterpret_cast<_PyInterpreterFrame*>(current_frame)->f_executable->ob_type == + &PyCode_Type) + { + break; + } + current_frame = reinterpret_cast( + reinterpret_cast<_PyInterpreterFrame*>(current_frame)->previous); + } + + if (current_frame == NULL) + { + break; + } +#endif // PY_VERSION_HEX >= 0x030d0000 + count++; + + seen_frames.insert(current_frame); + + stack.push_back(Frame::get(current_frame)); + +#if PY_VERSION_HEX >= 0x030b0000 + current_frame = reinterpret_cast( + reinterpret_cast<_PyInterpreterFrame*>(current_frame)->previous); +#else + current_frame = (PyObject*)((PyFrameObject*)current_frame)->f_back; +#endif + } + + return count; +} + +// ---------------------------------------------------------------------------- +static void unwind_python_stack(PyThreadState* tstate, FrameStack& stack) +{ + stack.clear(); +#if PY_VERSION_HEX >= 0x030b0000 + if (stack_chunk == nullptr) + { + stack_chunk = std::make_unique(); + } + + if (!stack_chunk->update(reinterpret_cast<_PyStackChunk*>(tstate->datastack_chunk))) + { + stack_chunk = nullptr; + } +#endif + +#if PY_VERSION_HEX >= 0x030d0000 + PyObject* frame_addr = 
reinterpret_cast(tstate->current_frame); +#elif PY_VERSION_HEX >= 0x030b0000 + _PyCFrame cframe; + _PyCFrame* cframe_addr = tstate->cframe; + if (copy_type(cframe_addr, cframe)) + // TODO: Invalid frame + return; + + PyObject* frame_addr = reinterpret_cast(cframe.current_frame); +#else // Python < 3.11 + PyObject* frame_addr = reinterpret_cast(tstate->frame); +#endif + unwind_frame(frame_addr, stack); +} + +// ---------------------------------------------------------------------------- +static void unwind_python_stack_unsafe(PyThreadState* tstate, FrameStack& stack) +{ + stack.clear(); +#if PY_VERSION_HEX >= 0x030b0000 + if (stack_chunk == nullptr) + { + stack_chunk = std::make_unique(); + } + + if (!stack_chunk->update(reinterpret_cast<_PyStackChunk*>(tstate->datastack_chunk))) + { + stack_chunk = nullptr; + } +#endif + +#if PY_VERSION_HEX >= 0x030d0000 + PyObject* frame_addr = reinterpret_cast(tstate->current_frame); +#elif PY_VERSION_HEX >= 0x030b0000 + PyObject* frame_addr = reinterpret_cast(tstate->cframe->current_frame); +#else // Python < 3.11 + PyObject* frame_addr = reinterpret_cast(tstate->frame); +#endif + unwind_frame_unsafe(frame_addr, stack); +} + +// ---------------------------------------------------------------------------- +static void unwind_python_stack(PyThreadState* tstate) +{ + unwind_python_stack(tstate, python_stack); +} + +// ---------------------------------------------------------------------------- +class StackInfo +{ +public: + StringTable::Key task_name; + bool on_cpu; + FrameStack stack; + + StackInfo(StringTable::Key task_name, bool on_cpu) : task_name(task_name), on_cpu(on_cpu) {} +}; + +// ---------------------------------------------------------------------------- +// This table is used to store entire stacks and index them by key. This is +// used when profiling memory events to account for deallocations. 
+class StackTable +{ +public: + // ------------------------------------------------------------------------ + FrameStack::Key inline store(FrameStack::Ptr stack) + { + std::lock_guard guard(this->lock); + + auto stack_key = stack->key(); + + auto stack_entry = table.find(stack_key); + if (stack_entry == table.end()) + { + table.emplace(stack_key, std::move(stack)); + } + else + { + // TODO: Check for collisions. + } + + return stack_key; + } + + // ------------------------------------------------------------------------ + FrameStack& retrieve(FrameStack::Key stack_key) + { + std::lock_guard guard(this->lock); + + return *table.find(stack_key)->second; + } + + // ------------------------------------------------------------------------ + void clear() + { + std::lock_guard guard(this->lock); + + table.clear(); + } + +private: + std::unordered_map> table; + std::mutex lock; +}; + +// ---------------------------------------------------------------------------- +// We make this a reference to a heap-allocated object so that we can avoid +// the destruction on exit. We are in charge of cleaning up the object. Note +// that the object will leak, but this is not a problem. +inline auto& stack_table = *(new StackTable()); diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/state.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/state.h new file mode 100644 index 00000000000..dec0befcc1d --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/state.h @@ -0,0 +1,32 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#if PY_VERSION_HEX >= 0x030c0000 +// https://github.com/python/cpython/issues/108216#issuecomment-1696565797 +#undef _PyGC_FINALIZED +#endif + +#if defined __GNUC__ && defined HAVE_STD_ATOMIC +#undef HAVE_STD_ATOMIC +#endif +#define Py_BUILD_CORE +#include + +#include + +inline _PyRuntimeState* runtime = &_PyRuntime; +inline PyThreadState* current_tstate = NULL; + +inline std::thread* sampler_thread = nullptr; + +inline int running = 0; + +inline PyObject* asyncio_current_tasks = NULL; +inline PyObject* asyncio_scheduled_tasks = NULL; // WeakSet +inline PyObject* asyncio_eager_tasks = NULL; // set diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/strings.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/strings.h new file mode 100644 index 00000000000..f51cad97629 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/strings.h @@ -0,0 +1,175 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include +#include + +#include +#include +#include + +#include +#include +#include + + +// ---------------------------------------------------------------------------- +static std::unique_ptr pybytes_to_bytes_and_size(PyObject* bytes_addr, + Py_ssize_t* size) +{ + PyBytesObject bytes; + + if (copy_type(bytes_addr, bytes)) + return nullptr; + + *size = bytes.ob_base.ob_size; + if (*size < 0 || *size > (1 << 20)) + return nullptr; + + auto data = std::make_unique(*size); + if (copy_generic(reinterpret_cast(bytes_addr) + offsetof(PyBytesObject, ob_sval), + data.get(), *size)) + return nullptr; + + return data; +} + +// ---------------------------------------------------------------------------- +static Result pyunicode_to_utf8(PyObject* str_addr) +{ + PyUnicodeObject str; + if (copy_type(str_addr, str)) + return ErrorKind::PyUnicodeError; + + PyASCIIObject& ascii = str._base._base; + + if (ascii.state.kind != 1) + return ErrorKind::PyUnicodeError; + + const char* data = ascii.state.compact + ? reinterpret_cast( + reinterpret_cast(str_addr) + sizeof(ascii)) + : static_cast(str._base.utf8); + if (data == NULL) + return ErrorKind::PyUnicodeError; + + Py_ssize_t size = ascii.state.compact ? 
ascii.length : str._base.utf8_length; + if (size < 0 || size > 1024) + return ErrorKind::PyUnicodeError; + + auto dest = std::string(size, '\0'); + if (copy_generic(data, dest.data(), size)) + return ErrorKind::PyUnicodeError; + + return Result(dest); +} + +// ---------------------------------------------------------------------------- + +class StringTable : public std::unordered_map +{ +public: + using Key = uintptr_t; + + + static constexpr Key INVALID = 1; + static constexpr Key UNKNOWN = 2; + static constexpr Key C_FRAME = 3; + + // Python string object + [[nodiscard]] inline Result key(PyObject* s) + { + const std::lock_guard lock(table_lock); + + auto k = reinterpret_cast(s); + + if (this->find(k) == this->end()) + { +#if PY_VERSION_HEX >= 0x030c0000 + // The task name might hold a PyLong for deferred task name formatting. + std::string str = "Task-"; + + auto maybe_long = pylong_to_llong(s); + if (maybe_long) + { + str += std::to_string(*maybe_long); + } + else + { + auto maybe_unicode = pyunicode_to_utf8(s); + if (!maybe_unicode) + { + return ErrorKind::PyUnicodeError; + } + + str = *maybe_unicode; + } +#else + auto maybe_unicode = pyunicode_to_utf8(s); + if (!maybe_unicode) + { + return ErrorKind::PyUnicodeError; + } + + std::string str = std::move(*maybe_unicode); +#endif + this->emplace(k, str); + Renderer::get().string(k, str); + } + + return Result(k); + }; + + // Python string object + [[nodiscard]] inline Key key_unsafe(PyObject* s) + { + const std::lock_guard lock(table_lock); + + auto k = reinterpret_cast(s); + + if (this->find(k) == this->end()) + { +#if PY_VERSION_HEX >= 0x030c0000 + // The task name might hold a PyLong for deferred task name formatting. + auto str = (PyLong_CheckExact(s)) ? 
"Task-" + std::to_string(PyLong_AsLong(s)) + : std::string(PyUnicode_AsUTF8(s)); +#else + auto str = std::string(PyUnicode_AsUTF8(s)); +#endif + this->emplace(k, str); + Renderer::get().string(k, str); + } + + return k; + }; + + [[nodiscard]] inline Result> lookup(Key key) + { + const std::lock_guard lock(table_lock); + + auto it = this->find(key); + if (it == this->end()) + return ErrorKind::LookupError; + + return std::ref(it->second); + }; + + StringTable() : std::unordered_map() + { + this->emplace(0, ""); + this->emplace(INVALID, ""); + this->emplace(UNKNOWN, ""); + }; + +private: + std::mutex table_lock; +}; + +// We make this a reference to a heap-allocated object so that we can avoid +// the destruction on exit. We are in charge of cleaning up the object. Note +// that the object will leak, but this is not a problem. +inline StringTable& string_table = *(new StringTable()); diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/tasks.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/tasks.h new file mode 100644 index 00000000000..962032f91dd --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/tasks.h @@ -0,0 +1,336 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#define PY_SSIZE_T_CLEAN +#include +#include + +#if PY_VERSION_HEX >= 0x030b0000 +#include + +#define Py_BUILD_CORE +#if PY_VERSION_HEX >= 0x030d0000 +#include +#else +#include +#endif // PY_VERSION_HEX >= 0x030d0000 +#else +#include +#include +#endif // PY_VERSION_HEX >= 0x30b0000 + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// Max number of recursive calls GenInfo::GenInfo and TaskInfo::TaskInfo can do +// before raising an error. 
+const constexpr size_t MAX_RECURSION_DEPTH = 250; + +class GenInfo +{ +public: + typedef std::unique_ptr Ptr; + + PyObject* origin = NULL; + PyObject* frame = NULL; + + GenInfo::Ptr await = nullptr; + + bool is_running = false; + + [[nodiscard]] static Result create(PyObject* gen_addr); + GenInfo(PyObject* origin, PyObject* frame, GenInfo::Ptr await, bool is_running) + : origin(origin), frame(frame), await(std::move(await)), is_running(is_running) + { + } +}; + +inline Result GenInfo::create(PyObject* gen_addr) +{ + static thread_local size_t recursion_depth = 0; + recursion_depth++; + + if (recursion_depth > MAX_RECURSION_DEPTH) + { + recursion_depth--; + return ErrorKind::GenInfoError; + } + + PyGenObject gen; + + if (copy_type(gen_addr, gen) || !PyCoro_CheckExact(&gen)) + { + recursion_depth--; + return ErrorKind::GenInfoError; + } + + auto origin = gen_addr; + +#if PY_VERSION_HEX >= 0x030b0000 + // The frame follows the generator object + auto frame = (gen.gi_frame_state == FRAME_CLEARED) + ? NULL + : reinterpret_cast(reinterpret_cast(gen_addr) + + offsetof(PyGenObject, gi_iframe)); +#else + auto frame = (PyObject*)gen.gi_frame; +#endif + + PyFrameObject f; + if (copy_type(frame, f)) + { + recursion_depth--; + return ErrorKind::GenInfoError; + } + + PyObject* yf = (frame != NULL ? PyGen_yf(&gen, frame) : NULL); + GenInfo::Ptr await = nullptr; + if (yf != NULL && yf != gen_addr) + { + auto maybe_await = GenInfo::create(yf); + if (maybe_await) + { + await = std::move(*maybe_await); + } + } + +#if PY_VERSION_HEX >= 0x030b0000 + auto is_running = (gen.gi_frame_state == FRAME_EXECUTING); +#elif PY_VERSION_HEX >= 0x030a0000 + auto is_running = (frame != NULL) ? 
_PyFrame_IsExecuting(&f) : false; +#else + auto is_running = gen.gi_running; +#endif + + recursion_depth--; + return std::make_unique(origin, frame, std::move(await), is_running); +} + +// ---------------------------------------------------------------------------- + +class TaskInfo +{ +public: + typedef std::unique_ptr Ptr; + typedef std::reference_wrapper Ref; + + PyObject* origin = NULL; + PyObject* loop = NULL; + + GenInfo::Ptr coro = nullptr; + + StringTable::Key name; + + // Information to reconstruct the async stack as best as we can + TaskInfo::Ptr waiter = nullptr; + + [[nodiscard]] static Result create(TaskObj*); + TaskInfo(PyObject* origin, PyObject* loop, GenInfo::Ptr coro, StringTable::Key name, + TaskInfo::Ptr waiter) + : origin(origin), loop(loop), coro(std::move(coro)), name(name), waiter(std::move(waiter)) + { + } + + [[nodiscard]] static Result current(PyObject*); + inline size_t unwind(FrameStack&); +}; + +inline std::unordered_map task_link_map; +inline std::mutex task_link_map_lock; + +// ---------------------------------------------------------------------------- +inline Result TaskInfo::create(TaskObj* task_addr) +{ + static thread_local size_t recursion_depth = 0; + recursion_depth++; + + if (recursion_depth > MAX_RECURSION_DEPTH) + { + recursion_depth--; + return ErrorKind::TaskInfoError; + } + + TaskObj task; + if (copy_type(task_addr, task)) + { + recursion_depth--; + return ErrorKind::TaskInfoError; + } + + auto maybe_coro = GenInfo::create(task.task_coro); + if (!maybe_coro) + { + recursion_depth--; + return ErrorKind::TaskInfoGeneratorError; + } + + auto origin = reinterpret_cast(task_addr); + + auto maybe_name = string_table.key(task.task_name); + if (!maybe_name) + { + recursion_depth--; + return ErrorKind::TaskInfoError; + } + + auto name = *maybe_name; + auto loop = task.task_loop; + + TaskInfo::Ptr waiter = nullptr; + if (task.task_fut_waiter) + { + auto maybe_waiter = + TaskInfo::create(reinterpret_cast(task.task_fut_waiter)); // 
TODO: Make lazy? + if (maybe_waiter) + { + waiter = std::move(*maybe_waiter); + } + } + + recursion_depth--; + return std::make_unique(origin, loop, std::move(*maybe_coro), name, + std::move(waiter)); +} + +// ---------------------------------------------------------------------------- +inline Result TaskInfo::current(PyObject* loop) +{ + if (loop == NULL) + { + return ErrorKind::TaskInfoError; + } + + auto maybe_current_tasks_dict = MirrorDict::create(asyncio_current_tasks); + if (!maybe_current_tasks_dict) + { + return ErrorKind::TaskInfoError; + } + + auto current_tasks_dict = std::move(*maybe_current_tasks_dict); + PyObject* task = current_tasks_dict.get_item(loop); + if (task == NULL) + { + return ErrorKind::TaskInfoError; + } + + return TaskInfo::create(reinterpret_cast(task)); +} + +// ---------------------------------------------------------------------------- +// TODO: Make this a "for_each_task" function? +[[nodiscard]] inline Result> get_all_tasks(PyObject* loop) +{ + std::vector tasks; + if (loop == NULL) + return tasks; + + auto maybe_scheduled_tasks_set = MirrorSet::create(asyncio_scheduled_tasks); + if (!maybe_scheduled_tasks_set) + { + return ErrorKind::TaskInfoError; + } + + auto scheduled_tasks_set = std::move(*maybe_scheduled_tasks_set); + auto maybe_scheduled_tasks = scheduled_tasks_set.as_unordered_set(); + if (!maybe_scheduled_tasks) + { + return ErrorKind::TaskInfoError; + } + + auto scheduled_tasks = std::move(*maybe_scheduled_tasks); + for (auto task_wr_addr : scheduled_tasks) + { + PyWeakReference task_wr; + if (copy_type(task_wr_addr, task_wr)) + continue; + + auto maybe_task_info = TaskInfo::create(reinterpret_cast(task_wr.wr_object)); + if (maybe_task_info) + { + if ((*maybe_task_info)->loop == loop) + { + tasks.push_back(std::move(*maybe_task_info)); + } + } + } + + if (asyncio_eager_tasks != NULL) + { + auto maybe_eager_tasks_set = MirrorSet::create(asyncio_eager_tasks); + if (!maybe_eager_tasks_set) + { + return 
ErrorKind::TaskInfoError; + } + + auto eager_tasks_set = std::move(*maybe_eager_tasks_set); + + auto maybe_eager_tasks = eager_tasks_set.as_unordered_set(); + if (!maybe_eager_tasks) + { + return ErrorKind::TaskInfoError; + } + + auto eager_tasks = std::move(*maybe_eager_tasks); + for (auto task_addr : eager_tasks) + { + auto maybe_task_info = TaskInfo::create(reinterpret_cast(task_addr)); + if (maybe_task_info) + { + if ((*maybe_task_info)->loop == loop) + { + tasks.push_back(std::move(*maybe_task_info)); + } + } + } + } + + return tasks; +} + +// ---------------------------------------------------------------------------- + +inline std::vector> current_tasks; + +// ---------------------------------------------------------------------------- + +inline size_t TaskInfo::unwind(FrameStack& stack) +{ + // TODO: Check for running task. + std::stack coro_frames; + + // Unwind the coro chain + for (auto py_coro = this->coro.get(); py_coro != NULL; py_coro = py_coro->await.get()) + { + if (py_coro->frame != NULL) + coro_frames.push(py_coro->frame); + } + + int count = 0; + + // Unwind the coro frames + while (!coro_frames.empty()) + { + PyObject* frame = coro_frames.top(); + coro_frames.pop(); + + count += unwind_frame(frame, stack); + } + + return count; +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/threads.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/threads.h new file mode 100644 index 00000000000..025e6c1da79 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/threads.h @@ -0,0 +1,582 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#include +#define Py_BUILD_CORE + +#include +#include +#include +#include +#include + +#if defined PL_LINUX +#include +#elif defined PL_DARWIN +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +class ThreadInfo +{ +public: + using Ptr = std::unique_ptr; + + uintptr_t thread_id; + unsigned long native_id; + + std::string name; + +#if defined PL_LINUX + clockid_t cpu_clock_id; +#elif defined PL_DARWIN + mach_port_t mach_port; +#endif + microsecond_t cpu_time; + + uintptr_t asyncio_loop = 0; + + [[nodiscard]] Result update_cpu_time(); + bool is_running(); + + [[nodiscard]] Result sample(int64_t, PyThreadState*, microsecond_t); + void unwind(PyThreadState*); + + // ------------------------------------------------------------------------ +#if defined PL_LINUX + ThreadInfo(uintptr_t thread_id, unsigned long native_id, const char* name, + clockid_t cpu_clock_id) + : thread_id(thread_id), native_id(native_id), name(name), cpu_clock_id(cpu_clock_id) + { + } +#elif defined PL_DARWIN + ThreadInfo(uintptr_t thread_id, unsigned long native_id, const char* name, + mach_port_t mach_port) + : thread_id(thread_id), native_id(native_id), name(name), mach_port(mach_port) + { + } +#endif + + [[nodiscard]] static Result> create(uintptr_t thread_id, + unsigned long native_id, + const char* name) + { +#if defined PL_LINUX + clockid_t cpu_clock_id; + if (pthread_getcpuclockid(static_cast(thread_id), &cpu_clock_id)) + { + return ErrorKind::ThreadInfoError; + } + + auto result = std::make_unique(thread_id, native_id, name, cpu_clock_id); +#elif defined PL_DARWIN + mach_port_t mach_port; + // pthread_mach_thread_np does not return a status code; the behaviour is undefined + // if thread_id is invalid. 
+ mach_port = pthread_mach_thread_np((pthread_t)thread_id); + + auto result = std::make_unique(thread_id, native_id, name, mach_port); +#endif + + auto update_cpu_time_success = result->update_cpu_time(); + if (!update_cpu_time_success) + { + return ErrorKind::ThreadInfoError; + } + + return result; + }; + +private: + [[nodiscard]] Result unwind_tasks(); + void unwind_greenlets(PyThreadState*, unsigned long); +}; + +inline Result ThreadInfo::update_cpu_time() +{ +#if defined PL_LINUX + struct timespec ts; + if (clock_gettime(cpu_clock_id, &ts)) + { + // If the clock is invalid, we skip updating the CPU time. + // This can happen if we try to compute CPU time for a thread that has exited. + if (errno == EINVAL) + { + return Result::ok(); + } + + return ErrorKind::CpuTimeError; + } + + this->cpu_time = TS_TO_MICROSECOND(ts); +#elif defined PL_DARWIN + thread_basic_info_data_t info; + mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; + kern_return_t kr = + thread_info((thread_act_t)this->mach_port, THREAD_BASIC_INFO, (thread_info_t)&info, &count); + + if (kr != KERN_SUCCESS) + { + // If the thread is invalid, we skip updating the CPU time. + // This can happen if we try to compute CPU time for a thread that has exited. 
+ if (kr == KERN_INVALID_ARGUMENT) + { + return Result::ok(); + } + + return ErrorKind::CpuTimeError; + } + + if (info.flags & TH_FLAGS_IDLE) + { + return Result::ok(); + } + + this->cpu_time = TV_TO_MICROSECOND(info.user_time) + TV_TO_MICROSECOND(info.system_time); +#endif + + return Result::ok(); +} + +inline bool ThreadInfo::is_running() +{ +#if defined PL_LINUX + struct timespec ts1, ts2; + + // Get two back-to-back times + if (clock_gettime(cpu_clock_id, &ts1) != 0) + return false; + if (clock_gettime(cpu_clock_id, &ts2) != 0) + return false; + + // If the CPU time has advanced, the thread is running + return (ts1.tv_sec != ts2.tv_sec || ts1.tv_nsec != ts2.tv_nsec); + +#elif defined PL_DARWIN + thread_basic_info_data_t info; + mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT; + kern_return_t kr = + thread_info((thread_act_t)this->mach_port, THREAD_BASIC_INFO, (thread_info_t)&info, &count); + + if (kr != KERN_SUCCESS) + return false; + + return info.run_state == TH_STATE_RUNNING; + +#endif +} + +// ---------------------------------------------------------------------------- + +// We make this a reference to a heap-allocated object so that we can avoid +// the destruction on exit. We are in charge of cleaning up the object. Note +// that the object will leak, but this is not a problem. +inline std::unordered_map& thread_info_map = + *(new std::unordered_map()); // indexed by thread_id + +inline std::mutex thread_info_map_lock; + +// ---------------------------------------------------------------------------- +inline void ThreadInfo::unwind(PyThreadState* tstate) +{ + unwind_python_stack(tstate); + if (asyncio_loop) + { + auto unwind_tasks_success = unwind_tasks(); + if (!unwind_tasks_success) + { + // If we fail, that's OK + } + } + + // We make the assumption that gevent and asyncio are not mixed + // together to keep the logic here simple. We can always revisit this + // should there be a substantial demand for it. 
+ unwind_greenlets(tstate, native_id); +} + +// ---------------------------------------------------------------------------- +inline Result ThreadInfo::unwind_tasks() +{ + std::vector leaf_tasks; + std::unordered_set parent_tasks; + std::unordered_map waitee_map; // Indexed by task origin + std::unordered_map origin_map; // Indexed by task origin + + auto maybe_all_tasks = get_all_tasks(reinterpret_cast(asyncio_loop)); + if (!maybe_all_tasks) + { + return ErrorKind::TaskInfoError; + } + + auto all_tasks = std::move(*maybe_all_tasks); + { + std::lock_guard lock(task_link_map_lock); + + // Clean up the task_link_map. Remove entries associated to tasks that + // no longer exist. + std::unordered_set all_task_origins; + std::transform(all_tasks.cbegin(), all_tasks.cend(), + std::inserter(all_task_origins, all_task_origins.begin()), + [](const TaskInfo::Ptr& task) { return task->origin; }); + + std::vector to_remove; + for (auto kv : task_link_map) + { + if (all_task_origins.find(kv.first) == all_task_origins.end()) + to_remove.push_back(kv.first); + } + for (auto key : to_remove) + task_link_map.erase(key); + + // Determine the parent tasks from the gather links. + std::transform(task_link_map.cbegin(), task_link_map.cend(), + std::inserter(parent_tasks, parent_tasks.begin()), + [](const std::pair& kv) { return kv.second; }); + } + + for (auto& task : all_tasks) + { + origin_map.emplace(task->origin, std::ref(*task)); + + if (task->waiter != NULL) + waitee_map.emplace(task->waiter->origin, std::ref(*task)); + else if (parent_tasks.find(task->origin) == parent_tasks.end()) + { + if (cpu && ignore_non_running_threads && !task->coro->is_running) + { + // This task is not running, so we skip it if we are + // interested in just CPU time. 
+ continue; + } + leaf_tasks.push_back(std::ref(*task)); + } + } + + for (auto& leaf_task : leaf_tasks) + { + bool on_cpu = leaf_task.get().coro->is_running; + auto stack_info = std::make_unique(leaf_task.get().name, on_cpu); + auto& stack = stack_info->stack; + for (auto current_task = leaf_task;;) + { + auto& task = current_task.get(); + + size_t stack_size = task.unwind(stack); + + if (on_cpu) + { + // Undo the stack unwinding + // TODO[perf]: not super-efficient :( + for (size_t i = 0; i < stack_size; i++) + stack.pop_back(); + + // Instead we get part of the thread stack + FrameStack temp_stack; + size_t nframes = + (python_stack.size() > stack_size) ? python_stack.size() - stack_size : 0; + for (size_t i = 0; i < nframes; i++) + { + auto python_frame = python_stack.front(); + temp_stack.push_front(python_frame); + python_stack.pop_front(); + } + while (!temp_stack.empty()) + { + stack.push_front(temp_stack.front()); + temp_stack.pop_front(); + } + } + + // Add the task name frame + stack.push_back(Frame::get(task.name)); + + // Get the next task in the chain + PyObject* task_origin = task.origin; + if (waitee_map.find(task_origin) != waitee_map.end()) + { + current_task = waitee_map.find(task_origin)->second; + continue; + } + + { + // Check for, e.g., gather links + std::lock_guard lock(task_link_map_lock); + + if (task_link_map.find(task_origin) != task_link_map.end() && + origin_map.find(task_link_map[task_origin]) != origin_map.end()) + { + current_task = origin_map.find(task_link_map[task_origin])->second; + continue; + } + } + + break; + } + + // Finish off with the remaining thread stack + for (auto p = python_stack.begin(); p != python_stack.end(); p++) + stack.push_back(*p); + + current_tasks.push_back(std::move(stack_info)); + } + + return Result::ok(); +} + +// ---------------------------------------------------------------------------- +inline void ThreadInfo::unwind_greenlets(PyThreadState* tstate, unsigned long cur_native_id) +{ + const 
std::lock_guard guard(greenlet_info_map_lock); + + if (greenlet_thread_map.find(cur_native_id) == greenlet_thread_map.end()) + return; + + std::unordered_set parent_greenlets; + + // Collect all parent greenlets + std::transform( + greenlet_parent_map.cbegin(), greenlet_parent_map.cend(), + std::inserter(parent_greenlets, parent_greenlets.begin()), + [](const std::pair& kv) { return kv.second; }); + + // Unwind the leaf greenlets + for (auto& greenlet_info : greenlet_info_map) + { + auto greenlet_id = greenlet_info.first; + auto& greenlet = greenlet_info.second; + + if (parent_greenlets.find(greenlet_id) != parent_greenlets.end()) + continue; + + auto frame = greenlet->frame; + if (frame == FRAME_NOT_SET) + { + // The greenlet has not been started yet or has finished + continue; + } + + bool on_cpu = frame == Py_None; + if (cpu && ignore_non_running_threads && !on_cpu) + { + // Only the currently-running greenlet has a None in its frame + // cell. If we are interested in CPU time, we skip all greenlets + // that have an actual frame, as they are not running. 
+ continue; + } + + auto stack_info = std::make_unique(greenlet->name, on_cpu); + auto& stack = stack_info->stack; + + greenlet->unwind(frame, tstate, stack); + + // Unwind the parent greenlets + for (;;) + { + auto parent_greenlet_info = greenlet_parent_map.find(greenlet_id); + if (parent_greenlet_info == greenlet_parent_map.end()) + break; + + auto parent_greenlet_id = parent_greenlet_info->second; + + auto parent_greenlet = greenlet_info_map.find(parent_greenlet_id); + if (parent_greenlet == greenlet_info_map.end()) + break; + + auto parent_frame = parent_greenlet->second->frame; + if (parent_frame == FRAME_NOT_SET || parent_frame == Py_None) + break; + + parent_greenlet->second->unwind(parent_frame, tstate, stack); + + // Move up the greenlet chain + greenlet_id = parent_greenlet_id; + } + + current_greenlets.push_back(std::move(stack_info)); + } +} + +// ---------------------------------------------------------------------------- +inline Result ThreadInfo::sample(int64_t iid, PyThreadState* tstate, microsecond_t delta) +{ + Renderer::get().render_thread_begin(tstate, name, delta, thread_id, native_id); + + if (cpu) + { + microsecond_t previous_cpu_time = cpu_time; + auto update_cpu_time_success = update_cpu_time(); + if (!update_cpu_time_success) + { + return ErrorKind::CpuTimeError; + } + + bool currently_running = is_running(); + if (!currently_running && ignore_non_running_threads) + { + return Result::ok(); + } + + Renderer::get().render_cpu_time(running ? cpu_time - previous_cpu_time : 0); + } + + unwind(tstate); + + // Asyncio tasks + if (current_tasks.empty()) + { + // If we don't have any asyncio tasks, we check that we don't have any + // greenlets either. In this case, we print the ordinary thread stack. + // With greenlets, we recover the thread stack from the active greenlet, + // so if we don't skip here we would have a double print. 
+ if (current_greenlets.empty()) + { + // Print the PID and thread name + Renderer::get().render_stack_begin(pid, iid, name); + // Print the stack + python_stack.render(); + + Renderer::get().render_stack_end(MetricType::Time, delta); + } + } + else + { + for (auto& task_stack_info : current_tasks) + { + auto maybe_task_name = string_table.lookup(task_stack_info->task_name); + if (!maybe_task_name) + { + return ErrorKind::ThreadInfoError; + } + + const auto& task_name = maybe_task_name->get(); + Renderer::get().render_task_begin(task_name, task_stack_info->on_cpu); + Renderer::get().render_stack_begin(pid, iid, name); + + task_stack_info->stack.render(); + + Renderer::get().render_stack_end(MetricType::Time, delta); + } + + current_tasks.clear(); + } + + // Greenlet stacks + if (!current_greenlets.empty()) + { + for (auto& greenlet_stack : current_greenlets) + { + auto maybe_task_name = string_table.lookup(greenlet_stack->task_name); + if (!maybe_task_name) + { + return ErrorKind::ThreadInfoError; + } + + const auto& task_name = maybe_task_name->get(); + Renderer::get().render_task_begin(task_name, greenlet_stack->on_cpu); + Renderer::get().render_stack_begin(pid, iid, name); + + auto& stack = greenlet_stack->stack; + stack.render(); + + Renderer::get().render_stack_end(MetricType::Time, delta); + } + + current_greenlets.clear(); + } + + return Result::ok(); +} + +// ---------------------------------------------------------------------------- +static void for_each_thread(InterpreterInfo& interp, + std::function callback) +{ + std::unordered_set threads; + std::unordered_set seen_threads; + + threads.clear(); + seen_threads.clear(); + + // Start from the thread list head + threads.insert(static_cast(interp.tstate_head)); + + while (!threads.empty()) + { + // Pop the next thread + PyThreadState* tstate_addr = *threads.begin(); + threads.erase(threads.begin()); + + // Mark the thread as seen + seen_threads.insert(tstate_addr); + + // Since threads can be created and 
destroyed at any time, we make + // a copy of the structure before trying to read its fields. + PyThreadState tstate; + if (copy_type(tstate_addr, tstate)) + // We failed to copy the thread so we skip it. + continue; + + // Enqueue the unseen threads that we can reach from this thread. + if (tstate.next != NULL && seen_threads.find(tstate.next) == seen_threads.end()) + threads.insert(tstate.next); + if (tstate.prev != NULL && seen_threads.find(tstate.prev) == seen_threads.end()) + threads.insert(tstate.prev); + + { + const std::lock_guard guard(thread_info_map_lock); + + if (thread_info_map.find(tstate.thread_id) == thread_info_map.end()) + { + // If the threading module was not imported in the target then + // we mistakenly take the hypno thread as the main thread. We + // assume that any missing thread is the actual main thread, + // provided we don't already have a thread with the name + // "MainThread". Note that this can also happen on shutdown, so + // we need to avoid doing anything in that case. +#if PY_VERSION_HEX >= 0x030b0000 + auto native_id = tstate.native_thread_id; +#else + auto native_id = getpid(); +#endif + bool main_thread_tracked = false; + for (auto& kv : thread_info_map) + { + if (kv.second->name == "MainThread") + { + main_thread_tracked = true; + break; + } + } + if (main_thread_tracked) + continue; + + auto maybe_thread_info = + ThreadInfo::create(tstate.thread_id, native_id, "MainThread"); + if (!maybe_thread_info) + { + // We failed to create the thread info object so we skip it. + // We'll likely try again later with the valid thread + // information. + continue; + } + + thread_info_map.emplace(tstate.thread_id, std::move(*maybe_thread_info)); + } + + // Call back with the thread state and thread info. 
+ callback(&tstate, *thread_info_map.find(tstate.thread_id)->second); + } + } +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/timing.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/timing.h new file mode 100644 index 00000000000..2f3d11695bd --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/timing.h @@ -0,0 +1,36 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#pragma once + +#if defined PL_LINUX +#include +#elif defined PL_DARWIN +#include +#include + +inline clock_serv_t cclock; +#endif + +typedef unsigned long microsecond_t; + +inline microsecond_t last_time = 0; + +#define TS_TO_MICROSECOND(ts) ((ts).tv_sec * 1e6 + (ts).tv_nsec / 1e3) +#define TV_TO_MICROSECOND(tv) ((tv).seconds * 1e6 + (tv).microseconds) + +// ---------------------------------------------------------------------------- +static microsecond_t gettime() +{ +#if defined PL_LINUX + struct timespec ts; + if (clock_gettime(CLOCK_BOOTTIME, &ts)) + return 0; + return TS_TO_MICROSECOND(ts); +#elif defined PL_DARWIN + mach_timespec_t ts; + clock_get_time(cclock, &ts); + return TS_TO_MICROSECOND(ts); +#endif +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/vm.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/vm.h new file mode 100644 index 00000000000..09ab41c6708 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/vm.h @@ -0,0 +1,363 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . 
+ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#if defined PL_LINUX +#include +#include +#include +#include +#include +#include +#include + +typedef pid_t proc_ref_t; + +ssize_t process_vm_readv(pid_t, const struct iovec*, unsigned long liovcnt, + const struct iovec* remote_iov, unsigned long riovcnt, + unsigned long flags); + +#define copy_type(addr, dest) (copy_memory(pid, addr, sizeof(dest), &dest)) +#define copy_type_p(addr, dest) (copy_memory(pid, addr, sizeof(*dest), dest)) +#define copy_generic(addr, dest, size) \ + (copy_memory(pid, reinterpret_cast(addr), size, reinterpret_cast(dest))) + +#elif defined PL_DARWIN +#include +#include +#include +#include +#include + +typedef mach_port_t proc_ref_t; + +#define copy_type(addr, dest) (copy_memory(mach_task_self(), addr, sizeof(dest), &dest)) +#define copy_type_p(addr, dest) (copy_memory(mach_task_self(), addr, sizeof(*dest), dest)) +#define copy_generic(addr, dest, size) \ + (copy_memory(mach_task_self(), (void*)(addr), size, (void*)(dest))) + +inline kern_return_t (*safe_copy)(vm_map_read_t, mach_vm_address_t, mach_vm_size_t, + mach_vm_address_t, mach_vm_size_t*) = mach_vm_read_overwrite; + +#endif + +inline bool is_truthy(const char* s) +{ + const static std::array truthy_values = {"1", "true", "yes", + "on", "enable", "enabled"}; + + return std::find(truthy_values.begin(), truthy_values.end(), s) != truthy_values.end(); +} + +inline bool use_alternative_copy_memory() +{ + const char* use_fast_copy_memory = std::getenv("ECHION_USE_FAST_COPY_MEMORY"); + if (!use_fast_copy_memory) + { + return false; + } + + if (is_truthy(use_fast_copy_memory)) + { + return true; + } + + return false; +} + +#if defined PL_LINUX +// Some checks are done at static initialization, so use this to read them at runtime +inline bool failed_safe_copy = false; + +inline ssize_t (*safe_copy)(pid_t, const struct iovec*, unsigned long, const struct iovec*, + unsigned long, unsigned long) = 
process_vm_readv; + +class VmReader +{ + void* buffer{nullptr}; + size_t sz{0}; + int fd{-1}; + inline static VmReader* instance{nullptr}; // Prevents having to set this in implementation + + VmReader(size_t _sz, void* _buffer, int _fd) : buffer(_buffer), sz{_sz}, fd{_fd} {} + + static VmReader* create(size_t sz) + { + // Makes a temporary file and ftruncates it to the specified size + std::array tmp_dirs = {"/dev/shm", "/tmp", "/var/tmp"}; + std::string tmp_suffix = "/echion-XXXXXX"; + + int fd = -1; + void* ret = nullptr; + + for (auto& tmp_dir : tmp_dirs) + { + // Reset the file descriptor, just in case + close(fd); + fd = -1; + + // Create the temporary file + std::string tmpfile = tmp_dir + tmp_suffix; + fd = mkstemp(tmpfile.data()); + if (fd == -1) + continue; + + // Unlink might fail if delete is blocked on the VFS, but currently no action is taken + unlink(tmpfile.data()); + + // Make sure we have enough size + if (ftruncate(fd, sz) == -1) + { + continue; + } + + // Map the file + ret = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (ret == MAP_FAILED) + { + ret = nullptr; + continue; + } + + // Successful. Break. + break; + } + + return new VmReader(sz, ret, fd); + } + + bool is_valid() const + { + return buffer != nullptr; + } + +public: + static VmReader* get_instance() + { + if (instance == nullptr) + { + instance = VmReader::create(1024 * 1024); // A megabyte? 
+ if (!instance) + { + std::cerr << "Failed to initialize VmReader with buffer size " << instance->sz + << std::endl; + return nullptr; + } + } + + return instance; + } + + ssize_t safe_copy(pid_t pid, const struct iovec* local_iov, unsigned long liovcnt, + const struct iovec* remote_iov, unsigned long riovcnt, unsigned long flags) + { + (void)pid; + (void)flags; + if (liovcnt != 1 || riovcnt != 1) + { + // Unsupported + return 0; + } + + // Check to see if we need to resize the buffer + if (remote_iov[0].iov_len > sz) + { + if (ftruncate(fd, remote_iov[0].iov_len) == -1) + { + return 0; + } + else + { + void* tmp = mremap(buffer, sz, remote_iov[0].iov_len, MREMAP_MAYMOVE); + if (tmp == MAP_FAILED) + { + return 0; + } + buffer = tmp; // no need to munmap + sz = remote_iov[0].iov_len; + } + } + + ssize_t ret = pwritev(fd, remote_iov, riovcnt, 0); + if (ret == -1) + { + return ret; + } + + // Copy the data from the buffer to the remote process + std::memcpy(local_iov[0].iov_base, buffer, local_iov[0].iov_len); + return ret; + } + + ~VmReader() + { + if (buffer) + { + munmap(buffer, sz); + } + if (fd != -1) + { + close(fd); + } + instance = nullptr; + } +}; + +/** + * Initialize the safe copy operation on Linux + */ +inline bool read_process_vm_init() +{ + VmReader* _ = VmReader::get_instance(); + return !!_; +} + +inline ssize_t vmreader_safe_copy(pid_t pid, const struct iovec* local_iov, unsigned long liovcnt, + const struct iovec* remote_iov, unsigned long riovcnt, + unsigned long flags) +{ + auto reader = VmReader::get_instance(); + if (!reader) + return 0; + return reader->safe_copy(pid, local_iov, liovcnt, remote_iov, riovcnt, flags); +} + +/** + * Initialize the safe copy operation on Linux + * + * This occurs at static init + */ +__attribute__((constructor)) inline void init_safe_copy() +{ + if (use_alternative_copy_memory()) + { + if (init_segv_catcher() == 0) + { + safe_copy = safe_memcpy_wrapper; + return; + } + + std::cerr << "Failed to initialize segv 
catcher. Using process_vm_readv instead." + << std::endl; + } + + char src[128]; + char dst[128]; + for (size_t i = 0; i < 128; i++) + { + src[i] = 0x41; + dst[i] = ~0x42; + } + + // Check to see that process_vm_readv works, unless it's overridden + const char force_override_str[] = "ECHION_ALT_VM_READ_FORCE"; + const char* force_override = std::getenv(force_override_str); + if (!force_override || !is_truthy(force_override)) + { + struct iovec iov_dst = {dst, sizeof(dst)}; + struct iovec iov_src = {src, sizeof(src)}; + ssize_t result = process_vm_readv(getpid(), &iov_dst, 1, &iov_src, 1, 0); + + // If we succeed, then use process_vm_readv + if (result == sizeof(src)) + { + safe_copy = process_vm_readv; + return; + } + } + + // Else, we have to setup the writev method + if (!read_process_vm_init()) + { + // std::cerr might not have been fully initialized at this point, so use + // fprintf instead. + fprintf(stderr, "Failed to initialize all safe copy interfaces\n"); + failed_safe_copy = true; + return; + } + + safe_copy = vmreader_safe_copy; +} +#elif defined PL_DARWIN +/** + * Initialize the safe copy operation on Linux + * + * This occurs at static init + */ +__attribute__((constructor)) inline void init_safe_copy() +{ + if (use_alternative_copy_memory()) + { + if (init_segv_catcher() == 0) + { + safe_copy = safe_memcpy_wrapper; + return; + } + + std::cerr << "Failed to initialize segv catcher. Using process_vm_readv instead." + << std::endl; + } +} +#endif // if defined PL_DARWIN + +/** + * Copy a chunk of memory from a portion of the virtual memory of another + * process. + * @param proc_ref_t the process reference (platform-dependent) + * @param void * the remote address + * @param ssize_t the number of bytes to read + * @param void * the destination buffer, expected to be at least as large + * as the number of bytes to read. + * + * @return zero on success, otherwise non-zero. 
+ */ +static inline int copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf) +{ + ssize_t result = -1; + + // Early exit on zero page + if (reinterpret_cast(addr) < 4096) + { + return result; + } + +#if defined PL_LINUX + struct iovec local[1]; + struct iovec remote[1]; + + local[0].iov_base = buf; + local[0].iov_len = len; + remote[0].iov_base = const_cast(addr); + remote[0].iov_len = len; + + result = safe_copy(proc_ref, local, 1, remote, 1, 0); + +#elif defined PL_DARWIN + kern_return_t kr = safe_copy(proc_ref, reinterpret_cast(addr), len, + reinterpret_cast(buf), + reinterpret_cast(&result)); + + if (kr != KERN_SUCCESS) + return -1; + +#endif + + return len != result; +} + +inline pid_t pid = 0; + +inline void _set_pid(pid_t _pid) +{ + pid = _pid; +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/echion/.clang-format b/ddtrace/internal/datadog/profiling/stack_v2/src/echion/.clang-format new file mode 100644 index 00000000000..3d941098cbb --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/echion/.clang-format @@ -0,0 +1,110 @@ +Language: Cpp +BasedOnStyle: Google +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterStruct: true + AfterUnion: true + BeforeCatch: true + 
BeforeElse: true + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 100 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: true +IndentPPDirectives: None +IndentWidth: 4 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements 
+SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 4 +UseTab: Never diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/echion/coremodule.cc b/ddtrace/internal/datadog/profiling/stack_v2/src/echion/coremodule.cc new file mode 100644 index 00000000000..b7b2115085b --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/echion/coremodule.cc @@ -0,0 +1,138 @@ +// This file is part of "echion" which is released under MIT. +// +// Copyright (c) 2023 Gabriele N. Tornetta . + +#define PY_SSIZE_T_CLEAN +#include +#if PY_VERSION_HEX >= 0x030c0000 +// https://github.com/python/cpython/issues/108216#issuecomment-1696565797 +#undef _PyGC_FINALIZED +#endif + +#include + +#include +#include +#include +#include +#include +#include +#if defined PL_DARWIN +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +// ---------------------------------------------------------------------------- +static void _init() +{ + pid = getpid(); +} + +// ---------------------------------------------------------------------------- +static PyObject* track_thread(PyObject* Py_UNUSED(m), PyObject* args) +{ + uintptr_t thread_id; // map key + const char* thread_name; + pid_t native_id; + + if (!PyArg_ParseTuple(args, "lsi", &thread_id, &thread_name, &native_id)) + return NULL; + + { + const std::lock_guard guard(thread_info_map_lock); + + auto maybe_thread_info = ThreadInfo::create(thread_id, native_id, thread_name); + if (!maybe_thread_info) + { + PyErr_SetString(PyExc_RuntimeError, "Failed to track thread"); + return nullptr; + } + + auto entry = thread_info_map.find(thread_id); + if (entry != thread_info_map.end()) + { + // Thread is already tracked so we update its info + entry->second = 
std::move(*maybe_thread_info); + } + else + { + thread_info_map.emplace(thread_id, std::move(*maybe_thread_info)); + } + } + + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +static PyObject* untrack_thread(PyObject* Py_UNUSED(m), PyObject* args) +{ + unsigned long thread_id; + if (!PyArg_ParseTuple(args, "l", &thread_id)) + return NULL; + + { + const std::lock_guard guard(thread_info_map_lock); + + thread_info_map.erase(thread_id); + } + + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +static PyObject* init(PyObject* Py_UNUSED(m), PyObject* Py_UNUSED(args)) +{ + _init(); + + Py_RETURN_NONE; +} + +// ---------------------------------------------------------------------------- +static PyMethodDef echion_core_methods[] = { + {"track_thread", track_thread, METH_VARARGS, "Map the name of a thread with its identifier"}, + {"untrack_thread", untrack_thread, METH_VARARGS, "Untrack a terminated thread"}, + {"init", init, METH_NOARGS, "Initialize the stack sampler (usually after a fork)"}, + // Configuration interface + {"set_interval", set_interval, METH_VARARGS, "Set the sampling interval"}, + {"set_cpu", set_cpu, METH_VARARGS, "Set whether to use CPU time instead of wall time"}, + {"set_max_frames", set_max_frames, METH_VARARGS, "Set the max number of frames to unwind"}, + // Sentinel + {NULL, NULL, 0, NULL}}; + +// ---------------------------------------------------------------------------- +static struct PyModuleDef coremodule = { + PyModuleDef_HEAD_INIT, + "core", /* name of module */ + NULL, /* module documentation, may be NULL */ + -1, /* size of per-interpreter state of the module, + or -1 if the module keeps state in global variables. 
*/ + echion_core_methods, + nullptr, /* m_traverse */ + nullptr, /* m_clear */ + nullptr, /* m_free */ + nullptr, /* m_is_preinitialised */ +}; + +// ---------------------------------------------------------------------------- +PyMODINIT_FUNC PyInit_core(void) +{ + PyObject* m; + + m = PyModule_Create(&coremodule); + if (m == NULL) + return NULL; + + // We make the assumption that this module is loaded by the main thread. + // TODO: These need to be reset after a fork. + _init(); + + return m; +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/echion/danger.cc b/ddtrace/internal/datadog/profiling/stack_v2/src/echion/danger.cc new file mode 100644 index 00000000000..cf6bc8a727f --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/echion/danger.cc @@ -0,0 +1,186 @@ +#include +#include + +// hello + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const size_t page_size = []() -> size_t { + auto v = sysconf(_SC_PAGESIZE); + +#ifdef PL_DARWIN + if (v <= 0) + { + // Fallback on macOS just in case + v = getpagesize(); + } +#endif + + if (v <= 0) + { + fprintf(stderr, "Failed to detect page size, falling back to 4096\n"); + return 4096; + } + + return v; +}(); + +struct sigaction g_old_segv; +struct sigaction g_old_bus; + +thread_local ThreadAltStack t_altstack; + +// We "arm" by publishing a valid jmp env for this thread. +thread_local sigjmp_buf t_jmpenv; +thread_local volatile sig_atomic_t t_handler_armed = 0; + +static inline void arm_fault_handler() +{ + t_handler_armed = 1; + __asm__ __volatile__("" ::: "memory"); +} + +static inline void disarm_fault_handler() +{ + __asm__ __volatile__("" ::: "memory"); + t_handler_armed = 0; +} + +static void segv_handler(int signo, siginfo_t*, void*) +{ + if (!t_handler_armed) + { + struct sigaction* old = (signo == SIGSEGV) ? &g_old_segv : &g_old_bus; + // Restore the previous handler and re-raise so default/old handling occurs. 
+ sigaction(signo, old, nullptr); + raise(signo); + return; + } + + // Jump back to the armed site. Use 1 so sigsetjmp returns nonzero. + siglongjmp(t_jmpenv, 1); +} + +int init_segv_catcher() +{ + if (t_altstack.ensure_installed() != 0) + { + return -1; + } + + struct sigaction sa + { + }; + sa.sa_sigaction = segv_handler; + sigemptyset(&sa.sa_mask); + // SA_SIGINFO for 3-arg handler; SA_ONSTACK to run on alt stack; SA_NODEFER to avoid having to + // use savemask + sa.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_NODEFER; + if (sigaction(SIGSEGV, &sa, &g_old_segv) != 0) + { + return -1; + } + if (sigaction(SIGBUS, &sa, &g_old_bus) != 0) + { + // Try to roll back SIGSEGV install on failure. + sigaction(SIGSEGV, &g_old_segv, nullptr); + return -1; + } + + return 0; +} + +#if defined PL_LINUX +using safe_memcpy_return_t = ssize_t; +#elif defined PL_DARWIN +using safe_memcpy_return_t = mach_vm_size_t; +#endif + +safe_memcpy_return_t safe_memcpy(void* dst, const void* src, size_t n) +{ + if (t_altstack.ensure_installed() != 0) + { + errno = EINVAL; + return -1; + } + + bool t_faulted = false; + + auto* d = static_cast(dst); + auto* s = static_cast(src); + safe_memcpy_return_t rem = static_cast(n); + + arm_fault_handler(); + if (sigsetjmp(t_jmpenv, /* save sig mask = */ 0) != 0) + { + // We arrived here from siglongjmp after a fault. + t_faulted = true; + goto landing; + } + + // Copy in page-bounded chunks (at most one fault per bad page). + while (rem) + { + safe_memcpy_return_t to_src_pg = + page_size - (static_cast(reinterpret_cast(s)) & (page_size - 1)); + safe_memcpy_return_t to_dst_pg = + page_size - (static_cast(reinterpret_cast(d)) & (page_size - 1)); + safe_memcpy_return_t chunk = std::min(rem, std::min(to_src_pg, to_dst_pg)); + + // Optional early probe to fault before entering large memcpy + (void)*reinterpret_cast(s); + + // If this faults, we'll siglongjmp back to the sigsetjmp above. 
+ (void)memcpy(d, s, static_cast(chunk)); + + d += chunk; + s += chunk; + rem -= chunk; + } + +landing: + disarm_fault_handler(); + + if (t_faulted) + { + errno = EFAULT; + return -1; + } + + return static_cast(n); +} + +#if defined PL_LINUX +ssize_t safe_memcpy_wrapper(pid_t, const struct iovec* __dstvec, unsigned long int __dstiovcnt, + const struct iovec* __srcvec, unsigned long int __srciovcnt, + unsigned long int) +{ + (void)__dstiovcnt; + (void)__srciovcnt; + assert(__dstiovcnt == 1); + assert(__srciovcnt == 1); + + size_t to_copy = std::min(__dstvec->iov_len, __srcvec->iov_len); + return safe_memcpy(__dstvec->iov_base, __srcvec->iov_base, to_copy); +} +#elif defined PL_DARWIN +kern_return_t safe_memcpy_wrapper(vm_map_read_t target_task, mach_vm_address_t address, + mach_vm_size_t size, mach_vm_address_t data, + mach_vm_size_t* outsize) +{ + (void)target_task; + + auto copied = safe_memcpy(reinterpret_cast(data), reinterpret_cast(address), + static_cast(size)); + *outsize = copied; + return copied == size ? 
KERN_SUCCESS : KERN_FAILURE; +} +#endif diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/echion/frame.cc b/ddtrace/internal/datadog/profiling/stack_v2/src/echion/frame.cc new file mode 100644 index 00000000000..e414387ce69 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/echion/frame.cc @@ -0,0 +1,448 @@ +#include + +#include +#include + +// ---------------------------------------------------------------------------- +#if PY_VERSION_HEX >= 0x030b0000 +static inline int _read_varint(unsigned char* table, ssize_t size, ssize_t* i) +{ + ssize_t guard = size - 1; + if (*i >= guard) + return 0; + + int val = table[++*i] & 63; + int shift = 0; + while (table[*i] & 64 && *i < guard) + { + shift += 6; + val |= (table[++*i] & 63) << shift; + } + return val; +} + +// ---------------------------------------------------------------------------- +static inline int _read_signed_varint(unsigned char* table, ssize_t size, ssize_t* i) +{ + int val = _read_varint(table, size, i); + return (val & 1) ? 
-(val >> 1) : (val >> 1); +} +#endif + +// ---------------------------------------------------------------------------- +void init_frame_cache(size_t capacity) +{ + frame_cache = new LRUCache(capacity); +} + +// ---------------------------------------------------------------------------- +void reset_frame_cache() +{ + delete frame_cache; + frame_cache = nullptr; +} + +// ------------------------------------------------------------------------ +Frame::Frame(PyObject* frame) +{ +#if PY_VERSION_HEX >= 0x030b0000 + +#if PY_VERSION_HEX >= 0x030d0000 + _PyInterpreterFrame* iframe = reinterpret_cast<_PyInterpreterFrame*>(frame); + const int lasti = _PyInterpreterFrame_LASTI(iframe); + PyCodeObject* code = reinterpret_cast(iframe->f_executable); +#else + const _PyInterpreterFrame* iframe = reinterpret_cast<_PyInterpreterFrame*>(frame); + const int lasti = _PyInterpreterFrame_LASTI(iframe); + PyCodeObject* code = iframe->f_code; +#endif // PY_VERSION_HEX >= 0x030d0000 + PyCode_Addr2Location(code, lasti << 1, &location.line, &location.column, &location.line_end, + &location.column_end); + location.column++; + location.column_end++; + name = string_table.key_unsafe(code->co_qualname); +#if PY_VERSION_HEX >= 0x030c0000 + is_entry = (iframe->owner == FRAME_OWNED_BY_CSTACK); // Shim frame +#else + is_entry = iframe->is_entry; +#endif + +#else + PyFrameObject* py_frame = reinterpret_cast(frame); + PyCodeObject* code = py_frame->f_code; + + location.line = PyFrame_GetLineNumber(py_frame); + name = string_table.key_unsafe(code->co_name); +#endif + filename = string_table.key_unsafe(code->co_filename); +} + +// ------------------------------------------------------------------------ +Result Frame::create(PyCodeObject* code, int lasti) +{ + auto maybe_filename = string_table.key(code->co_filename); + if (!maybe_filename) + { + return ErrorKind::FrameError; + } + +#if PY_VERSION_HEX >= 0x030b0000 + auto maybe_name = string_table.key(code->co_qualname); +#else + auto maybe_name = 
string_table.key(code->co_name); +#endif + + if (!maybe_name) + { + return ErrorKind::FrameError; + } + + auto frame = std::make_unique(*maybe_filename, *maybe_name); + auto infer_location_success = frame->infer_location(code, lasti); + if (!infer_location_success) + { + return ErrorKind::LocationError; + } + + return frame; +} + +// ---------------------------------------------------------------------------- +Result Frame::infer_location(PyCodeObject* code_obj, int lasti) +{ + unsigned int lineno = code_obj->co_firstlineno; + Py_ssize_t len = 0; + +#if PY_VERSION_HEX >= 0x030b0000 + auto table = pybytes_to_bytes_and_size(code_obj->co_linetable, &len); + if (table == nullptr) + { + return ErrorKind::LocationError; + } + + auto table_data = table.get(); + + for (Py_ssize_t i = 0, bc = 0; i < len; i++) + { + bc += (table[i] & 7) + 1; + int code = (table[i] >> 3) & 15; + unsigned char next_byte = 0; + switch (code) + { + case 15: + break; + + case 14: // Long form + lineno += _read_signed_varint(table_data, len, &i); + + this->location.line = lineno; + this->location.line_end = lineno + _read_varint(table_data, len, &i); + this->location.column = _read_varint(table_data, len, &i); + this->location.column_end = _read_varint(table_data, len, &i); + + break; + + case 13: // No column data + lineno += _read_signed_varint(table_data, len, &i); + + this->location.line = lineno; + this->location.line_end = lineno; + this->location.column = this->location.column_end = 0; + + break; + + case 12: // New lineno + case 11: + case 10: + if (i >= len - 2) + { + return ErrorKind::LocationError; + } + + lineno += code - 10; + + this->location.line = lineno; + this->location.line_end = lineno; + this->location.column = 1 + table[++i]; + this->location.column_end = 1 + table[++i]; + + break; + + default: + if (i >= len - 1) + { + return ErrorKind::LocationError; + } + + next_byte = table[++i]; + + this->location.line = lineno; + this->location.line_end = lineno; + this->location.column 
= 1 + (code << 3) + ((next_byte >> 4) & 7); + this->location.column_end = this->location.column + (next_byte & 15); + } + + if (bc > lasti) + break; + } + +#elif PY_VERSION_HEX >= 0x030a0000 + auto table = pybytes_to_bytes_and_size(code_obj->co_linetable, &len); + if (table == nullptr) + { + return ErrorKind::LocationError; + } + + lasti <<= 1; + for (int i = 0, bc = 0; i < len; i++) + { + int sdelta = table[i++]; + if (sdelta == 0xff) + break; + + bc += sdelta; + + int ldelta = table[i]; + if (ldelta == 0x80) + ldelta = 0; + else if (ldelta > 0x80) + lineno -= 0x100; + + lineno += ldelta; + if (bc > lasti) + break; + } + +#else + auto table = pybytes_to_bytes_and_size(code_obj->co_lnotab, &len); + if (table == nullptr) + { + return ErrorKind::LocationError; + } + + for (int i = 0, bc = 0; i < len; i++) + { + bc += table[i++]; + if (bc > lasti) + break; + + if (table[i] >= 0x80) + lineno -= 0x100; + + lineno += table[i]; + } + +#endif + + this->location.line = lineno; + this->location.line_end = lineno; + this->location.column = 0; + this->location.column_end = 0; + + return Result::ok(); +} + +// ------------------------------------------------------------------------ +Frame::Key Frame::key(PyCodeObject* code, int lasti) +{ + return ((static_cast(((reinterpret_cast(code)))) << 16) | lasti); +} + +// ---------------------------------------------------------------------------- +Frame::Key Frame::key(PyObject* frame) +{ +#if PY_VERSION_HEX >= 0x030d0000 + _PyInterpreterFrame* iframe = reinterpret_cast<_PyInterpreterFrame*>(frame); + const int lasti = _PyInterpreterFrame_LASTI(iframe); + PyCodeObject* code = reinterpret_cast(iframe->f_executable); +#elif PY_VERSION_HEX >= 0x030b0000 + const _PyInterpreterFrame* iframe = reinterpret_cast<_PyInterpreterFrame*>(frame); + const int lasti = _PyInterpreterFrame_LASTI(iframe); + PyCodeObject* code = iframe->f_code; +#else + const PyFrameObject* py_frame = reinterpret_cast(frame); + const int lasti = py_frame->f_lasti; + 
PyCodeObject* code = py_frame->f_code; +#endif + return key(code, lasti); +} + +// ------------------------------------------------------------------------ +#if PY_VERSION_HEX >= 0x030b0000 +Result> Frame::read(_PyInterpreterFrame* frame_addr, + _PyInterpreterFrame** prev_addr) +#else +Result> Frame::read(PyObject* frame_addr, PyObject** prev_addr) +#endif +{ +#if PY_VERSION_HEX >= 0x030b0000 + _PyInterpreterFrame iframe; + auto resolved_addr = + stack_chunk ? reinterpret_cast<_PyInterpreterFrame*>(stack_chunk->resolve(frame_addr)) + : frame_addr; + if (resolved_addr != frame_addr) + { + frame_addr = resolved_addr; + } + else + { + if (copy_type(frame_addr, iframe)) + { + return ErrorKind::FrameError; + } + frame_addr = &iframe; + } + if (frame_addr == NULL) + { + return ErrorKind::FrameError; + } + +#if PY_VERSION_HEX >= 0x030c0000 + if (frame_addr->owner == FRAME_OWNED_BY_CSTACK) + { + *prev_addr = frame_addr->previous; + // This is a C frame, we just need to ignore it + return std::ref(C_FRAME); + } + + if (frame_addr->owner != FRAME_OWNED_BY_THREAD && frame_addr->owner != FRAME_OWNED_BY_GENERATOR) + { + return ErrorKind::FrameError; + } +#endif // PY_VERSION_HEX >= 0x030c0000 + + // We cannot use _PyInterpreterFrame_LASTI because _PyCode_CODE reads + // from the code object. 
+#if PY_VERSION_HEX >= 0x030d0000 + const int lasti = + (static_cast((frame_addr->instr_ptr - 1 - + reinterpret_cast<_Py_CODEUNIT*>( + (reinterpret_cast(frame_addr->f_executable)))))) - + offsetof(PyCodeObject, co_code_adaptive) / sizeof(_Py_CODEUNIT); + auto maybe_frame = Frame::get(reinterpret_cast(frame_addr->f_executable), lasti); + if (!maybe_frame) + { + return ErrorKind::FrameError; + } + + auto& frame = maybe_frame->get(); +#else + const int lasti = (static_cast((frame_addr->prev_instr - + reinterpret_cast<_Py_CODEUNIT*>((frame_addr->f_code))))) - + offsetof(PyCodeObject, co_code_adaptive) / sizeof(_Py_CODEUNIT); + auto maybe_frame = Frame::get(frame_addr->f_code, lasti); + if (!maybe_frame) + { + return ErrorKind::FrameError; + } + + auto& frame = maybe_frame->get(); +#endif // PY_VERSION_HEX >= 0x030d0000 + if (&frame != &INVALID_FRAME) + { +#if PY_VERSION_HEX >= 0x030c0000 + frame.is_entry = (frame_addr->owner == FRAME_OWNED_BY_CSTACK); // Shim frame +#else // PY_VERSION_HEX < 0x030c0000 + frame.is_entry = frame_addr->is_entry; +#endif // PY_VERSION_HEX >= 0x030c0000 + } + + *prev_addr = &frame == &INVALID_FRAME ? NULL : frame_addr->previous; + +#else // PY_VERSION_HEX < 0x030b0000 + // Unwind the stack from leaf to root and store it in a stack. This way we + // can print it from root to leaf. + PyFrameObject py_frame; + + if (copy_type(frame_addr, py_frame)) + { + return ErrorKind::FrameError; + } + + auto maybe_frame = Frame::get(py_frame.f_code, py_frame.f_lasti); + if (!maybe_frame) + { + return ErrorKind::FrameError; + } + + auto& frame = maybe_frame->get(); + *prev_addr = (&frame == &INVALID_FRAME) ? 
NULL : reinterpret_cast(py_frame.f_back); +#endif // PY_VERSION_HEX >= 0x030b0000 + + return std::ref(frame); +} + +// ---------------------------------------------------------------------------- +Result> Frame::get(PyCodeObject* code_addr, int lasti) +{ + auto frame_key = Frame::key(code_addr, lasti); + + auto maybe_frame = frame_cache->lookup(frame_key); + if (maybe_frame) + { + return *maybe_frame; + } + + PyCodeObject code; + if (copy_type(code_addr, code)) + { + return std::ref(INVALID_FRAME); + } + + auto maybe_new_frame = Frame::create(&code, lasti); + if (!maybe_new_frame) + { + return std::ref(INVALID_FRAME); + } + + auto new_frame = std::move(*maybe_new_frame); + new_frame->cache_key = frame_key; + auto& f = *new_frame; + Renderer::get().frame(frame_key, new_frame->filename, new_frame->name, new_frame->location.line, + new_frame->location.line_end, new_frame->location.column, + new_frame->location.column_end); + frame_cache->store(frame_key, std::move(new_frame)); + return std::ref(f); +} + +// ---------------------------------------------------------------------------- +Frame& Frame::get(PyObject* frame) +{ + auto frame_key = Frame::key(frame); + + auto maybe_frame = frame_cache->lookup(frame_key); + if (maybe_frame) + { + return *maybe_frame; + } + + auto new_frame = std::make_unique(frame); + new_frame->cache_key = frame_key; + auto& f = *new_frame; + Renderer::get().frame(frame_key, new_frame->filename, new_frame->name, new_frame->location.line, + new_frame->location.line_end, new_frame->location.column, + new_frame->location.column_end); + frame_cache->store(frame_key, std::move(new_frame)); + return f; +} + +// ---------------------------------------------------------------------------- +Frame& Frame::get(StringTable::Key name) +{ + uintptr_t frame_key = static_cast(name); + + auto maybe_frame = frame_cache->lookup(frame_key); + if (maybe_frame) + { + return *maybe_frame; + } + + auto frame = std::make_unique(name); + frame->cache_key = frame_key; + 
auto& f = *frame; + Renderer::get().frame(frame_key, frame->filename, frame->name, frame->location.line, + frame->location.line_end, frame->location.column, + frame->location.column_end); + frame_cache->store(frame_key, std::move(frame)); + return f; +} diff --git a/releasenotes/notes/profiling-echion-fix-alloc-f1204a794b1d3a1d.yaml b/releasenotes/notes/profiling-echion-fix-alloc-f1204a794b1d3a1d.yaml deleted file mode 100644 index 8ca655a780c..00000000000 --- a/releasenotes/notes/profiling-echion-fix-alloc-f1204a794b1d3a1d.yaml +++ /dev/null @@ -1,7 +0,0 @@ ---- -fixes: - - | - profiling: Upgrades echion to resolve an issue where stack profiler can - allocate a large amount of memory unnecessarily. Resolves another issue - where the profiler can loop infinitely on Python 3.13. - diff --git a/releasenotes/notes/profiling-move-echion-to-dd-trace-py-6a231b502eed899c.yaml b/releasenotes/notes/profiling-move-echion-to-dd-trace-py-6a231b502eed899c.yaml new file mode 100644 index 00000000000..ae9f9b8bfd9 --- /dev/null +++ b/releasenotes/notes/profiling-move-echion-to-dd-trace-py-6a231b502eed899c.yaml @@ -0,0 +1,3 @@ +other: + - | + profiling: Moves echion, the Python stack sampler, to the ddtrace-py repository. diff --git a/scripts/cformat.sh b/scripts/cformat.sh index 1344f217d19..b716bd0717f 100755 --- a/scripts/cformat.sh +++ b/scripts/cformat.sh @@ -34,6 +34,7 @@ enumerate_files() { '*.c' '*.h' '*.cpp' + '*.cc' '*.hpp' )