diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 73288f70c8..8aab4eb2d1 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -11,6 +11,7 @@ .gitignore @Datadog/libdatadog .gitlab-ci.yml @Datadog/apm-common-components-core .gitlab/benchmarks.yml @Datadog/apm-common-components-core +.gitlab/fuzz.yml @Datadog/chaos-platform benchmark/ @Datadog/apm-common-components-core bin_tests/ @Datadog/libdatadog-profiling build-common/ @Datadog/apm-common-components-core @@ -64,6 +65,7 @@ tests/spawn_from_lib/ @Datadog/libdatadog-php @Datadog/libdatadog tests/windows_package/ @Datadog/apm-common-components-core tools/ @Datadog/apm-common-components-core windows/ @Datadog/libdatadog-core +fuzz/ @Datadog/chaos-platform # Specific overrides (must come after their general patterns above) bin_tests/tests/test_the_tests.rs @Datadog/libdatadog-core diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fa4b9a1e6c..db84f1d154 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,6 +7,7 @@ variables: include: - local: .gitlab/benchmarks.yml + - local: .gitlab/fuzz.yml trigger_internal_build: variables: diff --git a/.gitlab/fuzz.yml b/.gitlab/fuzz.yml new file mode 100644 index 0000000000..aa49dfc225 --- /dev/null +++ b/.gitlab/fuzz.yml @@ -0,0 +1,31 @@ +# Fuzzing job configuration +# This job discovers, builds, and uploads all cargo-fuzz targets to the internal fuzzing infrastructure +# See ci/README_FUZZING.md for more information + +variables: + BASE_CI_IMAGE: registry.ddbuild.io/ci/benchmarking-platform:libdatadog-benchmarks + +fuzz: + tags: ["arch:amd64"] + needs: [] + image: + name: $BASE_CI_IMAGE + rules: + # runs on gitlab schedule and on merge to main. + # Also allow manual run in branches for ease of debug / testing + - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "schedule"' + allow_failure: true + - if: $CI_COMMIT_BRANCH == "main" + allow_failure: true + - when: manual + allow_failure: true + timeout: 1h + script: + - VAULT_VERSION=1.15.4 && curl -fsSL "https://releases.hashicorp.com/vault/${VAULT_VERSION}/vault_${VAULT_VERSION}_linux_amd64.zip" -o vault.zip && unzip vault.zip && mv vault /usr/local/bin/vault && rm vault.zip && chmod +x /usr/local/bin/vault + - rustup default nightly + - cargo install cargo-fuzz + - pip3 install requests toml + - python3 fuzz/fuzz_infra.py + allow_failure: true + variables: + KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: libdatadog diff --git a/Cargo.lock b/Cargo.lock index 1b11271120..82e4720980 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2897,6 +2897,7 @@ name = "libdd-trace-normalization" version = "1.0.0" dependencies = [ "anyhow", + "arbitrary", "criterion", "duplicate", "libdd-trace-protobuf", @@ -2943,7 +2944,6 @@ dependencies = [ "http-body-util", "httpmock", "hyper", - "hyper-http-proxy", "indexmap 2.12.0", "libdd-common", "libdd-tinybytes", diff --git a/fuzz/fuzz_infra.py b/fuzz/fuzz_infra.py new file mode 100755 index 0000000000..ceb124d348 --- /dev/null +++ b/fuzz/fuzz_infra.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 + +""" +Script for running fuzz targets in the internal fuzzing infrastructure. +This is called from .gitlab/fuzz.yml. + +If you want to run this locally, please set the VAULT_FUZZING_TOKEN environment variable +(i.e: ddtool auth token security-fuzzing-platform --datacenter=us1.ddbuild.io) + +In CI, this is expected to run with the base image defined in ./ci/Dockerfiles/Dockerfile.fuzz. 
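+
+Example local invocation (illustrative only; assumes a nightly toolchain, cargo-fuzz,
+and the `requests` and `toml` Python packages are already installed):
+
+    export VAULT_FUZZING_TOKEN=$(ddtool auth token security-fuzzing-platform --datacenter=us1.ddbuild.io)
+    python3 fuzz/fuzz_infra.py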
+ +""" + +import os +from subprocess import Popen, PIPE +import requests +import toml + +DEFAULT_FUZZING_SLACK_CHANNEL = "fuzzing-ops" # TODO: change me once we validated everything is not spamming and set up correctly. +# Lets reuse the token for all requests to avoid issues. +# The process should be short lived enough that the token should be valid for the duration. +_cached_token = None + + +def get_auth_header(): + global _cached_token + if os.getenv("VAULT_FUZZING_TOKEN") is not None: + return os.getenv("VAULT_FUZZING_TOKEN") + + if _cached_token is None: + _cached_token = ( + os.popen( + "vault read -field=token identity/oidc/token/security-fuzzing-platform" + ) + .read() + .strip() + ) + return _cached_token + + +def get_commit_sha(): + return os.getenv("CI_COMMIT_SHA") + + +def upload_fuzz( + directory, + git_sha, + fuzz_test, + team="apm-sdk-rust", + core_count=2, + duration=3600, + proc_count=2, + fuzz_memory=4, +): + """ + This builds and uploads fuzz targets to the internal fuzzing infrastructure. + It needs to be passed the -fuzz flag in order to build the fuzz with efficient coverage guidance. + """ + + api_url = "https://fuzzing-api.us1.ddbuild.io/api/v1" + + # Get the auth token a single time and reuse it for all requests + auth_header = get_auth_header() + if not auth_header: + print("❌ Failed to get auth header") + exit(1) + + # We let the API handle package name length validation + # It will be returned, truncated / reformated, if needed in the json response. + # We simply force the prefix to be `libdatadog-` for ease of filtering (until we improve that part on the API side) + # As a note: more than 63 characters will be truncated by the API + pkgname_prefix = "libdatadog-" + pkgname = ( + (pkgname_prefix + directory + "-" + fuzz_test) + .replace("_", "-") + .replace("/", "-") + ) + pkgname = pkgname.strip("-.") # Remove trailing dashes and dots. 
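+    # Illustrative example (assuming the crate layout in this repo): for
+    # directory "libdd-trace-normalization/fuzz" and fuzz_test "fuzz_normalize_span",
+    # the resulting name is "libdatadog-libdd-trace-normalization-fuzz-fuzz-normalize-span"
+    # (underscores and slashes become dashes; the API may still truncate it).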
+ print(f"pkgname: {pkgname}") + + print(f"Getting presigned URL for {pkgname}...") + headers = {"Authorization": f"Bearer {auth_header}"} + presigned_response = requests.post( + f"{api_url}/apps/{pkgname}/builds/{git_sha}/url", headers=headers, timeout=30 + ) + + if not presigned_response.ok: + print( + f"❌ Failed to get presigned URL (status {presigned_response.status_code})" + ) + try: + error_detail = presigned_response.json() + print(f"Error details: {error_detail}") + except Exception as e: + print(f"Raw error response: {presigned_response.text} {e}") + presigned_response.raise_for_status() + presigned_url = presigned_response.json()["data"]["url"] + + print(f"Uploading {pkgname} ({fuzz_test}) for {git_sha}...") + # Upload file to presigned URL + with open( + f"{directory}/target/x86_64-unknown-linux-gnu/release/{fuzz_test}", "rb" + ) as f: + upload_response = requests.put(presigned_url, data=f, timeout=300) + + if not upload_response.ok: + print(f"❌ Failed to upload file (status {upload_response.status_code})") + try: + error_detail = upload_response.json() + print(f"Error details: {error_detail}") + except Exception as e: + print(f"Raw error response: {upload_response.text} {e}") + upload_response.raise_for_status() + + print(f"Starting fuzzer for {pkgname} ({fuzz_test})...") + # Start new fuzzer + run_payload = { + "app": pkgname, + "debug": False, + "version": git_sha, + "core_count": core_count, + "duration": duration, + "type": "cargo-fuzz", + "binary": fuzz_test, + "team": team, + "process_count": proc_count, + "memory": fuzz_memory, + "repository_url": "https://github.com/DataDog/libdatadog", + "slack_channel": DEFAULT_FUZZING_SLACK_CHANNEL, + } + + headers = { + "Authorization": f"Bearer {auth_header}", + "Content-Type": "application/json", + } + + try: + response = requests.post( + f"{api_url}/apps/{pkgname}/fuzzers", + headers=headers, + json=run_payload, + timeout=30, + ) + response.raise_for_status() + except Exception as e: + error_detail = response.json() + print(f"❌ API request failed with status {response.status_code}") + print(f"Error details: {error_detail}") + print(f"Raw error response: {response.text} {e}") + + print(f"✅ Started fuzzer for {pkgname} ({fuzz_test})...") + response_json = response.json() + print(response_json) + + +def search_fuzz_tests(directory) -> list[str]: + fuzz_list_cmd = ["cargo", "+nightly", "fuzz", "list"] + process = Popen(fuzz_list_cmd, cwd=directory, stdout=PIPE, stderr=PIPE) + stdout, stderr = process.communicate() + + if process.returncode != 0: + print(f"❌ Failed to list fuzz tests in {directory}") + print(f"Command: {' '.join(fuzz_list_cmd)}") + print(f"Exit code: {process.returncode}") + if stderr: + print(f"Error output: {stderr.decode('utf-8')}") + if stdout: + print(f"Standard output: {stdout.decode('utf-8')}") + return [] + + return stdout.decode("utf-8").splitlines() + + +def build_fuzz(directory, fuzz_test) -> bool: + build_cmd = ["cargo", "+nightly", "fuzz", "build", fuzz_test] + return Popen(build_cmd, cwd=directory).wait() == 0 + + +# We want to search for all crates in the repository. +# We can't simply run `cargo fuzz list` in the root directory. 
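+# Instead, we walk the tree and keep only crates whose Cargo.toml opts into fuzzing,
+# i.e. ones containing (as in libdd-trace-normalization/fuzz/Cargo.toml below):
+#
+#   [package.metadata]
+#   cargo-fuzz = true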
+def is_fuzz_crate(cargo_toml_path) -> bool: + """Check if a Cargo.toml file has cargo-fuzz = true in its metadata.""" + try: + with open(cargo_toml_path, "r") as f: + cargo_config = toml.load(f) + return ( + cargo_config.get("package", {}) + .get("metadata", {}) + .get("cargo-fuzz", False) + ) + except Exception as e: + print(f"Warning: Could not parse {cargo_toml_path}: {e}") + return False + + +def find_cargo_roots(directory) -> list[str]: + print(f"Finding cargo roots in {directory}") + cargo_roots = [] + for root, dirs, files in os.walk(directory): + # Skip target directories to avoid scanning build artifacts + if "target" in dirs: + dirs.remove("target") + + if "Cargo.toml" in files: + cargo_toml_path = os.path.join(root, "Cargo.toml") + if is_fuzz_crate(cargo_toml_path): + print(f"Found fuzz cargo root: {root}") + cargo_roots.append(root) + else: + print(f"Skipping non-fuzz cargo root: {root}") + return cargo_roots + + +if __name__ == "__main__": + cargo_roots = find_cargo_roots(os.getcwd()) + print(cargo_roots) + git_sha = get_commit_sha() + + for cargo_root in cargo_roots: + fuzz_tests = search_fuzz_tests(cargo_root) + print(f"Found {len(fuzz_tests)} fuzz tests in {cargo_root}") + if len(fuzz_tests) == 0: + print(f"No fuzz tests found in {cargo_root}, skipping...") + continue + + for fuzz_test in fuzz_tests: + print(f"Building fuzz for {cargo_root}/{fuzz_test} ({git_sha})") + err = build_fuzz(cargo_root, fuzz_test) + if not err: + print( + f"❌ Failed to build fuzz for {cargo_root}/{fuzz_test} ({git_sha}). Skipping uploading." + ) + continue + + # Make cargo_root relative to the root of the repository, so the generated target name is libdatadog-- + # In the future, the api will support a custom path flag + repo_root = os.path.abspath(os.getcwd()) + rel_cargo_root = os.path.relpath(cargo_root, repo_root) + print(f"Uploading fuzz for {rel_cargo_root}/{fuzz_test} ({git_sha})") + upload_fuzz(rel_cargo_root, git_sha, fuzz_test) + diff --git a/libdd-trace-normalization/Cargo.toml b/libdd-trace-normalization/Cargo.toml index 78d58e8e6a..fcf0aa7f64 100644 --- a/libdd-trace-normalization/Cargo.toml +++ b/libdd-trace-normalization/Cargo.toml @@ -15,6 +15,10 @@ bench = false [dependencies] anyhow = "1.0" libdd-trace-protobuf = { version = "1.0.0", path = "../libdd-trace-protobuf" } +arbitrary = { version = "1.3", features = ["derive"], optional = true } + +[features] +fuzzing = ["arbitrary"] [dev-dependencies] rand = "0.8.5" diff --git a/libdd-trace-normalization/fuzz/.gitignore b/libdd-trace-normalization/fuzz/.gitignore new file mode 100644 index 0000000000..a66428bf17 --- /dev/null +++ b/libdd-trace-normalization/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts + diff --git a/libdd-trace-normalization/fuzz/Cargo.lock b/libdd-trace-normalization/fuzz/Cargo.lock new file mode 100644 index 0000000000..5ce2a8c1ff --- /dev/null +++ b/libdd-trace-normalization/fuzz/Cargo.lock @@ -0,0 +1,263 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "anyhow" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "cc" +version = "1.2.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd405d82c84ff7f35739f175f67d8b9fb7687a0e84ccdc78bd3568839827cf07" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "find-msvc-tools" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom", + "libc", +] + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "libdd-trace-normalization" +version = "1.0.0" +dependencies = [ + "anyhow", + "arbitrary", + "libdd-trace-protobuf", +] + +[[package]] +name = "libdd-trace-normalization-fuzz" +version = "0.0.0" +dependencies = [ + "libdd-trace-normalization", + "libfuzzer-sys", +] + +[[package]] +name = "libdd-trace-protobuf" +version = "1.0.0" +dependencies = [ + "prost", + "serde", + "serde_bytes", +] + +[[package]] +name = "libfuzzer-sys" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5037190e1f70cbeef565bd267599242926f724d3b8a9f510fd7e0b540cfa4404" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_bytes" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" +dependencies = [ + "serde", + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" diff --git a/libdd-trace-normalization/fuzz/Cargo.toml b/libdd-trace-normalization/fuzz/Cargo.toml new file mode 100644 index 0000000000..af0fa0e7b9 --- /dev/null +++ b/libdd-trace-normalization/fuzz/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "libdd-trace-normalization-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = 
true + +[dependencies] +libfuzzer-sys = "0.4" +libdd-trace-normalization = { path = "..", features = ["fuzzing"] } + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "fuzz_normalize_span" +path = "fuzz_targets/fuzz_normalize_span.rs" +test = false +doc = false + diff --git a/libdd-trace-normalization/fuzz/README.md b/libdd-trace-normalization/fuzz/README.md new file mode 100644 index 0000000000..57e45fa52b --- /dev/null +++ b/libdd-trace-normalization/fuzz/README.md @@ -0,0 +1,32 @@ +# Fuzzing for libdd-trace-normalization + +This directory contains fuzz targets for the `libdd-trace-normalization` library using `cargo-fuzz` (libFuzzer). + +## Prerequisites + +Install `cargo-fuzz`: + +```bash +cargo install cargo-fuzz +``` + +## Running the Fuzzer + +Run the fuzzer indefinitely (Ctrl+C to stop): + +```bash +cd /path/to/libdd-trace-normalization +cargo fuzz run fuzz_normalize_span +``` + +Reproduce a finding: + +```bash +cargo fuzz run fuzz_normalize_span fuzz/artifacts/fuzz_normalize_span/crash- +``` + +To generate coverage information: + +```bash +cargo fuzz coverage fuzz_normalize_span +``` diff --git a/libdd-trace-normalization/fuzz/fuzz_targets/fuzz_normalize_span.rs b/libdd-trace-normalization/fuzz/fuzz_targets/fuzz_normalize_span.rs new file mode 100644 index 0000000000..fb5da5ee21 --- /dev/null +++ b/libdd-trace-normalization/fuzz/fuzz_targets/fuzz_normalize_span.rs @@ -0,0 +1,11 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +#![no_main] + +use libdd_trace_normalization::fuzz::{fuzz_normalize_span, FuzzSpan}; +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|fuzz_span: FuzzSpan| { + fuzz_normalize_span(fuzz_span); +}); diff --git a/libdd-trace-normalization/src/fuzz.rs b/libdd-trace-normalization/src/fuzz.rs new file mode 100644 index 0000000000..96762383be --- /dev/null +++ b/libdd-trace-normalization/src/fuzz.rs @@ -0,0 +1,218 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::normalizer; +use arbitrary::{Arbitrary, Unstructured}; +use libdd_trace_protobuf::pb; +use std::collections::HashMap; + +// Limit size to avoid OOM and similar issues with large payloads. 
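+// (Each MAX_*_SIZE caps the corresponding generated collection: the arbitrary length
+// byte is reduced modulo the constant before being used as a collection size.)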
+const MAX_METRICS_SIZE: u8 = 10; +const MAX_META_SIZE: u8 = 10; +const MAX_ATTRS_SIZE: u8 = 10; +const MAX_META_STRUCT_SIZE: u8 = 100; +const MAX_LINKS_SIZE: u8 = 10; +const MAX_EVENTS_SIZE: u8 = 10; + +/// Helper function to generate an arbitrary AttributeAnyValue +fn arbitrary_attribute_any_value(u: &mut Unstructured) -> arbitrary::Result { + let value_type: u8 = u.arbitrary()?; + + match value_type % 4 { + 0 => { + // String value + Ok(pb::AttributeAnyValue { + r#type: pb::attribute_any_value::AttributeAnyValueType::StringValue as i32, + string_value: u.arbitrary()?, + bool_value: false, + int_value: 0, + double_value: 0.0, + array_value: None, + }) + } + 1 => { + // Bool value + Ok(pb::AttributeAnyValue { + r#type: pb::attribute_any_value::AttributeAnyValueType::BoolValue as i32, + string_value: String::new(), + bool_value: u.arbitrary()?, + int_value: 0, + double_value: 0.0, + array_value: None, + }) + } + 2 => { + // Int value + Ok(pb::AttributeAnyValue { + r#type: pb::attribute_any_value::AttributeAnyValueType::IntValue as i32, + string_value: String::new(), + bool_value: false, + int_value: u.arbitrary()?, + double_value: 0.0, + array_value: None, + }) + } + _ => { + // Double value + Ok(pb::AttributeAnyValue { + r#type: pb::attribute_any_value::AttributeAnyValueType::DoubleValue as i32, + string_value: String::new(), + bool_value: false, + int_value: 0, + double_value: u.arbitrary()?, + array_value: None, + }) + } + } +} + +/// Custom wrapper to generate arbitrary Span data +#[derive(Debug)] +pub struct FuzzSpan { + pub span: pb::Span, +} + +impl<'a> Arbitrary<'a> for FuzzSpan { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + // Generate all basic fields + let service: String = u.arbitrary()?; + let name: String = u.arbitrary()?; + let resource: String = u.arbitrary()?; + let trace_id: u64 = u.arbitrary()?; + let span_id: u64 = u.arbitrary()?; + let parent_id: u64 = u.arbitrary()?; + let start: i64 = u.arbitrary()?; + let duration: i64 = u.arbitrary()?; + let error: i32 = u.arbitrary()?; + let r#type: String = u.arbitrary()?; + + // Generate meta HashMap (string -> string) + let meta_size: u8 = u.arbitrary()?; + let meta_size = (meta_size % MAX_META_SIZE) as usize; + let mut meta = HashMap::new(); + for _ in 0..meta_size { + let key: String = u.arbitrary()?; + let value: String = u.arbitrary()?; + meta.insert(key, value); + } + + // Add special keys that normalize_span checks + if u.arbitrary()? { + let env_value: String = u.arbitrary()?; + meta.insert("env".to_string(), env_value); + } + if u.arbitrary()? { + let status_code: String = u.arbitrary()?; + meta.insert("http.status_code".to_string(), status_code); + } + + // Generate metrics HashMap (string -> f64) + let metrics_size: u8 = u.arbitrary()?; + let metrics_size = (metrics_size % MAX_METRICS_SIZE) as usize; + let mut metrics = HashMap::new(); + for _ in 0..metrics_size { + let key: String = u.arbitrary()?; + let value: f64 = u.arbitrary()?; + metrics.insert(key, value); + } + + // Add special metrics that might be checked + if u.arbitrary()? 
{ + let sampling_priority: f64 = u.arbitrary()?; + metrics.insert("_sampling_priority_v1".to_string(), sampling_priority); + } + + // Generate meta_struct HashMap (string -> Vec) + let meta_struct_size: u8 = u.arbitrary()?; + let meta_struct_size = (meta_struct_size % MAX_META_SIZE) as usize; // Limit size + let mut meta_struct = HashMap::new(); + for _ in 0..meta_struct_size { + let key: String = u.arbitrary()?; + let value_len: u8 = u.arbitrary()?; + let value_len = (value_len % MAX_META_STRUCT_SIZE) as usize; // Limit byte vec size + let mut value = Vec::with_capacity(value_len); + for _ in 0..value_len { + value.push(u.arbitrary()?); + } + meta_struct.insert(key, value); + } + + // Generate span_links + let links_size: u8 = u.arbitrary()?; + let links_size = (links_size % MAX_LINKS_SIZE) as usize; + let mut span_links = Vec::new(); + for _ in 0..links_size { + let link = pb::SpanLink { + trace_id: u.arbitrary()?, + trace_id_high: u.arbitrary()?, + span_id: u.arbitrary()?, + attributes: { + let attrs_size: u8 = u.arbitrary()?; + let attrs_size = (attrs_size % MAX_ATTRS_SIZE) as usize; + let mut attrs = HashMap::new(); + for _ in 0..attrs_size { + let key: String = u.arbitrary()?; + let value: String = u.arbitrary()?; + attrs.insert(key, value); + } + attrs + }, + tracestate: u.arbitrary()?, + flags: u.arbitrary()?, + }; + span_links.push(link); + } + + // Generate span_events + let events_size: u8 = u.arbitrary()?; + let events_size = (events_size % MAX_EVENTS_SIZE) as usize; + let mut span_events = Vec::new(); + for _ in 0..events_size { + let event = pb::SpanEvent { + name: u.arbitrary()?, + time_unix_nano: u.arbitrary()?, + attributes: { + let attrs_size: u8 = u.arbitrary()?; + let attrs_size = (attrs_size % MAX_ATTRS_SIZE) as usize; + let mut attrs = HashMap::new(); + for _ in 0..attrs_size { + let key: String = u.arbitrary()?; + let value = arbitrary_attribute_any_value(u)?; + attrs.insert(key, value); + } + attrs + }, + }; + span_events.push(event); + } + + Ok(FuzzSpan { + span: pb::Span { + service, + name, + resource, + trace_id, + span_id, + parent_id, + start, + duration, + error, + meta, + metrics, + r#type, + meta_struct, + span_links, + span_events, + }, + }) + } +} + +/// Main fuzzing function that tests normalize_span with arbitrary data +pub fn fuzz_normalize_span(fuzz_span: FuzzSpan) { + let mut span = fuzz_span.span; + + // Call normalize_span - it may succeed or fail, both are valid + // The fuzzer will catch panics, crashes, or infinite loops + let _ = normalizer::normalize_span(&mut span); +} diff --git a/libdd-trace-normalization/src/lib.rs b/libdd-trace-normalization/src/lib.rs index 56c1704829..eddf527d2f 100644 --- a/libdd-trace-normalization/src/lib.rs +++ b/libdd-trace-normalization/src/lib.rs @@ -11,3 +11,6 @@ pub mod normalize_utils; pub mod normalizer; pub(crate) mod utf8_helpers; + +#[cfg(feature = "fuzzing")] +pub mod fuzz; diff --git a/libdd-trace-normalization/src/normalizer.rs b/libdd-trace-normalization/src/normalizer.rs index 616f8dbe3c..7450dad908 100644 --- a/libdd-trace-normalization/src/normalizer.rs +++ b/libdd-trace-normalization/src/normalizer.rs @@ -16,7 +16,7 @@ pub enum SamplerPriority { None = i8::MIN as isize, } -fn normalize_span(s: &mut pb::Span) -> anyhow::Result<()> { +pub(crate) fn normalize_span(s: &mut pb::Span) -> anyhow::Result<()> { anyhow::ensure!(s.trace_id != 0, "TraceID is zero (reason:trace_id_zero)"); anyhow::ensure!(s.span_id != 0, "SpanID is zero (reason:span_id_zero)");