Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 77 additions & 1 deletion vulnerabilities/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
from vulnerabilities.severity_systems import ScoringSystem
from vulnerabilities.utils import classproperty
from vulnerabilities.utils import get_reference_id
from vulnerabilities.utils import is_commit
from vulnerabilities.utils import is_cve
from vulnerabilities.utils import nearest_patched_package
from vulnerabilities.utils import purl_to_dict
from vulnerabilities.utils import update_purl_version

Expand Down Expand Up @@ -194,6 +194,58 @@ def from_url(cls, url):
return cls(url=url)


@dataclasses.dataclass(eq=True)
@functools.total_ordering
class CodeCommitData:
    """
    A single VCS commit referenced by an advisory, identified by its
    ``commit_hash`` within the repository at ``vcs_url``.
    """

    # Required: the commit identifier (a valid commit hash per is_commit)
    # and the URL of the repository the commit lives in.
    commit_hash: str
    vcs_url: str

    # Optional commit metadata.
    commit_author: Optional[str] = None
    commit_message: Optional[str] = None
    commit_date: Optional[datetime.datetime] = None

    def __post_init__(self):
        """Validate required fields; raise ValueError on bad data."""
        if not self.commit_hash:
            raise ValueError("Commit must have a non-empty commit_hash.")

        if not is_commit(self.commit_hash):
            raise ValueError("Commit must have a valid commit_hash.")

        if not self.vcs_url:
            raise ValueError("Commit must have a non-empty vcs_url.")

    def __lt__(self, other):
        if not isinstance(other, CodeCommitData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    # TODO: Add cache
    def _cmp_key(self):
        # commit_date is deliberately excluded from the sort key: it may be
        # None, and None does not order against datetime. Note this makes
        # ordering coarser than the dataclass-generated __eq__.
        return (self.commit_hash, self.vcs_url, self.commit_author, self.commit_message)

    def to_dict(self) -> dict:
        """Return a normalized, JSON-serializable dict representation of the commit."""
        return {
            "commit_hash": self.commit_hash,
            "vcs_url": self.vcs_url,
            "commit_author": self.commit_author,
            "commit_message": self.commit_message,
            # Serialize as ISO-8601 text so the dict is JSON-serializable and
            # round-trips through from_dict, which parses with fromisoformat.
            "commit_date": self.commit_date.isoformat() if self.commit_date else None,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """Create a CodeCommitData instance from a dictionary."""
        commit_date = data.get("commit_date")
        return cls(
            commit_hash=str(data.get("commit_hash", "")),
            vcs_url=data.get("vcs_url", ""),
            commit_author=data.get("commit_author"),
            commit_message=data.get("commit_message"),
            commit_date=datetime.datetime.fromisoformat(commit_date) if commit_date else None,
        )


class UnMergeablePackageError(Exception):
"""
Raised when a package cannot be merged with another one.
Expand Down Expand Up @@ -444,6 +496,8 @@ class AdvisoryData:
date_published: Optional[datetime.datetime] = None
weaknesses: List[int] = dataclasses.field(default_factory=list)
severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list)
fixed_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list)
affected_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list)
url: Optional[str] = None
original_advisory_text: Optional[str] = None

Expand Down Expand Up @@ -476,6 +530,12 @@ def to_dict(self):
"severities": [sev.to_dict() for sev in self.severities],
"date_published": self.date_published.isoformat() if self.date_published else None,
"weaknesses": self.weaknesses,
"affected_by_commits": [
affected_by_commit.to_dict() for affected_by_commit in self.affected_by_commits
],
"fixed_by_commits": [
fixed_by_commit.to_dict() for fixed_by_commit in self.fixed_by_commits
],
"url": self.url if self.url else "",
}
return {
Expand Down Expand Up @@ -536,6 +596,8 @@ class AdvisoryDataV2:
date_published: Optional[datetime.datetime] = None
weaknesses: List[int] = dataclasses.field(default_factory=list)
url: Optional[str] = None
fixed_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list)
affected_by_commits: List[CodeCommitData] = dataclasses.field(default_factory=list)

def __post_init__(self):
if self.date_published and not self.date_published.tzinfo:
Expand All @@ -559,6 +621,12 @@ def to_dict(self):
"references": [ref.to_dict() for ref in self.references],
"date_published": self.date_published.isoformat() if self.date_published else None,
"weaknesses": self.weaknesses,
"affected_by_commits": [
affected_by_commit.to_dict() for affected_by_commit in self.affected_by_commits
],
"fixed_by_commits": [
fixed_by_commit.to_dict() for fixed_by_commit in self.fixed_by_commits
],
"url": self.url if self.url else "",
}

Expand All @@ -578,6 +646,14 @@ def from_dict(cls, advisory_data):
if date_published
else None,
"weaknesses": advisory_data["weaknesses"],
"affected_by_commits": [
CodeCommitData.from_dict(affected_by_commit)
for affected_by_commit in advisory_data["affected_by_commits"]
],
"fixed_by_commits": [
CodeCommitData.from_dict(fixed_by_commit)
for fixed_by_commit in advisory_data["fixed_by_commits"]
],
"url": advisory_data.get("url") or None,
}
return cls(**transformed)
Expand Down
2 changes: 1 addition & 1 deletion vulnerabilities/importers/curl.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData:
... ]
... }
>>> parse_advisory_data(raw_data)
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], fixed_by_commits=[], affected_by_commits=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
"""

affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else []
Expand Down
67 changes: 62 additions & 5 deletions vulnerabilities/importers/osv.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import AffectedPackageV2
from vulnerabilities.importer import CodeCommitData
from vulnerabilities.importer import Reference
from vulnerabilities.importer import ReferenceV2
from vulnerabilities.importer import VulnerabilitySeverity
Expand Down Expand Up @@ -131,7 +132,8 @@ def parse_advisory_data_v2(
references = get_references_v2(raw_data=raw_data)

affected_packages = []

fixed_by_commits = []
affected_by_commits = []
for affected_pkg in raw_data.get("affected") or []:
purl = get_affected_purl(affected_pkg=affected_pkg, raw_id=advisory_id)

Expand All @@ -153,6 +155,10 @@ def parse_advisory_data_v2(
)
fixed_versions.extend([v.string for v in fixed_version])

introduced_commits, fixed_commits = get_code_commit(fixed_range, raw_id=advisory_id)
fixed_by_commits.extend(fixed_commits)
affected_by_commits.extend(introduced_commits)

fixed_version_range = (
get_fixed_version_range(fixed_versions, purl.type) if fixed_versions else None
)
Expand Down Expand Up @@ -182,6 +188,8 @@ def parse_advisory_data_v2(
affected_packages=affected_packages,
date_published=date_published,
weaknesses=weaknesses,
fixed_by_commits=fixed_by_commits,
affected_by_commits=affected_by_commits,
url=advisory_url,
original_advisory_text=advisory_text or json.dumps(raw_data, indent=2, ensure_ascii=False),
)
Expand All @@ -207,6 +215,17 @@ def extract_fixed_versions(fixed_range) -> Iterable[str]:
yield fixed


def extract_commits(introduced_range) -> Iterable[tuple]:
    """
    Yield ``(introduced, fixed)`` commit-hash pairs, one per event in the
    given GIT ``introduced_range`` mapping of OSV data. Either element of a
    pair may be None when the event carries only one of the two keys.
    """
    for event in introduced_range.get("events") or []:
        introduced = event.get("introduced")
        fixed = event.get("fixed")
        yield introduced, fixed


def get_published_date(raw_data):
published = raw_data.get("published")
return published and dateparser.parse(date_string=published)
Expand Down Expand Up @@ -392,11 +411,49 @@ def get_fixed_versions(fixed_range, raw_id, supported_ecosystem) -> List[Version
fixed_versions.append(SemverVersion(version))
except InvalidVersion:
logger.error(f"Invalid SemverVersion: {version!r} for OSV id: {raw_id!r}")

if fixed_range_type == "GIT":
# We process this in the get_code_commit function.
continue
else:
logger.error(f"Unsupported fixed version type: {version!r} for OSV id: {raw_id!r}")

# if fixed_range_type == "GIT":
# TODO add GitHubVersion univers fix_version
# logger.error(f"NotImplementedError GIT Version - {raw_id !r} - {i !r}")

return dedupe(fixed_versions)


def get_code_commit(ranges, raw_id):
    """
    Return two lists of CodeCommitData (introduced and fixed) extracted from
    a given vulnerability ``ranges`` mapping of OSV data.

    ``raw_id`` is the OSV advisory id, used only in log messages.
    Non-GIT ranges and GIT ranges missing a ``repo`` yield two empty lists.
    Commits are returned in event order; no deduplication is performed here.
    """
    if ranges.get("type") != "GIT":
        logger.debug(f"Skipping non-GIT range for OSV id: {raw_id!r}")
        return [], []

    repo = ranges.get("repo")
    if not repo:
        logger.error(f"Missing 'repo' field in range: {ranges} (OSV id: {raw_id!r})")
        return [], []

    introduced_commits, fixed_commits = [], []
    for introduced, fixed in extract_commits(ranges):
        # OSV uses "0" for "vulnerable from the beginning of history"; map it
        # to git's well-known empty-tree hash so it validates as a commit hash.
        if introduced == "0":
            introduced = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"

        try:
            if introduced:
                introduced_commit = CodeCommitData(commit_hash=introduced, vcs_url=repo)
                introduced_commits.append(introduced_commit)
        except ValueError as e:
            logger.error(f"Failed to extract introduced commits: {e!r}")

        try:
            if fixed:
                fixed_commit = CodeCommitData(commit_hash=fixed, vcs_url=repo)
                fixed_commits.append(fixed_commit)
        except ValueError as e:
            logger.error(f"Failed to extract fixed commits: {e!r}")

    return introduced_commits, fixed_commits
44 changes: 44 additions & 0 deletions vulnerabilities/pipes/advisory.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import Union

from django.db import transaction
from django.db.models import Q
from django.db.models.query import QuerySet

from vulnerabilities.importer import AdvisoryData
Expand All @@ -29,6 +30,7 @@
from vulnerabilities.models import AdvisoryWeakness
from vulnerabilities.models import AffectedByPackageRelatedVulnerability
from vulnerabilities.models import Alias
from vulnerabilities.models import CodeCommit
from vulnerabilities.models import FixingPackageRelatedVulnerability
from vulnerabilities.models import Package
from vulnerabilities.models import VulnerabilityReference
Expand Down Expand Up @@ -96,6 +98,42 @@ def get_or_create_advisory_weaknesses(weaknesses: List[str]) -> List[AdvisoryWea
return list(AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses))


def get_or_create_advisory_code_commits(code_commits_data: List) -> List["CodeCommit"]:
    """
    Given a list of commit-like objects (each with commit_hash and vcs_url),
    create any missing CodeCommit entries and return the full list of
    CodeCommit objects matching the input pairs.
    """
    if not code_commits_data:
        return []

    # A (commit_hash, vcs_url) pair uniquely identifies a commit row.
    wanted_pairs = [(commit.commit_hash, commit.vcs_url) for commit in code_commits_data]

    lookup = Q()
    for commit_hash, vcs_url in wanted_pairs:
        lookup |= Q(commit_hash=commit_hash, vcs_url=vcs_url)

    already_stored = set(
        CodeCommit.objects.filter(lookup).values_list("commit_hash", "vcs_url")
    )

    missing = [
        CodeCommit(
            commit_hash=commit.commit_hash,
            vcs_url=commit.vcs_url,
            commit_author=getattr(commit, "commit_author", None),
            commit_message=getattr(commit, "commit_message", None),
            commit_date=getattr(commit, "commit_date", None),
        )
        for commit in code_commits_data
        if (commit.commit_hash, commit.vcs_url) not in already_stored
    ]

    if missing:
        # ignore_conflicts keeps this safe against concurrent inserts of the
        # same (commit_hash, vcs_url) pair.
        CodeCommit.objects.bulk_create(missing, ignore_conflicts=True)

    # Re-query so the returned objects include rows created above.
    return list(CodeCommit.objects.filter(lookup))


def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None):
from vulnerabilities.utils import compute_content_id

Expand Down Expand Up @@ -150,6 +188,9 @@ def insert_advisory_v2(
severities = get_or_create_advisory_severities(severities=advisory.severities)
weaknesses = get_or_create_advisory_weaknesses(weaknesses=advisory.weaknesses)
content_id = compute_content_id(advisory_data=advisory)
affected_by_commits = get_or_create_advisory_code_commits(advisory.affected_by_commits)
fixed_by_commits = get_or_create_advisory_code_commits(advisory.fixed_by_commits)

try:
default_data = {
"datasource_id": pipeline_id,
Expand Down Expand Up @@ -216,6 +257,9 @@ def insert_advisory_v2(
impact.affecting_packages.add(*affected_packages_v2)
impact.fixed_by_packages.add(*fixed_packages_v2)

impact.affecting_commits.add(*affected_by_commits)
impact.fixed_by_commits.add(*fixed_by_commits)

return advisory_obj


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import pytest

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import CodeCommitData
from vulnerabilities.pipelines.v2_importers.github_osv_importer import GithubOSVImporterPipeline


Expand All @@ -27,7 +28,23 @@ def sample_osv_advisory(tmp_path: Path):
{
"package": {"name": "sample", "ecosystem": "pypi"},
"ranges": [
{"type": "ECOSYSTEM", "events": [{"introduced": "1.0.0"}, {"fixed": "1.2.0"}]}
{"type": "ECOSYSTEM", "events": [{"introduced": "1.0.0"}, {"fixed": "1.2.0"}]},
{
"type": "GIT",
"repo": "https://github.com/aboutcode-org/vulnerablecode",
"events": [
{"introduced": "0"},
{"fixed": "10081dd502dcfc0953de333fe8afb399db5f2a88"},
],
},
{
"type": "GIT",
"repo": "https://github.com/aboutcode-org/vulnerablecode",
"events": [
{"introduced": "b58c68c38a9de451818bac6c96d08d61e7f348a2"},
{"fixed": "61621982593152c47b520ce893eb90c332427483"},
],
},
],
"versions": ["1.0.0", "1.1.0"],
}
Expand Down Expand Up @@ -67,3 +84,36 @@ def delete(self):
assert advisory.original_advisory_text.strip().startswith("{")
assert advisory.affected_packages
assert advisory.affected_packages[0].package.type == "pypi"
assert advisory.affected_by_commits == [
CodeCommitData(
commit_hash="4b825dc642cb6eb9a060e54bf8d69288fbee4904",
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
commit_author=None,
commit_message=None,
commit_date=None,
),
CodeCommitData(
commit_hash="b58c68c38a9de451818bac6c96d08d61e7f348a2",
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
commit_author=None,
commit_message=None,
commit_date=None,
),
]

assert advisory.fixed_by_commits == [
CodeCommitData(
commit_hash="10081dd502dcfc0953de333fe8afb399db5f2a88",
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
commit_author=None,
commit_message=None,
commit_date=None,
),
CodeCommitData(
commit_hash="61621982593152c47b520ce893eb90c332427483",
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
commit_author=None,
commit_message=None,
commit_date=None,
),
]
Loading