From 5a4cdc49917946ccd07250f1932a11324e6f73ca Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 19 Dec 2025 15:10:56 +0530 Subject: [PATCH] Add support for advisory curation Signed-off-by: Tushar Goel --- vulnerabilities/importer.py | 25 ++++ vulnerabilities/models.py | 45 ++++--- .../pipelines/compute_advisory_todo.py | 6 +- .../v2_improvers/compute_advisory_todo.py | 6 +- .../v2_improvers/curate_advisories.py | 104 +++++++++++++++ vulnerabilities/pipes/advisory.py | 7 +- .../templates/advisory_todo_list.html | 119 ++++++++++++++++++ vulnerabilities/views.py | 8 ++ vulnerablecode/urls.py | 8 +- 9 files changed, 305 insertions(+), 23 deletions(-) create mode 100644 vulnerabilities/pipelines/v2_improvers/curate_advisories.py create mode 100644 vulnerabilities/templates/advisory_todo_list.html diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index 850587ca7..abf03e767 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -499,6 +499,31 @@ def from_dict(cls, advisory_data): if "fixed_version_range" in affected_packages[0] else AffectedPackage ) + if advisory_data.get("advisory_id") or advisory_data.get("severities") or affected_package_cls is AffectedPackageV2: + transformed = { + "advisory_id": advisory_data["advisory_id"], + "aliases": advisory_data["aliases"], + "summary": advisory_data["summary"], + "affected_packages": [ + affected_package_cls.from_dict(pkg) + for pkg in affected_packages + if pkg is not None + ], + "references_v2": [ + ReferenceV2.from_dict(ref) for ref in advisory_data["references"] + ], + "severities": [ + VulnerabilitySeverity.from_dict(sev) + for sev in advisory_data.get("severities", []) + ], + "date_published": datetime.datetime.fromisoformat(date_published) + if date_published + else None, + "weaknesses": advisory_data["weaknesses"], + "url": advisory_data.get("url") or None, + "original_advisory_text": advisory_data.get("original_advisory_text") or None, + } + return cls(**transformed) transformed = { "aliases": advisory_data["aliases"], "summary": advisory_data["summary"], diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index e1c4ddc6b..cc4d5fa98 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2422,13 +2422,22 @@ def create_new_job(self, execute_now=False): ISSUE_TYPE_CHOICES = [ - ("MISSING_AFFECTED_PACKAGE", "Advisory is missing affected package"), - ("MISSING_FIXED_BY_PACKAGE", "Advisory is missing fixed-by package"), + ( + "MISSING_AFFECTED_PACKAGE", + "Advisory is missing affected package", + ), + ( + "MISSING_FIXED_BY_PACKAGE", + "Advisory is missing fixed-by package", + ), ( "MISSING_AFFECTED_AND_FIXED_BY_PACKAGES", "Advisory is missing both affected and fixed-by packages", ), - ("MISSING_SUMMARY", "Advisory is missing summary"), + ( + "MISSING_SUMMARY", + "Advisory is missing summary", + ), ("CONFLICTING_FIXED_BY_PACKAGES", "Advisories have conflicting fixed-by packages"), ("CONFLICTING_AFFECTED_PACKAGES", "Advisories have conflicting affected packages"), ( @@ -2502,22 +2511,22 @@ class AdvisoryToDoV2(models.Model): # (see https://code.djangoproject.com/ticket/702), we use related_advisories_id # to avoid creating duplicate issue for same set of advisories, related_advisories_id = models.CharField( - max_length=40, - help_text="SHA1 digest of the unique_content_id field of the applicable advisories.", + max_length=64, + help_text="Computed unique content ID that identifies the related advisories.", ) advisories = models.ManyToManyField( "AdvisoryV2", through="ToDoRelatedAdvisoryV2", related_name="advisory_todos", - help_text="Advisory/ies where this TODO is applicable.", + help_text="Advisories for this TODO.", ) issue_type = models.CharField( max_length=50, choices=ISSUE_TYPE_CHOICES, db_index=True, - help_text="Select the issue that needs to be addressed from the available options.", + help_text="The issue type that needs to be addressed.", ) issue_detail = models.TextField( @@ -2530,12 +2539,6 @@ class AdvisoryToDoV2(models.Model): help_text="Timestamp indicating when this TODO was created.", ) - is_resolved = models.BooleanField( - default=False, - db_index=True, - help_text="This TODO is resolved or not.", - ) - resolved_at = models.DateTimeField( null=True, blank=True, @@ -2547,10 +2550,24 @@ class AdvisoryToDoV2(models.Model): help_text="Additional detail on how this TODO was resolved.", ) + curation_advisory = models.ForeignKey( + "AdvisoryV2", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="curated_todos", + help_text="The advisory that was created/updated to resolve this TODO.", + ) + + status = models.CharField( + max_length=20, + default="open", + help_text="The current status of the TODO item.", + ) + class Meta: unique_together = ("related_advisories_id", "issue_type") - class AdvisorySeverity(models.Model): url = models.URLField( max_length=1024, diff --git a/vulnerabilities/pipelines/compute_advisory_todo.py b/vulnerabilities/pipelines/compute_advisory_todo.py index 8c4d1253d..ac1ac7586 100644 --- a/vulnerabilities/pipelines/compute_advisory_todo.py +++ b/vulnerabilities/pipelines/compute_advisory_todo.py @@ -18,7 +18,7 @@ from vulnerabilities.models import Alias from vulnerabilities.models import ToDoRelatedAdvisory from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.pipes.advisory import advisories_checksum +from vulnerabilities.pipes.advisory import compute_advisories_content_id class ComputeToDo(VulnerableCodePipeline): @@ -52,7 +52,7 @@ def compute_individual_advisory_todo(self): progress_step=1, ) for advisory in progress.iter(advisories.iterator(chunk_size=5000)): - advisory_todo_id = advisories_checksum(advisories=advisory) + advisory_todo_id = compute_advisories_content_id(advisories=advisory) check_missing_summary( advisory=advisory, todo_id=advisory_todo_id, @@ -297,7 +297,7 @@ def check_conflicting_affected_and_fixed_by_packages_for_alias( "Conflict matrix": matrix, } - todo_id = advisories_checksum(advisories) + todo_id = compute_advisories_content_id(advisories) todo = AdvisoryToDo( related_advisories_id=todo_id, issue_type=issue_type, diff --git a/vulnerabilities/pipelines/v2_improvers/compute_advisory_todo.py b/vulnerabilities/pipelines/v2_improvers/compute_advisory_todo.py index 981f10e92..4625f1ef0 100644 --- a/vulnerabilities/pipelines/v2_improvers/compute_advisory_todo.py +++ b/vulnerabilities/pipelines/v2_improvers/compute_advisory_todo.py @@ -18,7 +18,7 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import ToDoRelatedAdvisoryV2 from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.pipes.advisory import advisories_checksum +from vulnerabilities.pipes.advisory import compute_advisories_content_id class ComputeToDo(VulnerableCodePipeline): @@ -54,7 +54,7 @@ def compute_individual_advisory_todo(self): progress_step=1, ) for advisory in progress.iter(advisories.iterator(chunk_size=5000)): - advisory_todo_id = advisories_checksum(advisories=advisory) + advisory_todo_id = compute_advisories_content_id(advisories=advisory) check_missing_summary( advisory=advisory, todo_id=advisory_todo_id, @@ -302,7 +302,7 @@ def check_conflicting_affected_and_fixed_by_packages_for_alias( "Conflict matrix": matrix, } - todo_id = advisories_checksum(advisories) + todo_id = compute_advisories_content_id(advisories) todo = AdvisoryToDoV2( related_advisories_id=todo_id, issue_type=issue_type, diff --git a/vulnerabilities/pipelines/v2_improvers/curate_advisories.py b/vulnerabilities/pipelines/v2_improvers/curate_advisories.py new file mode 100644 index 000000000..3374edcf8 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/curate_advisories.py @@ -0,0 +1,104 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import json +from pathlib import Path + +from fetchcode.vcs import fetch_via_vcs + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.models import AdvisoryToDoV2 +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipes.advisory import insert_advisory_v2 + + +class CurateAdvisoriesPipeline(VulnerableCodePipeline): + """ + Curate advisories + """ + + pipeline_id = "curate_advisories" + license_expression = None + + """ + Sample Curation Advisory: + + { + advisory: { + "advisory_id": "CVE-2024-12345", + "summary": "This is a curated summary for CVE-2024-12345", + "url": "https://github.com/TG1999/CVE-2024-12345", + "aliases": ["GHSA-1323-1213"], + "references": [ + { + "url": "https://github.com/TG1999/CVE-2024-12345", + "reference_id": "CVE-2024-12345", + } + ], + "severity": [ + { + "system": "CVSSv3", + "value": "9.8", + "vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + } + ], + "affected_packages": [ + { + "package": { + "type": "pypi", + "namespace": null, + "name": "example-package", + "version": "1.0.0" + }, + "affected_version_range": "<=1.0.0", + "fixed_version": "1.0.1" + }, + ] + }, + related_advisories: ["nvd_importer_v2/CVE-2024-12345"], + todo_ids : [133], + source: "Tushar", + } + """ + + @classmethod + def steps(cls): + return ( + cls.fetch_curation_repo, + cls.apply_curations, + ) + + @classmethod + def fetch_curation_repo(self): + """ + Fetch curation repository + """ + self.vcs_response = fetch_via_vcs(self.repo_url) + + @classmethod + def apply_curations(self): + """ + Apply curation to advisories + """ + advisory_files = Path(self.vcs_response.dest_dir).rglob("*.json") + for advisory_file in advisory_files: + advisory_data = json.load(open(advisory_file)) + advisory = AdvisoryData.from_dict(advisory_data["advisory"]) + advisory_obj = insert_advisory_v2(advisory=advisory, source=advisory_data.get("source")) + # Link related advisories + for related_advisory_id in advisory_data.get("related_advisories", []): + related_advisory = AdvisoryV2.objects.filter(avid=related_advisory_id).first() + if related_advisory: + advisory_obj.related_advisories.add(related_advisory) + advisory_obj.save() + + for todo in AdvisoryToDoV2.objects.filter(id__in=advisory_data.get("todo_ids", [])): + # Add advisory in todo's curated_advisories field + todo.curated_advisories.add(advisory_obj) + todo.save() diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 413b260b6..293fa5d46 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -330,12 +330,15 @@ def import_advisory( advisory.save() -def advisories_checksum(advisories: Union[Advisory, List[Advisory]]) -> str: +def compute_advisories_content_id(advisories: Union[Advisory, List[Advisory]]) -> str: + """ + Return a content based ID string that uniquely identifies the list of advisories. + """ if isinstance(advisories, Advisory) or isinstance(advisories, AdvisoryV2): advisories = [advisories] contents = sorted([advisory.unique_content_id for advisory in advisories]) combined_contents = "".join(contents) - checksum = hashlib.sha1(combined_contents.encode()) + checksum = hashlib.sha256(combined_contents.encode(), usedforsecurity=False) return checksum.hexdigest() diff --git a/vulnerabilities/templates/advisory_todo_list.html b/vulnerabilities/templates/advisory_todo_list.html new file mode 100644 index 000000000..ff2162bc7 --- /dev/null +++ b/vulnerabilities/templates/advisory_todo_list.html @@ -0,0 +1,119 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} +{% load static %} + +{% block content %} +
+ +

Advisory TODO List

+ + + + + + + + + + + + + + + + + + {% for todo in todos %} + + + + + + + + + + + + + + + + + + + + + + {% empty %} + + + + {% endfor %} + +
IDRelated AdvisoriesIssue TypeIssue DetailCreated AtStatusResolved AtResolution DetailActions
{{ todo.id }} + {% for adv in todo.advisories.all %} + + {{ adv.avid }} + {% if not forloop.last %}, {% endif %} + {% empty %} + None + {% endfor %} + {{ todo.get_issue_type_display }} + {{ todo.issue_detail|linebreaksbr }} + {{ todo.created_at }} + {% if todo.is_resolved %} + Resolved + {% else %} + Pending + {% endif %} + + {% if todo.resolved_at %} + {{ todo.resolved_at }} + {% else %} + + {% endif %} + + {{ todo.resolution_detail|linebreaksbr }} + + + View + + + {% if not todo.is_resolved %} + + Mark Resolved + + {% endif %} +
+ No TODOs found. +
+ + + {% if is_paginated %} + + {% endif %} + +
+{% endblock %} diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index f4cd99dbe..3919fa63a 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -34,6 +34,7 @@ from vulnerabilities.forms import PackageSearchForm from vulnerabilities.forms import PipelineSchedulePackageForm from vulnerabilities.forms import VulnerabilitySearchForm +from vulnerabilities.models import AdvisoryToDoV2 from vulnerabilities.models import ImpactedPackage from vulnerabilities.models import PipelineRun from vulnerabilities.models import PipelineSchedule @@ -720,3 +721,10 @@ def get_context_data(self, **kwargs): context["site_title"] = "VulnerableCode site admin" context["site_header"] = "VulnerableCode Administration" return context + + +class AdvisoryTodoListView(ListView): + model = AdvisoryToDoV2 + template_name = "advisory_todo_list.html" + context_object_name = "todos" + paginate_by = 50 diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 8d170678a..a79c58aea 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -26,6 +26,8 @@ from vulnerabilities.api_v2 import PackageV2ViewSet from vulnerabilities.api_v2 import PipelineScheduleV2ViewSet from vulnerabilities.api_v2 import VulnerabilityV2ViewSet + +from vulnerabilities.views import AdvisoryTodoListView from vulnerabilities.views import AdminLoginView from vulnerabilities.views import AdvisoryDetails from vulnerabilities.views import AdvisoryPackagesDetails @@ -70,7 +72,6 @@ def __init__(self, *args, **kwargs): api_v2_router.register("pipelines", PipelineScheduleV2ViewSet, basename="pipelines") api_v2_router.register("advisory-codefixes", CodeFixV2ViewSet, basename="advisory-codefix") - urlpatterns = [ path("admin/login/", AdminLoginView.as_view(), name="admin-login"), path("api/v2/", include(api_v2_router.urls)), @@ -123,6 +124,11 @@ def __init__(self, *args, **kwargs): PackageSearchV2.as_view(), name="package_search_v2", ), + path( + "todos/", + AdvisoryTodoListView.as_view(), + name="todos", + ), re_path( r"^packages/(?Ppkg:.+)$", PackageDetails.as_view(),