22import os
33import re
44import json
5+ import logging
6+ logging .basicConfig (level = logging .DEBUG )
57
68from pathlib import Path
79from mdutils import MdUtils
@@ -16,7 +18,7 @@ class Messages:
1618 def map_severity_to_sarif (severity : str ) -> str :
1719 """
1820 Map Socket severity levels to SARIF levels (GitHub code scanning).
19-
21+
2022 'low' -> 'note'
2123 'medium' or 'middle' -> 'warning'
2224 'high' or 'critical' -> 'error'
@@ -39,115 +41,89 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str)
3941 Supports:
4042 1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock)
4143 - Locates a dictionary entry with the matching package & version
42- - Does a rough line-based search to find the actual line in the raw text
43- 2) Text-based (requirements.txt, package.json, yarn.lock, etc.)
44- - Uses compiled regex patterns to detect a match line by line
44+ - Searches the raw text for the key
45+ 2) Text-based (requirements.txt, package.json, yarn.lock, pnpm-lock.yaml, etc.)
46+ - Uses regex patterns to detect a match line by line
4547 """
46- # Extract just the file name to detect manifest type
4748 file_type = Path (manifest_file ).name
49+ logging .debug ("Processing file for line lookup: %s" , manifest_file )
4850
49- # ----------------------------------------------------
50- # 1) JSON-based manifest files
51- # ----------------------------------------------------
5251 if file_type in ["package-lock.json" , "Pipfile.lock" , "composer.lock" ]:
5352 try :
54- # Read entire file so we can parse JSON and also do raw line checks
5553 with open (manifest_file , "r" , encoding = "utf-8" ) as f :
5654 raw_text = f .read ()
57-
58- # Attempt JSON parse
55+ logging .debug ("Read %d characters from %s" , len (raw_text ), manifest_file )
5956 data = json .loads (raw_text )
60-
61- # In practice, you may need to check data["dependencies"], data["default"], etc.
62- # This is an example approach.
6357 packages_dict = (
6458 data .get ("packages" )
6559 or data .get ("default" )
6660 or data .get ("dependencies" )
6761 or {}
6862 )
69-
63+ logging . debug ( "Found package keys in %s: %s" , manifest_file , list ( packages_dict . keys ()))
7064 found_key = None
7165 found_info = None
72- # Locate a dictionary entry whose 'version' matches
7366 for key , value in packages_dict .items ():
74- # For NPM package-lock, keys might look like "node_modules/axios"
7567 if key .endswith (packagename ) and "version" in value :
7668 if value ["version" ] == packageversion :
7769 found_key = key
7870 found_info = value
7971 break
80-
8172 if found_key and found_info :
82- # Search lines to approximate the correct line number
83- needle_key = f'"{ found_key } ":' # e.g. "node_modules/axios":
84- needle_version = f'"version": "{ packageversion } "'
73+ needle_key = f'"{ found_key } ":'
8574 lines = raw_text .splitlines ()
86- best_line = 1
87- snippet = None
88-
75+ logging .debug ("Total lines in %s: %d" , manifest_file , len (lines ))
8976 for i , line in enumerate (lines , start = 1 ):
90- if (needle_key in line ) or (needle_version in line ):
91- best_line = i
92- snippet = line .strip ()
93- break # On first match, stop
94-
95- # If we found an approximate line, return it; else fallback to line 1
96- if best_line > 0 and snippet :
97- return best_line , snippet
98- else :
99- return 1 , f'"{ found_key } ": { found_info } '
77+ if needle_key in line :
78+ logging .debug ("Found match at line %d in %s: %s" , i , manifest_file , line .strip ())
79+ return i , line .strip ()
80+ return 1 , f'"{ found_key } ": { found_info } '
10081 else :
10182 return 1 , f"{ packagename } { packageversion } (not found in { manifest_file } )"
102-
103- except ( FileNotFoundError , json . JSONDecodeError ):
83+ except ( FileNotFoundError , json . JSONDecodeError ) as e :
84+ logging . error ( "Error reading %s: %s" , manifest_file , e )
10485 return 1 , f"Error reading { manifest_file } "
10586
106- # ----------------------------------------------------
107- # 2) Text-based / line-based manifests
108- # ----------------------------------------------------
109- # Define a dictionary of patterns for common manifest types
110- search_patterns = {
111- "package.json" : rf'"{ packagename } ":\s*"{ packageversion } "' ,
112- "yarn.lock" : rf'{ packagename } @{ packageversion } ' ,
113- "pnpm-lock.yaml" : rf'"{ re .escape (packagename )} "\s*:\s*\{{[^}}]*"version":\s*"{ re .escape (packageversion )} "' ,
114- "requirements.txt" : rf'^{ re .escape (packagename )} \s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{ re .escape (packageversion )} (?:\s*;.*)?$' ,
115- "pyproject.toml" : rf'{ packagename } \s*=\s*"{ packageversion } "' ,
116- "Pipfile" : rf'"{ packagename } "\s*=\s*"{ packageversion } "' ,
117- "go.mod" : rf'require\s+{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
118- "go.sum" : rf'{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
119- "pom.xml" : rf'<artifactId>{ re .escape (packagename )} </artifactId>\s*<version>{ re .escape (packageversion )} </version>' ,
120- "build.gradle" : rf'implementation\s+"{ re .escape (packagename )} :{ re .escape (packageversion )} "' ,
121- "Gemfile" : rf'gem\s+"{ re .escape (packagename )} ",\s*"{ re .escape (packageversion )} "' ,
122- "Gemfile.lock" : rf'\s+{ re .escape (packagename )} \s+\({ re .escape (packageversion )} \)' ,
123- ".csproj" : rf'<PackageReference\s+Include="{ re .escape (packagename )} "\s+Version="{ re .escape (packageversion )} "\s*/>' ,
124- ".fsproj" : rf'<PackageReference\s+Include="{ re .escape (packagename )} "\s+Version="{ re .escape (packageversion )} "\s*/>' ,
125- "paket.dependencies" : rf'nuget\s+{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
126- "Cargo.toml" : rf'{ re .escape (packagename )} \s*=\s*"{ re .escape (packageversion )} "' ,
127- "build.sbt" : rf'"{ re .escape (packagename )} "\s*%\s*"{ re .escape (packageversion )} "' ,
128- "Podfile" : rf'pod\s+"{ re .escape (packagename )} ",\s*"{ re .escape (packageversion )} "' ,
129- "Package.swift" : rf'\.package\(name:\s*"{ re .escape (packagename )} ",\s*url:\s*".*?",\s*version:\s*"{ re .escape (packageversion )} "\)' ,
130- "mix.exs" : rf'\{{:{ re .escape (packagename )} ,\s*"{ re .escape (packageversion )} "\}}' ,
131- "composer.json" : rf'"{ re .escape (packagename )} ":\s*"{ re .escape (packageversion )} "' ,
132- "conanfile.txt" : rf'{ re .escape (packagename )} /{ re .escape (packageversion )} ' ,
133- "vcpkg.json" : rf'"{ re .escape (packagename )} ":\s*"{ re .escape (packageversion )} "' ,
134- }
135-
136- # If no specific pattern is found for this file name, fallback to a naive approach
137- searchstring = search_patterns .get (file_type , rf'{ re .escape (packagename )} .*{ re .escape (packageversion )} ' )
87+ # For pnpm-lock.yaml, use a special regex pattern.
88+ if file_type .lower () == "pnpm-lock.yaml" :
89+ searchstring = rf'^\s*/{ re .escape (packagename )} /{ re .escape (packageversion )} :'
90+ else :
91+ search_patterns = {
92+ "package.json" : rf'"{ packagename } ":\s*"[\^~]?{ re .escape (packageversion )} "' ,
93+ "yarn.lock" : rf'{ packagename } @{ packageversion } ' ,
94+ "requirements.txt" : rf'^{ re .escape (packagename )} \s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{ re .escape (packageversion )} (?:\s*;.*)?$' ,
95+ "pyproject.toml" : rf'{ packagename } \s*=\s*"{ re .escape (packageversion )} "' ,
96+ "Pipfile" : rf'"{ packagename } "\s*=\s*"{ re .escape (packageversion )} "' ,
97+ "go.mod" : rf'require\s+{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
98+ "go.sum" : rf'{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
99+ "pom.xml" : rf'<artifactId>{ re .escape (packagename )} </artifactId>\s*<version>{ re .escape (packageversion )} </version>' ,
100+ "build.gradle" : rf'implementation\s+"{ re .escape (packagename )} :{ re .escape (packageversion )} "' ,
101+ "Gemfile" : rf'gem\s+"{ re .escape (packagename )} ",\s*"{ re .escape (packageversion )} "' ,
102+ "Gemfile.lock" : rf'\s+{ re .escape (packagename )} \s+\({ re .escape (packageversion )} \)' ,
103+ ".csproj" : rf'<PackageReference\s+Include="{ re .escape (packagename )} "\s+Version="{ re .escape (packageversion )} "\s*/>' ,
104+ ".fsproj" : rf'<PackageReference\s+Include="{ re .escape (packagename )} "\s+Version="{ re .escape (packageversion )} "\s*/>' ,
105+ "paket.dependencies" : rf'nuget\s+{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
106+ "Cargo.toml" : rf'{ re .escape (packagename )} \s*=\s*"{ re .escape (packageversion )} "' ,
107+ "build.sbt" : rf'"{ re .escape (packagename )} "\s*%\s*"{ re .escape (packageversion )} "' ,
108+ "Podfile" : rf'pod\s+"{ re .escape (packagename )} ",\s*"{ re .escape (packageversion )} "' ,
109+ "Package.swift" : rf'\.package\(name:\s*"{ re .escape (packagename )} ",\s*url:\s*".*?",\s*version:\s*"{ re .escape (packageversion )} "\)' ,
110+ "mix.exs" : rf'\{{:{ re .escape (packagename )} ,\s*"{ re .escape (packageversion )} "\}}' ,
111+ "composer.json" : rf'"{ re .escape (packagename )} ":\s*"{ re .escape (packageversion )} "' ,
112+ "conanfile.txt" : rf'{ re .escape (packagename )} /{ re .escape (packageversion )} ' ,
113+ "vcpkg.json" : rf'"{ re .escape (packagename )} ":\s*"{ re .escape (packageversion )} "' ,
114+ }
115+ searchstring = search_patterns .get (file_type , rf'{ re .escape (packagename )} .*{ re .escape (packageversion )} ' )
138116
117+ logging .debug ("Using search pattern for %s: %s" , file_type , searchstring )
139118 try :
140- # Read file lines and search for a match
141119 with open (manifest_file , 'r' , encoding = "utf-8" ) as file :
142120 lines = [line .rstrip ("\n " ) for line in file ]
121+ logging .debug ("Total lines in %s: %d" , manifest_file , len (lines ))
143122 for line_number , line_content in enumerate (lines , start = 1 ):
144- # For Python conditional dependencies, ignore everything after first ';'
145123 line_main = line_content .split (";" , 1 )[0 ].strip ()
146-
147- # Use a case-insensitive regex search
148124 if re .search (searchstring , line_main , re .IGNORECASE ):
125+ logging .debug ("Match found at line %d in %s: %s" , line_number , manifest_file , line_content .strip ())
149126 return line_number , line_content .strip ()
150-
151127 except FileNotFoundError :
152128 return 1 , f"{ manifest_file } not found"
153129 except Exception as e :
@@ -181,7 +157,6 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) -
181157 "composer.json" : "composer" ,
182158 "vcpkg.json" : "vcpkg" ,
183159 }
184-
185160 file_type = Path (manifest_file ).name
186161 url_prefix = manifest_to_url_prefix .get (file_type , "unknown" )
187162 return f"https://socket.dev/{ url_prefix } /package/{ pkg_name } /alerts/{ pkg_version } "
@@ -191,29 +166,33 @@ def create_security_comment_sarif(diff) -> dict:
191166 """
192167 Create SARIF-compliant output from the diff report, including dynamic URL generation
193168 based on manifest type and improved <br/> formatting for GitHub SARIF display.
169+
170+ Behavior:
171+ - Processes every alert in diff.new_alerts.
172+ - For an alert that lists several manifest files, emits one SARIF result per manifest file.
173+ - Appends the manifest file name to the rule ID and rule name so that each result is unique.
174+ - Skips any alert that has no manifest file (logging an error); there is no fallback to 'requirements.txt'.
175+ - Logs the extracted manifest files and the matched line for each result at DEBUG level.
176+
194177 """
195- scan_failed = False
196178 if len (diff .new_alerts ) == 0 :
197179 for alert in diff .new_alerts :
198- alert : Issue
199180 if alert .error :
200- scan_failed = True
201181 break
182+
202183 sarif_data = {
203184 "$schema" : "https://json.schemastore.org/sarif-2.1.0.json" ,
204185 "version" : "2.1.0" ,
205- "runs" : [
206- {
207- "tool" : {
208- "driver" : {
209- "name" : "Socket Security" ,
210- "informationUri" : "https://socket.dev" ,
211- "rules" : []
212- }
213- },
214- "results" : []
215- }
216- ]
186+ "runs" : [{
187+ "tool" : {
188+ "driver" : {
189+ "name" : "Socket Security" ,
190+ "informationUri" : "https://socket.dev" ,
191+ "rules" : []
192+ }
193+ },
194+ "results" : []
195+ }]
217196 }
218197
219198 rules_map = {}
@@ -222,60 +201,77 @@ def create_security_comment_sarif(diff) -> dict:
222201 for alert in diff .new_alerts :
223202 pkg_name = alert .pkg_name
224203 pkg_version = alert .pkg_version
225- rule_id = f"{ pkg_name } =={ pkg_version } "
204+ base_rule_id = f"{ pkg_name } =={ pkg_version } "
226205 severity = alert .severity
227206
228- # Generate the correct URL for the alert based on manifest type
229- introduced_list = alert .introduced_by
230- manifest_file = introduced_list [0 ][1 ] if introduced_list and isinstance (introduced_list [0 ], list ) else alert .manifests or "requirements.txt"
231- socket_url = Messages .get_manifest_type_url (manifest_file , pkg_name , pkg_version )
232-
233- # Prepare descriptions with <br/> replacements
234- short_desc = f"{ alert .props .get ('note' , '' )} <br/><br/>Suggested Action:<br/>{ alert .suggestion } <br/><a href=\" { socket_url } \" >{ socket_url } </a>"
235- full_desc = "{} - {}" .format (alert .title , alert .description .replace ('\r \n ' , '<br/>' ))
236-
237- # Identify the line and snippet in the manifest file
238- line_number , line_content = Messages .find_line_in_file (pkg_name , pkg_version , manifest_file )
239- if line_number < 1 :
240- line_number = 1 # Ensure SARIF compliance
241-
242- # Create the rule if not already defined
243- if rule_id not in rules_map :
244- rules_map [rule_id ] = {
245- "id" : rule_id ,
246- "name" : f"{ pkg_name } =={ pkg_version } " ,
247- "shortDescription" : {"text" : f"Alert generated for { rule_id } by Socket Security" },
248- "fullDescription" : {"text" : full_desc },
249- "helpUri" : socket_url ,
250- "defaultConfiguration" : {
251- "level" : Messages .map_severity_to_sarif (severity )
252- },
253- }
207+ logging .debug ("Alert %s - introduced_by: %s, manifests: %s" , base_rule_id , alert .introduced_by , getattr (alert , 'manifests' , None ))
208+ manifest_files = []
209+ if alert .introduced_by and isinstance (alert .introduced_by , list ):
210+ for entry in alert .introduced_by :
211+ if isinstance (entry , (list , tuple )) and len (entry ) >= 2 :
212+ files = [f .strip () for f in entry [1 ].split (";" ) if f .strip ()]
213+ manifest_files .extend (files )
214+ elif isinstance (entry , str ):
215+ manifest_files .extend ([m .strip () for m in entry .split (";" ) if m .strip ()])
216+ elif hasattr (alert , 'manifests' ) and alert .manifests :
217+ manifest_files = [mf .strip () for mf in alert .manifests .split (";" ) if mf .strip ()]
218+
219+ logging .debug ("Alert %s - extracted manifest_files: %s" , base_rule_id , manifest_files )
220+ if not manifest_files :
221+ logging .error ("Alert %s: No manifest file found; cannot determine file location." , base_rule_id )
222+ continue
223+
224+ logging .debug ("Alert %s - using manifest_files for processing: %s" , base_rule_id , manifest_files )
225+
226+ # Create an individual SARIF result for each manifest file.
227+ for mf in manifest_files :
228+ logging .debug ("Alert %s - Processing manifest file: %s" , base_rule_id , mf )
229+ socket_url = Messages .get_manifest_type_url (mf , pkg_name , pkg_version )
230+ line_number , line_content = Messages .find_line_in_file (pkg_name , pkg_version , mf )
231+ if line_number < 1 :
232+ line_number = 1
233+ logging .debug ("Alert %s: Manifest %s, line %d: %s" , base_rule_id , mf , line_number , line_content )
234+
235+ # Create a unique rule id and name by appending the manifest file.
236+ unique_rule_id = f"{ base_rule_id } ({ mf } )"
237+ rule_name = f"Alert { base_rule_id } ({ mf } )"
238+
239+ short_desc = (f"{ alert .props .get ('note' , '' )} <br/><br/>Suggested Action:<br/>{ alert .suggestion } "
240+ f"<br/><a href=\" { socket_url } \" >{ socket_url } </a>" )
241+ full_desc = "{} - {}" .format (alert .title , alert .description .replace ('\r \n ' , '<br/>' ))
242+
243+ if unique_rule_id not in rules_map :
244+ rules_map [unique_rule_id ] = {
245+ "id" : unique_rule_id ,
246+ "name" : rule_name ,
247+ "shortDescription" : {"text" : rule_name },
248+ "fullDescription" : {"text" : full_desc },
249+ "helpUri" : socket_url ,
250+ "defaultConfiguration" : {
251+ "level" : Messages .map_severity_to_sarif (severity )
252+ },
253+ }
254254
255- # Add the SARIF result
256- result_obj = {
257- "ruleId" : rule_id ,
258- "message" : {"text" : short_desc },
259- "locations" : [
260- {
255+ result_obj = {
256+ "ruleId" : unique_rule_id ,
257+ "message" : {"text" : short_desc },
258+ "locations" : [{
261259 "physicalLocation" : {
262- "artifactLocation" : {"uri" : manifest_file },
260+ "artifactLocation" : {"uri" : mf },
263261 "region" : {
264262 "startLine" : line_number ,
265263 "snippet" : {"text" : line_content },
266264 },
267265 }
268- }
269- ],
270- }
271- results_list .append (result_obj )
266+ }]
267+ }
268+ results_list .append (result_obj )
272269
273- # Attach rules and results
274270 sarif_data ["runs" ][0 ]["tool" ]["driver" ]["rules" ] = list (rules_map .values ())
275271 sarif_data ["runs" ][0 ]["results" ] = results_list
276272
277273 return sarif_data
278-
274+
279275 @staticmethod
280276 def create_security_comment_json (diff : Diff ) -> dict :
281277 scan_failed = False
0 commit comments