Skip to content

Commit 8013531

Browse files
committed
fix: validation
1 parent bf839e7 commit 8013531

File tree

4 files changed

+73
-18
lines changed

4 files changed

+73
-18
lines changed

.github/workflows/pr-check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ jobs:
5353
- name: Validate SPDX document
5454
run: |
5555
echo "Validating SPDX 2.3 JSON document..."
56-
pyspdxtools -i test-sbom.spdx.json --validate
56+
pyspdxtools -i test-sbom.spdx.json
5757
5858
echo "SPDX validation passed!"
5959

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ jobs:
8181
./ubuntu-sbom-arm64 --output release-test-sbom.spdx.json --progress=false
8282
8383
echo "Validating SPDX 2.3 JSON document..."
84-
pyspdxtools -i release-test-sbom.spdx.json --validate
84+
pyspdxtools -i release-test-sbom.spdx.json
8585
8686
echo "✓ SPDX validation passed! Binary generates valid SPDX 2.3 documents."
8787

main.go

Lines changed: 71 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -340,33 +340,88 @@ func normalizeLicense(license string) string {
340340
// Map common license strings to SPDX identifiers
341341
license = strings.TrimSpace(license)
342342

343+
// If empty, return NOASSERTION
344+
if license == "" {
345+
return "NOASSERTION"
346+
}
347+
348+
// Normalize to lowercase for case-insensitive matching
349+
licenseLower := strings.ToLower(license)
350+
351+
// Check for known SPDX patterns (case-insensitive)
343352
replacements := map[string]string{
344-
"GPL-2": "GPL-2.0-only",
345-
"GPL-2+": "GPL-2.0-or-later",
346-
"GPL-3": "GPL-3.0-only",
347-
"GPL-3+": "GPL-3.0-or-later",
348-
"LGPL-2": "LGPL-2.0-only",
349-
"LGPL-2+": "LGPL-2.0-or-later",
350-
"LGPL-2.1": "LGPL-2.1-only",
351-
"LGPL-2.1+": "LGPL-2.1-or-later",
352-
"LGPL-3": "LGPL-3.0-only",
353-
"LGPL-3+": "LGPL-3.0-or-later",
354-
"Apache-2": "Apache-2.0",
355-
"BSD": "BSD-3-Clause",
356-
"MIT/X11": "MIT",
353+
"gpl-2": "GPL-2.0-only",
354+
"gpl-2+": "GPL-2.0-or-later",
355+
"gpl-3": "GPL-3.0-only",
356+
"gpl-3+": "GPL-3.0-or-later",
357+
"lgpl-2": "LGPL-2.0-only",
358+
"lgpl-2+": "LGPL-2.0-or-later",
359+
"lgpl-2.1": "LGPL-2.1-only",
360+
"lgpl-2.1+": "LGPL-2.1-or-later",
361+
"lgpl-3": "LGPL-3.0-only",
362+
"lgpl-3+": "LGPL-3.0-or-later",
363+
"apache-2": "Apache-2.0",
364+
"bsd": "BSD-3-Clause",
365+
"mit/x11": "MIT",
366+
"expat": "MIT", // Expat is the MIT license
367+
"mit-1": "MIT",
368+
"mit-style": "MIT",
369+
"psf": "Python-2.0",
370+
"public-domain": "NOASSERTION", // Not a license
371+
"openldap-2.8": "NOASSERTION", // Not in SPDX list
372+
"hylafax": "NOASSERTION", // Not in SPDX list
373+
}
374+
375+
// Check for exact match first (case-insensitive)
376+
if mapped, ok := replacements[licenseLower]; ok {
377+
return mapped
357378
}
358379

380+
// Check for prefix match (case-insensitive)
359381
for old, new := range replacements {
360-
if strings.HasPrefix(license, old) {
382+
if strings.HasPrefix(licenseLower, old) {
361383
return new
362384
}
363385
}
364386

365-
if license == "" {
387+
// Check if it looks like a valid SPDX identifier (only letters, numbers, dots, hyphens)
388+
// Valid SPDX IDs don't contain: commas, parentheses, quotes, slashes (except in known patterns), spaces in certain contexts
389+
validSPDXPattern := regexp.MustCompile(`^[A-Za-z0-9.\-]+(\s+(AND|OR|WITH)\s+[A-Za-z0-9.\-]+)*$`)
390+
391+
// If it matches valid SPDX pattern, return it
392+
if validSPDXPattern.MatchString(license) {
393+
return license
394+
}
395+
396+
// If it contains copyright statements, full sentences, or invalid characters, return NOASSERTION
397+
invalidPatterns := []string{
398+
"Copyright",
399+
"copyright",
400+
"Permission is hereby",
401+
"The files",
402+
"Formerly,",
403+
"build-aux",
404+
"Portions",
405+
"free software",
406+
"<", // Email addresses
407+
">", // Email addresses
408+
"'", // Apostrophes
409+
",", // Commas in descriptions
410+
}
411+
412+
for _, pattern := range invalidPatterns {
413+
if strings.Contains(license, pattern) {
414+
return "NOASSERTION"
415+
}
416+
}
417+
418+
// If license string is longer than 50 chars, it's probably license text, not an identifier
419+
if len(license) > 50 {
366420
return "NOASSERTION"
367421
}
368422

369-
return license
423+
// Default: if we can't confidently map it, use NOASSERTION
424+
return "NOASSERTION"
370425
}
371426

372427
func sanitizeName(name string) string {

ubuntu-sbom-arm64

2.44 MB
Binary file not shown.

0 commit comments

Comments
 (0)