// licenseAliasToSPDX maps lowercase, non-SPDX license spellings to the SPDX
// identifier they stand for. "NOASSERTION" marks spellings that are
// recognised but have no identifier we are willing to assert.
var licenseAliasToSPDX = map[string]string{
	"gpl-2":          "GPL-2.0-only",
	"gpl-2+":         "GPL-2.0-or-later",
	"gpl-3":          "GPL-3.0-only",
	"gpl-3+":         "GPL-3.0-or-later",
	"lgpl-2":         "LGPL-2.0-only",
	"lgpl-2+":        "LGPL-2.0-or-later",
	"lgpl-2.1":       "LGPL-2.1-only",
	"lgpl-2.1+":      "LGPL-2.1-or-later",
	"lgpl-3":         "LGPL-3.0-only",
	"lgpl-3+":        "LGPL-3.0-or-later",
	"apache-2":       "Apache-2.0",
	"bsd":            "BSD-3-Clause",
	"mit/x11":        "MIT",
	"expat":          "MIT", // Expat is the MIT license
	"mit-1":          "MIT",
	"mit-style":      "MIT",
	"psf":            "Python-2.0",
	"public-domain":  "NOASSERTION", // Not a license
	"openldap-2.8":   "NOASSERTION", // Not in SPDX list
	"hylafax":        "NOASSERTION", // Not in SPDX list
}

// spdxCanonicalByLower indexes the identifiers produced by licenseAliasToSPDX
// by their lowercase form, so an input that is already one of those
// identifiers is returned as-is instead of being re-mapped through a shorter
// alias prefix (e.g. "GPL-2.0-or-later" must not become "GPL-2.0-only").
var spdxCanonicalByLower = func() map[string]string {
	byLower := make(map[string]string, len(licenseAliasToSPDX))
	for _, id := range licenseAliasToSPDX {
		if id != "NOASSERTION" {
			byLower[strings.ToLower(id)] = id
		}
	}
	return byLower
}()

// spdxExpressionRE accepts a single identifier made of letters, digits, dots
// and hyphens, optionally chained with upper-case AND / OR / WITH operators.
// It is compiled once at package scope rather than on every call.
var spdxExpressionRE = regexp.MustCompile(`^[A-Za-z0-9.\-]+(\s+(AND|OR|WITH)\s+[A-Za-z0-9.\-]+)*$`)

// normalizeLicense converts a free-form license string into an SPDX license
// identifier, or "NOASSERTION" when no identifier can be derived.
//
// Resolution order, all alias comparisons being case-insensitive:
//  1. Empty / whitespace-only input yields "NOASSERTION".
//  2. An exact alias match yields the mapped identifier.
//  3. An input equal to one of the mapped identifiers yields that identifier
//     in its canonical casing.
//  4. Otherwise the longest alias that is a prefix of the input wins. Picking
//     the longest one makes the result independent of map iteration order
//     ("lgpl-2.1+ ..." resolves via "lgpl-2.1+", never via "lgpl-2").
//  5. Input that is shaped like an SPDX identifier or simple expression is
//     returned unchanged (after trimming).
//  6. Everything else (copyright notices, sentences, license text) yields
//     "NOASSERTION".
//
// NOTE(review): step 4 still rewrites any input that merely starts with an
// alias, so "BSD-2-Clause" becomes "BSD-3-Clause" and "Apache-2.0 OR MIT"
// becomes "Apache-2.0". That behaviour is inherited; confirm whether callers
// rely on it before tightening the match.
func normalizeLicense(license string) string {
	const noAssertion = "NOASSERTION"

	license = strings.TrimSpace(license)
	if license == "" {
		return noAssertion
	}

	lower := strings.ToLower(license)

	if mapped, ok := licenseAliasToSPDX[lower]; ok {
		return mapped
	}
	if canonical, ok := spdxCanonicalByLower[lower]; ok {
		return canonical
	}

	// Two distinct aliases of equal length cannot both be prefixes of the
	// same string, so "longest match" is unique and deterministic.
	bestAlias, bestID := "", ""
	for alias, id := range licenseAliasToSPDX {
		if len(alias) > len(bestAlias) && strings.HasPrefix(lower, alias) {
			bestAlias, bestID = alias, id
		}
	}
	if bestAlias != "" {
		return bestID
	}

	if spdxExpressionRE.MatchString(license) {
		return license
	}

	// Anything that is neither a known alias nor identifier-shaped cannot be
	// mapped with confidence.
	return noAssertion
}
371426
372427func sanitizeName (name string ) string {
0 commit comments