2525import java .io .Serializable ;
2626import java .net .URI ;
2727import java .net .URISyntaxException ;
28+ import java .nio .ByteBuffer ;
2829import java .nio .charset .StandardCharsets ;
2930import java .util .Arrays ;
3031import java .util .Collections ;
3435import java .util .function .IntPredicate ;
3536import java .util .regex .Pattern ;
3637import java .util .stream .Collectors ;
38+ import java .util .stream .IntStream ;
3739
3840/**
3941 * <p>Package-URL (aka purl) is a "mostly universal" URL to describe a package. A purl is a URL composed of seven components:</p>
@@ -436,18 +438,18 @@ private String canonicalize(boolean coordinatesOnly) {
436438 purl .append ("/" );
437439 }
438440 if (name != null ) {
439- purl .append (uriEncode (name ));
441+ purl .append (percentEncode (name ));
440442 }
441443 if (version != null ) {
442- purl .append ("@" ).append (uriEncode (version ));
444+ purl .append ("@" ).append (percentEncode (version ));
443445 }
444446 if (! coordinatesOnly ) {
445447 if (qualifiers != null && !qualifiers .isEmpty ()) {
446448 purl .append ("?" );
447449 qualifiers .entrySet ().stream ().forEachOrdered (entry -> {
448450 purl .append (toLowerCase (entry .getKey ()));
449451 purl .append ("=" );
450- purl .append (uriEncode (entry .getValue ()));
452+ purl .append (percentEncode (entry .getValue ()));
451453 purl .append ("&" );
452454 });
453455 purl .setLength (purl .length () - 1 );
@@ -459,55 +461,14 @@ private String canonicalize(boolean coordinatesOnly) {
459461 return purl .toString ();
460462 }
461463
462- private static String uriEncode (final String source ) {
463- if (source == null || source .isEmpty ()) {
464- return source ;
465- }
466-
467- byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
468- int length = bytes .length ;
469- int pos = indexOfFirstUnsafeChar (bytes );
470-
471- if (pos == -1 ) {
472- return source ;
473- }
474-
475- StringBuilder sb = new StringBuilder (length * 3 );
476- sb .append (source , 0 , pos );
477-
478- for (int i = pos ; i < bytes .length ; i ++) {
479- byte b = bytes [i ];
480-
481- if (isUnreserved (b )) {
482- sb .append ((char ) b );
483- } else {
484- sb .append ('%' );
485- sb .append (Character .toUpperCase (Character .forDigit ((b >> 4 ) & 0xF , 16 )));
486- sb .append (Character .toUpperCase (Character .forDigit (b & 0xF , 16 )));
487- }
488- }
489-
490- return sb .toString ();
491- }
492-
493- private static int indexOfFirstUnsafeChar (final byte [] bytes ) {
494- final int length = bytes .length ;
495- int pos = -1 ;
496-
497- for (int i = 0 ; i < length ; i ++) {
498- if (!isUnreserved (bytes [i ])) {
499- pos = i ;
500- break ;
501- }
502- }
503-
504- return pos ;
505- }
506-
507464 private static boolean isUnreserved (int c ) {
508465 return (isValidCharForKey (c ) || c == '~' );
509466 }
510467
468+ private static boolean shouldEncode (int c ) {
469+ return !isUnreserved (c );
470+ }
471+
511472 private static boolean isAlpha (int c ) {
512473 return (isLowerCase (c ) || isUpperCase (c ));
513474 }
@@ -563,39 +524,81 @@ private static String toLowerCase(String s) {
563524 return new String (chars );
564525 }
565526
566- public static String uriDecode (final String source ) {
527+ private static String percentDecode (final String source ) {
567528 if (source == null || source .isEmpty ()) {
568529 return source ;
569530 }
570531
571- int percent = source .indexOf ('%' );
532+ byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
533+ int percentCharCount = getPercentCharCount (bytes );
572534
573- if (percent == - 1 ) {
535+ if (percentCharCount == 0 ) {
574536 return source ;
575537 }
576538
577- byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
578539 int length = bytes .length ;
579- ByteArrayOutputStream buffer = new ByteArrayOutputStream (length );
580- buffer . write ( bytes , 0 , percent );
540+ int capacity = (length + percentCharCount ) - ( percentCharCount * 3 );
541+ ByteBuffer buffer = ByteBuffer . allocate ( capacity );
581542
582- for (int i = percent ; i < length ; i ++) {
543+ for (int i = 0 ; i < length ; i ++) {
583544 int b = bytes [i ];
584545
585546 if (b == '%' ) {
586- if (i + 2 >= length ) {
587- return null ;
588- }
547+ int b1 = Character .digit (bytes [++i ], 16 );
548+ int b2 = Character .digit (bytes [++i ], 16 );
549+ buffer .put ((byte ) ((b1 << 4 ) + b2 ));
550+ } else {
551+ buffer .put ((byte ) b );
552+ }
553+ }
554+
555+ return new String (buffer .array (),StandardCharsets .UTF_8 );
556+ }
557+
558+ @ Deprecated
559+ public String uriDecode (final String source ) {
560+ return percentDecode (source );
561+ }
562+
563+ private static int getUnsafeCharCount (final byte [] bytes ) {
564+ return (int ) IntStream .range (0 , bytes .length ).map (i -> bytes [i ]).filter (PackageURL ::shouldEncode ).count ();
565+ }
589566
590- int b1 = Character .digit (bytes [++i ], 16 );
591- int b2 = Character .digit (bytes [++i ], 16 );
592- buffer .write ((char ) ((b1 << 4 ) + b2 ));
567+ private static boolean isPercent (int c ) {
568+ return (c == '%' );
569+ }
570+
571+ private static int getPercentCharCount (final byte [] bytes ) {
572+ return (int ) IntStream .range (0 , bytes .length ).map (i -> bytes [i ]).filter (PackageURL ::isPercent ).count ();
573+ }
574+
575+ private static String percentEncode (final String source ) {
576+ if (source == null || source .isEmpty ()) {
577+ return source ;
578+ }
579+
580+ byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
581+ int unsafeCharCount = getUnsafeCharCount (bytes );
582+
583+ if (unsafeCharCount == 0 ) {
584+ return source ;
585+ }
586+
587+ int length = bytes .length ;
588+ int capacity = (length - unsafeCharCount ) + (3 * unsafeCharCount );
589+ ByteBuffer bb = ByteBuffer .allocate (capacity );
590+
591+ for (byte b : bytes ) {
592+ if (shouldEncode (b )) {
593+ bb .put ((byte ) '%' );
594+ bb .put ((byte ) Character .toUpperCase (Character .forDigit ((b >> 4 ) & 0xF , 16 )));
595+ bb .put ((byte ) Character .toUpperCase (Character .forDigit (b & 0xF , 16 )));
593596 } else {
594- buffer . write (b );
597+ bb . put (b );
595598 }
596599 }
597600
598- return new String (buffer . toByteArray (), StandardCharsets .UTF_8 );
601+ return new String (bb . array (), StandardCharsets .UTF_8 );
599602 }
600603
601604 /**
@@ -749,7 +752,7 @@ private String[] parsePath(final String value, final boolean isSubpath) {
749752 }
750753
751754 private String encodePath (final String path ) {
752- return Arrays .stream (path .split ("/" )).map (segment -> uriEncode (segment )).collect (Collectors .joining ("/" ));
755+ return Arrays .stream (path .split ("/" )).map (segment -> percentEncode (segment )).collect (Collectors .joining ("/" ));
753756 }
754757
755758 /**
0 commit comments