Skip to content

Commit 69232d1

Browse files
committed
Fix URL encoding and decoding
The methods `uriEncode` and `uriDecode` did not properly handle percent-encoding. In particular, `uriEncode` didn't properly output two uppercase hex digits and `uriDecode` did not properly handle non-ASCII characters. Additionally, if no percent-encoding was performed, these methods will now return the original string. Fixes package-url#150 Closes package-url#153 Fixes package-url#154
1 parent 62ac909 commit 69232d1

File tree

2 files changed

+119
-63
lines changed

2 files changed

+119
-63
lines changed

src/main/java/com/github/packageurl/PackageURL.java

Lines changed: 100 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@
2121
*/
2222
package com.github.packageurl;
2323

24+
import java.io.ByteArrayOutputStream;
2425
import java.io.Serializable;
2526
import java.net.URI;
2627
import java.net.URISyntaxException;
27-
import java.nio.charset.Charset;
28+
import java.nio.ByteBuffer;
2829
import java.nio.charset.StandardCharsets;
2930
import java.util.Arrays;
3031
import java.util.Collections;
@@ -34,6 +35,7 @@
3435
import java.util.function.IntPredicate;
3536
import java.util.regex.Pattern;
3637
import java.util.stream.Collectors;
38+
import java.util.stream.IntStream;
3739

3840
/**
3941
* <p>Package-URL (aka purl) is a "mostly universal" URL to describe a package. A purl is a URL composed of seven components:</p>
@@ -55,6 +57,7 @@ public final class PackageURL implements Serializable {
5557

5658
private static final long serialVersionUID = 3243226021636427586L;
5759
private static final Pattern PATH_SPLITTER = Pattern.compile("/");
60+
private static final char PERCENT_CHAR = '%';
5861

5962
/**
6063
* Constructs a new PackageURL object by parsing the specified string.
@@ -459,39 +462,14 @@ private String canonicalize(boolean coordinatesOnly) {
459462
return purl.toString();
460463
}
461464

462-
/**
463-
* Encodes the input in conformance with RFC 3986.
464-
*
465-
* @param input the String to encode
466-
* @return an encoded String
467-
*/
468-
private String percentEncode(final String input) {
469-
return uriEncode(input, StandardCharsets.UTF_8);
470-
}
471-
472-
private static String uriEncode(String source, Charset charset) {
473-
if (source == null || source.isEmpty()) {
474-
return source;
475-
}
476-
477-
StringBuilder builder = new StringBuilder();
478-
for (byte b : source.getBytes(charset)) {
479-
if (isUnreserved(b)) {
480-
builder.append((char) b);
481-
}
482-
else {
483-
// Substitution: A '%' followed by the hexadecimal representation of the ASCII value of the replaced character
484-
builder.append('%');
485-
builder.append(Integer.toHexString(b).toUpperCase());
486-
}
487-
}
488-
return builder.toString();
489-
}
490-
491465
private static boolean isUnreserved(int c) {
492466
return (isValidCharForKey(c) || c == '~');
493467
}
494468

469+
private static boolean shouldEncode(int c) {
470+
return !isUnreserved(c);
471+
}
472+
495473
private static boolean isAlpha(int c) {
496474
return (isLowerCase(c) || isUpperCase(c));
497475
}
@@ -547,42 +525,101 @@ private static String toLowerCase(String s) {
547525
return new String(chars);
548526
}
549527

550-
/**
551-
* Optionally decodes a String, if it's encoded. If String is not encoded,
552-
* method will return the original input value.
553-
*
554-
* @param input the value String to decode
555-
* @return a decoded String
556-
*/
557-
private String percentDecode(final String input) {
558-
if (input == null) {
559-
return null;
560-
}
561-
final String decoded = uriDecode(input);
562-
if (!decoded.equals(input)) {
563-
return decoded;
528+
private static String percentDecode(final String source) {
529+
if (source == null || source.isEmpty()) {
530+
return source;
564531
}
565-
return input;
566-
}
567532

568-
public static String uriDecode(String source) {
569-
if (source == null) {
533+
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
534+
int percentCharCount = getPercentCharCount(bytes);
535+
536+
if (percentCharCount == 0) {
570537
return source;
571538
}
572-
int length = source.length();
573-
StringBuilder builder = new StringBuilder();
539+
540+
int length = bytes.length;
541+
int capacity = (length + percentCharCount) - (percentCharCount * 3);
542+
543+
if (capacity <= 0) {
544+
throw new ValidationException("Invalid encoding in '" + source + "'");
545+
}
546+
547+
ByteBuffer buffer = ByteBuffer.allocate(capacity);
548+
574549
for (int i = 0; i < length; i++) {
575-
if (source.charAt(i) == '%') {
576-
String str = source.substring(i + 1, i + 3);
577-
char c = (char) Integer.parseInt(str, 16);
578-
builder.append(c);
579-
i += 2;
550+
int b;
551+
552+
if (bytes[i] == PERCENT_CHAR) {
553+
int b1 = Character.digit(bytes[++i], 16);
554+
int b2 = Character.digit(bytes[++i], 16);
555+
b = (byte) ((b1 << 4) + b2);
556+
} else {
557+
b = bytes[i];
558+
}
559+
560+
if (buffer.position() + 1 > capacity) {
561+
throw new ValidationException("Invalid encoding in '" + source + "'");
580562
}
581-
else {
582-
builder.append(source.charAt(i));
563+
564+
buffer.put((byte) b);
565+
}
566+
567+
return new String(buffer.array(),StandardCharsets.UTF_8);
568+
}
569+
570+
@Deprecated
571+
public String uriDecode(final String source) {
572+
return percentDecode(source);
573+
}
574+
575+
private static int getUnsafeCharCount(final byte[] bytes) {
576+
return (int) IntStream.range(0, bytes.length).map(i -> bytes[i]).filter(PackageURL::shouldEncode).count();
577+
}
578+
579+
private static boolean isPercent(int c) {
580+
return (c == PERCENT_CHAR);
581+
}
582+
583+
private static int getPercentCharCount(final byte[] bytes) {
584+
return (int) IntStream.range(0, bytes.length).map(i -> bytes[i]).filter(PackageURL::isPercent).count();
585+
}
586+
587+
private static String percentEncode(final String source) {
588+
if (source == null || source.isEmpty()) {
589+
return source;
590+
}
591+
592+
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
593+
int unsafeCharCount = getUnsafeCharCount(bytes);
594+
595+
if (unsafeCharCount == 0) {
596+
return source;
597+
}
598+
599+
int length = bytes.length;
600+
int capacity = (length - unsafeCharCount) + (3 * unsafeCharCount);
601+
ByteBuffer buffer = ByteBuffer.allocate(capacity);
602+
603+
for (byte b : bytes) {
604+
if (shouldEncode(b)) {
605+
if (buffer.position() + 3 > capacity) {
606+
throw new ValidationException("Invalid encoding in '" + source + "'");
607+
}
608+
609+
byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
610+
byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16));
611+
byte[] encoded = {(byte) PERCENT_CHAR, b1, b2 };
612+
buffer.put(encoded, 0, encoded.length);
613+
} else {
614+
if (buffer.position() + 1 > capacity) {
615+
throw new ValidationException("Invalid encoding in '" + source + "'");
616+
}
617+
618+
buffer.put(b);
583619
}
584620
}
585-
return builder.toString();
621+
622+
return new String(buffer.array(), StandardCharsets.UTF_8);
586623
}
587624

588625
/**
@@ -652,16 +689,16 @@ private void parse(final String purl) throws MalformedPackageURLException {
652689
// version is optional - check for existence
653690
index = remainder.lastIndexOf('@');
654691
if (index >= start) {
655-
this.version = validateVersion(percentDecode(remainder.substring(index + 1)));
692+
this.version = validateVersion(uriDecode(remainder.substring(index + 1)));
656693
remainder = remainder.substring(0, index);
657694
}
658695

659696
// The 'remainder' should now consist of an optional namespace and the name
660697
index = remainder.lastIndexOf('/');
661698
if (index <= start) {
662-
this.name = validateName(percentDecode(remainder.substring(start)));
699+
this.name = validateName(uriDecode(remainder.substring(start)));
663700
} else {
664-
this.name = validateName(percentDecode(remainder.substring(index + 1)));
701+
this.name = validateName(uriDecode(remainder.substring(index + 1)));
665702
remainder = remainder.substring(0, index);
666703
this.namespace = validateNamespace(parsePath(remainder.substring(start), false));
667704
}
@@ -712,7 +749,7 @@ private Map<String, String> parseQualifiers(final String encodedString) throws M
712749
final String[] entry = value.split("=", 2);
713750
if (entry.length == 2 && !entry[1].isEmpty()) {
714751
String key = toLowerCase(entry[0]);
715-
if (map.put(key, percentDecode(entry[1])) != null) {
752+
if (map.put(key, uriDecode(entry[1])) != null) {
716753
throw new ValidationException("Duplicate package qualifier encountered. More then one value was specified for " + key);
717754
}
718755
}
@@ -731,7 +768,7 @@ private String[] parsePath(final String value, final boolean isSubpath) {
731768
}
732769
return PATH_SPLITTER.splitAsStream(value)
733770
.filter(segment -> !segment.isEmpty() && !(isSubpath && (".".equals(segment) || "..".equals(segment))))
734-
.map(segment -> percentDecode(segment))
771+
.map(segment -> uriDecode(segment))
735772
.toArray(String[]::new);
736773
}
737774

src/test/java/com/github/packageurl/PackageURLTest.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,25 @@ public static void resetLocale() {
7070
Locale.setDefault(defaultLocale);
7171
}
7272

73+
@Test
74+
public void testValidPercentEncoding() throws MalformedPackageURLException {
75+
PackageURL purl = new PackageURL("maven", "com.google.summit", "summit-ast", "2.2.0\n", null, null);
76+
Assert.assertEquals("pkg:maven/com.google.summit/summit-ast@2.2.0%0A", purl.toString());
77+
PackageURL purl2 = new PackageURL("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5");
78+
Assert.assertEquals("Мicrosоft.ЕntitуFramеworkСоrе", purl2.getName());
79+
Assert.assertEquals("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5", purl2.toString());
80+
}
81+
82+
@SuppressWarnings("deprecation")
83+
@Test
84+
public void testInvalidPercentEncoding() throws MalformedPackageURLException {
85+
Assert.assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%"));
86+
Assert.assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%0"));
87+
PackageURL packageURL = new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0");
88+
Assert.assertThrows(ValidationException.class, () -> packageURL.uriDecode("%"));
89+
Assert.assertThrows(ValidationException.class, () -> packageURL.uriDecode("%0"));
90+
}
91+
7392
@Test
7493
public void testConstructorParsing() throws Exception {
7594
exception = ExpectedException.none();

0 commit comments

Comments
 (0)