Skip to content

Commit 6e30cdc

Browse files
committed
Fix URL encoding and decoding
The methods `uriEncode` and `uriDecode` did not properly handle percent-encoding. In particular, `uriEncode` didn't properly output two uppercase hex digits and `uriDecode` did not properly handle non-ASCII characters. Additionally, if no percent-encoding was performed, these methods will now return the original string. Fixes package-url#150 Closes package-url#153 Fixes package-url#154
1 parent 62ac909 commit 6e30cdc

File tree

2 files changed

+112
-63
lines changed

2 files changed

+112
-63
lines changed

src/main/java/com/github/packageurl/PackageURL.java

Lines changed: 88 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@
2121
*/
2222
package com.github.packageurl;
2323

24+
import java.io.ByteArrayOutputStream;
2425
import java.io.Serializable;
2526
import java.net.URI;
2627
import java.net.URISyntaxException;
27-
import java.nio.charset.Charset;
28+
import java.nio.ByteBuffer;
2829
import java.nio.charset.StandardCharsets;
2930
import java.util.Arrays;
3031
import java.util.Collections;
@@ -34,6 +35,7 @@
3435
import java.util.function.IntPredicate;
3536
import java.util.regex.Pattern;
3637
import java.util.stream.Collectors;
38+
import java.util.stream.IntStream;
3739

3840
/**
3941
* <p>Package-URL (aka purl) is a "mostly universal" URL to describe a package. A purl is a URL composed of seven components:</p>
@@ -459,39 +461,14 @@ private String canonicalize(boolean coordinatesOnly) {
459461
return purl.toString();
460462
}
461463

462-
/**
463-
* Encodes the input in conformance with RFC 3986.
464-
*
465-
* @param input the String to encode
466-
* @return an encoded String
467-
*/
468-
private String percentEncode(final String input) {
469-
return uriEncode(input, StandardCharsets.UTF_8);
470-
}
471-
472-
private static String uriEncode(String source, Charset charset) {
473-
if (source == null || source.isEmpty()) {
474-
return source;
475-
}
476-
477-
StringBuilder builder = new StringBuilder();
478-
for (byte b : source.getBytes(charset)) {
479-
if (isUnreserved(b)) {
480-
builder.append((char) b);
481-
}
482-
else {
483-
// Substitution: A '%' followed by the hexadecimal representation of the ASCII value of the replaced character
484-
builder.append('%');
485-
builder.append(Integer.toHexString(b).toUpperCase());
486-
}
487-
}
488-
return builder.toString();
489-
}
490-
491464
private static boolean isUnreserved(int c) {
492465
return (isValidCharForKey(c) || c == '~');
493466
}
494467

468+
private static boolean shouldEncode(int c) {
469+
return !isUnreserved(c);
470+
}
471+
495472
private static boolean isAlpha(int c) {
496473
return (isLowerCase(c) || isUpperCase(c));
497474
}
@@ -547,42 +524,90 @@ private static String toLowerCase(String s) {
547524
return new String(chars);
548525
}
549526

550-
/**
551-
* Optionally decodes a String, if it's encoded. If String is not encoded,
552-
* method will return the original input value.
553-
*
554-
* @param input the value String to decode
555-
* @return a decoded String
556-
*/
557-
private String percentDecode(final String input) {
558-
if (input == null) {
559-
return null;
560-
}
561-
final String decoded = uriDecode(input);
562-
if (!decoded.equals(input)) {
563-
return decoded;
527+
private static String percentDecode(final String source) {
528+
if (source == null || source.isEmpty()) {
529+
return source;
564530
}
565-
return input;
566-
}
567531

568-
public static String uriDecode(String source) {
569-
if (source == null) {
532+
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
533+
int percentCharCount = getPercentCharCount(bytes);
534+
535+
if (percentCharCount == 0) {
570536
return source;
571537
}
572-
int length = source.length();
573-
StringBuilder builder = new StringBuilder();
538+
539+
int length = bytes.length;
540+
int capacity = (length + percentCharCount) - (percentCharCount * 3);
541+
542+
if (capacity <= 0) {
543+
throw new ValidationException("Invalid encoding in '" + source + "'");
544+
}
545+
546+
ByteBuffer buffer = ByteBuffer.allocate(capacity);
547+
574548
for (int i = 0; i < length; i++) {
575-
if (source.charAt(i) == '%') {
576-
String str = source.substring(i + 1, i + 3);
577-
char c = (char) Integer.parseInt(str, 16);
578-
builder.append(c);
579-
i += 2;
549+
int b = bytes[i];
550+
551+
if (b == '%') {
552+
if (i + 2 < length) {
553+
throw new ValidationException("Invalid encoding in '" + source + "' at position " + i);
554+
}
555+
556+
int b1 = Character.digit(bytes[++i], 16);
557+
int b2 = Character.digit(bytes[++i], 16);
558+
buffer.put((byte) ((b1 << 4) + b2));
559+
} else {
560+
buffer.put((byte) b);
580561
}
581-
else {
582-
builder.append(source.charAt(i));
562+
}
563+
564+
return new String(buffer.array(),StandardCharsets.UTF_8);
565+
}
566+
567+
@Deprecated
568+
public String uriDecode(final String source) {
569+
return percentDecode(source);
570+
}
571+
572+
private static int getUnsafeCharCount(final byte[] bytes) {
573+
return (int) IntStream.range(0, bytes.length).map(i -> bytes[i]).filter(PackageURL::shouldEncode).count();
574+
}
575+
576+
private static boolean isPercent(int c) {
577+
return (c == '%');
578+
}
579+
580+
private static int getPercentCharCount(final byte[] bytes) {
581+
return (int) IntStream.range(0, bytes.length).map(i -> bytes[i]).filter(PackageURL::isPercent).count();
582+
}
583+
584+
private static String percentEncode(final String source) {
585+
if (source == null || source.isEmpty()) {
586+
return source;
587+
}
588+
589+
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
590+
int unsafeCharCount = getUnsafeCharCount(bytes);
591+
592+
if (unsafeCharCount == 0) {
593+
return source;
594+
}
595+
596+
int length = bytes.length;
597+
int capacity = (length - unsafeCharCount) + (3 * unsafeCharCount);
598+
ByteBuffer bb = ByteBuffer.allocate(capacity);
599+
600+
for (byte b : bytes) {
601+
if (shouldEncode(b)) {
602+
bb.put((byte) '%');
603+
bb.put((byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)));
604+
bb.put((byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16)));
605+
} else {
606+
bb.put(b);
583607
}
584608
}
585-
return builder.toString();
609+
610+
return new String(bb.array(), StandardCharsets.UTF_8);
586611
}
587612

588613
/**
@@ -652,16 +677,16 @@ private void parse(final String purl) throws MalformedPackageURLException {
652677
// version is optional - check for existence
653678
index = remainder.lastIndexOf('@');
654679
if (index >= start) {
655-
this.version = validateVersion(percentDecode(remainder.substring(index + 1)));
680+
this.version = validateVersion(uriDecode(remainder.substring(index + 1)));
656681
remainder = remainder.substring(0, index);
657682
}
658683

659684
// The 'remainder' should now consist of an optional namespace and the name
660685
index = remainder.lastIndexOf('/');
661686
if (index <= start) {
662-
this.name = validateName(percentDecode(remainder.substring(start)));
687+
this.name = validateName(uriDecode(remainder.substring(start)));
663688
} else {
664-
this.name = validateName(percentDecode(remainder.substring(index + 1)));
689+
this.name = validateName(uriDecode(remainder.substring(index + 1)));
665690
remainder = remainder.substring(0, index);
666691
this.namespace = validateNamespace(parsePath(remainder.substring(start), false));
667692
}
@@ -712,7 +737,7 @@ private Map<String, String> parseQualifiers(final String encodedString) throws M
712737
final String[] entry = value.split("=", 2);
713738
if (entry.length == 2 && !entry[1].isEmpty()) {
714739
String key = toLowerCase(entry[0]);
715-
if (map.put(key, percentDecode(entry[1])) != null) {
740+
if (map.put(key, uriDecode(entry[1])) != null) {
716741
throw new ValidationException("Duplicate package qualifier encountered. More then one value was specified for " + key);
717742
}
718743
}
@@ -731,7 +756,7 @@ private String[] parsePath(final String value, final boolean isSubpath) {
731756
}
732757
return PATH_SPLITTER.splitAsStream(value)
733758
.filter(segment -> !segment.isEmpty() && !(isSubpath && (".".equals(segment) || "..".equals(segment))))
734-
.map(segment -> percentDecode(segment))
759+
.map(segment -> uriDecode(segment))
735760
.toArray(String[]::new);
736761
}
737762

src/test/java/com/github/packageurl/PackageURLTest.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,30 @@ public static void resetLocale() {
7070
Locale.setDefault(defaultLocale);
7171
}
7272

73+
@Test
74+
public void testEncoding1() throws MalformedPackageURLException {
75+
PackageURL purl = new PackageURL("maven", "com.google.summit", "summit-ast", "2.2.0\n", null, null);
76+
Assert.assertEquals("pkg:maven/com.google.summit/summit-ast@2.2.0%0A", purl.toString());
77+
}
78+
79+
@Test
80+
public void testEncoding2() throws MalformedPackageURLException {
81+
PackageURL purl = new PackageURL("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5");
82+
Assert.assertEquals("Мicrosоft.ЕntitуFramеworkСоrе", purl.getName());
83+
Assert.assertEquals("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5", purl.toString());
84+
}
85+
86+
@SuppressWarnings("deprecation")
87+
@Test
88+
public void testInvalidPercentEncoding() throws MalformedPackageURLException {
89+
Assert.assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%"));
90+
Assert.assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%0"));
91+
PackageURL packageURL = new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0");
92+
Assert.assertThrows(ValidationException.class, () -> packageURL.uriDecode("%"));
93+
Assert.assertThrows(ValidationException.class, () -> packageURL.uriDecode("%0"));
94+
Assert.assertThrows(ValidationException.class, () -> packageURL.uriDecode("abc%0a0%"));
95+
}
96+
7397
@Test
7498
public void testConstructorParsing() throws Exception {
7599
exception = ExpectedException.none();

0 commit comments

Comments
 (0)