Skip to content

Commit 78f897b

Browse files
committed
Fix URL encoding and decoding
The methods `uriEncode` and `uriDecode` did not properly handle percent-encoding. In particular, `uriEncode` did not properly output two uppercase hex digits, and `uriDecode` did not properly handle non-ASCII characters. Additionally, if no percent-encoding was performed, these methods will now return the original string. Fixes #150. Closes #153. Fixes #154.
1 parent dd5f743 commit 78f897b

File tree

2 files changed

+157
-60
lines changed

2 files changed

+157
-60
lines changed

src/main/java/com/github/packageurl/PackageURL.java

Lines changed: 132 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.io.Serializable;
2727
import java.net.URI;
2828
import java.net.URISyntaxException;
29+
import java.nio.ByteBuffer;
2930
import java.nio.charset.StandardCharsets;
3031
import java.util.Arrays;
3132
import java.util.Collections;
@@ -34,6 +35,7 @@
3435
import java.util.TreeMap;
3536
import java.util.function.IntPredicate;
3637
import java.util.stream.Collectors;
38+
import java.util.stream.IntStream;
3739
import org.jspecify.annotations.Nullable;
3840

3941
/**
@@ -53,9 +55,10 @@
5355
* @since 1.0.0
5456
*/
5557
public final class PackageURL implements Serializable {
56-
5758
private static final long serialVersionUID = 3243226021636427586L;
5859

60+
private static final char PERCENT_CHAR = '%';
61+
5962
/**
6063
* Constructs a new PackageURL object by parsing the specified string.
6164
*
@@ -494,35 +497,14 @@ private String canonicalize(boolean coordinatesOnly) {
494497
return purl.toString();
495498
}
496499

497-
/**
498-
* Encodes the input in conformance with RFC 3986.
499-
*
500-
* @param input the String to encode
501-
* @return an encoded String
502-
*/
503-
private String percentEncode(final String input) {
504-
if (input.isEmpty()) {
505-
return input;
506-
}
507-
508-
StringBuilder builder = new StringBuilder();
509-
for (byte b : input.getBytes(StandardCharsets.UTF_8)) {
510-
if (isUnreserved(b)) {
511-
builder.append((char) b);
512-
}
513-
else {
514-
// Substitution: A '%' followed by the hexadecimal representation of the ASCII value of the replaced character
515-
builder.append('%');
516-
builder.append(Integer.toHexString(b).toUpperCase());
517-
}
518-
}
519-
return builder.toString();
520-
}
521-
522500
private static boolean isUnreserved(int c) {
523501
return (isValidCharForKey(c) || c == '~');
524502
}
525503

504+
private static boolean shouldEncode(int c) {
505+
return !isUnreserved(c);
506+
}
507+
526508
private static boolean isAlpha(int c) {
527509
return (isLowerCase(c) || isUpperCase(c));
528510
}
@@ -578,43 +560,134 @@ private static String toLowerCase(String s) {
578560
return new String(chars);
579561
}
580562

581-
/**
582-
* Optionally decodes a String, if it's encoded. If String is not encoded,
583-
* method will return the original input value.
584-
*
585-
* @param input the value String to decode
586-
* @return a decoded String
587-
*/
588-
private String percentDecode(final String input) {
589-
final String decoded = uriDecode(input);
590-
if (!decoded.equals(input)) {
591-
return decoded;
563+
private static int indexOfPercentChar(final byte[] bytes, final int start) {
564+
return IntStream.range(start, bytes.length).filter(i -> isPercent(bytes[i])).findFirst().orElse(-1);
565+
}
566+
567+
private static int indexOfUnsafeChar(final byte[] bytes, final int start) {
568+
return IntStream.range(start, bytes.length).filter(i -> shouldEncode(bytes[i])).findFirst().orElse(-1);
569+
}
570+
571+
private static byte percentDecode(final byte[] bytes, final int start) {
572+
if (start + 2 >= bytes.length) {
573+
throw new ValidationException("Incomplete percent encoding at offset " + start + " with value '" + new String(bytes, start, bytes.length - start, StandardCharsets.UTF_8) + "'");
574+
}
575+
576+
int pos1 = start + 1;
577+
byte b1 = bytes[pos1];
578+
int c1 = Character.digit(b1, 16);
579+
580+
if (c1 == -1) {
581+
throw new ValidationException("Invalid percent encoding char 1 at offset " + pos1 + " with value '" + ((char) b1) + "'");
592582
}
593-
return input;
583+
584+
int pos2 = pos1 + 1;
585+
byte b2 = bytes[pos2];
586+
int c2 = Character.digit(bytes[pos2], 16);
587+
588+
if (c2 == -1) {
589+
throw new ValidationException("Invalid percent encoding char 2 at offset " + pos2 + " with value '" + ((char) b2) + "'");
590+
}
591+
592+
return ((byte) ((c1 << 4) + c2));
594593
}
595594

596-
/**
597-
* Decodes a percent-encoded string.
598-
*
599-
* @param source string to decode, not {@code null}
600-
* @return A decoded string
601-
* @throws NullPointerException if {@code source} is {@code null}
602-
*/
603-
public static String uriDecode(String source) {
604-
int length = source.length();
605-
StringBuilder builder = new StringBuilder();
606-
for (int i = 0; i < length; i++) {
607-
if (source.charAt(i) == '%') {
608-
String str = source.substring(i + 1, i + 3);
609-
char c = (char) Integer.parseInt(str, 16);
610-
builder.append(c);
611-
i += 2;
595+
public static String percentDecode(final String source) {
596+
if (source.isEmpty()) {
597+
return source;
598+
}
599+
600+
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
601+
602+
int off = 0;
603+
int idx = indexOfPercentChar(bytes, off);
604+
605+
if (idx == -1) {
606+
return source;
607+
}
608+
609+
ByteBuffer buffer = ByteBuffer.wrap(bytes);
610+
611+
while (true) {
612+
int len = idx - off;
613+
614+
if (len > 0) {
615+
buffer.put(bytes, off, len);
616+
off += len;
612617
}
613-
else {
614-
builder.append(source.charAt(i));
618+
619+
buffer.put(percentDecode(bytes, off));
620+
off += 3;
621+
idx = indexOfPercentChar(bytes, off);
622+
623+
if (idx == -1) {
624+
int rem = bytes.length - off;
625+
626+
if (rem > 0) {
627+
buffer.put(bytes, off, rem);
628+
}
629+
630+
break;
615631
}
616632
}
617-
return builder.toString();
633+
634+
return new String(buffer.array(), 0, buffer.position(), StandardCharsets.UTF_8);
635+
}
636+
637+
@Deprecated
638+
public String uriDecode(final String source) {
639+
return source != null ? percentDecode(source) : null;
640+
}
641+
642+
private static boolean isPercent(int c) {
643+
return (c == PERCENT_CHAR);
644+
}
645+
646+
private static byte[] percentEncode(byte b) {
647+
byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
648+
byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16));
649+
return new byte[] {(byte) PERCENT_CHAR, b1, b2};
650+
}
651+
652+
public static String percentEncode(final String source) {
653+
if (source.isEmpty()) {
654+
return source;
655+
}
656+
657+
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
658+
659+
int off = 0;
660+
int idx = indexOfUnsafeChar(bytes, off);
661+
662+
if (idx == -1) {
663+
return source;
664+
}
665+
666+
ByteBuffer buffer = ByteBuffer.allocate(bytes.length * 3);
667+
668+
while (true) {
669+
int len = idx - off;
670+
671+
if (len > 0) {
672+
buffer.put(bytes, off, len);
673+
off += len;
674+
}
675+
676+
buffer.put(percentEncode(bytes[off++]));
677+
idx = indexOfUnsafeChar(bytes, off);
678+
679+
if (idx == -1) {
680+
int rem = bytes.length - off;
681+
682+
if (rem > 0) {
683+
buffer.put(bytes, off, rem);
684+
}
685+
686+
break;
687+
}
688+
}
689+
690+
return new String(buffer.array(), 0, buffer.position(), StandardCharsets.UTF_8);
618691
}
619692

620693
/**
@@ -758,12 +831,12 @@ private void verifyTypeConstraints(String type, @Nullable String namespace, @Nul
758831
private String[] parsePath(final String path, final boolean isSubpath) {
759832
return Arrays.stream(path.split("/"))
760833
.filter(segment -> !segment.isEmpty() && !(isSubpath && (".".equals(segment) || "..".equals(segment))))
761-
.map(this::percentDecode)
834+
.map(PackageURL::percentDecode)
762835
.toArray(String[]::new);
763836
}
764837

765838
private String encodePath(final String path) {
766-
return Arrays.stream(path.split("/")).map(this::percentEncode).collect(Collectors.joining("/"));
839+
return Arrays.stream(path.split("/")).map(PackageURL::percentEncode).collect(Collectors.joining("/"));
767840
}
768841

769842
/**
@@ -894,5 +967,4 @@ private StandardTypes() {
894967

895968
}
896969
}
897-
898970
}

src/test/java/com/github/packageurl/PackageURLTest.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,31 @@ static void resetLocale() {
7171
Locale.setDefault(defaultLocale);
7272
}
7373

74+
@Test
75+
void validPercentEncoding() throws MalformedPackageURLException {
76+
PackageURL purl = new PackageURL("maven", "com.google.summit", "summit-ast", "2.2.0\n", null, null);
77+
assertEquals("pkg:maven/com.google.summit/summit-ast@2.2.0%0A", purl.toString());
78+
PackageURL purl2 = new PackageURL("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5");
79+
assertEquals("Мicrosоft.ЕntitуFramеworkСоrе", purl2.getName());
80+
assertEquals("pkg:nuget/%D0%9Cicros%D0%BEft.%D0%95ntit%D1%83Fram%D0%B5work%D0%A1%D0%BEr%D0%B5", purl2.toString());
81+
}
82+
83+
@SuppressWarnings("deprecation")
84+
@Test
85+
void invalidPercentEncoding() throws MalformedPackageURLException {
86+
assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%"));
87+
assertThrows(MalformedPackageURLException.class, () -> new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0%0"));
88+
PackageURL purl = new PackageURL("pkg:maven/com.google.summit/summit-ast@2.2.0");
89+
Throwable t1 = assertThrows(ValidationException.class, () -> purl.uriDecode("%"));
90+
assertEquals("Incomplete percent encoding at offset 0 with value '%'", t1.getMessage());
91+
Throwable t2 = assertThrows(ValidationException.class, () -> purl.uriDecode("a%0"));
92+
assertEquals("Incomplete percent encoding at offset 1 with value '%0'", t2.getMessage());
93+
Throwable t3 = assertThrows(ValidationException.class, () -> purl.uriDecode("aaaa%%0A"));
94+
assertEquals("Invalid percent encoding char 1 at offset 5 with value '%'", t3.getMessage());
95+
Throwable t4 = assertThrows(ValidationException.class, () -> purl.uriDecode("%0G"));
96+
assertEquals("Invalid percent encoding char 2 at offset 2 with value 'G'", t4.getMessage());
97+
}
98+
7499
@Test
75100
void constructorParsing() throws Exception {
76101
for (int i = 0; i < json.length(); i++) {

0 commit comments

Comments
 (0)