Skip to content

Commit 11dcee0

Browse files
Arturo Bernal (arturobernalg)
Arturo Bernal
authored and committed
Bug Fix: Align URIBuilder encoding with RFC 3986
Previously, URIBuilder relied on partial or inconsistent character sets for its various components, so valid sub-delimiters and other characters were either unnecessarily percent-encoded or left unencoded in the fragment, query, userinfo, reg-name, and path segments. This patch introduces a dedicated BitSet for each URI component (userinfo, host/reg-name, path segments, query, and fragment) in PercentCodec and updates URIBuilder to use them. As a result, characters such as ':', '@', '/', and '?' remain unencoded in the fragment and query, as RFC 3986 allows, while certain sub-delimiters are now deliberately percent-encoded for strictness: '(', ')', and '&' in path segments, and '!' in the host. This ensures consistent, RFC 3986–compliant encoding across all URI components.
1 parent 3426b90 commit 11dcee0

File tree

3 files changed

+77
-7
lines changed

3 files changed

+77
-7
lines changed

httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java

+41
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,16 @@ public class PercentCodec {
8484
URIC.or(UNRESERVED);
8585
}
8686

87+
static final BitSet FRAGMENT_SAFE = new BitSet(256);
88+
static {
89+
FRAGMENT_SAFE.or(UNRESERVED);
90+
FRAGMENT_SAFE.or(SUB_DELIMS);
91+
FRAGMENT_SAFE.set(':');
92+
FRAGMENT_SAFE.set('@');
93+
FRAGMENT_SAFE.set('/');
94+
FRAGMENT_SAFE.set('?');
95+
}
96+
8797
static final BitSet RFC5987_UNRESERVED = new BitSet(256);
8898

8999
static {
@@ -113,6 +123,37 @@ public class PercentCodec {
113123
RFC5987_UNRESERVED.set('~');
114124
}
115125

126+
static final BitSet PCHAR = new BitSet(256);
127+
static final BitSet USERINFO = new BitSet(256);
128+
static final BitSet REG_NAME = new BitSet(256);
129+
static final BitSet PATH_SEGMENT = new BitSet(256);
130+
static final BitSet QUERY = new BitSet(256);
131+
static final BitSet FRAGMENT = new BitSet(256);
132+
133+
static {
134+
PCHAR.or(UNRESERVED);
135+
PCHAR.or(SUB_DELIMS);
136+
PCHAR.set(':');
137+
PCHAR.set('@');
138+
USERINFO.or(UNRESERVED);
139+
USERINFO.or(SUB_DELIMS);
140+
USERINFO.set(':');
141+
REG_NAME.or(UNRESERVED);
142+
REG_NAME.or(SUB_DELIMS);
143+
REG_NAME.clear('!');
144+
PATH_SEGMENT.or(PCHAR);
145+
QUERY.or(PCHAR);
146+
QUERY.set('/');
147+
QUERY.set('?');
148+
FRAGMENT.or(PCHAR);
149+
FRAGMENT.set('/');
150+
FRAGMENT.set('?');
151+
// Some sub-delims remain encoded (RFC 3986 allows them unencoded, but we choose to be strict).
152+
PATH_SEGMENT.clear('(');
153+
PATH_SEGMENT.clear(')');
154+
PATH_SEGMENT.clear('&');
155+
}
156+
116157
private static final int RADIX = 16;
117158

118159
static void encode(final StringBuilder buf, final CharSequence content, final Charset charset,

httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java

+7-7
Original file line number | Diff line number | Diff line change
@@ -306,7 +306,7 @@ static void formatPath(final StringBuilder buf, final Iterable<String> segments,
306306
if (i > 0 || !rootless) {
307307
buf.append(PATH_SEPARATOR);
308308
}
309-
PercentCodec.encode(buf, segment, charset);
309+
PercentCodec.encode(buf, segment, charset, PercentCodec.PATH_SEGMENT, false);
310310
i++;
311311
}
312312
}
@@ -356,18 +356,18 @@ private String buildString() {
356356
} else if (this.userInfo != null) {
357357
final int idx = this.userInfo.indexOf(':');
358358
if (idx != -1) {
359-
PercentCodec.encode(sb, this.userInfo.substring(0, idx), this.charset);
359+
PercentCodec.encode(sb, this.userInfo.substring(0, idx), this.charset, PercentCodec.USERINFO, false);
360360
sb.append(':');
361-
PercentCodec.encode(sb, this.userInfo.substring(idx + 1), this.charset);
361+
PercentCodec.encode(sb, this.userInfo.substring(idx + 1), this.charset, PercentCodec.USERINFO, false);
362362
} else {
363-
PercentCodec.encode(sb, this.userInfo, this.charset);
363+
PercentCodec.encode(sb, this.userInfo, this.charset, PercentCodec.USERINFO, false);
364364
}
365365
sb.append("@");
366366
}
367367
if (InetAddressUtils.isIPv6(this.host)) {
368368
sb.append("[").append(this.host).append("]");
369369
} else {
370-
PercentCodec.encode(sb, this.host, this.charset);
370+
PercentCodec.encode(sb, this.host, this.charset, PercentCodec.REG_NAME, false);
371371
}
372372
if (this.port >= 0) {
373373
sb.append(":").append(this.port);
@@ -391,14 +391,14 @@ private String buildString() {
391391
formatQuery(sb, this.queryParams, this.charset, false);
392392
} else if (this.query != null) {
393393
sb.append("?");
394-
PercentCodec.encode(sb, this.query, this.charset, PercentCodec.URIC, false);
394+
PercentCodec.encode(sb, this.query, this.charset, PercentCodec.QUERY, false);
395395
}
396396
}
397397
if (this.encodedFragment != null) {
398398
sb.append("#").append(this.encodedFragment);
399399
} else if (this.fragment != null) {
400400
sb.append("#");
401-
PercentCodec.encode(sb, this.fragment, this.charset, PercentCodec.URIC, false);
401+
PercentCodec.encode(sb, this.fragment, this.charset, PercentCodec.FRAGMENT, false);
402402
}
403403
return sb.toString();
404404
}

httpcore5/src/test/java/org/apache/hc/core5/net/TestURIBuilder.java

+29
Original file line number | Diff line number | Diff line change
@@ -997,4 +997,33 @@ void testSetPlusAsBlank() throws Exception {
997997
params = uriBuilder.getQueryParams();
998998
Assertions.assertEquals("hello world", params.get(0).getValue());
999999
}
1000+
1001+
@Test
1002+
void testFragmentEncoding() throws Exception {
1003+
final String fragment = "frag ment:!@/?\"";
1004+
final String expectedEncodedFragment = "frag%20ment:!@/?%22";
1005+
1006+
final URI uri = new URIBuilder()
1007+
.setScheme("http")
1008+
.setHost("example.com")
1009+
.setFragment(fragment)
1010+
.build();
1011+
1012+
Assertions.assertEquals(expectedEncodedFragment, uri.getRawFragment());
1013+
}
1014+
1015+
@Test
1016+
void testCustomQueryEncoding() throws Exception {
1017+
final String query = "query param:!@/?\"";
1018+
final String expectedEncodedQuery = "query%20param:!@/?%22";
1019+
1020+
final URI uri = new URIBuilder()
1021+
.setScheme("http")
1022+
.setHost("example.com")
1023+
.setCustomQuery(query)
1024+
.build();
1025+
1026+
Assertions.assertEquals(expectedEncodedQuery, uri.getRawQuery());
1027+
}
1028+
10001029
}

0 commit comments

Comments
 (0)