Skip to content

Commit 62bd76d

Browse files
committed
Added EncodingPolicy enum (ALL_RESERVED, RFC_3986) and switched query and fragment encoding to PercentCodec.FRAGMENT under RFC_3986
1 parent 11dcee0 commit 62bd76d

File tree

3 files changed

+81
-44
lines changed

3 files changed

+81
-44
lines changed

httpcore5/src/main/java/org/apache/hc/core5/net/PercentCodec.java

-15
Original file line numberDiff line numberDiff line change
@@ -84,16 +84,6 @@ public class PercentCodec {
8484
URIC.or(UNRESERVED);
8585
}
8686

87-
static final BitSet FRAGMENT_SAFE = new BitSet(256);
88-
static {
89-
FRAGMENT_SAFE.or(UNRESERVED);
90-
FRAGMENT_SAFE.or(SUB_DELIMS);
91-
FRAGMENT_SAFE.set(':');
92-
FRAGMENT_SAFE.set('@');
93-
FRAGMENT_SAFE.set('/');
94-
FRAGMENT_SAFE.set('?');
95-
}
96-
9787
static final BitSet RFC5987_UNRESERVED = new BitSet(256);
9888

9989
static {
@@ -140,18 +130,13 @@ public class PercentCodec {
140130
USERINFO.set(':');
141131
REG_NAME.or(UNRESERVED);
142132
REG_NAME.or(SUB_DELIMS);
143-
REG_NAME.clear('!');
144133
PATH_SEGMENT.or(PCHAR);
145134
QUERY.or(PCHAR);
146135
QUERY.set('/');
147136
QUERY.set('?');
148137
FRAGMENT.or(PCHAR);
149138
FRAGMENT.set('/');
150139
FRAGMENT.set('?');
151-
// Some sub-delims remain encoded (RFC 3986 allows them unencoded, but we choose to be strict).
152-
PATH_SEGMENT.clear('(');
153-
PATH_SEGMENT.clear(')');
154-
PATH_SEGMENT.clear('&');
155140
}
156141

157142
private static final int RADIX = 16;

httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java

+79-14
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import java.nio.charset.StandardCharsets;
3535
import java.util.ArrayList;
3636
import java.util.Arrays;
37+
import java.util.BitSet;
3738
import java.util.Collections;
3839
import java.util.LinkedList;
3940
import java.util.List;
@@ -87,9 +88,36 @@ public static URIBuilder loopbackAddress() {
8788
private Charset charset;
8889
private String fragment;
8990
private String encodedFragment;
91+
private EncodingPolicy encodingPolicy = EncodingPolicy.ALL_RESERVED;
9092

9193
private boolean plusAsBlank;
9294

95+
/**
96+
* Defines the encoding policy for URI components in {@link URIBuilder}.
97+
* This enum controls how characters are percent-encoded when constructing a URI,
98+
* allowing flexibility between strict encoding and RFC 3986-compliant behavior.
99+
*
100+
* @since 5.4
101+
*/
102+
public enum EncodingPolicy {
103+
/**
104+
* Encodes all reserved characters, allowing only unreserved characters
105+
* (ALPHA, DIGIT, "-", ".", "_", "~") to remain unencoded. This is a strict
106+
* policy suitable for conservative URI production where maximum encoding
107+
* is desired.
108+
*/
109+
ALL_RESERVED,
110+
111+
/**
112+
* Follows RFC 3986 component-specific encoding rules. For example, query and
113+
* fragment components allow unreserved characters, sub-delimiters ("!", "$",
114+
* "&", "'", "(", ")", "*", "+", ",", ";", "="), and additional characters
115+
* (":", "@", "/", "?") to remain unencoded, as defined by {@code PercentCodec.FRAGMENT}.
116+
* This policy ensures compliance with RFC 3986 while maintaining interoperability.
117+
*/
118+
RFC_3986
119+
}
120+
93121
/**
94122
* Constructs an empty instance.
95123
*/
@@ -175,6 +203,22 @@ public URIBuilder setCharset(final Charset charset) {
175203
return this;
176204
}
177205

206+
/**
207+
* Sets the encoding policy for this {@link URIBuilder}.
208+
* The encoding policy determines how URI components (e.g., query, fragment) are
209+
* percent-encoded when building the URI string. If not set, the default policy
210+
* is {@link EncodingPolicy#ALL_RESERVED}.
211+
*
212+
* @param encodingPolicy the encoding policy to apply, or {@code null} to reset
213+
* to the default ({@link EncodingPolicy#ALL_RESERVED})
214+
* @return this {@link URIBuilder} instance for method chaining
215+
* @since 5.4
216+
*/
217+
public URIBuilder setEncodingPolicy(final EncodingPolicy encodingPolicy) {
218+
// A null argument resets to the documented default; storing null would make every
// "== ALL_RESERVED" check in buildString() false and silently select RFC_3986 encoding.
this.encodingPolicy = encodingPolicy != null ? encodingPolicy : EncodingPolicy.ALL_RESERVED;
219+
return this;
220+
}
221+
178222
/**
179223
* Gets the authority.
180224
*
@@ -300,33 +344,46 @@ static List<String> parsePath(final CharSequence s, final Charset charset) {
300344
return list;
301345
}
302346

303-
static void formatPath(final StringBuilder buf, final Iterable<String> segments, final boolean rootless, final Charset charset) {
347+
static void formatPath(final StringBuilder buf, final Iterable<String> segments, final boolean rootless,
348+
final Charset charset, final BitSet safechars) {
304349
int i = 0;
305350
for (final String segment : segments) {
306351
if (i > 0 || !rootless) {
307352
buf.append(PATH_SEPARATOR);
308353
}
309-
PercentCodec.encode(buf, segment, charset, PercentCodec.PATH_SEGMENT, false);
354+
PercentCodec.encode(buf, segment, charset, safechars, false);
310355
i++;
311356
}
312357
}
313358

314-
static void formatQuery(final StringBuilder buf, final Iterable<? extends NameValuePair> params, final Charset charset,
315-
final boolean blankAsPlus) {
359+
static void formatPath(final StringBuilder buf, final Iterable<String> segments, final boolean rootless,
360+
final Charset charset) {
361+
formatPath(buf, segments, rootless, charset, PercentCodec.UNRESERVED);
362+
}
363+
364+
365+
static void formatQuery(final StringBuilder buf, final Iterable<? extends NameValuePair> params,
366+
final Charset charset, final BitSet safechars, final boolean blankAsPlus) {
316367
int i = 0;
317368
for (final NameValuePair parameter : params) {
318369
if (i > 0) {
319370
buf.append(QUERY_PARAM_SEPARATOR);
320371
}
321-
PercentCodec.encode(buf, parameter.getName(), charset, blankAsPlus);
372+
PercentCodec.encode(buf, parameter.getName(), charset, safechars, blankAsPlus);
322373
if (parameter.getValue() != null) {
323374
buf.append(PARAM_VALUE_SEPARATOR);
324-
PercentCodec.encode(buf, parameter.getValue(), charset, blankAsPlus);
375+
PercentCodec.encode(buf, parameter.getValue(), charset, safechars, blankAsPlus);
325376
}
326377
i++;
327378
}
328379
}
329380

381+
static void formatQuery(final StringBuilder buf, final Iterable<? extends NameValuePair> params,
382+
final Charset charset, final boolean blankAsPlus) {
383+
formatQuery(buf, params, charset, PercentCodec.UNRESERVED, blankAsPlus);
384+
}
385+
386+
330387
/**
331388
* Builds a {@link URI} instance.
332389
*/
@@ -356,18 +413,22 @@ private String buildString() {
356413
} else if (this.userInfo != null) {
357414
final int idx = this.userInfo.indexOf(':');
358415
if (idx != -1) {
359-
PercentCodec.encode(sb, this.userInfo.substring(0, idx), this.charset, PercentCodec.USERINFO, false);
416+
PercentCodec.encode(sb, this.userInfo.substring(0, idx), this.charset,
417+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false);
360418
sb.append(':');
361-
PercentCodec.encode(sb, this.userInfo.substring(idx + 1), this.charset, PercentCodec.USERINFO, false);
419+
PercentCodec.encode(sb, this.userInfo.substring(idx + 1), this.charset,
420+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false);
362421
} else {
363-
PercentCodec.encode(sb, this.userInfo, this.charset, PercentCodec.USERINFO, false);
422+
PercentCodec.encode(sb, this.userInfo, this.charset,
423+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.USERINFO, false);
364424
}
365425
sb.append("@");
366426
}
367427
if (InetAddressUtils.isIPv6(this.host)) {
368428
sb.append("[").append(this.host).append("]");
369429
} else {
370-
PercentCodec.encode(sb, this.host, this.charset, PercentCodec.REG_NAME, false);
430+
PercentCodec.encode(sb, this.host, this.charset,
431+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.REG_NAME, false);
371432
}
372433
if (this.port >= 0) {
373434
sb.append(":").append(this.port);
@@ -382,23 +443,27 @@ private String buildString() {
382443
}
383444
sb.append(this.encodedPath);
384445
} else if (this.pathSegments != null) {
385-
formatPath(sb, this.pathSegments, !authoritySpecified && this.pathRootless, this.charset);
446+
formatPath(sb, this.pathSegments, !authoritySpecified && this.pathRootless, this.charset,
447+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.PATH_SEGMENT);
386448
}
387449
if (this.encodedQuery != null) {
388450
sb.append("?").append(this.encodedQuery);
389451
} else if (this.queryParams != null && !this.queryParams.isEmpty()) {
390452
sb.append("?");
391-
formatQuery(sb, this.queryParams, this.charset, false);
453+
formatQuery(sb, this.queryParams, this.charset,
454+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.UNRESERVED : PercentCodec.QUERY, false);
392455
} else if (this.query != null) {
393456
sb.append("?");
394-
PercentCodec.encode(sb, this.query, this.charset, PercentCodec.QUERY, false);
457+
PercentCodec.encode(sb, this.query, this.charset,
458+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.URIC : PercentCodec.QUERY, false);
395459
}
396460
}
397461
if (this.encodedFragment != null) {
398462
sb.append("#").append(this.encodedFragment);
399463
} else if (this.fragment != null) {
400464
sb.append("#");
401-
PercentCodec.encode(sb, this.fragment, this.charset, PercentCodec.FRAGMENT, false);
465+
PercentCodec.encode(sb, this.fragment, this.charset,
466+
encodingPolicy == EncodingPolicy.ALL_RESERVED ? PercentCodec.URIC : PercentCodec.FRAGMENT, false);
402467
}
403468
return sb.toString();
404469
}

httpcore5/src/test/java/org/apache/hc/core5/net/TestURIBuilder.java

+2-15
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ void testFormatQuery() {
171171
@Test
172172
void testHierarchicalUri() throws Exception {
173173
final URI uri = new URI("http", "stuff", "localhost", 80, "/some stuff", "param=stuff", "fragment");
174-
final URIBuilder uribuilder = new URIBuilder(uri);
174+
final URIBuilder uribuilder = new URIBuilder(uri).setEncodingPolicy(URIBuilder.EncodingPolicy.ALL_RESERVED);
175175
final URI result = uribuilder.build();
176176
Assertions.assertEquals(new URI("http://stuff@localhost:80/some%20stuff?param=stuff#fragment"), result);
177177
}
@@ -998,20 +998,6 @@ void testSetPlusAsBlank() throws Exception {
998998
Assertions.assertEquals("hello world", params.get(0).getValue());
999999
}
10001000

1001-
@Test
1002-
void testFragmentEncoding() throws Exception {
1003-
final String fragment = "frag ment:!@/?\"";
1004-
final String expectedEncodedFragment = "frag%20ment:!@/?%22";
1005-
1006-
final URI uri = new URIBuilder()
1007-
.setScheme("http")
1008-
.setHost("example.com")
1009-
.setFragment(fragment)
1010-
.build();
1011-
1012-
Assertions.assertEquals(expectedEncodedFragment, uri.getRawFragment());
1013-
}
1014-
10151001
@Test
10161002
void testCustomQueryEncoding() throws Exception {
10171003
final String query = "query param:!@/?\"";
@@ -1021,6 +1007,7 @@ void testCustomQueryEncoding() throws Exception {
10211007
.setScheme("http")
10221008
.setHost("example.com")
10231009
.setCustomQuery(query)
1010+
.setEncodingPolicy(URIBuilder.EncodingPolicy.RFC_3986)
10241011
.build();
10251012

10261013
Assertions.assertEquals(expectedEncodedQuery, uri.getRawQuery());

0 commit comments

Comments
 (0)