Skip to content
This repository was archived by the owner on Jul 18, 2018. It is now read-only.

Commit 28c894b

Browse files
inexorabletash authored and Commit Bot committed
Text Encoding: Add 'replacement' as label for replacement encoding
The 'replacement' encoding originated as a spec concept to prevent security attacks via problematic encodings by recognizing the label but not decoding the stream. It was initially specified as the only encoding where the name wasn't one of the labels, requiring special cases in all implementations.

Based on more implementer feedback we'd like to remove the special case. Delete the special case code in Blink too.

See also: whatwg/encoding#70

Bug: 744405
Change-Id: Ia15ccef1a9d7f35c23af4509a5a9758cbefc2087
Reviewed-on: https://chromium-review.googlesource.com/559973
Reviewed-by: Kent Tamura <[email protected]>
Commit-Queue: Joshua Bell <[email protected]>
Cr-Commit-Position: refs/heads/master@{#487288}
1 parent 19d792a commit 28c894b

File tree

10 files changed

+9
-41
lines changed

10 files changed

+9
-41
lines changed

third_party/WebKit/LayoutTests/FlagExpectations/enable-blink-features=LayoutNG

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7363,7 +7363,6 @@ crbug.com/591099 fast/encoding/bom-in-content.html [ Failure ]
73637363
crbug.com/591099 fast/encoding/bracket-in-script.html [ Failure ]
73647364
crbug.com/591099 fast/encoding/charset-invalid.html [ Failure ]
73657365
crbug.com/591099 fast/encoding/charset-koi8-u.html [ Failure ]
7366-
crbug.com/591099 fast/encoding/charset-replacement.html [ Failure ]
73677366
crbug.com/591099 fast/encoding/charset-xuser-defined.html [ Crash Failure ]
73687367
crbug.com/591099 fast/encoding/css-charset-default.xhtml [ Failure ]
73697368
crbug.com/591099 fast/encoding/css-charset.html [ Failure ]

third_party/WebKit/LayoutTests/fast/encoding/char-decoding.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@
144144
testDecode('unicodeFFFE', '%D8%69%DE%D6', 'U+D869/U+DED6');
145145

146146
// Replacement encodings should decode non-empty streams as replacement (U+FFFD) then EOF
147-
["csiso2022kr", "hz-gb-2312", "iso-2022-cn", "iso-2022-cn-ext", "iso-2022-kr"]
147+
["replacement", "csiso2022kr", "hz-gb-2312", "iso-2022-cn", "iso-2022-cn-ext", "iso-2022-kr"]
148148
.forEach(function(encoding) {
149149
testDecode(encoding, "", "");
150150
testDecode(encoding, "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD");

third_party/WebKit/LayoutTests/fast/encoding/charset-replacement-expected.txt

Lines changed: 0 additions & 5 deletions
This file was deleted.

third_party/WebKit/LayoutTests/fast/encoding/charset-replacement.html

Lines changed: 0 additions & 10 deletions
This file was deleted.

third_party/WebKit/Source/platform/wtf/text/TextCodecReplacement.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,8 @@ TextCodecReplacement::TextCodecReplacement()
1616

1717
void TextCodecReplacement::RegisterEncodingNames(
1818
EncodingNameRegistrar registrar) {
19-
// The 'replacement' label itself should not be referenceable by
20-
// resources or script - it's a specification convenience - but much of
21-
// the wtf/text API asserts that an encoding name is a label for itself.
22-
// This is handled in TextEncoding by marking it as not valid.
19+
// Taken from the alias table at https://encoding.spec.whatwg.org/
2320
registrar("replacement", "replacement");
24-
2521
registrar("csiso2022kr", "replacement");
2622
registrar("hz-gb-2312", "replacement");
2723
registrar("iso-2022-cn", "replacement");

third_party/WebKit/Source/platform/wtf/text/TextCodecReplacement.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010

1111
namespace WTF {
1212

13+
// The "replacement" encoding exists to prevent attacks that abuse a mismatch
14+
// between encodings supported on the server and the client. The encoder is
15+
// the same as UTF-8; and for a non-empty input the decoder emits U+FFFD and
16+
// terminates. See: https://encoding.spec.whatwg.org/#replacement and
17+
// https://encoding.spec.whatwg.org/#output-encodings
1318
class TextCodecReplacement final : public TextCodecUTF8 {
1419
public:
1520
TextCodecReplacement();

third_party/WebKit/Source/platform/wtf/text/TextCodecReplacementTest.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,8 @@ namespace {
2020
const char* g_replacement_alias = "iso-2022-kr";
2121

2222
TEST(TextCodecReplacement, Aliases) {
23-
// "replacement" is not a valid alias for itself
24-
EXPECT_FALSE(TextEncoding("replacement").IsValid());
25-
EXPECT_FALSE(TextEncoding("rEpLaCeMeNt").IsValid());
23+
EXPECT_TRUE(TextEncoding("replacement").IsValid());
24+
EXPECT_TRUE(TextEncoding("rEpLaCeMeNt").IsValid());
2625

2726
EXPECT_TRUE(TextEncoding(g_replacement_alias).IsValid());
2827
EXPECT_STREQ("replacement", TextEncoding(g_replacement_alias).GetName());

third_party/WebKit/Source/platform/wtf/text/TextEncoding.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,10 @@ static const TextEncoding& UTF7Encoding() {
4444

4545
TextEncoding::TextEncoding(const char* name)
4646
: name_(AtomicCanonicalTextEncodingName(name)) {
47-
// Aliases are valid, but not "replacement" itself.
48-
if (name_ && IsReplacementEncoding(name))
49-
name_ = 0;
5047
}
5148

5249
TextEncoding::TextEncoding(const String& name)
5350
: name_(AtomicCanonicalTextEncodingName(name)) {
54-
// Aliases are valid, but not "replacement" itself.
55-
if (name_ && IsReplacementEncoding(name))
56-
name_ = 0;
5751
}
5852

5953
String TextEncoding::Decode(const char* data,

third_party/WebKit/Source/platform/wtf/text/TextEncodingRegistry.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -222,14 +222,6 @@ static void BuildBaseTextCodecMaps() {
222222
TextCodecUserDefined::RegisterCodecs(AddToTextCodecMap);
223223
}
224224

225-
bool IsReplacementEncoding(const char* alias) {
226-
return alias && !strcasecmp(alias, "replacement");
227-
}
228-
229-
bool IsReplacementEncoding(const String& alias) {
230-
return alias == "replacement";
231-
}
232-
233225
static void ExtendTextCodecMaps() {
234226
TextCodecReplacement::RegisterEncodingNames(AddToTextEncodingNameMap);
235227
TextCodecReplacement::RegisterCodecs(AddToTextCodecMap);

third_party/WebKit/Source/platform/wtf/text/TextEncodingRegistry.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ template <typename CharacterType>
4646
const char* AtomicCanonicalTextEncodingName(const CharacterType*, size_t);
4747
const char* AtomicCanonicalTextEncodingName(const String&);
4848
bool NoExtendedTextEncodingNameUsed();
49-
bool IsReplacementEncoding(const char* alias);
50-
bool IsReplacementEncoding(const String& alias);
5149

5250
#ifndef NDEBUG
5351
void DumpTextEncodingNameMap();

0 commit comments

Comments
 (0)