4
4
#include " pch.h"
5
5
#include " TermControl.h"
6
6
7
- #include < unicode.hpp>
8
7
#include < LibraryResources.h>
9
8
10
9
#include " TermControlAutomationPeer.h"
11
- #include " ../../types/inc/GlyphWidth.hpp"
12
10
#include " ../../renderer/atlas/AtlasEngine.h"
13
11
14
12
#include " TermControl.g.cpp"
@@ -3208,51 +3206,6 @@ namespace winrt::Microsoft::Terminal::Control::implementation
3208
3206
_core.ClearHoveredCell ();
3209
3207
}
3210
3208
3211
- // Attackers abuse Unicode characters that happen to look similar to ASCII characters. Cyrillic for instance has
3212
- // its own glyphs for а, с, е, о, р, х, and у that look practically identical to their ASCII counterparts.
3213
- // This is called an "IDN homoglyph attack".
3214
- //
3215
- // But outright showing Punycode URIs only is similarly flawed as they can end up looking similar to valid ASCII URIs.
3216
- // xn--cnn.com for instance looks confusingly similar to cnn.com, but actually represents U+407E.
3217
- //
3218
- // An optimal solution would detect any URI that contains homoglyphs and show them in their Punycode form.
3219
- // Such a detector however is not quite trivial and requires constant maintenance, which this project's
3220
- // maintainers aren't currently well equipped to handle. As such we do the next best thing and show the
3221
- // Punycode encoding side-by-side with the Unicode string for any IDN.
3222
- static winrt::hstring sanitizeURI (winrt::hstring uri)
3223
- {
3224
- if (uri.empty ())
3225
- {
3226
- return uri;
3227
- }
3228
-
3229
- wchar_t punycodeBuffer[256 ];
3230
- wchar_t unicodeBuffer[256 ];
3231
-
3232
- // These functions return int, but are documented to only return positive numbers.
3233
- // Better make sure though. It allows us to pass punycodeLength right into IdnToUnicode.
3234
- const auto punycodeLength = std::max (0 , IdnToAscii (0 , uri.data (), gsl::narrow<int >(uri.size ()), &punycodeBuffer[0 ], 256 ));
3235
- const auto unicodeLength = std::max (0 , IdnToUnicode (0 , &punycodeBuffer[0 ], punycodeLength, &unicodeBuffer[0 ], 256 ));
3236
-
3237
- if (punycodeLength <= 0 || unicodeLength <= 0 )
3238
- {
3239
- return RS_ (L" InvalidUri" );
3240
- }
3241
-
3242
- const std::wstring_view punycode{ &punycodeBuffer[0 ], gsl::narrow_cast<size_t >(punycodeLength) };
3243
- const std::wstring_view unicode{ &unicodeBuffer[0 ], gsl::narrow_cast<size_t >(unicodeLength) };
3244
-
3245
- // IdnToAscii/IdnToUnicode return the input string as is if it's all
3246
- // plain ASCII. But we don't know if the input URI is Punycode or not.
3247
- // --> It's non-Punycode and ASCII if it round-trips.
3248
- if (uri == punycode && uri == unicode)
3249
- {
3250
- return uri;
3251
- }
3252
-
3253
- return winrt::hstring{ fmt::format (FMT_COMPILE (L" {}\n ({})" ), punycode, unicode) };
3254
- }
3255
-
3256
3209
void TermControl::_hoveredHyperlinkChanged (const IInspectable& /* sender*/ , const IInspectable& /* args*/ )
3257
3210
{
3258
3211
const auto lastHoveredCell = _core.HoveredCell ();
@@ -3261,12 +3214,48 @@ namespace winrt::Microsoft::Terminal::Control::implementation
3261
3214
return ;
3262
3215
}
3263
3216
3264
- const auto uriText = sanitizeURI ( _core.HoveredUriText () );
3217
+ auto uriText = _core.HoveredUriText ();
3265
3218
if (uriText.empty ())
3266
3219
{
3267
3220
return ;
3268
3221
}
3269
3222
3223
+ // Attackers abuse Unicode characters that happen to look similar to ASCII characters. Cyrillic for instance has
3224
+ // its own glyphs for а, с, е, о, р, х, and у that look practically identical to their ASCII counterparts.
3225
+ // This is called an "IDN homoglyph attack".
3226
+ //
3227
+ // But outright showing Punycode URIs only is similarly flawed as they can end up looking similar to valid ASCII URIs.
3228
+ // xn--cnn.com for instance looks confusingly similar to cnn.com, but actually represents U+407E.
3229
+ //
3230
+ // An optimal solution would detect any URI that contains homoglyphs and show them in their Punycode form.
3231
+ // Such a detector however is not quite trivial and requires constant maintenance, which this project's
3232
+ // maintainers aren't currently well equipped to handle. As such we do the next best thing and show the
3233
+ // Punycode encoding side-by-side with the Unicode string for any IDN.
3234
+ try
3235
+ {
3236
+ // DisplayUri/Iri drop authentication credentials, which is probably great, but AbsoluteCanonicalUri()
3237
+ // is the only getter that returns a punycode encoding of the URL. AbsoluteUri() is the only possible
3238
+ // counterpart, but as the name indicates, we'll end up hitting the != below for any non-canonical URL.
3239
+ //
3240
+ // This issue can be fixed by using the IUrl API from urlmon.h directly, which the WinRT API simply wraps.
3241
+ // IUrl is a very complex system with a ton of useful functionality, but we don't rely on it (and neither does WinRT),
3242
+ // so we could alternatively use its underlying API in wininet.h (InternetCrackUrlW, etc.).
3243
+ // That API however is rather difficult to use for such seldom executed code.
3244
+ const Windows::Foundation::Uri uri{ uriText };
3245
+ const auto unicode = uri.AbsoluteUri ();
3246
+ const auto punycode = uri.AbsoluteCanonicalUri ();
3247
+
3248
+ if (punycode != unicode)
3249
+ {
3250
+ const auto text = fmt::format (FMT_COMPILE (L" {}\n ({})" ), punycode, unicode);
3251
+ uriText = winrt::hstring{ text };
3252
+ }
3253
+ }
3254
+ catch (...)
3255
+ {
3256
+ uriText = RS_ (L" InvalidUri" );
3257
+ }
3258
+
3270
3259
const auto panel = SwapChainPanel ();
3271
3260
const auto scale = panel.CompositionScaleX ();
3272
3261
const auto offset = panel.ActualOffset ();
0 commit comments