helix-editor · alexrutar · Dec 9, 2024 · Dec 9, 2024
diff --git a/matcher/src/pattern.rs b/matcher/src/pattern.rs
@@ -122,17 +122,30 @@ impl Atom {
             normalize = false;
         }
         let needle = if needle.is_ascii() {
-            let mut needle = if escape_whitespace {
-                if let Some((start, rem)) = needle.split_once("\\ ") {
-                    let mut needle = start.to_owned();
-                    for rem in rem.split("\\ ") {
-                        needle.push(' ');
-                        needle.push_str(rem);
+            let mut needle_string = if escape_whitespace {
+                let mut needle_bytes = Vec::with_capacity(needle.len());
+                let mut saw_backslash = false;
+                for c in needle.bytes() {
+                    if saw_backslash {
+                        if c.is_ascii_whitespace() {
+                            needle_bytes.push(c);
+                            saw_backslash = false;
+                            continue;
+                        } else {
+                            needle_bytes.push(b'\\');
+                        }
                     }
-                    needle
-                } else {
-                    needle.to_owned()
+                    saw_backslash = c == b'\\';
+                    if !saw_backslash {
+                        needle_bytes.push(c);
+                    }
+                }
+                // push the potentially trailing backslash
+                if saw_backslash {
+                    needle_bytes.push(b'\\');
                 }
+                // SAFETY: `needle` is ASCII and we push only its bytes or b'\\', so this is valid UTF-8
+                unsafe { String::from_utf8_unchecked(needle_bytes) }
             } else {
                 needle.to_owned()
             };
@@ -141,18 +154,19 @@ impl Atom {
                 #[cfg(feature = "unicode-casefold")]
                 CaseMatching::Ignore => {
                     ignore_case = true;
-                    needle.make_ascii_lowercase()
+                    needle_string.make_ascii_lowercase()
                 }
                 #[cfg(feature = "unicode-casefold")]
                 CaseMatching::Smart => {
-                    ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
+                    ignore_case = !needle_string.bytes().any(|b| b.is_ascii_uppercase())
                 }
                 CaseMatching::Respect => ignore_case = false,
             }
+
             if append_dollar {
-                needle.push('$');
+                needle_string.push('$');
             }
-            Utf32String::Ascii(needle.into_boxed_str())
+            Utf32String::Ascii(needle_string.into_boxed_str())
         } else {
             let mut needle_ = Vec::with_capacity(needle.len());
             #[cfg(feature = "unicode-casefold")]
@@ -171,32 +185,38 @@ impl Atom {
                 let mut saw_backslash = false;
                 for mut c in chars::graphemes(needle) {
                     if saw_backslash {
-                        if c == ' ' {
-                            needle_.push(' ');
+                        if c.is_whitespace() {
+                            needle_.push(c);
                             saw_backslash = false;
                             continue;
                         } else {
                             needle_.push('\\');
                         }
                     }
                     saw_backslash = c == '\\';
-                    match case {
-                        #[cfg(feature = "unicode-casefold")]
-                        CaseMatching::Ignore => c = chars::to_lower_case(c),
-                        #[cfg(feature = "unicode-casefold")]
-                        CaseMatching::Smart => {
-                            ignore_case = ignore_case && !chars::is_upper_case(c)
+                    if !saw_backslash {
+                        match case {
+                            #[cfg(feature = "unicode-casefold")]
+                            CaseMatching::Ignore => c = chars::to_lower_case(c),
+                            #[cfg(feature = "unicode-casefold")]
+                            CaseMatching::Smart => {
+                                ignore_case = ignore_case && !chars::is_upper_case(c)
+                            }
+                            CaseMatching::Respect => (),
                         }
-                        CaseMatching::Respect => (),
-                    }
-                    match normalization {
-                        #[cfg(feature = "unicode-normalization")]
-                        Normalization::Smart => {
-                            normalize = normalize && chars::normalize(c) == c;
+                        match normalization {
+                            #[cfg(feature = "unicode-normalization")]
+                            Normalization::Smart => {
+                                normalize = normalize && chars::normalize(c) == c;
+                            }
+                            Normalization::Never => (),
                         }
-                        Normalization::Never => (),
+                        needle_.push(c);
                     }
-                    needle_.push(c);
+                }
+                // push the potentially trailing backslash
+                if saw_backslash {
+                    needle_.push('\\');
                 }
             } else {
                 let chars = chars::graphemes(needle).map(|mut c| {

diff --git a/matcher/src/pattern/tests.rs b/matcher/src/pattern/tests.rs
@@ -85,8 +85,38 @@ fn case_matching() {
 
 #[test]
 fn escape() {
+    // a backslash escapes only whitespace (and the special `!`, `^`, `$` characters tested below)
     let pat = Atom::parse("foo\\ bar", CaseMatching::Smart, Normalization::Smart);
     assert_eq!(pat.needle.to_string(), "foo bar");
+    let pat = Atom::parse("foo\\\tbar", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "foo\tbar");
+    let pat = Atom::parse("\\", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "\\");
+    let pat = Atom::parse("\\ ", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), " ");
+    let pat = Atom::parse("\\\\", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "\\\\");
+
+    // some unicode checks
+    let pat = Atom::parse("foö\\ bar", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "foö bar");
+    let pat = Atom::parse("ö\\ ", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "ö ");
+    let pat = Atom::parse("foö\\\\ bar", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "foö\\ bar");
+    let pat = Atom::parse("foo\\　bar", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "foo　bar"); // double-width IDEOGRAPHIC SPACE
+    let pat = Atom::parse("ö\\b", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "ö\\b");
+    let pat = Atom::parse("ö\\\\", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "ö\\\\");
+    let pat = Atom::parse("\\!^foö\\$", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "!^foö$");
+    assert_eq!(pat.kind, AtomKind::Fuzzy);
+    let pat = Atom::parse("!\\^foö\\$", CaseMatching::Smart, Normalization::Smart);
+    assert_eq!(pat.needle.to_string(), "^foö$");
+    assert_eq!(pat.kind, AtomKind::Substring);
+
     let pat = Atom::parse("\\!foo", CaseMatching::Smart, Normalization::Smart);
     assert_eq!(pat.needle.to_string(), "!foo");
     assert_eq!(pat.kind, AtomKind::Fuzzy);

diff --git a/src/pattern.rs b/src/pattern.rs
@@ -52,11 +52,19 @@ impl MultiPattern {
         let old_status = self.cols[column].1;
         if append
             && old_status != Status::Rescore
-            && self.cols[column]
-                .0
-                .atoms
-                .last()
-                .map_or(true, |last| !last.negative)
+                // must be rescored if the last atom is negative or if its needle ends in an
+                // odd number of backslashes, i.e. there is an unescaped trailing `\`
+            && self.cols[column].0.atoms.last().map_or(true, |last| {
+                !last.negative
+                    && last
+                        .needle_text()
+                        .chars()
+                        .rev()
+                        .take_while(|c| *c == '\\')
+                        .count()
+                        % 2
+                        == 0
+            })
         {
             self.cols[column].1 = Status::Update;
         } else {

diff --git a/src/pattern/tests.rs b/src/pattern/tests.rs
@@ -11,4 +11,24 @@ fn append() {
     assert_eq!(pat.status(), Status::Update);
     pat.reparse(0, "!fo", CaseMatching::Smart, Normalization::Smart, true);
     assert_eq!(pat.status(), Status::Rescore);
+
+    let mut pat = MultiPattern::new(1);
+    pat.reparse(0, "a\\\\", CaseMatching::Smart, Normalization::Smart, true);
+    assert_eq!(pat.status(), Status::Update);
+    pat.reparse(
+        0,
+        "a\\\\\\",
+        CaseMatching::Smart,
+        Normalization::Smart,
+        true,
+    );
+    assert_eq!(pat.status(), Status::Update);
+    pat.reparse(
+        0,
+        "a\\\\\\\\",
+        CaseMatching::Smart,
+        Normalization::Smart,
+        true,
+    );
+    assert_eq!(pat.status(), Status::Rescore);
 }