Skip to content

Fix reparse trailing escape #67

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 49 additions & 29 deletions matcher/src/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,17 +122,30 @@ impl Atom {
normalize = false;
}
let needle = if needle.is_ascii() {
let mut needle = if escape_whitespace {
if let Some((start, rem)) = needle.split_once("\\ ") {
let mut needle = start.to_owned();
for rem in rem.split("\\ ") {
needle.push(' ');
needle.push_str(rem);
let mut needle_string = if escape_whitespace {
let mut needle_bytes = Vec::with_capacity(needle.len());
let mut saw_backslash = false;
for c in needle.bytes() {
if saw_backslash {
if c.is_ascii_whitespace() {
needle_bytes.push(c);
saw_backslash = false;
continue;
} else {
needle_bytes.push(b'\\');
}
}
needle
} else {
needle.to_owned()
saw_backslash = c == b'\\';
if !saw_backslash {
needle_bytes.push(c);
}
}
// push the potentially trailing backslash
if saw_backslash {
needle_bytes.push(b'\\');
}
// SAFETY: we just checked that needle is ascii, so each `c` is a valid ASCII byte
unsafe { String::from_utf8_unchecked(needle_bytes) }
} else {
needle.to_owned()
};
Expand All @@ -141,18 +154,19 @@ impl Atom {
#[cfg(feature = "unicode-casefold")]
CaseMatching::Ignore => {
ignore_case = true;
needle.make_ascii_lowercase()
needle_string.make_ascii_lowercase()
}
#[cfg(feature = "unicode-casefold")]
CaseMatching::Smart => {
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
ignore_case = !needle_string.bytes().any(|b| b.is_ascii_uppercase())
}
CaseMatching::Respect => ignore_case = false,
}

if append_dollar {
needle.push('$');
needle_string.push('$');
}
Utf32String::Ascii(needle.into_boxed_str())
Utf32String::Ascii(needle_string.into_boxed_str())
} else {
let mut needle_ = Vec::with_capacity(needle.len());
#[cfg(feature = "unicode-casefold")]
Expand All @@ -171,32 +185,38 @@ impl Atom {
let mut saw_backslash = false;
for mut c in chars::graphemes(needle) {
if saw_backslash {
if c == ' ' {
needle_.push(' ');
if c.is_whitespace() {
needle_.push(c);
saw_backslash = false;
continue;
} else {
needle_.push('\\');
}
}
saw_backslash = c == '\\';
match case {
#[cfg(feature = "unicode-casefold")]
CaseMatching::Ignore => c = chars::to_lower_case(c),
#[cfg(feature = "unicode-casefold")]
CaseMatching::Smart => {
ignore_case = ignore_case && !chars::is_upper_case(c)
if !saw_backslash {
match case {
#[cfg(feature = "unicode-casefold")]
CaseMatching::Ignore => c = chars::to_lower_case(c),
#[cfg(feature = "unicode-casefold")]
CaseMatching::Smart => {
ignore_case = ignore_case && !chars::is_upper_case(c)
}
CaseMatching::Respect => (),
}
CaseMatching::Respect => (),
}
match normalization {
#[cfg(feature = "unicode-normalization")]
Normalization::Smart => {
normalize = normalize && chars::normalize(c) == c;
match normalization {
#[cfg(feature = "unicode-normalization")]
Normalization::Smart => {
normalize = normalize && chars::normalize(c) == c;
}
Normalization::Never => (),
}
Normalization::Never => (),
needle_.push(c);
}
needle_.push(c);
}
// push the potentially trailing backslash
if saw_backslash {
needle_.push('\\');
}
} else {
let chars = chars::graphemes(needle).map(|mut c| {
Expand Down
30 changes: 30 additions & 0 deletions matcher/src/pattern/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,38 @@ fn case_matching() {

#[test]
fn escape() {
// escapes only impact whitespace
let pat = Atom::parse("foo\\ bar", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "foo bar");
let pat = Atom::parse("foo\\\tbar", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "foo\tbar");
let pat = Atom::parse("\\", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "\\");
let pat = Atom::parse("\\ ", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), " ");
let pat = Atom::parse("\\\\", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "\\\\");

// some unicode checks
let pat = Atom::parse("foö\\ bar", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "foö bar");
let pat = Atom::parse("ö\\ ", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "ö ");
let pat = Atom::parse("foö\\\\ bar", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "foö\\ bar");
let pat = Atom::parse("foo\\ bar", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "foo bar"); // double-width IDEOGRAPHIC SPACE
let pat = Atom::parse("ö\\b", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "ö\\b");
let pat = Atom::parse("ö\\\\", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "ö\\\\");
let pat = Atom::parse("\\!^foö\\$", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "!^foö$");
assert_eq!(pat.kind, AtomKind::Fuzzy);
let pat = Atom::parse("!\\^foö\\$", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "^foö$");
assert_eq!(pat.kind, AtomKind::Substring);

let pat = Atom::parse("\\!foo", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "!foo");
assert_eq!(pat.kind, AtomKind::Fuzzy);
Expand Down
18 changes: 13 additions & 5 deletions src/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,19 @@ impl MultiPattern {
let old_status = self.cols[column].1;
if append
&& old_status != Status::Rescore
&& self.cols[column]
.0
.atoms
.last()
.map_or(true, |last| !last.negative)
// must be rescored if the atom is negative or if there is an unescaped
// trailing `\`
&& self.cols[column].0.atoms.last().map_or(true, |last| {
!last.negative
&& last
.needle_text()
.chars()
.rev()
.take_while(|c| *c == '\\')
.count()
% 2
== 0
})
{
self.cols[column].1 = Status::Update;
} else {
Expand Down
20 changes: 20 additions & 0 deletions src/pattern/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,24 @@ fn append() {
assert_eq!(pat.status(), Status::Update);
pat.reparse(0, "!fo", CaseMatching::Smart, Normalization::Smart, true);
assert_eq!(pat.status(), Status::Rescore);

let mut pat = MultiPattern::new(1);
pat.reparse(0, "a\\\\", CaseMatching::Smart, Normalization::Smart, true);
assert_eq!(pat.status(), Status::Update);
pat.reparse(
0,
"a\\\\\\",
CaseMatching::Smart,
Normalization::Smart,
true,
);
assert_eq!(pat.status(), Status::Update);
pat.reparse(
0,
"a\\\\\\\\",
CaseMatching::Smart,
Normalization::Smart,
true,
);
assert_eq!(pat.status(), Status::Rescore);
}
Loading