Skip to content

Commit 84e9989

Browse files
authored
expose findall for Vector{UInt8} (#45307)
1 parent 762561c commit 84e9989

File tree

3 files changed

+75
-58
lines changed

3 files changed

+75
-58
lines changed

base/regex.jl

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,6 @@ include("pcre.jl")
77
const DEFAULT_COMPILER_OPTS = PCRE.UTF | PCRE.MATCH_INVALID_UTF | PCRE.ALT_BSUX | PCRE.UCP
88
const DEFAULT_MATCH_OPTS = PCRE.NO_UTF_CHECK
99

10-
"""
11-
An abstract type representing any sort of pattern matching expression
12-
(typically a regular expression). `AbstractPattern` objects can be used to
13-
match strings with [`match`](@ref).
14-
15-
!!! compat "Julia 1.6"
16-
This type is available in Julia 1.6 and later.
17-
"""
18-
abstract type AbstractPattern end
19-
2010
"""
2111
Regex(pattern[, flags])
2212
@@ -438,54 +428,6 @@ findnext(r::Regex, s::AbstractString, idx::Integer) = throw(ArgumentError(
438428
findfirst(r::Regex, s::AbstractString) = findnext(r,s,firstindex(s))
439429

440430

"""
    findall(
        pattern::Union{AbstractString,AbstractPattern},
        string::AbstractString;
        overlap::Bool = false,
    )

Collect every match of `pattern` in `string` and return them as a
`Vector{UnitRange{Int}}`. Each entry is the index range of one match, in the
same form that [`findnext`](@ref) returns.

With `overlap=true` successive matches may share indices of `string`;
by default (`overlap=false`) the matches cover disjoint character ranges.

# Examples
```jldoctest
julia> findall("a", "apple")
1-element Vector{UnitRange{Int64}}:
 1:1

julia> findall("nana", "banana")
1-element Vector{UnitRange{Int64}}:
 3:6

julia> findall("a", "banana")
3-element Vector{UnitRange{Int64}}:
 2:2
 4:4
 6:6
```

!!! compat "Julia 1.3"
     This method requires at least Julia 1.3.
"""
function findall(
    t::Union{AbstractString,AbstractPattern},
    s::AbstractString;
    overlap::Bool=false,
)
    ranges = UnitRange{Int}[]
    cursor = firstindex(s)
    stop = lastindex(s)
    while (hit = findnext(t, s, cursor)) !== nothing
        push!(ranges, hit)
        # Resume right after the start of the match when overlaps are allowed
        # (or the match is empty), otherwise right after its last index.
        if overlap || isempty(hit)
            anchor = first(hit)
        else
            anchor = last(hit)
        end
        # Nothing left to scan once the anchor lies past the last index.
        anchor > stop && break
        @inbounds cursor = nextind(s, anchor)
    end
    return ranges
end
488-
489431
"""
490432
findall(c::AbstractChar, s::AbstractString)
491433

base/strings/search.jl

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

3+
"""
4+
An abstract type representing any sort of pattern matching expression
5+
(typically a regular expression). `AbstractPattern` objects can be used to
6+
match strings with [`match`](@ref).
7+
8+
!!! compat "Julia 1.6"
9+
This type is available in Julia 1.6 and later.
10+
"""
11+
abstract type AbstractPattern end
12+
313
nothing_sentinel(i) = i == 0 ? nothing : i
414

515
function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
@@ -406,6 +416,67 @@ true
406416
"""
407417
findlast(ch::AbstractChar, string::AbstractString) = findlast(==(ch), string)
408418

"""
    findall(
        pattern::Union{AbstractString,AbstractPattern},
        string::AbstractString;
        overlap::Bool = false,
    )
    findall(
        pattern::AbstractVector{<:Union{Int8,UInt8}},
        A::AbstractVector{<:Union{Int8,UInt8}};
        overlap::Bool = false,
    )

Return a `Vector{UnitRange{Int}}` of all the matches for `pattern` in `string`
(or in the byte vector `A`). Each element of the returned vector is a range of
indices where the matching sequence is found, like the return value of
[`findnext`](@ref).

If `overlap=true`, the matching sequences are allowed to overlap indices in the
original string or vector, otherwise they must be from disjoint index ranges.

# Examples
```jldoctest
julia> findall("a", "apple")
1-element Vector{UnitRange{Int64}}:
 1:1

julia> findall("nana", "banana")
1-element Vector{UnitRange{Int64}}:
 3:6

julia> findall("a", "banana")
3-element Vector{UnitRange{Int64}}:
 2:2
 4:4
 6:6

julia> findall(UInt8[1,2], UInt8[1,2,3,1,2])
2-element Vector{UnitRange{Int64}}:
 1:2
 4:5
```

!!! compat "Julia 1.3"
     This method requires at least Julia 1.3.
"""
function findall(
    t::Union{AbstractString,AbstractPattern,AbstractVector{<:Union{Int8,UInt8}}},
    s::Union{AbstractString,AbstractPattern,AbstractVector{<:Union{Int8,UInt8}}};
    overlap::Bool=false,
)
    # NOTE(review): the signature admits an `AbstractPattern` as the haystack `s`,
    # but the body indexes `s` (firstindex/lastindex/nextind) -- confirm whether
    # that combination is intended before narrowing the type.
    found = UnitRange{Int}[]
    i, e = firstindex(s), lastindex(s)
    while true
        r = findnext(t, s, i)
        isnothing(r) && break
        push!(found, r)
        # Resume after the first index of the match when overlaps are allowed or
        # the match is empty (so the scan always advances); otherwise resume
        # after the last index of the match.
        j = (overlap || isempty(r)) ? first(r) : last(r)
        # Stop once the resume anchor is past the last index of `s`.
        j > e && break
        @inbounds i = nextind(s, j)
    end
    return found
end
479+
409480
# AbstractString implementation of the generic findprev interface
410481
function findprev(testf::Function, s::AbstractString, i::Integer)
411482
i = Int(i)

test/regex.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@
6969
@test findall('→', "OH⁻ + H₃CBr →  HOH₃CBr⁻ → HOCH₃ + Br⁻") == [17, 35]
7070
@test findall('a', "") == Int[]
7171
@test findall('c', "batman") == Int[]
72+
@test findall([0x52, 0x62], [0x40, 0x52, 0x62, 0x63]) == [2:3]
73+
@test findall([0x52, 0x62], [0x40, 0x52, 0x62, 0x63, 0x52, 0x62]) == [2:3, 5:6]
74+
@test findall([0x01, 0x01], [0x01, 0x01, 0x01, 0x01]) == [1:2, 3:4]
75+
@test findall([0x01, 0x01], [0x01, 0x01, 0x01, 0x01]; overlap=true) == [1:2, 2:3, 3:4]
7276

7377
# count
7478
@test count(r"\w+", "foo bar") == 2

0 commit comments

Comments
 (0)