Skip to content

Commit 8812c5c

Browse files
Profile: Use two null block terminators (#42106)
1 parent 876df79 commit 8812c5c

File tree

5 files changed

+42
-35
lines changed

5 files changed

+42
-35
lines changed

src/signal-handling.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64
3737

3838
JL_DLLEXPORT int jl_profile_is_buffer_full(void)
3939
{
40-
// the `+ 5` is for the block terminator `0` plus 4 metadata entries
41-
return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 5 > bt_size_max;
40+
// the `+ 6` is for the two block terminators `0` plus 4 metadata entries
41+
return bt_size_cur + (JL_BT_MAX_ENTRY_SIZE + 1) + 6 > bt_size_max;
4242
}
4343

4444
static uint64_t jl_last_sigint_trigger = 0;

src/signals-mach.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,8 @@ void *mach_profile_listener(void *arg)
602602
// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
603603
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;
604604

605-
// Mark the end of this block with 0
605+
// Mark the end of this block with two 0's
606+
bt_data_prof[bt_size_cur++].uintptr = 0;
606607
bt_data_prof[bt_size_cur++].uintptr = 0;
607608
}
608609
// We're done! Resume the thread.

src/signals-unix.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -800,7 +800,8 @@ static void *signal_listener(void *arg)
800800
// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
801801
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;
802802

803-
// Mark the end of this block with 0
803+
// Mark the end of this block with two 0's
804+
bt_data_prof[bt_size_cur++].uintptr = 0;
804805
bt_data_prof[bt_size_cur++].uintptr = 0;
805806
}
806807
}

src/signals-win.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,8 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
375375
// store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
376376
bt_data_prof[bt_size_cur++].uintptr = ptls->sleep_check_state + 1;
377377

378-
// Mark the end of this block with 0
378+
// Mark the end of this block with two 0's
379+
bt_data_prof[bt_size_cur++].uintptr = 0;
379380
bt_data_prof[bt_size_cur++].uintptr = 0;
380381
}
381382
jl_unlock_profile();

stdlib/Profile/src/Profile.jl

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ module Profile
77

88
import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
99

10+
const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
11+
1012
# deprecated functions: use `getdict` instead
1113
lookup(ip::UInt) = lookup(convert(Ptr{Cvoid}, ip))
1214

@@ -41,9 +43,9 @@ end
4143
4244
Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be
4345
stored per thread. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long
44-
list of instruction pointers. Note that 5 spaces for instruction pointers per backtrace are used to store metadata and a marker.
45-
Current settings can be obtained by calling this function with no arguments, and each can be set independently using keywords
46-
or in the order `(n, delay)`.
46+
list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two
47+
NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently
48+
using keywords or in the order `(n, delay)`.
4749
4850
!!! compat "Julia 1.8"
4951
As of Julia 1.8, this function allocates space for `n` instruction pointers per thread being profiled.
@@ -263,9 +265,9 @@ end
263265
function get_task_ids(data::Vector{<:Unsigned}, threadid = nothing)
264266
taskids = UInt[]
265267
for i in length(data):-1:1
266-
if data[i] == 0 # find start of block
267-
if isnothing(threadid) || data[i - 4] == threadid
268-
taskid = data[i - 3]
268+
if is_block_end(data, i)
269+
if isnothing(threadid) || data[i - 5] == threadid
270+
taskid = data[i - 4]
269271
!in(taskid, taskids) && push!(taskids, taskid)
270272
end
271273
end
@@ -276,16 +278,23 @@ end
276278
function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing)
277279
threadids = Int[]
278280
for i in length(data):-1:1
279-
if data[i] == 0 # find start of block
280-
if isnothing(taskid) || data[i - 3] == taskid
281-
threadid = data[i - 4]
281+
if is_block_end(data, i)
282+
if isnothing(taskid) || data[i - 4] == taskid
283+
threadid = data[i - 5]
282284
!in(threadid, threadids) && push!(threadids, threadid)
283285
end
284286
end
285287
end
286288
return sort(threadids)
287289
end
288290

291+
"""
    is_block_end(data, i)

Return `true` if index `i` of the raw profile buffer `data` is the last slot of a block, i.e. the second
of the two consecutive zero entries that terminate each block. Return `false` for any index too small to
be the end of a complete block.
"""
function is_block_end(data, i)
    # A complete block holds `nmeta` metadata fields followed by two NULL terminators, and callers read
    # back as far as `data[i - (nmeta + 1)]` once this returns `true`. No valid block can therefore end
    # before index `nmeta + 2`; rejecting smaller indices keeps those look-behind reads in bounds.
    i < nmeta + 2 && return false
    # 32-bit linux has been seen to have rogue NULL ips, so we use two to indicate block end, where the 2nd is the
    # actual end index
    return data[i] == 0 && data[i - 1] == 0
end
297+
289298
"""
290299
print([io::IO = stdout,] data::Vector, lidict::LineInfoDict; kwargs...)
291300
@@ -509,22 +518,17 @@ function fetch(;include_meta = false)
509518
else
510519
nblocks = 0
511520
for i = 2:length(data)
512-
if data[i] == 0 && in(data[i - 1], [1,2])
513-
# detect block ends and count them
514-
# linux 32 has been seen to have rogue ips equal to 0 so also check for the previous entry looking like an idle
515-
# state metadata entry which can only be 1 or 2
521+
if is_block_end(data, i) # detect block ends and count them
516522
nblocks += 1
517523
end
518524
end
519-
nmeta = 4 # number of metadata fields (threadid, taskid, cpu_cycle_clock, thread_sleeping)
520-
data_stripped = Vector{UInt}(undef, length(data) - (nblocks * nmeta))
525+
data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1)))
521526
j = length(data_stripped)
522527
i = length(data)
523528
while i > 0 && j > 0
524529
data_stripped[j] = data[i]
525-
if i > 1 && data[i] == 0 && in(data[i - 1], [1,2])
526-
# detect block end (same approach as above)
527-
i -= nmeta
530+
if is_block_end(data, i)
531+
i -= (nmeta + 1) # metadata fields and the extra NULL IP
528532
end
529533
i -= 1
530534
j -= 1
@@ -551,14 +555,14 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
551555
skip = false
552556
nsleeping = 0
553557
for i in startframe:-1:1
554-
startframe - 1 <= i <= startframe - 4 && continue # skip metadata (it's read ahead below)
558+
startframe - 1 <= i <= startframe - (nmeta + 1) && continue # skip metadata (it's read ahead below) and extra block-end NULL IP
555559
ip = data[i]
556-
if i > 1 && ip == 0 && in(data[i - 1], [1,2]) # check that the field next to the zero is the idle metadata entry
560+
if is_block_end(data, i)
557561
# read metadata
558-
thread_sleeping = data[i - 1] - 1 # subtract 1 as state is incremented to avoid being equal to 0
559-
# cpu_cycle_clock = data[i - 2]
560-
taskid = data[i - 3]
561-
threadid = data[i - 4]
562+
thread_sleeping = data[i - 2] - 1 # subtract 1 as state is incremented to avoid being equal to 0
563+
# cpu_cycle_clock = data[i - 3]
564+
taskid = data[i - 4]
565+
threadid = data[i - 5]
562566
if !in(threadid, threads) || !in(taskid, tasks)
563567
skip = true
564568
continue
@@ -799,14 +803,14 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
799803
skip = false
800804
nsleeping = 0
801805
for i in startframe:-1:1
802-
startframe - 1 <= i <= startframe - 4 && continue # skip metadata (its read ahead below)
806+
startframe - 1 <= i <= startframe - (nmeta + 1) && continue # skip metadata (its read ahead below) and extra block end NULL IP
803807
ip = all[i]
804-
if i > 1 && ip == 0 && in(all[i - 1], [1,2]) # check that the field next to the zero is the idle metadata entry
808+
if is_block_end(all, i)
805809
# read metadata
806-
thread_sleeping = all[i - 1] - 1 # subtract 1 as state is incremented to avoid being equal to 0
807-
# cpu_cycle_clock = all[i - 2]
808-
taskid = all[i - 3]
809-
threadid = all[i - 4]
810+
thread_sleeping = all[i - 2] - 1 # subtract 1 as state is incremented to avoid being equal to 0
811+
# cpu_cycle_clock = all[i - 3]
812+
taskid = all[i - 4]
813+
threadid = all[i - 5]
810814
if !in(threadid, threads) || !in(taskid, tasks)
811815
skip = true
812816
continue

0 commit comments

Comments
 (0)