@@ -22,7 +22,7 @@ being used for this purpose alone.
22
22
"""
23
23
module Timings
24
24
25
- using Core. Compiler: - , + , :, Vector, length, first, empty!, push!, pop!, @inline ,
25
+ using Core. Compiler: - , + , :, > , Vector, length, first, empty!, push!, pop!, @inline ,
26
26
@inbounds , copy, backtrace
27
27
28
28
# What we record for any given frame we infer during type inference.
47
47
48
48
_typeinf_identifier (frame:: InferenceFrameInfo ) = frame
49
49
50
+ _typeinf_frame_linfo (frame:: Core.Compiler.InferenceState ) = frame. linfo
51
+ _typeinf_frame_linfo (frame:: InferenceFrameInfo ) = frame. mi
52
+
50
53
"""
51
- Core.Compiler.Timing(mi_info, start_time, ...)
54
+ Core.Compiler.Timings. Timing(mi_info, start_time, ...)
52
55
53
56
Internal type containing the timing result for running type inference on a single
54
57
MethodInstance.
55
58
"""
56
- struct Timing
59
+ mutable struct Timing
57
60
mi_info:: InferenceFrameInfo
58
61
start_time:: UInt64
59
62
cur_start_time:: UInt64
@@ -66,6 +69,31 @@ Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0),
66
69
67
70
_time_ns () = ccall (:jl_hrtime , UInt64, ()) # Re-implemented here because Base not yet available.
68
71
72
+ """
73
+ Core.Compiler.Timings.clear_and_fetch_timings()
74
+
75
+ Return, then clear, the previously recorded type inference timings.
76
+
77
+ This fetches a vector of all of the type inference timings that have _finished_ as of this call. Note
78
+ that there may be concurrent invocations of inference that are still running in another thread, but
79
+ which haven't yet been added to this buffer. Those can be fetched in a future call.
80
+ """
81
+ function clear_and_fetch_timings ()
82
+ # Pass in the type, since the C code doesn't know about our Timing struct.
83
+ ccall (:jl_typeinf_profiling_clear_and_fetch , Any, (Any, Any,),
84
+ _finished_timings, Vector{Timing}):: Vector{Timing}
85
+ end
86
+
87
+ function finish_timing_profile (timing:: Timing )
88
+ ccall (:jl_typeinf_profiling_push_timing , Cvoid, (Any, Any,), _finished_timings, timing)
89
+ end
90
+
91
+ # DO NOT ACCESS DIRECTLY. This vector should only be accessed through the
92
+ # functions above. It is a buffer that lives in the Julia module only to be *rooted*
93
+ # for GC, but all accesses to the vector must go through C code, in order to be
94
+ # thread safe.
95
+ const _finished_timings = Timing[]
96
+
69
97
# We keep a stack of the Timings for each of the MethodInstances currently being timed.
70
98
# Since type inference currently operates via a depth-first search (during abstract
71
99
# evaluation), this vector operates like a call stack. The last node in _timings is the
@@ -74,16 +102,21 @@ _time_ns() = ccall(:jl_hrtime, UInt64, ()) # Re-implemented here because Base n
74
102
# call structure through type inference is recorded. (It's recorded as a tree, not a graph,
75
103
# because we create a new node for duplicates.)
76
104
const _timings = Timing[]
105
+
77
106
# ROOT() is an empty function used as the top-level Timing node to measure all time spent
78
107
# *not* in type inference during a given recording trace. It is used as a "dummy" node.
79
108
function ROOT () end
80
109
const ROOTmi = Core. Compiler. specialize_method (
81
110
first (Core. Compiler. methods (ROOT)), Tuple{typeof (ROOT)}, Core. svec ())
111
+
82
112
"""
83
113
Core.Compiler.Timings.reset_timings()
84
114
85
115
Empty out the previously recorded type inference timings (`Core.Compiler.Timings._timings`), and
86
116
start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference.
117
+
118
+ !!! info
119
+ This function is deprecated as of Julia 1.9; use [`clear_and_fetch_timings`](@ref) instead.
87
120
"""
88
121
function reset_timings ()
89
122
empty! (_timings)
@@ -93,7 +126,6 @@ function reset_timings()
93
126
_time_ns ()))
94
127
return nothing
95
128
end
96
- reset_timings ()
97
129
98
130
# (This is split into a function so that it can be called both in this module, at the top
99
131
# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started
@@ -105,44 +137,31 @@ reset_timings()
105
137
parent_timer = _timings[end ]
106
138
accum_time = stop_time - parent_timer. cur_start_time
107
139
108
- # Add in accum_time ("modify" the immutable struct)
140
+ # Add in accum_time
109
141
@inbounds begin
110
- _timings[end ] = Timing (
111
- parent_timer. mi_info,
112
- parent_timer. start_time,
113
- parent_timer. cur_start_time,
114
- parent_timer. time + accum_time,
115
- parent_timer. children,
116
- parent_timer. bt,
117
- )
142
+ _timings[end ]. time += accum_time
118
143
end
119
144
return nothing
120
145
end
121
146
122
147
@inline function enter_new_timer (frame)
123
148
# Very first thing, stop the active timer: get the current time and add in the
124
149
# time since it was last started to its aggregate exclusive time.
125
- close_current_timer ()
126
-
127
- mi_info = _typeinf_identifier (frame)
150
+ if length (_timings) > 0
151
+ close_current_timer ()
152
+ end
128
153
129
154
# Start the new timer right before returning
155
+ mi_info = _typeinf_identifier (frame)
130
156
push! (_timings, Timing (mi_info, UInt64 (0 )))
131
157
len = length (_timings)
132
158
new_timer = @inbounds _timings[len]
159
+
133
160
# Set the current time _after_ appending the node, to try to exclude the
134
161
# overhead from measurement.
135
162
start = _time_ns ()
136
-
137
- @inbounds begin
138
- _timings[len] = Timing (
139
- new_timer. mi_info,
140
- start,
141
- start,
142
- new_timer. time,
143
- new_timer. children,
144
- )
145
- end
163
+ new_timer. start_time = start
164
+ new_timer. cur_start_time = start
146
165
147
166
return nothing
148
167
end
@@ -154,43 +173,38 @@ end
154
173
# Finish the new timer
155
174
stop_time = _time_ns ()
156
175
157
- expected_mi_info = _typeinf_identifier (_expected_frame_)
176
+ expected_linfo = _typeinf_frame_linfo (_expected_frame_)
158
177
159
178
# Grab the new timer again because it might have been modified in _timings
160
179
# (it is a mutable struct, updated in place while it was on the stack)
161
180
# And remove it from the current timings stack
162
181
new_timer = pop! (_timings)
163
- Core. Compiler. @assert new_timer. mi_info. mi === expected_mi_info . mi
182
+ Core. Compiler. @assert new_timer. mi_info. mi === expected_linfo
164
183
165
- # Prepare to unwind one level of the stack and record in the parent
166
- parent_timer = _timings[end ]
184
+ # check for two cases: normal case & backcompat case
185
+ is_profile_root_normal = length (_timings) === 0
186
+ is_profile_root_backcompat = length(_timings) === 1 && _timings[1].mi_info.mi === ROOTmi  # _timings holds Timing objects, so compare the root entry's MethodInstance, not the Timing itself
187
+ is_profile_root = is_profile_root_normal || is_profile_root_backcompat
167
188
168
189
accum_time = stop_time - new_timer. cur_start_time
169
190
# Add in accum_time
170
- new_timer = Timing (
171
- new_timer. mi_info,
172
- new_timer. start_time,
173
- new_timer. cur_start_time,
174
- new_timer. time + accum_time,
175
- new_timer. children,
176
- parent_timer. mi_info. mi === ROOTmi ? backtrace () : nothing ,
177
- )
178
- # Record the final timing with the original parent timer
179
- push! (parent_timer. children, new_timer)
180
-
181
- # And finally restart the parent timer:
182
- len = length (_timings)
183
- @inbounds begin
184
- _timings[len] = Timing (
185
- parent_timer. mi_info,
186
- parent_timer. start_time,
187
- _time_ns (),
188
- parent_timer. time,
189
- parent_timer. children,
190
- parent_timer. bt,
191
- )
191
+ new_timer. time += accum_time
192
+ if is_profile_root
193
+ new_timer. bt = backtrace ()
192
194
end
193
195
196
+ # Prepare to unwind one level of the stack and record in the parent
197
+ if is_profile_root
198
+ finish_timing_profile (new_timer)
199
+ else
200
+ parent_timer = _timings[end ]
201
+
202
+ # Record the final timing with the original parent timer
203
+ push! (parent_timer. children, new_timer)
204
+
205
+ # And finally restart the parent timer:
206
+ parent_timer. cur_start_time = _time_ns ()
207
+ end
194
208
return nothing
195
209
end
196
210
0 commit comments