@@ -23,7 +23,7 @@ being used for this purpose alone.
23
23
module Timings
24
24
25
25
using Core. Compiler: - , + , :, > , Vector, length, first, empty!, push!, pop!, @inline ,
26
- @inbounds , copy, backtrace
26
+ @inbounds , copy, backtrace, IdDict, Task, Ref, get!
27
27
28
28
# What we record for any given frame we infer during type inference.
29
29
struct InferenceFrameInfo
94
94
# thread safe.
95
95
const _finished_timings = Timing[]  # module-level constant: an initially empty Vector of Timing
96
96
97
- # We keep a stack of the Timings for each of the MethodInstances currently being timed.
97
+ # We store a profiling stack for *each Task* as a task-local-storage variable, _timings.
98
+ # This is a stack of the Timings for each of the MethodInstances currently being timed.
98
99
# Since type inference currently operates via a depth-first search (during abstract
99
100
# evaluation), this vector operates like a call stack. The last node in _timings is the
100
101
# node currently being inferred, and its parent is directly before it, etc.
101
102
# Each Timing also contains its own vector for all of its children, so that the tree
102
103
# call structure through type inference is recorded. (It's recorded as a tree, not a graph,
103
104
# because we create a new node for duplicates.)
104
- const _timings = Timing[]
105
+ # You will see this accessed below via `tls_timings()`, which reads the `:_timings` entry.
106
+
107
+ # ------- Task Local Storage -------
108
+ # Reimplementation of Task Local Storage, since these functions aren't available yet
109
+ # at this stage of bootstrapping.
110
+ current_task () = ccall (:jl_get_current_task , Ref{Task}, ())  # ask the runtime (C function `jl_get_current_task`) for the running Task
111
+ task_local_storage () = get_task_tls (current_task ())  # storage dictionary belonging to the currently running Task
112
+ function get_task_tls (t:: Task )  # return `t`'s storage dictionary, creating it on first use
113
+ if t. storage === nothing  # no storage has been set on this task yet
114
+ t. storage = IdDict ()  # install a fresh, empty IdDict as the task's storage
115
+ end
116
+ return (t. storage):: IdDict{Any,Any}  # type-assert so the result is known to be IdDict{Any,Any}
117
+ end
118
+ # -------
119
+
120
+ # Return the current task's stack of in-progress Timings, creating and storing an empty
+ # one under `:_timings` the first time this task asks for it. Passing the constructor
+ # `Vector{Timing}` (rather than an already-built vector) means nothing is allocated when
+ # the entry exists, and the assertion restores the type lost by the `IdDict{Any,Any}`.
+ tls_timings() = get!(Vector{Timing}, task_local_storage(), :_timings)::Vector{Timing}
105
121
106
122
# ROOT() is an empty function used as the top-level Timing node to measure all time spent
107
123
# *not* in type inference during a given recording trace. It is used as a "dummy" node.
145
161
end
146
162
147
163
@inline function enter_new_timer (frame)
164
+ _timings = tls_timings ()
165
+
148
166
# Very first thing, stop the active timer: get the current time and add in the
149
167
# time since it was last started to its aggregate exclusive time.
150
168
if length (_timings) > 0
154
172
# Start the new timer right before returning
155
173
mi_info = _typeinf_identifier (frame)
156
174
push! (_timings, Timing (mi_info, UInt64 (0 )))
157
- len = length (_timings)
158
- new_timer = @inbounds _timings[len]
175
+ new_timer = @inbounds _timings[end ]
159
176
160
177
# Set the current time _after_ appending the node, to try to exclude the
161
178
# overhead from measurement.
170
187
# assert that indeed we are always returning to a parent after finishing all of its
171
188
# children (that is, asserting that inference proceeds via depth-first-search).
172
189
@inline function exit_current_timer (_expected_frame_)
190
+ _timings = tls_timings ()
191
+
173
192
# Finish the new timer
174
193
stop_time = _time_ns ()
175
194
0 commit comments