@@ -44,10 +44,47 @@ def device_dt(self):
44
44
return self ._device_dt
45
45
46
46
47
class BaseDeviceTimer:
    """
    Common base for device-timer helpers: validates and stores the queue.

    Args:
        sycl_queue (SyclQueue):
            Queue to keep in the ``queue`` attribute.

    Raises:
        TypeError: if ``sycl_queue`` is not an instance of ``SyclQueue``.
    """

    __slots__ = ["queue"]

    def __init__(self, sycl_queue):
        if isinstance(sycl_queue, SyclQueue):
            self.queue = sycl_queue
            return
        # NOTE(review): the message literal ends with a space exactly as it
        # appears in the reviewed source; this looks like an extraction
        # artifact -- confirm against the repository before keeping it.
        raise TypeError(f"Expected type SyclQueue, got {type(sycl_queue)} ")
54
+
55
+
56
class QueueBarrierDeviceTimer(BaseDeviceTimer):
    """Device timer that obtains its timing events from queue barriers."""

    __slots__ = []

    def __init__(self, sycl_queue):
        # Validation and storage of the queue are done by the base class.
        super().__init__(sycl_queue)

    def get_event(self):
        """Submit a barrier to the stored queue and return its result."""
        barrier_event = self.queue.submit_barrier()
        return barrier_event
64
+
65
+
66
class OrderManagerDeviceTimer(BaseDeviceTimer):
    """
    Device timer that obtains its timing events from empty tasks which are
    ordered through the sequential order manager of the queue.
    """

    __slots__ = ["_order_manager", "_submit_empty_task_fn"]

    def __init__(self, sycl_queue):
        # Imports are kept local to the constructor, as in the original.
        import dpctl.utils._seq_order_keeper as order_keeper
        from dpctl.utils import SequentialOrderManager as manager_registry

        # The base class validates the argument and sets ``self.queue``.
        super().__init__(sycl_queue)
        self._order_manager = manager_registry[self.queue]
        self._submit_empty_task_fn = order_keeper._submit_empty_task

    def get_event(self):
        """
        Submit an empty task that depends on the events currently tracked
        by the order manager, register the new event with the manager and
        return it.
        """
        pending = self._order_manager.submitted_events
        marker = self._submit_empty_task_fn(
            sycl_queue=self.queue, depends=pending
        )
        # The same event is passed for both positions of the pair.
        # NOTE(review): presumably (host-task event, computation event)
        # -- confirm against the order manager API.
        self._order_manager.add_event_pair(marker, marker)
        return marker
83
+
84
+
47
85
class SyclTimer :
48
86
"""
49
- Context to measure device time and host wall-time of execution
50
- of commands submitted to :class:`dpctl.SyclQueue`.
87
+ Context to time execution of tasks submitted to :class:`dpctl.SyclQueue`.
51
88
52
89
:Example:
53
90
.. code-block:: python
@@ -58,40 +95,81 @@ class SyclTimer:
58
95
q = dpctl.SyclQueue(property="enable_profiling")
59
96
60
97
# create the timer
61
- milliseconds_sc = 1e-3
98
+ milliseconds_sc = 1e3
62
99
timer = dpctl.SyclTimer(time_scale = milliseconds_sc)
63
100
101
+ untimed_code_block_1
64
102
# use the timer
65
103
with timer(queue=q):
66
- code_block1
104
+ timed_code_block1
105
+
106
+ untimed_code_block_2
67
107
68
108
# use the timer
69
109
with timer(queue=q):
70
- code_block2
110
+ timed_code_block2
111
+
112
+ untimed_code_block_3
71
113
72
114
# retrieve elapsed times in milliseconds
73
115
wall_dt, device_dt = timer.dt
74
116
75
117
.. note::
76
- The timer submits barriers to the queue at the entrance and the
118
+ The timer submits tasks to the queue at the entrance and the
77
119
exit of the context and uses profiling information from events
78
120
associated with these submissions to perform the timing. Thus
79
121
:class:`dpctl.SyclTimer` requires the queue with ``"enable_profiling"``
80
122
property. In order to be able to collect the profiling information,
81
- the ``dt`` property ensures that both submitted barriers complete their
82
- execution and thus effectively synchronizes the queue.
123
+ the ``dt`` property ensures that both tasks submitted by the timer
124
+ complete their execution and thus effectively synchronizes the queue.
125
+
126
+ Execution of the above example results in the following task graph,
127
+ where each group of tasks is ordered after the one preceding it,
128
+ ``[tasks_of_untimed_block1]``, ``[timer_fence_start_task]``,
129
+ ``[tasks_of_timed_block1]``, ``[timer_fence_finish_task]``,
130
+ ``[tasks_of_untimed_block2]``, ``[timer_fence_start_task]``,
131
+ ``[tasks_of_timed_block2]``, ``[timer_fence_finish_task]``,
132
+ ``[tasks_of_untimed_block3]``.
133
+
134
+ ``device_timer`` keyword argument controls the type of tasks submitted.
135
+ With ``"queue_barrier"`` value, queue barrier tasks are used. With
136
+ ``"order_manager"`` value, a single empty body task is inserted
137
+ and order manager (used by all `dpctl.tensor` operations) is used to
138
+ order these tasks so that they fence operations performed within
139
+ timer's context.
140
+
141
+ Timing offloading operations that do not use the order manager with
142
+ the timer that uses ``"order_manager"`` as ``device_timer`` value
143
+ will be misleading because the tasks submitted by the timer will not
144
+ be ordered with respect to tasks we intend to time.
145
+
146
+ Note that the host timer effectively measures the time of task
147
+ submissions. To measure host timer wall-time that includes execution
148
+ of submitted tasks, make sure to include a synchronization point in
149
+ the timed block.
150
+
151
+ :Example:
152
+ .. code-block:: python
153
+
154
+ with timer(q):
155
+ timed_block
156
+ q.wait()
83
157
84
158
Args:
85
159
host_timer (callable, optional):
86
160
A callable such that host_timer() returns current
87
161
host time in seconds.
88
162
Default: :py:func:`timeit.default_timer`.
163
+ device_timer (Literal["queue_barrier", "order_manager"], optional):
164
+ Device timing method. Default: "queue_barrier".
89
165
time_scale (Union[int, float], optional):
90
- Ratio of the unit of time of interest and one second .
166
+ Ratio of one second and the unit of time-scale of interest.
91
167
Default: ``1``.
92
168
"""
93
169
94
- def __init__ (self , host_timer = timeit .default_timer , time_scale = 1 ):
170
+ def __init__ (
171
+ self , host_timer = timeit .default_timer , device_timer = None , time_scale = 1
172
+ ):
95
173
"""
96
174
Create new instance of :class:`.SyclTimer`.
97
175
@@ -100,6 +178,8 @@ def __init__(self, host_timer=timeit.default_timer, time_scale=1):
100
178
A function that takes no arguments and returns a value
101
179
measuring time.
102
180
Default: :meth:`timeit.default_timer`.
181
+ device_timer (Literal["queue_barrier", "order_manager"], optional):
182
+ Device timing method. Default: "queue_barrier"
103
183
time_scale (Union[int, float], optional):
104
184
Scaling factor applied to durations measured by
105
185
the host_timer. Default: ``1``.
@@ -109,11 +189,26 @@ def __init__(self, host_timer=timeit.default_timer, time_scale=1):
109
189
self .queue = None
110
190
self .host_times = []
111
191
self .bracketing_events = []
192
+ self ._context_data = list ()
193
+ if device_timer is None :
194
+ device_timer = "queue_barrier"
195
+ if device_timer == "queue_barrier" :
196
+ self ._device_timer_class = QueueBarrierDeviceTimer
197
+ elif device_timer == "order_manager" :
198
+ self ._device_timer_class = OrderManagerDeviceTimer
199
+ else :
200
+ raise ValueError (
201
+ "Supported values for device_timer keyword are "
202
+ "'queue_barrier', 'order_manager', got "
203
+ f"'{ device_timer } '"
204
+ )
205
+ self ._device_timer = None
112
206
113
207
def __call__ (self , queue = None ):
114
208
if isinstance (queue , SyclQueue ):
115
209
if queue .has_enable_profiling :
116
210
self .queue = queue
211
+ self ._device_timer = self ._device_timer_class (queue )
117
212
else :
118
213
raise ValueError (
119
214
"The given queue was not created with the "
@@ -127,17 +222,17 @@ def __call__(self, queue=None):
127
222
return self
128
223
129
224
def __enter__(self):
    """
    Record the start of a timed region.

    Requests a start event from the device timer, then reads the host
    timer, and pushes the ``(event, host_time)`` pair onto the stack of
    open contexts. Returns the timer itself.
    """
    # Tuple elements are evaluated left to right: the device event is
    # requested before the host time is sampled.
    self._context_data.append(
        (self._device_timer.get_event(), self.timer())
    )
    return self
133
229
134
230
def __exit__(self, *args):
    """
    Record the end of a timed region.

    Requests an end event from the device timer, then reads the host
    timer, pops the matching start record, and appends the host-time pair
    and the event pair to ``host_times`` and ``bracketing_events``.
    """
    # Device event first, host time second -- same order as on entry.
    finish_event = self._device_timer.get_event()
    finish_host = self.timer()
    # The most recently opened context is the one being closed.
    begin_event, begin_host = self._context_data.pop()
    self.host_times.append((begin_host, finish_host))
    self.bracketing_events.append((begin_event, finish_event))
141
236
142
237
@property
143
238
def dt (self ):
0 commit comments