// tracy_client/gpu.rs
1use std::{
2 convert::TryInto,
3 sync::{Arc, Mutex},
4};
5
6use crate::{Client, SpanLocation};
7
/// The API label associated with the given gpu context. The list here only includes
/// APIs that are currently supported by Tracy's own gpu implementations.
//
// Copied from `tracy-client-sys/tracy/common/TracyQueue.hpp:391`. Comment on enum states
// that the values are stable, due to potential serialization issues, so copying this enum
// shouldn't be a problem.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GpuContextType {
    /// Stand in for other types of contexts.
    Invalid = 0,
    /// An OpenGL context
    OpenGL = 1,
    /// A Vulkan context
    Vulkan = 2,
    /// An OpenCL context
    OpenCL = 3,
    /// A D3D12 context.
    Direct3D12 = 4,
    /// A D3D11 context.
    Direct3D11 = 5,
}
29
/// Context for creating gpu spans.
///
/// Generally corresponds to a single hardware queue.
///
/// The flow of creating and using gpu context generally looks like this:
///
/// ```rust,no_run
/// # let client = tracy_client::Client::start();
/// // The period of the gpu clock in nanoseconds, as provided by your GPU api.
/// // This value corresponds to 1GHz.
/// let period: f32 = 1_000_000_000.0;
///
/// // GPU API: Record writing a timestamp and resolve that to a mappable buffer.
/// // GPU API: Submit the command buffer writing the timestamp.
/// // GPU API: Immediately block until the submission is finished.
/// // GPU API: Map buffer, get timestamp value.
/// let starting_timestamp: i64 = /* whatever you get from this timestamp */ 0;
///
/// // Create the gpu context
/// let gpu_context = client.new_gpu_context(
///     Some("MyContext"),
///     tracy_client::GpuContextType::Vulkan,
///     starting_timestamp,
///     period
/// ).unwrap();
///
/// // Now you have some work that you want to time on the gpu.
///
/// // GPU API: Record writing a timestamp before the work.
/// let mut span = gpu_context.span_alloc("MyGpuSpan1", "My::Work", "myfile.rs", 12).unwrap();
///
/// // GPU API: Record work.
///
/// // GPU API: Record writing a timestamp after the work.
/// span.end_zone();
///
/// // Some time later, once the written timestamp values are available on the cpu.
/// # let (starting_timestamp, ending_timestamp) = (0, 0);
///
/// // Consumes span.
/// span.upload_timestamp(starting_timestamp, ending_timestamp);
/// ```
#[derive(Clone)]
pub struct GpuContext {
    // Keeps the tracy `Client` alive for at least as long as this context.
    #[cfg(feature = "enable")]
    _client: Client,
    // The id passed as `context` in every tracy-sys gpu call made through this context.
    #[cfg(feature = "enable")]
    value: u8,
    // The gpu timestamp this context was created with; used as a placeholder
    // time when a `GpuSpan` is dropped before its timestamps are uploaded.
    #[cfg(feature = "enable")]
    gpu_start_timestamp: i64,
    // Pool of unused query ids. Each live span checks out two (start + end)
    // and returns them to this list when its timestamps are uploaded.
    #[cfg(feature = "enable")]
    span_freelist: Arc<Mutex<Vec<u16>>>,
    // Prevents construction outside this module.
    _private: (),
}
// Process-wide counter handing out unique context ids; `new_gpu_context`
// errors once it reaches 255, so ids are never reused.
#[cfg(feature = "enable")]
static GPU_CONTEXT_INDEX: Mutex<u8> = Mutex::new(0);
86
/// Errors that can occur when creating a gpu context.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuContextCreationError {
    /// More than `u8::MAX` contexts have been created at any point in the program.
    TooManyContextsCreated,
}
93
94impl std::fmt::Display for GpuContextCreationError {
95 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96 write!(
97 f,
98 "More than 255 contexts have been created at any point in the execution of this program."
99 )
100 }
101}
102
// Marker impl: the default `Error` methods suffice; `Display` supplies the message.
impl std::error::Error for GpuContextCreationError {}
104
/// Lifecycle state of a [`GpuSpan`], tracked so that ending the zone and
/// uploading timestamps each happen at most once (see `end_zone`,
/// `upload_timestamp_impl`, and the `Drop` impl).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum GpuSpanState {
    /// The span has been started. All gpu spans start in this state.
    Started,
    /// The span has been ended, waiting for timestamp upload.
    Ended,
    /// All timestamps have been uploaded.
    Uploaded,
}
114
/// Span for timing gpu work.
///
/// See the [context level documentation](GpuContext) for more information on use.
///
/// If the span is dropped early, the following happens:
/// - If the span has not been ended, the span is ended. AND
/// - If the span has not had values uploaded, the span is uploaded with
///   the timestamps marking the start of the current gpu context. This
///   will put the span out of the way of other spans.
#[must_use]
pub struct GpuSpan {
    // The context this span belongs to; its query ids return to the context's freelist.
    #[cfg(feature = "enable")]
    context: GpuContext,
    // Query id tied to the zone-begin event and the start timestamp upload.
    #[cfg(feature = "enable")]
    start_query_id: u16,
    // Query id tied to the zone-end event and the end timestamp upload.
    #[cfg(feature = "enable")]
    end_query_id: u16,
    // Progress through the end/upload lifecycle; guards against double emission.
    #[cfg(feature = "enable")]
    state: GpuSpanState,
    // Prevents construction outside this module.
    _private: (),
}
136
/// Errors that can occur when creating a gpu span.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuSpanCreationError {
    /// More than `32767` spans are still waiting for gpu data.
    TooManyPendingSpans,
}
143
144impl std::fmt::Display for GpuSpanCreationError {
145 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
146 write!(
147 f,
148 "Too many spans still waiting for gpu data. There may not be more than 32767 spans that are pending gpu data at once."
149 )
150 }
151}
152
// Marker impl: the default `Error` methods suffice; `Display` supplies the message.
impl std::error::Error for GpuSpanCreationError {}
154
impl Client {
    /// Creates a new GPU context.
    ///
    /// - `name` is the name of the context.
    /// - `ty` is the type (backend) of the context.
    /// - `gpu_timestamp` is the gpu side timestamp that corresponds (as close as possible) to this call.
    /// - `period` is the period of the gpu clock in nanoseconds (setting 1.0 means the clock is 1GHz, 1000.0 means 1MHz, etc).
    ///
    /// See the [type level documentation](GpuContext) for more information.
    ///
    /// # Errors
    ///
    /// - If more than 255 contexts were made during the lifetime of the application.
    pub fn new_gpu_context(
        self,
        name: Option<&str>,
        ty: GpuContextType,
        gpu_timestamp: i64,
        period: f32,
    ) -> Result<GpuContext, GpuContextCreationError> {
        #[cfg(feature = "enable")]
        {
            // Reserve a unique context id. Holding the mutex across the
            // read-then-increment prevents two threads from ever observing
            // (and emitting) the same id.
            let mut context_index_guard = GPU_CONTEXT_INDEX.lock().unwrap();
            if *context_index_guard == 255 {
                return Err(GpuContextCreationError::TooManyContextsCreated);
            }
            let context = *context_index_guard;
            *context_index_guard += 1;
            // Release the lock before the (potentially slower) FFI calls below.
            drop(context_index_guard);

            // SAFETY:
            // - We know we aren't re-using the context id because of the above logic.
            unsafe {
                sys::___tracy_emit_gpu_new_context_serial(sys::___tracy_gpu_new_context_data {
                    gpuTime: gpu_timestamp,
                    period,
                    context,
                    flags: 0,
                    type_: ty as u8,
                });
            };

            if let Some(name) = name {
                // SAFETY:
                // - We've allocated a context.
                // - The names will copied into the command stream, so the pointers do not need to last.
                unsafe {
                    sys::___tracy_emit_gpu_context_name_serial(
                        sys::___tracy_gpu_context_name_data {
                            context,
                            name: name.as_ptr().cast(),
                            // Names longer than u16::MAX bytes are truncated to fit the wire format.
                            len: name.len().try_into().unwrap_or(u16::MAX),
                        },
                    );
                }
            }

            Ok(GpuContext {
                _client: self,
                value: context,
                gpu_start_timestamp: gpu_timestamp,
                // Query ids are u16, so seed the freelist with all 65536 possible ids.
                span_freelist: Arc::new(Mutex::new((0..=u16::MAX).collect())),
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuContext { _private: () })
    }
}
227
228impl GpuContext {
229 #[cfg(feature = "enable")]
230 fn alloc_span_ids(&self) -> Result<(u16, u16), GpuSpanCreationError> {
231 let mut freelist = self.span_freelist.lock().unwrap();
232 if freelist.len() < 2 {
233 return Err(GpuSpanCreationError::TooManyPendingSpans);
234 }
235 // These unwraps are unreachable.
236 let start = freelist.pop().unwrap();
237 let end = freelist.pop().unwrap();
238 Ok((start, end))
239 }
240
241 /// Creates a new gpu span with the given source location.
242 ///
243 /// This should be called right next to where you record the corresponding gpu timestamp. This
244 /// allows tracy to correctly associate the cpu time with the gpu timestamp.
245 ///
246 /// # Errors
247 ///
248 /// - If there are more than 32767 spans waiting for gpu data at once.
249 pub fn span(
250 &self,
251 span_location: &'static SpanLocation,
252 ) -> Result<GpuSpan, GpuSpanCreationError> {
253 #[cfg(feature = "enable")]
254 {
255 let (start_query_id, end_query_id) = self.alloc_span_ids()?;
256
257 // SAFETY: We know that the span location is valid forever as it is 'static. `usize` will
258 // always be smaller than u64, so no data will be lost.
259 unsafe {
260 sys::___tracy_emit_gpu_zone_begin_serial(sys::___tracy_gpu_zone_begin_data {
261 srcloc: std::ptr::addr_of!(span_location.data) as usize as u64,
262 queryId: start_query_id,
263 context: self.value,
264 });
265 };
266
267 Ok(GpuSpan {
268 context: self.clone(),
269 start_query_id,
270 end_query_id,
271 state: GpuSpanState::Started,
272 _private: (),
273 })
274 }
275 #[cfg(not(feature = "enable"))]
276 Ok(GpuSpan { _private: () })
277 }
278
279 /// Creates a new gpu span with the given name, function, file, and line.
280 ///
281 /// This should be called right next to where you record the corresponding gpu timestamp. This
282 /// allows tracy to correctly associate the cpu time with the gpu timestamp.
283 ///
284 /// # Errors
285 ///
286 /// - If there are more than 32767 spans waiting for gpu data at once.
287 pub fn span_alloc(
288 &self,
289 name: &str,
290 function: &str,
291 file: &str,
292 line: u32,
293 ) -> Result<GpuSpan, GpuSpanCreationError> {
294 #[cfg(feature = "enable")]
295 {
296 let srcloc = unsafe {
297 sys::___tracy_alloc_srcloc_name(
298 line,
299 file.as_ptr().cast(),
300 file.len(),
301 function.as_ptr().cast(),
302 function.len(),
303 name.as_ptr().cast(),
304 name.len(),
305 0,
306 )
307 };
308
309 let (start_query_id, end_query_id) = self.alloc_span_ids()?;
310
311 unsafe {
312 sys::___tracy_emit_gpu_zone_begin_alloc_serial(sys::___tracy_gpu_zone_begin_data {
313 srcloc,
314 queryId: start_query_id,
315 context: self.value,
316 });
317 };
318
319 Ok(GpuSpan {
320 context: self.clone(),
321 start_query_id,
322 end_query_id,
323 state: GpuSpanState::Started,
324 _private: (),
325 })
326 }
327 #[cfg(not(feature = "enable"))]
328 Ok(GpuSpan { _private: () })
329 }
330}
331
impl GpuSpan {
    /// Marks the end of the given gpu span. This should be called right next to where you record
    /// the corresponding gpu timestamp for the end of the span. This allows tracy to correctly
    /// associate the cpu time with the gpu timestamp.
    ///
    /// Only the first time you call this function will it actually emit a gpu zone end event. Any
    /// subsequent calls will be ignored.
    pub fn end_zone(&mut self) {
        #[cfg(feature = "enable")]
        {
            // Idempotent: only a span still in `Started` emits the end event.
            if self.state != GpuSpanState::Started {
                return;
            }
            unsafe {
                sys::___tracy_emit_gpu_zone_end_serial(sys::___tracy_gpu_zone_end_data {
                    queryId: self.end_query_id,
                    context: self.context.value,
                });
            };
            self.state = GpuSpanState::Ended;
        }
    }

    /// Uploads the gpu timestamps associated with the span start and end to tracy,
    /// closing out the span.
    ///
    /// # Panics
    ///
    /// - If [`Self::end_zone`] has not been called on this span first
    ///   (only when the `enable` feature is active).
    pub fn upload_timestamp(mut self, start_timestamp: i64, end_timestamp: i64) {
        #[cfg(feature = "enable")]
        self.upload_timestamp_impl(start_timestamp, end_timestamp);
    }

    // Sends both gpu timestamps to tracy and returns the span's query ids to
    // the context's freelist. Shared by `upload_timestamp` and the `Drop` impl.
    #[cfg(feature = "enable")]
    fn upload_timestamp_impl(&mut self, start_timestamp: i64, end_timestamp: i64) {
        assert_eq!(
            self.state,
            GpuSpanState::Ended,
            "You must call end_zone before uploading timestamps."
        );
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: start_timestamp,
                queryId: self.start_query_id,
                context: self.context.value,
            });
        };

        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: end_timestamp,
                queryId: self.end_query_id,
                context: self.context.value,
            });
        };

        // Put the ids back into the freelist so future spans can reuse them.
        let mut freelist = self.context.span_freelist.lock().unwrap();
        freelist.push(self.start_query_id);
        freelist.push(self.end_query_id);
        drop(freelist);

        // Marks the span complete so `Drop` does nothing further.
        self.state = GpuSpanState::Uploaded;
    }
}
394
395impl Drop for GpuSpan {
396 fn drop(&mut self) {
397 #[cfg(feature = "enable")]
398 match self.state {
399 GpuSpanState::Started => {
400 self.end_zone();
401 self.upload_timestamp_impl(
402 self.context.gpu_start_timestamp,
403 self.context.gpu_start_timestamp,
404 );
405 }
406 GpuSpanState::Ended => {
407 self.upload_timestamp_impl(
408 self.context.gpu_start_timestamp,
409 self.context.gpu_start_timestamp,
410 );
411 }
412 GpuSpanState::Uploaded => {}
413 }
414 }
415}