// tracy_client/gpu.rs

1use std::{
2    convert::TryInto,
3    sync::{Arc, Mutex},
4};
5
6use crate::{Client, SpanLocation};
7
/// The API label associated with the given gpu context. The list here only includes
/// APIs that are currently supported by Tracy's own gpu implementations.
//
// Copied from `tracy-client-sys/tracy/common/TracyQueue.hpp:391`. Comment on enum states
// that the values are stable, due to potential serialization issues, so copying this enum
// shouldn't be a problem.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GpuContextType {
    /// Stand in for other types of contexts.
    Invalid = 0,
    /// An OpenGL context
    OpenGL = 1,
    /// A Vulkan context
    Vulkan = 2,
    /// An OpenCL context
    OpenCL = 3,
    /// A D3D12 context.
    Direct3D12 = 4,
    /// A D3D11 context.
    Direct3D11 = 5,
}
29
/// Context for creating gpu spans.
///
/// Generally corresponds to a single hardware queue.
///
/// The flow of creating and using gpu context generally looks like this:
///
/// ```rust,no_run
/// # let client = tracy_client::Client::start();
/// // The period of the gpu clock in nanoseconds, as provided by your GPU api.
/// // NOTE: per `new_gpu_context`'s docs, 1.0 means a 1GHz clock, so this
/// // value corresponds to a 1Hz clock.
/// let period: f32 = 1_000_000_000.0;
///
/// // GPU API: Record writing a timestamp and resolve that to a mappable buffer.
/// // GPU API: Submit the command buffer writing the timestamp.
/// // GPU API: Immediately block until the submission is finished.
/// // GPU API: Map buffer, get timestamp value.
/// let starting_timestamp: i64 = /* whatever you get from this timestamp */ 0;
///
/// // Create the gpu context
/// let gpu_context = client.new_gpu_context(
///     Some("MyContext"),
///     tracy_client::GpuContextType::Vulkan,
///     starting_timestamp,
///     period
/// ).unwrap();
///
/// // Now you have some work that you want to time on the gpu.
///
/// // GPU API: Record writing a timestamp before the work.
/// let mut span = gpu_context.span_alloc("MyGpuSpan1", "My::Work", "myfile.rs", 12).unwrap();
///
/// // GPU API: Record work.
///
/// // GPU API: Record writing a timestamp after the work.
/// span.end_zone();
///
/// // Some time later, once the written timestamp values are available on the cpu.
/// # let (starting_timestamp, ending_timestamp) = (0, 0);
///
/// // Consumes span.
/// span.upload_timestamp(starting_timestamp, ending_timestamp);
/// ```
#[derive(Clone)]
pub struct GpuContext {
    // Keeps the profiler client alive for as long as this context (or any
    // clone of it) exists.
    #[cfg(feature = "enable")]
    _client: Client,
    // The context id reported to tracy with every emitted event; assigned
    // from `GPU_CONTEXT_INDEX` and never reused.
    #[cfg(feature = "enable")]
    value: u8,
    // The gpu timestamp supplied at creation; used as a dummy value when a
    // span is dropped before its real timestamps were uploaded.
    #[cfg(feature = "enable")]
    gpu_start_timestamp: i64,
    // Pool of free query ids (each span consumes two: start and end), shared
    // between all clones of this context.
    #[cfg(feature = "enable")]
    span_freelist: Arc<Mutex<Vec<u16>>>,
    _private: (),
}
// The next gpu context id to hand out. Guarded by a mutex (rather than being
// an atomic) so that the overflow check and the increment in
// `new_gpu_context` happen as a single atomic step.
#[cfg(feature = "enable")]
static GPU_CONTEXT_INDEX: Mutex<u8> = Mutex::new(0);
86
/// Errors that can occur when creating a gpu context.
#[derive(Debug)]
pub enum GpuContextCreationError {
    /// More than `u8::MAX` contexts have been created at any point in the program.
    ///
    /// Context ids are never reused, so this is a limit on the total number
    /// of contexts created over the program's lifetime, not on live contexts.
    TooManyContextsCreated,
}
93
94impl std::fmt::Display for GpuContextCreationError {
95    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96        write!(
97            f,
98            "More than 255 contexts have been created at any point in the execution of this program."
99        )
100    }
101}
102
103impl std::error::Error for GpuContextCreationError {}
104
/// Lifecycle state of a [`GpuSpan`]: started -> ended -> uploaded.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum GpuSpanState {
    /// The span has been started. All gpu spans start in this state.
    Started,
    /// The span has been ended, waiting for timestamp upload.
    Ended,
    /// All timestamps have been uploaded.
    Uploaded,
}
114
/// Span for timing gpu work.
///
/// See the [context level documentation](GpuContext) for more information on use.
///
/// If the span is dropped early, the following happens:
/// - If the span has not been ended, the span is ended. AND
/// - If the span has not had values uploaded, the span is uploaded with
///   the timestamps marking the start of the current gpu context. This
///   will put the span out of the way of other spans.
#[must_use]
pub struct GpuSpan {
    // The context this span belongs to; supplies the context id and the
    // freelist the query ids are returned to.
    #[cfg(feature = "enable")]
    context: GpuContext,
    // Query id associated with the zone-begin timestamp.
    #[cfg(feature = "enable")]
    start_query_id: u16,
    // Query id associated with the zone-end timestamp.
    #[cfg(feature = "enable")]
    end_query_id: u16,
    // Tracks progress through started -> ended -> uploaded; drives both the
    // end_zone/upload_timestamp guards and the Drop fallback.
    #[cfg(feature = "enable")]
    state: GpuSpanState,
    _private: (),
}
136
/// Errors that can occur when creating a gpu span.
#[derive(Debug)]
pub enum GpuSpanCreationError {
    /// More than `32767` spans are still waiting for gpu data.
    ///
    /// Each span holds two query ids from the context's shared pool until its
    /// timestamps are uploaded, so this limits *pending* spans, not total spans.
    TooManyPendingSpans,
}
143
144impl std::fmt::Display for GpuSpanCreationError {
145    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
146        write!(
147            f,
148            "Too many spans still waiting for gpu data. There may not be more than 32767 spans that are pending gpu data at once."
149        )
150    }
151}
152
153impl std::error::Error for GpuSpanCreationError {}
154
impl Client {
    /// Creates a new GPU context.
    ///
    /// - `name` is the name of the context.
    /// - `ty` is the type (backend) of the context.
    /// - `gpu_timestamp` is the gpu side timestamp that corresponds (as close as possible) to this call.
    /// - `period` is the period of the gpu clock in nanoseconds (setting 1.0 means the clock is 1GHz, 1000.0 means 1MHz, etc).
    ///
    /// See the [type level documentation](GpuContext) for more information.
    ///
    /// # Errors
    ///
    /// - If more than 255 contexts were made during the lifetime of the application.
    pub fn new_gpu_context(
        self,
        name: Option<&str>,
        ty: GpuContextType,
        gpu_timestamp: i64,
        period: f32,
    ) -> Result<GpuContext, GpuContextCreationError> {
        #[cfg(feature = "enable")]
        {
            // A mutex (rather than an atomic fetch_add) is used so that the
            // exhaustion check and the increment form a single atomic step.
            //
            // This prevents multiple contexts getting the same context id.
            let mut context_index_guard = GPU_CONTEXT_INDEX.lock().unwrap();
            if *context_index_guard == 255 {
                return Err(GpuContextCreationError::TooManyContextsCreated);
            }
            let context = *context_index_guard;
            *context_index_guard += 1;
            drop(context_index_guard);

            // SAFETY:
            // - We know we aren't re-using the context id because of the above logic.
            unsafe {
                sys::___tracy_emit_gpu_new_context_serial(sys::___tracy_gpu_new_context_data {
                    gpuTime: gpu_timestamp,
                    period,
                    context,
                    flags: 0,
                    type_: ty as u8,
                });
            };

            if let Some(name) = name {
                // SAFETY:
                // - We've allocated a context.
                // - The names will be copied into the command stream, so the pointers do not need to last.
                unsafe {
                    sys::___tracy_emit_gpu_context_name_serial(
                        sys::___tracy_gpu_context_name_data {
                            context,
                            name: name.as_ptr().cast(),
                            // Names longer than u16::MAX bytes are truncated.
                            len: name.len().try_into().unwrap_or(u16::MAX),
                        },
                    );
                }
            }

            Ok(GpuContext {
                _client: self,
                value: context,
                gpu_start_timestamp: gpu_timestamp,
                // 65536 query ids; each span consumes two (start + end).
                span_freelist: Arc::new(Mutex::new((0..=u16::MAX).collect())),
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuContext { _private: () })
    }
}
227
228impl GpuContext {
229    #[cfg(feature = "enable")]
230    fn alloc_span_ids(&self) -> Result<(u16, u16), GpuSpanCreationError> {
231        let mut freelist = self.span_freelist.lock().unwrap();
232        if freelist.len() < 2 {
233            return Err(GpuSpanCreationError::TooManyPendingSpans);
234        }
235        // These unwraps are unreachable.
236        let start = freelist.pop().unwrap();
237        let end = freelist.pop().unwrap();
238        Ok((start, end))
239    }
240
241    /// Creates a new gpu span with the given source location.
242    ///
243    /// This should be called right next to where you record the corresponding gpu timestamp. This
244    /// allows tracy to correctly associate the cpu time with the gpu timestamp.
245    ///
246    /// # Errors
247    ///
248    /// - If there are more than 32767 spans waiting for gpu data at once.
249    pub fn span(
250        &self,
251        span_location: &'static SpanLocation,
252    ) -> Result<GpuSpan, GpuSpanCreationError> {
253        #[cfg(feature = "enable")]
254        {
255            let (start_query_id, end_query_id) = self.alloc_span_ids()?;
256
257            // SAFETY: We know that the span location is valid forever as it is 'static. `usize` will
258            // always be smaller than u64, so no data will be lost.
259            unsafe {
260                sys::___tracy_emit_gpu_zone_begin_serial(sys::___tracy_gpu_zone_begin_data {
261                    srcloc: std::ptr::addr_of!(span_location.data) as usize as u64,
262                    queryId: start_query_id,
263                    context: self.value,
264                });
265            };
266
267            Ok(GpuSpan {
268                context: self.clone(),
269                start_query_id,
270                end_query_id,
271                state: GpuSpanState::Started,
272                _private: (),
273            })
274        }
275        #[cfg(not(feature = "enable"))]
276        Ok(GpuSpan { _private: () })
277    }
278
279    /// Creates a new gpu span with the given name, function, file, and line.
280    ///
281    /// This should be called right next to where you record the corresponding gpu timestamp. This
282    /// allows tracy to correctly associate the cpu time with the gpu timestamp.
283    ///
284    /// # Errors
285    ///
286    /// - If there are more than 32767 spans waiting for gpu data at once.
287    pub fn span_alloc(
288        &self,
289        name: &str,
290        function: &str,
291        file: &str,
292        line: u32,
293    ) -> Result<GpuSpan, GpuSpanCreationError> {
294        #[cfg(feature = "enable")]
295        {
296            let srcloc = unsafe {
297                sys::___tracy_alloc_srcloc_name(
298                    line,
299                    file.as_ptr().cast(),
300                    file.len(),
301                    function.as_ptr().cast(),
302                    function.len(),
303                    name.as_ptr().cast(),
304                    name.len(),
305                    0,
306                )
307            };
308
309            let (start_query_id, end_query_id) = self.alloc_span_ids()?;
310
311            unsafe {
312                sys::___tracy_emit_gpu_zone_begin_alloc_serial(sys::___tracy_gpu_zone_begin_data {
313                    srcloc,
314                    queryId: start_query_id,
315                    context: self.value,
316                });
317            };
318
319            Ok(GpuSpan {
320                context: self.clone(),
321                start_query_id,
322                end_query_id,
323                state: GpuSpanState::Started,
324                _private: (),
325            })
326        }
327        #[cfg(not(feature = "enable"))]
328        Ok(GpuSpan { _private: () })
329    }
330}
331
impl GpuSpan {
    /// Marks the end of the given gpu span. This should be called right next to where you record
    /// the corresponding gpu timestamp for the end of the span. This allows tracy to correctly
    /// associate the cpu time with the gpu timestamp.
    ///
    /// Only the first time you call this function will it actually emit a gpu zone end event. Any
    /// subsequent calls will be ignored.
    pub fn end_zone(&mut self) {
        #[cfg(feature = "enable")]
        {
            // Only a started span may be ended; repeated calls are no-ops.
            if self.state != GpuSpanState::Started {
                return;
            }
            unsafe {
                sys::___tracy_emit_gpu_zone_end_serial(sys::___tracy_gpu_zone_end_data {
                    queryId: self.end_query_id,
                    context: self.context.value,
                });
            };
            self.state = GpuSpanState::Ended;
        }
    }

    /// Uploads the gpu timestamps associated with the span start and end to tracy,
    /// closing out the span.
    ///
    /// # Panics
    ///
    /// - If [`Self::end_zone`] was not called on this span first (only when the
    ///   `enable` feature is active).
    pub fn upload_timestamp(mut self, start_timestamp: i64, end_timestamp: i64) {
        #[cfg(feature = "enable")]
        self.upload_timestamp_impl(start_timestamp, end_timestamp);
    }

    // Shared implementation for `upload_timestamp` and the `Drop` fallback:
    // emits both gpu timestamps and recycles the query ids.
    #[cfg(feature = "enable")]
    fn upload_timestamp_impl(&mut self, start_timestamp: i64, end_timestamp: i64) {
        assert_eq!(
            self.state,
            GpuSpanState::Ended,
            "You must call end_zone before uploading timestamps."
        );
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: start_timestamp,
                queryId: self.start_query_id,
                context: self.context.value,
            });
        };

        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: end_timestamp,
                queryId: self.end_query_id,
                context: self.context.value,
            });
        };

        // Put the ids back into the freelist so future spans can reuse them.
        let mut freelist = self.context.span_freelist.lock().unwrap();
        freelist.push(self.start_query_id);
        freelist.push(self.end_query_id);
        drop(freelist);

        self.state = GpuSpanState::Uploaded;
    }
}
394
impl Drop for GpuSpan {
    fn drop(&mut self) {
        #[cfg(feature = "enable")]
        match self.state {
            GpuSpanState::Started => {
                // Never ended: end it now, then upload dummy timestamps (both
                // equal to the context's start timestamp) so the query ids are
                // recycled and the zero-length zone sits out of the way of
                // real spans.
                self.end_zone();
                self.upload_timestamp_impl(
                    self.context.gpu_start_timestamp,
                    self.context.gpu_start_timestamp,
                );
            }
            GpuSpanState::Ended => {
                // Ended but never uploaded: upload the same dummy timestamps.
                self.upload_timestamp_impl(
                    self.context.gpu_start_timestamp,
                    self.context.gpu_start_timestamp,
                );
            }
            // Fully uploaded spans need no cleanup.
            GpuSpanState::Uploaded => {}
        }
    }
}