Skip to main content

bevy_render/diagnostic/
internal.rs

1use alloc::{borrow::Cow, sync::Arc};
2use core::{
3    ops::{DerefMut, Range},
4    sync::atomic::{AtomicBool, Ordering},
5};
6use std::thread::{self, ThreadId};
7
8use bevy_diagnostic::{Diagnostic, DiagnosticMeasurement, DiagnosticPath, DiagnosticsStore};
9use bevy_ecs::resource::Resource;
10use bevy_ecs::system::{Res, ResMut};
11use bevy_platform::time::Instant;
12use std::sync::Mutex;
13use wgpu::{
14    Buffer, BufferDescriptor, BufferSize, BufferSlice, BufferUsages, CommandEncoder, ComputePass,
15    Device, Features, MapMode, PipelineStatisticsTypes, QuerySet, QuerySetDescriptor, QueryType,
16    RenderPass,
17};
18
19use crate::renderer::{RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper};
20
21use super::RecordDiagnostics;
22
/// Number of entries in each frame's timestamp query set; timestamp writes past this
/// count are skipped.
// buffer offset must be divisible by 256, so this constant must be divisible by 32 (=256/8)
const MAX_TIMESTAMP_QUERIES: u32 = 256;
/// Number of entries in each frame's pipeline-statistics query set; queries past this
/// count are skipped.
const MAX_PIPELINE_STATISTICS: u32 = 128;

/// Bytes reserved per timestamp entry in the resolve/read buffers (one `u64`).
const TIMESTAMP_SIZE: u64 = 8;
/// Bytes reserved per pipeline-statistics entry in the resolve/read buffers
/// (read back as five `u64` counters).
const PIPELINE_STATISTICS_SIZE: u64 = 40;
29
/// State behind [`DiagnosticsRecorder`]: the frame currently being recorded plus the
/// frames that are waiting for their GPU results or are ready to be reused.
struct DiagnosticsRecorderInternal {
    /// Value of `RenderQueue::get_timestamp_period` captured at construction; used to
    /// convert raw GPU timestamp differences into time.
    timestamp_period_ns: f32,
    /// Device features captured at construction; used whenever a new `FrameData` is created.
    features: Features,
    /// Frame currently being recorded. Behind a mutex because recording methods take `&self`.
    current_frame: Mutex<FrameData>,
    /// Frames that were finished and are waiting for their read buffer to be mapped.
    submitted_frames: Vec<FrameData>,
    /// Frames whose results were delivered; they are recycled by `finish_frame`.
    finished_frames: Vec<FrameData>,
    /// Tracy GPU context handed to every `FrameData`, if one was created.
    #[cfg(feature = "tracing-tracy")]
    tracy_gpu_context: Option<tracy_client::GpuContext>,
}
39
/// Records diagnostics into [`QuerySet`]'s keeping track of the mapping between
/// spans and indices to the corresponding entries in the [`QuerySet`].
///
/// The recorder state is stored inside a [`WgpuWrapper`].
#[derive(Resource)]
pub struct DiagnosticsRecorder(WgpuWrapper<DiagnosticsRecorderInternal>);
44
45impl DiagnosticsRecorder {
46    /// Creates the new `DiagnosticsRecorder`.
47    pub fn new(
48        adapter_info: &RenderAdapterInfo,
49        device: &RenderDevice,
50        queue: &RenderQueue,
51    ) -> DiagnosticsRecorder {
52        let features = device.features();
53
54        #[cfg(feature = "tracing-tracy")]
55        let tracy_gpu_context =
56            super::tracy_gpu::new_tracy_gpu_context(adapter_info, device, queue);
57        let _ = adapter_info; // Prevent unused variable warnings when tracing-tracy is not enabled
58
59        DiagnosticsRecorder(WgpuWrapper::new(DiagnosticsRecorderInternal {
60            timestamp_period_ns: queue.get_timestamp_period(),
61            features,
62            current_frame: Mutex::new(FrameData::new(
63                device,
64                features,
65                #[cfg(feature = "tracing-tracy")]
66                tracy_gpu_context.clone(),
67            )),
68            submitted_frames: Vec::new(),
69            finished_frames: Vec::new(),
70            #[cfg(feature = "tracing-tracy")]
71            tracy_gpu_context,
72        }))
73    }
74
    /// Returns the frame being recorded without locking: exclusive access is already
    /// guaranteed by `&mut self`, so `Mutex::get_mut` is used.
    ///
    /// Panics with "lock poisoned" if the mutex was poisoned.
    fn current_frame_mut(&mut self) -> &mut FrameData {
        self.0.current_frame.get_mut().expect("lock poisoned")
    }
78
    /// Locks the frame being recorded and returns the guard, for callers that only
    /// have `&self`.
    ///
    /// Panics with "lock poisoned" if the mutex was poisoned.
    fn current_frame_lock(&self) -> impl DerefMut<Target = FrameData> + '_ {
        self.0.current_frame.lock().expect("lock poisoned")
    }
82
83    /// Begins recording diagnostics for a new frame.
84    pub fn begin_frame(&mut self) {
85        let internal = &mut self.0;
86        let mut idx = 0;
87        while idx < internal.submitted_frames.len() {
88            let timestamp = internal.timestamp_period_ns;
89            if internal.submitted_frames[idx].run_mapped_callback(timestamp) {
90                let removed = internal.submitted_frames.swap_remove(idx);
91                internal.finished_frames.push(removed);
92            } else {
93                idx += 1;
94            }
95        }
96
97        self.current_frame_mut().begin();
98    }
99
    /// Copies data from [`QuerySet`]'s to a [`Buffer`], after which it can be downloaded to CPU.
    ///
    /// The commands are recorded into `encoder` on behalf of the frame currently being recorded.
    ///
    /// Should be called before [`DiagnosticsRecorder::finish_frame`].
    pub fn resolve(&mut self, encoder: &mut CommandEncoder) {
        self.current_frame_mut().resolve(encoder);
    }
106
107    /// Finishes recording diagnostics for the current frame.
108    ///
109    /// The specified `callback` will be invoked when diagnostics become available.
110    ///
111    /// Should be called after [`DiagnosticsRecorder::resolve`],
112    /// and **after** all commands buffers have been queued.
113    pub fn finish_frame(
114        &mut self,
115        device: &RenderDevice,
116        callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static,
117    ) {
118        #[cfg(feature = "tracing-tracy")]
119        let tracy_gpu_context = self.0.tracy_gpu_context.clone();
120
121        let internal = &mut self.0;
122        internal
123            .current_frame
124            .get_mut()
125            .expect("lock poisoned")
126            .finish(callback);
127
128        // reuse one of the finished frames, if we can
129        let new_frame = match internal.finished_frames.pop() {
130            Some(frame) => frame,
131            None => FrameData::new(
132                device,
133                internal.features,
134                #[cfg(feature = "tracing-tracy")]
135                tracy_gpu_context,
136            ),
137        };
138
139        let old_frame = core::mem::replace(
140            internal.current_frame.get_mut().expect("lock poisoned"),
141            new_frame,
142        );
143        internal.submitted_frames.push(old_frame);
144    }
145}
146
147impl RecordDiagnostics for DiagnosticsRecorder {
148    fn record_f32<N>(&self, command_encoder: &mut CommandEncoder, buffer: &BufferSlice, name: N)
149    where
150        N: Into<Cow<'static, str>>,
151    {
152        assert_eq!(
153            buffer.size(),
154            BufferSize::new(4).unwrap(),
155            "DiagnosticsRecorder::record_f32 buffer slice must be 4 bytes long"
156        );
157        assert!(
158            buffer.buffer().usage().contains(BufferUsages::COPY_SRC),
159            "DiagnosticsRecorder::record_f32 buffer must have BufferUsages::COPY_SRC"
160        );
161
162        self.current_frame_lock()
163            .record_value(command_encoder, buffer, name.into(), true);
164    }
165
166    fn record_u32<N>(&self, command_encoder: &mut CommandEncoder, buffer: &BufferSlice, name: N)
167    where
168        N: Into<Cow<'static, str>>,
169    {
170        assert_eq!(
171            buffer.size(),
172            BufferSize::new(4).unwrap(),
173            "DiagnosticsRecorder::record_u32 buffer slice must be 4 bytes long"
174        );
175        assert!(
176            buffer.buffer().usage().contains(BufferUsages::COPY_SRC),
177            "DiagnosticsRecorder::record_u32 buffer must have BufferUsages::COPY_SRC"
178        );
179
180        self.current_frame_lock()
181            .record_value(command_encoder, buffer, name.into(), false);
182    }
183
184    fn begin_time_span<E: WriteTimestamp>(&self, encoder: &mut E, span_name: Cow<'static, str>) {
185        self.current_frame_lock()
186            .begin_time_span(encoder, span_name);
187    }
188
189    fn end_time_span<E: WriteTimestamp>(&self, encoder: &mut E) {
190        self.current_frame_lock().end_time_span(encoder);
191    }
192
193    fn begin_pass_span<P: Pass>(&self, pass: &mut P, span_name: Cow<'static, str>) {
194        self.current_frame_lock().begin_pass(pass, span_name);
195    }
196
197    fn end_pass_span<P: Pass>(&self, pass: &mut P) {
198        self.current_frame_lock().end_pass(pass);
199    }
200}
201
/// Bookkeeping for a single span (an encoder time span or a pass) within one frame.
struct SpanRecord {
    /// Thread that opened the span; spans are matched to their parent and closed per thread.
    thread_id: ThreadId,
    /// Range into `FrameData::path_components` holding this span's full path.
    path_range: Range<usize>,
    /// `Some` for spans opened through `begin_pass`, `None` for encoder time spans.
    pass_kind: Option<PassKind>,
    /// Index of the starting timestamp in the timestamp query set, if one was written.
    begin_timestamp_index: Option<u32>,
    /// Index of the ending timestamp in the timestamp query set, if one was written.
    end_timestamp_index: Option<u32>,
    /// CPU time captured when the span was opened.
    begin_instant: Option<Instant>,
    /// CPU time captured when the span was closed.
    end_instant: Option<Instant>,
    /// Index of this span's entry in the pipeline-statistics query set, if a query was started.
    pipeline_statistics_index: Option<u32>,
}
212
/// GPU objects and bookkeeping used to record the diagnostics of one frame.
struct FrameData {
    /// Device used to create the per-value read-back buffers.
    device: Device,
    /// Timestamp query set; `None` when `Features::TIMESTAMP_QUERY` is unavailable.
    timestamps_query_set: Option<QuerySet>,
    /// Number of timestamp slots handed out this frame.
    num_timestamps: u32,
    /// Whether `Features::TIMESTAMP_QUERY_INSIDE_PASSES` is available.
    supports_timestamps_inside_passes: bool,
    /// Whether `Features::TIMESTAMP_QUERY_INSIDE_ENCODERS` is available.
    supports_timestamps_inside_encoders: bool,
    /// Pipeline-statistics query set; `None` when `Features::PIPELINE_STATISTICS_QUERY`
    /// is unavailable.
    pipeline_statistics_query_set: Option<QuerySet>,
    /// Number of pipeline-statistics slots handed out this frame.
    num_pipeline_statistics: u32,
    /// Size in bytes of both `resolve_buffer` and `read_buffer`.
    buffer_size: u64,
    /// Byte offset of the pipeline-statistics region inside the buffers.
    pipeline_statistics_buffer_offset: u64,
    /// Destination of query resolution; `None` when no query set exists.
    resolve_buffer: Option<Buffer>,
    /// Mappable copy of `resolve_buffer`; `None` when no query set exists.
    read_buffer: Option<Buffer>,
    /// Flat storage of span names; each span refers to its path via `SpanRecord::path_range`.
    path_components: Vec<Cow<'static, str>>,
    /// Spans that were opened but not yet closed.
    open_spans: Vec<SpanRecord>,
    /// Spans that were closed this frame.
    closed_spans: Vec<SpanRecord>,
    /// Read-back buffers for recorded values: (buffer, diagnostic name, `true` if `f32`
    /// / `false` if `u32`).
    value_buffers: Vec<(Buffer, Cow<'static, str>, bool)>,
    /// Set by the `map_async` callback once `read_buffer` was mapped successfully.
    is_mapped: Arc<AtomicBool>,
    /// Callback receiving the frame's diagnostics once they are available.
    callback: Option<Box<dyn FnOnce(RenderDiagnostics) + Send + Sync + 'static>>,
    /// Tracy GPU context used to upload GPU spans, if any.
    #[cfg(feature = "tracing-tracy")]
    tracy_gpu_context: Option<tracy_client::GpuContext>,
}
234
235impl FrameData {
236    fn new(
237        device: &RenderDevice,
238        features: Features,
239        #[cfg(feature = "tracing-tracy")] tracy_gpu_context: Option<tracy_client::GpuContext>,
240    ) -> FrameData {
241        let wgpu_device = device.wgpu_device();
242        let mut buffer_size = 0;
243
244        let timestamps_query_set = if features.contains(Features::TIMESTAMP_QUERY) {
245            buffer_size += u64::from(MAX_TIMESTAMP_QUERIES) * TIMESTAMP_SIZE;
246            Some(wgpu_device.create_query_set(&QuerySetDescriptor {
247                label: Some("timestamps_query_set"),
248                ty: QueryType::Timestamp,
249                count: MAX_TIMESTAMP_QUERIES,
250            }))
251        } else {
252            None
253        };
254
255        let pipeline_statistics_buffer_offset = buffer_size;
256
257        let pipeline_statistics_query_set =
258            if features.contains(Features::PIPELINE_STATISTICS_QUERY) {
259                buffer_size += u64::from(MAX_PIPELINE_STATISTICS) * PIPELINE_STATISTICS_SIZE;
260                Some(wgpu_device.create_query_set(&QuerySetDescriptor {
261                    label: Some("pipeline_statistics_query_set"),
262                    ty: QueryType::PipelineStatistics(PipelineStatisticsTypes::all()),
263                    count: MAX_PIPELINE_STATISTICS,
264                }))
265            } else {
266                None
267            };
268
269        let (resolve_buffer, read_buffer) = if buffer_size > 0 {
270            let resolve_buffer = wgpu_device.create_buffer(&BufferDescriptor {
271                label: Some("render_statistics_resolve_buffer"),
272                size: buffer_size,
273                usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC,
274                mapped_at_creation: false,
275            });
276            let read_buffer = wgpu_device.create_buffer(&BufferDescriptor {
277                label: Some("render_statistics_read_buffer"),
278                size: buffer_size,
279                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
280                mapped_at_creation: false,
281            });
282            (Some(resolve_buffer), Some(read_buffer))
283        } else {
284            (None, None)
285        };
286
287        FrameData {
288            device: wgpu_device.clone(),
289            timestamps_query_set,
290            num_timestamps: 0,
291            supports_timestamps_inside_passes: features
292                .contains(Features::TIMESTAMP_QUERY_INSIDE_PASSES),
293            supports_timestamps_inside_encoders: features
294                .contains(Features::TIMESTAMP_QUERY_INSIDE_ENCODERS),
295            pipeline_statistics_query_set,
296            num_pipeline_statistics: 0,
297            buffer_size,
298            pipeline_statistics_buffer_offset,
299            resolve_buffer,
300            read_buffer,
301            path_components: Vec::new(),
302            open_spans: Vec::new(),
303            closed_spans: Vec::new(),
304            value_buffers: Vec::new(),
305            is_mapped: Arc::new(AtomicBool::new(false)),
306            callback: None,
307            #[cfg(feature = "tracing-tracy")]
308            tracy_gpu_context,
309        }
310    }
311
312    fn begin(&mut self) {
313        self.num_timestamps = 0;
314        self.num_pipeline_statistics = 0;
315        self.path_components.clear();
316        self.open_spans.clear();
317        self.closed_spans.clear();
318    }
319
320    fn write_timestamp(
321        &mut self,
322        encoder: &mut impl WriteTimestamp,
323        is_inside_pass: bool,
324    ) -> Option<u32> {
325        // `encoder.write_timestamp` is unsupported on WebGPU.
326        if !self.supports_timestamps_inside_encoders {
327            return None;
328        }
329
330        if is_inside_pass && !self.supports_timestamps_inside_passes {
331            return None;
332        }
333
334        if self.num_timestamps >= MAX_TIMESTAMP_QUERIES {
335            return None;
336        }
337
338        let set = self.timestamps_query_set.as_ref()?;
339        let index = self.num_timestamps;
340        encoder.write_timestamp(set, index);
341        self.num_timestamps += 1;
342        Some(index)
343    }
344
345    fn write_pipeline_statistics(
346        &mut self,
347        encoder: &mut impl WritePipelineStatistics,
348    ) -> Option<u32> {
349        if self.num_pipeline_statistics >= MAX_PIPELINE_STATISTICS {
350            return None;
351        }
352
353        let set = self.pipeline_statistics_query_set.as_ref()?;
354        let index = self.num_pipeline_statistics;
355        encoder.begin_pipeline_statistics_query(set, index);
356        self.num_pipeline_statistics += 1;
357        Some(index)
358    }
359
360    fn open_span(
361        &mut self,
362        pass_kind: Option<PassKind>,
363        name: Cow<'static, str>,
364    ) -> &mut SpanRecord {
365        let thread_id = thread::current().id();
366
367        let parent = self.open_spans.iter().rfind(|v| v.thread_id == thread_id);
368
369        let path_range = match &parent {
370            Some(parent) if parent.path_range.end == self.path_components.len() => {
371                parent.path_range.start..parent.path_range.end + 1
372            }
373            Some(parent) => {
374                self.path_components
375                    .extend_from_within(parent.path_range.clone());
376                self.path_components.len() - parent.path_range.len()..self.path_components.len() + 1
377            }
378            None => self.path_components.len()..self.path_components.len() + 1,
379        };
380
381        self.path_components.push(name);
382
383        self.open_spans.push(SpanRecord {
384            thread_id,
385            path_range,
386            pass_kind,
387            begin_timestamp_index: None,
388            end_timestamp_index: None,
389            begin_instant: None,
390            end_instant: None,
391            pipeline_statistics_index: None,
392        });
393
394        self.open_spans.last_mut().unwrap()
395    }
396
397    fn close_span(&mut self) -> &mut SpanRecord {
398        let thread_id = thread::current().id();
399
400        let iter = self.open_spans.iter();
401        let (index, _) = iter
402            .enumerate()
403            .rfind(|(_, v)| v.thread_id == thread_id)
404            .unwrap();
405
406        let span = self.open_spans.swap_remove(index);
407        self.closed_spans.push(span);
408        self.closed_spans.last_mut().unwrap()
409    }
410
411    fn record_value(
412        &mut self,
413        command_encoder: &mut CommandEncoder,
414        buffer: &BufferSlice,
415        name: Cow<'static, str>,
416        is_f32: bool,
417    ) {
418        let dest_buffer = self.device.create_buffer(&BufferDescriptor {
419            label: Some(&format!("render_diagnostic_{name}")),
420            size: 4,
421            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
422            mapped_at_creation: false,
423        });
424
425        command_encoder.copy_buffer_to_buffer(
426            buffer.buffer(),
427            buffer.offset(),
428            &dest_buffer,
429            0,
430            Some(buffer.size().into()),
431        );
432
433        command_encoder.map_buffer_on_submit(&dest_buffer, MapMode::Read, .., |_| {});
434
435        self.value_buffers.push((dest_buffer, name, is_f32));
436    }
437
    /// Opens an encoder-level time span called `name`.
    ///
    /// The CPU instant is captured first, then a GPU timestamp is written through
    /// `encoder` (if possible), and finally the span is registered with both.
    fn begin_time_span(&mut self, encoder: &mut impl WriteTimestamp, name: Cow<'static, str>) {
        let begin_instant = Instant::now();
        // `false`: this timestamp is written outside of a pass.
        let begin_timestamp_index = self.write_timestamp(encoder, false);

        let span = self.open_span(None, name);
        span.begin_instant = Some(begin_instant);
        span.begin_timestamp_index = begin_timestamp_index;
    }
446
    /// Closes the calling thread's most recently opened span as an encoder-level time span.
    ///
    /// A GPU timestamp is written through `encoder` (if possible) before the span is
    /// closed; the CPU end instant is captured last.
    fn end_time_span(&mut self, encoder: &mut impl WriteTimestamp) {
        // `false`: this timestamp is written outside of a pass.
        let end_timestamp_index = self.write_timestamp(encoder, false);

        let span = self.close_span();
        span.end_timestamp_index = end_timestamp_index;
        span.end_instant = Some(Instant::now());
    }
454
    /// Opens a span called `name` for the render or compute pass `pass`.
    ///
    /// In addition to the CPU instant and the GPU timestamp, a pipeline statistics
    /// query is started on the pass when a slot is available.
    fn begin_pass<P: Pass>(&mut self, pass: &mut P, name: Cow<'static, str>) {
        let begin_instant = Instant::now();

        // `true`: this timestamp is written inside a pass.
        let begin_timestamp_index = self.write_timestamp(pass, true);
        let pipeline_statistics_index = self.write_pipeline_statistics(pass);

        let span = self.open_span(Some(P::KIND), name);
        span.begin_instant = Some(begin_instant);
        span.begin_timestamp_index = begin_timestamp_index;
        span.pipeline_statistics_index = pipeline_statistics_index;
    }
466
    /// Closes the calling thread's most recently opened span as a pass span.
    ///
    /// The end timestamp is written first; the pipeline statistics query is ended only
    /// if one was started for this span. The CPU end instant is captured last.
    fn end_pass(&mut self, pass: &mut impl Pass) {
        // `true`: this timestamp is written inside a pass.
        let end_timestamp_index = self.write_timestamp(pass, true);

        let span = self.close_span();
        span.end_timestamp_index = end_timestamp_index;

        // Only end a query that `begin_pass` actually started.
        if span.pipeline_statistics_index.is_some() {
            pass.end_pipeline_statistics_query();
        }

        span.end_instant = Some(Instant::now());
    }
479
480    fn resolve(&mut self, encoder: &mut CommandEncoder) {
481        let Some(resolve_buffer) = &self.resolve_buffer else {
482            return;
483        };
484
485        match &self.timestamps_query_set {
486            Some(set) if self.num_timestamps > 0 => {
487                encoder.resolve_query_set(set, 0..self.num_timestamps, resolve_buffer, 0);
488            }
489            _ => {}
490        }
491
492        match &self.pipeline_statistics_query_set {
493            Some(set) if self.num_pipeline_statistics > 0 => {
494                encoder.resolve_query_set(
495                    set,
496                    0..self.num_pipeline_statistics,
497                    resolve_buffer,
498                    self.pipeline_statistics_buffer_offset,
499                );
500            }
501            _ => {}
502        }
503
504        let Some(read_buffer) = &self.read_buffer else {
505            return;
506        };
507
508        encoder.copy_buffer_to_buffer(resolve_buffer, 0, read_buffer, 0, self.buffer_size);
509    }
510
    /// Builds the diagnostic path `render/<span path components…>/<field>`, where the
    /// span's components are `path_components[range]`.
    fn diagnostic_path(&self, range: &Range<usize>, field: &str) -> DiagnosticPath {
        DiagnosticPath::from_components(
            core::iter::once("render")
                // `&**v` turns `&Cow<'static, str>` into `&str`.
                .chain(self.path_components[range.clone()].iter().map(|v| &**v))
                .chain(core::iter::once(field)),
        )
    }
518
519    fn finish(&mut self, callback: impl FnOnce(RenderDiagnostics) + Send + Sync + 'static) {
520        let Some(read_buffer) = &self.read_buffer else {
521            // we still have cpu timings, so let's use them
522
523            let mut diagnostics = Vec::new();
524
525            for span in &self.closed_spans {
526                if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {
527                    diagnostics.push(RenderDiagnostic {
528                        path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),
529                        suffix: "ms",
530                        value: (end - begin).as_secs_f64() * 1000.0,
531                    });
532                }
533            }
534
535            for (buffer, diagnostic_path, is_f32) in self.value_buffers.drain(..) {
536                let buffer = buffer.get_mapped_range(..);
537                diagnostics.push(RenderDiagnostic {
538                    path: DiagnosticPath::from_components(
539                        core::iter::once("render")
540                            .chain(core::iter::once(diagnostic_path.as_ref())),
541                    ),
542                    suffix: "",
543                    value: if is_f32 {
544                        f32::from_le_bytes((*buffer).try_into().unwrap()) as f64
545                    } else {
546                        u32::from_le_bytes((*buffer).try_into().unwrap()) as f64
547                    },
548                });
549            }
550
551            callback(RenderDiagnostics(diagnostics));
552            return;
553        };
554
555        self.callback = Some(Box::new(callback));
556
557        let is_mapped = self.is_mapped.clone();
558        read_buffer.slice(..).map_async(MapMode::Read, move |res| {
559            if let Err(e) = res {
560                bevy_log::warn!("Failed to download render statistics buffer: {e}");
561                return;
562            }
563
564            is_mapped.store(true, Ordering::Release);
565        });
566    }
567
    /// Delivers this frame's diagnostics to the stored callback once the read buffer
    /// has been mapped.
    ///
    /// `timestamp_period_ns` is multiplied with raw GPU timestamp differences to turn
    /// them into time; the result is reported in milliseconds.
    ///
    /// NOTE(review): if `map_async` fails, `is_mapped` is never set, so this keeps
    /// returning `false` for that frame and the callback is never invoked.
    // returns true if the frame is considered finished, false otherwise
    fn run_mapped_callback(&mut self, timestamp_period_ns: f32) -> bool {
        // Without a read buffer `finish` already delivered the diagnostics.
        let Some(read_buffer) = &self.read_buffer else {
            return true;
        };
        if !self.is_mapped.load(Ordering::Acquire) {
            // need to wait more
            return false;
        }
        // No pending callback means there is nothing left to deliver.
        let Some(callback) = self.callback.take() else {
            return true;
        };

        let data = read_buffer.slice(..).get_mapped_range();

        // The timestamps region starts at offset 0: one little-endian `u64` per used slot.
        let timestamps = data[..(self.num_timestamps * 8) as usize]
            .as_chunks()
            .0
            .iter()
            .map(|&v| u64::from_le_bytes(v))
            .collect::<Vec<u64>>();

        // The pipeline statistics region holds 40 bytes (five `u64` counters) per used
        // slot, flattened here into one vector of counters.
        let start = self.pipeline_statistics_buffer_offset as usize;
        let len = (self.num_pipeline_statistics as usize) * 40;
        let pipeline_statistics = data[start..start + len]
            .as_chunks()
            .0
            .iter()
            .map(|&v| u64::from_le_bytes(v))
            .collect::<Vec<u64>>();

        let mut diagnostics = Vec::new();

        for span in &self.closed_spans {
            // CPU time between opening and closing the span, in milliseconds.
            if let (Some(begin), Some(end)) = (span.begin_instant, span.end_instant) {
                diagnostics.push(RenderDiagnostic {
                    path: self.diagnostic_path(&span.path_range, "elapsed_cpu"),
                    suffix: "ms",
                    value: (end - begin).as_secs_f64() * 1000.0,
                });
            }

            // GPU time, only available when both timestamps were written.
            if let (Some(begin), Some(end)) = (span.begin_timestamp_index, span.end_timestamp_index)
            {
                let begin = timestamps[begin as usize] as f64;
                let end = timestamps[end as usize] as f64;
                // ticks * ns-per-tick / 1e6 = milliseconds
                let value = (end - begin) * (timestamp_period_ns as f64) / 1e6;

                #[cfg(feature = "tracing-tracy")]
                {
                    // Calling span_alloc() and end_zone() here instead of in open_span() and close_span() means that tracy does not know where each GPU command was recorded on the CPU timeline.
                    // Unfortunately we must do it this way, because tracy does not play nicely with multithreaded command recording. The start/end pairs would get all mixed up.
                    // The GPU spans themselves are still accurate though, and it's probably safe to assume that each GPU span in frame N belongs to the corresponding CPU render node span from frame N-1.
                    if let Some(tracy_gpu_context) = &self.tracy_gpu_context {
                        let name = &self.path_components[span.path_range.clone()].join("/");
                        let mut tracy_gpu_span =
                            tracy_gpu_context.span_alloc(name, "", "", 0).unwrap();
                        tracy_gpu_span.end_zone();
                        tracy_gpu_span.upload_timestamp_start(begin as i64);
                        tracy_gpu_span.upload_timestamp_end(end as i64);
                    }
                }

                diagnostics.push(RenderDiagnostic {
                    path: self.diagnostic_path(&span.path_range, "elapsed_gpu"),
                    suffix: "ms",
                    value,
                });
            }

            if let Some(index) = span.pipeline_statistics_index {
                // Each slot contributes five consecutive counters.
                let index = (index as usize) * 5;

                // Counters 0..=3 are reported for render passes only.
                if span.pass_kind == Some(PassKind::Render) {
                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "vertex_shader_invocations"),
                        suffix: "",
                        value: pipeline_statistics[index] as f64,
                    });

                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "clipper_invocations"),
                        suffix: "",
                        value: pipeline_statistics[index + 1] as f64,
                    });

                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "clipper_primitives_out"),
                        suffix: "",
                        value: pipeline_statistics[index + 2] as f64,
                    });

                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "fragment_shader_invocations"),
                        suffix: "",
                        value: pipeline_statistics[index + 3] as f64,
                    });
                }

                // Counter 4 is reported for compute passes only.
                if span.pass_kind == Some(PassKind::Compute) {
                    diagnostics.push(RenderDiagnostic {
                        path: self.diagnostic_path(&span.path_range, "compute_shader_invocations"),
                        suffix: "",
                        value: pipeline_statistics[index + 4] as f64,
                    });
                }
            }
        }

        // Values recorded through `record_value`: each buffer holds one 4-byte
        // little-endian `f32` or `u32`.
        for (buffer, diagnostic_path, is_f32) in self.value_buffers.drain(..) {
            let buffer = buffer.get_mapped_range(..);
            diagnostics.push(RenderDiagnostic {
                path: DiagnosticPath::from_components(
                    core::iter::once("render").chain(core::iter::once(diagnostic_path.as_ref())),
                ),
                suffix: "",
                value: if is_f32 {
                    f32::from_le_bytes((*buffer).try_into().unwrap()) as f64
                } else {
                    u32::from_le_bytes((*buffer).try_into().unwrap()) as f64
                },
            });
        }

        callback(RenderDiagnostics(diagnostics));

        // Release the mapped view before unmapping, then reset the flag for the next use
        // of this frame.
        drop(data);
        read_buffer.unmap();
        self.is_mapped.store(false, Ordering::Release);

        true
    }
700}
701
/// Resource which stores render diagnostics of the most recent frame.
///
/// Wraps the list of individual [`RenderDiagnostic`] entries.
#[derive(Debug, Default, Clone, Resource)]
pub struct RenderDiagnostics(Vec<RenderDiagnostic>);
705
/// A render diagnostic which has been recorded, but not yet stored in [`DiagnosticsStore`].
#[derive(Debug, Clone, Resource)]
pub struct RenderDiagnostic {
    /// Path under which the measurement is stored in the [`DiagnosticsStore`].
    pub path: DiagnosticPath,
    /// Suffix given to the [`Diagnostic`] when it is first created (e.g. `"ms"`).
    pub suffix: &'static str,
    /// The measured value.
    pub value: f64,
}
713
/// Stores render diagnostics before they can be synced with the main app.
///
/// This mutex is locked twice per frame:
///  1. in `PreUpdate`, during [`sync_diagnostics`],
///  2. after rendering has finished and statistics have been downloaded from GPU.
///
/// [`sync_diagnostics`] takes the stored value out, leaving `None` behind.
#[derive(Debug, Default, Clone, Resource)]
pub struct RenderDiagnosticsMutex(pub(crate) Arc<Mutex<Option<RenderDiagnostics>>>);
721
722/// Updates render diagnostics measurements.
723pub fn sync_diagnostics(mutex: Res<RenderDiagnosticsMutex>, mut store: ResMut<DiagnosticsStore>) {
724    let Some(diagnostics) = mutex.0.lock().ok().and_then(|mut v| v.take()) else {
725        return;
726    };
727
728    let time = Instant::now();
729
730    for diagnostic in &diagnostics.0 {
731        if store.get(&diagnostic.path).is_none() {
732            store.add(Diagnostic::new(diagnostic.path.clone()).with_suffix(diagnostic.suffix));
733        }
734
735        store
736            .get_mut(&diagnostic.path)
737            .unwrap()
738            .add_measurement(DiagnosticMeasurement {
739                time,
740                value: diagnostic.value,
741            });
742    }
743}
744
/// Something a GPU timestamp can be written through: a command encoder, a render pass
/// or a compute pass.
pub trait WriteTimestamp {
    /// Writes a timestamp into slot `index` of `query_set`.
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32);
}
748
749impl WriteTimestamp for CommandEncoder {
750    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
751        if cfg!(target_os = "macos") {
752            // When using tracy (and thus this function), rendering was flickering on macOS Tahoe.
753            // See: https://github.com/bevyengine/bevy/issues/22257
754            // The issue seems to be triggered when `write_timestamp` is called very close to frame
755            // presentation.
756            return;
757        }
758        CommandEncoder::write_timestamp(self, query_set, index);
759    }
760}
761
impl WriteTimestamp for RenderPass<'_> {
    /// Forwards to the inherent [`RenderPass::write_timestamp`].
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
        RenderPass::write_timestamp(self, query_set, index);
    }
}
767
impl WriteTimestamp for ComputePass<'_> {
    /// Forwards to the inherent [`ComputePass::write_timestamp`].
    fn write_timestamp(&mut self, query_set: &QuerySet, index: u32) {
        ComputePass::write_timestamp(self, query_set, index);
    }
}
773
/// A pass on which pipeline statistics queries can be started and ended.
pub trait WritePipelineStatistics {
    /// Starts a pipeline statistics query that writes into slot `index` of `query_set`.
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32);

    /// Ends the pipeline statistics query that is currently active on this pass.
    fn end_pipeline_statistics_query(&mut self);
}
779
impl WritePipelineStatistics for RenderPass<'_> {
    /// Forwards to the inherent [`RenderPass::begin_pipeline_statistics_query`].
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {
        RenderPass::begin_pipeline_statistics_query(self, query_set, index);
    }

    /// Forwards to the inherent [`RenderPass::end_pipeline_statistics_query`].
    fn end_pipeline_statistics_query(&mut self) {
        RenderPass::end_pipeline_statistics_query(self);
    }
}
789
impl WritePipelineStatistics for ComputePass<'_> {
    /// Forwards to the inherent [`ComputePass::begin_pipeline_statistics_query`].
    fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, index: u32) {
        ComputePass::begin_pipeline_statistics_query(self, query_set, index);
    }

    /// Forwards to the inherent [`ComputePass::end_pipeline_statistics_query`].
    fn end_pipeline_statistics_query(&mut self) {
        ComputePass::end_pipeline_statistics_query(self);
    }
}
799
/// A render or compute pass that supports both timestamps and pipeline statistics queries.
pub trait Pass: WritePipelineStatistics + WriteTimestamp {
    /// Which kind of pass the implementing type is.
    const KIND: PassKind;
}
803
/// Render passes report themselves as [`PassKind::Render`].
impl Pass for RenderPass<'_> {
    const KIND: PassKind = PassKind::Render;
}
807
/// Compute passes report themselves as [`PassKind::Compute`].
impl Pass for ComputePass<'_> {
    const KIND: PassKind = PassKind::Compute;
}
811
/// Kind of GPU pass a span was recorded for; it selects which pipeline statistics are
/// reported for the span.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum PassKind {
    /// A [`RenderPass`].
    Render,
    /// A [`ComputePass`].
    Compute,
}