bevy_render/renderer/mod.rs

mod graph_runner;
mod render_device;

use bevy_derive::{Deref, DerefMut};
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
use bevy_tasks::ComputeTaskPool;
pub use graph_runner::*;
pub use render_device::*;
use tracing::{error, info, info_span, warn};

use crate::{
    diagnostic::{internal::DiagnosticsRecorder, RecordDiagnostics},
    render_graph::RenderGraph,
    render_phase::TrackedRenderPass,
    render_resource::RenderPassDescriptor,
    settings::{WgpuSettings, WgpuSettingsPriority},
    view::{ExtractedWindows, ViewTarget},
};
use alloc::sync::Arc;
use bevy_ecs::{prelude::*, system::SystemState};
use bevy_platform::time::Instant;
use bevy_time::TimeSender;
use wgpu::{
    Adapter, AdapterInfo, CommandBuffer, CommandEncoder, DeviceType, Instance, Queue,
    RequestAdapterOptions,
};

/// Updates the [`RenderGraph`] with all of its nodes and then runs it to render the entire frame.
pub fn render_system(world: &mut World, state: &mut SystemState<Query<Entity, With<ViewTarget>>>) {
    world.resource_scope(|world, mut graph: Mut<RenderGraph>| {
        graph.update(world);
    });

    let diagnostics_recorder = world.remove_resource::<DiagnosticsRecorder>();

    let graph = world.resource::<RenderGraph>();
    let render_device = world.resource::<RenderDevice>();
    let render_queue = world.resource::<RenderQueue>();
    #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
    let render_adapter = world.resource::<RenderAdapter>();

    let res = RenderGraphRunner::run(
        graph,
        render_device.clone(), // TODO: is this clone really necessary?
        diagnostics_recorder,
        &render_queue.0,
        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
        &render_adapter.0,
        world,
        |encoder| {
            crate::view::screenshot::submit_screenshot_commands(world, encoder);
            crate::gpu_readback::submit_readback_commands(world, encoder);
        },
    );

    match res {
        Ok(Some(diagnostics_recorder)) => {
            world.insert_resource(diagnostics_recorder);
        }
        Ok(None) => {}
        Err(e) => {
            error!("Error running render graph:");
            {
                let mut src: &dyn core::error::Error = &e;
                loop {
                    error!("> {}", src);
                    match src.source() {
                        Some(s) => src = s,
                        None => break,
                    }
                }
            }

            panic!("Error running render graph: {e}");
        }
    }

    {
        let _span = info_span!("present_frames").entered();

        // Remove ViewTarget components to ensure swap chain TextureViews are dropped.
        // If all TextureViews aren't dropped before present, acquiring the next swap chain texture will fail.
        let view_entities = state.get(world).iter().collect::<Vec<_>>();
        for view_entity in view_entities {
            world.entity_mut(view_entity).remove::<ViewTarget>();
        }

        let mut windows = world.resource_mut::<ExtractedWindows>();
        for window in windows.values_mut() {
            if let Some(surface_texture) = window.swap_chain_texture.take() {
                // TODO(clean): winit docs recommend calling pre_present_notify before this.
                // Note that `present()` doesn't present the frame itself; it only schedules it to
                // be presented by wgpu.
                // https://docs.rs/winit/0.29.9/wasm32-unknown-unknown/winit/window/struct.Window.html#method.pre_present_notify
                surface_texture.present();
            }
        }

        #[cfg(feature = "tracing-tracy")]
        tracing::event!(
            tracing::Level::INFO,
            message = "finished frame",
            tracy.frame_mark = true
        );
    }

    crate::view::screenshot::collect_screenshots(world);

    // update the time and send it to the app world
    let time_sender = world.resource::<TimeSender>();
    if let Err(error) = time_sender.0.try_send(Instant::now()) {
        match error {
            bevy_time::TrySendError::Full(_) => {
                panic!("The TimeSender channel should always be empty during render. You might need to add the bevy::core::time_system to your app.",);
            }
            bevy_time::TrySendError::Disconnected(_) => {
                // ignore disconnected errors, the main world probably just got dropped during shutdown
            }
        }
    }
}

/// A wrapper to safely make `wgpu` types Send / Sync on web with atomics enabled.
///
/// On web with `atomics` enabled the inner value can only be accessed
/// or dropped on the `wgpu` thread or else a panic will occur.
/// On other platforms the wrapper simply contains the wrapped value.
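///
/// A minimal usage sketch (the `queue` binding is illustrative):
///
/// ```ignore
/// let wrapped = WgpuWrapper::new(queue);
/// // `Deref`/`DerefMut` expose the inner wgpu value on every platform.
/// let inner: &Queue = &wrapped;
/// // Unwrap the value again when ownership is needed.
/// let queue = wrapped.into_inner();
/// ```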
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
#[derive(Debug, Clone, Deref, DerefMut)]
pub struct WgpuWrapper<T>(T);
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
#[derive(Debug, Clone, Deref, DerefMut)]
pub struct WgpuWrapper<T>(send_wrapper::SendWrapper<T>);

// SAFETY: SendWrapper is always Send + Sync.
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
unsafe impl<T> Send for WgpuWrapper<T> {}
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
unsafe impl<T> Sync for WgpuWrapper<T> {}

#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
impl<T> WgpuWrapper<T> {
    pub fn new(t: T) -> Self {
        Self(t)
    }

    pub fn into_inner(self) -> T {
        self.0
    }
}

#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
impl<T> WgpuWrapper<T> {
    pub fn new(t: T) -> Self {
        Self(send_wrapper::SendWrapper::new(t))
    }

    pub fn into_inner(self) -> T {
        self.0.take()
    }
}

/// This queue is used to enqueue tasks for the GPU to execute asynchronously.
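///
/// A minimal sketch of submitting recorded work from a render-world system
/// (`my_submit_system` and the recorded commands are illustrative):
///
/// ```ignore
/// fn my_submit_system(render_device: Res<RenderDevice>, render_queue: Res<RenderQueue>) {
///     let mut encoder = render_device
///         .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
///     // ... record GPU commands on `encoder` ...
///     render_queue.submit(Some(encoder.finish()));
/// }
/// ```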
#[derive(Resource, Clone, Deref, DerefMut)]
pub struct RenderQueue(pub Arc<WgpuWrapper<Queue>>);

/// The handle to the physical device being used for rendering.
/// See [`Adapter`] for more info.
#[derive(Resource, Clone, Debug, Deref, DerefMut)]
pub struct RenderAdapter(pub Arc<WgpuWrapper<Adapter>>);

/// The GPU instance is used to initialize the [`RenderQueue`] and [`RenderDevice`],
/// as well as to create [`WindowSurfaces`](crate::view::window::WindowSurfaces).
#[derive(Resource, Clone, Deref, DerefMut)]
pub struct RenderInstance(pub Arc<WgpuWrapper<Instance>>);

/// The [`AdapterInfo`] of the adapter in use by the renderer.
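///
/// A minimal sketch of reading the adapter info from a render-world system
/// (the system name is illustrative):
///
/// ```ignore
/// fn log_adapter(adapter_info: Res<RenderAdapterInfo>) {
///     // `Deref` passes through the wrapper to the underlying `AdapterInfo`.
///     info!("rendering with adapter: {}", adapter_info.name);
/// }
/// ```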
#[derive(Resource, Clone, Deref, DerefMut)]
pub struct RenderAdapterInfo(pub WgpuWrapper<AdapterInfo>);

const GPU_NOT_FOUND_ERROR_MESSAGE: &str = if cfg!(target_os = "linux") {
    "Unable to find a GPU! Make sure you have installed required drivers! For extra information, see: https://github.com/bevyengine/bevy/blob/latest/docs/linux_dependencies.md"
} else {
    "Unable to find a GPU! Make sure you have installed required drivers!"
};

/// Initializes the renderer by retrieving and preparing the GPU instance, device and queue
/// for the specified backend.
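///
/// A minimal sketch of calling this from async setup code (the bindings and the
/// default options are illustrative):
///
/// ```ignore
/// let instance = wgpu::Instance::default();
/// let (device, queue, adapter_info, adapter) = initialize_renderer(
///     &instance,
///     &WgpuSettings::default(),
///     &wgpu::RequestAdapterOptions::default(),
/// )
/// .await;
/// ```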
pub async fn initialize_renderer(
    instance: &Instance,
    options: &WgpuSettings,
    request_adapter_options: &RequestAdapterOptions<'_, '_>,
) -> (RenderDevice, RenderQueue, RenderAdapterInfo, RenderAdapter) {
    let adapter = instance
        .request_adapter(request_adapter_options)
        .await
        .expect(GPU_NOT_FOUND_ERROR_MESSAGE);

    let adapter_info = adapter.get_info();
    info!("{:?}", adapter_info);

    if adapter_info.device_type == DeviceType::Cpu {
        warn!(
            "The selected adapter is using a driver that only supports software rendering. \
             This is likely to be very slow. See https://bevyengine.org/learn/errors/b0006/"
        );
    }

    // Maybe get features and limits based on what is supported by the adapter/backend
    let mut features = wgpu::Features::empty();
    let mut limits = options.limits.clone();
    if matches!(options.priority, WgpuSettingsPriority::Functionality) {
        features = adapter.features();
        if adapter_info.device_type == DeviceType::DiscreteGpu {
            // `MAPPABLE_PRIMARY_BUFFERS` can have a significant, negative performance impact for
            // discrete GPUs due to having to transfer data across the PCI-E bus and so it
            // should not be automatically enabled in this case. It is however beneficial for
            // integrated GPUs.
            features -= wgpu::Features::MAPPABLE_PRIMARY_BUFFERS;
        }

        // RAY_QUERY and RAY_TRACING_ACCELERATION_STRUCTURE will sometimes cause DeviceLost failures on platforms
        // that report them as supported:
        // <https://github.com/gfx-rs/wgpu/issues/5488>
        features -= wgpu::Features::EXPERIMENTAL_RAY_QUERY;
        features -= wgpu::Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE;

        limits = adapter.limits();
    }

    // Enforce the disabled features
    if let Some(disabled_features) = options.disabled_features {
        features -= disabled_features;
    }
    // NOTE: |= is used here to ensure that any explicitly-enabled features are respected.
    features |= options.features;

    // Enforce the limit constraints
    if let Some(constrained_limits) = options.constrained_limits.as_ref() {
        // NOTE: Respect the configured limits as an 'upper bound'. This means for 'max' limits, we
        // take the minimum of the calculated limits according to the adapter/backend and the
        // specified max_limits. For 'min' limits, take the maximum instead. This is intended to
        // err on the side of being conservative. We can't claim 'higher' limits that are supported
        // but we can constrain to 'lower' limits.
        limits = wgpu::Limits {
            max_texture_dimension_1d: limits
                .max_texture_dimension_1d
                .min(constrained_limits.max_texture_dimension_1d),
            max_texture_dimension_2d: limits
                .max_texture_dimension_2d
                .min(constrained_limits.max_texture_dimension_2d),
            max_texture_dimension_3d: limits
                .max_texture_dimension_3d
                .min(constrained_limits.max_texture_dimension_3d),
            max_texture_array_layers: limits
                .max_texture_array_layers
                .min(constrained_limits.max_texture_array_layers),
            max_bind_groups: limits
                .max_bind_groups
                .min(constrained_limits.max_bind_groups),
            max_dynamic_uniform_buffers_per_pipeline_layout: limits
                .max_dynamic_uniform_buffers_per_pipeline_layout
                .min(constrained_limits.max_dynamic_uniform_buffers_per_pipeline_layout),
            max_dynamic_storage_buffers_per_pipeline_layout: limits
                .max_dynamic_storage_buffers_per_pipeline_layout
                .min(constrained_limits.max_dynamic_storage_buffers_per_pipeline_layout),
            max_sampled_textures_per_shader_stage: limits
                .max_sampled_textures_per_shader_stage
                .min(constrained_limits.max_sampled_textures_per_shader_stage),
            max_samplers_per_shader_stage: limits
                .max_samplers_per_shader_stage
                .min(constrained_limits.max_samplers_per_shader_stage),
            max_storage_buffers_per_shader_stage: limits
                .max_storage_buffers_per_shader_stage
                .min(constrained_limits.max_storage_buffers_per_shader_stage),
            max_storage_textures_per_shader_stage: limits
                .max_storage_textures_per_shader_stage
                .min(constrained_limits.max_storage_textures_per_shader_stage),
            max_uniform_buffers_per_shader_stage: limits
                .max_uniform_buffers_per_shader_stage
                .min(constrained_limits.max_uniform_buffers_per_shader_stage),
            max_uniform_buffer_binding_size: limits
                .max_uniform_buffer_binding_size
                .min(constrained_limits.max_uniform_buffer_binding_size),
            max_storage_buffer_binding_size: limits
                .max_storage_buffer_binding_size
                .min(constrained_limits.max_storage_buffer_binding_size),
            max_vertex_buffers: limits
                .max_vertex_buffers
                .min(constrained_limits.max_vertex_buffers),
            max_vertex_attributes: limits
                .max_vertex_attributes
                .min(constrained_limits.max_vertex_attributes),
            max_vertex_buffer_array_stride: limits
                .max_vertex_buffer_array_stride
                .min(constrained_limits.max_vertex_buffer_array_stride),
            max_push_constant_size: limits
                .max_push_constant_size
                .min(constrained_limits.max_push_constant_size),
            min_uniform_buffer_offset_alignment: limits
                .min_uniform_buffer_offset_alignment
                .max(constrained_limits.min_uniform_buffer_offset_alignment),
            min_storage_buffer_offset_alignment: limits
                .min_storage_buffer_offset_alignment
                .max(constrained_limits.min_storage_buffer_offset_alignment),
            max_inter_stage_shader_components: limits
                .max_inter_stage_shader_components
                .min(constrained_limits.max_inter_stage_shader_components),
            max_compute_workgroup_storage_size: limits
                .max_compute_workgroup_storage_size
                .min(constrained_limits.max_compute_workgroup_storage_size),
            max_compute_invocations_per_workgroup: limits
                .max_compute_invocations_per_workgroup
                .min(constrained_limits.max_compute_invocations_per_workgroup),
            max_compute_workgroup_size_x: limits
                .max_compute_workgroup_size_x
                .min(constrained_limits.max_compute_workgroup_size_x),
            max_compute_workgroup_size_y: limits
                .max_compute_workgroup_size_y
                .min(constrained_limits.max_compute_workgroup_size_y),
            max_compute_workgroup_size_z: limits
                .max_compute_workgroup_size_z
                .min(constrained_limits.max_compute_workgroup_size_z),
            max_compute_workgroups_per_dimension: limits
                .max_compute_workgroups_per_dimension
                .min(constrained_limits.max_compute_workgroups_per_dimension),
            max_buffer_size: limits
                .max_buffer_size
                .min(constrained_limits.max_buffer_size),
            max_bindings_per_bind_group: limits
                .max_bindings_per_bind_group
                .min(constrained_limits.max_bindings_per_bind_group),
            max_non_sampler_bindings: limits
                .max_non_sampler_bindings
                .min(constrained_limits.max_non_sampler_bindings),
            max_color_attachments: limits
                .max_color_attachments
                .min(constrained_limits.max_color_attachments),
            max_color_attachment_bytes_per_sample: limits
                .max_color_attachment_bytes_per_sample
                .min(constrained_limits.max_color_attachment_bytes_per_sample),
            min_subgroup_size: limits
                .min_subgroup_size
                .max(constrained_limits.min_subgroup_size),
            max_subgroup_size: limits
                .max_subgroup_size
                .min(constrained_limits.max_subgroup_size),
        };
    }

    let (device, queue) = adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: options.device_label.as_ref().map(AsRef::as_ref),
                required_features: features,
                required_limits: limits,
                memory_hints: options.memory_hints.clone(),
            },
            options.trace_path.as_deref(),
        )
        .await
        .unwrap();
    let queue = Arc::new(WgpuWrapper::new(queue));
    let adapter = Arc::new(WgpuWrapper::new(adapter));
    (
        RenderDevice::from(device),
        RenderQueue(queue),
        RenderAdapterInfo(WgpuWrapper::new(adapter_info)),
        RenderAdapter(adapter),
    )
}

/// The context with all information required to interact with the GPU.
///
/// The [`RenderDevice`] is used to create render resources and the
/// [`CommandEncoder`] is used to record a series of GPU operations.
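///
/// A minimal sketch of how a render-graph node might use the context
/// (`descriptor` stands for a [`RenderPassDescriptor`] built by the node):
///
/// ```ignore
/// // Begin a pass on the context's internal command encoder.
/// let mut pass = render_context.begin_tracked_render_pass(descriptor);
/// // ... bind pipelines and issue draw calls on `pass` ...
/// drop(pass);
/// // Pre-recorded command buffers can also be queued directly.
/// render_context.add_command_buffer(command_buffer);
/// ```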
pub struct RenderContext<'w> {
    render_device: RenderDevice,
    command_encoder: Option<CommandEncoder>,
    command_buffer_queue: Vec<QueuedCommandBuffer<'w>>,
    #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
    force_serial: bool,
    diagnostics_recorder: Option<Arc<DiagnosticsRecorder>>,
}

impl<'w> RenderContext<'w> {
    /// Creates a new [`RenderContext`] from a [`RenderDevice`].
    pub fn new(
        render_device: RenderDevice,
        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
        adapter_info: AdapterInfo,
        diagnostics_recorder: Option<DiagnosticsRecorder>,
    ) -> Self {
        // HACK: Parallel command encoding is currently bugged on AMD + Windows/Linux + Vulkan
        #[cfg(any(target_os = "windows", target_os = "linux"))]
        let force_serial =
            adapter_info.driver.contains("AMD") && adapter_info.backend == wgpu::Backend::Vulkan;
        #[cfg(not(any(
            target_os = "windows",
            target_os = "linux",
            all(target_arch = "wasm32", target_feature = "atomics")
        )))]
        let force_serial = {
            drop(adapter_info);
            false
        };

        Self {
            render_device,
            command_encoder: None,
            command_buffer_queue: Vec::new(),
            #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
            force_serial,
            diagnostics_recorder: diagnostics_recorder.map(Arc::new),
        }
    }

    /// Gets the underlying [`RenderDevice`].
    pub fn render_device(&self) -> &RenderDevice {
        &self.render_device
    }

    /// Gets the diagnostics recorder, used to track elapsed time and pipeline statistics
    /// of various render and compute passes.
    pub fn diagnostic_recorder(&self) -> impl RecordDiagnostics + use<> {
        self.diagnostics_recorder.clone()
    }

    /// Gets the current [`CommandEncoder`].
    pub fn command_encoder(&mut self) -> &mut CommandEncoder {
        self.command_encoder.get_or_insert_with(|| {
            self.render_device
                .create_command_encoder(&wgpu::CommandEncoderDescriptor::default())
        })
    }

    /// Creates a new [`TrackedRenderPass`] for the context,
    /// configured using the provided `descriptor`.
    pub fn begin_tracked_render_pass<'a>(
        &'a mut self,
        descriptor: RenderPassDescriptor<'_>,
    ) -> TrackedRenderPass<'a> {
        // Cannot use command_encoder() as we need to split the borrow on self
        let command_encoder = self.command_encoder.get_or_insert_with(|| {
            self.render_device
                .create_command_encoder(&wgpu::CommandEncoderDescriptor::default())
        });

        let render_pass = command_encoder.begin_render_pass(&descriptor);
        TrackedRenderPass::new(&self.render_device, render_pass)
    }

    /// Append a [`CommandBuffer`] to the command buffer queue.
    ///
    /// If present, this will flush the currently unflushed [`CommandEncoder`]
    /// into a [`CommandBuffer`] and push it onto the queue before appending the
    /// provided buffer.
    pub fn add_command_buffer(&mut self, command_buffer: CommandBuffer) {
        self.flush_encoder();

        self.command_buffer_queue
            .push(QueuedCommandBuffer::Ready(command_buffer));
    }

    /// Append a function that will generate a [`CommandBuffer`] to the
    /// command buffer queue, to be run later.
    ///
    /// If present, this will flush the currently unflushed [`CommandEncoder`]
    /// into a [`CommandBuffer`] and push it onto the queue before appending the
    /// provided task.
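    ///
    /// A minimal sketch (the recorded work is illustrative):
    ///
    /// ```ignore
    /// render_context.add_command_buffer_generation_task(move |render_device| {
    ///     let mut encoder = render_device
    ///         .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
    ///     // ... record GPU commands on `encoder` ...
    ///     encoder.finish()
    /// });
    /// ```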
    pub fn add_command_buffer_generation_task(
        &mut self,
        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
        task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w + Send,
        #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
        task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w,
    ) {
        self.flush_encoder();

        self.command_buffer_queue
            .push(QueuedCommandBuffer::Task(Box::new(task)));
    }

    /// Finalizes and returns the queue of [`CommandBuffer`]s.
    ///
    /// This function will wait until all command buffer generation tasks are complete
    /// by running them in parallel (where supported).
    ///
    /// The [`CommandBuffer`]s will be returned in the order that they were added.
    pub fn finish(
        mut self,
    ) -> (
        Vec<CommandBuffer>,
        RenderDevice,
        Option<DiagnosticsRecorder>,
    ) {
        self.flush_encoder();

        let mut command_buffers = Vec::with_capacity(self.command_buffer_queue.len());

        #[cfg(feature = "trace")]
        let _command_buffer_generation_tasks_span =
            info_span!("command_buffer_generation_tasks").entered();

        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
        {
            let mut task_based_command_buffers = ComputeTaskPool::get().scope(|task_pool| {
                for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate()
                {
                    match queued_command_buffer {
                        QueuedCommandBuffer::Ready(command_buffer) => {
                            command_buffers.push((i, command_buffer));
                        }
                        QueuedCommandBuffer::Task(command_buffer_generation_task) => {
                            let render_device = self.render_device.clone();
                            if self.force_serial {
                                command_buffers
                                    .push((i, command_buffer_generation_task(render_device)));
                            } else {
                                task_pool.spawn(async move {
                                    (i, command_buffer_generation_task(render_device))
                                });
                            }
                        }
                    }
                }
            });
            command_buffers.append(&mut task_based_command_buffers);
        }

        #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
        for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate() {
            match queued_command_buffer {
                QueuedCommandBuffer::Ready(command_buffer) => {
                    command_buffers.push((i, command_buffer));
                }
                QueuedCommandBuffer::Task(command_buffer_generation_task) => {
                    let render_device = self.render_device.clone();
                    command_buffers.push((i, command_buffer_generation_task(render_device)));
                }
            }
        }

        #[cfg(feature = "trace")]
        drop(_command_buffer_generation_tasks_span);

        command_buffers.sort_unstable_by_key(|(i, _)| *i);

        let mut command_buffers = command_buffers
            .into_iter()
            .map(|(_, cb)| cb)
            .collect::<Vec<CommandBuffer>>();

        let mut diagnostics_recorder = self.diagnostics_recorder.take().map(|v| {
            Arc::try_unwrap(v)
                .ok()
                .expect("diagnostic recorder shouldn't be held longer than necessary")
        });

        if let Some(recorder) = &mut diagnostics_recorder {
            let mut command_encoder = self
                .render_device
                .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
            recorder.resolve(&mut command_encoder);
            command_buffers.push(command_encoder.finish());
        }

        (command_buffers, self.render_device, diagnostics_recorder)
    }

    fn flush_encoder(&mut self) {
        if let Some(encoder) = self.command_encoder.take() {
            self.command_buffer_queue
                .push(QueuedCommandBuffer::Ready(encoder.finish()));
        }
    }
}

enum QueuedCommandBuffer<'w> {
    Ready(CommandBuffer),
    #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
    Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w + Send>),
    #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
    Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w>),
}