bevy_render/renderer/mod.rs

mod graph_runner;
mod render_device;

use bevy_derive::{Deref, DerefMut};
use bevy_tasks::ComputeTaskPool;
use bevy_utils::tracing::{error, info, info_span, warn};
pub use graph_runner::*;
pub use render_device::*;

use crate::{
    diagnostic::{internal::DiagnosticsRecorder, RecordDiagnostics},
    render_graph::RenderGraph,
    render_phase::TrackedRenderPass,
    render_resource::RenderPassDescriptor,
    settings::{WgpuSettings, WgpuSettingsPriority},
    view::{ExtractedWindows, ViewTarget},
};
use alloc::sync::Arc;
use bevy_ecs::{prelude::*, system::SystemState};
use bevy_time::TimeSender;
use bevy_utils::Instant;
use wgpu::{
    Adapter, AdapterInfo, CommandBuffer, CommandEncoder, DeviceType, Instance, Queue,
    RequestAdapterOptions,
};

/// Updates the [`RenderGraph`] with all of its nodes and then runs it to render the entire frame.
pub fn render_system(world: &mut World, state: &mut SystemState<Query<Entity, With<ViewTarget>>>) {
    world.resource_scope(|world, mut graph: Mut<RenderGraph>| {
        graph.update(world);
    });

    let diagnostics_recorder = world.remove_resource::<DiagnosticsRecorder>();

    let graph = world.resource::<RenderGraph>();
    let render_device = world.resource::<RenderDevice>();
    let render_queue = world.resource::<RenderQueue>();
    let render_adapter = world.resource::<RenderAdapter>();

    let res = RenderGraphRunner::run(
        graph,
        render_device.clone(), // TODO: is this clone really necessary?
        diagnostics_recorder,
        &render_queue.0,
        &render_adapter.0,
        world,
        |encoder| {
            crate::view::screenshot::submit_screenshot_commands(world, encoder);
            crate::gpu_readback::submit_readback_commands(world, encoder);
        },
    );

    match res {
        Ok(Some(diagnostics_recorder)) => {
            world.insert_resource(diagnostics_recorder);
        }
        Ok(None) => {}
        Err(e) => {
            error!("Error running render graph:");
            {
                let mut src: &dyn core::error::Error = &e;
                loop {
                    error!("> {}", src);
                    match src.source() {
                        Some(s) => src = s,
                        None => break,
                    }
                }
            }

            panic!("Error running render graph: {e}");
        }
    }

    {
        let _span = info_span!("present_frames").entered();

        // Remove ViewTarget components to ensure swap chain TextureViews are dropped.
        // If all TextureViews aren't dropped before present, acquiring the next swap chain texture will fail.
        let view_entities = state.get(world).iter().collect::<Vec<_>>();
        for view_entity in view_entities {
            world.entity_mut(view_entity).remove::<ViewTarget>();
        }

        let mut windows = world.resource_mut::<ExtractedWindows>();
        for window in windows.values_mut() {
            if let Some(wrapped_texture) = window.swap_chain_texture.take() {
                if let Some(surface_texture) = wrapped_texture.try_unwrap() {
                    // TODO(clean): winit docs recommend calling pre_present_notify before this.
                    // Though `present()` doesn't present the frame, it schedules it to be presented
                    // by wgpu.
                    // https://docs.rs/winit/0.29.9/wasm32-unknown-unknown/winit/window/struct.Window.html#method.pre_present_notify
                    surface_texture.present();
                }
            }
        }

        #[cfg(feature = "tracing-tracy")]
        bevy_utils::tracing::event!(
            bevy_utils::tracing::Level::INFO,
            message = "finished frame",
            tracy.frame_mark = true
        );
    }

    crate::view::screenshot::collect_screenshots(world);

    // update the time and send it to the app world
    let time_sender = world.resource::<TimeSender>();
    if let Err(error) = time_sender.0.try_send(Instant::now()) {
        match error {
            bevy_time::TrySendError::Full(_) => {
                panic!("The TimeSender channel should always be empty during render. You might need to add the bevy::core::time_system to your app.");
            }
            bevy_time::TrySendError::Disconnected(_) => {
                // ignore disconnected errors, the main world probably just got dropped during shutdown
            }
        }
    }
}

/// A wrapper to safely make `wgpu` types Send / Sync on web with atomics enabled.
///
/// On web with `atomics` enabled the inner value can only be accessed
/// or dropped on the `wgpu` thread or else a panic will occur.
/// On other platforms the wrapper simply contains the wrapped value.
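///
/// A minimal usage sketch (not run as a doc test; the `queue` value is assumed to
/// come from [`Adapter::request_device`]):
///
/// ```ignore
/// // Wrap a raw wgpu value so it can be stored in a Send + Sync resource.
/// let wrapped = WgpuWrapper::new(queue);
/// // Methods of the inner type resolve through `Deref`.
/// wrapped.on_submitted_work_done(|| {});
/// // Unwrap when the raw value is needed again.
/// let queue: wgpu::Queue = wrapped.into_inner();
/// ```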
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
#[derive(Debug, Clone, Deref, DerefMut)]
pub struct WgpuWrapper<T>(T);
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
#[derive(Debug, Clone, Deref, DerefMut)]
pub struct WgpuWrapper<T>(send_wrapper::SendWrapper<T>);

// SAFETY: SendWrapper is always Send + Sync.
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
unsafe impl<T> Send for WgpuWrapper<T> {}
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
unsafe impl<T> Sync for WgpuWrapper<T> {}

#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
impl<T> WgpuWrapper<T> {
    pub fn new(t: T) -> Self {
        Self(t)
    }

    pub fn into_inner(self) -> T {
        self.0
    }
}

#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
impl<T> WgpuWrapper<T> {
    pub fn new(t: T) -> Self {
        Self(send_wrapper::SendWrapper::new(t))
    }

    pub fn into_inner(self) -> T {
        self.0.take()
    }
}

/// This queue is used to enqueue tasks for the GPU to execute asynchronously.
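///
/// A minimal sketch of writing buffer data from a render-world system
/// (`PreparedBuffers` and its fields are hypothetical, and `bytemuck` is only
/// illustrative):
///
/// ```ignore
/// fn upload_instance_data(render_queue: Res<RenderQueue>, buffers: Res<PreparedBuffers>) {
///     // `RenderQueue` derefs down to `wgpu::Queue`, so the full queue API is available.
///     render_queue.write_buffer(&buffers.instances, 0, bytemuck::cast_slice(&buffers.cpu_data));
/// }
/// ```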
#[derive(Resource, Clone, Deref, DerefMut)]
pub struct RenderQueue(pub Arc<WgpuWrapper<Queue>>);

/// The handle to the physical device being used for rendering.
/// See [`Adapter`] for more info.
#[derive(Resource, Clone, Debug, Deref, DerefMut)]
pub struct RenderAdapter(pub Arc<WgpuWrapper<Adapter>>);

/// The GPU instance is used to initialize the [`RenderQueue`] and [`RenderDevice`],
/// as well as to create [`WindowSurfaces`](crate::view::window::WindowSurfaces).
#[derive(Resource, Clone, Deref, DerefMut)]
pub struct RenderInstance(pub Arc<WgpuWrapper<Instance>>);

/// The [`AdapterInfo`] of the adapter in use by the renderer.
#[derive(Resource, Clone, Deref, DerefMut)]
pub struct RenderAdapterInfo(pub WgpuWrapper<AdapterInfo>);

const GPU_NOT_FOUND_ERROR_MESSAGE: &str = if cfg!(target_os = "linux") {
    "Unable to find a GPU! Make sure you have installed required drivers! For extra information, see: https://github.com/bevyengine/bevy/blob/latest/docs/linux_dependencies.md"
} else {
    "Unable to find a GPU! Make sure you have installed required drivers!"
};

/// Initializes the renderer by retrieving and preparing the GPU instance, device and queue
/// for the specified backend.
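///
/// A minimal sketch of calling this outside the usual `RenderPlugin` flow
/// (any blocking executor works; `futures_lite` is only illustrative):
///
/// ```ignore
/// let instance = wgpu::Instance::default();
/// let settings = WgpuSettings::default();
/// let (device, queue, adapter_info, adapter) = futures_lite::future::block_on(
///     initialize_renderer(&instance, &settings, &wgpu::RequestAdapterOptions::default()),
/// );
/// ```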
pub async fn initialize_renderer(
    instance: &Instance,
    options: &WgpuSettings,
    request_adapter_options: &RequestAdapterOptions<'_, '_>,
) -> (RenderDevice, RenderQueue, RenderAdapterInfo, RenderAdapter) {
    let adapter = instance
        .request_adapter(request_adapter_options)
        .await
        .expect(GPU_NOT_FOUND_ERROR_MESSAGE);

    let adapter_info = adapter.get_info();
    info!("{:?}", adapter_info);

    if adapter_info.device_type == DeviceType::Cpu {
        warn!(
            "The selected adapter is using a driver that only supports software rendering. \
             This is likely to be very slow. See https://bevyengine.org/learn/errors/b0006/"
        );
    }

    // Maybe get features and limits based on what is supported by the adapter/backend
    let mut features = wgpu::Features::empty();
    let mut limits = options.limits.clone();
    if matches!(options.priority, WgpuSettingsPriority::Functionality) {
        features = adapter.features();
        if adapter_info.device_type == DeviceType::DiscreteGpu {
            // `MAPPABLE_PRIMARY_BUFFERS` can have a significant, negative performance impact for
            // discrete GPUs due to having to transfer data across the PCI-E bus and so it
            // should not be automatically enabled in this case. It is however beneficial for
            // integrated GPUs.
            features -= wgpu::Features::MAPPABLE_PRIMARY_BUFFERS;
        }

        // RAY_QUERY and RAY_TRACING_ACCELERATION_STRUCTURE will sometimes cause DeviceLost failures on platforms
        // that report them as supported:
        // <https://github.com/gfx-rs/wgpu/issues/5488>
        // wgpu also doesn't actually support these features yet, so we disable
        // them until they are safe to enable.
        features -= wgpu::Features::RAY_QUERY;
        features -= wgpu::Features::RAY_TRACING_ACCELERATION_STRUCTURE;

        limits = adapter.limits();
    }

    // Enforce the disabled features
    if let Some(disabled_features) = options.disabled_features {
        features -= disabled_features;
    }
    // NOTE: |= is used here to ensure that any explicitly-enabled features are respected.
    features |= options.features;

    // Enforce the limit constraints
    if let Some(constrained_limits) = options.constrained_limits.as_ref() {
        // NOTE: Respect the configured limits as an 'upper bound'. This means for 'max' limits, we
        // take the minimum of the calculated limits according to the adapter/backend and the
        // specified max_limits. For 'min' limits, take the maximum instead. This is intended to
        // err on the side of being conservative. We can't claim 'higher' limits that are supported
        // but we can constrain to 'lower' limits.
        limits = wgpu::Limits {
            max_texture_dimension_1d: limits
                .max_texture_dimension_1d
                .min(constrained_limits.max_texture_dimension_1d),
            max_texture_dimension_2d: limits
                .max_texture_dimension_2d
                .min(constrained_limits.max_texture_dimension_2d),
            max_texture_dimension_3d: limits
                .max_texture_dimension_3d
                .min(constrained_limits.max_texture_dimension_3d),
            max_texture_array_layers: limits
                .max_texture_array_layers
                .min(constrained_limits.max_texture_array_layers),
            max_bind_groups: limits
                .max_bind_groups
                .min(constrained_limits.max_bind_groups),
            max_dynamic_uniform_buffers_per_pipeline_layout: limits
                .max_dynamic_uniform_buffers_per_pipeline_layout
                .min(constrained_limits.max_dynamic_uniform_buffers_per_pipeline_layout),
            max_dynamic_storage_buffers_per_pipeline_layout: limits
                .max_dynamic_storage_buffers_per_pipeline_layout
                .min(constrained_limits.max_dynamic_storage_buffers_per_pipeline_layout),
            max_sampled_textures_per_shader_stage: limits
                .max_sampled_textures_per_shader_stage
                .min(constrained_limits.max_sampled_textures_per_shader_stage),
            max_samplers_per_shader_stage: limits
                .max_samplers_per_shader_stage
                .min(constrained_limits.max_samplers_per_shader_stage),
            max_storage_buffers_per_shader_stage: limits
                .max_storage_buffers_per_shader_stage
                .min(constrained_limits.max_storage_buffers_per_shader_stage),
            max_storage_textures_per_shader_stage: limits
                .max_storage_textures_per_shader_stage
                .min(constrained_limits.max_storage_textures_per_shader_stage),
            max_uniform_buffers_per_shader_stage: limits
                .max_uniform_buffers_per_shader_stage
                .min(constrained_limits.max_uniform_buffers_per_shader_stage),
            max_uniform_buffer_binding_size: limits
                .max_uniform_buffer_binding_size
                .min(constrained_limits.max_uniform_buffer_binding_size),
            max_storage_buffer_binding_size: limits
                .max_storage_buffer_binding_size
                .min(constrained_limits.max_storage_buffer_binding_size),
            max_vertex_buffers: limits
                .max_vertex_buffers
                .min(constrained_limits.max_vertex_buffers),
            max_vertex_attributes: limits
                .max_vertex_attributes
                .min(constrained_limits.max_vertex_attributes),
            max_vertex_buffer_array_stride: limits
                .max_vertex_buffer_array_stride
                .min(constrained_limits.max_vertex_buffer_array_stride),
            max_push_constant_size: limits
                .max_push_constant_size
                .min(constrained_limits.max_push_constant_size),
            min_uniform_buffer_offset_alignment: limits
                .min_uniform_buffer_offset_alignment
                .max(constrained_limits.min_uniform_buffer_offset_alignment),
            min_storage_buffer_offset_alignment: limits
                .min_storage_buffer_offset_alignment
                .max(constrained_limits.min_storage_buffer_offset_alignment),
            max_inter_stage_shader_components: limits
                .max_inter_stage_shader_components
                .min(constrained_limits.max_inter_stage_shader_components),
            max_compute_workgroup_storage_size: limits
                .max_compute_workgroup_storage_size
                .min(constrained_limits.max_compute_workgroup_storage_size),
            max_compute_invocations_per_workgroup: limits
                .max_compute_invocations_per_workgroup
                .min(constrained_limits.max_compute_invocations_per_workgroup),
            max_compute_workgroup_size_x: limits
                .max_compute_workgroup_size_x
                .min(constrained_limits.max_compute_workgroup_size_x),
            max_compute_workgroup_size_y: limits
                .max_compute_workgroup_size_y
                .min(constrained_limits.max_compute_workgroup_size_y),
            max_compute_workgroup_size_z: limits
                .max_compute_workgroup_size_z
                .min(constrained_limits.max_compute_workgroup_size_z),
            max_compute_workgroups_per_dimension: limits
                .max_compute_workgroups_per_dimension
                .min(constrained_limits.max_compute_workgroups_per_dimension),
            max_buffer_size: limits
                .max_buffer_size
                .min(constrained_limits.max_buffer_size),
            max_bindings_per_bind_group: limits
                .max_bindings_per_bind_group
                .min(constrained_limits.max_bindings_per_bind_group),
            max_non_sampler_bindings: limits
                .max_non_sampler_bindings
                .min(constrained_limits.max_non_sampler_bindings),
            max_color_attachments: limits
                .max_color_attachments
                .min(constrained_limits.max_color_attachments),
            max_color_attachment_bytes_per_sample: limits
                .max_color_attachment_bytes_per_sample
                .min(constrained_limits.max_color_attachment_bytes_per_sample),
            min_subgroup_size: limits
                .min_subgroup_size
                .max(constrained_limits.min_subgroup_size),
            max_subgroup_size: limits
                .max_subgroup_size
                .min(constrained_limits.max_subgroup_size),
        };
    }

    let (device, queue) = adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: options.device_label.as_ref().map(AsRef::as_ref),
                required_features: features,
                required_limits: limits,
                memory_hints: options.memory_hints.clone(),
            },
            options.trace_path.as_deref(),
        )
        .await
        .unwrap();
    let queue = Arc::new(WgpuWrapper::new(queue));
    let adapter = Arc::new(WgpuWrapper::new(adapter));
    (
        RenderDevice::from(device),
        RenderQueue(queue),
        RenderAdapterInfo(WgpuWrapper::new(adapter_info)),
        RenderAdapter(adapter),
    )
}

/// The context with all information required to interact with the GPU.
///
/// The [`RenderDevice`] is used to create render resources and the
/// [`CommandEncoder`] is used to record a series of GPU operations.
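///
/// A minimal sketch of how a render graph `Node` might use the context it is
/// given (the `descriptor` and the recorded commands are elided here):
///
/// ```ignore
/// // Record work on the shared command encoder...
/// let encoder = render_context.command_encoder();
/// // ...or begin a tracked render pass from a `RenderPassDescriptor`.
/// let mut pass = render_context.begin_tracked_render_pass(descriptor);
/// ```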
pub struct RenderContext<'w> {
    render_device: RenderDevice,
    command_encoder: Option<CommandEncoder>,
    command_buffer_queue: Vec<QueuedCommandBuffer<'w>>,
    force_serial: bool,
    diagnostics_recorder: Option<Arc<DiagnosticsRecorder>>,
}

impl<'w> RenderContext<'w> {
    /// Creates a new [`RenderContext`] from a [`RenderDevice`].
    pub fn new(
        render_device: RenderDevice,
        adapter_info: AdapterInfo,
        diagnostics_recorder: Option<DiagnosticsRecorder>,
    ) -> Self {
        // HACK: Parallel command encoding is currently bugged on AMD + Windows + Vulkan with wgpu 0.19.1
        #[cfg(target_os = "windows")]
        let force_serial =
            adapter_info.driver.contains("AMD") && adapter_info.backend == wgpu::Backend::Vulkan;
        #[cfg(not(target_os = "windows"))]
        let force_serial = {
            drop(adapter_info);
            false
        };

        Self {
            render_device,
            command_encoder: None,
            command_buffer_queue: Vec::new(),
            force_serial,
            diagnostics_recorder: diagnostics_recorder.map(Arc::new),
        }
    }

    /// Gets the underlying [`RenderDevice`].
    pub fn render_device(&self) -> &RenderDevice {
        &self.render_device
    }

    /// Gets the diagnostics recorder, used to track elapsed time and pipeline statistics
    /// of various render and compute passes.
    pub fn diagnostic_recorder(&self) -> impl RecordDiagnostics {
        self.diagnostics_recorder.clone()
    }

    /// Gets the current [`CommandEncoder`].
    pub fn command_encoder(&mut self) -> &mut CommandEncoder {
        self.command_encoder.get_or_insert_with(|| {
            self.render_device
                .create_command_encoder(&wgpu::CommandEncoderDescriptor::default())
        })
    }

    /// Creates a new [`TrackedRenderPass`] for the context,
    /// configured using the provided `descriptor`.
    pub fn begin_tracked_render_pass<'a>(
        &'a mut self,
        descriptor: RenderPassDescriptor<'_>,
    ) -> TrackedRenderPass<'a> {
        // Cannot use command_encoder() as we need to split the borrow on self
        let command_encoder = self.command_encoder.get_or_insert_with(|| {
            self.render_device
                .create_command_encoder(&wgpu::CommandEncoderDescriptor::default())
        });

        let render_pass = command_encoder.begin_render_pass(&descriptor);
        TrackedRenderPass::new(&self.render_device, render_pass)
    }

    /// Appends a [`CommandBuffer`] to the command buffer queue.
    ///
    /// If a [`CommandEncoder`] is currently open, it is first flushed into a
    /// [`CommandBuffer`] and pushed onto the queue before the provided buffer
    /// is appended.
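    ///
    /// A minimal sketch (the recorded commands are elided):
    ///
    /// ```ignore
    /// let mut encoder = render_context
    ///     .render_device()
    ///     .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
    /// // ... record commands on `encoder` ...
    /// render_context.add_command_buffer(encoder.finish());
    /// ```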
    pub fn add_command_buffer(&mut self, command_buffer: CommandBuffer) {
        self.flush_encoder();

        self.command_buffer_queue
            .push(QueuedCommandBuffer::Ready(command_buffer));
    }

    /// Appends a function that will generate a [`CommandBuffer`] to the
    /// command buffer queue, to be run later.
    ///
    /// If a [`CommandEncoder`] is currently open, it is first flushed into a
    /// [`CommandBuffer`] and pushed onto the queue before the provided task
    /// is appended.
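    ///
    /// A minimal sketch: the task receives a [`RenderDevice`] and must return a
    /// finished [`CommandBuffer`] (the recorded commands are elided):
    ///
    /// ```ignore
    /// render_context.add_command_buffer_generation_task(move |render_device| {
    ///     let mut encoder = render_device
    ///         .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
    ///     // ... record commands on `encoder`, potentially on another thread ...
    ///     encoder.finish()
    /// });
    /// ```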
    pub fn add_command_buffer_generation_task(
        &mut self,
        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
        task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w + Send,
        #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
        task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w,
    ) {
        self.flush_encoder();

        self.command_buffer_queue
            .push(QueuedCommandBuffer::Task(Box::new(task)));
    }

    /// Finalizes and returns the queue of [`CommandBuffer`]s.
    ///
    /// This function will wait until all command buffer generation tasks are complete
    /// by running them in parallel (where supported).
    ///
    /// The [`CommandBuffer`]s will be returned in the order that they were added.
    pub fn finish(
        mut self,
    ) -> (
        Vec<CommandBuffer>,
        RenderDevice,
        Option<DiagnosticsRecorder>,
    ) {
        self.flush_encoder();

        let mut command_buffers = Vec::with_capacity(self.command_buffer_queue.len());

        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
        {
            let mut task_based_command_buffers = ComputeTaskPool::get().scope(|task_pool| {
                for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate()
                {
                    match queued_command_buffer {
                        QueuedCommandBuffer::Ready(command_buffer) => {
                            command_buffers.push((i, command_buffer));
                        }
                        QueuedCommandBuffer::Task(command_buffer_generation_task) => {
                            let render_device = self.render_device.clone();
                            if self.force_serial {
                                command_buffers
                                    .push((i, command_buffer_generation_task(render_device)));
                            } else {
                                task_pool.spawn(async move {
                                    (i, command_buffer_generation_task(render_device))
                                });
                            }
                        }
                    }
                }
            });
            command_buffers.append(&mut task_based_command_buffers);
        }

        #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
        for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate() {
            match queued_command_buffer {
                QueuedCommandBuffer::Ready(command_buffer) => {
                    command_buffers.push((i, command_buffer));
                }
                QueuedCommandBuffer::Task(command_buffer_generation_task) => {
                    let render_device = self.render_device.clone();
                    command_buffers.push((i, command_buffer_generation_task(render_device)));
                }
            }
        }

        command_buffers.sort_unstable_by_key(|(i, _)| *i);

        let mut command_buffers = command_buffers
            .into_iter()
            .map(|(_, cb)| cb)
            .collect::<Vec<CommandBuffer>>();

        let mut diagnostics_recorder = self.diagnostics_recorder.take().map(|v| {
            Arc::try_unwrap(v)
                .ok()
                .expect("diagnostic recorder shouldn't be held longer than necessary")
        });

        if let Some(recorder) = &mut diagnostics_recorder {
            let mut command_encoder = self
                .render_device
                .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
            recorder.resolve(&mut command_encoder);
            command_buffers.push(command_encoder.finish());
        }

        (command_buffers, self.render_device, diagnostics_recorder)
    }

    fn flush_encoder(&mut self) {
        if let Some(encoder) = self.command_encoder.take() {
            self.command_buffer_queue
                .push(QueuedCommandBuffer::Ready(encoder.finish()));
        }
    }
}

enum QueuedCommandBuffer<'w> {
    Ready(CommandBuffer),
    #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
    Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w + Send>),
    #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
    Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w>),
}