bevy_render/batching/gpu_preprocessing.rs

//! Batching functionality when GPU preprocessing is in use.

use core::{any::TypeId, marker::PhantomData, mem};

use bevy_app::{App, Plugin};
use bevy_derive::{Deref, DerefMut};
use bevy_ecs::{
    prelude::Entity,
    query::{Has, With},
    resource::Resource,
    schedule::IntoScheduleConfigs as _,
    system::{Query, Res, ResMut, StaticSystemParam},
    world::{FromWorld, World},
};
use bevy_encase_derive::ShaderType;
use bevy_math::UVec4;
use bevy_platform::collections::{hash_map::Entry, HashMap, HashSet};
use bevy_tasks::ComputeTaskPool;
use bevy_utils::{default, TypeIdMap};
use bytemuck::{Pod, Zeroable};
use encase::{internal::WriteInto, ShaderSize};
use indexmap::IndexMap;
use nonmax::NonMaxU32;
use tracing::{error, info};
use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};

use crate::{
    experimental::occlusion_culling::OcclusionCulling,
    render_phase::{
        BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet,
        BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItem,
        PhaseItemBatchSetKey as _, PhaseItemExtraIndex, RenderBin, SortedPhaseItem,
        SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases,
        ViewSortedRenderPhases,
    },
    render_resource::{Buffer, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
    renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper},
    sync_world::MainEntity,
    view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity},
    Render, RenderApp, RenderDebugFlags, RenderSystems,
};

use super::{BatchMeta, GetBatchData, GetFullBatchData};

#[derive(Default)]
pub struct BatchingPlugin {
    /// Debugging flags that can optionally be set when constructing the renderer.
    pub debug_flags: RenderDebugFlags,
}

impl Plugin for BatchingPlugin {
    fn build(&self, app: &mut App) {
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        render_app
            .insert_resource(IndirectParametersBuffers::new(
                self.debug_flags
                    .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS),
            ))
            .add_systems(
                Render,
                write_indirect_parameters_buffers.in_set(RenderSystems::PrepareResourcesFlush),
            )
            .add_systems(
                Render,
                clear_indirect_parameters_buffers.in_set(RenderSystems::ManageViews),
            );
    }

    fn finish(&self, app: &mut App) {
        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
            return;
        };

        render_app.init_resource::<GpuPreprocessingSupport>();
    }
}

/// Records whether GPU preprocessing and/or GPU culling are supported on the
/// device.
///
/// No GPU preprocessing is supported on WebGL because of the lack of compute
/// shader support.  GPU preprocessing is supported on DirectX 12, but due to [a
/// `wgpu` limitation] GPU culling is not.
///
/// [a `wgpu` limitation]: https://github.com/gfx-rs/wgpu/issues/2471
#[derive(Clone, Copy, PartialEq, Resource)]
pub struct GpuPreprocessingSupport {
    /// The maximum amount of GPU preprocessing available on this platform.
    pub max_supported_mode: GpuPreprocessingMode,
}

impl GpuPreprocessingSupport {
    /// Returns true if this GPU preprocessing support level isn't `None`.
    #[inline]
    pub fn is_available(&self) -> bool {
        self.max_supported_mode != GpuPreprocessingMode::None
    }

    /// Returns the given GPU preprocessing mode, capped to the current
    /// preprocessing mode.
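    ///
    /// A hedged sketch of the capping behavior (illustrative; not compiled as
    /// a doctest):
    ///
    /// ```ignore
    /// let support = GpuPreprocessingSupport {
    ///     max_supported_mode: GpuPreprocessingMode::PreprocessingOnly,
    /// };
    /// // A request for culling is capped down to the supported level.
    /// assert!(support.min(GpuPreprocessingMode::Culling)
    ///     == GpuPreprocessingMode::PreprocessingOnly);
    /// // `None` on either side always yields `None`.
    /// assert!(support.min(GpuPreprocessingMode::None) == GpuPreprocessingMode::None);
    /// ```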
    pub fn min(&self, mode: GpuPreprocessingMode) -> GpuPreprocessingMode {
        match (self.max_supported_mode, mode) {
            (GpuPreprocessingMode::None, _) | (_, GpuPreprocessingMode::None) => {
                GpuPreprocessingMode::None
            }
            (mode, GpuPreprocessingMode::Culling) | (GpuPreprocessingMode::Culling, mode) => mode,
            (GpuPreprocessingMode::PreprocessingOnly, GpuPreprocessingMode::PreprocessingOnly) => {
                GpuPreprocessingMode::PreprocessingOnly
            }
        }
    }

    /// Returns true if GPU culling is supported on this platform.
    pub fn is_culling_supported(&self) -> bool {
        self.max_supported_mode == GpuPreprocessingMode::Culling
    }
}

/// The amount of GPU preprocessing (compute and indirect draw) that we do.
#[derive(Clone, Copy, PartialEq)]
pub enum GpuPreprocessingMode {
    /// No GPU preprocessing is in use at all.
    ///
    /// This is used when GPU compute isn't available.
    None,

    /// GPU preprocessing is in use, but GPU culling isn't.
    ///
    /// This is used when the [`NoIndirectDrawing`] component is present on the
    /// camera.
    PreprocessingOnly,

    /// Both GPU preprocessing and GPU culling are in use.
    ///
    /// This is used by default.
    Culling,
}

/// The GPU buffers holding the data needed to render batches.
///
/// For example, in the 3D PBR pipeline this holds `MeshUniform`s, which are the
/// `BD` type parameter in that pipeline.
///
/// We have a separate *buffer data input* type (`BDI`) here, which a compute
/// shader is expected to expand to the full buffer data (`BD`) type. GPU
/// uniform building is generally faster and uses less system-RAM-to-VRAM bus
/// bandwidth, but it's only implemented for some pipelines (for example, not
/// in the 2D pipeline at present) and only when compute shaders are available.
#[derive(Resource)]
pub struct BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Default,
{
    /// The uniform data inputs for the current frame.
    ///
    /// These are uploaded during the extraction phase.
    pub current_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The uniform data inputs for the previous frame.
    ///
    /// The indices don't generally line up between `current_input_buffer`
    /// and `previous_input_buffer`, because, among other reasons, entities
    /// can spawn or despawn between frames. Instead, each current buffer
    /// data input uniform is expected to contain the index of the
    /// corresponding buffer data input uniform in this list.
    pub previous_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The data needed to render buffers for each phase.
    ///
    /// The keys of this map are the type IDs of each phase: e.g. `Opaque3d`,
    /// `AlphaMask3d`, etc.
    pub phase_instance_buffers: TypeIdMap<UntypedPhaseBatchedInstanceBuffers<BD>>,
}

impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Sync + Send + Default + 'static,
{
    fn default() -> Self {
        BatchedInstanceBuffers {
            current_input_buffer: InstanceInputUniformBuffer::new(),
            previous_input_buffer: InstanceInputUniformBuffer::new(),
            phase_instance_buffers: HashMap::default(),
        }
    }
}

/// The GPU buffers holding the data needed to render batches for a single
/// phase.
///
/// These are split out per phase so that we can run the phases in parallel.
/// This is the version of the structure that has a type parameter, which
/// enables Bevy's scheduler to run the batching operations for the different
/// phases in parallel.
///
/// See the documentation for [`BatchedInstanceBuffers`] for more information.
#[derive(Resource)]
pub struct PhaseBatchedInstanceBuffers<PI, BD>
where
    PI: PhaseItem,
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// The buffers for this phase.
    pub buffers: UntypedPhaseBatchedInstanceBuffers<BD>,
    phantom: PhantomData<PI>,
}

impl<PI, BD> Default for PhaseBatchedInstanceBuffers<PI, BD>
where
    PI: PhaseItem,
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    fn default() -> Self {
        PhaseBatchedInstanceBuffers {
            buffers: UntypedPhaseBatchedInstanceBuffers::default(),
            phantom: PhantomData,
        }
    }
}

/// The GPU buffers holding the data needed to render batches for a single
/// phase, without a type parameter for that phase.
///
/// Since this structure doesn't have a type parameter, it can be placed in
/// [`BatchedInstanceBuffers::phase_instance_buffers`].
pub struct UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// A storage area for the buffer data that the GPU compute shader is
    /// expected to write to.
    ///
    /// There will be one entry for each index.
    pub data_buffer: UninitBufferVec<BD>,

    /// The index of the buffer data in the current input buffer that
    /// corresponds to each instance.
    ///
    /// This is keyed off each view. Each view has a separate buffer.
    pub work_item_buffers: HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,

    /// A buffer that holds the number of indexed meshes that weren't visible in
    /// the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible last frame to see if they became newly
    /// visible this frame.
    pub late_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,

    /// A buffer that holds the number of non-indexed meshes that weren't
    /// visible in the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible last frame to see if they became newly
    /// visible this frame.
    pub late_non_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,
}

/// Holds the GPU buffer of instance input data, which is the data about each
/// mesh instance that the CPU provides.
///
/// `BDI` is the *buffer data input* type, which the GPU mesh preprocessing
/// shader is expected to expand to the full *buffer data* type.
pub struct InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    /// The buffer containing the data that will be uploaded to the GPU.
    buffer: RawBufferVec<BDI>,

    /// Indices of slots that are free within the buffer.
    ///
    /// When adding data, we preferentially overwrite these slots first before
    /// growing the buffer itself.
    free_uniform_indices: Vec<u32>,
}

impl<BDI> InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    /// Creates a new, empty buffer.
    pub fn new() -> InstanceInputUniformBuffer<BDI> {
        InstanceInputUniformBuffer {
            buffer: RawBufferVec::new(BufferUsages::STORAGE),
            free_uniform_indices: vec![],
        }
    }

    /// Clears out the buffer and the list of free slots.
    pub fn clear(&mut self) {
        self.buffer.clear();
        self.free_uniform_indices.clear();
    }

    /// Returns the [`RawBufferVec`] corresponding to this input uniform buffer.
    #[inline]
    pub fn buffer(&self) -> &RawBufferVec<BDI> {
        &self.buffer
    }

    /// Adds a new piece of buffered data to the uniform buffer and returns its
    /// index.
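    ///
    /// Freed slots are reused before the buffer grows. A hedged sketch
    /// (illustrative; not compiled as a doctest):
    ///
    /// ```ignore
    /// let mut buffer = InstanceInputUniformBuffer::<u32>::new();
    /// let index = buffer.add(42);  // occupies a fresh slot
    /// buffer.remove(index);        // marks the slot as free
    /// let reused = buffer.add(43); // preferentially reuses the freed slot
    /// assert_eq!(index, reused);
    /// ```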
    pub fn add(&mut self, element: BDI) -> u32 {
        match self.free_uniform_indices.pop() {
            Some(uniform_index) => {
                self.buffer.values_mut()[uniform_index as usize] = element;
                uniform_index
            }
            None => self.buffer.push(element) as u32,
        }
    }

    /// Removes a piece of buffered data from the uniform buffer.
    ///
    /// This simply marks the data as free.
    pub fn remove(&mut self, uniform_index: u32) {
        self.free_uniform_indices.push(uniform_index);
    }

    /// Returns the piece of buffered data at the given index.
    ///
    /// Returns [`None`] if the index is out of bounds or the data has been
    /// removed.
    pub fn get(&self, uniform_index: u32) -> Option<BDI> {
        if (uniform_index as usize) >= self.buffer.len()
            || self.free_uniform_indices.contains(&uniform_index)
        {
            None
        } else {
            Some(self.get_unchecked(uniform_index))
        }
    }

    /// Returns the piece of buffered data at the given index.
    /// Can return data that has previously been removed.
    ///
    /// # Panics
    /// if `uniform_index` is not in bounds of [`Self::buffer`].
    pub fn get_unchecked(&self, uniform_index: u32) -> BDI {
        self.buffer.values()[uniform_index as usize]
    }

    /// Stores a piece of buffered data at the given index.
    ///
    /// # Panics
    /// if `uniform_index` is not in bounds of [`Self::buffer`].
    pub fn set(&mut self, uniform_index: u32, element: BDI) {
        self.buffer.values_mut()[uniform_index as usize] = element;
    }

    /// Ensures that the buffers are nonempty, which the GPU requires before an
    /// upload can take place.
    pub fn ensure_nonempty(&mut self) {
        if self.buffer.is_empty() {
            self.buffer.push(default());
        }
    }

    /// Returns the number of instances in this buffer.
    pub fn len(&self) -> usize {
        self.buffer.len()
    }

    /// Returns true if this buffer has no instances or false if it contains any
    /// instances.
    pub fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    /// Consumes this [`InstanceInputUniformBuffer`] and returns the raw buffer
    /// ready to be uploaded to the GPU.
    pub fn into_buffer(self) -> RawBufferVec<BDI> {
        self.buffer
    }
}

impl<BDI> Default for InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    fn default() -> Self {
        Self::new()
    }
}

/// The buffer of GPU preprocessing work items for a single view.
#[cfg_attr(
    not(target_arch = "wasm32"),
    expect(
        clippy::large_enum_variant,
        reason = "See https://github.com/bevyengine/bevy/issues/19220"
    )
)]
pub enum PreprocessWorkItemBuffers {
    /// The work items we use if we aren't using indirect drawing.
    ///
    /// Because we don't have to separate indexed from non-indexed meshes in
    /// direct mode, we only have a single buffer here.
    Direct(RawBufferVec<PreprocessWorkItem>),

    /// The buffer of work items we use if we are using indirect drawing.
    ///
    /// We need to separate out indexed meshes from non-indexed meshes in this
    /// case because the indirect parameters for these two types of meshes have
    /// different sizes.
    Indirect {
        /// The buffer of work items corresponding to indexed meshes.
        indexed: RawBufferVec<PreprocessWorkItem>,
        /// The buffer of work items corresponding to non-indexed meshes.
        non_indexed: RawBufferVec<PreprocessWorkItem>,
        /// The work item buffers we use when GPU occlusion culling is in use.
        gpu_occlusion_culling: Option<GpuOcclusionCullingWorkItemBuffers>,
    },
}

/// The work item buffers we use when GPU occlusion culling is in use.
pub struct GpuOcclusionCullingWorkItemBuffers {
    /// The buffer of work items corresponding to indexed meshes.
    pub late_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The buffer of work items corresponding to non-indexed meshes.
    pub late_non_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for indexed meshes live.
    pub late_indirect_parameters_indexed_offset: u32,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for non-indexed meshes live.
    pub late_indirect_parameters_non_indexed_offset: u32,
}

/// A GPU-side data structure that stores the number of workgroups to dispatch
/// for the second phase of GPU occlusion culling.
///
/// The late mesh preprocessing phase checks meshes that weren't visible last
/// frame to determine if they're potentially visible this frame.
#[derive(Clone, Copy, ShaderType, Pod, Zeroable)]
#[repr(C)]
pub struct LatePreprocessWorkItemIndirectParameters {
    /// The number of workgroups to dispatch.
    ///
    /// This will be equal to `work_item_count / 64`, rounded *up*.
    dispatch_x: u32,
    /// The number of workgroups along the abstract Y axis to dispatch: always
    /// 1.
    dispatch_y: u32,
    /// The number of workgroups along the abstract Z axis to dispatch: always
    /// 1.
    dispatch_z: u32,
    /// The actual number of work items.
    ///
    /// The GPU indirect dispatch doesn't read this, but it's used internally to
    /// determine the actual number of work items that exist in the late
    /// preprocessing work item buffer.
    work_item_count: u32,
    /// Padding to 64-byte boundaries for some hardware.
    pad: UVec4,
}

impl Default for LatePreprocessWorkItemIndirectParameters {
    fn default() -> LatePreprocessWorkItemIndirectParameters {
        LatePreprocessWorkItemIndirectParameters {
            dispatch_x: 0,
            dispatch_y: 1,
            dispatch_z: 1,
            work_item_count: 0,
            pad: default(),
        }
    }
}
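
// A hedged sketch of the invariant documented on `dispatch_x` above: with a
// workgroup size of 64, the dispatch count is the work item count divided by
// 64, rounded up.
//
//     assert_eq!(0u32.div_ceil(64), 0); // no work items, no dispatch
//     assert_eq!(1u32.div_ceil(64), 1);
//     assert_eq!(65u32.div_ceil(64), 2);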

/// Returns the set of work item buffers for the given view, first creating it
/// if necessary.
///
/// Bevy uses work item buffers to tell the mesh preprocessing compute shader
/// which meshes are to be drawn.
///
/// You may need to call this function if you're implementing your own custom
/// render phases. See the `specialized_mesh_pipeline` example.
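///
/// A hedged usage sketch (illustrative; `MyPhaseItem`, `work_item_buffers`,
/// `mesh_input_index`, and `indirect_parameters_index` are hypothetical
/// bindings, not items defined in this module):
///
/// ```ignore
/// let work_item_buffer = get_or_create_work_item_buffer::<MyPhaseItem>(
///     work_item_buffers,
///     extracted_view.retained_view_entity,
///     /* no_indirect_drawing= */ false,
///     /* enable_gpu_occlusion_culling= */ false,
/// );
/// work_item_buffer.push(
///     /* indexed= */ true,
///     PreprocessWorkItem {
///         input_index: mesh_input_index,
///         output_or_indirect_parameters_index: indirect_parameters_index,
///     },
/// );
/// ```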
pub fn get_or_create_work_item_buffer<'a, I>(
    work_item_buffers: &'a mut HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,
    view: RetainedViewEntity,
    no_indirect_drawing: bool,
    enable_gpu_occlusion_culling: bool,
) -> &'a mut PreprocessWorkItemBuffers
where
    I: 'static,
{
    let preprocess_work_item_buffers = match work_item_buffers.entry(view) {
        Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
        Entry::Vacant(vacant_entry) => {
            if no_indirect_drawing {
                vacant_entry.insert(PreprocessWorkItemBuffers::Direct(RawBufferVec::new(
                    BufferUsages::STORAGE,
                )))
            } else {
                vacant_entry.insert(PreprocessWorkItemBuffers::Indirect {
                    indexed: RawBufferVec::new(BufferUsages::STORAGE),
                    non_indexed: RawBufferVec::new(BufferUsages::STORAGE),
                    // We fill this in below if `enable_gpu_occlusion_culling`
                    // is set.
                    gpu_occlusion_culling: None,
                })
            }
        }
    };

    // Initialize the GPU occlusion culling buffers if necessary.
    if let PreprocessWorkItemBuffers::Indirect {
        ref mut gpu_occlusion_culling,
        ..
    } = *preprocess_work_item_buffers
    {
        match (
            enable_gpu_occlusion_culling,
            gpu_occlusion_culling.is_some(),
        ) {
            (false, false) | (true, true) => {}
            (false, true) => {
                *gpu_occlusion_culling = None;
            }
            (true, false) => {
                *gpu_occlusion_culling = Some(GpuOcclusionCullingWorkItemBuffers {
                    late_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
                    late_non_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
                    late_indirect_parameters_indexed_offset: 0,
                    late_indirect_parameters_non_indexed_offset: 0,
                });
            }
        }
    }

    preprocess_work_item_buffers
}

/// Initializes work item buffers for a phase in preparation for a new frame.
pub fn init_work_item_buffers(
    work_item_buffers: &mut PreprocessWorkItemBuffers,
    late_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
        LatePreprocessWorkItemIndirectParameters,
    >,
    late_non_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
        LatePreprocessWorkItemIndirectParameters,
    >,
) {
    // Add the offsets for indirect parameters that the late phase of mesh
    // preprocessing writes to.
    if let PreprocessWorkItemBuffers::Indirect {
        gpu_occlusion_culling:
            Some(GpuOcclusionCullingWorkItemBuffers {
                ref mut late_indirect_parameters_indexed_offset,
                ref mut late_indirect_parameters_non_indexed_offset,
                ..
            }),
        ..
    } = *work_item_buffers
    {
        *late_indirect_parameters_indexed_offset = late_indexed_indirect_parameters_buffer
            .push(LatePreprocessWorkItemIndirectParameters::default())
            as u32;
        *late_indirect_parameters_non_indexed_offset = late_non_indexed_indirect_parameters_buffer
            .push(LatePreprocessWorkItemIndirectParameters::default())
            as u32;
    }
}

impl PreprocessWorkItemBuffers {
    /// Adds a new work item to the appropriate buffer.
    ///
    /// `indexed` specifies whether the work item corresponds to an indexed
    /// mesh.
    pub fn push(&mut self, indexed: bool, preprocess_work_item: PreprocessWorkItem) {
        match *self {
            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
                buffer.push(preprocess_work_item);
            }
            PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_buffer,
                non_indexed: ref mut non_indexed_buffer,
                ref mut gpu_occlusion_culling,
            } => {
                if indexed {
                    indexed_buffer.push(preprocess_work_item);
                } else {
                    non_indexed_buffer.push(preprocess_work_item);
                }

                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
                    if indexed {
                        gpu_occlusion_culling.late_indexed.add();
                    } else {
                        gpu_occlusion_culling.late_non_indexed.add();
                    }
                }
            }
        }
    }

    /// Clears out the GPU work item buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        match *self {
            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
                buffer.clear();
            }
            PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_buffer,
                non_indexed: ref mut non_indexed_buffer,
                ref mut gpu_occlusion_culling,
            } => {
                indexed_buffer.clear();
                non_indexed_buffer.clear();

                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
                    gpu_occlusion_culling.late_indexed.clear();
                    gpu_occlusion_culling.late_non_indexed.clear();
                    gpu_occlusion_culling.late_indirect_parameters_indexed_offset = 0;
                    gpu_occlusion_culling.late_indirect_parameters_non_indexed_offset = 0;
                }
            }
        }
    }
}

/// One invocation of the preprocessing shader: i.e. one mesh instance in a
/// view.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct PreprocessWorkItem {
    /// The index of the batch input data in the input buffer that the shader
    /// reads from.
    pub input_index: u32,

    /// In direct mode, the index of the mesh uniform; in indirect mode, the
    /// index of the [`IndirectParametersGpuMetadata`].
    ///
    /// In indirect mode, this is the index of the
    /// [`IndirectParametersGpuMetadata`] in the
    /// `IndirectParametersBuffers::indexed_metadata` or
    /// `IndirectParametersBuffers::non_indexed_metadata`.
    pub output_or_indirect_parameters_index: u32,
}

/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for indexed meshes. We generate the instances of this
/// structure in the `build_indirect_params.wgsl` compute shader.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersIndexed {
    /// The number of indices that this mesh has.
    pub index_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first index for this mesh in the index buffer slab.
    pub first_index: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `MeshUniform` buffer.
    pub first_instance: u32,
}

/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for non-indexed meshes. We generate the instances of
/// this structure in the `build_indirect_params.wgsl` compute shader.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersNonIndexed {
    /// The number of vertices that this mesh has.
    pub vertex_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `Mesh` buffer.
    pub first_instance: u32,
}

/// A structure, initialized on CPU and read on GPU, that contains metadata
/// about each batch.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersCpuMetadata {
    /// The index of the first instance of this mesh in the array of
    /// `MeshUniform`s.
    ///
    /// Note that this is the *first* output index in this batch. Since each
    /// instance of this structure refers to arbitrarily many instances, the
    /// `MeshUniform`s corresponding to this batch span the indices
    /// `base_output_index..(base_output_index + instance_count)`.
    pub base_output_index: u32,

    /// The index of the batch set that this batch belongs to in the
    /// [`IndirectBatchSet`] buffer.
    ///
    /// A *batch set* is a set of meshes that may be multi-drawn together.
    /// Multiple batches (and therefore multiple instances of
    /// [`IndirectParametersGpuMetadata`] structures) can be part of the same
    /// batch set.
    pub batch_set_index: u32,
}

/// A structure, written and read on GPU, that records how many instances of
/// each mesh are actually to be drawn.
///
/// The GPU mesh preprocessing shader increments the
/// [`Self::early_instance_count`] and [`Self::late_instance_count`] as it
/// determines that meshes are visible.  The indirect parameter building shader
/// reads this metadata in order to construct the indirect draw parameters.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersGpuMetadata {
    /// The index of the first mesh in this batch in the array of
    /// `MeshInputUniform`s.
    pub mesh_index: u32,

    /// The number of instances that were judged visible last frame.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub early_instance_count: u32,

    /// The number of instances that have been judged potentially visible this
    /// frame that weren't in the last frame's potentially visible set.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub late_instance_count: u32,
}

/// A structure, shared between CPU and GPU, that holds the number of on-GPU
/// indirect draw commands for each *batch set*.
///
/// A *batch set* is a set of meshes that may be multi-drawn together.
///
/// If the current hardware and driver support `multi_draw_indirect_count`, the
/// indirect parameters building shader increments
/// [`Self::indirect_parameters_count`] as it generates indirect parameters. The
/// `multi_draw_indirect_count` command reads
/// [`Self::indirect_parameters_count`] in order to determine how many commands
/// belong to each batch set.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectBatchSet {
    /// The number of indirect parameter commands (i.e. batches) in this batch
    /// set.
    ///
    /// The CPU sets this value to 0 before uploading this structure to GPU. The
    /// indirect parameters building shader increments this value as it creates
    /// indirect parameters. Then the `multi_draw_indirect_count` command reads
    /// this value in order to determine how many indirect draw commands to
    /// process.
    pub indirect_parameters_count: u32,

    /// The offset within the `IndirectParametersBuffers::indexed_data` or
    /// `IndirectParametersBuffers::non_indexed_data` of the first indirect draw
    /// command for this batch set.
    ///
    /// The CPU fills out this value.
    pub indirect_parameters_base: u32,
}

/// The buffers containing all the information that indirect draw commands
/// (`multi_draw_indirect`, `multi_draw_indirect_count`) use to draw the scene.
///
/// In addition to the indirect draw buffers themselves, this structure contains
/// the buffers that store [`IndirectParametersGpuMetadata`], which are the
/// structures that culling writes to so that the indirect parameter building
/// pass can determine how many meshes are actually to be drawn.
///
/// These buffers will remain empty if indirect drawing isn't in use.
#[derive(Resource, Deref, DerefMut)]
pub struct IndirectParametersBuffers {
    /// A mapping from a phase type ID to the indirect parameters buffers for
    /// that phase.
    ///
    /// Examples of phase type IDs are `Opaque3d` and `AlphaMask3d`.
    #[deref]
    pub buffers: TypeIdMap<UntypedPhaseIndirectParametersBuffers>,
    /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so
    /// that they can be read back to CPU.
    ///
    /// This is a debugging feature that may reduce performance. It primarily
    /// exists for the `occlusion_culling` example.
    pub allow_copies_from_indirect_parameter_buffers: bool,
}

impl IndirectParametersBuffers {
    /// Initializes a new [`IndirectParametersBuffers`] resource.
    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers {
        IndirectParametersBuffers {
            buffers: TypeIdMap::default(),
            allow_copies_from_indirect_parameter_buffers,
        }
    }
}
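
// A hedged lookup sketch: the per-phase buffers are keyed by the phase item
// type's `TypeId`. `Opaque3d` here is purely illustrative; it lives in
// another crate and isn't imported by this module.
//
//     if let Some(phase_buffers) =
//         indirect_parameters_buffers.get(&TypeId::of::<Opaque3d>())
//     {
//         let indexed_batch_sets = phase_buffers.batch_set_count(/* indexed= */ true);
//     }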

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that has a type parameter, so that the
/// batching for different phases can run in parallel.
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
#[derive(Resource)]
pub struct PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    /// The indirect draw buffers for the phase.
    pub buffers: UntypedPhaseIndirectParametersBuffers,
    phantom: PhantomData<PI>,
}

impl<PI> PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> Self {
        PhaseIndirectParametersBuffers {
            buffers: UntypedPhaseIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
            phantom: PhantomData,
        }
    }
}

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that doesn't have a type parameter, so
/// that it can be inserted into [`IndirectParametersBuffers::buffers`].
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
pub struct UntypedPhaseIndirectParametersBuffers {
    /// Information that indirect draw commands use to draw indexed meshes in
    /// the scene.
    pub indexed: MeshClassIndirectParametersBuffers<IndirectParametersIndexed>,
    /// Information that indirect draw commands use to draw non-indexed meshes
    /// in the scene.
    pub non_indexed: MeshClassIndirectParametersBuffers<IndirectParametersNonIndexed>,
}

impl UntypedPhaseIndirectParametersBuffers {
    /// Creates the indirect parameters buffers.
    pub fn new(
        allow_copies_from_indirect_parameter_buffers: bool,
    ) -> UntypedPhaseIndirectParametersBuffers {
        UntypedPhaseIndirectParametersBuffers {
            non_indexed: MeshClassIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
            indexed: MeshClassIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
        }
    }

    /// Reserves space for `count` new batches.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batches
    /// correspond to are indexed or not.
    pub fn allocate(&mut self, indexed: bool, count: u32) -> u32 {
        if indexed {
            self.indexed.allocate(count)
        } else {
            self.non_indexed.allocate(count)
        }
    }

    /// Returns the number of batches currently allocated.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batches
    /// correspond to are indexed or not.
    fn batch_count(&self, indexed: bool) -> usize {
        if indexed {
            self.indexed.batch_count()
        } else {
            self.non_indexed.batch_count()
        }
    }

    /// Returns the number of batch sets currently allocated.
    ///
    /// The `indexed` parameter specifies whether the meshes that these batch
    /// sets correspond to are indexed or not.
    pub fn batch_set_count(&self, indexed: bool) -> usize {
        if indexed {
            self.indexed.batch_sets.len()
        } else {
            self.non_indexed.batch_sets.len()
        }
    }

    /// Adds a new batch set to the indexed or non-indexed batch set list, as
    /// appropriate.
    ///
    /// `indexed` specifies whether the meshes that these batch sets correspond
    /// to are indexed or not. `indirect_parameters_base` specifies the offset
    /// within the indexed or non-indexed data buffer of the first batch in
    /// this batch set.
    #[inline]
    pub fn add_batch_set(&mut self, indexed: bool, indirect_parameters_base: u32) {
        if indexed {
            self.indexed.batch_sets.push(IndirectBatchSet {
                indirect_parameters_base,
                indirect_parameters_count: 0,
            });
        } else {
            self.non_indexed.batch_sets.push(IndirectBatchSet {
                indirect_parameters_base,
                indirect_parameters_count: 0,
            });
        }
    }

    /// Returns the index that a newly-added batch set will have.
    ///
    /// The `indexed` parameter specifies whether the meshes in such a batch set
    /// are indexed or not.
    pub fn get_next_batch_set_index(&self, indexed: bool) -> Option<NonMaxU32> {
        NonMaxU32::new(self.batch_set_count(indexed) as u32)
    }

    /// Clears out the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.indexed.clear();
        self.non_indexed.clear();
    }
}
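
// A hedged sketch of the allocation flow the sorted-phase batching code below
// uses: reserve an indirect parameter slot for a batch, then register the
// batch set that groups it. `phase_buffers` is a hypothetical
// `UntypedPhaseIndirectParametersBuffers` value.
//
//     let base = phase_buffers.allocate(/* indexed= */ true, /* count= */ 1);
//     phase_buffers.add_batch_set(/* indexed= */ true, base);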

/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single mesh class (indexed or non-indexed), for a
/// single phase.
pub struct MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    /// The GPU buffer that stores the indirect draw parameters for the meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    data: UninitBufferVec<IP>,

    /// The GPU buffer that holds the CPU-constructed data used to construct
    /// indirect draw parameters for meshes.
    ///
    /// The CPU writes to this buffer, and the indirect parameters building
    /// shader reads this buffer to construct the indirect draw parameters.
    cpu_metadata: RawBufferVec<IndirectParametersCpuMetadata>,

    /// The GPU buffer that holds data built by the GPU used to construct
    /// indirect draw parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    gpu_metadata: UninitBufferVec<IndirectParametersGpuMetadata>,

    /// The GPU buffer that holds the number of indirect draw commands for each
    /// phase of each view, for meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, and the
    /// `multi_draw_indirect_count` command reads from it in order to know how
    /// many indirect draw commands to process.
    batch_sets: RawBufferVec<IndirectBatchSet>,
}

impl<IP> MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    fn new(
        allow_copies_from_indirect_parameter_buffers: bool,
    ) -> MeshClassIndirectParametersBuffers<IP> {
        let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;
        if allow_copies_from_indirect_parameter_buffers {
            indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC;
        }

        MeshClassIndirectParametersBuffers {
            data: UninitBufferVec::new(indirect_parameter_buffer_usages),
            cpu_metadata: RawBufferVec::new(BufferUsages::STORAGE),
            gpu_metadata: UninitBufferVec::new(BufferUsages::STORAGE),
            batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages),
        }
    }

    /// Returns the GPU buffer that stores the indirect draw parameters for
    /// the meshes in this mesh class.
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    #[inline]
    pub fn data_buffer(&self) -> Option<&Buffer> {
        self.data.buffer()
    }

    /// Returns the GPU buffer that holds the CPU-constructed data used to
    /// construct indirect draw parameters for meshes.
    ///
    /// The CPU writes to this buffer, and the indirect parameters building
    /// shader reads this buffer to construct the indirect draw parameters.
    #[inline]
    pub fn cpu_metadata_buffer(&self) -> Option<&Buffer> {
        self.cpu_metadata.buffer()
    }

    /// Returns the GPU buffer that holds the GPU-constructed data used to
    /// construct indirect draw parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    #[inline]
    pub fn gpu_metadata_buffer(&self) -> Option<&Buffer> {
        self.gpu_metadata.buffer()
    }

    /// Returns the GPU buffer that holds the number of indirect draw commands
    /// for each phase of each view.
    ///
    /// The indirect parameters building shader writes to this buffer, and the
    /// `multi_draw_indirect_count` command reads from it in order to know how
    /// many indirect draw commands to process.
    #[inline]
    pub fn batch_sets_buffer(&self) -> Option<&Buffer> {
        self.batch_sets.buffer()
    }

    /// Reserves space for `count` new batches.
    ///
    /// This allocates in the [`Self::cpu_metadata`], [`Self::gpu_metadata`],
    /// and [`Self::data`] buffers.
    fn allocate(&mut self, count: u32) -> u32 {
        let length = self.data.len();
        self.cpu_metadata.reserve_internal(count as usize);
        self.gpu_metadata.add_multiple(count as usize);
        for _ in 0..count {
            self.data.add();
            self.cpu_metadata
                .push(IndirectParametersCpuMetadata::default());
        }
        length as u32
    }

    /// Sets the [`IndirectParametersCpuMetadata`] for the mesh at the given
    /// index.
    pub fn set(&mut self, index: u32, value: IndirectParametersCpuMetadata) {
        self.cpu_metadata.set(index, value);
    }

    /// Returns the number of batches corresponding to meshes that are currently
    /// allocated.
    #[inline]
    pub fn batch_count(&self) -> usize {
        self.data.len()
    }

    /// Clears out all the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.data.clear();
        self.cpu_metadata.clear();
        self.gpu_metadata.clear();
        self.batch_sets.clear();
    }
}

impl Default for IndirectParametersBuffers {
    fn default() -> Self {
        // By default, we don't allow GPU indirect parameter mapping, since
        // that's a debugging option.
        Self::new(false)
    }
}

impl FromWorld for GpuPreprocessingSupport {
    fn from_world(world: &mut World) -> Self {
        let adapter = world.resource::<RenderAdapter>();
        let device = world.resource::<RenderDevice>();

        // Filter Android drivers that are incompatible with GPU preprocessing:
        // - We filter out Adreno 730 and earlier GPUs (except 720, as it's newer
        //   than 730).
        // - We filter out Mali GPUs with driver versions lower than 48.
        fn is_non_supported_android_device(adapter_info: &RenderAdapterInfo) -> bool {
            crate::get_adreno_model(adapter_info).is_some_and(|model| model != 720 && model <= 730)
                || crate::get_mali_driver_version(adapter_info).is_some_and(|version| version < 48)
        }

        let culling_feature_support = device
            .features()
            .contains(Features::INDIRECT_FIRST_INSTANCE | Features::PUSH_CONSTANTS);
        // Depth downsampling for occlusion culling requires 12 textures
        let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12 &&
            // Even if the adapter supports compute, we might be simulating a lack of
            // compute via device limits (see `WgpuSettingsPriority::WebGL2` and
            // `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the
            // `max_compute_*` limits to zero, so we arbitrarily pick one as a canary.
            device.limits().max_compute_workgroup_storage_size != 0;

        let downlevel_support = adapter
            .get_downlevel_capabilities()
            .flags
            .contains(DownlevelFlags::COMPUTE_SHADERS);

        let adapter_info = RenderAdapterInfo(WgpuWrapper::new(adapter.get_info()));

        let max_supported_mode = if device.limits().max_compute_workgroup_size_x == 0
            || is_non_supported_android_device(&adapter_info)
            || adapter_info.backend == wgpu::Backend::Gl
        {
            info!(
                "GPU preprocessing is not supported on this device. \
                Falling back to CPU preprocessing.",
            );
            GpuPreprocessingMode::None
        } else if !(culling_feature_support && limit_support && downlevel_support) {
            info!("Some GPU preprocessing features are limited on this device.");
            GpuPreprocessingMode::PreprocessingOnly
        } else {
            info!("GPU preprocessing is fully supported on this device.");
            GpuPreprocessingMode::Culling
        };

        GpuPreprocessingSupport { max_supported_mode }
    }
}
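
// A hedged usage sketch: render-world code typically caps its requested mode
// against the detected support level. The function and its arguments are
// illustrative, not part of this module's API.
//
//     fn choose_mode(
//         support: &GpuPreprocessingSupport,
//         no_indirect_drawing: bool,
//     ) -> GpuPreprocessingMode {
//         support.min(if no_indirect_drawing {
//             GpuPreprocessingMode::PreprocessingOnly
//         } else {
//             GpuPreprocessingMode::Culling
//         })
//     }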
1151
1152impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
1153where
1154    BD: GpuArrayBufferable + Sync + Send + 'static,
1155    BDI: Pod + Sync + Send + Default + 'static,
1156{
1157    /// Creates new buffers.
1158    pub fn new() -> Self {
1159        Self::default()
1160    }
1161
1162    /// Clears out the buffers in preparation for a new frame.
1163    pub fn clear(&mut self) {
1164        for phase_instance_buffer in self.phase_instance_buffers.values_mut() {
1165            phase_instance_buffer.clear();
1166        }
1167    }
1168}
1169
1170impl<BD> UntypedPhaseBatchedInstanceBuffers<BD>
1171where
1172    BD: GpuArrayBufferable + Sync + Send + 'static,
1173{
1174    pub fn new() -> Self {
1175        UntypedPhaseBatchedInstanceBuffers {
1176            data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
1177            work_item_buffers: HashMap::default(),
1178            late_indexed_indirect_parameters_buffer: RawBufferVec::new(
1179                BufferUsages::STORAGE | BufferUsages::INDIRECT,
1180            ),
1181            late_non_indexed_indirect_parameters_buffer: RawBufferVec::new(
1182                BufferUsages::STORAGE | BufferUsages::INDIRECT,
1183            ),
1184        }
1185    }
1186
1187    /// Returns the binding of the buffer that contains the per-instance data.
1188    ///
1189    /// This buffer needs to be filled in via a compute shader.
1190    pub fn instance_data_binding(&self) -> Option<BindingResource<'_>> {
1191        self.data_buffer
1192            .buffer()
1193            .map(|buffer| buffer.as_entire_binding())
1194    }
1195
1196    /// Clears out the buffers in preparation for a new frame.
1197    pub fn clear(&mut self) {
1198        self.data_buffer.clear();
1199        self.late_indexed_indirect_parameters_buffer.clear();
1200        self.late_non_indexed_indirect_parameters_buffer.clear();
1201
1202        // Clear each individual set of buffers, but don't depopulate the hash
1203        // table. We want to avoid reallocating these vectors every frame.
1204        for view_work_item_buffers in self.work_item_buffers.values_mut() {
1205            view_work_item_buffers.clear();
1206        }
1207    }
1208}
1209
1210impl<BD> Default for UntypedPhaseBatchedInstanceBuffers<BD>
1211where
1212    BD: GpuArrayBufferable + Sync + Send + 'static,
1213{
1214    fn default() -> Self {
1215        Self::new()
1216    }
1217}
1218
1219/// Information about a render batch that we're building up during a sorted
1220/// render phase.
1221struct SortedRenderBatch<F>
1222where
1223    F: GetBatchData,
1224{
1225    /// The index of the first phase item in this batch in the list of phase
1226    /// items.
1227    phase_item_start_index: u32,
1228
1229    /// The index of the first instance in this batch in the instance buffer.
1230    instance_start_index: u32,
1231
1232    /// True if the mesh in question has an index buffer; false otherwise.
1233    indexed: bool,
1234
1235    /// The index of the indirect parameters for this batch in the
1236    /// [`IndirectParametersBuffers`].
1237    ///
1238    /// If CPU culling is being used, then this will be `None`.
1239    indirect_parameters_index: Option<NonMaxU32>,
1240
1241    /// Metadata that can be used to determine whether an instance can be placed
1242    /// into this batch.
1243    ///
1244    /// If `None`, the item inside is unbatchable.
1245    meta: Option<BatchMeta<F::CompareData>>,
1246}
1247
1248impl<F> SortedRenderBatch<F>
1249where
1250    F: GetBatchData,
1251{
1252    /// Finalizes this batch and updates the [`SortedRenderPhase`] with the
1253    /// appropriate indices.
1254    ///
1255    /// `instance_end_index` is the index of the last instance in this batch
1256    /// plus one.
1257    fn flush<I>(
1258        self,
1259        instance_end_index: u32,
1260        phase: &mut SortedRenderPhase<I>,
1261        phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers,
1262    ) where
1263        I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
1264    {
1265        let (batch_range, batch_extra_index) =
1266            phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut();
1267        *batch_range = self.instance_start_index..instance_end_index;
1268        *batch_extra_index = match self.indirect_parameters_index {
1269            Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex {
1270                range: u32::from(indirect_parameters_index)
1271                    ..(u32::from(indirect_parameters_index) + 1),
1272                batch_set_index: None,
1273            },
1274            None => PhaseItemExtraIndex::None,
1275        };
1276        if let Some(indirect_parameters_index) = self.indirect_parameters_index {
1277            phase_indirect_parameters_buffers
1278                .add_batch_set(self.indexed, indirect_parameters_index.into());
1279        }
1280    }
1281}
1282
1283/// A system that runs early in extraction and clears out all the
1284/// [`BatchedInstanceBuffers`] for the frame.
1285///
1286/// We have to run this during extraction because, if GPU preprocessing is in
1287/// use, the extraction phase will write to the mesh input uniform buffers
1288/// directly, so the buffers need to be cleared before then.
1289pub fn clear_batched_gpu_instance_buffers<GFBD>(
1290    gpu_batched_instance_buffers: Option<
1291        ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
1292    >,
1293) where
1294    GFBD: GetFullBatchData,
1295{
1296    // Don't clear the entire table, because that would delete the buffers, and
1297    // we want to reuse those allocations.
1298    if let Some(mut gpu_batched_instance_buffers) = gpu_batched_instance_buffers {
1299        gpu_batched_instance_buffers.clear();
1300    }
1301}
1302
1303/// A system that removes GPU preprocessing work item buffers that correspond to
1304/// deleted [`ExtractedView`]s.
1305///
1306/// This is a separate system from [`clear_batched_gpu_instance_buffers`]
1307/// because [`ExtractedView`]s aren't created until after the extraction phase
1308/// is completed.
1309pub fn delete_old_work_item_buffers<GFBD>(
1310    mut gpu_batched_instance_buffers: ResMut<
1311        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
1312    >,
1313    extracted_views: Query<&ExtractedView>,
1314) where
1315    GFBD: GetFullBatchData,
1316{
1317    let retained_view_entities: HashSet<_> = extracted_views
1318        .iter()
1319        .map(|extracted_view| extracted_view.retained_view_entity)
1320        .collect();
1321    for phase_instance_buffers in gpu_batched_instance_buffers
1322        .phase_instance_buffers
1323        .values_mut()
1324    {
1325        phase_instance_buffers
1326            .work_item_buffers
1327            .retain(|retained_view_entity, _| {
1328                retained_view_entities.contains(retained_view_entity)
1329            });
1330    }
1331}
1332
1333/// Batch the items in a sorted render phase, when GPU instance buffer building
1334/// is in use. This means comparing metadata needed to draw each phase item and
1335/// trying to combine the draws into a batch.
pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<I, GFBD::BufferData>>,
    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<I>>,
    mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
    mut views: Query<(
        &ExtractedView,
        Has<NoIndirectDrawing>,
        Has<OcclusionCulling>,
    )>,
    system_param_item: StaticSystemParam<GFBD::Param>,
) where
    I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    GFBD: GetFullBatchData,
{
    // We only process GPU-built batch data in this function.
    let UntypedPhaseBatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ref mut late_indexed_indirect_parameters_buffer,
        ref mut late_non_indexed_indirect_parameters_buffer,
    } = phase_batched_instance_buffers.buffers;

    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
        let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else {
            continue;
        };

        // Create the work item buffer if necessary.
        let work_item_buffer = get_or_create_work_item_buffer::<I>(
            work_item_buffers,
            extracted_view.retained_view_entity,
            no_indirect_drawing,
            gpu_occlusion_culling,
        );

        // Initialize those work item buffers in preparation for this new frame.
        init_work_item_buffers(
            work_item_buffer,
            late_indexed_indirect_parameters_buffer,
            late_non_indexed_indirect_parameters_buffer,
        );

        // Walk through the list of phase items, building up batches as we go.
        let mut batch: Option<SortedRenderBatch<GFBD>> = None;

        for current_index in 0..phase.items.len() {
            // Get the index of the input data, and comparison metadata, for
            // this entity.
            let item = &phase.items[current_index];
            let entity = item.main_entity();
            let item_is_indexed = item.indexed();
            let current_batch_input_index =
                GFBD::get_index_and_compare_data(&system_param_item, entity);
            // Unpack that index and metadata. If the index is missing, the
            // item isn't handled by this pipeline, so we break the current
            // batch and skip the item. If only the metadata is missing, the
            // entity is unbatchable and gets a batch of its own below.
            let Some((current_input_index, current_meta)) = current_batch_input_index else {
                // Break a batch if we need to.
                if let Some(batch) = batch.take() {
                    batch.flush(
                        data_buffer.len() as u32,
                        phase,
                        &mut phase_indirect_parameters_buffers.buffers,
                    );
                }

                continue;
            };
            let current_meta =
                current_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta));

            // Determine if this entity can be included in the batch we're
            // building up.
            let can_batch = batch.as_ref().is_some_and(|batch| {
                // `None` for metadata indicates that the items are unbatchable.
                match (&current_meta, &batch.meta) {
                    (Some(current_meta), Some(batch_meta)) => current_meta == batch_meta,
                    _ => false,
                }
            });

            // Make space in the data buffer for this instance.
            let output_index = data_buffer.add() as u32;

            // If we can't batch, break the existing batch and make a new one.
            if !can_batch {
                // Break a batch if we need to.
                if let Some(batch) = batch.take() {
                    batch.flush(
                        output_index,
                        phase,
                        &mut phase_indirect_parameters_buffers.buffers,
                    );
                }

                let indirect_parameters_index = if no_indirect_drawing {
                    None
                } else if item_is_indexed {
                    Some(
                        phase_indirect_parameters_buffers
                            .buffers
                            .indexed
                            .allocate(1),
                    )
                } else {
                    Some(
                        phase_indirect_parameters_buffers
                            .buffers
                            .non_indexed
                            .allocate(1),
                    )
                };

                // Start a new batch.
                if let Some(indirect_parameters_index) = indirect_parameters_index {
                    GFBD::write_batch_indirect_parameters_metadata(
                        item_is_indexed,
                        output_index,
                        None,
                        &mut phase_indirect_parameters_buffers.buffers,
                        indirect_parameters_index,
                    );
                }

                batch = Some(SortedRenderBatch {
                    phase_item_start_index: current_index as u32,
                    instance_start_index: output_index,
                    indexed: item_is_indexed,
                    indirect_parameters_index: indirect_parameters_index.and_then(NonMaxU32::new),
                    meta: current_meta,
                });
            }

            // Add a new preprocessing work item so that the preprocessing
            // shader will copy the per-instance data over.
            if let Some(batch) = batch.as_ref() {
                work_item_buffer.push(
                    item_is_indexed,
                    PreprocessWorkItem {
                        input_index: current_input_index.into(),
                        output_or_indirect_parameters_index: match (
                            no_indirect_drawing,
                            batch.indirect_parameters_index,
                        ) {
                            (true, _) => output_index,
                            (false, Some(indirect_parameters_index)) => {
                                indirect_parameters_index.into()
                            }
                            (false, None) => 0,
                        },
                    },
                );
            }
        }

        // Flush the final batch if necessary.
        if let Some(batch) = batch.take() {
            batch.flush(
                data_buffer.len() as u32,
                phase,
                &mut phase_indirect_parameters_buffers.buffers,
            );
        }
    }
}

/// Creates batches for a render phase that uses bins.
///
/// The system makes three passes over each view's phase, mirroring the three
/// categories of binned meshes: multidrawable meshes (an entire batch set can
/// typically be drawn with a single multidraw call), batchable meshes (one
/// draw per batch), and unbatchable meshes (one draw per entity).
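///
/// Roughly, for each view (a sketch of the control flow, not real code):
///
/// ```text
/// for (batch_set_key, bins) in multidrawable_meshes { prepare a batch set }
/// for (key, bin)            in batchable_meshes     { grow or start a batch }
/// for (key, unbatchables)   in unbatchable_meshes   { one draw per entity }
/// ```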
pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<BPI, GFBD::BufferData>>,
    phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<BPI>>,
    mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
    mut views: Query<(
        &ExtractedView,
        Has<NoIndirectDrawing>,
        Has<OcclusionCulling>,
    )>,
    param: StaticSystemParam<GFBD::Param>,
) where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    let system_param_item = param.into_inner();

    let phase_indirect_parameters_buffers = phase_indirect_parameters_buffers.into_inner();

    let UntypedPhaseBatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ref mut late_indexed_indirect_parameters_buffer,
        ref mut late_non_indexed_indirect_parameters_buffer,
    } = phase_batched_instance_buffers.buffers;

    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
        let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else {
            continue;
        };

        // Create the work item buffer if necessary; otherwise, just mark it as
        // used this frame.
        let work_item_buffer = get_or_create_work_item_buffer::<BPI>(
            work_item_buffers,
            extracted_view.retained_view_entity,
            no_indirect_drawing,
            gpu_occlusion_culling,
        );

        // Initialize those work item buffers in preparation for this new frame.
        init_work_item_buffers(
            work_item_buffer,
            late_indexed_indirect_parameters_buffer,
            late_non_indexed_indirect_parameters_buffer,
        );

        // Prepare multidrawables.

        if let (
            &mut BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut batch_sets),
            &mut PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_work_item_buffer,
                non_indexed: ref mut non_indexed_work_item_buffer,
                gpu_occlusion_culling: ref mut gpu_occlusion_culling_buffers,
            },
        ) = (&mut phase.batch_sets, &mut *work_item_buffer)
        {
            let mut output_index = data_buffer.len() as u32;

            // Initialize the state for both indexed and non-indexed meshes.
            let mut indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =
                MultidrawableBatchSetPreparer::new(
                    phase_indirect_parameters_buffers.buffers.batch_count(true) as u32,
                    phase_indirect_parameters_buffers
                        .buffers
                        .indexed
                        .batch_sets
                        .len() as u32,
                );
            let mut non_indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =
                MultidrawableBatchSetPreparer::new(
                    phase_indirect_parameters_buffers.buffers.batch_count(false) as u32,
                    phase_indirect_parameters_buffers
                        .buffers
                        .non_indexed
                        .batch_sets
                        .len() as u32,
                );

            // Prepare each batch set.
            for (batch_set_key, bins) in &phase.multidrawable_meshes {
                if batch_set_key.indexed() {
                    indexed_preparer.prepare_multidrawable_binned_batch_set(
                        bins,
                        &mut output_index,
                        data_buffer,
                        indexed_work_item_buffer,
                        &mut phase_indirect_parameters_buffers.buffers.indexed,
                        batch_sets,
                    );
                } else {
                    non_indexed_preparer.prepare_multidrawable_binned_batch_set(
                        bins,
                        &mut output_index,
                        data_buffer,
                        non_indexed_work_item_buffer,
                        &mut phase_indirect_parameters_buffers.buffers.non_indexed,
                        batch_sets,
                    );
                }
            }

            // Reserve space in the occlusion culling buffers, if necessary.
            if let Some(gpu_occlusion_culling_buffers) = gpu_occlusion_culling_buffers {
                gpu_occlusion_culling_buffers
                    .late_indexed
                    .add_multiple(indexed_preparer.work_item_count);
                gpu_occlusion_culling_buffers
                    .late_non_indexed
                    .add_multiple(non_indexed_preparer.work_item_count);
            }
        }

        // Prepare batchables.

        for (key, bin) in &phase.batchable_meshes {
            let mut batch: Option<BinnedRenderPhaseBatch> = None;
            for (&main_entity, &input_index) in bin.entities() {
                let output_index = data_buffer.add() as u32;

                match batch {
                    Some(ref mut batch) => {
                        batch.instance_range.end = output_index + 1;

                        // Append to the current batch.
                        //
                        // In indirect mode, the work item records the index of
                        // this batch's indirect parameters, so that the
                        // preprocessing shader can compact the surviving
                        // instances into a tightly-packed buffer if GPU
                        // culling discards some of them. In direct mode, the
                        // work item records the output index itself.
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: match (
                                    no_indirect_drawing,
                                    &batch.extra_index,
                                ) {
                                    (true, _) => output_index,
                                    (
                                        false,
                                        PhaseItemExtraIndex::IndirectParametersIndex {
                                            range: indirect_parameters_range,
                                            ..
                                        },
                                    ) => indirect_parameters_range.start,
                                    (false, &PhaseItemExtraIndex::DynamicOffset(_))
                                    | (false, &PhaseItemExtraIndex::None) => 0,
                                },
                            },
                        );
                    }

                    None if !no_indirect_drawing => {
                        // Start a new batch, in indirect mode.
                        let indirect_parameters_index = phase_indirect_parameters_buffers
                            .buffers
                            .allocate(key.0.indexed(), 1);
                        let batch_set_index = phase_indirect_parameters_buffers
                            .buffers
                            .get_next_batch_set_index(key.0.indexed());

                        GFBD::write_batch_indirect_parameters_metadata(
                            key.0.indexed(),
                            output_index,
                            batch_set_index,
                            &mut phase_indirect_parameters_buffers.buffers,
                            indirect_parameters_index,
                        );
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: indirect_parameters_index,
                            },
                        );
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (Entity::PLACEHOLDER, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
                                range: indirect_parameters_index..(indirect_parameters_index + 1),
                                batch_set_index: None,
                            },
                        });
                    }

                    None => {
                        // Start a new batch, in direct mode.
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: output_index,
                            },
                        );
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (Entity::PLACEHOLDER, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::None,
                        });
                    }
                }
            }

            if let Some(batch) = batch {
                match phase.batch_sets {
                    BinnedRenderPhaseBatchSets::DynamicUniforms(_) => {
                        error!("Dynamic uniform batch sets shouldn't be used here");
                    }
                    BinnedRenderPhaseBatchSets::Direct(ref mut vec) => {
                        vec.push(batch);
                    }
                    BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut vec) => {
                        // The Bevy renderer will never mark a mesh as batchable
                        // but not multidrawable if multidraw is in use.
                        // However, custom render pipelines might do so, such as
                        // the `specialized_mesh_pipeline` example.
                        vec.push(BinnedRenderPhaseBatchSet {
                            first_batch: batch,
                            batch_count: 1,
                            bin_key: key.1.clone(),
                            index: phase_indirect_parameters_buffers
                                .buffers
                                .batch_set_count(key.0.indexed())
                                as u32,
                        });
                    }
                }
            }
        }

        // Prepare unbatchables.
        for (key, unbatchables) in &mut phase.unbatchable_meshes {
            // Allocate the indirect parameters if necessary.
            let mut indirect_parameters_offset = if no_indirect_drawing {
                None
            } else if key.0.indexed() {
                Some(
                    phase_indirect_parameters_buffers
                        .buffers
                        .indexed
                        .allocate(unbatchables.entities.len() as u32),
                )
            } else {
                Some(
                    phase_indirect_parameters_buffers
                        .buffers
                        .non_indexed
                        .allocate(unbatchables.entities.len() as u32),
                )
            };

            for main_entity in unbatchables.entities.keys() {
                let Some(input_index) = GFBD::get_binned_index(&system_param_item, *main_entity)
                else {
                    continue;
                };
                let output_index = data_buffer.add() as u32;

                if let Some(ref mut indirect_parameters_index) = indirect_parameters_offset {
                    // We're in indirect mode, so add an indirect parameters
                    // index.
                    GFBD::write_batch_indirect_parameters_metadata(
                        key.0.indexed(),
                        output_index,
                        None,
                        &mut phase_indirect_parameters_buffers.buffers,
                        *indirect_parameters_index,
                    );
                    work_item_buffer.push(
                        key.0.indexed(),
                        PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_or_indirect_parameters_index: *indirect_parameters_index,
                        },
                    );
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: *indirect_parameters_index,
                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
                                range: *indirect_parameters_index..(*indirect_parameters_index + 1),
                                batch_set_index: None,
                            },
                        });
                    phase_indirect_parameters_buffers
                        .buffers
                        .add_batch_set(key.0.indexed(), *indirect_parameters_index);
                    *indirect_parameters_index += 1;
                } else {
                    work_item_buffer.push(
                        key.0.indexed(),
                        PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_or_indirect_parameters_index: output_index,
                        },
                    );
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: output_index,
                            extra_index: PhaseItemExtraIndex::None,
                        });
                }
            }
        }
    }
}

/// The state that [`batch_and_prepare_binned_render_phase`] uses to construct
/// multidrawable batch sets.
///
/// The [`batch_and_prepare_binned_render_phase`] system maintains two of these:
/// one for indexed meshes and one for non-indexed meshes.
struct MultidrawableBatchSetPreparer<BPI, GFBD>
where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    /// The offset in the indirect parameters buffer at which the next indirect
    /// parameters will be written.
    indirect_parameters_index: u32,
    /// The number of batch sets we've built so far for this mesh class.
    batch_set_index: u32,
    /// The number of work items we've emitted so far for this mesh class.
    work_item_count: usize,
    /// Marker for the phase item and batch data types.
    phantom: PhantomData<(BPI, GFBD)>,
}

impl<BPI, GFBD> MultidrawableBatchSetPreparer<BPI, GFBD>
where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    /// Creates a new [`MultidrawableBatchSetPreparer`] that will start writing
    /// indirect parameters and batch sets at the given indices.
    #[inline]
    fn new(initial_indirect_parameters_index: u32, initial_batch_set_index: u32) -> Self {
        MultidrawableBatchSetPreparer {
            indirect_parameters_index: initial_indirect_parameters_index,
            batch_set_index: initial_batch_set_index,
            work_item_count: 0,
            phantom: PhantomData,
        }
    }

    /// Creates batch sets and writes the GPU data needed to draw all visible
    /// entities of one mesh class in the given batch set.
    ///
    /// The *mesh class* represents whether the mesh has indices or not.
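    ///
    /// As a worked example: a batch set whose bins contain 3, 1, and 2 visible
    /// entities emits 6 work items and 3 `IndirectParametersCpuMetadata`
    /// entries, reserves 6 slots in the data buffer, and pushes a single
    /// [`IndirectBatchSet`] whose `indirect_parameters_count` starts at 0 (the
    /// GPU preprocessing pass is expected to fill in the final count after
    /// culling).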
    #[inline]
    fn prepare_multidrawable_binned_batch_set<IP>(
        &mut self,
        bins: &IndexMap<BPI::BinKey, RenderBin>,
        output_index: &mut u32,
        data_buffer: &mut UninitBufferVec<GFBD::BufferData>,
        indexed_work_item_buffer: &mut RawBufferVec<PreprocessWorkItem>,
        mesh_class_buffers: &mut MeshClassIndirectParametersBuffers<IP>,
        batch_sets: &mut Vec<BinnedRenderPhaseBatchSet<BPI::BinKey>>,
    ) where
        IP: Clone + ShaderSize + WriteInto,
    {
        let current_batch_set_index = self.batch_set_index;
        let current_output_index = *output_index;

        let indirect_parameters_base = self.indirect_parameters_index;

        // We're going to write the first entity into the batch set. Do this
        // here so that we can preload the bin into cache as a side effect.
        let Some((first_bin_key, first_bin)) = bins.iter().next() else {
            return;
        };
        let first_bin_len = first_bin.entities().len();
        let first_bin_entity = first_bin
            .entities()
            .keys()
            .next()
            .copied()
            .unwrap_or(MainEntity::from(Entity::PLACEHOLDER));

        // Traverse the batch set, processing each bin.
        for bin in bins.values() {
            // Record the first output index for this batch, as well as its own
            // index.
            mesh_class_buffers
                .cpu_metadata
                .push(IndirectParametersCpuMetadata {
                    base_output_index: *output_index,
                    batch_set_index: self.batch_set_index,
                });

            // Traverse the bin, pushing `PreprocessWorkItem`s for each entity
            // within it. This is a hot loop, so make it as fast as possible.
            for &input_index in bin.entities().values() {
                indexed_work_item_buffer.push(PreprocessWorkItem {
                    input_index: *input_index,
                    output_or_indirect_parameters_index: self.indirect_parameters_index,
                });
            }

            // Reserve space for the appropriate number of entities in the data
            // buffer. Also, advance the output index and work item count.
            let bin_entity_count = bin.entities().len();
            data_buffer.add_multiple(bin_entity_count);
            *output_index += bin_entity_count as u32;
            self.work_item_count += bin_entity_count;

            self.indirect_parameters_index += 1;
        }

        // Reserve space for the bins in this batch set in the GPU buffers.
        let bin_count = bins.len();
        mesh_class_buffers.gpu_metadata.add_multiple(bin_count);
        mesh_class_buffers.data.add_multiple(bin_count);

        // Write the information the GPU will need about this batch set.
        mesh_class_buffers.batch_sets.push(IndirectBatchSet {
            indirect_parameters_base,
            indirect_parameters_count: 0,
        });

        self.batch_set_index += 1;

        // Record the batch set. The render node later processes this record to
        // render the batches.
        batch_sets.push(BinnedRenderPhaseBatchSet {
            first_batch: BinnedRenderPhaseBatch {
                representative_entity: (Entity::PLACEHOLDER, first_bin_entity),
                instance_range: current_output_index..(current_output_index + first_bin_len as u32),
                extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(NonMaxU32::new(
                    indirect_parameters_base,
                )),
            },
            bin_key: (*first_bin_key).clone(),
            batch_count: self.indirect_parameters_index - indirect_parameters_base,
            index: current_batch_set_index,
        });
    }
}

/// A system that gathers up the per-phase GPU buffers and inserts them into the
/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] tables.
///
/// This runs after the [`batch_and_prepare_binned_render_phase`] or
/// [`batch_and_prepare_sorted_render_phase`] systems, taking the per-phase
/// [`PhaseBatchedInstanceBuffers`] and [`PhaseIndirectParametersBuffers`]
/// resources and moving their contents into the global tables.
///
/// This system exists so that [`batch_and_prepare_binned_render_phase`] and
/// [`batch_and_prepare_sorted_render_phase`] can run in parallel with one
/// another. If those systems wrote to [`BatchedInstanceBuffers`] and
/// [`IndirectParametersBuffers`] directly, they would contend on those
/// resources and be forced to run serially.
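///
/// A minimal sketch of the take-insert-reuse pattern employed below, with the
/// real buffer types elided:
///
/// ```rust,ignore
/// let filled = mem::take(&mut per_phase.buffers);
/// if let Some(mut old) = global_table.insert(TypeId::of::<PI>(), filled) {
///     old.clear();              // keep last frame's allocation...
///     per_phase.buffers = old;  // ...and reuse it next frame
/// }
/// ```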
pub fn collect_buffers_for_phase<PI, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<PI, GFBD::BufferData>>,
    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<PI>>,
    mut batched_instance_buffers: ResMut<
        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
    >,
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) where
    PI: PhaseItem,
    GFBD: GetFullBatchData + Send + Sync + 'static,
{
    // Insert the `PhaseBatchedInstanceBuffers` into the global table. Replace
    // the contents of the per-phase resource with the old batched instance
    // buffers in order to reuse allocations.
    let untyped_phase_batched_instance_buffers =
        mem::take(&mut phase_batched_instance_buffers.buffers);
    if let Some(mut old_untyped_phase_batched_instance_buffers) = batched_instance_buffers
        .phase_instance_buffers
        .insert(TypeId::of::<PI>(), untyped_phase_batched_instance_buffers)
    {
        old_untyped_phase_batched_instance_buffers.clear();
        phase_batched_instance_buffers.buffers = old_untyped_phase_batched_instance_buffers;
    }

    // Insert the `PhaseIndirectParametersBuffers` into the global table.
    // Replace the contents of the per-phase resource with the old indirect
    // parameters buffers in order to reuse allocations.
    let untyped_phase_indirect_parameters_buffers = mem::replace(
        &mut phase_indirect_parameters_buffers.buffers,
        UntypedPhaseIndirectParametersBuffers::new(
            indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers,
        ),
    );
    if let Some(mut old_untyped_phase_indirect_parameters_buffers) = indirect_parameters_buffers
        .insert(
            TypeId::of::<PI>(),
            untyped_phase_indirect_parameters_buffers,
        )
    {
        old_untyped_phase_indirect_parameters_buffers.clear();
        phase_indirect_parameters_buffers.buffers = old_untyped_phase_indirect_parameters_buffers;
    }
}

/// A system that writes all instance buffers to the GPU.
///
/// Each buffer is written in its own task on the [`ComputeTaskPool`], since
/// the individual uploads are independent of one another.
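///
/// A minimal sketch of the fan-out pattern used below (`buffers` and `upload`
/// are illustrative stand-ins):
///
/// ```rust,ignore
/// ComputeTaskPool::get().scope(|scope| {
///     for buffer in buffers {
///         // Each spawned task runs concurrently; `scope` blocks until all
///         // of them have completed.
///         scope.spawn(async move { upload(buffer) });
///     }
/// });
/// ```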
pub fn write_batched_instance_buffers<GFBD>(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
) where
    GFBD: GetFullBatchData,
{
    let BatchedInstanceBuffers {
        current_input_buffer,
        previous_input_buffer,
        phase_instance_buffers,
    } = gpu_array_buffer.into_inner();

    let render_device = &*render_device;
    let render_queue = &*render_queue;

    ComputeTaskPool::get().scope(|scope| {
        scope.spawn(async {
            let _span = tracing::info_span!("write_current_input_buffers").entered();
            current_input_buffer
                .buffer
                .write_buffer(render_device, render_queue);
        });
        scope.spawn(async {
            let _span = tracing::info_span!("write_previous_input_buffers").entered();
            previous_input_buffer
                .buffer
                .write_buffer(render_device, render_queue);
        });

        for phase_instance_buffers in phase_instance_buffers.values_mut() {
            let UntypedPhaseBatchedInstanceBuffers {
                ref mut data_buffer,
                ref mut work_item_buffers,
                ref mut late_indexed_indirect_parameters_buffer,
                ref mut late_non_indexed_indirect_parameters_buffer,
            } = *phase_instance_buffers;

            scope.spawn(async {
                let _span = tracing::info_span!("write_phase_instance_buffers").entered();
                data_buffer.write_buffer(render_device);
                late_indexed_indirect_parameters_buffer.write_buffer(render_device, render_queue);
                late_non_indexed_indirect_parameters_buffer
                    .write_buffer(render_device, render_queue);
            });

            for phase_work_item_buffers in work_item_buffers.values_mut() {
                scope.spawn(async {
                    let _span = tracing::info_span!("write_work_item_buffers").entered();
                    match *phase_work_item_buffers {
                        PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => {
                            buffer_vec.write_buffer(render_device, render_queue);
                        }
                        PreprocessWorkItemBuffers::Indirect {
                            ref mut indexed,
                            ref mut non_indexed,
                            ref mut gpu_occlusion_culling,
                        } => {
                            indexed.write_buffer(render_device, render_queue);
                            non_indexed.write_buffer(render_device, render_queue);

                            if let Some(GpuOcclusionCullingWorkItemBuffers {
                                ref mut late_indexed,
                                ref mut late_non_indexed,
                                late_indirect_parameters_indexed_offset: _,
                                late_indirect_parameters_non_indexed_offset: _,
                            }) = *gpu_occlusion_culling
                            {
                                if !late_indexed.is_empty() {
                                    late_indexed.write_buffer(render_device);
                                }
                                if !late_non_indexed.is_empty() {
                                    late_non_indexed.write_buffer(render_device);
                                }
                            }
                        }
                    }
                });
            }
        }
    });
}

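/// A system that clears out the per-phase indirect parameters buffers so that
/// they can be refilled for the new frame.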
pub fn clear_indirect_parameters_buffers(
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) {
    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
        phase_indirect_parameters_buffers.clear();
    }
}

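/// A system that writes the per-phase indirect parameters buffers to the GPU,
/// fanning the individual uploads out across the [`ComputeTaskPool`].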
pub fn write_indirect_parameters_buffers(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
) {
    let render_device = &*render_device;
    let render_queue = &*render_queue;
    ComputeTaskPool::get().scope(|scope| {
        for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
            scope.spawn(async {
                let _span = tracing::info_span!("indexed_data").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .data
                    .write_buffer(render_device);
            });
            scope.spawn(async {
                let _span = tracing::info_span!("non_indexed_data").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .data
                    .write_buffer(render_device);
            });

            scope.spawn(async {
                let _span = tracing::info_span!("indexed_cpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .cpu_metadata
                    .write_buffer(render_device, render_queue);
            });
            scope.spawn(async {
                let _span = tracing::info_span!("non_indexed_cpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .cpu_metadata
                    .write_buffer(render_device, render_queue);
            });

            scope.spawn(async {
                let _span = tracing::info_span!("non_indexed_gpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .gpu_metadata
                    .write_buffer(render_device);
            });
            scope.spawn(async {
                let _span = tracing::info_span!("indexed_gpu_metadata").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .gpu_metadata
                    .write_buffer(render_device);
            });

            scope.spawn(async {
                let _span = tracing::info_span!("indexed_batch_sets").entered();
                phase_indirect_parameters_buffers
                    .indexed
                    .batch_sets
                    .write_buffer(render_device, render_queue);
            });
            scope.spawn(async {
                let _span = tracing::info_span!("non_indexed_batch_sets").entered();
                phase_indirect_parameters_buffers
                    .non_indexed
                    .batch_sets
                    .write_buffer(render_device, render_queue);
            });
        }
    });
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn instance_buffer_correct_behavior() {
        let mut instance_buffer = InstanceInputUniformBuffer::new();

        let index = instance_buffer.add(2);
        instance_buffer.remove(index);
        // Removal frees the slot but leaves the stale value readable through
        // the unchecked accessor; the checked accessor reports `None`.
        assert_eq!(instance_buffer.get_unchecked(index), 2);
        assert_eq!(instance_buffer.get(index), None);

        // The freed slot is reused for the new value, so the underlying
        // buffer still holds only one element.
        instance_buffer.add(5);
        assert_eq!(instance_buffer.buffer().len(), 1);
    }
}