bevy_render/batching/
gpu_preprocessing.rs

1//! Batching functionality when GPU preprocessing is in use.
2
3use core::{any::TypeId, marker::PhantomData, mem};
4
5use bevy_app::{App, Plugin};
6use bevy_derive::{Deref, DerefMut};
7use bevy_ecs::{
8    prelude::Entity,
9    query::{Has, With},
10    resource::Resource,
11    schedule::IntoScheduleConfigs as _,
12    system::{Query, Res, ResMut, StaticSystemParam},
13    world::{FromWorld, World},
14};
15use bevy_encase_derive::ShaderType;
16use bevy_math::UVec4;
17use bevy_platform::collections::{hash_map::Entry, HashMap, HashSet};
18use bevy_utils::{default, TypeIdMap};
19use bytemuck::{Pod, Zeroable};
20use encase::{internal::WriteInto, ShaderSize};
21use indexmap::IndexMap;
22use nonmax::NonMaxU32;
23use tracing::{error, info};
24use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
25
26use crate::{
27    experimental::occlusion_culling::OcclusionCulling,
28    render_phase::{
29        BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet,
30        BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItem,
31        PhaseItemBatchSetKey as _, PhaseItemExtraIndex, RenderBin, SortedPhaseItem,
32        SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases,
33        ViewSortedRenderPhases,
34    },
35    render_resource::{Buffer, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
36    renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue, WgpuWrapper},
37    sync_world::MainEntity,
38    view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity},
39    Render, RenderApp, RenderDebugFlags, RenderSystems,
40};
41
42use super::{BatchMeta, GetBatchData, GetFullBatchData};
43
/// A plugin that sets up the GPU buffers and systems needed for indirect
/// drawing: it inserts the [`IndirectParametersBuffers`] resource and
/// registers the systems that clear and write those buffers each frame.
#[derive(Default)]
pub struct BatchingPlugin {
    /// Debugging flags that can optionally be set when constructing the renderer.
    pub debug_flags: RenderDebugFlags,
}
49
50impl Plugin for BatchingPlugin {
51    fn build(&self, app: &mut App) {
52        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
53            return;
54        };
55
56        render_app
57            .insert_resource(IndirectParametersBuffers::new(
58                self.debug_flags
59                    .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS),
60            ))
61            .add_systems(
62                Render,
63                write_indirect_parameters_buffers.in_set(RenderSystems::PrepareResourcesFlush),
64            )
65            .add_systems(
66                Render,
67                clear_indirect_parameters_buffers.in_set(RenderSystems::ManageViews),
68            );
69    }
70
71    fn finish(&self, app: &mut App) {
72        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
73            return;
74        };
75
76        render_app.init_resource::<GpuPreprocessingSupport>();
77    }
78}
79
/// Records whether GPU preprocessing and/or GPU culling are supported on the
/// device.
///
/// No GPU preprocessing is supported on WebGL because of the lack of compute
/// shader support.  GPU preprocessing is supported on DirectX 12, but due to [a
/// `wgpu` limitation] GPU culling is not.
///
/// [a `wgpu` limitation]: https://github.com/gfx-rs/wgpu/issues/2471
#[derive(Clone, Copy, PartialEq, Resource)]
pub struct GpuPreprocessingSupport {
    /// The maximum amount of GPU preprocessing available on this platform.
    ///
    /// See [`GpuPreprocessingMode`] for the possible levels.
    pub max_supported_mode: GpuPreprocessingMode,
}
93
94impl GpuPreprocessingSupport {
95    /// Returns true if this GPU preprocessing support level isn't `None`.
96    #[inline]
97    pub fn is_available(&self) -> bool {
98        self.max_supported_mode != GpuPreprocessingMode::None
99    }
100
101    /// Returns the given GPU preprocessing mode, capped to the current
102    /// preprocessing mode.
103    pub fn min(&self, mode: GpuPreprocessingMode) -> GpuPreprocessingMode {
104        match (self.max_supported_mode, mode) {
105            (GpuPreprocessingMode::None, _) | (_, GpuPreprocessingMode::None) => {
106                GpuPreprocessingMode::None
107            }
108            (mode, GpuPreprocessingMode::Culling) | (GpuPreprocessingMode::Culling, mode) => mode,
109            (GpuPreprocessingMode::PreprocessingOnly, GpuPreprocessingMode::PreprocessingOnly) => {
110                GpuPreprocessingMode::PreprocessingOnly
111            }
112        }
113    }
114
115    /// Returns true if GPU culling is supported on this platform.
116    pub fn is_culling_supported(&self) -> bool {
117        self.max_supported_mode == GpuPreprocessingMode::Culling
118    }
119}
120
/// The amount of GPU preprocessing (compute and indirect draw) that we do.
///
/// Variants are ordered from least to most capable; see
/// [`GpuPreprocessingSupport::min`], which treats [`Self::None`] <
/// [`Self::PreprocessingOnly`] < [`Self::Culling`].
#[derive(Clone, Copy, PartialEq)]
pub enum GpuPreprocessingMode {
    /// No GPU preprocessing is in use at all.
    ///
    /// This is used when GPU compute isn't available.
    None,

    /// GPU preprocessing is in use, but GPU culling isn't.
    ///
    /// This is used when the [`NoIndirectDrawing`] component is present on the
    /// camera.
    PreprocessingOnly,

    /// Both GPU preprocessing and GPU culling are in use.
    ///
    /// This is used by default.
    Culling,
}
140
/// The GPU buffers holding the data needed to render batches.
///
/// For example, in the 3D PBR pipeline this holds `MeshUniform`s, which are the
/// `BD` type parameter in that mode.
///
/// We have a separate *buffer data input* type (`BDI`) here, which a compute
/// shader is expected to expand to the full buffer data (`BD`) type. GPU
/// uniform building is generally faster and uses less system RAM to VRAM bus
/// bandwidth, but only implemented for some pipelines (for example, not in the
/// 2D pipeline at present) and only when compute shader is available.
#[derive(Resource)]
pub struct BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Default,
{
    /// The uniform data inputs for the current frame.
    ///
    /// These are uploaded during the extraction phase.
    pub current_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The uniform data inputs for the previous frame.
    ///
    /// The indices don't generally line up between `current_input_buffer`
    /// and `previous_input_buffer`, because, among other reasons, entities
    /// can spawn or despawn between frames. Instead, each current buffer
    /// data input uniform is expected to contain the index of the
    /// corresponding buffer data input uniform in this list.
    pub previous_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The data needed to render buffers for each phase.
    ///
    /// The keys of this map are the type IDs of each phase: e.g. `Opaque3d`,
    /// `AlphaMask3d`, etc. Splitting the buffers per phase allows the
    /// per-phase batching work to run in parallel.
    pub phase_instance_buffers: TypeIdMap<UntypedPhaseBatchedInstanceBuffers<BD>>,
}
177
178impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
179where
180    BD: GpuArrayBufferable + Sync + Send + 'static,
181    BDI: Pod + Sync + Send + Default + 'static,
182{
183    fn default() -> Self {
184        BatchedInstanceBuffers {
185            current_input_buffer: InstanceInputUniformBuffer::new(),
186            previous_input_buffer: InstanceInputUniformBuffer::new(),
187            phase_instance_buffers: HashMap::default(),
188        }
189    }
190}
191
/// The GPU buffers holding the data needed to render batches for a single
/// phase.
///
/// These are split out per phase so that we can run the phases in parallel.
/// This is the version of the structure that has a type parameter, which
/// enables Bevy's scheduler to run the batching operations for the different
/// phases in parallel.
///
/// See the documentation for [`BatchedInstanceBuffers`] for more information.
#[derive(Resource)]
pub struct PhaseBatchedInstanceBuffers<PI, BD>
where
    PI: PhaseItem,
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// The buffers for this phase.
    pub buffers: UntypedPhaseBatchedInstanceBuffers<BD>,
    // Ties the phase item type `PI` to this resource so that each phase gets
    // its own distinct resource type; no data is actually stored for `PI`.
    phantom: PhantomData<PI>,
}
211
212impl<PI, BD> Default for PhaseBatchedInstanceBuffers<PI, BD>
213where
214    PI: PhaseItem,
215    BD: GpuArrayBufferable + Sync + Send + 'static,
216{
217    fn default() -> Self {
218        PhaseBatchedInstanceBuffers {
219            buffers: UntypedPhaseBatchedInstanceBuffers::default(),
220            phantom: PhantomData,
221        }
222    }
223}
224
/// The GPU buffers holding the data needed to render batches for a single
/// phase, without a type parameter for that phase.
///
/// Since this structure doesn't have a type parameter, it can be placed in
/// [`BatchedInstanceBuffers::phase_instance_buffers`].
pub struct UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// A storage area for the buffer data that the GPU compute shader is
    /// expected to write to.
    ///
    /// There will be one entry for each index.
    pub data_buffer: UninitBufferVec<BD>,

    /// The index of the buffer data in the current input buffer that
    /// corresponds to each instance.
    ///
    /// This is keyed off each view. Each view has a separate buffer.
    pub work_item_buffers: HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,

    /// A buffer that holds the number of indexed meshes that weren't visible in
    /// the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible next frame to see if they became newly
    /// visible this frame.
    pub late_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,

    /// A buffer that holds the number of non-indexed meshes that weren't
    /// visible in the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible next frame to see if they became newly
    /// visible this frame.
    pub late_non_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,
}
266
/// Holds the GPU buffer of instance input data, which is the data about each
/// mesh instance that the CPU provides.
///
/// `BDI` is the *buffer data input* type, which the GPU mesh preprocessing
/// shader is expected to expand to the full *buffer data* type.
pub struct InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    /// The buffer containing the data that will be uploaded to the GPU.
    ///
    /// Slots listed in `free_uniform_indices` still contain stale data; see
    /// [`Self::remove`].
    buffer: RawBufferVec<BDI>,

    /// Indices of slots that are free within the buffer.
    ///
    /// When adding data, we preferentially overwrite these slots first before
    /// growing the buffer itself.
    free_uniform_indices: Vec<u32>,
}
285
286impl<BDI> InstanceInputUniformBuffer<BDI>
287where
288    BDI: Pod + Default,
289{
290    /// Creates a new, empty buffer.
291    pub fn new() -> InstanceInputUniformBuffer<BDI> {
292        InstanceInputUniformBuffer {
293            buffer: RawBufferVec::new(BufferUsages::STORAGE),
294            free_uniform_indices: vec![],
295        }
296    }
297
298    /// Clears the buffer and entity list out.
299    pub fn clear(&mut self) {
300        self.buffer.clear();
301        self.free_uniform_indices.clear();
302    }
303
304    /// Returns the [`RawBufferVec`] corresponding to this input uniform buffer.
305    #[inline]
306    pub fn buffer(&self) -> &RawBufferVec<BDI> {
307        &self.buffer
308    }
309
310    /// Adds a new piece of buffered data to the uniform buffer and returns its
311    /// index.
312    pub fn add(&mut self, element: BDI) -> u32 {
313        match self.free_uniform_indices.pop() {
314            Some(uniform_index) => {
315                self.buffer.values_mut()[uniform_index as usize] = element;
316                uniform_index
317            }
318            None => self.buffer.push(element) as u32,
319        }
320    }
321
322    /// Removes a piece of buffered data from the uniform buffer.
323    ///
324    /// This simply marks the data as free.
325    pub fn remove(&mut self, uniform_index: u32) {
326        self.free_uniform_indices.push(uniform_index);
327    }
328
329    /// Returns the piece of buffered data at the given index.
330    ///
331    /// Returns [`None`] if the index is out of bounds or the data is removed.
332    pub fn get(&self, uniform_index: u32) -> Option<BDI> {
333        if (uniform_index as usize) >= self.buffer.len()
334            || self.free_uniform_indices.contains(&uniform_index)
335        {
336            None
337        } else {
338            Some(self.get_unchecked(uniform_index))
339        }
340    }
341
342    /// Returns the piece of buffered data at the given index.
343    /// Can return data that has previously been removed.
344    ///
345    /// # Panics
346    /// if `uniform_index` is not in bounds of [`Self::buffer`].
347    pub fn get_unchecked(&self, uniform_index: u32) -> BDI {
348        self.buffer.values()[uniform_index as usize]
349    }
350
351    /// Stores a piece of buffered data at the given index.
352    ///
353    /// # Panics
354    /// if `uniform_index` is not in bounds of [`Self::buffer`].
355    pub fn set(&mut self, uniform_index: u32, element: BDI) {
356        self.buffer.values_mut()[uniform_index as usize] = element;
357    }
358
359    // Ensures that the buffers are nonempty, which the GPU requires before an
360    // upload can take place.
361    pub fn ensure_nonempty(&mut self) {
362        if self.buffer.is_empty() {
363            self.buffer.push(default());
364        }
365    }
366
367    /// Returns the number of instances in this buffer.
368    pub fn len(&self) -> usize {
369        self.buffer.len()
370    }
371
372    /// Returns true if this buffer has no instances or false if it contains any
373    /// instances.
374    pub fn is_empty(&self) -> bool {
375        self.buffer.is_empty()
376    }
377
378    /// Consumes this [`InstanceInputUniformBuffer`] and returns the raw buffer
379    /// ready to be uploaded to the GPU.
380    pub fn into_buffer(self) -> RawBufferVec<BDI> {
381        self.buffer
382    }
383}
384
385impl<BDI> Default for InstanceInputUniformBuffer<BDI>
386where
387    BDI: Pod + Default,
388{
389    fn default() -> Self {
390        Self::new()
391    }
392}
393
/// The buffer of GPU preprocessing work items for a single view.
#[cfg_attr(
    not(target_arch = "wasm32"),
    expect(
        clippy::large_enum_variant,
        reason = "See https://github.com/bevyengine/bevy/issues/19220"
    )
)]
pub enum PreprocessWorkItemBuffers {
    /// The work items we use if we aren't using indirect drawing.
    ///
    /// Because we don't have to separate indexed from non-indexed meshes in
    /// direct mode, we only have a single buffer here.
    Direct(RawBufferVec<PreprocessWorkItem>),

    /// The buffer of work items we use if we are using indirect drawing.
    ///
    /// We need to separate out indexed meshes from non-indexed meshes in this
    /// case because the indirect parameters for these two types of meshes have
    /// different sizes.
    Indirect {
        /// The buffer of work items corresponding to indexed meshes.
        indexed: RawBufferVec<PreprocessWorkItem>,
        /// The buffer of work items corresponding to non-indexed meshes.
        non_indexed: RawBufferVec<PreprocessWorkItem>,
        /// The work item buffers we use when GPU occlusion culling is in use.
        ///
        /// `None` when GPU occlusion culling is disabled for the view.
        gpu_occlusion_culling: Option<GpuOcclusionCullingWorkItemBuffers>,
    },
}
423
/// The work item buffers we use when GPU occlusion culling is in use.
pub struct GpuOcclusionCullingWorkItemBuffers {
    /// The buffer of work items corresponding to indexed meshes.
    pub late_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The buffer of work items corresponding to non-indexed meshes.
    pub late_non_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for indexed meshes live.
    pub late_indirect_parameters_indexed_offset: u32,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for non-indexed meshes live.
    pub late_indirect_parameters_non_indexed_offset: u32,
}
439
/// A GPU-side data structure that stores the number of workgroups to dispatch
/// for the second phase of GPU occlusion culling.
///
/// The late mesh preprocessing phase checks meshes that weren't visible last
/// frame to determine if they're potentially visible this frame.
#[derive(Clone, Copy, ShaderType, Pod, Zeroable)]
#[repr(C)]
pub struct LatePreprocessWorkItemIndirectParameters {
    /// The number of workgroups to dispatch.
    ///
    /// This will be equal to `work_item_count / 64`, rounded *up*.
    dispatch_x: u32,
    /// The number of workgroups along the abstract Y axis to dispatch: always
    /// 1.
    dispatch_y: u32,
    /// The number of workgroups along the abstract Z axis to dispatch: always
    /// 1.
    dispatch_z: u32,
    /// The actual number of work items.
    ///
    /// The GPU indirect dispatch doesn't read this, but it's used internally to
    /// determine the actual number of work items that exist in the late
    /// preprocessing work item buffer.
    work_item_count: u32,
    /// Padding to 64-byte boundaries for some hardware.
    pad: UVec4,
}
467
468impl Default for LatePreprocessWorkItemIndirectParameters {
469    fn default() -> LatePreprocessWorkItemIndirectParameters {
470        LatePreprocessWorkItemIndirectParameters {
471            dispatch_x: 0,
472            dispatch_y: 1,
473            dispatch_z: 1,
474            work_item_count: 0,
475            pad: default(),
476        }
477    }
478}
479
480/// Returns the set of work item buffers for the given view, first creating it
481/// if necessary.
482///
483/// Bevy uses work item buffers to tell the mesh preprocessing compute shader
484/// which meshes are to be drawn.
485///
486/// You may need to call this function if you're implementing your own custom
487/// render phases. See the `specialized_mesh_pipeline` example.
488pub fn get_or_create_work_item_buffer<'a, I>(
489    work_item_buffers: &'a mut HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,
490    view: RetainedViewEntity,
491    no_indirect_drawing: bool,
492    enable_gpu_occlusion_culling: bool,
493) -> &'a mut PreprocessWorkItemBuffers
494where
495    I: 'static,
496{
497    let preprocess_work_item_buffers = match work_item_buffers.entry(view) {
498        Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
499        Entry::Vacant(vacant_entry) => {
500            if no_indirect_drawing {
501                vacant_entry.insert(PreprocessWorkItemBuffers::Direct(RawBufferVec::new(
502                    BufferUsages::STORAGE,
503                )))
504            } else {
505                vacant_entry.insert(PreprocessWorkItemBuffers::Indirect {
506                    indexed: RawBufferVec::new(BufferUsages::STORAGE),
507                    non_indexed: RawBufferVec::new(BufferUsages::STORAGE),
508                    // We fill this in below if `enable_gpu_occlusion_culling`
509                    // is set.
510                    gpu_occlusion_culling: None,
511                })
512            }
513        }
514    };
515
516    // Initialize the GPU occlusion culling buffers if necessary.
517    if let PreprocessWorkItemBuffers::Indirect {
518        ref mut gpu_occlusion_culling,
519        ..
520    } = *preprocess_work_item_buffers
521    {
522        match (
523            enable_gpu_occlusion_culling,
524            gpu_occlusion_culling.is_some(),
525        ) {
526            (false, false) | (true, true) => {}
527            (false, true) => {
528                *gpu_occlusion_culling = None;
529            }
530            (true, false) => {
531                *gpu_occlusion_culling = Some(GpuOcclusionCullingWorkItemBuffers {
532                    late_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
533                    late_non_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
534                    late_indirect_parameters_indexed_offset: 0,
535                    late_indirect_parameters_non_indexed_offset: 0,
536                });
537            }
538        }
539    }
540
541    preprocess_work_item_buffers
542}
543
544/// Initializes work item buffers for a phase in preparation for a new frame.
545pub fn init_work_item_buffers(
546    work_item_buffers: &mut PreprocessWorkItemBuffers,
547    late_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
548        LatePreprocessWorkItemIndirectParameters,
549    >,
550    late_non_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
551        LatePreprocessWorkItemIndirectParameters,
552    >,
553) {
554    // Add the offsets for indirect parameters that the late phase of mesh
555    // preprocessing writes to.
556    if let PreprocessWorkItemBuffers::Indirect {
557        gpu_occlusion_culling:
558            Some(GpuOcclusionCullingWorkItemBuffers {
559                ref mut late_indirect_parameters_indexed_offset,
560                ref mut late_indirect_parameters_non_indexed_offset,
561                ..
562            }),
563        ..
564    } = *work_item_buffers
565    {
566        *late_indirect_parameters_indexed_offset = late_indexed_indirect_parameters_buffer
567            .push(LatePreprocessWorkItemIndirectParameters::default())
568            as u32;
569        *late_indirect_parameters_non_indexed_offset = late_non_indexed_indirect_parameters_buffer
570            .push(LatePreprocessWorkItemIndirectParameters::default())
571            as u32;
572    }
573}
574
575impl PreprocessWorkItemBuffers {
576    /// Adds a new work item to the appropriate buffer.
577    ///
578    /// `indexed` specifies whether the work item corresponds to an indexed
579    /// mesh.
580    pub fn push(&mut self, indexed: bool, preprocess_work_item: PreprocessWorkItem) {
581        match *self {
582            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
583                buffer.push(preprocess_work_item);
584            }
585            PreprocessWorkItemBuffers::Indirect {
586                indexed: ref mut indexed_buffer,
587                non_indexed: ref mut non_indexed_buffer,
588                ref mut gpu_occlusion_culling,
589            } => {
590                if indexed {
591                    indexed_buffer.push(preprocess_work_item);
592                } else {
593                    non_indexed_buffer.push(preprocess_work_item);
594                }
595
596                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
597                    if indexed {
598                        gpu_occlusion_culling.late_indexed.add();
599                    } else {
600                        gpu_occlusion_culling.late_non_indexed.add();
601                    }
602                }
603            }
604        }
605    }
606
607    /// Clears out the GPU work item buffers in preparation for a new frame.
608    pub fn clear(&mut self) {
609        match *self {
610            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
611                buffer.clear();
612            }
613            PreprocessWorkItemBuffers::Indirect {
614                indexed: ref mut indexed_buffer,
615                non_indexed: ref mut non_indexed_buffer,
616                ref mut gpu_occlusion_culling,
617            } => {
618                indexed_buffer.clear();
619                non_indexed_buffer.clear();
620
621                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
622                    gpu_occlusion_culling.late_indexed.clear();
623                    gpu_occlusion_culling.late_non_indexed.clear();
624                    gpu_occlusion_culling.late_indirect_parameters_indexed_offset = 0;
625                    gpu_occlusion_culling.late_indirect_parameters_non_indexed_offset = 0;
626                }
627            }
628        }
629    }
630}
631
/// One invocation of the preprocessing shader: i.e. one mesh instance in a
/// view.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct PreprocessWorkItem {
    /// The index of the batch input data in the input buffer that the shader
    /// reads from.
    pub input_index: u32,

    /// In direct mode, the index of the mesh uniform; in indirect mode, the
    /// index of the [`IndirectParametersGpuMetadata`].
    ///
    /// In indirect mode, this is the index of the
    /// [`IndirectParametersGpuMetadata`] in the
    /// `IndirectParametersBuffers::indexed_metadata` or
    /// `IndirectParametersBuffers::non_indexed_metadata`.
    pub output_or_indirect_parameters_index: u32,
}
650
/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for indexed meshes. We generate the instances of this
/// structure in the `build_indirect_params.wgsl` compute shader.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersIndexed {
    /// The number of indices that this mesh has.
    pub index_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first index for this mesh in the index buffer slab.
    pub first_index: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `MeshUniform` buffer.
    pub first_instance: u32,
}
669
/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for non-indexed meshes. We generate the instances of
/// this structure in the `build_indirect_params.wgsl` compute shader.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersNonIndexed {
    /// The number of vertices that this mesh has.
    pub vertex_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `Mesh` buffer.
    pub first_instance: u32,
}
686
/// A structure, initialized on CPU and read on GPU, that contains metadata
/// about each batch.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersCpuMetadata {
    /// The index of the first instance of this mesh in the array of
    /// `MeshUniform`s.
    ///
    /// Note that this is the *first* output index in this batch. Since each
    /// instance of this structure refers to arbitrarily many instances, the
    /// `MeshUniform`s corresponding to this batch span the indices
    /// `base_output_index..(base_output_index + instance_count)`.
    pub base_output_index: u32,

    /// The index of the batch set that this batch belongs to in the
    /// [`IndirectBatchSet`] buffer.
    ///
    /// A *batch set* is a set of meshes that may be multi-drawn together.
    /// Multiple batches (and therefore multiple instances of
    /// [`IndirectParametersGpuMetadata`] structures) can be part of the same
    /// batch set.
    pub batch_set_index: u32,
}
712
/// A structure, written and read on the GPU, that records how many instances
/// of each mesh are actually to be drawn.
///
/// The GPU mesh preprocessing shader increments the
/// [`Self::early_instance_count`] and [`Self::late_instance_count`] as it
/// determines that meshes are visible.  The indirect parameter building shader
/// reads this metadata in order to construct the indirect draw parameters.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersGpuMetadata {
    /// The index of the first mesh in this batch in the array of
    /// `MeshInputUniform`s.
    pub mesh_index: u32,

    /// The number of instances that were judged visible last frame.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub early_instance_count: u32,

    /// The number of instances that have been judged potentially visible this
    /// frame that weren't in the last frame's potentially visible set.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub late_instance_count: u32,
}
742
/// A structure, shared between CPU and GPU, that holds the number of on-GPU
/// indirect draw commands for each *batch set*.
///
/// A *batch set* is a set of meshes that may be multi-drawn together.
///
/// If the current hardware and driver support `multi_draw_indirect_count`, the
/// indirect parameters building shader increments
/// [`Self::indirect_parameters_count`] as it generates indirect parameters. The
/// `multi_draw_indirect_count` command reads
/// [`Self::indirect_parameters_count`] in order to determine how many commands
/// belong to each batch set.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectBatchSet {
    /// The number of indirect parameter commands (i.e. batches) in this batch
    /// set.
    ///
    /// The CPU sets this value to 0 before uploading this structure to GPU. The
    /// indirect parameters building shader increments this value as it creates
    /// indirect parameters. Then the `multi_draw_indirect_count` command reads
    /// this value in order to determine how many indirect draw commands to
    /// process.
    pub indirect_parameters_count: u32,

    /// The offset within the `IndirectParametersBuffers::indexed_data` or
    /// `IndirectParametersBuffers::non_indexed_data` of the first indirect draw
    /// command for this batch set.
    ///
    /// The CPU fills out this value.
    pub indirect_parameters_base: u32,
}
774
/// The buffers containing all the information that indirect draw commands
/// (`multi_draw_indirect`, `multi_draw_indirect_count`) use to draw the scene.
///
/// In addition to the indirect draw buffers themselves, this structure contains
/// the buffers that store [`IndirectParametersGpuMetadata`], which are the
/// structures that culling writes to so that the indirect parameter building
/// pass can determine how many meshes are actually to be drawn.
///
/// These buffers will remain empty if indirect drawing isn't in use.
#[derive(Resource, Deref, DerefMut)]
pub struct IndirectParametersBuffers {
    /// A mapping from a phase type ID to the indirect parameters buffers for
    /// that phase.
    ///
    /// Examples of phase type IDs are `Opaque3d` and `AlphaMask3d`.
    #[deref]
    pub buffers: TypeIdMap<UntypedPhaseIndirectParametersBuffers>,
    /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so
    /// that they can be read back to CPU.
    ///
    /// This is a debugging feature that may reduce performance. It primarily
    /// exists for the `occlusion_culling` example.
    pub allow_copies_from_indirect_parameter_buffers: bool,
}
799
800impl IndirectParametersBuffers {
801    /// Initializes a new [`IndirectParametersBuffers`] resource.
802    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers {
803        IndirectParametersBuffers {
804            buffers: TypeIdMap::default(),
805            allow_copies_from_indirect_parameter_buffers,
806        }
807    }
808}
809
/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that has a type parameter, so that the
/// batching for different phases can run in parallel.
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
#[derive(Resource)]
pub struct PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    /// The indirect draw buffers for the phase.
    pub buffers: UntypedPhaseIndirectParametersBuffers,
    // Marker tying this resource to the phase item type `PI`, so that each
    // phase gets its own independent instance of the resource.
    phantom: PhantomData<PI>,
}
826
impl<PI> PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    /// Creates the indirect parameters buffers for this phase.
    ///
    /// If `allow_copies_from_indirect_parameter_buffers` is true, the indirect
    /// draw buffers are created with the `COPY_SRC` flag so that they can be
    /// read back to the CPU (a debugging feature).
    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> Self {
        PhaseIndirectParametersBuffers {
            buffers: UntypedPhaseIndirectParametersBuffers::new(
                allow_copies_from_indirect_parameter_buffers,
            ),
            phantom: PhantomData,
        }
    }
}
840
/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that doesn't have a type parameter, so
/// that it can be inserted into [`IndirectParametersBuffers::buffers`]
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
///
/// Indexed and non-indexed meshes are tracked separately because their
/// indirect draw commands have different layouts
/// ([`IndirectParametersIndexed`] vs. [`IndirectParametersNonIndexed`]).
pub struct UntypedPhaseIndirectParametersBuffers {
    /// Information that indirect draw commands use to draw indexed meshes in
    /// the scene.
    pub indexed: MeshClassIndirectParametersBuffers<IndirectParametersIndexed>,
    /// Information that indirect draw commands use to draw non-indexed meshes
    /// in the scene.
    pub non_indexed: MeshClassIndirectParametersBuffers<IndirectParametersNonIndexed>,
}
856
857impl UntypedPhaseIndirectParametersBuffers {
858    /// Creates the indirect parameters buffers.
859    pub fn new(
860        allow_copies_from_indirect_parameter_buffers: bool,
861    ) -> UntypedPhaseIndirectParametersBuffers {
862        let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;
863        if allow_copies_from_indirect_parameter_buffers {
864            indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC;
865        }
866
867        UntypedPhaseIndirectParametersBuffers {
868            non_indexed: MeshClassIndirectParametersBuffers::new(
869                allow_copies_from_indirect_parameter_buffers,
870            ),
871            indexed: MeshClassIndirectParametersBuffers::new(
872                allow_copies_from_indirect_parameter_buffers,
873            ),
874        }
875    }
876
877    /// Reserves space for `count` new batches.
878    ///
879    /// The `indexed` parameter specifies whether the meshes that these batches
880    /// correspond to are indexed or not.
881    pub fn allocate(&mut self, indexed: bool, count: u32) -> u32 {
882        if indexed {
883            self.indexed.allocate(count)
884        } else {
885            self.non_indexed.allocate(count)
886        }
887    }
888
889    /// Returns the number of batches currently allocated.
890    ///
891    /// The `indexed` parameter specifies whether the meshes that these batches
892    /// correspond to are indexed or not.
893    fn batch_count(&self, indexed: bool) -> usize {
894        if indexed {
895            self.indexed.batch_count()
896        } else {
897            self.non_indexed.batch_count()
898        }
899    }
900
901    /// Returns the number of batch sets currently allocated.
902    ///
903    /// The `indexed` parameter specifies whether the meshes that these batch
904    /// sets correspond to are indexed or not.
905    pub fn batch_set_count(&self, indexed: bool) -> usize {
906        if indexed {
907            self.indexed.batch_sets.len()
908        } else {
909            self.non_indexed.batch_sets.len()
910        }
911    }
912
913    /// Adds a new batch set to `Self::indexed_batch_sets` or
914    /// `Self::non_indexed_batch_sets` as appropriate.
915    ///
916    /// `indexed` specifies whether the meshes that these batch sets correspond
917    /// to are indexed or not. `indirect_parameters_base` specifies the offset
918    /// within `Self::indexed_data` or `Self::non_indexed_data` of the first
919    /// batch in this batch set.
920    #[inline]
921    pub fn add_batch_set(&mut self, indexed: bool, indirect_parameters_base: u32) {
922        if indexed {
923            self.indexed.batch_sets.push(IndirectBatchSet {
924                indirect_parameters_base,
925                indirect_parameters_count: 0,
926            });
927        } else {
928            self.non_indexed.batch_sets.push(IndirectBatchSet {
929                indirect_parameters_base,
930                indirect_parameters_count: 0,
931            });
932        }
933    }
934
935    /// Returns the index that a newly-added batch set will have.
936    ///
937    /// The `indexed` parameter specifies whether the meshes in such a batch set
938    /// are indexed or not.
939    pub fn get_next_batch_set_index(&self, indexed: bool) -> Option<NonMaxU32> {
940        NonMaxU32::new(self.batch_set_count(indexed) as u32)
941    }
942
943    /// Clears out the buffers in preparation for a new frame.
944    pub fn clear(&mut self) {
945        self.indexed.clear();
946        self.non_indexed.clear();
947    }
948}
949
/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single mesh class (indexed or non-indexed), for a
/// single phase.
pub struct MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    /// The GPU buffer that stores the indirect draw parameters for the meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    data: UninitBufferVec<IP>,

    /// The GPU buffer that holds the CPU-constructed data used to construct
    /// indirect draw parameters for meshes.
    ///
    /// The CPU writes to this buffer (see [`Self::set`]), and the indirect
    /// parameters building shader reads this buffer to construct the indirect
    /// draw parameters.
    cpu_metadata: RawBufferVec<IndirectParametersCpuMetadata>,

    /// The GPU buffer that holds data built by the GPU used to construct
    /// indirect draw parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    gpu_metadata: UninitBufferVec<IndirectParametersGpuMetadata>,

    /// The GPU buffer that holds the number of indirect draw commands for each
    /// phase of each view, for meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, and the
    /// `multi_draw_indirect_count` command reads from it in order to know how
    /// many indirect draw commands to process.
    batch_sets: RawBufferVec<IndirectBatchSet>,
}
988
impl<IP> MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    // Creates the per-mesh-class buffers. The `COPY_SRC` usage is only added
    // to the indirect draw buffers when CPU readback is requested for
    // debugging.
    fn new(
        allow_copies_from_indirect_parameter_buffers: bool,
    ) -> MeshClassIndirectParametersBuffers<IP> {
        let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;
        if allow_copies_from_indirect_parameter_buffers {
            indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC;
        }

        MeshClassIndirectParametersBuffers {
            data: UninitBufferVec::new(indirect_parameter_buffer_usages),
            cpu_metadata: RawBufferVec::new(BufferUsages::STORAGE),
            gpu_metadata: UninitBufferVec::new(BufferUsages::STORAGE),
            batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages),
        }
    }

    /// Returns the GPU buffer that stores the indirect draw parameters for
    /// the meshes in this mesh class.
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    #[inline]
    pub fn data_buffer(&self) -> Option<&Buffer> {
        self.data.buffer()
    }

    /// Returns the GPU buffer that holds the CPU-constructed data used to
    /// construct indirect draw parameters for meshes.
    ///
    /// The CPU writes to this buffer, and the indirect parameters building
    /// shader reads this buffer to construct the indirect draw parameters.
    #[inline]
    pub fn cpu_metadata_buffer(&self) -> Option<&Buffer> {
        self.cpu_metadata.buffer()
    }

    /// Returns the GPU buffer that holds the GPU-constructed data used to
    /// construct indirect draw parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    #[inline]
    pub fn gpu_metadata_buffer(&self) -> Option<&Buffer> {
        self.gpu_metadata.buffer()
    }

    /// Returns the GPU buffer that holds the number of indirect draw commands
    /// for each phase of each view.
    ///
    /// The indirect parameters building shader writes to this buffer, and the
    /// `multi_draw_indirect_count` command reads from it in order to know how
    /// many indirect draw commands to process.
    #[inline]
    pub fn batch_sets_buffer(&self) -> Option<&Buffer> {
        self.batch_sets.buffer()
    }

    /// Reserves space for `count` new batches.
    ///
    /// This allocates in the [`Self::cpu_metadata`], [`Self::gpu_metadata`],
    /// and [`Self::data`] buffers.
    ///
    /// Returns the index of the first newly-allocated batch.
    fn allocate(&mut self, count: u32) -> u32 {
        // The pre-allocation index is the index of the first new batch.
        let length = self.data.len();
        self.cpu_metadata.reserve_internal(count as usize);
        self.gpu_metadata.add_multiple(count as usize);
        for _ in 0..count {
            self.data.add();
            self.cpu_metadata
                .push(IndirectParametersCpuMetadata::default());
        }
        length as u32
    }

    /// Sets the [`IndirectParametersCpuMetadata`] for the mesh at the given
    /// index.
    pub fn set(&mut self, index: u32, value: IndirectParametersCpuMetadata) {
        self.cpu_metadata.set(index, value);
    }

    /// Returns the number of batches corresponding to meshes that are currently
    /// allocated.
    #[inline]
    pub fn batch_count(&self) -> usize {
        self.data.len()
    }

    /// Clears out all the buffers in preparation for a new frame.
    pub fn clear(&mut self) {
        self.data.clear();
        self.cpu_metadata.clear();
        self.gpu_metadata.clear();
        self.batch_sets.clear();
    }
}
1089
1090impl Default for IndirectParametersBuffers {
1091    fn default() -> Self {
1092        // By default, we don't allow GPU indirect parameter mapping, since
1093        // that's a debugging option.
1094        Self::new(false)
1095    }
1096}
1097
1098impl FromWorld for GpuPreprocessingSupport {
1099    fn from_world(world: &mut World) -> Self {
1100        let adapter = world.resource::<RenderAdapter>();
1101        let device = world.resource::<RenderDevice>();
1102
1103        // Filter Android drivers that are incompatible with GPU preprocessing:
1104        // - We filter out Adreno 730 and earlier GPUs (except 720, as it's newer
1105        //   than 730).
1106        // - We filter out Mali GPUs with driver versions lower than 48.
1107        fn is_non_supported_android_device(adapter_info: &RenderAdapterInfo) -> bool {
1108            crate::get_adreno_model(adapter_info).is_some_and(|model| model != 720 && model <= 730)
1109                || crate::get_mali_driver_version(adapter_info).is_some_and(|version| version < 48)
1110        }
1111
1112        let culling_feature_support = device.features().contains(
1113            Features::INDIRECT_FIRST_INSTANCE
1114                | Features::MULTI_DRAW_INDIRECT
1115                | Features::PUSH_CONSTANTS,
1116        );
1117        // Depth downsampling for occlusion culling requires 12 textures
1118        let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12 &&
1119            // Even if the adapter supports compute, we might be simulating a lack of
1120            // compute via device limits (see `WgpuSettingsPriority::WebGL2` and
1121            // `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the
1122            // `max_compute_*` limits to zero, so we arbitrarily pick one as a canary.
1123            device.limits().max_compute_workgroup_storage_size != 0;
1124
1125        let downlevel_support = adapter
1126            .get_downlevel_capabilities()
1127            .flags
1128            .contains(DownlevelFlags::COMPUTE_SHADERS);
1129
1130        let adapter_info = RenderAdapterInfo(WgpuWrapper::new(adapter.get_info()));
1131
1132        let max_supported_mode = if device.limits().max_compute_workgroup_size_x == 0
1133            || is_non_supported_android_device(&adapter_info)
1134            || adapter_info.backend == wgpu::Backend::Gl
1135        {
1136            info!(
1137                "GPU preprocessing is not supported on this device. \
1138                Falling back to CPU preprocessing.",
1139            );
1140            GpuPreprocessingMode::None
1141        } else if !(culling_feature_support && limit_support && downlevel_support) {
1142            info!("Some GPU preprocessing are limited on this device.");
1143            GpuPreprocessingMode::PreprocessingOnly
1144        } else {
1145            info!("GPU preprocessing is fully supported on this device.");
1146            GpuPreprocessingMode::Culling
1147        };
1148
1149        GpuPreprocessingSupport { max_supported_mode }
1150    }
1151}
1152
1153impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
1154where
1155    BD: GpuArrayBufferable + Sync + Send + 'static,
1156    BDI: Pod + Sync + Send + Default + 'static,
1157{
1158    /// Creates new buffers.
1159    pub fn new() -> Self {
1160        Self::default()
1161    }
1162
1163    /// Clears out the buffers in preparation for a new frame.
1164    pub fn clear(&mut self) {
1165        for phase_instance_buffer in self.phase_instance_buffers.values_mut() {
1166            phase_instance_buffer.clear();
1167        }
1168    }
1169}
1170
1171impl<BD> UntypedPhaseBatchedInstanceBuffers<BD>
1172where
1173    BD: GpuArrayBufferable + Sync + Send + 'static,
1174{
1175    pub fn new() -> Self {
1176        UntypedPhaseBatchedInstanceBuffers {
1177            data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
1178            work_item_buffers: HashMap::default(),
1179            late_indexed_indirect_parameters_buffer: RawBufferVec::new(
1180                BufferUsages::STORAGE | BufferUsages::INDIRECT,
1181            ),
1182            late_non_indexed_indirect_parameters_buffer: RawBufferVec::new(
1183                BufferUsages::STORAGE | BufferUsages::INDIRECT,
1184            ),
1185        }
1186    }
1187
1188    /// Returns the binding of the buffer that contains the per-instance data.
1189    ///
1190    /// This buffer needs to be filled in via a compute shader.
1191    pub fn instance_data_binding(&self) -> Option<BindingResource<'_>> {
1192        self.data_buffer
1193            .buffer()
1194            .map(|buffer| buffer.as_entire_binding())
1195    }
1196
1197    /// Clears out the buffers in preparation for a new frame.
1198    pub fn clear(&mut self) {
1199        self.data_buffer.clear();
1200        self.late_indexed_indirect_parameters_buffer.clear();
1201        self.late_non_indexed_indirect_parameters_buffer.clear();
1202
1203        // Clear each individual set of buffers, but don't depopulate the hash
1204        // table. We want to avoid reallocating these vectors every frame.
1205        for view_work_item_buffers in self.work_item_buffers.values_mut() {
1206            view_work_item_buffers.clear();
1207        }
1208    }
1209}
1210
1211impl<BD> Default for UntypedPhaseBatchedInstanceBuffers<BD>
1212where
1213    BD: GpuArrayBufferable + Sync + Send + 'static,
1214{
1215    fn default() -> Self {
1216        Self::new()
1217    }
1218}
1219
/// Information about a render batch that we're building up during a sorted
/// render phase.
///
/// A batch is finalized and written back into the phase by
/// [`SortedRenderBatch::flush`].
struct SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// The index of the first phase item in this batch in the list of phase
    /// items.
    phase_item_start_index: u32,

    /// The index of the first instance in this batch in the instance buffer.
    instance_start_index: u32,

    /// True if the mesh in question has an index buffer; false otherwise.
    indexed: bool,

    /// The index of the indirect parameters for this batch in the
    /// [`IndirectParametersBuffers`].
    ///
    /// If CPU culling is being used, then this will be `None`.
    indirect_parameters_index: Option<NonMaxU32>,

    /// Metadata that can be used to determine whether an instance can be placed
    /// into this batch.
    ///
    /// If `None`, the item inside is unbatchable.
    meta: Option<BatchMeta<F::CompareData>>,
}
1248
impl<F> SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// Finalizes this batch and updates the [`SortedRenderPhase`] with the
    /// appropriate indices.
    ///
    /// `instance_end_index` is the index of the last instance in this batch
    /// plus one.
    fn flush<I>(
        self,
        instance_end_index: u32,
        phase: &mut SortedRenderPhase<I>,
        phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers,
    ) where
        I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    {
        // The first phase item in the batch carries the instance range and
        // extra index for the entire batch.
        let (batch_range, batch_extra_index) =
            phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut();
        *batch_range = self.instance_start_index..instance_end_index;
        *batch_extra_index = match self.indirect_parameters_index {
            // A one-element range: this batch owns a single indirect
            // parameters slot.
            Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex {
                range: u32::from(indirect_parameters_index)
                    ..(u32::from(indirect_parameters_index) + 1),
                batch_set_index: None,
            },
            None => PhaseItemExtraIndex::None,
        };
        // When drawing indirectly, record a batch set containing just this
        // batch, based at its indirect parameters slot, so the indirect
        // parameters building shader can fill in the command count.
        if let Some(indirect_parameters_index) = self.indirect_parameters_index {
            phase_indirect_parameters_buffers
                .add_batch_set(self.indexed, indirect_parameters_index.into());
        }
    }
}
1283
1284/// A system that runs early in extraction and clears out all the
1285/// [`BatchedInstanceBuffers`] for the frame.
1286///
1287/// We have to run this during extraction because, if GPU preprocessing is in
1288/// use, the extraction phase will write to the mesh input uniform buffers
1289/// directly, so the buffers need to be cleared before then.
1290pub fn clear_batched_gpu_instance_buffers<GFBD>(
1291    gpu_batched_instance_buffers: Option<
1292        ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
1293    >,
1294) where
1295    GFBD: GetFullBatchData,
1296{
1297    // Don't clear the entire table, because that would delete the buffers, and
1298    // we want to reuse those allocations.
1299    if let Some(mut gpu_batched_instance_buffers) = gpu_batched_instance_buffers {
1300        gpu_batched_instance_buffers.clear();
1301    }
1302}
1303
1304/// A system that removes GPU preprocessing work item buffers that correspond to
1305/// deleted [`ExtractedView`]s.
1306///
1307/// This is a separate system from [`clear_batched_gpu_instance_buffers`]
1308/// because [`ExtractedView`]s aren't created until after the extraction phase
1309/// is completed.
1310pub fn delete_old_work_item_buffers<GFBD>(
1311    mut gpu_batched_instance_buffers: ResMut<
1312        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
1313    >,
1314    extracted_views: Query<&ExtractedView>,
1315) where
1316    GFBD: GetFullBatchData,
1317{
1318    let retained_view_entities: HashSet<_> = extracted_views
1319        .iter()
1320        .map(|extracted_view| extracted_view.retained_view_entity)
1321        .collect();
1322    for phase_instance_buffers in gpu_batched_instance_buffers
1323        .phase_instance_buffers
1324        .values_mut()
1325    {
1326        phase_instance_buffers
1327            .work_item_buffers
1328            .retain(|retained_view_entity, _| {
1329                retained_view_entities.contains(retained_view_entity)
1330            });
1331    }
1332}
1333
/// Batch the items in a sorted render phase, when GPU instance buffer building
/// is in use. This means comparing metadata needed to draw each phase item and
/// trying to combine the draws into a batch.
pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<I, GFBD::BufferData>>,
    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<I>>,
    mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
    mut views: Query<(
        &ExtractedView,
        Has<NoIndirectDrawing>,
        Has<OcclusionCulling>,
    )>,
    system_param_item: StaticSystemParam<GFBD::Param>,
) where
    I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    GFBD: GetFullBatchData,
{
    // We only process GPU-built batch data in this function.
    let UntypedPhaseBatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ref mut late_indexed_indirect_parameters_buffer,
        ref mut late_non_indexed_indirect_parameters_buffer,
    } = phase_batched_instance_buffers.buffers;

    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
        // Skip views that have no instance of this sorted phase.
        let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else {
            continue;
        };

        // Create the work item buffer if necessary.
        let work_item_buffer = get_or_create_work_item_buffer::<I>(
            work_item_buffers,
            extracted_view.retained_view_entity,
            no_indirect_drawing,
            gpu_occlusion_culling,
        );

        // Initialize those work item buffers in preparation for this new frame.
        init_work_item_buffers(
            work_item_buffer,
            late_indexed_indirect_parameters_buffer,
            late_non_indexed_indirect_parameters_buffer,
        );

        // Walk through the list of phase items, building up batches as we go.
        let mut batch: Option<SortedRenderBatch<GFBD>> = None;

        for current_index in 0..phase.items.len() {
            // Get the index of the input data, and comparison metadata, for
            // this entity.
            let item = &phase.items[current_index];
            let entity = item.main_entity();
            let item_is_indexed = item.indexed();
            let current_batch_input_index =
                GFBD::get_index_and_compare_data(&system_param_item, entity);

            // Unpack that index and metadata. Note that it's possible for index
            // and/or metadata to not be present, which signifies that this
            // entity is unbatchable. In that case, we break the batch here.
            // If the index isn't present the item is not part of this pipeline and so will be skipped.
            let Some((current_input_index, current_meta)) = current_batch_input_index else {
                // Break a batch if we need to.
                if let Some(batch) = batch.take() {
                    batch.flush(
                        data_buffer.len() as u32,
                        phase,
                        &mut phase_indirect_parameters_buffers.buffers,
                    );
                }

                continue;
            };
            let current_meta =
                current_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta));

            // Determine if this entity can be included in the batch we're
            // building up.
            let can_batch = batch.as_ref().is_some_and(|batch| {
                // `None` for metadata indicates that the items are unbatchable.
                match (&current_meta, &batch.meta) {
                    (Some(current_meta), Some(batch_meta)) => current_meta == batch_meta,
                    (_, _) => false,
                }
            });

            // Make space in the data buffer for this instance.
            let output_index = data_buffer.add() as u32;

            // If we can't batch, break the existing batch and make a new one.
            if !can_batch {
                // Break a batch if we need to.
                if let Some(batch) = batch.take() {
                    batch.flush(
                        output_index,
                        phase,
                        &mut phase_indirect_parameters_buffers.buffers,
                    );
                }

                // When drawing indirectly, allocate an indirect parameters
                // slot for the new batch in the buffer matching its mesh
                // class (indexed vs. non-indexed).
                let indirect_parameters_index = if no_indirect_drawing {
                    None
                } else if item_is_indexed {
                    Some(
                        phase_indirect_parameters_buffers
                            .buffers
                            .indexed
                            .allocate(1),
                    )
                } else {
                    Some(
                        phase_indirect_parameters_buffers
                            .buffers
                            .non_indexed
                            .allocate(1),
                    )
                };

                // Start a new batch.
                if let Some(indirect_parameters_index) = indirect_parameters_index {
                    GFBD::write_batch_indirect_parameters_metadata(
                        item_is_indexed,
                        output_index,
                        None,
                        &mut phase_indirect_parameters_buffers.buffers,
                        indirect_parameters_index,
                    );
                };

                batch = Some(SortedRenderBatch {
                    phase_item_start_index: current_index as u32,
                    instance_start_index: output_index,
                    indexed: item_is_indexed,
                    indirect_parameters_index: indirect_parameters_index.and_then(NonMaxU32::new),
                    meta: current_meta,
                });
            }

            // Add a new preprocessing work item so that the preprocessing
            // shader will copy the per-instance data over.
            if let Some(batch) = batch.as_ref() {
                work_item_buffer.push(
                    item_is_indexed,
                    PreprocessWorkItem {
                        input_index: current_input_index.into(),
                        output_or_indirect_parameters_index: match (
                            no_indirect_drawing,
                            batch.indirect_parameters_index,
                        ) {
                            // Direct drawing: reference the output slot.
                            (true, _) => output_index,
                            // Indirect drawing: reference the batch's indirect
                            // parameters slot.
                            (false, Some(indirect_parameters_index)) => {
                                indirect_parameters_index.into()
                            }
                            (false, None) => 0,
                        },
                    },
                );
            }
        }

        // Flush the final batch if necessary.
        if let Some(batch) = batch.take() {
            batch.flush(
                data_buffer.len() as u32,
                phase,
                &mut phase_indirect_parameters_buffers.buffers,
            );
        }
    }
}
1504
/// Creates batches for a render phase that uses bins.
///
/// For each extracted view, this system walks the phase's multidrawable,
/// batchable, and unbatchable meshes in turn. For every visible entity it
/// reserves a slot for the entity's per-instance data in the GPU data buffer
/// and pushes a [`PreprocessWorkItem`] telling the GPU preprocessing shader
/// where to read the entity's input data and where to write its output (or,
/// in indirect mode, which indirect parameters slot to update).
pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<BPI, GFBD::BufferData>>,
    phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<BPI>>,
    mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
    mut views: Query<
        (
            &ExtractedView,
            Has<NoIndirectDrawing>,
            Has<OcclusionCulling>,
        ),
        With<ExtractedView>,
    >,
    param: StaticSystemParam<GFBD::Param>,
) where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    let system_param_item = param.into_inner();

    let phase_indirect_parameters_buffers = phase_indirect_parameters_buffers.into_inner();

    let UntypedPhaseBatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ref mut late_indexed_indirect_parameters_buffer,
        ref mut late_non_indexed_indirect_parameters_buffer,
    } = phase_batched_instance_buffers.buffers;

    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
        // Skip views that have no binned render phase of this type.
        let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else {
            continue;
        };

        // Create the work item buffer if necessary; otherwise, just mark it as
        // used this frame.
        let work_item_buffer = get_or_create_work_item_buffer::<BPI>(
            work_item_buffers,
            extracted_view.retained_view_entity,
            no_indirect_drawing,
            gpu_occlusion_culling,
        );

        // Initialize those work item buffers in preparation for this new frame.
        init_work_item_buffers(
            work_item_buffer,
            late_indexed_indirect_parameters_buffer,
            late_non_indexed_indirect_parameters_buffer,
        );

        // Prepare multidrawables.
        //
        // Multidraw requires both the phase's batch sets and the work item
        // buffers to be in their indirect forms, hence the joint pattern
        // match on the pair.

        if let (
            &mut BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut batch_sets),
            &mut PreprocessWorkItemBuffers::Indirect {
                indexed: ref mut indexed_work_item_buffer,
                non_indexed: ref mut non_indexed_work_item_buffer,
                gpu_occlusion_culling: ref mut gpu_occlusion_culling_buffers,
            },
        ) = (&mut phase.batch_sets, &mut *work_item_buffer)
        {
            let mut output_index = data_buffer.len() as u32;

            // Initialize the state for both indexed and non-indexed meshes.
            let mut indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =
                MultidrawableBatchSetPreparer::new(
                    phase_indirect_parameters_buffers.buffers.batch_count(true) as u32,
                    phase_indirect_parameters_buffers
                        .buffers
                        .indexed
                        .batch_sets
                        .len() as u32,
                );
            let mut non_indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =
                MultidrawableBatchSetPreparer::new(
                    phase_indirect_parameters_buffers.buffers.batch_count(false) as u32,
                    phase_indirect_parameters_buffers
                        .buffers
                        .non_indexed
                        .batch_sets
                        .len() as u32,
                );

            // Prepare each batch set, dispatching on the mesh class (indexed
            // vs. non-indexed) of its meshes.
            for (batch_set_key, bins) in &phase.multidrawable_meshes {
                if batch_set_key.indexed() {
                    indexed_preparer.prepare_multidrawable_binned_batch_set(
                        bins,
                        &mut output_index,
                        data_buffer,
                        indexed_work_item_buffer,
                        &mut phase_indirect_parameters_buffers.buffers.indexed,
                        batch_sets,
                    );
                } else {
                    non_indexed_preparer.prepare_multidrawable_binned_batch_set(
                        bins,
                        &mut output_index,
                        data_buffer,
                        non_indexed_work_item_buffer,
                        &mut phase_indirect_parameters_buffers.buffers.non_indexed,
                        batch_sets,
                    );
                }
            }

            // Reserve space in the occlusion culling buffers, if necessary.
            if let Some(gpu_occlusion_culling_buffers) = gpu_occlusion_culling_buffers {
                gpu_occlusion_culling_buffers
                    .late_indexed
                    .add_multiple(indexed_preparer.work_item_count);
                gpu_occlusion_culling_buffers
                    .late_non_indexed
                    .add_multiple(non_indexed_preparer.work_item_count);
            }
        }

        // Prepare batchables.
        //
        // All entities in a bin share the same batchable key, so consecutive
        // entities of the bin are accumulated into a single growing batch.

        for (key, bin) in &phase.batchable_meshes {
            let mut batch: Option<BinnedRenderPhaseBatch> = None;
            for (&main_entity, &input_index) in bin.entities() {
                // Reserve a slot for this entity's per-instance data.
                let output_index = data_buffer.add() as u32;

                match batch {
                    Some(ref mut batch) => {
                        batch.instance_range.end = output_index + 1;

                        // Append to the current batch.
                        //
                        // If we're in indirect mode, then we write the first
                        // output index of this batch, so that we have a
                        // tightly-packed buffer if GPU culling discards some of
                        // the instances. Otherwise, we can just write the
                        // output index directly.
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: match (
                                    no_indirect_drawing,
                                    &batch.extra_index,
                                ) {
                                    (true, _) => output_index,
                                    (
                                        false,
                                        PhaseItemExtraIndex::IndirectParametersIndex {
                                            range: indirect_parameters_range,
                                            ..
                                        },
                                    ) => indirect_parameters_range.start,
                                    (false, &PhaseItemExtraIndex::DynamicOffset(_))
                                    | (false, &PhaseItemExtraIndex::None) => 0,
                                },
                            },
                        );
                    }

                    None if !no_indirect_drawing => {
                        // Start a new batch, in indirect mode.
                        let indirect_parameters_index = phase_indirect_parameters_buffers
                            .buffers
                            .allocate(key.0.indexed(), 1);
                        let batch_set_index = phase_indirect_parameters_buffers
                            .buffers
                            .get_next_batch_set_index(key.0.indexed());

                        GFBD::write_batch_indirect_parameters_metadata(
                            key.0.indexed(),
                            output_index,
                            batch_set_index,
                            &mut phase_indirect_parameters_buffers.buffers,
                            indirect_parameters_index,
                        );
                        // In indirect mode, the work item points at the
                        // indirect parameters slot rather than the output
                        // index.
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: indirect_parameters_index,
                            },
                        );
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (Entity::PLACEHOLDER, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
                                range: indirect_parameters_index..(indirect_parameters_index + 1),
                                batch_set_index: None,
                            },
                        });
                    }

                    None => {
                        // Start a new batch, in direct mode.
                        work_item_buffer.push(
                            key.0.indexed(),
                            PreprocessWorkItem {
                                input_index: *input_index,
                                output_or_indirect_parameters_index: output_index,
                            },
                        );
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (Entity::PLACEHOLDER, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::None,
                        });
                    }
                }
            }

            // Record the completed batch so that the render node can later
            // draw it.
            if let Some(batch) = batch {
                match phase.batch_sets {
                    BinnedRenderPhaseBatchSets::DynamicUniforms(_) => {
                        error!("Dynamic uniform batch sets shouldn't be used here");
                    }
                    BinnedRenderPhaseBatchSets::Direct(ref mut vec) => {
                        vec.push(batch);
                    }
                    BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut vec) => {
                        // The Bevy renderer will never mark a mesh as batchable
                        // but not multidrawable if multidraw is in use.
                        // However, custom render pipelines might do so, such as
                        // the `specialized_mesh_pipeline` example.
                        vec.push(BinnedRenderPhaseBatchSet {
                            first_batch: batch,
                            batch_count: 1,
                            bin_key: key.1.clone(),
                            index: phase_indirect_parameters_buffers
                                .buffers
                                .batch_set_count(key.0.indexed())
                                as u32,
                        });
                    }
                }
            }
        }

        // Prepare unbatchables. Each entity is recorded individually in
        // `buffer_indices` rather than accumulated into batches.
        for (key, unbatchables) in &mut phase.unbatchable_meshes {
            // Allocate the indirect parameters if necessary. `None` means
            // we're in direct mode and no indirect parameters are needed.
            let mut indirect_parameters_offset = if no_indirect_drawing {
                None
            } else if key.0.indexed() {
                Some(
                    phase_indirect_parameters_buffers
                        .buffers
                        .indexed
                        .allocate(unbatchables.entities.len() as u32),
                )
            } else {
                Some(
                    phase_indirect_parameters_buffers
                        .buffers
                        .non_indexed
                        .allocate(unbatchables.entities.len() as u32),
                )
            };

            for main_entity in unbatchables.entities.keys() {
                // Look up the input index for this entity; entities whose
                // data is unavailable are skipped.
                let Some(input_index) = GFBD::get_binned_index(&system_param_item, *main_entity)
                else {
                    continue;
                };
                let output_index = data_buffer.add() as u32;

                if let Some(ref mut indirect_parameters_index) = indirect_parameters_offset {
                    // We're in indirect mode, so add an indirect parameters
                    // index.
                    GFBD::write_batch_indirect_parameters_metadata(
                        key.0.indexed(),
                        output_index,
                        None,
                        &mut phase_indirect_parameters_buffers.buffers,
                        *indirect_parameters_index,
                    );
                    work_item_buffer.push(
                        key.0.indexed(),
                        PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_or_indirect_parameters_index: *indirect_parameters_index,
                        },
                    );
                    // Note that in indirect mode the `instance_index` slot
                    // holds the indirect parameters index, not the output
                    // index.
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: *indirect_parameters_index,
                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
                                range: *indirect_parameters_index..(*indirect_parameters_index + 1),
                                batch_set_index: None,
                            },
                        });
                    phase_indirect_parameters_buffers
                        .buffers
                        .add_batch_set(key.0.indexed(), *indirect_parameters_index);
                    *indirect_parameters_index += 1;
                } else {
                    work_item_buffer.push(
                        key.0.indexed(),
                        PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_or_indirect_parameters_index: output_index,
                        },
                    );
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: output_index,
                            extra_index: PhaseItemExtraIndex::None,
                        });
                }
            }
        }
    }
}
1818
/// The state that [`batch_and_prepare_binned_render_phase`] uses to construct
/// multidrawable batch sets.
///
/// The [`batch_and_prepare_binned_render_phase`] system maintains two of these:
/// one for indexed meshes and one for non-indexed meshes.
struct MultidrawableBatchSetPreparer<BPI, GFBD>
where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    /// The offset in the indirect parameters buffer at which the next indirect
    /// parameters will be written.
    indirect_parameters_index: u32,
    /// The number of batch sets we've built so far for this mesh class.
    batch_set_index: u32,
    /// The number of work items we've emitted so far for this mesh class.
    work_item_count: usize,
    /// Ties the otherwise-unused `BPI` and `GFBD` type parameters to this
    /// struct without storing any values of those types.
    phantom: PhantomData<(BPI, GFBD)>,
}
1838
impl<BPI, GFBD> MultidrawableBatchSetPreparer<BPI, GFBD>
where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    /// Creates a new [`MultidrawableBatchSetPreparer`] that will start writing
    /// indirect parameters and batch sets at the given indices.
    #[inline]
    fn new(initial_indirect_parameters_index: u32, initial_batch_set_index: u32) -> Self {
        MultidrawableBatchSetPreparer {
            indirect_parameters_index: initial_indirect_parameters_index,
            batch_set_index: initial_batch_set_index,
            work_item_count: 0,
            phantom: PhantomData,
        }
    }

    /// Creates batch sets and writes the GPU data needed to draw all visible
    /// entities of one mesh class in the given batch set.
    ///
    /// The *mesh class* represents whether the mesh has indices or not.
    ///
    /// NOTE(review): despite its name, `indexed_work_item_buffer` is simply
    /// the work item buffer for whichever mesh class is being prepared — the
    /// caller passes the non-indexed buffer here when preparing non-indexed
    /// meshes.
    #[inline]
    fn prepare_multidrawable_binned_batch_set<IP>(
        &mut self,
        bins: &IndexMap<BPI::BinKey, RenderBin>,
        output_index: &mut u32,
        data_buffer: &mut UninitBufferVec<GFBD::BufferData>,
        indexed_work_item_buffer: &mut RawBufferVec<PreprocessWorkItem>,
        mesh_class_buffers: &mut MeshClassIndirectParametersBuffers<IP>,
        batch_sets: &mut Vec<BinnedRenderPhaseBatchSet<BPI::BinKey>>,
    ) where
        IP: Clone + ShaderSize + WriteInto,
    {
        // Remember where this batch set starts so we can describe it once all
        // of its bins have been processed.
        let current_indexed_batch_set_index = self.batch_set_index;
        let current_output_index = *output_index;

        let indirect_parameters_base = self.indirect_parameters_index;

        // We're going to write the first entity into the batch set. Do this
        // here so that we can preload the bin into cache as a side effect.
        let Some((first_bin_key, first_bin)) = bins.iter().next() else {
            // An empty batch set produces nothing.
            return;
        };
        let first_bin_len = first_bin.entities().len();
        let first_bin_entity = first_bin
            .entities()
            .keys()
            .next()
            .copied()
            .unwrap_or(MainEntity::from(Entity::PLACEHOLDER));

        // Traverse the batch set, processing each bin.
        for bin in bins.values() {
            // Record the first output index for this batch, as well as its own
            // index.
            mesh_class_buffers
                .cpu_metadata
                .push(IndirectParametersCpuMetadata {
                    base_output_index: *output_index,
                    batch_set_index: self.batch_set_index,
                });

            // Traverse the bin, pushing `PreprocessWorkItem`s for each entity
            // within it. This is a hot loop, so make it as fast as possible.
            for &input_index in bin.entities().values() {
                indexed_work_item_buffer.push(PreprocessWorkItem {
                    input_index: *input_index,
                    output_or_indirect_parameters_index: self.indirect_parameters_index,
                });
            }

            // Reserve space for the appropriate number of entities in the data
            // buffer. Also, advance the output index and work item count.
            let bin_entity_count = bin.entities().len();
            data_buffer.add_multiple(bin_entity_count);
            *output_index += bin_entity_count as u32;
            self.work_item_count += bin_entity_count;

            // Each bin occupies exactly one slot in the indirect parameters
            // buffer.
            self.indirect_parameters_index += 1;
        }

        // Reserve space for the bins in this batch set in the GPU buffers.
        let bin_count = bins.len();
        mesh_class_buffers.gpu_metadata.add_multiple(bin_count);
        mesh_class_buffers.data.add_multiple(bin_count);

        // Write the information the GPU will need about this batch set.
        // `indirect_parameters_count` starts at zero here; presumably the GPU
        // fills it in during culling — TODO(review): confirm.
        mesh_class_buffers.batch_sets.push(IndirectBatchSet {
            indirect_parameters_base,
            indirect_parameters_count: 0,
        });

        self.batch_set_index += 1;

        // Record the batch set. The render node later processes this record to
        // render the batches.
        batch_sets.push(BinnedRenderPhaseBatchSet {
            first_batch: BinnedRenderPhaseBatch {
                representative_entity: (Entity::PLACEHOLDER, first_bin_entity),
                instance_range: current_output_index..(current_output_index + first_bin_len as u32),
                extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(NonMaxU32::new(
                    indirect_parameters_base,
                )),
            },
            bin_key: (*first_bin_key).clone(),
            batch_count: self.indirect_parameters_index - indirect_parameters_base,
            index: current_indexed_batch_set_index,
        });
    }
}
1949
1950/// A system that gathers up the per-phase GPU buffers and inserts them into the
1951/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] tables.
1952///
1953/// This runs after the [`batch_and_prepare_binned_render_phase`] or
1954/// [`batch_and_prepare_sorted_render_phase`] systems. It takes the per-phase
1955/// [`PhaseBatchedInstanceBuffers`] and [`PhaseIndirectParametersBuffers`]
1956/// resources and inserts them into the global [`BatchedInstanceBuffers`] and
1957/// [`IndirectParametersBuffers`] tables.
1958///
1959/// This system exists so that the [`batch_and_prepare_binned_render_phase`] and
1960/// [`batch_and_prepare_sorted_render_phase`] can run in parallel with one
1961/// another. If those two systems manipulated [`BatchedInstanceBuffers`] and
1962/// [`IndirectParametersBuffers`] directly, then they wouldn't be able to run in
1963/// parallel.
1964pub fn collect_buffers_for_phase<PI, GFBD>(
1965    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<PI, GFBD::BufferData>>,
1966    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<PI>>,
1967    mut batched_instance_buffers: ResMut<
1968        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
1969    >,
1970    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
1971) where
1972    PI: PhaseItem,
1973    GFBD: GetFullBatchData + Send + Sync + 'static,
1974{
1975    // Insert the `PhaseBatchedInstanceBuffers` into the global table. Replace
1976    // the contents of the per-phase resource with the old batched instance
1977    // buffers in order to reuse allocations.
1978    let untyped_phase_batched_instance_buffers =
1979        mem::take(&mut phase_batched_instance_buffers.buffers);
1980    if let Some(mut old_untyped_phase_batched_instance_buffers) = batched_instance_buffers
1981        .phase_instance_buffers
1982        .insert(TypeId::of::<PI>(), untyped_phase_batched_instance_buffers)
1983    {
1984        old_untyped_phase_batched_instance_buffers.clear();
1985        phase_batched_instance_buffers.buffers = old_untyped_phase_batched_instance_buffers;
1986    }
1987
1988    // Insert the `PhaseIndirectParametersBuffers` into the global table.
1989    // Replace the contents of the per-phase resource with the old indirect
1990    // parameters buffers in order to reuse allocations.
1991    let untyped_phase_indirect_parameters_buffers = mem::replace(
1992        &mut phase_indirect_parameters_buffers.buffers,
1993        UntypedPhaseIndirectParametersBuffers::new(
1994            indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers,
1995        ),
1996    );
1997    if let Some(mut old_untyped_phase_indirect_parameters_buffers) = indirect_parameters_buffers
1998        .insert(
1999            TypeId::of::<PI>(),
2000            untyped_phase_indirect_parameters_buffers,
2001        )
2002    {
2003        old_untyped_phase_indirect_parameters_buffers.clear();
2004        phase_indirect_parameters_buffers.buffers = old_untyped_phase_indirect_parameters_buffers;
2005    }
2006}
2007
2008/// A system that writes all instance buffers to the GPU.
2009pub fn write_batched_instance_buffers<GFBD>(
2010    render_device: Res<RenderDevice>,
2011    render_queue: Res<RenderQueue>,
2012    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
2013) where
2014    GFBD: GetFullBatchData,
2015{
2016    let BatchedInstanceBuffers {
2017        current_input_buffer,
2018        previous_input_buffer,
2019        phase_instance_buffers,
2020    } = gpu_array_buffer.into_inner();
2021
2022    current_input_buffer
2023        .buffer
2024        .write_buffer(&render_device, &render_queue);
2025    previous_input_buffer
2026        .buffer
2027        .write_buffer(&render_device, &render_queue);
2028
2029    for phase_instance_buffers in phase_instance_buffers.values_mut() {
2030        let UntypedPhaseBatchedInstanceBuffers {
2031            ref mut data_buffer,
2032            ref mut work_item_buffers,
2033            ref mut late_indexed_indirect_parameters_buffer,
2034            ref mut late_non_indexed_indirect_parameters_buffer,
2035        } = *phase_instance_buffers;
2036
2037        data_buffer.write_buffer(&render_device);
2038        late_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
2039        late_non_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
2040
2041        for phase_work_item_buffers in work_item_buffers.values_mut() {
2042            match *phase_work_item_buffers {
2043                PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => {
2044                    buffer_vec.write_buffer(&render_device, &render_queue);
2045                }
2046                PreprocessWorkItemBuffers::Indirect {
2047                    ref mut indexed,
2048                    ref mut non_indexed,
2049                    ref mut gpu_occlusion_culling,
2050                } => {
2051                    indexed.write_buffer(&render_device, &render_queue);
2052                    non_indexed.write_buffer(&render_device, &render_queue);
2053
2054                    if let Some(GpuOcclusionCullingWorkItemBuffers {
2055                        ref mut late_indexed,
2056                        ref mut late_non_indexed,
2057                        late_indirect_parameters_indexed_offset: _,
2058                        late_indirect_parameters_non_indexed_offset: _,
2059                    }) = *gpu_occlusion_culling
2060                    {
2061                        if !late_indexed.is_empty() {
2062                            late_indexed.write_buffer(&render_device);
2063                        }
2064                        if !late_non_indexed.is_empty() {
2065                            late_non_indexed.write_buffer(&render_device);
2066                        }
2067                    }
2068                }
2069            }
2070        }
2071    }
2072}
2073
2074pub fn clear_indirect_parameters_buffers(
2075    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
2076) {
2077    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
2078        phase_indirect_parameters_buffers.clear();
2079    }
2080}
2081
2082pub fn write_indirect_parameters_buffers(
2083    render_device: Res<RenderDevice>,
2084    render_queue: Res<RenderQueue>,
2085    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
2086) {
2087    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
2088        phase_indirect_parameters_buffers
2089            .indexed
2090            .data
2091            .write_buffer(&render_device);
2092        phase_indirect_parameters_buffers
2093            .non_indexed
2094            .data
2095            .write_buffer(&render_device);
2096
2097        phase_indirect_parameters_buffers
2098            .indexed
2099            .cpu_metadata
2100            .write_buffer(&render_device, &render_queue);
2101        phase_indirect_parameters_buffers
2102            .non_indexed
2103            .cpu_metadata
2104            .write_buffer(&render_device, &render_queue);
2105
2106        phase_indirect_parameters_buffers
2107            .non_indexed
2108            .gpu_metadata
2109            .write_buffer(&render_device);
2110        phase_indirect_parameters_buffers
2111            .indexed
2112            .gpu_metadata
2113            .write_buffer(&render_device);
2114
2115        phase_indirect_parameters_buffers
2116            .indexed
2117            .batch_sets
2118            .write_buffer(&render_device, &render_queue);
2119        phase_indirect_parameters_buffers
2120            .non_indexed
2121            .batch_sets
2122            .write_buffer(&render_device, &render_queue);
2123    }
2124}
2125
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the add/remove/reuse semantics of `InstanceInputUniformBuffer`.
    #[test]
    fn instance_buffer_correct_behavior() {
        let mut instance_buffer = InstanceInputUniformBuffer::new();

        let index = instance_buffer.add(2);
        instance_buffer.remove(index);
        // Removal marks the slot as free but doesn't wipe its contents: the
        // unchecked accessor still sees the old value, while the checked
        // accessor reports the slot as vacant.
        assert_eq!(instance_buffer.get_unchecked(index), 2);
        assert_eq!(instance_buffer.get(index), None);

        instance_buffer.add(5);
        // The freed slot is reused by the next add, so the buffer's length
        // stays at one element.
        assert_eq!(instance_buffer.buffer().len(), 1);
    }
}