bevy_render/batching/
gpu_preprocessing.rs

1//! Batching functionality when GPU preprocessing is in use.
2
3use core::{any::TypeId, marker::PhantomData, mem};
4
5use bevy_app::{App, Plugin};
6use bevy_derive::{Deref, DerefMut};
7use bevy_ecs::{
8    prelude::Entity,
9    query::{Has, With},
10    resource::Resource,
11    schedule::IntoScheduleConfigs as _,
12    system::{Query, Res, ResMut, StaticSystemParam},
13    world::{FromWorld, World},
14};
15use bevy_encase_derive::ShaderType;
16use bevy_math::UVec4;
17use bevy_platform::collections::{hash_map::Entry, HashMap, HashSet};
18use bevy_utils::{default, TypeIdMap};
19use bytemuck::{Pod, Zeroable};
20use encase::{internal::WriteInto, ShaderSize};
21use indexmap::IndexMap;
22use nonmax::NonMaxU32;
23use tracing::{error, info};
24use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
25
26use crate::{
27    experimental::occlusion_culling::OcclusionCulling,
28    render_phase::{
29        BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet,
30        BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItem,
31        PhaseItemBatchSetKey as _, PhaseItemExtraIndex, RenderBin, SortedPhaseItem,
32        SortedRenderPhase, UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases,
33        ViewSortedRenderPhases,
34    },
35    render_resource::{Buffer, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
36    renderer::{RenderAdapter, RenderDevice, RenderQueue},
37    sync_world::MainEntity,
38    view::{ExtractedView, NoIndirectDrawing, RetainedViewEntity},
39    Render, RenderApp, RenderDebugFlags, RenderSet,
40};
41
42use super::{BatchMeta, GetBatchData, GetFullBatchData};
43
/// A plugin that sets up the shared machinery for GPU-preprocessed batching:
/// it inserts the [`IndirectParametersBuffers`] resource, registers the
/// systems that clear and write those buffers, and initializes
/// [`GpuPreprocessingSupport`].
#[derive(Default)]
pub struct BatchingPlugin {
    /// Debugging flags that can optionally be set when constructing the renderer.
    pub debug_flags: RenderDebugFlags,
}
49
50impl Plugin for BatchingPlugin {
51    fn build(&self, app: &mut App) {
52        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
53            return;
54        };
55
56        render_app
57            .insert_resource(IndirectParametersBuffers::new(
58                self.debug_flags
59                    .contains(RenderDebugFlags::ALLOW_COPIES_FROM_INDIRECT_PARAMETERS),
60            ))
61            .add_systems(
62                Render,
63                write_indirect_parameters_buffers.in_set(RenderSet::PrepareResourcesFlush),
64            )
65            .add_systems(
66                Render,
67                clear_indirect_parameters_buffers.in_set(RenderSet::ManageViews),
68            );
69    }
70
71    fn finish(&self, app: &mut App) {
72        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
73            return;
74        };
75
76        render_app.init_resource::<GpuPreprocessingSupport>();
77    }
78}
79
/// Records whether GPU preprocessing and/or GPU culling are supported on the
/// device.
///
/// No GPU preprocessing is supported on WebGL because of the lack of compute
/// shader support.  GPU preprocessing is supported on DirectX 12, but due to [a
/// `wgpu` limitation] GPU culling is not.
///
/// [a `wgpu` limitation]: https://github.com/gfx-rs/wgpu/issues/2471
#[derive(Clone, Copy, PartialEq, Resource)]
pub struct GpuPreprocessingSupport {
    /// The maximum amount of GPU preprocessing available on this platform.
    ///
    /// See [`GpuPreprocessingMode`] for the possible levels of support.
    pub max_supported_mode: GpuPreprocessingMode,
}
93
94impl GpuPreprocessingSupport {
95    /// Returns true if this GPU preprocessing support level isn't `None`.
96    #[inline]
97    pub fn is_available(&self) -> bool {
98        self.max_supported_mode != GpuPreprocessingMode::None
99    }
100
101    /// Returns the given GPU preprocessing mode, capped to the current
102    /// preprocessing mode.
103    pub fn min(&self, mode: GpuPreprocessingMode) -> GpuPreprocessingMode {
104        match (self.max_supported_mode, mode) {
105            (GpuPreprocessingMode::None, _) | (_, GpuPreprocessingMode::None) => {
106                GpuPreprocessingMode::None
107            }
108            (mode, GpuPreprocessingMode::Culling) | (GpuPreprocessingMode::Culling, mode) => mode,
109            (GpuPreprocessingMode::PreprocessingOnly, GpuPreprocessingMode::PreprocessingOnly) => {
110                GpuPreprocessingMode::PreprocessingOnly
111            }
112        }
113    }
114
115    /// Returns true if GPU culling is supported on this platform.
116    pub fn is_culling_supported(&self) -> bool {
117        self.max_supported_mode == GpuPreprocessingMode::Culling
118    }
119}
120
/// The amount of GPU preprocessing (compute and indirect draw) that we do.
//
// `Debug` and `Eq` are derived in addition to the original `Clone`, `Copy`,
// and `PartialEq`: the mode is a fieldless enum, so `Eq` is free, and `Debug`
// lets the selected mode be logged and inspected.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GpuPreprocessingMode {
    /// No GPU preprocessing is in use at all.
    ///
    /// This is used when GPU compute isn't available.
    None,

    /// GPU preprocessing is in use, but GPU culling isn't.
    ///
    /// This is used when the [`NoIndirectDrawing`] component is present on the
    /// camera.
    PreprocessingOnly,

    /// Both GPU preprocessing and GPU culling are in use.
    ///
    /// This is used by default.
    Culling,
}
140
/// The GPU buffers holding the data needed to render batches.
///
/// For example, in the 3D PBR pipeline this holds `MeshUniform`s, which are the
/// `BD` type parameter in that mode.
///
/// We have a separate *buffer data input* type (`BDI`) here, which a compute
/// shader is expected to expand to the full buffer data (`BD`) type. GPU
/// uniform building is generally faster and uses less system RAM to VRAM bus
/// bandwidth, but only implemented for some pipelines (for example, not in the
/// 2D pipeline at present) and only when compute shader is available.
#[derive(Resource)]
pub struct BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod + Default,
{
    /// The uniform data inputs for the current frame.
    ///
    /// These are uploaded during the extraction phase.
    pub current_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The uniform data inputs for the previous frame.
    ///
    /// The indices don't generally line up between `current_input_buffer`
    /// and `previous_input_buffer`, because, among other reasons, entities
    /// can spawn or despawn between frames. Instead, each current buffer
    /// data input uniform is expected to contain the index of the
    /// corresponding buffer data input uniform in this list.
    pub previous_input_buffer: InstanceInputUniformBuffer<BDI>,

    /// The data needed to render buffers for each phase.
    ///
    /// The keys of this map are the type IDs of each phase: e.g. `Opaque3d`,
    /// `AlphaMask3d`, etc. Keeping the buffers separate per phase allows the
    /// batching work for different phases to proceed independently.
    pub phase_instance_buffers: TypeIdMap<UntypedPhaseBatchedInstanceBuffers<BD>>,
}
177
178impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
179where
180    BD: GpuArrayBufferable + Sync + Send + 'static,
181    BDI: Pod + Sync + Send + Default + 'static,
182{
183    fn default() -> Self {
184        BatchedInstanceBuffers {
185            current_input_buffer: InstanceInputUniformBuffer::new(),
186            previous_input_buffer: InstanceInputUniformBuffer::new(),
187            phase_instance_buffers: HashMap::default(),
188        }
189    }
190}
191
/// The GPU buffers holding the data needed to render batches for a single
/// phase.
///
/// These are split out per phase so that we can run the phases in parallel.
/// This is the version of the structure that has a type parameter, which
/// enables Bevy's scheduler to run the batching operations for the different
/// phases in parallel.
///
/// See the documentation for [`BatchedInstanceBuffers`] for more information.
#[derive(Resource)]
pub struct PhaseBatchedInstanceBuffers<PI, BD>
where
    PI: PhaseItem,
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// The buffers for this phase.
    pub buffers: UntypedPhaseBatchedInstanceBuffers<BD>,
    // Ties this resource to the phase item type `PI` without storing one.
    phantom: PhantomData<PI>,
}
211
212impl<PI, BD> Default for PhaseBatchedInstanceBuffers<PI, BD>
213where
214    PI: PhaseItem,
215    BD: GpuArrayBufferable + Sync + Send + 'static,
216{
217    fn default() -> Self {
218        PhaseBatchedInstanceBuffers {
219            buffers: UntypedPhaseBatchedInstanceBuffers::default(),
220            phantom: PhantomData,
221        }
222    }
223}
224
/// The GPU buffers holding the data needed to render batches for a single
/// phase, without a type parameter for that phase.
///
/// Since this structure doesn't have a type parameter, it can be placed in
/// [`BatchedInstanceBuffers::phase_instance_buffers`].
pub struct UntypedPhaseBatchedInstanceBuffers<BD>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
{
    /// A storage area for the buffer data that the GPU compute shader is
    /// expected to write to.
    ///
    /// There will be one entry for each index.
    pub data_buffer: UninitBufferVec<BD>,

    /// The index of the buffer data in the current input buffer that
    /// corresponds to each instance.
    ///
    /// This is keyed off each view. Each view has a separate buffer.
    pub work_item_buffers: HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,

    /// A buffer that holds the number of indexed meshes that weren't visible in
    /// the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible next frame to see if they became newly
    /// visible this frame.
    pub late_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,

    /// A buffer that holds the number of non-indexed meshes that weren't
    /// visible in the previous frame, when GPU occlusion culling is in use.
    ///
    /// There's one set of [`LatePreprocessWorkItemIndirectParameters`] per
    /// view. Bevy uses this value to determine how many threads to dispatch to
    /// check meshes that weren't visible next frame to see if they became newly
    /// visible this frame.
    pub late_non_indexed_indirect_parameters_buffer:
        RawBufferVec<LatePreprocessWorkItemIndirectParameters>,
}
266
/// Holds the GPU buffer of instance input data, which is the data about each
/// mesh instance that the CPU provides.
///
/// `BDI` is the *buffer data input* type, which the GPU mesh preprocessing
/// shader is expected to expand to the full *buffer data* type.
pub struct InstanceInputUniformBuffer<BDI>
where
    BDI: Pod + Default,
{
    /// The buffer containing the data that will be uploaded to the GPU.
    buffer: RawBufferVec<BDI>,

    /// Indices of slots that are free within the buffer.
    ///
    /// When adding data, we preferentially overwrite these slots first before
    /// growing the buffer itself. Slots are added here by [`Self::remove`];
    /// their contents remain in `buffer` until overwritten.
    free_uniform_indices: Vec<u32>,
}
285
286impl<BDI> InstanceInputUniformBuffer<BDI>
287where
288    BDI: Pod + Default,
289{
290    /// Creates a new, empty buffer.
291    pub fn new() -> InstanceInputUniformBuffer<BDI> {
292        InstanceInputUniformBuffer {
293            buffer: RawBufferVec::new(BufferUsages::STORAGE),
294            free_uniform_indices: vec![],
295        }
296    }
297
298    /// Clears the buffer and entity list out.
299    pub fn clear(&mut self) {
300        self.buffer.clear();
301        self.free_uniform_indices.clear();
302    }
303
304    /// Returns the [`RawBufferVec`] corresponding to this input uniform buffer.
305    #[inline]
306    pub fn buffer(&self) -> &RawBufferVec<BDI> {
307        &self.buffer
308    }
309
310    /// Adds a new piece of buffered data to the uniform buffer and returns its
311    /// index.
312    pub fn add(&mut self, element: BDI) -> u32 {
313        match self.free_uniform_indices.pop() {
314            Some(uniform_index) => {
315                self.buffer.values_mut()[uniform_index as usize] = element;
316                uniform_index
317            }
318            None => self.buffer.push(element) as u32,
319        }
320    }
321
322    /// Removes a piece of buffered data from the uniform buffer.
323    ///
324    /// This simply marks the data as free.
325    pub fn remove(&mut self, uniform_index: u32) {
326        self.free_uniform_indices.push(uniform_index);
327    }
328
329    /// Returns the piece of buffered data at the given index.
330    ///
331    /// Returns [`None`] if the index is out of bounds or the data is removed.
332    pub fn get(&self, uniform_index: u32) -> Option<BDI> {
333        if (uniform_index as usize) >= self.buffer.len()
334            || self.free_uniform_indices.contains(&uniform_index)
335        {
336            None
337        } else {
338            Some(self.get_unchecked(uniform_index))
339        }
340    }
341
342    /// Returns the piece of buffered data at the given index.
343    /// Can return data that has previously been removed.
344    ///
345    /// # Panics
346    /// if `uniform_index` is not in bounds of [`Self::buffer`].
347    pub fn get_unchecked(&self, uniform_index: u32) -> BDI {
348        self.buffer.values()[uniform_index as usize]
349    }
350
351    /// Stores a piece of buffered data at the given index.
352    ///
353    /// # Panics
354    /// if `uniform_index` is not in bounds of [`Self::buffer`].
355    pub fn set(&mut self, uniform_index: u32, element: BDI) {
356        self.buffer.values_mut()[uniform_index as usize] = element;
357    }
358
359    // Ensures that the buffers are nonempty, which the GPU requires before an
360    // upload can take place.
361    pub fn ensure_nonempty(&mut self) {
362        if self.buffer.is_empty() {
363            self.buffer.push(default());
364        }
365    }
366
367    /// Returns the number of instances in this buffer.
368    pub fn len(&self) -> usize {
369        self.buffer.len()
370    }
371
372    /// Returns true if this buffer has no instances or false if it contains any
373    /// instances.
374    pub fn is_empty(&self) -> bool {
375        self.buffer.is_empty()
376    }
377
378    /// Consumes this [`InstanceInputUniformBuffer`] and returns the raw buffer
379    /// ready to be uploaded to the GPU.
380    pub fn into_buffer(self) -> RawBufferVec<BDI> {
381        self.buffer
382    }
383}
384
385impl<BDI> Default for InstanceInputUniformBuffer<BDI>
386where
387    BDI: Pod + Default,
388{
389    fn default() -> Self {
390        Self::new()
391    }
392}
393
/// The buffer of GPU preprocessing work items for a single view.
pub enum PreprocessWorkItemBuffers {
    /// The work items we use if we aren't using indirect drawing.
    ///
    /// Because we don't have to separate indexed from non-indexed meshes in
    /// direct mode, we only have a single buffer here.
    Direct(RawBufferVec<PreprocessWorkItem>),

    /// The buffer of work items we use if we are using indirect drawing.
    ///
    /// We need to separate out indexed meshes from non-indexed meshes in this
    /// case because the indirect parameters for these two types of meshes have
    /// different sizes.
    Indirect {
        /// The buffer of work items corresponding to indexed meshes.
        indexed: RawBufferVec<PreprocessWorkItem>,
        /// The buffer of work items corresponding to non-indexed meshes.
        non_indexed: RawBufferVec<PreprocessWorkItem>,
        /// The work item buffers we use when GPU occlusion culling is in use.
        ///
        /// This is `None` when occlusion culling is disabled for the view.
        gpu_occlusion_culling: Option<GpuOcclusionCullingWorkItemBuffers>,
    },
}
416
/// The work item buffers we use when GPU occlusion culling is in use.
pub struct GpuOcclusionCullingWorkItemBuffers {
    /// The buffer of work items corresponding to indexed meshes.
    pub late_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The buffer of work items corresponding to non-indexed meshes.
    pub late_non_indexed: UninitBufferVec<PreprocessWorkItem>,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for indexed meshes live.
    pub late_indirect_parameters_indexed_offset: u32,
    /// The offset into the
    /// [`UntypedPhaseBatchedInstanceBuffers::late_non_indexed_indirect_parameters_buffer`]
    /// where this view's indirect dispatch counts for non-indexed meshes live.
    pub late_indirect_parameters_non_indexed_offset: u32,
}
432
/// A GPU-side data structure that stores the number of workgroups to dispatch
/// for the second phase of GPU occlusion culling.
///
/// The late mesh preprocessing phase checks meshes that weren't visible last
/// frame to determine if they're potentially visible this frame.
#[derive(Clone, Copy, ShaderType, Pod, Zeroable)]
#[repr(C)]
pub struct LatePreprocessWorkItemIndirectParameters {
    /// The number of workgroups to dispatch.
    ///
    /// This will be equal to `work_item_count / 64`, rounded *up*.
    dispatch_x: u32,
    /// The number of workgroups along the abstract Y axis to dispatch: always
    /// 1.
    dispatch_y: u32,
    /// The number of workgroups along the abstract Z axis to dispatch: always
    /// 1.
    dispatch_z: u32,
    /// The actual number of work items.
    ///
    /// The GPU indirect dispatch doesn't read this, but it's used internally to
    /// determine the actual number of work items that exist in the late
    /// preprocessing work item buffer.
    work_item_count: u32,
    /// Padding to 64-byte boundaries for some hardware.
    pad: UVec4,
}
460
461impl Default for LatePreprocessWorkItemIndirectParameters {
462    fn default() -> LatePreprocessWorkItemIndirectParameters {
463        LatePreprocessWorkItemIndirectParameters {
464            dispatch_x: 0,
465            dispatch_y: 1,
466            dispatch_z: 1,
467            work_item_count: 0,
468            pad: default(),
469        }
470    }
471}
472
473/// Returns the set of work item buffers for the given view, first creating it
474/// if necessary.
475///
476/// Bevy uses work item buffers to tell the mesh preprocessing compute shader
477/// which meshes are to be drawn.
478///
479/// You may need to call this function if you're implementing your own custom
480/// render phases. See the `specialized_mesh_pipeline` example.
481pub fn get_or_create_work_item_buffer<'a, I>(
482    work_item_buffers: &'a mut HashMap<RetainedViewEntity, PreprocessWorkItemBuffers>,
483    view: RetainedViewEntity,
484    no_indirect_drawing: bool,
485    enable_gpu_occlusion_culling: bool,
486) -> &'a mut PreprocessWorkItemBuffers
487where
488    I: 'static,
489{
490    let preprocess_work_item_buffers = match work_item_buffers.entry(view) {
491        Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
492        Entry::Vacant(vacant_entry) => {
493            if no_indirect_drawing {
494                vacant_entry.insert(PreprocessWorkItemBuffers::Direct(RawBufferVec::new(
495                    BufferUsages::STORAGE,
496                )))
497            } else {
498                vacant_entry.insert(PreprocessWorkItemBuffers::Indirect {
499                    indexed: RawBufferVec::new(BufferUsages::STORAGE),
500                    non_indexed: RawBufferVec::new(BufferUsages::STORAGE),
501                    // We fill this in below if `enable_gpu_occlusion_culling`
502                    // is set.
503                    gpu_occlusion_culling: None,
504                })
505            }
506        }
507    };
508
509    // Initialize the GPU occlusion culling buffers if necessary.
510    if let PreprocessWorkItemBuffers::Indirect {
511        ref mut gpu_occlusion_culling,
512        ..
513    } = *preprocess_work_item_buffers
514    {
515        match (
516            enable_gpu_occlusion_culling,
517            gpu_occlusion_culling.is_some(),
518        ) {
519            (false, false) | (true, true) => {}
520            (false, true) => {
521                *gpu_occlusion_culling = None;
522            }
523            (true, false) => {
524                *gpu_occlusion_culling = Some(GpuOcclusionCullingWorkItemBuffers {
525                    late_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
526                    late_non_indexed: UninitBufferVec::new(BufferUsages::STORAGE),
527                    late_indirect_parameters_indexed_offset: 0,
528                    late_indirect_parameters_non_indexed_offset: 0,
529                });
530            }
531        }
532    }
533
534    preprocess_work_item_buffers
535}
536
537/// Initializes work item buffers for a phase in preparation for a new frame.
538pub fn init_work_item_buffers(
539    work_item_buffers: &mut PreprocessWorkItemBuffers,
540    late_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
541        LatePreprocessWorkItemIndirectParameters,
542    >,
543    late_non_indexed_indirect_parameters_buffer: &'_ mut RawBufferVec<
544        LatePreprocessWorkItemIndirectParameters,
545    >,
546) {
547    // Add the offsets for indirect parameters that the late phase of mesh
548    // preprocessing writes to.
549    if let PreprocessWorkItemBuffers::Indirect {
550        gpu_occlusion_culling:
551            Some(GpuOcclusionCullingWorkItemBuffers {
552                ref mut late_indirect_parameters_indexed_offset,
553                ref mut late_indirect_parameters_non_indexed_offset,
554                ..
555            }),
556        ..
557    } = *work_item_buffers
558    {
559        *late_indirect_parameters_indexed_offset = late_indexed_indirect_parameters_buffer
560            .push(LatePreprocessWorkItemIndirectParameters::default())
561            as u32;
562        *late_indirect_parameters_non_indexed_offset = late_non_indexed_indirect_parameters_buffer
563            .push(LatePreprocessWorkItemIndirectParameters::default())
564            as u32;
565    }
566}
567
568impl PreprocessWorkItemBuffers {
569    /// Adds a new work item to the appropriate buffer.
570    ///
571    /// `indexed` specifies whether the work item corresponds to an indexed
572    /// mesh.
573    pub fn push(&mut self, indexed: bool, preprocess_work_item: PreprocessWorkItem) {
574        match *self {
575            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
576                buffer.push(preprocess_work_item);
577            }
578            PreprocessWorkItemBuffers::Indirect {
579                indexed: ref mut indexed_buffer,
580                non_indexed: ref mut non_indexed_buffer,
581                ref mut gpu_occlusion_culling,
582            } => {
583                if indexed {
584                    indexed_buffer.push(preprocess_work_item);
585                } else {
586                    non_indexed_buffer.push(preprocess_work_item);
587                }
588
589                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
590                    if indexed {
591                        gpu_occlusion_culling.late_indexed.add();
592                    } else {
593                        gpu_occlusion_culling.late_non_indexed.add();
594                    }
595                }
596            }
597        }
598    }
599
600    /// Clears out the GPU work item buffers in preparation for a new frame.
601    pub fn clear(&mut self) {
602        match *self {
603            PreprocessWorkItemBuffers::Direct(ref mut buffer) => {
604                buffer.clear();
605            }
606            PreprocessWorkItemBuffers::Indirect {
607                indexed: ref mut indexed_buffer,
608                non_indexed: ref mut non_indexed_buffer,
609                ref mut gpu_occlusion_culling,
610            } => {
611                indexed_buffer.clear();
612                non_indexed_buffer.clear();
613
614                if let Some(ref mut gpu_occlusion_culling) = *gpu_occlusion_culling {
615                    gpu_occlusion_culling.late_indexed.clear();
616                    gpu_occlusion_culling.late_non_indexed.clear();
617                    gpu_occlusion_culling.late_indirect_parameters_indexed_offset = 0;
618                    gpu_occlusion_culling.late_indirect_parameters_non_indexed_offset = 0;
619                }
620            }
621        }
622    }
623}
624
/// One invocation of the preprocessing shader: i.e. one mesh instance in a
/// view.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct PreprocessWorkItem {
    /// The index of the batch input data in the input buffer that the shader
    /// reads from.
    pub input_index: u32,

    /// In direct mode, the index of the mesh uniform; in indirect mode, the
    /// index of the [`IndirectParametersGpuMetadata`].
    ///
    /// In indirect mode, this is the index of the
    /// [`IndirectParametersGpuMetadata`] in the
    /// `IndirectParametersBuffers::indexed_metadata` or
    /// `IndirectParametersBuffers::non_indexed_metadata`.
    pub output_or_indirect_parameters_index: u32,
}
643
/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for indexed meshes. We generate the instances of this
/// structure in the `build_indirect_params.wgsl` compute shader.
///
/// The field order is significant: `#[repr(C)]` keeps the layout the GPU's
/// indexed indirect draw command expects.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersIndexed {
    /// The number of indices that this mesh has.
    pub index_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first index for this mesh in the index buffer slab.
    pub first_index: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `MeshUniform` buffer.
    pub first_instance: u32,
}
662
/// The `wgpu` indirect parameters structure that specifies a GPU draw command.
///
/// This is the variant for non-indexed meshes. We generate the instances of
/// this structure in the `build_indirect_params.wgsl` compute shader.
///
/// The field order is significant: `#[repr(C)]` keeps the layout the GPU's
/// non-indexed indirect draw command expects.
#[derive(Clone, Copy, Debug, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersNonIndexed {
    /// The number of vertices that this mesh has.
    pub vertex_count: u32,
    /// The number of instances we are to draw.
    pub instance_count: u32,
    /// The offset of the first vertex for this mesh in the vertex buffer slab.
    pub base_vertex: u32,
    /// The index of the first mesh instance in the `Mesh` buffer.
    pub first_instance: u32,
}
679
/// A structure, initialized on CPU and read on GPU, that contains metadata
/// about each batch.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersCpuMetadata {
    /// The index of the first instance of this mesh in the array of
    /// `MeshUniform`s.
    ///
    /// Note that this is the *first* output index in this batch. Since each
    /// instance of this structure refers to arbitrarily many instances, the
    /// `MeshUniform`s corresponding to this batch span the indices
    /// `base_output_index..(base_output_index + instance_count)`.
    pub base_output_index: u32,

    /// The index of the batch set that this batch belongs to in the
    /// [`IndirectBatchSet`] buffer.
    ///
    /// A *batch set* is a set of meshes that may be multi-drawn together.
    /// Multiple batches (and therefore multiple instances of
    /// [`IndirectParametersGpuMetadata`] structures) can be part of the same
    /// batch set.
    pub batch_set_index: u32,
}
705
/// A structure, written and read on the GPU, that records how many instances
/// of each mesh are actually to be drawn.
///
/// The GPU mesh preprocessing shader increments the
/// [`Self::early_instance_count`] and [`Self::late_instance_count`] as it
/// determines that meshes are visible.  The indirect parameter building shader
/// reads this metadata in order to construct the indirect draw parameters.
///
/// Each batch will have one instance of this structure.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParametersGpuMetadata {
    /// The index of the first mesh in this batch in the array of
    /// `MeshInputUniform`s.
    pub mesh_index: u32,

    /// The number of instances that were judged visible last frame.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub early_instance_count: u32,

    /// The number of instances that have been judged potentially visible this
    /// frame that weren't in the last frame's potentially visible set.
    ///
    /// The CPU sets this value to 0, and the GPU mesh preprocessing shader
    /// increments it as it culls mesh instances.
    pub late_instance_count: u32,
}
735
/// A structure, shared between CPU and GPU, that holds the number of on-GPU
/// indirect draw commands for each *batch set*.
///
/// A *batch set* is a set of meshes that may be multi-drawn together.
///
/// If the current hardware and driver support `multi_draw_indirect_count`, the
/// indirect parameters building shader increments
/// [`Self::indirect_parameters_count`] as it generates indirect parameters. The
/// `multi_draw_indirect_count` command reads
/// [`Self::indirect_parameters_count`] in order to determine how many commands
/// belong to each batch set.
#[derive(Clone, Copy, Default, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectBatchSet {
    /// The number of indirect parameter commands (i.e. batches) in this batch
    /// set.
    ///
    /// The CPU sets this value to 0 before uploading this structure to GPU. The
    /// indirect parameters building shader increments this value as it creates
    /// indirect parameters. Then the `multi_draw_indirect_count` command reads
    /// this value in order to determine how many indirect draw commands to
    /// process.
    pub indirect_parameters_count: u32,

    /// The offset within the `IndirectParametersBuffers::indexed_data` or
    /// `IndirectParametersBuffers::non_indexed_data` of the first indirect draw
    /// command for this batch set.
    ///
    /// The CPU fills out this value.
    pub indirect_parameters_base: u32,
}
767
/// The buffers containing all the information that indirect draw commands
/// (`multi_draw_indirect`, `multi_draw_indirect_count`) use to draw the scene.
///
/// In addition to the indirect draw buffers themselves, this structure contains
/// the buffers that store [`IndirectParametersGpuMetadata`], which are the
/// structures that culling writes to so that the indirect parameter building
/// pass can determine how many meshes are actually to be drawn.
///
/// These buffers will remain empty if indirect drawing isn't in use.
#[derive(Resource, Deref, DerefMut)]
pub struct IndirectParametersBuffers {
    /// A mapping from a phase type ID to the indirect parameters buffers for
    /// that phase.
    ///
    /// Examples of phase type IDs are `Opaque3d` and `AlphaMask3d`.
    #[deref]
    pub buffers: TypeIdMap<UntypedPhaseIndirectParametersBuffers>,
    /// If true, this sets the `COPY_SRC` flag on indirect draw parameters so
    /// that they can be read back to CPU.
    ///
    /// This is a debugging feature that may reduce performance. It primarily
    /// exists for the `occlusion_culling` example.
    pub allow_copies_from_indirect_parameter_buffers: bool,
}
792
793impl IndirectParametersBuffers {
794    /// Initializes a new [`IndirectParametersBuffers`] resource.
795    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> IndirectParametersBuffers {
796        IndirectParametersBuffers {
797            buffers: TypeIdMap::default(),
798            allow_copies_from_indirect_parameter_buffers,
799        }
800    }
801}
802
/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that has a type parameter, so that the
/// batching for different phases can run in parallel.
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
#[derive(Resource)]
pub struct PhaseIndirectParametersBuffers<PI>
where
    PI: PhaseItem,
{
    /// The indirect draw buffers for the phase.
    pub buffers: UntypedPhaseIndirectParametersBuffers,
    // Ties this resource to the phase item type `PI` without storing any `PI`
    // data, so each phase gets its own distinct resource type.
    phantom: PhantomData<PI>,
}
819
820impl<PI> PhaseIndirectParametersBuffers<PI>
821where
822    PI: PhaseItem,
823{
824    pub fn new(allow_copies_from_indirect_parameter_buffers: bool) -> Self {
825        PhaseIndirectParametersBuffers {
826            buffers: UntypedPhaseIndirectParametersBuffers::new(
827                allow_copies_from_indirect_parameter_buffers,
828            ),
829            phantom: PhantomData,
830        }
831    }
832}
833
/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single phase.
///
/// This is the version of the structure that doesn't have a type parameter, so
/// that it can be inserted into [`IndirectParametersBuffers::buffers`].
///
/// See the [`IndirectParametersBuffers`] documentation for more information.
pub struct UntypedPhaseIndirectParametersBuffers {
    /// Information that indirect draw commands use to draw indexed meshes in
    /// the scene.
    pub indexed: MeshClassIndirectParametersBuffers<IndirectParametersIndexed>,
    /// Information that indirect draw commands use to draw non-indexed meshes
    /// in the scene.
    pub non_indexed: MeshClassIndirectParametersBuffers<IndirectParametersNonIndexed>,
}
849
850impl UntypedPhaseIndirectParametersBuffers {
851    /// Creates the indirect parameters buffers.
852    pub fn new(
853        allow_copies_from_indirect_parameter_buffers: bool,
854    ) -> UntypedPhaseIndirectParametersBuffers {
855        let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;
856        if allow_copies_from_indirect_parameter_buffers {
857            indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC;
858        }
859
860        UntypedPhaseIndirectParametersBuffers {
861            non_indexed: MeshClassIndirectParametersBuffers::new(
862                allow_copies_from_indirect_parameter_buffers,
863            ),
864            indexed: MeshClassIndirectParametersBuffers::new(
865                allow_copies_from_indirect_parameter_buffers,
866            ),
867        }
868    }
869
870    /// Reserves space for `count` new batches.
871    ///
872    /// The `indexed` parameter specifies whether the meshes that these batches
873    /// correspond to are indexed or not.
874    pub fn allocate(&mut self, indexed: bool, count: u32) -> u32 {
875        if indexed {
876            self.indexed.allocate(count)
877        } else {
878            self.non_indexed.allocate(count)
879        }
880    }
881
882    /// Returns the number of batches currently allocated.
883    ///
884    /// The `indexed` parameter specifies whether the meshes that these batches
885    /// correspond to are indexed or not.
886    fn batch_count(&self, indexed: bool) -> usize {
887        if indexed {
888            self.indexed.batch_count()
889        } else {
890            self.non_indexed.batch_count()
891        }
892    }
893
894    /// Returns the number of batch sets currently allocated.
895    ///
896    /// The `indexed` parameter specifies whether the meshes that these batch
897    /// sets correspond to are indexed or not.
898    pub fn batch_set_count(&self, indexed: bool) -> usize {
899        if indexed {
900            self.indexed.batch_sets.len()
901        } else {
902            self.non_indexed.batch_sets.len()
903        }
904    }
905
906    /// Adds a new batch set to `Self::indexed_batch_sets` or
907    /// `Self::non_indexed_batch_sets` as appropriate.
908    ///
909    /// `indexed` specifies whether the meshes that these batch sets correspond
910    /// to are indexed or not. `indirect_parameters_base` specifies the offset
911    /// within `Self::indexed_data` or `Self::non_indexed_data` of the first
912    /// batch in this batch set.
913    #[inline]
914    pub fn add_batch_set(&mut self, indexed: bool, indirect_parameters_base: u32) {
915        if indexed {
916            self.indexed.batch_sets.push(IndirectBatchSet {
917                indirect_parameters_base,
918                indirect_parameters_count: 0,
919            });
920        } else {
921            self.non_indexed.batch_sets.push(IndirectBatchSet {
922                indirect_parameters_base,
923                indirect_parameters_count: 0,
924            });
925        }
926    }
927
928    /// Returns the index that a newly-added batch set will have.
929    ///
930    /// The `indexed` parameter specifies whether the meshes in such a batch set
931    /// are indexed or not.
932    pub fn get_next_batch_set_index(&self, indexed: bool) -> Option<NonMaxU32> {
933        NonMaxU32::new(self.batch_set_count(indexed) as u32)
934    }
935
936    /// Clears out the buffers in preparation for a new frame.
937    pub fn clear(&mut self) {
938        self.indexed.clear();
939        self.non_indexed.clear();
940    }
941}
942
/// The buffers containing all the information that indirect draw commands use
/// to draw the scene, for a single mesh class (indexed or non-indexed), for a
/// single phase.
pub struct MeshClassIndirectParametersBuffers<IP>
where
    IP: Clone + ShaderSize + WriteInto,
{
    /// The GPU buffer that stores the indirect draw parameters for the meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, while the
    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
    /// it to perform the draws.
    data: UninitBufferVec<IP>,

    /// The GPU buffer that holds the CPU-constructed data used to build
    /// indirect draw parameters for meshes.
    ///
    /// The CPU writes to this buffer, and the indirect parameters building
    /// shader reads this buffer to construct the indirect draw parameters.
    cpu_metadata: RawBufferVec<IndirectParametersCpuMetadata>,

    /// The GPU buffer that holds data built by the GPU used to construct
    /// indirect draw parameters for meshes.
    ///
    /// The GPU mesh preprocessing shader writes to this buffer, and the
    /// indirect parameters building shader reads this buffer to construct the
    /// indirect draw parameters.
    gpu_metadata: UninitBufferVec<IndirectParametersGpuMetadata>,

    /// The GPU buffer that holds the number of indirect draw commands for each
    /// phase of each view, for meshes.
    ///
    /// The indirect parameters building shader writes to this buffer, and the
    /// `multi_draw_indirect_count` command reads from it in order to know how
    /// many indirect draw commands to process.
    batch_sets: RawBufferVec<IndirectBatchSet>,
}
981
982impl<IP> MeshClassIndirectParametersBuffers<IP>
983where
984    IP: Clone + ShaderSize + WriteInto,
985{
986    fn new(
987        allow_copies_from_indirect_parameter_buffers: bool,
988    ) -> MeshClassIndirectParametersBuffers<IP> {
989        let mut indirect_parameter_buffer_usages = BufferUsages::STORAGE | BufferUsages::INDIRECT;
990        if allow_copies_from_indirect_parameter_buffers {
991            indirect_parameter_buffer_usages |= BufferUsages::COPY_SRC;
992        }
993
994        MeshClassIndirectParametersBuffers {
995            data: UninitBufferVec::new(indirect_parameter_buffer_usages),
996            cpu_metadata: RawBufferVec::new(BufferUsages::STORAGE),
997            gpu_metadata: UninitBufferVec::new(BufferUsages::STORAGE),
998            batch_sets: RawBufferVec::new(indirect_parameter_buffer_usages),
999        }
1000    }
1001
1002    /// Returns the GPU buffer that stores the indirect draw parameters for
1003    /// indexed meshes.
1004    ///
1005    /// The indirect parameters building shader writes to this buffer, while the
1006    /// `multi_draw_indirect` or `multi_draw_indirect_count` commands read from
1007    /// it to perform the draws.
1008    #[inline]
1009    pub fn data_buffer(&self) -> Option<&Buffer> {
1010        self.data.buffer()
1011    }
1012
1013    /// Returns the GPU buffer that holds the CPU-constructed data used to
1014    /// construct indirect draw parameters for meshes.
1015    ///
1016    /// The CPU writes to this buffer, and the indirect parameters building
1017    /// shader reads this buffer to construct the indirect draw parameters.
1018    #[inline]
1019    pub fn cpu_metadata_buffer(&self) -> Option<&Buffer> {
1020        self.cpu_metadata.buffer()
1021    }
1022
1023    /// Returns the GPU buffer that holds the GPU-constructed data used to
1024    /// construct indirect draw parameters for meshes.
1025    ///
1026    /// The GPU mesh preprocessing shader writes to this buffer, and the
1027    /// indirect parameters building shader reads this buffer to construct the
1028    /// indirect draw parameters.
1029    #[inline]
1030    pub fn gpu_metadata_buffer(&self) -> Option<&Buffer> {
1031        self.gpu_metadata.buffer()
1032    }
1033
1034    /// Returns the GPU buffer that holds the number of indirect draw commands
1035    /// for each phase of each view.
1036    ///
1037    /// The indirect parameters building shader writes to this buffer, and the
1038    /// `multi_draw_indirect_count` command reads from it in order to know how
1039    /// many indirect draw commands to process.
1040    #[inline]
1041    pub fn batch_sets_buffer(&self) -> Option<&Buffer> {
1042        self.batch_sets.buffer()
1043    }
1044
1045    /// Reserves space for `count` new batches.
1046    ///
1047    /// This allocates in the [`Self::cpu_metadata`], [`Self::gpu_metadata`],
1048    /// and [`Self::data`] buffers.
1049    fn allocate(&mut self, count: u32) -> u32 {
1050        let length = self.data.len();
1051        self.cpu_metadata.reserve_internal(count as usize);
1052        self.gpu_metadata.add_multiple(count as usize);
1053        for _ in 0..count {
1054            self.data.add();
1055            self.cpu_metadata
1056                .push(IndirectParametersCpuMetadata::default());
1057        }
1058        length as u32
1059    }
1060
1061    /// Sets the [`IndirectParametersCpuMetadata`] for the mesh at the given
1062    /// index.
1063    pub fn set(&mut self, index: u32, value: IndirectParametersCpuMetadata) {
1064        self.cpu_metadata.set(index, value);
1065    }
1066
1067    /// Returns the number of batches corresponding to meshes that are currently
1068    /// allocated.
1069    #[inline]
1070    pub fn batch_count(&self) -> usize {
1071        self.data.len()
1072    }
1073
1074    /// Clears out all the buffers in preparation for a new frame.
1075    pub fn clear(&mut self) {
1076        self.data.clear();
1077        self.cpu_metadata.clear();
1078        self.gpu_metadata.clear();
1079        self.batch_sets.clear();
1080    }
1081}
1082
1083impl Default for IndirectParametersBuffers {
1084    fn default() -> Self {
1085        // By default, we don't allow GPU indirect parameter mapping, since
1086        // that's a debugging option.
1087        Self::new(false)
1088    }
1089}
1090
1091impl FromWorld for GpuPreprocessingSupport {
1092    fn from_world(world: &mut World) -> Self {
1093        let adapter = world.resource::<RenderAdapter>();
1094        let device = world.resource::<RenderDevice>();
1095
1096        // Filter Android drivers that are incompatible with GPU preprocessing:
1097        // - We filter out Adreno 730 and earlier GPUs (except 720, as it's newer
1098        //   than 730).
1099        // - We filter out Mali GPUs with driver versions lower than 48.
1100        fn is_non_supported_android_device(adapter: &RenderAdapter) -> bool {
1101            crate::get_adreno_model(adapter).is_some_and(|model| model != 720 && model <= 730)
1102                || crate::get_mali_driver_version(adapter).is_some_and(|version| version < 48)
1103        }
1104
1105        let culling_feature_support = device.features().contains(
1106            Features::INDIRECT_FIRST_INSTANCE
1107                | Features::MULTI_DRAW_INDIRECT
1108                | Features::PUSH_CONSTANTS,
1109        );
1110        // Depth downsampling for occlusion culling requires 12 textures
1111        let limit_support = device.limits().max_storage_textures_per_shader_stage >= 12 &&
1112            // Even if the adapter supports compute, we might be simulating a lack of
1113            // compute via device limits (see `WgpuSettingsPriority::WebGL2` and
1114            // `wgpu::Limits::downlevel_webgl2_defaults()`). This will have set all the
1115            // `max_compute_*` limits to zero, so we arbitrarily pick one as a canary.
1116            device.limits().max_compute_workgroup_storage_size != 0;
1117
1118        let downlevel_support = adapter.get_downlevel_capabilities().flags.contains(
1119            DownlevelFlags::COMPUTE_SHADERS |
1120            DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW
1121        );
1122
1123        let max_supported_mode = if device.limits().max_compute_workgroup_size_x == 0
1124            || is_non_supported_android_device(adapter)
1125        {
1126            info!(
1127                "GPU preprocessing is not supported on this device. \
1128                Falling back to CPU preprocessing.",
1129            );
1130            GpuPreprocessingMode::None
1131        } else if !(culling_feature_support && limit_support && downlevel_support) {
1132            info!("Some GPU preprocessing are limited on this device.");
1133            GpuPreprocessingMode::PreprocessingOnly
1134        } else {
1135            info!("GPU preprocessing is fully supported on this device.");
1136            GpuPreprocessingMode::Culling
1137        };
1138
1139        GpuPreprocessingSupport { max_supported_mode }
1140    }
1141}
1142
1143impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
1144where
1145    BD: GpuArrayBufferable + Sync + Send + 'static,
1146    BDI: Pod + Sync + Send + Default + 'static,
1147{
1148    /// Creates new buffers.
1149    pub fn new() -> Self {
1150        Self::default()
1151    }
1152
1153    /// Clears out the buffers in preparation for a new frame.
1154    pub fn clear(&mut self) {
1155        for phase_instance_buffer in self.phase_instance_buffers.values_mut() {
1156            phase_instance_buffer.clear();
1157        }
1158    }
1159}
1160
1161impl<BD> UntypedPhaseBatchedInstanceBuffers<BD>
1162where
1163    BD: GpuArrayBufferable + Sync + Send + 'static,
1164{
1165    pub fn new() -> Self {
1166        UntypedPhaseBatchedInstanceBuffers {
1167            data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
1168            work_item_buffers: HashMap::default(),
1169            late_indexed_indirect_parameters_buffer: RawBufferVec::new(
1170                BufferUsages::STORAGE | BufferUsages::INDIRECT,
1171            ),
1172            late_non_indexed_indirect_parameters_buffer: RawBufferVec::new(
1173                BufferUsages::STORAGE | BufferUsages::INDIRECT,
1174            ),
1175        }
1176    }
1177
1178    /// Returns the binding of the buffer that contains the per-instance data.
1179    ///
1180    /// This buffer needs to be filled in via a compute shader.
1181    pub fn instance_data_binding(&self) -> Option<BindingResource> {
1182        self.data_buffer
1183            .buffer()
1184            .map(|buffer| buffer.as_entire_binding())
1185    }
1186
1187    /// Clears out the buffers in preparation for a new frame.
1188    pub fn clear(&mut self) {
1189        self.data_buffer.clear();
1190        self.late_indexed_indirect_parameters_buffer.clear();
1191        self.late_non_indexed_indirect_parameters_buffer.clear();
1192
1193        // Clear each individual set of buffers, but don't depopulate the hash
1194        // table. We want to avoid reallocating these vectors every frame.
1195        for view_work_item_buffers in self.work_item_buffers.values_mut() {
1196            view_work_item_buffers.clear();
1197        }
1198    }
1199}
1200
1201impl<BD> Default for UntypedPhaseBatchedInstanceBuffers<BD>
1202where
1203    BD: GpuArrayBufferable + Sync + Send + 'static,
1204{
1205    fn default() -> Self {
1206        Self::new()
1207    }
1208}
1209
/// Information about a render batch that we're building up during a sorted
/// render phase.
struct SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// The index of the first phase item in this batch in the list of phase
    /// items.
    phase_item_start_index: u32,

    /// The index of the first instance in this batch in the instance buffer.
    instance_start_index: u32,

    /// True if the mesh in question has an index buffer; false otherwise.
    indexed: bool,

    /// The index of the indirect parameters for this batch in the
    /// [`IndirectParametersBuffers`].
    ///
    /// If CPU culling is being used, then this will be `None`.
    indirect_parameters_index: Option<NonMaxU32>,

    /// Metadata that can be used to determine whether an instance can be placed
    /// into this batch.
    ///
    /// If `None`, the item inside is unbatchable, so no subsequent item can be
    /// merged into this batch.
    meta: Option<BatchMeta<F::CompareData>>,
}
1238
1239impl<F> SortedRenderBatch<F>
1240where
1241    F: GetBatchData,
1242{
1243    /// Finalizes this batch and updates the [`SortedRenderPhase`] with the
1244    /// appropriate indices.
1245    ///
1246    /// `instance_end_index` is the index of the last instance in this batch
1247    /// plus one.
1248    fn flush<I>(
1249        self,
1250        instance_end_index: u32,
1251        phase: &mut SortedRenderPhase<I>,
1252        phase_indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers,
1253    ) where
1254        I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
1255    {
1256        let (batch_range, batch_extra_index) =
1257            phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut();
1258        *batch_range = self.instance_start_index..instance_end_index;
1259        *batch_extra_index = match self.indirect_parameters_index {
1260            Some(indirect_parameters_index) => PhaseItemExtraIndex::IndirectParametersIndex {
1261                range: u32::from(indirect_parameters_index)
1262                    ..(u32::from(indirect_parameters_index) + 1),
1263                batch_set_index: None,
1264            },
1265            None => PhaseItemExtraIndex::None,
1266        };
1267        if let Some(indirect_parameters_index) = self.indirect_parameters_index {
1268            phase_indirect_parameters_buffers
1269                .add_batch_set(self.indexed, indirect_parameters_index.into());
1270        }
1271    }
1272}
1273
1274/// A system that runs early in extraction and clears out all the
1275/// [`BatchedInstanceBuffers`] for the frame.
1276///
1277/// We have to run this during extraction because, if GPU preprocessing is in
1278/// use, the extraction phase will write to the mesh input uniform buffers
1279/// directly, so the buffers need to be cleared before then.
1280pub fn clear_batched_gpu_instance_buffers<GFBD>(
1281    gpu_batched_instance_buffers: Option<
1282        ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
1283    >,
1284) where
1285    GFBD: GetFullBatchData,
1286{
1287    // Don't clear the entire table, because that would delete the buffers, and
1288    // we want to reuse those allocations.
1289    if let Some(mut gpu_batched_instance_buffers) = gpu_batched_instance_buffers {
1290        gpu_batched_instance_buffers.clear();
1291    }
1292}
1293
1294/// A system that removes GPU preprocessing work item buffers that correspond to
1295/// deleted [`ExtractedView`]s.
1296///
1297/// This is a separate system from [`clear_batched_gpu_instance_buffers`]
1298/// because [`ExtractedView`]s aren't created until after the extraction phase
1299/// is completed.
1300pub fn delete_old_work_item_buffers<GFBD>(
1301    mut gpu_batched_instance_buffers: ResMut<
1302        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
1303    >,
1304    extracted_views: Query<&ExtractedView>,
1305) where
1306    GFBD: GetFullBatchData,
1307{
1308    let retained_view_entities: HashSet<_> = extracted_views
1309        .iter()
1310        .map(|extracted_view| extracted_view.retained_view_entity)
1311        .collect();
1312    for phase_instance_buffers in gpu_batched_instance_buffers
1313        .phase_instance_buffers
1314        .values_mut()
1315    {
1316        phase_instance_buffers
1317            .work_item_buffers
1318            .retain(|retained_view_entity, _| {
1319                retained_view_entities.contains(retained_view_entity)
1320            });
1321    }
1322}
1323
/// Batch the items in a sorted render phase, when GPU instance buffer building
/// is in use. This means comparing metadata needed to draw each phase item and
/// trying to combine the draws into a batch.
///
/// For each view with a sorted phase of type `I`, this system walks the phase
/// items in order, merging consecutive items whose [`BatchMeta`] compares equal
/// into a single batch, allocating per-instance slots in the data buffer, and
/// emitting [`PreprocessWorkItem`]s so the GPU preprocessing shader can fill
/// those slots in. When indirect drawing is enabled for the view, it also
/// allocates one indirect-parameters slot per batch.
pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<I, GFBD::BufferData>>,
    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<I>>,
    mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
    mut views: Query<(
        &ExtractedView,
        Has<NoIndirectDrawing>,
        Has<OcclusionCulling>,
    )>,
    system_param_item: StaticSystemParam<GFBD::Param>,
) where
    I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    GFBD: GetFullBatchData,
{
    // We only process GPU-built batch data in this function.
    let UntypedPhaseBatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ref mut late_indexed_indirect_parameters_buffer,
        ref mut late_non_indexed_indirect_parameters_buffer,
    } = phase_batched_instance_buffers.buffers;

    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
        // Skip views that have no sorted phase of this type.
        let Some(phase) = sorted_render_phases.get_mut(&extracted_view.retained_view_entity) else {
            continue;
        };

        // Create the work item buffer if necessary.
        let work_item_buffer = get_or_create_work_item_buffer::<I>(
            work_item_buffers,
            extracted_view.retained_view_entity,
            no_indirect_drawing,
            gpu_occlusion_culling,
        );

        // Initialize those work item buffers in preparation for this new frame.
        init_work_item_buffers(
            work_item_buffer,
            late_indexed_indirect_parameters_buffer,
            late_non_indexed_indirect_parameters_buffer,
        );

        // Walk through the list of phase items, building up batches as we go.
        let mut batch: Option<SortedRenderBatch<GFBD>> = None;

        for current_index in 0..phase.items.len() {
            // Get the index of the input data, and comparison metadata, for
            // this entity.
            let item = &phase.items[current_index];
            let entity = item.main_entity();
            let item_is_indexed = item.indexed();
            let current_batch_input_index =
                GFBD::get_index_and_compare_data(&system_param_item, entity);

            // Unpack that index and metadata. Note that it's possible for index
            // and/or metadata to not be present, which signifies that this
            // entity is unbatchable. In that case, we break the batch here.
            // If the index isn't present the item is not part of this pipeline and so will be skipped.
            let Some((current_input_index, current_meta)) = current_batch_input_index else {
                // Break a batch if we need to.
                if let Some(batch) = batch.take() {
                    batch.flush(
                        data_buffer.len() as u32,
                        phase,
                        &mut phase_indirect_parameters_buffers.buffers,
                    );
                }

                continue;
            };
            let current_meta =
                current_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta));

            // Determine if this entity can be included in the batch we're
            // building up.
            let can_batch = batch.as_ref().is_some_and(|batch| {
                // `None` for metadata indicates that the items are unbatchable.
                match (&current_meta, &batch.meta) {
                    (Some(current_meta), Some(batch_meta)) => current_meta == batch_meta,
                    (_, _) => false,
                }
            });

            // Make space in the data buffer for this instance.
            let output_index = data_buffer.add() as u32;

            // If we can't batch, break the existing batch and make a new one.
            if !can_batch {
                // Break a batch if we need to.
                if let Some(batch) = batch.take() {
                    batch.flush(
                        output_index,
                        phase,
                        &mut phase_indirect_parameters_buffers.buffers,
                    );
                }

                // Allocate an indirect parameters slot for the new batch,
                // unless this view draws directly (no indirect drawing).
                let indirect_parameters_index = if no_indirect_drawing {
                    None
                } else if item_is_indexed {
                    Some(
                        phase_indirect_parameters_buffers
                            .buffers
                            .indexed
                            .allocate(1),
                    )
                } else {
                    Some(
                        phase_indirect_parameters_buffers
                            .buffers
                            .non_indexed
                            .allocate(1),
                    )
                };

                // Start a new batch.
                if let Some(indirect_parameters_index) = indirect_parameters_index {
                    GFBD::write_batch_indirect_parameters_metadata(
                        item_is_indexed,
                        output_index,
                        None,
                        &mut phase_indirect_parameters_buffers.buffers,
                        indirect_parameters_index,
                    );
                };

                batch = Some(SortedRenderBatch {
                    phase_item_start_index: current_index as u32,
                    instance_start_index: output_index,
                    indexed: item_is_indexed,
                    indirect_parameters_index: indirect_parameters_index.and_then(NonMaxU32::new),
                    meta: current_meta,
                });
            }

            // Add a new preprocessing work item so that the preprocessing
            // shader will copy the per-instance data over.
            if let Some(batch) = batch.as_ref() {
                work_item_buffer.push(
                    item_is_indexed,
                    PreprocessWorkItem {
                        input_index: current_input_index.into(),
                        // When drawing indirectly, the work item points at the
                        // batch's indirect parameters; when drawing directly,
                        // it points at the output slot itself.
                        output_or_indirect_parameters_index: match (
                            no_indirect_drawing,
                            batch.indirect_parameters_index,
                        ) {
                            (true, _) => output_index,
                            (false, Some(indirect_parameters_index)) => {
                                indirect_parameters_index.into()
                            }
                            (false, None) => 0,
                        },
                    },
                );
            }
        }

        // Flush the final batch if necessary.
        if let Some(batch) = batch.take() {
            batch.flush(
                data_buffer.len() as u32,
                phase,
                &mut phase_indirect_parameters_buffers.buffers,
            );
        }
    }
}
1494
1495/// Creates batches for a render phase that uses bins.
1496pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
1497    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<BPI, GFBD::BufferData>>,
1498    phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<BPI>>,
1499    mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
1500    mut views: Query<
1501        (
1502            &ExtractedView,
1503            Has<NoIndirectDrawing>,
1504            Has<OcclusionCulling>,
1505        ),
1506        With<ExtractedView>,
1507    >,
1508    param: StaticSystemParam<GFBD::Param>,
1509) where
1510    BPI: BinnedPhaseItem,
1511    GFBD: GetFullBatchData,
1512{
1513    let system_param_item = param.into_inner();
1514
1515    let phase_indirect_parameters_buffers = phase_indirect_parameters_buffers.into_inner();
1516
1517    let UntypedPhaseBatchedInstanceBuffers {
1518        ref mut data_buffer,
1519        ref mut work_item_buffers,
1520        ref mut late_indexed_indirect_parameters_buffer,
1521        ref mut late_non_indexed_indirect_parameters_buffer,
1522    } = phase_batched_instance_buffers.buffers;
1523
1524    for (extracted_view, no_indirect_drawing, gpu_occlusion_culling) in &mut views {
1525        let Some(phase) = binned_render_phases.get_mut(&extracted_view.retained_view_entity) else {
1526            continue;
1527        };
1528
1529        // Create the work item buffer if necessary; otherwise, just mark it as
1530        // used this frame.
1531        let work_item_buffer = get_or_create_work_item_buffer::<BPI>(
1532            work_item_buffers,
1533            extracted_view.retained_view_entity,
1534            no_indirect_drawing,
1535            gpu_occlusion_culling,
1536        );
1537
1538        // Initialize those work item buffers in preparation for this new frame.
1539        init_work_item_buffers(
1540            work_item_buffer,
1541            late_indexed_indirect_parameters_buffer,
1542            late_non_indexed_indirect_parameters_buffer,
1543        );
1544
1545        // Prepare multidrawables.
1546
1547        if let (
1548            &mut BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut batch_sets),
1549            &mut PreprocessWorkItemBuffers::Indirect {
1550                indexed: ref mut indexed_work_item_buffer,
1551                non_indexed: ref mut non_indexed_work_item_buffer,
1552                gpu_occlusion_culling: ref mut gpu_occlusion_culling_buffers,
1553            },
1554        ) = (&mut phase.batch_sets, &mut *work_item_buffer)
1555        {
1556            let mut output_index = data_buffer.len() as u32;
1557
1558            // Initialize the state for both indexed and non-indexed meshes.
1559            let mut indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =
1560                MultidrawableBatchSetPreparer::new(
1561                    phase_indirect_parameters_buffers.buffers.batch_count(true) as u32,
1562                    phase_indirect_parameters_buffers
1563                        .buffers
1564                        .indexed
1565                        .batch_sets
1566                        .len() as u32,
1567                );
1568            let mut non_indexed_preparer: MultidrawableBatchSetPreparer<BPI, GFBD> =
1569                MultidrawableBatchSetPreparer::new(
1570                    phase_indirect_parameters_buffers.buffers.batch_count(false) as u32,
1571                    phase_indirect_parameters_buffers
1572                        .buffers
1573                        .non_indexed
1574                        .batch_sets
1575                        .len() as u32,
1576                );
1577
1578            // Prepare each batch set.
1579            for (batch_set_key, bins) in &phase.multidrawable_meshes {
1580                if batch_set_key.indexed() {
1581                    indexed_preparer.prepare_multidrawable_binned_batch_set(
1582                        bins,
1583                        &mut output_index,
1584                        data_buffer,
1585                        indexed_work_item_buffer,
1586                        &mut phase_indirect_parameters_buffers.buffers.indexed,
1587                        batch_sets,
1588                    );
1589                } else {
1590                    non_indexed_preparer.prepare_multidrawable_binned_batch_set(
1591                        bins,
1592                        &mut output_index,
1593                        data_buffer,
1594                        non_indexed_work_item_buffer,
1595                        &mut phase_indirect_parameters_buffers.buffers.non_indexed,
1596                        batch_sets,
1597                    );
1598                }
1599            }
1600
1601            // Reserve space in the occlusion culling buffers, if necessary.
1602            if let Some(gpu_occlusion_culling_buffers) = gpu_occlusion_culling_buffers {
1603                gpu_occlusion_culling_buffers
1604                    .late_indexed
1605                    .add_multiple(indexed_preparer.work_item_count);
1606                gpu_occlusion_culling_buffers
1607                    .late_non_indexed
1608                    .add_multiple(non_indexed_preparer.work_item_count);
1609            }
1610        }
1611
1612        // Prepare batchables.
1613
1614        for (key, bin) in &phase.batchable_meshes {
1615            let mut batch: Option<BinnedRenderPhaseBatch> = None;
1616            for (&main_entity, &input_index) in bin.entities() {
1617                let output_index = data_buffer.add() as u32;
1618
1619                match batch {
1620                    Some(ref mut batch) => {
1621                        batch.instance_range.end = output_index + 1;
1622
1623                        // Append to the current batch.
1624                        //
1625                        // If we're in indirect mode, then we write the first
1626                        // output index of this batch, so that we have a
1627                        // tightly-packed buffer if GPU culling discards some of
1628                        // the instances. Otherwise, we can just write the
1629                        // output index directly.
1630                        work_item_buffer.push(
1631                            key.0.indexed(),
1632                            PreprocessWorkItem {
1633                                input_index: *input_index,
1634                                output_or_indirect_parameters_index: match (
1635                                    no_indirect_drawing,
1636                                    &batch.extra_index,
1637                                ) {
1638                                    (true, _) => output_index,
1639                                    (
1640                                        false,
1641                                        PhaseItemExtraIndex::IndirectParametersIndex {
1642                                            range: indirect_parameters_range,
1643                                            ..
1644                                        },
1645                                    ) => indirect_parameters_range.start,
1646                                    (false, &PhaseItemExtraIndex::DynamicOffset(_))
1647                                    | (false, &PhaseItemExtraIndex::None) => 0,
1648                                },
1649                            },
1650                        );
1651                    }
1652
1653                    None if !no_indirect_drawing => {
1654                        // Start a new batch, in indirect mode.
1655                        let indirect_parameters_index = phase_indirect_parameters_buffers
1656                            .buffers
1657                            .allocate(key.0.indexed(), 1);
1658                        let batch_set_index = phase_indirect_parameters_buffers
1659                            .buffers
1660                            .get_next_batch_set_index(key.0.indexed());
1661
1662                        GFBD::write_batch_indirect_parameters_metadata(
1663                            key.0.indexed(),
1664                            output_index,
1665                            batch_set_index,
1666                            &mut phase_indirect_parameters_buffers.buffers,
1667                            indirect_parameters_index,
1668                        );
1669                        work_item_buffer.push(
1670                            key.0.indexed(),
1671                            PreprocessWorkItem {
1672                                input_index: *input_index,
1673                                output_or_indirect_parameters_index: indirect_parameters_index,
1674                            },
1675                        );
1676                        batch = Some(BinnedRenderPhaseBatch {
1677                            representative_entity: (Entity::PLACEHOLDER, main_entity),
1678                            instance_range: output_index..output_index + 1,
1679                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
1680                                range: indirect_parameters_index..(indirect_parameters_index + 1),
1681                                batch_set_index: None,
1682                            },
1683                        });
1684                    }
1685
1686                    None => {
1687                        // Start a new batch, in direct mode.
1688                        work_item_buffer.push(
1689                            key.0.indexed(),
1690                            PreprocessWorkItem {
1691                                input_index: *input_index,
1692                                output_or_indirect_parameters_index: output_index,
1693                            },
1694                        );
1695                        batch = Some(BinnedRenderPhaseBatch {
1696                            representative_entity: (Entity::PLACEHOLDER, main_entity),
1697                            instance_range: output_index..output_index + 1,
1698                            extra_index: PhaseItemExtraIndex::None,
1699                        });
1700                    }
1701                }
1702            }
1703
1704            if let Some(batch) = batch {
1705                match phase.batch_sets {
1706                    BinnedRenderPhaseBatchSets::DynamicUniforms(_) => {
1707                        error!("Dynamic uniform batch sets shouldn't be used here");
1708                    }
1709                    BinnedRenderPhaseBatchSets::Direct(ref mut vec) => {
1710                        vec.push(batch);
1711                    }
1712                    BinnedRenderPhaseBatchSets::MultidrawIndirect(ref mut vec) => {
1713                        // The Bevy renderer will never mark a mesh as batchable
1714                        // but not multidrawable if multidraw is in use.
1715                        // However, custom render pipelines might do so, such as
1716                        // the `specialized_mesh_pipeline` example.
1717                        vec.push(BinnedRenderPhaseBatchSet {
1718                            first_batch: batch,
1719                            batch_count: 1,
1720                            bin_key: key.1.clone(),
1721                            index: phase_indirect_parameters_buffers
1722                                .buffers
1723                                .batch_set_count(key.0.indexed())
1724                                as u32,
1725                        });
1726                    }
1727                }
1728            }
1729        }
1730
1731        // Prepare unbatchables.
1732        for (key, unbatchables) in &mut phase.unbatchable_meshes {
1733            // Allocate the indirect parameters if necessary.
1734            let mut indirect_parameters_offset = if no_indirect_drawing {
1735                None
1736            } else if key.0.indexed() {
1737                Some(
1738                    phase_indirect_parameters_buffers
1739                        .buffers
1740                        .indexed
1741                        .allocate(unbatchables.entities.len() as u32),
1742                )
1743            } else {
1744                Some(
1745                    phase_indirect_parameters_buffers
1746                        .buffers
1747                        .non_indexed
1748                        .allocate(unbatchables.entities.len() as u32),
1749                )
1750            };
1751
1752            for main_entity in unbatchables.entities.keys() {
1753                let Some(input_index) = GFBD::get_binned_index(&system_param_item, *main_entity)
1754                else {
1755                    continue;
1756                };
1757                let output_index = data_buffer.add() as u32;
1758
1759                if let Some(ref mut indirect_parameters_index) = indirect_parameters_offset {
1760                    // We're in indirect mode, so add an indirect parameters
1761                    // index.
1762                    GFBD::write_batch_indirect_parameters_metadata(
1763                        key.0.indexed(),
1764                        output_index,
1765                        None,
1766                        &mut phase_indirect_parameters_buffers.buffers,
1767                        *indirect_parameters_index,
1768                    );
1769                    work_item_buffer.push(
1770                        key.0.indexed(),
1771                        PreprocessWorkItem {
1772                            input_index: input_index.into(),
1773                            output_or_indirect_parameters_index: *indirect_parameters_index,
1774                        },
1775                    );
1776                    unbatchables
1777                        .buffer_indices
1778                        .add(UnbatchableBinnedEntityIndices {
1779                            instance_index: *indirect_parameters_index,
1780                            extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
1781                                range: *indirect_parameters_index..(*indirect_parameters_index + 1),
1782                                batch_set_index: None,
1783                            },
1784                        });
1785                    phase_indirect_parameters_buffers
1786                        .buffers
1787                        .add_batch_set(key.0.indexed(), *indirect_parameters_index);
1788                    *indirect_parameters_index += 1;
1789                } else {
1790                    work_item_buffer.push(
1791                        key.0.indexed(),
1792                        PreprocessWorkItem {
1793                            input_index: input_index.into(),
1794                            output_or_indirect_parameters_index: output_index,
1795                        },
1796                    );
1797                    unbatchables
1798                        .buffer_indices
1799                        .add(UnbatchableBinnedEntityIndices {
1800                            instance_index: output_index,
1801                            extra_index: PhaseItemExtraIndex::None,
1802                        });
1803                }
1804            }
1805        }
1806    }
1807}
1808
/// The state that [`batch_and_prepare_binned_render_phase`] uses to construct
/// multidrawable batch sets.
///
/// The [`batch_and_prepare_binned_render_phase`] system maintains two of these:
/// one for indexed meshes and one for non-indexed meshes.
struct MultidrawableBatchSetPreparer<BPI, GFBD>
where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    /// The offset in the indirect parameters buffer at which the next indirect
    /// parameters will be written.
    indirect_parameters_index: u32,
    /// The number of batch sets we've built so far for this mesh class.
    batch_set_index: u32,
    /// The number of work items we've emitted so far for this mesh class.
    ///
    /// After all batch sets are prepared, this count is used to reserve space
    /// in the late occlusion-culling work item buffers.
    work_item_count: usize,
    /// Ties this state to the phase item and batch data types without storing
    /// any values of those types.
    phantom: PhantomData<(BPI, GFBD)>,
}
1828
1829impl<BPI, GFBD> MultidrawableBatchSetPreparer<BPI, GFBD>
1830where
1831    BPI: BinnedPhaseItem,
1832    GFBD: GetFullBatchData,
1833{
1834    /// Creates a new [`MultidrawableBatchSetPreparer`] that will start writing
1835    /// indirect parameters and batch sets at the given indices.
1836    #[inline]
1837    fn new(initial_indirect_parameters_index: u32, initial_batch_set_index: u32) -> Self {
1838        MultidrawableBatchSetPreparer {
1839            indirect_parameters_index: initial_indirect_parameters_index,
1840            batch_set_index: initial_batch_set_index,
1841            work_item_count: 0,
1842            phantom: PhantomData,
1843        }
1844    }
1845
1846    /// Creates batch sets and writes the GPU data needed to draw all visible
1847    /// entities of one mesh class in the given batch set.
1848    ///
1849    /// The *mesh class* represents whether the mesh has indices or not.
1850    #[inline]
1851    fn prepare_multidrawable_binned_batch_set<IP>(
1852        &mut self,
1853        bins: &IndexMap<BPI::BinKey, RenderBin>,
1854        output_index: &mut u32,
1855        data_buffer: &mut UninitBufferVec<GFBD::BufferData>,
1856        indexed_work_item_buffer: &mut RawBufferVec<PreprocessWorkItem>,
1857        mesh_class_buffers: &mut MeshClassIndirectParametersBuffers<IP>,
1858        batch_sets: &mut Vec<BinnedRenderPhaseBatchSet<BPI::BinKey>>,
1859    ) where
1860        IP: Clone + ShaderSize + WriteInto,
1861    {
1862        let current_indexed_batch_set_index = self.batch_set_index;
1863        let current_output_index = *output_index;
1864
1865        let indirect_parameters_base = self.indirect_parameters_index;
1866
1867        // We're going to write the first entity into the batch set. Do this
1868        // here so that we can preload the bin into cache as a side effect.
1869        let Some((first_bin_key, first_bin)) = bins.iter().next() else {
1870            return;
1871        };
1872        let first_bin_len = first_bin.entities().len();
1873        let first_bin_entity = first_bin
1874            .entities()
1875            .keys()
1876            .next()
1877            .copied()
1878            .unwrap_or(MainEntity::from(Entity::PLACEHOLDER));
1879
1880        // Traverse the batch set, processing each bin.
1881        for bin in bins.values() {
1882            // Record the first output index for this batch, as well as its own
1883            // index.
1884            mesh_class_buffers
1885                .cpu_metadata
1886                .push(IndirectParametersCpuMetadata {
1887                    base_output_index: *output_index,
1888                    batch_set_index: self.batch_set_index,
1889                });
1890
1891            // Traverse the bin, pushing `PreprocessWorkItem`s for each entity
1892            // within it. This is a hot loop, so make it as fast as possible.
1893            for &input_index in bin.entities().values() {
1894                indexed_work_item_buffer.push(PreprocessWorkItem {
1895                    input_index: *input_index,
1896                    output_or_indirect_parameters_index: self.indirect_parameters_index,
1897                });
1898            }
1899
1900            // Reserve space for the appropriate number of entities in the data
1901            // buffer. Also, advance the output index and work item count.
1902            let bin_entity_count = bin.entities().len();
1903            data_buffer.add_multiple(bin_entity_count);
1904            *output_index += bin_entity_count as u32;
1905            self.work_item_count += bin_entity_count;
1906
1907            self.indirect_parameters_index += 1;
1908        }
1909
1910        // Reserve space for the bins in this batch set in the GPU buffers.
1911        let bin_count = bins.len();
1912        mesh_class_buffers.gpu_metadata.add_multiple(bin_count);
1913        mesh_class_buffers.data.add_multiple(bin_count);
1914
1915        // Write the information the GPU will need about this batch set.
1916        mesh_class_buffers.batch_sets.push(IndirectBatchSet {
1917            indirect_parameters_base,
1918            indirect_parameters_count: 0,
1919        });
1920
1921        self.batch_set_index += 1;
1922
1923        // Record the batch set. The render node later processes this record to
1924        // render the batches.
1925        batch_sets.push(BinnedRenderPhaseBatchSet {
1926            first_batch: BinnedRenderPhaseBatch {
1927                representative_entity: (Entity::PLACEHOLDER, first_bin_entity),
1928                instance_range: current_output_index..(current_output_index + first_bin_len as u32),
1929                extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(NonMaxU32::new(
1930                    indirect_parameters_base,
1931                )),
1932            },
1933            bin_key: (*first_bin_key).clone(),
1934            batch_count: self.indirect_parameters_index - indirect_parameters_base,
1935            index: current_indexed_batch_set_index,
1936        });
1937    }
1938}
1939
1940/// A system that gathers up the per-phase GPU buffers and inserts them into the
1941/// [`BatchedInstanceBuffers`] and [`IndirectParametersBuffers`] tables.
1942///
1943/// This runs after the [`batch_and_prepare_binned_render_phase`] or
1944/// [`batch_and_prepare_sorted_render_phase`] systems. It takes the per-phase
1945/// [`PhaseBatchedInstanceBuffers`] and [`PhaseIndirectParametersBuffers`]
1946/// resources and inserts them into the global [`BatchedInstanceBuffers`] and
1947/// [`IndirectParametersBuffers`] tables.
1948///
1949/// This system exists so that the [`batch_and_prepare_binned_render_phase`] and
1950/// [`batch_and_prepare_sorted_render_phase`] can run in parallel with one
1951/// another. If those two systems manipulated [`BatchedInstanceBuffers`] and
1952/// [`IndirectParametersBuffers`] directly, then they wouldn't be able to run in
1953/// parallel.
1954pub fn collect_buffers_for_phase<PI, GFBD>(
1955    mut phase_batched_instance_buffers: ResMut<PhaseBatchedInstanceBuffers<PI, GFBD::BufferData>>,
1956    mut phase_indirect_parameters_buffers: ResMut<PhaseIndirectParametersBuffers<PI>>,
1957    mut batched_instance_buffers: ResMut<
1958        BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
1959    >,
1960    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
1961) where
1962    PI: PhaseItem,
1963    GFBD: GetFullBatchData + Send + Sync + 'static,
1964{
1965    // Insert the `PhaseBatchedInstanceBuffers` into the global table. Replace
1966    // the contents of the per-phase resource with the old batched instance
1967    // buffers in order to reuse allocations.
1968    let untyped_phase_batched_instance_buffers =
1969        mem::take(&mut phase_batched_instance_buffers.buffers);
1970    if let Some(mut old_untyped_phase_batched_instance_buffers) = batched_instance_buffers
1971        .phase_instance_buffers
1972        .insert(TypeId::of::<PI>(), untyped_phase_batched_instance_buffers)
1973    {
1974        old_untyped_phase_batched_instance_buffers.clear();
1975        phase_batched_instance_buffers.buffers = old_untyped_phase_batched_instance_buffers;
1976    }
1977
1978    // Insert the `PhaseIndirectParametersBuffers` into the global table.
1979    // Replace the contents of the per-phase resource with the old indirect
1980    // parameters buffers in order to reuse allocations.
1981    let untyped_phase_indirect_parameters_buffers = mem::replace(
1982        &mut phase_indirect_parameters_buffers.buffers,
1983        UntypedPhaseIndirectParametersBuffers::new(
1984            indirect_parameters_buffers.allow_copies_from_indirect_parameter_buffers,
1985        ),
1986    );
1987    if let Some(mut old_untyped_phase_indirect_parameters_buffers) = indirect_parameters_buffers
1988        .insert(
1989            TypeId::of::<PI>(),
1990            untyped_phase_indirect_parameters_buffers,
1991        )
1992    {
1993        old_untyped_phase_indirect_parameters_buffers.clear();
1994        phase_indirect_parameters_buffers.buffers = old_untyped_phase_indirect_parameters_buffers;
1995    }
1996}
1997
1998/// A system that writes all instance buffers to the GPU.
1999pub fn write_batched_instance_buffers<GFBD>(
2000    render_device: Res<RenderDevice>,
2001    render_queue: Res<RenderQueue>,
2002    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
2003) where
2004    GFBD: GetFullBatchData,
2005{
2006    let BatchedInstanceBuffers {
2007        current_input_buffer,
2008        previous_input_buffer,
2009        phase_instance_buffers,
2010    } = gpu_array_buffer.into_inner();
2011
2012    current_input_buffer
2013        .buffer
2014        .write_buffer(&render_device, &render_queue);
2015    previous_input_buffer
2016        .buffer
2017        .write_buffer(&render_device, &render_queue);
2018
2019    for phase_instance_buffers in phase_instance_buffers.values_mut() {
2020        let UntypedPhaseBatchedInstanceBuffers {
2021            ref mut data_buffer,
2022            ref mut work_item_buffers,
2023            ref mut late_indexed_indirect_parameters_buffer,
2024            ref mut late_non_indexed_indirect_parameters_buffer,
2025        } = *phase_instance_buffers;
2026
2027        data_buffer.write_buffer(&render_device);
2028        late_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
2029        late_non_indexed_indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
2030
2031        for phase_work_item_buffers in work_item_buffers.values_mut() {
2032            match *phase_work_item_buffers {
2033                PreprocessWorkItemBuffers::Direct(ref mut buffer_vec) => {
2034                    buffer_vec.write_buffer(&render_device, &render_queue);
2035                }
2036                PreprocessWorkItemBuffers::Indirect {
2037                    ref mut indexed,
2038                    ref mut non_indexed,
2039                    ref mut gpu_occlusion_culling,
2040                } => {
2041                    indexed.write_buffer(&render_device, &render_queue);
2042                    non_indexed.write_buffer(&render_device, &render_queue);
2043
2044                    if let Some(GpuOcclusionCullingWorkItemBuffers {
2045                        ref mut late_indexed,
2046                        ref mut late_non_indexed,
2047                        late_indirect_parameters_indexed_offset: _,
2048                        late_indirect_parameters_non_indexed_offset: _,
2049                    }) = *gpu_occlusion_culling
2050                    {
2051                        if !late_indexed.is_empty() {
2052                            late_indexed.write_buffer(&render_device);
2053                        }
2054                        if !late_non_indexed.is_empty() {
2055                            late_non_indexed.write_buffer(&render_device);
2056                        }
2057                    }
2058                }
2059            }
2060        }
2061    }
2062}
2063
2064pub fn clear_indirect_parameters_buffers(
2065    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
2066) {
2067    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
2068        phase_indirect_parameters_buffers.clear();
2069    }
2070}
2071
2072pub fn write_indirect_parameters_buffers(
2073    render_device: Res<RenderDevice>,
2074    render_queue: Res<RenderQueue>,
2075    mut indirect_parameters_buffers: ResMut<IndirectParametersBuffers>,
2076) {
2077    for phase_indirect_parameters_buffers in indirect_parameters_buffers.values_mut() {
2078        phase_indirect_parameters_buffers
2079            .indexed
2080            .data
2081            .write_buffer(&render_device);
2082        phase_indirect_parameters_buffers
2083            .non_indexed
2084            .data
2085            .write_buffer(&render_device);
2086
2087        phase_indirect_parameters_buffers
2088            .indexed
2089            .cpu_metadata
2090            .write_buffer(&render_device, &render_queue);
2091        phase_indirect_parameters_buffers
2092            .non_indexed
2093            .cpu_metadata
2094            .write_buffer(&render_device, &render_queue);
2095
2096        phase_indirect_parameters_buffers
2097            .non_indexed
2098            .gpu_metadata
2099            .write_buffer(&render_device);
2100        phase_indirect_parameters_buffers
2101            .indexed
2102            .gpu_metadata
2103            .write_buffer(&render_device);
2104
2105        phase_indirect_parameters_buffers
2106            .indexed
2107            .batch_sets
2108            .write_buffer(&render_device, &render_queue);
2109        phase_indirect_parameters_buffers
2110            .non_indexed
2111            .batch_sets
2112            .write_buffer(&render_device, &render_queue);
2113    }
2114}
2115
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn instance_buffer_correct_behavior() {
        let mut instance_buffer = InstanceInputUniformBuffer::new();

        let index = instance_buffer.add(2);
        instance_buffer.remove(index);
        // Removing a slot frees it for reuse but doesn't zero the underlying
        // storage, so the unchecked accessor still sees the old value while
        // the checked accessor reports the slot as vacant.
        assert_eq!(instance_buffer.get_unchecked(index), 2);
        assert_eq!(instance_buffer.get(index), None);

        instance_buffer.add(5);
        // The freed slot should have been reused, so the buffer must not have
        // grown.
        assert_eq!(instance_buffer.buffer().len(), 1);
    }
}