bevy_render/batching/
mod.rs

1use bevy_ecs::{
2    component::Component,
3    entity::Entity,
4    system::{ResMut, SystemParam, SystemParamItem},
5};
6use bytemuck::Pod;
7use gpu_preprocessing::UntypedPhaseIndirectParametersBuffers;
8use nonmax::NonMaxU32;
9
10use crate::{
11    render_phase::{
12        BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItemExtraIndex,
13        SortedPhaseItem, SortedRenderPhase, ViewBinnedRenderPhases,
14    },
15    render_resource::{CachedRenderPipelineId, GpuArrayBufferable},
16    sync_world::MainEntity,
17};
18
19pub mod gpu_preprocessing;
20pub mod no_gpu_preprocessing;
21
22/// Add this component to mesh entities to disable automatic batching
23#[derive(Component, Default)]
24pub struct NoAutomaticBatching;
25
26/// Data necessary to be equal for two draw commands to be mergeable
27///
28/// This is based on the following assumptions:
29/// - Only entities with prepared assets (pipelines, materials, meshes) are
30///   queued to phases
31/// - View bindings are constant across a phase for a given draw function as
32///   phases are per-view
33/// - `batch_and_prepare_render_phase` is the only system that performs this
34///   batching and has sole responsibility for preparing the per-object data.
35///   As such the mesh binding and dynamic offsets are assumed to only be
36///   variable as a result of the `batch_and_prepare_render_phase` system, e.g.
37///   due to having to split data across separate uniform bindings within the
38///   same buffer due to the maximum uniform buffer binding size.
39#[derive(PartialEq)]
40struct BatchMeta<T: PartialEq> {
41    /// The pipeline id encompasses all pipeline configuration including vertex
42    /// buffers and layouts, shaders and their specializations, bind group
43    /// layouts, etc.
44    pipeline_id: CachedRenderPipelineId,
45    /// The draw function id defines the `RenderCommands` that are called to
46    /// set the pipeline and bindings, and make the draw command
47    draw_function_id: DrawFunctionId,
48    dynamic_offset: Option<NonMaxU32>,
49    user_data: T,
50}
51
52impl<T: PartialEq> BatchMeta<T> {
53    fn new(item: &impl CachedRenderPipelinePhaseItem, user_data: T) -> Self {
54        BatchMeta {
55            pipeline_id: item.cached_pipeline(),
56            draw_function_id: item.draw_function(),
57            dynamic_offset: match item.extra_index() {
58                PhaseItemExtraIndex::DynamicOffset(dynamic_offset) => {
59                    NonMaxU32::new(dynamic_offset)
60                }
61                PhaseItemExtraIndex::None | PhaseItemExtraIndex::IndirectParametersIndex { .. } => {
62                    None
63                }
64            },
65            user_data,
66        }
67    }
68}
69
70/// A trait to support getting data used for batching draw commands via phase
71/// items.
72///
73/// This is a simple version that only allows for sorting, not binning, as well
74/// as only CPU processing, not GPU preprocessing. For these fancier features,
75/// see [`GetFullBatchData`].
76pub trait GetBatchData {
77    /// The system parameters [`GetBatchData::get_batch_data`] needs in
78    /// order to compute the batch data.
79    type Param: SystemParam + 'static;
80    /// Data used for comparison between phase items. If the pipeline id, draw
81    /// function id, per-instance data buffer dynamic offset and this data
82    /// matches, the draws can be batched.
83    type CompareData: PartialEq;
84    /// The per-instance data to be inserted into the
85    /// [`crate::render_resource::GpuArrayBuffer`] containing these data for all
86    /// instances.
87    type BufferData: GpuArrayBufferable + Sync + Send + 'static;
88    /// Get the per-instance data to be inserted into the
89    /// [`crate::render_resource::GpuArrayBuffer`]. If the instance can be
90    /// batched, also return the data used for comparison when deciding whether
91    /// draws can be batched, else return None for the `CompareData`.
92    ///
93    /// This is only called when building instance data on CPU. In the GPU
94    /// instance data building path, we use
95    /// [`GetFullBatchData::get_index_and_compare_data`] instead.
96    fn get_batch_data(
97        param: &SystemParamItem<Self::Param>,
98        query_item: (Entity, MainEntity),
99    ) -> Option<(Self::BufferData, Option<Self::CompareData>)>;
100}
101
102/// A trait to support getting data used for batching draw commands via phase
103/// items.
104///
105/// This version allows for binning and GPU preprocessing.
106pub trait GetFullBatchData: GetBatchData {
107    /// The per-instance data that was inserted into the
108    /// [`crate::render_resource::BufferVec`] during extraction.
109    type BufferInputData: Pod + Default + Sync + Send;
110
111    /// Get the per-instance data to be inserted into the
112    /// [`crate::render_resource::GpuArrayBuffer`].
113    ///
114    /// This is only called when building uniforms on CPU. In the GPU instance
115    /// buffer building path, we use
116    /// [`GetFullBatchData::get_index_and_compare_data`] instead.
117    fn get_binned_batch_data(
118        param: &SystemParamItem<Self::Param>,
119        query_item: MainEntity,
120    ) -> Option<Self::BufferData>;
121
122    /// Returns the index of the [`GetFullBatchData::BufferInputData`] that the
123    /// GPU preprocessing phase will use.
124    ///
125    /// We already inserted the [`GetFullBatchData::BufferInputData`] during the
126    /// extraction phase before we got here, so this function shouldn't need to
127    /// look up any render data. If CPU instance buffer building is in use, this
128    /// function will never be called.
129    fn get_index_and_compare_data(
130        param: &SystemParamItem<Self::Param>,
131        query_item: MainEntity,
132    ) -> Option<(NonMaxU32, Option<Self::CompareData>)>;
133
134    /// Returns the index of the [`GetFullBatchData::BufferInputData`] that the
135    /// GPU preprocessing phase will use.
136    ///
137    /// We already inserted the [`GetFullBatchData::BufferInputData`] during the
138    /// extraction phase before we got here, so this function shouldn't need to
139    /// look up any render data.
140    ///
141    /// This function is currently only called for unbatchable entities when GPU
142    /// instance buffer building is in use. For batchable entities, the uniform
143    /// index is written during queuing (e.g. in `queue_material_meshes`). In
144    /// the case of CPU instance buffer building, the CPU writes the uniforms,
145    /// so there's no index to return.
146    fn get_binned_index(
147        param: &SystemParamItem<Self::Param>,
148        query_item: MainEntity,
149    ) -> Option<NonMaxU32>;
150
151    /// Writes the [`gpu_preprocessing::IndirectParametersGpuMetadata`]
152    /// necessary to draw this batch into the given metadata buffer at the given
153    /// index.
154    ///
155    /// This is only used if GPU culling is enabled (which requires GPU
156    /// preprocessing).
157    ///
158    /// * `indexed` is true if the mesh is indexed or false if it's non-indexed.
159    ///
160    /// * `base_output_index` is the index of the first mesh instance in this
161    ///   batch in the `MeshUniform` output buffer.
162    ///
163    /// * `batch_set_index` is the index of the batch set in the
164    ///   [`gpu_preprocessing::IndirectBatchSet`] buffer, if this batch belongs to
165    ///   a batch set.
166    ///
167    /// * `indirect_parameters_buffers` is the buffer in which to write the
168    ///   metadata.
169    ///
170    /// * `indirect_parameters_offset` is the index in that buffer at which to
171    ///   write the metadata.
172    fn write_batch_indirect_parameters_metadata(
173        indexed: bool,
174        base_output_index: u32,
175        batch_set_index: Option<NonMaxU32>,
176        indirect_parameters_buffers: &mut UntypedPhaseIndirectParametersBuffers,
177        indirect_parameters_offset: u32,
178    );
179}
180
181/// Sorts a render phase that uses bins.
182pub fn sort_binned_render_phase<BPI>(mut phases: ResMut<ViewBinnedRenderPhases<BPI>>)
183where
184    BPI: BinnedPhaseItem,
185{
186    for phase in phases.values_mut() {
187        phase.multidrawable_meshes.sort_unstable_keys();
188        phase.batchable_meshes.sort_unstable_keys();
189        phase.unbatchable_meshes.sort_unstable_keys();
190        phase.non_mesh_items.sort_unstable_keys();
191    }
192}
193
194/// Batches the items in a sorted render phase.
195///
196/// This means comparing metadata needed to draw each phase item and trying to
197/// combine the draws into a batch.
198///
199/// This is common code factored out from
200/// [`gpu_preprocessing::batch_and_prepare_sorted_render_phase`] and
201/// [`no_gpu_preprocessing::batch_and_prepare_sorted_render_phase`].
202fn batch_and_prepare_sorted_render_phase<I, GBD>(
203    phase: &mut SortedRenderPhase<I>,
204    mut process_item: impl FnMut(&mut I) -> Option<GBD::CompareData>,
205) where
206    I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
207    GBD: GetBatchData,
208{
209    let items = phase.items.iter_mut().map(|item| {
210        let batch_data = match process_item(item) {
211            Some(compare_data) if I::AUTOMATIC_BATCHING => Some(BatchMeta::new(item, compare_data)),
212            _ => None,
213        };
214        (item.batch_range_mut(), batch_data)
215    });
216
217    items.reduce(|(start_range, prev_batch_meta), (range, batch_meta)| {
218        if batch_meta.is_some() && prev_batch_meta == batch_meta {
219            start_range.end = range.end;
220            (start_range, prev_batch_meta)
221        } else {
222            (range, batch_meta)
223        }
224    });
225}