1use bevy_app::{App, Plugin};
4use bevy_derive::{Deref, DerefMut};
5use bevy_ecs::{
6 entity::{Entity, EntityHashMap},
7 query::{Has, With},
8 schedule::IntoSystemConfigs as _,
9 system::{Query, Res, ResMut, Resource, StaticSystemParam},
10 world::{FromWorld, World},
11};
12use bevy_encase_derive::ShaderType;
13use bytemuck::{Pod, Zeroable};
14use nonmax::NonMaxU32;
15use smallvec::smallvec;
16use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
17
18use crate::{
19 render_phase::{
20 BinnedPhaseItem, BinnedRenderPhaseBatch, CachedRenderPipelinePhaseItem,
21 PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices,
22 ViewBinnedRenderPhases, ViewSortedRenderPhases,
23 },
24 render_resource::{BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
25 renderer::{RenderAdapter, RenderDevice, RenderQueue},
26 view::{ExtractedView, GpuCulling, ViewTarget},
27 Render, RenderApp, RenderSet,
28};
29
30use super::{BatchMeta, GetBatchData, GetFullBatchData};
31
/// A plugin that registers the GPU-batching resources and systems
/// (see [`Plugin::build`]/[`Plugin::finish`] below).
pub struct BatchingPlugin;
33
34impl Plugin for BatchingPlugin {
35 fn build(&self, app: &mut App) {
36 let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
37 return;
38 };
39
40 render_app
41 .insert_resource(IndirectParametersBuffer::new())
42 .add_systems(
43 Render,
44 write_indirect_parameters_buffer.in_set(RenderSet::PrepareResourcesFlush),
45 );
46 }
47
48 fn finish(&self, app: &mut App) {
49 let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
50 return;
51 };
52
53 render_app.init_resource::<GpuPreprocessingSupport>();
54 }
55}
56
/// The level of GPU preprocessing support the current adapter/device offers,
/// as detected by the [`FromWorld`] implementation in this module.
#[derive(Clone, Copy, PartialEq, Resource)]
pub enum GpuPreprocessingSupport {
    /// No GPU preprocessing at all (no compute workgroup support, or a
    /// known-problematic Android adapter).
    None,
    /// GPU preprocessing is available, but GPU culling is not (missing
    /// indirect-draw features or downlevel flags).
    PreprocessingOnly,
    /// Both GPU preprocessing and GPU culling are available.
    Culling,
}
74
/// The set of buffers used for GPU preprocessing of batched instances.
///
/// `BD` is the per-instance data consumed on the GPU; `BDI` is the input data
/// that the preprocessing pass reads.
#[derive(Resource)]
pub struct BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod,
{
    /// The output buffer of `BD` instances; the CPU only reserves slots in it
    /// (via `add()` in the batching systems below) — it is never written from
    /// the CPU side in this module.
    pub data_buffer: UninitBufferVec<BD>,

    /// One work item buffer per view entity; stale entries are pruned by
    /// [`delete_old_work_item_buffers`].
    pub work_item_buffers: EntityHashMap<PreprocessWorkItemBuffer>,

    /// The `BDI` input data uploaded this frame (written in
    /// [`write_batched_instance_buffers`]).
    pub current_input_buffer: RawBufferVec<BDI>,

    /// The `BDI` input data from the previous frame.
    /// NOTE(review): this buffer is cleared here but never filled or uploaded
    /// in this file — presumably it is populated elsewhere; confirm.
    pub previous_input_buffer: RawBufferVec<BDI>,
}
117
/// The preprocessing work items for a single view, plus whether that view
/// uses GPU culling (which changes how each item's `output_index` is used).
pub struct PreprocessWorkItemBuffer {
    /// The work items consumed by the preprocessing pass.
    pub buffer: BufferVec<PreprocessWorkItem>,
    /// True if the view entity has the `GpuCulling` component.
    pub gpu_culling: bool,
}
125
/// One unit of GPU preprocessing work: read the instance at `input_index` and
/// produce output identified by `output_index`.
#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct PreprocessWorkItem {
    /// Index of the instance in the input (`BDI`) buffer.
    pub input_index: u32,
    /// Without GPU culling, the slot in the output data buffer; with GPU
    /// culling, the index of the batch's [`IndirectParameters`] instead (see
    /// how the batching systems below fill this field).
    pub output_index: u32,
}
139
/// GPU-side parameters for an indirect draw, shaped to serve both indexed and
/// non-indexed draws (hence the `_or_` field names).
///
/// NOTE(review): the layout presumably matches wgpu's
/// `DrawIndirectArgs`/`DrawIndexedIndirectArgs` — confirm against the
/// consuming shader/draw path.
#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParameters {
    /// Vertex count (non-indexed draws) or index count (indexed draws).
    pub vertex_or_index_count: u32,

    /// The number of instances to draw.
    pub instance_count: u32,

    /// First vertex (non-indexed draws) or first index (indexed draws).
    pub first_vertex_or_first_index: u32,

    /// Base vertex (indexed draws) or first instance (non-indexed draws).
    pub base_vertex_or_first_instance: u32,

    /// First instance — only meaningful for the indexed layout, where the
    /// previous field holds the base vertex.
    pub first_instance: u32,
}
203
/// The buffer of [`IndirectParameters`] for indirect draws; created with both
/// `STORAGE` and `INDIRECT` usages (see [`IndirectParametersBuffer::new`]).
#[derive(Resource, Deref, DerefMut)]
pub struct IndirectParametersBuffer(pub BufferVec<IndirectParameters>);
207
208impl IndirectParametersBuffer {
209 pub fn new() -> IndirectParametersBuffer {
211 IndirectParametersBuffer(BufferVec::new(
212 BufferUsages::STORAGE | BufferUsages::INDIRECT,
213 ))
214 }
215}
216
217impl Default for IndirectParametersBuffer {
218 fn default() -> Self {
219 Self::new()
220 }
221}
222
impl FromWorld for GpuPreprocessingSupport {
    /// Probes the render adapter and device to decide how much GPU
    /// preprocessing this platform can use.
    fn from_world(world: &mut World) -> Self {
        let adapter = world.resource::<RenderAdapter>();
        let device = world.resource::<RenderDevice>();

        // Detects Adreno adapters whose model number is in a range this code
        // treats as unsupported. Only ever returns true on Android.
        fn is_non_supported_android_device(adapter: &RenderAdapter) -> bool {
            if cfg!(target_os = "android") {
                let adapter_name = adapter.get_info().name;

                // Parse the leading digits of the model suffix into an
                // integer (e.g. "730" from "730B"); parsing stops at the
                // first non-digit, and a suffix with no leading digits folds
                // to 0, which the check below treats as unsupported.
                let non_supported_adreno_model = |model: &str| -> bool {
                    let model = model
                        .chars()
                        .map_while(|c| c.to_digit(10))
                        .fold(0, |acc, digit| acc * 10 + digit);

                    // Models up to 730 are unsupported, with 720 carved out
                    // as an exception.
                    model != 720 && model <= 730
                };

                adapter_name
                    .strip_prefix("Adreno (TM) ")
                    .is_some_and(non_supported_adreno_model)
            } else {
                false
            }
        }

        // No compute workgroup support, or a known-bad Android adapter:
        // no GPU preprocessing at all.
        if device.limits().max_compute_workgroup_size_x == 0 || is_non_supported_android_device(adapter)
        {
            GpuPreprocessingSupport::None
        // GPU culling additionally needs indirect-first-instance support and
        // the downlevel guarantee that vertex/instance indices respect the
        // first values in indirect draws; without both, preprocessing only.
        } else if !device
            .features()
            .contains(Features::INDIRECT_FIRST_INSTANCE) ||
            !adapter.get_downlevel_capabilities().flags.contains(
                DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW)
        {
            GpuPreprocessingSupport::PreprocessingOnly
        } else {
            GpuPreprocessingSupport::Culling
        }
    }
}
267
268impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
269where
270 BD: GpuArrayBufferable + Sync + Send + 'static,
271 BDI: Pod,
272{
273 pub fn new() -> Self {
275 BatchedInstanceBuffers {
276 data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
277 work_item_buffers: EntityHashMap::default(),
278 current_input_buffer: RawBufferVec::new(BufferUsages::STORAGE),
279 previous_input_buffer: RawBufferVec::new(BufferUsages::STORAGE),
280 }
281 }
282
283 pub fn instance_data_binding(&self) -> Option<BindingResource> {
287 self.data_buffer
288 .buffer()
289 .map(|buffer| buffer.as_entire_binding())
290 }
291
292 pub fn clear(&mut self) {
294 self.data_buffer.clear();
295 self.current_input_buffer.clear();
296 self.previous_input_buffer.clear();
297 for work_item_buffer in self.work_item_buffers.values_mut() {
298 work_item_buffer.buffer.clear();
299 }
300 }
301}
302
303impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
304where
305 BD: GpuArrayBufferable + Sync + Send + 'static,
306 BDI: Pod,
307{
308 fn default() -> Self {
309 Self::new()
310 }
311}
312
/// An in-progress batch of sorted phase items, accumulated while scanning a
/// sorted render phase.
struct SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// Index into `phase.items` of the first item of this batch; this is the
    /// item whose batch range/extra index get written on flush.
    phase_item_start_index: u32,

    /// The output-buffer index of the first instance in this batch.
    instance_start_index: u32,

    /// The batch's [`IndirectParameters`] index, present only when the view
    /// uses GPU culling.
    indirect_parameters_index: Option<NonMaxU32>,

    /// Comparison data used to decide whether the next item may join this
    /// batch; `None` means no further item can be merged in.
    meta: Option<BatchMeta<F::CompareData>>,
}
338
impl<F> SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// Finalizes this batch: writes its instance range
    /// (`instance_start_index..instance_end_index`) and its indirect
    /// parameters index (if any) back onto the batch's first phase item.
    fn flush<I>(self, instance_end_index: u32, phase: &mut SortedRenderPhase<I>)
    where
        I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    {
        let (batch_range, batch_extra_index) =
            phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut();
        *batch_range = self.instance_start_index..instance_end_index;
        *batch_extra_index =
            PhaseItemExtraIndex::maybe_indirect_parameters_index(self.indirect_parameters_index);
    }
}
359
360pub fn clear_batched_gpu_instance_buffers<GFBD>(
367 gpu_batched_instance_buffers: Option<
368 ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
369 >,
370) where
371 GFBD: GetFullBatchData,
372{
373 if let Some(mut gpu_batched_instance_buffers) = gpu_batched_instance_buffers {
374 gpu_batched_instance_buffers.clear();
375 }
376}
377
378pub fn delete_old_work_item_buffers<GFBD>(
385 mut gpu_batched_instance_buffers: ResMut<
386 BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
387 >,
388 view_targets: Query<Entity, With<ViewTarget>>,
389) where
390 GFBD: GetFullBatchData,
391{
392 gpu_batched_instance_buffers
393 .work_item_buffers
394 .retain(|entity, _| view_targets.contains(*entity));
395}
396
397pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
401 gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
402 mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
403 mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
404 mut views: Query<(Entity, Has<GpuCulling>), With<ExtractedView>>,
405 system_param_item: StaticSystemParam<GFBD::Param>,
406) where
407 I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
408 GFBD: GetFullBatchData,
409{
410 let BatchedInstanceBuffers {
412 ref mut data_buffer,
413 ref mut work_item_buffers,
414 ..
415 } = gpu_array_buffer.into_inner();
416
417 for (view, gpu_culling) in &mut views {
418 let Some(phase) = sorted_render_phases.get_mut(&view) else {
419 continue;
420 };
421
422 let work_item_buffer =
424 work_item_buffers
425 .entry(view)
426 .or_insert_with(|| PreprocessWorkItemBuffer {
427 buffer: BufferVec::new(BufferUsages::STORAGE),
428 gpu_culling,
429 });
430
431 let mut batch: Option<SortedRenderBatch<GFBD>> = None;
433 for current_index in 0..phase.items.len() {
434 let item = &phase.items[current_index];
437 let entity = (item.entity(), item.main_entity());
438 let current_batch_input_index =
439 GFBD::get_index_and_compare_data(&system_param_item, entity);
440
441 let Some((current_input_index, current_meta)) = current_batch_input_index else {
446 if let Some(batch) = batch.take() {
448 batch.flush(data_buffer.len() as u32, phase);
449 }
450
451 continue;
452 };
453 let current_meta =
454 current_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta));
455
456 let can_batch = batch.as_ref().is_some_and(|batch| {
459 match (¤t_meta, &batch.meta) {
461 (Some(current_meta), Some(batch_meta)) => current_meta == batch_meta,
462 (_, _) => false,
463 }
464 });
465
466 let item = &phase.items[current_index];
468 let entity = (item.entity(), item.main_entity());
469 let output_index = data_buffer.add() as u32;
470
471 if !can_batch {
473 if let Some(batch) = batch.take() {
475 batch.flush(output_index, phase);
476 }
477
478 let indirect_parameters_index = if gpu_culling {
480 GFBD::get_batch_indirect_parameters_index(
481 &system_param_item,
482 &mut indirect_parameters_buffer,
483 entity,
484 output_index,
485 )
486 } else {
487 None
488 };
489 batch = Some(SortedRenderBatch {
490 phase_item_start_index: current_index as u32,
491 instance_start_index: output_index,
492 indirect_parameters_index,
493 meta: current_meta,
494 });
495 }
496
497 if let Some(batch) = batch.as_ref() {
500 work_item_buffer.buffer.push(PreprocessWorkItem {
501 input_index: current_input_index.into(),
502 output_index: match batch.indirect_parameters_index {
503 Some(indirect_parameters_index) => indirect_parameters_index.into(),
504 None => output_index,
505 },
506 });
507 }
508 }
509
510 if let Some(batch) = batch.take() {
512 batch.flush(data_buffer.len() as u32, phase);
513 }
514 }
515}
516
/// A system that batches the items of each view's binned render phase and
/// emits the corresponding preprocessing work items.
pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
    mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
    mut views: Query<(Entity, Has<GpuCulling>), With<ExtractedView>>,
    param: StaticSystemParam<GFBD::Param>,
) where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    let system_param_item = param.into_inner();

    // Destructure so that the individual buffers can be borrowed
    // independently inside the loop.
    let BatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ..
    } = gpu_array_buffer.into_inner();

    for (view, gpu_culling) in &mut views {
        let Some(phase) = binned_render_phases.get_mut(&view) else {
            continue;
        };

        // Fetch or create the work item buffer for this view.
        let work_item_buffer =
            work_item_buffers
                .entry(view)
                .or_insert_with(|| PreprocessWorkItemBuffer {
                    buffer: BufferVec::new(BufferUsages::STORAGE),
                    gpu_culling,
                });

        // Each batchable bin becomes at most one batch.
        for key in &phase.batchable_mesh_keys {
            let mut batch: Option<BinnedRenderPhaseBatch> = None;
            for &(entity, main_entity) in &phase.batchable_mesh_values[key] {
                // Skip entities with no index in the input buffer.
                let Some(input_index) =
                    GFBD::get_binned_index(&system_param_item, (entity, main_entity))
                else {
                    continue;
                };
                // Reserve this instance's slot in the output data buffer.
                let output_index = data_buffer.add() as u32;

                match batch {
                    // Growing an existing batch: extend its instance range
                    // and emit a work item targeting either the batch's
                    // indirect parameters (GPU culling) or the new slot.
                    Some(ref mut batch) => {
                        batch.instance_range.end = output_index + 1;
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_index: batch
                                .extra_index
                                .as_indirect_parameters_index()
                                .unwrap_or(output_index),
                        });
                    }

                    // First instance of the bin with GPU culling: allocate
                    // indirect draw parameters and point work items at them.
                    None if gpu_culling => {
                        let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index(
                            &system_param_item,
                            &mut indirect_parameters_buffer,
                            (entity, main_entity),
                            output_index,
                        );
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
                            // NOTE(review): on a `None` here this falls back
                            // to index 0 via `unwrap_or_default` — verify
                            // that's the intended fallback.
                            output_index: indirect_parameters_index.unwrap_or_default().into(),
                        });
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (entity, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(
                                indirect_parameters_index,
                            ),
                        });
                    }

                    // First instance of the bin without GPU culling: work
                    // items target the output slot directly.
                    None => {
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_index,
                        });
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: (entity, main_entity),
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::NONE,
                        });
                    }
                }
            }

            if let Some(batch) = batch {
                phase.batch_sets.push(smallvec![batch]);
            }
        }

        // Unbatchable items each get their own instance slot (and, with GPU
        // culling, their own indirect parameters).
        for key in &phase.unbatchable_mesh_keys {
            let unbatchables = phase.unbatchable_mesh_values.get_mut(key).unwrap();
            for &(entity, main_entity) in &unbatchables.entities {
                let Some(input_index) =
                    GFBD::get_binned_index(&system_param_item, (entity, main_entity))
                else {
                    continue;
                };
                let output_index = data_buffer.add() as u32;

                if gpu_culling {
                    let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index(
                        &system_param_item,
                        &mut indirect_parameters_buffer,
                        (entity, main_entity),
                        output_index,
                    )
                    .unwrap_or_default();
                    work_item_buffer.buffer.push(PreprocessWorkItem {
                        input_index: input_index.into(),
                        output_index: indirect_parameters_index.into(),
                    });
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: indirect_parameters_index.into(),
                            extra_index: PhaseItemExtraIndex::indirect_parameters_index(
                                indirect_parameters_index.into(),
                            ),
                        });
                } else {
                    work_item_buffer.buffer.push(PreprocessWorkItem {
                        input_index: input_index.into(),
                        output_index,
                    });
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: output_index,
                            extra_index: PhaseItemExtraIndex::NONE,
                        });
                }
            }
        }
    }
}
661
662pub fn write_batched_instance_buffers<GFBD>(
664 render_device: Res<RenderDevice>,
665 render_queue: Res<RenderQueue>,
666 gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
667) where
668 GFBD: GetFullBatchData,
669{
670 let BatchedInstanceBuffers {
671 ref mut data_buffer,
672 work_item_buffers: ref mut index_buffers,
673 ref mut current_input_buffer,
674 previous_input_buffer: _,
675 } = gpu_array_buffer.into_inner();
676
677 data_buffer.write_buffer(&render_device);
678 current_input_buffer.write_buffer(&render_device, &render_queue);
679 for index_buffer in index_buffers.values_mut() {
683 index_buffer
684 .buffer
685 .write_buffer(&render_device, &render_queue);
686 }
687}
688
689pub fn write_indirect_parameters_buffer(
690 render_device: Res<RenderDevice>,
691 render_queue: Res<RenderQueue>,
692 mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
693) {
694 indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
695 indirect_parameters_buffer.clear();
696}