bevy_pbr/render/
skin.rs

1use core::mem::{self, size_of};
2use std::sync::OnceLock;
3
4use bevy_asset::{prelude::AssetChanged, Assets};
5use bevy_ecs::prelude::*;
6use bevy_math::Mat4;
7use bevy_platform::collections::hash_map::Entry;
8use bevy_render::render_resource::{Buffer, BufferDescriptor};
9use bevy_render::sync_world::{MainEntity, MainEntityHashMap, MainEntityHashSet};
10use bevy_render::{
11    batching::NoAutomaticBatching,
12    mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes},
13    render_resource::BufferUsages,
14    renderer::{RenderDevice, RenderQueue},
15    view::ViewVisibility,
16    Extract,
17};
18use bevy_transform::prelude::GlobalTransform;
19use offset_allocator::{Allocation, Allocator};
20use smallvec::SmallVec;
21use tracing::error;
22
/// Upper bound on the number of joints a skinned mesh may use.
///
/// Buffer sizes are derived from this constant. How large it can safely be
/// depends on the GPU and platform; the present value was picked because it
/// is guaranteed to work everywhere. Raising it would require querying the
/// GPU limits at runtime, at which point it could no longer be a `const`.
pub const MAX_JOINTS: usize = 256;
31
/// Capacity handed to the offset allocator that places skins in the joint
/// buffer.
///
/// The allocator works in units of [`JOINTS_PER_ALLOCATION_UNIT`] joints
/// (256 bytes each), so this capacity corresponds to 256 GiB worth of joint
/// matrices, which we will never hit under any reasonable circumstances.
const MAX_TOTAL_JOINTS: u32 = 1024 * 1024 * 1024;
37
38/// The number of joints that we allocate at a time.
39///
40/// Some hardware requires that uniforms be allocated on 256-byte boundaries, so
41/// we need to allocate 4 64-byte matrices at a time to satisfy alignment
42/// requirements.
43const JOINTS_PER_ALLOCATION_UNIT: u32 = (256 / size_of::<Mat4>()) as u32;
44
/// Ratio of changed-transform entities to total joints above which every
/// joint is re-extracted.
///
/// This is the heuristic that decides whether fine-grained change detection
/// is worthwhile. Once the number of entities with a changed transform
/// exceeds this fraction of the total joint count, change detection is
/// skipped and the transforms of all joints are simply re-extracted.
const JOINT_EXTRACTION_THRESHOLD_FACTOR: f64 = 0.25;
53
/// Where a skin's first joint matrix lives inside the skin uniform buffer,
/// expressed in bytes.
#[derive(Clone, Copy)]
pub struct SkinByteOffset {
    /// Offset, in bytes, of the skin's first joint matrix.
    pub byte_offset: u32,
}
60
61impl SkinByteOffset {
62    /// Index to be in address space based on the size of a skin uniform.
63    const fn from_index(index: usize) -> Self {
64        SkinByteOffset {
65            byte_offset: (index * size_of::<Mat4>()) as u32,
66        }
67    }
68
69    /// Returns this skin index in elements (not bytes).
70    ///
71    /// Each element is a 4x4 matrix.
72    pub fn index(&self) -> u32 {
73        self.byte_offset / size_of::<Mat4>() as u32
74    }
75}
76
77/// The GPU buffers containing joint matrices for all skinned meshes.
78///
79/// This is double-buffered: we store the joint matrices of each mesh for the
80/// previous frame in addition to those of each mesh for the current frame. This
81/// is for motion vector calculation. Every frame, we swap buffers and overwrite
82/// the joint matrix buffer from two frames ago with the data for the current
83/// frame.
84///
85/// Notes on implementation: see comment on top of the `extract_skins` system.
86#[derive(Resource)]
87pub struct SkinUniforms {
88    /// The CPU-side buffer that stores the joint matrices for skinned meshes in
89    /// the current frame.
90    pub current_staging_buffer: Vec<Mat4>,
91    /// The GPU-side buffer that stores the joint matrices for skinned meshes in
92    /// the current frame.
93    pub current_buffer: Buffer,
94    /// The GPU-side buffer that stores the joint matrices for skinned meshes in
95    /// the previous frame.
96    pub prev_buffer: Buffer,
97    /// The offset allocator that manages the placement of the joints within the
98    /// [`Self::current_buffer`].
99    allocator: Allocator,
100    /// Allocation information that we keep about each skin.
101    skin_uniform_info: MainEntityHashMap<SkinUniformInfo>,
102    /// Maps each joint entity to the skins it's associated with.
103    ///
104    /// We use this in conjunction with change detection to only update the
105    /// skins that need updating each frame.
106    ///
107    /// Note that conceptually this is a hash map of sets, but we use a
108    /// [`SmallVec`] to avoid allocations for the vast majority of the cases in
109    /// which each bone belongs to exactly one skin.
110    joint_to_skins: MainEntityHashMap<SmallVec<[MainEntity; 1]>>,
111    /// The total number of joints in the scene.
112    ///
113    /// We use this as part of our heuristic to decide whether to use
114    /// fine-grained change detection.
115    total_joints: usize,
116}
117
118impl FromWorld for SkinUniforms {
119    fn from_world(world: &mut World) -> Self {
120        let device = world.resource::<RenderDevice>();
121        let buffer_usages = (if skins_use_uniform_buffers(device) {
122            BufferUsages::UNIFORM
123        } else {
124            BufferUsages::STORAGE
125        }) | BufferUsages::COPY_DST;
126
127        // Create the current and previous buffer with the minimum sizes.
128        //
129        // These will be swapped every frame.
130        let current_buffer = device.create_buffer(&BufferDescriptor {
131            label: Some("skin uniform buffer"),
132            size: MAX_JOINTS as u64 * size_of::<Mat4>() as u64,
133            usage: buffer_usages,
134            mapped_at_creation: false,
135        });
136        let prev_buffer = device.create_buffer(&BufferDescriptor {
137            label: Some("skin uniform buffer"),
138            size: MAX_JOINTS as u64 * size_of::<Mat4>() as u64,
139            usage: buffer_usages,
140            mapped_at_creation: false,
141        });
142
143        Self {
144            current_staging_buffer: vec![],
145            current_buffer,
146            prev_buffer,
147            allocator: Allocator::new(MAX_TOTAL_JOINTS),
148            skin_uniform_info: MainEntityHashMap::default(),
149            joint_to_skins: MainEntityHashMap::default(),
150            total_joints: 0,
151        }
152    }
153}
154
155impl SkinUniforms {
156    /// Returns the current offset in joints of the skin in the buffer.
157    pub fn skin_index(&self, skin: MainEntity) -> Option<u32> {
158        self.skin_uniform_info
159            .get(&skin)
160            .map(SkinUniformInfo::offset)
161    }
162
163    /// Returns the current offset in bytes of the skin in the buffer.
164    pub fn skin_byte_offset(&self, skin: MainEntity) -> Option<SkinByteOffset> {
165        self.skin_uniform_info.get(&skin).map(|skin_uniform_info| {
166            SkinByteOffset::from_index(skin_uniform_info.offset() as usize)
167        })
168    }
169
170    /// Returns an iterator over all skins in the scene.
171    pub fn all_skins(&self) -> impl Iterator<Item = &MainEntity> {
172        self.skin_uniform_info.keys()
173    }
174}
175
176/// Allocation information about each skin.
177struct SkinUniformInfo {
178    /// The allocation of the joints within the [`SkinUniforms::current_buffer`].
179    allocation: Allocation,
180    /// The entities that comprise the joints.
181    joints: Vec<MainEntity>,
182}
183
184impl SkinUniformInfo {
185    /// The offset in joints within the [`SkinUniforms::current_staging_buffer`].
186    fn offset(&self) -> u32 {
187        self.allocation.offset * JOINTS_PER_ALLOCATION_UNIT
188    }
189}
190
191/// Returns true if skinning must use uniforms (and dynamic offsets) because
192/// storage buffers aren't supported on the current platform.
193pub fn skins_use_uniform_buffers(render_device: &RenderDevice) -> bool {
194    static SKINS_USE_UNIFORM_BUFFERS: OnceLock<bool> = OnceLock::new();
195    *SKINS_USE_UNIFORM_BUFFERS
196        .get_or_init(|| render_device.limits().max_storage_buffers_per_shader_stage == 0)
197}
198
199/// Uploads the buffers containing the joints to the GPU.
200pub fn prepare_skins(
201    render_device: Res<RenderDevice>,
202    render_queue: Res<RenderQueue>,
203    uniform: ResMut<SkinUniforms>,
204) {
205    let uniform = uniform.into_inner();
206
207    if uniform.current_staging_buffer.is_empty() {
208        return;
209    }
210
211    // Swap current and previous buffers.
212    mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer);
213
214    // Resize the buffers if necessary. Include extra space equal to `MAX_JOINTS`
215    // because we need to be able to bind a full uniform buffer's worth of data
216    // if skins use uniform buffers on this platform.
217    let needed_size = (uniform.current_staging_buffer.len() as u64 + MAX_JOINTS as u64)
218        * size_of::<Mat4>() as u64;
219    if uniform.current_buffer.size() < needed_size {
220        let mut new_size = uniform.current_buffer.size();
221        while new_size < needed_size {
222            // 1.5× growth factor.
223            new_size += new_size / 2;
224        }
225
226        // Create the new buffers.
227        let buffer_usages = if skins_use_uniform_buffers(&render_device) {
228            BufferUsages::UNIFORM
229        } else {
230            BufferUsages::STORAGE
231        } | BufferUsages::COPY_DST;
232        uniform.current_buffer = render_device.create_buffer(&BufferDescriptor {
233            label: Some("skin uniform buffer"),
234            usage: buffer_usages,
235            size: new_size,
236            mapped_at_creation: false,
237        });
238        uniform.prev_buffer = render_device.create_buffer(&BufferDescriptor {
239            label: Some("skin uniform buffer"),
240            usage: buffer_usages,
241            size: new_size,
242            mapped_at_creation: false,
243        });
244
245        // We've created a new `prev_buffer` but we don't have the previous joint
246        // data needed to fill it out correctly. Use the current joint data
247        // instead.
248        //
249        // TODO: This is a bug - will cause motion blur to ignore joint movement
250        // for one frame.
251        render_queue.write_buffer(
252            &uniform.prev_buffer,
253            0,
254            bytemuck::must_cast_slice(&uniform.current_staging_buffer[..]),
255        );
256    }
257
258    // Write the data from `uniform.current_staging_buffer` into
259    // `uniform.current_buffer`.
260    render_queue.write_buffer(
261        &uniform.current_buffer,
262        0,
263        bytemuck::must_cast_slice(&uniform.current_staging_buffer[..]),
264    );
265
266    // We don't need to write `uniform.prev_buffer` because we already wrote it
267    // last frame, and the data should still be on the GPU.
268}
269
270// Notes on implementation:
271// We define the uniform binding as an array<mat4x4<f32>, N> in the shader,
272// where N is the maximum number of Mat4s we can fit in the uniform binding,
273// which may be as little as 16kB or 64kB. But, we may not need all N.
274// We may only need, for example, 10.
275//
// If we used uniform buffers ‘normally’ then we would have to write a full
// binding of data for each dynamic offset binding, which is wasteful, makes
// the buffer much larger than it needs to be, and uses more memory bandwidth
// to transfer the data, which then costs frame time. So @superdump came up
// with this design: just bind data at the specified offset and interpret
// the data at that offset as an array<T, N> regardless of what is there.
282//
283// So instead of writing N Mat4s when you only need 10, you write 10, and
284// then pad up to the next dynamic offset alignment. Then write the next.
285// And for the last dynamic offset binding, make sure there is a full binding
286// of data after it so that the buffer is of size
287// `last dynamic offset` + `array<mat4x4<f32>>`.
288//
289// Then when binding the first dynamic offset, the first 10 entries in the array
290// are what you expect, but if you read the 11th you’re reading ‘invalid’ data
291// which could be padding or could be from the next binding.
292//
293// In this way, we can pack ‘variable sized arrays’ into uniform buffer bindings
294// which normally only support fixed size arrays. You just have to make sure
295// in the shader that you only read the values that are valid for that binding.
296pub fn extract_skins(
297    skin_uniforms: ResMut<SkinUniforms>,
298    skinned_meshes: Extract<Query<(Entity, &SkinnedMesh)>>,
299    changed_skinned_meshes: Extract<
300        Query<
301            (Entity, &ViewVisibility, &SkinnedMesh),
302            Or<(
303                Changed<ViewVisibility>,
304                Changed<SkinnedMesh>,
305                AssetChanged<SkinnedMesh>,
306            )>,
307        >,
308    >,
309    skinned_mesh_inverse_bindposes: Extract<Res<Assets<SkinnedMeshInverseBindposes>>>,
310    changed_transforms: Extract<Query<(Entity, &GlobalTransform), Changed<GlobalTransform>>>,
311    joints: Extract<Query<&GlobalTransform>>,
312    mut removed_visibilities_query: Extract<RemovedComponents<ViewVisibility>>,
313    mut removed_skinned_meshes_query: Extract<RemovedComponents<SkinnedMesh>>,
314) {
315    let skin_uniforms = skin_uniforms.into_inner();
316
317    // Find skins that have become visible or invisible on this frame. Allocate,
318    // reallocate, or free space for them as necessary.
319    add_or_delete_skins(
320        skin_uniforms,
321        &changed_skinned_meshes,
322        &skinned_mesh_inverse_bindposes,
323        &joints,
324    );
325
326    // Extract the transforms for all joints from the scene, and write them into
327    // the staging buffer at the appropriate spot.
328    extract_joints(
329        skin_uniforms,
330        &skinned_meshes,
331        &changed_skinned_meshes,
332        &skinned_mesh_inverse_bindposes,
333        &changed_transforms,
334        &joints,
335    );
336
337    // Delete skins that became invisible.
338    for skinned_mesh_entity in removed_visibilities_query
339        .read()
340        .chain(removed_skinned_meshes_query.read())
341    {
342        // Only remove a skin if we didn't pick it up in `add_or_delete_skins`.
343        // It's possible that a necessary component was removed and re-added in
344        // the same frame.
345        if !changed_skinned_meshes.contains(skinned_mesh_entity) {
346            remove_skin(skin_uniforms, skinned_mesh_entity.into());
347        }
348    }
349}
350
351/// Searches for all skins that have become visible or invisible this frame and
352/// allocations for them as necessary.
353fn add_or_delete_skins(
354    skin_uniforms: &mut SkinUniforms,
355    changed_skinned_meshes: &Query<
356        (Entity, &ViewVisibility, &SkinnedMesh),
357        Or<(
358            Changed<ViewVisibility>,
359            Changed<SkinnedMesh>,
360            AssetChanged<SkinnedMesh>,
361        )>,
362    >,
363    skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
364    joints: &Query<&GlobalTransform>,
365) {
366    // Find every skinned mesh that changed one of (1) visibility; (2) joint
367    // entities (part of `SkinnedMesh`); (3) the associated
368    // `SkinnedMeshInverseBindposes` asset.
369    for (skinned_mesh_entity, skinned_mesh_view_visibility, skinned_mesh) in changed_skinned_meshes
370    {
371        // Remove the skin if it existed last frame.
372        let skinned_mesh_entity = MainEntity::from(skinned_mesh_entity);
373        remove_skin(skin_uniforms, skinned_mesh_entity);
374
375        // If the skin is invisible, we're done.
376        if !(*skinned_mesh_view_visibility).get() {
377            continue;
378        }
379
380        // Initialize the skin.
381        add_skin(
382            skinned_mesh_entity,
383            skinned_mesh,
384            skin_uniforms,
385            skinned_mesh_inverse_bindposes,
386            joints,
387        );
388    }
389}
390
391/// Extracts the global transforms of all joints and updates the staging buffer
392/// as necessary.
393fn extract_joints(
394    skin_uniforms: &mut SkinUniforms,
395    skinned_meshes: &Query<(Entity, &SkinnedMesh)>,
396    changed_skinned_meshes: &Query<
397        (Entity, &ViewVisibility, &SkinnedMesh),
398        Or<(
399            Changed<ViewVisibility>,
400            Changed<SkinnedMesh>,
401            AssetChanged<SkinnedMesh>,
402        )>,
403    >,
404    skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
405    changed_transforms: &Query<(Entity, &GlobalTransform), Changed<GlobalTransform>>,
406    joints: &Query<&GlobalTransform>,
407) {
408    // If the number of entities that changed transforms exceeds a certain
409    // fraction (currently 25%) of the total joints in the scene, then skip
410    // fine-grained change detection.
411    //
412    // Note that this is a crude heuristic, for performance reasons. It doesn't
413    // consider the ratio of modified *joints* to total joints, only the ratio
414    // of modified *entities* to total joints. Thus in the worst case we might
415    // end up re-extracting all skins even though none of the joints changed.
416    // But making the heuristic finer-grained would make it slower to evaluate,
417    // and we don't want to lose performance.
418    let threshold =
419        (skin_uniforms.total_joints as f64 * JOINT_EXTRACTION_THRESHOLD_FACTOR).floor() as usize;
420
421    if changed_transforms.iter().nth(threshold).is_some() {
422        // Go ahead and re-extract all skins in the scene.
423        for (skin_entity, skin) in skinned_meshes {
424            extract_joints_for_skin(
425                skin_entity.into(),
426                skin,
427                skin_uniforms,
428                changed_skinned_meshes,
429                skinned_mesh_inverse_bindposes,
430                joints,
431            );
432        }
433        return;
434    }
435
436    // Use fine-grained change detection to figure out only the skins that need
437    // to have their joints re-extracted.
438    let dirty_skins: MainEntityHashSet = changed_transforms
439        .iter()
440        .flat_map(|(joint, _)| skin_uniforms.joint_to_skins.get(&MainEntity::from(joint)))
441        .flat_map(|skin_joint_mappings| skin_joint_mappings.iter())
442        .copied()
443        .collect();
444
445    // Re-extract the joints for only those skins.
446    for skin_entity in dirty_skins {
447        let Ok((_, skin)) = skinned_meshes.get(*skin_entity) else {
448            continue;
449        };
450        extract_joints_for_skin(
451            skin_entity,
452            skin,
453            skin_uniforms,
454            changed_skinned_meshes,
455            skinned_mesh_inverse_bindposes,
456            joints,
457        );
458    }
459}
460
461/// Extracts all joints for a single skin and writes their transforms into the
462/// CPU staging buffer.
463fn extract_joints_for_skin(
464    skin_entity: MainEntity,
465    skin: &SkinnedMesh,
466    skin_uniforms: &mut SkinUniforms,
467    changed_skinned_meshes: &Query<
468        (Entity, &ViewVisibility, &SkinnedMesh),
469        Or<(
470            Changed<ViewVisibility>,
471            Changed<SkinnedMesh>,
472            AssetChanged<SkinnedMesh>,
473        )>,
474    >,
475    skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
476    joints: &Query<&GlobalTransform>,
477) {
478    // If we initialized the skin this frame, we already populated all
479    // the joints, so there's no need to populate them again.
480    if changed_skinned_meshes.contains(*skin_entity) {
481        return;
482    }
483
484    // Fetch information about the skin.
485    let Some(skin_uniform_info) = skin_uniforms.skin_uniform_info.get(&skin_entity) else {
486        return;
487    };
488    let Some(skinned_mesh_inverse_bindposes) =
489        skinned_mesh_inverse_bindposes.get(&skin.inverse_bindposes)
490    else {
491        return;
492    };
493
494    // Calculate and write in the new joint matrices.
495    for (joint_index, (&joint, skinned_mesh_inverse_bindpose)) in skin
496        .joints
497        .iter()
498        .zip(skinned_mesh_inverse_bindposes.iter())
499        .enumerate()
500    {
501        let Ok(joint_transform) = joints.get(joint) else {
502            continue;
503        };
504
505        let joint_matrix = joint_transform.affine() * *skinned_mesh_inverse_bindpose;
506        skin_uniforms.current_staging_buffer[skin_uniform_info.offset() as usize + joint_index] =
507            joint_matrix;
508    }
509}
510
511/// Allocates space for a new skin in the buffers, and populates its joints.
512fn add_skin(
513    skinned_mesh_entity: MainEntity,
514    skinned_mesh: &SkinnedMesh,
515    skin_uniforms: &mut SkinUniforms,
516    skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
517    joints: &Query<&GlobalTransform>,
518) {
519    // Allocate space for the joints.
520    let Some(allocation) = skin_uniforms.allocator.allocate(
521        skinned_mesh
522            .joints
523            .len()
524            .div_ceil(JOINTS_PER_ALLOCATION_UNIT as usize) as u32,
525    ) else {
526        error!(
527            "Out of space for skin: {:?}. Tried to allocate space for {:?} joints.",
528            skinned_mesh_entity,
529            skinned_mesh.joints.len()
530        );
531        return;
532    };
533
534    // Store that allocation.
535    let skin_uniform_info = SkinUniformInfo {
536        allocation,
537        joints: skinned_mesh
538            .joints
539            .iter()
540            .map(|entity| MainEntity::from(*entity))
541            .collect(),
542    };
543
544    let skinned_mesh_inverse_bindposes =
545        skinned_mesh_inverse_bindposes.get(&skinned_mesh.inverse_bindposes);
546
547    for (joint_index, &joint) in skinned_mesh.joints.iter().enumerate() {
548        // Calculate the initial joint matrix.
549        let skinned_mesh_inverse_bindpose =
550            skinned_mesh_inverse_bindposes.and_then(|skinned_mesh_inverse_bindposes| {
551                skinned_mesh_inverse_bindposes.get(joint_index)
552            });
553        let joint_matrix = match (skinned_mesh_inverse_bindpose, joints.get(joint)) {
554            (Some(skinned_mesh_inverse_bindpose), Ok(transform)) => {
555                transform.affine() * *skinned_mesh_inverse_bindpose
556            }
557            _ => Mat4::IDENTITY,
558        };
559
560        // Write in the new joint matrix, growing the staging buffer if
561        // necessary.
562        let buffer_index = skin_uniform_info.offset() as usize + joint_index;
563        if skin_uniforms.current_staging_buffer.len() < buffer_index + 1 {
564            skin_uniforms
565                .current_staging_buffer
566                .resize(buffer_index + 1, Mat4::IDENTITY);
567        }
568        skin_uniforms.current_staging_buffer[buffer_index] = joint_matrix;
569
570        // Record the inverse mapping from the joint back to the skin. We use
571        // this in order to perform fine-grained joint extraction.
572        skin_uniforms
573            .joint_to_skins
574            .entry(MainEntity::from(joint))
575            .or_default()
576            .push(skinned_mesh_entity);
577    }
578
579    // Record the number of joints.
580    skin_uniforms.total_joints += skinned_mesh.joints.len();
581
582    skin_uniforms
583        .skin_uniform_info
584        .insert(skinned_mesh_entity, skin_uniform_info);
585}
586
587/// Deallocates a skin and removes it from the [`SkinUniforms`].
588fn remove_skin(skin_uniforms: &mut SkinUniforms, skinned_mesh_entity: MainEntity) {
589    let Some(old_skin_uniform_info) = skin_uniforms.skin_uniform_info.remove(&skinned_mesh_entity)
590    else {
591        return;
592    };
593
594    // Free the allocation.
595    skin_uniforms
596        .allocator
597        .free(old_skin_uniform_info.allocation);
598
599    // Remove the inverse mapping from each joint back to the skin.
600    for &joint in &old_skin_uniform_info.joints {
601        if let Entry::Occupied(mut entry) = skin_uniforms.joint_to_skins.entry(joint) {
602            entry.get_mut().retain(|skin| *skin != skinned_mesh_entity);
603            if entry.get_mut().is_empty() {
604                entry.remove();
605            }
606        }
607    }
608
609    // Update the total number of joints.
610    skin_uniforms.total_joints -= old_skin_uniform_info.joints.len();
611}
612
613// NOTE: The skinned joints uniform buffer has to be bound at a dynamic offset per
614// entity and so cannot currently be batched on WebGL 2.
615pub fn no_automatic_skin_batching(
616    mut commands: Commands,
617    query: Query<Entity, (With<SkinnedMesh>, Without<NoAutomaticBatching>)>,
618    render_device: Res<RenderDevice>,
619) {
620    if !skins_use_uniform_buffers(&render_device) {
621        return;
622    }
623
624    for entity in &query {
625        commands.entity(entity).try_insert(NoAutomaticBatching);
626    }
627}