bevy_pbr/render/
skin.rs

1use core::mem::{self, size_of};
2use std::sync::OnceLock;
3
4use bevy_asset::{prelude::AssetChanged, Assets};
5use bevy_camera::visibility::ViewVisibility;
6use bevy_ecs::prelude::*;
7use bevy_math::Mat4;
8use bevy_mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes};
9use bevy_platform::collections::hash_map::Entry;
10use bevy_render::render_resource::{Buffer, BufferDescriptor};
11use bevy_render::sync_world::{MainEntity, MainEntityHashMap, MainEntityHashSet};
12use bevy_render::{
13    batching::NoAutomaticBatching,
14    render_resource::BufferUsages,
15    renderer::{RenderDevice, RenderQueue},
16    Extract,
17};
18use bevy_transform::prelude::GlobalTransform;
19use offset_allocator::{Allocation, Allocator};
20use smallvec::SmallVec;
21use tracing::error;
22
/// Upper bound on the number of joints supported for skinned meshes.
///
/// Buffer sizes are derived from this value. What a platform can actually
/// handle depends on the GPU, so the value here is deliberately conservative:
/// it is guaranteed to work everywhere. Raising it would require checking the
/// GPU limits at runtime, which in turn means this could no longer be a
/// `const`.
pub const MAX_JOINTS: usize = 256;
31
/// The capacity handed to the offset allocator that places skins in the
/// joint buffer.
///
/// Every allocator unit covers [`JOINTS_PER_ALLOCATION_UNIT`] joint matrices,
/// so this amounts to 256 GiB worth of joint matrices, a limit we will never
/// reach under any reasonable circumstances.
const MAX_TOTAL_JOINTS: u32 = 1 << 30;
37
38/// The number of joints that we allocate at a time.
39///
40/// Some hardware requires that uniforms be allocated on 256-byte boundaries, so
41/// we need to allocate 4 64-byte matrices at a time to satisfy alignment
42/// requirements.
43const JOINTS_PER_ALLOCATION_UNIT: u32 = (256 / size_of::<Mat4>()) as u32;
44
/// How large the number of entities with changed transforms may get, as a
/// fraction of the total number of joints, before we re-extract every joint.
///
/// This drives the heuristic that decides whether fine-grained change
/// detection is worthwhile. While the number of changed entities stays at or
/// below this fraction, we work out exactly which skins need extraction;
/// above it, we skip change detection and simply re-extract the transforms of
/// all joints.
const JOINT_EXTRACTION_THRESHOLD_FACTOR: f64 = 0.25;
53
54/// The location of the first joint matrix in the skin uniform buffer.
55#[derive(Clone, Copy)]
56pub struct SkinByteOffset {
57    /// The byte offset of the first joint matrix.
58    pub byte_offset: u32,
59}
60
61impl SkinByteOffset {
62    /// Index to be in address space based on the size of a skin uniform.
63    const fn from_index(index: usize) -> Self {
64        SkinByteOffset {
65            byte_offset: (index * size_of::<Mat4>()) as u32,
66        }
67    }
68
69    /// Returns this skin index in elements (not bytes).
70    ///
71    /// Each element is a 4x4 matrix.
72    pub fn index(&self) -> u32 {
73        self.byte_offset / size_of::<Mat4>() as u32
74    }
75}
76
77/// The GPU buffers containing joint matrices for all skinned meshes.
78///
79/// This is double-buffered: we store the joint matrices of each mesh for the
80/// previous frame in addition to those of each mesh for the current frame. This
81/// is for motion vector calculation. Every frame, we swap buffers and overwrite
82/// the joint matrix buffer from two frames ago with the data for the current
83/// frame.
84///
85/// Notes on implementation: see comment on top of the `extract_skins` system.
86#[derive(Resource)]
87pub struct SkinUniforms {
88    /// The CPU-side buffer that stores the joint matrices for skinned meshes in
89    /// the current frame.
90    pub current_staging_buffer: Vec<Mat4>,
91    /// The GPU-side buffer that stores the joint matrices for skinned meshes in
92    /// the current frame.
93    pub current_buffer: Buffer,
94    /// The GPU-side buffer that stores the joint matrices for skinned meshes in
95    /// the previous frame.
96    pub prev_buffer: Buffer,
97    /// The offset allocator that manages the placement of the joints within the
98    /// [`Self::current_buffer`].
99    allocator: Allocator,
100    /// Allocation information that we keep about each skin.
101    skin_uniform_info: MainEntityHashMap<SkinUniformInfo>,
102    /// Maps each joint entity to the skins it's associated with.
103    ///
104    /// We use this in conjunction with change detection to only update the
105    /// skins that need updating each frame.
106    ///
107    /// Note that conceptually this is a hash map of sets, but we use a
108    /// [`SmallVec`] to avoid allocations for the vast majority of the cases in
109    /// which each bone belongs to exactly one skin.
110    joint_to_skins: MainEntityHashMap<SmallVec<[MainEntity; 1]>>,
111    /// The total number of joints in the scene.
112    ///
113    /// We use this as part of our heuristic to decide whether to use
114    /// fine-grained change detection.
115    total_joints: usize,
116}
117
118impl FromWorld for SkinUniforms {
119    fn from_world(world: &mut World) -> Self {
120        let device = world.resource::<RenderDevice>();
121        let buffer_usages = (if skins_use_uniform_buffers(device) {
122            BufferUsages::UNIFORM
123        } else {
124            BufferUsages::STORAGE
125        }) | BufferUsages::COPY_DST;
126
127        // Create the current and previous buffer with the minimum sizes.
128        //
129        // These will be swapped every frame.
130        let current_buffer = device.create_buffer(&BufferDescriptor {
131            label: Some("skin uniform buffer"),
132            size: MAX_JOINTS as u64 * size_of::<Mat4>() as u64,
133            usage: buffer_usages,
134            mapped_at_creation: false,
135        });
136        let prev_buffer = device.create_buffer(&BufferDescriptor {
137            label: Some("skin uniform buffer"),
138            size: MAX_JOINTS as u64 * size_of::<Mat4>() as u64,
139            usage: buffer_usages,
140            mapped_at_creation: false,
141        });
142
143        Self {
144            current_staging_buffer: vec![],
145            current_buffer,
146            prev_buffer,
147            allocator: Allocator::new(MAX_TOTAL_JOINTS),
148            skin_uniform_info: MainEntityHashMap::default(),
149            joint_to_skins: MainEntityHashMap::default(),
150            total_joints: 0,
151        }
152    }
153}
154
155impl SkinUniforms {
156    /// Returns the current offset in joints of the skin in the buffer.
157    pub fn skin_index(&self, skin: MainEntity) -> Option<u32> {
158        self.skin_uniform_info
159            .get(&skin)
160            .map(SkinUniformInfo::offset)
161    }
162
163    /// Returns the current offset in bytes of the skin in the buffer.
164    pub fn skin_byte_offset(&self, skin: MainEntity) -> Option<SkinByteOffset> {
165        self.skin_uniform_info.get(&skin).map(|skin_uniform_info| {
166            SkinByteOffset::from_index(skin_uniform_info.offset() as usize)
167        })
168    }
169
170    /// Returns an iterator over all skins in the scene.
171    pub fn all_skins(&self) -> impl Iterator<Item = &MainEntity> {
172        self.skin_uniform_info.keys()
173    }
174}
175
176/// Allocation information about each skin.
177struct SkinUniformInfo {
178    /// The allocation of the joints within the [`SkinUniforms::current_buffer`].
179    allocation: Allocation,
180    /// The entities that comprise the joints.
181    joints: Vec<MainEntity>,
182}
183
184impl SkinUniformInfo {
185    /// The offset in joints within the [`SkinUniforms::current_staging_buffer`].
186    fn offset(&self) -> u32 {
187        self.allocation.offset * JOINTS_PER_ALLOCATION_UNIT
188    }
189}
190
191/// Returns true if skinning must use uniforms (and dynamic offsets) because
192/// storage buffers aren't supported on the current platform.
193pub fn skins_use_uniform_buffers(render_device: &RenderDevice) -> bool {
194    static SKINS_USE_UNIFORM_BUFFERS: OnceLock<bool> = OnceLock::new();
195    *SKINS_USE_UNIFORM_BUFFERS
196        .get_or_init(|| render_device.limits().max_storage_buffers_per_shader_stage == 0)
197}
198
199/// Uploads the buffers containing the joints to the GPU.
200pub fn prepare_skins(
201    render_device: Res<RenderDevice>,
202    render_queue: Res<RenderQueue>,
203    uniform: ResMut<SkinUniforms>,
204) {
205    let uniform = uniform.into_inner();
206
207    if uniform.current_staging_buffer.is_empty() {
208        return;
209    }
210
211    // Swap current and previous buffers.
212    mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer);
213
214    // Resize the buffers if necessary. Include extra space equal to `MAX_JOINTS`
215    // because we need to be able to bind a full uniform buffer's worth of data
216    // if skins use uniform buffers on this platform.
217    let needed_size = (uniform.current_staging_buffer.len() as u64 + MAX_JOINTS as u64)
218        * size_of::<Mat4>() as u64;
219    if uniform.current_buffer.size() < needed_size {
220        let mut new_size = uniform.current_buffer.size();
221        while new_size < needed_size {
222            // 1.5× growth factor.
223            new_size = (new_size + new_size / 2).next_multiple_of(4);
224        }
225
226        // Create the new buffers.
227        let buffer_usages = if skins_use_uniform_buffers(&render_device) {
228            BufferUsages::UNIFORM
229        } else {
230            BufferUsages::STORAGE
231        } | BufferUsages::COPY_DST;
232        uniform.current_buffer = render_device.create_buffer(&BufferDescriptor {
233            label: Some("skin uniform buffer"),
234            usage: buffer_usages,
235            size: new_size,
236            mapped_at_creation: false,
237        });
238        uniform.prev_buffer = render_device.create_buffer(&BufferDescriptor {
239            label: Some("skin uniform buffer"),
240            usage: buffer_usages,
241            size: new_size,
242            mapped_at_creation: false,
243        });
244
245        // We've created a new `prev_buffer` but we don't have the previous joint
246        // data needed to fill it out correctly. Use the current joint data
247        // instead.
248        //
249        // TODO: This is a bug - will cause motion blur to ignore joint movement
250        // for one frame.
251        render_queue.write_buffer(
252            &uniform.prev_buffer,
253            0,
254            bytemuck::must_cast_slice(&uniform.current_staging_buffer[..]),
255        );
256    }
257
258    // Write the data from `uniform.current_staging_buffer` into
259    // `uniform.current_buffer`.
260    render_queue.write_buffer(
261        &uniform.current_buffer,
262        0,
263        bytemuck::must_cast_slice(&uniform.current_staging_buffer[..]),
264    );
265
266    // We don't need to write `uniform.prev_buffer` because we already wrote it
267    // last frame, and the data should still be on the GPU.
268}
269
270// Notes on implementation:
271// We define the uniform binding as an array<mat4x4<f32>, N> in the shader,
272// where N is the maximum number of Mat4s we can fit in the uniform binding,
273// which may be as little as 16kB or 64kB. But, we may not need all N.
274// We may only need, for example, 10.
275//
276// If we used uniform buffers ‘normally’ then we would have to write a full
277// binding of data for each dynamic offset binding, which is wasteful, makes
278// the buffer much larger than it needs to be, and uses more memory bandwidth
279// to transfer the data, which then costs frame time So @superdump came up
280// with this design: just bind data at the specified offset and interpret
281// the data at that offset as an array<T, N> regardless of what is there.
282//
283// So instead of writing N Mat4s when you only need 10, you write 10, and
284// then pad up to the next dynamic offset alignment. Then write the next.
285// And for the last dynamic offset binding, make sure there is a full binding
286// of data after it so that the buffer is of size
287// `last dynamic offset` + `array<mat4x4<f32>>`.
288//
289// Then when binding the first dynamic offset, the first 10 entries in the array
290// are what you expect, but if you read the 11th you’re reading ‘invalid’ data
291// which could be padding or could be from the next binding.
292//
293// In this way, we can pack ‘variable sized arrays’ into uniform buffer bindings
294// which normally only support fixed size arrays. You just have to make sure
295// in the shader that you only read the values that are valid for that binding.
296pub fn extract_skins(
297    skin_uniforms: ResMut<SkinUniforms>,
298    skinned_meshes: Extract<Query<(Entity, &SkinnedMesh)>>,
299    changed_skinned_meshes: Extract<
300        Query<
301            (Entity, &ViewVisibility, &SkinnedMesh),
302            Or<(
303                Changed<ViewVisibility>,
304                Changed<SkinnedMesh>,
305                AssetChanged<SkinnedMesh>,
306            )>,
307        >,
308    >,
309    skinned_mesh_inverse_bindposes: Extract<Res<Assets<SkinnedMeshInverseBindposes>>>,
310    changed_transforms: Extract<Query<(Entity, &GlobalTransform), Changed<GlobalTransform>>>,
311    joints: Extract<Query<&GlobalTransform>>,
312    mut removed_skinned_meshes_query: Extract<RemovedComponents<SkinnedMesh>>,
313) {
314    let skin_uniforms = skin_uniforms.into_inner();
315
316    // Find skins that have become visible or invisible on this frame. Allocate,
317    // reallocate, or free space for them as necessary.
318    add_or_delete_skins(
319        skin_uniforms,
320        &changed_skinned_meshes,
321        &skinned_mesh_inverse_bindposes,
322        &joints,
323    );
324
325    // Extract the transforms for all joints from the scene, and write them into
326    // the staging buffer at the appropriate spot.
327    extract_joints(
328        skin_uniforms,
329        &skinned_meshes,
330        &changed_skinned_meshes,
331        &skinned_mesh_inverse_bindposes,
332        &changed_transforms,
333        &joints,
334    );
335
336    // Delete skins that became invisible.
337    for skinned_mesh_entity in removed_skinned_meshes_query.read() {
338        // Only remove a skin if we didn't pick it up in `add_or_delete_skins`.
339        // It's possible that a necessary component was removed and re-added in
340        // the same frame.
341        if !changed_skinned_meshes.contains(skinned_mesh_entity) {
342            remove_skin(skin_uniforms, skinned_mesh_entity.into());
343        }
344    }
345}
346
347/// Searches for all skins that have become visible or invisible this frame and
348/// allocations for them as necessary.
349fn add_or_delete_skins(
350    skin_uniforms: &mut SkinUniforms,
351    changed_skinned_meshes: &Query<
352        (Entity, &ViewVisibility, &SkinnedMesh),
353        Or<(
354            Changed<ViewVisibility>,
355            Changed<SkinnedMesh>,
356            AssetChanged<SkinnedMesh>,
357        )>,
358    >,
359    skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
360    joints: &Query<&GlobalTransform>,
361) {
362    // Find every skinned mesh that changed one of (1) visibility; (2) joint
363    // entities (part of `SkinnedMesh`); (3) the associated
364    // `SkinnedMeshInverseBindposes` asset.
365    for (skinned_mesh_entity, skinned_mesh_view_visibility, skinned_mesh) in changed_skinned_meshes
366    {
367        // Remove the skin if it existed last frame.
368        let skinned_mesh_entity = MainEntity::from(skinned_mesh_entity);
369        remove_skin(skin_uniforms, skinned_mesh_entity);
370
371        // If the skin is invisible, we're done.
372        if !(*skinned_mesh_view_visibility).get() {
373            continue;
374        }
375
376        // Initialize the skin.
377        add_skin(
378            skinned_mesh_entity,
379            skinned_mesh,
380            skin_uniforms,
381            skinned_mesh_inverse_bindposes,
382            joints,
383        );
384    }
385}
386
387/// Extracts the global transforms of all joints and updates the staging buffer
388/// as necessary.
389fn extract_joints(
390    skin_uniforms: &mut SkinUniforms,
391    skinned_meshes: &Query<(Entity, &SkinnedMesh)>,
392    changed_skinned_meshes: &Query<
393        (Entity, &ViewVisibility, &SkinnedMesh),
394        Or<(
395            Changed<ViewVisibility>,
396            Changed<SkinnedMesh>,
397            AssetChanged<SkinnedMesh>,
398        )>,
399    >,
400    skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
401    changed_transforms: &Query<(Entity, &GlobalTransform), Changed<GlobalTransform>>,
402    joints: &Query<&GlobalTransform>,
403) {
404    // If the number of entities that changed transforms exceeds a certain
405    // fraction (currently 25%) of the total joints in the scene, then skip
406    // fine-grained change detection.
407    //
408    // Note that this is a crude heuristic, for performance reasons. It doesn't
409    // consider the ratio of modified *joints* to total joints, only the ratio
410    // of modified *entities* to total joints. Thus in the worst case we might
411    // end up re-extracting all skins even though none of the joints changed.
412    // But making the heuristic finer-grained would make it slower to evaluate,
413    // and we don't want to lose performance.
414    let threshold =
415        (skin_uniforms.total_joints as f64 * JOINT_EXTRACTION_THRESHOLD_FACTOR).floor() as usize;
416
417    if changed_transforms.iter().nth(threshold).is_some() {
418        // Go ahead and re-extract all skins in the scene.
419        for (skin_entity, skin) in skinned_meshes {
420            extract_joints_for_skin(
421                skin_entity.into(),
422                skin,
423                skin_uniforms,
424                changed_skinned_meshes,
425                skinned_mesh_inverse_bindposes,
426                joints,
427            );
428        }
429        return;
430    }
431
432    // Use fine-grained change detection to figure out only the skins that need
433    // to have their joints re-extracted.
434    let dirty_skins: MainEntityHashSet = changed_transforms
435        .iter()
436        .flat_map(|(joint, _)| skin_uniforms.joint_to_skins.get(&MainEntity::from(joint)))
437        .flat_map(|skin_joint_mappings| skin_joint_mappings.iter())
438        .copied()
439        .collect();
440
441    // Re-extract the joints for only those skins.
442    for skin_entity in dirty_skins {
443        let Ok((_, skin)) = skinned_meshes.get(*skin_entity) else {
444            continue;
445        };
446        extract_joints_for_skin(
447            skin_entity,
448            skin,
449            skin_uniforms,
450            changed_skinned_meshes,
451            skinned_mesh_inverse_bindposes,
452            joints,
453        );
454    }
455}
456
457/// Extracts all joints for a single skin and writes their transforms into the
458/// CPU staging buffer.
459fn extract_joints_for_skin(
460    skin_entity: MainEntity,
461    skin: &SkinnedMesh,
462    skin_uniforms: &mut SkinUniforms,
463    changed_skinned_meshes: &Query<
464        (Entity, &ViewVisibility, &SkinnedMesh),
465        Or<(
466            Changed<ViewVisibility>,
467            Changed<SkinnedMesh>,
468            AssetChanged<SkinnedMesh>,
469        )>,
470    >,
471    skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
472    joints: &Query<&GlobalTransform>,
473) {
474    // If we initialized the skin this frame, we already populated all
475    // the joints, so there's no need to populate them again.
476    if changed_skinned_meshes.contains(*skin_entity) {
477        return;
478    }
479
480    // Fetch information about the skin.
481    let Some(skin_uniform_info) = skin_uniforms.skin_uniform_info.get(&skin_entity) else {
482        return;
483    };
484    let Some(skinned_mesh_inverse_bindposes) =
485        skinned_mesh_inverse_bindposes.get(&skin.inverse_bindposes)
486    else {
487        return;
488    };
489
490    // Calculate and write in the new joint matrices.
491    for (joint_index, (&joint, skinned_mesh_inverse_bindpose)) in skin
492        .joints
493        .iter()
494        .zip(skinned_mesh_inverse_bindposes.iter())
495        .enumerate()
496    {
497        let Ok(joint_transform) = joints.get(joint) else {
498            continue;
499        };
500
501        let joint_matrix = joint_transform.affine() * *skinned_mesh_inverse_bindpose;
502        skin_uniforms.current_staging_buffer[skin_uniform_info.offset() as usize + joint_index] =
503            joint_matrix;
504    }
505}
506
507/// Allocates space for a new skin in the buffers, and populates its joints.
508fn add_skin(
509    skinned_mesh_entity: MainEntity,
510    skinned_mesh: &SkinnedMesh,
511    skin_uniforms: &mut SkinUniforms,
512    skinned_mesh_inverse_bindposes: &Assets<SkinnedMeshInverseBindposes>,
513    joints: &Query<&GlobalTransform>,
514) {
515    // Allocate space for the joints.
516    let Some(allocation) = skin_uniforms.allocator.allocate(
517        skinned_mesh
518            .joints
519            .len()
520            .div_ceil(JOINTS_PER_ALLOCATION_UNIT as usize) as u32,
521    ) else {
522        error!(
523            "Out of space for skin: {:?}. Tried to allocate space for {:?} joints.",
524            skinned_mesh_entity,
525            skinned_mesh.joints.len()
526        );
527        return;
528    };
529
530    // Store that allocation.
531    let skin_uniform_info = SkinUniformInfo {
532        allocation,
533        joints: skinned_mesh
534            .joints
535            .iter()
536            .map(|entity| MainEntity::from(*entity))
537            .collect(),
538    };
539
540    let skinned_mesh_inverse_bindposes =
541        skinned_mesh_inverse_bindposes.get(&skinned_mesh.inverse_bindposes);
542
543    for (joint_index, &joint) in skinned_mesh.joints.iter().enumerate() {
544        // Calculate the initial joint matrix.
545        let skinned_mesh_inverse_bindpose =
546            skinned_mesh_inverse_bindposes.and_then(|skinned_mesh_inverse_bindposes| {
547                skinned_mesh_inverse_bindposes.get(joint_index)
548            });
549        let joint_matrix = match (skinned_mesh_inverse_bindpose, joints.get(joint)) {
550            (Some(skinned_mesh_inverse_bindpose), Ok(transform)) => {
551                transform.affine() * *skinned_mesh_inverse_bindpose
552            }
553            _ => Mat4::IDENTITY,
554        };
555
556        // Write in the new joint matrix, growing the staging buffer if
557        // necessary.
558        let buffer_index = skin_uniform_info.offset() as usize + joint_index;
559        if skin_uniforms.current_staging_buffer.len() < buffer_index + 1 {
560            skin_uniforms
561                .current_staging_buffer
562                .resize(buffer_index + 1, Mat4::IDENTITY);
563        }
564        skin_uniforms.current_staging_buffer[buffer_index] = joint_matrix;
565
566        // Record the inverse mapping from the joint back to the skin. We use
567        // this in order to perform fine-grained joint extraction.
568        skin_uniforms
569            .joint_to_skins
570            .entry(MainEntity::from(joint))
571            .or_default()
572            .push(skinned_mesh_entity);
573    }
574
575    // Record the number of joints.
576    skin_uniforms.total_joints += skinned_mesh.joints.len();
577
578    skin_uniforms
579        .skin_uniform_info
580        .insert(skinned_mesh_entity, skin_uniform_info);
581}
582
583/// Deallocates a skin and removes it from the [`SkinUniforms`].
584fn remove_skin(skin_uniforms: &mut SkinUniforms, skinned_mesh_entity: MainEntity) {
585    let Some(old_skin_uniform_info) = skin_uniforms.skin_uniform_info.remove(&skinned_mesh_entity)
586    else {
587        return;
588    };
589
590    // Free the allocation.
591    skin_uniforms
592        .allocator
593        .free(old_skin_uniform_info.allocation);
594
595    // Remove the inverse mapping from each joint back to the skin.
596    for &joint in &old_skin_uniform_info.joints {
597        if let Entry::Occupied(mut entry) = skin_uniforms.joint_to_skins.entry(joint) {
598            entry.get_mut().retain(|skin| *skin != skinned_mesh_entity);
599            if entry.get_mut().is_empty() {
600                entry.remove();
601            }
602        }
603    }
604
605    // Update the total number of joints.
606    skin_uniforms.total_joints -= old_skin_uniform_info.joints.len();
607}
608
609// NOTE: The skinned joints uniform buffer has to be bound at a dynamic offset per
610// entity and so cannot currently be batched on WebGL 2.
611pub fn no_automatic_skin_batching(
612    mut commands: Commands,
613    query: Query<Entity, (With<SkinnedMesh>, Without<NoAutomaticBatching>)>,
614    render_device: Res<RenderDevice>,
615) {
616    if !skins_use_uniform_buffers(&render_device) {
617        return;
618    }
619
620    for entity in &query {
621        commands.entity(entity).try_insert(NoAutomaticBatching);
622    }
623}