wgpu_core/device/
queue.rs

1use alloc::{boxed::Box, string::ToString, sync::Arc, vec, vec::Vec};
2use core::{
3    iter,
4    mem::{self, ManuallyDrop},
5    num::NonZeroU64,
6    ptr::NonNull,
7    sync::atomic::Ordering,
8};
9use smallvec::SmallVec;
10use thiserror::Error;
11use wgt::{
12    error::{ErrorType, WebGpuError},
13    AccelerationStructureFlags,
14};
15
16use super::{life::LifetimeTracker, Device};
17#[cfg(feature = "trace")]
18use crate::device::trace::Action;
19use crate::{
20    api_log,
21    command::{
22        extract_texture_selector, validate_linear_texture_data, validate_texture_copy_range,
23        ClearError, CommandAllocator, CommandBuffer, CommandEncoderError, CopySide,
24        TexelCopyTextureInfo, TransferError,
25    },
26    conv,
27    device::{DeviceError, WaitIdleError},
28    get_lowest_common_denom,
29    global::Global,
30    id::{self, BlasId, QueueId},
31    init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange},
32    lock::{rank, Mutex, MutexGuard, RwLock, RwLockWriteGuard},
33    ray_tracing::{BlasCompactReadyPendingClosure, CompactBlasError},
34    resource::{
35        Blas, BlasCompactState, Buffer, BufferAccessError, BufferMapState, DestroyedBuffer,
36        DestroyedResourceError, DestroyedTexture, Fallible, FlushedStagingBuffer,
37        InvalidResourceError, Labeled, ParentDevice, ResourceErrorIdent, StagingBuffer, Texture,
38        TextureInner, Trackable, TrackingData,
39    },
40    resource_log,
41    scratch::ScratchBuffer,
42    snatch::{SnatchGuard, Snatchable},
43    track::{self, Tracker, TrackerIndex},
44    FastHashMap, SubmissionIndex,
45};
46use crate::{device::resource::CommandIndices, resource::RawResourceAccess};
47
48pub struct Queue {
49    raw: Box<dyn hal::DynQueue>,
50    pub(crate) pending_writes: Mutex<PendingWrites>,
51    life_tracker: Mutex<LifetimeTracker>,
52    // The device needs to be dropped last (`Device.zero_buffer` might be referenced by the encoder in pending writes).
53    pub(crate) device: Arc<Device>,
54}
55
56impl Queue {
57    pub(crate) fn new(
58        device: Arc<Device>,
59        raw: Box<dyn hal::DynQueue>,
60    ) -> Result<Self, DeviceError> {
61        let pending_encoder = device
62            .command_allocator
63            .acquire_encoder(device.raw(), raw.as_ref())
64            .map_err(DeviceError::from_hal);
65
66        let pending_encoder = match pending_encoder {
67            Ok(pending_encoder) => pending_encoder,
68            Err(e) => {
69                return Err(e);
70            }
71        };
72
73        let mut pending_writes = PendingWrites::new(pending_encoder);
74
75        let zero_buffer = device.zero_buffer.as_ref();
76        pending_writes.activate();
77        unsafe {
78            pending_writes
79                .command_encoder
80                .transition_buffers(&[hal::BufferBarrier {
81                    buffer: zero_buffer,
82                    usage: hal::StateTransition {
83                        from: wgt::BufferUses::empty(),
84                        to: wgt::BufferUses::COPY_DST,
85                    },
86                }]);
87            pending_writes
88                .command_encoder
89                .clear_buffer(zero_buffer, 0..super::ZERO_BUFFER_SIZE);
90            pending_writes
91                .command_encoder
92                .transition_buffers(&[hal::BufferBarrier {
93                    buffer: zero_buffer,
94                    usage: hal::StateTransition {
95                        from: wgt::BufferUses::COPY_DST,
96                        to: wgt::BufferUses::COPY_SRC,
97                    },
98                }]);
99        }
100
101        Ok(Queue {
102            raw,
103            device,
104            pending_writes: Mutex::new(rank::QUEUE_PENDING_WRITES, pending_writes),
105            life_tracker: Mutex::new(rank::QUEUE_LIFE_TRACKER, LifetimeTracker::new()),
106        })
107    }
108
109    pub(crate) fn raw(&self) -> &dyn hal::DynQueue {
110        self.raw.as_ref()
111    }
112
113    #[track_caller]
114    pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> {
115        self.life_tracker.lock()
116    }
117
118    pub(crate) fn maintain(
119        &self,
120        submission_index: u64,
121        snatch_guard: &SnatchGuard,
122    ) -> (
123        SmallVec<[SubmittedWorkDoneClosure; 1]>,
124        Vec<super::BufferMapPendingClosure>,
125        Vec<BlasCompactReadyPendingClosure>,
126        bool,
127    ) {
128        let mut life_tracker = self.lock_life();
129        let submission_closures = life_tracker.triage_submissions(submission_index);
130
131        let mapping_closures = life_tracker.handle_mapping(snatch_guard);
132        let blas_closures = life_tracker.handle_compact_read_back();
133
134        let queue_empty = life_tracker.queue_empty();
135
136        (
137            submission_closures,
138            mapping_closures,
139            blas_closures,
140            queue_empty,
141        )
142    }
143}
144
145crate::impl_resource_type!(Queue);
146// TODO: https://github.com/gfx-rs/wgpu/issues/4014
147impl Labeled for Queue {
148    fn label(&self) -> &str {
149        ""
150    }
151}
152crate::impl_parent_device!(Queue);
153crate::impl_storage_item!(Queue);
154
155impl Drop for Queue {
156    fn drop(&mut self) {
157        resource_log!("Drop {}", self.error_ident());
158
159        let last_successful_submission_index = self
160            .device
161            .last_successful_submission_index
162            .load(Ordering::Acquire);
163
164        let fence = self.device.fence.read();
165
166        // Try waiting on the last submission using the following sequence of timeouts
167        let timeouts_in_ms = [100, 200, 400, 800, 1600, 3200];
168
169        for (i, timeout_ms) in timeouts_in_ms.into_iter().enumerate() {
170            let is_last_iter = i == timeouts_in_ms.len() - 1;
171
172            api_log!(
173                "Waiting on last submission. try: {}/{}. timeout: {}ms",
174                i + 1,
175                timeouts_in_ms.len(),
176                timeout_ms
177            );
178
179            let wait_res = unsafe {
180                self.device.raw().wait(
181                    fence.as_ref(),
182                    last_successful_submission_index,
183                    #[cfg(not(target_arch = "wasm32"))]
184                    timeout_ms,
185                    #[cfg(target_arch = "wasm32")]
186                    0, // WebKit and Chromium don't support a non-0 timeout
187                )
188            };
189            // Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU).
190            match wait_res {
191                Ok(true) => break,
192                Ok(false) => {
193                    // It's fine that we timed out on WebGL; GL objects can be deleted early as they
194                    // will be kept around by the driver if GPU work hasn't finished.
195                    // Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other
196                    // backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands.
197                    // Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and
198                    // we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538.
199                    #[cfg(target_arch = "wasm32")]
200                    {
201                        break;
202                    }
203                    #[cfg(not(target_arch = "wasm32"))]
204                    {
205                        if is_last_iter {
206                            panic!(
207                                "We timed out while waiting on the last successful submission to complete!"
208                            );
209                        }
210                    }
211                }
212                Err(e) => match e {
213                    hal::DeviceError::OutOfMemory => {
214                        if is_last_iter {
215                            panic!(
216                                "We ran into an OOM error while waiting on the last successful submission to complete!"
217                            );
218                        }
219                    }
220                    hal::DeviceError::Lost => {
221                        self.device.handle_hal_error(e); // will lose the device
222                        break;
223                    }
224                    hal::DeviceError::Unexpected => {
225                        panic!(
226                            "We ran into an unexpected error while waiting on the last successful submission to complete!"
227                        );
228                    }
229                },
230            }
231        }
232        drop(fence);
233
234        let snatch_guard = self.device.snatchable_lock.read();
235        let (submission_closures, mapping_closures, blas_compact_ready_closures, queue_empty) =
236            self.maintain(last_successful_submission_index, &snatch_guard);
237        drop(snatch_guard);
238
239        assert!(queue_empty);
240
241        let closures = crate::device::UserClosures {
242            mappings: mapping_closures,
243            blas_compact_ready: blas_compact_ready_closures,
244            submissions: submission_closures,
245            device_lost_invocations: SmallVec::new(),
246        };
247
248        closures.fire();
249    }
250}
251
/// Boxed one-shot callback taking no arguments.
///
/// With the `send_sync` cfg enabled the callback must also be `Send`.
// NOTE(review): going by the name, this is invoked once submitted work is
// done - confirm against the callers.
#[cfg(send_sync)]
pub type SubmittedWorkDoneClosure = Box<dyn FnOnce() + Send + 'static>;
/// Boxed one-shot callback taking no arguments.
///
/// Without the `send_sync` cfg no `Send` bound is required.
#[cfg(not(send_sync))]
pub type SubmittedWorkDoneClosure = Box<dyn FnOnce() + 'static>;
256
257/// A texture or buffer to be freed soon.
258///
259/// This is just a tagged raw texture or buffer, generally about to be added to
260/// some other more specific container like:
261///
262/// - `PendingWrites::temp_resources`: resources used by queue writes and
263///   unmaps, waiting to be folded in with the next queue submission
264///
265/// - `ActiveSubmission::temp_resources`: temporary resources used by a queue
266///   submission, to be freed when it completes
267#[derive(Debug)]
268pub enum TempResource {
269    StagingBuffer(FlushedStagingBuffer),
270    ScratchBuffer(ScratchBuffer),
271    DestroyedBuffer(DestroyedBuffer),
272    DestroyedTexture(DestroyedTexture),
273}
274
275/// A series of raw [`CommandBuffer`]s that have been submitted to a
276/// queue, and the [`wgpu_hal::CommandEncoder`] that built them.
277///
278/// [`CommandBuffer`]: hal::Api::CommandBuffer
279/// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
280pub(crate) struct EncoderInFlight {
281    inner: crate::command::CommandEncoder,
282    pub(crate) trackers: Tracker,
283    pub(crate) temp_resources: Vec<TempResource>,
284    /// We only need to keep these resources alive.
285    _indirect_draw_validation_resources: crate::indirect_validation::DrawResources,
286
287    /// These are the buffers that have been tracked by `PendingWrites`.
288    pub(crate) pending_buffers: FastHashMap<TrackerIndex, Arc<Buffer>>,
289    /// These are the textures that have been tracked by `PendingWrites`.
290    pub(crate) pending_textures: FastHashMap<TrackerIndex, Arc<Texture>>,
291    /// These are the BLASes that have been tracked by `PendingWrites`.
292    pub(crate) pending_blas_s: FastHashMap<TrackerIndex, Arc<Blas>>,
293}
294
295/// A private command encoder for writes made directly on the device
296/// or queue.
297///
298/// Operations like `buffer_unmap`, `queue_write_buffer`, and
299/// `queue_write_texture` need to copy data to the GPU. At the hal
300/// level, this must be done by encoding and submitting commands, but
301/// these operations are not associated with any specific wgpu command
302/// buffer.
303///
304/// Instead, `Device::pending_writes` owns one of these values, which
305/// has its own hal command encoder and resource lists. The commands
306/// accumulated here are automatically submitted to the queue the next
307/// time the user submits a wgpu command buffer, ahead of the user's
308/// commands.
309///
310/// Important:
311/// When locking pending_writes be sure that tracker is not locked
312/// and try to lock trackers for the minimum timespan possible
313///
314/// All uses of [`StagingBuffer`]s end up here.
315#[derive(Debug)]
316pub(crate) struct PendingWrites {
317    // The command encoder needs to be destroyed before any other resource in pending writes.
318    pub command_encoder: Box<dyn hal::DynCommandEncoder>,
319
320    /// True if `command_encoder` is in the "recording" state, as
321    /// described in the docs for the [`wgpu_hal::CommandEncoder`]
322    /// trait.
323    ///
324    /// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
325    pub is_recording: bool,
326
327    temp_resources: Vec<TempResource>,
328    dst_buffers: FastHashMap<TrackerIndex, Arc<Buffer>>,
329    dst_textures: FastHashMap<TrackerIndex, Arc<Texture>>,
330    copied_blas_s: FastHashMap<TrackerIndex, Arc<Blas>>,
331}
332
333impl PendingWrites {
334    pub fn new(command_encoder: Box<dyn hal::DynCommandEncoder>) -> Self {
335        Self {
336            command_encoder,
337            is_recording: false,
338            temp_resources: Vec::new(),
339            dst_buffers: FastHashMap::default(),
340            dst_textures: FastHashMap::default(),
341            copied_blas_s: FastHashMap::default(),
342        }
343    }
344
345    pub fn insert_buffer(&mut self, buffer: &Arc<Buffer>) {
346        self.dst_buffers
347            .insert(buffer.tracker_index(), buffer.clone());
348    }
349
350    pub fn insert_texture(&mut self, texture: &Arc<Texture>) {
351        self.dst_textures
352            .insert(texture.tracker_index(), texture.clone());
353    }
354
355    pub fn insert_blas(&mut self, blas: &Arc<Blas>) {
356        self.copied_blas_s
357            .insert(blas.tracker_index(), blas.clone());
358    }
359
360    pub fn contains_buffer(&self, buffer: &Arc<Buffer>) -> bool {
361        self.dst_buffers.contains_key(&buffer.tracker_index())
362    }
363
364    pub fn contains_texture(&self, texture: &Arc<Texture>) -> bool {
365        self.dst_textures.contains_key(&texture.tracker_index())
366    }
367
368    pub fn consume_temp(&mut self, resource: TempResource) {
369        self.temp_resources.push(resource);
370    }
371
372    pub fn consume(&mut self, buffer: FlushedStagingBuffer) {
373        self.temp_resources
374            .push(TempResource::StagingBuffer(buffer));
375    }
376
377    fn pre_submit(
378        &mut self,
379        command_allocator: &CommandAllocator,
380        device: &Arc<Device>,
381        queue: &Queue,
382    ) -> Result<Option<EncoderInFlight>, DeviceError> {
383        if self.is_recording {
384            let pending_buffers = mem::take(&mut self.dst_buffers);
385            let pending_textures = mem::take(&mut self.dst_textures);
386            let pending_blas_s = mem::take(&mut self.copied_blas_s);
387
388            let cmd_buf = unsafe { self.command_encoder.end_encoding() }
389                .map_err(|e| device.handle_hal_error(e))?;
390            self.is_recording = false;
391
392            let new_encoder = command_allocator
393                .acquire_encoder(device.raw(), queue.raw())
394                .map_err(|e| device.handle_hal_error(e))?;
395
396            let encoder = EncoderInFlight {
397                inner: crate::command::CommandEncoder {
398                    raw: ManuallyDrop::new(mem::replace(&mut self.command_encoder, new_encoder)),
399                    list: vec![cmd_buf],
400                    device: device.clone(),
401                    is_open: false,
402                    hal_label: None,
403                },
404                trackers: Tracker::new(),
405                temp_resources: mem::take(&mut self.temp_resources),
406                _indirect_draw_validation_resources: crate::indirect_validation::DrawResources::new(
407                    device.clone(),
408                ),
409                pending_buffers,
410                pending_textures,
411                pending_blas_s,
412            };
413            Ok(Some(encoder))
414        } else {
415            self.dst_buffers.clear();
416            self.dst_textures.clear();
417            self.copied_blas_s.clear();
418            Ok(None)
419        }
420    }
421
422    pub fn activate(&mut self) -> &mut dyn hal::DynCommandEncoder {
423        if !self.is_recording {
424            unsafe {
425                self.command_encoder
426                    .begin_encoding(Some("(wgpu internal) PendingWrites"))
427                    .unwrap();
428            }
429            self.is_recording = true;
430        }
431        self.command_encoder.as_mut()
432    }
433}
434
435impl Drop for PendingWrites {
436    fn drop(&mut self) {
437        unsafe {
438            if self.is_recording {
439                self.command_encoder.discard_encoding();
440            }
441        }
442    }
443}
444
445#[derive(Clone, Debug, Error)]
446#[non_exhaustive]
447pub enum QueueWriteError {
448    #[error(transparent)]
449    Queue(#[from] DeviceError),
450    #[error(transparent)]
451    Transfer(#[from] TransferError),
452    #[error(transparent)]
453    MemoryInitFailure(#[from] ClearError),
454    #[error(transparent)]
455    DestroyedResource(#[from] DestroyedResourceError),
456    #[error(transparent)]
457    InvalidResource(#[from] InvalidResourceError),
458}
459
460impl WebGpuError for QueueWriteError {
461    fn webgpu_error_type(&self) -> ErrorType {
462        let e: &dyn WebGpuError = match self {
463            Self::Queue(e) => e,
464            Self::Transfer(e) => e,
465            Self::MemoryInitFailure(e) => e,
466            Self::DestroyedResource(e) => e,
467            Self::InvalidResource(e) => e,
468        };
469        e.webgpu_error_type()
470    }
471}
472
473#[derive(Clone, Debug, Error)]
474#[non_exhaustive]
475pub enum QueueSubmitError {
476    #[error(transparent)]
477    Queue(#[from] DeviceError),
478    #[error(transparent)]
479    DestroyedResource(#[from] DestroyedResourceError),
480    #[error(transparent)]
481    Unmap(#[from] BufferAccessError),
482    #[error("{0} is still mapped")]
483    BufferStillMapped(ResourceErrorIdent),
484    #[error(transparent)]
485    InvalidResource(#[from] InvalidResourceError),
486    #[error(transparent)]
487    CommandEncoder(#[from] CommandEncoderError),
488    #[error(transparent)]
489    ValidateAsActionsError(#[from] crate::ray_tracing::ValidateAsActionsError),
490}
491
492impl WebGpuError for QueueSubmitError {
493    fn webgpu_error_type(&self) -> ErrorType {
494        let e: &dyn WebGpuError = match self {
495            Self::Queue(e) => e,
496            Self::Unmap(e) => e,
497            Self::CommandEncoder(e) => e,
498            Self::ValidateAsActionsError(e) => e,
499            Self::InvalidResource(e) => e,
500            Self::DestroyedResource(_) | Self::BufferStillMapped(_) => {
501                return ErrorType::Validation
502            }
503        };
504        e.webgpu_error_type()
505    }
506}
507
508//TODO: move out common parts of write_xxx.
509
510impl Queue {
511    pub fn write_buffer(
512        &self,
513        buffer: Fallible<Buffer>,
514        buffer_offset: wgt::BufferAddress,
515        data: &[u8],
516    ) -> Result<(), QueueWriteError> {
517        profiling::scope!("Queue::write_buffer");
518        api_log!("Queue::write_buffer");
519
520        self.device.check_is_valid()?;
521
522        let buffer = buffer.get()?;
523
524        let data_size = data.len() as wgt::BufferAddress;
525
526        self.same_device_as(buffer.as_ref())?;
527
528        let data_size = if let Some(data_size) = wgt::BufferSize::new(data_size) {
529            data_size
530        } else {
531            log::trace!("Ignoring write_buffer of size 0");
532            return Ok(());
533        };
534
535        let snatch_guard = self.device.snatchable_lock.read();
536
537        // Platform validation requires that the staging buffer always be
538        // freed, even if an error occurs. All paths from here must call
539        // `device.pending_writes.consume`.
540        let mut staging_buffer = StagingBuffer::new(&self.device, data_size)?;
541        let mut pending_writes = self.pending_writes.lock();
542
543        let staging_buffer = {
544            profiling::scope!("copy");
545            staging_buffer.write(data);
546            staging_buffer.flush()
547        };
548
549        let result = self.write_staging_buffer_impl(
550            &snatch_guard,
551            &mut pending_writes,
552            &staging_buffer,
553            buffer,
554            buffer_offset,
555        );
556
557        pending_writes.consume(staging_buffer);
558        result
559    }
560
561    pub fn create_staging_buffer(
562        &self,
563        buffer_size: wgt::BufferSize,
564    ) -> Result<(StagingBuffer, NonNull<u8>), QueueWriteError> {
565        profiling::scope!("Queue::create_staging_buffer");
566        resource_log!("Queue::create_staging_buffer");
567
568        self.device.check_is_valid()?;
569
570        let staging_buffer = StagingBuffer::new(&self.device, buffer_size)?;
571        let ptr = unsafe { staging_buffer.ptr() };
572
573        Ok((staging_buffer, ptr))
574    }
575
576    pub fn write_staging_buffer(
577        &self,
578        buffer: Fallible<Buffer>,
579        buffer_offset: wgt::BufferAddress,
580        staging_buffer: StagingBuffer,
581    ) -> Result<(), QueueWriteError> {
582        profiling::scope!("Queue::write_staging_buffer");
583
584        self.device.check_is_valid()?;
585
586        let buffer = buffer.get()?;
587
588        let snatch_guard = self.device.snatchable_lock.read();
589        let mut pending_writes = self.pending_writes.lock();
590
591        // At this point, we have taken ownership of the staging_buffer from the
592        // user. Platform validation requires that the staging buffer always
593        // be freed, even if an error occurs. All paths from here must call
594        // `device.pending_writes.consume`.
595        let staging_buffer = staging_buffer.flush();
596
597        let result = self.write_staging_buffer_impl(
598            &snatch_guard,
599            &mut pending_writes,
600            &staging_buffer,
601            buffer,
602            buffer_offset,
603        );
604
605        pending_writes.consume(staging_buffer);
606        result
607    }
608
609    pub fn validate_write_buffer(
610        &self,
611        buffer: Fallible<Buffer>,
612        buffer_offset: u64,
613        buffer_size: wgt::BufferSize,
614    ) -> Result<(), QueueWriteError> {
615        profiling::scope!("Queue::validate_write_buffer");
616
617        self.device.check_is_valid()?;
618
619        let buffer = buffer.get()?;
620
621        self.validate_write_buffer_impl(&buffer, buffer_offset, buffer_size)?;
622
623        Ok(())
624    }
625
626    fn validate_write_buffer_impl(
627        &self,
628        buffer: &Buffer,
629        buffer_offset: u64,
630        buffer_size: wgt::BufferSize,
631    ) -> Result<(), TransferError> {
632        buffer.check_usage(wgt::BufferUsages::COPY_DST)?;
633        if buffer_size.get() % wgt::COPY_BUFFER_ALIGNMENT != 0 {
634            return Err(TransferError::UnalignedCopySize(buffer_size.get()));
635        }
636        if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 {
637            return Err(TransferError::UnalignedBufferOffset(buffer_offset));
638        }
639        if buffer_offset + buffer_size.get() > buffer.size {
640            return Err(TransferError::BufferOverrun {
641                start_offset: buffer_offset,
642                end_offset: buffer_offset + buffer_size.get(),
643                buffer_size: buffer.size,
644                side: CopySide::Destination,
645            });
646        }
647
648        Ok(())
649    }
650
651    fn write_staging_buffer_impl(
652        &self,
653        snatch_guard: &SnatchGuard,
654        pending_writes: &mut PendingWrites,
655        staging_buffer: &FlushedStagingBuffer,
656        buffer: Arc<Buffer>,
657        buffer_offset: u64,
658    ) -> Result<(), QueueWriteError> {
659        self.device.check_is_valid()?;
660
661        let transition = {
662            let mut trackers = self.device.trackers.lock();
663            trackers
664                .buffers
665                .set_single(&buffer, wgt::BufferUses::COPY_DST)
666        };
667
668        let dst_raw = buffer.try_raw(snatch_guard)?;
669
670        self.same_device_as(buffer.as_ref())?;
671
672        self.validate_write_buffer_impl(&buffer, buffer_offset, staging_buffer.size)?;
673
674        let region = hal::BufferCopy {
675            src_offset: 0,
676            dst_offset: buffer_offset,
677            size: staging_buffer.size,
678        };
679        let barriers = iter::once(hal::BufferBarrier {
680            buffer: staging_buffer.raw(),
681            usage: hal::StateTransition {
682                from: wgt::BufferUses::MAP_WRITE,
683                to: wgt::BufferUses::COPY_SRC,
684            },
685        })
686        .chain(transition.map(|pending| pending.into_hal(&buffer, snatch_guard)))
687        .collect::<Vec<_>>();
688        let encoder = pending_writes.activate();
689        unsafe {
690            encoder.transition_buffers(&barriers);
691            encoder.copy_buffer_to_buffer(staging_buffer.raw(), dst_raw, &[region]);
692        }
693
694        pending_writes.insert_buffer(&buffer);
695
696        // Ensure the overwritten bytes are marked as initialized so
697        // they don't need to be nulled prior to mapping or binding.
698        {
699            buffer
700                .initialization_status
701                .write()
702                .drain(buffer_offset..(buffer_offset + staging_buffer.size.get()));
703        }
704
705        Ok(())
706    }
707
708    pub fn write_texture(
709        &self,
710        destination: wgt::TexelCopyTextureInfo<Fallible<Texture>>,
711        data: &[u8],
712        data_layout: &wgt::TexelCopyBufferLayout,
713        size: &wgt::Extent3d,
714    ) -> Result<(), QueueWriteError> {
715        profiling::scope!("Queue::write_texture");
716        api_log!("Queue::write_texture");
717
718        self.device.check_is_valid()?;
719
720        if size.width == 0 || size.height == 0 || size.depth_or_array_layers == 0 {
721            log::trace!("Ignoring write_texture of size 0");
722            return Ok(());
723        }
724
725        let dst = destination.texture.get()?;
726        let destination = wgt::TexelCopyTextureInfo {
727            texture: (),
728            mip_level: destination.mip_level,
729            origin: destination.origin,
730            aspect: destination.aspect,
731        };
732
733        self.same_device_as(dst.as_ref())?;
734
735        dst.check_usage(wgt::TextureUsages::COPY_DST)
736            .map_err(TransferError::MissingTextureUsage)?;
737
738        // Note: Doing the copy range validation early is important because ensures that the
739        // dimensions are not going to cause overflow in other parts of the validation.
740        let (hal_copy_size, array_layer_count) =
741            validate_texture_copy_range(&destination, &dst.desc, CopySide::Destination, size)?;
742
743        let (selector, dst_base) = extract_texture_selector(&destination, size, &dst)?;
744
745        if !dst_base.aspect.is_one() {
746            return Err(TransferError::CopyAspectNotOne.into());
747        }
748
749        if !conv::is_valid_copy_dst_texture_format(dst.desc.format, destination.aspect) {
750            return Err(TransferError::CopyToForbiddenTextureFormat {
751                format: dst.desc.format,
752                aspect: destination.aspect,
753            }
754            .into());
755        }
756
757        // Note: `_source_bytes_per_array_layer` is ignored since we
758        // have a staging copy, and it can have a different value.
759        let (required_bytes_in_copy, _source_bytes_per_array_layer) = validate_linear_texture_data(
760            data_layout,
761            dst.desc.format,
762            destination.aspect,
763            data.len() as wgt::BufferAddress,
764            CopySide::Source,
765            size,
766            false,
767        )?;
768
769        if dst.desc.format.is_depth_stencil_format() {
770            self.device
771                .require_downlevel_flags(wgt::DownlevelFlags::DEPTH_TEXTURE_AND_BUFFER_COPIES)
772                .map_err(TransferError::from)?;
773        }
774
775        let snatch_guard = self.device.snatchable_lock.read();
776
777        let mut pending_writes = self.pending_writes.lock();
778        let encoder = pending_writes.activate();
779
780        // If the copy does not fully cover the layers, we need to initialize to
781        // zero *first* as we don't keep track of partial texture layer inits.
782        //
783        // Strictly speaking we only need to clear the areas of a layer
784        // untouched, but this would get increasingly messy.
785        let init_layer_range = if dst.desc.dimension == wgt::TextureDimension::D3 {
786            // volume textures don't have a layer range as array volumes aren't supported
787            0..1
788        } else {
789            destination.origin.z..destination.origin.z + size.depth_or_array_layers
790        };
791        let mut dst_initialization_status = dst.initialization_status.write();
792        if dst_initialization_status.mips[destination.mip_level as usize]
793            .check(init_layer_range.clone())
794            .is_some()
795        {
796            if has_copy_partial_init_tracker_coverage(size, destination.mip_level, &dst.desc) {
797                for layer_range in dst_initialization_status.mips[destination.mip_level as usize]
798                    .drain(init_layer_range)
799                    .collect::<Vec<core::ops::Range<u32>>>()
800                {
801                    let mut trackers = self.device.trackers.lock();
802                    crate::command::clear_texture(
803                        &dst,
804                        TextureInitRange {
805                            mip_range: destination.mip_level..(destination.mip_level + 1),
806                            layer_range,
807                        },
808                        encoder,
809                        &mut trackers.textures,
810                        &self.device.alignments,
811                        self.device.zero_buffer.as_ref(),
812                        &snatch_guard,
813                    )
814                    .map_err(QueueWriteError::from)?;
815                }
816            } else {
817                dst_initialization_status.mips[destination.mip_level as usize]
818                    .drain(init_layer_range);
819            }
820        }
821
822        let dst_raw = dst.try_raw(&snatch_guard)?;
823
824        let (block_width, block_height) = dst.desc.format.block_dimensions();
825        let width_in_blocks = size.width / block_width;
826        let height_in_blocks = size.height / block_height;
827
828        let block_size = dst
829            .desc
830            .format
831            .block_copy_size(Some(destination.aspect))
832            .unwrap();
833        let bytes_in_last_row = width_in_blocks * block_size;
834
835        let bytes_per_row = data_layout.bytes_per_row.unwrap_or(bytes_in_last_row);
836        let rows_per_image = data_layout.rows_per_image.unwrap_or(height_in_blocks);
837
838        let bytes_per_row_alignment = get_lowest_common_denom(
839            self.device.alignments.buffer_copy_pitch.get() as u32,
840            block_size,
841        );
842        let stage_bytes_per_row = wgt::math::align_to(bytes_in_last_row, bytes_per_row_alignment);
843
844        // Platform validation requires that the staging buffer always be
845        // freed, even if an error occurs. All paths from here must call
846        // `device.pending_writes.consume`.
847        let staging_buffer = if stage_bytes_per_row == bytes_per_row {
848            profiling::scope!("copy aligned");
849            // Fast path if the data is already being aligned optimally.
850            let stage_size = wgt::BufferSize::new(required_bytes_in_copy).unwrap();
851            let mut staging_buffer = StagingBuffer::new(&self.device, stage_size)?;
852            staging_buffer.write(&data[data_layout.offset as usize..]);
853            staging_buffer
854        } else {
855            profiling::scope!("copy chunked");
856            // Copy row by row into the optimal alignment.
857            let block_rows_in_copy =
858                (size.depth_or_array_layers - 1) * rows_per_image + height_in_blocks;
859            let stage_size =
860                wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64)
861                    .unwrap();
862            let mut staging_buffer = StagingBuffer::new(&self.device, stage_size)?;
863            let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
864            for layer in 0..size.depth_or_array_layers {
865                let rows_offset = layer * rows_per_image;
866                for row in rows_offset..rows_offset + height_in_blocks {
867                    let src_offset = data_layout.offset as u32 + row * bytes_per_row;
868                    let dst_offset = row * stage_bytes_per_row;
869                    unsafe {
870                        staging_buffer.write_with_offset(
871                            data,
872                            src_offset as isize,
873                            dst_offset as isize,
874                            copy_bytes_per_row,
875                        )
876                    }
877                }
878            }
879            staging_buffer
880        };
881
882        let staging_buffer = staging_buffer.flush();
883
884        let regions = (0..array_layer_count)
885            .map(|array_layer_offset| {
886                let mut texture_base = dst_base.clone();
887                texture_base.array_layer += array_layer_offset;
888                hal::BufferTextureCopy {
889                    buffer_layout: wgt::TexelCopyBufferLayout {
890                        offset: array_layer_offset as u64
891                            * rows_per_image as u64
892                            * stage_bytes_per_row as u64,
893                        bytes_per_row: Some(stage_bytes_per_row),
894                        rows_per_image: Some(rows_per_image),
895                    },
896                    texture_base,
897                    size: hal_copy_size,
898                }
899            })
900            .collect::<Vec<_>>();
901
902        {
903            let buffer_barrier = hal::BufferBarrier {
904                buffer: staging_buffer.raw(),
905                usage: hal::StateTransition {
906                    from: wgt::BufferUses::MAP_WRITE,
907                    to: wgt::BufferUses::COPY_SRC,
908                },
909            };
910
911            let mut trackers = self.device.trackers.lock();
912            let transition =
913                trackers
914                    .textures
915                    .set_single(&dst, selector, wgt::TextureUses::COPY_DST);
916            let texture_barriers = transition
917                .map(|pending| pending.into_hal(dst_raw))
918                .collect::<Vec<_>>();
919
920            unsafe {
921                encoder.transition_textures(&texture_barriers);
922                encoder.transition_buffers(&[buffer_barrier]);
923                encoder.copy_buffer_to_texture(staging_buffer.raw(), dst_raw, &regions);
924            }
925        }
926
927        pending_writes.consume(staging_buffer);
928        pending_writes.insert_texture(&dst);
929
930        Ok(())
931    }
932
933    #[cfg(webgl)]
934    pub fn copy_external_image_to_texture(
935        &self,
936        source: &wgt::CopyExternalImageSourceInfo,
937        destination: wgt::CopyExternalImageDestInfo<Fallible<Texture>>,
938        size: wgt::Extent3d,
939    ) -> Result<(), QueueWriteError> {
940        profiling::scope!("Queue::copy_external_image_to_texture");
941
942        self.device.check_is_valid()?;
943
944        if size.width == 0 || size.height == 0 || size.depth_or_array_layers == 0 {
945            log::trace!("Ignoring write_texture of size 0");
946            return Ok(());
947        }
948
949        let mut needs_flag = false;
950        needs_flag |= matches!(source.source, wgt::ExternalImageSource::OffscreenCanvas(_));
951        needs_flag |= source.origin != wgt::Origin2d::ZERO;
952        needs_flag |= destination.color_space != wgt::PredefinedColorSpace::Srgb;
953        #[allow(clippy::bool_comparison)]
954        if matches!(source.source, wgt::ExternalImageSource::ImageBitmap(_)) {
955            needs_flag |= source.flip_y != false;
956            needs_flag |= destination.premultiplied_alpha != false;
957        }
958
959        if needs_flag {
960            self.device
961                .require_downlevel_flags(wgt::DownlevelFlags::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES)
962                .map_err(TransferError::from)?;
963        }
964
965        let src_width = source.source.width();
966        let src_height = source.source.height();
967
968        let dst = destination.texture.get()?;
969        let premultiplied_alpha = destination.premultiplied_alpha;
970        let destination = wgt::TexelCopyTextureInfo {
971            texture: (),
972            mip_level: destination.mip_level,
973            origin: destination.origin,
974            aspect: destination.aspect,
975        };
976
977        if !conv::is_valid_external_image_copy_dst_texture_format(dst.desc.format) {
978            return Err(
979                TransferError::ExternalCopyToForbiddenTextureFormat(dst.desc.format).into(),
980            );
981        }
982        if dst.desc.dimension != wgt::TextureDimension::D2 {
983            return Err(TransferError::InvalidDimensionExternal.into());
984        }
985        dst.check_usage(wgt::TextureUsages::COPY_DST | wgt::TextureUsages::RENDER_ATTACHMENT)
986            .map_err(TransferError::MissingTextureUsage)?;
987        if dst.desc.sample_count != 1 {
988            return Err(TransferError::InvalidSampleCount {
989                sample_count: dst.desc.sample_count,
990            }
991            .into());
992        }
993
994        if source.origin.x + size.width > src_width {
995            return Err(TransferError::TextureOverrun {
996                start_offset: source.origin.x,
997                end_offset: source.origin.x + size.width,
998                texture_size: src_width,
999                dimension: crate::resource::TextureErrorDimension::X,
1000                side: CopySide::Source,
1001            }
1002            .into());
1003        }
1004        if source.origin.y + size.height > src_height {
1005            return Err(TransferError::TextureOverrun {
1006                start_offset: source.origin.y,
1007                end_offset: source.origin.y + size.height,
1008                texture_size: src_height,
1009                dimension: crate::resource::TextureErrorDimension::Y,
1010                side: CopySide::Source,
1011            }
1012            .into());
1013        }
1014        if size.depth_or_array_layers != 1 {
1015            return Err(TransferError::TextureOverrun {
1016                start_offset: 0,
1017                end_offset: size.depth_or_array_layers,
1018                texture_size: 1,
1019                dimension: crate::resource::TextureErrorDimension::Z,
1020                side: CopySide::Source,
1021            }
1022            .into());
1023        }
1024
1025        // Note: Doing the copy range validation early is important because ensures that the
1026        // dimensions are not going to cause overflow in other parts of the validation.
1027        let (hal_copy_size, _) =
1028            validate_texture_copy_range(&destination, &dst.desc, CopySide::Destination, &size)?;
1029
1030        let (selector, dst_base) = extract_texture_selector(&destination, &size, &dst)?;
1031
1032        let mut pending_writes = self.pending_writes.lock();
1033        let encoder = pending_writes.activate();
1034
1035        // If the copy does not fully cover the layers, we need to initialize to
1036        // zero *first* as we don't keep track of partial texture layer inits.
1037        //
1038        // Strictly speaking we only need to clear the areas of a layer
1039        // untouched, but this would get increasingly messy.
1040        let init_layer_range = if dst.desc.dimension == wgt::TextureDimension::D3 {
1041            // volume textures don't have a layer range as array volumes aren't supported
1042            0..1
1043        } else {
1044            destination.origin.z..destination.origin.z + size.depth_or_array_layers
1045        };
1046        let mut dst_initialization_status = dst.initialization_status.write();
1047        if dst_initialization_status.mips[destination.mip_level as usize]
1048            .check(init_layer_range.clone())
1049            .is_some()
1050        {
1051            if has_copy_partial_init_tracker_coverage(&size, destination.mip_level, &dst.desc) {
1052                for layer_range in dst_initialization_status.mips[destination.mip_level as usize]
1053                    .drain(init_layer_range)
1054                    .collect::<Vec<core::ops::Range<u32>>>()
1055                {
1056                    let mut trackers = self.device.trackers.lock();
1057                    crate::command::clear_texture(
1058                        &dst,
1059                        TextureInitRange {
1060                            mip_range: destination.mip_level..(destination.mip_level + 1),
1061                            layer_range,
1062                        },
1063                        encoder,
1064                        &mut trackers.textures,
1065                        &self.device.alignments,
1066                        self.device.zero_buffer.as_ref(),
1067                        &self.device.snatchable_lock.read(),
1068                    )
1069                    .map_err(QueueWriteError::from)?;
1070                }
1071            } else {
1072                dst_initialization_status.mips[destination.mip_level as usize]
1073                    .drain(init_layer_range);
1074            }
1075        }
1076
1077        let snatch_guard = self.device.snatchable_lock.read();
1078        let dst_raw = dst.try_raw(&snatch_guard)?;
1079
1080        let regions = hal::TextureCopy {
1081            src_base: hal::TextureCopyBase {
1082                mip_level: 0,
1083                array_layer: 0,
1084                origin: source.origin.to_3d(0),
1085                aspect: hal::FormatAspects::COLOR,
1086            },
1087            dst_base,
1088            size: hal_copy_size,
1089        };
1090
1091        let mut trackers = self.device.trackers.lock();
1092        let transitions = trackers
1093            .textures
1094            .set_single(&dst, selector, wgt::TextureUses::COPY_DST);
1095
1096        // `copy_external_image_to_texture` is exclusive to the WebGL backend.
1097        // Don't go through the `DynCommandEncoder` abstraction and directly to the WebGL backend.
1098        let encoder_webgl = encoder
1099            .as_any_mut()
1100            .downcast_mut::<hal::gles::CommandEncoder>()
1101            .unwrap();
1102        let dst_raw_webgl = dst_raw
1103            .as_any()
1104            .downcast_ref::<hal::gles::Texture>()
1105            .unwrap();
1106        let transitions_webgl = transitions.map(|pending| {
1107            let dyn_transition = pending.into_hal(dst_raw);
1108            hal::TextureBarrier {
1109                texture: dst_raw_webgl,
1110                range: dyn_transition.range,
1111                usage: dyn_transition.usage,
1112            }
1113        });
1114
1115        use hal::CommandEncoder as _;
1116        unsafe {
1117            encoder_webgl.transition_textures(transitions_webgl);
1118            encoder_webgl.copy_external_image_to_texture(
1119                source,
1120                dst_raw_webgl,
1121                premultiplied_alpha,
1122                iter::once(regions),
1123            );
1124        }
1125
1126        Ok(())
1127    }
1128
    /// Submits `command_buffers`, together with any pending queue writes, to the
    /// underlying HAL queue.
    ///
    /// A new submission index is allocated up front, before the device validity
    /// check, so the same index is reported on both the success and the error
    /// path.
    ///
    /// Every command buffer is consumed via `take_finished`, even after an
    /// earlier one has failed validation; only the first such error is kept and
    /// it is returned once the whole list has been visited. The pending-writes
    /// execution, if there is one, is placed in front of the user command
    /// buffers. After a successful HAL submit the submission is registered with
    /// the lifetime tracker and the device is polled once through `maintain`;
    /// the closures returned by that poll are fired after the labeled block that
    /// owns this function's lock guards has ended.
    ///
    /// # Errors
    ///
    /// Returns `(submit_index, error)` when the device is invalid, a command
    /// buffer fails to finish or validate, an internal encoder pass cannot be
    /// opened or closed, memory initialization fails, preparing the pending
    /// writes fails, the HAL submit fails, or the post-submit poll reports a
    /// device error.
    pub fn submit(
        &self,
        command_buffers: &[Arc<CommandBuffer>],
    ) -> Result<SubmissionIndex, (SubmissionIndex, QueueSubmitError)> {
        profiling::scope!("Queue::submit");
        api_log!("Queue::submit");

        // Declared outside the labeled block so both the success and the error
        // return below can report it.
        let submit_index;

        let res = 'error: {
            let snatch_guard = self.device.snatchable_lock.read();

            // Fence lock must be acquired after the snatch lock everywhere to avoid deadlocks.
            let mut fence = self.device.fence.write();

            // The index is bumped before any validation, so a failed submission
            // still consumes an index.
            let mut command_index_guard = self.device.command_indices.write();
            command_index_guard.active_submission_index += 1;
            submit_index = command_index_guard.active_submission_index;

            if let Err(e) = self.device.check_is_valid() {
                break 'error Err(e.into());
            }

            let mut active_executions = Vec::new();

            let mut used_surface_textures = track::TextureUsageScope::default();

            // Use a hashmap here to deduplicate the surface textures that are used in the command buffers.
            // This avoids vulkan deadlocking from the same surface texture being submitted multiple times.
            let mut submit_surface_textures_owned = FastHashMap::default();

            {
                if !command_buffers.is_empty() {
                    profiling::scope!("prepare");

                    let mut first_error = None;

                    //TODO: if multiple command buffers are submitted, we can re-use the last
                    // native command buffer of the previous chain instead of always creating
                    // a temporary one, since the chains are not finished.

                    // finish all the command buffers first
                    for command_buffer in command_buffers {
                        profiling::scope!("process command buffer");

                        // we reset the used surface textures every time we use
                        // it, so make sure to set_size on it.
                        used_surface_textures.set_size(self.device.tracker_indices.textures.size());

                        // Note that we are required to invalidate all command buffers in both the success and failure paths.
                        // This is why we `continue` and don't early return via `?`.
                        #[allow(unused_mut)]
                        let mut cmd_buf_data = command_buffer.take_finished();

                        #[cfg(feature = "trace")]
                        if let Some(ref mut trace) = *self.device.trace.lock() {
                            if let Ok(ref mut cmd_buf_data) = cmd_buf_data {
                                trace.add(Action::Submit(
                                    submit_index,
                                    cmd_buf_data.commands.take().unwrap(),
                                ));
                            }
                        }

                        // An earlier buffer already failed: this one has been
                        // taken above, but is not processed any further.
                        if first_error.is_some() {
                            continue;
                        }

                        let mut baked = match cmd_buf_data {
                            Ok(cmd_buf_data) => {
                                let res = validate_command_buffer(
                                    command_buffer,
                                    self,
                                    &cmd_buf_data,
                                    &snatch_guard,
                                    &mut submit_surface_textures_owned,
                                    &mut used_surface_textures,
                                    &mut command_index_guard,
                                );
                                if let Err(err) = res {
                                    first_error.get_or_insert(err);
                                    continue;
                                }
                                cmd_buf_data.into_baked_commands()
                            }
                            Err(err) => {
                                first_error.get_or_insert(err.into());
                                continue;
                            }
                        };

                        // execute resource transitions
                        if let Err(e) = baked.encoder.open_pass(Some("(wgpu internal) Transit")) {
                            break 'error Err(e.into());
                        }

                        //Note: locking the trackers has to be done after the storages
                        let mut trackers = self.device.trackers.lock();
                        if let Err(e) = baked.initialize_buffer_memory(&mut trackers, &snatch_guard)
                        {
                            break 'error Err(e.into());
                        }
                        if let Err(e) = baked.initialize_texture_memory(
                            &mut trackers,
                            &self.device,
                            &snatch_guard,
                        ) {
                            break 'error Err(e.into());
                        }

                        //Note: stateless trackers are not merged:
                        // device already knows these resources exist.
                        CommandBuffer::insert_barriers_from_device_tracker(
                            baked.encoder.raw.as_mut(),
                            &mut trackers,
                            &baked.trackers,
                            &snatch_guard,
                        );

                        if let Err(e) = baked.encoder.close_and_push_front() {
                            break 'error Err(e.into());
                        }

                        // Transition surface textures into `Present` state.
                        // Note: we could technically do it after all of the command buffers,
                        // but here we have a command encoder by hand, so it's easier to use it.
                        if !used_surface_textures.is_empty() {
                            if let Err(e) = baked.encoder.open_pass(Some("(wgpu internal) Present"))
                            {
                                break 'error Err(e.into());
                            }
                            let texture_barriers = trackers
                                .textures
                                .set_from_usage_scope_and_drain_transitions(
                                    &used_surface_textures,
                                    &snatch_guard,
                                )
                                .collect::<Vec<_>>();
                            unsafe {
                                baked.encoder.raw.transition_textures(&texture_barriers);
                            };
                            if let Err(e) = baked.encoder.close() {
                                break 'error Err(e.into());
                            }
                            // Start from an empty scope for the next command buffer.
                            used_surface_textures = track::TextureUsageScope::default();
                        }

                        // done
                        active_executions.push(EncoderInFlight {
                            inner: baked.encoder,
                            trackers: baked.trackers,
                            temp_resources: baked.temp_resources,
                            _indirect_draw_validation_resources: baked
                                .indirect_draw_validation_resources,
                            pending_buffers: FastHashMap::default(),
                            pending_textures: FastHashMap::default(),
                            pending_blas_s: FastHashMap::default(),
                        });
                    }

                    // Report the first per-buffer failure only after every
                    // command buffer has been visited.
                    if let Some(first_error) = first_error {
                        break 'error Err(first_error);
                    }
                }
            }

            let mut pending_writes = self.pending_writes.lock();

            // Surface textures written through the pending writes also have to be
            // submitted and transitioned to `PRESENT`.
            {
                used_surface_textures.set_size(self.device.tracker_indices.textures.size());
                for texture in pending_writes.dst_textures.values() {
                    match texture.try_inner(&snatch_guard) {
                        Ok(TextureInner::Native { .. }) => {}
                        Ok(TextureInner::Surface { .. }) => {
                            // Compare the Arcs by pointer as Textures don't implement Eq
                            submit_surface_textures_owned
                                .insert(Arc::as_ptr(texture), texture.clone());

                            unsafe {
                                used_surface_textures
                                    .merge_single(texture, None, wgt::TextureUses::PRESENT)
                                    .unwrap()
                            };
                        }
                        // The texture must not have been destroyed when its usage here was
                        // encoded. If it was destroyed after that, then it was transferred
                        // to `pending_writes.temp_resources` at the time of destruction, so
                        // we are still okay to use it.
                        Err(DestroyedResourceError(_)) => {}
                    }
                }

                if !used_surface_textures.is_empty() {
                    let mut trackers = self.device.trackers.lock();

                    let texture_barriers = trackers
                        .textures
                        .set_from_usage_scope_and_drain_transitions(
                            &used_surface_textures,
                            &snatch_guard,
                        )
                        .collect::<Vec<_>>();
                    unsafe {
                        pending_writes
                            .command_encoder
                            .transition_textures(&texture_barriers);
                    };
                }
            }

            match pending_writes.pre_submit(&self.device.command_allocator, &self.device, self) {
                Ok(Some(pending_execution)) => {
                    // Pending writes go first in the submission, ahead of the
                    // user command buffers.
                    active_executions.insert(0, pending_execution);
                }
                Ok(None) => {}
                Err(e) => break 'error Err(e.into()),
            }
            // Flatten every execution's native command buffers into one list, in order.
            let hal_command_buffers = active_executions
                .iter()
                .flat_map(|e| e.inner.list.iter().map(|b| b.as_ref()))
                .collect::<Vec<_>>();

            {
                let mut submit_surface_textures =
                    SmallVec::<[&dyn hal::DynSurfaceTexture; 2]>::with_capacity(
                        submit_surface_textures_owned.len(),
                    );

                for texture in submit_surface_textures_owned.values() {
                    // Only `TextureInner::Surface` textures are ever inserted into
                    // this map within this function.
                    let raw = match texture.inner.get(&snatch_guard) {
                        Some(TextureInner::Surface { raw, .. }) => raw.as_ref(),
                        _ => unreachable!(),
                    };
                    submit_surface_textures.push(raw);
                }

                if let Err(e) = unsafe {
                    self.raw().submit(
                        &hal_command_buffers,
                        &submit_surface_textures,
                        (fence.as_mut(), submit_index),
                    )
                }
                .map_err(|e| self.device.handle_hal_error(e))
                {
                    break 'error Err(e.into());
                }

                // The command-indices write lock is released once the HAL submit
                // has gone through.
                drop(command_index_guard);

                // Advance the successful submission index.
                self.device
                    .last_successful_submission_index
                    .fetch_max(submit_index, Ordering::SeqCst);
            }

            profiling::scope!("cleanup");

            // this will register the new submission to the life time tracker
            self.lock_life()
                .track_submission(submit_index, active_executions);
            drop(pending_writes);

            // This will schedule destruction of all resources that are no longer needed
            // by the user but used in the command stream, among other things.
            // `maintain` takes the fence as a downgraded (read) guard and consumes
            // the snatch guard.
            let fence_guard = RwLockWriteGuard::downgrade(fence);
            let (closures, result) =
                self.device
                    .maintain(fence_guard, wgt::PollType::Poll, snatch_guard);
            match result {
                Ok(status) => {
                    debug_assert!(matches!(
                        status,
                        wgt::PollStatus::QueueEmpty | wgt::PollStatus::Poll
                    ));
                }
                Err(WaitIdleError::Device(err)) => break 'error Err(QueueSubmitError::Queue(err)),
                Err(WaitIdleError::WrongSubmissionIndex(..)) => {
                    unreachable!("Cannot get WrongSubmissionIndex from Poll")
                }
                Err(WaitIdleError::Timeout) => unreachable!("Cannot get Timeout from Poll"),
            };

            Ok(closures)
        };

        let callbacks = match res {
            Ok(ok) => ok,
            Err(e) => return Err((submit_index, e)),
        };

        // the closures should execute with nothing locked!
        callbacks.fire();

        self.device.lose_if_oom();

        api_log!("Queue::submit returned submit index {submit_index}");

        Ok(submit_index)
    }
1429
1430    pub fn get_timestamp_period(&self) -> f32 {
1431        unsafe { self.raw().get_timestamp_period() }
1432    }
1433
1434    /// `closure` is guaranteed to be called.
1435    pub fn on_submitted_work_done(
1436        &self,
1437        closure: SubmittedWorkDoneClosure,
1438    ) -> Option<SubmissionIndex> {
1439        api_log!("Queue::on_submitted_work_done");
1440        //TODO: flush pending writes
1441        self.lock_life().add_work_done_closure(closure)
1442    }
1443
1444    pub fn compact_blas(&self, blas: &Arc<Blas>) -> Result<Arc<Blas>, CompactBlasError> {
1445        profiling::scope!("Queue::compact_blas");
1446        api_log!("Queue::compact_blas");
1447
1448        self.device.check_is_valid()?;
1449        self.same_device_as(blas.as_ref())?;
1450
1451        let device = blas.device.clone();
1452
1453        let snatch_guard = device.snatchable_lock.read();
1454
1455        let BlasCompactState::Ready { size } = *blas.compacted_state.lock() else {
1456            return Err(CompactBlasError::BlasNotReady);
1457        };
1458
1459        let mut size_info = blas.size_info;
1460        size_info.acceleration_structure_size = size;
1461
1462        let mut pending_writes = self.pending_writes.lock();
1463        let cmd_buf_raw = pending_writes.activate();
1464
1465        let raw = unsafe {
1466            device
1467                .raw()
1468                .create_acceleration_structure(&hal::AccelerationStructureDescriptor {
1469                    label: None,
1470                    size: size_info.acceleration_structure_size,
1471                    format: hal::AccelerationStructureFormat::BottomLevel,
1472                    allow_compaction: false,
1473                })
1474        }
1475        .map_err(DeviceError::from_hal)?;
1476
1477        let src_raw = blas.try_raw(&snatch_guard)?;
1478
1479        unsafe {
1480            cmd_buf_raw.copy_acceleration_structure_to_acceleration_structure(
1481                src_raw,
1482                raw.as_ref(),
1483                wgt::AccelerationStructureCopy::Compact,
1484            )
1485        };
1486
1487        let handle = unsafe {
1488            device
1489                .raw()
1490                .get_acceleration_structure_device_address(raw.as_ref())
1491        };
1492
1493        drop(snatch_guard);
1494
1495        let mut command_indices_lock = device.command_indices.write();
1496        command_indices_lock.next_acceleration_structure_build_command_index += 1;
1497        let built_index =
1498            NonZeroU64::new(command_indices_lock.next_acceleration_structure_build_command_index)
1499                .unwrap();
1500
1501        let new_blas = Arc::new(Blas {
1502            raw: Snatchable::new(raw),
1503            device: device.clone(),
1504            size_info,
1505            sizes: blas.sizes.clone(),
1506            flags: blas.flags & !AccelerationStructureFlags::ALLOW_COMPACTION,
1507            update_mode: blas.update_mode,
1508            // Bypass the submit checks which update this because we don't submit this normally.
1509            built_index: RwLock::new(rank::BLAS_BUILT_INDEX, Some(built_index)),
1510            handle,
1511            label: blas.label.clone() + " compacted",
1512            tracking_data: TrackingData::new(blas.device.tracker_indices.blas_s.clone()),
1513            compaction_buffer: None,
1514            compacted_state: Mutex::new(rank::BLAS_COMPACTION_STATE, BlasCompactState::Compacted),
1515        });
1516
1517        pending_writes.insert_blas(blas);
1518        pending_writes.insert_blas(&new_blas);
1519
1520        Ok(new_blas)
1521    }
1522}
1523
1524impl Global {
1525    pub fn queue_write_buffer(
1526        &self,
1527        queue_id: QueueId,
1528        buffer_id: id::BufferId,
1529        buffer_offset: wgt::BufferAddress,
1530        data: &[u8],
1531    ) -> Result<(), QueueWriteError> {
1532        let queue = self.hub.queues.get(queue_id);
1533
1534        #[cfg(feature = "trace")]
1535        if let Some(ref mut trace) = *queue.device.trace.lock() {
1536            let data_path = trace.make_binary("bin", data);
1537            trace.add(Action::WriteBuffer {
1538                id: buffer_id,
1539                data: data_path,
1540                range: buffer_offset..buffer_offset + data.len() as u64,
1541                queued: true,
1542            });
1543        }
1544
1545        let buffer = self.hub.buffers.get(buffer_id);
1546        queue.write_buffer(buffer, buffer_offset, data)
1547    }
1548
1549    pub fn queue_create_staging_buffer(
1550        &self,
1551        queue_id: QueueId,
1552        buffer_size: wgt::BufferSize,
1553        id_in: Option<id::StagingBufferId>,
1554    ) -> Result<(id::StagingBufferId, NonNull<u8>), QueueWriteError> {
1555        let queue = self.hub.queues.get(queue_id);
1556        let (staging_buffer, ptr) = queue.create_staging_buffer(buffer_size)?;
1557
1558        let fid = self.hub.staging_buffers.prepare(id_in);
1559        let id = fid.assign(staging_buffer);
1560
1561        Ok((id, ptr))
1562    }
1563
1564    pub fn queue_write_staging_buffer(
1565        &self,
1566        queue_id: QueueId,
1567        buffer_id: id::BufferId,
1568        buffer_offset: wgt::BufferAddress,
1569        staging_buffer_id: id::StagingBufferId,
1570    ) -> Result<(), QueueWriteError> {
1571        let queue = self.hub.queues.get(queue_id);
1572        let buffer = self.hub.buffers.get(buffer_id);
1573        let staging_buffer = self.hub.staging_buffers.remove(staging_buffer_id);
1574        queue.write_staging_buffer(buffer, buffer_offset, staging_buffer)
1575    }
1576
1577    pub fn queue_validate_write_buffer(
1578        &self,
1579        queue_id: QueueId,
1580        buffer_id: id::BufferId,
1581        buffer_offset: u64,
1582        buffer_size: wgt::BufferSize,
1583    ) -> Result<(), QueueWriteError> {
1584        let queue = self.hub.queues.get(queue_id);
1585        let buffer = self.hub.buffers.get(buffer_id);
1586        queue.validate_write_buffer(buffer, buffer_offset, buffer_size)
1587    }
1588
1589    pub fn queue_write_texture(
1590        &self,
1591        queue_id: QueueId,
1592        destination: &TexelCopyTextureInfo,
1593        data: &[u8],
1594        data_layout: &wgt::TexelCopyBufferLayout,
1595        size: &wgt::Extent3d,
1596    ) -> Result<(), QueueWriteError> {
1597        let queue = self.hub.queues.get(queue_id);
1598
1599        #[cfg(feature = "trace")]
1600        if let Some(ref mut trace) = *queue.device.trace.lock() {
1601            let data_path = trace.make_binary("bin", data);
1602            trace.add(Action::WriteTexture {
1603                to: *destination,
1604                data: data_path,
1605                layout: *data_layout,
1606                size: *size,
1607            });
1608        }
1609
1610        let destination = wgt::TexelCopyTextureInfo {
1611            texture: self.hub.textures.get(destination.texture),
1612            mip_level: destination.mip_level,
1613            origin: destination.origin,
1614            aspect: destination.aspect,
1615        };
1616        queue.write_texture(destination, data, data_layout, size)
1617    }
1618
1619    #[cfg(webgl)]
1620    pub fn queue_copy_external_image_to_texture(
1621        &self,
1622        queue_id: QueueId,
1623        source: &wgt::CopyExternalImageSourceInfo,
1624        destination: crate::command::CopyExternalImageDestInfo,
1625        size: wgt::Extent3d,
1626    ) -> Result<(), QueueWriteError> {
1627        let queue = self.hub.queues.get(queue_id);
1628        let destination = wgt::CopyExternalImageDestInfo {
1629            texture: self.hub.textures.get(destination.texture),
1630            mip_level: destination.mip_level,
1631            origin: destination.origin,
1632            aspect: destination.aspect,
1633            color_space: destination.color_space,
1634            premultiplied_alpha: destination.premultiplied_alpha,
1635        };
1636        queue.copy_external_image_to_texture(source, destination, size)
1637    }
1638
1639    pub fn queue_submit(
1640        &self,
1641        queue_id: QueueId,
1642        command_buffer_ids: &[id::CommandBufferId],
1643    ) -> Result<SubmissionIndex, (SubmissionIndex, QueueSubmitError)> {
1644        let queue = self.hub.queues.get(queue_id);
1645        let command_buffer_guard = self.hub.command_buffers.read();
1646        let command_buffers = command_buffer_ids
1647            .iter()
1648            .map(|id| command_buffer_guard.get(*id))
1649            .collect::<Vec<_>>();
1650        drop(command_buffer_guard);
1651        queue.submit(&command_buffers)
1652    }
1653
1654    pub fn queue_get_timestamp_period(&self, queue_id: QueueId) -> f32 {
1655        let queue = self.hub.queues.get(queue_id);
1656
1657        if queue.device.timestamp_normalizer.get().unwrap().enabled() {
1658            return 1.0;
1659        }
1660
1661        queue.get_timestamp_period()
1662    }
1663
1664    pub fn queue_on_submitted_work_done(
1665        &self,
1666        queue_id: QueueId,
1667        closure: SubmittedWorkDoneClosure,
1668    ) -> SubmissionIndex {
1669        api_log!("Queue::on_submitted_work_done {queue_id:?}");
1670
1671        //TODO: flush pending writes
1672        let queue = self.hub.queues.get(queue_id);
1673        let result = queue.on_submitted_work_done(closure);
1674        result.unwrap_or(0) // '0' means no wait is necessary
1675    }
1676
1677    pub fn queue_compact_blas(
1678        &self,
1679        queue_id: QueueId,
1680        blas_id: BlasId,
1681        id_in: Option<BlasId>,
1682    ) -> (BlasId, Option<u64>, Option<CompactBlasError>) {
1683        api_log!("Queue::compact_blas {queue_id:?}, {blas_id:?}");
1684
1685        let fid = self.hub.blas_s.prepare(id_in);
1686
1687        let queue = self.hub.queues.get(queue_id);
1688        let blas = self.hub.blas_s.get(blas_id);
1689        let device = &queue.device;
1690
1691        // TODO: Tracing
1692
1693        let error = 'error: {
1694            match device.require_features(
1695                wgpu_types::Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE,
1696            ) {
1697                Ok(_) => {}
1698                Err(err) => break 'error err.into(),
1699            }
1700
1701            let blas = match blas.get() {
1702                Ok(blas) => blas,
1703                Err(err) => break 'error err.into(),
1704            };
1705
1706            let new_blas = match queue.compact_blas(&blas) {
1707                Ok(blas) => blas,
1708                Err(err) => break 'error err,
1709            };
1710
1711            // We should have no more errors after this because we have marked the command encoder as successful.
1712            let old_blas_size = blas.size_info.acceleration_structure_size;
1713            let new_blas_size = new_blas.size_info.acceleration_structure_size;
1714            let handle = new_blas.handle;
1715
1716            let id = fid.assign(Fallible::Valid(new_blas));
1717
1718            api_log!("CommandEncoder::compact_blas {blas_id:?} (size: {old_blas_size}) -> {id:?} (size: {new_blas_size})");
1719
1720            return (id, Some(handle), None);
1721        };
1722
1723        let id = fid.assign(Fallible::Invalid(Arc::new(error.to_string())));
1724
1725        (id, None, Some(error))
1726    }
1727}
1728
1729fn validate_command_buffer(
1730    command_buffer: &CommandBuffer,
1731    queue: &Queue,
1732    cmd_buf_data: &crate::command::CommandBufferMutable,
1733    snatch_guard: &SnatchGuard,
1734    submit_surface_textures_owned: &mut FastHashMap<*const Texture, Arc<Texture>>,
1735    used_surface_textures: &mut track::TextureUsageScope,
1736    command_index_guard: &mut RwLockWriteGuard<CommandIndices>,
1737) -> Result<(), QueueSubmitError> {
1738    command_buffer.same_device_as(queue)?;
1739
1740    {
1741        profiling::scope!("check resource state");
1742
1743        {
1744            profiling::scope!("buffers");
1745            for buffer in cmd_buf_data.trackers.buffers.used_resources() {
1746                buffer.check_destroyed(snatch_guard)?;
1747
1748                match *buffer.map_state.lock() {
1749                    BufferMapState::Idle => (),
1750                    _ => return Err(QueueSubmitError::BufferStillMapped(buffer.error_ident())),
1751                }
1752            }
1753        }
1754        {
1755            profiling::scope!("textures");
1756            for texture in cmd_buf_data.trackers.textures.used_resources() {
1757                let should_extend = match texture.try_inner(snatch_guard)? {
1758                    TextureInner::Native { .. } => false,
1759                    TextureInner::Surface { .. } => {
1760                        // Compare the Arcs by pointer as Textures don't implement Eq.
1761                        submit_surface_textures_owned.insert(Arc::as_ptr(texture), texture.clone());
1762
1763                        true
1764                    }
1765                };
1766                if should_extend {
1767                    unsafe {
1768                        used_surface_textures
1769                            .merge_single(texture, None, wgt::TextureUses::PRESENT)
1770                            .unwrap();
1771                    };
1772                }
1773            }
1774        }
1775
1776        if let Err(e) =
1777            cmd_buf_data.validate_acceleration_structure_actions(snatch_guard, command_index_guard)
1778        {
1779            return Err(e.into());
1780        }
1781    }
1782    Ok(())
1783}
wgpu_core/device/queue.rs

wgpu_core/device/
queue.rs