djeedai / bevy_hanabi, build 11543837292

27 Oct 2024 09:10PM UTC, coverage: 57.849% (-1.2%) from 59.035%

Pull Request #387: Unify the clone modifier and spawners, and fix races.
Merge a72c10537 into 75f07d778 (github / web-flow)

114 of 621 new or added lines in 7 files covered (18.36%).
23 existing lines in 5 files now uncovered.
3534 of 6109 relevant lines covered (57.85%).
23.02 hits per line.

Source file: /src/render/buffer_table.rs (70.37% of lines covered)
1
use std::num::NonZeroU64;
2

3
use bevy::{
4
    log::trace,
5
    render::{
6
        render_resource::{
7
            Buffer, BufferAddress, BufferDescriptor, BufferUsages, CommandEncoder, ShaderSize,
8
            ShaderType,
9
        },
10
        renderer::{RenderDevice, RenderQueue},
11
    },
12
};
13
use bytemuck::{cast_slice, Pod};
14
use copyless::VecHelper;
15

16
use crate::next_multiple_of;
17

18
/// Index of a row in a [`BufferTable`].
19
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
20
pub struct BufferTableId(pub(crate) u32); // TEMP: pub(crate)
21

22
impl BufferTableId {
23
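    /// Get the ID of the row located `index` rows after this one.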
    #[inline]
NEW
24
    pub fn offset(&self, index: u32) -> BufferTableId {
×
NEW
25
        BufferTableId(self.0 + index)
×
26
    }
27
}
28

29
#[derive(Debug)]
30
struct AllocatedBuffer {
31
    /// Currently allocated buffer, with room for `count` rows.
32
    buffer: Buffer,
33
    /// Size of the currently allocated buffer, in number of rows.
34
    count: u32,
35
    /// Previously allocated buffer if any, cached until the next buffer write
36
    /// so that old data can be copied into the newly-allocated buffer.
37
    old_buffer: Option<Buffer>,
38
    /// Size of the old buffer if any, in number of rows.
39
    old_count: u32,
40
}
41

42
impl AllocatedBuffer {
43
    /// Get the number of rows of the currently allocated GPU buffer.
44
    ///
45
    /// When the capacity grows, the old count remains in use until the next buffer swap.
46
    pub fn allocated_count(&self) -> u32 {
3✔
47
        if self.old_buffer.is_some() {
3✔
48
            self.old_count
×
49
        } else {
50
            self.count
3✔
51
        }
52
    }
53
}
54

55
/// GPU buffer holding a table with concurrent interleaved CPU/GPU access.
56
///
57
/// The buffer table data structure represents a GPU buffer holding a table made
58
/// of individual rows. Each row of the table has the same layout (same size),
59
/// and can be allocated (assigned to an existing index) or free (available for
60
/// future allocation). The data structure manages a free list of rows, and
61
/// copies rows modified on the CPU to the GPU without touching other rows. This ensures
62
/// that existing rows in the GPU buffer can be accessed and modified by the GPU
63
/// without being overwritten by the CPU and without the need for the CPU to
64
/// read the data back from GPU into CPU memory.
65
///
66
/// The element type `T` needs to implement the following traits:
67
/// - [`Pod`] to allow copy.
68
/// - [`ShaderType`] because it needs to be mapped for a shader.
69
/// - [`ShaderSize`] to ensure a fixed footprint, to allow packing multiple
70
///   instances inside a single buffer. This therefore excludes any
71
///   runtime-sized array.
72
///
73
/// This is similar to a [`BufferVec`] or [`AlignedBufferVec`], but unlike those
74
/// data structures a buffer table preserves rows modified by the GPU without
75
/// overwriting. This is useful when the buffer is also modified by GPU shaders,
76
/// so neither the CPU side nor the GPU side has an up-to-date view of the
77
/// entire table, and so the CPU cannot re-upload the entire table on changes.
78
///
79
/// # Usage
80
///
81
/// - During the [`RenderStage::Prepare`] stage, call
82
///   [`clear_previous_frame_resizes()`] to clear any stale buffer from the
83
///   previous frame. Then insert new rows with [`insert()`], and if you made
84
///   changes, call [`allocate_gpu()`] at the end to allocate any new buffer
85
///   needed.
86
/// - During the [`RenderStage::Render`] stage, call [`write_buffer()`] with a
87
///   command encoder before using any row, to perform any pending buffer
88
///   resize copy.
89
///
90
/// [`BufferVec`]: bevy::render::render_resource::BufferVec
91
/// [`AlignedBufferVec`]: crate::render::aligned_buffer_vec::AlignedBufferVec
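///
/// # Example
///
/// A minimal sketch of one frame's flow, assuming a `device: RenderDevice`,
/// `queue: RenderQueue`, and `encoder: CommandEncoder` are available
/// (illustrative only; not compiled here):
///
/// ```ignore
/// let mut table = BufferTable::<u32>::new(BufferUsages::STORAGE, None, None);
///
/// // Prepare: clear stale resize buffers from last frame, then insert rows.
/// table.clear_previous_frame_resizes();
/// let row = table.insert(42u32);
///
/// // Allocate or grow the GPU buffer for the rows inserted this frame.
/// let realloc = table.allocate_gpu(&device, &queue);
///
/// // Render: schedule the pending old-to-new buffer copy, if any.
/// table.write_buffer(&mut encoder);
/// ```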
92
#[derive(Debug)]
93
pub struct BufferTable<T: Pod + ShaderSize> {
94
    /// GPU buffer if already allocated, or `None` otherwise.
95
    buffer: Option<AllocatedBuffer>,
96
    /// GPU buffer usages.
97
    buffer_usage: BufferUsages,
98
    /// Optional GPU buffer name, for debugging.
99
    label: Option<String>,
100
    /// Size of a single buffer element, in bytes, in CPU memory (Rust layout).
101
    item_size: usize,
102
    /// Size of a single buffer element, in bytes, aligned to GPU memory
103
    /// constraints.
104
    aligned_size: usize,
105
    /// Capacity of the buffer, in number of rows.
106
    capacity: u32,
107
    /// Size of the "active" portion of the table, which includes allocated rows
108
    /// and any row in the free list. All other rows in the
109
    /// `active_count..capacity` range are implicitly unallocated.
110
    active_count: u32,
111
    /// Free list of rows available in the GPU buffer for a new allocation. This
112
    /// only contains indices in the `0..active_count` range; all row indices in
113
    /// `active_count..capacity` are assumed to be unallocated.
114
    free_indices: Vec<u32>,
115
    /// Pending values accumulated on CPU and not yet written to GPU, and their
116
    /// rows.
117
    pending_values: Vec<(u32, T)>,
118
    /// Extra pending values accumulated on CPU like `pending_values`, but for
119
    /// which there's not enough space in the current GPU buffer. Those values
120
    /// are sorted in index order, occupying the range `buffer.size..`.
121
    extra_pending_values: Vec<T>,
122
}
123

124
impl<T: Pod + ShaderSize> Default for BufferTable<T> {
125
    fn default() -> Self {
25✔
126
        let item_size = std::mem::size_of::<T>();
25✔
127
        let aligned_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
25✔
128
        assert!(aligned_size >= item_size);
25✔
129
        Self {
130
            buffer: None,
131
            buffer_usage: BufferUsages::all(),
25✔
132
            label: None,
133
            item_size,
134
            aligned_size,
135
            capacity: 0,
136
            active_count: 0,
137
            free_indices: Vec::new(),
25✔
138
            pending_values: Vec::new(),
25✔
139
            extra_pending_values: Vec::new(),
25✔
140
        }
141
    }
142
}
143

144
impl<T: Pod + ShaderSize> BufferTable<T> {
145
    /// Create a new collection.
146
    ///
147
    /// `item_align` is an optional additional alignment for items in the
148
    /// collection. If greater than the natural alignment dictated by WGSL
149
    /// rules, this extra alignment is enforced; otherwise it's ignored (so you
150
    /// can pass `None`). This is useful if, for example, you want to bind
151
    /// individual rows or any subset of the table, to ensure each row is
152
    /// aligned to the device constraints.
153
    ///
154
    /// # Panics
155
    ///
156
    /// Panics if `buffer_usage` contains [`BufferUsages::UNIFORM`] and the
157
    /// layout of the element type `T` does not meet the requirements of the
158
    /// uniform address space, as tested by
159
    /// [`ShaderType::assert_uniform_compat()`].
160
    ///
161
    /// [`BufferUsages::UNIFORM`]: bevy::render::render_resource::BufferUsages::UNIFORM
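    ///
    /// # Example
    ///
    /// A sketch of how `item_align` affects the row stride, assuming an element
    /// type `MyRow` whose [`ShaderSize`] is 32 bytes (hypothetical type, shown
    /// for illustration only):
    ///
    /// ```ignore
    /// // No extra alignment: rows are packed at the WGSL size (32 bytes).
    /// let table = BufferTable::<MyRow>::new(BufferUsages::STORAGE, None, None);
    /// assert_eq!(table.aligned_size(), 32);
    ///
    /// // A 256-byte device alignment pads each row up to 256 bytes.
    /// let table = BufferTable::<MyRow>::new(
    ///     BufferUsages::STORAGE,
    ///     NonZeroU64::new(256),
    ///     None,
    /// );
    /// assert_eq!(table.aligned_size(), 256);
    /// ```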
162
    pub fn new(
25✔
163
        buffer_usage: BufferUsages,
164
        item_align: Option<NonZeroU64>,
165
        label: Option<String>,
166
    ) -> Self {
167
        // GPU-aligned item size, compatible with WGSL rules
168
        let item_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
25✔
169
        // Extra manual alignment for device constraints
170
        let aligned_size = if let Some(item_align) = item_align {
72✔
171
            let item_align = item_align.get() as usize;
×
172
            let aligned_size = next_multiple_of(item_size, item_align);
×
173
            assert!(aligned_size >= item_size);
×
174
            assert!(aligned_size % item_align == 0);
22✔
175
            aligned_size
22✔
176
        } else {
177
            item_size
3✔
178
        };
179
        trace!(
×
180
            "BufferTable[\"{}\"]: item_size={} aligned_size={}",
×
181
            label.as_ref().unwrap_or(&String::new()),
×
182
            item_size,
×
183
            aligned_size
×
184
        );
185
        if buffer_usage.contains(BufferUsages::UNIFORM) {
25✔
186
            <T as ShaderType>::assert_uniform_compat();
×
187
        }
188
        Self {
189
            // Need COPY_SRC and COPY_DST to copy from old to new buffer on resize
190
            buffer_usage: buffer_usage | BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
×
191
            aligned_size,
192
            label,
193
            ..Default::default()
194
        }
195
    }
196

197
    /// Reference to the GPU buffer, if already allocated.
198
    ///
199
    /// This reference corresponds to the currently allocated GPU buffer, which
200
    /// may not contain all data since the last [`insert()`] call, and could
201
    /// become invalid if a new larger buffer needs to be allocated to store the
202
    /// pending values inserted with [`insert()`].
203
    ///
204
    /// [`insert()`]: BufferTable::insert
205
    #[inline]
206
    pub fn buffer(&self) -> Option<&Buffer> {
6✔
207
        self.buffer.as_ref().map(|ab| &ab.buffer)
18✔
208
    }
209

210
    /// Maximum number of rows the table can hold without reallocation.
211
    ///
212
    /// This is the maximum number of rows that can be added to the table
213
    /// without forcing a new GPU buffer to be allocated and a copy from the old
214
    /// to the new buffer.
215
    ///
216
    /// Note that this doesn't imply that no GPU buffer allocation will ever
217
    /// occur; if a GPU buffer was never allocated, and there are pending
218
    /// CPU rows to insert, then a new buffer will be allocated on next
219
    /// update with this capacity.
220
    #[inline]
221
    #[allow(dead_code)]
222
    pub fn capacity(&self) -> u32 {
27✔
223
        self.capacity
27✔
224
    }
225

226
    /// Current number of rows in use in the table.
227
    #[inline]
228
    #[allow(dead_code)]
229
    pub fn len(&self) -> u32 {
31✔
230
        self.active_count - self.free_indices.len() as u32
31✔
231
    }
232

233
    /// Size of a single row in the table, in bytes, aligned to GPU constraints.
234
    #[inline]
235
    #[allow(dead_code)]
236
    pub fn aligned_size(&self) -> usize {
22✔
237
        self.aligned_size
22✔
238
    }
239

240
    /// Is the table empty?
241
    #[inline]
242
    #[allow(dead_code)]
243
    pub fn is_empty(&self) -> bool {
52✔
244
        self.active_count == 0
52✔
245
    }
246

247
    /// Clear all rows of the table without deallocating any existing GPU
248
    /// buffer.
249
    ///
250
    /// This operation only updates the CPU cache of the table, without touching
251
    /// any GPU buffer. On next GPU buffer update, the GPU buffer will be
252
    /// deallocated.
253
    #[allow(dead_code)]
254
    pub fn clear(&mut self) {
×
255
        self.pending_values.clear();
×
256
        self.extra_pending_values.clear();
×
257
        self.free_indices.clear();
×
258
        self.active_count = 0;
×
259
    }
260

261
    /// Clear any stale buffer used for resize in the previous frame during
262
    /// rendering while the data structure was immutable.
263
    ///
264
    /// This must be called before any new [`insert()`].
265
    ///
266
    /// [`insert()`]: crate::BufferTable::insert
267
    pub fn clear_previous_frame_resizes(&mut self) {
37✔
268
        if let Some(ab) = self.buffer.as_mut() {
42✔
269
            ab.old_buffer = None;
×
270
            ab.old_count = 0;
×
271
        }
272
    }
273

274
    fn to_byte_size(&self, count: u32) -> usize {
7✔
275
        count as usize * self.aligned_size
7✔
276
    }
277

278
    /// Insert a new row into the table.
279
    ///
280
    /// For performance reasons, this buffers the row content on the CPU until
281
    /// the next GPU update, to minimize the number of CPU to GPU transfers.
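    ///
    /// A short sketch, assuming a `table` of `u32` rows (illustrative only):
    ///
    /// ```ignore
    /// // The returned ID identifies the row; the value itself is only uploaded
    /// // during the next allocate_gpu() / write_buffer() cycle.
    /// let id = table.insert(42u32);
    /// ```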
282
    pub fn insert(&mut self, value: T) -> BufferTableId {
27✔
283
        trace!(
27✔
284
            "Inserting into table buffer with {} free indices, capacity: {}, active_size: {}",
×
285
            self.free_indices.len(),
×
286
            self.capacity,
×
287
            self.active_count
×
288
        );
289
        let index = if self.free_indices.is_empty() {
54✔
290
            let index = self.active_count;
26✔
291
            if index == self.capacity {
52✔
292
                self.capacity += 1;
26✔
293
            }
294
            debug_assert!(index < self.capacity);
52✔
295
            self.active_count += 1;
26✔
296
            index
26✔
297
        } else {
298
            // Note: this is inefficient O(n) but we need to apply the same logic as the
299
            // EffectCache because we rely on indices being in sync.
300
            self.free_indices.remove(0)
1✔
301
        };
302
        let allocated_count = self
×
303
            .buffer
×
304
            .as_ref()
305
            .map(|ab| ab.allocated_count())
3✔
306
            .unwrap_or(0);
307
        trace!(
×
308
            "Found free index {}, capacity: {}, active_count: {}, allocated_count: {}",
×
309
            index,
×
310
            self.capacity,
×
311
            self.active_count,
×
312
            allocated_count
×
313
        );
314
        if index < allocated_count {
29✔
315
            self.pending_values.alloc().init((index, value));
2✔
316
        } else {
317
            let extra_index = index - allocated_count;
25✔
318
            if extra_index < self.extra_pending_values.len() as u32 {
25✔
319
                self.extra_pending_values[extra_index as usize] = value;
×
320
            } else {
321
                self.extra_pending_values.alloc().init(value);
25✔
322
            }
323
        }
324
        BufferTableId(index)
27✔
325
    }
326

327
    /// Remove a row from the table.
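    ///
    /// A short sketch, reusing the `BufferTableId` returned by `insert()`
    /// (illustrative only):
    ///
    /// ```ignore
    /// let id = table.insert(42u32);
    /// table.remove(id);
    /// ```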
328
    #[allow(dead_code)]
329
    pub fn remove(&mut self, id: BufferTableId) {
2✔
330
        let index = id.0;
2✔
331
        assert!(index < self.active_count);
2✔
332

333
        // If this is the last item in the active zone, just shrink the active zone
334
        // (implicit free list).
335
        if index == self.active_count - 1 {
3✔
336
            self.active_count -= 1;
1✔
337
            self.capacity -= 1;
1✔
338
        } else {
339
            // This is very inefficient but we need to apply the same logic as the
340
            // EffectCache because we rely on indices being in sync.
341
            let pos = self
1✔
342
                .free_indices
1✔
343
                .binary_search(&index) // will fail
1✔
344
                .unwrap_or_else(|e| e); // will get position of insertion
2✔
345
            self.free_indices.insert(pos, index);
×
346
        }
347
    }
348

349
    /// Allocate any GPU buffer if needed, based on the most recent capacity
350
    /// requested.
351
    ///
352
    /// This should be called only once per frame after all allocation requests
353
    /// have been made via [`insert()`] but before the GPU buffer is actually
354
    /// updated. This is an optimization to enable allocating the GPU buffer
355
    /// earlier than it's actually needed. Calling this multiple times will work
356
    /// but will be inefficient and allocate GPU buffers for nothing. Not
357
    /// calling it is safe, as the next update will call it just-in-time anyway.
358
    ///
359
    /// # Returns
360
    ///
361
    /// Returns `true` if a new buffer was (re-)allocated, to indicate any bind
362
    /// group needs to be re-created.
363
    ///
364
    /// [`insert()`]: crate::render::BufferTable::insert
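    ///
    /// A sketch of the intended once-per-frame call, assuming `device`, `queue`,
    /// and a user-managed `bind_group: Option<BindGroup>` are in scope
    /// (hypothetical names, for illustration only):
    ///
    /// ```ignore
    /// if table.allocate_gpu(&device, &queue) {
    ///     // The underlying buffer changed, so any bind group that references
    ///     // it must be re-created.
    ///     bind_group = None;
    /// }
    /// ```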
365
    pub fn allocate_gpu(&mut self, device: &RenderDevice, queue: &RenderQueue) -> bool {
38✔
366
        // The allocated capacity is the capacity of the currently allocated GPU buffer,
367
        // which can be different from the expected capacity (self.capacity) for next
368
        // update.
369
        let allocated_count = self.buffer.as_ref().map(|ab| ab.count).unwrap_or(0);
82✔
370
        let reallocated = if self.capacity > allocated_count {
76✔
371
            let size = self.to_byte_size(self.capacity);
2✔
372
            trace!(
2✔
373
                "reserve: increase capacity from {} to {} elements, old size {} bytes, new size {} bytes",
×
374
                allocated_count,
×
375
                self.capacity,
×
376
                self.to_byte_size(allocated_count),
×
377
                size
×
378
            );
379

380
            // Create the new buffer, swapping with the old one if any
381
            let has_init_data = !self.extra_pending_values.is_empty();
2✔
382
            let new_buffer = device.create_buffer(&BufferDescriptor {
2✔
383
                label: self.label.as_ref().map(|s| &s[..]),
2✔
384
                size: size as BufferAddress,
×
385
                usage: self.buffer_usage,
×
386
                mapped_at_creation: has_init_data,
×
387
            });
388

389
            // Use any pending data to initialize the buffer. We only use CPU-available
390
            // data, which was inserted after the buffer was (re-)allocated and
391
            // has not been uploaded to GPU yet.
392
            if has_init_data {
×
393
                // Leave some space to copy the old buffer if any
394
                let base_size = self.to_byte_size(allocated_count) as u64;
2✔
395
                let extra_size = self.to_byte_size(self.extra_pending_values.len() as u32) as u64;
2✔
396

397
                // Scope get_mapped_range_mut() to force a drop before unmap()
398
                {
399
                    let dst_slice = &mut new_buffer
2✔
400
                        .slice(base_size..base_size + extra_size)
2✔
401
                        .get_mapped_range_mut();
2✔
402

403
                    for (index, content) in self.extra_pending_values.drain(..).enumerate() {
6✔
404
                        let byte_size = self.aligned_size; // single row
4✔
405
                        let byte_offset = byte_size * index;
4✔
406

407
                        // Copy Rust value into a GPU-ready format, including GPU padding.
408
                        let src: &[u8] = cast_slice(std::slice::from_ref(&content));
4✔
409
                        let dst_range = byte_offset..byte_offset + self.item_size;
4✔
410
                        trace!(
4✔
411
                            "+ copy: index={} src={:?} dst={:?} byte_offset={} byte_size={}",
×
412
                            index,
×
413
                            src.as_ptr(),
×
414
                            dst_range,
×
415
                            byte_offset,
×
416
                            byte_size
×
417
                        );
418
                        let dst = &mut dst_slice[dst_range];
4✔
419
                        dst.copy_from_slice(src);
4✔
420
                    }
421
                }
422

423
                new_buffer.unmap();
2✔
424
            }
425

426
            if let Some(ab) = self.buffer.as_mut() {
3✔
427
                // If there's any data currently in the GPU buffer, we need to copy it on next
428
                // update to preserve it, but only if there's no pending copy already.
429
                if self.active_count > 0 && ab.old_buffer.is_none() {
2✔
430
                    ab.old_buffer = Some(ab.buffer.clone()); // TODO: swap
1✔
431
                    ab.old_count = ab.count;
1✔
432
                }
433
                ab.buffer = new_buffer;
1✔
434
                ab.count = self.capacity;
1✔
435
            } else {
436
                self.buffer = Some(AllocatedBuffer {
1✔
437
                    buffer: new_buffer,
1✔
438
                    count: self.capacity,
1✔
439
                    old_buffer: None,
1✔
440
                    old_count: 0,
1✔
441
                });
442
            }
443

444
            true
2✔
445
        } else {
446
            false
36✔
447
        };
448

449
        // Immediately schedule a copy of old rows.
450
        // - For old rows, copy into the old buffer because the old-to-new buffer copy
451
        //   will be executed during a command queue while any CPU to GPU upload is
452
        //   prepended before the next command queue. To ensure things don't get out of
453
        //   order, with the CPU upload overwriting the GPU-to-GPU copy, make sure those
454
        //   two are disjoint.
455
        if let Some(ab) = self.buffer.as_ref() {
7✔
456
            let buffer = ab.old_buffer.as_ref().unwrap_or(&ab.buffer);
×
457
            for (index, content) in self.pending_values.drain(..) {
2✔
458
                let byte_size = self.aligned_size;
2✔
459
                let byte_offset = byte_size * index as usize;
2✔
460

461
                // Copy Rust value into a GPU-ready format, including GPU padding.
462
                // TODO - Do that in insert()!
463
                let mut aligned_buffer: Vec<u8> = vec![0; self.aligned_size];
2✔
464
                let src: &[u8] = cast_slice(std::slice::from_ref(&content));
2✔
465
                let dst_range = ..self.item_size;
2✔
466
                trace!(
2✔
467
                    "+ copy: index={} src={:?} dst={:?} byte_offset={} byte_size={}",
×
468
                    index,
×
469
                    src.as_ptr(),
×
470
                    dst_range,
×
471
                    byte_offset,
×
472
                    byte_size
×
473
                );
474
                let dst = &mut aligned_buffer[dst_range];
2✔
475
                dst.copy_from_slice(src);
2✔
476

477
                // Upload to GPU
478
                // TODO - Merge contiguous blocks into a single write_buffer()
479
                let bytes: &[u8] = cast_slice(&aligned_buffer);
2✔
480
                queue.write_buffer(buffer, byte_offset as u64, bytes);
2✔
481
            }
482
        } else {
483
            debug_assert!(self.pending_values.is_empty());
62✔
484
            debug_assert!(self.extra_pending_values.is_empty());
62✔
485
        }
486

487
        reallocated
38✔
488
    }
489

490
    /// Schedule the GPU-side buffer update, copying the old buffer into the
    /// newly-allocated one if the table was resized.
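    ///
    /// A minimal sketch, assuming `device` and `queue` are in scope (mirrors the
    /// pattern used by the tests below; illustrative only):
    ///
    /// ```ignore
    /// let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
    ///     label: Some("buffer_table_update"),
    /// });
    /// table.write_buffer(&mut encoder);
    /// queue.submit([encoder.finish()]);
    /// ```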
491
    pub fn write_buffer(&self, encoder: &mut CommandEncoder) {
37✔
492
        // Check if there's any work to do: either some pending values to upload or some
493
        // existing buffer to copy into a newly-allocated one.
494
        if self.pending_values.is_empty()
37✔
495
            && self
37✔
496
                .buffer
37✔
497
                .as_ref()
37✔
498
                .map(|ab| ab.old_buffer.is_none())
80✔
499
                .unwrap_or(true)
37✔
500
        {
501
            return;
36✔
502
        }
503

504
        trace!(
1✔
505
            "write_buffer: pending_values.len={} item_size={} aligned_size={} buffer={:?}",
×
506
            self.pending_values.len(),
×
507
            self.item_size,
×
508
            self.aligned_size,
×
509
            self.buffer,
×
510
        );
511

512
        // If there's no more GPU buffer, there's nothing to do
513
        let Some(ab) = self.buffer.as_ref() else {
2✔
514
            return;
×
515
        };
516

517
        // Copy any old buffer into the new one, and clear the old buffer. Note that we
518
        // only clear the ref-counted reference to the buffer, not the actual buffer,
519
        // which stays alive until the copy is done (but we don't need to care about
520
        // keeping it alive, wgpu does that for us).
521
        if let Some(old_buffer) = ab.old_buffer.as_ref() {
1✔
522
            let old_size = self.to_byte_size(ab.old_count) as u64;
×
523
            trace!("Copy old buffer id {:?} of size {} bytes into newly-allocated buffer {:?} of size {} bytes.", old_buffer.id(), old_size, ab.buffer.id(), self.to_byte_size(ab.count));
×
524
            encoder.copy_buffer_to_buffer(old_buffer, 0, &ab.buffer, 0, old_size);
1✔
525
        }
526
    }
527
}
528

529
#[cfg(test)]
530
mod tests {
531
    use bevy::math::Vec3;
532
    use bytemuck::{Pod, Zeroable};
533

534
    use super::*;
535

536
    #[repr(C)]
537
    #[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
538
    pub(crate) struct GpuDummy {
539
        pub v: Vec3,
540
    }
541

542
    #[repr(C)]
543
    #[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
544
    pub(crate) struct GpuDummyComposed {
545
        pub simple: GpuDummy,
546
        pub tag: u32,
547
        // GPU padding to 16 bytes due to GpuDummy forcing align to 16 bytes
548
    }
549

550
    #[repr(C)]
551
    #[derive(Debug, Clone, Copy, Pod, Zeroable, ShaderType)]
552
    pub(crate) struct GpuDummyLarge {
553
        pub simple: GpuDummy,
554
        pub tag: u32,
555
        pub large: [f32; 128],
556
    }
557

558
    #[test]
559
    fn table_sizes() {
560
        // Rust
561
        assert_eq!(std::mem::size_of::<GpuDummy>(), 12);
562
        assert_eq!(std::mem::align_of::<GpuDummy>(), 4);
563
        assert_eq!(std::mem::size_of::<GpuDummyComposed>(), 16); // tight packing
564
        assert_eq!(std::mem::align_of::<GpuDummyComposed>(), 4);
565
        assert_eq!(std::mem::size_of::<GpuDummyLarge>(), 132 * 4); // tight packing
566
        assert_eq!(std::mem::align_of::<GpuDummyLarge>(), 4);
567

568
        // GPU
569
        assert_eq!(<GpuDummy as ShaderType>::min_size().get(), 16); // Vec3 gets padded to 16 bytes
570
        assert_eq!(<GpuDummy as ShaderSize>::SHADER_SIZE.get(), 16);
571
        assert_eq!(<GpuDummyComposed as ShaderType>::min_size().get(), 32); // align is 16 bytes, forces padding
572
        assert_eq!(<GpuDummyComposed as ShaderSize>::SHADER_SIZE.get(), 32);
573
        assert_eq!(<GpuDummyLarge as ShaderType>::min_size().get(), 544); // align is 16 bytes, forces padding
574
        assert_eq!(<GpuDummyLarge as ShaderSize>::SHADER_SIZE.get(), 544);
575

576
        for (item_align, expected_aligned_size) in [
577
            (0, 16),
578
            (4, 16),
579
            (8, 16),
580
            (16, 16),
581
            (32, 32),
582
            (256, 256),
583
            (512, 512),
584
        ] {
585
            let mut table = BufferTable::<GpuDummy>::new(
586
                BufferUsages::STORAGE,
587
                NonZeroU64::new(item_align),
588
                None,
589
            );
590
            assert_eq!(table.aligned_size(), expected_aligned_size);
591
            assert!(table.is_empty());
592
            table.insert(GpuDummy::default());
593
            assert!(!table.is_empty());
594
            assert_eq!(table.len(), 1);
595
        }
596

597
        for (item_align, expected_aligned_size) in [
598
            (0, 32),
599
            (4, 32),
600
            (8, 32),
601
            (16, 32),
602
            (32, 32),
603
            (256, 256),
604
            (512, 512),
605
        ] {
606
            let mut table = BufferTable::<GpuDummyComposed>::new(
607
                BufferUsages::STORAGE,
608
                NonZeroU64::new(item_align),
609
                None,
610
            );
611
            assert_eq!(table.aligned_size(), expected_aligned_size);
612
            assert!(table.is_empty());
613
            table.insert(GpuDummyComposed::default());
614
            assert!(!table.is_empty());
615
            assert_eq!(table.len(), 1);
616
        }
617

618
        for (item_align, expected_aligned_size) in [
619
            (0, 544),
620
            (4, 544),
621
            (8, 544),
622
            (16, 544),
623
            (32, 544),
624
            (256, 768),
625
            (512, 1024),
626
        ] {
627
            let mut table = BufferTable::<GpuDummyLarge>::new(
628
                BufferUsages::STORAGE,
629
                NonZeroU64::new(item_align),
630
                None,
631
            );
632
            assert_eq!(table.aligned_size(), expected_aligned_size);
633
            assert!(table.is_empty());
634
            table.insert(GpuDummyLarge {
635
                simple: Default::default(),
636
                tag: 0,
637
                large: [0.; 128],
638
            });
639
            assert!(!table.is_empty());
640
            assert_eq!(table.len(), 1);
641
        }
642
    }
643
}
644

645
#[cfg(all(test, feature = "gpu_tests"))]
646
mod gpu_tests {
647
    use std::fmt::Write;
648

649
    use bevy::render::render_resource::BufferSlice;
650
    use tests::*;
651
    use wgpu::{BufferView, CommandBuffer};
652

653
    use super::*;
654
    use crate::test_utils::MockRenderer;
655

656
    /// Read data from GPU back into CPU memory.
657
    ///
658
    /// This call blocks until the data is available on CPU. Used for testing
659
    /// only.
660
    fn read_back_gpu<'a>(device: &RenderDevice, slice: BufferSlice<'a>) -> BufferView<'a> {
6✔
661
        let (tx, rx) = futures::channel::oneshot::channel();
6✔
662
        slice.map_async(wgpu::MapMode::Read, move |result| {
12✔
663
            tx.send(result).unwrap();
6✔
664
        });
665
        device.poll(wgpu::Maintain::Wait);
6✔
666
        let result = futures::executor::block_on(rx);
6✔
667
        assert!(result.is_ok());
6✔
668
        slice.get_mapped_range()
6✔
669
    }
670

671
    /// Submit a command buffer to GPU and wait for completion.
672
    ///
673
    /// This call blocks until the GPU executed the command buffer. Used for
674
    /// testing only.
675
    fn submit_gpu_and_wait(
7✔
676
        device: &RenderDevice,
677
        queue: &RenderQueue,
678
        command_buffer: CommandBuffer,
679
    ) {
680
        // Queue command
681
        queue.submit([command_buffer]);
7✔
682

683
        // Register callback to observe completion
684
        let (tx, rx) = futures::channel::oneshot::channel();
7✔
685
        queue.on_submitted_work_done(move || {
14✔
686
            tx.send(()).unwrap();
7✔
687
        });
688

689
        // Poll device, checking for completion and raising callback
690
        device.poll(wgpu::Maintain::Wait);
7✔
691

692
        // Wait for callback to be raised. This was needed in previous versions; however,
693
        // it's a bit unclear if it's still needed or if device.poll() is enough to
694
        // guarantee that the command was executed.
695
        let _ = futures::executor::block_on(rx);
7✔
696
    }
697

698
    /// Convert a byte slice to a string of hexadecimal values separated by a
699
    /// blank space.
700
    fn to_hex_string(slice: &[u8]) -> String {
19✔
701
        let len = slice.len();
19✔
702
        let num_chars = len * 3 - 1;
19✔
703
        let mut s = String::with_capacity(num_chars);
19✔
704
        for b in &slice[..len - 1] {
589✔
705
            write!(&mut s, "{:02x} ", *b).unwrap();
285✔
706
        }
707
        write!(&mut s, "{:02x}", slice[len - 1]).unwrap();
19✔
708
        debug_assert_eq!(s.len(), num_chars);
38✔
709
        s
19✔
710
    }
711

712
    fn write_buffers_and_wait<T: Pod + ShaderSize>(
7✔
713
        table: &BufferTable<T>,
714
        device: &RenderDevice,
715
        queue: &RenderQueue,
716
    ) {
717
        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
7✔
718
            label: Some("test"),
7✔
719
        });
720
        table.write_buffer(&mut encoder);
7✔
721
        let command_buffer = encoder.finish();
7✔
722
        submit_gpu_and_wait(device, queue, command_buffer);
7✔
723
        println!("Buffer written to GPU");
7✔
724
    }
725

726
    #[test]
727
    fn table_write() {
728
        let renderer = MockRenderer::new();
729
        let device = renderer.device();
730
        let queue = renderer.queue();
731

732
        let item_align = device.limits().min_storage_buffer_offset_alignment as u64;
733
        println!("min_storage_buffer_offset_alignment = {item_align}");
734
        let mut table = BufferTable::<GpuDummyComposed>::new(
735
            BufferUsages::STORAGE | BufferUsages::MAP_READ,
736
            NonZeroU64::new(item_align),
737
            None,
738
        );
739
        let final_align = item_align.max(<GpuDummyComposed as ShaderSize>::SHADER_SIZE.get());
740
        assert_eq!(table.aligned_size(), final_align as usize);
741

742
        // Initial state
743
        assert!(table.is_empty());
744
        assert_eq!(table.len(), 0);
745
        assert_eq!(table.capacity(), 0);
746
        assert!(table.buffer.is_none());
747

748
        // This has no effect while the table is empty
749
        table.clear_previous_frame_resizes();
750
        table.allocate_gpu(&device, &queue);
751
        write_buffers_and_wait(&table, &device, &queue);
752
        assert!(table.is_empty());
753
        assert_eq!(table.len(), 0);
754
        assert_eq!(table.capacity(), 0);
755
        assert!(table.buffer.is_none());
756

757
        // New frame
758
        table.clear_previous_frame_resizes();
759

760
        // Insert some entries
761
        let len = 3;
762
        for i in 0..len {
763
            let row = table.insert(GpuDummyComposed {
764
                tag: i + 1,
765
                ..Default::default()
766
            });
767
            assert_eq!(row.0, i);
768
        }
769
        assert!(!table.is_empty());
770
        assert_eq!(table.len(), len);
771
        assert!(table.capacity() >= len); // contract: could over-allocate...
772
        assert!(table.buffer.is_none()); // not yet allocated on GPU
773

774
        // Allocate GPU buffer for current requested state
775
        table.allocate_gpu(&device, &queue);
776
        assert!(!table.is_empty());
777
        assert_eq!(table.len(), len);
778
        assert!(table.capacity() >= len);
779
        let ab = table
780
            .buffer
781
            .as_ref()
782
            .expect("GPU buffer should be allocated after allocate_gpu()");
783
        assert!(ab.old_buffer.is_none()); // no previous copy
784
        assert_eq!(ab.count, len);
785
        println!(
786
            "Allocated buffer #{:?} of {} rows",
787
            ab.buffer.id(),
788
            ab.count
789
        );
790
        let ab_buffer = ab.buffer.clone();
791

792
        // Another allocate_gpu() is a no-op
793
        table.allocate_gpu(&device, &queue);
794
        assert!(!table.is_empty());
795
        assert_eq!(table.len(), len);
796
        assert!(table.capacity() >= len);
797
        let ab = table
798
            .buffer
799
            .as_ref()
800
            .expect("GPU buffer should be allocated after allocate_gpu()");
801
        assert!(ab.old_buffer.is_none()); // no previous copy
802
        assert_eq!(ab.count, len);
803
        assert_eq!(ab_buffer.id(), ab.buffer.id()); // same buffer
804

805
        // Write buffer (CPU -> GPU)
806
        write_buffers_and_wait(&table, &device, &queue);
807

808
        {
809
            // Read back (GPU -> CPU)
810
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
811
            {
812
                let slice = buffer.slice(..);
813
                let view = read_back_gpu(&device, slice);
814
                println!(
815
                    "GPU data read back to CPU for validation: {} bytes",
816
                    view.len()
817
                );
818

819
                // Validate content
820
                assert_eq!(view.len(), final_align as usize * table.capacity() as usize);
821
                for i in 0..len as usize {
822
                    let offset = i * final_align as usize;
823
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
824
                    let src = &view[offset..offset + 16];
825
                    println!("{}", to_hex_string(src));
826
                    let dummy_composed: &[GpuDummyComposed] =
827
                        cast_slice(&view[offset..offset + item_size]);
828
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
829
                }
830
            }
831
            buffer.unmap();
832
        }
833

834
        // New frame
835
        table.clear_previous_frame_resizes();
836

837
        // Insert more entries
838
        let old_capacity = table.capacity();
839
        let mut len = len;
840
        while table.capacity() == old_capacity {
841
            let row = table.insert(GpuDummyComposed {
842
                tag: len + 1,
843
                ..Default::default()
844
            });
845
            assert_eq!(row.0, len);
846
            len += 1;
847
        }
848
        println!(
849
            "Added {} rows to grow capacity from {} to {}.",
850
            len - 3,
851
            old_capacity,
852
            table.capacity()
853
        );
854

855
        // This re-allocates a new GPU buffer because the capacity changed
856
        table.allocate_gpu(&device, &queue);
857
        assert!(!table.is_empty());
858
        assert_eq!(table.len(), len);
859
        assert!(table.capacity() >= len);
860
        let ab = table
861
            .buffer
862
            .as_ref()
863
            .expect("GPU buffer should be allocated after allocate_gpu()");
864
        assert_eq!(ab.count, len);
865
        assert!(ab.old_buffer.is_some()); // old buffer to copy
866
        assert_ne!(ab.old_buffer.as_ref().unwrap().id(), ab.buffer.id());
867
        println!(
868
            "Allocated new buffer #{:?} of {} rows",
869
            ab.buffer.id(),
870
            ab.count
871
        );
872

873
        // Write buffer (CPU -> GPU)
874
        write_buffers_and_wait(&table, &device, &queue);
875

876
        {
877
            // Read back (GPU -> CPU)
878
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
879
            {
880
                let slice = buffer.slice(..);
881
                let view = read_back_gpu(&device, slice);
882
                println!(
883
                    "GPU data read back to CPU for validation: {} bytes",
884
                    view.len()
885
                );
886

887
                // Validate content
888
                assert_eq!(view.len(), final_align as usize * table.capacity() as usize);
889
                for i in 0..len as usize {
890
                    let offset = i * final_align as usize;
891
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
892
                    let src = &view[offset..offset + 16];
893
                    println!("{}", to_hex_string(src));
894
                    let dummy_composed: &[GpuDummyComposed] =
895
                        cast_slice(&view[offset..offset + item_size]);
896
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
897
                }
898
            }
899
            buffer.unmap();
900
        }
901

902
        // New frame
903
        table.clear_previous_frame_resizes();
904

905
        // Delete the last allocated row
906
        let old_capacity = table.capacity();
907
        let len = len - 1;
908
        table.remove(BufferTableId(len));
909
        println!(
910
            "Removed last row to shrink capacity from {} to {}.",
911
            old_capacity,
912
            table.capacity()
913
        );
914

915
        // This doesn't do anything since we removed a row only
916
        table.allocate_gpu(&device, &queue);
917
        assert!(!table.is_empty());
918
        assert_eq!(table.len(), len);
919
        assert!(table.capacity() >= len);
920
        let ab = table
921
            .buffer
922
            .as_ref()
923
            .expect("GPU buffer should be allocated after allocate_gpu()");
924
        assert_eq!(ab.count, len + 1); // GPU buffer kept its size
925
        assert!(ab.old_buffer.is_none());
926

927
        // Write buffer (CPU -> GPU)
928
        write_buffers_and_wait(&table, &device, &queue);
929

930
        {
931
            // Read back (GPU -> CPU)
932
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
933
            {
934
                let slice = buffer.slice(..);
935
                let view = read_back_gpu(&device, slice);
936
                println!(
937
                    "GPU data read back to CPU for validation: {} bytes",
938
                    view.len()
939
                );
940

941
                // Validate content
942
                assert!(view.len() >= final_align as usize * table.capacity() as usize); // note the >=, the buffer is over-allocated
943
                for i in 0..len as usize {
944
                    let offset = i * final_align as usize;
945
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
946
                    let src = &view[offset..offset + 16];
947
                    println!("{}", to_hex_string(src));
948
                    let dummy_composed: &[GpuDummyComposed] =
949
                        cast_slice(&view[offset..offset + item_size]);
950
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
951
                }
952
            }
953
            buffer.unmap();
954
        }
955

956
        // New frame
957
        table.clear_previous_frame_resizes();
958

959
        // Delete the first allocated row
960
        let old_capacity = table.capacity();
961
        let mut len = len - 1;
962
        table.remove(BufferTableId(0));
963
        assert_eq!(old_capacity, table.capacity());
964
        println!(
965
            "Removed first row to shrink capacity from {} to {} (no change).",
966
            old_capacity,
967
            table.capacity()
968
        );
969

970
        // This doesn't do anything since we only removed a row
971
        table.allocate_gpu(&device, &queue);
972
        assert!(!table.is_empty());
973
        assert_eq!(table.len(), len);
974
        assert!(table.capacity() >= len);
975
        let ab = table
976
            .buffer
977
            .as_ref()
978
            .expect("GPU buffer should be allocated after allocate_gpu()");
979
        assert_eq!(ab.count, len + 2); // GPU buffer kept its size
980
        assert!(ab.old_buffer.is_none());
981

982
        // Write buffer (CPU -> GPU)
983
        write_buffers_and_wait(&table, &device, &queue);
984

985
        {
986
            // Read back (GPU -> CPU)
987
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
988
            {
989
                let slice = buffer.slice(..);
990
                let view = read_back_gpu(&device, slice);
991
                println!(
992
                    "GPU data read back to CPU for validation: {} bytes",
993
                    view.len()
994
                );
995

996
                // Validate content
997
                assert!(view.len() >= final_align as usize * table.capacity() as usize); // note the >=, the buffer is over-allocated
998
                for i in 0..len as usize {
999
                    let offset = i * final_align as usize;
1000
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1001
                    let src = &view[offset..offset + 16];
1002
                    println!("{}", to_hex_string(src));
1003
                    if i > 0 {
1004
                        let dummy_composed: &[GpuDummyComposed] =
1005
                            cast_slice(&view[offset..offset + item_size]);
1006
                        assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1007
                    }
1008
                }
1009
            }
1010
            buffer.unmap();
1011
        }
1012

1013
        // New frame
1014
        table.clear_previous_frame_resizes();
1015

1016
        // Insert a row; this should get into row #0 in the buffer
1017
        let row = table.insert(GpuDummyComposed {
1018
            tag: 1,
1019
            ..Default::default()
1020
        });
1021
        assert_eq!(row.0, 0);
1022
        len += 1;
1023
        println!(
1024
            "Added 1 row to grow capacity from {} to {}.",
1025
            old_capacity,
1026
            table.capacity()
1027
        );
1028

1029
        // This doesn't reallocate the GPU buffer since we used a free list entry
1030
        table.allocate_gpu(&device, &queue);
1031
        assert!(!table.is_empty());
1032
        assert_eq!(table.len(), len);
1033
        assert!(table.capacity() >= len);
1034
        let ab = table
1035
            .buffer
1036
            .as_ref()
1037
            .expect("GPU buffer should be allocated after allocate_gpu()");
1038
        assert_eq!(ab.count, 4); // 4 == last time we grew
1039
        assert!(ab.old_buffer.is_none());
1040

1041
        // Write buffer (CPU -> GPU)
1042
        write_buffers_and_wait(&table, &device, &queue);
1043

1044
        {
1045
            // Read back (GPU -> CPU)
1046
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1047
            {
1048
                let slice = buffer.slice(..);
1049
                let view = read_back_gpu(&device, slice);
1050
                println!(
1051
                    "GPU data read back to CPU for validation: {} bytes",
1052
                    view.len()
1053
                );
1054

1055
                // Validate content
1056
                assert!(view.len() >= final_align as usize * table.capacity() as usize);
1057
                for i in 0..len as usize {
1058
                    let offset = i * final_align as usize;
1059
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1060
                    let src = &view[offset..offset + 16];
1061
                    println!("{}", to_hex_string(src));
1062
                    let dummy_composed: &[GpuDummyComposed] =
1063
                        cast_slice(&view[offset..offset + item_size]);
1064
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1065
                }
1066
            }
1067
            buffer.unmap();
1068
        }
1069

1070
        // New frame
1071
        table.clear_previous_frame_resizes();
1072

1073
        // Insert a row; this should get into row #3 at the end of the allocated buffer
1074
        let row = table.insert(GpuDummyComposed {
1075
            tag: 4,
1076
            ..Default::default()
1077
        });
1078
        assert_eq!(row.0, 3);
1079
        len += 1;
1080
        println!(
1081
            "Added 1 row to grow capacity from {} to {}.",
1082
            old_capacity,
1083
            table.capacity()
1084
        );
1085

1086
        // This doesn't reallocate the GPU buffer since we used an implicit free entry
1087
        table.allocate_gpu(&device, &queue);
1088
        assert!(!table.is_empty());
1089
        assert_eq!(table.len(), len);
1090
        assert!(table.capacity() >= len);
1091
        let ab = table
1092
            .buffer
1093
            .as_ref()
1094
            .expect("GPU buffer should be allocated after allocate_gpu()");
1095
        assert_eq!(ab.count, 4); // 4 == last time we grew
1096
        assert!(ab.old_buffer.is_none());
1097

1098
        // Write buffer (CPU -> GPU)
1099
        write_buffers_and_wait(&table, &device, &queue);
1100

1101
        {
1102
            // Read back (GPU -> CPU)
1103
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1104
            {
1105
                let slice = buffer.slice(..);
1106
                let view = read_back_gpu(&device, slice);
1107
                println!(
1108
                    "GPU data read back to CPU for validation: {} bytes",
1109
                    view.len()
1110
                );
1111

1112
                // Validate content
1113
                assert!(view.len() >= final_align as usize * table.capacity() as usize);
1114
                for i in 0..len as usize {
1115
                    let offset = i * final_align as usize;
1116
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1117
                    let src = &view[offset..offset + 16];
1118
                    println!("{}", to_hex_string(src));
1119
                    let dummy_composed: &[GpuDummyComposed] =
1120
                        cast_slice(&view[offset..offset + item_size]);
1121
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1122
                }
1123
            }
1124
            buffer.unmap();
1125
        }
1126
    }
1127
}