• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

djeedai / bevy_hanabi / 12128238298

02 Dec 2024 09:24PM UTC coverage: 48.661% (-7.6%) from 56.217%
12128238298

Pull #401

github

web-flow
Merge 30c486d1a into 19aee8dbc
Pull Request #401: Upgrade to Bevy v0.15.0

39 of 284 new or added lines in 11 files covered. (13.73%)

435 existing lines in 8 files now uncovered.

3106 of 6383 relevant lines covered (48.66%)

21.61 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

65.38
/src/render/buffer_table.rs
1
use std::{
2
    borrow::Cow,
3
    num::{NonZeroU32, NonZeroU64},
4
};
5

6
use bevy::{
7
    log::trace,
8
    render::{
9
        render_resource::{
10
            Buffer, BufferAddress, BufferDescriptor, BufferUsages, CommandEncoder, ShaderSize,
11
            ShaderType,
12
        },
13
        renderer::{RenderDevice, RenderQueue},
14
    },
15
};
16
use bytemuck::{cast_slice, Pod};
17
use copyless::VecHelper;
18

19
/// Index of a row in a [`BufferTable`].
20
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
21
pub struct BufferTableId(pub(crate) u32); // TEMP: pub(crate)
22

23
impl BufferTableId {
24
    /// An invalid value, often used as placeholder.
25
    pub const INVALID: BufferTableId = BufferTableId(u32::MAX);
26

27
    /// Check if the current ID is valid, that is, is different from
28
    /// [`INVALID`].
29
    ///
30
    /// [`INVALID`]: Self::INVALID
31
    #[inline]
NEW
32
    pub fn is_valid(&self) -> bool {
×
NEW
33
        *self != Self::INVALID
×
34
    }
35

36
    /// Compute a new buffer table ID by offseting an existing one by `count`
37
    /// rows.
38
    #[inline]
NEW
39
    pub fn offset(&self, count: u32) -> BufferTableId {
×
NEW
40
        debug_assert!(self.is_valid());
×
NEW
41
        BufferTableId(self.0 + count)
×
42
    }
43
}
44

45
#[derive(Debug)]
46
struct AllocatedBuffer {
47
    /// Currently allocated buffer, of size equal to `size`.
48
    buffer: Buffer,
49
    /// Size of the currently allocated buffer, in number of rows.
50
    count: u32,
51
    /// Previously allocated buffer if any, cached until the next buffer write
52
    /// so that old data can be copied into the newly-allocated buffer.
53
    old_buffer: Option<Buffer>,
54
    /// Size of the old buffer if any, in number of rows.
55
    old_count: u32,
56
}
57

58
impl AllocatedBuffer {
59
    /// Get the number of rows of the currently allocated GPU buffer.
60
    ///
61
    /// On capacity grow, the count is valid until the next buffer swap.
62
    pub fn allocated_count(&self) -> u32 {
3✔
63
        if self.old_buffer.is_some() {
3✔
64
            self.old_count
×
65
        } else {
66
            self.count
3✔
67
        }
68
    }
69
}
70

71
/// GPU buffer holding a table with concurrent interleaved CPU/GPU access.
72
///
73
/// The buffer table data structure represents a GPU buffer holding a table made
74
/// of individual rows. Each row of the table has the same layout (same size),
75
/// and can be allocated (assigned to an existing index) or free (available for
76
/// future allocation). The data structure manages a free list of rows, and copy
77
/// of rows modified on CPU to the GPU without touching other rows. This ensures
78
/// that existing rows in the GPU buffer can be accessed and modified by the GPU
79
/// without being overwritten by the CPU and without the need for the CPU to
80
/// read the data back from GPU into CPU memory.
81
///
82
/// The element type `T` needs to implement the following traits:
83
/// - [`Pod`] to allow copy.
84
/// - [`ShaderType`] because it needs to be mapped for a shader.
85
/// - [`ShaderSize`] to ensure a fixed footprint, to allow packing multiple
86
///   instances inside a single buffer. This therefore excludes any
87
///   runtime-sized array.
88
///
89
/// This is similar to a [`BufferVec`] or [`AlignedBufferVec`], but unlike those
90
/// data structures a buffer table preserves rows modified by the GPU without
91
/// overwriting. This is useful when the buffer is also modified by GPU shaders,
92
/// so neither the CPU side nor the GPU side have an up-to-date view of the
93
/// entire table, and so the CPU cannot re-upload the entire table on changes.
94
///
95
/// # Usage
96
///
97
/// - During the [`RenderStage::Prepare`] stage, call
98
///   [`clear_previous_frame_resizes()`] to clear any stale buffer from the
99
///   previous frame. Then insert new rows with [`insert()`] and if you made
100
///   changes call [`allocate_gpu()`] at the end to allocate any new buffer
101
///   needed.
102
/// - During the [`RenderStage::Render`] stage, call [`write_buffer()`] from a
103
///   command encoder before using any row, to perform any buffer resize copy
104
///   pending.
105
///
106
/// [`BufferVec`]: bevy::render::render_resource::BufferVec
107
/// [`AlignedBufferVec`]: crate::render::aligned_buffer_vec::AlignedBufferVec
108
#[derive(Debug)]
109
pub struct BufferTable<T: Pod + ShaderSize> {
110
    /// GPU buffer if already allocated, or `None` otherwise.
111
    buffer: Option<AllocatedBuffer>,
112
    /// GPU buffer usages.
113
    buffer_usage: BufferUsages,
114
    /// Optional GPU buffer name, for debugging.
115
    label: Option<String>,
116
    /// Size of a single buffer element, in bytes, in CPU memory (Rust layout).
117
    item_size: usize,
118
    /// Size of a single buffer element, in bytes, aligned to GPU memory
119
    /// constraints.
120
    aligned_size: usize,
121
    /// Capacity of the buffer, in number of rows.
122
    capacity: u32,
123
    /// Size of the "active" portion of the table, which includes allocated rows
124
    /// and any row in the free list. All other rows in the
125
    /// `active_size..capacity` range are implicitly unallocated.
126
    active_count: u32,
127
    /// Free list of rows available in the GPU buffer for a new allocation. This
128
    /// only contains indices in the `0..active_size` range; all row indices in
129
    /// `active_size..capacity` are assumed to be unallocated.
130
    free_indices: Vec<u32>,
131
    /// Pending values accumulated on CPU and not yet written to GPU, and their
132
    /// rows.
133
    pending_values: Vec<(u32, T)>,
134
    /// Extra pending values accumulated on CPU like `pending_values`, but for
135
    /// which there's not enough space in the current GPU buffer. Those values
136
    /// are sorted in index order, occupying the range `buffer.size..`.
137
    extra_pending_values: Vec<T>,
138
}
139

140
impl<T: Pod + ShaderSize> Default for BufferTable<T> {
141
    fn default() -> Self {
22✔
142
        let item_size = std::mem::size_of::<T>();
22✔
143
        let aligned_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
22✔
144
        assert!(aligned_size >= item_size);
22✔
145
        Self {
146
            buffer: None,
147
            buffer_usage: BufferUsages::all(),
22✔
148
            label: None,
149
            item_size,
150
            aligned_size,
151
            capacity: 0,
152
            active_count: 0,
153
            free_indices: Vec::new(),
22✔
154
            pending_values: Vec::new(),
22✔
155
            extra_pending_values: Vec::new(),
22✔
156
        }
157
    }
158
}
159

160
impl<T: Pod + ShaderSize> BufferTable<T> {
161
    /// Create a new collection.
162
    ///
163
    /// `item_align` is an optional additional alignment for items in the
164
    /// collection. If greater than the natural alignment dictated by WGSL
165
    /// rules, this extra alignment is enforced. Otherwise it's ignored (so you
166
    /// can pass `None` to ignore). This is useful if for example you want to
167
    /// bind individual rows or any subset of the table, to ensure each row is
168
    /// aligned to the device constraints.
169
    ///
170
    /// # Panics
171
    ///
172
    /// Panics if `buffer_usage` contains [`BufferUsages::UNIFORM`] and the
173
    /// layout of the element type `T` does not meet the requirements of the
174
    /// uniform address space, as tested by
175
    /// [`ShaderType::assert_uniform_compat()`].
176
    ///
177
    /// [`BufferUsages::UNIFORM`]: bevy::render::render_resource::BufferUsages::UNIFORM
178
    pub fn new(
22✔
179
        buffer_usage: BufferUsages,
180
        item_align: Option<NonZeroU64>,
181
        label: Option<String>,
182
    ) -> Self {
183
        // GPU-aligned item size, compatible with WGSL rules
184
        let item_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
22✔
185
        // Extra manual alignment for device constraints
186
        let aligned_size = if let Some(item_align) = item_align {
63✔
187
            let item_align = item_align.get() as usize;
×
NEW
188
            let aligned_size = item_size.next_multiple_of(item_align);
×
189
            assert!(aligned_size >= item_size);
×
190
            assert!(aligned_size % item_align == 0);
19✔
191
            aligned_size
19✔
192
        } else {
193
            item_size
3✔
194
        };
195
        trace!(
×
196
            "BufferTable[\"{}\"]: item_size={} aligned_size={}",
×
197
            label.as_ref().unwrap_or(&String::new()),
×
198
            item_size,
×
199
            aligned_size
×
200
        );
201
        if buffer_usage.contains(BufferUsages::UNIFORM) {
22✔
202
            <T as ShaderType>::assert_uniform_compat();
×
203
        }
204
        Self {
205
            // Need COPY_SRC and COPY_DST to copy from old to new buffer on resize
206
            buffer_usage: buffer_usage | BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
×
207
            aligned_size,
208
            label,
209
            ..Default::default()
210
        }
211
    }
212

213
    /// Get a safe buffer label for debug display.
214
    ///
215
    /// Falls back to an empty string if no label was specified.
216
    pub fn safe_label(&self) -> Cow<'_, str> {
×
217
        self.label
×
218
            .as_ref()
219
            .map(|s| Cow::Borrowed(&s[..]))
×
220
            .unwrap_or(Cow::Borrowed(""))
×
221
    }
222

223
    /// Get a safe buffer name for debug display.
224
    ///
225
    /// Same as [`safe_label()`] but includes the buffer ID as well.
226
    ///
227
    /// [`safe_label()`]: self::BufferTable::safe_label
228
    pub fn safe_name(&self) -> String {
×
229
        let id = self
×
230
            .buffer
×
231
            .as_ref()
232
            .map(|ab| {
×
233
                let id: NonZeroU32 = ab.buffer.id().into();
×
234
                id.get()
×
235
            })
236
            .unwrap_or(0);
237
        format!("#{}:{}", id, self.safe_label())
×
238
    }
239

240
    /// Reference to the GPU buffer, if already allocated.
241
    ///
242
    /// This reference corresponds to the currently allocated GPU buffer, which
243
    /// may not contain all data since the last [`insert()`] call, and could
244
    /// become invalid if a new larger buffer needs to be allocated to store the
245
    /// pending values inserted with [`insert()`].
246
    ///
247
    /// [`insert()]`: BufferTable::insert
248
    #[inline]
249
    pub fn buffer(&self) -> Option<&Buffer> {
6✔
250
        self.buffer.as_ref().map(|ab| &ab.buffer)
18✔
251
    }
252

253
    /// Maximum number of rows the table can hold without reallocation.
254
    ///
255
    /// This is the maximum number of rows that can be added to the table
256
    /// without forcing a new GPU buffer to be allocated and a copy from the old
257
    /// to the new buffer.
258
    ///
259
    /// Note that this doesn't imply that no GPU buffer allocation will ever
260
    /// occur; if a GPU buffer was never allocated, and there are pending
261
    /// CPU rows to insert, then a new buffer will be allocated on next
262
    /// update with this capacity.
263
    #[inline]
264
    #[allow(dead_code)]
265
    pub fn capacity(&self) -> u32 {
27✔
266
        self.capacity
27✔
267
    }
268

269
    /// Current number of rows in use in the table.
270
    ///
271
    /// Note that rows in use are not necessarily contiguous. There may be gaps
272
    /// between used rows.
273
    #[inline]
274
    #[allow(dead_code)]
275
    pub fn len(&self) -> u32 {
31✔
276
        self.active_count - self.free_indices.len() as u32
31✔
277
    }
278

279
    /// Size of a single row in the table, in bytes, aligned to GPU constraints.
280
    #[inline]
281
    #[allow(dead_code)]
282
    pub fn aligned_size(&self) -> usize {
22✔
283
        self.aligned_size
22✔
284
    }
285

286
    /// Is the table empty?
287
    #[inline]
288
    #[allow(dead_code)]
289
    pub fn is_empty(&self) -> bool {
52✔
290
        self.active_count == 0
52✔
291
    }
292

293
    /// Clear all rows of the table without deallocating any existing GPU
294
    /// buffer.
295
    ///
296
    /// This operation only updates the CPU cache of the table, without touching
297
    /// any GPU buffer. On next GPU buffer update, the GPU buffer will be
298
    /// deallocated.
299
    #[allow(dead_code)]
300
    pub fn clear(&mut self) {
×
301
        self.pending_values.clear();
×
302
        self.extra_pending_values.clear();
×
303
        self.free_indices.clear();
×
304
        self.active_count = 0;
×
305
    }
306

307
    /// Clear any stale buffer used for resize in the previous frame during
308
    /// rendering while the data structure was immutable.
309
    ///
310
    /// This must be called before any new [`insert()`].
311
    ///
312
    /// [`insert()`]: crate::BufferTable::insert
313
    pub fn clear_previous_frame_resizes(&mut self) {
7✔
314
        if let Some(ab) = self.buffer.as_mut() {
12✔
315
            ab.old_buffer = None;
×
316
            ab.old_count = 0;
×
317
        }
318
    }
319

320
    /// Calculate the size in byte of `count` rows.
321
    #[inline]
322
    fn to_byte_size(&self, count: u32) -> usize {
7✔
323
        count as usize * self.aligned_size
7✔
324
    }
325

326
    /// Insert a new row into the table.
327
    ///
328
    /// For performance reasons, this buffers the row content on the CPU until
329
    /// the next GPU update, to minimize the number of CPU to GPU transfers.
330
    pub fn insert(&mut self, value: T) -> BufferTableId {
27✔
331
        trace!(
27✔
332
            "Inserting into table buffer '{}' with {} free indices, capacity: {}, active_size: {}",
×
333
            self.safe_name(),
×
334
            self.free_indices.len(),
×
335
            self.capacity,
×
336
            self.active_count
×
337
        );
338
        let index = if self.free_indices.is_empty() {
54✔
339
            let index = self.active_count;
26✔
340
            if index == self.capacity {
52✔
341
                self.capacity += 1;
26✔
342
            }
343
            debug_assert!(index < self.capacity);
52✔
344
            self.active_count += 1;
26✔
345
            index
26✔
346
        } else {
347
            // Note: this is inefficient O(n) but we need to apply the same logic as the
348
            // EffectCache because we rely on indices being in sync.
349
            self.free_indices.remove(0)
1✔
350
        };
351
        let allocated_count = self
×
352
            .buffer
×
353
            .as_ref()
354
            .map(|ab| ab.allocated_count())
3✔
355
            .unwrap_or(0);
356
        trace!(
×
357
            "Found free index {}, capacity: {}, active_count: {}, allocated_count: {}",
×
358
            index,
×
359
            self.capacity,
×
360
            self.active_count,
×
361
            allocated_count
×
362
        );
363
        if index < allocated_count {
29✔
364
            self.pending_values.alloc().init((index, value));
2✔
365
        } else {
366
            let extra_index = index - allocated_count;
25✔
367
            if extra_index < self.extra_pending_values.len() as u32 {
25✔
368
                self.extra_pending_values[extra_index as usize] = value;
×
369
            } else {
370
                self.extra_pending_values.alloc().init(value);
25✔
371
            }
372
        }
373
        BufferTableId(index)
27✔
374
    }
375

376
    /// Remove a row from the table.
377
    #[allow(dead_code)]
378
    pub fn remove(&mut self, id: BufferTableId) {
2✔
379
        let index = id.0;
2✔
380
        assert!(index < self.active_count);
2✔
381

382
        // If this is the last item in the active zone, just shrink the active zone
383
        // (implicit free list).
384
        if index == self.active_count - 1 {
3✔
385
            self.active_count -= 1;
1✔
386
            self.capacity -= 1;
1✔
387
        } else {
388
            // This is very inefficient but we need to apply the same logic as the
389
            // EffectCache because we rely on indices being in sync.
390
            let pos = self
1✔
391
                .free_indices
1✔
392
                .binary_search(&index) // will fail
1✔
393
                .unwrap_or_else(|e| e); // will get position of insertion
2✔
394
            self.free_indices.insert(pos, index);
×
395
        }
396
    }
397

398
    /// Allocate any GPU buffer if needed, based on the most recent capacity
399
    /// requested.
400
    ///
401
    /// This should be called only once per frame after all allocation requests
402
    /// have been made via [`insert()`] but before the GPU buffer is actually
403
    /// updated. This is an optimization to enable allocating the GPU buffer
404
    /// earlier than it's actually needed. Calling this multiple times will work
405
    /// but will be inefficient and allocate GPU buffers for nothing. Not
406
    /// calling it is safe, as the next update will call it just-in-time anyway.
407
    ///
408
    /// # Returns
409
    ///
410
    /// Returns `true` if a new buffer was (re-)allocated, to indicate any bind
411
    /// group needs to be re-created.
412
    ///
413
    /// [`insert()]`: crate::render::BufferTable::insert
414
    pub fn allocate_gpu(&mut self, device: &RenderDevice, queue: &RenderQueue) -> bool {
8✔
415
        // The allocated capacity is the capacity of the currently allocated GPU buffer,
416
        // which can be different from the expected capacity (self.capacity) for next
417
        // update.
418
        let allocated_count = self.buffer.as_ref().map(|ab| ab.count).unwrap_or(0);
22✔
419
        let reallocated = if self.capacity > allocated_count {
16✔
420
            let byte_size = self.to_byte_size(self.capacity);
2✔
421
            trace!(
2✔
422
                "reserve('{}'): increase capacity from {} to {} elements, old size {} bytes, new size {} bytes",
×
423
                self.safe_name(),
×
424
                allocated_count,
×
425
                self.capacity,
×
426
                self.to_byte_size(allocated_count),
×
NEW
427
                byte_size
×
428
            );
429

430
            // Create the new buffer, swapping with the old one if any
431
            let has_init_data = !self.extra_pending_values.is_empty();
2✔
432
            let new_buffer = device.create_buffer(&BufferDescriptor {
2✔
433
                label: self.label.as_ref().map(|s| &s[..]),
2✔
NEW
434
                size: byte_size as BufferAddress,
×
435
                usage: self.buffer_usage,
×
436
                mapped_at_creation: has_init_data,
×
437
            });
438

439
            // Use any pending data to initialize the buffer. We only use CPU-available
440
            // data, which was inserted after the buffer was (re-)allocated and
441
            // has not been uploaded to GPU yet.
442
            if has_init_data {
×
443
                // Leave some space to copy the old buffer if any
444
                let base_size = self.to_byte_size(allocated_count) as u64;
2✔
445
                let extra_size = self.to_byte_size(self.extra_pending_values.len() as u32) as u64;
2✔
446

447
                // Scope get_mapped_range_mut() to force a drop before unmap()
448
                {
449
                    let dst_slice = &mut new_buffer
2✔
450
                        .slice(base_size..base_size + extra_size)
2✔
451
                        .get_mapped_range_mut();
2✔
452

453
                    for (index, content) in self.extra_pending_values.drain(..).enumerate() {
6✔
454
                        let byte_size = self.aligned_size; // single row
4✔
455
                        let byte_offset = byte_size * index;
4✔
456

457
                        // Copy Rust value into a GPU-ready format, including GPU padding.
458
                        let src: &[u8] = cast_slice(std::slice::from_ref(&content));
4✔
459
                        let dst_range = byte_offset..byte_offset + self.item_size;
4✔
460
                        trace!(
4✔
461
                            "+ copy: index={} src={:?} dst={:?} byte_offset={} byte_size={}",
×
462
                            index,
×
463
                            src.as_ptr(),
×
464
                            dst_range,
×
465
                            byte_offset,
×
466
                            byte_size
×
467
                        );
468
                        let dst = &mut dst_slice[dst_range];
4✔
469
                        dst.copy_from_slice(src);
4✔
470
                    }
471
                }
472

473
                new_buffer.unmap();
2✔
474
            }
475

476
            if let Some(ab) = self.buffer.as_mut() {
3✔
477
                // If there's any data currently in the GPU buffer, we need to copy it on next
478
                // update to preserve it, but only if there's no pending copy already.
479
                if self.active_count > 0 && ab.old_buffer.is_none() {
2✔
480
                    ab.old_buffer = Some(ab.buffer.clone()); // TODO: swap
1✔
481
                    ab.old_count = ab.count;
1✔
482
                }
483
                ab.buffer = new_buffer;
1✔
484
                ab.count = self.capacity;
1✔
485
            } else {
486
                self.buffer = Some(AllocatedBuffer {
1✔
487
                    buffer: new_buffer,
1✔
488
                    count: self.capacity,
1✔
489
                    old_buffer: None,
1✔
490
                    old_count: 0,
1✔
491
                });
492
            }
493

494
            true
2✔
495
        } else {
496
            false
6✔
497
        };
498

499
        // Immediately schedule a copy of old rows.
500
        // - For old rows, copy into the old buffer because the old-to-new buffer copy
501
        //   will be executed during a command queue while any CPU to GPU upload is
502
        //   prepended before the next command queue. To ensure things do get out of
503
        //   order with the CPU upload overwriting the GPU-to-GPU copy, make sure those
504
        //   two are disjoint.
505
        if let Some(ab) = self.buffer.as_ref() {
7✔
506
            let buffer = ab.old_buffer.as_ref().unwrap_or(&ab.buffer);
×
507
            for (index, content) in self.pending_values.drain(..) {
2✔
508
                let byte_size = self.aligned_size;
2✔
509
                let byte_offset = byte_size * index as usize;
2✔
510

511
                // Copy Rust value into a GPU-ready format, including GPU padding.
512
                // TODO - Do that in insert()!
513
                let mut aligned_buffer: Vec<u8> = vec![0; self.aligned_size];
2✔
514
                let src: &[u8] = cast_slice(std::slice::from_ref(&content));
2✔
515
                let dst_range = ..self.item_size;
2✔
516
                trace!(
2✔
517
                    "+ copy: index={} src={:?} dst={:?} byte_offset={} byte_size={}",
×
518
                    index,
×
519
                    src.as_ptr(),
×
520
                    dst_range,
×
521
                    byte_offset,
×
522
                    byte_size
×
523
                );
524
                let dst = &mut aligned_buffer[dst_range];
2✔
525
                dst.copy_from_slice(src);
2✔
526

527
                // Upload to GPU
528
                // TODO - Merge contiguous blocks into a single write_buffer()
529
                let bytes: &[u8] = cast_slice(&aligned_buffer);
2✔
530
                queue.write_buffer(buffer, byte_offset as u64, bytes);
2✔
531
            }
532
        } else {
533
            debug_assert!(self.pending_values.is_empty());
2✔
534
            debug_assert!(self.extra_pending_values.is_empty());
2✔
535
        }
536

537
        reallocated
8✔
538
    }
539

540
    /// Write CPU data to the GPU buffer, (re)allocating it as needed.
541
    pub fn write_buffer(&self, encoder: &mut CommandEncoder) {
7✔
542
        // Check if there's any work to do: either some pending values to upload or some
543
        // existing buffer to copy into a newly-allocated one.
544
        if self.pending_values.is_empty()
7✔
545
            && self
7✔
546
                .buffer
7✔
547
                .as_ref()
7✔
548
                .map(|ab| ab.old_buffer.is_none())
20✔
549
                .unwrap_or(true)
7✔
550
        {
551
            trace!("write_buffer({}): nothing to do", self.safe_name());
6✔
552
            return;
6✔
553
        }
554

555
        trace!(
1✔
556
            "write_buffer({}): pending_values.len={} item_size={} aligned_size={} buffer={:?}",
×
557
            self.safe_name(),
×
558
            self.pending_values.len(),
×
559
            self.item_size,
×
560
            self.aligned_size,
×
561
            self.buffer,
×
562
        );
563

564
        // If there's no more GPU buffer, there's nothing to do
565
        let Some(ab) = self.buffer.as_ref() else {
2✔
566
            return;
×
567
        };
568

569
        // Copy any old buffer into the new one, and clear the old buffer. Note that we
570
        // only clear the ref-counted reference to the buffer, not the actual buffer,
571
        // which stays alive until the copy is done (but we don't need to care about
572
        // keeping it alive, wgpu does that for us).
573
        if let Some(old_buffer) = ab.old_buffer.as_ref() {
1✔
574
            let old_size = self.to_byte_size(ab.old_count) as u64;
×
575
            trace!("Copy old buffer id {:?} of size {} bytes into newly-allocated buffer {:?} of size {} bytes.", old_buffer.id(), old_size, ab.buffer.id(), self.to_byte_size(ab.count));
×
576
            encoder.copy_buffer_to_buffer(old_buffer, 0, &ab.buffer, 0, old_size);
1✔
577
        }
578
    }
579
}
580

581
#[cfg(test)]
582
mod tests {
583
    use bevy::math::Vec3;
584
    use bytemuck::{Pod, Zeroable};
585

586
    use super::*;
587

588
    #[repr(C)]
589
    #[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
590
    pub(crate) struct GpuDummy {
591
        pub v: Vec3,
592
    }
593

594
    #[repr(C)]
595
    #[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
596
    pub(crate) struct GpuDummyComposed {
597
        pub simple: GpuDummy,
598
        pub tag: u32,
599
        // GPU padding to 16 bytes due to GpuDummy forcing align to 16 bytes
600
    }
601

602
    #[repr(C)]
603
    #[derive(Debug, Clone, Copy, Pod, Zeroable, ShaderType)]
604
    pub(crate) struct GpuDummyLarge {
605
        pub simple: GpuDummy,
606
        pub tag: u32,
607
        pub large: [f32; 128],
608
    }
609

610
    #[test]
611
    fn table_sizes() {
612
        // Rust
613
        assert_eq!(std::mem::size_of::<GpuDummy>(), 12);
614
        assert_eq!(std::mem::align_of::<GpuDummy>(), 4);
615
        assert_eq!(std::mem::size_of::<GpuDummyComposed>(), 16); // tight packing
616
        assert_eq!(std::mem::align_of::<GpuDummyComposed>(), 4);
617
        assert_eq!(std::mem::size_of::<GpuDummyLarge>(), 132 * 4); // tight packing
618
        assert_eq!(std::mem::align_of::<GpuDummyLarge>(), 4);
619

620
        // GPU
621
        assert_eq!(<GpuDummy as ShaderType>::min_size().get(), 16); // Vec3 gets padded to 16 bytes
622
        assert_eq!(<GpuDummy as ShaderSize>::SHADER_SIZE.get(), 16);
623
        assert_eq!(<GpuDummyComposed as ShaderType>::min_size().get(), 32); // align is 16 bytes, forces padding
624
        assert_eq!(<GpuDummyComposed as ShaderSize>::SHADER_SIZE.get(), 32);
625
        assert_eq!(<GpuDummyLarge as ShaderType>::min_size().get(), 544); // align is 16 bytes, forces padding
626
        assert_eq!(<GpuDummyLarge as ShaderSize>::SHADER_SIZE.get(), 544);
627

628
        for (item_align, expected_aligned_size) in [
629
            (0, 16),
630
            (4, 16),
631
            (8, 16),
632
            (16, 16),
633
            (32, 32),
634
            (256, 256),
635
            (512, 512),
636
        ] {
637
            let mut table = BufferTable::<GpuDummy>::new(
638
                BufferUsages::STORAGE,
639
                NonZeroU64::new(item_align),
640
                None,
641
            );
642
            assert_eq!(table.aligned_size(), expected_aligned_size);
643
            assert!(table.is_empty());
644
            table.insert(GpuDummy::default());
645
            assert!(!table.is_empty());
646
            assert_eq!(table.len(), 1);
647
        }
648

649
        for (item_align, expected_aligned_size) in [
650
            (0, 32),
651
            (4, 32),
652
            (8, 32),
653
            (16, 32),
654
            (32, 32),
655
            (256, 256),
656
            (512, 512),
657
        ] {
658
            let mut table = BufferTable::<GpuDummyComposed>::new(
659
                BufferUsages::STORAGE,
660
                NonZeroU64::new(item_align),
661
                None,
662
            );
663
            assert_eq!(table.aligned_size(), expected_aligned_size);
664
            assert!(table.is_empty());
665
            table.insert(GpuDummyComposed::default());
666
            assert!(!table.is_empty());
667
            assert_eq!(table.len(), 1);
668
        }
669

670
        for (item_align, expected_aligned_size) in [
671
            (0, 544),
672
            (4, 544),
673
            (8, 544),
674
            (16, 544),
675
            (32, 544),
676
            (256, 768),
677
            (512, 1024),
678
        ] {
679
            let mut table = BufferTable::<GpuDummyLarge>::new(
680
                BufferUsages::STORAGE,
681
                NonZeroU64::new(item_align),
682
                None,
683
            );
684
            assert_eq!(table.aligned_size(), expected_aligned_size);
685
            assert!(table.is_empty());
686
            table.insert(GpuDummyLarge {
687
                simple: Default::default(),
688
                tag: 0,
689
                large: [0.; 128],
690
            });
691
            assert!(!table.is_empty());
692
            assert_eq!(table.len(), 1);
693
        }
694
    }
695
}
696

697
#[cfg(all(test, feature = "gpu_tests"))]
698
mod gpu_tests {
699
    use std::fmt::Write;
700

701
    use bevy::render::render_resource::BufferSlice;
702
    use tests::*;
703
    use wgpu::{BufferView, CommandBuffer};
704

705
    use super::*;
706
    use crate::test_utils::MockRenderer;
707

708
    /// Read data from GPU back into CPU memory.
709
    ///
710
    /// This call blocks until the data is available on CPU. Used for testing
711
    /// only.
712
    fn read_back_gpu<'a>(device: &RenderDevice, slice: BufferSlice<'a>) -> BufferView<'a> {
6✔
713
        let (tx, rx) = futures::channel::oneshot::channel();
6✔
714
        slice.map_async(wgpu::MapMode::Read, move |result| {
12✔
715
            tx.send(result).unwrap();
6✔
716
        });
717
        device.poll(wgpu::Maintain::Wait);
6✔
718
        let result = futures::executor::block_on(rx);
6✔
719
        assert!(result.is_ok());
6✔
720
        slice.get_mapped_range()
6✔
721
    }
722

723
    /// Submit a command buffer to GPU and wait for completion.
724
    ///
725
    /// This call blocks until the GPU executed the command buffer. Used for
726
    /// testing only.
727
    fn submit_gpu_and_wait(
7✔
728
        device: &RenderDevice,
729
        queue: &RenderQueue,
730
        command_buffer: CommandBuffer,
731
    ) {
732
        // Queue command
733
        queue.submit([command_buffer]);
7✔
734

735
        // Register callback to observe completion
736
        let (tx, rx) = futures::channel::oneshot::channel();
7✔
737
        queue.on_submitted_work_done(move || {
14✔
738
            tx.send(()).unwrap();
7✔
739
        });
740

741
        // Poll device, checking for completion and raising callback
742
        device.poll(wgpu::Maintain::Wait);
7✔
743

744
        // Wait for callback to be raised. This was need in previous versions, however
745
        // it's a bit unclear if it's still needed or if device.poll() is enough to
746
        // guarantee that the command was executed.
747
        let _ = futures::executor::block_on(rx);
7✔
748
    }
749

750
    /// Convert a byte slice to a string of hexadecimal values separated by a
751
    /// blank space.
752
    fn to_hex_string(slice: &[u8]) -> String {
19✔
753
        let len = slice.len();
19✔
754
        let num_chars = len * 3 - 1;
19✔
755
        let mut s = String::with_capacity(num_chars);
19✔
756
        for b in &slice[..len - 1] {
589✔
757
            write!(&mut s, "{:02x} ", *b).unwrap();
285✔
758
        }
759
        write!(&mut s, "{:02x}", slice[len - 1]).unwrap();
19✔
760
        debug_assert_eq!(s.len(), num_chars);
38✔
761
        s
19✔
762
    }
763

764
    fn write_buffers_and_wait<T: Pod + ShaderSize>(
7✔
765
        table: &BufferTable<T>,
766
        device: &RenderDevice,
767
        queue: &RenderQueue,
768
    ) {
769
        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
7✔
770
            label: Some("test"),
7✔
771
        });
772
        table.write_buffer(&mut encoder);
7✔
773
        let command_buffer = encoder.finish();
7✔
774
        submit_gpu_and_wait(device, queue, command_buffer);
7✔
775
        println!("Buffer written to GPU");
7✔
776
    }
777

778
    #[test]
779
    fn table_write() {
780
        let renderer = MockRenderer::new();
781
        let device = renderer.device();
782
        let queue = renderer.queue();
783

784
        let item_align = device.limits().min_storage_buffer_offset_alignment as u64;
785
        println!("min_storage_buffer_offset_alignment = {item_align}");
786
        let mut table = BufferTable::<GpuDummyComposed>::new(
787
            BufferUsages::STORAGE | BufferUsages::MAP_READ,
788
            NonZeroU64::new(item_align),
789
            None,
790
        );
791
        let final_align = item_align.max(<GpuDummyComposed as ShaderSize>::SHADER_SIZE.get());
792
        assert_eq!(table.aligned_size(), final_align as usize);
793

794
        // Initial state
795
        assert!(table.is_empty());
796
        assert_eq!(table.len(), 0);
797
        assert_eq!(table.capacity(), 0);
798
        assert!(table.buffer.is_none());
799

800
        // This has no effect while the table is empty
801
        table.clear_previous_frame_resizes();
802
        table.allocate_gpu(&device, &queue);
803
        write_buffers_and_wait(&table, &device, &queue);
804
        assert!(table.is_empty());
805
        assert_eq!(table.len(), 0);
806
        assert_eq!(table.capacity(), 0);
807
        assert!(table.buffer.is_none());
808

809
        // New frame
810
        table.clear_previous_frame_resizes();
811

812
        // Insert some entries
813
        let len = 3;
814
        for i in 0..len {
815
            let row = table.insert(GpuDummyComposed {
816
                tag: i + 1,
817
                ..Default::default()
818
            });
819
            assert_eq!(row.0, i);
820
        }
821
        assert!(!table.is_empty());
822
        assert_eq!(table.len(), len);
823
        assert!(table.capacity() >= len); // contract: could over-allocate...
824
        assert!(table.buffer.is_none()); // not yet allocated on GPU
825

826
        // Allocate GPU buffer for current requested state
827
        table.allocate_gpu(&device, &queue);
828
        assert!(!table.is_empty());
829
        assert_eq!(table.len(), len);
830
        assert!(table.capacity() >= len);
831
        let ab = table
832
            .buffer
833
            .as_ref()
834
            .expect("GPU buffer should be allocated after allocate_gpu()");
835
        assert!(ab.old_buffer.is_none()); // no previous copy
836
        assert_eq!(ab.count, len);
837
        println!(
838
            "Allocated buffer #{:?} of {} rows",
839
            ab.buffer.id(),
840
            ab.count
841
        );
842
        let ab_buffer = ab.buffer.clone();
843

844
        // Another allocate_gpu() is a no-op
845
        table.allocate_gpu(&device, &queue);
846
        assert!(!table.is_empty());
847
        assert_eq!(table.len(), len);
848
        assert!(table.capacity() >= len);
849
        let ab = table
850
            .buffer
851
            .as_ref()
852
            .expect("GPU buffer should be allocated after allocate_gpu()");
853
        assert!(ab.old_buffer.is_none()); // no previous copy
854
        assert_eq!(ab.count, len);
855
        assert_eq!(ab_buffer.id(), ab.buffer.id()); // same buffer
856

857
        // Write buffer (CPU -> GPU)
858
        write_buffers_and_wait(&table, &device, &queue);
859

860
        {
861
            // Read back (GPU -> CPU)
862
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
863
            {
864
                let slice = buffer.slice(..);
865
                let view = read_back_gpu(&device, slice);
866
                println!(
867
                    "GPU data read back to CPU for validation: {} bytes",
868
                    view.len()
869
                );
870

871
                // Validate content
872
                assert_eq!(view.len(), final_align as usize * table.capacity() as usize);
873
                for i in 0..len as usize {
874
                    let offset = i * final_align as usize;
875
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
876
                    let src = &view[offset..offset + 16];
877
                    println!("{}", to_hex_string(src));
878
                    let dummy_composed: &[GpuDummyComposed] =
879
                        cast_slice(&view[offset..offset + item_size]);
880
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
881
                }
882
            }
883
            buffer.unmap();
884
        }
885

886
        // New frame
887
        table.clear_previous_frame_resizes();
888

889
        // Insert more entries
890
        let old_capacity = table.capacity();
891
        let mut len = len;
892
        while table.capacity() == old_capacity {
893
            let row = table.insert(GpuDummyComposed {
894
                tag: len + 1,
895
                ..Default::default()
896
            });
897
            assert_eq!(row.0, len);
898
            len += 1;
899
        }
900
        println!(
901
            "Added {} rows to grow capacity from {} to {}.",
902
            len - 3,
903
            old_capacity,
904
            table.capacity()
905
        );
906

907
        // This re-allocates a new GPU buffer because the capacity changed
908
        table.allocate_gpu(&device, &queue);
909
        assert!(!table.is_empty());
910
        assert_eq!(table.len(), len);
911
        assert!(table.capacity() >= len);
912
        let ab = table
913
            .buffer
914
            .as_ref()
915
            .expect("GPU buffer should be allocated after allocate_gpu()");
916
        assert_eq!(ab.count, len);
917
        assert!(ab.old_buffer.is_some()); // old buffer to copy
918
        assert_ne!(ab.old_buffer.as_ref().unwrap().id(), ab.buffer.id());
919
        println!(
920
            "Allocated new buffer #{:?} of {} rows",
921
            ab.buffer.id(),
922
            ab.count
923
        );
924

925
        // Write buffer (CPU -> GPU)
926
        write_buffers_and_wait(&table, &device, &queue);
927

928
        {
929
            // Read back (GPU -> CPU)
930
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
931
            {
932
                let slice = buffer.slice(..);
933
                let view = read_back_gpu(&device, slice);
934
                println!(
935
                    "GPU data read back to CPU for validation: {} bytes",
936
                    view.len()
937
                );
938

939
                // Validate content
940
                assert_eq!(view.len(), final_align as usize * table.capacity() as usize);
941
                for i in 0..len as usize {
942
                    let offset = i * final_align as usize;
943
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
944
                    let src = &view[offset..offset + 16];
945
                    println!("{}", to_hex_string(src));
946
                    let dummy_composed: &[GpuDummyComposed] =
947
                        cast_slice(&view[offset..offset + item_size]);
948
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
949
                }
950
            }
951
            buffer.unmap();
952
        }
953

954
        // New frame
955
        table.clear_previous_frame_resizes();
956

957
        // Delete the last allocated row
958
        let old_capacity = table.capacity();
959
        let len = len - 1;
960
        table.remove(BufferTableId(len));
961
        println!(
962
            "Removed last row to shrink capacity from {} to {}.",
963
            old_capacity,
964
            table.capacity()
965
        );
966

967
        // This doesn't do anything since we removed a row only
968
        table.allocate_gpu(&device, &queue);
969
        assert!(!table.is_empty());
970
        assert_eq!(table.len(), len);
971
        assert!(table.capacity() >= len);
972
        let ab = table
973
            .buffer
974
            .as_ref()
975
            .expect("GPU buffer should be allocated after allocate_gpu()");
976
        assert_eq!(ab.count, len + 1); // GPU buffer kept its size
977
        assert!(ab.old_buffer.is_none());
978

979
        // Write buffer (CPU -> GPU)
980
        write_buffers_and_wait(&table, &device, &queue);
981

982
        {
983
            // Read back (GPU -> CPU)
984
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
985
            {
986
                let slice = buffer.slice(..);
987
                let view = read_back_gpu(&device, slice);
988
                println!(
989
                    "GPU data read back to CPU for validation: {} bytes",
990
                    view.len()
991
                );
992

993
                // Validate content
994
                assert!(view.len() >= final_align as usize * table.capacity() as usize); // note the >=, the buffer is over-allocated
995
                for i in 0..len as usize {
996
                    let offset = i * final_align as usize;
997
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
998
                    let src = &view[offset..offset + 16];
999
                    println!("{}", to_hex_string(src));
1000
                    let dummy_composed: &[GpuDummyComposed] =
1001
                        cast_slice(&view[offset..offset + item_size]);
1002
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1003
                }
1004
            }
1005
            buffer.unmap();
1006
        }
1007

1008
        // New frame
1009
        table.clear_previous_frame_resizes();
1010

1011
        // Delete the first allocated row
1012
        let old_capacity = table.capacity();
1013
        let mut len = len - 1;
1014
        table.remove(BufferTableId(0));
1015
        assert_eq!(old_capacity, table.capacity());
1016
        println!(
1017
            "Removed first row to shrink capacity from {} to {} (no change).",
1018
            old_capacity,
1019
            table.capacity()
1020
        );
1021

1022
        // This doesn't do anything since we only removed a row
1023
        table.allocate_gpu(&device, &queue);
1024
        assert!(!table.is_empty());
1025
        assert_eq!(table.len(), len);
1026
        assert!(table.capacity() >= len);
1027
        let ab = table
1028
            .buffer
1029
            .as_ref()
1030
            .expect("GPU buffer should be allocated after allocate_gpu()");
1031
        assert_eq!(ab.count, len + 2); // GPU buffer kept its size
1032
        assert!(ab.old_buffer.is_none());
1033

1034
        // Write buffer (CPU -> GPU)
1035
        write_buffers_and_wait(&table, &device, &queue);
1036

1037
        {
1038
            // Read back (GPU -> CPU)
1039
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1040
            {
1041
                let slice = buffer.slice(..);
1042
                let view = read_back_gpu(&device, slice);
1043
                println!(
1044
                    "GPU data read back to CPU for validation: {} bytes",
1045
                    view.len()
1046
                );
1047

1048
                // Validate content
1049
                assert!(view.len() >= final_align as usize * table.capacity() as usize); // note the >=, the buffer is over-allocated
1050
                for i in 0..len as usize {
1051
                    let offset = i * final_align as usize;
1052
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1053
                    let src = &view[offset..offset + 16];
1054
                    println!("{}", to_hex_string(src));
1055
                    if i > 0 {
1056
                        let dummy_composed: &[GpuDummyComposed] =
1057
                            cast_slice(&view[offset..offset + item_size]);
1058
                        assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1059
                    }
1060
                }
1061
            }
1062
            buffer.unmap();
1063
        }
1064

1065
        // New frame
1066
        table.clear_previous_frame_resizes();
1067

1068
        // Insert a row; this should get into row #0 in the buffer
1069
        let row = table.insert(GpuDummyComposed {
1070
            tag: 1,
1071
            ..Default::default()
1072
        });
1073
        assert_eq!(row.0, 0);
1074
        len += 1;
1075
        println!(
1076
            "Added 1 row to grow capacity from {} to {}.",
1077
            old_capacity,
1078
            table.capacity()
1079
        );
1080

1081
        // This doesn't reallocate the GPU buffer since we used a free list entry
1082
        table.allocate_gpu(&device, &queue);
1083
        assert!(!table.is_empty());
1084
        assert_eq!(table.len(), len);
1085
        assert!(table.capacity() >= len);
1086
        let ab = table
1087
            .buffer
1088
            .as_ref()
1089
            .expect("GPU buffer should be allocated after allocate_gpu()");
1090
        assert_eq!(ab.count, 4); // 4 == last time we grew
1091
        assert!(ab.old_buffer.is_none());
1092

1093
        // Write buffer (CPU -> GPU)
1094
        write_buffers_and_wait(&table, &device, &queue);
1095

1096
        {
1097
            // Read back (GPU -> CPU)
1098
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1099
            {
1100
                let slice = buffer.slice(..);
1101
                let view = read_back_gpu(&device, slice);
1102
                println!(
1103
                    "GPU data read back to CPU for validation: {} bytes",
1104
                    view.len()
1105
                );
1106

1107
                // Validate content
1108
                assert!(view.len() >= final_align as usize * table.capacity() as usize);
1109
                for i in 0..len as usize {
1110
                    let offset = i * final_align as usize;
1111
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1112
                    let src = &view[offset..offset + 16];
1113
                    println!("{}", to_hex_string(src));
1114
                    let dummy_composed: &[GpuDummyComposed] =
1115
                        cast_slice(&view[offset..offset + item_size]);
1116
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1117
                }
1118
            }
1119
            buffer.unmap();
1120
        }
1121

1122
        // New frame
1123
        table.clear_previous_frame_resizes();
1124

1125
        // Insert a row; this should get into row #3 at the end of the allocated buffer
1126
        let row = table.insert(GpuDummyComposed {
1127
            tag: 4,
1128
            ..Default::default()
1129
        });
1130
        assert_eq!(row.0, 3);
1131
        len += 1;
1132
        println!(
1133
            "Added 1 row to grow capacity from {} to {}.",
1134
            old_capacity,
1135
            table.capacity()
1136
        );
1137

1138
        // This doesn't reallocate the GPU buffer since we used an implicit free entry
1139
        table.allocate_gpu(&device, &queue);
1140
        assert!(!table.is_empty());
1141
        assert_eq!(table.len(), len);
1142
        assert!(table.capacity() >= len);
1143
        let ab = table
1144
            .buffer
1145
            .as_ref()
1146
            .expect("GPU buffer should be allocated after allocate_gpu()");
1147
        assert_eq!(ab.count, 4); // 4 == last time we grew
1148
        assert!(ab.old_buffer.is_none());
1149

1150
        // Write buffer (CPU -> GPU)
1151
        write_buffers_and_wait(&table, &device, &queue);
1152

1153
        {
1154
            // Read back (GPU -> CPU)
1155
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1156
            {
1157
                let slice = buffer.slice(..);
1158
                let view = read_back_gpu(&device, slice);
1159
                println!(
1160
                    "GPU data read back to CPU for validation: {} bytes",
1161
                    view.len()
1162
                );
1163

1164
                // Validate content
1165
                assert!(view.len() >= final_align as usize * table.capacity() as usize);
1166
                for i in 0..len as usize {
1167
                    let offset = i * final_align as usize;
1168
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1169
                    let src = &view[offset..offset + 16];
1170
                    println!("{}", to_hex_string(src));
1171
                    let dummy_composed: &[GpuDummyComposed] =
1172
                        cast_slice(&view[offset..offset + item_size]);
1173
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1174
                }
1175
            }
1176
            buffer.unmap();
1177
        }
1178
    }
1179
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc