
djeedai / bevy_hanabi / build 11417344053 (push, github, web-flow)

19 Oct 2024 12:00PM UTC coverage: 59.035% (-0.03%) from 59.062%
3486 of 5905 relevant lines covered (59.03%), 23.89 hits per line

Upgrade tarpaulin to 0.31.2 (#386)

This fixes an issue with the latest llvm_profparser.

Source File: /src/render/buffer_table.rs (71.03% covered)

use std::num::NonZeroU64;

use bevy::{
    log::trace,
    render::{
        render_resource::{
            Buffer, BufferAddress, BufferDescriptor, BufferUsages, CommandEncoder, ShaderSize,
            ShaderType,
        },
        renderer::{RenderDevice, RenderQueue},
    },
};
use bytemuck::{cast_slice, Pod};
use copyless::VecHelper;

use crate::next_multiple_of;

/// Index of a row in a [`BufferTable`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BufferTableId(pub(crate) u32); // TEMP: pub(crate)

#[derive(Debug)]
struct AllocatedBuffer {
    /// Currently allocated buffer, of size equal to `count` rows.
    buffer: Buffer,
    /// Size of the currently allocated buffer, in number of rows.
    count: u32,
    /// Previously allocated buffer if any, cached until the next buffer write
    /// so that old data can be copied into the newly-allocated buffer.
    old_buffer: Option<Buffer>,
    /// Size of the old buffer if any, in number of rows.
    old_count: u32,
}

impl AllocatedBuffer {
    /// Get the number of rows of the currently allocated GPU buffer.
    ///
    /// When the capacity grows, this count remains valid until the next buffer
    /// swap.
    pub fn allocated_count(&self) -> u32 {
        if self.old_buffer.is_some() {
            self.old_count
        } else {
            self.count
        }
    }
}

/// GPU buffer holding a table with concurrent interleaved CPU/GPU access.
///
/// The buffer table data structure represents a GPU buffer holding a table made
/// of individual rows. Each row of the table has the same layout (same size),
/// and can be allocated (assigned to an existing index) or free (available for
/// future allocation). The data structure manages a free list of rows, and
/// copies rows modified on the CPU to the GPU without touching other rows. This
/// ensures that existing rows in the GPU buffer can be accessed and modified by
/// the GPU without being overwritten by the CPU, and without the need for the
/// CPU to read the data back from GPU into CPU memory.
///
/// The element type `T` needs to implement the following traits:
/// - [`Pod`] to allow copy.
/// - [`ShaderType`] because it needs to be mapped for a shader.
/// - [`ShaderSize`] to ensure a fixed footprint, to allow packing multiple
///   instances inside a single buffer. This therefore excludes any
///   runtime-sized array.
///
/// This is similar to a [`BufferVec`] or [`AlignedBufferVec`], but unlike those
/// data structures a buffer table preserves rows modified by the GPU without
/// overwriting them. This is useful when the buffer is also modified by GPU
/// shaders, so neither the CPU side nor the GPU side has an up-to-date view of
/// the entire table, and so the CPU cannot re-upload the entire table on
/// changes.
///
/// # Usage
///
/// - During the [`RenderStage::Prepare`] stage, call
///   [`clear_previous_frame_resizes()`] to clear any stale buffer from the
///   previous frame. Then insert new rows with [`insert()`], and if you made
///   changes, call [`allocate_gpu()`] at the end to allocate any new buffer
///   needed.
/// - During the [`RenderStage::Render`] stage, call [`write_buffer()`] with a
///   command encoder before using any row, to perform any pending buffer
///   resize copy (see the example below).
///
/// [`BufferVec`]: bevy::render::render_resource::BufferVec
/// [`AlignedBufferVec`]: crate::render::aligned_buffer_vec::AlignedBufferVec
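///
/// # Example
///
/// A minimal per-frame sketch. The `GpuRow` element type, the `render_device`,
/// `render_queue` and `command_encoder` variables, and the surrounding
/// prepare/render systems are assumptions for illustration, not part of this
/// crate:
///
/// ```ignore
/// use bevy::render::render_resource::BufferUsages;
///
/// // Created once, e.g. in a `FromWorld` impl.
/// let mut table =
///     BufferTable::<GpuRow>::new(BufferUsages::STORAGE, None, Some("my_table".to_string()));
///
/// // Prepare stage: clear stale resize buffers, insert rows, allocate GPU storage.
/// table.clear_previous_frame_resizes();
/// let row: BufferTableId = table.insert(GpuRow::default());
/// let reallocated = table.allocate_gpu(&render_device, &render_queue);
/// // If `reallocated` is true, any bind group built from `table.buffer()` must
/// // be re-created.
///
/// // Render stage: schedule any pending old-to-new buffer copy before using rows.
/// table.write_buffer(&mut command_encoder);
/// ```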
#[derive(Debug)]
pub struct BufferTable<T: Pod + ShaderSize> {
    /// GPU buffer if already allocated, or `None` otherwise.
    buffer: Option<AllocatedBuffer>,
    /// GPU buffer usages.
    buffer_usage: BufferUsages,
    /// Optional GPU buffer name, for debugging.
    label: Option<String>,
    /// Size of a single buffer element, in bytes, in CPU memory (Rust layout).
    item_size: usize,
    /// Size of a single buffer element, in bytes, aligned to GPU memory
    /// constraints.
    aligned_size: usize,
    /// Capacity of the buffer, in number of rows.
    capacity: u32,
    /// Size of the "active" portion of the table, which includes allocated rows
    /// and any row in the free list. All other rows in the
    /// `active_count..capacity` range are implicitly unallocated.
    active_count: u32,
    /// Free list of rows available in the GPU buffer for a new allocation. This
    /// only contains indices in the `0..active_count` range; all row indices in
    /// `active_count..capacity` are assumed to be unallocated.
    free_indices: Vec<u32>,
    /// Pending values accumulated on CPU and not yet written to GPU, and their
    /// rows.
    pending_values: Vec<(u32, T)>,
    /// Extra pending values accumulated on CPU like `pending_values`, but for
    /// which there's not enough space in the current GPU buffer. Those values
    /// are sorted in index order, occupying the row range `buffer.count..`.
    extra_pending_values: Vec<T>,
}

impl<T: Pod + ShaderSize> Default for BufferTable<T> {
    fn default() -> Self {
        let item_size = std::mem::size_of::<T>();
        let aligned_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
        assert!(aligned_size >= item_size);
        Self {
            buffer: None,
            buffer_usage: BufferUsages::all(),
            label: None,
            item_size,
            aligned_size,
            capacity: 0,
            active_count: 0,
            free_indices: Vec::new(),
            pending_values: Vec::new(),
            extra_pending_values: Vec::new(),
        }
    }
}

impl<T: Pod + ShaderSize> BufferTable<T> {
    /// Create a new collection.
    ///
    /// `item_align` is an optional additional alignment for items in the
    /// collection. If greater than the natural alignment dictated by WGSL
    /// rules, this extra alignment is enforced. Otherwise it's ignored (so you
    /// can pass `None` to ignore it). This is useful if, for example, you want
    /// to bind individual rows or any subset of the table, to ensure each row
    /// is aligned to the device constraints.
    ///
    /// # Panics
    ///
    /// Panics if `buffer_usage` contains [`BufferUsages::UNIFORM`] and the
    /// layout of the element type `T` does not meet the requirements of the
    /// uniform address space, as tested by
    /// [`ShaderType::assert_uniform_compat()`].
    ///
    /// [`BufferUsages::UNIFORM`]: bevy::render::render_resource::BufferUsages::UNIFORM
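    ///
    /// # Example
    ///
    /// A sketch of creating a table whose rows can be bound individually as
    /// storage buffer bindings. The `GpuRow` element type and the
    /// `render_device` variable are assumptions for illustration:
    ///
    /// ```ignore
    /// use std::num::NonZeroU64;
    /// use bevy::render::render_resource::BufferUsages;
    ///
    /// // Enforce the device's minimum storage buffer offset alignment per row.
    /// let item_align = render_device.limits().min_storage_buffer_offset_alignment as u64;
    /// let table = BufferTable::<GpuRow>::new(
    ///     BufferUsages::STORAGE,
    ///     NonZeroU64::new(item_align),
    ///     Some("gpu_row_table".to_string()),
    /// );
    /// assert_eq!(table.aligned_size() as u64 % item_align, 0);
    /// ```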
    pub fn new(
        buffer_usage: BufferUsages,
        item_align: Option<NonZeroU64>,
        label: Option<String>,
    ) -> Self {
        // GPU-aligned item size, compatible with WGSL rules
        let item_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
        // Extra manual alignment for device constraints
        let aligned_size = if let Some(item_align) = item_align {
            let item_align = item_align.get() as usize;
            let aligned_size = next_multiple_of(item_size, item_align);
            assert!(aligned_size >= item_size);
            assert!(aligned_size % item_align == 0);
            aligned_size
        } else {
            item_size
        };
        trace!(
            "BufferTable[\"{}\"]: item_size={} aligned_size={}",
            label.as_ref().unwrap_or(&String::new()),
            item_size,
            aligned_size
        );
        if buffer_usage.contains(BufferUsages::UNIFORM) {
            <T as ShaderType>::assert_uniform_compat();
        }
        Self {
            // Need COPY_SRC and COPY_DST to copy from old to new buffer on resize
            buffer_usage: buffer_usage | BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
            aligned_size,
            label,
            ..Default::default()
        }
    }

    /// Reference to the GPU buffer, if already allocated.
    ///
    /// This reference corresponds to the currently allocated GPU buffer, which
    /// may not contain all data since the last [`insert()`] call, and could
    /// become invalid if a new larger buffer needs to be allocated to store the
    /// pending values inserted with [`insert()`].
    ///
    /// [`insert()`]: BufferTable::insert
    #[inline]
    pub fn buffer(&self) -> Option<&Buffer> {
        self.buffer.as_ref().map(|ab| &ab.buffer)
    }

    /// Maximum number of rows the table can hold without reallocation.
    ///
    /// This is the maximum number of rows that can be added to the table
    /// without forcing a new GPU buffer to be allocated and a copy from the old
    /// to the new buffer.
    ///
    /// Note that this doesn't imply that no GPU buffer allocation will ever
    /// occur; if a GPU buffer was never allocated, and there are pending
    /// CPU rows to insert, then a new buffer will be allocated on next
    /// update with this capacity.
    #[inline]
    #[allow(dead_code)]
    pub fn capacity(&self) -> u32 {
        self.capacity
    }

    /// Current number of rows in use in the table.
    #[inline]
    #[allow(dead_code)]
    pub fn len(&self) -> u32 {
        self.active_count - self.free_indices.len() as u32
    }

    /// Size of a single row in the table, in bytes, aligned to GPU constraints.
    #[inline]
    #[allow(dead_code)]
    pub fn aligned_size(&self) -> usize {
        self.aligned_size
    }

    /// Is the table empty?
    #[inline]
    #[allow(dead_code)]
    pub fn is_empty(&self) -> bool {
        self.active_count == 0
    }

    /// Clear all rows of the table without deallocating any existing GPU
    /// buffer.
    ///
    /// This operation only updates the CPU cache of the table, without touching
    /// any GPU buffer. On next GPU buffer update, the GPU buffer will be
    /// deallocated.
    #[allow(dead_code)]
    pub fn clear(&mut self) {
        self.pending_values.clear();
        self.extra_pending_values.clear();
        self.free_indices.clear();
        self.active_count = 0;
    }

    /// Clear any stale buffer used for resize in the previous frame during
    /// rendering while the data structure was immutable.
    ///
    /// This must be called before any new [`insert()`].
    ///
    /// [`insert()`]: crate::BufferTable::insert
    pub fn clear_previous_frame_resizes(&mut self) {
        if let Some(ab) = self.buffer.as_mut() {
            ab.old_buffer = None;
            ab.old_count = 0;
        }
    }

    fn to_byte_size(&self, count: u32) -> usize {
        count as usize * self.aligned_size
    }

    /// Insert a new row into the table.
    ///
    /// For performance reasons, this buffers the row content on the CPU until
    /// the next GPU update, to minimize the number of CPU to GPU transfers.
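    ///
    /// A short usage sketch (the `GpuRow` element type is a hypothetical
    /// placeholder):
    ///
    /// ```ignore
    /// // The returned id identifies the row, e.g. for a later `remove()`.
    /// let id: BufferTableId = table.insert(GpuRow::default());
    /// // ...
    /// table.remove(id);
    /// ```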
    pub fn insert(&mut self, value: T) -> BufferTableId {
        trace!(
            "Inserting into table buffer with {} free indices, capacity: {}, active_size: {}",
            self.free_indices.len(),
            self.capacity,
            self.active_count
        );
        let index = if self.free_indices.is_empty() {
            let index = self.active_count;
            if index == self.capacity {
                self.capacity += 1;
            }
            debug_assert!(index < self.capacity);
            self.active_count += 1;
            index
        } else {
            // Note: this is inefficient O(n) but we need to apply the same logic as the
            // EffectCache because we rely on indices being in sync.
            self.free_indices.remove(0)
        };
        let allocated_count = self
            .buffer
            .as_ref()
            .map(|ab| ab.allocated_count())
            .unwrap_or(0);
        trace!(
            "Found free index {}, capacity: {}, active_count: {}, allocated_count: {}",
            index,
            self.capacity,
            self.active_count,
            allocated_count
        );
        if index < allocated_count {
            self.pending_values.alloc().init((index, value));
        } else {
            let extra_index = index - allocated_count;
            if extra_index < self.extra_pending_values.len() as u32 {
                self.extra_pending_values[extra_index as usize] = value;
            } else {
                self.extra_pending_values.alloc().init(value);
            }
        }
        BufferTableId(index)
    }

    /// Remove a row from the table.
    #[allow(dead_code)]
    pub fn remove(&mut self, id: BufferTableId) {
        let index = id.0;
        assert!(index < self.active_count);

        // If this is the last item in the active zone, just shrink the active zone
        // (implicit free list).
        if index == self.active_count - 1 {
            self.active_count -= 1;
            self.capacity -= 1;
        } else {
            // This is very inefficient but we need to apply the same logic as the
            // EffectCache because we rely on indices being in sync.
            let pos = self
                .free_indices
                .binary_search(&index) // will fail
                .unwrap_or_else(|e| e); // will get position of insertion
            self.free_indices.insert(pos, index);
        }
    }

    /// Allocate any GPU buffer if needed, based on the most recent capacity
    /// requested.
    ///
    /// This should be called only once per frame after all allocation requests
    /// have been made via [`insert()`] but before the GPU buffer is actually
    /// updated. This is an optimization to enable allocating the GPU buffer
    /// earlier than it's actually needed. Calling this multiple times will work
    /// but will be inefficient and allocate GPU buffers for nothing. Not
    /// calling it is safe, as the next update will call it just-in-time anyway.
    ///
    /// # Returns
    ///
    /// Returns `true` if a new buffer was (re-)allocated, to indicate any bind
    /// group needs to be re-created.
    ///
    /// [`insert()`]: crate::render::BufferTable::insert
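    ///
    /// A sketch of typical prepare-stage usage; the `cached_bind_group`
    /// variable is an assumption for illustration, not part of this crate:
    ///
    /// ```ignore
    /// if table.allocate_gpu(&render_device, &render_queue) {
    ///     // The underlying wgpu buffer changed; drop any cached bind group so
    ///     // it gets re-created from the new `table.buffer()`.
    ///     cached_bind_group = None;
    /// }
    /// ```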
    pub fn allocate_gpu(&mut self, device: &RenderDevice, queue: &RenderQueue) -> bool {
        // The allocated capacity is the capacity of the currently allocated GPU buffer,
        // which can be different from the expected capacity (self.capacity) for next
        // update.
        let allocated_count = self.buffer.as_ref().map(|ab| ab.count).unwrap_or(0);
        let reallocated = if self.capacity > allocated_count {
            let size = self.to_byte_size(self.capacity);
            trace!(
                "reserve: increase capacity from {} to {} elements, old size {} bytes, new size {} bytes",
                allocated_count,
                self.capacity,
                self.to_byte_size(allocated_count),
                size
            );

            // Create the new buffer, swapping with the old one if any
            let has_init_data = !self.extra_pending_values.is_empty();
            let new_buffer = device.create_buffer(&BufferDescriptor {
                label: self.label.as_ref().map(|s| &s[..]),
                size: size as BufferAddress,
                usage: self.buffer_usage,
                mapped_at_creation: has_init_data,
            });

            // Use any pending data to initialize the buffer. We only use CPU-available
            // data, which was inserted after the buffer was (re-)allocated and
            // has not been uploaded to GPU yet.
            if has_init_data {
                // Leave some space to copy the old buffer if any
                let base_size = self.to_byte_size(allocated_count) as u64;
                let extra_size = self.to_byte_size(self.extra_pending_values.len() as u32) as u64;

                // Scope get_mapped_range_mut() to force a drop before unmap()
                {
                    let dst_slice = &mut new_buffer
                        .slice(base_size..base_size + extra_size)
                        .get_mapped_range_mut();

                    for (index, content) in self.extra_pending_values.drain(..).enumerate() {
                        let byte_size = self.aligned_size; // single row
                        let byte_offset = byte_size * index;

                        // Copy Rust value into a GPU-ready format, including GPU padding.
                        let src: &[u8] = cast_slice(std::slice::from_ref(&content));
                        let dst_range = byte_offset..byte_offset + self.item_size;
                        trace!(
                            "+ copy: index={} src={:?} dst={:?} byte_offset={} byte_size={}",
                            index,
                            src.as_ptr(),
                            dst_range,
                            byte_offset,
                            byte_size
                        );
                        let dst = &mut dst_slice[dst_range];
                        dst.copy_from_slice(src);
                    }
                }

                new_buffer.unmap();
            }

            if let Some(ab) = self.buffer.as_mut() {
                // If there's any data currently in the GPU buffer, we need to copy it on next
                // update to preserve it, but only if there's no pending copy already.
                if self.active_count > 0 && ab.old_buffer.is_none() {
                    ab.old_buffer = Some(ab.buffer.clone()); // TODO: swap
                    ab.old_count = ab.count;
                }
                ab.buffer = new_buffer;
                ab.count = self.capacity;
            } else {
                self.buffer = Some(AllocatedBuffer {
                    buffer: new_buffer,
                    count: self.capacity,
                    old_buffer: None,
                    old_count: 0,
                });
            }

            true
        } else {
            false
        };

        // Immediately schedule a copy of old rows.
        // - For old rows, copy into the old buffer because the old-to-new buffer copy
        //   will be executed during a command queue while any CPU to GPU upload is
        //   prepended before the next command queue. To ensure things don't get out
        //   of order (the CPU upload overwriting the GPU-to-GPU copy), make sure
        //   those two are disjoint.
        if let Some(ab) = self.buffer.as_ref() {
            let buffer = ab.old_buffer.as_ref().unwrap_or(&ab.buffer);
            for (index, content) in self.pending_values.drain(..) {
                let byte_size = self.aligned_size;
                let byte_offset = byte_size * index as usize;

                // Copy Rust value into a GPU-ready format, including GPU padding.
                // TODO - Do that in insert()!
                let mut aligned_buffer: Vec<u8> = vec![0; self.aligned_size];
                let src: &[u8] = cast_slice(std::slice::from_ref(&content));
                let dst_range = ..self.item_size;
                trace!(
                    "+ copy: index={} src={:?} dst={:?} byte_offset={} byte_size={}",
                    index,
                    src.as_ptr(),
                    dst_range,
                    byte_offset,
                    byte_size
                );
                let dst = &mut aligned_buffer[dst_range];
                dst.copy_from_slice(src);

                // Upload to GPU
                // TODO - Merge contiguous blocks into a single write_buffer()
                let bytes: &[u8] = cast_slice(&aligned_buffer);
                queue.write_buffer(buffer, byte_offset as u64, bytes);
            }
        } else {
            debug_assert!(self.pending_values.is_empty());
            debug_assert!(self.extra_pending_values.is_empty());
        }

        reallocated
    }

    /// Write CPU data to the GPU buffer, copying any previously-allocated
    /// buffer into the new one if the table was resized.
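    ///
    /// A sketch of render-stage usage, mirroring the GPU test below; the
    /// `render_device` and `render_queue` variables are assumed to come from
    /// the Bevy render world:
    ///
    /// ```ignore
    /// let mut encoder = render_device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
    ///     label: Some("buffer_table_copy"),
    /// });
    /// table.write_buffer(&mut encoder);
    /// render_queue.submit([encoder.finish()]);
    /// ```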
    pub fn write_buffer(&self, encoder: &mut CommandEncoder) {
        // Check if there's any work to do: either some pending values to upload or some
        // existing buffer to copy into a newly-allocated one.
        if self.pending_values.is_empty()
            && self
                .buffer
                .as_ref()
                .map(|ab| ab.old_buffer.is_none())
                .unwrap_or(true)
        {
            return;
        }

        trace!(
            "write_buffer: pending_values.len={} item_size={} aligned_size={} buffer={:?}",
            self.pending_values.len(),
            self.item_size,
            self.aligned_size,
            self.buffer,
        );

        // If there's no more GPU buffer, there's nothing to do
        let Some(ab) = self.buffer.as_ref() else {
            return;
        };

        // Copy any old buffer into the new one, and clear the old buffer. Note that we
        // only clear the ref-counted reference to the buffer, not the actual buffer,
        // which stays alive until the copy is done (but we don't need to care about
        // keeping it alive, wgpu does that for us).
        if let Some(old_buffer) = ab.old_buffer.as_ref() {
            let old_size = self.to_byte_size(ab.old_count) as u64;
            trace!("Copy old buffer id {:?} of size {} bytes into newly-allocated buffer {:?} of size {} bytes.", old_buffer.id(), old_size, ab.buffer.id(), self.to_byte_size(ab.count));
            encoder.copy_buffer_to_buffer(old_buffer, 0, &ab.buffer, 0, old_size);
        }
    }
}

#[cfg(test)]
mod tests {
    use bevy::math::Vec3;
    use bytemuck::{Pod, Zeroable};

    use super::*;

    #[repr(C)]
    #[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
    pub(crate) struct GpuDummy {
        pub v: Vec3,
    }

    #[repr(C)]
    #[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
    pub(crate) struct GpuDummyComposed {
        pub simple: GpuDummy,
        pub tag: u32,
        // GPU padding to 16 bytes due to GpuDummy forcing align to 16 bytes
    }

    #[repr(C)]
    #[derive(Debug, Clone, Copy, Pod, Zeroable, ShaderType)]
    pub(crate) struct GpuDummyLarge {
        pub simple: GpuDummy,
        pub tag: u32,
        pub large: [f32; 128],
    }

    #[test]
    fn table_sizes() {
        // Rust
        assert_eq!(std::mem::size_of::<GpuDummy>(), 12);
        assert_eq!(std::mem::align_of::<GpuDummy>(), 4);
        assert_eq!(std::mem::size_of::<GpuDummyComposed>(), 16); // tight packing
        assert_eq!(std::mem::align_of::<GpuDummyComposed>(), 4);
        assert_eq!(std::mem::size_of::<GpuDummyLarge>(), 132 * 4); // tight packing
        assert_eq!(std::mem::align_of::<GpuDummyLarge>(), 4);

        // GPU
        assert_eq!(<GpuDummy as ShaderType>::min_size().get(), 16); // Vec3 gets padded to 16 bytes
        assert_eq!(<GpuDummy as ShaderSize>::SHADER_SIZE.get(), 16);
        assert_eq!(<GpuDummyComposed as ShaderType>::min_size().get(), 32); // align is 16 bytes, forces padding
        assert_eq!(<GpuDummyComposed as ShaderSize>::SHADER_SIZE.get(), 32);
        assert_eq!(<GpuDummyLarge as ShaderType>::min_size().get(), 544); // align is 16 bytes, forces padding
        assert_eq!(<GpuDummyLarge as ShaderSize>::SHADER_SIZE.get(), 544);
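
        // Each (item_align, expected_aligned_size) pair below expects the row
        // size to be the type's SHADER_SIZE, rounded up to the next multiple of
        // item_align when a non-zero alignment is given; see BufferTable::new().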
        for (item_align, expected_aligned_size) in [
            (0, 16),
            (4, 16),
            (8, 16),
            (16, 16),
            (32, 32),
            (256, 256),
            (512, 512),
        ] {
            let mut table = BufferTable::<GpuDummy>::new(
                BufferUsages::STORAGE,
                NonZeroU64::new(item_align),
                None,
            );
            assert_eq!(table.aligned_size(), expected_aligned_size);
            assert!(table.is_empty());
            table.insert(GpuDummy::default());
            assert!(!table.is_empty());
            assert_eq!(table.len(), 1);
        }

        for (item_align, expected_aligned_size) in [
            (0, 32),
            (4, 32),
            (8, 32),
            (16, 32),
            (32, 32),
            (256, 256),
            (512, 512),
        ] {
            let mut table = BufferTable::<GpuDummyComposed>::new(
                BufferUsages::STORAGE,
                NonZeroU64::new(item_align),
                None,
            );
            assert_eq!(table.aligned_size(), expected_aligned_size);
            assert!(table.is_empty());
            table.insert(GpuDummyComposed::default());
            assert!(!table.is_empty());
            assert_eq!(table.len(), 1);
        }

        for (item_align, expected_aligned_size) in [
            (0, 544),
            (4, 544),
            (8, 544),
            (16, 544),
            (32, 544),
            (256, 768),
            (512, 1024),
        ] {
            let mut table = BufferTable::<GpuDummyLarge>::new(
                BufferUsages::STORAGE,
                NonZeroU64::new(item_align),
                None,
            );
            assert_eq!(table.aligned_size(), expected_aligned_size);
            assert!(table.is_empty());
            table.insert(GpuDummyLarge {
                simple: Default::default(),
                tag: 0,
                large: [0.; 128],
            });
            assert!(!table.is_empty());
            assert_eq!(table.len(), 1);
        }
    }
}

#[cfg(all(test, feature = "gpu_tests"))]
mod gpu_tests {
    use std::fmt::Write;

    use bevy::render::render_resource::BufferSlice;
    use tests::*;
    use wgpu::{BufferView, CommandBuffer};

    use super::*;
    use crate::test_utils::MockRenderer;

    /// Read data from GPU back into CPU memory.
    ///
    /// This call blocks until the data is available on CPU. Used for testing
    /// only.
    fn read_back_gpu<'a>(device: &RenderDevice, slice: BufferSlice<'a>) -> BufferView<'a> {
        let (tx, rx) = futures::channel::oneshot::channel();
        slice.map_async(wgpu::MapMode::Read, move |result| {
            tx.send(result).unwrap();
        });
        device.poll(wgpu::Maintain::Wait);
        let result = futures::executor::block_on(rx);
        assert!(result.is_ok());
        slice.get_mapped_range()
    }

    /// Submit a command buffer to GPU and wait for completion.
    ///
    /// This call blocks until the GPU executed the command buffer. Used for
    /// testing only.
    fn submit_gpu_and_wait(
        device: &RenderDevice,
        queue: &RenderQueue,
        command_buffer: CommandBuffer,
    ) {
        // Queue command
        queue.submit([command_buffer]);

        // Register callback to observe completion
        let (tx, rx) = futures::channel::oneshot::channel();
        queue.on_submitted_work_done(move || {
            tx.send(()).unwrap();
        });

        // Poll device, checking for completion and raising callback
        device.poll(wgpu::Maintain::Wait);

        // Wait for callback to be raised. This was needed in previous versions;
        // however, it's a bit unclear whether it's still needed or if device.poll()
        // is enough to guarantee that the command was executed.
        let _ = futures::executor::block_on(rx);
    }

    /// Convert a byte slice to a string of hexadecimal values separated by a
    /// blank space.
    fn to_hex_string(slice: &[u8]) -> String {
        let len = slice.len();
        let num_chars = len * 3 - 1;
        let mut s = String::with_capacity(num_chars);
        for b in &slice[..len - 1] {
            write!(&mut s, "{:02x} ", *b).unwrap();
        }
        write!(&mut s, "{:02x}", slice[len - 1]).unwrap();
        debug_assert_eq!(s.len(), num_chars);
        s
    }

    fn write_buffers_and_wait<T: Pod + ShaderSize>(
        table: &BufferTable<T>,
        device: &RenderDevice,
        queue: &RenderQueue,
    ) {
        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("test"),
        });
        table.write_buffer(&mut encoder);
        let command_buffer = encoder.finish();
        submit_gpu_and_wait(device, queue, command_buffer);
        println!("Buffer written to GPU");
    }

    #[test]
    fn table_write() {
        let renderer = MockRenderer::new();
        let device = renderer.device();
        let queue = renderer.queue();

        let item_align = device.limits().min_storage_buffer_offset_alignment as u64;
        println!("min_storage_buffer_offset_alignment = {item_align}");
        let mut table = BufferTable::<GpuDummyComposed>::new(
            BufferUsages::STORAGE | BufferUsages::MAP_READ,
            NonZeroU64::new(item_align),
            None,
        );
        let final_align = item_align.max(<GpuDummyComposed as ShaderSize>::SHADER_SIZE.get());
        assert_eq!(table.aligned_size(), final_align as usize);

        // Initial state
        assert!(table.is_empty());
        assert_eq!(table.len(), 0);
        assert_eq!(table.capacity(), 0);
        assert!(table.buffer.is_none());

        // This has no effect while the table is empty
        table.clear_previous_frame_resizes();
        table.allocate_gpu(&device, &queue);
        write_buffers_and_wait(&table, &device, &queue);
        assert!(table.is_empty());
        assert_eq!(table.len(), 0);
        assert_eq!(table.capacity(), 0);
        assert!(table.buffer.is_none());

        // New frame
        table.clear_previous_frame_resizes();

        // Insert some entries
        let len = 3;
        for i in 0..len {
            let row = table.insert(GpuDummyComposed {
                tag: i + 1,
                ..Default::default()
            });
            assert_eq!(row.0, i);
        }
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len); // contract: could over-allocate...
        assert!(table.buffer.is_none()); // not yet allocated on GPU

        // Allocate GPU buffer for current requested state
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert!(ab.old_buffer.is_none()); // no previous copy
        assert_eq!(ab.count, len);
        println!(
            "Allocated buffer #{:?} of {} rows",
            ab.buffer.id(),
            ab.count
        );
        let ab_buffer = ab.buffer.clone();

        // Another allocate_gpu() is a no-op
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert!(ab.old_buffer.is_none()); // no previous copy
        assert_eq!(ab.count, len);
        assert_eq!(ab_buffer.id(), ab.buffer.id()); // same buffer

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert_eq!(view.len(), final_align as usize * table.capacity() as usize);
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
                    let dummy_composed: &[GpuDummyComposed] =
                        cast_slice(&view[offset..offset + item_size]);
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                }
            }
            buffer.unmap();
        }

        // New frame
        table.clear_previous_frame_resizes();

        // Insert more entries
        let old_capacity = table.capacity();
        let mut len = len;
        while table.capacity() == old_capacity {
            let row = table.insert(GpuDummyComposed {
                tag: len + 1,
                ..Default::default()
            });
            assert_eq!(row.0, len);
            len += 1;
        }
        println!(
            "Added {} rows to grow capacity from {} to {}.",
            len - 3,
            old_capacity,
            table.capacity()
        );

        // This re-allocates a new GPU buffer because the capacity changed
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert_eq!(ab.count, len);
        assert!(ab.old_buffer.is_some()); // old buffer to copy
        assert_ne!(ab.old_buffer.as_ref().unwrap().id(), ab.buffer.id());
        println!(
            "Allocated new buffer #{:?} of {} rows",
            ab.buffer.id(),
            ab.count
        );

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert_eq!(view.len(), final_align as usize * table.capacity() as usize);
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
                    let dummy_composed: &[GpuDummyComposed] =
                        cast_slice(&view[offset..offset + item_size]);
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                }
            }
            buffer.unmap();
        }

        // New frame
        table.clear_previous_frame_resizes();

        // Delete the last allocated row
        let old_capacity = table.capacity();
        let len = len - 1;
        table.remove(BufferTableId(len));
        println!(
            "Removed last row to shrink capacity from {} to {}.",
            old_capacity,
            table.capacity()
        );

        // This doesn't do anything since we only removed a row
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert_eq!(ab.count, len + 1); // GPU buffer kept its size
        assert!(ab.old_buffer.is_none());

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert!(view.len() >= final_align as usize * table.capacity() as usize); // note the >=, the buffer is over-allocated
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
                    let dummy_composed: &[GpuDummyComposed] =
                        cast_slice(&view[offset..offset + item_size]);
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                }
            }
            buffer.unmap();
        }

        // New frame
        table.clear_previous_frame_resizes();

        // Delete the first allocated row
        let old_capacity = table.capacity();
        let mut len = len - 1;
        table.remove(BufferTableId(0));
        assert_eq!(old_capacity, table.capacity());
        println!(
            "Removed first row to shrink capacity from {} to {} (no change).",
            old_capacity,
            table.capacity()
        );

        // This doesn't do anything since we only removed a row
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert_eq!(ab.count, len + 2); // GPU buffer kept its size
        assert!(ab.old_buffer.is_none());

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert!(view.len() >= final_align as usize * table.capacity() as usize); // note the >=, the buffer is over-allocated
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
                    if i > 0 {
                        let dummy_composed: &[GpuDummyComposed] =
                            cast_slice(&view[offset..offset + item_size]);
                        assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                    }
                }
            }
            buffer.unmap();
        }

        // New frame
        table.clear_previous_frame_resizes();

        // Insert a row; this should get into row #0 in the buffer
        let row = table.insert(GpuDummyComposed {
            tag: 1,
            ..Default::default()
        });
        assert_eq!(row.0, 0);
        len += 1;
        println!(
            "Added 1 row to grow capacity from {} to {}.",
            old_capacity,
            table.capacity()
        );

        // This doesn't reallocate the GPU buffer since we used a free list entry
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert_eq!(ab.count, 4); // 4 == last time we grew
        assert!(ab.old_buffer.is_none());

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert!(view.len() >= final_align as usize * table.capacity() as usize);
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
                    let dummy_composed: &[GpuDummyComposed] =
                        cast_slice(&view[offset..offset + item_size]);
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                }
            }
            buffer.unmap();
        }

        // New frame
        table.clear_previous_frame_resizes();

        // Insert a row; this should get into row #3 at the end of the allocated buffer
        let row = table.insert(GpuDummyComposed {
            tag: 4,
            ..Default::default()
        });
        assert_eq!(row.0, 3);
        len += 1;
        println!(
            "Added 1 row to grow capacity from {} to {}.",
            old_capacity,
            table.capacity()
        );

        // This doesn't reallocate the GPU buffer since we used an implicit free entry
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert_eq!(ab.count, 4); // 4 == last time we grew
        assert!(ab.old_buffer.is_none());

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert!(view.len() >= final_align as usize * table.capacity() as usize);
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
                    let dummy_composed: &[GpuDummyComposed] =
                        cast_slice(&view[offset..offset + item_size]);
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                }
            }
            buffer.unmap();
        }
    }
}