
djeedai / bevy_hanabi / 17622440846

10 Sep 2025 05:55PM UTC coverage: 66.033% (-0.6%) from 66.641%
Build: 17622440846
Event: push
CI: github
Committer: web-flow

Fixes for rustc v1.89 (#494)

Works around a bug in `encase`:
https://github.com/teoxoy/encase/issues/95

9 of 17 new or added lines in 7 files covered. (52.94%)

133 existing lines in 10 files now uncovered.

4829 of 7313 relevant lines covered (66.03%)

437.02 hits per line

Source File

/src/render/buffer_table.rs (64.07% of lines covered)
1
use std::{
2
    borrow::Cow,
3
    num::{NonZeroU32, NonZeroU64},
4
    ops::Range,
5
};
6

7
use bevy::{
8
    log::trace,
9
    render::{
10
        render_resource::{
11
            Buffer, BufferAddress, BufferDescriptor, BufferUsages, CommandEncoder, ShaderSize,
12
            ShaderType,
13
        },
14
        renderer::{RenderDevice, RenderQueue},
15
    },
16
};
17
use bytemuck::{cast_slice, Pod};
18
use copyless::VecHelper;
19

20
/// Round a range start down to a given alignment, and return the new range and
21
/// the offset of the old range's start inside the new range.
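///
/// For example, rounding the range `7..32` down to an alignment of 8 yields
/// the range `0..32` with a start offset of 7 inside the new range (this
/// mirrors the unit tests below):
///
/// ```ignore
/// let (range, offset) = round_range_start_down(7..32, 8);
/// assert_eq!(range, 0..32);
/// assert_eq!(offset, 7);
/// ```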
22
fn round_range_start_down(range: Range<u64>, align: u64) -> (Range<u64>, u64) {
8✔
23
    assert!(align > 0);
16✔
24
    let delta = align - 1;
16✔
25
    if range.start >= delta {
8✔
26
        // Snap range start to previous multiple of align
27
        let old_start = range.start;
6✔
28
        let new_start = (range.start - delta).next_multiple_of(align);
12✔
29
        let offset = old_start - new_start;
6✔
30
        (new_start..range.end, offset)
3✔
31
    } else {
32
        // Snap range start to 0
33
        (0..range.end, range.start)
5✔
34
    }
35
}
36

37
/// Index of a row in a [`BufferTable`].
38
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
39
pub struct BufferTableId(pub(crate) u32); // TEMP: pub(crate)
40

41
impl BufferTableId {
42
    /// An invalid value, often used as a placeholder.
43
    pub const INVALID: BufferTableId = BufferTableId(u32::MAX);
44

45
    /// Check if the current ID is valid, that is, is different from
46
    /// [`INVALID`].
47
    ///
48
    /// [`INVALID`]: Self::INVALID
49
    #[inline]
50
    pub fn is_valid(&self) -> bool {
4✔
51
        *self != Self::INVALID
4✔
52
    }
53

54
    /// Compute a new buffer table ID by offsetting an existing one by `count`
55
    /// rows.
56
    #[inline]
57
    #[allow(dead_code)]
58
    pub fn offset(&self, count: u32) -> BufferTableId {
×
59
        debug_assert!(self.is_valid());
×
60
        BufferTableId(self.0 + count)
×
61
    }
62
}
63

64
impl Default for BufferTableId {
65
    fn default() -> Self {
4✔
66
        Self::INVALID
4✔
67
    }
68
}
69

70
#[derive(Debug)]
71
struct AllocatedBuffer {
72
    /// Currently allocated buffer, of size equal to `size`.
73
    buffer: Buffer,
74
    /// Size of the currently allocated buffer, in number of rows.
75
    count: u32,
76
    /// Previously allocated buffer if any, cached until the next buffer write
77
    /// so that old data can be copied into the newly-allocated buffer.
78
    old_buffer: Option<Buffer>,
79
    /// Size of the old buffer if any, in number of rows.
80
    old_count: u32,
81
}
82

83
impl AllocatedBuffer {
84
    /// Get the number of rows of the currently allocated GPU buffer.
85
    ///
86
    /// When the capacity grows, this count stays valid until the next buffer swap.
87
    pub fn allocated_count(&self) -> u32 {
3✔
88
        if self.old_buffer.is_some() {
6✔
89
            self.old_count
×
90
        } else {
91
            self.count
3✔
92
        }
93
    }
94
}
95

96
/// GPU buffer holding a table with concurrent interleaved CPU/GPU access.
97
///
98
/// The buffer table data structure represents a GPU buffer holding a table made
99
/// of individual rows. Each row of the table has the same layout (same size),
100
/// and can be allocated (assigned to an existing index) or free (available for
101
/// future allocation). The data structure manages a free list of rows, and copy
102
/// of rows modified on CPU to the GPU without touching other rows. This ensures
103
/// that existing rows in the GPU buffer can be accessed and modified by the GPU
104
/// without being overwritten by the CPU and without the need for the CPU to
105
/// read the data back from GPU into CPU memory.
106
///
107
/// The element type `T` needs to implement the following traits:
108
/// - [`Pod`] to allow copy.
109
/// - [`ShaderType`] because it needs to be mapped for a shader.
110
/// - [`ShaderSize`] to ensure a fixed footprint, to allow packing multiple
111
///   instances inside a single buffer. This therefore excludes any
112
///   runtime-sized array.
113
///
114
/// This is similar to a [`BufferVec`] or [`AlignedBufferVec`], but unlike those
115
/// data structures a buffer table preserves rows modified by the GPU without
116
/// overwriting them. This is useful when the buffer is also modified by GPU shaders,
117
/// so neither the CPU side nor the GPU side has an up-to-date view of the
118
/// entire table, and so the CPU cannot re-upload the entire table on changes.
119
///
120
/// # Usage
121
///
122
/// - During the [`RenderStage::Prepare`] stage, call
123
///   [`clear_previous_frame_resizes()`] to clear any stale buffer from the
124
///   previous frame. Then insert new rows with [`insert()`] and, if you made
125
///   changes, call [`allocate_gpu()`] at the end to allocate any new GPU buffer
126
///   needed, as shown in the example below.
127
/// - During the [`RenderStage::Render`] stage, call [`write_buffer()`] from a
128
///   command encoder before using any row, to perform any buffer resize copy
129
///   pending.
130
///
131
/// [`BufferVec`]: bevy::render::render_resource::BufferVec
132
/// [`AlignedBufferVec`]: crate::render::aligned_buffer_vec::AlignedBufferVec
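///
/// # Example
///
/// A minimal, illustrative per-frame sketch of that sequence (not a verbatim
/// excerpt of the crate). It assumes a hypothetical row type `GpuRow`
/// implementing `Pod + ShaderType + ShaderSize`, and a `RenderDevice`,
/// `RenderQueue`, and `CommandEncoder` obtained from Bevy's render world:
///
/// ```ignore
/// let mut table = BufferTable::<GpuRow>::new(BufferUsages::STORAGE, None, None);
///
/// // Prepare stage: clear any stale buffer left over from last frame's resize.
/// table.clear_previous_frame_resizes();
///
/// // Queue new rows; data stays on the CPU until the GPU buffer is updated.
/// let row = table.insert(GpuRow::default());
///
/// // Allocate or grow the GPU buffer; `true` means bind groups must be rebuilt.
/// let reallocated = table.allocate_gpu(&render_device, &render_queue);
///
/// // Render stage: schedule any pending resize copy before using the rows.
/// table.write_buffer(&mut command_encoder);
/// ```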
133
#[derive(Debug)]
134
pub struct BufferTable<T: Pod + ShaderSize> {
135
    /// GPU buffer if already allocated, or `None` otherwise.
136
    buffer: Option<AllocatedBuffer>,
137
    /// GPU buffer usages.
138
    buffer_usage: BufferUsages,
139
    /// Optional GPU buffer name, for debugging.
140
    label: Option<String>,
141
    /// Size of a single buffer element, in bytes, in CPU memory (Rust layout).
142
    item_size: usize,
143
    /// Size of a single buffer element, in bytes, aligned to GPU memory
144
    /// constraints.
145
    aligned_size: usize,
146
    /// Capacity of the buffer, in number of rows.
147
    ///
148
    /// This is the expected capacity, as requested by CPU side allocations and
149
    /// deallocations. The GPU buffer might not have been resized yet to handle
150
    /// it, and so might currently be allocated with a different size.
151
    capacity: u32,
152
    /// Size of the "active" portion of the table, which includes allocated rows
153
    /// and any row in the free list. All other rows in the
154
    /// `active_count..capacity` range are implicitly unallocated.
155
    active_count: u32,
156
    /// Free list of rows available in the GPU buffer for a new allocation. This
157
    /// only contains indices in the `0..active_count` range; all row indices in
158
    /// `active_count..capacity` are assumed to be unallocated.
159
    free_indices: Vec<u32>,
160
    /// Pending values accumulated on CPU and not yet written to GPU, and their
161
    /// rows.
162
    pending_values: Vec<(u32, T)>,
163
    /// Extra pending values accumulated on CPU like `pending_values`, but for
164
    /// which there's not enough space in the current GPU buffer. Those values
165
    /// are sorted in index order, occupying the range `buffer.size..`.
166
    extra_pending_values: Vec<T>,
167
}
168

169
impl<T: Pod + ShaderSize> Default for BufferTable<T> {
170
    fn default() -> Self {
26✔
171
        let item_size = std::mem::size_of::<T>();
52✔
172
        let aligned_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
52✔
173
        assert!(aligned_size >= item_size);
52✔
174
        Self {
175
            buffer: None,
176
            buffer_usage: BufferUsages::all(),
52✔
177
            label: None,
178
            item_size,
179
            aligned_size,
180
            capacity: 0,
181
            active_count: 0,
182
            free_indices: Vec::new(),
52✔
183
            pending_values: Vec::new(),
26✔
184
            extra_pending_values: Vec::new(),
26✔
185
        }
186
    }
187
}
188

189
impl<T: Pod + ShaderSize> BufferTable<T> {
190
    /// Create a new collection.
191
    ///
192
    /// `item_align` is an optional additional alignment for items in the
193
    /// collection. If greater than the natural alignment dictated by WGSL
194
    /// rules, this extra alignment is enforced. Otherwise it's ignored (so you
195
    /// can pass `None` to ignore). This is useful if, for example, you want to
196
    /// bind individual rows or any subset of the table, to ensure each row is
197
    /// aligned to the device constraints.
198
    ///
199
    /// # Panics
200
    ///
201
    /// Panics if `buffer_usage` contains [`BufferUsages::UNIFORM`] and the
202
    /// layout of the element type `T` does not meet the requirements of the
203
    /// uniform address space, as tested by
204
    /// [`ShaderType::assert_uniform_compat()`].
205
    ///
206
    /// [`BufferUsages::UNIFORM`]: bevy::render::render_resource::BufferUsages::UNIFORM
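    ///
    /// # Example
    ///
    /// A minimal sketch, assuming a hypothetical row type `GpuRow` implementing
    /// `Pod + ShaderType + ShaderSize`:
    ///
    /// ```ignore
    /// // Storage buffer whose rows are additionally aligned to 256 bytes, for
    /// // example to bind individual rows with a dynamic offset.
    /// let table = BufferTable::<GpuRow>::new(
    ///     BufferUsages::STORAGE,
    ///     NonZeroU64::new(256),
    ///     Some("my_table".to_string()),
    /// );
    /// ```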
207
    pub fn new(
26✔
208
        buffer_usage: BufferUsages,
209
        item_align: Option<NonZeroU64>,
210
        label: Option<String>,
211
    ) -> Self {
212
        // GPU-aligned item size, compatible with WGSL rules
213
        let item_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
52✔
214
        // Extra manual alignment for device constraints
215
        let aligned_size = if let Some(item_align) = item_align {
75✔
216
            let item_align = item_align.get() as usize;
×
217
            let aligned_size = item_size.next_multiple_of(item_align);
×
218
            assert!(aligned_size >= item_size);
×
219
            assert!(aligned_size % item_align == 0);
46✔
220
            aligned_size
23✔
221
        } else {
222
            item_size
3✔
223
        };
224
        trace!(
×
225
            "BufferTable[\"{}\"]: item_size={} aligned_size={}",
2✔
226
            label.as_ref().unwrap_or(&String::new()),
8✔
227
            item_size,
×
228
            aligned_size
×
229
        );
230
        if buffer_usage.contains(BufferUsages::UNIFORM) {
×
231
            <T as ShaderType>::assert_uniform_compat();
×
232
        }
233
        Self {
234
            // Need COPY_SRC and COPY_DST to copy from old to new buffer on resize
235
            buffer_usage: buffer_usage | BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
×
236
            aligned_size,
237
            label,
238
            ..Default::default()
239
        }
240
    }
241

242
    /// Get a safe buffer label for debug display.
243
    ///
244
    /// Falls back to an empty string if no label was specified.
245
    pub fn safe_label(&self) -> Cow<'_, str> {
1,026✔
246
        self.label
1,026✔
247
            .as_ref()
248
            .map(|s| Cow::Borrowed(&s[..]))
2,052✔
249
            .unwrap_or(Cow::Borrowed(""))
2,052✔
250
    }
251

252
    /// Get a safe buffer name for debug display.
253
    ///
254
    /// Same as [`safe_label()`] but includes the buffer ID as well.
255
    ///
256
    /// [`safe_label()`]: self::BufferTable::safe_label
257
    pub fn safe_name(&self) -> String {
1,026✔
258
        let id = self
2,052✔
259
            .buffer
1,026✔
260
            .as_ref()
261
            .map(|ab| {
2,046✔
262
                let id: NonZeroU32 = ab.buffer.id().into();
5,100✔
263
                id.get()
2,040✔
264
            })
265
            .unwrap_or(0);
266
        format!("#{}:{}", id, self.safe_label())
4,104✔
267
    }
268

269
    /// Reference to the GPU buffer, if already allocated.
270
    ///
271
    /// This reference corresponds to the currently allocated GPU buffer, which
272
    /// may not contain all data since the last [`insert()`] call, and could
273
    /// become invalid if a new larger buffer needs to be allocated to store the
274
    /// pending values inserted with [`insert()`].
275
    ///
276
    /// [`insert()`]: BufferTable::insert
277
    #[inline]
278
    pub fn buffer(&self) -> Option<&Buffer> {
5,075✔
279
        self.buffer.as_ref().map(|ab| &ab.buffer)
15,225✔
280
    }
281

282
    /// Maximum number of rows the table can hold without reallocation.
283
    ///
284
    /// This is the maximum number of rows that can be added to the table
285
    /// without forcing a new GPU buffer to be allocated and a copy from the old
286
    /// to the new buffer.
287
    ///
288
    /// Note that this doesn't imply that no GPU buffer allocation will ever
289
    /// occur; if a GPU buffer was never allocated, and there are pending
290
    /// CPU rows to insert, then a new buffer will be allocated on next
291
    /// update with this capacity.
292
    #[inline]
293
    #[allow(dead_code)]
294
    pub fn capacity(&self) -> u32 {
27✔
295
        self.capacity
27✔
296
    }
297

298
    /// Current number of rows in use in the table.
299
    ///
300
    /// Note that rows in use are not necessarily contiguous. There may be gaps
301
    /// between used rows.
302
    #[inline]
303
    #[allow(dead_code)]
304
    pub fn len(&self) -> u32 {
31✔
305
        self.active_count - self.free_indices.len() as u32
62✔
306
    }
307

308
    /// Size of a single row in the table, in bytes, aligned to GPU constraints.
309
    #[inline]
310
    #[allow(dead_code)]
311
    pub fn aligned_size(&self) -> usize {
22✔
312
        self.aligned_size
22✔
313
    }
314

315
    /// Is the table empty?
316
    #[inline]
317
    #[allow(dead_code)]
318
    pub fn is_empty(&self) -> bool {
52✔
319
        self.active_count == 0
52✔
320
    }
321

322
    /// Clear all rows of the table without deallocating any existing GPU
323
    /// buffer.
324
    ///
325
    /// This operation only updates the CPU cache of the table, without touching
326
    /// any GPU buffer. On next GPU buffer update, the GPU buffer will be
327
    /// deallocated.
328
    #[allow(dead_code)]
329
    pub fn clear(&mut self) {
×
330
        self.pending_values.clear();
×
331
        self.extra_pending_values.clear();
×
332
        self.free_indices.clear();
×
333
        self.active_count = 0;
×
334
    }
335

336
    /// Clear any stale buffer used for resize in the previous frame during
337
    /// rendering while the data structure was immutable.
338
    ///
339
    /// This must be called before any new [`insert()`].
340
    ///
341
    /// [`insert()`]: crate::BufferTable::insert
342
    pub fn clear_previous_frame_resizes(&mut self) {
1,037✔
343
        if let Some(ab) = self.buffer.as_mut() {
2,060✔
344
            ab.old_buffer = None;
×
345
            ab.old_count = 0;
×
346
        }
347
    }
348

349
    /// Calculate the size in bytes of `count` rows.
350
    #[inline]
351
    fn to_byte_size(&self, count: u32) -> usize {
15✔
352
        count as usize * self.aligned_size
15✔
353
    }
354

355
    /// Insert a new row into the table.
356
    ///
357
    /// For performance reasons, this buffers the row content on the CPU until
358
    /// the next GPU update, to minimize the number of CPU to GPU transfers.
359
    pub fn insert(&mut self, value: T) -> BufferTableId {
31✔
360
        trace!(
31✔
361
            "Inserting into table buffer '{}' with {} free indices, capacity: {}, active_size: {}",
2✔
362
            self.safe_name(),
4✔
363
            self.free_indices.len(),
4✔
364
            self.capacity,
×
365
            self.active_count
×
366
        );
367
        let index = if self.free_indices.is_empty() {
93✔
368
            let index = self.active_count;
60✔
369
            if index == self.capacity {
60✔
370
                self.capacity += 1;
30✔
371
            }
372
            debug_assert!(index < self.capacity);
60✔
373
            self.active_count += 1;
30✔
374
            index
30✔
375
        } else {
376
            self.free_indices.pop().unwrap()
1✔
377
        };
378
        let allocated_count = self
×
379
            .buffer
×
380
            .as_ref()
381
            .map(|ab| ab.allocated_count())
6✔
382
            .unwrap_or(0);
383
        trace!(
×
384
            "Found free index {}, capacity: {}, active_count: {}, allocated_count: {}",
2✔
385
            index,
×
386
            self.capacity,
×
387
            self.active_count,
×
388
            allocated_count
×
389
        );
390
        if index < allocated_count {
2✔
391
            self.pending_values.alloc().init((index, value));
8✔
392
        } else {
393
            let extra_index = index - allocated_count;
29✔
UNCOV
394
            if extra_index < self.extra_pending_values.len() as u32 {
×
395
                self.extra_pending_values[extra_index as usize] = value;
×
396
            } else {
397
                self.extra_pending_values.alloc().init(value);
29✔
398
            }
399
        }
400
        BufferTableId(index)
×
401
    }
402

403
    /// Calculate a dynamic byte offset for a bind group from a table entry.
404
    ///
405
    /// This returns the product of `id` and the internal [`aligned_size()`].
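    ///
    /// For example, with an [`aligned_size()`] of 256 bytes, the row with
    /// index 3 maps to a dynamic byte offset of `3 * 256 = 768`.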
406
    ///
407
    /// # Panics
408
    ///
409
    /// Panics if the `id` is too large, producing a byte offset larger than
410
    /// `u32::MAX`.
411
    ///
412
    /// [`aligned_size()`]: Self::aligned_size
413
    #[inline]
414
    pub fn dynamic_offset(&self, id: BufferTableId) -> u32 {
×
415
        let offset = self.aligned_size * id.0 as usize;
×
416
        assert!(offset <= u32::MAX as usize);
×
417
        u32::try_from(offset).expect("BufferTable index out of bounds")
×
418
    }
419

420
    /// Update an existing row in the table.
421
    ///
422
    /// For performance reasons, this buffers the row content on the CPU until
423
    /// the next GPU update, to minimize the number of CPU to GPU transfers.
424
    ///
425
    /// Calling this function multiple times overwrites the previous value. Only
426
    /// the last value recorded each frame will be uploaded to GPU.
427
    ///
428
    /// # Panics
429
    ///
430
    /// Panics if the `id` is invalid.
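    ///
    /// # Example
    ///
    /// A minimal sketch, assuming a hypothetical row type `GpuRow` with a
    /// hypothetical `new()` constructor:
    ///
    /// ```ignore
    /// let id = table.insert(GpuRow::default());
    /// // Overwrite the pending value; only this last one is uploaded to GPU.
    /// table.update(id, GpuRow::new(42));
    /// ```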
431
    pub fn update(&mut self, id: BufferTableId, value: T) {
2✔
432
        assert!(id.is_valid());
6✔
433
        trace!(
2✔
434
            "Updating row {} of table buffer '{}'",
2✔
UNCOV
435
            id.0,
×
436
            self.safe_name(),
4✔
437
        );
438
        let allocated_count = self
4✔
439
            .buffer
2✔
440
            .as_ref()
441
            .map(|ab| ab.allocated_count())
2✔
442
            .unwrap_or(0);
443
        if id.0 < allocated_count {
2✔
444
            if let Some(idx) = self
×
445
                .pending_values
×
446
                .iter()
447
                .position(|&(index, _)| index == id.0)
×
448
            {
449
                // Overwrite a previous update. This ensures we never upload more than one
450
                // update per row, which would waste GPU bandwidth.
451
                self.pending_values[idx] = (id.0, value);
×
452
            } else {
453
                self.pending_values.alloc().init((id.0, value));
×
454
            }
455
        } else {
456
            let extra_index = (id.0 - allocated_count) as usize;
2✔
UNCOV
457
            assert!(extra_index < self.extra_pending_values.len());
×
458
            // Overwrite a previous update. This ensures we never upload more than one
459
            // update per row, which would waste GPU bandwidth.
460
            self.extra_pending_values[extra_index] = value;
2✔
461
        }
462
    }
463

464
    /// Insert several new contiguous rows into the table.
465
    ///
466
    /// For performance reasons, this buffers the row content on the CPU until
467
    /// the next GPU update, to minimize the number of CPU to GPU transfers.
468
    ///
469
    /// # Returns
470
    ///
471
    /// Returns the index of the first entry. Other entries follow right after
472
    /// it.
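    ///
    /// # Example
    ///
    /// A minimal sketch, assuming a hypothetical row type `GpuRow` implementing
    /// `Pod + ShaderType + ShaderSize`:
    ///
    /// ```ignore
    /// // Insert two rows guaranteed to occupy consecutive indices.
    /// let first = table.insert_contiguous([GpuRow::default(); 2].into_iter());
    /// let second = first.offset(1);
    /// ```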
473
    #[allow(dead_code)] // unused but annoying to write, so keep if we need in the future
474
    pub fn insert_contiguous(&mut self, values: impl ExactSizeIterator<Item = T>) -> BufferTableId {
5✔
475
        let count = values.len() as u32;
10✔
476
        trace!(
5✔
477
            "Inserting {} contiguous values into table buffer '{}' with {} free indices, capacity: {}, active_size: {}",
×
478
            count,
×
479
            self.safe_name(),
×
480
            self.free_indices.len(),
×
481
            self.capacity,
×
482
            self.active_count
×
483
        );
484
        let first_index = if self.free_indices.is_empty() {
15✔
485
            let index = self.active_count;
6✔
486
            if index == self.capacity {
6✔
487
                self.capacity += count;
3✔
488
            }
489
            debug_assert!(index < self.capacity);
6✔
490
            self.active_count += count;
3✔
491
            index
3✔
492
        } else {
493
            let mut s = 0;
2✔
UNCOV
494
            let mut n = 1;
×
UNCOV
495
            let mut i = 1;
×
496
            while i < self.free_indices.len() {
4✔
497
                debug_assert!(self.free_indices[i] > self.free_indices[i - 1]); // always sorted
3✔
498
                if self.free_indices[i] == self.free_indices[i - 1] + 1 {
2✔
499
                    // contiguous
500
                    n += 1;
1✔
501
                    if n == count {
1✔
502
                        break;
1✔
503
                    }
504
                } else {
505
                    // non-contiguous; restart a new sequence
506
                    debug_assert!(n < count);
×
507
                    s = i;
×
508
                }
509
                i += 1;
×
510
            }
511
            if n == count {
2✔
512
                // Found a range of 'count' consecutive entries. Consume it.
513
                let index = self.free_indices[s];
2✔
514
                self.free_indices.splice(s..=i, []);
4✔
515
                index
1✔
516
            } else {
517
                // No free range for 'count' consecutive entries. Allocate at end instead.
518
                let index = self.active_count;
1✔
519
                if index == self.capacity {
1✔
520
                    self.capacity += count;
1✔
521
                }
522
                debug_assert!(index < self.capacity);
×
523
                self.active_count += count;
1✔
524
                index
1✔
525
            }
526
        };
527
        let allocated_count = self
×
528
            .buffer
×
529
            .as_ref()
530
            .map(|ab| ab.allocated_count())
×
531
            .unwrap_or(0);
532
        trace!(
×
533
            "Found {} free indices {}..{}, capacity: {}, active_count: {}, allocated_count: {}",
×
534
            count,
×
535
            first_index,
×
536
            first_index + count,
×
537
            self.capacity,
×
538
            self.active_count,
×
539
            allocated_count
×
540
        );
541
        for (i, value) in values.enumerate() {
10✔
542
            let index = first_index + i as u32;
×
543
            if index < allocated_count {
×
544
                self.pending_values.alloc().init((index, value));
×
545
            } else {
546
                let extra_index = index - allocated_count;
10✔
547
                if extra_index < self.extra_pending_values.len() as u32 {
6✔
548
                    self.extra_pending_values[extra_index as usize] = value;
6✔
549
                } else {
550
                    self.extra_pending_values.alloc().init(value);
4✔
551
                }
552
            }
553
        }
554
        BufferTableId(first_index)
×
555
    }
556

557
    /// Remove a row from the table.
558
    #[allow(dead_code)]
559
    pub fn remove(&mut self, id: BufferTableId) {
5✔
560
        let index = id.0;
10✔
561
        assert!(index < self.active_count);
10✔
562

563
        // If this is the last item in the active zone, just shrink the active zone
564
        // (implicit free list).
565
        if index == self.active_count - 1 {
7✔
566
            self.active_count -= 1;
2✔
567
            self.capacity -= 1;
2✔
568
        } else {
569
            // This is very inefficient but we need to apply the same logic as the
570
            // EffectCache because we rely on indices being in sync.
571
            let pos = self
3✔
UNCOV
572
                .free_indices
×
573
                .binary_search(&index) // will fail
574
                .unwrap_or_else(|e| e); // will get position of insertion
575
            self.free_indices.insert(pos, index);
×
576
        }
577
    }
578

579
    /// Remove a range of rows from the table.
580
    #[allow(dead_code)]
581
    pub fn remove_range(&mut self, first: BufferTableId, count: u32) {
4✔
582
        let index = first.0;
8✔
583
        assert!(index + count <= self.active_count);
8✔
584

585
        // If this is the last item in the active zone, just shrink the active zone
586
        // (implicit free list).
587
        if index == self.active_count - count {
8✔
588
            self.active_count -= count;
2✔
589
            self.capacity -= count;
2✔
590

591
            // Also try to remove free indices
592
            if self.free_indices.len() as u32 == self.active_count {
3✔
593
                // Easy case: everything is free, clear everything
594
                self.free_indices.clear();
3✔
595
                self.active_count = 0;
1✔
596
                self.capacity = 0;
1✔
597
            } else {
598
                // Some rows are still allocated. Dequeue from end while we have a contiguous
599
                // tail of free indices.
600
                let mut num_popped = 0;
1✔
601
                while let Some(idx) = self.free_indices.pop() {
5✔
602
                    if idx < self.active_count - 1 - num_popped {
×
603
                        self.free_indices.push(idx);
×
604
                        break;
×
605
                    }
UNCOV
606
                    num_popped += 1;
×
607
                }
608
                self.active_count -= num_popped;
×
609
                self.capacity -= num_popped;
×
610
            }
611
        } else {
612
            // This is very inefficient but we need to apply the same logic as the
613
            // EffectCache because we rely on indices being in sync.
614
            let pos = self
2✔
UNCOV
615
                .free_indices
×
UNCOV
616
                .binary_search(&index) // will fail
×
UNCOV
617
                .unwrap_or_else(|e| e); // will get position of insertion
×
618
            self.free_indices.splice(pos..pos, index..(index + count));
×
619
        }
620

621
        debug_assert!(
4✔
622
            (self.free_indices.is_empty() && self.active_count == 0)
10✔
623
                || (self.free_indices.len() as u32) < self.active_count
3✔
624
        );
625
    }
626

627
    /// Allocate any GPU buffer if needed, based on the most recent capacity
628
    /// requested.
629
    ///
630
    /// This should be called only once per frame after all allocation requests
631
    /// have been made via [`insert()`] but before the GPU buffer is actually
632
    /// updated. This is an optimization to enable allocating the GPU buffer
633
    /// earlier than it's actually needed. Calling this multiple times is not
634
    /// supported, and might assert. Not calling it is safe, as the next
635
    /// update will call it just-in-time anyway.
636
    ///
637
    /// # Returns
638
    ///
639
    /// Returns `true` if a new buffer was (re-)allocated, to indicate any bind
640
    /// group needs to be re-created.
641
    ///
642
    /// [`insert()`]: crate::render::BufferTable::insert
643
    pub fn allocate_gpu(&mut self, device: &RenderDevice, queue: &RenderQueue) -> bool {
1,038✔
644
        // The allocated capacity is the capacity of the currently allocated GPU buffer,
645
        // which can be different from the expected capacity (self.capacity) for next
646
        // update.
647
        let allocated_count = self.buffer.as_ref().map(|ab| ab.count).unwrap_or(0);
5,190✔
648
        let reallocated = if self.capacity > allocated_count {
2,076✔
649
            let byte_size = self.to_byte_size(self.capacity);
16✔
650
            trace!(
4✔
651
                "reserve('{}'): increase capacity from {} to {} elements, old size {} bytes, new size {} bytes",
2✔
652
                self.safe_name(),
4✔
UNCOV
653
                allocated_count,
×
UNCOV
654
                self.capacity,
×
655
                self.to_byte_size(allocated_count),
6✔
656
                byte_size
×
657
            );
658

659
            // Create the new buffer, swapping with the old one if any
660
            let has_init_data = !self.extra_pending_values.is_empty();
8✔
661
            let new_buffer = device.create_buffer(&BufferDescriptor {
12✔
662
                label: self.label.as_ref().map(|s| &s[..]),
14✔
663
                size: byte_size as BufferAddress,
4✔
664
                usage: self.buffer_usage,
4✔
665
                mapped_at_creation: has_init_data,
4✔
666
            });
667

668
            // Use any pending data to initialize the buffer. We only use CPU-available
669
            // data, which was inserted after the buffer was (re-)allocated and
670
            // has not been uploaded to GPU yet.
671
            if has_init_data {
4✔
672
                // Leave some space to copy the old buffer if any
673
                let base_size = self.to_byte_size(allocated_count) as u64;
12✔
674
                let extra_count = self.extra_pending_values.len() as u32;
8✔
675
                let extra_size = self.to_byte_size(extra_count) as u64;
12✔
676

677
                // Scope get_mapped_range_mut() to force a drop before unmap()
678
                {
679
                    // Note: get_mapped_range_mut() requires 8-byte alignment of the start offset.
680
                    let unaligned_range = base_size..(base_size + extra_size);
12✔
681
                    let (range, byte_offset) = round_range_start_down(unaligned_range, 8);
12✔
682

683
                    let dst_slice = &mut new_buffer.slice(range).get_mapped_range_mut();
12✔
684

685
                    let base_offset = byte_offset as usize;
8✔
686
                    let byte_size = self.aligned_size; // single row
8✔
687
                    for (index, content) in self.extra_pending_values.drain(..).enumerate() {
22✔
688
                        let byte_offset = base_offset + byte_size * index;
×
689

690
                        // Copy Rust value into a GPU-ready format, including GPU padding.
691
                        let src: &[u8] = cast_slice(std::slice::from_ref(&content));
×
692
                        let dst_range = byte_offset..(byte_offset + self.item_size);
×
693
                        trace!(
×
694
                            "+ init_copy: index={} src={:?} dst={:?} byte_offset={} byte_size={}",
2✔
UNCOV
695
                            index,
×
696
                            src.as_ptr(),
4✔
697
                            dst_range,
×
698
                            byte_offset,
×
699
                            byte_size
×
700
                        );
701
                        let dst = &mut dst_slice[dst_range];
×
702
                        dst.copy_from_slice(src);
×
703
                    }
704
                }
705

706
                new_buffer.unmap();
8✔
707
            }
708

709
            if let Some(ab) = self.buffer.as_mut() {
5✔
710
                // If there's any data currently in the GPU buffer, we need to copy it on next
711
                // update to preserve it.
712
                if self.active_count > 0 {
×
713
                    // Current buffer has value to preserve, save it into old_buffer before
714
                    // replacing it with the newly-allocated one.
715

716
                    // By design we can't have all active entries as free ones; we should have
717
                    // updated active_count=0 and cleared the free list if that was the case.
718
                    debug_assert!(self.free_indices.len() < self.active_count as usize);
3✔
719

720
                    // If we already have an old buffer, that means we already have scheduled a copy
721
                    // to preserve some values. And we can't do that twice per frame.
722
                    assert!(
1✔
723
                        ab.old_buffer.is_none(),
2✔
724
                        "allocate_gpu() called twice before write_buffer() took effect."
×
725
                    );
726

727
                    // Swap old <-> new
728
                    let mut old_buffer = new_buffer;
2✔
729
                    std::mem::swap(&mut old_buffer, &mut ab.buffer);
3✔
730
                    ab.old_buffer = Some(old_buffer);
2✔
731
                    ab.old_count = ab.count;
1✔
732
                } else {
733
                    // Current buffer is unused, so we don't need to preserve anything.
734

735
                    // It can happen that we reallocate during the frame, then immediately free the rows
736
                    // to preserve, so that in the end there's nothing to preserve.
737
                    if let Some(old_buffer) = ab.old_buffer.take() {
×
738
                        old_buffer.destroy();
×
739
                    }
740

741
                    ab.buffer.destroy();
×
742
                    ab.buffer = new_buffer;
×
743
                }
744
                ab.count = self.capacity;
1✔
745
            } else {
746
                self.buffer = Some(AllocatedBuffer {
6✔
747
                    buffer: new_buffer,
6✔
748
                    count: self.capacity,
3✔
749
                    old_buffer: None,
3✔
750
                    old_count: 0,
3✔
751
                });
752
            }
753

754
            true
4✔
755
        } else {
756
            false
1,034✔
757
        };
758

759
        // Immediately schedule a copy of old rows.
760
        // - For old rows, copy into the old buffer because the old-to-new buffer copy
761
        //   will be executed during command queue submission, while any CPU-to-GPU upload is
762
        //   prepended before the next submission. To ensure things don't get out of
763
        //   order with the CPU upload overwriting the GPU-to-GPU copy, make sure those
764
        //   two are disjoint.
765
        if let Some(ab) = self.buffer.as_ref() {
1,027✔
766
            let buffer = ab.old_buffer.as_ref().unwrap_or(&ab.buffer);
×
767
            for (index, content) in self.pending_values.drain(..) {
2✔
768
                let byte_size = self.aligned_size;
×
769
                let byte_offset = byte_size * index as usize;
×
770

771
                // Copy Rust value into a GPU-ready format, including GPU padding.
772
                // TODO - Do that in insert()!
773
                let mut aligned_buffer: Vec<u8> = vec![0; self.aligned_size];
×
774
                let src: &[u8] = cast_slice(std::slice::from_ref(&content));
×
775
                let dst_range = ..self.item_size;
×
776
                trace!(
×
777
                    "+ old_copy: index={} src={:?} dst={:?} byte_offset={} byte_size={}",
×
778
                    index,
×
779
                    src.as_ptr(),
×
780
                    dst_range,
×
781
                    byte_offset,
×
782
                    byte_size
×
783
                );
784
                let dst = &mut aligned_buffer[dst_range];
×
785
                dst.copy_from_slice(src);
×
786

787
                // Upload to GPU
788
                // TODO - Merge contiguous blocks into a single write_buffer()
789
                let bytes: &[u8] = cast_slice(&aligned_buffer);
×
790
                queue.write_buffer(buffer, byte_offset as u64, bytes);
×
791
            }
792
        } else {
793
            debug_assert!(self.pending_values.is_empty());
33✔
794
            debug_assert!(self.extra_pending_values.is_empty());
33✔
795
        }
796

797
        reallocated
1,038✔
798
    }
799

800
    /// Schedule any pending buffer-resize copy (old buffer into the newly-allocated one) on the given command encoder.
801
    pub fn write_buffer(&self, encoder: &mut CommandEncoder) {
1,037✔
802
        // Check if there's any work to do: either some pending values to upload or some
803
        // existing buffer to copy into a newly-allocated one.
804
        if self.pending_values.is_empty()
2,074✔
805
            && self
1,037✔
806
                .buffer
1,037✔
807
                .as_ref()
1,037✔
808
                .map(|ab| ab.old_buffer.is_none())
3,089✔
809
                .unwrap_or(true)
1,037✔
810
        {
811
            trace!("write_buffer({}): nothing to do", self.safe_name());
4,096✔
812
            return;
1,036✔
813
        }
814

UNCOV
815
        trace!(
×
816
            "write_buffer({}): pending_values.len={} item_size={} aligned_size={} buffer={:?}",
×
817
            self.safe_name(),
×
818
            self.pending_values.len(),
×
819
            self.item_size,
×
820
            self.aligned_size,
×
821
            self.buffer,
×
822
        );
823

824
        // If there's no more GPU buffer, there's nothing to do
825
        let Some(ab) = self.buffer.as_ref() else {
1✔
826
            return;
×
827
        };
828

829
        // Copy any old buffer into the new one, and clear the old buffer. Note that we
830
        // only clear the ref-counted reference to the buffer, not the actual buffer,
831
        // which stays alive until the copy is done (but we don't need to care about
832
        // keeping it alive, wgpu does that for us).
833
        if let Some(old_buffer) = ab.old_buffer.as_ref() {
1✔
834
            let old_size = self.to_byte_size(ab.old_count) as u64;
×
835
            trace!("Copy old buffer id {:?} of size {} bytes into newly-allocated buffer {:?} of size {} bytes.", old_buffer.id(), old_size, ab.buffer.id(), self.to_byte_size(ab.count));
×
836
            encoder.copy_buffer_to_buffer(old_buffer, 0, &ab.buffer, 0, old_size);
×
837
        }
838
    }
839
}
840

841
#[cfg(test)]
842
mod tests {
843
    use bevy::math::Vec3;
844
    use bytemuck::{Pod, Zeroable};
845

846
    use super::*;
847

848
    #[test]
849
    fn test_round_range_start_down() {
850
        // r8(0..7) : no-op
851
        {
852
            let (r, o) = round_range_start_down(0..7, 8);
853
            assert_eq!(r, 0..7);
854
            assert_eq!(o, 0);
855
        }
856

857
        // r8(2..7) = 0..7, +2
858
        {
859
            let (r, o) = round_range_start_down(2..7, 8);
860
            assert_eq!(r, 0..7);
861
            assert_eq!(o, 2);
862
        }
863

864
        // r8(7..32) = 0..32, +7
865
        {
866
            let (r, o) = round_range_start_down(7..32, 8);
867
            assert_eq!(r, 0..32);
868
            assert_eq!(o, 7);
869
        }
870

871
        // r8(8..32) = no-op
872
        {
873
            let (r, o) = round_range_start_down(8..32, 8);
874
            assert_eq!(r, 8..32);
875
            assert_eq!(o, 0);
876
        }
877
    }
878

879
    #[repr(C)]
880
    #[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
881
    pub(crate) struct GpuDummy {
882
        pub v: Vec3,
883
    }
884

885
    #[repr(C)]
886
    #[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
887
    pub(crate) struct GpuDummyComposed {
888
        pub simple: GpuDummy,
889
        pub tag: u32,
890
        // GPU padding to 16 bytes due to GpuDummy forcing align to 16 bytes
891
    }
892

893
    #[repr(C)]
894
    #[derive(Debug, Clone, Copy, Pod, Zeroable, ShaderType)]
895
    pub(crate) struct GpuDummyLarge {
896
        pub simple: GpuDummy,
897
        pub tag: u32,
898
        pub large: [f32; 128],
899
    }
900

901
    #[test]
902
    fn buffer_table_sizes() {
903
        // Rust
904
        assert_eq!(std::mem::size_of::<GpuDummy>(), 12);
905
        assert_eq!(std::mem::align_of::<GpuDummy>(), 4);
906
        assert_eq!(std::mem::size_of::<GpuDummyComposed>(), 16); // tight packing
907
        assert_eq!(std::mem::align_of::<GpuDummyComposed>(), 4);
908
        assert_eq!(std::mem::size_of::<GpuDummyLarge>(), 132 * 4); // tight packing
909
        assert_eq!(std::mem::align_of::<GpuDummyLarge>(), 4);
910

911
        // GPU
912
        assert_eq!(<GpuDummy as ShaderType>::min_size().get(), 16); // Vec3 gets padded to 16 bytes
913
        assert_eq!(<GpuDummy as ShaderSize>::SHADER_SIZE.get(), 16);
914
        assert_eq!(<GpuDummyComposed as ShaderType>::min_size().get(), 32); // align is 16 bytes, forces padding
915
        assert_eq!(<GpuDummyComposed as ShaderSize>::SHADER_SIZE.get(), 32);
916
        assert_eq!(<GpuDummyLarge as ShaderType>::min_size().get(), 544); // align is 16 bytes, forces padding
917
        assert_eq!(<GpuDummyLarge as ShaderSize>::SHADER_SIZE.get(), 544);
918

919
        for (item_align, expected_aligned_size) in [
920
            (0, 16),
921
            (4, 16),
922
            (8, 16),
923
            (16, 16),
924
            (32, 32),
925
            (256, 256),
926
            (512, 512),
927
        ] {
928
            let mut table = BufferTable::<GpuDummy>::new(
929
                BufferUsages::STORAGE,
930
                NonZeroU64::new(item_align),
931
                None,
932
            );
933
            assert_eq!(table.aligned_size(), expected_aligned_size);
934
            assert!(table.is_empty());
935
            table.insert(GpuDummy::default());
936
            assert!(!table.is_empty());
937
            assert_eq!(table.len(), 1);
938
        }
939

940
        for (item_align, expected_aligned_size) in [
941
            (0, 32),
942
            (4, 32),
943
            (8, 32),
944
            (16, 32),
945
            (32, 32),
946
            (256, 256),
947
            (512, 512),
948
        ] {
949
            let mut table = BufferTable::<GpuDummyComposed>::new(
950
                BufferUsages::STORAGE,
951
                NonZeroU64::new(item_align),
952
                None,
953
            );
954
            assert_eq!(table.aligned_size(), expected_aligned_size);
955
            assert!(table.is_empty());
956
            table.insert(GpuDummyComposed::default());
957
            assert!(!table.is_empty());
958
            assert_eq!(table.len(), 1);
959
        }
960

961
        for (item_align, expected_aligned_size) in [
962
            (0, 544),
963
            (4, 544),
964
            (8, 544),
965
            (16, 544),
966
            (32, 544),
967
            (256, 768),
968
            (512, 1024),
969
        ] {
970
            let mut table = BufferTable::<GpuDummyLarge>::new(
971
                BufferUsages::STORAGE,
972
                NonZeroU64::new(item_align),
973
                None,
974
            );
975
            assert_eq!(table.aligned_size(), expected_aligned_size);
976
            assert!(table.is_empty());
977
            table.insert(GpuDummyLarge {
978
                simple: Default::default(),
979
                tag: 0,
980
                large: [0.; 128],
981
            });
982
            assert!(!table.is_empty());
983
            assert_eq!(table.len(), 1);
984
        }
985
    }
986

987
    #[test]
988
    fn buffer_table_insert() {
989
        let mut table =
990
            BufferTable::<GpuDummy>::new(BufferUsages::STORAGE, NonZeroU64::new(32), None);
991

992
        // [x]
993
        let id1 = table.insert(GpuDummy::default());
994
        assert_eq!(id1.0, 0);
995
        assert_eq!(table.active_count, 1);
996
        assert!(table.free_indices.is_empty());
997

998
        // [x x]
999
        let id2 = table.insert(GpuDummy::default());
1000
        assert_eq!(id2.0, 1);
1001
        assert_eq!(table.active_count, 2);
1002
        assert!(table.free_indices.is_empty());
1003

1004
        // [- x]
1005
        table.remove(id1);
1006
        assert_eq!(table.active_count, 2);
1007
        assert_eq!(table.free_indices.len(), 1);
1008
        assert_eq!(table.free_indices[0], 0);
1009

1010
        // [- x x x]
1011
        let id3 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
1012
        assert_eq!(id3.0, 2); // at the end (doesn't fit in free slot #0)
1013
        assert_eq!(table.active_count, 4);
1014
        assert_eq!(table.free_indices.len(), 1);
1015
        assert_eq!(table.free_indices[0], 0);
1016

1017
        // [- - x x]
1018
        table.remove(id2);
1019
        assert_eq!(table.active_count, 4);
1020
        assert_eq!(table.free_indices.len(), 2);
1021
        assert_eq!(table.free_indices[0], 0);
1022
        assert_eq!(table.free_indices[1], 1);
1023

1024
        // [x x x x]
1025
        let id4 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
1026
        assert_eq!(id4.0, 0); // this time it fits into slots #0-#1
1027
        assert_eq!(table.active_count, 4);
1028
        assert!(table.free_indices.is_empty());
1029

1030
        // [- - x x]
1031
        table.remove_range(id4, 2);
1032
        assert_eq!(table.active_count, 4);
1033
        assert_eq!(table.free_indices.len(), 2);
1034
        assert_eq!(table.free_indices[0], 0);
1035
        assert_eq!(table.free_indices[1], 1);
1036

1037
        // []
1038
        table.remove_range(id3, 2);
1039
        assert_eq!(table.active_count, 0);
1040
        assert!(table.free_indices.is_empty());
1041

1042
        // [x x]
1043
        let id5 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
1044
        assert_eq!(id5.0, 0);
1045
        assert_eq!(table.active_count, 2);
1046
        assert!(table.free_indices.is_empty());
1047

1048
        // [x x x x]
1049
        let id6 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
1050
        assert_eq!(id6.0, 2);
1051
        assert_eq!(table.active_count, 4);
1052
        assert!(table.free_indices.is_empty());
1053

1054
        // [x x x x x x]
1055
        let id7 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
1056
        assert_eq!(id7.0, 4);
1057
        assert_eq!(table.active_count, 6);
1058
        assert!(table.free_indices.is_empty());
1059

1060
        // [x x - - x x]
1061
        table.remove_range(id6, 2);
1062
        assert_eq!(table.active_count, 6);
1063
        assert_eq!(table.free_indices.len(), 2);
1064
        assert_eq!(table.free_indices[0], 2);
1065
        assert_eq!(table.free_indices[1], 3);
1066

1067
        // [x x]
1068
        table.remove_range(id7, 2);
1069
        assert_eq!(table.active_count, 2);
1070
        assert!(table.free_indices.is_empty());
1071
    }
1072
}
1073

1074
#[cfg(all(test, feature = "gpu_tests"))]
1075
mod gpu_tests {
1076
    use std::fmt::Write;
1077

1078
    use bevy::render::render_resource::BufferSlice;
1079
    use tests::*;
1080
    use wgpu::{BufferView, CommandBuffer};
1081

1082
    use super::*;
1083
    use crate::test_utils::MockRenderer;
1084

1085
    /// Read data from GPU back into CPU memory.
1086
    ///
1087
    /// This call blocks until the data is available on CPU. Used for testing
1088
    /// only.
1089
    fn read_back_gpu<'a>(device: &RenderDevice, slice: BufferSlice<'a>) -> BufferView<'a> {
6✔
1090
        let (tx, rx) = futures::channel::oneshot::channel();
18✔
1091
        slice.map_async(wgpu::MapMode::Read, move |result| {
24✔
1092
            tx.send(result).unwrap();
24✔
1093
        });
1094
        device.poll(wgpu::Maintain::Wait);
18✔
1095
        let result = futures::executor::block_on(rx);
18✔
1096
        assert!(result.is_ok());
18✔
1097
        slice.get_mapped_range()
6✔
1098
    }
1099

1100
    /// Submit a command buffer to GPU and wait for completion.
1101
    ///
1102
    /// This call blocks until the GPU executed the command buffer. Used for
1103
    /// testing only.
1104
    fn submit_gpu_and_wait(
7✔
1105
        device: &RenderDevice,
1106
        queue: &RenderQueue,
1107
        command_buffer: CommandBuffer,
1108
    ) {
1109
        // Queue command
1110
        queue.submit([command_buffer]);
14✔
1111

1112
        // Register callback to observe completion
1113
        let (tx, rx) = futures::channel::oneshot::channel();
21✔
1114
        queue.on_submitted_work_done(move || {
21✔
1115
            tx.send(()).unwrap();
28✔
1116
        });
1117

1118
        // Poll device, checking for completion and raising callback
1119
        device.poll(wgpu::Maintain::Wait);
21✔
1120

1121
        // Wait for the callback to be raised. This was needed in previous versions; however,
1122
        // it's a bit unclear if it's still needed or if device.poll() is enough to
1123
        // guarantee that the command was executed.
1124
        let _ = futures::executor::block_on(rx);
7✔
1125
    }
1126

1127
    /// Convert a byte slice to a string of hexadecimal values separated by a
1128
    /// blank space.
1129
    fn to_hex_string(slice: &[u8]) -> String {
19✔
1130
        let len = slice.len();
57✔
1131
        let num_chars = len * 3 - 1;
38✔
1132
        let mut s = String::with_capacity(num_chars);
57✔
1133
        for b in &slice[..len - 1] {
304✔
1134
            write!(&mut s, "{:02x} ", *b).unwrap();
1135
        }
1136
        write!(&mut s, "{:02x}", slice[len - 1]).unwrap();
95✔
1137
        debug_assert_eq!(s.len(), num_chars);
57✔
1138
        s
19✔
1139
    }
1140

1141
    fn write_buffers_and_wait<T: Pod + ShaderSize>(
7✔
1142
        table: &BufferTable<T>,
1143
        device: &RenderDevice,
1144
        queue: &RenderQueue,
1145
    ) {
1146
        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
28✔
1147
            label: Some("test"),
7✔
1148
        });
1149
        table.write_buffer(&mut encoder);
21✔
1150
        let command_buffer = encoder.finish();
21✔
1151
        submit_gpu_and_wait(device, queue, command_buffer);
28✔
1152
        println!("Buffer written to GPU");
14✔
1153
    }
1154

1155
    #[test]
1156
    fn table_write() {
1157
        let renderer = MockRenderer::new();
1158
        let device = renderer.device();
1159
        let queue = renderer.queue();
1160

1161
        let item_align = device.limits().min_storage_buffer_offset_alignment as u64;
1162
        println!("min_storage_buffer_offset_alignment = {item_align}");
1163
        let mut table = BufferTable::<GpuDummyComposed>::new(
1164
            BufferUsages::STORAGE | BufferUsages::MAP_READ,
1165
            NonZeroU64::new(item_align),
1166
            None,
1167
        );
1168
        let final_align = item_align.max(<GpuDummyComposed as ShaderSize>::SHADER_SIZE.get());
1169
        assert_eq!(table.aligned_size(), final_align as usize);
1170

1171
        // Initial state
1172
        assert!(table.is_empty());
1173
        assert_eq!(table.len(), 0);
1174
        assert_eq!(table.capacity(), 0);
1175
        assert!(table.buffer.is_none());
1176

1177
        // This has no effect while the table is empty
1178
        table.clear_previous_frame_resizes();
1179
        table.allocate_gpu(&device, &queue);
1180
        write_buffers_and_wait(&table, &device, &queue);
1181
        assert!(table.is_empty());
1182
        assert_eq!(table.len(), 0);
1183
        assert_eq!(table.capacity(), 0);
1184
        assert!(table.buffer.is_none());
1185

1186
        // New frame
1187
        table.clear_previous_frame_resizes();
1188

1189
        // Insert some entries
1190
        let len = 3;
1191
        for i in 0..len {
1192
            let row = table.insert(GpuDummyComposed {
1193
                tag: i + 1,
1194
                ..Default::default()
1195
            });
1196
            assert_eq!(row.0, i);
1197
        }
1198
        assert!(!table.is_empty());
1199
        assert_eq!(table.len(), len);
1200
        assert!(table.capacity() >= len); // contract: could over-allocate...
1201
        assert!(table.buffer.is_none()); // not yet allocated on GPU
1202

1203
        // Allocate GPU buffer for current requested state
1204
        table.allocate_gpu(&device, &queue);
1205
        assert!(!table.is_empty());
1206
        assert_eq!(table.len(), len);
1207
        assert!(table.capacity() >= len);
1208
        let ab = table
1209
            .buffer
1210
            .as_ref()
1211
            .expect("GPU buffer should be allocated after allocate_gpu()");
1212
        assert!(ab.old_buffer.is_none()); // no previous copy
1213
        assert_eq!(ab.count, len);
1214
        println!(
1215
            "Allocated buffer #{:?} of {} rows",
1216
            ab.buffer.id(),
1217
            ab.count
1218
        );
1219
        let ab_buffer = ab.buffer.clone();
1220

1221
        // Another allocate_gpu() is a no-op
1222
        table.allocate_gpu(&device, &queue);
1223
        assert!(!table.is_empty());
1224
        assert_eq!(table.len(), len);
1225
        assert!(table.capacity() >= len);
1226
        let ab = table
1227
            .buffer
1228
            .as_ref()
1229
            .expect("GPU buffer should be allocated after allocate_gpu()");
1230
        assert!(ab.old_buffer.is_none()); // no previous copy
1231
        assert_eq!(ab.count, len);
1232
        assert_eq!(ab_buffer.id(), ab.buffer.id()); // same buffer
1233

1234
        // Write buffer (CPU -> GPU)
1235
        write_buffers_and_wait(&table, &device, &queue);
1236

1237
        {
1238
            // Read back (GPU -> CPU)
1239
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1240
            {
1241
                let slice = buffer.slice(..);
1242
                let view = read_back_gpu(&device, slice);
1243
                println!(
1244
                    "GPU data read back to CPU for validation: {} bytes",
1245
                    view.len()
1246
                );
1247

1248
                // Validate content
1249
                assert_eq!(view.len(), final_align as usize * table.capacity() as usize);
1250
                for i in 0..len as usize {
1251
                    let offset = i * final_align as usize;
1252
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1253
                    let src = &view[offset..offset + 16];
1254
                    println!("{}", to_hex_string(src));
1255
                    let dummy_composed: &[GpuDummyComposed] =
1256
                        cast_slice(&view[offset..offset + item_size]);
1257
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1258
                }
1259
            }
1260
            buffer.unmap();
1261
        }
1262

1263
        // New frame
1264
        table.clear_previous_frame_resizes();
1265

1266
        // Insert more entries
1267
        let old_capacity = table.capacity();
1268
        let mut len = len;
1269
        while table.capacity() == old_capacity {
1270
            let row = table.insert(GpuDummyComposed {
1271
                tag: len + 1,
1272
                ..Default::default()
1273
            });
1274
            assert_eq!(row.0, len);
1275
            len += 1;
1276
        }
1277
        println!(
1278
            "Added {} rows to grow capacity from {} to {}.",
1279
            len - 3,
1280
            old_capacity,
1281
            table.capacity()
1282
        );
1283

1284
        // This re-allocates a new GPU buffer because the capacity changed
1285
        table.allocate_gpu(&device, &queue);
1286
        assert!(!table.is_empty());
1287
        assert_eq!(table.len(), len);
1288
        assert!(table.capacity() >= len);
1289
        let ab = table
1290
            .buffer
1291
            .as_ref()
1292
            .expect("GPU buffer should be allocated after allocate_gpu()");
1293
        assert_eq!(ab.count, len);
1294
        assert!(ab.old_buffer.is_some()); // old buffer to copy
1295
        assert_ne!(ab.old_buffer.as_ref().unwrap().id(), ab.buffer.id());
1296
        println!(
1297
            "Allocated new buffer #{:?} of {} rows",
1298
            ab.buffer.id(),
1299
            ab.count
1300
        );
1301

1302
        // Write buffer (CPU -> GPU)
1303
        write_buffers_and_wait(&table, &device, &queue);
1304

1305
        {
1306
            // Read back (GPU -> CPU)
1307
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1308
            {
1309
                let slice = buffer.slice(..);
1310
                let view = read_back_gpu(&device, slice);
1311
                println!(
1312
                    "GPU data read back to CPU for validation: {} bytes",
1313
                    view.len()
1314
                );
1315

1316
                // Validate content
1317
                assert_eq!(view.len(), final_align as usize * table.capacity() as usize);
1318
                for i in 0..len as usize {
1319
                    let offset = i * final_align as usize;
1320
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1321
                    let src = &view[offset..offset + 16];
1322
                    println!("{}", to_hex_string(src));
1323
                    let dummy_composed: &[GpuDummyComposed] =
1324
                        cast_slice(&view[offset..offset + item_size]);
1325
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1326
                }
1327
            }
1328
            buffer.unmap();
1329
        }
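        // All rows, including the ones originally written into the old (smaller) buffer,
        // read back with their expected tags, which indirectly checks the old -> new copy
        // expected to happen during the write above.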
1330

1331
        // New frame
1332
        table.clear_previous_frame_resizes();
1333

1334
        // Delete the last allocated row
1335
        let old_capacity = table.capacity();
1336
        let len = len - 1;
1337
        table.remove(BufferTableId(len));
1338
        println!(
1339
            "Removed last row to shrink capacity from {} to {}.",
1340
            old_capacity,
1341
            table.capacity()
1342
        );
1343

1344
        // This doesn't do anything since we only removed a row
1345
        table.allocate_gpu(&device, &queue);
1346
        assert!(!table.is_empty());
1347
        assert_eq!(table.len(), len);
1348
        assert!(table.capacity() >= len);
1349
        let ab = table
1350
            .buffer
1351
            .as_ref()
1352
            .expect("GPU buffer should be allocated after allocate_gpu()");
1353
        assert_eq!(ab.count, len + 1); // GPU buffer kept its size
1354
        assert!(ab.old_buffer.is_none());
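        // Removing the tail row shrinks the table's logical capacity, but the 4-row GPU
        // buffer is kept as-is, so no reallocation (and therefore no `old_buffer`) is
        // needed.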
1355

1356
        // Write buffer (CPU -> GPU)
1357
        write_buffers_and_wait(&table, &device, &queue);
1358

1359
        {
1360
            // Read back (GPU -> CPU)
1361
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1362
            {
1363
                let slice = buffer.slice(..);
1364
                let view = read_back_gpu(&device, slice);
1365
                println!(
1366
                    "GPU data read back to CPU for validation: {} bytes",
1367
                    view.len()
1368
                );
1369

1370
                // Validate content
1371
                assert!(view.len() >= final_align as usize * table.capacity() as usize); // note the >=, the buffer is over-allocated
1372
                for i in 0..len as usize {
1373
                    let offset = i * final_align as usize;
1374
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1375
                    let src = &view[offset..offset + 16];
1376
                    println!("{}", to_hex_string(src));
1377
                    let dummy_composed: &[GpuDummyComposed] =
1378
                        cast_slice(&view[offset..offset + item_size]);
1379
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1380
                }
1381
            }
1382
            buffer.unmap();
1383
        }
1384

1385
        // New frame
1386
        table.clear_previous_frame_resizes();
1387

1388
        // Delete the first allocated row
1389
        let old_capacity = table.capacity();
1390
        let mut len = len - 1;
1391
        table.remove(BufferTableId(0));
1392
        assert_eq!(old_capacity, table.capacity());
1393
        println!(
1394
            "Removed first row to shrink capacity from {} to {} (no change).",
1395
            old_capacity,
1396
            table.capacity()
1397
        );
1398

1399
        // This doesn't do anything since we only removed a row
1400
        table.allocate_gpu(&device, &queue);
1401
        assert!(!table.is_empty());
1402
        assert_eq!(table.len(), len);
1403
        assert!(table.capacity() >= len);
1404
        let ab = table
1405
            .buffer
1406
            .as_ref()
1407
            .expect("GPU buffer should be allocated after allocate_gpu()");
1408
        assert_eq!(ab.count, len + 2); // GPU buffer kept its size
1409
        assert!(ab.old_buffer.is_none());
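        // Removing row #0 merely records the slot in the free list; capacity, GPU buffer
        // size, and buffer identity are all unchanged.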
1410

1411
        // Write buffer (CPU -> GPU)
1412
        write_buffers_and_wait(&table, &device, &queue);
1413

1414
        {
1415
            // Read back (GPU -> CPU)
1416
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1417
            {
1418
                let slice = buffer.slice(..);
1419
                let view = read_back_gpu(&device, slice);
1420
                println!(
1421
                    "GPU data read back to CPU for validation: {} bytes",
1422
                    view.len()
1423
                );
1424

1425
                // Validate content
1426
                assert!(view.len() >= final_align as usize * table.capacity() as usize); // note the >=, the buffer is over-allocated
1427
                for i in 0..len as usize {
1428
                    let offset = i * final_align as usize;
1429
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1430
                    let src = &view[offset..offset + 16];
1431
                    println!("{}", to_hex_string(src));
1432
                    if i > 0 {
1433
                        let dummy_composed: &[GpuDummyComposed] =
1434
                            cast_slice(&view[offset..offset + item_size]);
1435
                        assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1436
                    }
1437
                }
1438
            }
1439
            buffer.unmap();
1440
        }
1441

1442
        // New frame
1443
        table.clear_previous_frame_resizes();
1444

1445
        // Insert a row; this should go into row #0 of the buffer
1446
        let row = table.insert(GpuDummyComposed {
1447
            tag: 1,
1448
            ..Default::default()
1449
        });
1450
        assert_eq!(row.0, 0);
1451
        len += 1;
1452
        println!(
1453
            "Added 1 row to grow capacity from {} to {}.",
1454
            old_capacity,
1455
            table.capacity()
1456
        );
1457

1458
        // This doesn't reallocate the GPU buffer since we used a free list entry
1459
        table.allocate_gpu(&device, &queue);
1460
        assert!(!table.is_empty());
1461
        assert_eq!(table.len(), len);
1462
        assert!(table.capacity() >= len);
1463
        let ab = table
1464
            .buffer
1465
            .as_ref()
1466
            .expect("GPU buffer should be allocated after allocate_gpu()");
1467
        assert_eq!(ab.count, 4); // 4 == last time we grew
1468
        assert!(ab.old_buffer.is_none());
1469

1470
        // Write buffer (CPU -> GPU)
1471
        write_buffers_and_wait(&table, &device, &queue);
1472

1473
        {
1474
            // Read back (GPU -> CPU)
1475
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1476
            {
1477
                let slice = buffer.slice(..);
1478
                let view = read_back_gpu(&device, slice);
1479
                println!(
1480
                    "GPU data read back to CPU for validation: {} bytes",
1481
                    view.len()
1482
                );
1483

1484
                // Validate content
1485
                assert!(view.len() >= final_align as usize * table.capacity() as usize);
1486
                for i in 0..len as usize {
1487
                    let offset = i * final_align as usize;
1488
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1489
                    let src = &view[offset..offset + 16];
1490
                    println!("{}", to_hex_string(src));
1491
                    let dummy_composed: &[GpuDummyComposed] =
1492
                        cast_slice(&view[offset..offset + item_size]);
1493
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1494
                }
1495
            }
1496
            buffer.unmap();
1497
        }
1498

1499
        // New frame
1500
        table.clear_previous_frame_resizes();
1501

1502
        // Insert a row; this should go into row #3 at the end of the allocated buffer
1503
        let row = table.insert(GpuDummyComposed {
1504
            tag: 4,
1505
            ..Default::default()
1506
        });
1507
        assert_eq!(row.0, 3);
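        // Row #3 lies past the current logical capacity but within the 4-row GPU buffer
        // left over from the earlier growth, so the table can grow its capacity again
        // without triggering a GPU reallocation.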
1508
        len += 1;
1509
        println!(
1510
            "Added 1 row to grow capacity from {} to {}.",
1511
            old_capacity,
1512
            table.capacity()
1513
        );
1514

1515
        // This doesn't reallocate the GPU buffer since we used an implicit free entry
1516
        table.allocate_gpu(&device, &queue);
1517
        assert!(!table.is_empty());
1518
        assert_eq!(table.len(), len);
1519
        assert!(table.capacity() >= len);
1520
        let ab = table
1521
            .buffer
1522
            .as_ref()
1523
            .expect("GPU buffer should be allocated after allocate_gpu()");
1524
        assert_eq!(ab.count, 4); // 4 == last time we grew
1525
        assert!(ab.old_buffer.is_none());
1526

1527
        // Write buffer (CPU -> GPU)
1528
        write_buffers_and_wait(&table, &device, &queue);
1529

1530
        {
1531
            // Read back (GPU -> CPU)
1532
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1533
            {
1534
                let slice = buffer.slice(..);
1535
                let view = read_back_gpu(&device, slice);
1536
                println!(
1537
                    "GPU data read back to CPU for validation: {} bytes",
1538
                    view.len()
1539
                );
1540

1541
                // Validate content
1542
                assert!(view.len() >= final_align as usize * table.capacity() as usize);
1543
                for i in 0..len as usize {
1544
                    let offset = i * final_align as usize;
1545
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1546
                    let src = &view[offset..offset + 16];
1547
                    println!("{}", to_hex_string(src));
1548
                    let dummy_composed: &[GpuDummyComposed] =
1549
                        cast_slice(&view[offset..offset + item_size]);
1550
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1551
                }
1552
            }
1553
            buffer.unmap();
1554
        }
1555
    }
1556
}