
djeedai / bevy_hanabi / build 21565578469 (push, github, web-flow)

01 Feb 2026 03:38PM UTC, coverage: 58.351% (-8.1%) from 66.442%

Update to Bevy v0.18 (#521)

Thanks to @morgenthum for the original work.

93 of 170 new or added lines in 6 files covered (54.71%).

968 existing lines in 17 files now uncovered.

4954 of 8490 relevant lines covered (58.35%).

190.51 hits per line
Source File: /src/render/buffer_table.rs (62.81% covered)
use std::{
    borrow::Cow,
    num::{NonZeroU32, NonZeroU64},
    ops::Range,
};

use bevy::{
    log::trace,
    render::{
        render_resource::{
            Buffer, BufferAddress, BufferDescriptor, BufferUsages, CommandEncoder, ShaderSize,
            ShaderType,
        },
        renderer::{RenderDevice, RenderQueue},
    },
};
use bytemuck::{cast_slice, Pod};

/// Round a range start down to a given alignment, and return the new range
/// together with the offset of the old range's start inside the new range.
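///
/// A minimal illustrative example (mirroring the `test_round_range_start_down`
/// unit test below): rounding `7..32` down to an 8-byte alignment yields
/// `0..32`, with the old start sitting 7 bytes into the new range.
///
/// ```ignore
/// assert_eq!(round_range_start_down(7..32, 8), (0..32, 7));
/// ```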
fn round_range_start_down(range: Range<u64>, align: u64) -> (Range<u64>, u64) {
    assert!(align > 0);
    let delta = align - 1;
    if range.start >= delta {
        // Snap range start to previous multiple of align
        let old_start = range.start;
        let new_start = (range.start - delta).next_multiple_of(align);
        let offset = old_start - new_start;
        (new_start..range.end, offset)
    } else {
        // Snap range start to 0
        (0..range.end, range.start)
    }
}

/// Index of a row in a [`BufferTable`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BufferTableId(pub(crate) u32); // TEMP: pub(crate)

impl BufferTableId {
    /// An invalid value, often used as placeholder.
    pub const INVALID: BufferTableId = BufferTableId(u32::MAX);

    /// Check if the current ID is valid, that is, is different from
    /// [`INVALID`].
    ///
    /// [`INVALID`]: Self::INVALID
    #[inline]
    pub fn is_valid(&self) -> bool {
        *self != Self::INVALID
    }

    /// Compute a new buffer table ID by offsetting an existing one by `count`
    /// rows.
    #[inline]
    #[allow(dead_code)]
    pub fn offset(&self, count: u32) -> BufferTableId {
        debug_assert!(self.is_valid());
        BufferTableId(self.0 + count)
    }
}

impl Default for BufferTableId {
    fn default() -> Self {
        Self::INVALID
    }
}

#[derive(Debug)]
struct AllocatedBuffer {
    /// Currently allocated buffer; its number of rows is `count`.
    buffer: Buffer,
    /// Size of the currently allocated buffer, in number of rows.
    count: u32,
    /// Previously allocated buffer if any, cached until the next buffer write
    /// so that old data can be copied into the newly-allocated buffer.
    old_buffer: Option<Buffer>,
    /// Size of the old buffer if any, in number of rows.
    old_count: u32,
}

impl AllocatedBuffer {
    /// Get the number of rows of the GPU buffer currently holding valid data.
    ///
    /// After the capacity grows, this remains the old buffer's row count until
    /// the next buffer swap.
    pub fn allocated_count(&self) -> u32 {
        if self.old_buffer.is_some() {
            self.old_count
        } else {
            self.count
        }
    }
}

/// GPU buffer holding a table with concurrent interleaved CPU/GPU access.
///
/// The buffer table data structure represents a GPU buffer holding a table made
/// of individual rows. Each row of the table has the same layout (same size),
/// and can be allocated (assigned to an existing index) or free (available for
/// future allocation). The data structure manages a free list of rows, and
/// copies rows modified on the CPU to the GPU without touching other rows. This
/// ensures that existing rows in the GPU buffer can be accessed and modified by
/// the GPU without being overwritten by the CPU, and without the need for the
/// CPU to read the data back from GPU into CPU memory.
///
/// The element type `T` needs to implement the following traits:
/// - [`Pod`] to allow copy.
/// - [`ShaderType`] because it needs to be mapped for a shader.
/// - [`ShaderSize`] to ensure a fixed footprint, to allow packing multiple
///   instances inside a single buffer. This therefore excludes any
///   runtime-sized array.
///
/// This is similar to a [`BufferVec`] or [`AlignedBufferVec`], but unlike those
/// data structures a buffer table preserves rows modified by the GPU without
/// overwriting them. This is useful when the buffer is also modified by GPU
/// shaders, so neither the CPU side nor the GPU side has an up-to-date view of
/// the entire table, and the CPU therefore cannot re-upload the entire table on
/// changes.
///
/// # Usage
///
/// - During the [`RenderStage::Prepare`] stage, call
///   [`clear_previous_frame_resizes()`] to clear any stale buffer from the
///   previous frame. Then insert new rows with [`insert()`], and if you made
///   changes call [`allocate_gpu()`] at the end to allocate any new buffer
///   needed.
/// - During the [`RenderStage::Render`] stage, call [`write_buffer()`] from a
///   command encoder before using any row, to perform any pending buffer
///   resize copy.
///
/// See the example below for a sketch of this flow.
///
/// [`BufferVec`]: bevy::render::render_resource::BufferVec
/// [`AlignedBufferVec`]: crate::render::aligned_buffer_vec::AlignedBufferVec
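///
/// # Example
///
/// A hedged, illustrative sketch of the per-frame flow above. The `device`,
/// `queue`, and `encoder` values are assumed to come from the render world,
/// and `MyRow` stands for any hypothetical `Pod + ShaderType + ShaderSize`
/// row type.
///
/// ```ignore
/// let mut table = BufferTable::<MyRow>::new(BufferUsages::STORAGE, None, None);
///
/// // Prepare stage: drop any stale resize buffer from the previous frame,
/// // then record new rows and allocate the GPU buffer if needed.
/// table.clear_previous_frame_resizes();
/// let row: BufferTableId = table.insert(MyRow::default());
/// let reallocated = table.allocate_gpu(&device, &queue);
/// if reallocated {
///     // Any bind group referencing the table buffer must be re-created.
/// }
///
/// // Render stage: schedule the pending old-to-new buffer copy before
/// // using any row.
/// table.write_buffer(&mut encoder);
/// ```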
#[derive(Debug)]
pub struct BufferTable<T: Pod + ShaderSize> {
    /// GPU buffer if already allocated, or `None` otherwise.
    buffer: Option<AllocatedBuffer>,
    /// GPU buffer usages.
    buffer_usage: BufferUsages,
    /// Optional GPU buffer name, for debugging.
    label: Option<String>,
    /// Size of a single buffer element, in bytes, in CPU memory (Rust layout).
    item_size: usize,
    /// Size of a single buffer element, in bytes, aligned to GPU memory
    /// constraints.
    aligned_size: usize,
    /// Capacity of the buffer, in number of rows.
    ///
    /// This is the expected capacity, as requested by CPU side allocations and
    /// deallocations. The GPU buffer might not have been resized yet to handle
    /// it, so might be allocated with a different size.
    capacity: u32,
    /// Size of the "active" portion of the table, which includes allocated rows
    /// and any row in the free list. All other rows in the
    /// `active_count..capacity` range are implicitly unallocated.
    active_count: u32,
    /// Free list of rows available in the GPU buffer for a new allocation. This
    /// only contains indices in the `0..active_count` range; all row indices in
    /// `active_count..capacity` are assumed to be unallocated.
    free_indices: Vec<u32>,
    /// Pending values accumulated on CPU and not yet written to GPU, and their
    /// rows.
    pending_values: Vec<(u32, T)>,
    /// Extra pending values accumulated on CPU like `pending_values`, but for
    /// which there's not enough space in the current GPU buffer. Those values
    /// are sorted in index order, occupying the range `buffer.size..`.
    extra_pending_values: Vec<T>,
}

impl<T: Pod + ShaderSize> Default for BufferTable<T> {
    fn default() -> Self {
        let item_size = std::mem::size_of::<T>();
        let aligned_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
        assert!(aligned_size >= item_size);
        Self {
            buffer: None,
            buffer_usage: BufferUsages::all(),
            label: None,
            item_size,
            aligned_size,
            capacity: 0,
            active_count: 0,
            free_indices: Vec::new(),
            pending_values: Vec::new(),
            extra_pending_values: Vec::new(),
        }
    }
}

impl<T: Pod + ShaderSize> BufferTable<T> {
    /// Create a new collection.
    ///
    /// `item_align` is an optional additional alignment for items in the
    /// collection. If greater than the natural alignment dictated by WGSL
    /// rules, this extra alignment is enforced. Otherwise it's ignored (so you
    /// can pass `None` to ignore it). This is useful if for example you want to
    /// bind individual rows or any subset of the table, to ensure each row is
    /// aligned to the device constraints.
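    ///
    /// For example (an illustrative sketch; `MyRow` is a hypothetical row
    /// type), a table whose rows are bound individually with a dynamic offset
    /// would typically pass the device's storage buffer offset alignment:
    ///
    /// ```ignore
    /// let item_align = device.limits().min_storage_buffer_offset_alignment as u64;
    /// let table = BufferTable::<MyRow>::new(
    ///     BufferUsages::STORAGE,
    ///     NonZeroU64::new(item_align),
    ///     Some("my_table".to_string()),
    /// );
    /// ```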
    ///
    /// # Panics
    ///
    /// Panics if `buffer_usage` contains [`BufferUsages::UNIFORM`] and the
    /// layout of the element type `T` does not meet the requirements of the
    /// uniform address space, as tested by
    /// [`ShaderType::assert_uniform_compat()`].
    ///
    /// [`BufferUsages::UNIFORM`]: bevy::render::render_resource::BufferUsages::UNIFORM
    pub fn new(
        buffer_usage: BufferUsages,
        item_align: Option<NonZeroU64>,
        label: Option<String>,
    ) -> Self {
        // GPU-aligned item size, compatible with WGSL rules
        let item_size = <T as ShaderSize>::SHADER_SIZE.get() as usize;
        // Extra manual alignment for device constraints
        let aligned_size = if let Some(item_align) = item_align {
            let item_align = item_align.get() as usize;
            let aligned_size = item_size.next_multiple_of(item_align);
            assert!(aligned_size >= item_size);
            assert!(aligned_size.is_multiple_of(item_align));
            aligned_size
        } else {
            item_size
        };
        trace!(
            "BufferTable[\"{}\"]: item_size={} aligned_size={}",
            label.as_ref().unwrap_or(&String::new()),
            item_size,
            aligned_size
        );
        if buffer_usage.contains(BufferUsages::UNIFORM) {
            <T as ShaderType>::assert_uniform_compat();
        }
        Self {
            // Need COPY_SRC and COPY_DST to copy from old to new buffer on resize
            buffer_usage: buffer_usage | BufferUsages::COPY_SRC | BufferUsages::COPY_DST,
            aligned_size,
            label,
            ..Default::default()
        }
    }

    /// Get a safe buffer label for debug display.
    ///
    /// Falls back to an empty string if no label was specified.
    pub fn safe_label(&self) -> Cow<'_, str> {
        self.label
            .as_ref()
            .map(|s| Cow::Borrowed(&s[..]))
            .unwrap_or(Cow::Borrowed(""))
    }

    /// Get a safe buffer name for debug display.
    ///
    /// Same as [`safe_label()`] but includes the buffer ID as well.
    ///
    /// [`safe_label()`]: self::BufferTable::safe_label
    pub fn safe_name(&self) -> String {
        let id = self
            .buffer
            .as_ref()
            .map(|ab| {
                let id: NonZeroU32 = ab.buffer.id().into();
                id.get()
            })
            .unwrap_or(0);
        format!("#{}:{}", id, self.safe_label())
    }

    /// Reference to the GPU buffer, if already allocated.
    ///
    /// This reference corresponds to the currently allocated GPU buffer, which
    /// may not contain all data since the last [`insert()`] call, and could
    /// become invalid if a new larger buffer needs to be allocated to store the
    /// pending values inserted with [`insert()`].
    ///
    /// [`insert()`]: BufferTable::insert
    #[inline]
    pub fn buffer(&self) -> Option<&Buffer> {
        self.buffer.as_ref().map(|ab| &ab.buffer)
    }

    /// Maximum number of rows the table can hold without reallocation.
    ///
    /// This is the maximum number of rows that can be added to the table
    /// without forcing a new GPU buffer to be allocated and a copy from the old
    /// to the new buffer.
    ///
    /// Note that this doesn't imply that no GPU buffer allocation will ever
    /// occur; if a GPU buffer was never allocated, and there are pending
    /// CPU rows to insert, then a new buffer will be allocated on the next
    /// update with this capacity.
    #[inline]
    #[allow(dead_code)]
    pub fn capacity(&self) -> u32 {
        self.capacity
    }

    /// Current number of rows in use in the table.
    ///
    /// Note that rows in use are not necessarily contiguous. There may be gaps
    /// between used rows.
    #[inline]
    #[allow(dead_code)]
    pub fn len(&self) -> u32 {
        self.active_count - self.free_indices.len() as u32
    }

    /// Size of a single row in the table, in bytes, aligned to GPU constraints.
    #[inline]
    #[allow(dead_code)]
    pub fn aligned_size(&self) -> usize {
        self.aligned_size
    }

    /// Is the table empty?
    #[inline]
    #[allow(dead_code)]
    pub fn is_empty(&self) -> bool {
        self.active_count == 0
    }

    /// Clear all rows of the table without deallocating any existing GPU
    /// buffer.
    ///
    /// This operation only updates the CPU cache of the table, without touching
    /// any GPU buffer. On the next GPU buffer update, the GPU buffer will be
    /// deallocated.
    #[allow(dead_code)]
    pub fn clear(&mut self) {
        self.pending_values.clear();
        self.extra_pending_values.clear();
        self.free_indices.clear();
        self.active_count = 0;
    }

    /// Clear any stale buffer used for resize in the previous frame during
    /// rendering, while the data structure was immutable.
    ///
    /// This must be called before any new [`insert()`].
    ///
    /// [`insert()`]: crate::BufferTable::insert
    pub fn clear_previous_frame_resizes(&mut self) {
        if let Some(ab) = self.buffer.as_mut() {
            ab.old_buffer = None;
            ab.old_count = 0;
        }
    }

    /// Calculate the size in bytes of `count` rows.
    #[inline]
    fn to_byte_size(&self, count: u32) -> usize {
        count as usize * self.aligned_size
    }

    /// Insert a new row into the table.
    ///
    /// For performance reasons, this buffers the row content on the CPU until
    /// the next GPU update, to minimize the number of CPU to GPU transfers.
    pub fn insert(&mut self, value: T) -> BufferTableId {
        trace!(
            "Inserting into table buffer '{}' with {} free indices, capacity: {}, active_size: {}",
            self.safe_name(),
            self.free_indices.len(),
            self.capacity,
            self.active_count
        );
        let index = if self.free_indices.is_empty() {
            let index = self.active_count;
            if index == self.capacity {
                self.capacity += 1;
            }
            debug_assert!(index < self.capacity);
            self.active_count += 1;
            index
        } else {
            self.free_indices.pop().unwrap()
        };
        let allocated_count = self
            .buffer
            .as_ref()
            .map(|ab| ab.allocated_count())
            .unwrap_or(0);
        trace!(
            "Found free index {}, capacity: {}, active_count: {}, allocated_count: {}",
            index,
            self.capacity,
            self.active_count,
            allocated_count
        );
        if index < allocated_count {
            self.pending_values.push((index, value));
        } else {
            let extra_index = index - allocated_count;
            if extra_index < self.extra_pending_values.len() as u32 {
                self.extra_pending_values[extra_index as usize] = value;
            } else {
                self.extra_pending_values.push(value);
            }
        }
        BufferTableId(index)
    }

    /// Calculate a dynamic byte offset for a bind group from a table entry.
    ///
    /// This returns the product of `id` and the internal [`aligned_size()`].
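    ///
    /// For example (illustrative numbers), with an aligned row size of 256
    /// bytes, the row with index 3 maps to a dynamic offset of `3 * 256 = 768`
    /// bytes.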
    ///
    /// # Panics
    ///
    /// Panics if the `id` is too large, producing a byte offset larger than
    /// `u32::MAX`.
    ///
    /// [`aligned_size()`]: Self::aligned_size
    #[inline]
    pub fn dynamic_offset(&self, id: BufferTableId) -> u32 {
        let offset = self.aligned_size * id.0 as usize;
        assert!(offset <= u32::MAX as usize);
        u32::try_from(offset).expect("BufferTable index out of bounds")
    }

    /// Update an existing row in the table.
    ///
    /// For performance reasons, this buffers the row content on the CPU until
    /// the next GPU update, to minimize the number of CPU to GPU transfers.
    ///
    /// Calling this function multiple times overwrites the previous value. Only
    /// the last value recorded each frame will be uploaded to GPU.
    ///
    /// # Panics
    ///
    /// Panics if the `id` is invalid.
    pub fn update(&mut self, id: BufferTableId, value: T) {
        assert!(id.is_valid());
        trace!(
            "Updating row {} of table buffer '{}'",
            id.0,
            self.safe_name(),
        );
        let allocated_count = self
            .buffer
            .as_ref()
            .map(|ab| ab.allocated_count())
            .unwrap_or(0);
        if id.0 < allocated_count {
            if let Some(idx) = self
                .pending_values
                .iter()
                .position(|&(index, _)| index == id.0)
            {
                // Overwrite a previous update. This ensures we never upload more than one
                // update per row, which would waste GPU bandwidth.
                self.pending_values[idx] = (id.0, value);
            } else {
                self.pending_values.push((id.0, value));
            }
        } else {
            let extra_index = (id.0 - allocated_count) as usize;
            assert!(extra_index < self.extra_pending_values.len());
            // Overwrite a previous update. This ensures we never upload more than one
            // update per row, which would waste GPU bandwidth.
            self.extra_pending_values[extra_index] = value;
        }
    }

    /// Insert several new contiguous rows into the table.
    ///
    /// For performance reasons, this buffers the row content on the CPU until
    /// the next GPU update, to minimize the number of CPU to GPU transfers.
    ///
    /// # Returns
    ///
    /// Returns the index of the first entry. Other entries follow right after
    /// it.
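    ///
    /// For example (illustrative; this mirrors the `buffer_table_insert` unit
    /// test below), if rows #0 and #1 are both free and two values are inserted
    /// contiguously, they are allocated at rows #0 and #1; if only row #0 were
    /// free, the pair would instead be appended at the end of the active zone.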
    #[allow(dead_code)] // unused but annoying to write, so keep if we need in the future
    pub fn insert_contiguous(&mut self, values: impl ExactSizeIterator<Item = T>) -> BufferTableId {
        let count = values.len() as u32;
        trace!(
            "Inserting {} contiguous values into table buffer '{}' with {} free indices, capacity: {}, active_size: {}",
            count,
            self.safe_name(),
            self.free_indices.len(),
            self.capacity,
            self.active_count
        );
        let first_index = if self.free_indices.is_empty() {
            let index = self.active_count;
            if index == self.capacity {
                self.capacity += count;
            }
            debug_assert!(index < self.capacity);
            self.active_count += count;
            index
        } else {
            let mut s = 0;
            let mut n = 1;
            let mut i = 1;
            while i < self.free_indices.len() {
                debug_assert!(self.free_indices[i] > self.free_indices[i - 1]); // always sorted
                if self.free_indices[i] == self.free_indices[i - 1] + 1 {
                    // contiguous
                    n += 1;
                    if n == count {
                        break;
                    }
                } else {
                    // non-contiguous; restart a new sequence
                    debug_assert!(n < count);
                    s = i;
                }
                i += 1;
            }
            if n == count {
                // Found a range of 'count' consecutive entries. Consume it.
                let index = self.free_indices[s];
                self.free_indices.splice(s..=i, []);
                index
            } else {
                // No free range for 'count' consecutive entries. Allocate at end instead.
                let index = self.active_count;
                if index == self.capacity {
                    self.capacity += count;
                }
                debug_assert!(index < self.capacity);
                self.active_count += count;
                index
            }
        };
        let allocated_count = self
            .buffer
            .as_ref()
            .map(|ab| ab.allocated_count())
            .unwrap_or(0);
        trace!(
            "Found {} free indices {}..{}, capacity: {}, active_count: {}, allocated_count: {}",
            count,
            first_index,
            first_index + count,
            self.capacity,
            self.active_count,
            allocated_count
        );
        for (i, value) in values.enumerate() {
            let index = first_index + i as u32;
            if index < allocated_count {
                self.pending_values.push((index, value));
            } else {
                let extra_index = index - allocated_count;
                if extra_index < self.extra_pending_values.len() as u32 {
                    self.extra_pending_values[extra_index as usize] = value;
                } else {
                    self.extra_pending_values.push(value);
                }
            }
        }
        BufferTableId(first_index)
    }

    /// Remove a row from the table.
    #[allow(dead_code)]
    pub fn remove(&mut self, id: BufferTableId) {
        let index = id.0;
        assert!(index < self.active_count);

        // If this is the last item in the active zone, just shrink the active zone
        // (implicit free list).
        if index == self.active_count - 1 {
            self.active_count -= 1;
            self.capacity -= 1;
        } else {
            // This is very inefficient but we need to apply the same logic as the
            // EffectCache because we rely on indices being in sync.
            let pos = self
                .free_indices
                .binary_search(&index) // will fail
                .unwrap_or_else(|e| e); // will get position of insertion
            self.free_indices.insert(pos, index);
        }
    }

    /// Remove a range of rows from the table.
    #[allow(dead_code)]
    pub fn remove_range(&mut self, first: BufferTableId, count: u32) {
        let index = first.0;
        assert!(index + count <= self.active_count);

        // If this is the last item in the active zone, just shrink the active zone
        // (implicit free list).
        if index == self.active_count - count {
            self.active_count -= count;
            self.capacity -= count;

            // Also try to remove free indices
            if self.free_indices.len() as u32 == self.active_count {
                // Easy case: everything is free, clear everything
                self.free_indices.clear();
                self.active_count = 0;
                self.capacity = 0;
            } else {
                // Some rows are still allocated. Dequeue from end while we have a contiguous
                // tail of free indices.
                let mut num_popped = 0;
                while let Some(idx) = self.free_indices.pop() {
                    if idx < self.active_count - 1 - num_popped {
                        self.free_indices.push(idx);
                        break;
                    }
                    num_popped += 1;
                }
                self.active_count -= num_popped;
                self.capacity -= num_popped;
            }
        } else {
            // This is very inefficient but we need to apply the same logic as the
            // EffectCache because we rely on indices being in sync.
            let pos = self
                .free_indices
                .binary_search(&index) // will fail
                .unwrap_or_else(|e| e); // will get position of insertion
            self.free_indices.splice(pos..pos, index..(index + count));
        }

        debug_assert!(
            (self.free_indices.is_empty() && self.active_count == 0)
                || (self.free_indices.len() as u32) < self.active_count
        );
    }

    /// Allocate any GPU buffer if needed, based on the most recent capacity
    /// requested.
    ///
    /// This should be called only once per frame, after all allocation requests
    /// have been made via [`insert()`] but before the GPU buffer is actually
    /// updated. This is an optimization to enable allocating the GPU buffer
    /// earlier than it's actually needed. Calling this multiple times is not
    /// supported, and might assert. Not calling it is safe, as the next
    /// update will call it just-in-time anyway.
    ///
    /// # Returns
    ///
    /// Returns `true` if a new buffer was (re-)allocated, to indicate any bind
    /// group needs to be re-created.
    ///
    /// [`insert()`]: crate::render::BufferTable::insert
    pub fn allocate_gpu(&mut self, device: &RenderDevice, queue: &RenderQueue) -> bool {
        // The allocated capacity is the capacity of the currently allocated GPU buffer,
        // which can be different from the expected capacity (self.capacity) for the
        // next update.
        let allocated_count = self.buffer.as_ref().map(|ab| ab.count).unwrap_or(0);
        let reallocated = if self.capacity > allocated_count {
            let byte_size = self.to_byte_size(self.capacity);
            trace!(
                "reserve('{}'): increase capacity from {} to {} elements, old size {} bytes, new size {} bytes",
                self.safe_name(),
                allocated_count,
                self.capacity,
                self.to_byte_size(allocated_count),
                byte_size
            );

            // Create the new buffer, swapping with the old one if any
            let has_init_data = !self.extra_pending_values.is_empty();
            let new_buffer = device.create_buffer(&BufferDescriptor {
                label: self.label.as_ref().map(|s| &s[..]),
                size: byte_size as BufferAddress,
                usage: self.buffer_usage,
                mapped_at_creation: has_init_data,
            });

            // Use any pending data to initialize the buffer. We only use CPU-available
            // data, which was inserted after the buffer was (re-)allocated and
            // has not been uploaded to GPU yet.
            if has_init_data {
                // Leave some space to copy the old buffer if any
                let base_size = self.to_byte_size(allocated_count) as u64;
                let extra_count = self.extra_pending_values.len() as u32;
                let extra_size = self.to_byte_size(extra_count) as u64;

                // Scope get_mapped_range_mut() to force a drop before unmap()
                {
                    // Note: get_mapped_range_mut() requires 8-byte alignment of the start offset.
                    let unaligned_range = base_size..(base_size + extra_size);
                    let (range, byte_offset) = round_range_start_down(unaligned_range, 8);

                    let dst_slice = &mut new_buffer.slice(range).get_mapped_range_mut();

                    let base_offset = byte_offset as usize;
                    let byte_size = self.aligned_size; // single row
                    for (index, content) in self.extra_pending_values.drain(..).enumerate() {
                        let byte_offset = base_offset + byte_size * index;

                        // Copy Rust value into a GPU-ready format, including GPU padding.
                        let src: &[u8] = cast_slice(std::slice::from_ref(&content));
                        let dst_range = byte_offset..(byte_offset + self.item_size);
                        trace!(
                            "+ init_copy: index={} src={:?} dst={:?} byte_offset={} byte_size={}",
                            index,
                            src.as_ptr(),
                            dst_range,
                            byte_offset,
                            byte_size
                        );
                        let dst = &mut dst_slice[dst_range];
                        dst.copy_from_slice(src);
                    }
                }

                new_buffer.unmap();
            }

            if let Some(ab) = self.buffer.as_mut() {
                // If there's any data currently in the GPU buffer, we need to copy it on the
                // next update to preserve it.
                if self.active_count > 0 {
                    // The current buffer has values to preserve; save it into old_buffer before
                    // replacing it with the newly-allocated one.

                    // By design we can't have all active entries as free ones; we should have
                    // updated active_count=0 and cleared the free list if that was the case.
                    debug_assert!(self.free_indices.len() < self.active_count as usize);

                    // If we already have an old buffer, that means we already have scheduled a copy
                    // to preserve some values. And we can't do that twice per frame.
                    assert!(
                        ab.old_buffer.is_none(),
                        "allocate_gpu() called twice before write_buffer() took effect."
                    );

                    // Swap old <-> new
                    let mut old_buffer = new_buffer;
                    std::mem::swap(&mut old_buffer, &mut ab.buffer);
                    ab.old_buffer = Some(old_buffer);
                    ab.old_count = ab.count;
                } else {
                    // The current buffer is unused, so we don't need to preserve anything.

                    // It can happen that we reallocate during the frame and then immediately free
                    // the rows to preserve, so that in the end there is nothing to preserve.
                    if let Some(old_buffer) = ab.old_buffer.take() {
                        old_buffer.destroy();
                    }

                    ab.buffer.destroy();
                    ab.buffer = new_buffer;
                }
                ab.count = self.capacity;
            } else {
                self.buffer = Some(AllocatedBuffer {
                    buffer: new_buffer,
                    count: self.capacity,
                    old_buffer: None,
                    old_count: 0,
                });
            }

            true
        } else {
            false
        };

        // Immediately schedule a copy of old rows.
        // - For old rows, copy into the old buffer, because the old-to-new buffer copy
        //   will be executed during a command queue while any CPU to GPU upload is
        //   prepended before the next command queue. To ensure things don't get out of
        //   order, with the CPU upload overwriting the GPU-to-GPU copy, make sure those
        //   two are disjoint.
        if let Some(ab) = self.buffer.as_ref() {
            let buffer = ab.old_buffer.as_ref().unwrap_or(&ab.buffer);
            for (index, content) in self.pending_values.drain(..) {
                let byte_size = self.aligned_size;
                let byte_offset = byte_size * index as usize;

                // Copy Rust value into a GPU-ready format, including GPU padding.
                // TODO - Do that in insert()!
                let mut aligned_buffer: Vec<u8> = vec![0; self.aligned_size];
                let src: &[u8] = cast_slice(std::slice::from_ref(&content));
                let dst_range = ..self.item_size;
                trace!(
                    "+ old_copy: index={} src={:?} dst={:?} byte_offset={} byte_size={}",
                    index,
                    src.as_ptr(),
                    dst_range,
                    byte_offset,
                    byte_size
                );
                let dst = &mut aligned_buffer[dst_range];
                dst.copy_from_slice(src);

                // Upload to GPU
                // TODO - Merge contiguous blocks into a single write_buffer()
                let bytes: &[u8] = cast_slice(&aligned_buffer);
                queue.write_buffer(buffer, byte_offset as u64, bytes);
            }
        } else {
            debug_assert!(self.pending_values.is_empty());
            debug_assert!(self.extra_pending_values.is_empty());
        }

        reallocated
    }

    /// Write any pending buffer changes to GPU, scheduling the copy of the old
    /// buffer into the newly (re)allocated one if a resize occurred.
    pub fn write_buffer(&self, encoder: &mut CommandEncoder) {
        // Check if there's any work to do: either some pending values to upload or some
        // existing buffer to copy into a newly-allocated one.
        if self.pending_values.is_empty()
            && self
                .buffer
                .as_ref()
                .map(|ab| ab.old_buffer.is_none())
                .unwrap_or(true)
        {
            trace!("write_buffer({}): nothing to do", self.safe_name());
            return;
        }

        trace!(
            "write_buffer({}): pending_values.len={} item_size={} aligned_size={} buffer={:?}",
            self.safe_name(),
            self.pending_values.len(),
            self.item_size,
            self.aligned_size,
            self.buffer,
        );

        // If there's no more GPU buffer, there's nothing to do
        let Some(ab) = self.buffer.as_ref() else {
            return;
        };

        // Copy any old buffer into the new one, and clear the old buffer. Note that we
        // only clear the ref-counted reference to the buffer, not the actual buffer,
        // which stays alive until the copy is done (but we don't need to care about
        // keeping it alive, wgpu does that for us).
        if let Some(old_buffer) = ab.old_buffer.as_ref() {
            let old_size = self.to_byte_size(ab.old_count) as u64;
            trace!("Copy old buffer id {:?} of size {} bytes into newly-allocated buffer {:?} of size {} bytes.", old_buffer.id(), old_size, ab.buffer.id(), self.to_byte_size(ab.count));
            encoder.copy_buffer_to_buffer(old_buffer, 0, &ab.buffer, 0, old_size);
        }
    }
}

#[cfg(test)]
mod tests {
    use bevy::math::Vec3;
    use bytemuck::{Pod, Zeroable};

    use super::*;

    #[test]
    fn test_round_range_start_down() {
        // r8(0..7) : no-op
        {
            let (r, o) = round_range_start_down(0..7, 8);
            assert_eq!(r, 0..7);
            assert_eq!(o, 0);
        }

        // r8(2..7) = 0..7, +2
        {
            let (r, o) = round_range_start_down(2..7, 8);
            assert_eq!(r, 0..7);
            assert_eq!(o, 2);
        }

        // r8(7..32) = 0..32, +7
        {
            let (r, o) = round_range_start_down(7..32, 8);
            assert_eq!(r, 0..32);
            assert_eq!(o, 7);
        }

        // r8(8..32) = no-op
        {
            let (r, o) = round_range_start_down(8..32, 8);
            assert_eq!(r, 8..32);
            assert_eq!(o, 0);
        }
    }

    #[repr(C)]
    #[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
    pub(crate) struct GpuDummy {
        pub v: Vec3,
    }

    #[repr(C)]
    #[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
    pub(crate) struct GpuDummyComposed {
        pub simple: GpuDummy,
        pub tag: u32,
        // GPU padding to 16 bytes due to GpuDummy forcing align to 16 bytes
    }

    #[repr(C)]
    #[derive(Debug, Clone, Copy, Pod, Zeroable, ShaderType)]
    pub(crate) struct GpuDummyLarge {
        pub simple: GpuDummy,
        pub tag: u32,
        pub large: [f32; 128],
    }

    #[test]
    fn buffer_table_sizes() {
        // Rust
        assert_eq!(std::mem::size_of::<GpuDummy>(), 12);
        assert_eq!(std::mem::align_of::<GpuDummy>(), 4);
        assert_eq!(std::mem::size_of::<GpuDummyComposed>(), 16); // tight packing
        assert_eq!(std::mem::align_of::<GpuDummyComposed>(), 4);
        assert_eq!(std::mem::size_of::<GpuDummyLarge>(), 132 * 4); // tight packing
        assert_eq!(std::mem::align_of::<GpuDummyLarge>(), 4);

        // GPU
        assert_eq!(<GpuDummy as ShaderType>::min_size().get(), 16); // Vec3 gets padded to 16 bytes
        assert_eq!(<GpuDummy as ShaderSize>::SHADER_SIZE.get(), 16);
        assert_eq!(<GpuDummyComposed as ShaderType>::min_size().get(), 32); // align is 16 bytes, forces padding
        assert_eq!(<GpuDummyComposed as ShaderSize>::SHADER_SIZE.get(), 32);
        assert_eq!(<GpuDummyLarge as ShaderType>::min_size().get(), 544); // align is 16 bytes, forces padding
        assert_eq!(<GpuDummyLarge as ShaderSize>::SHADER_SIZE.get(), 544);

        for (item_align, expected_aligned_size) in [
            (0, 16),
            (4, 16),
            (8, 16),
            (16, 16),
            (32, 32),
            (256, 256),
            (512, 512),
        ] {
            let mut table = BufferTable::<GpuDummy>::new(
                BufferUsages::STORAGE,
                NonZeroU64::new(item_align),
                None,
            );
            assert_eq!(table.aligned_size(), expected_aligned_size);
            assert!(table.is_empty());
            table.insert(GpuDummy::default());
            assert!(!table.is_empty());
            assert_eq!(table.len(), 1);
        }

        for (item_align, expected_aligned_size) in [
            (0, 32),
            (4, 32),
            (8, 32),
            (16, 32),
            (32, 32),
            (256, 256),
            (512, 512),
        ] {
            let mut table = BufferTable::<GpuDummyComposed>::new(
                BufferUsages::STORAGE,
                NonZeroU64::new(item_align),
                None,
            );
            assert_eq!(table.aligned_size(), expected_aligned_size);
            assert!(table.is_empty());
            table.insert(GpuDummyComposed::default());
            assert!(!table.is_empty());
            assert_eq!(table.len(), 1);
        }

        for (item_align, expected_aligned_size) in [
            (0, 544),
            (4, 544),
            (8, 544),
            (16, 544),
            (32, 544),
            (256, 768),
            (512, 1024),
        ] {
            let mut table = BufferTable::<GpuDummyLarge>::new(
                BufferUsages::STORAGE,
                NonZeroU64::new(item_align),
                None,
            );
            assert_eq!(table.aligned_size(), expected_aligned_size);
            assert!(table.is_empty());
            table.insert(GpuDummyLarge {
                simple: Default::default(),
                tag: 0,
                large: [0.; 128],
            });
            assert!(!table.is_empty());
            assert_eq!(table.len(), 1);
        }
    }

    #[test]
    fn buffer_table_insert() {
        let mut table =
            BufferTable::<GpuDummy>::new(BufferUsages::STORAGE, NonZeroU64::new(32), None);

        // [x]
        let id1 = table.insert(GpuDummy::default());
        assert_eq!(id1.0, 0);
        assert_eq!(table.active_count, 1);
        assert!(table.free_indices.is_empty());

        // [x x]
        let id2 = table.insert(GpuDummy::default());
        assert_eq!(id2.0, 1);
        assert_eq!(table.active_count, 2);
        assert!(table.free_indices.is_empty());

        // [- x]
        table.remove(id1);
        assert_eq!(table.active_count, 2);
        assert_eq!(table.free_indices.len(), 1);
        assert_eq!(table.free_indices[0], 0);

        // [- x x x]
        let id3 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
        assert_eq!(id3.0, 2); // at the end (doesn't fit in free slot #0)
        assert_eq!(table.active_count, 4);
        assert_eq!(table.free_indices.len(), 1);
        assert_eq!(table.free_indices[0], 0);

        // [- - x x]
        table.remove(id2);
        assert_eq!(table.active_count, 4);
        assert_eq!(table.free_indices.len(), 2);
        assert_eq!(table.free_indices[0], 0);
        assert_eq!(table.free_indices[1], 1);

        // [x x x x]
        let id4 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
        assert_eq!(id4.0, 0); // this time it fits into slots #0-#1
        assert_eq!(table.active_count, 4);
        assert!(table.free_indices.is_empty());

        // [- - x x]
        table.remove_range(id4, 2);
        assert_eq!(table.active_count, 4);
        assert_eq!(table.free_indices.len(), 2);
        assert_eq!(table.free_indices[0], 0);
        assert_eq!(table.free_indices[1], 1);

        // []
        table.remove_range(id3, 2);
        assert_eq!(table.active_count, 0);
        assert!(table.free_indices.is_empty());

        // [x x]
        let id5 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
        assert_eq!(id5.0, 0);
        assert_eq!(table.active_count, 2);
        assert!(table.free_indices.is_empty());

        // [x x x x]
        let id6 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
        assert_eq!(id6.0, 2);
        assert_eq!(table.active_count, 4);
        assert!(table.free_indices.is_empty());

        // [x x x x x x]
        let id7 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
        assert_eq!(id7.0, 4);
        assert_eq!(table.active_count, 6);
        assert!(table.free_indices.is_empty());

        // [x x - - x x]
        table.remove_range(id6, 2);
        assert_eq!(table.active_count, 6);
        assert_eq!(table.free_indices.len(), 2);
        assert_eq!(table.free_indices[0], 2);
        assert_eq!(table.free_indices[1], 3);

        // [x x]
        table.remove_range(id7, 2);
        assert_eq!(table.active_count, 2);
        assert!(table.free_indices.is_empty());
    }
}

#[cfg(all(test, feature = "gpu_tests"))]
mod gpu_tests {
    use std::fmt::Write;

    use bevy::render::render_resource::BufferSlice;
    use tests::*;
    use wgpu::{BufferView, CommandBuffer};

    use super::*;
    use crate::test_utils::MockRenderer;

    /// Read data from GPU back into CPU memory.
    ///
    /// This call blocks until the data is available on CPU. Used for testing
    /// only.
    fn read_back_gpu(device: &RenderDevice, slice: BufferSlice<'_>) -> BufferView {
        let (tx, rx) = futures::channel::oneshot::channel();
        slice.map_async(wgpu::MapMode::Read, move |result| {
            tx.send(result).unwrap();
        });
        let _ = device.poll(wgpu::PollType::Wait {
            submission_index: None,
            timeout: None,
        });
        let result = futures::executor::block_on(rx);
        assert!(result.is_ok());
        slice.get_mapped_range()
    }

    /// Submit a command buffer to GPU and wait for completion.
    ///
    /// This call blocks until the GPU executed the command buffer. Used for
    /// testing only.
    fn submit_gpu_and_wait(
        device: &RenderDevice,
        queue: &RenderQueue,
        command_buffer: CommandBuffer,
    ) {
        // Queue command
        queue.submit([command_buffer]);

        // Register callback to observe completion
        let (tx, rx) = futures::channel::oneshot::channel();
        queue.on_submitted_work_done(move || {
            tx.send(()).unwrap();
        });

        // Poll device, checking for completion and raising callback
        let _ = device.poll(wgpu::PollType::Wait {
            submission_index: None,
            timeout: None,
        });

        // Wait for the callback to be raised. This was needed in previous versions;
        // however, it's a bit unclear whether it's still needed or whether
        // device.poll() is enough to guarantee that the command was executed.
        let _ = futures::executor::block_on(rx);
    }

    /// Convert a byte slice to a string of hexadecimal values separated by a
    /// blank space.
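    ///
    /// For example (illustrative), the slice `[0x00, 0x2a, 0xff]` is rendered
    /// as `"00 2a ff"`.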
1134
    fn to_hex_string(slice: &[u8]) -> String {
19✔
1135
        let len = slice.len();
57✔
1136
        let num_chars = len * 3 - 1;
38✔
1137
        let mut s = String::with_capacity(num_chars);
57✔
1138
        for b in &slice[..len - 1] {
304✔
1139
            write!(&mut s, "{:02x} ", *b).unwrap();
1140
        }
1141
        write!(&mut s, "{:02x}", slice[len - 1]).unwrap();
76✔
1142
        debug_assert_eq!(s.len(), num_chars);
57✔
1143
        s
19✔
1144
    }
1145

1146
    fn write_buffers_and_wait<T: Pod + ShaderSize>(
7✔
1147
        table: &BufferTable<T>,
1148
        device: &RenderDevice,
1149
        queue: &RenderQueue,
1150
    ) {
1151
        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
28✔
1152
            label: Some("test"),
7✔
1153
        });
1154
        table.write_buffer(&mut encoder);
21✔
1155
        let command_buffer = encoder.finish();
21✔
1156
        submit_gpu_and_wait(device, queue, command_buffer);
28✔
1157
        println!("Buffer written to GPU");
7✔
1158
    }
1159

1160
    #[test]
1161
    fn table_write() {
1162
        let renderer = MockRenderer::new();
1163
        let device = renderer.device();
1164
        let queue = renderer.queue();
1165

1166
        let item_align = device.limits().min_storage_buffer_offset_alignment as u64;
1167
        println!("min_storage_buffer_offset_alignment = {item_align}");
1168
        let mut table = BufferTable::<GpuDummyComposed>::new(
1169
            BufferUsages::STORAGE | BufferUsages::MAP_READ,
1170
            NonZeroU64::new(item_align),
1171
            None,
1172
        );
1173
        let final_align = item_align.max(<GpuDummyComposed as ShaderSize>::SHADER_SIZE.get());
1174
        assert_eq!(table.aligned_size(), final_align as usize);
1175

1176
        // Initial state
1177
        assert!(table.is_empty());
1178
        assert_eq!(table.len(), 0);
1179
        assert_eq!(table.capacity(), 0);
1180
        assert!(table.buffer.is_none());
1181

1182
        // This has no effect while the table is empty
1183
        table.clear_previous_frame_resizes();
1184
        table.allocate_gpu(&device, &queue);
1185
        write_buffers_and_wait(&table, &device, &queue);
1186
        assert!(table.is_empty());
1187
        assert_eq!(table.len(), 0);
1188
        assert_eq!(table.capacity(), 0);
1189
        assert!(table.buffer.is_none());
1190

1191
        // New frame
1192
        table.clear_previous_frame_resizes();
1193

1194
        // Insert some entries
1195
        let len = 3;
1196
        for i in 0..len {
1197
            let row = table.insert(GpuDummyComposed {
1198
                tag: i + 1,
1199
                ..Default::default()
1200
            });
1201
            assert_eq!(row.0, i);
1202
        }
1203
        assert!(!table.is_empty());
1204
        assert_eq!(table.len(), len);
1205
        assert!(table.capacity() >= len); // contract: could over-allocate...
1206
        assert!(table.buffer.is_none()); // not yet allocated on GPU
1207

1208
        // Allocate GPU buffer for current requested state
1209
        table.allocate_gpu(&device, &queue);
1210
        assert!(!table.is_empty());
1211
        assert_eq!(table.len(), len);
1212
        assert!(table.capacity() >= len);
1213
        let ab = table
1214
            .buffer
1215
            .as_ref()
1216
            .expect("GPU buffer should be allocated after allocate_gpu()");
1217
        assert!(ab.old_buffer.is_none()); // no previous copy
1218
        assert_eq!(ab.count, len);
1219
        println!(
1220
            "Allocated buffer #{:?} of {} rows",
1221
            ab.buffer.id(),
1222
            ab.count
1223
        );
1224
        let ab_buffer = ab.buffer.clone();
1225

1226
        // Another allocate_gpu() is a no-op
1227
        table.allocate_gpu(&device, &queue);
1228
        assert!(!table.is_empty());
1229
        assert_eq!(table.len(), len);
1230
        assert!(table.capacity() >= len);
1231
        let ab = table
1232
            .buffer
1233
            .as_ref()
1234
            .expect("GPU buffer should be allocated after allocate_gpu()");
1235
        assert!(ab.old_buffer.is_none()); // no previous copy
1236
        assert_eq!(ab.count, len);
1237
        assert_eq!(ab_buffer.id(), ab.buffer.id()); // same buffer
1238

1239
        // Write buffer (CPU -> GPU)
1240
        write_buffers_and_wait(&table, &device, &queue);
1241

1242
        {
1243
            // Read back (GPU -> CPU)
1244
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
1245
            {
1246
                let slice = buffer.slice(..);
1247
                let view = read_back_gpu(&device, slice);
1248
                println!(
1249
                    "GPU data read back to CPU for validation: {} bytes",
1250
                    view.len()
1251
                );
1252

1253
                // Validate content
1254
                assert_eq!(view.len(), final_align as usize * table.capacity() as usize);
1255
                for i in 0..len as usize {
1256
                    let offset = i * final_align as usize;
1257
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
1258
                    let src = &view[offset..offset + 16];
1259
                    println!("{}", to_hex_string(src));
1260
                    let dummy_composed: &[GpuDummyComposed] =
1261
                        cast_slice(&view[offset..offset + item_size]);
1262
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
1263
                }
1264
            }
1265
            buffer.unmap();
1266
        }
1267

1268
        // New frame
1269
        table.clear_previous_frame_resizes();
1270

1271
        // Insert more entries
1272
        let old_capacity = table.capacity();
1273
        let mut len = len;
1274
        while table.capacity() == old_capacity {
1275
            let row = table.insert(GpuDummyComposed {
1276
                tag: len + 1,
1277
                ..Default::default()
1278
            });
1279
            assert_eq!(row.0, len);
1280
            len += 1;
1281
        }
1282
        println!(
1283
            "Added {} rows to grow capacity from {} to {}.",
1284
            len - 3,
1285
            old_capacity,
1286
            table.capacity()
1287
        );
1288

1289
        // This re-allocates a new GPU buffer because the capacity changed
1290
        table.allocate_gpu(&device, &queue);
1291
        assert!(!table.is_empty());
1292
        assert_eq!(table.len(), len);
1293
        assert!(table.capacity() >= len);
1294
        let ab = table
1295
            .buffer
1296
            .as_ref()
1297
            .expect("GPU buffer should be allocated after allocate_gpu()");
1298
        assert_eq!(ab.count, len);
1299
        assert!(ab.old_buffer.is_some()); // old buffer to copy
1300
        assert_ne!(ab.old_buffer.as_ref().unwrap().id(), ab.buffer.id());
        println!(
            "Allocated new buffer #{:?} of {} rows",
            ab.buffer.id(),
            ab.count
        );

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert_eq!(view.len(), final_align as usize * table.capacity() as usize);
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
                    let dummy_composed: &[GpuDummyComposed] =
                        cast_slice(&view[offset..offset + item_size]);
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                }
            }
            buffer.unmap();
        }

        // New frame
        table.clear_previous_frame_resizes();

        // Delete the last allocated row
        let old_capacity = table.capacity();
        let len = len - 1;
        table.remove(BufferTableId(len));
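        // Removing the last row shrinks the CPU-side table; the GPU buffer itself is
        // expected to keep its allocated size (checked via ab.count below).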
        println!(
            "Removed last row to shrink capacity from {} to {}.",
            old_capacity,
            table.capacity()
        );

        // This doesn't do anything since we only removed a row
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert_eq!(ab.count, len + 1); // GPU buffer kept its size
        assert!(ab.old_buffer.is_none());

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert!(view.len() >= final_align as usize * table.capacity() as usize); // note the >=, the buffer is over-allocated
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
                    let dummy_composed: &[GpuDummyComposed] =
                        cast_slice(&view[offset..offset + item_size]);
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                }
            }
            buffer.unmap();
        }

        // New frame
        table.clear_previous_frame_resizes();

        // Delete the first allocated row
        let old_capacity = table.capacity();
        let mut len = len - 1;
        table.remove(BufferTableId(0));
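        // Removing the first row leaves a hole in the table; the freed slot should be
        // handed out again by the next insert (checked below, where row #0 is re-used).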
        assert_eq!(old_capacity, table.capacity());
        println!(
            "Removed first row to shrink capacity from {} to {} (no change).",
            old_capacity,
            table.capacity()
        );

        // This doesn't do anything since we only removed a row
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert_eq!(ab.count, len + 2); // GPU buffer kept its size
        assert!(ab.old_buffer.is_none());

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert!(view.len() >= final_align as usize * table.capacity() as usize); // note the >=, the buffer is over-allocated
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
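                    // Row #0 was removed above and not re-written, so its stale
                    // content is skipped during validation.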
                    if i > 0 {
                        let dummy_composed: &[GpuDummyComposed] =
                            cast_slice(&view[offset..offset + item_size]);
                        assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                    }
                }
            }
            buffer.unmap();
        }

        // New frame
        table.clear_previous_frame_resizes();

        // Insert a row; this should get into row #0 in the buffer
        let row = table.insert(GpuDummyComposed {
            tag: 1,
            ..Default::default()
        });
        assert_eq!(row.0, 0);
        len += 1;
        println!(
            "Added 1 row into free row #0 (capacity {} -> {}).",
            old_capacity,
            table.capacity()
        );
        // This doesn't reallocate the GPU buffer since we used a free list entry
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert_eq!(ab.count, 4); // 4 == last time we grew
        assert!(ab.old_buffer.is_none());

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert!(view.len() >= final_align as usize * table.capacity() as usize);
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
                    let dummy_composed: &[GpuDummyComposed] =
                        cast_slice(&view[offset..offset + item_size]);
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                }
            }
            buffer.unmap();
        }

        // New frame
        table.clear_previous_frame_resizes();

        // Insert a row; this should get into row #3 at the end of the allocated buffer
        let row = table.insert(GpuDummyComposed {
            tag: 4,
            ..Default::default()
        });
        assert_eq!(row.0, 3);
        len += 1;
        println!(
            "Added 1 row at the end of the allocated buffer (capacity {} -> {}).",
            old_capacity,
            table.capacity()
        );

        // This doesn't reallocate the GPU buffer since we used an implicit free entry
        table.allocate_gpu(&device, &queue);
        assert!(!table.is_empty());
        assert_eq!(table.len(), len);
        assert!(table.capacity() >= len);
        let ab = table
            .buffer
            .as_ref()
            .expect("GPU buffer should be allocated after allocate_gpu()");
        assert_eq!(ab.count, 4); // 4 == last time we grew
        assert!(ab.old_buffer.is_none());

        // Write buffer (CPU -> GPU)
        write_buffers_and_wait(&table, &device, &queue);

        {
            // Read back (GPU -> CPU)
            let buffer = table.buffer().expect("Buffer was not allocated").clone(); // clone() for lifetime
            {
                let slice = buffer.slice(..);
                let view = read_back_gpu(&device, slice);
                println!(
                    "GPU data read back to CPU for validation: {} bytes",
                    view.len()
                );

                // Validate content
                assert!(view.len() >= final_align as usize * table.capacity() as usize);
                for i in 0..len as usize {
                    let offset = i * final_align as usize;
                    let item_size = std::mem::size_of::<GpuDummyComposed>();
                    let src = &view[offset..offset + 16];
                    println!("{}", to_hex_string(src));
                    let dummy_composed: &[GpuDummyComposed] =
                        cast_slice(&view[offset..offset + item_size]);
                    assert_eq!(dummy_composed[0].tag, (i + 1) as u32);
                }
            }
            buffer.unmap();
        }
    }
}