18243240671

Committed 04 Oct 2025 10:36AM UTC coverage: 66.58% (+0.1%) from 66.455%

Build # 18243240671

Build Type

push

github

Committed by

web-flow

Commit Message

Split extraction and render into unit systems (#499)

Reorganize most of the extraction and render systems into smaller,
unit-like systems with limited (ideally, a single) responsibility. Split
most of the data into separate, smaller components too. This not only
enable better multithreading, but also greatly simplify maintenance by
clarifying the logic and responsibility of each system and component.

As part of this change, add a "ready state" to the effect, which is read
back from the render world and informs the main world about whether an
effect is ready for simulation and rendering. This includes:

- All GPU resources being allocated, and in particular the PSOs
  (pipelines) which in Bevy are compiled asynchronously and can be very
  slow (many frames of delay).
- The ready state of all descendant effects, recursively. This ensures a
  child is ready to _e.g._ receive GPU spawn events before its parent,
  which emits those events, starts simulating.

This new ready state is accessed via
`CompiledParticleEffect::is_ready()`. Note that the state is updated
during the extract phase with the information collected from the
previous render frame, so by the time `is_ready()` returns `true`,
already one frame of simulation and rendering generally occurred.

Remove the outdated `copyless` dependency.

Run Details

594 of 896 new or added lines in 12 files covered. (66.29%)

21 existing lines in 3 files now uncovered.

5116 of 7684 relevant lines covered (66.58%)

416.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

63.39

/src/render/mod.rs

use std::{
    borrow::Cow,
    hash::{DefaultHasher, Hash, Hasher},
    marker::PhantomData,
    num::{NonZeroU32, NonZeroU64},
    ops::{Deref, DerefMut, Range},
    time::Duration,
    vec,
};

#[cfg(feature = "2d")]
use bevy::core_pipeline::core_2d::{Transparent2d, CORE_2D_DEPTH_FORMAT};
#[cfg(feature = "2d")]
use bevy::math::FloatOrd;
#[cfg(feature = "3d")]
use bevy::{
    core_pipeline::{
        core_3d::{
            AlphaMask3d, Opaque3d, Opaque3dBatchSetKey, Opaque3dBinKey, Transparent3d,
            CORE_3D_DEPTH_FORMAT,
        },
        prepass::{OpaqueNoLightmap3dBatchSetKey, OpaqueNoLightmap3dBinKey},
    },
    render::render_phase::{BinnedPhaseItem, ViewBinnedRenderPhases},
};
use bevy::{
    ecs::{
        component::Tick,
        prelude::*,
        system::{lifetimeless::*, SystemParam, SystemState},
    },
    log::trace,
    platform::collections::HashMap,
    prelude::*,
    render::{
        mesh::{
            allocator::MeshAllocator, MeshVertexBufferLayoutRef, RenderMesh, RenderMeshBufferInfo,
        },
        render_asset::RenderAssets,
        render_graph::{Node, NodeRunError, RenderGraphContext, SlotInfo},
        render_phase::{
            Draw, DrawError, DrawFunctions, PhaseItemExtraIndex, SortedPhaseItem,
            TrackedRenderPass, ViewSortedRenderPhases,
        },
        render_resource::*,
        renderer::{RenderContext, RenderDevice, RenderQueue},
        sync_world::{MainEntity, RenderEntity, TemporaryRenderEntity},
        texture::GpuImage,
        view::{
            ExtractedView, RenderVisibleEntities, ViewTarget, ViewUniform, ViewUniformOffset,
            ViewUniforms,
        },
        Extract, MainWorld,
    },
};
use bitflags::bitflags;
use bytemuck::{Pod, Zeroable};
use effect_cache::{CachedEffect, EffectSlice, SlabState};
use event::{CachedChildInfo, CachedEffectEvents, CachedParentInfo, GpuChildInfo};
use fixedbitset::FixedBitSet;
use gpu_buffer::GpuBuffer;
use naga_oil::compose::{Composer, NagaModuleDescriptor};

use crate::{
    asset::{DefaultMesh, EffectAsset},
    calc_func_id,
    render::{
        batch::{BatchInput, EffectDrawBatch, EffectSorter, InitAndUpdatePipelineIds},
        effect_cache::{
            AnyDrawIndirectArgs, CachedDrawIndirectArgs, DispatchBufferIndices, SlabId,
        },
    },
    AlphaMode, Attribute, CompiledParticleEffect, EffectProperties, EffectShader, EffectSimulation,
    EffectSpawner, EffectVisibilityClass, ParticleLayout, PropertyLayout, SimulationCondition,
    TextureLayout,
};

mod aligned_buffer_vec;
mod batch;
mod buffer_table;
mod effect_cache;
mod event;
mod gpu_buffer;
mod property;
mod shader_cache;
mod sort;

use aligned_buffer_vec::AlignedBufferVec;
use batch::BatchSpawnInfo;
pub(crate) use batch::SortedEffectBatches;
use buffer_table::{BufferTable, BufferTableId};
pub(crate) use effect_cache::EffectCache;
pub(crate) use event::{allocate_events, on_remove_cached_effect_events, EventCache};
pub(crate) use property::{
    allocate_properties, on_remove_cached_properties, prepare_property_buffers, PropertyBindGroups,
    PropertyCache,
};
use property::{CachedEffectProperties, PropertyBindGroupKey};
pub use shader_cache::ShaderCache;
pub(crate) use sort::SortBindGroups;

use self::batch::EffectBatch;

// Size of an indirect index (including both parts of the ping-pong buffer) in
// bytes.
const INDIRECT_INDEX_SIZE: u32 = 12;

/// Helper to calculate a hash of a given hashable value.
fn calc_hash<H: Hash>(value: &H) -> u64 {
    let mut hasher = DefaultHasher::default();
    value.hash(&mut hasher);
    hasher.finish()
}

/// Source data (buffer and range inside the buffer) to create a buffer binding.
#[derive(Debug, Clone)]
pub(crate) struct BufferBindingSource {
    buffer: Buffer,
    offset: u32,
    size: NonZeroU32,
}

impl BufferBindingSource {
    /// Get a binding over the source data.
    pub fn as_binding(&self) -> BindingResource<'_> {
        BindingResource::Buffer(BufferBinding {
            buffer: &self.buffer,
            offset: self.offset as u64 * 4,
            size: Some(self.size.into()),
        })
    }
}

impl PartialEq for BufferBindingSource {
    fn eq(&self, other: &Self) -> bool {
        self.buffer.id() == other.buffer.id()
            && self.offset == other.offset
            && self.size == other.size
    }
}

impl<'a> From<&'a BufferBindingSource> for BufferBinding<'a> {
    fn from(value: &'a BufferBindingSource) -> Self {
        BufferBinding {
            buffer: &value.buffer,
            offset: value.offset as u64,
            size: Some(value.size.into()),
        }
    }
}

/// Simulation parameters, available to all shaders of all effects.
#[derive(Debug, Default, Clone, Copy, Resource)]
pub(crate) struct SimParams {
    /// Current effect system simulation time since startup, in seconds.
    /// This is based on the [`Time<EffectSimulation>`](EffectSimulation) clock.
    time: f64,
    /// Delta time, in seconds, since last effect system update.
    delta_time: f32,

    /// Current virtual time since startup, in seconds.
    /// This is based on the [`Time<Virtual>`](Virtual) clock.
    virtual_time: f64,
    /// Virtual delta time, in seconds, since last effect system update.
    virtual_delta_time: f32,

    /// Current real time since startup, in seconds.
    /// This is based on the [`Time<Real>`](Real) clock.
    real_time: f64,
    /// Real delta time, in seconds, since last effect system update.
    real_delta_time: f32,
}

/// GPU representation of [`SimParams`], as well as additional per-frame
/// effect-independent values.
#[repr(C)]
#[derive(Debug, Copy, Clone, Pod, Zeroable, ShaderType)]
struct GpuSimParams {
    /// Delta time, in seconds, since last effect system update.
    delta_time: f32,
    /// Current effect system simulation time since startup, in seconds.
    ///
    /// This is a lower-precision variant of [`SimParams::time`].
    time: f32,
    /// Virtual delta time, in seconds, since last effect system update.
    virtual_delta_time: f32,
    /// Current virtual time since startup, in seconds.
    ///
    /// This is a lower-precision variant of [`SimParams::time`].
    virtual_time: f32,
    /// Real delta time, in seconds, since last effect system update.
    real_delta_time: f32,
    /// Current real time since startup, in seconds.
    ///
    /// This is a lower-precision variant of [`SimParams::time`].
    real_time: f32,
    /// Total number of effects to update this frame. Used by the indirect
    /// compute pipeline to cap the compute thread to the actual number of
    /// effects to process.
    ///
    /// This is only used by the `vfx_indirect` compute shader.
    num_effects: u32,
}

impl Default for GpuSimParams {
    fn default() -> Self {
        Self {
            delta_time: 0.04,
            time: 0.0,
            virtual_delta_time: 0.04,
            virtual_time: 0.0,
            real_delta_time: 0.04,
            real_time: 0.0,
            num_effects: 0,
        }
    }
}

impl From<SimParams> for GpuSimParams {
    #[inline]
    fn from(src: SimParams) -> Self {
        Self::from(&src)
    }
}

impl From<&SimParams> for GpuSimParams {
    fn from(src: &SimParams) -> Self {
        Self {
            delta_time: src.delta_time,
            time: src.time as f32,
            virtual_delta_time: src.virtual_delta_time,
            virtual_time: src.virtual_time as f32,
            real_delta_time: src.real_delta_time,
            real_time: src.real_time as f32,
            ..default()
        }
    }
}

/// Compressed representation of a transform for GPU transfer.
///
/// The transform is stored as the three first rows of a transposed [`Mat4`],
/// assuming the last row is the unit row [`Vec4::W`]. The transposing ensures
/// that the three values are [`Vec4`] types which are naturally aligned and
/// without padding when used in WGSL. Without this, storing only the first
/// three components of each column would introduce padding, and would use the
/// same storage size on GPU as a full [`Mat4`].
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
pub(crate) struct GpuCompressedTransform {
    pub x_row: [f32; 4],
    pub y_row: [f32; 4],
    pub z_row: [f32; 4],
}

impl From<Mat4> for GpuCompressedTransform {
    fn from(value: Mat4) -> Self {
        let tr = value.transpose();
        #[cfg(test)]
        crate::test_utils::assert_approx_eq!(tr.w_axis, Vec4::W);
        Self {
            x_row: tr.x_axis.to_array(),
            y_row: tr.y_axis.to_array(),
            z_row: tr.z_axis.to_array(),
        }
    }
}

impl From<&Mat4> for GpuCompressedTransform {
    fn from(value: &Mat4) -> Self {
        let tr = value.transpose();
        #[cfg(test)]
        crate::test_utils::assert_approx_eq!(tr.w_axis, Vec4::W);
        Self {
            x_row: tr.x_axis.to_array(),
            y_row: tr.y_axis.to_array(),
            z_row: tr.z_axis.to_array(),
        }
    }
}

impl GpuCompressedTransform {
    /// Returns the translation as represented by this transform.
    #[allow(dead_code)]
    pub fn translation(&self) -> Vec3 {
        Vec3 {
            x: self.x_row[3],
            y: self.y_row[3],
            z: self.z_row[3],
        }
    }
}

/// Extension trait for shader types stored in a WGSL storage buffer.
pub(crate) trait StorageType {
    /// Get the aligned size, in bytes, of this type such that it aligns to the
    /// given alignment, in bytes.
    ///
    /// This is mainly used to align GPU types to device requirements.
    fn aligned_size(alignment: u32) -> NonZeroU64;

    /// Get the WGSL padding code to append to the GPU struct to align it.
    ///
    /// This is useful if the struct needs to be bound directly with a dynamic
    /// bind group offset, which requires the offset to be a multiple of a GPU
    /// device specific alignment value.
    fn padding_code(alignment: u32) -> String;
}

impl<T: ShaderType> StorageType for T {
    fn aligned_size(alignment: u32) -> NonZeroU64 {
        NonZeroU64::new(T::min_size().get().next_multiple_of(alignment as u64)).unwrap()
    }

    fn padding_code(alignment: u32) -> String {
        let aligned_size = T::aligned_size(alignment);
        trace!(
            "Aligning {} to {} bytes as device limits requires. Orignal size: {} bytes. Aligned size: {} bytes.",
            std::any::type_name::<T>(),
            alignment,
            T::min_size().get(),
            aligned_size
        );

        // We need to pad the Spawner WGSL struct based on the device padding so that we
        // can use it as an array element but also has a direct struct binding.
        if T::min_size() != aligned_size {
            let padding_size = aligned_size.get() - T::min_size().get();
            assert!(padding_size % 4 == 0);
            format!("padding: array<u32, {}>", padding_size / 4)
        } else {
            "".to_string()
        }
    }
}

/// GPU representation of spawner parameters.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, Pod, Zeroable, ShaderType)]
pub(crate) struct GpuSpawnerParams {
    /// Transform of the effect (origin of the emitter). This is either added to
    /// emitted particles at spawn time, if the effect simulated in world
    /// space, or to all simulated particles during rendering if the effect is
    /// simulated in local space.
    transform: GpuCompressedTransform,
    /// Inverse of [`transform`], stored with the same convention.
    ///
    /// [`transform`]: Self::transform
    inverse_transform: GpuCompressedTransform,
    /// Number of particles to spawn this frame.
    spawn: i32,
    /// Spawn seed, for randomized modifiers.
    seed: u32,
    /// Index of the pong (read) buffer for indirect indices, used by the render
    /// shader to fetch particles and render them. Only temporarily stored
    /// between indirect and render passes, and overwritten each frame by CPU
    /// upload. This is mostly a hack to transfer a value between those 2
    /// compute passes.
    render_pong: u32,
    /// Index of the [`GpuEffectMetadata`] for this effect.
    effect_metadata_index: u32,
    /// Index of the [`GpuDrawIndirect`] or [`GpuDrawIndexedIndirect`] for this
    /// effect.
    draw_indirect_index: u32,
}

/// GPU representation of an indirect compute dispatch input.
///
/// Note that unlike most other data structure, this doesn't need to be aligned
/// (except for the default 4-byte align for most GPU types) to any uniform or
/// storage buffer offset alignment, because the buffer storing this is only
/// ever used as input to indirect dispatch commands, and never bound as a
/// shader resource.
///
/// See https://docs.rs/wgpu/latest/wgpu/util/struct.DispatchIndirectArgs.html.
#[repr(C)]
#[derive(Debug, Clone, Copy, Pod, Zeroable, ShaderType)]
pub struct GpuDispatchIndirectArgs {
    pub x: u32,
    pub y: u32,
    pub z: u32,
}

impl Default for GpuDispatchIndirectArgs {
    fn default() -> Self {
        Self { x: 0, y: 1, z: 1 }
    }
}

/// GPU representation of an indirect (non-indexed) render input.
///
/// Note that unlike most other data structure, this doesn't need to be aligned
/// (except for the default 4-byte align for most GPU types) to any uniform or
/// storage buffer offset alignment, because the buffer storing this is only
/// ever used as input to indirect render commands, and never bound as a shader
/// resource.
///
/// See https://docs.rs/wgpu/latest/wgpu/util/struct.DrawIndirectArgs.html.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Pod, Zeroable, ShaderType)]
pub struct GpuDrawIndirectArgs {
    pub vertex_count: u32,
    pub instance_count: u32,
    pub first_vertex: u32,
    pub first_instance: u32,
}

impl Default for GpuDrawIndirectArgs {
    fn default() -> Self {
        Self {
            vertex_count: 0,
            instance_count: 1,
            first_vertex: 0,
            first_instance: 0,
        }
    }
}

/// GPU representation of an indirect indexed render input.
///
/// Note that unlike most other data structure, this doesn't need to be aligned
/// (except for the default 4-byte align for most GPU types) to any uniform or
/// storage buffer offset alignment, because the buffer storing this is only
/// ever used as input to indirect render commands, and never bound as a shader
/// resource.
///
/// See https://docs.rs/wgpu/latest/wgpu/util/struct.DrawIndexedIndirectArgs.html.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Pod, Zeroable, ShaderType)]
pub struct GpuDrawIndexedIndirectArgs {
    pub index_count: u32,
    pub instance_count: u32,
    pub first_index: u32,
    pub base_vertex: i32,
    pub first_instance: u32,
}

impl Default for GpuDrawIndexedIndirectArgs {
    fn default() -> Self {
        Self {
            index_count: 0,
            instance_count: 1,
            first_index: 0,
            base_vertex: 0,
            first_instance: 0,
        }
    }
}

/// Stores metadata about each particle effect.
///
/// This is written by the CPU and read by the GPU.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Pod, Zeroable, ShaderType)]
pub struct GpuEffectMetadata {
    //
    // Some runtime variables modified on GPU only (capacity is constant)
    /// Effect capacity, in number of particles.
    pub capacity: u32,
    // Additional data not part of the required draw indirect args
    /// Number of alive particles.
    pub alive_count: u32,
    /// Cached value of `alive_count` to cap threads in update pass.
    pub max_update: u32,
    /// Cached value of `dead_count` to cap threads in init pass.
    pub max_spawn: u32,
    /// Index of the ping buffer for particle indices. Init and update compute
    /// passes always write into the ping buffer and read from the pong buffer.
    /// The buffers are swapped (ping = 1 - ping) during the indirect dispatch.
    pub indirect_write_index: u32,

    //
    // Some real metadata values depending on where the effect instance is allocated.
    /// Index of the [`GpuDispatchIndirect`] struct inside the global
    /// [`EffectsMeta::dispatch_indirect_buffer`].
    pub indirect_dispatch_index: u32,
    /// Index of the [`GpuDrawIndirect`] or [`GpuDrawIndexedIndirect`] struct
    /// inside the global [`EffectsMeta::draw_indirect_buffer`] or
    /// [`EffectsMeta::draw_indexed_indirect_buffer`]. The actual buffer depends
    /// on whether the mesh is indexed or not, which is stored in
    /// [`CachedMeshLocation`].
    pub indirect_draw_index: u32,
    /// Offset (in u32 count) of the init indirect dispatch struct inside its
    /// buffer. This avoids having to align those 16-byte structs to the GPU
    /// alignment (at least 32 bytes, even 256 bytes on some).
    pub init_indirect_dispatch_index: u32,
    /// Index of this effect into its parent's ChildInfo array
    /// ([`EffectChildren::effect_cache_ids`] and its associated GPU
    /// array). This starts at zero for the first child of each effect, and is
    /// only unique per parent, not globally. Only available if this effect is a
    /// child of another effect (i.e. if it has a parent).
    pub local_child_index: u32,
    /// For children, global index of the ChildInfo into the shared array.
    pub global_child_index: u32,
    /// For parents, base index of the their first ChildInfo into the shared
    /// array.
    pub base_child_index: u32,

    /// Particle stride, in number of u32.
    pub particle_stride: u32,
    /// Offset from the particle start to the first sort key, in number of u32.
    pub sort_key_offset: u32,
    /// Offset from the particle start to the second sort key, in number of u32.
    pub sort_key2_offset: u32,

    //
    // Again some runtime-only GPU-mutated data
    /// Atomic counter incremented each time a particle spawns. Useful for
    /// things like RIBBON_ID or any other use where a unique value is needed.
    /// The value loops back after some time, but unless some particle lives
    /// forever there's little chance of repetition.
    pub particle_counter: u32,
}

/// Single init fill dispatch item in an [`InitFillDispatchQueue`].
#[derive(Debug)]
pub(super) struct InitFillDispatchItem {
    /// Index of the source [`GpuChildInfo`] entry to read the event count from.
    pub global_child_index: u32,
    /// Index of the [`GpuDispatchIndirect`] entry to write the workgroup count
    /// to.
    pub dispatch_indirect_index: u32,
}

/// Queue of fill dispatch operations for the init indirect pass.
///
/// The queue stores the init fill dispatch operations for the current frame,
/// without the reference to the source and destination buffers, which may be
/// reallocated later in the frame. This allows enqueuing operations during the
/// prepare rendering phase, while deferring GPU buffer (re-)allocation to a
/// later stage.
#[derive(Debug, Default, Resource)]
pub(super) struct InitFillDispatchQueue {
    queue: Vec<InitFillDispatchItem>,
    submitted_queue_index: Option<u32>,
}

impl InitFillDispatchQueue {
    /// Clear the queue.
    #[inline]
    pub fn clear(&mut self) {
        self.queue.clear();
        self.submitted_queue_index = None;
    }

    /// Check if the queue is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.queue.is_empty()
    }

    /// Enqueue a new operation.
    #[inline]
    pub fn enqueue(&mut self, global_child_index: u32, dispatch_indirect_index: u32) {
        assert!(global_child_index != u32::MAX);
        self.queue.push(InitFillDispatchItem {
            global_child_index,
            dispatch_indirect_index,
        });
    }

    /// Submit pending operations for this frame.
    pub fn submit(
        &mut self,
        src_buffer: &Buffer,
        dst_buffer: &Buffer,
        gpu_buffer_operations: &mut GpuBufferOperations,
    ) {
        if self.queue.is_empty() {
            return;
        }

        // Sort by source. We can only batch if the destination is also contiguous, so
        // we can check with a linear walk if the source is already sorted.
        self.queue
            .sort_unstable_by_key(|item| item.global_child_index);

        let mut fill_queue = GpuBufferOperationQueue::new();

        // Batch and schedule all init indirect dispatch operations
        assert!(
            self.queue[0].global_child_index != u32::MAX,
            "Global child index not initialized"
        );
        let mut src_start = self.queue[0].global_child_index;
        let mut dst_start = self.queue[0].dispatch_indirect_index;
        let mut src_end = src_start + 1;
        let mut dst_end = dst_start + 1;
        let src_stride = GpuChildInfo::min_size().get() as u32 / 4;
        let dst_stride = GpuDispatchIndirectArgs::SHADER_SIZE.get() as u32 / 4;
        for i in 1..self.queue.len() {
            let InitFillDispatchItem {
                global_child_index: src,
                dispatch_indirect_index: dst,
            } = self.queue[i];
            if src != src_end || dst != dst_end {
                let count = src_end - src_start;
                debug_assert_eq!(count, dst_end - dst_start);
                let args = GpuBufferOperationArgs {
                    src_offset: src_start * src_stride + 1,
                    src_stride,
                    dst_offset: dst_start * dst_stride,
                    dst_stride,
                    count,
                };
                trace!(
                "enqueue_init_fill(): src:global_child_index={} dst:init_indirect_dispatch_index={} args={:?} src_buffer={:?} dst_buffer={:?}",
                src_start,
                dst_start,
                args,
                src_buffer.id(),
                dst_buffer.id(),
            );
                fill_queue.enqueue(
                    GpuBufferOperationType::FillDispatchArgs,
                    args,
                    src_buffer.clone(),
                    0,
                    None,
                    dst_buffer.clone(),
                    0,
                    None,
                );
                src_start = src;
                dst_start = dst;
            }
            src_end = src + 1;
            dst_end = dst + 1;
        }
        if src_start != src_end || dst_start != dst_end {
            let count = src_end - src_start;
            debug_assert_eq!(count, dst_end - dst_start);
            let args = GpuBufferOperationArgs {
                src_offset: src_start * src_stride + 1,
                src_stride,
                dst_offset: dst_start * dst_stride,
                dst_stride,
                count,
            };
            trace!(
            "IFDA::submit(): src:global_child_index={} dst:init_indirect_dispatch_index={} args={:?} src_buffer={:?} dst_buffer={:?}",
            src_start,
            dst_start,
            args,
            src_buffer.id(),
            dst_buffer.id(),
        );
            fill_queue.enqueue(
                GpuBufferOperationType::FillDispatchArgs,
                args,
                src_buffer.clone(),
                0,
                None,
                dst_buffer.clone(),
                0,
                None,
            );
        }

        debug_assert!(self.submitted_queue_index.is_none());
        if !fill_queue.operation_queue.is_empty() {
            self.submitted_queue_index = Some(gpu_buffer_operations.submit(fill_queue));
        }
    }
}

/// Compute pipeline to run the `vfx_indirect` dispatch workgroup calculation
/// shader.
#[derive(Resource)]
pub(crate) struct DispatchIndirectPipeline {
    /// Layout of bind group sim_params@0.
    sim_params_bind_group_layout: BindGroupLayout,
    /// Layout of bind group effect_metadata@1.
    effect_metadata_bind_group_layout: BindGroupLayout,
    /// Layout of bind group spawner@2.
    spawner_bind_group_layout: BindGroupLayout,
    /// Layout of bind group child_infos@3.
    child_infos_bind_group_layout: BindGroupLayout,
    /// Shader when no GPU events are used (no bind group @3).
    indirect_shader_noevent: Handle<Shader>,
    /// Shader when GPU events are used (bind group @3 present).
    indirect_shader_events: Handle<Shader>,
}

impl FromWorld for DispatchIndirectPipeline {
    fn from_world(world: &mut World) -> Self {
        let render_device = world.get_resource::<RenderDevice>().unwrap();

        // Copy the indirect pipeline shaders to self, because we can't access anything
        // else during pipeline specialization.
        let (indirect_shader_noevent, indirect_shader_events) = {
            let effects_meta = world.get_resource::<EffectsMeta>().unwrap();
            (
                effects_meta.indirect_shader_noevent.clone(),
                effects_meta.indirect_shader_events.clone(),
            )
        };

        let storage_alignment = render_device.limits().min_storage_buffer_offset_alignment;
        let effect_metadata_size = GpuEffectMetadata::aligned_size(storage_alignment);
        let spawner_min_binding_size = GpuSpawnerParams::aligned_size(storage_alignment);

        // @group(0) @binding(0) var<uniform> sim_params : SimParams;
        trace!("GpuSimParams: min_size={}", GpuSimParams::min_size());
        let sim_params_bind_group_layout = render_device.create_bind_group_layout(
            "hanabi:bind_group_layout:dispatch_indirect:sim_params",
            &[BindGroupLayoutEntry {
                binding: 0,
                visibility: ShaderStages::COMPUTE,
                ty: BindingType::Buffer {
                    ty: BufferBindingType::Uniform,
                    has_dynamic_offset: false,
                    min_binding_size: Some(GpuSimParams::min_size()),
                },
                count: None,
            }],
        );

        trace!(
            "GpuEffectMetadata: min_size={} padded_size={}",
            GpuEffectMetadata::min_size(),
            effect_metadata_size,
        );
        let effect_metadata_bind_group_layout = render_device.create_bind_group_layout(
            "hanabi:bind_group_layout:dispatch_indirect:effect_metadata@1",
            &[
                // @group(0) @binding(0) var<storage, read_write> effect_metadata_buffer :
                // array<u32>;
                BindGroupLayoutEntry {
                    binding: 0,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: Some(effect_metadata_size),
                    },
                    count: None,
                },
                // @group(0) @binding(1) var<storage, read_write> dispatch_indirect_buffer :
                // array<u32>;
                BindGroupLayoutEntry {
                    binding: 1,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: Some(
                            NonZeroU64::new(INDIRECT_INDEX_SIZE as u64).unwrap(),
                        ),
                    },
                    count: None,
                },
                // @group(0) @binding(2) var<storage, read_write> draw_indirect_buffer :
                // array<u32>;
                BindGroupLayoutEntry {
                    binding: 2,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: Some(GpuDrawIndexedIndirectArgs::SHADER_SIZE),
                    },
                    count: None,
                },
            ],
        );

        // @group(2) @binding(0) var<storage, read_write> spawner_buffer :
        // array<Spawner>;
        let spawner_bind_group_layout = render_device.create_bind_group_layout(
            "hanabi:bind_group_layout:dispatch_indirect:spawner@2",
            &[BindGroupLayoutEntry {
                binding: 0,
                visibility: ShaderStages::COMPUTE,
                ty: BindingType::Buffer {
                    ty: BufferBindingType::Storage { read_only: false },
                    has_dynamic_offset: false,
                    min_binding_size: Some(spawner_min_binding_size),
                },
                count: None,
            }],
        );

        // @group(3) @binding(0) var<storage, read_write> child_info_buffer :
        // ChildInfoBuffer;
        let child_infos_bind_group_layout = render_device.create_bind_group_layout(
            "hanabi:bind_group_layout:dispatch_indirect:child_infos",
            &[BindGroupLayoutEntry {
                binding: 0,
                visibility: ShaderStages::COMPUTE,
                ty: BindingType::Buffer {
                    ty: BufferBindingType::Storage { read_only: false },
                    has_dynamic_offset: false,
                    min_binding_size: Some(GpuChildInfo::min_size()),
                },
                count: None,
            }],
        );

        Self {
            sim_params_bind_group_layout,
            effect_metadata_bind_group_layout,
            spawner_bind_group_layout,
            child_infos_bind_group_layout,
            indirect_shader_noevent,
            indirect_shader_events,
        }
    }
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct DispatchIndirectPipelineKey {
    /// True if any allocated effect uses GPU spawn events. In that case, the
    /// pipeline is specialized to clear all GPU events each frame after the
    /// indirect init pass consumed them to spawn particles, and before the
    /// update pass optionally produce more events.
    /// Key: HAS_GPU_SPAWN_EVENTS
    has_events: bool,
}

impl SpecializedComputePipeline for DispatchIndirectPipeline {
    type Key = DispatchIndirectPipelineKey;

    fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor {
        trace!(
            "Specializing indirect pipeline (has_events={})",
            key.has_events
        );

        let mut shader_defs = Vec::with_capacity(2);
        // Spawner struct needs to be defined with padding, because it's bound as an
        // array
        shader_defs.push("SPAWNER_PADDING".into());
        if key.has_events {
            shader_defs.push("HAS_GPU_SPAWN_EVENTS".into());
        }

        let mut layout = Vec::with_capacity(4);
        layout.push(self.sim_params_bind_group_layout.clone());
        layout.push(self.effect_metadata_bind_group_layout.clone());
        layout.push(self.spawner_bind_group_layout.clone());
        if key.has_events {
            layout.push(self.child_infos_bind_group_layout.clone());
        }

        let label = format!(
            "hanabi:compute_pipeline:dispatch_indirect{}",
            if key.has_events {
                "_events"
            } else {
                "_noevent"
            }
        );

        ComputePipelineDescriptor {
            label: Some(label.into()),
            layout,
            shader: if key.has_events {
                self.indirect_shader_events.clone()
            } else {
                self.indirect_shader_noevent.clone()
            },
            shader_defs,
            entry_point: "main".into(),
            push_constant_ranges: vec![],
            zero_initialize_workgroup_memory: false,
        }
    }
}

/// Type of GPU buffer operation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum GpuBufferOperationType {
    /// Clear the destination buffer to zero.
    ///
    /// The source parameters [`src_offset`] and [`src_stride`] are ignored.
    ///
    /// [`src_offset`]: crate::GpuBufferOperationArgs::src_offset
    /// [`src_stride`]: crate::GpuBufferOperationArgs::src_stride
    #[allow(dead_code)]
    Zero,
    /// Copy a source buffer into a destination buffer.
    ///
    /// The source can have a stride between each `u32` copied. The destination
    /// is always a contiguous buffer.
    #[allow(dead_code)]
    Copy,
    /// Fill the arguments for a later indirect dispatch call.
    ///
    /// This is similar to a copy, but will round up the source value to the
    /// number of threads per workgroup (64) before writing it into the
    /// destination.
    FillDispatchArgs,
    /// Fill the arguments for a later indirect dispatch call.
    ///
    /// This is the same as [`FillDispatchArgs`], but the source element count
    /// is read from the fourth entry in the destination buffer directly,
    /// and the source buffer and source arguments are unused.
    #[allow(dead_code)]
    FillDispatchArgsSelf,
}

/// GPU representation of the arguments of a block operation on a buffer.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Pod, Zeroable, ShaderType)]
pub(super) struct GpuBufferOperationArgs {
    /// Offset, as u32 count, where the operation starts in the source buffer.
    src_offset: u32,
    /// Stride, as u32 count, between elements in the source buffer.
    src_stride: u32,
    /// Offset, as u32 count, where the operation starts in the destination
    /// buffer.
    dst_offset: u32,
    /// Stride, as u32 count, between elements in the destination buffer.
    dst_stride: u32,
    /// Number of u32 elements to process for this operation.
    count: u32,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct QueuedOperationBindGroupKey {
    src_buffer: BufferId,
    src_binding_size: Option<NonZeroU32>,
    dst_buffer: BufferId,
    dst_binding_size: Option<NonZeroU32>,
}

#[derive(Debug, Clone)]
struct QueuedOperation {
    op: GpuBufferOperationType,
    args_index: u32,
    src_buffer: Buffer,
    src_binding_offset: u32,
    src_binding_size: Option<NonZeroU32>,
    dst_buffer: Buffer,
    dst_binding_offset: u32,
    dst_binding_size: Option<NonZeroU32>,
}

impl From<&QueuedOperation> for QueuedOperationBindGroupKey {
    fn from(value: &QueuedOperation) -> Self {
        Self {
            src_buffer: value.src_buffer.id(),
            src_binding_size: value.src_binding_size,
            dst_buffer: value.dst_buffer.id(),
            dst_binding_size: value.dst_binding_size,
        }
    }
}

/// Queue of GPU buffer operations.
///
/// The queue records a series of ordered operations on GPU buffers. It can be
/// submitted for this frame via [`GpuBufferOperations::submit()`], and
/// subsequently dispatched as a compute pass via
/// [`GpuBufferOperations::dispatch()`].
pub struct GpuBufferOperationQueue {
    /// Operation arguments.
    args: Vec<GpuBufferOperationArgs>,
    /// Queued operations.
    operation_queue: Vec<QueuedOperation>,
}

impl GpuBufferOperationQueue {
    /// Create a new empty queue.
    pub fn new() -> Self {
        Self {
            args: vec![],
            operation_queue: vec![],
        }
    }

    /// Enqueue a generic operation.
    pub fn enqueue(
        &mut self,
        op: GpuBufferOperationType,
        args: GpuBufferOperationArgs,
        src_buffer: Buffer,
        src_binding_offset: u32,
        src_binding_size: Option<NonZeroU32>,
        dst_buffer: Buffer,
        dst_binding_offset: u32,
        dst_binding_size: Option<NonZeroU32>,
    ) -> u32 {
        trace!(
            "Queue {:?} op: args={:?} src_buffer={:?} src_binding_offset={} src_binding_size={:?} dst_buffer={:?} dst_binding_offset={} dst_binding_size={:?}",
            op,
            args,
            src_buffer,
            src_binding_offset,
            src_binding_size,
            dst_buffer,
            dst_binding_offset,
            dst_binding_size,
        );
        let args_index = self.args.len() as u32;
        self.args.push(args);
        self.operation_queue.push(QueuedOperation {
            op,
            args_index,
            src_buffer,
            src_binding_offset,
            src_binding_size,
            dst_buffer,
            dst_binding_offset,
            dst_binding_size,
        });
        args_index
    }
}

/// GPU buffer operations for this frame.
///
/// This resource contains a list of submitted [`GpuBufferOperationQueue`] for
/// the current frame, and ensures the bind groups for those operations are up
/// to date.
#[derive(Resource)]
pub(super) struct GpuBufferOperations {
    /// Arguments for the buffer operations submitted this frame.
    args_buffer: AlignedBufferVec<GpuBufferOperationArgs>,

    /// Bind groups for the submitted operations.
    bind_groups: HashMap<QueuedOperationBindGroupKey, BindGroup>,

    /// Submitted queues for this frame.
    queues: Vec<Vec<QueuedOperation>>,
}

impl FromWorld for GpuBufferOperations {
    fn from_world(world: &mut World) -> Self {
        let render_device = world.get_resource::<RenderDevice>().unwrap();
        let align = render_device.limits().min_uniform_buffer_offset_alignment;
        Self::new(align)
    }
}

impl GpuBufferOperations {
    pub fn new(align: u32) -> Self {
        let args_buffer = AlignedBufferVec::new(
            BufferUsages::UNIFORM,
            Some(NonZeroU64::new(align as u64).unwrap()),
            Some("hanabi:buffer:gpu_operation_args".to_string()),
        );
        Self {
            args_buffer,
            bind_groups: default(),
            queues: vec![],
        }
    }

    /// Clear the queue and begin recording operations for a new frame.
    pub fn begin_frame(&mut self) {
        self.args_buffer.clear();
        self.bind_groups.clear(); // for now; might consider caching frame-to-frame
        self.queues.clear();
    }

    /// Submit a recorded queue.
    ///
    /// # Panics
    ///
    /// Panics if the queue submitted is empty.
    pub fn submit(&mut self, mut queue: GpuBufferOperationQueue) -> u32 {
        assert!(!queue.operation_queue.is_empty());
        let queue_index = self.queues.len() as u32;
        for qop in &mut queue.operation_queue {
            qop.args_index = self.args_buffer.push(queue.args[qop.args_index as usize]) as u32;
        }
        self.queues.push(queue.operation_queue);
        queue_index
    }

    /// Finish recording operations for this frame, and schedule buffer writes
    /// to GPU.
    pub fn end_frame(&mut self, device: &RenderDevice, render_queue: &RenderQueue) {
        assert_eq!(
            self.args_buffer.len(),
            self.queues.iter().fold(0, |len, q| len + q.len())
        );

        // Upload to GPU buffer
        self.args_buffer.write_buffer(device, render_queue);
    }

    /// Create all necessary bind groups for all queued operations.
    pub fn create_bind_groups(
        &mut self,
        render_device: &RenderDevice,
        utils_pipeline: &UtilsPipeline,
    ) {
        trace!(
            "Creating bind groups for {} operation queues...",
            self.queues.len()
        );
        for queue in &self.queues {
            for qop in queue {
                let key: QueuedOperationBindGroupKey = qop.into();
                self.bind_groups.entry(key).or_insert_with(|| {
                    let src_id: NonZeroU32 = qop.src_buffer.id().into();
                    let dst_id: NonZeroU32 = qop.dst_buffer.id().into();
                    let label = format!("hanabi:bind_group:util_{}_{}", src_id.get(), dst_id.get());
                    let use_dynamic_offset = matches!(qop.op, GpuBufferOperationType::FillDispatchArgs);
                    let bind_group_layout =
                        utils_pipeline.bind_group_layout(qop.op, use_dynamic_offset);
                    let (src_offset, dst_offset) = if use_dynamic_offset {
                        (0, 0)
                    } else {
                        (qop.src_binding_offset as u64, qop.dst_binding_offset as u64)
                    };
                    trace!(
                        "-> Creating new bind group '{}': src#{} (@+{}B:{:?}B) dst#{} (@+{}B:{:?}B)",
                        label,
                        src_id,
                        src_offset,
                        qop.src_binding_size,
                        dst_id,
                        dst_offset,
                        qop.dst_binding_size,
                    );
                    render_device.create_bind_group(
                        Some(&label[..]),
                        bind_group_layout,
                        &[
                            BindGroupEntry {
                                binding: 0,
                                resource: BindingResource::Buffer(BufferBinding {
                                    buffer: self.args_buffer.buffer().unwrap(),
                                    offset: 0,
                                    // We always bind exactly 1 row of arguments
                                    size: Some(
                                        NonZeroU64::new(self.args_buffer.aligned_size() as u64)
                                            .unwrap(),
                                    ),
                                }),
                            },
                            BindGroupEntry {
                                binding: 1,
                                resource: BindingResource::Buffer(BufferBinding {
                                    buffer: &qop.src_buffer,
                                    offset: src_offset,
                                    size: qop.src_binding_size.map(Into::into),
                                }),
                            },
                            BindGroupEntry {
                                binding: 2,
                                resource: BindingResource::Buffer(BufferBinding {
                                    buffer: &qop.dst_buffer,
                                    offset: dst_offset,
                                    size: qop.dst_binding_size.map(Into::into),
                                }),
                            },
                        ],
                    )
                });
            }
        }
    }

    /// Dispatch a submitted queue by index.
    ///
    /// This creates a new, optionally labelled, compute pass, and records to
    /// the render context a series of compute workgroup dispatch, one for each
    /// enqueued operation.
    ///
    /// The compute pipeline(s) used for each operation are fetched from the
    /// [`UtilsPipeline`], and the associated bind groups are used from a
    /// previous call to [`Self::create_bind_groups()`].
    pub fn dispatch(
        &self,
        index: u32,
        render_context: &mut RenderContext,
        utils_pipeline: &UtilsPipeline,
        compute_pass_label: Option<&str>,
    ) {
        let queue = &self.queues[index as usize];
        trace!(
            "Recording GPU commands for queue #{} ({} ops)...",
            index,
            queue.len(),
        );

        if queue.is_empty() {
            return;
        }

        let mut compute_pass =
            render_context
                .command_encoder()
                .begin_compute_pass(&ComputePassDescriptor {
                    label: compute_pass_label,
                    timestamp_writes: None,
                });

        let mut prev_op = None;
        for qop in queue {
            trace!("qop={:?}", qop);

            if Some(qop.op) != prev_op {
                compute_pass.set_pipeline(utils_pipeline.get_pipeline(qop.op));
                prev_op = Some(qop.op);
            }

            let key: QueuedOperationBindGroupKey = qop.into();
            if let Some(bind_group) = self.bind_groups.get(&key) {
                let args_offset = self.args_buffer.dynamic_offset(qop.args_index as usize);
                let use_dynamic_offset = matches!(qop.op, GpuBufferOperationType::FillDispatchArgs);
                let (src_offset, dst_offset) = if use_dynamic_offset {
                    (qop.src_binding_offset, qop.dst_binding_offset)
                } else {
                    (0, 0)
                };
                compute_pass.set_bind_group(0, bind_group, &[args_offset, src_offset, dst_offset]);
                trace!(
                    "set bind group with args_offset=+{}B src_offset=+{}B dst_offset=+{}B",
                    args_offset,
                    src_offset,
                    dst_offset
                );
            } else {
                error!("GPU fill dispatch buffer operation bind group not found for buffers src#{:?} dst#{:?}", qop.src_buffer.id(), qop.dst_buffer.id());
                continue;
            }

            // Dispatch the operations for this buffer
            const WORKGROUP_SIZE: u32 = 64;
            let num_ops = 1u32; // TODO - batching!
            let workgroup_count = num_ops.div_ceil(WORKGROUP_SIZE);
            compute_pass.dispatch_workgroups(workgroup_count, 1, 1);
            trace!(
                "-> fill dispatch compute dispatched: num_ops={} workgroup_count={}",
                num_ops,
                workgroup_count
            );
        }
    }
}

/// Compute pipeline to run the `vfx_utils` shader.
#[derive(Resource)]
pub(crate) struct UtilsPipeline {
    #[allow(dead_code)]
    bind_group_layout: BindGroupLayout,
    bind_group_layout_dyn: BindGroupLayout,
    bind_group_layout_no_src: BindGroupLayout,
    pipelines: [ComputePipeline; 4],
}

impl FromWorld for UtilsPipeline {
    fn from_world(world: &mut World) -> Self {
        let render_device = world.get_resource::<RenderDevice>().unwrap();

        let bind_group_layout = render_device.create_bind_group_layout(
            "hanabi:bind_group_layout:utils",
            &[
                BindGroupLayoutEntry {
                    binding: 0,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Uniform,
                        has_dynamic_offset: false,
                        min_binding_size: Some(GpuBufferOperationArgs::min_size()),
                    },
                    count: None,
                },
                BindGroupLayoutEntry {
                    binding: 1,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: false,
                        min_binding_size: NonZeroU64::new(4),
                    },
                    count: None,
                },
                BindGroupLayoutEntry {
                    binding: 2,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: NonZeroU64::new(4),
                    },
                    count: None,
                },
            ],
        );

        let pipeline_layout = render_device.create_pipeline_layout(&PipelineLayoutDescriptor {
            label: Some("hanabi:pipeline_layout:utils"),
            bind_group_layouts: &[&bind_group_layout],
            push_constant_ranges: &[],
        });

        let bind_group_layout_dyn = render_device.create_bind_group_layout(
            "hanabi:bind_group_layout:utils_dyn",
            &[
                BindGroupLayoutEntry {
                    binding: 0,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Uniform,
                        has_dynamic_offset: true,
                        min_binding_size: Some(GpuBufferOperationArgs::min_size()),
                    },
                    count: None,
                },
                BindGroupLayoutEntry {
                    binding: 1,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Storage { read_only: true },
                        has_dynamic_offset: true,
                        min_binding_size: NonZeroU64::new(4),
                    },
                    count: None,
                },
                BindGroupLayoutEntry {
                    binding: 2,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: true,
                        min_binding_size: NonZeroU64::new(4),
                    },
                    count: None,
                },
            ],
        );

        let pipeline_layout_dyn = render_device.create_pipeline_layout(&PipelineLayoutDescriptor {
            label: Some("hanabi:pipeline_layout:utils_dyn"),
            bind_group_layouts: &[&bind_group_layout_dyn],
            push_constant_ranges: &[],
        });

        let bind_group_layout_no_src = render_device.create_bind_group_layout(
            "hanabi:bind_group_layout:utils_no_src",
            &[
                BindGroupLayoutEntry {
                    binding: 0,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Uniform,
                        has_dynamic_offset: false,
                        min_binding_size: Some(GpuBufferOperationArgs::min_size()),
                    },
                    count: None,
                },
                BindGroupLayoutEntry {
                    binding: 2,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: NonZeroU64::new(4),
                    },
                    count: None,
                },
            ],
        );

        let pipeline_layout_no_src =
            render_device.create_pipeline_layout(&PipelineLayoutDescriptor {
                label: Some("hanabi:pipeline_layout:utils_no_src"),
                bind_group_layouts: &[&bind_group_layout_no_src],
                push_constant_ranges: &[],
            });

        let shader_code = include_str!("vfx_utils.wgsl");

        // Resolve imports. Because we don't insert this shader into Bevy' pipeline
        // cache, we don't get that part "for free", so we have to do it manually here.
        let shader_source = {
            let mut composer = Composer::default();

            let shader_defs = default();

            match composer.make_naga_module(NagaModuleDescriptor {
                source: shader_code,
                file_path: "vfx_utils.wgsl",
                shader_defs,
                ..Default::default()
            }) {
                Ok(naga_module) => ShaderSource::Naga(Cow::Owned(naga_module)),
                Err(compose_error) => panic!(
                    "Failed to compose vfx_utils.wgsl, naga_oil returned: {}",
                    compose_error.emit_to_string(&composer)
                ),
            }
        };

        debug!("Create utils shader module:\n{}", shader_code);
        #[allow(unsafe_code)]
        let shader_module = unsafe {
            render_device.create_shader_module(ShaderModuleDescriptor {
                label: Some("hanabi:shader:utils"),
                source: shader_source,
            })
        };

        trace!("Create vfx_utils pipelines...");
        let dummy = std::collections::HashMap::<String, f64>::new();
        let zero_pipeline = render_device.create_compute_pipeline(&RawComputePipelineDescriptor {
            label: Some("hanabi:compute_pipeline:zero_buffer"),
            layout: Some(&pipeline_layout),
            module: &shader_module,
            entry_point: Some("zero_buffer"),
            compilation_options: PipelineCompilationOptions {
                constants: &dummy,
                zero_initialize_workgroup_memory: false,
            },
            cache: None,
        });
        let copy_pipeline = render_device.create_compute_pipeline(&RawComputePipelineDescriptor {
            label: Some("hanabi:compute_pipeline:copy_buffer"),
            layout: Some(&pipeline_layout_dyn),
            module: &shader_module,
            entry_point: Some("copy_buffer"),
            compilation_options: PipelineCompilationOptions {
                constants: &dummy,
                zero_initialize_workgroup_memory: false,
            },
            cache: None,
        });
        let fill_dispatch_args_pipeline =
            render_device.create_compute_pipeline(&RawComputePipelineDescriptor {
                label: Some("hanabi:compute_pipeline:fill_dispatch_args"),
                layout: Some(&pipeline_layout_dyn),
                module: &shader_module,
                entry_point: Some("fill_dispatch_args"),
                compilation_options: PipelineCompilationOptions {
                    constants: &dummy,
                    zero_initialize_workgroup_memory: false,
                },
                cache: None,
            });
        let fill_dispatch_args_self_pipeline =
            render_device.create_compute_pipeline(&RawComputePipelineDescriptor {
                label: Some("hanabi:compute_pipeline:fill_dispatch_args_self"),
                layout: Some(&pipeline_layout_no_src),
                module: &shader_module,
                entry_point: Some("fill_dispatch_args_self"),
                compilation_options: PipelineCompilationOptions {
                    constants: &dummy,
                    zero_initialize_workgroup_memory: false,
                },
                cache: None,
            });

        Self {
            bind_group_layout,
            bind_group_layout_dyn,
            bind_group_layout_no_src,
            pipelines: [
                zero_pipeline,
                copy_pipeline,
                fill_dispatch_args_pipeline,
                fill_dispatch_args_self_pipeline,
            ],
        }
    }
}

impl UtilsPipeline {
    fn get_pipeline(&self, op: GpuBufferOperationType) -> &ComputePipeline {
        match op {
            GpuBufferOperationType::Zero => &self.pipelines[0],
            GpuBufferOperationType::Copy => &self.pipelines[1],
            GpuBufferOperationType::FillDispatchArgs => &self.pipelines[2],
            GpuBufferOperationType::FillDispatchArgsSelf => &self.pipelines[3],
        }
    }

    fn bind_group_layout(
        &self,
        op: GpuBufferOperationType,
        with_dynamic_offsets: bool,
    ) -> &BindGroupLayout {
        if op == GpuBufferOperationType::FillDispatchArgsSelf {
            assert!(
                !with_dynamic_offsets,
                "FillDispatchArgsSelf op cannot use dynamic offset (not implemented)"
            );
            &self.bind_group_layout_no_src
        } else if with_dynamic_offsets {
            &self.bind_group_layout_dyn
        } else {
            &self.bind_group_layout
        }
    }
}

#[derive(Resource)]
pub(crate) struct ParticlesInitPipeline {
    sim_params_layout: BindGroupLayout,

    // Temporary values passed to specialize()
    // https://github.com/bevyengine/bevy/issues/17132
    /// Layout of the particle@1 bind group this pipeline was specialized with.
    temp_particle_bind_group_layout: Option<BindGroupLayout>,
    /// Layout of the spawner@2 bind group this pipeline was specialized with.
    temp_spawner_bind_group_layout: Option<BindGroupLayout>,
    /// Layout of the metadata@3 bind group this pipeline was specialized with.
    temp_metadata_bind_group_layout: Option<BindGroupLayout>,
}

impl FromWorld for ParticlesInitPipeline {
    fn from_world(world: &mut World) -> Self {
        let render_device = world.get_resource::<RenderDevice>().unwrap();

        let sim_params_layout = render_device.create_bind_group_layout(
            "hanabi:bind_group_layout:vfx_init:sim_params@0",
            // @group(0) @binding(0) var<uniform> sim_params: SimParams;
            &[BindGroupLayoutEntry {
                binding: 0,
                visibility: ShaderStages::COMPUTE,
                ty: BindingType::Buffer {
                    ty: BufferBindingType::Uniform,
                    has_dynamic_offset: false,
                    min_binding_size: Some(GpuSimParams::min_size()),
                },
                count: None,
            }],
        );

        Self {
            sim_params_layout,
            temp_particle_bind_group_layout: None,
            temp_spawner_bind_group_layout: None,
            temp_metadata_bind_group_layout: None,
        }
    }
}

bitflags! {
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct ParticleInitPipelineKeyFlags: u8 {
        //const CLONE = (1u8 << 0); // DEPRECATED
        const ATTRIBUTE_PREV = (1u8 << 1);
        const ATTRIBUTE_NEXT = (1u8 << 2);
        const CONSUME_GPU_SPAWN_EVENTS = (1u8 << 3);
    }
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct ParticleInitPipelineKey {
    /// Compute shader, with snippets applied, but not preprocessed yet.
    shader: Handle<Shader>,
    /// Minimum binding size in bytes for the particle layout buffer.
    particle_layout_min_binding_size: NonZeroU32,
    /// Minimum binding size in bytes for the particle layout buffer of the
    /// parent effect, if any.
    /// Key: READ_PARENT_PARTICLE
    parent_particle_layout_min_binding_size: Option<NonZeroU32>,
    /// Pipeline flags.
    flags: ParticleInitPipelineKeyFlags,
    /// Layout of the particle@1 bind group this pipeline was specialized with.
    // Note: can't directly store BindGroupLayout because it's not Eq nor Hash
    particle_bind_group_layout_id: BindGroupLayoutId,
    /// Layout of the spawner@2 bind group this pipeline was specialized with.
    // Note: can't directly store BindGroupLayout because it's not Eq nor Hash
    spawner_bind_group_layout_id: BindGroupLayoutId,
    /// Layout of the metadata@3 bind group this pipeline was specialized with.
    // Note: can't directly store BindGroupLayout because it's not Eq nor Hash
    metadata_bind_group_layout_id: BindGroupLayoutId,
}

impl SpecializedComputePipeline for ParticlesInitPipeline {
    type Key = ParticleInitPipelineKey;

    fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor {
        // We use the hash to correlate the key content with the GPU resource name
        let hash = calc_hash(&key);
        trace!("Specializing init pipeline {hash:016X} with key {key:?}");

        let mut shader_defs = Vec::with_capacity(4);
        if key
            .flags
            .contains(ParticleInitPipelineKeyFlags::ATTRIBUTE_PREV)
        {
            shader_defs.push("ATTRIBUTE_PREV".into());
        }
        if key
            .flags
            .contains(ParticleInitPipelineKeyFlags::ATTRIBUTE_NEXT)
        {
            shader_defs.push("ATTRIBUTE_NEXT".into());
        }
        let consume_gpu_spawn_events = key
            .flags
            .contains(ParticleInitPipelineKeyFlags::CONSUME_GPU_SPAWN_EVENTS);
        if consume_gpu_spawn_events {
            shader_defs.push("CONSUME_GPU_SPAWN_EVENTS".into());
        }
        // FIXME - for now this needs to keep in sync with consume_gpu_spawn_events
        if key.parent_particle_layout_min_binding_size.is_some() {
            assert!(consume_gpu_spawn_events);
            shader_defs.push("READ_PARENT_PARTICLE".into());
        } else {
            assert!(!consume_gpu_spawn_events);
        }

        // This should always be valid when specialize() is called, by design. This is
        // how we pass the value to specialize() to work around the lack of access to
        // external data.
        // https://github.com/bevyengine/bevy/issues/17132
        let particle_bind_group_layout = self.temp_particle_bind_group_layout.as_ref().unwrap();
        assert_eq!(
            particle_bind_group_layout.id(),
            key.particle_bind_group_layout_id
        );
        let spawner_bind_group_layout = self.temp_spawner_bind_group_layout.as_ref().unwrap();
        assert_eq!(
            spawner_bind_group_layout.id(),
            key.spawner_bind_group_layout_id
        );
        let metadata_bind_group_layout = self.temp_metadata_bind_group_layout.as_ref().unwrap();
        assert_eq!(
            metadata_bind_group_layout.id(),
            key.metadata_bind_group_layout_id
        );

        let label = format!("hanabi:pipeline:init_{hash:016X}");
        trace!(
            "-> creating pipeline '{}' with shader defs:{}",
            label,
            shader_defs
                .iter()
                .fold(String::new(), |acc, x| acc + &format!(" {x:?}"))
        );

        ComputePipelineDescriptor {
            label: Some(label.into()),
            layout: vec![
                self.sim_params_layout.clone(),
                particle_bind_group_layout.clone(),
                spawner_bind_group_layout.clone(),
                metadata_bind_group_layout.clone(),
            ],
            shader: key.shader,
            shader_defs,
            entry_point: "main".into(),
            push_constant_ranges: vec![],
            zero_initialize_workgroup_memory: false,
        }
    }
}

#[derive(Resource)]
pub(crate) struct ParticlesUpdatePipeline {
    sim_params_layout: BindGroupLayout,

    // Temporary values passed to specialize()
    // https://github.com/bevyengine/bevy/issues/17132
    /// Layout of the particle@1 bind group this pipeline was specialized with.
    temp_particle_bind_group_layout: Option<BindGroupLayout>,
    /// Layout of the spawner@2 bind group this pipeline was specialized with.
    temp_spawner_bind_group_layout: Option<BindGroupLayout>,
    /// Layout of the metadata@3 bind group this pipeline was specialized with.
    temp_metadata_bind_group_layout: Option<BindGroupLayout>,
}

impl FromWorld for ParticlesUpdatePipeline {
    fn from_world(world: &mut World) -> Self {
        let render_device = world.get_resource::<RenderDevice>().unwrap();

        trace!("GpuSimParams: min_size={}", GpuSimParams::min_size());
        let sim_params_layout = render_device.create_bind_group_layout(
            "hanabi:bind_group_layout:vfx_update:sim_params@0",
            &[
                // @group(0) @binding(0) var<uniform> sim_params : SimParams;
                BindGroupLayoutEntry {
                    binding: 0,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Uniform,
                        has_dynamic_offset: false,
                        min_binding_size: Some(GpuSimParams::min_size()),
                    },
                    count: None,
                },
                // @group(0) @binding(1) var<storage, read_write> draw_indirect_buffer :
                // array<DrawIndexedIndirectArgs>;
                BindGroupLayoutEntry {
                    binding: 1,
                    visibility: ShaderStages::COMPUTE,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Storage { read_only: false },
                        has_dynamic_offset: false,
                        min_binding_size: Some(GpuDrawIndexedIndirectArgs::SHADER_SIZE),
                    },
                    count: None,
                },
            ],
        );

        Self {
            sim_params_layout,
            temp_particle_bind_group_layout: None,
            temp_spawner_bind_group_layout: None,
            temp_metadata_bind_group_layout: None,
        }
    }
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub(crate) struct ParticleUpdatePipelineKey {
    /// Compute shader, with snippets applied, but not preprocessed yet.
    shader: Handle<Shader>,
    /// Particle layout.
    particle_layout: ParticleLayout,
    /// Minimum binding size in bytes for the particle layout buffer of the
    /// parent effect, if any.
    /// Key: READ_PARENT_PARTICLE
    parent_particle_layout_min_binding_size: Option<NonZeroU32>,
    /// Key: EMITS_GPU_SPAWN_EVENTS
    num_event_buffers: u32,
    /// Layout of the particle@1 bind group this pipeline was specialized with.
    // Note: can't directly store BindGroupLayout because it's not Eq nor Hash
    particle_bind_group_layout_id: BindGroupLayoutId,
    /// Layout of the spawner@2 bind group this pipeline was specialized with.
    // Note: can't directly store BindGroupLayout because it's not Eq nor Hash
    spawner_bind_group_layout_id: BindGroupLayoutId,
    /// Layout of the metadata@3 bind group this pipeline was specialized with.
    // Note: can't directly store BindGroupLayout because it's not Eq nor Hash
    metadata_bind_group_layout_id: BindGroupLayoutId,
}

impl SpecializedComputePipeline for ParticlesUpdatePipeline {
    type Key = ParticleUpdatePipelineKey;

    fn specialize(&self, key: Self::Key) -> ComputePipelineDescriptor {
        // We use the hash to correlate the key content with the GPU resource name
        let hash = calc_hash(&key);
        trace!("Specializing update pipeline {hash:016X} with key {key:?}");

        let mut shader_defs = Vec::with_capacity(6);
        shader_defs.push("EM_MAX_SPAWN_ATOMIC".into());
        // ChildInfo needs atomic event_count because all threads append to the event
        // buffer(s) in parallel.
        shader_defs.push("CHILD_INFO_EVENT_COUNT_IS_ATOMIC".into());
        if key.particle_layout.contains(Attribute::PREV) {
            shader_defs.push("ATTRIBUTE_PREV".into());
        }
        if key.particle_layout.contains(Attribute::NEXT) {
            shader_defs.push("ATTRIBUTE_NEXT".into());
        }
        if key.parent_particle_layout_min_binding_size.is_some() {
            shader_defs.push("READ_PARENT_PARTICLE".into());
        }
        if key.num_event_buffers > 0 {
            shader_defs.push("EMITS_GPU_SPAWN_EVENTS".into());
        }

        // This should always be valid when specialize() is called, by design. This is
        // how we pass the value to specialize() to work around the lack of access to
        // external data.
        // https://github.com/bevyengine/bevy/issues/17132
        let particle_bind_group_layout = self.temp_particle_bind_group_layout.as_ref().unwrap();
        assert_eq!(
            particle_bind_group_layout.id(),
            key.particle_bind_group_layout_id
        );
        let spawner_bind_group_layout = self.temp_spawner_bind_group_layout.as_ref().unwrap();
        assert_eq!(
            spawner_bind_group_layout.id(),
            key.spawner_bind_group_layout_id
        );
        let metadata_bind_group_layout = self.temp_metadata_bind_group_layout.as_ref().unwrap();
        assert_eq!(
            metadata_bind_group_layout.id(),
            key.metadata_bind_group_layout_id
        );

        let hash = calc_func_id(&key);
        let label = format!("hanabi:pipeline:update_{hash:016X}");
        trace!(
            "-> creating pipeline '{}' with shader defs:{}",
            label,
            shader_defs
                .iter()
                .fold(String::new(), |acc, x| acc + &format!(" {x:?}"))
        );

        ComputePipelineDescriptor {
            label: Some(label.into()),
            layout: vec![
                self.sim_params_layout.clone(),
                particle_bind_group_layout.clone(),
                spawner_bind_group_layout.clone(),
                metadata_bind_group_layout.clone(),
            ],
            shader: key.shader,
            shader_defs,
            entry_point: "main".into(),
            push_constant_ranges: Vec::new(),
            zero_initialize_workgroup_memory: false,
        }
    }
}

#[derive(Resource)]
pub(crate) struct ParticlesRenderPipeline {
    render_device: RenderDevice,
    view_layout: BindGroupLayout,
    material_layouts: HashMap<TextureLayout, BindGroupLayout>,
}

impl ParticlesRenderPipeline {
    /// Cache a material, creating its bind group layout based on the texture
    /// layout.
    pub fn cache_material(&mut self, layout: &TextureLayout) {
        if layout.layout.is_empty() {
            return;
        }

        // FIXME - no current stable API to insert an entry into a HashMap only if it
        // doesn't exist, and without having to build a key (as opposed to a reference).
        // So do 2 lookups instead, to avoid having to clone the layout if it's already
        // cached (which should be the common case).
        if self.material_layouts.contains_key(layout) {
            return;
        }

        let mut entries = Vec::with_capacity(layout.layout.len() * 2);
        let mut index = 0;
        for _slot in &layout.layout {
            entries.push(BindGroupLayoutEntry {
                binding: index,
                visibility: ShaderStages::FRAGMENT,
                ty: BindingType::Texture {
                    multisampled: false,
                    sample_type: TextureSampleType::Float { filterable: true },
                    view_dimension: TextureViewDimension::D2,
                },
                count: None,
            });
            entries.push(BindGroupLayoutEntry {
                binding: index + 1,
                visibility: ShaderStages::FRAGMENT,
                ty: BindingType::Sampler(SamplerBindingType::Filtering),
                count: None,
            });
            index += 2;
        }
        debug!(
            "Creating material bind group with {} entries [{:?}] for layout {:?}",
            entries.len(),
            entries,
            layout
        );
        let material_bind_group_layout = self
            .render_device
            .create_bind_group_layout("hanabi:material_layout_render", &entries[..]);

        self.material_layouts
            .insert(layout.clone(), material_bind_group_layout);
    }

    /// Retrieve a bind group layout for a cached material.
    pub fn get_material(&self, layout: &TextureLayout) -> Option<&BindGroupLayout> {
        // Prevent a hash and lookup for the trivial case of an empty layout
        if layout.layout.is_empty() {
            return None;
        }

        self.material_layouts.get(layout)
    }
}

impl FromWorld for ParticlesRenderPipeline {
    fn from_world(world: &mut World) -> Self {
        let render_device = world.get_resource::<RenderDevice>().unwrap();

        let view_layout = render_device.create_bind_group_layout(
            "hanabi:bind_group_layout:render:view@0",
            &[
                // @group(0) @binding(0) var<uniform> view: View;
                BindGroupLayoutEntry {
                    binding: 0,
                    visibility: ShaderStages::VERTEX_FRAGMENT,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Uniform,
                        has_dynamic_offset: true,
                        min_binding_size: Some(ViewUniform::min_size()),
                    },
                    count: None,
                },
                // @group(0) @binding(1) var<uniform> sim_params : SimParams;
                BindGroupLayoutEntry {
                    binding: 1,
                    visibility: ShaderStages::VERTEX_FRAGMENT,
                    ty: BindingType::Buffer {
                        ty: BufferBindingType::Uniform,
                        has_dynamic_offset: false,
                        min_binding_size: Some(GpuSimParams::min_size()),
                    },
                    count: None,
                },
            ],
        );

        Self {
            render_device: render_device.clone(),
            view_layout,
            material_layouts: default(),
        }
    }
}

#[cfg(all(feature = "2d", feature = "3d"))]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
enum PipelineMode {
    Camera2d,
    Camera3d,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub(crate) struct ParticleRenderPipelineKey {
    /// Render shader, with snippets applied, but not preprocessed yet.
    shader: Handle<Shader>,
    /// Particle layout.
    particle_layout: ParticleLayout,
    mesh_layout: Option<MeshVertexBufferLayoutRef>,
    /// Texture layout.
    texture_layout: TextureLayout,
    /// Key: LOCAL_SPACE_SIMULATION
    /// The effect is simulated in local space, and during rendering all
    /// particles are transformed by the effect's [`GlobalTransform`].
    local_space_simulation: bool,
    /// Key: USE_ALPHA_MASK, OPAQUE
    /// The particle's alpha masking behavior.
    alpha_mask: ParticleRenderAlphaMaskPipelineKey,
    /// The effect needs Alpha blend.
    alpha_mode: AlphaMode,
    /// Key: FLIPBOOK
    /// The effect is rendered with flipbook texture animation based on the
    /// sprite index of each particle.
    flipbook: bool,
    /// Key: NEEDS_UV
    /// The effect needs UVs.
    needs_uv: bool,
    /// Key: NEEDS_NORMAL
    /// The effect needs normals.
    needs_normal: bool,
    /// Key: NEEDS_PARTICLE_IN_FRAGMENT
    /// The effect needs access to the particle index and buffer in the fragment
    /// shader.
    needs_particle_fragment: bool,
    /// Key: RIBBONS
    /// The effect has ribbons.
    ribbons: bool,
    /// For dual-mode configurations only, the actual mode of the current render
    /// pipeline. Otherwise the mode is implicitly determined by the active
    /// feature.
    #[cfg(all(feature = "2d", feature = "3d"))]
    pipeline_mode: PipelineMode,
    /// MSAA sample count.
    msaa_samples: u32,
    /// Is the camera using an HDR render target?
    hdr: bool,
}

#[derive(Clone, Copy, Default, Hash, PartialEq, Eq, Debug)]
pub(crate) enum ParticleRenderAlphaMaskPipelineKey {
    #[default]
    Blend,
    /// Key: USE_ALPHA_MASK
    /// The effect is rendered with alpha masking.
    AlphaMask,
    /// Key: OPAQUE
    /// The effect is rendered fully-opaquely.
    Opaque,
}

impl Default for ParticleRenderPipelineKey {
    fn default() -> Self {
        Self {
            shader: Handle::default(),
            particle_layout: ParticleLayout::empty(),
            mesh_layout: None,
            texture_layout: default(),
            local_space_simulation: false,
            alpha_mask: default(),
            alpha_mode: AlphaMode::Blend,
            flipbook: false,
            needs_uv: false,
            needs_normal: false,
            needs_particle_fragment: false,
            ribbons: false,
            #[cfg(all(feature = "2d", feature = "3d"))]
            pipeline_mode: PipelineMode::Camera3d,
            msaa_samples: Msaa::default().samples(),
            hdr: false,
        }
    }
}

impl SpecializedRenderPipeline for ParticlesRenderPipeline {
    type Key = ParticleRenderPipelineKey;

    fn specialize(&self, key: Self::Key) -> RenderPipelineDescriptor {
        trace!("Specializing render pipeline for key: {key:?}");

        trace!("Creating layout for bind group particle@1 of render pass");
        let alignment = self
            .render_device
            .limits()
            .min_storage_buffer_offset_alignment;
        let spawner_min_binding_size = GpuSpawnerParams::aligned_size(alignment);
        let entries = [
            // @group(1) @binding(0) var<storage, read> particle_buffer : ParticleBuffer;
            BindGroupLayoutEntry {
                binding: 0,
                visibility: ShaderStages::VERTEX_FRAGMENT,
                ty: BindingType::Buffer {
                    ty: BufferBindingType::Storage { read_only: true },
                    has_dynamic_offset: false,
                    min_binding_size: Some(key.particle_layout.min_binding_size()),
                },
                count: None,
            },
            // @group(1) @binding(1) var<storage, read> indirect_buffer : IndirectBuffer;
            BindGroupLayoutEntry {
                binding: 1,
                visibility: ShaderStages::VERTEX,
                ty: BindingType::Buffer {
                    ty: BufferBindingType::Storage { read_only: true },
                    has_dynamic_offset: false,
                    min_binding_size: Some(NonZeroU64::new(INDIRECT_INDEX_SIZE as u64).unwrap()),
                },
                count: None,
            },
            // @group(1) @binding(2) var<storage, read> spawner : Spawner;
            BindGroupLayoutEntry {
                binding: 2,
                visibility: ShaderStages::VERTEX,
                ty: BindingType::Buffer {
                    ty: BufferBindingType::Storage { read_only: true },
                    has_dynamic_offset: true,
                    min_binding_size: Some(spawner_min_binding_size),
                },
                count: None,
            },
        ];
        let particle_bind_group_layout = self
            .render_device
            .create_bind_group_layout("hanabi:bind_group_layout:render:particle@1", &entries[..]);

        let mut layout = vec![self.view_layout.clone(), particle_bind_group_layout];
        let mut shader_defs = vec![];

        let vertex_buffer_layout = key.mesh_layout.as_ref().and_then(|mesh_layout| {
            mesh_layout
                .0
                .get_layout(&[
                    Mesh::ATTRIBUTE_POSITION.at_shader_location(0),
                    Mesh::ATTRIBUTE_UV_0.at_shader_location(1),
                    Mesh::ATTRIBUTE_NORMAL.at_shader_location(2),
                ])
                .ok()
        });

        if let Some(material_bind_group_layout) = self.get_material(&key.texture_layout) {
            layout.push(material_bind_group_layout.clone());
        }

        // Key: LOCAL_SPACE_SIMULATION
        if key.local_space_simulation {
            shader_defs.push("LOCAL_SPACE_SIMULATION".into());
        }

        match key.alpha_mask {
            ParticleRenderAlphaMaskPipelineKey::Blend => {}
            ParticleRenderAlphaMaskPipelineKey::AlphaMask => {
                // Key: USE_ALPHA_MASK
                shader_defs.push("USE_ALPHA_MASK".into())
            }
            ParticleRenderAlphaMaskPipelineKey::Opaque => {
                // Key: OPAQUE
                shader_defs.push("OPAQUE".into())
            }
        }

        // Key: FLIPBOOK
        if key.flipbook {
            shader_defs.push("FLIPBOOK".into());
        }

        // Key: NEEDS_UV
        if key.needs_uv {
            shader_defs.push("NEEDS_UV".into());
        }

        // Key: NEEDS_NORMAL
        if key.needs_normal {
            shader_defs.push("NEEDS_NORMAL".into());
        }

        if key.needs_particle_fragment {
            shader_defs.push("NEEDS_PARTICLE_FRAGMENT".into());
        }

        // Key: RIBBONS
        if key.ribbons {
            shader_defs.push("RIBBONS".into());
        }

        #[cfg(feature = "2d")]
        let depth_stencil_2d = DepthStencilState {
            format: CORE_2D_DEPTH_FORMAT,
            // Use depth buffer with alpha-masked particles, not with transparent ones
            depth_write_enabled: false, // TODO - opaque/alphamask 2d
            // Bevy uses reverse-Z, so GreaterEqual really means closer
            depth_compare: CompareFunction::GreaterEqual,
            stencil: StencilState::default(),
            bias: DepthBiasState::default(),
        };

        #[cfg(feature = "3d")]
        let depth_stencil_3d = DepthStencilState {
            format: CORE_3D_DEPTH_FORMAT,
            // Use depth buffer with alpha-masked or opaque particles, not
            // with transparent ones
            depth_write_enabled: matches!(
                key.alpha_mask,
                ParticleRenderAlphaMaskPipelineKey::AlphaMask
                    | ParticleRenderAlphaMaskPipelineKey::Opaque
            ),
            // Bevy uses reverse-Z, so GreaterEqual really means closer
            depth_compare: CompareFunction::GreaterEqual,
            stencil: StencilState::default(),
            bias: DepthBiasState::default(),
        };

        #[cfg(all(feature = "2d", feature = "3d"))]
        assert_eq!(CORE_2D_DEPTH_FORMAT, CORE_3D_DEPTH_FORMAT);
        #[cfg(all(feature = "2d", feature = "3d"))]
        let depth_stencil = match key.pipeline_mode {
            PipelineMode::Camera2d => Some(depth_stencil_2d),
            PipelineMode::Camera3d => Some(depth_stencil_3d),
        };

        #[cfg(all(feature = "2d", not(feature = "3d")))]
        let depth_stencil = Some(depth_stencil_2d);

        #[cfg(all(feature = "3d", not(feature = "2d")))]
        let depth_stencil = Some(depth_stencil_3d);

        let format = if key.hdr {
            ViewTarget::TEXTURE_FORMAT_HDR
        } else {
            TextureFormat::bevy_default()
        };

        let hash = calc_func_id(&key);
        let label = format!("hanabi:pipeline:render_{hash:016X}");
        trace!(
            "-> creating pipeline '{}' with shader defs:{}",
            label,
            shader_defs
                .iter()
                .fold(String::new(), |acc, x| acc + &format!(" {x:?}"))
        );

        RenderPipelineDescriptor {
            label: Some(label.into()),
            vertex: VertexState {
                shader: key.shader.clone(),
                entry_point: "vertex".into(),
                shader_defs: shader_defs.clone(),
                buffers: vec![vertex_buffer_layout.expect("Vertex buffer layout not present")],
            },
            fragment: Some(FragmentState {
                shader: key.shader,
                shader_defs,
                entry_point: "fragment".into(),
                targets: vec![Some(ColorTargetState {
                    format,
                    blend: Some(key.alpha_mode.into()),
                    write_mask: ColorWrites::ALL,
                })],
            }),
            layout,
            primitive: PrimitiveState {
                front_face: FrontFace::Ccw,
                cull_mode: None,
                unclipped_depth: false,
                polygon_mode: PolygonMode::Fill,
                conservative: false,
                topology: PrimitiveTopology::TriangleList,
                strip_index_format: None,
            },
            depth_stencil,
            multisample: MultisampleState {
                count: key.msaa_samples,
                mask: !0,
                alpha_to_coverage_enabled: false,
            },
            push_constant_ranges: Vec::new(),
            zero_initialize_workgroup_memory: false,
        }
    }
}

/// A single effect instance extracted from a [`ParticleEffect`] as a
/// render world item.
///
/// [`ParticleEffect`]: crate::ParticleEffect
#[derive(Debug, Component)]
pub(crate) struct ExtractedEffectLegacy {
    /// Main world entity owning the [`CompiledParticleEffect`] this effect was
    /// extracted from. Mainly used for visibility.
    pub main_entity: MainEntity,
    /// Render world entity, if any, where the [`CachedEffect`] component
    /// caching this extracted effect resides. If this component was never
    /// cached in the render world, this is `None`. In that case a new
    /// [`CachedEffect`] will be spawned automatically.
    pub render_entity: RenderEntity,
    /// Handle to the effect asset this instance is based on.
    /// The handle is weak to prevent refcount cycles and gracefully handle
    /// assets unloaded or destroyed after a draw call has been submitted.
    pub handle: Handle<EffectAsset>,
    /// Particle layout for the effect.
    #[allow(dead_code)]
    pub particle_layout: ParticleLayout,
    /// Property layout for the effect.
    pub property_layout: PropertyLayout,
    /// Values of properties written in a binary blob according to
    /// [`property_layout`].
    ///
    /// This is `Some(blob)` if the data needs to be (re)uploaded to GPU, or
    /// `None` if nothing needs to be done for this frame.
    ///
    /// [`property_layout`]: crate::render::ExtractedEffect::property_layout
    pub property_data: Option<Vec<u8>>,
    /// Number of particles to spawn this frame.
    ///
    /// This is ignored if the effect is a child effect consuming GPU spawn
    /// events.
    pub spawn_count: u32,
    /// PRNG seed.
    pub prng_seed: u32,
    /// Global transform of the effect origin.
    pub transform: GlobalTransform,
    /// Layout flags.
    pub layout_flags: LayoutFlags,
    /// Texture layout.
    pub texture_layout: TextureLayout,
    /// Textures.
    pub textures: Vec<Handle<Image>>,
    /// Alpha mode.
    pub alpha_mode: AlphaMode,
    /// Effect shaders.
    pub effect_shaders: EffectShader,
}

/// A single effect instance extracted from a [`ParticleEffect`] as a
/// render world item.
///
/// [`ParticleEffect`]: crate::ParticleEffect
#[derive(Debug, Clone, PartialEq, Component)]
#[require(CachedPipelines, CachedReadyState, CachedEffectMetadata)]
pub(crate) struct ExtractedEffect {
    /// Handle to the effect asset this instance is based on.
    /// The handle is weak to prevent refcount cycles and gracefully handle
    /// assets unloaded or destroyed after a draw call has been submitted.
    pub handle: Handle<EffectAsset>,
    /// Particle layout for the effect.
    pub particle_layout: ParticleLayout,
    /// Effect capacity, in number of particles.
    pub capacity: u32,
    /// Layout flags.
    pub layout_flags: LayoutFlags,
    /// Texture layout.
    pub texture_layout: TextureLayout,
    /// Textures.
    pub textures: Vec<Handle<Image>>,
    /// Alpha mode.
    pub alpha_mode: AlphaMode,
    /// Effect shaders.
    pub effect_shaders: EffectShader,
    /// Condition under which the effect is simulated.
    pub simulation_condition: SimulationCondition,
}

/// Extracted data for the [`GpuSpawnerParams`].
///
/// This contains all data which may change each frame during the regular usage
/// of the effect, but doesn't require any particular GPU resource update
/// (except re-uploading that new data to GPU, of course).
#[derive(Debug, Clone, PartialEq, Component)]
pub(crate) struct ExtractedSpawner {
    /// Number of particles to spawn this frame.
    ///
    /// This is ignored if the effect is a child effect consuming GPU spawn
    /// events.
    pub spawn_count: u32,
    /// PRNG seed.
    pub prng_seed: u32,
    /// Global transform of the effect origin.
    pub transform: GlobalTransform,
    /// Is the effect visible this frame?
    pub is_visible: bool,
}

/// Cache info for the metadata of the effect.
///
/// This manages the GPU allocation of the [`GpuEffectMetadata`] for this
/// effect.
#[derive(Debug, Default, Component)]
pub(crate) struct CachedEffectMetadata {
    /// Allocation ID.
    pub table_id: BufferTableId,
    /// Current metadata values, cached on CPU for change detection.
    pub metadata: GpuEffectMetadata,
}

/// Extracted parent information for a child effect.
///
/// This component is present on the [`RenderEntity`] of an extracted effect if
/// the effect has a parent effect. Otherwise, it's removed.
///
/// This components forms an ECS relationship with [`ChildrenEffects`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Component)]
#[relationship(relationship_target = ChildrenEffects)]
pub(crate) struct ChildEffectOf {
    /// Render entity of the parent.
    pub parent: Entity,
}

/// Extracted children information for a parent effect.
///
/// This component is present on the [`RenderEntity`] of an extracted effect if
/// the effect is a parent effect for one or more child effects. Otherwise, it's
/// removed.
///
/// This components forms an ECS relationship with [`ChildEffectOf`]. Note that
/// we don't use `linked_spawn` because:
/// 1. This would fight with the `SyncToRenderWorld` as the main world
///    parent-child hierarchy is by design not an ECS relationship (it's a lose
///    declarative coupling).
/// 2. The components on the render entity often store GPU resources or other
///    data we need to clean-up manually, and not all of them currently use
///    lifecycle hooks, so we want to manage despawning manually to prevent
///    leaks.
#[derive(Debug, Clone, PartialEq, Eq, Component)]
#[relationship_target(relationship = ChildEffectOf)]
pub(crate) struct ChildrenEffects(Vec<Entity>);

impl<'a> IntoIterator for &'a ChildrenEffects {
    type Item = <Self::IntoIter as Iterator>::Item;

    type IntoIter = std::slice::Iter<'a, Entity>;

    #[inline(always)]
    fn into_iter(self) -> Self::IntoIter {
        self.0.iter()
    }
}

impl Deref for ChildrenEffects {
    type Target = [Entity];

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

/// Extracted data for an effect's properties, if any.
///
/// This component is present on the [`RenderEntity`] of an extracted effect if
/// that effect has properties. It optionally contains new CPU data to
/// (re-)upload this frame. If the effect has no property, this component is
/// removed.
#[derive(Debug, Component)]
pub(crate) struct ExtractedProperties {
    /// Property layout for the effect.
    pub property_layout: PropertyLayout,
    /// Values of properties written in a binary blob according to
    /// [`property_layout`].
    ///
    /// This is `Some(blob)` if the data needs to be (re)uploaded to GPU, or
    /// `None` if nothing needs to be done for this frame.
    ///
    /// [`property_layout`]: crate::render::ExtractedEffect::property_layout
    pub property_data: Option<Vec<u8>>,
}

#[derive(Default, Resource)]
pub(crate) struct EffectAssetEvents {
    pub images: Vec<AssetEvent<Image>>,
}

/// System extracting all the asset events for the [`Image`] assets to enable
/// dynamic update of images bound to any effect.
///
/// This system runs in parallel of [`extract_effects`].
pub(crate) fn extract_effect_events(
    mut events: ResMut<EffectAssetEvents>,
    mut image_events: Extract<EventReader<AssetEvent<Image>>>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("extract_effect_events").entered();
    trace!("extract_effect_events()");

    let EffectAssetEvents { ref mut images } = *events;
    *images = image_events.read().copied().collect();
}

/// Debugging settings.
///
/// Settings used to debug Hanabi. These have no effect on the actual behavior
/// of Hanabi, but may affect its performance.
///
/// # Example
///
/// ```
/// # use bevy::prelude::*;
/// # use bevy_hanabi::*;
/// fn startup(mut debug_settings: ResMut<DebugSettings>) {
///     // Each time a new effect is spawned, capture 2 frames
///     debug_settings.start_capture_on_new_effect = true;
///     debug_settings.capture_frame_count = 2;
/// }
/// ```
#[derive(Debug, Default, Clone, Copy, Resource)]
pub struct DebugSettings {
    /// Enable automatically starting a GPU debugger capture as soon as this
    /// frame starts rendering (extract phase).
    ///
    /// Enable this feature to automatically capture one or more GPU frames when
    /// the `extract_effects()` system runs next. This instructs any attached
    /// GPU debugger to start a capture; this has no effect if no debugger
    /// is attached.
    ///
    /// If a capture is already on-going this has no effect; the on-going
    /// capture needs to be terminated first. Note however that a capture can
    /// stop and another start in the same frame.
    ///
    /// This value is not reset automatically. If you set this to `true`, you
    /// should set it back to `false` on next frame to avoid capturing forever.
    pub start_capture_this_frame: bool,

    /// Enable automatically starting a GPU debugger capture when one or more
    /// effects are spawned.
    ///
    /// Enable this feature to automatically capture one or more GPU frames when
    /// a new effect is spawned (as detected by ECS change detection). This
    /// instructs any attached GPU debugger to start a capture; this has no
    /// effect if no debugger is attached.
    pub start_capture_on_new_effect: bool,

    /// Number of frames to capture with a GPU debugger.
    ///
    /// By default this value is zero, and a GPU debugger capture runs for a
    /// single frame. If a non-zero frame count is specified here, the capture
    /// will instead stop once the specified number of frames has been recorded.
    ///
    /// You should avoid setting this to a value too large, to prevent the
    /// capture size from getting out of control. A typical value is 1 to 3
    /// frames, or possibly more (up to 10) for exceptional contexts. Some GPU
    /// debuggers or graphics APIs might further limit this value on their own,
    /// so there's no guarantee the graphics API will honor this value.
    pub capture_frame_count: u32,
}

#[derive(Debug, Default, Clone, Copy, Resource)]
pub(crate) struct RenderDebugSettings {
    /// Is a GPU debugger capture on-going?
    is_capturing: bool,
    /// Start time of any on-going GPU debugger capture.
    capture_start: Duration,
    /// Number of frames captured so far for on-going GPU debugger capture.
    captured_frames: u32,
}

/// Manage GPU debug capture start/stop.
///
/// If any GPU debug capture is configured to start or stop in
/// [`DebugSettings`], they do so during this system's run. This ensures
/// that all GPU commands produced by Hanabi are recorded (but may miss some
/// from Bevy itself, if another Bevy system runs before this one).
///
/// We do this during extract to try and capture as close as possible to an
/// entire GPU frame.
pub(crate) fn start_stop_gpu_debug_capture(
    real_time: Extract<Res<Time<Real>>>,
    render_device: Res<RenderDevice>,
    debug_settings: Extract<Res<DebugSettings>>,
    mut render_debug_settings: ResMut<RenderDebugSettings>,
    q_added_effects: Extract<Query<(), Added<CompiledParticleEffect>>>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("start_stop_debug_capture").entered();
    trace!("start_stop_debug_capture()");

    // Stop any pending capture if needed
    if render_debug_settings.is_capturing {
        render_debug_settings.captured_frames += 1;

        if render_debug_settings.captured_frames >= debug_settings.capture_frame_count {
            render_device.wgpu_device().stop_capture();
            render_debug_settings.is_capturing = false;
            warn!(
                "Stopped GPU debug capture after {} frames, at t={}s.",
                render_debug_settings.captured_frames,
                real_time.elapsed().as_secs_f64()
            );
        }
    }

    // If no pending capture, consider starting a new one
    if !render_debug_settings.is_capturing
        && (debug_settings.start_capture_this_frame
            || (debug_settings.start_capture_on_new_effect && !q_added_effects.is_empty()))
    {
        render_device.wgpu_device().start_capture();
        render_debug_settings.is_capturing = true;
        render_debug_settings.capture_start = real_time.elapsed();
        render_debug_settings.captured_frames = 0;
        warn!(
            "Started GPU debug capture of {} frames at t={}s.",
            debug_settings.capture_frame_count,
            render_debug_settings.capture_start.as_secs_f64()
        );
    }
}

/// Write the ready state of all render world effects back into their source
/// effect in the main world.
pub(crate) fn report_ready_state(
    mut main_world: ResMut<MainWorld>,
    q_ready_state: Query<&CachedReadyState>,
) {
    let mut q_effects = main_world.query::<(RenderEntity, &mut CompiledParticleEffect)>();
    for (render_entity, mut compiled_particle_effect) in q_effects.iter_mut(&mut main_world) {
        if let Ok(cached_ready_state) = q_ready_state.get(render_entity) {
            compiled_particle_effect.is_ready = cached_ready_state.is_ready();
        }
    }
}

/// System extracting data for rendering of all active [`ParticleEffect`]
/// components.
///
/// [`ParticleEffect`]: crate::ParticleEffect
pub(crate) fn extract_effects(
    mut commands: Commands,
    effects: Extract<Res<Assets<EffectAsset>>>,
    default_mesh: Extract<Res<DefaultMesh>>,
    // Main world effects to extract
    q_effects: Extract<
        Query<(
            Entity,
            RenderEntity,
            Option<&InheritedVisibility>,
            Option<&ViewVisibility>,
            &EffectSpawner,
            &CompiledParticleEffect,
            Option<Ref<EffectProperties>>,
            &GlobalTransform,
        )>,
    >,
    // Render world effects extracted from a previous frame, if any
    mut q_extracted_effects: Query<(
        &mut ExtractedEffect,
        Option<&mut ExtractedSpawner>,
        Option<&ChildEffectOf>, // immutable, because of relationship
        Option<&mut ExtractedEffectMesh>,
        Option<&mut ExtractedProperties>,
    )>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("extract_effects").entered();
    trace!("extract_effects()");

    // Loop over all existing effects to extract them
    trace!("Extracting {} effects...", q_effects.iter().len());
    for (
        main_entity,
        render_entity,
        maybe_inherited_visibility,
        maybe_view_visibility,
        effect_spawner,
        compiled_effect,
        maybe_properties,
        transform,
    ) in q_effects.iter()
    {
        // Check if shaders are configured
        let Some(effect_shaders) = compiled_effect.get_configured_shaders() else {
            trace!("Effect {:?}: no configured shader, skipped.", main_entity);
            continue;
        };

        // Check if asset is available, otherwise silently ignore
        let Some(asset) = effects.get(&compiled_effect.asset) else {
            trace!(
                "Effect {:?}: EffectAsset not ready, skipped. asset:{:?}",
                main_entity,
                compiled_effect.asset
            );
            continue;
        };

        let is_visible = maybe_inherited_visibility
            .map(|cv| cv.get())
            .unwrap_or(true)
            && maybe_view_visibility.map(|cv| cv.get()).unwrap_or(true);

        let mut cmd = commands.entity(render_entity);

        // Fetch the existing extraction compoennts, if any, which we need to update.
        // Because we use SyncToRenderWorld, there's always a render entity, but it may
        // miss all components. And because we can't query only optional components
        // (that would match all entities in the entire world), we force querying
        // ExtractedEffect, which means we get a miss if it's the first extraction and
        // it's not spawned yet. That's OK, we'll spawn it below.
        let (
            maybe_extracted_effect,
            maybe_extracted_spawner,
            maybe_child_of,
            maybe_extracted_mesh,
            maybe_extracted_properties,
        ) = q_extracted_effects
            .get_mut(render_entity)
            .map(|(extracted_effect, b, c, d, e)| (Some(extracted_effect), b, c, d, e))
            .unwrap_or((None, None, None, None, None));

        // Extract general effect data
        let texture_layout = asset.module().texture_layout();
        let layout_flags = compiled_effect.layout_flags;
        let alpha_mode = compiled_effect.alpha_mode;
        trace!(
            "Extracted instance of effect '{}' on entity {:?} (render entity {:?}): texture_layout_count={} texture_count={} layout_flags={:?}",
            asset.name,
            main_entity,
            render_entity,
            texture_layout.layout.len(),
            compiled_effect.textures.len(),
            layout_flags,
        );
        let new_extracted_effect = ExtractedEffect {
            handle: compiled_effect.asset.clone_weak(),
            particle_layout: asset.particle_layout().clone(),
            capacity: asset.capacity(),
            layout_flags,
            texture_layout,
            textures: compiled_effect.textures.clone(),
            alpha_mode,
            effect_shaders: effect_shaders.clone(),
            simulation_condition: asset.simulation_condition,
        };
        if let Some(mut extracted_effect) = maybe_extracted_effect {
            extracted_effect.set_if_neq(new_extracted_effect);
        } else {
            trace!(
                "Inserting new ExtractedEffect component on {:?}",
                render_entity
            );
            cmd.insert(new_extracted_effect);
        }

        // Extract the spawner data
        let new_spawner = ExtractedSpawner {
            spawn_count: effect_spawner.spawn_count,
            prng_seed: compiled_effect.prng_seed,
            transform: *transform,
            is_visible,
        };
        trace!(
            "[Effect {}] spawn_count={} prng_seed={}",
            render_entity,
            new_spawner.spawn_count,
            new_spawner.prng_seed
        );
        if let Some(mut extracted_spawner) = maybe_extracted_spawner {
            extracted_spawner.set_if_neq(new_spawner);
        } else {
            trace!(
                "Inserting new ExtractedSpawner component on {}",
                render_entity
            );
            cmd.insert(new_spawner);
        }

        // Extract the effect mesh
        let mesh = compiled_effect
            .mesh
            .clone()
            .unwrap_or(default_mesh.0.clone());
        let new_mesh = ExtractedEffectMesh { mesh: mesh.id() };
        if let Some(mut extracted_mesh) = maybe_extracted_mesh {
            extracted_mesh.set_if_neq(new_mesh);
        } else {
            trace!(
                "Inserting new ExtractedEffectMesh component on {:?}",
                render_entity
            );
            cmd.insert(new_mesh);
        }

        // Extract the parent, if any, and resolve its render entity
        let parent_render_entity = if let Some(main_entity) = compiled_effect.parent {
            let Ok((_, render_entity, _, _, _, _, _, _)) = q_effects.get(main_entity) else {
                error!(
                    "Failed to resolve render entity of parent with main entity {:?}.",
                    main_entity
                );
                cmd.remove::<ChildEffectOf>();
                // TODO - prevent extraction altogether here, instead of just de-parenting?
                continue;
            };
            Some(render_entity)
        } else {
            None
        };
        if let Some(render_entity) = parent_render_entity {
            let new_child_of = ChildEffectOf {
                parent: render_entity,
            };
            // If there's already an ExtractedParent component, ensure we overwrite only if
            // different, to not trigger ECS change detection that we rely on.
            if let Some(child_effect_of) = maybe_child_of {
                // The relationship makes ChildEffectOf immutable, so re-insert to mutate
                if *child_effect_of != new_child_of {
                    cmd.insert(new_child_of);
                }
            } else {
                trace!(
                    "Inserting new ChildEffectOf component on {:?}",
                    render_entity
                );
                cmd.insert(new_child_of);
            }
        } else {
            cmd.remove::<ChildEffectOf>();
        }

        // Extract property data
        let property_layout = asset.property_layout();
        if property_layout.is_empty() {
            cmd.remove::<ExtractedProperties>();
        } else {
            // Re-extract CPU property data if any. Note that this data is not a "new value"
            // but instead a "value that must be uploaded this frame", and therefore is
            // empty when there's no change (as opposed to, having a constant value
            // frame-to-frame).
            let property_data = if let Some(properties) = maybe_properties {
                if properties.is_changed() {
                    trace!("Detected property change, re-serializing...");
                    Some(properties.serialize(&property_layout))
                } else {
                    None
                }
            } else {
                None
            };

            let new_properties = ExtractedProperties {
                property_layout,
                property_data,
            };
            trace!("new_properties = {new_properties:?}");

            if let Some(mut extracted_properties) = maybe_extracted_properties {
                // Always mutate if there's new CPU data to re-upload. Otherwise check for any
                // other change.
                if new_properties.property_data.is_some()
                    || (extracted_properties.property_layout != new_properties.property_layout)
                {
                    trace!(
                        "Updating existing ExtractedProperties (was: {:?})",
                        extracted_properties.as_ref()
                    );
                    *extracted_properties = new_properties;
                }
            } else {
                trace!(
                    "Inserting new ExtractedProperties component on {:?}",
                    render_entity
                );
                cmd.insert(new_properties);
            }
        }
    }
}

pub(crate) fn extract_sim_params(
    real_time: Extract<Res<Time<Real>>>,
    virtual_time: Extract<Res<Time<Virtual>>>,
    time: Extract<Res<Time<EffectSimulation>>>,
    mut sim_params: ResMut<SimParams>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("extract_sim_params").entered();
    trace!("extract_sim_params()");

    // Save simulation params into render world
    sim_params.time = time.elapsed_secs_f64();
    sim_params.delta_time = time.delta_secs();
    sim_params.virtual_time = virtual_time.elapsed_secs_f64();
    sim_params.virtual_delta_time = virtual_time.delta_secs();
    sim_params.real_time = real_time.elapsed_secs_f64();
    sim_params.real_delta_time = real_time.delta_secs();
    trace!(
        "SimParams: time={} delta_time={} vtime={} delta_vtime={} rtime={} delta_rtime={}",
        sim_params.time,
        sim_params.delta_time,
        sim_params.virtual_time,
        sim_params.virtual_delta_time,
        sim_params.real_time,
        sim_params.real_delta_time,
    );
}

/// Various GPU limits and aligned sizes computed once and cached.
struct GpuLimits {
    /// Value of [`WgpuLimits::min_storage_buffer_offset_alignment`].
    ///
    /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment
    storage_buffer_align: NonZeroU32,

    /// Size of [`GpuEffectMetadata`] aligned to the contraint of
    /// [`WgpuLimits::min_storage_buffer_offset_alignment`].
    ///
    /// [`WgpuLimits::min_storage_buffer_offset_alignment`]: bevy::render::settings::WgpuLimits::min_storage_buffer_offset_alignment
    effect_metadata_aligned_size: NonZeroU32,
}

impl GpuLimits {
    pub fn from_device(render_device: &RenderDevice) -> Self {
        let storage_buffer_align =
            render_device.limits().min_storage_buffer_offset_alignment as u64;

        let effect_metadata_aligned_size = NonZeroU32::new(
            GpuEffectMetadata::min_size()
                .get()
                .next_multiple_of(storage_buffer_align) as u32,
        )
        .unwrap();

        trace!(
            "GPU-aligned sizes (align: {} B):\n- GpuEffectMetadata: {} B -> {} B",
            storage_buffer_align,
            GpuEffectMetadata::min_size().get(),
            effect_metadata_aligned_size.get(),
        );

        Self {
            storage_buffer_align: NonZeroU32::new(storage_buffer_align as u32).unwrap(),
            effect_metadata_aligned_size,
        }
    }

    /// Byte alignment for any storage buffer binding.
    pub fn storage_buffer_align(&self) -> NonZeroU32 {
        self.storage_buffer_align
    }

    /// Byte offset of the [`GpuEffectMetadata`] of a given buffer.
    pub fn effect_metadata_offset(&self, buffer_index: u32) -> u64 {
        self.effect_metadata_aligned_size.get() as u64 * buffer_index as u64
    }

    /// Byte alignment for [`GpuEffectMetadata`].
    pub fn effect_metadata_size(&self) -> NonZeroU64 {
        NonZeroU64::new(self.effect_metadata_aligned_size.get() as u64).unwrap()
    }
}

/// Global render world resource containing the GPU data to draw all the
/// particle effects in all views.
///
/// The resource is populated by [`prepare_effects()`] with all the effects to
/// render for the current frame, for all views in the frame, and consumed by
/// [`queue_effects()`] to actually enqueue the drawning commands to draw those
/// effects.
#[derive(Resource)]
pub struct EffectsMeta {
    /// Bind group for the camera view, containing the camera projection and
    /// other uniform values related to the camera.
    view_bind_group: Option<BindGroup>,
    /// Bind group #0 of the vfx_update shader, for the simulation parameters
    /// like the current time and frame delta time.
    update_sim_params_bind_group: Option<BindGroup>,
    /// Bind group #0 of the vfx_indirect shader, for the simulation parameters
    /// like the current time and frame delta time. This is shared with the
    /// vfx_init pass too.
    indirect_sim_params_bind_group: Option<BindGroup>,
    /// Bind group #1 of the vfx_indirect shader, containing both the indirect
    /// compute dispatch and render buffers.
    indirect_metadata_bind_group: Option<BindGroup>,
    /// Bind group #2 of the vfx_indirect shader, containing the spawners.
    indirect_spawner_bind_group: Option<BindGroup>,
    /// Global shared GPU uniform buffer storing the simulation parameters,
    /// uploaded each frame from CPU to GPU.
    sim_params_uniforms: UniformBuffer<GpuSimParams>,
    /// Global shared GPU buffer storing the various spawner parameter structs
    /// for the active effect instances.
    spawner_buffer: AlignedBufferVec<GpuSpawnerParams>,
    /// Global shared GPU buffer storing the various indirect dispatch structs
    /// for the indirect dispatch of the Update pass.
    dispatch_indirect_buffer: GpuBuffer<GpuDispatchIndirectArgs>,
    /// Global shared GPU buffer storing the various indirect draw structs
    /// for the indirect Render pass. Note that we use
    /// GpuDrawIndexedIndirectArgs as the largest of the two variants (the
    /// other being GpuDrawIndirectArgs). For non-indexed entries, we ignore
    /// the last `u32` value.
    draw_indirect_buffer: BufferTable<GpuDrawIndexedIndirectArgs>,
    /// Global shared GPU buffer storing the various `EffectMetadata`
    /// structs for the active effect instances.
    effect_metadata_buffer: BufferTable<GpuEffectMetadata>,
    /// Various GPU limits and aligned sizes lazily allocated and cached for
    /// convenience.
    gpu_limits: GpuLimits,
    indirect_shader_noevent: Handle<Shader>,
    indirect_shader_events: Handle<Shader>,
    /// Pipeline cache ID of the two indirect dispatch pass pipelines (the
    /// -noevent and -events variants).
    indirect_pipeline_ids: [CachedComputePipelineId; 2],
    /// Pipeline cache ID of the active indirect dispatch pass pipeline, which
    /// is either the -noevent or -events variant depending on whether there's
    /// any child effect with GPU events currently active.
    active_indirect_pipeline_id: CachedComputePipelineId,
}

impl EffectsMeta {
    pub fn new(
        device: RenderDevice,
        indirect_shader_noevent: Handle<Shader>,
        indirect_shader_events: Handle<Shader>,
    ) -> Self {
        let gpu_limits = GpuLimits::from_device(&device);

        // Ensure individual GpuSpawnerParams elements are properly aligned so they can
        // be addressed individually by the computer shaders.
        let item_align = gpu_limits.storage_buffer_align().get() as u64;
        trace!(
            "Aligning storage buffers to {} bytes as device limits requires.",
            item_align
        );

        Self {
            view_bind_group: None,
            update_sim_params_bind_group: None,
            indirect_sim_params_bind_group: None,
            indirect_metadata_bind_group: None,
            indirect_spawner_bind_group: None,
            sim_params_uniforms: UniformBuffer::default(),
            spawner_buffer: AlignedBufferVec::new(
                BufferUsages::STORAGE,
                NonZeroU64::new(item_align),
                Some("hanabi:buffer:spawner".to_string()),
            ),
            dispatch_indirect_buffer: GpuBuffer::new(
                BufferUsages::STORAGE | BufferUsages::INDIRECT,
                Some("hanabi:buffer:dispatch_indirect".to_string()),
            ),
            draw_indirect_buffer: BufferTable::new(
                BufferUsages::STORAGE | BufferUsages::INDIRECT,
                Some(GpuDrawIndexedIndirectArgs::SHADER_SIZE),
                Some("hanabi:buffer:draw_indirect".to_string()),
            ),
            effect_metadata_buffer: BufferTable::new(
                BufferUsages::STORAGE | BufferUsages::INDIRECT,
                Some(NonZeroU64::new(item_align).unwrap()),
                Some("hanabi:buffer:effect_metadata".to_string()),
            ),
            gpu_limits,
            indirect_shader_noevent,
            indirect_shader_events,
            indirect_pipeline_ids: [
                CachedComputePipelineId::INVALID,
                CachedComputePipelineId::INVALID,
            ],
            active_indirect_pipeline_id: CachedComputePipelineId::INVALID,
        }
    }

    pub fn allocate_spawner(
        &mut self,
        global_transform: &GlobalTransform,
        spawn_count: u32,
        prng_seed: u32,
        effect_metadata_buffer_table_id: BufferTableId,
        maybe_cached_draw_indirect_args: Option<&CachedDrawIndirectArgs>,
    ) -> u32 {
        let spawner_base = self.spawner_buffer.len() as u32;
        let transform = global_transform.compute_matrix().into();
        let inverse_transform = Mat4::from(
            // Inverse the Affine3A first, then convert to Mat4. This is a lot more
            // efficient than inversing the Mat4.
            global_transform.affine().inverse(),
        )
        .into();
        let spawner_params = GpuSpawnerParams {
            transform,
            inverse_transform,
            spawn: spawn_count as i32,
            seed: prng_seed,
            effect_metadata_index: effect_metadata_buffer_table_id.0,
            draw_indirect_index: maybe_cached_draw_indirect_args
                .map(|cdia| cdia.get_row().0)
                .unwrap_or_default(),
            ..default()
        };
        trace!("spawner params = {:?}", spawner_params);
        self.spawner_buffer.push(spawner_params);
        spawner_base
    }

    pub fn allocate_draw_indirect(
        &mut self,
        draw_args: &AnyDrawIndirectArgs,
    ) -> CachedDrawIndirectArgs {
        let row = self
            .draw_indirect_buffer
            .insert(draw_args.bitcast_to_row_entry());
        CachedDrawIndirectArgs {
            row,
            args: *draw_args,
        }
    }

    pub fn update_draw_indirect(&mut self, row_index: &CachedDrawIndirectArgs) {
        self.draw_indirect_buffer
            .update(row_index.get_row(), row_index.args.bitcast_to_row_entry());
    }

    pub fn free_draw_indirect(&mut self, row_index: &CachedDrawIndirectArgs) {
        self.draw_indirect_buffer.remove(row_index.get_row());
    }
}

bitflags! {
    /// Effect flags.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct LayoutFlags: u32 {
        /// No flags.
        const NONE = 0;
        // DEPRECATED - The effect uses an image texture.
        //const PARTICLE_TEXTURE = (1 << 0);
        /// The effect is simulated in local space.
        const LOCAL_SPACE_SIMULATION = (1 << 2);
        /// The effect uses alpha masking instead of alpha blending. Only used for 3D.
        const USE_ALPHA_MASK = (1 << 3);
        /// The effect is rendered with flipbook texture animation based on the
        /// [`Attribute::SPRITE_INDEX`] of each particle.
        const FLIPBOOK = (1 << 4);
        /// The effect needs UVs.
        const NEEDS_UV = (1 << 5);
        /// The effect has ribbons.
        const RIBBONS = (1 << 6);
        /// The effects needs normals.
        const NEEDS_NORMAL = (1 << 7);
        /// The effect is fully-opaque.
        const OPAQUE = (1 << 8);
        /// The (update) shader emits GPU spawn events to instruct another effect to spawn particles.
        const EMIT_GPU_SPAWN_EVENTS = (1 << 9);
        /// The (init) shader spawns particles by consuming GPU spawn events, instead of
        /// a single CPU spawn count.
        const CONSUME_GPU_SPAWN_EVENTS = (1 << 10);
        /// The (init or update) shader needs access to its parent particle. This allows
        /// a particle init or update pass to read the data of a parent particle, for
        /// example to inherit some of the attributes.
        const READ_PARENT_PARTICLE = (1 << 11);
        /// The effect access to the particle data in the fragment shader.
        const NEEDS_PARTICLE_FRAGMENT = (1 << 12);
    }
}

impl Default for LayoutFlags {
    fn default() -> Self {
        Self::NONE
    }
}

/// Observer raised when the [`CachedEffect`] component is removed, which
/// indicates that the effect instance was despawned.
pub(crate) fn on_remove_cached_effect(
    trigger: Trigger<OnRemove, CachedEffect>,
    query: Query<(
        Entity,
        &MainEntity,
        &CachedEffect,
        &DispatchBufferIndices,
        Option<&CachedEffectProperties>,
        Option<&CachedParentInfo>,
        Option<&CachedEffectEvents>,
    )>,
    mut effect_cache: ResMut<EffectCache>,
    mut effect_bind_groups: ResMut<EffectBindGroups>,
    mut effects_meta: ResMut<EffectsMeta>,
    mut event_cache: ResMut<EventCache>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("on_remove_cached_effect").entered();

    // FIXME - review this Observer pattern; this triggers for each event one by
    // one, which could kill performance if many effects are removed.

    // Fecth the components of the effect being destroyed. Note that the despawn
    // command above is not yet applied, so this query should always succeed.
    let Ok((
        render_entity,
        main_entity,
        cached_effect,
        dispatch_buffer_indices,
        _opt_props,
        _opt_parent,
        opt_cached_effect_events,
    )) = query.get(trigger.target())
    else {
        return;
    };

    // Dealllocate the effect slice in the event buffer, if any.
    if let Some(cached_effect_events) = opt_cached_effect_events {
        match event_cache.free(cached_effect_events) {
            Err(err) => {
                error!("Error while freeing effect event slice: {err:?}");
            }
            Ok(buffer_state) => {
                if buffer_state != SlabState::Used {
                    // Clear bind groups associated with the old buffer
                    effect_bind_groups.init_metadata_bind_groups.clear();
                    effect_bind_groups.update_metadata_bind_groups.clear();
                }
            }
        }
    }

    // Deallocate the effect slice in the GPU effect buffer, and if this was the
    // last slice, also deallocate the GPU buffer itself.
    trace!(
        "=> ParticleEffect on render entity {:?} associated with main entity {:?}, removing...",
        render_entity,
        main_entity,
    );
    let Ok(SlabState::Free) = effect_cache.remove(cached_effect) else {
        // Buffer was not affected, so all bind groups are still valid. Nothing else to
        // do.
        return;
    };

    // Clear bind groups associated with the removed buffer
    trace!(
        "=> GPU particle slab #{} gone, destroying its bind groups...",
        cached_effect.slab_id.index()
    );
    effect_bind_groups
        .particle_slabs
        .remove(&cached_effect.slab_id);
    effects_meta
        .dispatch_indirect_buffer
        .free(dispatch_buffer_indices.update_dispatch_indirect_buffer_row_index);
}

/// Observer raised when the [`CachedEffectMetadata`] component is removed, to
/// deallocate the GPU resources associated with the indirect draw args.
pub(crate) fn on_remove_cached_metadata(
    trigger: Trigger<OnRemove, CachedEffectMetadata>,
    query: Query<&CachedEffectMetadata>,
    mut effects_meta: ResMut<EffectsMeta>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("on_remove_cached_metadata").entered();

    if let Ok(cached_metadata) = query.get(trigger.target()) {
        if cached_metadata.table_id.is_valid() {
            effects_meta
                .effect_metadata_buffer
                .remove(cached_metadata.table_id);
        }
    };
}

/// Observer raised when the [`CachedDrawIndirectArgs`] component is removed, to
/// deallocate the GPU resources associated with the indirect draw args.
pub(crate) fn on_remove_cached_draw_indirect_args(
    trigger: Trigger<OnRemove, CachedDrawIndirectArgs>,
    query: Query<&CachedDrawIndirectArgs>,
    mut effects_meta: ResMut<EffectsMeta>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("on_remove_cached_draw_indirect_args").entered();

    if let Ok(cached_draw_args) = query.get(trigger.target()) {
        effects_meta.free_draw_indirect(cached_draw_args);
    };
}

/// Clear pending GPU resources left from previous frame.
///
/// Those generally are source buffers for buffer-to-buffer copies on capacity
/// growth, which need the source buffer to be alive until the copy is done,
/// then can be discarded here.
pub(crate) fn clear_previous_frame_resizes(
    mut effects_meta: ResMut<EffectsMeta>,
    mut sort_bind_groups: ResMut<SortBindGroups>,
    mut init_fill_dispatch_queue: ResMut<InitFillDispatchQueue>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("clear_previous_frame_resizes").entered();
    trace!("clear_previous_frame_resizes");

    init_fill_dispatch_queue.clear();

    // Clear last frame's buffer resizes which may have occured during last frame,
    // during `Node::run()` while the `BufferTable` could not be mutated. This is
    // the first point at which we can do that where we're not blocking the main
    // world (so, excluding the extract system).
    effects_meta
        .dispatch_indirect_buffer
        .clear_previous_frame_resizes();
    effects_meta
        .draw_indirect_buffer
        .clear_previous_frame_resizes();
    effects_meta
        .effect_metadata_buffer
        .clear_previous_frame_resizes();
    sort_bind_groups.clear_previous_frame_resizes();
}

// Fixup the [`CachedChildInfo::global_child_index`] once all child infos have
// been allocated.
pub fn fixup_parents(
    q_changed_parents: Query<(Entity, Ref<CachedParentInfo>)>,
    mut q_children: Query<&mut CachedChildInfo>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("fixup_parents").entered();
    trace!("fixup_parents");

    // Once all parents are (re-)allocated, fix up the global index of all
    // children if the parent base index changed.
    trace!(
        "Updating the global index of children of parent effects whose child list just changed..."
    );
    for (parent_entity, cached_parent_info) in q_changed_parents.iter() {
        let base_index =
            cached_parent_info.byte_range.start / GpuChildInfo::SHADER_SIZE.get() as u32;
        let parent_changed = cached_parent_info.is_changed();
        trace!(
            "Updating {} children of parent effect {:?} with base child index {} (parent_changed:{})...",
            cached_parent_info.children.len(),
            parent_entity,
            base_index,
            parent_changed
        );
        for (child_entity, _) in &cached_parent_info.children {
            let Ok(mut cached_child_info) = q_children.get_mut(*child_entity) else {
                error!(
                    "Cannot find child {:?} declared by parent {:?}",
                    *child_entity, parent_entity
                );
                continue;
            };
            if !cached_child_info.is_changed() && !parent_changed {
                continue;
            }
            cached_child_info.global_child_index = base_index + cached_child_info.local_child_index;
            trace!(
                "+ Updated global index for child ID {:?} of parent {:?}: local={}, global={}",
                child_entity,
                parent_entity,
                cached_child_info.local_child_index,
                cached_child_info.global_child_index
            );
        }
    }
}

/// Allocate the GPU resources for all extracted effects.
///
/// This adds the [`CachedEffect`] component as needed, and update it with the
/// allocation in the [`EffectCache`].
pub fn allocate_effects(
    mut commands: Commands,
    mut q_extracted_effects: Query<
        (
            Entity,
            &ExtractedEffect,
            Has<ChildEffectOf>,
            Option<&mut CachedEffect>,
            Has<DispatchBufferIndices>,
        ),
        Changed<ExtractedEffect>,
    >,
    mut effect_cache: ResMut<EffectCache>,
    mut effects_meta: ResMut<EffectsMeta>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("allocate_effects").entered();
    trace!("allocate_effects");

    for (entity, extracted_effect, has_parent, maybe_cached_effect, has_dispatch_buffer_indices) in
        &mut q_extracted_effects
    {
        // Insert or update the effect into the EffectCache
        if let Some(mut cached_effect) = maybe_cached_effect {
            trace!("Updating EffectCache entry for entity {entity:?}...");
            let _ = effect_cache.remove(cached_effect.as_ref());
            *cached_effect = effect_cache.insert(
                extracted_effect.handle.clone(),
                extracted_effect.capacity,
                &extracted_effect.particle_layout,
            );
        } else {
            trace!("Allocating new entry in EffectCache for entity {entity:?}...");
            let cached_effect = effect_cache.insert(
                extracted_effect.handle.clone(),
                extracted_effect.capacity,
                &extracted_effect.particle_layout,
            );
            commands.entity(entity).insert(cached_effect);
        }

        // Ensure the particle@1 bind group layout exists for the given configuration of
        // particle layout. We do this here only for effects without a parent; for those
        // with a parent, we'll do it after we resolved that parent.
        if !has_parent {
            let parent_min_binding_size = None;
            effect_cache.ensure_particle_bind_group_layout(
                extracted_effect.particle_layout.min_binding_size32(),
                parent_min_binding_size,
            );
        }

        // Ensure the metadata@3 bind group layout exists for the init pass.
        {
            let consume_gpu_spawn_events = extracted_effect
                .layout_flags
                .contains(LayoutFlags::CONSUME_GPU_SPAWN_EVENTS);
            effect_cache.ensure_metadata_init_bind_group_layout(consume_gpu_spawn_events);
        }

        // Allocate DispatchBufferIndices if not present yet
        if !has_dispatch_buffer_indices {
            let update_dispatch_indirect_buffer_row_index =
                effects_meta.dispatch_indirect_buffer.allocate();
            commands.entity(entity).insert(DispatchBufferIndices {
                update_dispatch_indirect_buffer_row_index,
            });
        }
    }
}

/// Update any cached mesh info based on any relocation done by Bevy itself.
///
/// Bevy will merge small meshes into larger GPU buffers automatically. When
/// this happens, the mesh location changes, and we need to update our
/// references to it in order to know how to issue the draw commands.
///
/// This system updates both the [`CachedMeshLocation`] and the
/// [`CachedIndirectDrawArgs`] components.
pub fn update_mesh_locations(
    mut commands: Commands,
    mut effects_meta: ResMut<EffectsMeta>,
    mesh_allocator: Res<MeshAllocator>,
    render_meshes: Res<RenderAssets<RenderMesh>>,
    mut q_cached_effects: Query<(
        Entity,
        &ExtractedEffectMesh,
        Option<&mut CachedMeshLocation>,
        Option<&mut CachedDrawIndirectArgs>,
    )>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("update_mesh_locations").entered();
    trace!("update_mesh_locations");

    for (entity, extracted_mesh, maybe_cached_mesh_location, maybe_cached_draw_indirect_args) in
        &mut q_cached_effects
    {
        let mut cmds = commands.entity(entity);

        // Resolve the render mesh
        let Some(render_mesh) = render_meshes.get(extracted_mesh.mesh) else {
            warn!(
                "Cannot find render mesh of particle effect instance on entity {:?}, despite applying default mesh. Invalid asset handle: {:?}",
                entity, extracted_mesh.mesh
            );
            cmds.remove::<CachedMeshLocation>();
            continue;
        };

        // Find the location where the render mesh was allocated. This is handled by
        // Bevy itself in the allocate_and_free_meshes() system. Bevy might
        // re-batch the vertex and optional index data of meshes together at any point,
        // so we need to confirm that the location data we may have cached is still
        // valid.
        let Some(mesh_vertex_buffer_slice) = mesh_allocator.mesh_vertex_slice(&extracted_mesh.mesh)
        else {
            trace!(
                "Effect main_entity {:?}: cannot find vertex slice of render mesh {:?}",
                entity,
                extracted_mesh.mesh
            );
            cmds.remove::<CachedMeshLocation>();
            continue;
        };
        let mesh_index_buffer_slice = mesh_allocator.mesh_index_slice(&extracted_mesh.mesh);
        let indexed =
            if let RenderMeshBufferInfo::Indexed { index_format, .. } = render_mesh.buffer_info {
                if let Some(ref slice) = mesh_index_buffer_slice {
                    Some(MeshIndexSlice {
                        format: index_format,
                        buffer: slice.buffer.clone(),
                        range: slice.range.clone(),
                    })
                } else {
                    trace!(
                        "Effect main_entity {:?}: cannot find index slice of render mesh {:?}",
                        entity,
                        extracted_mesh.mesh
                    );
                    cmds.remove::<CachedMeshLocation>();
                    continue;
                }
            } else {
                None
            };

        // Calculate the new draw args and mesh location based on Bevy's info
        let new_draw_args = AnyDrawIndirectArgs::from_slices(
            &mesh_vertex_buffer_slice,
            mesh_index_buffer_slice.as_ref(),
        );
        let new_mesh_location = match &mesh_index_buffer_slice {
            // Indexed mesh rendering
            Some(mesh_index_buffer_slice) => CachedMeshLocation {
                vertex_buffer: mesh_vertex_buffer_slice.buffer.id(),
                vertex_or_index_count: mesh_index_buffer_slice.range.len() as u32,
                first_index_or_vertex_offset: mesh_index_buffer_slice.range.start,
                vertex_offset_or_base_instance: mesh_vertex_buffer_slice.range.start as i32,
                indexed,
            },
            // Non-indexed mesh rendering
            None => CachedMeshLocation {
                vertex_buffer: mesh_vertex_buffer_slice.buffer.id(),
                vertex_or_index_count: mesh_vertex_buffer_slice.range.len() as u32,
                first_index_or_vertex_offset: mesh_vertex_buffer_slice.range.start,
                vertex_offset_or_base_instance: 0,
                indexed: None,
            },
        };

        // We don't allocate the draw indirect args ahead of time because we need to
        // select the indexed vs. non-indexed buffer. Now that we know whether the mesh
        // is indexed, we can allocate it (or reallocate it if indexing mode changed).
        if let Some(mut cached_draw_indirect) = maybe_cached_draw_indirect_args {
            assert!(cached_draw_indirect.row.is_valid());

            // If the GPU draw args changed, re-upload to GPU.
            if new_draw_args != cached_draw_indirect.args {
                debug!(
                    "Indirect draw args changed for asset {:?}\nold:{:?}\nnew:{:?}",
                    entity, cached_draw_indirect.args, new_draw_args
                );
                cached_draw_indirect.args = new_draw_args;
                effects_meta.update_draw_indirect(cached_draw_indirect.as_ref());
            }
        } else {
            cmds.insert(effects_meta.allocate_draw_indirect(&new_draw_args));
        }

        // Compare to any cached data and update if necessary, or insert if missing.
        // This will trigger change detection in the ECS, which will in turn trigger
        // GpuEffectMetadata re-upload.
        if let Some(mut old_mesh_location) = maybe_cached_mesh_location {
            if *old_mesh_location != new_mesh_location {
                debug!(
                    "Mesh location changed for asset {:?}\nold:{:?}\nnew:{:?}",
                    entity, old_mesh_location, new_mesh_location
                );
                *old_mesh_location = new_mesh_location;
            }
        } else {
            cmds.insert(new_mesh_location);
        }
    }
}

/// Allocate an entry in the GPU table for any [`CachedEffectMetadata`] missing
/// one.
///
/// This system does NOT take care of (re-)uploading recent CPU data to GPU.
/// This is done much later in the frame, after batching and once all data for
/// it is ready. But it's necessary to ensure the allocation is determined
/// already ahead of time, in order to do batching of contiguous metadata
/// blocks (TODO; not currently used, also may end up using binary search in
/// shader, in which case we won't need continguous-ness and can maybe remove
/// this system).
// TODO - consider using observer OnAdd instead?
pub fn allocate_metadata(
    mut effects_meta: ResMut<EffectsMeta>,
    mut q_metadata: Query<&mut CachedEffectMetadata>,
) {
    for mut metadata in &mut q_metadata {
        if !metadata.table_id.is_valid() {
            metadata.table_id = effects_meta
                .effect_metadata_buffer
                .insert(metadata.metadata);
        } else {
            // Unless this is the first time we allocate the GPU entry (above),
            // we should never reach the beginning of this frame
            // with a changed metadata which has not
            // been re-uploaded last frame.
            // NO! We can only detect the change *since last run of THIS system*
            // so wont' see that a latter system the data.
            // assert!(!metadata.is_changed());
        }
    }
}

/// Update the [`CachedParentInfo`] of parent effects and the
/// [`CachedChildInfo`] of child effects.
pub fn allocate_parent_child_infos(
    mut commands: Commands,
    mut effect_cache: ResMut<EffectCache>,
    mut event_cache: ResMut<EventCache>,
    // All extracted child effects. May or may not already have a CachedChildInfo. If not, this
    // will be spawned below.
    mut q_child_effects: Query<(
        Entity,
        &ExtractedEffect,
        &ChildEffectOf,
        &CachedEffectEvents,
        Option<&mut CachedChildInfo>,
    )>,
    // All parent effects from a previous frame (already have CachedParentInfo), which can be
    // updated in-place without spawning a new CachedParentInfo.
    mut q_parent_effects: Query<(
        Entity,
        &ExtractedEffect,
        &CachedEffect,
        &ChildrenEffects,
        Option<&mut CachedParentInfo>,
    )>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("allocate_child_infos").entered();
    trace!("allocate_child_infos");

    // Loop on all child effects and ensure their CachedChildInfo is up-to-date.
    for (child_entity, _, child_effect_of, cached_effect_events, maybe_cached_child_info) in
        &mut q_child_effects
    {
        // Fetch the parent effect
        let parent_entity = child_effect_of.parent;
        let Ok((_, _, parent_cached_effect, children_effects, _)) =
            q_parent_effects.get(parent_entity)
        else {
            warn!("Unknown parent #{parent_entity:?} on child entity {child_entity:?}, removing CachedChildInfo.");
            if maybe_cached_child_info.is_some() {
                commands.entity(child_entity).remove::<CachedChildInfo>();
            }
            continue;
        };

        // Find the index of this child entity in its parent's storage
        let Some(local_child_index) = children_effects.0.iter().position(|e| *e == child_entity)
        else {
            warn!("Cannot find child entity {child_entity:?} in the children collection of parent entity {parent_entity:?}. Relationship desync?");
            if maybe_cached_child_info.is_some() {
                commands.entity(child_entity).remove::<CachedChildInfo>();
            }
            continue;
        };
        let local_child_index = local_child_index as u32;

        // Fetch the effect buffer of the parent effect
        let Some(parent_buffer_binding_source) = effect_cache
            .get_slab(&parent_cached_effect.slab_id)
            .map(|effect_buffer| effect_buffer.max_binding_source())
        else {
            warn!(
                "Unknown parent slab #{} on parent entity {:?}, removing CachedChildInfo.",
                parent_cached_effect.slab_id.index(),
                parent_entity
            );
            if maybe_cached_child_info.is_some() {
                commands.entity(child_entity).remove::<CachedChildInfo>();
            }
            continue;
        };

        let new_cached_child_info = CachedChildInfo {
            parent_slab_id: parent_cached_effect.slab_id,
            parent_particle_layout: parent_cached_effect.slice.particle_layout.clone(),
            parent_buffer_binding_source,
            local_child_index,
            global_child_index: u32::MAX, // fixed up later by fixup_parents()
            init_indirect_dispatch_index: cached_effect_events.init_indirect_dispatch_index,
        };
        if let Some(mut cached_child_info) = maybe_cached_child_info {
            if !cached_child_info.is_locally_equal(&new_cached_child_info) {
                *cached_child_info = new_cached_child_info;
            }
        } else {
            commands.entity(child_entity).insert(new_cached_child_info);
        }
    }

    // Loop on all parent effects and ensure their CachedParentInfo is up-to-date.
    for (parent_entity, parent_extracted_effect, _, children_effects, maybe_cached_parent_info) in
        &mut q_parent_effects
    {
        let parent_min_binding_size = parent_extracted_effect.particle_layout.min_binding_size32();

        // Loop over children and gather GpuChildInfo
        let mut new_children = Vec::with_capacity(children_effects.0.len());
        let mut new_child_infos = Vec::with_capacity(children_effects.0.len());
        for child_entity in children_effects.0.iter() {
            // Fetch the child's event buffer allocation info
            let Ok((_, child_extracted_effect, _, cached_effect_events, _)) =
                q_child_effects.get(*child_entity)
            else {
                warn!("Child entity {child_entity:?} from parent entity {parent_entity:?} didnt't resolve to a child instance. The parent effect cannot be processed.");
                if maybe_cached_parent_info.is_some() {
                    commands.entity(parent_entity).remove::<CachedParentInfo>();
                }
                break;
            };

            // Fetch the GPU event buffer of the child
            let Some(event_buffer) = event_cache.get_buffer(cached_effect_events.buffer_index)
            else {
                warn!("Child entity {child_entity:?} from parent entity {parent_entity:?} doesn't have an allocated GPU event buffer. The parent effect cannot be processed.");
                break;
            };

            let buffer_binding_source = BufferBindingSource {
                buffer: event_buffer.clone(),
                offset: cached_effect_events.range.start,
                size: NonZeroU32::new(cached_effect_events.range.len() as u32).unwrap(),
            };
            new_children.push((*child_entity, buffer_binding_source));

            new_child_infos.push(GpuChildInfo {
                event_count: 0,
                init_indirect_dispatch_index: cached_effect_events.init_indirect_dispatch_index,
            });

            // Ensure the particle@1 bind group layout exists for the given configuration of
            // particle layout. We do this here only for effects with a parent; for those
            // without a parent, we already did this in allocate_effects().
            effect_cache.ensure_particle_bind_group_layout(
                child_extracted_effect.particle_layout.min_binding_size32(),
                Some(parent_min_binding_size),
            );
        }

        // If we don't have all children, just abort this effect. We don't try to have
        // partial relationships, this is too complex for shader bindings.
        debug_assert_eq!(new_children.len(), new_child_infos.len());
        if (new_children.len() < children_effects.len()) && maybe_cached_parent_info.is_some() {
            warn!("One or more child effect(s) on parent effect {parent_entity:?} failed to configure. The parent effect cannot be processed.");
            commands.entity(parent_entity).remove::<CachedParentInfo>();
            continue;
        }

        // Insert or update the CachedParentInfo component of the parent effect
        if let Some(mut cached_parent_info) = maybe_cached_parent_info {
            if cached_parent_info.children != new_children {
                // FIXME - missing way to just update in-place without changing the allocation
                // size!
                // if cached_parent_info.children.len() == new_children.len() {
                //} else {
                event_cache.reallocate_child_infos(
                    parent_entity,
                    new_children,
                    &new_child_infos[..],
                    cached_parent_info.as_mut(),
                );
                //}
            }
        } else {
            let cached_parent_info =
                event_cache.allocate_child_infos(parent_entity, new_children, &new_child_infos[..]);
            commands.entity(parent_entity).insert(cached_parent_info);
        }
    }
}

/// Prepare the init and update compute pipelines for an effect.
///
/// This caches the pipeline IDs once resolved, and their compiling state when
/// it changes, to determine when an effect is ready to be used.
///
/// Note that we do that proactively even if the effect will be skipped this
/// frame (for example because it's not visible). This ensures we queue pipeline
/// compilations ASAP, as they can take a long time (10+ frames). We also use
/// the pipeline compiling state, which we query here, to inform whether the
/// effect is ready for this frame. So in general if this is a new pipeline, it
/// won't be ready this frame.
pub fn prepare_init_update_pipelines(
    mut q_effects: Query<(
        Entity,
        &ExtractedEffect,
        &CachedEffect,
        Option<&CachedChildInfo>,
        Option<&CachedParentInfo>,
        Option<&CachedEffectProperties>,
        &mut CachedPipelines,
    )>,
    // FIXME - need mut for bind group layout creation; shouldn't be create there though
    mut effect_cache: ResMut<EffectCache>,
    pipeline_cache: Res<PipelineCache>,
    property_cache: ResMut<PropertyCache>,
    mut init_pipeline: ResMut<ParticlesInitPipeline>,
    mut update_pipeline: ResMut<ParticlesUpdatePipeline>,
    mut specialized_init_pipelines: ResMut<SpecializedComputePipelines<ParticlesInitPipeline>>,
    mut specialized_update_pipelines: ResMut<SpecializedComputePipelines<ParticlesUpdatePipeline>>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("prepare_init_update_pipelines").entered();
    trace!("prepare_init_update_pipelines");

    // Note: As of Bevy 0.16 we can't evict old pipelines from the cache. They're
    // inserted forever. https://github.com/bevyengine/bevy/issues/19925

    for (
        entity,
        extracted_effect,
        cached_effect,
        maybe_cached_child_info,
        maybe_cached_parent_info,
        maybe_cached_properties,
        mut cached_pipelines,
    ) in &mut q_effects
    {
        trace!(
            "Preparing pipelines for effect {:?}... (flags: {:?})",
            entity,
            cached_pipelines.flags
        );

        let particle_layout = &cached_effect.slice.particle_layout;
        let particle_layout_min_binding_size = particle_layout.min_binding_size32();
        let has_event_buffer = maybe_cached_child_info.is_some();
        let parent_particle_layout_min_binding_size = maybe_cached_child_info
            .as_ref()
            .map(|cci| cci.parent_particle_layout.min_binding_size32());

        let Some(particle_bind_group_layout) = effect_cache.particle_bind_group_layout(
            particle_layout_min_binding_size,
            parent_particle_layout_min_binding_size,
        ) else {
            error!("Failed to find particle sim bind group @1 for min_binding_size={} parent_min_binding_size={:?}",
                particle_layout_min_binding_size, parent_particle_layout_min_binding_size);
            continue;
        };
        let particle_bind_group_layout = particle_bind_group_layout.clone();

        // This should always exist by the time we reach this point, because we should
        // have inserted any property in the cache, which would have allocated the
        // proper bind group layout (or the default no-property one).
        let property_layout_min_binding_size =
            maybe_cached_properties.map(|cp| cp.property_layout.min_binding_size());
        let spawner_bind_group_layout = property_cache
            .bind_group_layout(property_layout_min_binding_size)
            .unwrap_or_else(|| {
                panic!(
                    "Failed to find spawner@2 bind group layout for property binding size {:?}",
                    property_layout_min_binding_size,
                )
            });
        trace!(
            "Retrieved spawner@2 bind group layout {:?} for property binding size {:?}.",
            spawner_bind_group_layout.id(),
            property_layout_min_binding_size
        );

        // Resolve the init pipeline
        let init_pipeline_id = if let Some(init_pipeline_id) = cached_pipelines.init.as_ref() {
            *init_pipeline_id
        } else {
            // Clear flag just in case, to ensure consistency.
            cached_pipelines
                .flags
                .remove(CachedPipelineFlags::INIT_PIPELINE_READY);

            // Fetch the metadata@3 bind group layout from the cache
            let metadata_bind_group_layout = effect_cache
                .metadata_init_bind_group_layout(has_event_buffer)
                .unwrap()
                .clone();

            let init_pipeline_key_flags = {
                let mut flags = ParticleInitPipelineKeyFlags::empty();
                flags.set(
                    ParticleInitPipelineKeyFlags::ATTRIBUTE_PREV,
                    particle_layout.contains(Attribute::PREV),
                );
                flags.set(
                    ParticleInitPipelineKeyFlags::ATTRIBUTE_NEXT,
                    particle_layout.contains(Attribute::NEXT),
                );
                flags.set(
                    ParticleInitPipelineKeyFlags::CONSUME_GPU_SPAWN_EVENTS,
                    has_event_buffer,
                );
                flags
            };

            // https://github.com/bevyengine/bevy/issues/17132
            let particle_bind_group_layout_id = particle_bind_group_layout.id();
            let spawner_bind_group_layout_id = spawner_bind_group_layout.id();
            let metadata_bind_group_layout_id = metadata_bind_group_layout.id();
            init_pipeline.temp_particle_bind_group_layout =
                Some(particle_bind_group_layout.clone());
            init_pipeline.temp_spawner_bind_group_layout = Some(spawner_bind_group_layout.clone());
            init_pipeline.temp_metadata_bind_group_layout = Some(metadata_bind_group_layout);
            let init_pipeline_id: CachedComputePipelineId = specialized_init_pipelines.specialize(
                pipeline_cache.as_ref(),
                &init_pipeline,
                ParticleInitPipelineKey {
                    shader: extracted_effect.effect_shaders.init.clone(),
                    particle_layout_min_binding_size,
                    parent_particle_layout_min_binding_size,
                    flags: init_pipeline_key_flags,
                    particle_bind_group_layout_id,
                    spawner_bind_group_layout_id,
                    metadata_bind_group_layout_id,
                },
            );
            // keep things tidy; this is just a hack, should not persist
            init_pipeline.temp_particle_bind_group_layout = None;
            init_pipeline.temp_spawner_bind_group_layout = None;
            init_pipeline.temp_metadata_bind_group_layout = None;
            trace!("Init pipeline specialized: id={:?}", init_pipeline_id);

            cached_pipelines.init = Some(init_pipeline_id);
            init_pipeline_id
        };

        // Resolve the update pipeline
        let update_pipeline_id = if let Some(update_pipeline_id) = cached_pipelines.update.as_ref()
        {
            *update_pipeline_id
        } else {
            // Clear flag just in case, to ensure consistency.
            cached_pipelines
                .flags
                .remove(CachedPipelineFlags::UPDATE_PIPELINE_READY);

            let num_event_buffers = maybe_cached_parent_info
                .as_ref()
                .map(|p| p.children.len() as u32)
                .unwrap_or_default();

            // FIXME: currently don't hava a way to determine when this is needed, because
            // we know the number of children per parent only after resolving
            // all parents, but by that point we forgot if this is a newly added
            // effect or not. So since we need to re-ensure for all effects, not
            // only new ones, might as well do here...
            effect_cache.ensure_metadata_update_bind_group_layout(num_event_buffers);

            // Fetch the bind group layouts from the cache
            let metadata_bind_group_layout = effect_cache
                .metadata_update_bind_group_layout(num_event_buffers)
                .unwrap()
                .clone();

            // https://github.com/bevyengine/bevy/issues/17132
            let particle_bind_group_layout_id = particle_bind_group_layout.id();
            let spawner_bind_group_layout_id = spawner_bind_group_layout.id();
            let metadata_bind_group_layout_id = metadata_bind_group_layout.id();
            update_pipeline.temp_particle_bind_group_layout = Some(particle_bind_group_layout);
            update_pipeline.temp_spawner_bind_group_layout =
                Some(spawner_bind_group_layout.clone());
            update_pipeline.temp_metadata_bind_group_layout = Some(metadata_bind_group_layout);
            let update_pipeline_id = specialized_update_pipelines.specialize(
                pipeline_cache.as_ref(),
                &update_pipeline,
                ParticleUpdatePipelineKey {
                    shader: extracted_effect.effect_shaders.update.clone(),
                    particle_layout: particle_layout.clone(),
                    parent_particle_layout_min_binding_size,
                    num_event_buffers,
                    particle_bind_group_layout_id,
                    spawner_bind_group_layout_id,
                    metadata_bind_group_layout_id,
                },
            );
            // keep things tidy; this is just a hack, should not persist
            update_pipeline.temp_particle_bind_group_layout = None;
            update_pipeline.temp_spawner_bind_group_layout = None;
            update_pipeline.temp_metadata_bind_group_layout = None;
            trace!("Update pipeline specialized: id={:?}", update_pipeline_id);

            cached_pipelines.update = Some(update_pipeline_id);
            update_pipeline_id
        };

        // Never batch an effect with a pipeline not available; this will prevent its
        // init/update pass from running, but the vfx_indirect pass will run
        // nonetheless, which causes desyncs and leads to bugs.
        if pipeline_cache
            .get_compute_pipeline(init_pipeline_id)
            .is_none()
        {
            trace!(
                "Skipping effect from render entity {:?} due to missing or not ready init pipeline (status: {:?})",
                entity,
                pipeline_cache.get_compute_pipeline_state(init_pipeline_id)
            );
            cached_pipelines
                .flags
                .remove(CachedPipelineFlags::INIT_PIPELINE_READY);
            continue;
        }

        // PipelineCache::get_compute_pipeline() only returns a value if the pipeline is
        // ready
        cached_pipelines
            .flags
            .insert(CachedPipelineFlags::INIT_PIPELINE_READY);
        trace!("[Effect {:?}] Init pipeline ready.", entity);

        // Never batch an effect with a pipeline not available; this will prevent its
        // init/update pass from running, but the vfx_indirect pass will run
        // nonetheless, which causes desyncs and leads to bugs.
        if pipeline_cache
            .get_compute_pipeline(update_pipeline_id)
            .is_none()
        {
            trace!(
                "Skipping effect from render entity {:?} due to missing or not ready update pipeline (status: {:?})",
                entity,
                pipeline_cache.get_compute_pipeline_state(update_pipeline_id)
            );
            cached_pipelines
                .flags
                .remove(CachedPipelineFlags::UPDATE_PIPELINE_READY);
            continue;
        }

        // PipelineCache::get_compute_pipeline() only returns a value if the pipeline is
        // ready
        cached_pipelines
            .flags
            .insert(CachedPipelineFlags::UPDATE_PIPELINE_READY);
        trace!("[Effect {:?}] Update pipeline ready.", entity);
    }
}

pub fn prepare_indirect_pipeline(
    event_cache: Res<EventCache>,
    mut effects_meta: ResMut<EffectsMeta>,
    pipeline_cache: Res<PipelineCache>,
    indirect_pipeline: Res<DispatchIndirectPipeline>,
    mut specialized_indirect_pipelines: ResMut<
        SpecializedComputePipelines<DispatchIndirectPipeline>,
    >,
) {
    // Ensure the 2 variants of the indirect pipelines are created.
    // TODO - move that elsewhere in some one-time setup?
    if effects_meta.indirect_pipeline_ids[0] == CachedComputePipelineId::INVALID {
        effects_meta.indirect_pipeline_ids[0] = specialized_indirect_pipelines.specialize(
            pipeline_cache.as_ref(),
            &indirect_pipeline,
            DispatchIndirectPipelineKey { has_events: false },
        );
    }
    if effects_meta.indirect_pipeline_ids[1] == CachedComputePipelineId::INVALID {
        effects_meta.indirect_pipeline_ids[1] = specialized_indirect_pipelines.specialize(
            pipeline_cache.as_ref(),
            &indirect_pipeline,
            DispatchIndirectPipelineKey { has_events: true },
        );
    }

    // Select the active one depending on whether there's any child info to consume
    let is_empty = event_cache.child_infos().is_empty();
    if effects_meta.active_indirect_pipeline_id == CachedComputePipelineId::INVALID {
        if is_empty {
            effects_meta.active_indirect_pipeline_id = effects_meta.indirect_pipeline_ids[0];
        } else {
            effects_meta.active_indirect_pipeline_id = effects_meta.indirect_pipeline_ids[1];
        }
    } else {
        // If this is the first time we insert an event buffer, we need to switch the
        // indirect pass from non-event to event mode. That is, we need to re-allocate
        // the pipeline with the child infos buffer binding. Conversely, if there's no
        // more effect using GPU spawn events, we can deallocate.
        let was_empty =
            effects_meta.active_indirect_pipeline_id == effects_meta.indirect_pipeline_ids[0];
        if was_empty && !is_empty {
            trace!("First event buffer inserted; switching indirect pass to event mode...");
            effects_meta.active_indirect_pipeline_id = effects_meta.indirect_pipeline_ids[1];
        } else if is_empty && !was_empty {
            trace!("Last event buffer removed; switching indirect pass to no-event mode...");
            effects_meta.active_indirect_pipeline_id = effects_meta.indirect_pipeline_ids[0];
        }
    }
}

// TEMP - Mark all cached effects as invalid for this frame until another system
// explicitly marks them as valid. Otherwise we early out in some parts, and
// reuse by mistake the previous frame's extraction.
pub fn clear_transient_batch_inputs(
    mut commands: Commands,
    mut q_cached_effects: Query<Entity, With<BatchInput>>,
) {
    for entity in &mut q_cached_effects {
        if let Ok(mut cmd) = commands.get_entity(entity) {
            cmd.remove::<BatchInput>();
        }
    }
}

/// Effect mesh extracted from the main world.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Component)]
pub(crate) struct ExtractedEffectMesh {
    /// Asset of the effect mesh to draw.
    pub mesh: AssetId<Mesh>,
}

/// Indexed mesh metadata for [`CachedMesh`].
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub(crate) struct MeshIndexSlice {
    /// Index format.
    pub format: IndexFormat,
    /// GPU buffer containing the indices.
    pub buffer: Buffer,
    /// Range inside [`Self::buffer`] where the indices are.
    pub range: Range<u32>,
}

impl PartialEq for MeshIndexSlice {
    fn eq(&self, other: &Self) -> bool {
        self.format == other.format
            && self.buffer.id() == other.buffer.id()
            && self.range == other.range
    }
}

impl Eq for MeshIndexSlice {}

/// Cached info about a mesh location in a Bevy buffer. This information is
/// uploaded to GPU into [`GpuEffectMetadata`] for indirect rendering, but is
/// also kept CPU side in this component to detect when Bevy relocated a mesh,
/// so we can invalidate that GPU data.
#[derive(Debug, Clone, PartialEq, Eq, Component)]
pub(crate) struct CachedMeshLocation {
    /// Vertex buffer.
    pub vertex_buffer: BufferId,
    /// See [`GpuEffectMetadata::vertex_or_index_count`].
    pub vertex_or_index_count: u32,
    /// See [`GpuEffectMetadata::first_index_or_vertex_offset`].
    pub first_index_or_vertex_offset: u32,
    /// See [`GpuEffectMetadata::vertex_offset_or_base_instance`].
    pub vertex_offset_or_base_instance: i32,
    /// Indexed rendering metadata.
    pub indexed: Option<MeshIndexSlice>,
}

/// Cached info about the [`GpuEffectMetadata`] allocation for this effect.
///
/// The component is present when the [`GpuEffectMetadata`] is allocated.
#[derive(Debug, Clone, PartialEq, Eq, Component)]
pub(crate) struct CachedMetadata {
    pub buffer_table_id: BufferTableId,
}

bitflags! {
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct CachedPipelineFlags: u8 {
        const NONE = 0;
        /// The init pipeline for this effect is ready for use. This means the compute pipeline is compiled and cached.
        const INIT_PIPELINE_READY = (1u8 << 0);
        /// The update pipeline for this effect is ready for use. This means the compute pipeline is compiled and cached.
        const UPDATE_PIPELINE_READY = (1u8 << 1);
    }
}

impl Default for CachedPipelineFlags {
    fn default() -> Self {
        Self::NONE
    }
}

/// Render world cached shader pipelines for a [`CachedEffect`].
///
/// This is updated with the IDs of the pipelines when they are queued for
/// compiling, and with the state of those pipelines to detect when the effect
/// is ready to be used.
///
/// This component is always auto-inserted alongside [`ExtractedEffect`] as soon
/// as a new effect instance is spawned, because it contains the readiness state
/// of those pipelines, which we want to query each frame. The pipelines are
/// also mandatory, so this component is always needed.
#[derive(Debug, Default, Component)]
pub(crate) struct CachedPipelines {
    /// Caching flags indicating the pipelines readiness.
    pub flags: CachedPipelineFlags,
    /// ID of the cached init pipeline. This is valid once the pipeline is
    /// queued for compilation, but this doesn't mean the pipeline is ready for
    /// use. Readiness is encoded in [`Self::flags`].
    pub init: Option<CachedComputePipelineId>,
    /// ID of the cached update pipeline. This is valid once the pipeline is
    /// queued for compilation, but this doesn't mean the pipeline is ready for
    /// use. Readiness is encoded in [`Self::flags`].
    pub update: Option<CachedComputePipelineId>,
}

impl CachedPipelines {
    /// Check if all pipelines for this effect are ready.
    #[inline]
    pub fn is_ready(&self) -> bool {
        self.flags.contains(
            CachedPipelineFlags::INIT_PIPELINE_READY | CachedPipelineFlags::UPDATE_PIPELINE_READY,
        )
    }
}

/// Ready state for this effect.
///
/// An effect is ready if:
/// - Its init and update pipelines are ready, as reported by
///   [`CachedPipelines::is_ready()`].
///
/// This components holds the calculated ready state propagated from all
/// ancestor effects, if any. That propagation is done by the
/// [`propagate_ready_state()`] system.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Component)]
pub(crate) struct CachedReadyState {
    is_ready: bool,
}

impl CachedReadyState {
    #[inline(always)]
    pub fn new(is_ready: bool) -> Self {
        Self { is_ready }
    }

    #[inline(always)]
    pub fn and(mut self, ancestors_ready: bool) -> Self {
        self.and_with(ancestors_ready);
        self
    }

    #[inline(always)]
    pub fn and_with(&mut self, ancestors_ready: bool) {
        self.is_ready = self.is_ready && ancestors_ready;
    }

    #[inline(always)]
    pub fn is_ready(&self) -> bool {
        self.is_ready
    }
}

#[derive(SystemParam)]
pub struct PrepareEffectsReadOnlyParams<'w, 's> {
    sim_params: Res<'w, SimParams>,
    render_device: Res<'w, RenderDevice>,
    render_queue: Res<'w, RenderQueue>,
    marker: PhantomData<&'s usize>,
}

#[derive(SystemParam)]
pub struct PipelineSystemParams<'w, 's> {
    pipeline_cache: Res<'w, PipelineCache>,
    init_pipeline: ResMut<'w, ParticlesInitPipeline>,
    indirect_pipeline: Res<'w, DispatchIndirectPipeline>,
    update_pipeline: ResMut<'w, ParticlesUpdatePipeline>,
    marker: PhantomData<&'s usize>,
}

/// Update the ready state of all effects, and propagate recursively to
/// children.
pub(crate) fn propagate_ready_state(
    mut q_root_effects: Query<
        (
            Entity,
            Option<&ChildrenEffects>,
            Ref<CachedPipelines>,
            &mut CachedReadyState,
        ),
        Without<ChildEffectOf>,
    >,
    mut orphaned: RemovedComponents<ChildEffectOf>,
    q_ready_state: Query<
        (
            Ref<CachedPipelines>,
            &mut CachedReadyState,
            Option<&ChildrenEffects>,
        ),
        With<ChildEffectOf>,
    >,
    q_child_effects: Query<(Entity, Ref<ChildEffectOf>), With<CachedReadyState>>,
    mut orphaned_entities: Local<Vec<Entity>>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("propagate_ready_state").entered();
    trace!("propagate_ready_state");

    // Update orphaned list for this frame, and sort it so we can efficiently binary
    // search it
    orphaned_entities.clear();
    orphaned_entities.extend(orphaned.read());
    orphaned_entities.sort_unstable();

    // Iterate in parallel over all root effects (those without any parent). This is
    // the most common case, so should take care of the heavy lifting of propagating
    // to most effects. For child effects, we then descend recursively.
    q_root_effects.par_iter_mut().for_each(
        |(entity, maybe_children, cached_pipelines, mut cached_ready_state)| {
            // Update the ready state of this root effect
            let changed = cached_pipelines.is_changed() || cached_ready_state.is_added() || orphaned_entities.binary_search(&entity).is_ok();
            trace!("[Entity {}] changed={} cached_pipelines={} ready_state={}", entity, changed, cached_pipelines.is_ready(), cached_ready_state.is_ready);
            if changed {
                // Root effects by default are ready since they have no ancestors to check. After that we check the ready conditions for this effect alone.
                let new_ready_state = CachedReadyState::new(cached_pipelines.is_ready());
                if *cached_ready_state != new_ready_state {
                    debug!(
                        "[Entity {}] Changed ready to: {}",
                        entity,
                        new_ready_state.is_ready()
                    );
                    *cached_ready_state = new_ready_state;
                }
            }

            // Recursively update the ready state of its descendants
            if let Some(children) = maybe_children {
                for (child, child_of) in q_child_effects.iter_many(children) {
                    assert_eq!(
                        child_of.parent, entity,
                        "Malformed hierarchy. This probably means that your hierarchy has been improperly maintained, or contains a cycle"
                    );
                    // SAFETY:
                    // - `child` must have consistent parentage, or the above assertion would panic.
                    //   Since `child` is parented to a root entity, the entire hierarchy leading to it
                    //   is consistent.
                    // - We may operate as if all descendants are consistent, since
                    //   `propagate_ready_state_recursive` will panic before continuing to propagate if it
                    //   encounters an entity with inconsistent parentage.
                    // - Since each root entity is unique and the hierarchy is consistent and
                    //   forest-like, other root entities' `propagate_ready_state_recursive` calls will not conflict
                    //   with this one.
                    // - Since this is the only place where `transform_query` gets used, there will be
                    //   no conflicting fetches elsewhere.
                    #[expect(unsafe_code, reason = "`propagate_ready_state_recursive()` is unsafe due to its use of `Query::get_unchecked()`.")]
                    unsafe {
                        propagate_ready_state_recursive(
                            &cached_ready_state,
                            &q_ready_state,
                            &q_child_effects,
                            child,
                            changed || child_of.is_changed(),
                        );
                    }
                }
            }
        },
    );
}

#[expect(
    unsafe_code,
    reason = "This function uses `Query::get_unchecked()`, which can result in multiple mutable references if the preconditions are not met."
)]
unsafe fn propagate_ready_state_recursive(
    parent_state: &CachedReadyState,
    q_ready_state: &Query<
        (
            Ref<CachedPipelines>,
            &mut CachedReadyState,
            Option<&ChildrenEffects>,
        ),
        With<ChildEffectOf>,
    >,
    q_child_of: &Query<(Entity, Ref<ChildEffectOf>), With<CachedReadyState>>,
    entity: Entity,
    mut changed: bool,
) {
    // Update this effect in-place by checking its own state and the state of its
    // parent (which has already been propagated from all the parent's ancestors, so
    // is correct for this frame).
    let (cached_ready_state, maybe_children) = {
        let Ok((cached_pipelines, mut cached_ready_state, maybe_children)) =
        // SAFETY: Copied from Bevy's transform propagation, same reasoning
        (unsafe { q_ready_state.get_unchecked(entity) }) else {
            return;
        };

        changed |= cached_pipelines.is_changed() || cached_ready_state.is_added();
        if changed {
            let new_ready_state =
                CachedReadyState::new(parent_state.is_ready()).and(cached_pipelines.is_ready());
            // Ensure we don't trigger ECS change detection here if state didn't change, so
            // we can avoid this effect branch on next iteration.
            if *cached_ready_state != new_ready_state {
                debug!(
                    "[Entity {}] Changed ready to: {}",
                    entity,
                    new_ready_state.is_ready()
                );
                *cached_ready_state = new_ready_state;
            }
        }
        (cached_ready_state, maybe_children)
    };

    // Recurse into descendants
    let Some(children) = maybe_children else {
        return;
    };
    for (child, child_of) in q_child_of.iter_many(children) {
        assert_eq!(
        child_of.parent, entity,
        "Malformed hierarchy. This probably means that your hierarchy has been improperly maintained, or contains a cycle"
    );
        // SAFETY: The caller guarantees that `transform_query` will not be fetched for
        // any descendants of `entity`, so it is safe to call
        // `propagate_recursive` for each child.
        //
        // The above assertion ensures that each child has one and only one unique
        // parent throughout the entire hierarchy.
        unsafe {
            propagate_ready_state_recursive(
                cached_ready_state.as_ref(),
                q_ready_state,
                q_child_of,
                child,
                changed || child_of.is_changed(),
            );
        }
    }
}

/// Once all effects are extracted and all cached components are updated, it's
/// time to prepare for sorting and batching. Collect all relevant data and
/// insert/update the [`BatchInput`] for each effect.
pub(crate) fn prepare_batch_inputs(
    mut commands: Commands,
    read_only_params: PrepareEffectsReadOnlyParams,
    pipelines: PipelineSystemParams,
    mut effects_meta: ResMut<EffectsMeta>,
    mut effect_bind_groups: ResMut<EffectBindGroups>,
    mut property_bind_groups: ResMut<PropertyBindGroups>,
    q_cached_effects: Query<(
        MainEntity,
        Entity,
        &ExtractedEffect,
        &ExtractedSpawner,
        &CachedEffect,
        &CachedEffectMetadata,
        &CachedReadyState,
        &CachedPipelines,
        Option<&CachedDrawIndirectArgs>,
        Option<&CachedParentInfo>,
        Option<&ChildEffectOf>,
        Option<&CachedChildInfo>,
        Option<&CachedEffectEvents>,
    )>,
    mut sort_bind_groups: ResMut<SortBindGroups>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("prepare_batch_inputs").entered();
    trace!("prepare_batch_inputs");

    // Workaround for too many params in system (TODO: refactor to split work?)
    let sim_params = read_only_params.sim_params.into_inner();
    let render_device = read_only_params.render_device.into_inner();
    let render_queue = read_only_params.render_queue.into_inner();
    let pipeline_cache = pipelines.pipeline_cache.into_inner();

    // Clear per-instance buffers, which are filled below and re-uploaded each frame
    effects_meta.spawner_buffer.clear();

    // Build batcher inputs from extracted effects, updating all cached components
    // for each effect on the fly.
    let mut extracted_effect_count = 0;
    let mut prepared_effect_count = 0;
    for (
        main_entity,
        render_entity,
        extracted_effect,
        extracted_spawner,
        cached_effect,
        cached_effect_metadata,
        cached_ready_state,
        cached_pipelines,
        maybe_cached_draw_indirect_args,
        maybe_cached_parent_info,
        maybe_child_effect_of,
        maybe_cached_child_info,
        maybe_cached_effect_events,
    ) in &q_cached_effects
    {
        extracted_effect_count += 1;

        // Skip this effect if not ready
        if !cached_ready_state.is_ready() {
            trace!("Pipelines not ready for effect {}, skipped.", render_entity);
            continue;
        }

        // Skip this effect if not visible and not simulating when hidden
        if !extracted_spawner.is_visible
            && (extracted_effect.simulation_condition == SimulationCondition::WhenVisible)
        {
            trace!(
                "Effect {} not visible, and simulation condition is WhenVisible, so skipped.",
                render_entity
            );
            continue;
        }

        // Fetch the init and update pipelines.
        // SAFETY: If is_ready() returns true, this means the pipelines are cached and
        // ready, so the IDs must be valid.
        let init_and_update_pipeline_ids = InitAndUpdatePipelineIds {
            init: cached_pipelines.init.unwrap(),
            update: cached_pipelines.update.unwrap(),
        };

        let effect_slice = EffectSlice {
            slice: cached_effect.slice.range(),
            slab_id: cached_effect.slab_id,
            particle_layout: cached_effect.slice.particle_layout.clone(),
        };

        // Fetch the bind group layouts from the cache
        trace!("child_effect_of={:?}", maybe_child_effect_of);
        let parent_slab_id = if let Some(child_effect_of) = maybe_child_effect_of {
            let Ok((_, _, _, _, parent_cached_effect, _, _, _, _, _, _, _, _)) =
                q_cached_effects.get(child_effect_of.parent)
            else {
                // At this point we should have discarded invalid effects with a missing parent,
                // so if the parent is not found this is a bug.
                error!(
                    "Effect main_entity {:?}: parent render entity {:?} not found.",
                    main_entity, child_effect_of.parent
                );
                continue;
            };
            Some(parent_cached_effect.slab_id)
        } else {
            None
        };

        // For ribbons, we need the sorting pipeline to be ready to sort the ribbon's
        // particles by age in order to build a contiguous mesh.
        if extracted_effect.layout_flags.contains(LayoutFlags::RIBBONS) {
            // Ensure the bind group layout for sort-fill is ready. This will also ensure
            // the pipeline is created and queued if needed.
            if let Err(err) = sort_bind_groups.ensure_sort_fill_bind_group_layout(
                pipeline_cache,
                &extracted_effect.particle_layout,
            ) {
                error!(
                    "Failed to create bind group for ribbon effect sorting: {:?}",
                    err
                );
                continue;
            }

            // Check sort pipelines are ready, otherwise we might desync some buffers if
            // running only some of them but not all.
            if !sort_bind_groups
                .is_pipeline_ready(&extracted_effect.particle_layout, pipeline_cache)
            {
                trace!(
                    "Sort pipeline not ready for effect on main entity {:?}; skipped.",
                    main_entity
                );
                continue;
            }
        }

        // Output some debug info
        trace!("init_shader = {:?}", extracted_effect.effect_shaders.init);
        trace!(
            "update_shader = {:?}",
            extracted_effect.effect_shaders.update
        );
        trace!(
            "render_shader = {:?}",
            extracted_effect.effect_shaders.render
        );
        trace!("layout_flags = {:?}", extracted_effect.layout_flags);
        trace!("particle_layout = {:?}", effect_slice.particle_layout);

        assert!(cached_effect_metadata.table_id.is_valid());
        let spawner_index = effects_meta.allocate_spawner(
            &extracted_spawner.transform,
            extracted_spawner.spawn_count,
            extracted_spawner.prng_seed,
            cached_effect_metadata.table_id,
            maybe_cached_draw_indirect_args,
        );

        trace!("Updating cached effect at entity {render_entity:?}...");
        let mut cmd = commands.entity(render_entity);
        // Inserting the BatchInput component marks the effect as ready for this frame
        cmd.insert(BatchInput {
            effect_slice,
            init_and_update_pipeline_ids,
            parent_slab_id,
            event_buffer_index: maybe_cached_effect_events.map(|cee| cee.buffer_index),
            child_effects: maybe_cached_parent_info
                .as_ref()
                .map(|cp| cp.children.clone())
                .unwrap_or_default(),
            spawner_index,
            init_indirect_dispatch_index: maybe_cached_child_info
                .as_ref()
                .map(|cc| cc.init_indirect_dispatch_index),
        });

        prepared_effect_count += 1;
    }
    trace!("Prepared {prepared_effect_count}/{extracted_effect_count} extracted effect(s)");

    // Update simulation parameters, including the total effect count for this frame
    {
        let mut gpu_sim_params: GpuSimParams = sim_params.into();
        gpu_sim_params.num_effects = prepared_effect_count;
        trace!(
            "Simulation parameters: time={} delta_time={} virtual_time={} \
                virtual_delta_time={} real_time={} real_delta_time={} num_effects={}",
            gpu_sim_params.time,
            gpu_sim_params.delta_time,
            gpu_sim_params.virtual_time,
            gpu_sim_params.virtual_delta_time,
            gpu_sim_params.real_time,
            gpu_sim_params.real_delta_time,
            gpu_sim_params.num_effects,
        );
        effects_meta.sim_params_uniforms.set(gpu_sim_params);
    }

    // Write the entire spawner buffer for this frame, for all effects combined
    assert_eq!(
        prepared_effect_count,
        effects_meta.spawner_buffer.len() as u32
    );
    if effects_meta
        .spawner_buffer
        .write_buffer(render_device, render_queue)
    {
        // All property bind groups use the spawner buffer, which was reallocate
        effect_bind_groups.particle_slabs.clear();
        property_bind_groups.clear(true);
        effects_meta.indirect_spawner_bind_group = None;
    }
}

/// Batch compatible effects together into a single pass.
///
/// For all effects marked as ready for this frame (have a BatchInput
/// component), sort the effects by grouping compatible effects together, then
/// batch those groups together. Each batch can be updated and rendered with a
/// single compute dispatch or draw call.
pub(crate) fn batch_effects(
    mut commands: Commands,
    effects_meta: Res<EffectsMeta>,
    mut sort_bind_groups: ResMut<SortBindGroups>,
    mut q_cached_effects: Query<(
        Entity,
        &MainEntity,
        &ExtractedEffect,
        &ExtractedSpawner,
        &ExtractedEffectMesh,
        &CachedDrawIndirectArgs,
        &CachedEffectMetadata,
        Option<&CachedEffectEvents>,
        Option<&ChildEffectOf>,
        Option<&CachedChildInfo>,
        Option<&CachedEffectProperties>,
        &mut DispatchBufferIndices,
        // The presence of BatchInput ensure the effect is ready
        &mut BatchInput,
    )>,
    mut sorted_effect_batches: ResMut<SortedEffectBatches>,
    mut gpu_buffer_operations: ResMut<GpuBufferOperations>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("batch_effects").entered();
    trace!("batch_effects");

    // Sort effects in batching order, so that we can batch by simply doing a linear
    // scan of the effects in this order. Currently compatible effects mean:
    // - same effect slab (so we can bind the buffers once for all batched effects)
    // - in order of increasing sub-allocation inside those buffers (to make the
    //   sort stable)
    // - with parents before their children, to ensure ???? FIXME don't we need to
    //   opposite?!!!
    let mut effect_sorter = EffectSorter::new();
    for (entity, _, _, _, _, _, _, _, child_of, _, _, _, input) in &q_cached_effects {
        effect_sorter.insert(
            entity,
            input.effect_slice.slab_id,
            input.effect_slice.slice.start,
            child_of.map(|co| co.parent),
        );
    }
    effect_sorter.sort();

    // For now we re-create that buffer each frame. Since there's no CPU -> GPU
    // transfer, this is pretty cheap in practice.
    sort_bind_groups.clear_indirect_dispatch_buffer();

    let mut sort_queue = GpuBufferOperationQueue::new();

    // Loop on all extracted effects in sorted order, and try to batch them together
    // to reduce draw calls. -- currently does nothing, batching was broken and
    // never fixed, but at least we minimize the GPU state changes with the sorting!
    trace!("Batching {} effects...", q_cached_effects.iter().len());
    sorted_effect_batches.clear();
    for entity in effect_sorter.effects.iter().map(|e| e.entity) {
        let Ok((
            entity,
            main_entity,
            extracted_effect,
            extracted_spawner,
            extracted_effect_mesh,
            cached_draw_indirect_args,
            cached_effect_metadata,
            cached_effect_events,
            _,
            cached_child_info,
            cached_properties,
            dispatch_buffer_indices,
            mut input,
        )) = q_cached_effects.get_mut(entity)
        else {
            continue;
        };

        let translation = extracted_spawner.transform.translation();

        // Spawn one EffectBatch per instance (no batching; TODO). This contains
        // most of the data needed to drive rendering. However this doesn't drive
        // rendering; this is just storage.
        let mut effect_batch = EffectBatch::from_input(
            main_entity.id(),
            extracted_effect,
            extracted_spawner,
            extracted_effect_mesh,
            cached_effect_events,
            cached_child_info,
            &mut input,
            *dispatch_buffer_indices,
            cached_draw_indirect_args.row,
            cached_effect_metadata.table_id,
            cached_properties.map(|cp| PropertyBindGroupKey {
                buffer_index: cp.buffer_index,
                binding_size: cp.property_layout.min_binding_size().get() as u32,
            }),
            cached_properties.map(|cp| cp.range.start),
        );

        // If the batch has ribbons, we need to sort the particles by RIBBON_ID and AGE
        // for ribbon meshing, in order to avoid gaps when some particles in the middle
        // of the ribbon die (since we can't guarantee a linear lifetime through the
        // ribbon).
        if extracted_effect.layout_flags.contains(LayoutFlags::RIBBONS) {
            // This buffer is allocated in prepare_effects(), so should always be available
            let Some(effect_metadata_buffer) = effects_meta.effect_metadata_buffer.buffer() else {
                error!("Failed to find effect metadata buffer. This is a bug.");
                continue;
            };

            // Allocate a GpuDispatchIndirect entry
            let sort_fill_indirect_dispatch_index = sort_bind_groups.allocate_indirect_dispatch();
            effect_batch.sort_fill_indirect_dispatch_index =
                Some(sort_fill_indirect_dispatch_index);

            // Enqueue a fill dispatch operation which reads GpuEffectMetadata::alive_count,
            // compute a number of workgroups to dispatch based on that particle count, and
            // store the result into a GpuDispatchIndirect struct which will be used to
            // dispatch the fill-sort pass.
            {
                let src_buffer = effect_metadata_buffer.clone();
                let src_binding_offset = effects_meta
                    .effect_metadata_buffer
                    .dynamic_offset(effect_batch.metadata_table_id);
                let src_binding_size = effects_meta.gpu_limits.effect_metadata_aligned_size;
                let Some(dst_buffer) = sort_bind_groups.indirect_buffer() else {
                    error!("Missing indirect dispatch buffer for sorting, cannot schedule particle sort for ribbon. This is a bug.");
                    continue;
                };
                let dst_buffer = dst_buffer.clone();
                let dst_binding_offset = 0; // see dst_offset below
                                            //let dst_binding_size = NonZeroU32::new(12).unwrap();
                trace!(
                    "queue_fill_dispatch(): src#{:?}@+{}B ({}B) -> dst#{:?}@+{}B ({}B)",
                    src_buffer.id(),
                    src_binding_offset,
                    src_binding_size.get(),
                    dst_buffer.id(),
                    dst_binding_offset,
                    -1, //dst_binding_size.get(),
                );
                let src_offset = std::mem::offset_of!(GpuEffectMetadata, alive_count) as u32 / 4;
                debug_assert_eq!(
                    src_offset, 1,
                    "GpuEffectMetadata changed, update this assert."
                );
                // FIXME - This is a quick fix to get 0.15 out. The previous code used the
                // dynamic binding offset, but the indirect dispatch structs are only 12 bytes,
                // so are not aligned to min_storage_buffer_offset_alignment. The fix uses a
                // binding offset of 0 and binds the entire destination buffer,
                // then use the dst_offset value embedded inside the GpuBufferOperationArgs to
                // index the proper offset in the buffer. This requires of
                // course binding the entire buffer, or at least enough to index all operations
                // (hence the None below). This is not really a general solution, so should be
                // reviewed.
                let dst_offset = sort_bind_groups
                    .get_indirect_dispatch_byte_offset(sort_fill_indirect_dispatch_index)
                    / 4;
                sort_queue.enqueue(
                    GpuBufferOperationType::FillDispatchArgs,
                    GpuBufferOperationArgs {
                        src_offset,
                        src_stride: effects_meta.gpu_limits.effect_metadata_aligned_size.get() / 4,
                        dst_offset,
                        dst_stride: GpuDispatchIndirectArgs::SHADER_SIZE.get() as u32 / 4,
                        count: 1,
                    },
                    src_buffer,
                    src_binding_offset,
                    Some(src_binding_size),
                    dst_buffer,
                    dst_binding_offset,
                    None, //Some(dst_binding_size),
                );
            }
        }

        let effect_batch_index = sorted_effect_batches.push(effect_batch);
        trace!(
            "Spawned effect batch #{:?} from cached instance on entity {:?}.",
            effect_batch_index,
            entity,
        );

        // Spawn an EffectDrawBatch, to actually drive rendering.
        commands
            .spawn(EffectDrawBatch {
                effect_batch_index,
                translation,
                main_entity: *main_entity,
            })
            .insert(TemporaryRenderEntity);
    }

    gpu_buffer_operations.begin_frame();
    debug_assert!(sorted_effect_batches.dispatch_queue_index.is_none());
    if !sort_queue.operation_queue.is_empty() {
        sorted_effect_batches.dispatch_queue_index = Some(gpu_buffer_operations.submit(sort_queue));
    }
}

/// Per-buffer bind groups for a GPU effect buffer.
///
/// This contains all bind groups specific to a single [`EffectBuffer`].
///
/// [`EffectBuffer`]: crate::render::effect_cache::EffectBuffer
pub(crate) struct BufferBindGroups {
    /// Bind group for the render shader.
    ///
    /// ```wgsl
    /// @binding(0) var<storage, read> particle_buffer : ParticleBuffer;
    /// @binding(1) var<storage, read> indirect_buffer : IndirectBuffer;
    /// @binding(2) var<storage, read> spawner : Spawner;
    /// ```
    render: BindGroup,
    // /// Bind group for filling the indirect dispatch arguments of any child init
    // /// pass.
    // ///
    // /// This bind group is optional; it's only created if the current effect has
    // /// a GPU spawn event buffer, irrelevant of whether it has child effects
    // /// (although normally the event buffer is not created if there's no
    // /// children).
    // ///
    // /// The source buffer is always the current effect's event buffer. The
    // /// destination buffer is the global shared buffer for indirect fill args
    // /// operations owned by the [`EffectCache`]. The uniform buffer of operation
    // /// args contains the data to index the relevant part of the global shared
    // /// buffer for this effect buffer; it may contain multiple entries in case
    // /// multiple effects are batched inside the current effect buffer.
    // ///
    // /// ```wgsl
    // /// @group(0) @binding(0) var<uniform> args : BufferOperationArgs;
    // /// @group(0) @binding(1) var<storage, read> src_buffer : array<u32>;
    // /// @group(0) @binding(2) var<storage, read_write> dst_buffer : array<u32>;
    // /// ```
    // init_fill_dispatch: Option<BindGroup>,
}

/// Combination of a texture layout and the bound textures.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
struct Material {
    layout: TextureLayout,
    textures: Vec<AssetId<Image>>,
}

impl Material {
    /// Get the bind group entries to create a bind group.
    pub fn make_entries<'a>(
        &self,
        gpu_images: &'a RenderAssets<GpuImage>,
    ) -> Result<Vec<BindGroupEntry<'a>>, ()> {
        if self.textures.is_empty() {
            return Ok(vec![]);
        }

        let entries: Vec<BindGroupEntry<'a>> = self
            .textures
            .iter()
            .enumerate()
            .flat_map(|(index, id)| {
                let base_binding = index as u32 * 2;
                if let Some(gpu_image) = gpu_images.get(*id) {
                    vec![
                        BindGroupEntry {
                            binding: base_binding,
                            resource: BindingResource::TextureView(&gpu_image.texture_view),
                        },
                        BindGroupEntry {
                            binding: base_binding + 1,
                            resource: BindingResource::Sampler(&gpu_image.sampler),
                        },
                    ]
                } else {
                    vec![]
                }
            })
            .collect();
        if entries.len() == self.textures.len() * 2 {
            return Ok(entries);
        }
        Err(())
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct BindingKey {
    pub buffer_id: BufferId,
    pub offset: u32,
    pub size: NonZeroU32,
}

impl<'a> From<BufferSlice<'a>> for BindingKey {
    fn from(value: BufferSlice<'a>) -> Self {
        Self {
            buffer_id: value.buffer.id(),
            offset: value.offset,
            size: value.size,
        }
    }
}

impl<'a> From<&BufferSlice<'a>> for BindingKey {
    fn from(value: &BufferSlice<'a>) -> Self {
        Self {
            buffer_id: value.buffer.id(),
            offset: value.offset,
            size: value.size,
        }
    }
}

impl From<&BufferBindingSource> for BindingKey {
    fn from(value: &BufferBindingSource) -> Self {
        Self {
            buffer_id: value.buffer.id(),
            offset: value.offset,
            size: value.size,
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct ConsumeEventKey {
    child_infos_buffer_id: BufferId,
    events: BindingKey,
}

impl From<&ConsumeEventBuffers<'_>> for ConsumeEventKey {
    fn from(value: &ConsumeEventBuffers) -> Self {
        Self {
            child_infos_buffer_id: value.child_infos_buffer.id(),
            events: value.events.into(),
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct InitMetadataBindGroupKey {
    pub slab_id: SlabId,
    pub effect_metadata_buffer: BufferId,
    pub effect_metadata_offset: u32,
    pub consume_event_key: Option<ConsumeEventKey>,
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct UpdateMetadataBindGroupKey {
    pub slab_id: SlabId,
    pub effect_metadata_buffer: BufferId,
    pub effect_metadata_offset: u32,
    pub child_info_buffer_id: Option<BufferId>,
    pub event_buffers_keys: Vec<BindingKey>,
}

/// Bind group cached with an associated key.
///
/// The cached bind group is associated with the given key representing the
/// inputs that the bind group depends on. When those inputs change, the key
/// should change, indicating the bind group needs to be recreated.
///
/// This object manages a single bind group and its key.
struct CachedBindGroup<K: Eq> {
    /// Key the bind group was created from. Each time the key changes, the bind
    /// group should be re-created.
    key: K,
    /// Bind group created from the key.
    bind_group: BindGroup,
}

#[derive(Debug, Clone, Copy)]
struct BufferSlice<'a> {
    pub buffer: &'a Buffer,
    pub offset: u32,
    pub size: NonZeroU32,
}

impl<'a> From<BufferSlice<'a>> for BufferBinding<'a> {
    fn from(value: BufferSlice<'a>) -> Self {
        Self {
            buffer: value.buffer,
            offset: value.offset.into(),
            size: Some(value.size.into()),
        }
    }
}

impl<'a> From<&BufferSlice<'a>> for BufferBinding<'a> {
    fn from(value: &BufferSlice<'a>) -> Self {
        Self {
            buffer: value.buffer,
            offset: value.offset.into(),
            size: Some(value.size.into()),
        }
    }
}

impl<'a> From<&'a BufferBindingSource> for BufferSlice<'a> {
    fn from(value: &'a BufferBindingSource) -> Self {
        Self {
            buffer: &value.buffer,
            offset: value.offset,
            size: value.size,
        }
    }
}

/// Optional input to [`EffectBindGroups::get_or_create_init_metadata()`] when
/// the init pass consumes GPU events as a mechanism to spawn particles.
struct ConsumeEventBuffers<'a> {
    /// Entire buffer containing the [`GpuChildInfo`] entries for all effects.
    /// This is dynamically indexed inside the shader.
    child_infos_buffer: &'a Buffer,
    /// Slice of the [`EventBuffer`] where the GPU spawn events are stored.
    events: BufferSlice<'a>,
}

#[derive(Default, Resource)]
pub struct EffectBindGroups {
    /// Map from a slab ID to the bind groups shared among all effects that
    /// use that particle slab.
    particle_slabs: HashMap<SlabId, BufferBindGroups>,
    /// Map of bind groups for image assets used as particle textures.
    images: HashMap<AssetId<Image>, BindGroup>,
    /// Map from buffer index to its metadata bind group (group 3) for the init
    /// pass.
    // FIXME - doesn't work with batching; this should be the instance ID
    init_metadata_bind_groups: HashMap<SlabId, CachedBindGroup<InitMetadataBindGroupKey>>,
    /// Map from buffer index to its metadata bind group (group 3) for the
    /// update pass.
    // FIXME - doesn't work with batching; this should be the instance ID
    update_metadata_bind_groups: HashMap<SlabId, CachedBindGroup<UpdateMetadataBindGroupKey>>,
    /// Map from an effect material to its bind group.
    material_bind_groups: HashMap<Material, BindGroup>,
}

impl EffectBindGroups {
    pub fn particle_render(&self, slab_id: &SlabId) -> Option<&BindGroup> {
        self.particle_slabs.get(slab_id).map(|bg| &bg.render)
    }

    /// Retrieve the metadata@3 bind group for the init pass, creating it if
    /// needed.
    pub(self) fn get_or_create_init_metadata(
        &mut self,
        effect_batch: &EffectBatch,
        gpu_limits: &GpuLimits,
        render_device: &RenderDevice,
        layout: &BindGroupLayout,
        effect_metadata_buffer: &Buffer,
        consume_event_buffers: Option<ConsumeEventBuffers>,
    ) -> Result<&BindGroup, ()> {
        assert!(effect_batch.metadata_table_id.is_valid());

        let effect_metadata_offset =
            gpu_limits.effect_metadata_offset(effect_batch.metadata_table_id.0) as u32;
        let key = InitMetadataBindGroupKey {
            slab_id: effect_batch.slab_id,
            effect_metadata_buffer: effect_metadata_buffer.id(),
            effect_metadata_offset,
            consume_event_key: consume_event_buffers.as_ref().map(Into::into),
        };

        let make_entry = || {
            let mut entries = Vec::with_capacity(3);
            entries.push(
                // @group(3) @binding(0) var<storage, read_write> effect_metadata : EffectMetadata;
                BindGroupEntry {
                    binding: 0,
                    resource: BindingResource::Buffer(BufferBinding {
                        buffer: effect_metadata_buffer,
                        offset: key.effect_metadata_offset as u64,
                        size: Some(gpu_limits.effect_metadata_size()),
                    }),
                },
            );
            if let Some(consume_event_buffers) = consume_event_buffers.as_ref() {
                entries.push(
                    // @group(3) @binding(1) var<storage, read> child_info_buffer :
                    // ChildInfoBuffer;
                    BindGroupEntry {
                        binding: 1,
                        resource: BindingResource::Buffer(BufferBinding {
                            buffer: consume_event_buffers.child_infos_buffer,
                            offset: 0,
                            size: None,
                        }),
                    },
                );
                entries.push(
                    // @group(3) @binding(2) var<storage, read> event_buffer : EventBuffer;
                    BindGroupEntry {
                        binding: 2,
                        resource: BindingResource::Buffer(consume_event_buffers.events.into()),
                    },
                );
            }

            let bind_group = render_device.create_bind_group(
                "hanabi:bind_group:init:metadata@3",
                layout,
                &entries[..],
            );

            trace!(
                    "Created new metadata@3 bind group for init pass and buffer index {}: effect_metadata=#{}",
                    effect_batch.slab_id.index(),
                    effect_batch.metadata_table_id.0,
                );

            bind_group
        };

        Ok(&self
            .init_metadata_bind_groups
            .entry(effect_batch.slab_id)
            .and_modify(|cbg| {
                if cbg.key != key {
                    trace!(
                        "Bind group key changed for init metadata@3, re-creating bind group... old={:?} new={:?}",
                        cbg.key,
                        key
                    );
                    cbg.key = key;
                    cbg.bind_group = make_entry();
                }
            })
            .or_insert_with(|| {
                trace!("Inserting new bind group for init metadata@3 with key={:?}", key);
                CachedBindGroup {
                    key,
                    bind_group: make_entry(),
                }
            })
            .bind_group)
    }

    /// Retrieve the metadata@3 bind group for the update pass, creating it if
    /// needed.
    pub(self) fn get_or_create_update_metadata(
        &mut self,
        effect_batch: &EffectBatch,
        gpu_limits: &GpuLimits,
        render_device: &RenderDevice,
        layout: &BindGroupLayout,
        effect_metadata_buffer: &Buffer,
        child_info_buffer: Option<&Buffer>,
        event_buffers: &[(Entity, BufferBindingSource)],
    ) -> Result<&BindGroup, ()> {
        assert!(effect_batch.metadata_table_id.is_valid());

        // Check arguments consistency
        assert_eq!(effect_batch.child_event_buffers.len(), event_buffers.len());
        let emits_gpu_spawn_events = !event_buffers.is_empty();
        let child_info_buffer_id = if emits_gpu_spawn_events {
            child_info_buffer.as_ref().map(|buffer| buffer.id())
        } else {
            // Note: child_info_buffer can be Some() if allocated, but we only consider it
            // if relevant, that is if the effect emits GPU spawn events.
            None
        };
        assert_eq!(emits_gpu_spawn_events, child_info_buffer_id.is_some());

        let event_buffers_keys = event_buffers
            .iter()
            .map(|(_, buffer_binding_source)| buffer_binding_source.into())
            .collect::<Vec<_>>();

        let key = UpdateMetadataBindGroupKey {
            slab_id: effect_batch.slab_id,
            effect_metadata_buffer: effect_metadata_buffer.id(),
            effect_metadata_offset: gpu_limits
                .effect_metadata_offset(effect_batch.metadata_table_id.0)
                as u32,
            child_info_buffer_id,
            event_buffers_keys,
        };

        let make_entry = || {
            let mut entries = Vec::with_capacity(2 + event_buffers.len());
            // @group(3) @binding(0) var<storage, read_write> effect_metadata :
            // EffectMetadata;
            entries.push(BindGroupEntry {
                binding: 0,
                resource: BindingResource::Buffer(BufferBinding {
                    buffer: effect_metadata_buffer,
                    offset: key.effect_metadata_offset as u64,
                    size: Some(gpu_limits.effect_metadata_aligned_size.into()),
                }),
            });
            if emits_gpu_spawn_events {
                let child_info_buffer = child_info_buffer.unwrap();

                // @group(3) @binding(1) var<storage, read_write> child_info_buffer :
                // ChildInfoBuffer;
                entries.push(BindGroupEntry {
                    binding: 1,
                    resource: BindingResource::Buffer(BufferBinding {
                        buffer: child_info_buffer,
                        offset: 0,
                        size: None,
                    }),
                });

                for (index, (_, buffer_binding_source)) in event_buffers.iter().enumerate() {
                    // @group(3) @binding(2+N) var<storage, read_write> event_buffer_N :
                    // EventBuffer;
                    // FIXME - BufferBindingSource originally was for Events, counting in u32, but
                    // then moved to counting in bytes, so now need some conversion. Need to review
                    // all of this...
                    let mut buffer_binding: BufferBinding = buffer_binding_source.into();
                    buffer_binding.offset *= 4;
                    buffer_binding.size = buffer_binding
                        .size
                        .map(|sz| NonZeroU64::new(sz.get() * 4).unwrap());
                    entries.push(BindGroupEntry {
                        binding: 2 + index as u32,
                        resource: BindingResource::Buffer(buffer_binding),
                    });
                }
            }

            let bind_group = render_device.create_bind_group(
                "hanabi:bind_group:update:metadata@3",
                layout,
                &entries[..],
            );

            trace!(
                "Created new metadata@3 bind group for update pass and slab ID {}: effect_metadata={}",
                effect_batch.slab_id.index(),
                effect_batch.metadata_table_id.0,
            );

            bind_group
        };

        Ok(&self
            .update_metadata_bind_groups
            .entry(effect_batch.slab_id)
            .and_modify(|cbg| {
                if cbg.key != key {
                    trace!(
                        "Bind group key changed for update metadata@3, re-creating bind group... old={:?} new={:?}",
                        cbg.key,
                        key
                    );
                    cbg.key = key.clone();
                    cbg.bind_group = make_entry();
                }
            })
            .or_insert_with(|| {
                trace!(
                    "Inserting new bind group for update metadata@3 with key={:?}",
                    key
                );
                CachedBindGroup {
                    key: key.clone(),
                    bind_group: make_entry(),
                }
            })
            .bind_group)
    }
}

#[derive(SystemParam)]
pub struct QueueEffectsReadOnlyParams<'w, 's> {
    #[cfg(feature = "2d")]
    draw_functions_2d: Res<'w, DrawFunctions<Transparent2d>>,
    #[cfg(feature = "3d")]
    draw_functions_3d: Res<'w, DrawFunctions<Transparent3d>>,
    #[cfg(feature = "3d")]
    draw_functions_alpha_mask: Res<'w, DrawFunctions<AlphaMask3d>>,
    #[cfg(feature = "3d")]
    draw_functions_opaque: Res<'w, DrawFunctions<Opaque3d>>,
    marker: PhantomData<&'s usize>,
}

fn emit_sorted_draw<T, F>(
    views: &Query<(&RenderVisibleEntities, &ExtractedView, &Msaa)>,
    render_phases: &mut ResMut<ViewSortedRenderPhases<T>>,
    view_entities: &mut FixedBitSet,
    sorted_effect_batches: &SortedEffectBatches,
    effect_draw_batches: &Query<(Entity, &mut EffectDrawBatch)>,
    render_pipeline: &mut ParticlesRenderPipeline,
    mut specialized_render_pipelines: Mut<SpecializedRenderPipelines<ParticlesRenderPipeline>>,
    render_meshes: &RenderAssets<RenderMesh>,
    pipeline_cache: &PipelineCache,
    make_phase_item: F,
    #[cfg(all(feature = "2d", feature = "3d"))] pipeline_mode: PipelineMode,
) where
    T: SortedPhaseItem,
    F: Fn(CachedRenderPipelineId, (Entity, MainEntity), &EffectDrawBatch, &ExtractedView) -> T,
{
    trace!("emit_sorted_draw() {} views", views.iter().len());

    for (visible_entities, view, msaa) in views.iter() {
        trace!(
            "Process new sorted view with {} visible particle effect entities",
            visible_entities.len::<CompiledParticleEffect>()
        );

        let Some(render_phase) = render_phases.get_mut(&view.retained_view_entity) else {
            continue;
        };

        {
            #[cfg(feature = "trace")]
            let _span = bevy::log::info_span!("collect_view_entities").entered();

            view_entities.clear();
            view_entities.extend(
                visible_entities
                    .iter::<EffectVisibilityClass>()
                    .map(|e| e.1.index() as usize),
            );
        }

        // For each view, loop over all the effect batches to determine if the effect
        // needs to be rendered for that view, and enqueue a view-dependent
        // batch if so.
        for (draw_entity, draw_batch) in effect_draw_batches.iter() {
            #[cfg(feature = "trace")]
            let _span_draw = bevy::log::info_span!("draw_batch").entered();

            trace!(
                "Process draw batch: draw_entity={:?} effect_batch_index={:?}",
                draw_entity,
                draw_batch.effect_batch_index,
            );

            // Get the EffectBatches this EffectDrawBatch is part of.
            let Some(effect_batch) = sorted_effect_batches.get(draw_batch.effect_batch_index)
            else {
                continue;
            };

            trace!(
                "-> EffectBach: slab_id={} spawner_base={} layout_flags={:?}",
                effect_batch.slab_id.index(),
                effect_batch.spawner_base,
                effect_batch.layout_flags,
            );

            // AlphaMask is a binned draw, so no sorted draw can possibly use it
            if effect_batch
                .layout_flags
                .intersects(LayoutFlags::USE_ALPHA_MASK | LayoutFlags::OPAQUE)
            {
                trace!("Non-transparent batch. Skipped.");
                continue;
            }

            // Check if batch contains any entity visible in the current view. Otherwise we
            // can skip the entire batch. Note: This is O(n^2) but (unlike
            // the Sprite renderer this is inspired from) we don't expect more than
            // a handful of particle effect instances, so would rather not pay the memory
            // cost of a FixedBitSet for the sake of an arguable speed-up.
            // TODO - Profile to confirm.
            #[cfg(feature = "trace")]
            let _span_check_vis = bevy::log::info_span!("check_visibility").entered();
            let has_visible_entity = effect_batch
                .entities
                .iter()
                .any(|index| view_entities.contains(*index as usize));
            if !has_visible_entity {
                trace!("No visible entity for view, not emitting any draw call.");
                continue;
            }
            #[cfg(feature = "trace")]
            _span_check_vis.exit();

            // Create and cache the bind group layout for this texture layout
            render_pipeline.cache_material(&effect_batch.texture_layout);

            // FIXME - We draw the entire batch, but part of it may not be visible in this
            // view! We should re-batch for the current view specifically!

            let local_space_simulation = effect_batch
                .layout_flags
                .contains(LayoutFlags::LOCAL_SPACE_SIMULATION);
            let alpha_mask = ParticleRenderAlphaMaskPipelineKey::from(effect_batch.layout_flags);
            let flipbook = effect_batch.layout_flags.contains(LayoutFlags::FLIPBOOK);
            let needs_uv = effect_batch.layout_flags.contains(LayoutFlags::NEEDS_UV);
            let needs_normal = effect_batch
                .layout_flags
                .contains(LayoutFlags::NEEDS_NORMAL);
            let needs_particle_fragment = effect_batch
                .layout_flags
                .contains(LayoutFlags::NEEDS_PARTICLE_FRAGMENT);
            let ribbons = effect_batch.layout_flags.contains(LayoutFlags::RIBBONS);
            let image_count = effect_batch.texture_layout.layout.len() as u8;

            // FIXME - Maybe it's better to copy the mesh layout into the batch, instead of
            // re-querying here...?
            let Some(render_mesh) = render_meshes.get(effect_batch.mesh) else {
                trace!("Batch has no render mesh, skipped.");
                continue;
            };
            let mesh_layout = render_mesh.layout.clone();

            // Specialize the render pipeline based on the effect batch
            trace!(
                "Specializing render pipeline: render_shader={:?} image_count={} alpha_mask={:?} flipbook={:?} hdr={}",
                effect_batch.render_shader,
                image_count,
                alpha_mask,
                flipbook,
                view.hdr
            );

            // Add a draw pass for the effect batch
            trace!("Emitting individual draw for batch");

            let alpha_mode = effect_batch.alpha_mode;

            #[cfg(feature = "trace")]
            let _span_specialize = bevy::log::info_span!("specialize").entered();
            let render_pipeline_id = specialized_render_pipelines.specialize(
                pipeline_cache,
                render_pipeline,
                ParticleRenderPipelineKey {
                    shader: effect_batch.render_shader.clone(),
                    mesh_layout: Some(mesh_layout),
                    particle_layout: effect_batch.particle_layout.clone(),
                    texture_layout: effect_batch.texture_layout.clone(),
                    local_space_simulation,
                    alpha_mask,
                    alpha_mode,
                    flipbook,
                    needs_uv,
                    needs_normal,
                    needs_particle_fragment,
                    ribbons,
                    #[cfg(all(feature = "2d", feature = "3d"))]
                    pipeline_mode,
                    msaa_samples: msaa.samples(),
                    hdr: view.hdr,
                },
            );
            #[cfg(feature = "trace")]
            _span_specialize.exit();

            trace!("+ Render pipeline specialized: id={:?}", render_pipeline_id,);
            trace!(
                "+ Add Transparent for batch on draw_entity {:?}: slab_id={} \
                spawner_base={} handle={:?}",
                draw_entity,
                effect_batch.slab_id.index(),
                effect_batch.spawner_base,
                effect_batch.handle
            );
            render_phase.add(make_phase_item(
                render_pipeline_id,
                (draw_entity, MainEntity::from(Entity::PLACEHOLDER)),
                draw_batch,
                view,
            ));
        }
    }
}

#[cfg(feature = "3d")]
fn emit_binned_draw<T, F, G>(
    views: &Query<(&RenderVisibleEntities, &ExtractedView, &Msaa)>,
    render_phases: &mut ResMut<ViewBinnedRenderPhases<T>>,
    view_entities: &mut FixedBitSet,
    sorted_effect_batches: &SortedEffectBatches,
    effect_draw_batches: &Query<(Entity, &mut EffectDrawBatch)>,
    render_pipeline: &mut ParticlesRenderPipeline,
    mut specialized_render_pipelines: Mut<SpecializedRenderPipelines<ParticlesRenderPipeline>>,
    pipeline_cache: &PipelineCache,
    render_meshes: &RenderAssets<RenderMesh>,
    make_batch_set_key: F,
    make_bin_key: G,
    #[cfg(all(feature = "2d", feature = "3d"))] pipeline_mode: PipelineMode,
    alpha_mask: ParticleRenderAlphaMaskPipelineKey,
    change_tick: &mut Tick,
) where
    T: BinnedPhaseItem,
    F: Fn(CachedRenderPipelineId, &EffectDrawBatch, &ExtractedView) -> T::BatchSetKey,
    G: Fn() -> T::BinKey,
{
    use bevy::render::render_phase::{BinnedRenderPhaseType, InputUniformIndex};

    trace!("emit_binned_draw() {} views", views.iter().len());

    for (visible_entities, view, msaa) in views.iter() {
        trace!("Process new binned view (alpha_mask={:?})", alpha_mask);

        let Some(render_phase) = render_phases.get_mut(&view.retained_view_entity) else {
            continue;
        };

        {
            #[cfg(feature = "trace")]
            let _span = bevy::log::info_span!("collect_view_entities").entered();

            view_entities.clear();
            view_entities.extend(
                visible_entities
                    .iter::<EffectVisibilityClass>()
                    .map(|e| e.1.index() as usize),
            );
        }

        // For each view, loop over all the effect batches to determine if the effect
        // needs to be rendered for that view, and enqueue a view-dependent
        // batch if so.
        for (draw_entity, draw_batch) in effect_draw_batches.iter() {
            #[cfg(feature = "trace")]
            let _span_draw = bevy::log::info_span!("draw_batch").entered();

            trace!(
                "Process draw batch: draw_entity={:?} effect_batch_index={:?}",
                draw_entity,
                draw_batch.effect_batch_index,
            );

            // Get the EffectBatches this EffectDrawBatch is part of.
            let Some(effect_batch) = sorted_effect_batches.get(draw_batch.effect_batch_index)
            else {
                continue;
            };

            trace!(
                "-> EffectBaches: slab_id={} spawner_base={} layout_flags={:?}",
                effect_batch.slab_id.index(),
                effect_batch.spawner_base,
                effect_batch.layout_flags,
            );

            if ParticleRenderAlphaMaskPipelineKey::from(effect_batch.layout_flags) != alpha_mask {
                trace!(
                    "Mismatching alpha mask pipeline key (batches={:?}, expected={:?}). Skipped.",
                    effect_batch.layout_flags,
                    alpha_mask
                );
                continue;
            }

            // Check if batch contains any entity visible in the current view. Otherwise we
            // can skip the entire batch. Note: This is O(n^2) but (unlike
            // the Sprite renderer this is inspired from) we don't expect more than
            // a handful of particle effect instances, so would rather not pay the memory
            // cost of a FixedBitSet for the sake of an arguable speed-up.
            // TODO - Profile to confirm.
            #[cfg(feature = "trace")]
            let _span_check_vis = bevy::log::info_span!("check_visibility").entered();
            let has_visible_entity = effect_batch
                .entities
                .iter()
                .any(|index| view_entities.contains(*index as usize));
            if !has_visible_entity {
                trace!("No visible entity for view, not emitting any draw call.");
                continue;
            }
            #[cfg(feature = "trace")]
            _span_check_vis.exit();

            // Create and cache the bind group layout for this texture layout
            render_pipeline.cache_material(&effect_batch.texture_layout);

            // FIXME - We draw the entire batch, but part of it may not be visible in this
            // view! We should re-batch for the current view specifically!

            let local_space_simulation = effect_batch
                .layout_flags
                .contains(LayoutFlags::LOCAL_SPACE_SIMULATION);
            let alpha_mask = ParticleRenderAlphaMaskPipelineKey::from(effect_batch.layout_flags);
            let flipbook = effect_batch.layout_flags.contains(LayoutFlags::FLIPBOOK);
            let needs_uv = effect_batch.layout_flags.contains(LayoutFlags::NEEDS_UV);
            let needs_normal = effect_batch
                .layout_flags
                .contains(LayoutFlags::NEEDS_NORMAL);
            let needs_particle_fragment = effect_batch
                .layout_flags
                .contains(LayoutFlags::NEEDS_PARTICLE_FRAGMENT);
            let ribbons = effect_batch.layout_flags.contains(LayoutFlags::RIBBONS);
            let image_count = effect_batch.texture_layout.layout.len() as u8;
            let render_mesh = render_meshes.get(effect_batch.mesh);

            // Specialize the render pipeline based on the effect batch
            trace!(
                "Specializing render pipeline: render_shaders={:?} image_count={} alpha_mask={:?} flipbook={:?} hdr={}",
                effect_batch.render_shader,
                image_count,
                alpha_mask,
                flipbook,
                view.hdr
            );

            // Add a draw pass for the effect batch
            trace!("Emitting individual draw for batch");

            let alpha_mode = effect_batch.alpha_mode;

            let Some(mesh_layout) = render_mesh.map(|gpu_mesh| gpu_mesh.layout.clone()) else {
                trace!("Missing mesh vertex buffer layout. Skipped.");
                continue;
            };

            #[cfg(feature = "trace")]
            let _span_specialize = bevy::log::info_span!("specialize").entered();
            let render_pipeline_id = specialized_render_pipelines.specialize(
                pipeline_cache,
                render_pipeline,
                ParticleRenderPipelineKey {
                    shader: effect_batch.render_shader.clone(),
                    mesh_layout: Some(mesh_layout),
                    particle_layout: effect_batch.particle_layout.clone(),
                    texture_layout: effect_batch.texture_layout.clone(),
                    local_space_simulation,
                    alpha_mask,
                    alpha_mode,
                    flipbook,
                    needs_uv,
                    needs_normal,
                    needs_particle_fragment,
                    ribbons,
                    #[cfg(all(feature = "2d", feature = "3d"))]
                    pipeline_mode,
                    msaa_samples: msaa.samples(),
                    hdr: view.hdr,
                },
            );
            #[cfg(feature = "trace")]
            _span_specialize.exit();

            trace!("+ Render pipeline specialized: id={:?}", render_pipeline_id,);
            trace!(
                "+ Add Transparent for batch on draw_entity {:?}: slab_id={} \
                spawner_base={} handle={:?}",
                draw_entity,
                effect_batch.slab_id.index(),
                effect_batch.spawner_base,
                effect_batch.handle
            );
            render_phase.add(
                make_batch_set_key(render_pipeline_id, draw_batch, view),
                make_bin_key(),
                (draw_entity, draw_batch.main_entity),
                InputUniformIndex::default(),
                BinnedRenderPhaseType::NonMesh,
                *change_tick,
            );
        }
    }
}

#[allow(clippy::too_many_arguments)]
pub(crate) fn queue_effects(
    views: Query<(&RenderVisibleEntities, &ExtractedView, &Msaa)>,
    effects_meta: Res<EffectsMeta>,
    mut render_pipeline: ResMut<ParticlesRenderPipeline>,
    mut specialized_render_pipelines: ResMut<SpecializedRenderPipelines<ParticlesRenderPipeline>>,
    pipeline_cache: Res<PipelineCache>,
    mut effect_bind_groups: ResMut<EffectBindGroups>,
    sorted_effect_batches: Res<SortedEffectBatches>,
    effect_draw_batches: Query<(Entity, &mut EffectDrawBatch)>,
    events: Res<EffectAssetEvents>,
    render_meshes: Res<RenderAssets<RenderMesh>>,
    read_params: QueueEffectsReadOnlyParams,
    mut view_entities: Local<FixedBitSet>,
    #[cfg(feature = "2d")] mut transparent_2d_render_phases: ResMut<
        ViewSortedRenderPhases<Transparent2d>,
    >,
    #[cfg(feature = "3d")] mut transparent_3d_render_phases: ResMut<
        ViewSortedRenderPhases<Transparent3d>,
    >,
    #[cfg(feature = "3d")] (mut opaque_3d_render_phases, mut alpha_mask_3d_render_phases): (
        ResMut<ViewBinnedRenderPhases<Opaque3d>>,
        ResMut<ViewBinnedRenderPhases<AlphaMask3d>>,
    ),
    mut change_tick: Local<Tick>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("hanabi:queue_effects").entered();

    trace!("queue_effects");

    // Bump the change tick so that Bevy is forced to rebuild the binned render
    // phase bins. We don't use the built-in caching so we don't want Bevy to
    // reuse stale data.
    let next_change_tick = change_tick.get() + 1;
    change_tick.set(next_change_tick);

    // If an image has changed, the GpuImage has (probably) changed
    for event in &events.images {
        match event {
            AssetEvent::Added { .. } => (),
            AssetEvent::LoadedWithDependencies { .. } => (),
            AssetEvent::Unused { .. } => (),
            AssetEvent::Modified { id } => {
                if effect_bind_groups.images.remove(id).is_some() {
                    trace!("Destroyed bind group of modified image asset {:?}", id);
                }
            }
            AssetEvent::Removed { id } => {
                if effect_bind_groups.images.remove(id).is_some() {
                    trace!("Destroyes bind group of removed image asset {:?}", id);
                }
            }
        };
    }

    if effects_meta.spawner_buffer.buffer().is_none() || effects_meta.spawner_buffer.is_empty() {
        // No spawners are active
        return;
    }

    // Loop over all 2D cameras/views that need to render effects
    #[cfg(feature = "2d")]
    {
        #[cfg(feature = "trace")]
        let _span_draw = bevy::log::info_span!("draw_2d").entered();

        let draw_effects_function_2d = read_params
            .draw_functions_2d
            .read()
            .get_id::<DrawEffects>()
            .unwrap();

        // Effects with full alpha blending
        if !views.is_empty() {
            trace!("Emit effect draw calls for alpha blended 2D views...");
            emit_sorted_draw(
                &views,
                &mut transparent_2d_render_phases,
                &mut view_entities,
                &sorted_effect_batches,
                &effect_draw_batches,
                &mut render_pipeline,
                specialized_render_pipelines.reborrow(),
                &render_meshes,
                &pipeline_cache,
                |id, entity, draw_batch, _view| Transparent2d {
                    sort_key: FloatOrd(draw_batch.translation.z),
                    entity,
                    pipeline: id,
                    draw_function: draw_effects_function_2d,
                    batch_range: 0..1,
                    extracted_index: 0, // ???
                    extra_index: PhaseItemExtraIndex::None,
                    indexed: true, // ???
                },
                #[cfg(feature = "3d")]
                PipelineMode::Camera2d,
            );
        }
    }

    // Loop over all 3D cameras/views that need to render effects
    #[cfg(feature = "3d")]
    {
        #[cfg(feature = "trace")]
        let _span_draw = bevy::log::info_span!("draw_3d").entered();

        // Effects with full alpha blending
        if !views.is_empty() {
            trace!("Emit effect draw calls for alpha blended 3D views...");

            let draw_effects_function_3d = read_params
                .draw_functions_3d
                .read()
                .get_id::<DrawEffects>()
                .unwrap();

            emit_sorted_draw(
                &views,
                &mut transparent_3d_render_phases,
                &mut view_entities,
                &sorted_effect_batches,
                &effect_draw_batches,
                &mut render_pipeline,
                specialized_render_pipelines.reborrow(),
                &render_meshes,
                &pipeline_cache,
                |id, entity, batch, view| Transparent3d {
                    distance: view
                        .rangefinder3d()
                        .distance_translation(&batch.translation),
                    pipeline: id,
                    entity,
                    draw_function: draw_effects_function_3d,
                    batch_range: 0..1,
                    extra_index: PhaseItemExtraIndex::None,
                    indexed: true, // ???
                },
                #[cfg(feature = "2d")]
                PipelineMode::Camera3d,
            );
        }

        // Effects with alpha mask
        if !views.is_empty() {
            #[cfg(feature = "trace")]
            let _span_draw = bevy::log::info_span!("draw_alphamask").entered();

            trace!("Emit effect draw calls for alpha masked 3D views...");

            let draw_effects_function_alpha_mask = read_params
                .draw_functions_alpha_mask
                .read()
                .get_id::<DrawEffects>()
                .unwrap();

            emit_binned_draw(
                &views,
                &mut alpha_mask_3d_render_phases,
                &mut view_entities,
                &sorted_effect_batches,
                &effect_draw_batches,
                &mut render_pipeline,
                specialized_render_pipelines.reborrow(),
                &pipeline_cache,
                &render_meshes,
                |id, _batch, _view| OpaqueNoLightmap3dBatchSetKey {
                    pipeline: id,
                    draw_function: draw_effects_function_alpha_mask,
                    material_bind_group_index: None,
                    vertex_slab: default(),
                    index_slab: None,
                },
                // Unused for now
                || OpaqueNoLightmap3dBinKey {
                    asset_id: AssetId::<Mesh>::invalid().untyped(),
                },
                #[cfg(feature = "2d")]
                PipelineMode::Camera3d,
                ParticleRenderAlphaMaskPipelineKey::AlphaMask,
                &mut change_tick,
            );
        }

        // Opaque particles
        if !views.is_empty() {
            #[cfg(feature = "trace")]
            let _span_draw = bevy::log::info_span!("draw_opaque").entered();

            trace!("Emit effect draw calls for opaque 3D views...");

            let draw_effects_function_opaque = read_params
                .draw_functions_opaque
                .read()
                .get_id::<DrawEffects>()
                .unwrap();

            emit_binned_draw(
                &views,
                &mut opaque_3d_render_phases,
                &mut view_entities,
                &sorted_effect_batches,
                &effect_draw_batches,
                &mut render_pipeline,
                specialized_render_pipelines.reborrow(),
                &pipeline_cache,
                &render_meshes,
                |id, _batch, _view| Opaque3dBatchSetKey {
                    pipeline: id,
                    draw_function: draw_effects_function_opaque,
                    material_bind_group_index: None,
                    vertex_slab: default(),
                    index_slab: None,
                    lightmap_slab: None,
                },
                // Unused for now
                || Opaque3dBinKey {
                    asset_id: AssetId::<Mesh>::invalid().untyped(),
                },
                #[cfg(feature = "2d")]
                PipelineMode::Camera3d,
                ParticleRenderAlphaMaskPipelineKey::Opaque,
                &mut change_tick,
            );
        }
    }
}

/// Once a child effect is batched, and therefore passed validations to be
/// updated and rendered this frame, dispatch a new GPU operation to fill the
/// indirect dispatch args of its init pass based on the number of GPU events
/// emitted in the previous frame and stored in its event buffer.
pub fn queue_init_indirect_workgroup_update(
    q_cached_effects: Query<(Entity, &CachedChildInfo, &CachedEffectEvents)>,
    mut init_fill_dispatch_queue: ResMut<InitFillDispatchQueue>,
) {
    debug_assert_eq!(
        GpuChildInfo::min_size().get() % 4,
        0,
        "Invalid GpuChildInfo alignment."
    );

    // Schedule some GPU buffer operation to update the number of workgroups to
    // dispatch during the indirect init pass of this effect based on the number of
    // GPU spawn events written in its buffer.
    for (entity, cached_child_info, cached_effect_events) in &q_cached_effects {
        let init_indirect_dispatch_index = cached_effect_events.init_indirect_dispatch_index;
        let global_child_index = cached_child_info.global_child_index;
        trace!(
            "[Effect {:?}] init_fill_dispatch.enqueue(): src:global_child_index={} dst:init_indirect_dispatch_index={}",
            entity,
            global_child_index,
            init_indirect_dispatch_index,
        );
        assert!(global_child_index != u32::MAX);
        init_fill_dispatch_queue.enqueue(global_child_index, init_indirect_dispatch_index);
    }
}

/// Prepare GPU resources for effect rendering.
///
/// This system runs in the [`RenderSet::PrepareResources`] render set, after
/// Bevy has updated the [`ViewUniforms`], which need to be referenced to get
/// access to the current camera view.
pub(crate) fn prepare_gpu_resources(
    mut effects_meta: ResMut<EffectsMeta>,
    //mut effect_cache: ResMut<EffectCache>,
    mut event_cache: ResMut<EventCache>,
    mut effect_bind_groups: ResMut<EffectBindGroups>,
    mut sort_bind_groups: ResMut<SortBindGroups>,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    view_uniforms: Res<ViewUniforms>,
    render_pipeline: Res<ParticlesRenderPipeline>,
) {
    // Get the binding for the ViewUniform, the uniform data structure containing
    // the Camera data for the current view. If not available, we cannot render
    // anything.
    let Some(view_binding) = view_uniforms.uniforms.binding() else {
        return;
    };

    // Upload simulation parameters for this frame
    let prev_buffer_id = effects_meta.sim_params_uniforms.buffer().map(|b| b.id());
    effects_meta
        .sim_params_uniforms
        .write_buffer(&render_device, &render_queue);
    if prev_buffer_id != effects_meta.sim_params_uniforms.buffer().map(|b| b.id()) {
        // Buffer changed, invalidate bind groups
        effects_meta.update_sim_params_bind_group = None;
        effects_meta.indirect_sim_params_bind_group = None;
    }

    // Create the bind group for the camera/view parameters
    // FIXME - Not here!
    effects_meta.view_bind_group = Some(render_device.create_bind_group(
        "hanabi:bind_group_camera_view",
        &render_pipeline.view_layout,
        &[
            BindGroupEntry {
                binding: 0,
                resource: view_binding,
            },
            BindGroupEntry {
                binding: 1,
                resource: effects_meta.sim_params_uniforms.binding().unwrap(),
            },
        ],
    ));

    // Re-/allocate the draw indirect args buffer if needed
    if effects_meta
        .draw_indirect_buffer
        .allocate_gpu(&render_device, &render_queue)
    {
        // All those bind groups use the buffer so need to be re-created
        trace!("*** Draw indirect args buffer re-allocated; clearing all bind groups using it.");
        effects_meta.update_sim_params_bind_group = None;
        effects_meta.indirect_metadata_bind_group = None;
    }

    // Re-/allocate any GPU buffer if needed
    //effect_cache.prepare_buffers(&render_device, &render_queue, &mut
    // effect_bind_groups);
    event_cache.prepare_buffers(&render_device, &render_queue, &mut effect_bind_groups);
    sort_bind_groups.prepare_buffers(&render_device);
    if effects_meta
        .dispatch_indirect_buffer
        .prepare_buffers(&render_device)
    {
        // All those bind groups use the buffer so need to be re-created
        trace!("*** Dispatch indirect buffer for update pass re-allocated; clearing all bind groups using it.");
        effect_bind_groups.particle_slabs.clear();
    }
}

/// Update the [`GpuEffectMetadata`] of all the effects queued for update/render
/// this frame.
///
/// By this point, all effects should have a [`CachedEffectMetadata`] with a
/// valid allocation in the GPU table for a [`GpuEffectMetadata`] entry. This
/// system actually synchronize the CPU value with the GPU one in case of
/// change.
pub(crate) fn prepare_effect_metadata(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut q_effects: Query<(
        MainEntity,
        Ref<ExtractedEffect>,
        Ref<CachedEffect>,
        Ref<DispatchBufferIndices>,
        Option<Ref<CachedChildInfo>>,
        Option<Ref<CachedParentInfo>>,
        Option<Ref<CachedDrawIndirectArgs>>,
        Option<Ref<CachedEffectEvents>>,
        &mut CachedEffectMetadata,
    )>,
    mut effects_meta: ResMut<EffectsMeta>,
    mut effect_bind_groups: ResMut<EffectBindGroups>,
) {
    #[cfg(feature = "trace")]
    let _span = bevy::log::info_span!("prepare_effect_metadata").entered();
    trace!("prepare_effect_metadata");

    for (
        main_entity,
        extracted_effect,
        cached_effect,
        dispatch_buffer_indices,
        maybe_cached_child_info,
        maybe_cached_parent_info,
        maybe_cached_draw_indirect_args,
        maybe_cached_effect_events,
        mut cached_effect_metadata,
    ) in &mut q_effects
    {
        // Check if anything relevant to GpuEffectMetadata changed this frame; otherwise
        // early out and skip this effect.
        let is_changed_ee = extracted_effect.is_changed();
        let is_changed_ce = cached_effect.is_changed();
        let is_changed_dbi = dispatch_buffer_indices.is_changed();
        let is_changed_cci = maybe_cached_child_info
            .as_ref()
            .map(|cci| cci.is_changed())
            .unwrap_or(false);
        let is_changed_cpi = maybe_cached_parent_info
            .as_ref()
            .map(|cpi| cpi.is_changed())
            .unwrap_or(false);
        let is_changed_cdia = maybe_cached_draw_indirect_args
            .as_ref()
            .map(|cdia| cdia.is_changed())
            .unwrap_or(false);
        let is_changed_cee = maybe_cached_effect_events
            .as_ref()
            .map(|cee| cee.is_changed())
            .unwrap_or(false);
        trace!(
            "Preparting GpuEffectMetadata for effect {:?}: is_changed[] = {} {} {} {} {} {} {}",
            main_entity,
            is_changed_ee,
            is_changed_ce,
            is_changed_dbi,
            is_changed_cci,
            is_changed_cpi,
            is_changed_cdia,
            is_changed_cee
        );
        if !is_changed_ee
            && !is_changed_ce
            && !is_changed_dbi
            && !is_changed_cci
            && !is_changed_cpi
            && !is_changed_cdia
            && !is_changed_cee
        {
            continue;
        }

        let capacity = cached_effect.slice.len();

        // Global and local indices of this effect as a child of another (parent) effect
        let (global_child_index, local_child_index) = maybe_cached_child_info
            .map(|cci| (cci.global_child_index, cci.local_child_index))
            .unwrap_or((u32::MAX, u32::MAX));

        // Base index of all children of this (parent) effect
        let base_child_index = maybe_cached_parent_info
            .map(|cpi| {
                debug_assert_eq!(
                    cpi.byte_range.start % GpuChildInfo::SHADER_SIZE.get() as u32,
                    0
                );
                cpi.byte_range.start / GpuChildInfo::SHADER_SIZE.get() as u32
            })
            .unwrap_or(u32::MAX);

        let particle_stride = extracted_effect.particle_layout.min_binding_size32().get() / 4;
        let sort_key_offset = extracted_effect
            .particle_layout
            .byte_offset(Attribute::RIBBON_ID)
            .map(|byte_offset| byte_offset / 4)
            .unwrap_or(u32::MAX);
        let sort_key2_offset = extracted_effect
            .particle_layout
            .byte_offset(Attribute::AGE)
            .map(|byte_offset| byte_offset / 4)
            .unwrap_or(u32::MAX);

        let gpu_effect_metadata = GpuEffectMetadata {
            capacity,
            alive_count: 0,
            max_update: 0,
            max_spawn: capacity,
            indirect_write_index: 0,
            indirect_dispatch_index: dispatch_buffer_indices
                .update_dispatch_indirect_buffer_row_index,
            indirect_draw_index: maybe_cached_draw_indirect_args
                .map(|cdia| cdia.get_row().0)
                .unwrap_or(u32::MAX),
            init_indirect_dispatch_index: maybe_cached_effect_events
                .map(|cee| cee.init_indirect_dispatch_index)
                .unwrap_or(u32::MAX),
            local_child_index,
            global_child_index,
            base_child_index,
            particle_stride,
            sort_key_offset,
            sort_key2_offset,
            ..default()
        };

        // Insert of update entry in GPU buffer table
        assert!(cached_effect_metadata.table_id.is_valid());
        if gpu_effect_metadata != cached_effect_metadata.metadata {
            effects_meta
                .effect_metadata_buffer
                .update(cached_effect_metadata.table_id, gpu_effect_metadata);

            cached_effect_metadata.metadata = gpu_effect_metadata;

            // This triggers on all new spawns and annoys everyone; silence until we can at
            // least warn only on non-first-spawn, and ideally split indirect data from that
            // struct so we don't overwrite it and solve the issue.
            debug!(
                "Updated metadata entry {} for effect {:?}, this will reset it.",
                cached_effect_metadata.table_id.0, main_entity
            );
        }
    }

    // Once all EffectMetadata values are written, schedule a GPU upload
    if effects_meta
        .effect_metadata_buffer
        .allocate_gpu(render_device.as_ref(), render_queue.as_ref())
    {
        // All those bind groups use the buffer so need to be re-created
        trace!("*** Effect metadata buffer re-allocated; clearing all bind groups using it.");
        effects_meta.indirect_metadata_bind_group = None;
        effect_bind_groups.init_metadata_bind_groups.clear();
        effect_bind_groups.update_metadata_bind_groups.clear();
    }
}

/// Read the queued init fill dispatch operations, batch them together by
/// contiguous source and destination entries in the buffers, and enqueue
/// corresponding GPU buffer fill dispatch operations for all batches.
///
/// This system runs after the GPU buffers have been (re-)allocated in
/// [`prepare_gpu_resources()`], so that it can read the new buffer IDs and
/// reference them from the generic [`GpuBufferOperationQueue`].
pub(crate) fn queue_init_fill_dispatch_ops(
    event_cache: Res<EventCache>,
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut init_fill_dispatch_queue: ResMut<InitFillDispatchQueue>,
    mut gpu_buffer_operations: ResMut<GpuBufferOperations>,
) {
    // Submit all queued init fill dispatch operations with the proper buffers
    if !init_fill_dispatch_queue.is_empty() {
        let src_buffer = event_cache.child_infos().buffer();
        let dst_buffer = event_cache.init_indirect_dispatch_buffer();
        if let (Some(src_buffer), Some(dst_buffer)) = (src_buffer, dst_buffer) {
            init_fill_dispatch_queue.submit(src_buffer, dst_buffer, &mut gpu_buffer_operations);
        } else {
            if src_buffer.is_none() {
                warn!("Event cache has no allocated GpuChildInfo buffer, but there's {} init fill dispatch operation(s) queued. Ignoring those operations. This will prevent child particles from spawning.", init_fill_dispatch_queue.queue.len());
            }
            if dst_buffer.is_none() {
                warn!("Event cache has no allocated GpuDispatchIndirect buffer, but there's {} init fill dispatch operation(s) queued. Ignoring those operations. This will prevent child particles from spawning.", init_fill_dispatch_queue.queue.len());
            }
        }
    }

    // Once all GPU operations for this frame are enqueued, upload them to GPU
    gpu_buffer_operations.end_frame(&render_device, &render_queue);
}

pub(crate) fn prepare_bind_groups(
    mut effects_meta: ResMut<EffectsMeta>,
    mut effect_cache: ResMut<EffectCache>,
    mut event_cache: ResMut<EventCache>,
    mut effect_bind_groups: ResMut<EffectBindGroups>,
    mut property_bind_groups: ResMut<PropertyBindGroups>,
    mut sort_bind_groups: ResMut<SortBindGroups>,
    property_cache: Res<PropertyCache>,
    sorted_effect_batched: Res<SortedEffectBatches>,
    render_device: Res<RenderDevice>,
    dispatch_indirect_pipeline: Res<DispatchIndirectPipeline>,
    utils_pipeline: Res<UtilsPipeline>,
    init_pipeline: Res<ParticlesInitPipeline>,
    update_pipeline: Res<ParticlesUpdatePipeline>,
    render_pipeline: ResMut<ParticlesRenderPipeline>,
    gpu_images: Res<RenderAssets<GpuImage>>,
    mut gpu_buffer_operation_queue: ResMut<GpuBufferOperations>,
) {
    // We can't simulate nor render anything without at least the spawner buffer
    if effects_meta.spawner_buffer.is_empty() {
        return;
    }
    let Some(spawner_buffer) = effects_meta.spawner_buffer.buffer().cloned() else {
        return;
    };

    // Ensure child_infos@3 bind group for the indirect pass is available if needed.
    // This returns `None` if the buffer is not ready, either because it's not
    // created yet or because it's not needed (no child effect).
    event_cache.ensure_indirect_child_info_buffer_bind_group(&render_device);

    {
        #[cfg(feature = "trace")]
        let _span = bevy::log::info_span!("shared_bind_groups").entered();

        // Make a copy of the buffer IDs before borrowing effects_meta mutably in the
        // loop below. Also allows earlying out before doing any work in case some
        // buffer is missing.
        let Some(spawner_buffer) = effects_meta.spawner_buffer.buffer().cloned() else {
            return;
        };

        // Create the sim_params@0 bind group for the global simulation parameters,
        // which is shared by the init and update passes.
        if effects_meta.update_sim_params_bind_group.is_none() {
            if let Some(draw_indirect_buffer) = effects_meta.draw_indirect_buffer.buffer() {
                effects_meta.update_sim_params_bind_group = Some(render_device.create_bind_group(
                    "hanabi:bind_group:vfx_update:sim_params@0",
                    &update_pipeline.sim_params_layout,
                    &[
                        // @group(0) @binding(0) var<uniform> sim_params : SimParams;
                        BindGroupEntry {
                            binding: 0,
                            resource: effects_meta.sim_params_uniforms.binding().unwrap(),
                        },
                        // @group(0) @binding(1) var<storage, read_write> draw_indirect_buffer :
                        // array<DrawIndexedIndirectArgs>;
                        BindGroupEntry {
                            binding: 1,
                            resource: draw_indirect_buffer.as_entire_binding(),
                        },
                    ],
                ));
            } else {
                debug!("Cannot allocate bind group for vfx_update:sim_params@0 - draw_indirect_buffer not ready");
            }
        }
        if effects_meta.indirect_sim_params_bind_group.is_none() {
            effects_meta.indirect_sim_params_bind_group = Some(render_device.create_bind_group(
                "hanabi:bind_group:vfx_indirect:sim_params@0",
                &init_pipeline.sim_params_layout, // FIXME - Shared with init
                &[
                    // @group(0) @binding(0) var<uniform> sim_params : SimParams;
                    BindGroupEntry {
                        binding: 0,
                        resource: effects_meta.sim_params_uniforms.binding().unwrap(),
                    },
                ],
            ));
        }

        // Create the @1 bind group for the indirect dispatch preparation pass of all
        // effects at once
        effects_meta.indirect_metadata_bind_group = match (
            effects_meta.effect_metadata_buffer.buffer(),
            effects_meta.dispatch_indirect_buffer.buffer(),
            effects_meta.draw_indirect_buffer.buffer(),
        ) {
            (
                Some(effect_metadata_buffer),
                Some(dispatch_indirect_buffer),
                Some(draw_indirect_buffer),
            ) => {
                // Base bind group for indirect pass
                Some(render_device.create_bind_group(
                    "hanabi:bind_group:vfx_indirect:metadata@1",
                    &dispatch_indirect_pipeline.effect_metadata_bind_group_layout,
                    &[
                        // @group(1) @binding(0) var<storage, read_write> effect_metadata_buffer :
                        // array<u32>;
                        BindGroupEntry {
                            binding: 0,
                            resource: effect_metadata_buffer.as_entire_binding(),
                        },
                        // @group(1) @binding(1) var<storage, read_write> dispatch_indirect_buffer
                        // : array<u32>;
                        BindGroupEntry {
                            binding: 1,
                            resource: dispatch_indirect_buffer.as_entire_binding(),
                        },
                        // @group(1) @binding(2) var<storage, read_write> draw_indirect_buffer :
                        // array<u32>;
                        BindGroupEntry {
                            binding: 2,
                            resource: draw_indirect_buffer.as_entire_binding(),
                        },
                    ],
                ))
            }

            // Some buffer is not yet available, can't create the bind group
            _ => None,
        };

        // Create the @2 bind group for the indirect dispatch preparation pass of all
        // effects at once
        if effects_meta.indirect_spawner_bind_group.is_none() {
            let bind_group = render_device.create_bind_group(
                "hanabi:bind_group:vfx_indirect:spawner@2",
                &dispatch_indirect_pipeline.spawner_bind_group_layout,
                &[
                    // @group(2) @binding(0) var<storage, read> spawner_buffer : array<Spawner>;
                    BindGroupEntry {
                        binding: 0,
                        resource: BindingResource::Buffer(BufferBinding {
                            buffer: &spawner_buffer,
                            offset: 0,
                            size: None,
                        }),
                    },
                ],
            );

            effects_meta.indirect_spawner_bind_group = Some(bind_group);
        }
    }

    // Create the per-slab bind groups
    trace!("Create per-slab bind groups...");
    for (slab_index, particle_slab) in effect_cache.slabs().iter().enumerate() {
        #[cfg(feature = "trace")]
        let _span_buffer = bevy::log::info_span!("create_buffer_bind_groups").entered();

        let Some(particle_slab) = particle_slab else {
            trace!(
                "Particle slab index #{} has no allocated EffectBuffer, skipped.",
                slab_index
            );
            continue;
        };

        // Ensure all effects in this batch have a bind group for the entire buffer of
        // the group, since the update phase runs on an entire group/buffer at once,
        // with all the effect instances in it batched together.
        trace!("effect particle slab_index=#{}", slab_index);
        effect_bind_groups
            .particle_slabs
            .entry(SlabId::new(slab_index as u32))
            .or_insert_with(|| {
                // Bind group particle@1 for render pass
                trace!("Creating particle@1 bind group for buffer #{slab_index} in render pass");
                let spawner_min_binding_size = GpuSpawnerParams::aligned_size(
                    render_device.limits().min_storage_buffer_offset_alignment,
                );
                let entries = [
                    // @group(1) @binding(0) var<storage, read> particle_buffer : ParticleBuffer;
                    BindGroupEntry {
                        binding: 0,
                        resource: particle_slab.as_entire_binding_particle(),
                    },
                    // @group(1) @binding(1) var<storage, read> indirect_buffer : IndirectBuffer;
                    BindGroupEntry {
                        binding: 1,
                        resource: particle_slab.as_entire_binding_indirect(),
                    },
                    // @group(1) @binding(2) var<storage, read> spawner : Spawner;
                    BindGroupEntry {
                        binding: 2,
                        resource: BindingResource::Buffer(BufferBinding {
                            buffer: &spawner_buffer,
                            offset: 0,
                            size: Some(spawner_min_binding_size),
                        }),
                    },
                ];
                let render = render_device.create_bind_group(
                    &format!("hanabi:bind_group:render:particles@1:vfx{slab_index}")[..],
                    particle_slab.render_particles_buffer_layout(),
                    &entries[..],
                );

                BufferBindGroups { render }
            });
    }

    // Create bind groups for queued GPU buffer operations
    gpu_buffer_operation_queue.create_bind_groups(&render_device, &utils_pipeline);

    // Create the per-effect bind groups
    let spawner_buffer_binding_size =
        NonZeroU64::new(effects_meta.spawner_buffer.aligned_size() as u64).unwrap();
    for effect_batch in sorted_effect_batched.iter() {
        #[cfg(feature = "trace")]
        let _span_buffer = bevy::log::info_span!("create_batch_bind_groups").entered();

        // Create the property bind group @2 if needed
        if let Some(property_key) = &effect_batch.property_key {
            if let Err(err) = property_bind_groups.ensure_exists(
                property_key,
                &property_cache,
                &spawner_buffer,
                spawner_buffer_binding_size,
                &render_device,
            ) {
                error!("Failed to create property bind group for effect batch: {err:?}");
                continue;
            }
        } else if let Err(err) = property_bind_groups.ensure_exists_no_property(
            &property_cache,
            &spawner_buffer,
            spawner_buffer_binding_size,
            &render_device,
        ) {
            error!("Failed to create property bind group for effect batch: {err:?}");
            continue;
        }

        // Bind group particle@1 for the simulate compute shaders (init and udpate) to
        // simulate particles.
        if effect_cache
            .create_particle_sim_bind_group(
                &effect_batch.slab_id,
                &render_device,
                effect_batch.particle_layout.min_binding_size32(),
                effect_batch.parent_min_binding_size,
                effect_batch.parent_binding_source.as_ref(),
            )
            .is_err()
        {
            error!("No particle buffer allocated for effect batch.");
            continue;
        }

        // Bind group @3 of init pass
        // FIXME - this is instance-dependent, not buffer-dependent
        {
            let consume_gpu_spawn_events = effect_batch
                .layout_flags
                .contains(LayoutFlags::CONSUME_GPU_SPAWN_EVENTS);
            let consume_event_buffers = if let BatchSpawnInfo::GpuSpawner { .. } =
                effect_batch.spawn_info
            {
                assert!(consume_gpu_spawn_events);
                let cached_effect_events = effect_batch.cached_effect_events.as_ref().unwrap();
                Some(ConsumeEventBuffers {
                    child_infos_buffer: event_cache.child_infos_buffer().unwrap(),
                    events: BufferSlice {
                        buffer: event_cache
                            .get_buffer(cached_effect_events.buffer_index)
                            .unwrap(),
                        // Note: event range is in u32 count, not bytes
                        offset: cached_effect_events.range.start * 4,
                        size: NonZeroU32::new(cached_effect_events.range.len() as u32 * 4).unwrap(),
                    },
                })
            } else {
                assert!(!consume_gpu_spawn_events);
                None
            };
            let Some(init_metadata_layout) =
                effect_cache.metadata_init_bind_group_layout(consume_gpu_spawn_events)
            else {
                continue;
            };
            if effect_bind_groups
                .get_or_create_init_metadata(
                    effect_batch,
                    &effects_meta.gpu_limits,
                    &render_device,
                    init_metadata_layout,
                    effects_meta.effect_metadata_buffer.buffer().unwrap(),
                    consume_event_buffers,
                )
                .is_err()
            {
                continue;
            }
        }

        // Bind group @3 of update pass
        // FIXME - this is instance-dependent, not buffer-dependent#
        {
            let num_event_buffers = effect_batch.child_event_buffers.len() as u32;

            let Some(update_metadata_layout) =
                effect_cache.metadata_update_bind_group_layout(num_event_buffers)
            else {
                continue;
            };
            if effect_bind_groups
                .get_or_create_update_metadata(
                    effect_batch,
                    &effects_meta.gpu_limits,
                    &render_device,
                    update_metadata_layout,
                    effects_meta.effect_metadata_buffer.buffer().unwrap(),
                    event_cache.child_infos_buffer(),
                    &effect_batch.child_event_buffers[..],
                )
                .is_err()
            {
                continue;
            }
        }

        if effect_batch.layout_flags.contains(LayoutFlags::RIBBONS) {
            let effect_buffer = effect_cache.get_slab(&effect_batch.slab_id).unwrap();

            // Bind group @0 of sort-fill pass
            let particle_buffer = effect_buffer.particle_buffer();
            let indirect_index_buffer = effect_buffer.indirect_index_buffer();
            let effect_metadata_buffer = effects_meta.effect_metadata_buffer.buffer().unwrap();
            if let Err(err) = sort_bind_groups.ensure_sort_fill_bind_group(
                &effect_batch.particle_layout,
                particle_buffer,
                indirect_index_buffer,
                effect_metadata_buffer,
            ) {
                error!(
                    "Failed to create sort-fill bind group @0 for ribbon effect: {:?}",
                    err
                );
                continue;
            }

            // Bind group @0 of sort-copy pass
            let indirect_index_buffer = effect_buffer.indirect_index_buffer();
            if let Err(err) = sort_bind_groups
                .ensure_sort_copy_bind_group(indirect_index_buffer, effect_metadata_buffer)
            {
                error!(
                    "Failed to create sort-copy bind group @0 for ribbon effect: {:?}",
                    err
                );
                continue;
            }
        }

        // Ensure the particle texture(s) are available as GPU resources and that a bind
        // group for them exists
        // FIXME fix this insert+get below
        if !effect_batch.texture_layout.layout.is_empty() {
            // This should always be available, as this is cached into the render pipeline
            // just before we start specializing it.
            let Some(material_bind_group_layout) =
                render_pipeline.get_material(&effect_batch.texture_layout)
            else {
                error!(
                    "Failed to find material bind group layout for particle slab #{}",
                    effect_batch.slab_id.index()
                );
                continue;
            };

            // TODO = move
            let material = Material {
                layout: effect_batch.texture_layout.clone(),
                textures: effect_batch.textures.iter().map(|h| h.id()).collect(),
            };
            assert_eq!(material.layout.layout.len(), material.textures.len());

            //let bind_group_entries = material.make_entries(&gpu_images).unwrap();
            let Ok(bind_group_entries) = material.make_entries(&gpu_images) else {
                trace!(
                    "Temporarily ignoring material {:?} due to missing image(s)",
                    material
                );
                continue;
            };

            effect_bind_groups
                .material_bind_groups
                .entry(material.clone())
                .or_insert_with(|| {
                    debug!("Creating material bind group for material {:?}", material);
                    render_device.create_bind_group(
                        &format!(
                            "hanabi:material_bind_group_{}",
                            material.layout.layout.len()
                        )[..],
                        material_bind_group_layout,
                        &bind_group_entries[..],
                    )
                });
        }
    }
}

type DrawEffectsSystemState = SystemState<(
    SRes<EffectsMeta>,
    SRes<EffectBindGroups>,
    SRes<PipelineCache>,
    SRes<RenderAssets<RenderMesh>>,
    SRes<MeshAllocator>,
    SQuery<Read<ViewUniformOffset>>,
    SRes<SortedEffectBatches>,
    SQuery<Read<EffectDrawBatch>>,
)>;

/// Draw function for rendering all active effects for the current frame.
///
/// Effects are rendered in the [`Transparent2d`] phase of the main 2D pass,
/// and the [`Transparent3d`] phase of the main 3D pass.
pub(crate) struct DrawEffects {
    params: DrawEffectsSystemState,
}

impl DrawEffects {
    pub fn new(world: &mut World) -> Self {
        Self {
            params: SystemState::new(world),
        }
    }
}

/// Draw all particles of a single effect in view, in 2D or 3D.
///
/// FIXME: use pipeline ID to look up which group index it is.
fn draw<'w>(
    world: &'w World,
    pass: &mut TrackedRenderPass<'w>,
    view: Entity,
    entity: (Entity, MainEntity),
    pipeline_id: CachedRenderPipelineId,
    params: &mut DrawEffectsSystemState,
) {
    let (
        effects_meta,
        effect_bind_groups,
        pipeline_cache,
        meshes,
        mesh_allocator,
        views,
        sorted_effect_batches,
        effect_draw_batches,
    ) = params.get(world);
    let view_uniform = views.get(view).unwrap();
    let effects_meta = effects_meta.into_inner();
    let effect_bind_groups = effect_bind_groups.into_inner();
    let meshes = meshes.into_inner();
    let mesh_allocator = mesh_allocator.into_inner();
    let effect_draw_batch = effect_draw_batches.get(entity.0).unwrap();
    let effect_batch = sorted_effect_batches
        .get(effect_draw_batch.effect_batch_index)
        .unwrap();

    let Some(pipeline) = pipeline_cache.into_inner().get_render_pipeline(pipeline_id) else {
        return;
    };

    trace!("render pass");

    pass.set_render_pipeline(pipeline);

    let Some(render_mesh): Option<&RenderMesh> = meshes.get(effect_batch.mesh) else {
        return;
    };
    let Some(vertex_buffer_slice) = mesh_allocator.mesh_vertex_slice(&effect_batch.mesh) else {
        return;
    };

    // Vertex buffer containing the particle model to draw. Generally a quad.
    // FIXME - need to upload "vertex_buffer_slice.range.start as i32" into
    // "base_vertex" in the indirect struct...
    pass.set_vertex_buffer(0, vertex_buffer_slice.buffer.slice(..));

    // View properties (camera matrix, etc.)
    pass.set_bind_group(
        0,
        effects_meta.view_bind_group.as_ref().unwrap(),
        &[view_uniform.offset],
    );

    // Particles buffer
    let spawner_base = effect_batch.spawner_base;
    let spawner_buffer_aligned = effects_meta.spawner_buffer.aligned_size();
    assert!(spawner_buffer_aligned >= GpuSpawnerParams::min_size().get() as usize);
    let spawner_offset = spawner_base * spawner_buffer_aligned as u32;
    pass.set_bind_group(
        1,
        effect_bind_groups
            .particle_render(&effect_batch.slab_id)
            .unwrap(),
        &[spawner_offset],
    );

    // Particle texture
    // TODO = move
    let material = Material {
        layout: effect_batch.texture_layout.clone(),
        textures: effect_batch.textures.iter().map(|h| h.id()).collect(),
    };
    if !effect_batch.texture_layout.layout.is_empty() {
        if let Some(bind_group) = effect_bind_groups.material_bind_groups.get(&material) {
            pass.set_bind_group(2, bind_group, &[]);
        } else {
            // Texture(s) not ready; skip this drawing for now
            trace!(
                "Particle material bind group not available for batch slab_id={}. Skipping draw call.",
                effect_batch.slab_id.index(),
            );
            return;
        }
    }

    let draw_indirect_index = effect_batch.draw_indirect_buffer_row_index.0;
    assert_eq!(GpuDrawIndexedIndirectArgs::SHADER_SIZE.get(), 20);
    let draw_indirect_offset =
        draw_indirect_index as u64 * GpuDrawIndexedIndirectArgs::SHADER_SIZE.get();
    trace!(
        "Draw up to {} particles with {} vertices per particle for batch from particle slab #{} \
            (effect_metadata_index={}, draw_indirect_offset={}B).",
        effect_batch.slice.len(),
        render_mesh.vertex_count,
        effect_batch.slab_id.index(),
        draw_indirect_index,
        draw_indirect_offset,
    );

    let Some(indirect_buffer) = effects_meta.draw_indirect_buffer.buffer() else {
        trace!(
            "The draw indirect buffer containing the indirect draw args is not ready for batch slab_id=#{}. Skipping draw call.",
            effect_batch.slab_id.index(),
        );
        return;
    };

    match render_mesh.buffer_info {
        RenderMeshBufferInfo::Indexed { index_format, .. } => {
            let Some(index_buffer_slice) = mesh_allocator.mesh_index_slice(&effect_batch.mesh)
            else {
                trace!(
                    "The index buffer for indexed rendering is not ready for batch slab_id=#{}. Skipping draw call.",
                    effect_batch.slab_id.index(),
                );
                return;
            };

            pass.set_index_buffer(index_buffer_slice.buffer.slice(..), 0, index_format);
            pass.draw_indexed_indirect(indirect_buffer, draw_indirect_offset);
        }
        RenderMeshBufferInfo::NonIndexed => {
            pass.draw_indirect(indirect_buffer, draw_indirect_offset);
        }
    }
}

#[cfg(feature = "2d")]
impl Draw<Transparent2d> for DrawEffects {
    fn draw<'w>(
        &mut self,
        world: &'w World,
        pass: &mut TrackedRenderPass<'w>,
        view: Entity,
        item: &Transparent2d,
    ) -> Result<(), DrawError> {
        trace!("Draw<Transparent2d>: view={:?}", view);
        draw(
            world,
            pass,
            view,
            item.entity,
            item.pipeline,
            &mut self.params,
        );
        Ok(())
    }
}

#[cfg(feature = "3d")]
impl Draw<Transparent3d> for DrawEffects {
    fn draw<'w>(
        &mut self,
        world: &'w World,
        pass: &mut TrackedRenderPass<'w>,
        view: Entity,
        item: &Transparent3d,
    ) -> Result<(), DrawError> {
        trace!("Draw<Transparent3d>: view={:?}", view);
        draw(
            world,
            pass,
            view,
            item.entity,
            item.pipeline,
            &mut self.params,
        );
        Ok(())
    }
}

#[cfg(feature = "3d")]
impl Draw<AlphaMask3d> for DrawEffects {
    fn draw<'w>(
        &mut self,
        world: &'w World,
        pass: &mut TrackedRenderPass<'w>,
        view: Entity,
        item: &AlphaMask3d,
    ) -> Result<(), DrawError> {
        trace!("Draw<AlphaMask3d>: view={:?}", view);
        draw(
            world,
            pass,
            view,
            item.representative_entity,
            item.batch_set_key.pipeline,
            &mut self.params,
        );
        Ok(())
    }
}

#[cfg(feature = "3d")]
impl Draw<Opaque3d> for DrawEffects {
    fn draw<'w>(
        &mut self,
        world: &'w World,
        pass: &mut TrackedRenderPass<'w>,
        view: Entity,
        item: &Opaque3d,
    ) -> Result<(), DrawError> {
        trace!("Draw<Opaque3d>: view={:?}", view);
        draw(
            world,
            pass,
            view,
            item.representative_entity,
            item.batch_set_key.pipeline,
            &mut self.params,
        );
        Ok(())
    }
}

/// Render node to run the simulation sub-graph once per frame.
///
/// This node doesn't simulate anything by itself, but instead schedules the
/// simulation sub-graph, where other nodes like [`VfxSimulateNode`] do the
/// actual simulation.
///
/// The simulation sub-graph is scheduled to run before the [`CameraDriverNode`]
/// renders all the views, such that rendered views have access to the
/// just-simulated particles to render them.
///
/// [`CameraDriverNode`]: bevy::render::camera::CameraDriverNode
pub(crate) struct VfxSimulateDriverNode;

impl Node for VfxSimulateDriverNode {
    fn run(
        &self,
        graph: &mut RenderGraphContext,
        _render_context: &mut RenderContext,
        _world: &World,
    ) -> Result<(), NodeRunError> {
        graph.run_sub_graph(
            crate::plugin::simulate_graph::HanabiSimulateGraph,
            vec![],
            None,
        )?;
        Ok(())
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
enum HanabiPipelineId {
    Invalid,
    Cached(CachedComputePipelineId),
}

#[derive(Debug)]
pub(crate) enum ComputePipelineError {
    Queued,
    Creating,
    Error,
}

impl From<&CachedPipelineState> for ComputePipelineError {
    fn from(value: &CachedPipelineState) -> Self {
        match value {
            CachedPipelineState::Queued => Self::Queued,
            CachedPipelineState::Creating(_) => Self::Creating,
            CachedPipelineState::Err(_) => Self::Error,
            _ => panic!("Trying to convert Ok state to error."),
        }
    }
}

pub(crate) struct HanabiComputePass<'a> {
    /// Pipeline cache to fetch cached compute pipelines by ID.
    pipeline_cache: &'a PipelineCache,
    /// WGPU compute pass.
    compute_pass: ComputePass<'a>,
    /// Current pipeline (cached).
    pipeline_id: HanabiPipelineId,
}

impl<'a> Deref for HanabiComputePass<'a> {
    type Target = ComputePass<'a>;

    fn deref(&self) -> &Self::Target {
        &self.compute_pass
    }
}

impl DerefMut for HanabiComputePass<'_> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.compute_pass
    }
}

impl<'a> HanabiComputePass<'a> {
    pub fn new(pipeline_cache: &'a PipelineCache, compute_pass: ComputePass<'a>) -> Self {
        Self {
            pipeline_cache,
            compute_pass,
            pipeline_id: HanabiPipelineId::Invalid,
        }
    }

    pub fn set_cached_compute_pipeline(
        &mut self,
        pipeline_id: CachedComputePipelineId,
    ) -> Result<(), ComputePipelineError> {
        if HanabiPipelineId::Cached(pipeline_id) == self.pipeline_id {
            trace!("set_cached_compute_pipeline() id={pipeline_id:?} -> already set; skipped");
            return Ok(());
        }
        trace!("set_cached_compute_pipeline() id={pipeline_id:?}");
        let Some(pipeline) = self.pipeline_cache.get_compute_pipeline(pipeline_id) else {
            let state = self.pipeline_cache.get_compute_pipeline_state(pipeline_id);
            if let CachedPipelineState::Err(err) = state {
                error!(
                    "Failed to find compute pipeline #{}: {:?}",
                    pipeline_id.id(),
                    err
                );
            } else {
                debug!("Compute pipeline not ready #{}", pipeline_id.id());
            }
            return Err(state.into());
        };
        self.compute_pass.set_pipeline(pipeline);
        self.pipeline_id = HanabiPipelineId::Cached(pipeline_id);
        Ok(())
    }
}

/// Render node to run the simulation of all effects once per frame.
///
/// Runs inside the simulation sub-graph, looping over all extracted effect
/// batches to simulate them.
pub(crate) struct VfxSimulateNode {}

impl VfxSimulateNode {
    /// Create a new node for simulating the effects of the given world.
    pub fn new(_world: &mut World) -> Self {
        Self {}
    }

    /// Begin a new compute pass and return a wrapper with extra
    /// functionalities.
    pub fn begin_compute_pass<'encoder>(
        &self,
        label: &str,
        pipeline_cache: &'encoder PipelineCache,
        render_context: &'encoder mut RenderContext,
    ) -> HanabiComputePass<'encoder> {
        let compute_pass =
            render_context
                .command_encoder()
                .begin_compute_pass(&ComputePassDescriptor {
                    label: Some(label),
                    timestamp_writes: None,
                });
        HanabiComputePass::new(pipeline_cache, compute_pass)
    }
}

impl Node for VfxSimulateNode {
    fn input(&self) -> Vec<SlotInfo> {
        vec![]
    }

    fn update(&mut self, _world: &mut World) {}

    fn run(
        &self,
        _graph: &mut RenderGraphContext,
        render_context: &mut RenderContext,
        world: &World,
    ) -> Result<(), NodeRunError> {
        trace!("VfxSimulateNode::run()");

        let pipeline_cache = world.resource::<PipelineCache>();
        let effects_meta = world.resource::<EffectsMeta>();
        let effect_bind_groups = world.resource::<EffectBindGroups>();
        let property_bind_groups = world.resource::<PropertyBindGroups>();
        let sort_bind_groups = world.resource::<SortBindGroups>();
        let utils_pipeline = world.resource::<UtilsPipeline>();
        let effect_cache = world.resource::<EffectCache>();
        let event_cache = world.resource::<EventCache>();
        let gpu_buffer_operations = world.resource::<GpuBufferOperations>();
        let sorted_effect_batches = world.resource::<SortedEffectBatches>();
        let init_fill_dispatch_queue = world.resource::<InitFillDispatchQueue>();

        // Make sure to schedule any buffer copy before accessing their content later in
        // the GPU commands below.
        {
            let command_encoder = render_context.command_encoder();
            effects_meta
                .dispatch_indirect_buffer
                .write_buffers(command_encoder);
            effects_meta
                .draw_indirect_buffer
                .write_buffer(command_encoder);
            effects_meta
                .effect_metadata_buffer
                .write_buffer(command_encoder);
            event_cache.write_buffers(command_encoder);
            sort_bind_groups.write_buffers(command_encoder);
        }

        // Compute init fill dispatch pass - Fill the indirect dispatch structs for any
        // upcoming init pass of this frame, based on the GPU spawn events emitted by
        // the update pass of their parent effect during the previous frame.
        if let Some(queue_index) = init_fill_dispatch_queue.submitted_queue_index.as_ref() {
            gpu_buffer_operations.dispatch(
                *queue_index,
                render_context,
                utils_pipeline,
                Some("hanabi:init_indirect_fill_dispatch"),
            );
        }

        // If there's no batch, there's nothing more to do. Avoid continuing because
        // some GPU resources are missing, which is expected when there's no effect but
        // is an error (and will log warnings/errors) otherwise.
        if sorted_effect_batches.is_empty() {
            return Ok(());
        }

        // Compute init pass
        {
            trace!("init: loop over effect batches...");

            let mut compute_pass =
                self.begin_compute_pass("hanabi:init", pipeline_cache, render_context);

            // Bind group simparams@0 is common to everything, only set once per init pass
            compute_pass.set_bind_group(
                0,
                effects_meta
                    .indirect_sim_params_bind_group
                    .as_ref()
                    .unwrap(),
                &[],
            );

            // Dispatch init compute jobs for all batches
            for effect_batch in sorted_effect_batches.iter() {
                // Do not dispatch any init work if there's nothing to spawn this frame for the
                // batch. Note that this hopefully should have been skipped earlier.
                {
                    let use_indirect_dispatch = effect_batch
                        .layout_flags
                        .contains(LayoutFlags::CONSUME_GPU_SPAWN_EVENTS);
                    match effect_batch.spawn_info {
                        BatchSpawnInfo::CpuSpawner { total_spawn_count } => {
                            assert!(!use_indirect_dispatch);
                            if total_spawn_count == 0 {
                                continue;
                            }
                        }
                        BatchSpawnInfo::GpuSpawner { .. } => {
                            assert!(use_indirect_dispatch);
                        }
                    }
                }

                // Fetch bind group particle@1
                let Some(particle_bind_group) =
                    effect_cache.particle_sim_bind_group(&effect_batch.slab_id)
                else {
                    error!(
                        "Failed to find init particle@1 bind group for slab #{}",
                        effect_batch.slab_id.index()
                    );
                    continue;
                };

                // Fetch bind group metadata@3
                let Some(metadata_bind_group) = effect_bind_groups
                    .init_metadata_bind_groups
                    .get(&effect_batch.slab_id)
                else {
                    error!(
                        "Failed to find init metadata@3 bind group for slab #{}",
                        effect_batch.slab_id.index()
                    );
                    continue;
                };

                if compute_pass
                    .set_cached_compute_pipeline(effect_batch.init_and_update_pipeline_ids.init)
                    .is_err()
                {
                    continue;
                }

                // Compute dynamic offsets
                let spawner_base = effect_batch.spawner_base;
                let spawner_aligned_size = effects_meta.spawner_buffer.aligned_size();
                debug_assert!(spawner_aligned_size >= GpuSpawnerParams::min_size().get() as usize);
                let spawner_offset = spawner_base * spawner_aligned_size as u32;
                let property_offset = effect_batch.property_offset;

                // Setup init pass
                compute_pass.set_bind_group(1, particle_bind_group, &[]);
                let offsets = if let Some(property_offset) = property_offset {
                    vec![spawner_offset, property_offset]
                } else {
                    vec![spawner_offset]
                };
                compute_pass.set_bind_group(
                    2,
                    property_bind_groups
                        .get(effect_batch.property_key.as_ref())
                        .unwrap(),
                    &offsets[..],
                );
                compute_pass.set_bind_group(3, &metadata_bind_group.bind_group, &[]);

                // Dispatch init job
                match effect_batch.spawn_info {
                    // Indirect dispatch via GPU spawn events
                    BatchSpawnInfo::GpuSpawner {
                        init_indirect_dispatch_index,
                        ..
                    } => {
                        assert!(effect_batch
                            .layout_flags
                            .contains(LayoutFlags::CONSUME_GPU_SPAWN_EVENTS));

                        // Note: the indirect offset of a dispatch workgroup only needs
                        // 4-byte alignment
                        assert_eq!(GpuDispatchIndirectArgs::min_size().get(), 12);
                        let indirect_offset = init_indirect_dispatch_index as u64 * 12;

                        trace!(
                            "record commands for indirect init pipeline of effect {:?} \
                                init_indirect_dispatch_index={} \
                                indirect_offset={} \
                                spawner_base={} \
                                spawner_offset={} \
                                property_key={:?}...",
                            effect_batch.handle,
                            init_indirect_dispatch_index,
                            indirect_offset,
                            spawner_base,
                            spawner_offset,
                            effect_batch.property_key,
                        );

                        compute_pass.dispatch_workgroups_indirect(
                            event_cache.init_indirect_dispatch_buffer().unwrap(),
                            indirect_offset,
                        );
                    }

                    // Direct dispatch via CPU spawn count
                    BatchSpawnInfo::CpuSpawner {
                        total_spawn_count: spawn_count,
                    } => {
                        assert!(!effect_batch
                            .layout_flags
                            .contains(LayoutFlags::CONSUME_GPU_SPAWN_EVENTS));

                        const WORKGROUP_SIZE: u32 = 64;
                        let workgroup_count = spawn_count.div_ceil(WORKGROUP_SIZE);

                        trace!(
                            "record commands for init pipeline of effect {:?} \
                                (spawn {} particles => {} workgroups) spawner_base={} \
                                spawner_offset={} \
                                property_key={:?}...",
                            effect_batch.handle,
                            spawn_count,
                            workgroup_count,
                            spawner_base,
                            spawner_offset,
                            effect_batch.property_key,
                        );

                        compute_pass.dispatch_workgroups(workgroup_count, 1, 1);
                    }
                }

                trace!("init compute dispatched");
            }
        }

        // Compute indirect dispatch pass
        if effects_meta.spawner_buffer.buffer().is_some()
            && !effects_meta.spawner_buffer.is_empty()
            && effects_meta.indirect_metadata_bind_group.is_some()
            && effects_meta.indirect_sim_params_bind_group.is_some()
        {
            // Only start a compute pass if there's an effect; makes things clearer in
            // debugger.
            let mut compute_pass =
                self.begin_compute_pass("hanabi:indirect_dispatch", pipeline_cache, render_context);

            // Dispatch indirect dispatch compute job
            trace!("record commands for indirect dispatch pipeline...");

            let has_gpu_spawn_events = !event_cache.child_infos().is_empty();
            if has_gpu_spawn_events {
                if let Some(indirect_child_info_buffer_bind_group) =
                    event_cache.indirect_child_info_buffer_bind_group()
                {
                    assert!(has_gpu_spawn_events);
                    compute_pass.set_bind_group(3, indirect_child_info_buffer_bind_group, &[]);
                } else {
                    error!("Missing child_info_buffer@3 bind group for the vfx_indirect pass.");
                    // render_context
                    //     .command_encoder()
                    //     .insert_debug_marker("ERROR:MissingIndirectBindGroup3");
                    // FIXME - Bevy doesn't allow returning custom errors here...
                    return Ok(());
                }
            }

            if compute_pass
                .set_cached_compute_pipeline(effects_meta.active_indirect_pipeline_id)
                .is_err()
            {
                // FIXME - Bevy doesn't allow returning custom errors here...
                return Ok(());
            }

            //error!("FIXME - effect_metadata_buffer has gaps!!!! this won't work. len() is
            // the size exluding gaps!");
            const WORKGROUP_SIZE: u32 = 64;
            //let total_effect_count = effects_meta.effect_metadata_buffer.len();
            let total_effect_count = effects_meta.spawner_buffer.len() as u32;
            let workgroup_count = total_effect_count.div_ceil(WORKGROUP_SIZE);

            // Setup vfx_indirect pass
            compute_pass.set_bind_group(
                0,
                effects_meta
                    .indirect_sim_params_bind_group
                    .as_ref()
                    .unwrap(),
                &[],
            );
            compute_pass.set_bind_group(
                1,
                // FIXME - got some unwrap() panic here, investigate... possibly race
                // condition!
                effects_meta.indirect_metadata_bind_group.as_ref().unwrap(),
                &[],
            );
            compute_pass.set_bind_group(
                2,
                effects_meta.indirect_spawner_bind_group.as_ref().unwrap(),
                &[],
            );
            compute_pass.dispatch_workgroups(workgroup_count, 1, 1);
            trace!(
                "indirect dispatch compute dispatched: total_effect_count={} workgroup_count={}",
                total_effect_count,
                workgroup_count
            );
        }

        // Compute update pass
        {
            let Some(indirect_buffer) = effects_meta.dispatch_indirect_buffer.buffer() else {
                warn!("Missing indirect buffer for update pass, cannot dispatch anything.");
                render_context
                    .command_encoder()
                    .insert_debug_marker("ERROR:MissingUpdateIndirectBuffer");
                // FIXME - Bevy doesn't allow returning custom errors here...
                return Ok(());
            };

            let mut compute_pass =
                self.begin_compute_pass("hanabi:update", pipeline_cache, render_context);

            // Bind group simparams@0 is common to everything, only set once per update pass
            compute_pass.set_bind_group(
                0,
                effects_meta.update_sim_params_bind_group.as_ref().unwrap(),
                &[],
            );

            // Dispatch update compute jobs
            for effect_batch in sorted_effect_batches.iter() {
                // Fetch bind group particle@1
                let Some(particle_bind_group) =
                    effect_cache.particle_sim_bind_group(&effect_batch.slab_id)
                else {
                    error!(
                        "Failed to find update particle@1 bind group for slab #{}",
                        effect_batch.slab_id.index()
                    );
                    compute_pass.insert_debug_marker("ERROR:MissingParticleSimBindGroup");
                    continue;
                };

                // Fetch bind group metadata@3
                let Some(metadata_bind_group) = effect_bind_groups
                    .update_metadata_bind_groups
                    .get(&effect_batch.slab_id)
                else {
                    error!(
                        "Failed to find update metadata@3 bind group for slab #{}",
                        effect_batch.slab_id.index()
                    );
                    compute_pass.insert_debug_marker("ERROR:MissingMetadataBindGroup");
                    continue;
                };

                // Fetch compute pipeline
                if let Err(err) = compute_pass
                    .set_cached_compute_pipeline(effect_batch.init_and_update_pipeline_ids.update)
                {
                    compute_pass.insert_debug_marker(&format!(
                        "ERROR:FailedToSetCachedUpdatePipeline:{:?}",
                        err
                    ));
                    continue;
                }

                // Compute dynamic offsets
                let spawner_index = effect_batch.spawner_base;
                let spawner_aligned_size = effects_meta.spawner_buffer.aligned_size();
                assert!(spawner_aligned_size >= GpuSpawnerParams::min_size().get() as usize);
                let spawner_offset = spawner_index * spawner_aligned_size as u32;
                let property_offset = effect_batch.property_offset;

                trace!(
                    "record commands for update pipeline of effect {:?} spawner_base={}",
                    effect_batch.handle,
                    spawner_index,
                );

                // Setup update pass
                compute_pass.set_bind_group(1, particle_bind_group, &[]);
                let offsets = if let Some(property_offset) = property_offset {
                    vec![spawner_offset, property_offset]
                } else {
                    vec![spawner_offset]
                };
                compute_pass.set_bind_group(
                    2,
                    property_bind_groups
                        .get(effect_batch.property_key.as_ref())
                        .unwrap(),
                    &offsets[..],
                );
                compute_pass.set_bind_group(3, &metadata_bind_group.bind_group, &[]);

                // Dispatch update job
                let dispatch_indirect_offset = effect_batch
                    .dispatch_buffer_indices
                    .update_dispatch_indirect_buffer_row_index
                    * 12;
                trace!(
                    "dispatch_workgroups_indirect: buffer={:?} offset={}B",
                    indirect_buffer,
                    dispatch_indirect_offset,
                );
                compute_pass
                    .dispatch_workgroups_indirect(indirect_buffer, dispatch_indirect_offset as u64);

                trace!("update compute dispatched");
            }
        }

        // Compute sort fill dispatch pass - Fill the indirect dispatch structs for any
        // batch of particles which needs sorting, based on the actual number of alive
        // particles in the batch after their update in the compute update pass. Since
        // particles may die during update, this may be different from the number of
        // particles updated.
        if let Some(queue_index) = sorted_effect_batches.dispatch_queue_index.as_ref() {
            gpu_buffer_operations.dispatch(
                *queue_index,
                render_context,
                utils_pipeline,
                Some("hanabi:sort_fill_dispatch"),
            );
        }

        // Compute sort pass
        {
            let mut compute_pass =
                self.begin_compute_pass("hanabi:sort", pipeline_cache, render_context);

            let effect_metadata_buffer = effects_meta.effect_metadata_buffer.buffer().unwrap();
            let indirect_buffer = sort_bind_groups.indirect_buffer().unwrap();

            // Loop on batches and find those which need sorting
            for effect_batch in sorted_effect_batches.iter() {
                trace!("Processing effect batch for sorting...");
                if !effect_batch.layout_flags.contains(LayoutFlags::RIBBONS) {
                    continue;
                }
                assert!(effect_batch.particle_layout.contains(Attribute::RIBBON_ID));
                assert!(effect_batch.particle_layout.contains(Attribute::AGE)); // or is that optional?

                let Some(effect_buffer) = effect_cache.get_slab(&effect_batch.slab_id) else {
                    warn!("Missing sort-fill effect buffer.");
                    // render_context
                    //     .command_encoder()
                    //     .insert_debug_marker("ERROR:MissingEffectBatchBuffer");
                    continue;
                };

                let indirect_dispatch_index = *effect_batch
                    .sort_fill_indirect_dispatch_index
                    .as_ref()
                    .unwrap();
                let indirect_offset =
                    sort_bind_groups.get_indirect_dispatch_byte_offset(indirect_dispatch_index);

                // Fill the sort buffer with the key-value pairs to sort
                {
                    compute_pass.push_debug_group("hanabi:sort_fill");

                    // Fetch compute pipeline
                    let Some(pipeline_id) =
                        sort_bind_groups.get_sort_fill_pipeline_id(&effect_batch.particle_layout)
                    else {
                        warn!("Missing sort-fill pipeline.");
                        compute_pass.insert_debug_marker("ERROR:MissingSortFillPipeline");
                        continue;
                    };
                    if compute_pass
                        .set_cached_compute_pipeline(pipeline_id)
                        .is_err()
                    {
                        compute_pass.insert_debug_marker("ERROR:FailedToSetSortFillPipeline");
                        compute_pass.pop_debug_group();
                        // FIXME - Bevy doesn't allow returning custom errors here...
                        return Ok(());
                    }

                    // Bind group sort_fill@0
                    let particle_buffer = effect_buffer.particle_buffer();
                    let indirect_index_buffer = effect_buffer.indirect_index_buffer();
                    let Some(bind_group) = sort_bind_groups.sort_fill_bind_group(
                        particle_buffer.id(),
                        indirect_index_buffer.id(),
                        effect_metadata_buffer.id(),
                    ) else {
                        warn!("Missing sort-fill bind group.");
                        compute_pass.insert_debug_marker("ERROR:MissingSortFillBindGroup");
                        continue;
                    };
                    let particle_offset = effect_buffer.particle_offset(effect_batch.slice.start);
                    let indirect_index_offset =
                        effect_buffer.indirect_index_offset(effect_batch.slice.start);
                    let effect_metadata_offset = effects_meta
                        .gpu_limits
                        .effect_metadata_offset(effect_batch.metadata_table_id.0)
                        as u32;
                    compute_pass.set_bind_group(
                        0,
                        bind_group,
                        &[
                            particle_offset,
                            indirect_index_offset,
                            effect_metadata_offset,
                        ],
                    );

                    compute_pass
                        .dispatch_workgroups_indirect(indirect_buffer, indirect_offset as u64);
                    trace!("Dispatched sort-fill with indirect offset +{indirect_offset}");

                    compute_pass.pop_debug_group();
                }

                // Do the actual sort
                {
                    compute_pass.push_debug_group("hanabi:sort");

                    if compute_pass
                        .set_cached_compute_pipeline(sort_bind_groups.sort_pipeline_id())
                        .is_err()
                    {
                        compute_pass.insert_debug_marker("ERROR:FailedToSetSortPipeline");
                        compute_pass.pop_debug_group();
                        // FIXME - Bevy doesn't allow returning custom errors here...
                        return Ok(());
                    }

                    compute_pass.set_bind_group(0, sort_bind_groups.sort_bind_group(), &[]);
                    compute_pass
                        .dispatch_workgroups_indirect(indirect_buffer, indirect_offset as u64);
                    trace!("Dispatched sort with indirect offset +{indirect_offset}");

                    compute_pass.pop_debug_group();
                }

                // Copy the sorted particle indices back into the indirect index buffer, where
                // the render pass will read them.
                {
                    compute_pass.push_debug_group("hanabi:copy_sorted_indices");

                    // Fetch compute pipeline
                    let pipeline_id = sort_bind_groups.get_sort_copy_pipeline_id();
                    if compute_pass
                        .set_cached_compute_pipeline(pipeline_id)
                        .is_err()
                    {
                        compute_pass.insert_debug_marker("ERROR:FailedToSetSortCopyPipeline");
                        compute_pass.pop_debug_group();
                        // FIXME - Bevy doesn't allow returning custom errors here...
                        return Ok(());
                    }

                    // Bind group sort_copy@0
                    let indirect_index_buffer = effect_buffer.indirect_index_buffer();
                    let Some(bind_group) = sort_bind_groups.sort_copy_bind_group(
                        indirect_index_buffer.id(),
                        effect_metadata_buffer.id(),
                    ) else {
                        warn!("Missing sort-copy bind group.");
                        compute_pass.insert_debug_marker("ERROR:MissingSortCopyBindGroup");
                        continue;
                    };
                    let indirect_index_offset = effect_batch.slice.start;
                    let effect_metadata_offset = effects_meta
                        .effect_metadata_buffer
                        .dynamic_offset(effect_batch.metadata_table_id);
                    compute_pass.set_bind_group(
                        0,
                        bind_group,
                        &[indirect_index_offset, effect_metadata_offset],
                    );

                    compute_pass
                        .dispatch_workgroups_indirect(indirect_buffer, indirect_offset as u64);
                    trace!("Dispatched sort-copy with indirect offset +{indirect_offset}");

                    compute_pass.pop_debug_group();
                }
            }
        }

        Ok(())
    }
}

impl From<LayoutFlags> for ParticleRenderAlphaMaskPipelineKey {
    fn from(layout_flags: LayoutFlags) -> Self {
        if layout_flags.contains(LayoutFlags::USE_ALPHA_MASK) {
            ParticleRenderAlphaMaskPipelineKey::AlphaMask
        } else if layout_flags.contains(LayoutFlags::OPAQUE) {
            ParticleRenderAlphaMaskPipelineKey::Opaque
        } else {
            ParticleRenderAlphaMaskPipelineKey::Blend
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn layout_flags() {
        let flags = LayoutFlags::default();
        assert_eq!(flags, LayoutFlags::NONE);
    }

    #[cfg(feature = "gpu_tests")]
    #[test]
    fn gpu_limits() {
        use crate::test_utils::MockRenderer;

        let renderer = MockRenderer::new();
        let device = renderer.device();
        let limits = GpuLimits::from_device(&device);

        // assert!(limits.storage_buffer_align().get() >= 1);
        assert!(limits.effect_metadata_offset(256) >= 256 * GpuEffectMetadata::min_size().get());
    }

    #[cfg(feature = "gpu_tests")]
    #[test]
    fn gpu_ops_ifda() {
        use crate::test_utils::MockRenderer;

        let renderer = MockRenderer::new();
        let device = renderer.device();
        let render_queue = renderer.queue();

        let mut world = World::new();
        world.insert_resource(device.clone());
        let mut buffer_ops = GpuBufferOperations::from_world(&mut world);

        let src_buffer = device.create_buffer(&BufferDescriptor {
            label: None,
            size: 256,
            usage: BufferUsages::STORAGE,
            mapped_at_creation: false,
        });
        let dst_buffer = device.create_buffer(&BufferDescriptor {
            label: None,
            size: 256,
            usage: BufferUsages::STORAGE,
            mapped_at_creation: false,
        });

        // Two consecutive ops can be merged. This includes having contiguous slices
        // both in source and destination.
        buffer_ops.begin_frame();
        {
            let mut q = InitFillDispatchQueue::default();
            q.enqueue(0, 0);
            assert_eq!(q.queue.len(), 1);
            q.enqueue(1, 1);
            // Ops are not batched yet
            assert_eq!(q.queue.len(), 2);
            // On submit, the ops get batched together
            q.submit(&src_buffer, &dst_buffer, &mut buffer_ops);
            assert_eq!(buffer_ops.args_buffer.len(), 1);
        }
        buffer_ops.end_frame(&device, &render_queue);

        // Even if out of order, the init fill dispatch ops are batchable. Here the
        // offsets are enqueued inverted.
        buffer_ops.begin_frame();
        {
            let mut q = InitFillDispatchQueue::default();
            q.enqueue(1, 1);
            assert_eq!(q.queue.len(), 1);
            q.enqueue(0, 0);
            // Ops are not batched yet
            assert_eq!(q.queue.len(), 2);
            // On submit, the ops get batched together
            q.submit(&src_buffer, &dst_buffer, &mut buffer_ops);
            assert_eq!(buffer_ops.args_buffer.len(), 1);
        }
        buffer_ops.end_frame(&device, &render_queue);

        // However, both the source and destination need to be contiguous at the same
        // time. Here they are mixed so we can't batch.
        buffer_ops.begin_frame();
        {
            let mut q = InitFillDispatchQueue::default();
            q.enqueue(0, 1);
            assert_eq!(q.queue.len(), 1);
            q.enqueue(1, 0);
            // Ops are not batched yet
            assert_eq!(q.queue.len(), 2);
            // On submit, the ops cannot get batched together
            q.submit(&src_buffer, &dst_buffer, &mut buffer_ops);
            assert_eq!(buffer_ops.args_buffer.len(), 2);
        }
        buffer_ops.end_frame(&device, &render_queue);
    }
}

djeedai / bevy_hanabi / 18243240671

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous