diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index 380655bb..562b69fc 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -17,6 +17,7 @@ #include "Engine/Scene/ECSModule/Core.hh" +#include #include #include @@ -124,10 +125,8 @@ struct Handle::Impl { SlotMap textures = {}; std::shared_mutex materials_mutex = {}; - vuk::PersistentDescriptorSet materials_descriptor_set = {}; SlotMap materials = {}; std::vector dirty_materials = {}; - Buffer materials_buffer = {}; SlotMap, SceneID> scenes = {}; }; @@ -141,12 +140,6 @@ auto AssetManager::create(Device *device) -> AssetManager { impl->device = device; impl->root_path = fs::current_path(); - BindlessDescriptorInfo bindless_set_info[] = { - { .binding = 0, .type = vuk::DescriptorType::eSampler, .descriptor_count = 1024 }, - { .binding = 1, .type = vuk::DescriptorType::eSampledImage, .descriptor_count = 1024 }, - }; - impl->materials_descriptor_set = device->create_persistent_descriptor_set(bindless_set_info, 1).release(); - return self; } @@ -154,9 +147,6 @@ auto AssetManager::destroy() -> void { ZoneScoped; auto read_lock = std::shared_lock(impl->registry_mutex); - if (impl->materials_buffer) { - impl->device->destroy(impl->materials_buffer.id()); - } for (const auto &[asset_uuid, asset] : impl->registry) { // sanity check @@ -680,7 +670,9 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { auto primitive_index = info->model->primitives.size(); auto &primitive = info->model->primitives.emplace_back(); auto *material_asset = app.asset_man.get_asset(info->model->materials[material_index]); - auto global_material_index = SlotMap_decode_id(material_asset->material_id).index; + + info->model->gpu_meshes.emplace_back(); + info->model->gpu_mesh_buffers.emplace_back(); info->vertex_positions.resize(info->vertex_positions.size() + vertex_count); info->vertex_normals.resize(info->vertex_normals.size() + vertex_count); @@ -688,7 +680,7 @@ auto AssetManager::load_model(const 
UUID &uuid) -> bool { info->indices.resize(info->indices.size() + index_count); mesh.primitive_indices.push_back(primitive_index); - primitive.material_index = global_material_index; + primitive.material_id = material_asset->material_id; primitive.vertex_offset = vertex_offset; primitive.vertex_count = vertex_count; primitive.index_offset = index_offset; @@ -726,6 +718,8 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { return false; } + auto &transfer_man = impl->device->transfer_man(); + // ── SCENE HIERARCHY ───────────────────────────────────────────────── for (const auto &node : gltf_model->nodes) { model->nodes.push_back( @@ -744,50 +738,145 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { } // ── MESH PROCESSING ───────────────────────────────────────────────── - std::vector model_vertex_positions = {}; - std::vector model_indices = {}; - - std::vector model_meshlets = {}; - std::vector model_meshlet_bounds = {}; - std::vector model_local_triangle_indices = {}; - + auto model_indices = std::move(gltf_callbacks.indices); + auto model_vertices = std::move(gltf_callbacks.vertex_positions); + auto model_normals = std::move(gltf_callbacks.vertex_normals); + auto model_texcoords = std::move(gltf_callbacks.vertex_texcoords); + + // for each model (aka gltf scene): + // - for each mesh: + // - - for each primitive: + // - - - for each lod: + // - - - - generate lods + // - - - - optimize and remap geometry + // - - - - calculate meshlets and bounds + // for (const auto &mesh : model->meshes) { for (auto primitive_index : mesh.primitive_indices) { - ZoneScopedN("GPU Meshlet Generation"); - auto &primitive = model->primitives[primitive_index]; - auto vertex_offset = model_vertex_positions.size(); - auto index_offset = model_indices.size(); - auto triangle_offset = model_local_triangle_indices.size(); - auto meshlet_offset = model_meshlets.size(); - - auto raw_indices = ls::span(gltf_callbacks.indices.data() + primitive.index_offset, 
primitive.index_count); - auto raw_vertex_positions = ls::span(gltf_callbacks.vertex_positions.data() + primitive.vertex_offset, primitive.vertex_count); - auto raw_vertex_normals = ls::span(gltf_callbacks.vertex_normals.data() + primitive.vertex_offset, primitive.vertex_count); - - auto meshlets = std::vector(); - auto meshlet_bounds_infos = std::vector(); - auto meshlet_indices = std::vector(); - auto local_triangle_indices = std::vector(); - { - ZoneScopedN("Build Meshlets"); - // Worst case count - auto max_meshlets = meshopt_buildMeshletsBound( // - raw_indices.size(), - Model::MAX_MESHLET_INDICES, - Model::MAX_MESHLET_PRIMITIVES + auto &gpu_mesh = model->gpu_meshes[primitive_index]; + auto &gpu_mesh_buffer = model->gpu_mesh_buffers[primitive_index]; + + // ── Geometry remapping ────────────────────────────────────────────── + auto primitive_indices = ls::span(model_indices.data() + primitive.index_offset, primitive.index_count); + auto primitive_vertices = ls::span(model_vertices.data() + primitive.vertex_offset, primitive.vertex_count); + auto primitive_normals = ls::span(model_normals.data() + primitive.vertex_offset, primitive.vertex_count); + auto primitive_texcoords = ls::span(model_texcoords.data() + primitive.vertex_offset, primitive.vertex_count); + + auto remapped_vertices = std::vector(primitive_vertices.size()); + auto vertex_count = meshopt_optimizeVertexFetchRemap( + remapped_vertices.data(), + primitive_indices.data(), + primitive_indices.size(), + primitive.vertex_count + ); + + auto mesh_vertices = std::vector(vertex_count); + meshopt_remapVertexBuffer( + mesh_vertices.data(), + primitive_vertices.data(), + primitive_vertices.size(), + sizeof(glm::vec3), + remapped_vertices.data() + ); + + auto mesh_normals = std::vector(vertex_count); + meshopt_remapVertexBuffer( + mesh_normals.data(), + primitive_normals.data(), + primitive_normals.size(), + sizeof(glm::vec3), + remapped_vertices.data() + ); + + auto mesh_texcoords = std::vector(); + if 
(!primitive_texcoords.empty()) { + mesh_texcoords.resize(vertex_count); + meshopt_remapVertexBuffer( + mesh_texcoords.data(), + primitive_texcoords.data(), + primitive_texcoords.size(), + sizeof(glm::vec2), + remapped_vertices.data() ); - auto raw_meshlets = std::vector(max_meshlets); - meshlet_indices.resize(max_meshlets * Model::MAX_MESHLET_INDICES); - local_triangle_indices.resize(max_meshlets * Model::MAX_MESHLET_PRIMITIVES * 3); - auto meshlet_count = meshopt_buildMeshlets( // + } + + auto mesh_indices = std::vector(primitive.index_count); + meshopt_remapIndexBuffer(mesh_indices.data(), primitive_indices.data(), primitive_indices.size(), remapped_vertices.data()); + + // ── LOD generation ────────────────────────────────────────────────── + + const auto mesh_upload_size = 0 // + + ls::size_bytes(mesh_vertices) // + + ls::size_bytes(mesh_normals) // + + ls::size_bytes(mesh_texcoords); + auto upload_size = mesh_upload_size; + + ls::pair, u64> lod_cpu_buffers[GPU::Mesh::MAX_LODS] = {}; + auto last_lod_indices = std::vector(); + for (auto lod_index = 0_sz; lod_index < GPU::Mesh::MAX_LODS; lod_index++) { + ZoneNamedN(z, "GPU Meshlet Generation", true); + + auto &cur_lod = gpu_mesh.lods[lod_index]; + + auto simplified_indices = std::vector(); + if (lod_index == 0) { + simplified_indices = std::vector(mesh_indices.begin(), mesh_indices.end()); + } else { + const auto &last_lod = gpu_mesh.lods[lod_index - 1]; + auto lod_index_count = ((last_lod_indices.size() + 5_sz) / 6_sz) * 3_sz; + simplified_indices.resize(last_lod_indices.size(), 0_u32); + constexpr auto TARGET_ERROR = std::numeric_limits::max(); + constexpr f32 NORMAL_WEIGHTS[] = { 1.0f, 1.0f, 1.0f }; + + auto result_error = 0.0f; + auto result_index_count = meshopt_simplifyWithAttributes( + simplified_indices.data(), + last_lod_indices.data(), + last_lod_indices.size(), + reinterpret_cast(mesh_vertices.data()), + mesh_vertices.size(), + sizeof(glm::vec3), + reinterpret_cast(mesh_normals.data()), + 
sizeof(glm::vec3), + NORMAL_WEIGHTS, + ls::count_of(NORMAL_WEIGHTS), + nullptr, + lod_index_count, + TARGET_ERROR, + meshopt_SimplifyLockBorder, + &result_error + ); + + cur_lod.error = last_lod.error + result_error; + if (result_index_count > (lod_index_count + lod_index_count / 2) || result_error > 0.5 || result_index_count < 6) { + // Error bound + break; + } + + simplified_indices.resize(result_index_count); + } + + gpu_mesh.lod_count += 1; + last_lod_indices = simplified_indices; + + meshopt_optimizeVertexCache(simplified_indices.data(), simplified_indices.data(), simplified_indices.size(), vertex_count); + + // Worst case count + auto max_meshlet_count = + meshopt_buildMeshletsBound(simplified_indices.size(), Model::MAX_MESHLET_INDICES, Model::MAX_MESHLET_PRIMITIVES); + auto raw_meshlets = std::vector(max_meshlet_count); + auto indirect_vertex_indices = std::vector(max_meshlet_count * Model::MAX_MESHLET_INDICES); + auto local_triangle_indices = std::vector(max_meshlet_count * Model::MAX_MESHLET_PRIMITIVES * 3); + + auto meshlet_count = meshopt_buildMeshlets( raw_meshlets.data(), - meshlet_indices.data(), + indirect_vertex_indices.data(), local_triangle_indices.data(), - raw_indices.data(), - raw_indices.size(), - reinterpret_cast(raw_vertex_positions.data()), - raw_vertex_positions.size(), + simplified_indices.data(), + simplified_indices.size(), + reinterpret_cast(mesh_vertices.data()), + mesh_vertices.size(), sizeof(glm::vec3), Model::MAX_MESHLET_INDICES, Model::MAX_MESHLET_PRIMITIVES, @@ -796,81 +885,137 @@ auto AssetManager::load_model(const UUID &uuid) -> bool { // Trim meshlets from worst case to current case raw_meshlets.resize(meshlet_count); - meshlets.resize(meshlet_count); - meshlet_bounds_infos.resize(meshlet_count); + auto meshlets = std::vector(meshlet_count); const auto &last_meshlet = raw_meshlets[meshlet_count - 1]; - meshlet_indices.resize(last_meshlet.vertex_offset + last_meshlet.vertex_count); + 
indirect_vertex_indices.resize(last_meshlet.vertex_offset + last_meshlet.vertex_count); local_triangle_indices.resize(last_meshlet.triangle_offset + ((last_meshlet.triangle_count * 3 + 3) & ~3_u32)); - for (const auto &[raw_meshlet, meshlet, meshlet_bounds] : std::views::zip(raw_meshlets, meshlets, meshlet_bounds_infos)) { - // AABB Computing + auto mesh_bb_min = glm::vec3(std::numeric_limits::max()); + auto mesh_bb_max = glm::vec3(std::numeric_limits::lowest()); + auto meshlet_bounds = std::vector(meshlet_count); + for (const auto &[raw_meshlet, meshlet, bounds] : std::views::zip(raw_meshlets, meshlets, meshlet_bounds)) { + // AABB computation auto meshlet_bb_min = glm::vec3(std::numeric_limits::max()); auto meshlet_bb_max = glm::vec3(std::numeric_limits::lowest()); for (u32 i = 0; i < raw_meshlet.triangle_count * 3; i++) { - const auto &tri_pos = raw_vertex_positions - [meshlet_indices[raw_meshlet.vertex_offset + local_triangle_indices[raw_meshlet.triangle_offset + i]]]; + const auto &tri_pos = mesh_vertices + [indirect_vertex_indices[raw_meshlet.vertex_offset + local_triangle_indices[raw_meshlet.triangle_offset + i]]]; meshlet_bb_min = glm::min(meshlet_bb_min, tri_pos); meshlet_bb_max = glm::max(meshlet_bb_max, tri_pos); } - // SB and Cone Computing - // auto sphere_bounds = meshopt_computeMeshletBounds( // - // &meshlet_indices[raw_meshlet.vertex_offset], - // &local_triangle_indices[raw_meshlet.triangle_offset], - // raw_meshlet.triangle_count, - // reinterpret_cast(raw_vertex_positions.data()), - // raw_vertex_positions.size(), - // sizeof(glm::vec3) - // ); - - meshlet.vertex_offset = vertex_offset; - meshlet.index_offset = index_offset + raw_meshlet.vertex_offset; - meshlet.triangle_offset = triangle_offset + raw_meshlet.triangle_offset; + // Sphere and Cone computation + auto sphere_bounds = meshopt_computeMeshletBounds( + &indirect_vertex_indices[raw_meshlet.vertex_offset], + &local_triangle_indices[raw_meshlet.triangle_offset], + 
raw_meshlet.triangle_count, + reinterpret_cast(mesh_vertices.data()), + vertex_count, + sizeof(glm::vec3) + ); + + meshlet.indirect_vertex_index_offset = raw_meshlet.vertex_offset; + meshlet.local_triangle_index_offset = raw_meshlet.triangle_offset; + meshlet.vertex_count = raw_meshlet.vertex_count; meshlet.triangle_count = raw_meshlet.triangle_count; - meshlet_bounds.aabb_min = meshlet_bb_min; - meshlet_bounds.aabb_max = meshlet_bb_max; - // meshlet_bounds.sphere_center.x = sphere_bounds.center[0]; - // meshlet_bounds.sphere_center.y = sphere_bounds.center[1]; - // meshlet_bounds.sphere_center.z = sphere_bounds.center[2]; - // meshlet_bounds.sphere_radius = sphere_bounds.radius; + + bounds.aabb_center = (meshlet_bb_max + meshlet_bb_min) * 0.5f; + bounds.aabb_extent = meshlet_bb_max - meshlet_bb_min; + bounds.sphere_center = glm::make_vec3(sphere_bounds.center); + bounds.sphere_radius = sphere_bounds.radius; + + mesh_bb_min = glm::min(mesh_bb_min, meshlet_bb_min); + mesh_bb_max = glm::max(mesh_bb_max, meshlet_bb_max); } - primitive.meshlet_count = meshlet_count; - primitive.meshlet_offset = meshlet_offset; - primitive.local_triangle_indices_offset = triangle_offset; + gpu_mesh.bounds.aabb_center = (mesh_bb_max + mesh_bb_min) * 0.5f; + gpu_mesh.bounds.aabb_extent = mesh_bb_max - mesh_bb_min; + + auto lod_upload_size = 0 // + + ls::size_bytes(simplified_indices) // + + ls::size_bytes(meshlets) // + + ls::size_bytes(meshlet_bounds) // + + ls::size_bytes(local_triangle_indices) // + + ls::size_bytes(indirect_vertex_indices); + auto cpu_lod_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, lod_upload_size); + auto cpu_lod_ptr = reinterpret_cast(cpu_lod_buffer->mapped_ptr); + + auto upload_offset = 0_u64; + cur_lod.indices = upload_offset; + std::memcpy(cpu_lod_ptr + upload_offset, simplified_indices.data(), ls::size_bytes(simplified_indices)); + upload_offset += ls::size_bytes(simplified_indices); + + cur_lod.meshlets = upload_offset; + 
std::memcpy(cpu_lod_ptr + upload_offset, meshlets.data(), ls::size_bytes(meshlets)); + upload_offset += ls::size_bytes(meshlets); + + cur_lod.meshlet_bounds = upload_offset; + std::memcpy(cpu_lod_ptr + upload_offset, meshlet_bounds.data(), ls::size_bytes(meshlet_bounds)); + upload_offset += ls::size_bytes(meshlet_bounds); + + cur_lod.local_triangle_indices = upload_offset; + std::memcpy(cpu_lod_ptr + upload_offset, local_triangle_indices.data(), ls::size_bytes(local_triangle_indices)); + upload_offset += ls::size_bytes(local_triangle_indices); + + cur_lod.indirect_vertex_indices = upload_offset; + std::memcpy(cpu_lod_ptr + upload_offset, indirect_vertex_indices.data(), ls::size_bytes(indirect_vertex_indices)); + upload_offset += ls::size_bytes(indirect_vertex_indices); + + cur_lod.indices_count = simplified_indices.size(); + cur_lod.meshlet_count = meshlet_count; + cur_lod.meshlet_bounds_count = meshlet_bounds.size(); + cur_lod.local_triangle_indices_count = local_triangle_indices.size(); + cur_lod.indirect_vertex_indices_count = indirect_vertex_indices.size(); + + lod_cpu_buffers[lod_index] = ls::pair(cpu_lod_buffer, lod_upload_size); + upload_size += lod_upload_size; } - std::ranges::move(raw_vertex_positions, std::back_inserter(model_vertex_positions)); - std::ranges::move(meshlet_indices, std::back_inserter(model_indices)); - std::ranges::move(meshlets, std::back_inserter(model_meshlets)); - std::ranges::move(meshlet_bounds_infos, std::back_inserter(model_meshlet_bounds)); - std::ranges::move(local_triangle_indices, std::back_inserter(model_local_triangle_indices)); - } - } + auto mesh_upload_offset = 0_u64; + gpu_mesh_buffer = Buffer::create(*impl->device, upload_size, vuk::MemoryUsage::eGPUonly).value(); - auto &transfer_man = impl->device->transfer_man(); - model->indices = Buffer::create(*impl->device, ls::size_bytes(model_indices)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(model_indices), model->indices)); + // Mesh first + auto 
cpu_mesh_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, mesh_upload_size); + auto cpu_mesh_ptr = reinterpret_cast(cpu_mesh_buffer->mapped_ptr); - model->vertex_positions = Buffer::create(*impl->device, ls::size_bytes(model_vertex_positions)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(model_vertex_positions), model->vertex_positions)); + auto gpu_mesh_bda = gpu_mesh_buffer.device_address(); + gpu_mesh.vertex_positions = gpu_mesh_bda + mesh_upload_offset; + std::memcpy(cpu_mesh_ptr + mesh_upload_offset, mesh_vertices.data(), ls::size_bytes(mesh_vertices)); + mesh_upload_offset += ls::size_bytes(mesh_vertices); - model->vertex_normals = Buffer::create(*impl->device, ls::size_bytes(gltf_callbacks.vertex_normals)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(gltf_callbacks.vertex_normals), model->vertex_normals)); + gpu_mesh.vertex_normals = gpu_mesh_bda + mesh_upload_offset; + std::memcpy(cpu_mesh_ptr + mesh_upload_offset, mesh_normals.data(), ls::size_bytes(mesh_normals)); + mesh_upload_offset += ls::size_bytes(mesh_normals); - if (!gltf_callbacks.vertex_texcoords.empty()) { - model->texture_coords = Buffer::create(*impl->device, ls::size_bytes(gltf_callbacks.vertex_texcoords)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(gltf_callbacks.vertex_texcoords), model->texture_coords)); - } + if (!mesh_texcoords.empty()) { + gpu_mesh.texture_coords = gpu_mesh_bda + mesh_upload_offset; + std::memcpy(cpu_mesh_ptr + mesh_upload_offset, mesh_texcoords.data(), ls::size_bytes(mesh_texcoords)); + mesh_upload_offset += ls::size_bytes(mesh_texcoords); + } + + auto gpu_mesh_buffer_handle = impl->device->buffer(gpu_mesh_buffer.id()); + auto gpu_mesh_subrange = vuk::discard_buf("mesh", gpu_mesh_buffer_handle->subrange(0, mesh_upload_size)); + gpu_mesh_subrange = transfer_man.upload_staging(std::move(cpu_mesh_buffer), std::move(gpu_mesh_subrange)); + 
transfer_man.wait_on(std::move(gpu_mesh_subrange)); - model->meshlets = Buffer::create(*impl->device, ls::size_bytes(model_meshlets)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(model_meshlets), model->meshlets)); + for (auto lod_index = 0_sz; lod_index < gpu_mesh.lod_count; lod_index++) { + auto &&[lod_cpu_buffer, lod_upload_size] = lod_cpu_buffers[lod_index]; + auto &lod = gpu_mesh.lods[lod_index]; - model->meshlet_bounds = Buffer::create(*impl->device, ls::size_bytes(model_meshlet_bounds)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(model_meshlet_bounds), model->meshlet_bounds)); + lod.indices += gpu_mesh_bda + mesh_upload_offset; + lod.meshlets += gpu_mesh_bda + mesh_upload_offset; + lod.meshlet_bounds += gpu_mesh_bda + mesh_upload_offset; + lod.local_triangle_indices += gpu_mesh_bda + mesh_upload_offset; + lod.indirect_vertex_indices += gpu_mesh_bda + mesh_upload_offset; - model->local_triangle_indices = Buffer::create(*impl->device, ls::size_bytes(model_local_triangle_indices)).value(); - transfer_man.wait_on(transfer_man.upload_staging(ls::span(model_local_triangle_indices), model->local_triangle_indices)); + auto gpu_lod_subrange = vuk::discard_buf("mesh lod subrange", gpu_mesh_buffer_handle->subrange(mesh_upload_offset, lod_upload_size)); + gpu_lod_subrange = transfer_man.upload_staging(std::move(lod_cpu_buffer), std::move(gpu_lod_subrange)); + transfer_man.wait_on(std::move(gpu_lod_subrange)); + + mesh_upload_offset += lod_upload_size; + } + } + } return true; } @@ -889,15 +1034,9 @@ auto AssetManager::unload_model(const UUID &uuid) -> bool { this->unload_material(v); } - impl->device->destroy(model->indices.id()); - impl->device->destroy(model->vertex_positions.id()); - impl->device->destroy(model->vertex_normals.id()); - if (model->texture_coords) { - impl->device->destroy(model->texture_coords.id()); + for (const auto &buffer : model->gpu_mesh_buffers) { + impl->device->destroy(buffer.id()); } - 
impl->device->destroy(model->meshlets.id()); - impl->device->destroy(model->meshlet_bounds.id()); - impl->device->destroy(model->local_triangle_indices.id()); impl->models.destroy_slot(asset->model_id); asset->model_id = ModelID::Invalid; @@ -969,6 +1108,22 @@ auto AssetManager::load_texture(const UUID &uuid, const TextureInfo &info) -> bo } } + auto sampler_info = SamplerInfo{ + .min_filter = vuk::Filter::eLinear, + .mag_filter = vuk::Filter::eLinear, + .mipmap_mode = vuk::SamplerMipmapMode::eLinear, + .addr_u = vuk::SamplerAddressMode::eRepeat, + .addr_v = vuk::SamplerAddressMode::eRepeat, + .addr_w = vuk::SamplerAddressMode::eRepeat, + .compare_op = vuk::CompareOp::eNever, + .max_anisotropy = 8.0f, + .mip_lod_bias = 0.0f, + .min_lod = 0.0f, + .max_lod = static_cast(mip_level_count - 1), + .use_anisotropy = true, + }; + auto sampler = Sampler::create(*impl->device, sampler_info).value(); + auto rel_path = fs::relative(asset_path, impl->root_path); auto image_info = ImageInfo{ .format = format, @@ -1054,21 +1209,6 @@ auto AssetManager::load_texture(const UUID &uuid, const TextureInfo &info) -> bo { auto write_lock = std::unique_lock(impl->textures_mutex); auto *asset = this->get_asset(uuid); - auto sampler_info = SamplerInfo{ - .min_filter = vuk::Filter::eLinear, - .mag_filter = vuk::Filter::eLinear, - .mipmap_mode = vuk::SamplerMipmapMode::eLinear, - .addr_u = vuk::SamplerAddressMode::eRepeat, - .addr_v = vuk::SamplerAddressMode::eRepeat, - .addr_w = vuk::SamplerAddressMode::eRepeat, - .compare_op = vuk::CompareOp::eNever, - .max_anisotropy = 8.0f, - .mip_lod_bias = 0.0f, - .min_lod = 0.0f, - .max_lod = static_cast(mip_level_count - 1), - .use_anisotropy = true, - }; - auto sampler = Sampler::create(*impl->device, sampler_info).value(); asset->texture_id = impl->textures.create_slot(Texture{ .image = image, .image_view = image_view, .sampler = sampler }); } @@ -1101,7 +1241,7 @@ auto AssetManager::unload_texture(const UUID &uuid) -> bool { auto 
AssetManager::is_texture_loaded(const UUID &uuid) -> bool { ZoneScoped; - std::shared_lock _(impl->textures_mutex); + auto read_lock = std::shared_lock(impl->textures_mutex); auto *asset = this->get_asset(uuid); if (!asset) { return false; @@ -1400,7 +1540,7 @@ auto AssetManager::delete_asset(const UUID &uuid) -> void { } } - LOG_TRACE("Deleted asset {}.", uuid.str()); + // LOG_TRACE("Deleted asset {}.", uuid.str()); } auto AssetManager::get_asset(const UUID &uuid) -> Asset * { @@ -1522,163 +1662,27 @@ auto AssetManager::get_scene(SceneID scene_id) -> Scene * { auto AssetManager::set_material_dirty(MaterialID material_id) -> void { ZoneScoped; - std::shared_lock shared_lock(impl->materials_mutex); + auto read_lock = std::shared_lock(impl->materials_mutex); if (std::ranges::find(impl->dirty_materials, material_id) != impl->dirty_materials.end()) { return; } - shared_lock.unlock(); - impl->materials_mutex.lock(); + read_lock.unlock(); + auto write_lock = std::unique_lock(impl->materials_mutex); impl->dirty_materials.emplace_back(material_id); - impl->materials_mutex.unlock(); } -auto AssetManager::get_materials_buffer() -> vuk::Value { +auto AssetManager::get_dirty_material_ids() -> std::vector { ZoneScoped; - auto uuid_to_index = [this](UUID &uuid) -> ls::option { - if (!this->is_texture_loaded(uuid)) { - return ls::nullopt; - } - - auto *texture_asset = this->get_asset(uuid); - auto *texture = this->get_texture(texture_asset->texture_id); - auto texture_index = SlotMap_decode_id(texture_asset->texture_id).index; - auto image_view = impl->device->image_view(texture->image_view.id()); - auto sampler = impl->device->sampler(texture->sampler.id()); - - impl->materials_descriptor_set.update_sampler(0, texture_index, sampler.value()); - impl->materials_descriptor_set.update_sampled_image(1, texture_index, image_view.value(), vuk::ImageLayout::eShaderReadOnlyOptimal); - - return texture_index; - }; - - auto to_gpu_material = [&](Material *material) -> GPU::Material { - 
auto albedo_image_index = uuid_to_index(material->albedo_texture); - auto normal_image_index = uuid_to_index(material->normal_texture); - auto emissive_image_index = uuid_to_index(material->emissive_texture); - auto metallic_roughness_image_index = uuid_to_index(material->metallic_roughness_texture); - auto occlusion_image_index = uuid_to_index(material->occlusion_texture); - - auto flags = GPU::MaterialFlag::None; - flags |= albedo_image_index.has_value() ? GPU::MaterialFlag::HasAlbedoImage : GPU::MaterialFlag::None; - flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; - flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; - flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; - flags |= occlusion_image_index.has_value() ? GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; - //flags |= GPU::MaterialFlag::NormalFlipY; - - return { - .albedo_color = material->albedo_color, - .emissive_color = material->emissive_color, - .roughness_factor = material->roughness_factor, - .metallic_factor = material->metallic_factor, - .alpha_cutoff = material->alpha_cutoff, - .flags = flags, - .albedo_image_index = albedo_image_index.value_or(~0_u32), - .normal_image_index = normal_image_index.value_or(~0_u32), - .emissive_image_index = emissive_image_index.value_or(~0_u32), - .metallic_roughness_image_index = metallic_roughness_image_index.value_or(~0_u32), - .occlusion_image_index = occlusion_image_index.value_or(~0_u32), - }; - }; - - auto all_materials_count = 0_sz; - auto dirty_materials = std::vector(); - { - auto read_lock = std::shared_lock(impl->materials_mutex); - if (impl->materials.size() == 0) { - return {}; - } - - read_lock.unlock(); - auto write_lock = std::unique_lock(impl->materials_mutex); - - all_materials_count = impl->materials.size(); - - // DO NOT MOVE!!! 
just take a snapshot of the contents - dirty_materials = impl->dirty_materials; - impl->dirty_materials.clear(); - } - - auto gpu_materials_bytes_size = all_materials_count * sizeof(GPU::Material); - auto dirty_material_count = dirty_materials.size(); - auto dirty_materials_size_bytes = dirty_materials.size() * sizeof(GPU::Material); + auto read_lock = std::shared_lock(impl->materials_mutex); + auto dirty_materials = std::vector(impl->dirty_materials); - auto materials_buffer = vuk::Value{}; - bool rebuild_materials = false; - if (gpu_materials_bytes_size > impl->materials_buffer.data_size()) { - if (impl->materials_buffer.id() != BufferID::Invalid) { - impl->device->wait(); - impl->device->destroy(impl->materials_buffer.id()); - } - - impl->materials_buffer = Buffer::create(*impl->device, gpu_materials_bytes_size, vuk::MemoryUsage::eGPUonly).value(); - materials_buffer = impl->materials_buffer.acquire(*impl->device, "materials buffer", vuk::eNone); - vuk::fill(materials_buffer, ~0_u32); - rebuild_materials = true; - } else if (impl->materials_buffer) { - materials_buffer = impl->materials_buffer.acquire(*impl->device, "materials buffer", vuk::eNone); - } - - auto &transfer_man = impl->device->transfer_man(); - if (rebuild_materials) { - auto _ = std::shared_lock(impl->registry_mutex); - auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, gpu_materials_bytes_size); - auto *dst_material_ptr = reinterpret_cast(upload_buffer->mapped_ptr); - - // All loaded materials - auto all_materials = impl->materials.slots_unsafe(); - for (auto &dirty_material : all_materials) { - auto gpu_material = to_gpu_material(&dirty_material); - std::memcpy(dst_material_ptr, &gpu_material, sizeof(GPU::Material)); - dst_material_ptr++; - } - - materials_buffer = transfer_man.upload_staging(std::move(upload_buffer), std::move(materials_buffer)); - } else if (dirty_material_count != 0) { - auto upload_offsets = std::vector(dirty_material_count); - auto 
upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, dirty_materials_size_bytes); - auto *dst_material_ptr = reinterpret_cast(upload_buffer->mapped_ptr); - for (const auto &[dirty_material_id, offset] : std::views::zip(dirty_materials, upload_offsets)) { - auto index = SlotMap_decode_id(dirty_material_id).index; - auto *material = this->get_material(dirty_material_id); - auto gpu_material = to_gpu_material(material); - - std::memcpy(dst_material_ptr, &gpu_material, sizeof(GPU::Material)); - offset = index * sizeof(GPU::Material); - dst_material_ptr++; - } - - auto update_materials_pass = vuk::make_pass( - "update materials", - [upload_offsets = std::move( - upload_offsets - )](vuk::CommandBuffer &cmd_list, VUK_BA(vuk::Access::eTransferRead) src_buffer, VUK_BA(vuk::Access::eTransferWrite) dst_buffer) { - for (usize i = 0; i < upload_offsets.size(); i++) { - auto offset = upload_offsets[i]; - auto src_subrange = src_buffer->subrange(i * sizeof(GPU::Material), sizeof(GPU::Material)); - auto dst_subrange = dst_buffer->subrange(offset, sizeof(GPU::Material)); - cmd_list.copy_buffer(src_subrange, dst_subrange); - } - - return dst_buffer; - } - ); - - materials_buffer = update_materials_pass(std::move(upload_buffer), std::move(materials_buffer)); - } else { - return materials_buffer; - } - - impl->device->commit_descriptor_set(impl->materials_descriptor_set); - return materials_buffer; -} - -auto AssetManager::get_materials_descriptor_set() -> vuk::PersistentDescriptorSet * { - ZoneScoped; + read_lock.unlock(); + auto write_lock = std::unique_lock(impl->materials_mutex); + impl->dirty_materials.clear(); - return &impl->materials_descriptor_set; + return dirty_materials; } } // namespace lr diff --git a/Lorr/Engine/Asset/Asset.hh b/Lorr/Engine/Asset/Asset.hh index ed82458f..9f5abd1c 100755 --- a/Lorr/Engine/Asset/Asset.hh +++ b/Lorr/Engine/Asset/Asset.hh @@ -119,7 +119,6 @@ struct AssetManager : Handle { auto get_scene(SceneID scene_id) -> Scene 
*; auto set_material_dirty(MaterialID material_id) -> void; - auto get_materials_buffer() -> vuk::Value; - auto get_materials_descriptor_set() -> vuk::PersistentDescriptorSet *; + auto get_dirty_material_ids() -> std::vector; }; } // namespace lr diff --git a/Lorr/Engine/Asset/Model.hh b/Lorr/Engine/Asset/Model.hh index 6a060978..8c3c1888 100644 --- a/Lorr/Engine/Asset/Model.hh +++ b/Lorr/Engine/Asset/Model.hh @@ -4,6 +4,7 @@ #include "Engine/Asset/UUID.hh" #include "Engine/Graphics/Vulkan.hh" +#include "Engine/Scene/GPUScene.hh" namespace lr { struct TextureSamplerInfo { @@ -65,10 +66,7 @@ struct Model { using Index = u32; struct Primitive { - u32 material_index = 0; - u32 meshlet_count = 0; - u32 meshlet_offset = 0; - u32 local_triangle_indices_offset = 0; + MaterialID material_id = MaterialID::Invalid; u32 vertex_count = 0; u32 vertex_offset = 0; u32 index_count = 0; @@ -101,14 +99,9 @@ struct Model { std::vector nodes = {}; std::vector scenes = {}; - usize default_scene_index = 0; + std::vector gpu_meshes = {}; + std::vector gpu_mesh_buffers = {}; - Buffer indices = {}; - Buffer vertex_positions = {}; - Buffer vertex_normals = {}; - Buffer texture_coords = {}; - Buffer meshlets = {}; - Buffer meshlet_bounds = {}; - Buffer local_triangle_indices = {}; + usize default_scene_index = 0; }; } // namespace lr diff --git a/Lorr/Engine/Core/JobManager.cc b/Lorr/Engine/Core/JobManager.cc index 60f9fd8b..f5258a29 100644 --- a/Lorr/Engine/Core/JobManager.cc +++ b/Lorr/Engine/Core/JobManager.cc @@ -79,8 +79,10 @@ auto JobManager::worker(this JobManager &self, u32 id) -> void { memory::ScopedStack stack; this_thread_worker.id = id; - os::set_thread_name(stack.format("Worker {}", id)); - fmtlog::setThreadName(stack.format_char("Worker {}", id)); + + const auto *thread_name = stack.format_char("Worker {}", id); + os::set_thread_name(thread_name); + fmtlog::setThreadName(thread_name); LS_DEFER() { this_thread_worker.id = ~0_u32; diff --git 
a/Lorr/Engine/Graphics/Slang/Compiler.cc b/Lorr/Engine/Graphics/Slang/Compiler.cc index 6bc4cf43..4415552e 100644 --- a/Lorr/Engine/Graphics/Slang/Compiler.cc +++ b/Lorr/Engine/Graphics/Slang/Compiler.cc @@ -332,7 +332,8 @@ auto SlangCompiler::new_session(const SlangSessionInfo &info) -> ls::option ls::option std::expected; auto data_size() const -> u64; @@ -30,6 +30,10 @@ struct Buffer { auto host_ptr() const -> u8 *; auto id() const -> BufferID; + // if new_size is smaller than current size, this will do nothing + [[nodiscard]] auto resize(Device &, u64 new_size, vuk::MemoryUsage memory_usage = vuk::MemoryUsage::eGPUonly, LR_THISCALL) + -> std::expected; + auto acquire(Device &, vuk::Name name, vuk::Access access, u64 offset = 0, u64 size = ~0_u64) -> vuk::Value; auto discard(Device &, vuk::Name name, u64 offset = 0, u64 size = ~0_u64) -> vuk::Value; auto subrange(Device &, u64 offset = 0, u64 size = ~0_u64) -> vuk::Buffer; diff --git a/Lorr/Engine/Graphics/Vulkan/Buffer.cc b/Lorr/Engine/Graphics/Vulkan/Buffer.cc index 4022b0ed..4f933da8 100644 --- a/Lorr/Engine/Graphics/Vulkan/Buffer.cc +++ b/Lorr/Engine/Graphics/Vulkan/Buffer.cc @@ -43,6 +43,19 @@ auto Buffer::id() const -> BufferID { return id_; } +auto Buffer::resize(Device &device, u64 new_size, vuk::MemoryUsage memory_usage, LR_CALLSTACK) -> std::expected { + if (new_size > this->data_size()) { + if (this->id() != BufferID::Invalid) { + device.wait(); + device.destroy(this->id()); + } + + return Buffer::create(device, new_size, memory_usage, LOC); + } + + return *this; +} + auto Buffer::acquire(Device &device, vuk::Name name, vuk::Access access, u64 offset, u64 size) -> vuk::Value { ZoneScoped; diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index 993e8c7a..fe4af8ed 100644 --- a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -2,6 +2,12 @@ #include +// i hate this +PFN_vkCreateDescriptorPool vk_CreateDescriptorPool; 
+PFN_vkCreateDescriptorSetLayout vk_CreateDescriptorSetLayout; +PFN_vkAllocateDescriptorSets vk_AllocateDescriptorSets; +PFN_vkUpdateDescriptorSets vk_UpdateDescriptorSets; + namespace lr { constexpr fmtlog::LogLevel to_log_category(VkDebugUtilsMessageSeverityFlagBitsEXT severity) { switch (severity) { @@ -30,6 +36,8 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected std::expected device_extensions; device_extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); device_extensions.push_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); device_extensions.push_back(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); + device_extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); //device_extensions.push_back(VK_EXT_SHADER_IMAGE_ATOMIC_INT64_EXTENSION_NAME); //device_extensions.push_back(VK_KHR_MAINTENANCE_8_EXTENSION_NAME); physical_device_selector.add_required_extensions(device_extensions); - auto physical_device_result = physical_device_selector.select(); - if (!physical_device_result) { - auto error = physical_device_result.error(); + auto physical_device_select_result = physical_device_selector.select(); + if (!physical_device_select_result) { + auto error = physical_device_select_result.error(); LOG_ERROR("Failed to select Vulkan Physical Device! {}", error.message()); return std::unexpected(VK_ERROR_DEVICE_LOST); } - self.physical_device = physical_device_result.value(); + self.physical_device = physical_device_select_result.value(); - LOG_TRACE("Created physical device."); + LOG_TRACE("Selected physical device \"{}\".", self.physical_device.name); VkPhysicalDeviceVulkan14Features vk14_features = {}; vk14_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_4_FEATURES; @@ -138,10 +151,6 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected std::expected std::expected on shader yet. 
+ // WARN: this extension is only supported by // .add_pNext(&maintenance_8_features) - - // NOTE: LLVMPipe does not support this extension yet - //.add_pNext(&image_atomic_int64_features) .add_pNext(&vk10_features); auto device_result = device_builder.build(); if (!device_result) { @@ -185,6 +197,11 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected std::expected std::expected()); self.runtime.emplace( @@ -230,6 +249,63 @@ auto Device::init(this Device &self, usize frame_count) -> std::expected std::expected { + constexpr auto MAX_DESCRIPTORS = 1024_sz; + VkDescriptorSetLayoutBinding bindless_set_info[] = { + // Samplers + { .binding = DescriptorTable_SamplerIndex, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .descriptorCount = MAX_DESCRIPTORS, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr }, + // Sampled Images + { .binding = DescriptorTable_SampledImageIndex, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .descriptorCount = MAX_DESCRIPTORS, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr }, + // Storage Images + { .binding = DescriptorTable_StorageImageIndex, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = MAX_DESCRIPTORS, + .stageFlags = VK_SHADER_STAGE_ALL, + .pImmutableSamplers = nullptr }, + }; + + constexpr static auto bindless_flags = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; + VkDescriptorBindingFlags bindless_set_binding_flags[] = { + bindless_flags, + bindless_flags, + bindless_flags, + }; + self.resources.descriptor_set = self.create_persistent_descriptor_set(1, bindless_set_info, bindless_set_binding_flags); + + auto invalid_image_info = ImageInfo{ + .format = vuk::Format::eR8G8B8A8Srgb, + .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, + .type = vuk::ImageType::e2D, + .extent = vuk::Extent3D(1_u32, 1_u32, 1_u32), + .name = "Invalid Placeholder Image", + }; + auto 
[invalid_image, invalid_image_view] = Image::create_with_view(self, invalid_image_info).value(); + + auto invalid_image_data = 0xFFFFFFFF_u32; + auto fut = self.transfer_manager.upload_staging(invalid_image_view, &invalid_image_data, sizeof(u32)); + fut = fut.as_released(vuk::Access::eFragmentSampled, vuk::DomainFlagBits::eGraphicsQueue); + self.transfer_manager.wait_on(std::move(fut)); + + auto invalid_sampler_info = SamplerInfo{}; + std::ignore = Sampler::create(self, invalid_sampler_info).value(); + return {}; } @@ -329,45 +405,92 @@ auto Device::end_frame(this Device &self, vuk::Value &&tar .on_begin_pass = on_begin_pass, .on_end_pass = on_end_pass, .user_data = &self, - } }); + } } + ); } auto Device::wait(this Device &self, LR_CALLSTACK) -> void { ZoneScopedN("Device Wait Idle"); - LOG_TRACE("Device wait idle triggered at {}:{}!", LOC.file_name(), LOC.line()); + LOG_TRACE("Device wait idle triggered at {}!", LOC); self.runtime->wait_idle(); } -auto Device::create_persistent_descriptor_set(this Device &self, ls::span bindings, u32 index) - -> vuk::Unique { +auto Device::create_persistent_descriptor_set( + this Device &self, + u32 set_index, + ls::span bindings, + ls::span binding_flags +) -> vuk::PersistentDescriptorSet { ZoneScoped; - u32 descriptor_count = 0; - auto raw_bindings = std::vector(bindings.size()); - auto binding_flags = std::vector(bindings.size()); - for (const auto &[binding, raw_binding, raw_binding_flags] : std::views::zip(bindings, raw_bindings, binding_flags)) { - raw_binding.binding = binding.binding; - raw_binding.descriptorType = vuk::DescriptorBinding::vk_descriptor_type(binding.type); - raw_binding.descriptorCount = binding.descriptor_count; - raw_binding.stageFlags = VK_SHADER_STAGE_ALL; - raw_binding_flags = VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; - descriptor_count += binding.descriptor_count; + LS_EXPECT(bindings.size() == binding_flags.size()); + + auto descriptor_sizes = std::vector(); + for (const auto &binding : bindings) { 
+ LS_EXPECT(binding.descriptorType < VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT); + descriptor_sizes.emplace_back(binding.descriptorType, binding.descriptorCount); } - vuk::DescriptorSetLayoutCreateInfo layout_ci = { - .index = index, - .bindings = std::move(raw_bindings), - .flags = std::move(binding_flags), + auto pool_flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT; + auto pool_info = VkDescriptorPoolCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = static_cast(pool_flags), + .maxSets = 1, + .poolSizeCount = static_cast(descriptor_sizes.size()), + .pPoolSizes = descriptor_sizes.data(), + }; + auto pool = VkDescriptorPool{}; + vk_CreateDescriptorPool(self.handle, &pool_info, nullptr, &pool); + + auto set_layout_binding_flags_info = VkDescriptorSetLayoutBindingFlagsCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO, + .pNext = nullptr, + .bindingCount = static_cast(binding_flags.size()), + .pBindingFlags = binding_flags.data(), }; - return self.runtime->create_persistent_descriptorset(self.allocator.value(), layout_ci, descriptor_count); + auto set_layout_info = VkDescriptorSetLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = &set_layout_binding_flags_info, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }; + auto set_layout = VkDescriptorSetLayout{}; + vk_CreateDescriptorSetLayout(self.handle, &set_layout_info, nullptr, &set_layout); + + auto set_alloc_info = VkDescriptorSetAllocateInfo{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = pool, + .descriptorSetCount = 1, + .pSetLayouts = &set_layout, + }; + auto descriptor_set = VkDescriptorSet{}; + vk_AllocateDescriptorSets(self.handle, &set_alloc_info, &descriptor_set); + + auto persistent_set_create_info = 
vuk::DescriptorSetLayoutCreateInfo{ + .dslci = set_layout_info, + .index = set_index, + .bindings = std::vector(bindings.begin(), bindings.end()), + .flags = std::vector(binding_flags.begin(), binding_flags.end()), + }; + return vuk::PersistentDescriptorSet{ + .backing_pool = pool, + .set_layout_create_info = persistent_set_create_info, + .set_layout = set_layout, + .backing_set = descriptor_set, + .wdss = {}, + .descriptor_bindings = {}, + }; } -auto Device::commit_descriptor_set(this Device &self, vuk::PersistentDescriptorSet &set) -> void { +auto Device::commit_descriptor_set(this Device &self, ls::span writes) -> void { ZoneScoped; - set.commit(self.runtime.value()); + vk_UpdateDescriptorSets(self.handle, writes.size(), writes.data(), 0, nullptr); } auto Device::create_swap_chain(this Device &self, VkSurfaceKHR surface, ls::option old_swap_chain) diff --git a/Lorr/Engine/Graphics/Vulkan/Image.cc b/Lorr/Engine/Graphics/Vulkan/Image.cc index 669bd856..3ce83f56 100644 --- a/Lorr/Engine/Graphics/Vulkan/Image.cc +++ b/Lorr/Engine/Graphics/Vulkan/Image.cc @@ -4,6 +4,8 @@ #include "Engine/Memory/Stack.hh" +#include + namespace lr { auto Image::create(Device &device, const ImageInfo &info, LR_CALLSTACK) -> std::expected { ZoneScoped; @@ -132,6 +134,22 @@ auto ImageView::create(Device &device, Image &image, const ImageViewInfo &info, return std::unexpected(result.error()); } + auto image_descriptors = ankerl::svector(); + if (info.image_usage & vuk::ImageUsageFlagBits::eSampled) { + image_descriptors.push_back( + { .sampler = nullptr, // + .imageView = image_view_handle.payload, + .imageLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL } + ); + } + if (info.image_usage & vuk::ImageUsageFlagBits::eStorage) { + image_descriptors.push_back( + { .sampler = nullptr, // + .imageView = image_view_handle.payload, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL } + ); + } + auto image_view = ImageView{}; image_view.format_ = image.format(); image_view.extent_ = image.extent(); @@ -141,6 
+159,38 @@ auto ImageView::create(Device &device, Image &image, const ImageViewInfo &info, image_view.id_ = device.resources.image_views.create_slot(static_cast(image_view_handle)); device.set_name(image_view, info.name); + auto &bindless_set = device.get_descriptor_set(); + auto descriptor_writes = ankerl::svector(); + if (info.image_usage & vuk::ImageUsageFlagBits::eSampled) { + descriptor_writes.push_back( + { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // + .pNext = nullptr, + .dstSet = bindless_set.backing_set, + .dstBinding = DescriptorTable_SampledImageIndex, + .dstArrayElement = image_view.index(), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + .pImageInfo = &image_descriptors[0], + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr } + ); + } + if (info.image_usage & vuk::ImageUsageFlagBits::eStorage) { + descriptor_writes.push_back( + { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // + .pNext = nullptr, + .dstSet = bindless_set.backing_set, + .dstBinding = DescriptorTable_StorageImageIndex, + .dstArrayElement = image_view.index(), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .pImageInfo = &image_descriptors[1], + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr } + ); + } + device.commit_descriptor_set({ descriptor_writes.begin(), descriptor_writes.size() }); + return image_view; } @@ -243,9 +293,28 @@ auto Sampler::create(Device &device, const SamplerInfo &info, [[maybe_unused]] v }; auto sampler = Sampler{}; - sampler.id_ = device.resources.samplers.create_slot(); - auto *sampler_handle = device.resources.samplers.slot(sampler.id_); - *sampler_handle = device.runtime->acquire_sampler(create_info, device.frame_count()); + auto sampler_handle = device.runtime->acquire_sampler(create_info, device.frame_count()); + auto sampler_descriptor = VkDescriptorImageInfo{ + .sampler = sampler_handle.payload, + .imageView = nullptr, + .imageLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; + 
sampler.id_ = device.resources.samplers.create_slot(static_cast(sampler_handle)); + + auto &bindless_set = device.get_descriptor_set(); + auto descriptor_write = VkWriteDescriptorSet{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // + .pNext = nullptr, + .dstSet = bindless_set.backing_set, + .dstBinding = DescriptorTable_SamplerIndex, + .dstArrayElement = sampler.index(), + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, + .pImageInfo = &sampler_descriptor, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + device.commit_descriptor_set(descriptor_write); return sampler; } diff --git a/Lorr/Engine/Graphics/Vulkan/Pipeline.cc b/Lorr/Engine/Graphics/Vulkan/Pipeline.cc index f976c2a6..79b228c4 100644 --- a/Lorr/Engine/Graphics/Vulkan/Pipeline.cc +++ b/Lorr/Engine/Graphics/Vulkan/Pipeline.cc @@ -13,8 +13,12 @@ auto Pipeline::create( vuk::PipelineBaseCreateInfo create_info = {}; - for (const auto &v : persistent_sets) { - create_info.explicit_set_layouts.push_back(v.set_layout_create_info); + for (const auto &set : persistent_sets) { + create_info.explicit_set_layouts.push_back(set.set_layout_create_info); + for (const auto &[binding, binding_flags] : std::views::zip(set.set_layout_create_info.bindings, set.set_layout_create_info.flags)) { + create_info + .set_binding_flags(set.set_layout_create_info.index, binding.binding, static_cast(binding_flags)); + } } auto slang_module = session.load_module({ .module_name = compile_info.module_name, .source = compile_info.shader_source }).value(); diff --git a/Lorr/Engine/Graphics/VulkanDevice.hh b/Lorr/Engine/Graphics/VulkanDevice.hh index 4a70d03b..7adbf492 100644 --- a/Lorr/Engine/Graphics/VulkanDevice.hh +++ b/Lorr/Engine/Graphics/VulkanDevice.hh @@ -15,12 +15,6 @@ #include namespace lr { -struct BindlessDescriptorInfo { - u32 binding = 0; - vuk::DescriptorType type = {}; - u32 descriptor_count = 0; -}; - struct TransferManager { private: Device *device = nullptr; @@ -95,12 +89,19 @@ 
protected: auto release(this TransferManager &) -> void; }; +enum : u32 { + DescriptorTable_SamplerIndex = 0, + DescriptorTable_SampledImageIndex, + DescriptorTable_StorageImageIndex, +}; + struct DeviceResources { SlotMap buffers = {}; SlotMap images = {}; SlotMap image_views = {}; SlotMap samplers = {}; SlotMap pipelines = {}; + vuk::PersistentDescriptorSet descriptor_set = {}; }; struct Device { @@ -132,6 +133,8 @@ private: public: auto init(this Device &, usize frame_count) -> std::expected; + auto init_resources(this Device &) -> std::expected; + auto destroy(this Device &) -> void; auto new_slang_session(this Device &, const SlangSessionInfo &info) -> ls::option; @@ -141,9 +144,13 @@ public: auto end_frame(this Device &, vuk::Value &&target_attachment) -> void; auto wait(this Device &, LR_THISCALL) -> void; - auto create_persistent_descriptor_set(this Device &, ls::span bindings, u32 index) - -> vuk::Unique; - auto commit_descriptor_set(this Device &, vuk::PersistentDescriptorSet &set) -> void; + auto create_persistent_descriptor_set( + this Device &, + u32 set_index, + ls::span bindings, + ls::span binding_flags + ) -> vuk::PersistentDescriptorSet; + auto commit_descriptor_set(this Device &, ls::span writes) -> void; auto create_swap_chain(this Device &, VkSurfaceKHR surface, ls::option old_swap_chain = ls::nullopt) -> std::expected; @@ -186,6 +193,9 @@ public: auto get_pass_queries() -> auto & { return pass_queries; } + auto get_descriptor_set() -> auto & { + return resources.descriptor_set; + } auto non_coherent_atom_size() -> u32 { return device_limits.nonCoherentAtomSize; diff --git a/Lorr/Engine/Resources/shaders/assert.slang b/Lorr/Engine/Resources/shaders/assert.slang new file mode 100644 index 00000000..d1c6307c --- /dev/null +++ b/Lorr/Engine/Resources/shaders/assert.slang @@ -0,0 +1,7 @@ +#ifdef ENABLE_ASSERTIONS +#define assert_msg(x, msg, ...) 
do { if (!bool(x)) { printf(msg, __VA_ARGS__); } } while(false) +#define assert(x) assert_msg(x, "Shader aborted at " __FILE__ ":%d", __LINE__) +#else +#define assert_msg(...) +#define assert(...) +#endif diff --git a/Lorr/Engine/Resources/shaders/cull.slang b/Lorr/Engine/Resources/shaders/cull.slang index d29c3eed..b84547a7 100644 --- a/Lorr/Engine/Resources/shaders/cull.slang +++ b/Lorr/Engine/Resources/shaders/cull.slang @@ -4,22 +4,19 @@ import std; import gpu; import debug_drawer; -public func normalize_plane(f32x4 p) -> f32x4 { - return p / length(p.xyz); -} - public struct ScreenAabb { public f32x3 min; public f32x3 max; } -// https://zeux.io/2023/01/12/approximate-projected-bounds -public func project_aabb(f32x4x4 mvp, f32 near, f32x3 aabb_min, f32x3 aabb_extent) -> Optional { +// Credits: +// - https://zeux.io/2023/01/12/approximate-projected-bounds +public func project_aabb(f32x4x4 mvp, f32 near, f32x3 aabb_center, f32x3 aabb_extent) -> Optional { let SX = mul(mvp, f32x4(aabb_extent.x, 0.0, 0.0, 0.0)); let SY = mul(mvp, f32x4(0.0, aabb_extent.y, 0.0, 0.0)); let SZ = mul(mvp, f32x4(0.0, 0.0, aabb_extent.z, 0.0)); - let P0 = mul(mvp, f32x4(aabb_min, 1.0)); + let P0 = mul(mvp, f32x4(aabb_center - aabb_extent * 0.5, 1.0)); let P1 = P0 + SZ; let P2 = P0 + SY; let P3 = P2 + SZ; @@ -51,6 +48,43 @@ public func project_aabb(f32x4x4 mvp, f32 near, f32x3 aabb_min, f32x3 aabb_exten return ret; } +func normalize_plane(f32x4 p) -> f32x4 { + return p / length(p.xyz); +} + +// Credits: +// - https://github.com/SparkyPotato/radiance/blob/eaf18b3bbf4942234fa929ef6ad5e04e3c45fc62/shaders/passes/mesh/cull.slang#L340 +// - https://fgiesen.wordpress.com/2012/08/31/frustum-planes-from-the-projection-matrix/ +// - https://fgiesen.wordpress.com/2010/10/17/view-frustum-culling/ +public func test_frustum(in f32x4x4 mvp, in f32x3 aabb_center, in f32x3 aabb_extent) -> bool { + f32x4 planes[] = { + // Left + normalize_plane(mvp[3] + mvp[0]), + // Right + normalize_plane(mvp[3] - 
mvp[0]), + // Bottom + normalize_plane(mvp[3] + mvp[1]), + // Top + normalize_plane(mvp[3] - mvp[1]), + // Near + normalize_plane(mvp[2]), + // Far + normalize_plane(mvp[3] - mvp[2]) + }; + + let aabb_half_extent = aabb_extent * 0.5; + [[unroll]] + for (uint i = 0; i < planes.getCount(); i++) { + let flip = asuint(planes[i].xyz) & 0x80000000; + let sign_flipped = asfloat(asuint(aabb_half_extent) ^ flip); + if (dot(aabb_center + sign_flipped, planes[i].xyz) <= -planes[i].w) { + return false; + } + } + + return true; +} + public func test_occlusion( in ScreenAabb screen_aabb, in Image2D hiz_image, @@ -77,4 +111,4 @@ public func test_occlusion( var uv = (min_uv + max_uv) * 0.5; let d = hiz_image.sample_mip(hiz_sampler, uv, mip); return screen_aabb.max.z <= d; -} \ No newline at end of file +} diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index cb4e6862..a92cfdfb 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -4,40 +4,23 @@ import scene; import cull; import debug_drawer; +#include + struct ShaderParameters { - RWStructuredBuffer cull_triangles_cmd; ConstantBuffer camera; - RWStructuredBuffer visible_meshlet_instances_indices; StructuredBuffer meshlet_instances; + StructuredBuffer mesh_instances; StructuredBuffer meshes; StructuredBuffer transforms; Image2D hiz_image; Sampler hiz_sampler; + StructuredBuffer meshlet_instances_count; + + RWStructuredBuffer cull_triangles_cmd; + RWStructuredBuffer visible_meshlet_instances_indices; RWStructuredBuffer debug_drawer; }; -func test_frustum(in f32x4x4 mvp, in f32x3 aabb_center, in f32x3 aabb_extent) -> bool { - f32x4 planes[] = { - normalize_plane(mvp[3] + mvp[0]), - normalize_plane(mvp[3] - mvp[0]), - normalize_plane(mvp[3] + mvp[1]), - normalize_plane(mvp[3] - mvp[1]), - normalize_plane(mvp[2]) - }; - - let aabb_half_extent = aabb_extent * 0.5; - [[unroll]] 
- for (uint i = 0; i < planes.getCount(); i++) { - let flip = asuint(planes[i].xyz) & 0x80000000; - let sign_flipped = asfloat(asuint(aabb_half_extent) ^ flip); - if (dot(aabb_center + sign_flipped, planes[i].xyz) <= -planes[i].w) { - return false; - } - } - - return true; -} - #ifndef CULLING_MESHLET_COUNT #define CULLING_MESHLET_COUNT 64 #endif @@ -47,37 +30,32 @@ func test_frustum(in f32x4x4 mvp, in f32x3 aabb_center, in f32x3 aabb_extent) -> func cs_main( uint3 thread_id : SV_DispatchThreadID, uniform ParameterBlock params, - uniform u32 meshlet_instance_count, uniform CullFlags cull_flags ) -> void { + let meshlet_instance_count = params.meshlet_instances_count[0]; let meshlet_instance_index = thread_id.x; if (meshlet_instance_index >= meshlet_instance_count) { return; } let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; - let mesh = params.meshes[meshlet_instance.mesh_index]; - let meshlet_index = meshlet_instance.meshlet_index; - let meshlet = mesh.meshlets[meshlet_index]; - let transform = params.transforms[meshlet_instance.transform_index]; - let bounds = mesh.meshlet_bounds[meshlet_index]; - - let aabb_min = bounds.aabb_min; - let aabb_max = bounds.aabb_max; - let aabb_extent = aabb_max - aabb_min; - let aabb_center = (aabb_min + aabb_max) * 0.5; + let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; + let mesh = params.meshes[mesh_instance.mesh_index]; + let transform = params.transforms[mesh_instance.transform_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; + let bounds = mesh_lod.meshlet_bounds[meshlet_instance.meshlet_index]; var visible = true; if (visible && (cull_flags & CullFlags::MeshletFrustum)) { let cur_mvp = mul(params.camera.projection_view_mat, transform.world); - visible = test_frustum(cur_mvp, aabb_center, aabb_extent); + visible = test_frustum(cur_mvp, bounds.aabb_center, bounds.aabb_extent); } if (visible && (cull_flags & CullFlags::Occlusion)) { let prev_mvp = 
mul(params.camera.frustum_projection_view_mat, transform.world); - if (let screen_aabb = project_aabb(prev_mvp, params.camera.near_clip, aabb_min, aabb_extent)) { + if (let screen_aabb = project_aabb(prev_mvp, params.camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { visible = !test_occlusion(screen_aabb, params.hiz_image, params.hiz_sampler); - if (visible) { + if (visible && true) { let ndc_aabb_max = screen_aabb.max.xy * 2.0 - 1.0; let ndc_aabb_min = screen_aabb.min.xy * 2.0 - 1.0; var debug_rect = DebugRect(); @@ -86,19 +64,13 @@ func cs_main( debug_rect.color = f32x3(1.0, 0.0, 0.0); debug_rect.coord = DebugDrawCoord::NDC; debug_draw_rect(params.debug_drawer[0], debug_rect); - - var debug_aabb = DebugAABB(); - debug_aabb.position = mul(transform.world, f32x4(aabb_center, 1.0)).xyz; - debug_aabb.size = mul(transform.world, f32x4(aabb_extent, 0.0)).xyz; - debug_aabb.color = f32x3(0.0, 1.0, 0.0); - debug_aabb.coord = DebugDrawCoord::World; - debug_draw_aabb(params.debug_drawer[0], debug_aabb); } } } if (visible) { let index = std::atomic_add(params.cull_triangles_cmd[0].x, 1, std::memory_order_relaxed); + assert(index < meshlet_instance_count); params.visible_meshlet_instances_indices[index] = meshlet_instance_index; } -} \ No newline at end of file +} diff --git a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang index 254d065d..e530f4ab 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang @@ -7,12 +7,14 @@ import scene; import passes.visbuffer; struct ShaderParameters { - RWStructuredBuffer draw_cmd; ConstantBuffer camera; StructuredBuffer visible_meshlet_instances_indices; StructuredBuffer meshlet_instances; + StructuredBuffer mesh_instances; StructuredBuffer meshes; StructuredBuffer transforms; + + RWStructuredBuffer draw_cmd; RWStructuredBuffer reordered_indices; }; @@ -71,14 +73,10 @@ func 
CullSmallPrimitive(f32x2x3 vertices, f32x2 viewportExtent) -> bool { ); } -func test_triangle(in Mesh mesh, in Meshlet meshlet, in f32x2 resolution, CullFlags cull_flags, u32 triangle_index) -> bool { - const Triangle tri = meshlet.indices(mesh, triangle_index); - const u32x3 vertices = meshlet.vertices(mesh, tri); - const f32x3x3 positions = meshlet.positions(mesh, vertices); - - const f32x4 clip_pos_0 = mul(model_view_proj_shared, f32x4(positions[0], 1.0)); - const f32x4 clip_pos_1 = mul(model_view_proj_shared, f32x4(positions[1], 1.0)); - const f32x4 clip_pos_2 = mul(model_view_proj_shared, f32x4(positions[2], 1.0)); +func test_triangle(in f32x3x3 positions, in f32x2 resolution, CullFlags cull_flags, u32 triangle_index) -> bool { + let clip_pos_0 = mul(model_view_proj_shared, f32x4(positions[0], 1.0)); + let clip_pos_1 = mul(model_view_proj_shared, f32x4(positions[1], 1.0)); + let clip_pos_2 = mul(model_view_proj_shared, f32x4(positions[2], 1.0)); // Cull all triangles behind camera if (clip_pos_0.z < 0.0 && clip_pos_1.z < 0.0 && clip_pos_2.z < 0.0) { @@ -121,14 +119,15 @@ func cs_main( let visible_meshlet_index = group_id.x; let meshlet_instance_index = params.visible_meshlet_instances_indices[visible_meshlet_index]; let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; - let mesh = params.meshes[meshlet_instance.mesh_index]; - let meshlet_index = meshlet_instance.meshlet_index; - let meshlet = mesh.meshlets[meshlet_index]; + let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; + let mesh = params.meshes[mesh_instance.mesh_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; + let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; if (local_index == 0) { triangles_passed_shared = 0; - const Transform transform = params.transforms[meshlet_instance.transform_index]; - model_view_proj_shared = mul(params.camera->projection_view_mat, transform.world); + let transform = 
params.transforms[mesh_instance.transform_index]; + model_view_proj_shared = mul(params.camera.projection_view_mat, transform.world); } std::control_barrier(std::memory_order_acq_rel); @@ -136,7 +135,9 @@ func cs_main( var triangle_passed = false; var active_triangle_index = 0; if (local_index < meshlet.triangle_count) { - triangle_passed = test_triangle(mesh, meshlet, params.camera.resolution, cull_flags, local_index); + let indices = meshlet.indices(mesh_lod, local_index); + let positions = meshlet.positions(mesh, indices); + triangle_passed = test_triangle(positions, params.camera.resolution, cull_flags, local_index); if (triangle_passed) { active_triangle_index = std::atomic_add(triangles_passed_shared, 1, std::memory_order_relaxed); } @@ -151,9 +152,9 @@ func cs_main( std::control_barrier(std::memory_order_acq_rel); if (triangle_passed) { - const u32 index_offset = base_index_shared + active_triangle_index * 3; - params.reordered_indices[index_offset + 0] = (visible_meshlet_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 0) & MESHLET_PRIMITIVE_MASK); - params.reordered_indices[index_offset + 1] = (visible_meshlet_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 1) & MESHLET_PRIMITIVE_MASK); - params.reordered_indices[index_offset + 2] = (visible_meshlet_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 2) & MESHLET_PRIMITIVE_MASK); + let index_offset = base_index_shared + active_triangle_index * 3; + params.reordered_indices[index_offset + 0] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 0) & MESHLET_PRIMITIVE_MASK); + params.reordered_indices[index_offset + 1] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 1) & MESHLET_PRIMITIVE_MASK); + params.reordered_indices[index_offset + 2] = (meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | ((triangle_index + 2) & MESHLET_PRIMITIVE_MASK); } } diff --git a/Lorr/Engine/Resources/shaders/passes/debug.slang 
b/Lorr/Engine/Resources/shaders/passes/debug.slang index 165fe4fd..886b5353 100644 --- a/Lorr/Engine/Resources/shaders/passes/debug.slang +++ b/Lorr/Engine/Resources/shaders/passes/debug.slang @@ -12,22 +12,22 @@ ParameterBlock params; func draw_aabb(u32 vertex_index, u32 instance_index, out f32x4 position, out f32x3 color, out DebugDrawCoord coord) { let aabb = params.debug_aabb_draws[instance_index]; - static let offsets = f32x3[24]( - // bottom - f32x3(-0.5, -0.5, -0.5), f32x3( 0.5, -0.5, -0.5), - f32x3(-0.5, -0.5, -0.5), f32x3(-0.5, 0.5, -0.5), - f32x3(-0.5, 0.5, -0.5), f32x3( 0.5, 0.5, -0.5), - f32x3( 0.5, -0.5, -0.5), f32x3( 0.5, 0.5, -0.5), + static let offsets = f32x3[]( // top + f32x3(-0.5, 0.5, 0.5), f32x3( 0.5, 0.5, 0.5), + f32x3( 0.5, 0.5, 0.5), f32x3( 0.5, 0.5, -0.5), + f32x3( 0.5, 0.5, -0.5), f32x3(-0.5, 0.5, -0.5), + f32x3(-0.5, 0.5, -0.5), f32x3(-0.5, 0.5, 0.5), + // bottom f32x3(-0.5, -0.5, 0.5), f32x3( 0.5, -0.5, 0.5), - f32x3(-0.5, -0.5, 0.5), f32x3(-0.5, 0.5, 0.5), - f32x3(-0.5, 0.5, 0.5), f32x3( 0.5, 0.5, 0.5), - f32x3( 0.5, -0.5, 0.5), f32x3( 0.5, 0.5, 0.5), - // connections + f32x3( 0.5, -0.5, 0.5), f32x3( 0.5, -0.5, -0.5), + f32x3( 0.5, -0.5, -0.5), f32x3(-0.5, -0.5, -0.5), f32x3(-0.5, -0.5, -0.5), f32x3(-0.5, -0.5, 0.5), - f32x3( 0.5, -0.5, -0.5), f32x3( 0.5, -0.5, 0.5), - f32x3(-0.5, 0.5, -0.5), f32x3(-0.5, 0.5, 0.5), - f32x3( 0.5, 0.5, -0.5), f32x3( 0.5, 0.5, 0.5) + // connections + f32x3(-0.5, 0.5, 0.5), f32x3(-0.5, -0.5, 0.5), + f32x3( 0.5, 0.5, 0.5), f32x3( 0.5, -0.5, 0.5), + f32x3( 0.5, 0.5, -0.5), f32x3( 0.5, -0.5, -0.5), + f32x3(-0.5, 0.5, -0.5), f32x3(-0.5, -0.5, -0.5), ); position = f32x4(offsets[vertex_index] * aabb.size, 1.0); position = position + f32x4(aabb.position, 0.0); diff --git a/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang b/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang index 3dfb215c..3caf42ee 100644 --- a/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang +++ 
b/Lorr/Engine/Resources/shaders/passes/editor_mousepick.slang @@ -10,10 +10,10 @@ import passes.visbuffer; Image2D visbuffer_data; [[vk::binding(1, 0)]] -StructuredBuffer visible_meshlet_instances_indices; +StructuredBuffer meshlet_instances; [[vk::binding(2, 0)]] -StructuredBuffer meshlet_instances; +StructuredBuffer mesh_instances; struct PushConstants { u32 *dst; @@ -31,8 +31,8 @@ func cs_main() -> void { return; } - const let vis = VisBufferData(texel); - const u32 meshlet_instance_index = visible_meshlet_instances_indices[vis.meshlet_instance_index]; - const MeshletInstance meshlet_instance = meshlet_instances[meshlet_instance_index]; - *C.dst = meshlet_instance.transform_index; + let vis = VisBufferData(texel); + let meshlet_instance = meshlet_instances[vis.meshlet_instance_index]; + let mesh_instance = mesh_instances[meshlet_instance.mesh_instance_index]; + *C.dst = mesh_instance.transform_index; } diff --git a/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang new file mode 100644 index 00000000..3495f0fe --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/generate_cull_commands.slang @@ -0,0 +1,17 @@ +import std; +import gpu; + +struct ShaderParameters { + StructuredBuffer meshlet_instances_count; + + RWStructuredBuffer cull_meshlets_cmd; +}; + +[[shader("compute")]] +[[numthreads(1, 1, 1)]] +func cs_main( + uniform ParameterBlock params +) -> void { + params.cull_meshlets_cmd[0].x = (params.meshlet_instances_count[0] + (CULLING_MESHLET_COUNT - 1)) / CULLING_MESHLET_COUNT; +} + diff --git a/Lorr/Engine/Resources/shaders/passes/select_lods.slang b/Lorr/Engine/Resources/shaders/passes/select_lods.slang new file mode 100644 index 00000000..f5d9cce4 --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/select_lods.slang @@ -0,0 +1,82 @@ +import std; +import gpu; +import scene; +import cull; +import debug_drawer; + +struct ShaderParameters { + ConstantBuffer camera; + 
StructuredBuffer meshes; + StructuredBuffer transforms; + + RWStructuredBuffer mesh_instances; + RWStructuredBuffer meshlet_instances; + RWStructuredBuffer meshlet_instances_count; + RWStructuredBuffer debug_drawer; +}; + +#ifndef CULLING_MESHES_COUNT + #define CULLING_MESHES_COUNT 64 +#endif + +[[shader("compute")]] +[[numthreads(CULLING_MESHES_COUNT, 1, 1)]] +func cs_main( + uint3 thread_id : SV_DispatchThreadID, + uniform ParameterBlock params, + uniform u32 mesh_instances_count, + uniform CullFlags cull_flags +) -> void { + let mesh_instance_index = thread_id.x; + if (mesh_instance_index >= mesh_instances_count) { + return; + } + + let mesh_instance = ¶ms.mesh_instances[mesh_instance_index]; + let mesh = params.meshes[mesh_instance.mesh_index]; + let transform = params.transforms[mesh_instance.transform_index]; + let mvp = mul(params.camera.projection_view_mat, transform.world); + if (!test_frustum(mvp, mesh.bounds.aabb_center, mesh.bounds.aabb_extent)) { + return; + } + + var lod_index = 0; + if (true) { + // Credits: + // - https://github.com/Sunset-Flock/Timberdoodle/blob/786f141e261dff4756e7f1a67dd7f7a5e1277956/src/scene/mesh_lod.hpp#L45 + let aabb_center = mul(transform.world, f32x4(mesh.bounds.aabb_center, 1.0)).xyz; + let aabb_extent_x = length(transform.world[0]) * mesh.bounds.aabb_extent.x; + let aabb_extent_y = length(transform.world[1]) * mesh.bounds.aabb_extent.y; + let aabb_extent_z = length(transform.world[2]) * mesh.bounds.aabb_extent.z; + let aabb_rough_extent = max(max(aabb_extent_x, aabb_extent_y), aabb_extent_z); + let aabb_rough_camera_distance = max(length(aabb_center - params.camera.position) - 0.5 * aabb_rough_extent, 0.0); + + // Avoiding the atan here + let rough_resolution = max(params.camera.resolution.x, params.camera.resolution.y); + let fov90_distance_to_screen_ratio = 2.0f; + let pixel_size_at_1m = fov90_distance_to_screen_ratio / rough_resolution; + let aabb_size_at_1m = (aabb_rough_extent / aabb_rough_camera_distance); + let 
rough_aabb_pixel_size = aabb_size_at_1m / pixel_size_at_1m; + + for (var i = 1; i < mesh.lod_count; i++) { + let mesh_lod = mesh.lods[i]; + let rough_pixel_error = rough_aabb_pixel_size * mesh_lod.error; + if (rough_pixel_error < params.camera.acceptable_lod_error) { + lod_index = i; + } else { + break; + } + } + } + + mesh_instance.lod_index = lod_index; + let mesh_lod = mesh.lods[lod_index]; + let meshlet_instance_offset = std::atomic_add(params.meshlet_instances_count[0], mesh_lod.meshlet_count, std::memory_order_relaxed); + for (u32 i = 0; i < mesh_lod.meshlet_count; i++) { + let offset = meshlet_instance_offset + i; + var meshlet_instance = MeshletInstance(); + meshlet_instance.mesh_instance_index = mesh_instance_index; + meshlet_instance.meshlet_index = i; + params.meshlet_instances[offset] = meshlet_instance; + } +} diff --git a/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang b/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang index 50841984..c4d46e30 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang @@ -38,7 +38,7 @@ func cs_main( for (f32 i = 0.0; i < STEP_COUNT; i += 1.0) { ray_pos += sun_dir * distance_per_step; let ray_altitude = length(ray_pos) - params.atmosphere.planet_radius; - const let medium = MediumScattering(params.atmosphere, ray_altitude); + let medium = MediumScattering(params.atmosphere, ray_altitude); optical_depth += medium.extinction_sum * distance_per_step; } diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer.slang index 80183ef1..8552fbd1 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer.slang @@ -3,9 +3,6 @@ module visbuffer; import std; import scene; -constexpr static u64 MESHLET_DEPTH_BITS = 32u; -constexpr static u64 MESHLET_DEPTH_MASK = (1u << MESHLET_DEPTH_BITS) - 1u; - constexpr static 
u32 MESHLET_INSTANCE_ID_BITS = 24u; constexpr static u32 MESHLET_INSTANCE_ID_MASK = (1u << MESHLET_INSTANCE_ID_BITS) - 1u; @@ -31,43 +28,4 @@ public struct VisBufferData { public func encode() -> u32 { return (this.meshlet_instance_index << MESHLET_PRIMITIVE_BITS) | (this.triangle_index & MESHLET_PRIMITIVE_MASK); } -}; - -// NOTE: This version of visibility buffering is not widely available. -// My current idea is to have 2 (R32, D32) attachments and render them -// just normally, and then have a compute pass to merge them into R64 -// image. R64 images are not widely available even with storage only. -// Investigate maintenance8 for D32<->R32 copies. - -public struct VisBuffer { - u32 data; - public f32 depth; - - [[mutating]] - public __init(u32 meshlet_instance_index, u32 triangle_index, f32 depth) { - const let data = VisBufferData(meshlet_instance_index, triangle_index); - this.data = data.encode(); - this.depth = depth; - } - - [[mutating]] - public __init(in VisBufferData data, f32 depth) { - this.data = data.encode(); - this.depth = depth; - } - - [[mutating]] - public __init(u64 data) { - this.data = u32(data & MESHLET_DEPTH_MASK); - this.depth = asfloat(u32(data >> MESHLET_DEPTH_BITS)); - } - - public func encode() -> u64 { - return (u64(asuint(this.depth)) << MESHLET_DEPTH_BITS) | u64(this.data); - } - - public func decode_vis() -> VisBufferData { - return VisBufferData(this.data); - }; -}; - +}; \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index 0bd391c1..63765fc4 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -8,13 +8,14 @@ import passes.visbuffer; #include struct ShaderParameters { - Image2D visbuffer; ConstantBuffer camera; - RWStructuredBuffer visible_meshlet_instances_indices; StructuredBuffer meshlet_instances; + StructuredBuffer 
mesh_instances; StructuredBuffer meshes; StructuredBuffer transforms; StructuredBuffer materials; + + Image2D visbuffer; }; ParameterBlock params; @@ -93,26 +94,27 @@ func fs_main(VertexOutput input) -> FragmentOutput { discard; } - FragmentOutput output = {}; - let vis = VisBufferData(texel); - let meshlet_instance_index = params.visible_meshlet_instances_indices[vis.meshlet_instance_index]; + let meshlet_instance_index = vis.meshlet_instance_index; let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; - let mesh = params.meshes[meshlet_instance.mesh_index]; - let transform = params.transforms[meshlet_instance.transform_index]; - let material = params.materials[meshlet_instance.material_index]; - - let meshlet = mesh.meshlets[meshlet_instance.meshlet_index]; - let indices = meshlet.indices(mesh, vis.triangle_index); - let vertices = meshlet.vertices(mesh, indices); - let positions = meshlet.positions(mesh, vertices); - let normals = meshlet.normals(mesh, vertices); - let tex_coords = meshlet.tex_coords(mesh, vertices); + let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; + let mesh = params.meshes[mesh_instance.mesh_index]; + let material = params.materials[mesh_instance.material_index]; + let transform = params.transforms[mesh_instance.transform_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; + let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; + + let indices = meshlet.indices(mesh_lod, vis.triangle_index); + let positions = meshlet.positions(mesh, indices); + let normals = meshlet.normals(mesh, indices); + let tex_coords = meshlet.tex_coords(mesh, indices); let world_positions = transform.to_world_positions(positions); let NDC = f32x3(input.tex_coord * 2.0 - 1.0, 1.0); let deriv = compute_partial_derivatives(world_positions, NDC.xy, params.camera.resolution); let tex_coord_grad = deriv.gradient_of(tex_coords); + FragmentOutput output = {}; + // ALBEDO 
─────────────────────────────────────────────────────────── output.albedo_color = material.sample_albedo_color(tex_coord_grad); diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index 8748131d..d4970a31 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -7,11 +7,11 @@ import passes.visbuffer; struct ShaderParameters { ConstantBuffer camera; - RWStructuredBuffer visible_meshlet_instances_indices; StructuredBuffer meshlet_instances; + StructuredBuffer mesh_instances; StructuredBuffer meshes; StructuredBuffer transforms; - StructuredBuffer materials; + StructuredBuffer materials; StorageImage2D overdraw; }; ParameterBlock params; @@ -28,16 +28,16 @@ struct VertexOutput { [[shader("vertex")]] func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { let vis = VisBufferData(vertex_index); - let meshlet_instance_index = params.visible_meshlet_instances_indices[vis.meshlet_instance_index]; - let meshlet_instance = params.meshlet_instances[meshlet_instance_index]; - let mesh = params.meshes[meshlet_instance.mesh_index]; - let transform = params.transforms[meshlet_instance.transform_index]; - let meshlet = mesh.meshlets[meshlet_instance.meshlet_index]; + let meshlet_instance = params.meshlet_instances[vis.meshlet_instance_index]; + let mesh_instance = params.mesh_instances[meshlet_instance.mesh_instance_index]; + let mesh = params.meshes[mesh_instance.mesh_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; + let transform = params.transforms[mesh_instance.transform_index]; + let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; - let index = meshlet.index(mesh, vis.triangle_index); - let vertex = meshlet.vertex(mesh, index); - let vertex_pos = meshlet.position(mesh, vertex); - let tex_coord = meshlet.tex_coord(mesh, vertex); + let index = meshlet.index(mesh_lod, 
vis.triangle_index); + let vertex_pos = meshlet.position(mesh, index); + let tex_coord = meshlet.tex_coord(mesh, index); let world_pos = transform.to_world_position(vertex_pos); let clip_pos = mul(params.camera.projection_view_mat, f32x4(world_pos.xyz, 1.0)); @@ -47,31 +47,31 @@ func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { output.tex_coord = tex_coord; output.meshlet_instance_index = vis.meshlet_instance_index; output.triangle_index = vis.triangle_index / 3; - output.material_index = meshlet_instance.material_index; + output.material_index = mesh_instance.material_index; return output; } [[shader("fragment")]] func fs_main(VertexOutput input) -> u32 { +#if 1 let material = params.materials[input.material_index]; - if (material.albedo_image_index != ~0u) { + if (material.flags & MaterialFlag::HasAlbedoImage) { UVGradient grad; grad.uv = input.tex_coord; grad.ddx = ddx(input.tex_coord); grad.ddy = ddy(input.tex_coord); - const f32 alpha_color = material.sample_albedo_color(grad).a; + let alpha_color = material.sample_albedo_color(grad).a; // We are doing deferred, blend alpha mode is not supported in this pass. 
- if (alpha_color < clamp(material.alpha_cutoff, 0.001, 1.0) /* && - material.alpha_mode == AlphaMode::Mask*/) - { + if (alpha_color < clamp(material.alpha_cutoff, 0.001, 1.0)) { discard; } } +#endif std::atomic_add(params.overdraw[u32x2(input.position.xy)], 1u, std::memory_order_acq_rel, std::MemoryLocation::Image, MemoryScope::QueueFamily); - const let vis = VisBufferData(input.meshlet_instance_index, input.triangle_index); + let vis = VisBufferData(input.meshlet_instance_index, input.triangle_index); return vis.encode(); } diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_merge.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_merge.slang deleted file mode 100644 index caefe597..00000000 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_merge.slang +++ /dev/null @@ -1,37 +0,0 @@ -module visbuffer_merge; - -import std; -import gpu; -import scene; -import passes.visbuffer; - -#include - -[[vk::binding(0, 0)]] -Image2D depth_image; - -[[vk::binding(1, 0)]] -Image2D visbuffer_data_image; - -[[vk::binding(2, 0)]] -StorageImage2D visbuffer_image; - -struct PushConstants { - u32x2 extent; -}; -[[vk::push_constant]] PushConstants C; - -[[shader("compute")]] -[[numthreads(16, 16, 1)]] -func cs_main(u32x2 thread_id : SV_DispatchThreadID) -> void { - if (any(C.extent < thread_id)) { - return; - } - - const f32 depth_texel = depth_image.load(thread_id.xy); - const u32 vis_texel = visbuffer_data_image.load(thread_id.xy); - - const let vis_data = VisBufferData(vis_texel); - const let vis = VisBuffer(vis_data, depth_texel); - visbuffer_image[thread_id.xy] = vis.encode(); -} diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 1ca63fa5..90bdc0a9 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -3,6 +3,8 @@ module scene; import std; import gpu; +#include + public const static f32 CAMERA_SCALE_UNIT = 0.01; public const static f32 INV_CAMERA_SCALE_UNIT = 
1.0 / CAMERA_SCALE_UNIT; public const static f32 PLANET_RADIUS_OFFSET = 0.001; @@ -70,6 +72,7 @@ public struct Camera { public f32 near_clip; public f32 far_clip; public f32x2 resolution; + public f32 acceptable_lod_error; }; public struct Transform { @@ -105,9 +108,11 @@ public struct UVGradient { }; [[vk::binding(0, 1)]] -Sampler material_samplers[]; +Sampler bindless_samplers[]; [[vk::binding(1, 1)]] -Image2D material_images[]; +Image2D bindless_images[]; +[[vk::binding(2, 1)]] +StorageImage2D bindless_storage_images[]; public enum MaterialFlag : u32 { None = 0, @@ -133,16 +138,17 @@ public struct Material { public f32 metallic_factor = 0.0; public f32 alpha_cutoff = 0.0; public MaterialFlag flags = MaterialFlag::None; - public u32 albedo_image_index = ~0u; - public u32 normal_image_index = ~0u; - public u32 emissive_image_index = ~0u; - public u32 metallic_rougness_image_index = ~0u; - public u32 occlusion_image_index = ~0u; + public u32 sampler_index = 0; + public u32 albedo_image_index = 0; + public u32 normal_image_index = 0; + public u32 emissive_image_index = 0; + public u32 metallic_roughness_image_index = 0; + public u32 occlusion_image_index = 0; public func sample_albedo_color(in UVGradient grad) -> f32x4 { if (this.flags & MaterialFlag::HasAlbedoImage) { - const let color = material_images[this.albedo_image_index] - .sample_grad(material_samplers[this.albedo_image_index], grad.uv, grad.ddx, grad.ddy); + let color = bindless_images[this.albedo_image_index] + .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy); return this.albedo_color * color; } @@ -150,18 +156,14 @@ public struct Material { } public func sample_normal_color(in UVGradient grad) -> f32x3 { - if (this.flags & MaterialFlag::HasNormalImage) { - return material_images[this.normal_image_index] - .sample_grad(material_samplers[this.normal_image_index], grad.uv, grad.ddx, grad.ddy).rgb; - } - - return { 0.0 }; + return bindless_images[this.normal_image_index] + 
.sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).rgb; } public func sample_emissive_color(in UVGradient grad) -> f32x3 { if (this.flags & MaterialFlag::HasEmissiveImage) { - const let color = material_images[this.emissive_image_index] - .sample_grad(material_samplers[this.emissive_image_index], grad.uv, grad.ddx, grad.ddy).rgb; + let color = bindless_images[this.emissive_image_index] + .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).rgb; return this.emissive_color * color; } @@ -169,10 +171,10 @@ public struct Material { } public func sample_metallic_roughness(in UVGradient grad) -> f32x2 { - const let metallic_roughness = f32x2(this.metallic_factor, this.roughness_factor); + let metallic_roughness = f32x2(this.metallic_factor, this.roughness_factor); if (this.flags & MaterialFlag::HasMetallicRoughnessImage) { - const let color = material_images[this.metallic_rougness_image_index] - .sample_grad(material_samplers[this.metallic_rougness_image_index], grad.uv, grad.ddx, grad.ddy).bg; + let color = bindless_images[this.metallic_roughness_image_index] + .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).bg; return metallic_roughness * color; } @@ -181,103 +183,124 @@ public struct Material { public func sample_occlusion_color(in UVGradient grad) -> f32 { if (this.flags & MaterialFlag::HasOcclusionImage) { - return material_images[this.occlusion_image_index] - .sample_grad(material_samplers[this.occlusion_image_index], grad.uv, grad.ddx, grad.ddy).r; + return bindless_images[this.occlusion_image_index] + .sample_grad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).r; } return 1.0; } }; -public typealias Triangle = u32x3; public struct Meshlet { - public u32 vertex_offset = 0; - public u32 index_offset = 0; - public u32 triangle_offset = 0; + public u32 indirect_vertex_index_offset = 0; + public u32 local_triangle_index_offset = 0; + public u32 vertex_count 
= 0; public u32 triangle_count = 0; // Takes a local triange index and returns an index to index buffer. - public func index(in Mesh mesh, u32 i) -> u32 { - return u32(mesh.local_triangle_indices[this.triangle_offset + i]); - } + public func index(in MeshLOD mesh_lod, u32 i) -> u32 { + assert(this.local_triangle_index_offset + i < mesh_lod.local_triangle_indices_count); + let local_triangle_index = u32(mesh_lod.local_triangle_indices[this.local_triangle_index_offset + i]); - // Returns index to an actual vertex. - public func vertex(in Mesh mesh, u32 index) -> u32 { - return mesh.indices[this.index_offset + index]; + assert(this.indirect_vertex_index_offset + local_triangle_index < mesh_lod.indirect_vertex_indices_count); + return mesh_lod.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_index]; } // Returns position of a vertex. - public func position(in Mesh mesh, u32 vertex) -> f32x3 { - return mesh.vertex_positions[this.vertex_offset + vertex]; + public func position(in Mesh mesh, u32 index) -> f32x3 { + return mesh.vertex_positions[index]; } - public func tex_coord(in Mesh mesh, u32 vertex) -> f32x2 { + public func tex_coord(in Mesh mesh, u32 index) -> f32x2 { if (mesh.texture_coords == nullptr) { return {}; } - return mesh.texture_coords[this.vertex_offset + vertex]; + return mesh.texture_coords[index]; } // ---------------------------------------------------------- - public func indices(in Mesh mesh, u32 i) -> Triangle { - return { - u32(mesh.local_triangle_indices[this.triangle_offset + i * 3 + 0]), - u32(mesh.local_triangle_indices[this.triangle_offset + i * 3 + 1]), - u32(mesh.local_triangle_indices[this.triangle_offset + i * 3 + 2]), - }; - } + public func indices(in MeshLOD mesh_lod, u32 i) -> u32x3 { + let local_triangle_indices = u32x3( + u32(mesh_lod.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 0]), + u32(mesh_lod.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 1]), + 
u32(mesh_lod.local_triangle_indices[this.local_triangle_index_offset + i * 3 + 2]), + ); - public func vertices(in Mesh mesh, in Triangle indices) -> u32x3 { - return { mesh.indices[this.index_offset + indices.x], - mesh.indices[this.index_offset + indices.y], - mesh.indices[this.index_offset + indices.z] }; + return { mesh_lod.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.x], + mesh_lod.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.y], + mesh_lod.indirect_vertex_indices[this.indirect_vertex_index_offset + local_triangle_indices.z] }; } - public func positions(in Mesh mesh, in u32x3 vertices) -> f32x3x3 { - return { mesh.vertex_positions[this.vertex_offset + vertices.x], - mesh.vertex_positions[this.vertex_offset + vertices.y], - mesh.vertex_positions[this.vertex_offset + vertices.z] }; + public func positions(in Mesh mesh, in u32x3 indices) -> f32x3x3 { + return { mesh.vertex_positions[indices.x], + mesh.vertex_positions[indices.y], + mesh.vertex_positions[indices.z] }; } - public func normals(in Mesh mesh, in u32x3 vertices) -> f32x3x3 { - return { mesh.vertex_normals[this.vertex_offset + vertices.x], - mesh.vertex_normals[this.vertex_offset + vertices.y], - mesh.vertex_normals[this.vertex_offset + vertices.z] }; + public func normals(in Mesh mesh, in u32x3 indices) -> f32x3x3 { + return { mesh.vertex_normals[indices.x], + mesh.vertex_normals[indices.y], + mesh.vertex_normals[indices.z] }; } - public func tex_coords(in Mesh mesh, in u32x3 vertices) -> f32x2x3 { + public func tex_coords(in Mesh mesh, in u32x3 indices) -> f32x2x3 { if (mesh.texture_coords == nullptr) { return {}; } - return { mesh.texture_coords[this.vertex_offset + vertices.x], - mesh.texture_coords[this.vertex_offset + vertices.y], - mesh.texture_coords[this.vertex_offset + vertices.z] }; + return { mesh.texture_coords[indices.x], + mesh.texture_coords[indices.y], + mesh.texture_coords[indices.z] }; } }; -public struct 
MeshletBounds { - public f32x3 aabb_min = {}; - public f32x3 aabb_max = {}; +public struct Bounds { + public f32x3 aabb_center = {}; + public f32x3 aabb_extent = {}; + public f32x3 sphere_center = {}; + public f32 sphere_radius = 0.0f; }; public struct MeshletInstance { + public u32 mesh_instance_index = 0; + public u32 meshlet_index = 0; +}; + +public struct MeshInstance { public u32 mesh_index = 0; + public u32 lod_index = 0; public u32 material_index = 0; public u32 transform_index = 0; - public u32 meshlet_index = 0; }; -public struct Mesh { +public struct MeshLOD { public u32 *indices = nullptr; + public Meshlet *meshlets = nullptr; + public Bounds *meshlet_bounds = nullptr; + public u8 *local_triangle_indices = nullptr; + public u32 *indirect_vertex_indices = nullptr; + public u32 indices_count = 0; + public u32 meshlet_count = 0; + public u32 meshlet_bounds_count = 0; + public u32 local_triangle_indices_count = 0; + public u32 indirect_vertex_indices_count = 0; + public f32 error = 0.0; +}; + +#ifndef MESH_MAX_LODS +#define MESH_MAX_LODS 8 +#endif + +public struct Mesh { public f32x3 *vertex_positions = nullptr; public f32x3 *vertex_normals = nullptr; public f32x2 *texture_coords = nullptr; - public Meshlet *meshlets = nullptr; - public MeshletBounds *meshlet_bounds = nullptr; - public u8 *local_triangle_indices = nullptr; + public u32 _padding = 0; + public u32 lod_count = 0; + public MeshLOD lods[MESH_MAX_LODS] = {}; + public Bounds bounds = {}; }; public struct Light { diff --git a/Lorr/Engine/Resources/shaders/std/color.slang b/Lorr/Engine/Resources/shaders/std/color.slang index 7eef8515..95fb6c1b 100644 --- a/Lorr/Engine/Resources/shaders/std/color.slang +++ b/Lorr/Engine/Resources/shaders/std/color.slang @@ -23,7 +23,7 @@ public func rec2020_to_xyz(f32x3 color) -> f32x3 { f32x3x3 mat = { 0.636958, 0.1446169, 0.168881, 0.2627002, 0.6779981, 0.0593017, - 0.0, 0.0280727, 1.0609851 + 0.0, 0.0280727, 1.0609851 }; return mul(mat, color); } diff --git 
a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh index ce922c80..105532e3 100644 --- a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh +++ b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh @@ -28,8 +28,8 @@ ECS_COMPONENT_BEGIN(Camera) ECS_COMPONENT_MEMBER(far_clip, f32, 1000.0f) ECS_COMPONENT_MEMBER(axis_velocity, glm::vec3, { 0.0, 0.0, 0.0 }) ECS_COMPONENT_MEMBER(velocity_mul, f32, 1.0) - ECS_COMPONENT_MEMBER(freeze_frustum, bool, false) ECS_COMPONENT_MEMBER(frustum_projection_view_mat, glm::mat4, glm::mat4(1.0)) + ECS_COMPONENT_MEMBER(acceptable_lod_error, f32, 2.0f) ECS_COMPONENT_END(); ECS_COMPONENT_TAG(PerspectiveCamera); diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 746637a4..3a7806bc 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -109,6 +109,7 @@ struct Camera { alignas(4) f32 near_clip = {}; alignas(4) f32 far_clip = {}; alignas(4) glm::vec2 resolution = {}; + alignas(4) f32 acceptable_lod_error = 0.0f; }; enum class TransformID : u64 { Invalid = ~0_u64 }; @@ -143,40 +144,66 @@ struct Material { alignas(4) f32 metallic_factor = 0.0f; alignas(4) f32 alpha_cutoff = 0.0f; alignas(4) MaterialFlag flags = MaterialFlag::None; - alignas(4) u32 albedo_image_index = ~0_u32; - alignas(4) u32 normal_image_index = ~0_u32; - alignas(4) u32 emissive_image_index = ~0_u32; - alignas(4) u32 metallic_roughness_image_index = ~0_u32; - alignas(4) u32 occlusion_image_index = ~0_u32; + alignas(4) u32 sampler_index = 0; + alignas(4) u32 albedo_image_index = 0; + alignas(4) u32 normal_image_index = 0; + alignas(4) u32 emissive_image_index = 0; + alignas(4) u32 metallic_roughness_image_index = 0; + alignas(4) u32 occlusion_image_index = 0; }; -struct Meshlet { - alignas(4) u32 vertex_offset = 0; - alignas(4) u32 index_offset = 0; - alignas(4) u32 triangle_offset = 0; - alignas(4) u32 triangle_count = 0; +struct Bounds { + alignas(4) glm::vec3 aabb_center = {}; + 
alignas(4) glm::vec3 aabb_extent = {}; + alignas(4) glm::vec3 sphere_center = {}; + alignas(4) f32 sphere_radius = 0.0f; }; -struct MeshletBounds { - alignas(4) glm::vec3 aabb_min = {}; - alignas(4) glm::vec3 aabb_max = {}; +struct MeshletInstance { + alignas(4) u32 mesh_instance_index = 0; + alignas(4) u32 meshlet_index = 0; }; -struct MeshletInstance { +struct MeshInstance { alignas(4) u32 mesh_index = 0; + alignas(4) u32 lod_index = 0; alignas(4) u32 material_index = 0; alignas(4) u32 transform_index = 0; - alignas(4) u32 meshlet_index = 0; }; -struct Mesh { +struct Meshlet { + alignas(4) u32 indirect_vertex_index_offset = 0; + alignas(4) u32 local_triangle_index_offset = 0; + alignas(4) u32 vertex_count = 0; + alignas(4) u32 triangle_count = 0; +}; + +struct MeshLOD { alignas(8) u64 indices = 0; - alignas(8) u64 vertex_positions = 0; - alignas(8) u64 vertex_normals = 0; - alignas(8) u64 texture_coords = 0; alignas(8) u64 meshlets = 0; alignas(8) u64 meshlet_bounds = 0; alignas(8) u64 local_triangle_indices = 0; + alignas(8) u64 indirect_vertex_indices = 0; + + alignas(4) u32 indices_count = 0; + alignas(4) u32 meshlet_count = 0; + alignas(4) u32 meshlet_bounds_count = 0; + alignas(4) u32 local_triangle_indices_count = 0; + alignas(4) u32 indirect_vertex_indices_count = 0; + + alignas(4) f32 error = 0.0f; +}; + +struct Mesh { + constexpr static auto MAX_LODS = 8_sz; + + alignas(8) u64 vertex_positions = 0; + alignas(8) u64 vertex_normals = 0; + alignas(8) u64 texture_coords = 0; + alignas(4) u32 _padding = 0; + alignas(4) u32 lod_count = 0; + alignas(8) MeshLOD lods[MAX_LODS] = {}; + alignas(4) Bounds bounds = {}; }; constexpr static u32 HISTOGRAM_THREADS_X = 16; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index f5a91f59..0d64b6db 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -153,6 +153,8 @@ auto Scene::destroy(this Scene &self) -> void { } } + self.mesh_instance_count = 0; + self.max_meshlet_instance_count 
= 0; self.root.destruct(); self.name.clear(); self.root.clear(); @@ -519,8 +521,6 @@ auto Scene::find_entity(this Scene &self, u32 transform_index) -> flecs::entity auto Scene::render(this Scene &self, SceneRenderer &renderer, SceneRenderInfo &info) -> vuk::Value { ZoneScoped; - auto &app = Application::get(); - // clang-format off auto camera_query = self.get_world() .query_builder() @@ -552,13 +552,9 @@ auto Scene::render(this Scene &self, SceneRenderer &renderer, SceneRenderInfo &i camera_data.near_clip = c.near_clip; camera_data.far_clip = c.far_clip; camera_data.resolution = glm::vec2(static_cast(info.extent.width), static_cast(info.extent.height)); - - if (!c.freeze_frustum) { - camera_data.frustum_projection_view_mat = c.frustum_projection_view_mat; - c.frustum_projection_view_mat = camera_data.projection_view_mat; - } else { - camera_data.frustum_projection_view_mat = c.frustum_projection_view_mat; - } + camera_data.acceptable_lod_error = c.acceptable_lod_error; + camera_data.frustum_projection_view_mat = c.frustum_projection_view_mat; + c.frustum_projection_view_mat = camera_data.projection_view_mat; }); ls::option sun_data = ls::nullopt; @@ -602,28 +598,15 @@ auto Scene::render(this Scene &self, SceneRenderer &renderer, SceneRenderInfo &i } }); - ls::option composed_scene = ls::nullopt; - if (self.models_dirty) { - memory::ScopedStack stack; - self.models_dirty = false; - - auto compose_info = self.compose(); - composed_scene.emplace(renderer.compose(compose_info)); - } + auto prepared_frame = self.prepare_frame(renderer); - info.materials_descriptor_set = app.asset_man.get_materials_descriptor_set(); - info.materials_buffer = app.asset_man.get_materials_buffer(); info.sun = sun_data; info.atmosphere = atmos_data; info.camera = active_camera_data; info.histogram_info = histogram_data; info.cull_flags = self.cull_flags; - info.dirty_transform_ids = self.dirty_transforms; - info.transforms = self.transforms.slots_unsafe(); - auto rendered_attachment = 
renderer.render(info, composed_scene); - self.dirty_transforms.clear(); - return rendered_attachment; + return renderer.render(info, prepared_frame); } auto Scene::tick(this Scene &self, f32 delta_time) -> bool { @@ -722,48 +705,110 @@ auto Scene::get_cull_flags(this Scene &self) -> GPU::CullFlags & { return self.cull_flags; } -auto Scene::compose(this Scene &self) -> SceneComposeInfo { +auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer) -> PreparedFrame { ZoneScoped; auto &app = Application::get(); + auto max_meshlet_instance_count = 0_u32; auto gpu_meshes = std::vector(); - auto gpu_meshlet_instances = std::vector(); - - for (const auto &[rendering_mesh, transform_ids] : self.rendering_meshes_map) { - auto *model = app.asset_man.get_model(rendering_mesh.n0); - const auto &mesh = model->meshes[rendering_mesh.n1]; - - // ── PER MESH INFORMATION ──────────────────────────────────────────── - auto mesh_offset = gpu_meshes.size(); - auto &gpu_mesh = gpu_meshes.emplace_back(); - gpu_mesh.indices = model->indices.device_address(); - gpu_mesh.vertex_positions = model->vertex_positions.device_address(); - gpu_mesh.vertex_normals = model->vertex_normals.device_address(); - gpu_mesh.texture_coords = model->texture_coords.device_address(); - gpu_mesh.local_triangle_indices = model->local_triangle_indices.device_address(); - gpu_mesh.meshlet_bounds = model->meshlet_bounds.device_address(); - gpu_mesh.meshlets = model->meshlets.device_address(); - - // ── INSTANCING ────────────────────────────────────────────────────── - for (const auto transform_id : transform_ids) { - for (const auto primitive_index : mesh.primitive_indices) { - auto &primitive = model->primitives[primitive_index]; - for (u32 meshlet_index = 0; meshlet_index < primitive.meshlet_count; meshlet_index++) { - auto &meshlet_instance = gpu_meshlet_instances.emplace_back(); - meshlet_instance.mesh_index = mesh_offset; - meshlet_instance.material_index = primitive.material_index; - 
meshlet_instance.transform_index = SlotMap_decode_id(transform_id).index; - meshlet_instance.meshlet_index = meshlet_index + primitive.meshlet_offset; + auto gpu_mesh_instances = std::vector(); + + if (self.models_dirty) { + for (const auto &[rendering_mesh, transform_ids] : self.rendering_meshes_map) { + auto *model = app.asset_man.get_model(rendering_mesh.n0); + const auto &mesh = model->meshes[rendering_mesh.n1]; + + for (auto primitive_index : mesh.primitive_indices) { + const auto &primitive = model->primitives[primitive_index]; + const auto &gpu_mesh = model->gpu_meshes[primitive_index]; + auto mesh_index = static_cast(gpu_meshes.size()); + gpu_meshes.emplace_back(gpu_mesh); + + // ── INSTANCING ────────────────────────────────────────────────── + for (const auto transform_id : transform_ids) { + auto lod0_index = 0; + const auto &lod0 = gpu_mesh.lods[lod0_index]; + + auto &mesh_instance = gpu_mesh_instances.emplace_back(); + mesh_instance.mesh_index = mesh_index; + mesh_instance.lod_index = lod0_index; + mesh_instance.material_index = SlotMap_decode_id(primitive.material_id).index; + mesh_instance.transform_index = SlotMap_decode_id(transform_id).index; + max_meshlet_instance_count += lod0.meshlet_count; } } } + + self.mesh_instance_count = gpu_mesh_instances.size(); + self.max_meshlet_instance_count = max_meshlet_instance_count; } - return SceneComposeInfo{ - .gpu_meshes = std::move(gpu_meshes), - .gpu_meshlet_instances = std::move(gpu_meshlet_instances), + auto uuid_to_image_index = [&](const UUID &uuid) -> ls::option { + if (!app.asset_man.is_texture_loaded(uuid)) { + return ls::nullopt; + } + + auto *texture = app.asset_man.get_texture(uuid); + return texture->image_view.index(); }; + + auto dirty_material_ids = app.asset_man.get_dirty_material_ids(); + auto gpu_materials = std::vector(dirty_material_ids.size()); + auto dirty_material_indices = std::vector(dirty_material_ids.size()); + for (const auto &[gpu_material, index, id] : 
std::views::zip(gpu_materials, dirty_material_indices, dirty_material_ids)) { + const auto *material = app.asset_man.get_material(id); + auto albedo_image_index = uuid_to_image_index(material->albedo_texture); + auto normal_image_index = uuid_to_image_index(material->normal_texture); + auto emissive_image_index = uuid_to_image_index(material->emissive_texture); + auto metallic_roughness_image_index = uuid_to_image_index(material->metallic_roughness_texture); + auto occlusion_image_index = uuid_to_image_index(material->occlusion_texture); + auto sampler_index = 0_u32; + + auto flags = GPU::MaterialFlag::None; + if (albedo_image_index.has_value()) { + auto *texture = app.asset_man.get_texture(material->albedo_texture); + sampler_index = texture->sampler.index(); + flags |= GPU::MaterialFlag::HasAlbedoImage; + } + + flags |= normal_image_index.has_value() ? GPU::MaterialFlag::HasNormalImage : GPU::MaterialFlag::None; + flags |= emissive_image_index.has_value() ? GPU::MaterialFlag::HasEmissiveImage : GPU::MaterialFlag::None; + flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; + flags |= occlusion_image_index.has_value() ? 
GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; + + gpu_material.albedo_color = material->albedo_color; + gpu_material.emissive_color = material->emissive_color; + gpu_material.roughness_factor = material->roughness_factor; + gpu_material.metallic_factor = material->metallic_factor; + gpu_material.alpha_cutoff = material->alpha_cutoff; + gpu_material.flags = flags; + gpu_material.sampler_index = sampler_index; + gpu_material.albedo_image_index = albedo_image_index.value_or(0_u32); + gpu_material.normal_image_index = normal_image_index.value_or(0_u32); + gpu_material.emissive_image_index = emissive_image_index.value_or(0_u32); + gpu_material.metallic_roughness_image_index = metallic_roughness_image_index.value_or(0_u32); + gpu_material.occlusion_image_index = occlusion_image_index.value_or(0_u32); + + index = SlotMap_decode_id(id).index; + } + + auto prepare_info = FramePrepareInfo{ + .mesh_instance_count = self.mesh_instance_count, + .max_meshlet_instance_count = self.max_meshlet_instance_count, + .dirty_transform_ids = self.dirty_transforms, + .gpu_transforms = self.transforms.slots_unsafe(), + .dirty_material_indices = dirty_material_indices, + .gpu_materials = gpu_materials, + .gpu_meshes = gpu_meshes, + .gpu_mesh_instances = gpu_mesh_instances, + }; + auto prepared_frame = renderer.prepare_frame(prepare_info); + + self.models_dirty = false; + self.dirty_transforms.clear(); + + return prepared_frame; } auto Scene::add_transform(this Scene &self, flecs::entity entity) -> GPU::TransformID { diff --git a/Lorr/Engine/Scene/Scene.hh b/Lorr/Engine/Scene/Scene.hh index 41f74fb8..124ab661 100644 --- a/Lorr/Engine/Scene/Scene.hh +++ b/Lorr/Engine/Scene/Scene.hh @@ -51,6 +51,8 @@ private: std::vector dirty_transforms = {}; bool models_dirty = false; + u32 mesh_instance_count = 0; + u32 max_meshlet_instance_count = 0; GPU::CullFlags cull_flags = GPU::CullFlags::All; @@ -92,7 +94,7 @@ public: auto get_cull_flags(this Scene &) -> GPU::CullFlags &; private: - 
auto compose(this Scene &) -> SceneComposeInfo; + auto prepare_frame(this Scene &, SceneRenderer &renderer) -> PreparedFrame; auto add_transform(this Scene &, flecs::entity entity) -> GPU::TransformID; auto remove_transform(this Scene &, flecs::entity entity) -> void; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 49bd448f..5f81ab3a 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -27,14 +27,19 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi auto &app = Application::get(); auto &asset_man = app.asset_man; auto &transfer_man = app.device.transfer_man(); + auto &bindless_descriptor_set = app.device.get_descriptor_set(); auto shaders_root = asset_man.asset_root_path(AssetType::Shader); - auto *materials_set = asset_man.get_materials_descriptor_set(); // ── EDITOR ────────────────────────────────────────────────────────── auto default_slang_session = self.device->new_slang_session({ .definitions = { +#ifdef LS_DEBUG + { "ENABLE_ASSERTIONS", "1" }, +#endif // DEBUG + { "CULLING_MESH_COUNT", "64" }, { "CULLING_MESHLET_COUNT", std::to_string(Model::MAX_MESHLET_INDICES) }, { "CULLING_TRIANGLE_COUNT", std::to_string(Model::MAX_MESHLET_PRIMITIVES) }, + { "MESH_MAX_LODS", std::to_string(GPU::Mesh::MAX_LODS) }, { "HISTOGRAM_THREADS_X", std::to_string(GPU::HISTOGRAM_THREADS_X) }, { "HISTOGRAM_THREADS_Y", std::to_string(GPU::HISTOGRAM_THREADS_Y) }, }, @@ -99,6 +104,18 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi Pipeline::create(*self.device, default_slang_session, sky_final_pipeline_info).value(); // ── VISBUFFER ─────────────────────────────────────────────────────── + auto generate_cull_commands_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.generate_cull_commands", + .entry_points = { "cs_main" }, + }; + Pipeline::create(*self.device, default_slang_session, 
generate_cull_commands_pipeline_info).value(); + + auto vis_select_lods_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.select_lods", + .entry_points = { "cs_main" }, + }; + Pipeline::create(*self.device, default_slang_session, vis_select_lods_pipeline_info).value(); + auto vis_cull_meshlets_pipeline_info = PipelineCompileInfo{ .module_name = "passes.cull_meshlets", .entry_points = { "cs_main" }, @@ -115,7 +132,7 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi .module_name = "passes.visbuffer_encode", .entry_points = { "vs_main", "fs_main" }, }; - Pipeline::create(*self.device, default_slang_session, vis_encode_pipeline_info, *materials_set).value(); + Pipeline::create(*self.device, default_slang_session, vis_encode_pipeline_info, bindless_descriptor_set).value(); auto vis_clear_pipeline_info = PipelineCompileInfo{ .module_name = "passes.visbuffer_clear", @@ -127,7 +144,7 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi .module_name = "passes.visbuffer_decode", .entry_points = { "vs_main", "fs_main" }, }; - Pipeline::create(*self.device, default_slang_session, vis_decode_pipeline_info, *materials_set).value(); + Pipeline::create(*self.device, default_slang_session, vis_decode_pipeline_info, bindless_descriptor_set).value(); // ── PBR ───────────────────────────────────────────────────────────── auto pbr_basic_pipeline_info = PipelineCompileInfo{ @@ -226,155 +243,144 @@ auto SceneRenderer::create_persistent_resources(this SceneRenderer &self) -> voi transfer_man.wait_on(std::move(multiscatter_lut_attachment)); self.exposure_buffer = Buffer::create(*self.device, sizeof(GPU::HistogramLuminance)).value(); + vuk::fill(vuk::acquire_buf("exposure", *self.device->buffer(self.exposure_buffer.id()), vuk::eNone), 0); } -auto SceneRenderer::compose(this SceneRenderer &self, SceneComposeInfo &compose_info) -> ComposedScene { +auto SceneRenderer::prepare_frame(this SceneRenderer &self, 
FramePrepareInfo &info) -> PreparedFrame { ZoneScoped; auto &transfer_man = self.device->transfer_man(); + auto prepared_frame = PreparedFrame{}; - // IMPORTANT: Only wait when buffer is being resized!!! - // We can still copy into gpu buffer if it has enough space. + if (!info.dirty_transform_ids.empty()) { + auto rebuild_transforms = !self.transforms_buffer || self.transforms_buffer.data_size() <= info.gpu_transforms.size_bytes(); + self.transforms_buffer = self.transforms_buffer.resize(*self.device, info.gpu_transforms.size_bytes()).value(); - if (ls::size_bytes(compose_info.gpu_meshes) > self.meshes_buffer.data_size()) { - if (self.meshes_buffer) { - self.device->wait(); - self.device->destroy(self.meshes_buffer.id()); - } + if (rebuild_transforms) { + // If we resize buffer, we need to refill it again, so individual uploads are not required. + prepared_frame.transforms_buffer = transfer_man.upload_staging(info.gpu_transforms, self.transforms_buffer); + } else { + // Buffer is not resized, upload individual transforms.
+ + auto dirty_transforms_count = info.dirty_transform_ids.size(); + auto dirty_transforms_size_bytes = dirty_transforms_count * sizeof(GPU::Transforms); + auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUtoGPU, dirty_transforms_size_bytes); + auto *dst_transform_ptr = reinterpret_cast(upload_buffer->mapped_ptr); + auto upload_offsets = std::vector(dirty_transforms_count); + + for (const auto &[dirty_transform_id, offset] : std::views::zip(info.dirty_transform_ids, upload_offsets)) { + auto index = SlotMap_decode_id(dirty_transform_id).index; + const auto &transform = info.gpu_transforms[index]; + std::memcpy(dst_transform_ptr, &transform, sizeof(GPU::Transforms)); + offset = index * sizeof(GPU::Transforms); + dst_transform_ptr++; + } - self.meshes_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_meshes)).value(); - } + auto update_transforms_pass = vuk::make_pass( + "update scene transforms", + [upload_offsets = std::move(upload_offsets)]( + vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::Access::eTransferRead) src_buffer, + VUK_BA(vuk::Access::eTransferWrite) dst_buffer + ) { + for (usize i = 0; i < upload_offsets.size(); i++) { + auto offset = upload_offsets[i]; + auto src_subrange = src_buffer->subrange(i * sizeof(GPU::Transforms), sizeof(GPU::Transforms)); + auto dst_subrange = dst_buffer->subrange(offset, sizeof(GPU::Transforms)); + cmd_list.copy_buffer(src_subrange, dst_subrange); + } + + return dst_buffer; + } + ); - if (ls::size_bytes(compose_info.gpu_meshlet_instances) > self.meshlet_instances_buffer.data_size()) { - if (self.meshlet_instances_buffer) { - self.device->wait(); - self.device->destroy(self.meshlet_instances_buffer.id()); + prepared_frame.transforms_buffer = self.transforms_buffer.acquire(*self.device, "transforms", vuk::Access::eMemoryRead); + prepared_frame.transforms_buffer = update_transforms_pass(std::move(upload_buffer), std::move(prepared_frame.transforms_buffer)); } - - 
self.meshlet_instances_buffer = Buffer::create(*self.device, ls::size_bytes(compose_info.gpu_meshlet_instances)).value(); - } - - self.meshlet_instance_count = compose_info.gpu_meshlet_instances.size(); - auto meshes_buffer = vuk::Value{}; - if (!compose_info.gpu_meshes.empty()) { - meshes_buffer = transfer_man.upload_staging(ls::span(compose_info.gpu_meshes), self.meshes_buffer); - } - - auto meshlet_instances_buffer = vuk::Value{}; - if (!compose_info.gpu_meshlet_instances.empty()) { - meshlet_instances_buffer = transfer_man.upload_staging(ls::span(compose_info.gpu_meshlet_instances), self.meshlet_instances_buffer); - } - - if (self.exposure_buffer) { - vuk::fill(vuk::acquire_buf("exposure", *self.device->buffer(self.exposure_buffer.id()), vuk::eNone), 0); + } else if (self.transforms_buffer) { + prepared_frame.transforms_buffer = self.transforms_buffer.acquire(*self.device, "transforms", vuk::Access::eMemoryRead); } - return ComposedScene{ - .meshes_buffer = meshes_buffer, - .meshlet_instances_buffer = meshlet_instances_buffer, - }; -} + if (!info.dirty_material_indices.empty()) { + auto rebuild_materials = !self.materials_buffer || self.materials_buffer.data_size() <= info.gpu_materials.size_bytes(); + self.materials_buffer = self.materials_buffer.resize(*self.device, info.gpu_materials.size_bytes()).value(); -auto SceneRenderer::cleanup(this SceneRenderer &self) -> void { - ZoneScoped; - - self.device->wait(); + if (rebuild_materials) { + prepared_frame.materials_buffer = transfer_man.upload_staging(info.gpu_materials, self.materials_buffer); + } else { + // TODO: Literally repeating code, find a solution to this + auto dirty_materials_count = info.dirty_material_indices.size(); + auto dirty_materials_size_bytes = dirty_materials_count * sizeof(GPU::Material); + auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUtoGPU, dirty_materials_size_bytes); + auto *dst_materials_ptr = reinterpret_cast(upload_buffer->mapped_ptr); + auto 
upload_offsets = std::vector(dirty_materials_count); + + for (const auto &[dirty_material, index, offset] : std::views::zip(info.gpu_materials, info.dirty_material_indices, upload_offsets)) { + std::memcpy(dst_materials_ptr, &dirty_material, sizeof(GPU::Material)); + offset = index * sizeof(GPU::Material); + dst_materials_ptr++; + } - self.meshlet_instance_count = 0; + auto update_materials_pass = vuk::make_pass( + "update scene materials", + [upload_offsets = std::move(upload_offsets)]( + vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::Access::eTransferRead) src_buffer, + VUK_BA(vuk::Access::eTransferWrite) dst_buffer + ) { + for (usize i = 0; i < upload_offsets.size(); i++) { + auto offset = upload_offsets[i]; + auto src_subrange = src_buffer->subrange(i * sizeof(GPU::Material), sizeof(GPU::Material)); + auto dst_subrange = dst_buffer->subrange(offset, sizeof(GPU::Material)); + cmd_list.copy_buffer(src_subrange, dst_subrange); + } + + return dst_buffer; + } + ); - if (self.transforms_buffer) { - self.device->destroy(self.transforms_buffer.id()); - self.transforms_buffer = {}; + prepared_frame.materials_buffer = self.materials_buffer.acquire(*self.device, "materials", vuk::eMemoryRead); + prepared_frame.materials_buffer = update_materials_pass(std::move(upload_buffer), std::move(prepared_frame.materials_buffer)); + } + } else if (self.materials_buffer) { + prepared_frame.materials_buffer = self.materials_buffer.acquire(*self.device, "materials", vuk::eMemoryRead); } - if (self.meshlet_instances_buffer) { - self.device->destroy(self.meshlet_instances_buffer.id()); - self.meshlet_instances_buffer = {}; + if (!info.gpu_meshes.empty()) { + self.meshes_buffer = self.meshes_buffer.resize(*self.device, info.gpu_meshes.size_bytes()).value(); + prepared_frame.meshes_buffer = transfer_man.upload_staging(info.gpu_meshes, self.meshes_buffer); + } else if (self.meshes_buffer) { + prepared_frame.meshes_buffer = self.meshes_buffer.acquire(*self.device, "meshes", 
vuk::eMemoryRead); } - if (self.meshes_buffer) { - self.device->destroy(self.meshes_buffer.id()); - self.meshes_buffer = {}; + if (!info.gpu_mesh_instances.empty()) { + self.mesh_instances_buffer = self.mesh_instances_buffer.resize(*self.device, info.gpu_mesh_instances.size_bytes()).value(); + prepared_frame.mesh_instances_buffer = transfer_man.upload_staging(info.gpu_mesh_instances, self.mesh_instances_buffer); + } else if (self.mesh_instances_buffer) { + prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(*self.device, "mesh instances", vuk::eMemoryRead); } - if (self.hiz_view) { - self.device->destroy(self.hiz_view.id()); - self.hiz_view = {}; + if (info.max_meshlet_instance_count > 0) { + prepared_frame.meshlet_instances_buffer = + transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, info.max_meshlet_instance_count * sizeof(GPU::MeshletInstance)); + prepared_frame.visible_meshlet_instances_indices_buffer = + transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, info.max_meshlet_instance_count * sizeof(u32)); + prepared_frame.reordered_indices_buffer = transfer_man.alloc_transient_buffer( + vuk::MemoryUsage::eGPUonly, + info.max_meshlet_instance_count * Model::MAX_MESHLET_PRIMITIVES * 3 * sizeof(u32) + ); } - if (self.hiz) { - self.device->destroy(self.hiz.id()); - self.hiz = {}; - } + prepared_frame.mesh_instance_count = info.mesh_instance_count; + + return prepared_frame; } -auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls::option &composed_scene) - -> vuk::Value { +auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, PreparedFrame &frame) -> vuk::Value { ZoneScoped; auto &transfer_man = self.device->transfer_man(); - - // ── ENTITY TRANSFORMS ─────────────────────────────────────────────── - // - // WARN: compose_info.transforms contains _ALL_ transforms!!! 
- // - bool rebuild_transforms = false; - if (info.transforms.size_bytes() > self.transforms_buffer.data_size()) { - if (self.transforms_buffer.id() != BufferID::Invalid) { - // Device wait here is important, do not remove it. Why? - // We are using ONE transform buffer for all frames, if - // this buffer gets destroyed in current frame, previous - // rendering frame buffer will get corrupt and crash GPU. - self.device->wait(); - self.device->destroy(self.transforms_buffer.id()); - } - - self.transforms_buffer = Buffer::create(*self.device, info.transforms.size_bytes(), vuk::MemoryUsage::eGPUonly).value(); - - rebuild_transforms = true; - } - - auto transforms_buffer = self.transforms_buffer.acquire(*self.device, "Transforms Buffer", vuk::Access::eMemoryRead); - - if (rebuild_transforms) { - transforms_buffer = transfer_man.upload_staging(info.transforms, std::move(transforms_buffer)); - } else if (!info.dirty_transform_ids.empty()) { - auto transform_count = info.dirty_transform_ids.size(); - auto new_transforms_size_bytes = transform_count * sizeof(GPU::Transforms); - auto upload_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eCPUonly, new_transforms_size_bytes); - auto *dst_transform_ptr = reinterpret_cast(upload_buffer->mapped_ptr); - auto upload_offsets = std::vector(transform_count); - - for (const auto &[dirty_transform_id, offset] : std::views::zip(info.dirty_transform_ids, upload_offsets)) { - auto index = SlotMap_decode_id(dirty_transform_id).index; - const auto &transform = info.transforms[index]; - std::memcpy(dst_transform_ptr, &transform, sizeof(GPU::Transforms)); - offset = index * sizeof(GPU::Transforms); - dst_transform_ptr++; - } - - auto update_transforms_pass = vuk::make_pass( - "update scene transforms", - [upload_offsets = std::move( - upload_offsets - )]( // - vuk::CommandBuffer &cmd_list, - VUK_BA(vuk::Access::eTransferRead) src_buffer, - VUK_BA(vuk::Access::eTransferWrite) dst_buffer - ) { - for (usize i = 0; i < 
upload_offsets.size(); i++) { - auto offset = upload_offsets[i]; - auto src_subrange = src_buffer->subrange(i * sizeof(GPU::Transforms), sizeof(GPU::Transforms)); - auto dst_subrange = dst_buffer->subrange(offset, sizeof(GPU::Transforms)); - cmd_list.copy_buffer(src_subrange, dst_subrange); - } - - return dst_buffer; - } - ); - - transforms_buffer = update_transforms_pass(std::move(upload_buffer), std::move(transforms_buffer)); - } + auto &bindless_descriptor_set = self.device->get_descriptor_set(); // ────────────────────────────────────────────────────────────────────── auto final_attachment = vuk::declare_ia( @@ -517,82 +523,154 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: camera_buffer = transfer_man.scratch_buffer(info.camera.value()); } - if (self.meshlet_instance_count) { - auto meshes_buffer = vuk::Value{}; - auto meshlet_instances_buffer = vuk::Value{}; - if (composed_scene.has_value()) { - meshes_buffer = std::move(composed_scene->meshes_buffer); - meshlet_instances_buffer = std::move(composed_scene->meshlet_instances_buffer); - } else { - meshes_buffer = self.meshes_buffer.acquire(*self.device, "meshes", vuk::Access::eNone); - meshlet_instances_buffer = self.meshlet_instances_buffer.acquire(*self.device, "Meshlet Instances", vuk::Access::eNone); - } + if (frame.mesh_instance_count) { + auto transforms_buffer = std::move(frame.transforms_buffer); + auto meshes_buffer = std::move(frame.meshes_buffer); + auto mesh_instances_buffer = std::move(frame.mesh_instances_buffer); + auto meshlet_instances_buffer = std::move(frame.meshlet_instances_buffer); + auto materials_buffer = std::move(frame.materials_buffer); + + // ── CULL MESHES ───────────────────────────────────────────────────── + auto vis_select_lods_pass = vuk::make_pass( + "vis select lods", + [mesh_instance_count = frame.mesh_instance_count, cull_flags = info.cull_flags]( + vuk::CommandBuffer &cmd_list, + VUK_BA(vuk::eComputeRead) camera, + 
VUK_BA(vuk::eComputeRead) meshes, + VUK_BA(vuk::eComputeRead) transforms, + VUK_BA(vuk::eComputeRW) mesh_instances, + VUK_BA(vuk::eComputeRW) meshlet_instances, + VUK_BA(vuk::eComputeRW) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) debug_drawer + ) { + cmd_list // + .bind_compute_pipeline("passes.select_lods") + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, meshes) + .bind_buffer(0, 2, transforms) + .bind_buffer(0, 3, mesh_instances) + .bind_buffer(0, 4, meshlet_instances) + .bind_buffer(0, 5, visible_meshlet_instances_count) + .bind_buffer(0, 6, debug_drawer) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(mesh_instance_count, cull_flags)) + .dispatch_invocations(mesh_instance_count); + + return std::make_tuple(camera, meshes, transforms, mesh_instances, meshlet_instances, visible_meshlet_instances_count, debug_drawer); + } + ); + + auto visible_meshlet_instances_count_buffer = transfer_man.scratch_buffer({ 0 }); - auto materials_buffer = std::move(info.materials_buffer); - auto *materials_set = info.materials_descriptor_set; + std::tie( + camera_buffer, + meshes_buffer, + transforms_buffer, + mesh_instances_buffer, + meshlet_instances_buffer, + visible_meshlet_instances_count_buffer, + debug_drawer_buffer + ) = + vis_select_lods_pass( + std::move(camera_buffer), + std::move(meshes_buffer), + std::move(transforms_buffer), + std::move(mesh_instances_buffer), + std::move(meshlet_instances_buffer), + std::move(visible_meshlet_instances_count_buffer), + std::move(debug_drawer_buffer) + ); + + auto generate_cull_commands_pass = vuk::make_pass( + "generate cull commands", + [](vuk::CommandBuffer &cmd_list, // + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) cull_meshlets_cmd) { + cmd_list // + .bind_compute_pipeline("passes.generate_cull_commands") + .bind_buffer(0, 0, visible_meshlet_instances_count) + .bind_buffer(0, 1, cull_meshlets_cmd) + .dispatch(1); + + return 
std::make_tuple(visible_meshlet_instances_count, cull_meshlets_cmd); + } + ); + + auto cull_meshlets_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); + std::tie(visible_meshlet_instances_count_buffer, cull_meshlets_cmd_buffer) = + generate_cull_commands_pass(std::move(visible_meshlet_instances_count_buffer), std::move(cull_meshlets_cmd_buffer)); // ── CULL MESHLETS ─────────────────────────────────────────────────── auto vis_cull_meshlets_pass = vuk::make_pass( "vis cull meshlets", - [meshlet_instance_count = self.meshlet_instance_count, cull_flags = info.cull_flags]( + [cull_flags = info.cull_flags]( vuk::CommandBuffer &cmd_list, - VUK_BA(vuk::eComputeWrite) cull_triangles_cmd, + VUK_BA(vuk::eIndirectRead) dispatch_cmd, VUK_BA(vuk::eComputeRead) camera, - VUK_BA(vuk::eComputeWrite) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRead) meshlet_instances, + VUK_BA(vuk::eComputeRead) mesh_instances, VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, VUK_IA(vuk::eComputeRead) hiz, - VUK_BA(vuk::eComputeWrite) debug_drawer + VUK_BA(vuk::eComputeRead) visible_meshlet_instances_count, + VUK_BA(vuk::eComputeRW) cull_triangles_cmd, + VUK_BA(vuk::eComputeWrite) visible_meshlet_instances_indices, + VUK_BA(vuk::eComputeRW) debug_drawer ) { cmd_list // .bind_compute_pipeline("passes.cull_meshlets") - .bind_buffer(0, 0, cull_triangles_cmd) - .bind_buffer(0, 1, camera) - .bind_buffer(0, 2, visible_meshlet_instances_indices) - .bind_buffer(0, 3, meshlet_instances) - .bind_buffer(0, 4, meshes) - .bind_buffer(0, 5, transforms) - .bind_image(0, 6, hiz) - .bind_sampler(0, 7, hiz_sampler_info) - .bind_buffer(0, 8, debug_drawer) - .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, PushConstants(meshlet_instance_count, cull_flags)) - .dispatch((meshlet_instance_count + Model::MAX_MESHLET_INDICES - 1) / Model::MAX_MESHLET_INDICES); + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, meshlet_instances) + .bind_buffer(0, 2, mesh_instances) 
+ .bind_buffer(0, 3, meshes) + .bind_buffer(0, 4, transforms) + .bind_image(0, 5, hiz) + .bind_sampler(0, 6, hiz_sampler_info) + .bind_buffer(0, 7, visible_meshlet_instances_count) + .bind_buffer(0, 8, cull_triangles_cmd) + .bind_buffer(0, 9, visible_meshlet_instances_indices) + .bind_buffer(0, 10, debug_drawer) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) + .dispatch_indirect(dispatch_cmd); + return std::make_tuple( - cull_triangles_cmd, camera, - visible_meshlet_instances_indices, meshlet_instances, + mesh_instances, meshes, transforms, hiz, + cull_triangles_cmd, + visible_meshlet_instances_indices, debug_drawer ); } ); auto cull_triangles_cmd_buffer = transfer_man.scratch_buffer({ .x = 0, .y = 1, .z = 1 }); - auto visible_meshlet_instances_indices_buffer = - transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUonly, self.meshlet_instance_count * sizeof(u32)); + auto visible_meshlet_instances_indices_buffer = std::move(frame.visible_meshlet_instances_indices_buffer); std::tie( - cull_triangles_cmd_buffer, camera_buffer, - visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, + mesh_instances_buffer, meshes_buffer, transforms_buffer, hiz_attachment, + cull_triangles_cmd_buffer, + visible_meshlet_instances_indices_buffer, debug_drawer_buffer ) = vis_cull_meshlets_pass( - std::move(cull_triangles_cmd_buffer), + std::move(cull_meshlets_cmd_buffer), std::move(camera_buffer), - std::move(visible_meshlet_instances_indices_buffer), std::move(meshlet_instances_buffer), + std::move(mesh_instances_buffer), std::move(meshes_buffer), std::move(transforms_buffer), std::move(hiz_attachment), + std::move(visible_meshlet_instances_count_buffer), + std::move(cull_triangles_cmd_buffer), + std::move(visible_meshlet_instances_indices_buffer), std::move(debug_drawer_buffer) ); @@ -602,60 +680,63 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: [cull_flags = info.cull_flags]( vuk::CommandBuffer &cmd_list, 
VUK_BA(vuk::eIndirectRead) cull_triangles_cmd, - VUK_BA(vuk::eComputeWrite) draw_indexed_cmd, - VUK_BA(vuk::eComputeWrite) camera, + VUK_BA(vuk::eComputeRead) camera, VUK_BA(vuk::eComputeRead) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRead) meshlet_instances, + VUK_BA(vuk::eComputeRead) mesh_instances, VUK_BA(vuk::eComputeRead) meshes, VUK_BA(vuk::eComputeRead) transforms, + VUK_BA(vuk::eComputeRW) draw_indexed_cmd, VUK_BA(vuk::eComputeWrite) reordered_indices ) { cmd_list // .bind_compute_pipeline("passes.cull_triangles") - .bind_buffer(0, 0, draw_indexed_cmd) - .bind_buffer(0, 1, camera) - .bind_buffer(0, 2, visible_meshlet_instances_indices) - .bind_buffer(0, 3, meshlet_instances) + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, visible_meshlet_instances_indices) + .bind_buffer(0, 2, meshlet_instances) + .bind_buffer(0, 3, mesh_instances) .bind_buffer(0, 4, meshes) .bind_buffer(0, 5, transforms) - .bind_buffer(0, 6, reordered_indices) + .bind_buffer(0, 6, draw_indexed_cmd) + .bind_buffer(0, 7, reordered_indices) .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, cull_flags) .dispatch_indirect(cull_triangles_cmd); + return std::make_tuple( - draw_indexed_cmd, camera, visible_meshlet_instances_indices, meshlet_instances, + mesh_instances, meshes, transforms, + draw_indexed_cmd, reordered_indices ); } ); auto draw_command_buffer = transfer_man.scratch_buffer({ .instanceCount = 1 }); - auto reordered_indices_buffer = transfer_man.alloc_transient_buffer( - vuk::MemoryUsage::eGPUonly, - self.meshlet_instance_count * Model::MAX_MESHLET_PRIMITIVES * 3 * sizeof(u32) - ); + auto reordered_indices_buffer = std::move(frame.reordered_indices_buffer); std::tie( - draw_command_buffer, camera_buffer, visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, + mesh_instances_buffer, meshes_buffer, transforms_buffer, + draw_command_buffer, reordered_indices_buffer ) = vis_cull_triangles_pass( std::move(cull_triangles_cmd_buffer), - 
std::move(draw_command_buffer), std::move(camera_buffer), std::move(visible_meshlet_instances_indices_buffer), std::move(meshlet_instances_buffer), + std::move(mesh_instances_buffer), std::move(meshes_buffer), std::move(transforms_buffer), + std::move(draw_command_buffer), std::move(reordered_indices_buffer) ); @@ -703,18 +784,18 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: // ── VISBUFFER ENCODE ──────────────────────────────────────────────── auto vis_encode_pass = vuk::make_pass( "vis encode", - [descriptor_set = materials_set]( + [descriptor_set = &bindless_descriptor_set]( vuk::CommandBuffer &cmd_list, VUK_BA(vuk::eIndirectRead) triangle_indirect, VUK_BA(vuk::eIndexRead) index_buffer, - VUK_IA(vuk::eColorWrite) visbuffer, - VUK_IA(vuk::eDepthStencilRW) depth, VUK_BA(vuk::eVertexRead) camera, - VUK_BA(vuk::eVertexRead) visible_meshlet_instances_indices, VUK_BA(vuk::eVertexRead) meshlet_instances, - VUK_BA(vuk::eVertexRead) transforms, + VUK_BA(vuk::eVertexRead) mesh_instances, VUK_BA(vuk::eVertexRead) meshes, + VUK_BA(vuk::eVertexRead) transforms, VUK_BA(vuk::eFragmentRead) materials, + VUK_IA(vuk::eColorRW) visbuffer, + VUK_IA(vuk::eDepthStencilRW) depth, VUK_IA(vuk::eFragmentRW) overdraw ) { cmd_list // @@ -727,50 +808,41 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .set_scissor(0, vuk::Rect2D::framebuffer()) .bind_persistent(1, *descriptor_set) .bind_buffer(0, 0, camera) - .bind_buffer(0, 1, visible_meshlet_instances_indices) - .bind_buffer(0, 2, meshlet_instances) + .bind_buffer(0, 1, meshlet_instances) + .bind_buffer(0, 2, mesh_instances) .bind_buffer(0, 3, meshes) .bind_buffer(0, 4, transforms) .bind_buffer(0, 5, materials) .bind_image(0, 6, overdraw) .bind_index_buffer(index_buffer, vuk::IndexType::eUint32) .draw_indexed_indirect(1, triangle_indirect); - return std::make_tuple( - visbuffer, - depth, - camera, - visible_meshlet_instances_indices, - meshlet_instances, - 
transforms, - meshes, - materials, - overdraw - ); + + return std::make_tuple(camera, meshlet_instances, mesh_instances, meshes, transforms, materials, visbuffer, depth, overdraw); } ); std::tie( - visbuffer_attachment, - depth_attachment, camera_buffer, - visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, - transforms_buffer, + mesh_instances_buffer, meshes_buffer, + transforms_buffer, materials_buffer, + visbuffer_attachment, + depth_attachment, overdraw_attachment ) = vis_encode_pass( std::move(draw_command_buffer), std::move(reordered_indices_buffer), - std::move(visbuffer_attachment), - std::move(depth_attachment), std::move(camera_buffer), - std::move(visible_meshlet_instances_indices_buffer), std::move(meshlet_instances_buffer), - std::move(transforms_buffer), + std::move(mesh_instances_buffer), std::move(meshes_buffer), + std::move(transforms_buffer), std::move(materials_buffer), + std::move(visbuffer_attachment), + std::move(depth_attachment), std::move(overdraw_attachment) ); @@ -781,15 +853,15 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: [picking_texel = *info.picking_texel]( vuk::CommandBuffer &cmd_list, VUK_IA(vuk::eComputeSampled) visbuffer, - VUK_BA(vuk::eComputeRead) visible_meshlet_instances_indices, VUK_BA(vuk::eComputeRead) meshlet_instances, + VUK_BA(vuk::eComputeRead) mesh_instances, VUK_BA(vuk::eComputeWrite) picked_transform_index_buffer ) { cmd_list // .bind_compute_pipeline("passes.editor_mousepick") .bind_image(0, 0, visbuffer) - .bind_buffer(0, 1, visible_meshlet_instances_indices) - .bind_buffer(0, 2, meshlet_instances) + .bind_buffer(0, 1, meshlet_instances) + .bind_buffer(0, 2, mesh_instances) .push_constants( vuk::ShaderStageFlagBits::eCompute, 0, @@ -802,8 +874,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: ); auto picking_texel_buffer = transfer_man.alloc_transient_buffer(vuk::MemoryUsage::eGPUtoCPU, sizeof(u32)); - auto picked_texel = - 
editor_mousepick_pass(visbuffer_attachment, visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, picking_texel_buffer); + auto picked_texel = editor_mousepick_pass(visbuffer_attachment, meshlet_instances_buffer, mesh_instances_buffer, picking_texel_buffer); vuk::Compiler temp_compiler; picked_texel.wait(self.device->get_allocator(), temp_compiler); @@ -853,19 +924,19 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: // ── VISBUFFER DECODE ──────────────────────────────────────────────── auto vis_decode_pass = vuk::make_pass( "vis decode", - [descriptor_set = materials_set]( // + [descriptor_set = &bindless_descriptor_set]( // vuk::CommandBuffer &cmd_list, - VUK_IA(vuk::eColorRW) albedo, - VUK_IA(vuk::eColorRW) normal, - VUK_IA(vuk::eColorRW) emissive, - VUK_IA(vuk::eColorRW) metallic_roughness_occlusion, - VUK_IA(vuk::eFragmentSampled) visbuffer, VUK_BA(vuk::eFragmentRead) camera, - VUK_BA(vuk::eFragmentRead) visible_meshlet_instances_indices, VUK_BA(vuk::eFragmentRead) meshlet_instances, + VUK_BA(vuk::eFragmentRead) mesh_instances, VUK_BA(vuk::eFragmentRead) meshes, VUK_BA(vuk::eFragmentRead) transforms, - VUK_BA(vuk::eFragmentRead) materials + VUK_BA(vuk::eFragmentRead) materials, + VUK_IA(vuk::eFragmentRead) visbuffer, + VUK_IA(vuk::eColorRW) albedo, + VUK_IA(vuk::eColorRW) normal, + VUK_IA(vuk::eColorRW) emissive, + VUK_IA(vuk::eColorRW) metallic_roughness_occlusion ) { cmd_list // .bind_graphics_pipeline("passes.visbuffer_decode") @@ -878,27 +949,27 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .set_dynamic_state(vuk::DynamicStateFlagBits::eViewport | vuk::DynamicStateFlagBits::eScissor) .set_viewport(0, vuk::Rect2D::framebuffer()) .set_scissor(0, vuk::Rect2D::framebuffer()) - .bind_image(0, 0, visbuffer) - .bind_buffer(0, 1, camera) - .bind_buffer(0, 2, visible_meshlet_instances_indices) - .bind_buffer(0, 3, meshlet_instances) - .bind_buffer(0, 4, meshes) - .bind_buffer(0, 
5, transforms) - .bind_buffer(0, 6, materials) .bind_persistent(1, *descriptor_set) + .bind_buffer(0, 0, camera) + .bind_buffer(0, 1, meshlet_instances) + .bind_buffer(0, 2, mesh_instances) + .bind_buffer(0, 3, meshes) + .bind_buffer(0, 4, transforms) + .bind_buffer(0, 5, materials) + .bind_image(0, 6, visbuffer) .draw(3, 1, 0, 1); return std::make_tuple( - albedo, - normal, - emissive, - metallic_roughness_occlusion, - visbuffer, camera, - visible_meshlet_instances_indices, meshlet_instances, + mesh_instances, meshes, - transforms + transforms, + visbuffer, + albedo, + normal, + emissive, + metallic_roughness_occlusion ); } ); @@ -940,29 +1011,29 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: metallic_roughness_occlusion_attachment = vuk::clear_image(std::move(metallic_roughness_occlusion_attachment), vuk::Black); std::tie( - albedo_attachment, - normal_attachment, - emissive_attachment, - metallic_roughness_occlusion_attachment, - visbuffer_attachment, camera_buffer, - visible_meshlet_instances_indices_buffer, meshlet_instances_buffer, + mesh_instances_buffer, meshes_buffer, - transforms_buffer + transforms_buffer, + visbuffer_attachment, + albedo_attachment, + normal_attachment, + emissive_attachment, + metallic_roughness_occlusion_attachment ) = vis_decode_pass( - std::move(albedo_attachment), - std::move(normal_attachment), - std::move(emissive_attachment), - std::move(metallic_roughness_occlusion_attachment), - std::move(visbuffer_attachment), std::move(camera_buffer), - std::move(visible_meshlet_instances_indices_buffer), std::move(meshlet_instances_buffer), + std::move(mesh_instances_buffer), std::move(meshes_buffer), std::move(transforms_buffer), - std::move(materials_buffer) + std::move(materials_buffer), + std::move(visbuffer_attachment), + std::move(albedo_attachment), + std::move(normal_attachment), + std::move(emissive_attachment), + std::move(metallic_roughness_occlusion_attachment) ); if 
(info.atmosphere.has_value()) { @@ -1018,6 +1089,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: .bind_buffer(0, 10, sun) .bind_buffer(0, 11, camera) .draw(3, 1, 0, 0); + return std::make_tuple(dst, atmosphere, sun, camera, sky_transmittance_lut, sky_multiscatter_lut, depth); } ); @@ -1366,7 +1438,7 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: cmd_list // .bind_graphics_pipeline("passes.debug") .set_rasterization({ .polygonMode = vuk::PolygonMode::eFill, .lineWidth = 1.8f }) - .set_primitive_topology(vuk::PrimitiveTopology::eLineStrip) + .set_primitive_topology(vuk::PrimitiveTopology::eLineList) .set_color_blend(dst, vuk::BlendPreset::eOff) .set_dynamic_state(vuk::DynamicStateFlagBits::eViewport | vuk::DynamicStateFlagBits::eScissor) .set_viewport(0, vuk::Rect2D::framebuffer()) @@ -1399,4 +1471,40 @@ auto SceneRenderer::render(this SceneRenderer &self, SceneRenderInfo &info, ls:: return result_attachment; } +auto SceneRenderer::cleanup(this SceneRenderer &self) -> void { + ZoneScoped; + + self.device->wait(); + + if (self.transforms_buffer) { + self.device->destroy(self.transforms_buffer.id()); + self.transforms_buffer = {}; + } + + if (self.mesh_instances_buffer) { + self.device->destroy(self.mesh_instances_buffer.id()); + self.mesh_instances_buffer = {}; + } + + if (self.meshes_buffer) { + self.device->destroy(self.meshes_buffer.id()); + self.meshes_buffer = {}; + } + + if (self.materials_buffer) { + self.device->destroy(self.materials_buffer.id()); + self.materials_buffer = {}; + } + + if (self.hiz_view) { + self.device->destroy(self.hiz_view.id()); + self.hiz_view = {}; + } + + if (self.hiz) { + self.device->destroy(self.hiz.id()); + self.hiz = {}; + } +} + } // namespace lr diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 060bb9ad..229cf86d 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -5,23 +5,36 
@@ #include "Engine/Scene/GPUScene.hh" namespace lr { -struct SceneComposeInfo { - std::vector gpu_meshes = {}; - std::vector gpu_meshlet_instances = {}; +struct FramePrepareInfo { + u32 mesh_instance_count = 0; + u32 max_meshlet_instance_count = 0; + + ls::span dirty_transform_ids = {}; + ls::span gpu_transforms = {}; + + ls::span dirty_material_indices = {}; + ls::span gpu_materials = {}; + + ls::span gpu_meshes = {}; + ls::span gpu_mesh_instances = {}; }; -struct ComposedScene { +struct PreparedFrame { + u32 mesh_instance_count = 0; + vuk::Value transforms_buffer = {}; vuk::Value meshes_buffer = {}; + vuk::Value mesh_instances_buffer = {}; vuk::Value meshlet_instances_buffer = {}; + vuk::Value visible_meshlet_instances_indices_buffer = {}; + vuk::Value reordered_indices_buffer = {}; + vuk::Value materials_buffer = {}; }; struct SceneRenderInfo { vuk::Format format = vuk::Format::eR8G8B8A8Srgb; vuk::Extent3D extent = {}; f32 delta_time = 0.0f; - - vuk::PersistentDescriptorSet *materials_descriptor_set = nullptr; - vuk::Value materials_buffer = {}; + GPU::CullFlags cull_flags = {}; ls::option sun = ls::nullopt; ls::option atmosphere = ls::nullopt; @@ -29,10 +42,6 @@ struct SceneRenderInfo { ls::option picking_texel = ls::nullopt; ls::option histogram_info = ls::nullopt; - GPU::CullFlags cull_flags = {}; - ls::span dirty_transform_ids = {}; - ls::span transforms = {}; - ls::option picked_transform_index = ls::nullopt; }; @@ -42,9 +51,11 @@ struct SceneRenderer { // Scene resources Buffer exposure_buffer = {}; Buffer transforms_buffer = {}; - u32 meshlet_instance_count = 0; + + Buffer mesh_instances_buffer = {}; Buffer meshes_buffer = {}; - Buffer meshlet_instances_buffer = {}; + + Buffer materials_buffer = {}; // Then what are they? 
// TODO: Per scene sky settings @@ -59,7 +70,6 @@ struct SceneRenderer { ImageView hiz_view = {}; bool debug_lines = false; - f32 debug_heatmap_scale = 5.0; auto init(this SceneRenderer &, Device *device) -> bool; auto destroy(this SceneRenderer &) -> void; @@ -67,9 +77,9 @@ struct SceneRenderer { auto create_persistent_resources(this SceneRenderer &) -> void; // Scene - auto compose(this SceneRenderer &, SceneComposeInfo &compose_info) -> ComposedScene; + auto prepare_frame(this SceneRenderer &, FramePrepareInfo &info) -> PreparedFrame; + auto render(this SceneRenderer &, SceneRenderInfo &render_info, PreparedFrame &frame) -> vuk::Value; auto cleanup(this SceneRenderer &) -> void; - auto render(this SceneRenderer &, SceneRenderInfo &render_info, ls::option &composed_scene) -> vuk::Value; }; } // namespace lr diff --git a/Lorr/Engine/xmake.lua b/Lorr/Engine/xmake.lua index 8038a0ef..43deef24 100755 --- a/Lorr/Engine/xmake.lua +++ b/Lorr/Engine/xmake.lua @@ -22,6 +22,7 @@ target("Lorr") }) add_options("profile") + add_options("use_llvmpipe") add_deps( "ls", diff --git a/Lorr/ls/span.hh b/Lorr/ls/span.hh index 98a8adc7..1647633a 100755 --- a/Lorr/ls/span.hh +++ b/Lorr/ls/span.hh @@ -23,35 +23,35 @@ struct span : public std::span { constexpr span() = default; template - constexpr span(const span &other): std::span(other.data(), other.size()) {}; + constexpr span(const span &other) : std::span(other.data(), other.size()){}; - constexpr span(this_type::reference v): std::span({ &v, 1 }) {}; + constexpr span(this_type::reference v) : std::span({ &v, 1 }) {}; - constexpr explicit(EXTENT != std::dynamic_extent) span(T *v, this_type::size_type size): std::span(v, size) {}; + constexpr explicit(EXTENT != std::dynamic_extent) span(T *v, this_type::size_type size) : std::span(v, size){}; - constexpr explicit(EXTENT != std::dynamic_extent) span(this_type::iterator v, this_type::size_type size): std::span(v, size) {}; + constexpr explicit(EXTENT != std::dynamic_extent) 
span(this_type::iterator v, this_type::size_type size) : std::span(v, size){}; - constexpr explicit(EXTENT != std::dynamic_extent) span(this_type::iterator begin_it, this_type::iterator end_it): - std::span(begin_it, end_it) {}; + constexpr explicit(EXTENT != std::dynamic_extent) span(this_type::iterator begin_it, this_type::iterator end_it) : + std::span(begin_it, end_it){}; template - constexpr span(T (&arr)[N]): std::span(arr) {}; + constexpr span(T (&arr)[N]) : std::span(arr){}; template - constexpr span(std::array &arr): std::span(arr) {}; + constexpr span(std::array &arr) : std::span(arr){}; template - constexpr span(const std::array &arr): std::span(arr) {}; + constexpr span(const std::array &arr) : std::span(arr){}; - constexpr span(std::vector &v): std::span(v.begin(), v.end()) {}; + constexpr span(std::vector &v) : std::span(v.begin(), v.end()) {}; - constexpr span(const std::vector &v): std::span(v.begin(), v.end()) {}; + constexpr span(const std::vector &v) : std::span(v.begin(), v.end()) {}; template - constexpr span(static_vector &arr): std::span(arr.begin(), arr.end()) {}; + constexpr span(static_vector &arr) : std::span(arr.begin(), arr.end()){}; template - constexpr span(const static_vector &arr): std::span(arr.begin(), arr.end()) {}; + constexpr span(const static_vector &arr) : std::span(arr.begin(), arr.end()){}; }; template diff --git a/shell.nix b/shell.nix index adebdf46..8799823b 100644 --- a/shell.nix +++ b/shell.nix @@ -1,37 +1,39 @@ let pkgs = import {}; - pkgs-unstable = import {}; -in -pkgs.mkShell.override { stdenv = pkgs-unstable.llvmPackages_20.libcxxStdenv; } { +in +pkgs.mkShell.override { stdenv = pkgs.llvmPackages_20.libcxxStdenv; } { nativeBuildInputs = [ pkgs.cmake pkgs.ninja pkgs.gnumake pkgs.xmake + pkgs. 
- pkgs-unstable.llvmPackages_20.bintools-unwrapped - pkgs-unstable.llvmPackages_20.libcxx.dev - pkgs-unstable.llvmPackages_20.compiler-rt - (pkgs-unstable.llvmPackages_20.clang-tools.override { enableLibcxx = true; }) + pkgs.llvmPackages_20.bintools-unwrapped + pkgs.llvmPackages_20.libcxx.dev + pkgs.llvmPackages_20.compiler-rt + (pkgs.llvmPackages_20.clang-tools.override { enableLibcxx = true; }) pkgs.mold pkgs.pkg-config - pkgs-unstable.python313 - pkgs-unstable.python313Packages.pip - pkgs-unstable.python313Packages.setuptools - pkgs-unstable.python313Packages.wheel + pkgs.python313 + pkgs.python313Packages.pip + pkgs.python313Packages.setuptools + pkgs.python313Packages.wheel pkgs.zlib.dev # for gltfpack - pkgs-unstable.meshoptimizer + pkgs.meshoptimizer # for SDL3 - pkgs-unstable.sdl3 + pkgs.sdl3 ]; shellHook = '' - export LD_LIBRARY_PATH=${pkgs-unstable.llvmPackages_20.libcxx}/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=${pkgs.llvmPackages_20.libcxx}/lib:$LD_LIBRARY_PATH + # slang needs libstdc++ + export LD_LIBRARY_PATH=${pkgs.gcc14.cc.lib}/lib:$LD_LIBRARY_PATH ''; hardeningDisable = [ "all" ]; diff --git a/xmake/options.lua b/xmake/options.lua index 3158a491..3d28b3a7 100755 --- a/xmake/options.lua +++ b/xmake/options.lua @@ -7,3 +7,9 @@ option("profile") end option_end() +option("use_llvmpipe") + set_default(false) + set_description("Select CPU graphics device.") + add_defines("LR_USE_LLVMPIPE=1", { public = true }) +option_end() + diff --git a/xmake/packages.lua b/xmake/packages.lua index 0c84ea51..f08b456b 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -36,9 +36,9 @@ add_requires("simdutf v6.2.0") add_requires("simdjson v3.12.2") add_requires("unordered_dense v4.5.0") add_requires("tracy v0.11.1", { configs = { - tracy_enable = false, - on_demand = true, - callstack = true, + tracy_enable = has_config("profile"), + on_demand = has_config("profile"), + callstack = has_config("profile"), callstack_inlines = false, code_transfer = true, exit = 
true, @@ -54,14 +54,12 @@ add_requires("flecs v4.0.4") add_requires("libsdl3") add_requires("shader-slang v2025.12.1") -add_requires("vuk 2025.06.15", { configs = { +add_requires("vuk 2025.07.09", { configs = { debug_allocations = false, disable_exceptions = true, }, debug = is_mode("debug") }) -add_requires("meshoptimizer v0.22") -add_requires("ktx v4.4.0", { - -- debug = is_mode("debug") -}) +add_requires("meshoptimizer v0.24") +add_requires("ktx v4.4.0") add_requires("svector v1.0.3")