diff --git a/src/include/rasterizer.hpp b/src/include/rasterizer.hpp index cd0b349..8fa7cf0 100644 --- a/src/include/rasterizer.hpp +++ b/src/include/rasterizer.hpp @@ -33,41 +33,6 @@ class Rasterizer { std::vector Rasterize(const Vertex& v0, const Vertex& v1, const Vertex& v2); - /** - * @brief 非分配版本:将片段直接写入调用方提供的容器 - * - * 可选的裁剪区域为半开区间 [x0, x1) × [y0, y1) - * 用于 TBR:将光栅化限制在 tile 边界内,便于复用外部 scratch 容器 - * - * @param v0 三角形第一个顶点 - * @param v1 三角形第二个顶点 - * @param v2 三角形第三个顶点 - * @param x0 裁剪区域左边界(包含) - * @param y0 裁剪区域上边界(包含) - * @param x1 裁剪区域右边界(不包含) - * @param y1 裁剪区域下边界(不包含) - * @param out 输出片段容器 - */ - void RasterizeTo(const Vertex& v0, const Vertex& v1, const Vertex& v2, - int x0, int y0, int x1, int y1, - std::vector& out); - - /** - * @brief SoA 版本:按顶点索引从 SoA 读取三角形三顶点 - * @param soa 结构体数组格式的顶点数据 - * @param i0 三角形第一个顶点索引 - * @param i1 三角形第二个顶点索引 - * @param i2 三角形第三个顶点索引 - * @param x0 裁剪区域左边界(包含) - * @param y0 裁剪区域上边界(包含) - * @param x1 裁剪区域右边界(不包含) - * @param y1 裁剪区域下边界(不包含) - * @param out 输出片段容器 - */ - void RasterizeTo(const VertexSoA& soa, size_t i0, size_t i1, size_t i2, - int x0, int y0, int x1, int y1, - std::vector& out); - private: size_t width_, height_; diff --git a/src/include/renderer.h b/src/include/renderer.h index e11c93f..2acb7a9 100755 --- a/src/include/renderer.h +++ b/src/include/renderer.h @@ -36,9 +36,10 @@ namespace simple_renderer { * - DEFERRED: 延迟渲染(片段收集后再着色) */ enum class RenderingMode { - PER_TRIANGLE, //!< 逐三角形(triangle-major) - TILE_BASED, //!< 基于 tile(tile-major) - DEFERRED //!< 延迟渲染 + PER_TRIANGLE, //!< 逐三角形(triangle-major) + TILE_BASED, //!< 基于 tile(tile-major) + DEFERRED, //!< 延迟渲染 + TILE_BASED_DEFERRED //!< 基于 tile 的延迟着色(TBDR) }; /** diff --git a/src/include/renderers/tile_based_deferred_renderer.hpp b/src/include/renderers/tile_based_deferred_renderer.hpp new file mode 100644 index 0000000..3db269a --- /dev/null +++ b/src/include/renderers/tile_based_deferred_renderer.hpp @@ -0,0 +1,59 @@ +#ifndef 
SIMPLERENDER_SRC_INCLUDE_RENDERERS_TILE_BASED_DEFERRED_RENDERER_HPP_ +#define SIMPLERENDER_SRC_INCLUDE_RENDERERS_TILE_BASED_DEFERRED_RENDERER_HPP_ + +#include "renderers/renderer_base.hpp" +#include "renderers/tile_based_renderer.hpp" // 复用 TileTriangleRef / TileGridContext 定义 + +namespace simple_renderer { + +/** + * @brief 基于 Tile 的延迟渲染器(Tile‑Based Deferred Renderer, TBDR) + * + * 设计要点: + * - SoA 顶点布局 + 三角形分箱(binning)→ 与 TBR 一致; + * - 以 Tile 为并行单元,避免跨 Tile 写冲突; + * - 2‑Pass(同现代 TBDR 思路): + * 1) Z 预通过(深度决胜):仅更新每像素最小深度与胜出三角形索引,并缓存透视矫正重心; + * 2) 延迟着色:仅对胜者像素执行一次片元着色,写入 tile 局部缓冲,最后整 Tile 拷贝到全局。 + * + * 优势:显著减少overdraw场景中的无效着色(FragmentShader 调用次数近似等于胜出像素数)。 + */ +class TileBasedDeferredRenderer final : public RendererBase { + public: + TileBasedDeferredRenderer(size_t width, size_t height, size_t tile_size = 64) + : RendererBase(width, height), tile_size_(tile_size) {} + + bool Render(const Model& model, const Shader& shader, uint32_t* out_color) override; + + private: + void TriangleTileBinning(const Model& model, + const TileGridContext& grid, + std::vector>& tile_triangles); + + void ProcessTriangleForTileBinning(size_t tri_idx, bool count_only, + const Model& model, + const TileGridContext& grid, + std::vector& tile_counts, + std::vector>& tile_triangles); + + void RasterizeTileDeferred(size_t tile_id, + const std::vector& triangles, + const TileGridContext& grid, + float* tile_depth_buffer, uint32_t* tile_color_buffer, + std::unique_ptr& global_depth_buffer, + std::unique_ptr& global_color_buffer, + const Shader& shader, + uint64_t* out_tested, uint64_t* out_covered, + uint64_t* out_winners, uint64_t* out_shaded); + + private: + // 深度与颜色清除默认值(与 TBR 保持一致) + static constexpr float kDepthClear = 1.0f; + static constexpr uint32_t kColorClear = 0u; + + const size_t tile_size_; +}; + +} // namespace simple_renderer + +#endif // SIMPLERENDER_SRC_INCLUDE_RENDERERS_TILE_BASED_DEFERRED_RENDERER_HPP_ diff --git a/src/include/renderers/tile_based_renderer.hpp 
b/src/include/renderers/tile_based_renderer.hpp index da7970c..7baf2a8 100644 --- a/src/include/renderers/tile_based_renderer.hpp +++ b/src/include/renderers/tile_based_renderer.hpp @@ -103,7 +103,6 @@ class TileBasedRenderer final : public RendererBase { * @param soa 经过变换后的 SoA 顶点数据 * @param shader 着色器 * @param use_early_z 是否启用 Early‑Z - * @param scratch_fragments 可复用片段临时容器 */ void RasterizeTile(size_t tile_id, const std::vector &triangles, @@ -113,7 +112,6 @@ class TileBasedRenderer final : public RendererBase { std::unique_ptr &global_color_buffer, const Shader& shader, bool use_early_z, - std::vector* scratch_fragments, TileMaskStats* out_stats); private: diff --git a/src/include/shader.hpp b/src/include/shader.hpp index 8314f55..8bfe041 100644 --- a/src/include/shader.hpp +++ b/src/include/shader.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include "light.h" #include "material.hpp" @@ -14,7 +15,8 @@ namespace simple_renderer { using UniformValue = std::variant; + Matrix3f, Matrix4f, Material, Light, + std::vector>; inline constexpr size_t kSpecularLutResolution = 256; @@ -27,7 +29,7 @@ class UniformBuffer { std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || - std::is_same_v, + std::is_same_v || std::is_same_v>, "Type not supported by UniformValue"); uniforms_[name] = value; } @@ -83,10 +85,10 @@ struct VertexUniformCache { }; struct FragmentUniformCache { - Light light{}; + std::vector lights{}; // 支持多光源 Vector3f camera_pos = Vector3f(0.0f); - Vector3f light_dir_normalized = Vector3f(0.0f); - bool has_light = false; + std::vector light_dirs_normalized{}; + bool has_lights = false; bool has_camera = false; bool derived_valid = false; }; @@ -142,6 +144,7 @@ class Shader { void UpdateMatrixCache(const std::string &name, const Matrix4f &value); void UpdateFragmentCache(const std::string &name, const Light &value); + void UpdateFragmentCache(const std::string &name, const std::vector &value); 
void UpdateFragmentCache(const std::string &name, const Vector3f &value); void RecalculateDerivedMatrices(); void RecalculateFragmentDerived(); @@ -155,6 +158,10 @@ class Shader { Color SampleTexture(const Texture &texture, const Vector2f &uv) const; Color ClampColor(const Color color) const; + + public: + // 便捷接口:设置多光源 + void SetLights(const std::vector& lights) { SetUniform("lights", lights); } }; uint8_t FloatToUint8_t(float val); diff --git a/src/rasterizer.cpp b/src/rasterizer.cpp index 04aa6b1..e241444 100644 --- a/src/rasterizer.cpp +++ b/src/rasterizer.cpp @@ -81,159 +81,6 @@ std::vector Rasterizer::Rasterize(const Vertex& v0, const Vertex& v1, return fragments; } -void Rasterizer::RasterizeTo(const Vertex& v0, const Vertex& v1, const Vertex& v2, - int x0, int y0, int x1, int y1, - std::vector& out) { - // 获取三角形的最小 box(屏幕空间) - const Vector4f p0 = v0.GetPosition(); - const Vector4f p1 = v1.GetPosition(); - const Vector4f p2 = v2.GetPosition(); - - Vector2f a(p0.x, p0.y); - Vector2f b(p1.x, p1.y); - Vector2f c(p2.x, p2.y); - - Vector2f bboxMin = Vector2f{std::min({a.x, b.x, c.x}), std::min({a.y, b.y, c.y})}; - Vector2f bboxMax = Vector2f{std::max({a.x, b.x, c.x}), std::max({a.y, b.y, c.y})}; - - // Clamp 到屏幕尺寸 - float minx = std::max(0.0f, bboxMin.x); - float miny = std::max(0.0f, bboxMin.y); - float maxx = std::min(float(width_ - 1), bboxMax.x); - float maxy = std::min(float(height_ - 1), bboxMax.y); - - // 与外部提供的裁剪区域相交(半开区间) -> 闭区间扫描 - int sx = std::max(x0, static_cast(std::floor(minx))); - int sy = std::max(y0, static_cast(std::floor(miny))); - int ex = std::min(x1 - 1, static_cast(std::floor(maxx))); - int ey = std::min(y1 - 1, static_cast(std::floor(maxy))); - if (sx > ex || sy > ey) return; - - for (int x = sx; x <= ex; ++x) { - for (int y = sy; y <= ey; ++y) { - auto [is_inside, bary] = GetBarycentricCoord( - Vector3f(p0.x, p0.y, p0.z), Vector3f(p1.x, p1.y, p1.z), Vector3f(p2.x, p2.y, p2.z), - Vector3f(static_cast(x), static_cast(y), 0)); - if 
(!is_inside) continue; - - // 透视矫正插值 - auto perspective_result = PerformPerspectiveCorrection( - p0.w, p1.w, p2.w, - p0.z, p1.z, p2.z, - bary); - - const Vector3f& corrected_bary = perspective_result.corrected_barycentric; - float z = perspective_result.interpolated_z; - - Fragment frag; // material 指针由调用方填写 - frag.screen_coord = {x, y}; - frag.normal = Interpolate(v0.GetNormal(), v1.GetNormal(), v2.GetNormal(), corrected_bary); - frag.uv = Interpolate(v0.GetTexCoords(), v1.GetTexCoords(), v2.GetTexCoords(), corrected_bary); - frag.color = InterpolateColor(v0.GetColor(), v1.GetColor(), v2.GetColor(), corrected_bary); - frag.depth = z; - - out.push_back(frag); - } - } -} - -void Rasterizer::RasterizeTo(const VertexSoA& soa, size_t i0, size_t i1, size_t i2, - int x0, int y0, int x1, int y1, - std::vector& out) { - // 读取三顶点的屏幕空间位置 - const Vector4f& p0 = soa.pos_screen[i0]; - const Vector4f& p1 = soa.pos_screen[i1]; - const Vector4f& p2 = soa.pos_screen[i2]; - - // 为BarycentricCoord预构造Vec3f,避免循环内重复构造 - const Vector3f sp0(p0.x, p0.y, p0.z); - const Vector3f sp1(p1.x, p1.y, p1.z); - const Vector3f sp2(p2.x, p2.y, p2.z); - - // 计算屏幕空间AABB包围盒 - const float minx_f = std::max(0.0f, std::min({p0.x, p1.x, p2.x})); - const float miny_f = std::max(0.0f, std::min({p0.y, p1.y, p2.y})); - const float maxx_f = std::min(float(width_ - 1), std::max({p0.x, p1.x, p2.x})); - const float maxy_f = std::min(float(height_ - 1), std::max({p0.y, p1.y, p2.y})); - - // 与外部提供的裁剪区域相交(半开区间) -> 闭区间扫描 - int sx = std::max(x0, static_cast(std::floor(minx_f))); - int sy = std::max(y0, static_cast(std::floor(miny_f))); - int ex = std::min(x1 - 1, static_cast(std::floor(maxx_f))); - int ey = std::min(y1 - 1, static_cast(std::floor(maxy_f))); - if (sx > ex || sy > ey) return; - - // 预计算边函数系数:E(x,y) = A*x + B*y + C - // 使用相对坐标的边函数定义,避免大常数项导致的数值不稳定 - // 如使用绝对形式Ax+By+C会由于常数C的量级过大,造成浮点抵消,有效位丢失不稳定 - auto cross2 = [](float ax, float ay, float bx, float by) { - return ax * by - ay * bx; - }; - // 边向量 - const 
float e01x = p1.x - p0.x, e01y = p1.y - p0.y; // (p0->p1) - const float e12x = p2.x - p1.x, e12y = p2.y - p1.y; // (p1->p2) - const float e20x = p0.x - p2.x, e20y = p0.y - p2.y; // (p2->p0) - - // 有向面积(两倍),用相对面积定义:area2 = cross(p1 - p0, p2 - p0) - float area2 = cross2(e01x, e01y, p2.x - p0.x, p2.y - p0.y); - if (std::abs(area2) < 1e-6f) return; // 退化三角形 - const float inv_area2 = 1.0f / area2; - const bool positive = (area2 > 0.0f); - - // 行优先遍历:有利于 cache 与向量化 - #pragma omp simd - for (int y = sy; y <= ey; ++y) { - const float yf = static_cast(y); - - // 注意:此处存在对 out.push_back 的写入,属于有副作用操作,不适合使用 - // omp simd 进行强制向量化,否则可能导致不符合预期的行为(如周期性伪影)。 - // 先保持标量内层,后续如切换为“直写像素回调”再考虑安全的 SIMD 化。 - for (int x = sx; x <= ex; ++x) { - const float xf = static_cast(x); - - // 相对坐标边函数: - // E01(p) = cross(p1 - p0, p - p0) - // E12(p) = cross(p2 - p1, p - p1) - // E20(p) = cross(p0 - p2, p - p2) - const float E01 = cross2(e01x, e01y, xf - p0.x, yf - p0.y); - const float E12 = cross2(e12x, e12y, xf - p1.x, yf - p1.y); - const float E20 = cross2(e20x, e20y, xf - p2.x, yf - p2.y); - - // 半空间测试(根据朝向选择符号) - const bool inside = positive ? 
(E01 >= 0.0f && E12 >= 0.0f && E20 >= 0.0f) - : (E01 <= 0.0f && E12 <= 0.0f && E20 <= 0.0f); - if (!inside) continue; - - // 重心权重映射: - // b0 对应 v0,取与对边 (v1,v2) 的子面积 → E12 - // b1 对应 v1 → E20 - // b2 对应 v2 → E01 - const float b0 = E12 * inv_area2; - const float b1 = E20 * inv_area2; - const float b2 = E01 * inv_area2; - const Vector3f bary(b0, b1, b2); - - // 透视矫正插值 - auto perspective_result = PerformPerspectiveCorrection( - p0.w, p1.w, p2.w, - p0.z, p1.z, p2.z, - bary); - - const Vector3f& corrected_bary = perspective_result.corrected_barycentric; - const float z = perspective_result.interpolated_z; - - Fragment frag; // Note: material 指针由调用方填写 - frag.screen_coord = {x, y}; - frag.normal = Interpolate(soa.normal[i0], soa.normal[i1], soa.normal[i2], corrected_bary); - frag.uv = Interpolate(soa.uv[i0], soa.uv[i1], soa.uv[i2], corrected_bary); - frag.color = InterpolateColor(soa.color[i0], soa.color[i1], soa.color[i2], corrected_bary); - frag.depth = z; - - out.push_back(frag); - } - } -} - std::pair Rasterizer::GetBarycentricCoord(const Vector3f& p0, const Vector3f& p1, const Vector3f& p2, diff --git a/src/renderer.cpp b/src/renderer.cpp index 0939cf5..d647e93 100644 --- a/src/renderer.cpp +++ b/src/renderer.cpp @@ -6,6 +6,7 @@ #include "renderers/per_triangle_renderer.hpp" #include "renderers/tile_based_renderer.hpp" #include "renderers/deferred_renderer.hpp" +#include "renderers/tile_based_deferred_renderer.hpp" namespace simple_renderer { @@ -14,6 +15,7 @@ std::string RenderingModeToString(RenderingMode mode) { case RenderingMode::PER_TRIANGLE: return "PER_TRIANGLE"; case RenderingMode::TILE_BASED: return "TILE_BASED"; case RenderingMode::DEFERRED: return "DEFERRED"; + case RenderingMode::TILE_BASED_DEFERRED: return "TILE_BASED_DEFERRED"; } return "PER_TRIANGLE"; } @@ -77,6 +79,11 @@ void SimpleRenderer::EnsureRenderer() { renderer_ = std::move(r); break; } + case RenderingMode::TILE_BASED_DEFERRED: { + auto r = std::make_unique(width_, height_, tbr_tile_size_); + 
renderer_ = std::move(r); + break; + } } } diff --git a/src/renderers/tile_based_deferred_renderer.cpp b/src/renderers/tile_based_deferred_renderer.cpp new file mode 100644 index 0000000..93abf9b --- /dev/null +++ b/src/renderers/tile_based_deferred_renderer.cpp @@ -0,0 +1,435 @@ +// +// Tile-Based Deferred Renderer (TBDR) +// ----------------------------------- +// 本文件实现 CPU 侧的基于 Tile 的延迟着色(TBDR)。整体思路与现代 TBDR 硬件类似: +// 1) 先将三角形按屏幕划分到 Tile(binning); +// 2) 对每个 Tile,进行“两阶段”光栅化: +// - 阶段A:仅进行深度决胜(Z 预通过)——找出每个像素的“胜出三角形”和其最小深度,并缓存透视矫正后的重心权重; +// - 阶段B:仅对胜出的像素执行一次片元着色(FragmentShader),写回 Tile 局部缓冲,然后整 Tile 拷贝到全局。 +// +// 与现有 TBR(Tile-Based 前向渲染)相比,TBDR 避免了“对被随后覆盖的像素进行无用的着色”,在overdraw较多时显著减少 +// FragmentShader 调用次数;同时保持 Tile‑major 的访问局部性与单份全局 frame buffer 的并发安全写回。 + +#include "renderers/tile_based_deferred_renderer.hpp" + +#include + +#include +#include +#include +#include +#include + +#include "config.h" +#include "log_system.h" + +namespace simple_renderer { + +bool TileBasedDeferredRenderer::Render(const Model& model, const Shader& shader_in, + uint32_t* buffer) { + auto total_start_time = std::chrono::high_resolution_clock::now(); + auto shader = std::make_shared(shader_in); + shader->PrepareUniformCaches(); + + // 顶点阶段(SoA) + // - 统一进行裁剪空间->NDC->屏幕空间的变换,并将结果写入 SoA。 + // - 此阶段与 TBR 完全一致。 + auto vertex_start = std::chrono::high_resolution_clock::now(); + const auto& input_vertices = model.GetVertices(); + VertexSoA soa; soa.resize(input_vertices.size()); + +#pragma omp parallel for num_threads(kNProc) schedule(static) shared(shader, soa, input_vertices) + for (size_t i = 0; i < input_vertices.size(); ++i) { + const auto& v = input_vertices[i]; + auto clip = shader->VertexShader(v); + soa.pos_clip[i] = clip.GetPosition(); + auto ndc = PerspectiveDivision(clip); + auto screen = ViewportTransformation(ndc); + soa.pos_screen[i] = screen.GetPosition(); + soa.normal[i] = screen.GetNormal(); + soa.uv[i] = screen.GetTexCoords(); + soa.color[i] = screen.GetColor(); + } + auto 
vertex_end = std::chrono::high_resolution_clock::now(); + double vertex_ms = std::chrono::duration_cast(vertex_end - vertex_start).count() / 1000.0; + + // Tile Binning + // - 将三角形按屏幕空间包围盒映射到 Tile 网格; + // - 使后续处理以 Tile 为并行单元,避免跨 Tile 写冲突; + // - 仍复用现有 TBR 的数据结构与Binning逻辑。 + auto setup_start = std::chrono::high_resolution_clock::now(); + const size_t TILE_SIZE = tile_size_ > 0 ? tile_size_ : 64; + const size_t tiles_x = (width_ + TILE_SIZE - 1) / TILE_SIZE; + const size_t tiles_y = (height_ + TILE_SIZE - 1) / TILE_SIZE; + const size_t total_tiles = tiles_x * tiles_y; + std::vector> tile_triangles(total_tiles); + auto setup_end = std::chrono::high_resolution_clock::now(); + double setup_ms = std::chrono::duration_cast(setup_end - setup_start).count() / 1000.0; + + auto bin_start = std::chrono::high_resolution_clock::now(); + TileGridContext grid_ctx{soa, tiles_x, tiles_y, TILE_SIZE}; + TriangleTileBinning(model, grid_ctx, tile_triangles); + auto bin_end = std::chrono::high_resolution_clock::now(); + double bin_ms = std::chrono::duration_cast(bin_end - bin_start).count() / 1000.0; + + // 全局 framebuffer(单份) + // - 每个 Tile 完成后,整行拷贝到这份全局缓冲; + // - 不同 Tile 不重叠,省去同步/锁开销。 + auto buf_alloc_start = std::chrono::high_resolution_clock::now(); + std::unique_ptr depthBuffer = std::make_unique(width_ * height_); + std::unique_ptr colorBuffer = std::make_unique(width_ * height_); + std::fill_n(depthBuffer.get(), width_ * height_, kDepthClear); + std::fill_n(colorBuffer.get(), width_ * height_, kColorClear); + auto buf_alloc_end = std::chrono::high_resolution_clock::now(); + double buf_alloc_ms = std::chrono::duration_cast(buf_alloc_end - buf_alloc_start).count() / 1000.0; + + // 并行按 tile 渲染:两阶段(Z 决胜 -> 着色) + // - OpenMP 以 Tile 为单位并行; + // - Tile 内先进行“Z 预通过”(不着色),再统一“按像素胜者着色”。 + auto raster_start = std::chrono::high_resolution_clock::now(); + std::vector tile_stats(total_tiles); + +#pragma omp parallel num_threads(kNProc) default(none) \ + shared(tile_triangles, grid_ctx, 
depthBuffer, colorBuffer, shader, total_tiles, tile_stats) + { + std::unique_ptr tile_depth_buffer = std::make_unique(grid_ctx.tile_size * grid_ctx.tile_size); + std::unique_ptr tile_color_buffer = std::make_unique(grid_ctx.tile_size * grid_ctx.tile_size); + +#pragma omp for schedule(static) + for (size_t tile_id = 0; tile_id < total_tiles; ++tile_id) { + uint64_t tested = 0, covered = 0, winners = 0, shaded = 0; + // 2-pass 的核心逻辑在 RasterizeTileDeferred 内: + // A) 仅计算覆盖与深度,确定每像素胜者(三角形索引)并缓存透视矫正重心; + // B) 对胜者像素一次性着色写回,最后整 Tile 拷贝到全局。 + RasterizeTileDeferred(tile_id, tile_triangles[tile_id], grid_ctx, + tile_depth_buffer.get(), tile_color_buffer.get(), + depthBuffer, colorBuffer, *shader, + &tested, &covered, &winners, &shaded); + tile_stats[tile_id].tested = tested; + tile_stats[tile_id].covered = covered; + tile_stats[tile_id].zpass = winners; // 在 TBDR 中 zpass≈winner 数 + tile_stats[tile_id].shaded = shaded; + } + } + auto raster_end = std::chrono::high_resolution_clock::now(); + double raster_ms = std::chrono::duration_cast(raster_end - raster_start).count() / 1000.0; + + // 汇总统计 + uint64_t sum_tested = 0, sum_covered = 0, sum_winners = 0, sum_shaded = 0; + for (const auto& s : tile_stats) { + sum_tested += s.tested; + sum_covered += s.covered; + sum_winners += s.zpass; + sum_shaded += s.shaded; + } + auto rate = [](uint64_t num, uint64_t den) -> double { return (den == 0) ? 
0.0 : double(num) / double(den) * 100.0; }; + SPDLOG_DEBUG("TBDR Stats: tested={}, covered={} ({:.1f}%), winners={} ({:.1f}%), shaded={} ({:.1f}%)", + sum_tested, sum_covered, rate(sum_covered, sum_tested), + sum_winners, rate(sum_winners, sum_covered), + sum_shaded, rate(sum_shaded, sum_covered)); + + // 拷贝到输出 + auto present_start = std::chrono::high_resolution_clock::now(); + std::memcpy(buffer, colorBuffer.get(), width_ * height_ * sizeof(uint32_t)); + auto present_end = std::chrono::high_resolution_clock::now(); + double present_ms = std::chrono::duration_cast(present_end - present_start).count() / 1000.0; + + auto total_end_time = std::chrono::high_resolution_clock::now(); + double total_ms = std::chrono::duration_cast(total_end_time - total_start_time).count() / 1000.0; + + SPDLOG_DEBUG("=== TILE-BASED DEFERRED RENDERING PERFORMANCE ==="); + double sum_ms = vertex_ms + (total_ms - vertex_ms); + SPDLOG_DEBUG("Vertex Shader: {:8.3f} ms ({:5.1f}%)", vertex_ms, vertex_ms / sum_ms * 100); + SPDLOG_DEBUG("Setup: {:8.3f} ms", setup_ms); + SPDLOG_DEBUG("Binning: {:8.3f} ms", bin_ms); + SPDLOG_DEBUG("Buffer Alloc: {:8.3f} ms", buf_alloc_ms); + SPDLOG_DEBUG("Tile Raster: {:8.3f} ms", raster_ms); + SPDLOG_DEBUG("Copy: {:8.3f} ms", present_ms); + SPDLOG_DEBUG("Total: {:8.3f} ms", vertex_ms + (setup_ms + bin_ms + buf_alloc_ms + raster_ms + present_ms)); + SPDLOG_DEBUG("==============================================="); + + return true; +} + +void TileBasedDeferredRenderer::TriangleTileBinning( + const Model& model, const TileGridContext& grid, + std::vector>& tile_triangles) { + const size_t total_triangles = model.GetFaces().size(); + SPDLOG_DEBUG("Starting triangle-tile binning (SoA) for {} triangles", total_triangles); + SPDLOG_DEBUG("Screen dimensions: {}x{}, Tile size: {}, Tiles: {}x{}", width_, height_, grid.tile_size, grid.tiles_x, grid.tiles_y); + + std::vector tile_counts(grid.tiles_x * grid.tiles_y, 0); + for (size_t tri_idx = 0; tri_idx < total_triangles; 
++tri_idx) { + ProcessTriangleForTileBinning(tri_idx, true, model, grid, tile_counts, tile_triangles); + } + for (size_t tile_id = 0; tile_id < tile_triangles.size(); ++tile_id) { + if (tile_counts[tile_id] > 0) tile_triangles[tile_id].reserve(tile_counts[tile_id]); + } + for (size_t tri_idx = 0; tri_idx < total_triangles; ++tri_idx) { + ProcessTriangleForTileBinning(tri_idx, false, model, grid, tile_counts, tile_triangles); + } + + size_t total_refs = 0, non_empty = 0; + for (const auto& v : tile_triangles) { total_refs += v.size(); if (!v.empty()) non_empty++; } + SPDLOG_DEBUG(" (SoA) Total triangle references: {}", total_refs); + SPDLOG_DEBUG(" (SoA) Non-empty tiles: {}", non_empty); + SPDLOG_DEBUG(" (SoA) Average triangles per tile: {:.2f}", total_refs > 0 ? float(total_refs) / tile_triangles.size() : 0.0f); +} + +void TileBasedDeferredRenderer::ProcessTriangleForTileBinning( + size_t tri_idx, bool count_only, const Model& model, const TileGridContext& grid, + std::vector& tile_counts, + std::vector>& tile_triangles) { + const auto& f = model.GetFaces()[tri_idx]; + size_t i0 = f.GetIndex(0), i1 = f.GetIndex(1), i2 = f.GetIndex(2); + + // Conservative clip-space frustum culling: reject only when all three vertices lie outside the same plane, each vertex tested against its own w + const Vector4f &c0 = grid.soa.pos_clip[i0]; + const Vector4f &c1 = grid.soa.pos_clip[i1]; + const Vector4f &c2 = grid.soa.pos_clip[i2]; + bool frustum_cull = + (c0.x > c0.w && c1.x > c1.w && c2.x > c2.w) || + (c0.x < -c0.w && c1.x < -c1.w && c2.x < -c2.w) || + (c0.y > c0.w && c1.y > c1.w && c2.y > c2.w) || + (c0.y < -c0.w && c1.y < -c1.w && c2.y < -c2.w) || + (c0.z > c0.w && c1.z > c1.w && c2.z > c2.w) || + (c0.z < -c0.w && c1.z < -c1.w && c2.z < -c2.w); + if (frustum_cull) return; + + const Vector4f &pos0 = grid.soa.pos_screen[i0]; + const Vector4f &pos1 = grid.soa.pos_screen[i1]; + const Vector4f &pos2 = grid.soa.pos_screen[i2]; + + // Back-face culling (sign of the screen-space cross product) + Vector2f screen0(pos0.x, pos0.y), screen1(pos1.x, pos1.y), screen2(pos2.x, pos2.y); + Vector2f edge1 = screen1 - screen0, edge2 = screen2 - screen0; + float 
cross_product = edge1.x * edge2.y - edge1.y * edge2.x; + if (cross_product > 0.0f) return; + + // tile 覆盖范围 + float min_x = std::min({pos0.x, pos1.x, pos2.x}); + float max_x = std::max({pos0.x, pos1.x, pos2.x}); + float min_y = std::min({pos0.y, pos1.y, pos2.y}); + float max_y = std::max({pos0.y, pos1.y, pos2.y}); + + int start_tile_x = std::max(0, static_cast(min_x) / static_cast(grid.tile_size)); + int end_tile_x = std::min(static_cast(grid.tiles_x - 1), static_cast(max_x) / static_cast(grid.tile_size)); + int start_tile_y = std::max(0, static_cast(min_y) / static_cast(grid.tile_size)); + int end_tile_y = std::min(static_cast(grid.tiles_y - 1), static_cast(max_y) / static_cast(grid.tile_size)); + if (start_tile_x > end_tile_x || start_tile_y > end_tile_y) return; + + if (count_only) { + for (int ty = start_tile_y; ty <= end_tile_y; ++ty) + for (int tx = start_tile_x; tx <= end_tile_x; ++tx) + tile_counts[ty * grid.tiles_x + tx]++; + } else { + TileTriangleRef tri_ref{i0, i1, i2, &f.GetMaterial(), tri_idx}; + for (int ty = start_tile_y; ty <= end_tile_y; ++ty) + for (int tx = start_tile_x; tx <= end_tile_x; ++tx) + tile_triangles[ty * grid.tiles_x + tx].push_back(tri_ref); + } +} + +void TileBasedDeferredRenderer::RasterizeTileDeferred( + size_t tile_id, const std::vector& triangles, + const TileGridContext& grid, float* tile_depth_buffer, uint32_t* tile_color_buffer, + std::unique_ptr& global_depth_buffer, std::unique_ptr& global_color_buffer, + const Shader& shader, uint64_t* out_tested, uint64_t* out_covered, uint64_t* out_winners, uint64_t* out_shaded) { + // 计算本 Tile 覆盖的屏幕区域(半开区间对齐到闭区间扫描) + size_t tile_x = tile_id % grid.tiles_x; + size_t tile_y = tile_id / grid.tiles_x; + size_t screen_x_start = tile_x * grid.tile_size; + size_t screen_y_start = tile_y * grid.tile_size; + size_t screen_x_end = std::min(screen_x_start + grid.tile_size, width_); + size_t screen_y_end = std::min(screen_y_start + grid.tile_size, height_); + + size_t tile_width = screen_x_end - 
screen_x_start; + size_t tile_height = screen_y_end - screen_y_start; + + // 阶段缓冲:Z 最小、胜者三角形索引、重心缓存(b0c/b1c) + // - zmin:本 Tile 每像素的当前最小深度; + // - winner:本 Tile 每像素的“胜出三角形”的局部索引(-1 表示尚未命中任何三角形); + // - b0c/b1c:缓存透视矫正后的重心权重(b2c = 1 - b0c - b1c),用于阶段B避免重复计算。 + std::vector zmin(tile_width * tile_height, kDepthClear); + std::vector winner(tile_width * tile_height, -1); + std::vector b0c(tile_width * tile_height, 0.0f); + std::vector b1c(tile_width * tile_height, 0.0f); + + // 初始化 tile 局部 color/depth 缓冲 + std::fill_n(tile_depth_buffer, tile_width * tile_height, kDepthClear); + std::fill_n(tile_color_buffer, tile_width * tile_height, kColorClear); + + constexpr int kLane = 8; + uint64_t tested_pixels = 0, covered_pixels = 0, winner_pixels = 0, shaded_pixels = 0; + + auto cross2 = [](float ax, float ay, float bx, float by) { return ax * by - ay * bx; }; + + // 阶段 A:Z 决胜(仅更新 zmin / winner / b0c/b1c) + // - 使用边函数进行半空间内点测试,行优先 + kLane 批处理,利于 cache 与自动向量化; + // - 对覆盖像素进行透视矫正重心计算(先插 1/w,再还原权重),并据此插值 z; + // - 若 z 更小,则更新该像素的胜者信息与缓存的重心;此阶段不执行着色。 + for (const auto& tri : triangles) { + const size_t i0 = tri.i0, i1 = tri.i1, i2 = tri.i2; + const Vector4f &p0 = grid.soa.pos_screen[i0]; + const Vector4f &p1 = grid.soa.pos_screen[i1]; + const Vector4f &p2 = grid.soa.pos_screen[i2]; + + // 屏幕空间 AABB 与 tile 相交 + const float tri_minx = std::min({p0.x, p1.x, p2.x}); + const float tri_miny = std::min({p0.y, p1.y, p2.y}); + const float tri_maxx = std::max({p0.x, p1.x, p2.x}); + const float tri_maxy = std::max({p0.y, p1.y, p2.y}); + + int sx = std::max(static_cast(screen_x_start), static_cast(std::floor(std::max(0.0f, tri_minx)))); + int sy = std::max(static_cast(screen_y_start), static_cast(std::floor(std::max(0.0f, tri_miny)))); + int ex = std::min(static_cast(screen_x_end - 1), static_cast(std::floor(std::min(width_ - 1, tri_maxx)))); + int ey = std::min(static_cast(screen_y_end - 1), static_cast(std::floor(std::min(height_ - 1, tri_maxy)))); + if (sx > ex || sy > ey) continue; + + // 
边向量、面积及朝向 + const float e01x = p1.x - p0.x, e01y = p1.y - p0.y; + const float e12x = p2.x - p1.x, e12y = p2.y - p1.y; + const float e20x = p0.x - p2.x, e20y = p0.y - p2.y; + // 面积 area2 = cross(p1 - p0, p2 - p0);用于重心计算与正负朝向判别。 + const float area2 = cross2(e01x, e01y, p2.x - p0.x, p2.y - p0.y); + if (std::abs(area2) < 1e-6f) continue; + const bool positive = (area2 > 0.0f); + + // 深度与 1/w 插值准备 + // 透视校正思路:在屏幕空间中 1/w 线性,先插值 1/w,再将各顶点属性乘以 1/w 并归一。 + const float z0 = p0.z, z1 = p1.z, z2 = p2.z; + const float w0_inv = 1.0f / p0.w, w1_inv = 1.0f / p1.w, w2_inv = 1.0f / p2.w; + + for (int y = sy; y <= ey; ++y) { + const float yf = static_cast(y); + float E01_base = cross2(e01x, e01y, static_cast(sx) - p0.x, yf - p0.y); + float E12_base = cross2(e12x, e12y, static_cast(sx) - p1.x, yf - p1.y); + float E20_base = cross2(e20x, e20y, static_cast(sx) - p2.x, yf - p2.y); + const float dE01dx = -e01y; + const float dE12dx = -e12y; + const float dE20dx = -e20y; + + // 行扫描 + kLane 批处理:利于 cache 与自动向量化 + for (int xb = sx; xb <= ex; xb += kLane) { + const int lane = std::min(kLane, ex - xb + 1); + float E01[kLane], E12[kLane], E20[kLane]; +#pragma omp simd + for (int j = 0; j < lane; ++j) { + E01[j] = E01_base + dE01dx * static_cast(xb - sx + j); + E12[j] = E12_base + dE12dx * static_cast(xb - sx + j); + E20[j] = E20_base + dE20dx * static_cast(xb - sx + j); + } + + // 内点测试,如果三角形在像素内,则将该像素加入覆盖掩码 + unsigned mask_cover = 0u; int cover_count = 0; + for (int j = 0; j < lane; ++j) { + bool inside = positive ? 
(E01[j] >= 0.0f && E12[j] >= 0.0f && E20[j] >= 0.0f) + : (E01[j] <= 0.0f && E12[j] <= 0.0f && E20[j] <= 0.0f); + if (inside) { mask_cover |= (1u << j); ++cover_count; } + } + tested_pixels += static_cast(lane); + covered_pixels += static_cast(cover_count); + if (mask_cover == 0u) continue; + + for (int j = 0; j < lane; ++j) { + if (((mask_cover >> j) & 1u) == 0u) continue; + const float b0 = E12[j] / area2; + const float b1 = E20[j] / area2; + const float b2 = E01[j] / area2; + const float w_inv = b0 * w0_inv + b1 * w1_inv + b2 * w2_inv; // 透视校正 + const float b0c_ = (b0 * w0_inv) / w_inv; + const float b1c_ = (b1 * w1_inv) / w_inv; + const float b2c_ = (b2 * w2_inv) / w_inv; + const float z = z0 * b0c_ + z1 * b1c_ + z2 * b2c_; + + const int sx_pix = xb + j; + const int local_x = sx_pix - static_cast(screen_x_start); + const int local_y = y - static_cast(screen_y_start); + const size_t idx = static_cast(local_x + local_y * static_cast(tile_width)); + // 用极小 epsilon 防止抖动 + if (z < zmin[idx] - 1e-8f) { + if (winner[idx] < 0) winner_pixels++; + zmin[idx] = z; + // 记录本 Tile 内的“局部三角形索引”,便于阶段B无需再次查找 + winner[idx] = static_cast(&tri - &triangles[0]); + b0c[idx] = b0c_; + b1c[idx] = b1c_; + } + } + } + } + } + + // 阶段 B:仅对胜者像素着色并写入 tile 局部缓冲 + // - 对于 winner[idx] >= 0 的像素,从 SoA 插值 normal/uv/color,构造 Fragment; + // - 每像素仅进行一次 FragmentShader 调用,随后写回 tile 局部 color/depth。 + for (size_t y = 0; y < tile_height; ++y) { + for (size_t x = 0; x < tile_width; ++x) { + const size_t idx = x + y * tile_width; + int32_t win = winner[idx]; + if (win < 0) continue; + + const auto& tri = triangles[static_cast(win)]; + const size_t i0 = tri.i0, i1 = tri.i1, i2 = tri.i2; + const float b0c_ = b0c[idx]; + const float b1c_ = b1c[idx]; + const float b2c_ = 1.0f - b0c_ - b1c_; + + Fragment frag; + frag.screen_coord = {static_cast(screen_x_start + x), static_cast(screen_y_start + y)}; + frag.depth = zmin[idx]; + frag.material = tri.material; + + // 插值属性 + const Vector3f &n0 = grid.soa.normal[i0]; + 
const Vector3f &n1 = grid.soa.normal[i1]; + const Vector3f &n2 = grid.soa.normal[i2]; + frag.normal = n0 * b0c_ + n1 * b1c_ + n2 * b2c_; + + const Vector2f &uv0 = grid.soa.uv[i0]; + const Vector2f &uv1 = grid.soa.uv[i1]; + const Vector2f &uv2 = grid.soa.uv[i2]; + frag.uv = uv0 * b0c_ + uv1 * b1c_ + uv2 * b2c_; + + const Color &c0 = grid.soa.color[i0]; + const Color &c1 = grid.soa.color[i1]; + const Color &c2 = grid.soa.color[i2]; + auto color_r = FloatToUint8_t(static_cast(c0[Color::kColorIndexRed]) * b0c_ + + static_cast(c1[Color::kColorIndexRed]) * b1c_ + + static_cast(c2[Color::kColorIndexRed]) * b2c_); + auto color_g = FloatToUint8_t(static_cast(c0[Color::kColorIndexGreen]) * b0c_ + + static_cast(c1[Color::kColorIndexGreen]) * b1c_ + + static_cast(c2[Color::kColorIndexGreen]) * b2c_); + auto color_b = FloatToUint8_t(static_cast(c0[Color::kColorIndexBlue]) * b0c_ + + static_cast(c1[Color::kColorIndexBlue]) * b1c_ + + static_cast(c2[Color::kColorIndexBlue]) * b2c_); + frag.color = Color(color_r, color_g, color_b); + + auto out_color = shader.FragmentShader(frag); + tile_depth_buffer[idx] = frag.depth; + tile_color_buffer[idx] = uint32_t(out_color); + shaded_pixels++; + } + } + + // 写回全局缓冲(tile 行拷贝) + // 不同 Tile 区域不重叠,行拷贝无需锁 + for (size_t y = 0; y < tile_height; ++y) { + const size_t tile_row_off = y * tile_width; + const size_t global_row_off = (screen_y_start + y) * width_ + screen_x_start; + // 将局部 tile 的 color/depth 复制到全局 framebuffer 中对应位置。 + std::memcpy(global_color_buffer.get() + global_row_off, + tile_color_buffer + tile_row_off, + tile_width * sizeof(uint32_t)); + std::memcpy(global_depth_buffer.get() + global_row_off, + tile_depth_buffer + tile_row_off, + tile_width * sizeof(float)); + } + + if (out_tested) *out_tested = tested_pixels; + if (out_covered) *out_covered = covered_pixels; + if (out_winners) *out_winners = winner_pixels; + if (out_shaded) *out_shaded = shaded_pixels; +} + +} // namespace simple_renderer diff --git 
a/src/renderers/tile_based_renderer.cpp b/src/renderers/tile_based_renderer.cpp index e39526e..e1c63e7 100644 --- a/src/renderers/tile_based_renderer.cpp +++ b/src/renderers/tile_based_renderer.cpp @@ -100,10 +100,6 @@ bool TileBasedRenderer::Render(const Model &model, const Shader &shader_in, std::unique_ptr tile_color_buffer = std::make_unique(grid_ctx.tile_size * grid_ctx.tile_size); - // 为每个 tile 分配可复用片段临时容器,容量按单 tile 上限预估 - std::vector scratch_fragments; - scratch_fragments.reserve(grid_ctx.tile_size * grid_ctx.tile_size); - #pragma omp for schedule(static) for (size_t tile_id = 0; tile_id < total_tiles; ++tile_id) { // 按照 tile 进行光栅化(SoA) @@ -111,7 +107,7 @@ bool TileBasedRenderer::Render(const Model &model, const Shader &shader_in, RasterizeTile(tile_id, tile_triangles[tile_id], grid_ctx, tile_depth_buffer.get(), tile_color_buffer.get(), depthBuffer, colorBuffer, *shader, early_z_, - &scratch_fragments, &tile_stats[tile_id]); + &tile_stats[tile_id]); } } auto raster_end = std::chrono::high_resolution_clock::now(); @@ -129,7 +125,7 @@ bool TileBasedRenderer::Render(const Model &model, const Shader &shader_in, sum_shaded += s.shaded; } auto rate = [](uint64_t num, uint64_t den) -> double { - if (den == 0) return 0.0; return double(num) / double(den) * 100.0; + return (den == 0)?0.0:double(num) / double(den) * 100.0; }; SPDLOG_DEBUG( "TBR Mask Stats: tested={}, covered={} ({:.1f}%), zpass={} ({:.1f}%), shaded={} ({:.1f}%)", @@ -220,7 +216,6 @@ void TileBasedRenderer::RasterizeTile( uint32_t *tile_color_buffer, std::unique_ptr &global_depth_buffer, std::unique_ptr &global_color_buffer, const Shader &shader, bool use_early_z, - std::vector *scratch_fragments, TileMaskStats* out_stats) { // 计算 tile 屏幕范围 size_t tile_x = tile_id % grid.tiles_x; diff --git a/src/shader.cpp b/src/shader.cpp index 06ab241..708b419 100644 --- a/src/shader.cpp +++ b/src/shader.cpp @@ -136,13 +136,23 @@ void Shader::RecalculateDerivedMatrices() { void Shader::UpdateFragmentCache(const 
std::string& name, const Light& value) { - if (name != "light") { - return; + if (name != "light") { return; } + fragment_uniform_cache_.lights.clear(); + fragment_uniform_cache_.lights.push_back(value); + fragment_uniform_cache_.has_lights = true; + fragment_uniform_cache_.derived_valid = false; + if (fragment_uniform_cache_.has_lights && fragment_uniform_cache_.has_camera) { + RecalculateFragmentDerived(); } - fragment_uniform_cache_.light = value; - fragment_uniform_cache_.has_light = true; +} + +void Shader::UpdateFragmentCache(const std::string& name, + const std::vector& value) { + if (name != "lights") { return; } + fragment_uniform_cache_.lights = value; + fragment_uniform_cache_.has_lights = true; fragment_uniform_cache_.derived_valid = false; - if (fragment_uniform_cache_.has_light && fragment_uniform_cache_.has_camera) { + if (fragment_uniform_cache_.has_lights && fragment_uniform_cache_.has_camera) { RecalculateFragmentDerived(); } } @@ -155,14 +165,18 @@ void Shader::UpdateFragmentCache(const std::string& name, fragment_uniform_cache_.camera_pos = value; fragment_uniform_cache_.has_camera = true; fragment_uniform_cache_.derived_valid = false; - if (fragment_uniform_cache_.has_light && fragment_uniform_cache_.has_camera) { + if (fragment_uniform_cache_.has_lights && fragment_uniform_cache_.has_camera) { RecalculateFragmentDerived(); } } void Shader::RecalculateFragmentDerived() { - fragment_uniform_cache_.light_dir_normalized = - glm::normalize(fragment_uniform_cache_.light.dir); + fragment_uniform_cache_.light_dirs_normalized.clear(); + fragment_uniform_cache_.light_dirs_normalized.reserve( + fragment_uniform_cache_.lights.size()); + for (const auto& l : fragment_uniform_cache_.lights) { + fragment_uniform_cache_.light_dirs_normalized.push_back(glm::normalize(l.dir)); + } fragment_uniform_cache_.derived_valid = true; } @@ -196,13 +210,27 @@ void Shader::PrepareFragmentUniformCache() { if (fragment_uniform_cache_.derived_valid) { return; } + // 优先多光源 + 
if (uniformbuffer_.HasUniform>("lights") && + uniformbuffer_.HasUniform("cameraPos")) { + fragment_uniform_cache_.lights = + uniformbuffer_.GetUniform>("lights"); + fragment_uniform_cache_.has_lights = true; + fragment_uniform_cache_.camera_pos = + uniformbuffer_.GetUniform("cameraPos"); + fragment_uniform_cache_.has_camera = true; + RecalculateFragmentDerived(); + return; + } + // 兼容单光源 if (uniformbuffer_.HasUniform("light") && uniformbuffer_.HasUniform("cameraPos")) { - fragment_uniform_cache_.light = - uniformbuffer_.GetUniform("light"); + fragment_uniform_cache_.lights.clear(); + fragment_uniform_cache_.lights.push_back( + uniformbuffer_.GetUniform("light")); fragment_uniform_cache_.camera_pos = uniformbuffer_.GetUniform("cameraPos"); - fragment_uniform_cache_.has_light = true; + fragment_uniform_cache_.has_lights = true; fragment_uniform_cache_.has_camera = true; RecalculateFragmentDerived(); } @@ -259,59 +287,81 @@ auto Shader::EvaluateSpecular(float cos_theta, float shininess) const -> float { } Color Shader::FragmentShader(const Fragment& fragment) const { - // interpolate Normal, Color and UV - Color interpolateColor = fragment.color; + // Helper: 将 Color 转为 [0,1] 归一化向量 + auto color_to_vec = [](const Color& c) -> Vector3f { + constexpr float inv255 = 1.0f / 255.0f; + return Vector3f(static_cast(c[Color::kColorIndexRed]) * inv255, + static_cast(c[Color::kColorIndexGreen]) * inv255, + static_cast(c[Color::kColorIndexBlue]) * inv255); + }; + + // 输入插值属性 + Vector3f base_color = color_to_vec(fragment.color); Vector3f normal = glm::normalize(fragment.normal); Vector2f uv = fragment.uv; - // uniform - Light light; - Vector3f light_dir; + // uniform(优先缓存) + std::vector lights; + std::vector light_dirs; Vector3f camera_pos; if (fragment_uniform_cache_.derived_valid) { - light = fragment_uniform_cache_.light; - light_dir = fragment_uniform_cache_.light_dir_normalized; + lights = fragment_uniform_cache_.lights; + light_dirs = 
fragment_uniform_cache_.light_dirs_normalized; camera_pos = fragment_uniform_cache_.camera_pos; } else { - light = uniformbuffer_.GetUniform("light"); + if (uniformbuffer_.HasUniform>("lights")) { + lights = uniformbuffer_.GetUniform>("lights"); + light_dirs.reserve(lights.size()); + for (const auto& l : lights) light_dirs.push_back(glm::normalize(l.dir)); + } else if (uniformbuffer_.HasUniform("light")) { + lights = {uniformbuffer_.GetUniform("light")}; + light_dirs = {glm::normalize(lights[0].dir)}; + } camera_pos = uniformbuffer_.GetUniform("cameraPos"); - light_dir = glm::normalize(light.dir); } + Material material = *fragment.material; - // view direction - Vector3f view_dir = - glm::normalize(sharedDataInShader_.fragPos_varying - camera_pos); + // 视线方向 + Vector3f view_dir = glm::normalize(sharedDataInShader_.fragPos_varying - camera_pos); - auto intensity = std::max(glm::dot(normal, light_dir), 0.0f); - // texture color - Color ambient_color, diffuse_color, specular_color; + // ambient(只计算一次,使用纹理或顶点颜色) + Vector3f ambient_rgb; if (material.has_ambient_texture) { - Color texture_color = SampleTexture(material.ambient_texture, uv); - ambient_color = texture_color; + ambient_rgb = color_to_vec(SampleTexture(material.ambient_texture, uv)); } else { - ambient_color = interpolateColor; + ambient_rgb = base_color; } - if (material.has_diffuse_texture) { - Color texture_color = SampleTexture(material.diffuse_texture, uv); - diffuse_color = texture_color * intensity; - } else { - diffuse_color = interpolateColor * intensity; - } - - Vector3f halfVector = glm::normalize(light_dir + view_dir); - float cos_theta = std::max(glm::dot(normal, halfVector), 0.0f); - float spec = EvaluateSpecular(cos_theta, material.shininess); - if (material.has_specular_texture) { - Color texture_color = SampleTexture(material.specular_texture, uv); - specular_color = texture_color * spec; - } else { - specular_color = Color(1.0f, 1.0f, 1.0f) * spec; + // diffuse/specular 累加(float 归一化空间,避免 
8bit 溢出与截断) + Vector3f diffuse_accum(0.0f); + Vector3f specular_accum(0.0f); + for (size_t i = 0; i < light_dirs.size(); ++i) { + const Vector3f& ldir = light_dirs[i]; + float intensity = std::max(glm::dot(normal, ldir), 0.0f); + + // diffuse + Vector3f kd = material.has_diffuse_texture + ? color_to_vec(SampleTexture(material.diffuse_texture, uv)) + : base_color; + diffuse_accum += kd * intensity; + + // specular + Vector3f halfVector = glm::normalize(ldir + view_dir); + float cos_theta = std::max(glm::dot(normal, halfVector), 0.0f); + float spec = EvaluateSpecular(cos_theta, material.shininess); + Vector3f ks = material.has_specular_texture + ? color_to_vec(SampleTexture(material.specular_texture, uv)) + : Vector3f(1.0f); + specular_accum += ks * spec; } - return ClampColor(ambient_color * 0.1f + diffuse_color + - specular_color * 0.2f); + Vector3f out_rgb = ambient_rgb * 0.1f + diffuse_accum + specular_accum * 0.2f; + // clamp 到 [0,1] + out_rgb.x = std::clamp(out_rgb.x, 0.0f, 1.0f); + out_rgb.y = std::clamp(out_rgb.y, 0.0f, 1.0f); + out_rgb.z = std::clamp(out_rgb.z, 0.0f, 1.0f); + return Color(out_rgb.x, out_rgb.y, out_rgb.z, 1.0f); } // 将浮点数转换为 uint8_t diff --git a/test/system_test/main.cpp b/test/system_test/main.cpp index d6491d9..6ec3502 100755 --- a/test/system_test/main.cpp +++ b/test/system_test/main.cpp @@ -74,9 +74,14 @@ int main(int argc, char **argv) { simple_renderer::Shader shader; shader.SetUniform("modelMatrix", modelMatrix); - simple_renderer::Light light; - light.dir = simple_renderer::Vector3f(1.0f, 5.0f, 1.0f); - shader.SetUniform("light", light); + // 多光源 + std::vector lights; + { + simple_renderer::Light l0; l0.dir = simple_renderer::Vector3f( 1.0f, 5.0f, 1.0f); lights.push_back(l0); + simple_renderer::Light l1; l1.dir = simple_renderer::Vector3f(-3.0f, -2.0f, 2.0f); lights.push_back(l1); + simple_renderer::Light l2; l2.dir = simple_renderer::Vector3f( 2.0f, 1.0f, -1.0f); lights.push_back(l2); + } + shader.SetLights(lights); 
simple_renderer::Camera camera(simple_renderer::Vector3f(0.0f, 0.0f, 1.0f));