diff --git a/PBR/Render/shared/core_renderer.hpp b/PBR/Render/shared/core_renderer.hpp index c0c5ef5..2800ff2 100644 --- a/PBR/Render/shared/core_renderer.hpp +++ b/PBR/Render/shared/core_renderer.hpp @@ -27,6 +27,104 @@ fgt_device_gpu inline fungt::Vec3 sampleHemisphere(const fungt::Vec3& normal, fu return (tangent * xs + bitangent * ys + normal * zs).normalize(); +} +fgt_device_gpu fungt::Vec3 pathTracer_CookTorrance( + const fungt::Ray& initialRay, + const Triangle* tris, + const BVHNode* nodes, + const Light* lights, + const TextureDeviceObject* textures, + int numOfTextures, + int numOfTriangles, + int numOfNodes, + int numOfLights, + fungt::RNG& fgtRng) +{ + fungt::Vec3 throughput(1.0f, 1.0f, 1.0f); + fungt::Vec3 radiance(0.0f, 0.0f, 0.0f); + fungt::Ray currRay = initialRay; + + for (int bounce = 0; bounce < 6; ++bounce) { + HitData hit; + //bool hitAny = traceRay(currRay, tris, numOfTriangles,textures, hit); + bool hitAny = traceRayBVH(currRay, tris, nodes, numOfNodes, textures, hit); + + if (!hitAny) { + radiance += throughput * skyColor(currRay); + break; + } + + fungt::Vec3 N = hit.normal.normalize(); + fungt::Vec3 V = (currRay.m_dir * (-1.0f)).normalize(); + + // Extract material properties + fungt::Vec3 baseColor = fungt::Vec3(hit.material.baseColor[0], + hit.material.baseColor[1], + hit.material.baseColor[2]); + float metallic = fmaxf(0.0f, fminf(hit.material.metallic, 1.0f)); + float roughness = fmaxf(0.05f, fminf(hit.material.roughness, 1.0f)); + fungt::Vec3 dielectricF0 = fungt::Vec3(hit.material.reflectance, + hit.material.reflectance, + hit.material.reflectance); + fungt::Vec3 F0 = lerp(dielectricF0, baseColor, metallic); + + // Add emission if any + if (hit.material.emission > 0.0f) { + radiance += throughput * baseColor * hit.material.emission; + } + + // Direct lighting from all lights + fungt::Vec3 directLight(0.0f); + for (int l = 0; l < numOfLights; ++l) { + fungt::Vec3 toLight = lights[l].m_pos - hit.point; + float dist = toLight.length(); + fungt::Vec3 L = toLight / dist; + + // Shadow test + fungt::Ray shadowRay(hit.point + hit.geometricNormal * 0.001f, L); + HitData temp; + //bool occluded = traceRay(shadowRay, tris, numOfTriangles,textures, temp) && temp.dis < dist; + bool occluded = traceRayBVH(shadowRay, tris, nodes, numOfNodes, textures, temp) && temp.dis < dist; + if (occluded) continue; + + // Light intensity with inverse square falloff + fungt::Vec3 lightRadiance = lights[l].m_intensity / (dist * dist + 1e-6f); + + // Evaluate BRDF + directLight += evaluateCookTorrance(N, V, L, hit.material, lightRadiance); + } + + radiance += throughput * directLight; + + // Prepare indirect bounce - sample diffuse hemisphere + fungt::Vec3 newDir = sampleHemisphere(N, fgtRng); + //fungt::Vec3 newDir = sampleHemisphere(N, rng); + //float cosTheta = fmaxf(newDir.dot(N), 0.0f); + + // Update throughput for next bounce + // kD is the diffuse component (energy NOT reflected by Fresnel) + fungt::Vec3 avgF = F_Schlick(F0, fmaxf(V.dot(N), 0.0f)); + fungt::Vec3 kD = (fungt::Vec3(1.0f, 1.0f, 1.0f) - avgF) * (1.0f - metallic); + + // For diffuse sampling: BRDF = kD * baseColor / PI + // PDF = cosTheta / PI + // throughput *= BRDF * cosTheta / PDF = (kD * baseColor / PI) * cosTheta / (cosTheta / PI) + // Simplifies to: throughput *= kD * baseColor + throughput = throughput * (kD * baseColor); + + currRay = fungt::Ray(hit.point + N * 0.001f, newDir); + + // Russian roulette termination + if (bounce > 2) { + float maxComponent = fmaxf(throughput.x, fmaxf(throughput.y, throughput.z)); + float p = fminf(0.95f, maxComponent); + //if (randomFloat(rng) > p) break; + if (fgtRng.nextFloat() > p) break; + throughput = throughput / p; + } + } + + return radiance; } fgt_device_gpu bool inline traceRayBVH( const fungt::Ray& ray, diff --git a/PBR/Render/src/cuda_renderer.cu b/PBR/Render/src/cuda_renderer.cu index 3947d1f..aeab299 100644 --- a/PBR/Render/src/cuda_renderer.cu +++ b/PBR/Render/src/cuda_renderer.cu @@ -180,7 +180,6 @@ fgt_device_gpu fungt::Vec3 pathTracer_CookTorrance( int numOfTriangles, int numOfNodes, int numOfLights, - curandState* rng, fungt::RNG &fgtRng) { fungt::Vec3 throughput(1.0f, 1.0f, 1.0f); @@ -269,7 +268,19 @@ fgt_device_gpu fungt::Vec3 pathTracer_CookTorrance( return radiance; } +fgt_global void finalize_kernel( + fungt::Vec3* framebuffer, + int width, + int height, + int totalSamples +) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x >= width || y >= height) return; + int idx = y * width + x; + framebuffer[idx] = framebuffer[idx] / float(totalSamples); +} fgt_global void render_kernel( fungt::Vec3* framebuffer, const Triangle* triangles, @@ -293,9 +304,10 @@ fgt_global void render_kernel( int idx = y * width + x; fungt::RNG rng(idx * 1337ULL + 123ULL); + //fungt::RNG rng(idx * 1337ULL + samplesPerPixel * 7919ULL); - curandState randomState; - curand_init(seed + idx, 0, 0, &randomState); + //curandState randomState; + //curand_init(seed + idx, 0, 0, &randomState); //fungt::Vec3 pixelColor(0.0f, 0.0f, 0.0f); @@ -338,14 +350,13 @@ fgt_global void render_kernel( pixel += pathTracer_CookTorrance(ray, triangles,nodes, lights, textures,numTextures, numOfTriangles, - numOfNodes, numOfLights, &randomState,rng); + numOfNodes, numOfLights,rng); } pixel = pixel / float(samplesPerPixel); + //framebuffer[idx] = framebuffer[idx] + pixel; framebuffer[idx] = fungt::Vec3(pixel.x, pixel.y, pixel.z); - - } std::vector CUDA_Renderer::RenderScene( int width, int height, @@ -399,25 +410,27 @@ std::vector CUDA_Renderer::RenderScene( std::cout << "WARNING: CUDA Textures ptr is NUL " << std::endl; } - - render_kernel << > > ( - device_buff, - device_Tlist, - device_bvhNode, - device_lights, - m_textureObj, - m_numTextures, - int(triangleList.size()), - int(nodes.size()), - int(lightsList.size()), - width, - height, - camera, - samplesPerPixel, - seed - ); - - + //for (int sample = 0; sample> > ( + device_buff, + device_Tlist, + device_bvhNode, + device_lights, + m_textureObj, + m_numTextures, + int(triangleList.size()), + int(nodes.size()), + int(lightsList.size()), + width, + height, + camera, + samplesPerPixel, + seed + ); + + //} + //(cudaDeviceSynchronize()); //Wait for all samples + //finalize_kernel << > > (device_buff, width, height, samplesPerPixel); CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaDeviceSynchronize());