fix sign: return 0 for zero input, matching the mathematical definition and the GLSL specification

xiaomx32 · xiaomx32 · commit 52ace2bdd30b · 2026-03-16T20:05:26.000+08:00
diff --git a/include/luisa/core/mathematics.h b/include/luisa/core/mathematics.h
@@ -12,7 +12,7 @@ namespace luisa {
 
 /**
  * @brief Find next 2^n of v
- * 
+ *
  * @tparam uint32 or uint64
  * @param v input number
  * @return same as v
@@ -695,7 +695,9 @@ template<size_t N>
     return scaling(make_float3(s));
 }
 
-[[nodiscard]] constexpr auto sign(float x) noexcept { return x < 0.f ? -1.f : 1.f; }
+[[nodiscard]] constexpr auto sign(float x) noexcept {
+    return static_cast<float>(x > 0.0f) - static_cast<float>(x < 0.0f);
+}
 
 [[nodiscard]] constexpr auto sign(float2 v) noexcept {
     return make_float2(sign(v.x), sign(v.y));
@@ -755,7 +757,9 @@ template<size_t N>
     return scaling(make_double3(s));
 }
 
-[[nodiscard]] constexpr auto sign(double x) noexcept { return x < 0. ? -1. : 1.; }
+[[nodiscard]] constexpr auto sign(double x) noexcept {
+    return static_cast<double>(x > 0.0) - static_cast<double>(x < 0.0);
+}
 
 [[nodiscard]] constexpr auto sign(double2 v) noexcept {
     return make_double2(sign(v.x), sign(v.y));
@@ -769,8 +773,9 @@ template<size_t N>
     return make_double4(sign(v.x), sign(v.y), sign(v.z), sign(v.w));
 }
 
-
-[[nodiscard]] constexpr auto sign(int x) noexcept { return x < 0 ? -1 : 1; }
+[[nodiscard]] constexpr auto sign(int x) noexcept {
+    return static_cast<int>(x > 0) - static_cast<int>(x < 0);
+}
 
 [[nodiscard]] constexpr auto sign(int2 v) noexcept {
     return make_int2(sign(v.x), sign(v.y));
diff --git a/include/luisa/dsl/builtin.h b/include/luisa/dsl/builtin.h
@@ -22,21 +22,21 @@ inline namespace dsl {
  * @tparam Src Source expression type.
  * @param s Source expression.
  * @return A new DSL expression of type Dest.
- * 
+ *
  * Performs a value conversion (like static_cast in C++).
  * Supported conversions include:
  * - Between numeric types (int <-> float)
  * - Between vector types of same dimension
- * 
+ *
  * Example:
  * @code
  * Float f = 3.7f;
  * Int i = cast<int>(f);  // i = 3 (truncated)
- * 
+ *
  * Float3 f3 = make_float3(1.5f, 2.5f, 3.5f);
  * Int3 i3 = cast<int>(f3);  // (1, 2, 3)
  * @endcode
- * 
+ *
  * @see as() for bitwise reinterpretation
  */
 template<typename Dest, typename Src>
@@ -58,20 +58,20 @@ template<typename Dest, typename Src>
  * @tparam Src Source expression type.
  * @param s Source expression.
  * @return A new DSL expression of type Dest with the same bit pattern.
- * 
+ *
  * Performs a bitwise reinterpretation (like bit_cast/std::bit_cast in C++).
  * The source and destination types must have the same size.
- * 
+ *
  * Example:
  * @code
  * Float f = 1.0f;
  * // Reinterpret float bits as uint
  * UInt bits = as<uint>(f);  // bits = 0x3f800000
- * 
+ *
  * Float2 f2 = make_float2(1.0f, 2.0f);
  * UInt2 u2 = as<uint2>(f2);  // Reinterpret as uints
  * @endcode
- * 
+ *
  * @see cast() for value conversion
  */
 template<typename Dest, typename Src>
@@ -90,19 +90,19 @@ template<typename Dest, typename Src>
 /**
  * @brief Provide a boolean assumption hint to the compiler.
  * @param pred Boolean expression that is assumed to be true.
- * 
+ *
  * The assume statement tells the optimizer that the condition is always
  * true, allowing it to generate more efficient code. Use with caution -
  * if the assumption is violated, undefined behavior occurs.
- * 
+ *
  * Example:
  * @code
  * Var<int> index = ...;
  * assume(index >= 0 && index < buffer_size);
  * // Compiler can now optimize knowing index is in bounds
  * Float value = buffer.read(index);
  * @endcode
- * 
+ *
  * @see unreachable() for marking unreachable code
  */
 inline void assume(Expr<bool> pred) noexcept {
@@ -112,11 +112,11 @@ inline void assume(Expr<bool> pred) noexcept {
 
 /**
  * @brief Mark code as unreachable.
- * 
+ *
  * Tells the compiler that this code path should never be executed.
  * Useful after branches that always return/exit or for switch defaults
  * that should never be hit.
- * 
+ *
  * Example:
  * @code
  * $switch (value) {
@@ -127,7 +127,7 @@ inline void assume(Expr<bool> pred) noexcept {
  *     };
  * };
  * @endcode
- * 
+ *
  * @param msg Optional message for debugging
  * @see assume() for providing optimization hints
  */
@@ -160,18 +160,18 @@ inline void device_assert(Expr<bool> pred, luisa::string_view msg) noexcept {
 /**
  * @brief Get the thread index within its block.
  * @return uint3 containing (x, y, z) thread coordinates within the block.
- * 
+ *
  * The thread_id identifies a thread's position within its thread block.
  * It ranges from (0, 0, 0) to block_size() - 1.
- * 
+ *
  * Example:
  * @code
  * Kernel1D kernel = [&]() noexcept {
  *     UInt tid = thread_id().x;  // 0 to block_size().x - 1
  *     // Use tid for shared memory indexing...
  * };
  * @endcode
- * 
+ *
  * @see block_id() for block position in the grid
  * @see dispatch_id() for global thread position
  */
@@ -197,10 +197,10 @@ inline void device_assert(Expr<bool> pred, luisa::string_view msg) noexcept {
 /**
  * @brief Get the block index within the dispatch grid.
  * @return uint3 containing (x, y, z) block coordinates.
- * 
+ *
  * The block_id identifies which thread block this thread belongs to.
  * It ranges from (0, 0, 0) to (grid_dim - 1).
- * 
+ *
  * @see thread_id() for position within the block
  * @see dispatch_id() for global thread position
  */
@@ -226,13 +226,13 @@ inline void device_assert(Expr<bool> pred, luisa::string_view msg) noexcept {
 /**
  * @brief Get the global thread index in the dispatch grid.
  * @return uint3 containing (x, y, z) global coordinates.
- * 
+ *
  * The dispatch_id is the global thread identifier, computed as:
  * dispatch_id = block_id * block_size + thread_id
- * 
+ *
  * This is the most commonly used coordinate for indexing into
  * buffers and images.
- * 
+ *
  * Example:
  * @code
  * Kernel2D process_image = [&](ImageFloat img) noexcept {
@@ -242,7 +242,7 @@ inline void device_assert(Expr<bool> pred, luisa::string_view msg) noexcept {
  * };
  * stream << shader(image).dispatch(width, height);
  * @endcode
- * 
+ *
  * @see dispatch_size() for total grid dimensions
  * @see thread_id() for local thread position
  */
@@ -276,10 +276,10 @@ inline void device_assert(Expr<bool> pred, luisa::string_view msg) noexcept {
 /**
  * @brief Get the total dispatch grid size.
  * @return uint3 containing (width, height, depth) of the dispatch grid.
- * 
+ *
  * The dispatch_size represents the total number of threads in each dimension.
  * Useful for normalizing coordinates or computing global indices.
- * 
+ *
  * Example:
  * @code
  * Kernel2D render = [&](ImageFloat image) noexcept {
@@ -289,7 +289,7 @@ inline void device_assert(Expr<bool> pred, luisa::string_view msg) noexcept {
  *     // uv is now in [0, 1] range...
  * };
  * @endcode
- * 
+ *
  * @see dispatch_id() for current thread position
  * @see set_block_size() for configuring block dimensions
  */
@@ -315,10 +315,10 @@ inline void device_assert(Expr<bool> pred, luisa::string_view msg) noexcept {
 /**
  * @brief Get the thread block size.
  * @return uint3 containing (x, y, z) dimensions of each thread block.
- * 
+ *
  * The block_size represents how many threads are in each block.
  * Use this for computing local indices or shared memory offsets.
- * 
+ *
  * @see set_block_size() for configuring block dimensions at compile time
  */
 [[nodiscard]] inline const auto block_size() noexcept {
@@ -1722,8 +1722,12 @@ template<typename X, typename Y>
 template<typename X>
     requires is_dsl_v<X> && is_floating_point_or_vector_expr_v<X>
 [[nodiscard]] inline auto sign(X &&x) noexcept {
-    return copysign(1.0f, std::forward<X>(x));
+    using Scalar = expr_value_t<decltype(x)>;
+    auto zero = def<Scalar>(0);
+    auto value = std::forward<X>(x);
+    return cast<Scalar>(value > zero) - cast<Scalar>(value < zero);
 }
+
 template<typename X>
     requires is_dsl_v<X> && (is_scalar_v<expr_value_t<X>> || is_matrix_v<expr_value_t<X>> || is_vector_v<expr_value_t<X>>)
 [[nodiscard]] inline auto ddx(X &&x) noexcept {