Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions include/infinicore/ops/addcdiv.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the addcdiv op.
/// Presumably computes output = input + value * (t1 / t2), torch-style —
/// NOTE(review): confirm against the registered backend kernels.
class Addcdiv {
public:
/// Backend kernel signature: (input, t1, t2, output, value).
using schema = void (*)(Tensor, Tensor, Tensor, Tensor, float);
/// Runs the kernel registered for `input`'s device, writing into `output`.
static void execute(Tensor input, Tensor t1, Tensor t2, Tensor output, float value);
/// Per-device registry of backend implementations.
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocating variant: returns a freshly allocated result tensor.
Tensor addcdiv(Tensor input, Tensor t1, Tensor t2, float value);
/// Out-variant: writes the result into caller-provided `output`.
void addcdiv_(Tensor input, Tensor t1, Tensor t2, Tensor output, float value);

} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/atan2.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the element-wise atan2 op
/// (two tensor operands, one output tensor).
class Atan2 {
public:
/// Backend kernel signature: (input, other, output).
using schema = void (*)(Tensor, Tensor, Tensor);
/// Runs the kernel registered for `input`'s device, writing into `output`.
static void execute(Tensor input, Tensor other, Tensor output);
/// Per-device registry of backend implementations.
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocating variant: returns a freshly allocated result tensor.
Tensor atan2(Tensor input, Tensor other);
/// Out-variant: writes the result into caller-provided `output`.
void atan2_(Tensor input, Tensor other, Tensor output);

} // namespace infinicore::op
20 changes: 20 additions & 0 deletions include/infinicore/ops/binary_cross_entropy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
#include <string>

namespace infinicore::op {

/// Dispatch entry for binary cross entropy with an optional per-element
/// weight tensor and a string reduction mode (e.g. "none"/"mean"/"sum" —
/// NOTE(review): accepted values are enforced by the backends, confirm there).
class BinaryCrossEntropy {
public:
/// Backend kernel signature: (input, target, weight?, output, reduction).
/// NOTE(review): `std::string` is passed by value through the dispatcher;
/// consider `const std::string&`/`std::string_view` if this shows up hot.
using schema = void (*)(Tensor, Tensor, std::optional<Tensor>, Tensor, std::string);
/// Runs the kernel registered for `input`'s device, writing into `output`.
static void execute(Tensor input, Tensor target, std::optional<Tensor> weight, Tensor output, std::string reduction);
/// Per-device registry of backend implementations.
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocating variant: returns a freshly allocated result tensor.
Tensor binary_cross_entropy(Tensor input, Tensor target, std::optional<Tensor> weight, std::string reduction);
/// Out-variant: writes the result into caller-provided `output`.
void binary_cross_entropy_(Tensor input, Tensor target, std::optional<Tensor> weight, Tensor output, std::string reduction);

} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/bucketize.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for bucketize: maps each element of `input` to the index
/// of the bucket delimited by the sorted `boundaries` tensor. `right`
/// selects which side of an exact boundary match the element falls on
/// (torch-style flag — confirm exact convention against the backends).
class Bucketize {
public:
/// Backend kernel signature: (input, boundaries, output, right).
using schema = void (*)(Tensor, Tensor, Tensor, bool);
/// Runs the kernel registered for `input`'s device, writing into `output`.
static void execute(Tensor input, Tensor boundaries, Tensor output, bool right);
/// Per-device registry of backend implementations.
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocating variant: returns a freshly allocated result tensor.
Tensor bucketize(Tensor input, Tensor boundaries, bool right = false);
/// Out-variant: writes the result into caller-provided `output`.
void bucketize_(Tensor input, Tensor boundaries, Tensor output, bool right = false);

} // namespace infinicore::op
18 changes: 18 additions & 0 deletions include/infinicore/ops/minimum.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

/// Dispatch entry for the element-wise minimum of two tensors.
class Minimum {
public:
/// Backend kernel signature: (input, other, output).
using schema = void (*)(Tensor, Tensor, Tensor);
/// Runs the kernel registered for `input`'s device, writing into `output`.
static void execute(Tensor input, Tensor other, Tensor output);
/// Per-device registry of backend implementations.
static common::OpDispatcher<schema> &dispatcher();
};

/// Allocating variant: returns a freshly allocated result tensor.
Tensor minimum(Tensor input, Tensor other);
/// Out-variant: writes the result into caller-provided `output`.
void minimum_(Tensor input, Tensor other, Tensor output);

} // namespace infinicore::op
8 changes: 8 additions & 0 deletions python/infinicore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,12 @@
)
from infinicore.ops.add import add
from infinicore.ops.add_rms_norm import add_rms_norm, add_rms_norm_
from infinicore.ops.addcdiv import addcdiv
from infinicore.ops.atan2 import atan2
from infinicore.ops.attention import attention
from infinicore.ops.bucketize import bucketize
from infinicore.ops.matmul import matmul
from infinicore.ops.minimum import minimum
from infinicore.ops.mul import mul
from infinicore.ops.narrow import narrow
from infinicore.ops.paged_attention import paged_attention
Expand Down Expand Up @@ -134,6 +138,10 @@
"strided_empty",
"strided_from_blob",
"zeros",
"minimum",
"atan2",
"addcdiv",
"bucketize",
]

use_ntops = False
Expand Down
2 changes: 2 additions & 0 deletions python/infinicore/nn/functional/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .binary_cross_entropy import binary_cross_entropy
from .causal_softmax import causal_softmax
from .embedding import embedding
from .linear import linear
Expand All @@ -17,4 +18,5 @@
"embedding",
"rope",
"RopeAlgo",
"binary_cross_entropy",
]
47 changes: 47 additions & 0 deletions python/infinicore/nn/functional/binary_cross_entropy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def binary_cross_entropy(
    input: Tensor,
    target: Tensor,
    weight: Tensor | None = None,
    size_average=None,
    reduce=None,
    reduction: str = "mean",
    *,
    out=None,
) -> Tensor:
    r"""Compute binary cross entropy between ``input`` and ``target``.

    ``size_average`` / ``reduce`` are the deprecated torch-style flags; when
    either is given they override ``reduction``. Writes into ``out`` when
    provided, otherwise returns a new tensor.
    """

    if size_average is not None or reduce is not None:
        # Legacy-flag mapping (mirrors torch's legacy_get_string).
        if reduce is False:
            reduction = "none"
        elif size_average is True or size_average is None:
            reduction = "mean"
        else:
            reduction = "sum"

    on_ntops_device = input.device.type in ("cuda", "musa")
    if infinicore.use_ntops and on_ntops_device:
        return infinicore.ntops.torch.binary_cross_entropy(
            input, target, weight=weight, reduction=reduction, out=out
        )

    raw_weight = None if weight is None else weight._underlying

    if out is not None:
        _infinicore.binary_cross_entropy_(
            input._underlying,
            target._underlying,
            raw_weight,
            out._underlying,
            reduction,
        )
        return out

    raw_result = _infinicore.binary_cross_entropy(
        input._underlying, target._underlying, raw_weight, reduction
    )
    return Tensor(raw_result)
38 changes: 38 additions & 0 deletions python/infinicore/ops/addcdiv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def addcdiv(
    input: Tensor,
    tensor1: Tensor,
    tensor2: Tensor,
    *,
    value=1.0,
    out=None,
) -> Tensor:
    r"""Compute ``input + value * (tensor1 / tensor2)`` element-wise.

    Writes into ``out`` when provided, otherwise returns a new tensor.
    """

    on_ntops_device = input.device.type in ("cuda", "musa")
    if infinicore.use_ntops and on_ntops_device:
        return infinicore.ntops.torch.addcdiv(
            input, tensor1, tensor2, value=value, out=out
        )

    scale = float(value)
    raw_inputs = (input._underlying, tensor1._underlying, tensor2._underlying)

    if out is not None:
        _infinicore.addcdiv_(*raw_inputs, out._underlying, scale)
        return out

    return Tensor(_infinicore.addcdiv(*raw_inputs, scale))
21 changes: 21 additions & 0 deletions python/infinicore/ops/atan2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def atan2(
    input: Tensor,
    other: Tensor,
    *,
    out=None,
) -> Tensor:
    r"""Compute the element-wise arctangent of ``input / other``.

    Writes into ``out`` when provided, otherwise returns a new tensor.
    """

    on_ntops_device = input.device.type in ("cuda", "musa")
    if infinicore.use_ntops and on_ntops_device:
        return infinicore.ntops.torch.atan2(input, other, out=out)

    if out is not None:
        _infinicore.atan2_(input._underlying, other._underlying, out._underlying)
        return out

    return Tensor(_infinicore.atan2(input._underlying, other._underlying))
28 changes: 28 additions & 0 deletions python/infinicore/ops/bucketize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def bucketize(
    input: Tensor,
    boundaries: Tensor,
    *,
    out=None,
    right=False,
) -> Tensor:
    r"""Map each element of ``input`` to its bucket index in ``boundaries``.

    Writes into ``out`` when provided, otherwise returns a new tensor.
    """

    on_ntops_device = input.device.type in ("cuda", "musa")
    if infinicore.use_ntops and on_ntops_device:
        return infinicore.ntops.torch.bucketize(input, boundaries, out=out, right=right)

    use_right = bool(right)

    if out is not None:
        _infinicore.bucketize_(
            input._underlying, boundaries._underlying, out._underlying, use_right
        )
        return out

    raw_result = _infinicore.bucketize(
        input._underlying, boundaries._underlying, use_right
    )
    return Tensor(raw_result)
21 changes: 21 additions & 0 deletions python/infinicore/ops/minimum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def minimum(
    input: Tensor,
    other: Tensor,
    *,
    out=None,
) -> Tensor:
    r"""Compute the element-wise minimum of ``input`` and ``other``.

    Writes into ``out`` when provided, otherwise returns a new tensor.
    """

    on_ntops_device = input.device.type in ("cuda", "musa")
    if infinicore.use_ntops and on_ntops_device:
        return infinicore.ntops.torch.minimum(input, other, out=out)

    if out is not None:
        _infinicore.minimum_(input._underlying, other._underlying, out._underlying)
        return out

    return Tensor(_infinicore.minimum(input._underlying, other._underlying))
47 changes: 47 additions & 0 deletions src/infinicore/ops/addcdiv/addcdiv.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#include "infinicore/ops/addcdiv.hpp"

#include "../../utils.hpp"

#include <algorithm>
#include <stdexcept>

namespace infinicore::op {

/// Returns the process-wide registry mapping device types to backend
/// implementations of the addcdiv kernel.
common::OpDispatcher<Addcdiv::schema> &Addcdiv::dispatcher() {
    // Meyers singleton: constructed on first use, shared by all callers.
    static common::OpDispatcher<Addcdiv::schema> dispatcher_;
    return dispatcher_;
} // NOTE: stray ';' after the function body removed (-Wextra-semi).

// Routes an addcdiv call to the backend registered for `input`'s device,
// after making that device current in the runtime context.
void Addcdiv::execute(Tensor input, Tensor t1, Tensor t2, Tensor output, float value) {
infinicore::context::setDevice(input->device());
// lookup() resolves the per-device kernel; behavior when no backend is
// registered is defined by OpDispatcher (presumably throws — confirm).
dispatcher().lookup(input->device().getType())(input, t1, t2, output, value);
}

/// Computes the NumPy-style broadcast of three shapes, aligning them from
/// the trailing (rightmost) dimension; missing leading dimensions count as 1.
/// @throws std::runtime_error if the shapes are not mutually broadcastable.
static Shape broadcast_shape_3(const Shape &a, const Shape &b, const Shape &c) {
    // size_t throughout: the original mixed `int` with the unsigned results
    // of .size(), risking sign-conversion warnings and truncation of large
    // dimension values.
    const size_t ndim = std::max({a.size(), b.size(), c.size()});
    Shape out_shape;
    out_shape.reserve(ndim);

    for (size_t i = 0; i < ndim; ++i) {
        // Right-align each shape: positions before its first dim act as 1.
        const auto dim_a = (i < ndim - a.size()) ? 1 : a[i - (ndim - a.size())];
        const auto dim_b = (i < ndim - b.size()) ? 1 : b[i - (ndim - b.size())];
        const auto dim_c = (i < ndim - c.size()) ? 1 : c[i - (ndim - c.size())];

        const auto target = std::max({dim_a, dim_b, dim_c});

        // Each dim must either match the broadcast size or be 1.
        if ((dim_a != target && dim_a != 1) || (dim_b != target && dim_b != 1) || (dim_c != target && dim_c != 1)) {
            throw std::runtime_error("Shapes are not broadcastable");
        }
        out_shape.push_back(target);
    }
    return out_shape;
}

// Allocating variant: computes the broadcast output shape, allocates the
// result (dtype/device taken from `input`), and runs the kernel into it.
Tensor addcdiv(Tensor input, Tensor t1, Tensor t2, float value) {
Shape out_shape = broadcast_shape_3(input->shape(), t1->shape(), t2->shape());
auto output = Tensor::empty(out_shape, input->dtype(), input->device());
addcdiv_(input, t1, t2, output, value);
return output;
}

// Out-variant: writes the result into caller-provided `output`; no shape
// validation here — the backend kernel is assumed to check compatibility.
void addcdiv_(Tensor input, Tensor t1, Tensor t2, Tensor output, float value) {
Addcdiv::execute(input, t1, t2, output, value);
}

} // namespace infinicore::op
Loading