2 changes: 2 additions & 0 deletions mllm/backends/ascend/AscendBackend.cpp
@@ -13,6 +13,8 @@ namespace mllm::ascend {

AscendBackend::AscendBackend() : Backend(kAscend, createAscendAllocator()) {
  regOpFactory<AscendAddOpFactory>();
  regOpFactory<AscendSubOpFactory>();
  regOpFactory<AscendMulOpFactory>();
  regOpFactory<AscendX2XOpFactory>();
  auto& devices = AscendDeviceMetaInfo::instance().devices;
  for (const auto& device : devices) {
4 changes: 4 additions & 0 deletions mllm/backends/ascend/CMakeLists.txt
@@ -34,6 +34,10 @@ endif()
if(DEFINED ENV{ATB_HOME_PATH})
  target_include_directories(MllmAscendBackend PUBLIC $ENV{ATB_HOME_PATH}/include)
  target_link_directories(MllmAscendBackend PUBLIC $ENV{ATB_HOME_PATH}/lib)
elseif(EXISTS "${PROJECT_SOURCE_DIR}/../libs/atb")
  message(STATUS "Found ATB in ${PROJECT_SOURCE_DIR}/../libs/atb")
  target_include_directories(MllmAscendBackend PUBLIC "${PROJECT_SOURCE_DIR}/../libs/atb/include")
  target_link_directories(MllmAscendBackend PUBLIC "${PROJECT_SOURCE_DIR}/../libs/atb/lib")
Comment on lines +37 to +40
⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

# First, check if the file exists and examine its structure
find . -name "CMakeLists.txt" -path "*/backends/ascend/*" | head -20

Repository: UbiquitousLearning/mllm

Length of output: 104


🏁 Script executed:

# Get the full CMakeLists.txt file to verify lines 37-40 and understand context
if [ -f "mllm/backends/ascend/CMakeLists.txt" ]; then
  echo "=== File exists, showing content with line numbers ==="
  cat -n "mllm/backends/ascend/CMakeLists.txt" | head -50
else
  echo "File not found at mllm/backends/ascend/CMakeLists.txt"
fi

Repository: UbiquitousLearning/mllm

Length of output: 2355


🏁 Script executed:

# Check for similar ATB-related paths in the codebase
rg -n "libs/atb" --type cmake

Repository: UbiquitousLearning/mllm

Length of output: 531


Guard the ATB fallback path against partial installs.

If ../libs/atb exists but its include/ or lib/ subdirectory is missing, CMake will add invalid search paths that only surface as failures later, during compilation or linking. Check for the required subdirectories before wiring up the paths.

🔧 Suggested guard
-elseif(EXISTS "${PROJECT_SOURCE_DIR}/../libs/atb")
-  message(STATUS "Found ATB in ${PROJECT_SOURCE_DIR}/../libs/atb")
-  target_include_directories(MllmAscendBackend PUBLIC "${PROJECT_SOURCE_DIR}/../libs/atb/include")
-  target_link_directories(MllmAscendBackend PUBLIC "${PROJECT_SOURCE_DIR}/../libs/atb/lib")
+elseif(EXISTS "${PROJECT_SOURCE_DIR}/../libs/atb/include" AND EXISTS "${PROJECT_SOURCE_DIR}/../libs/atb/lib")
+  message(STATUS "Found ATB in ${PROJECT_SOURCE_DIR}/../libs/atb")
+  target_include_directories(MllmAscendBackend PUBLIC "${PROJECT_SOURCE_DIR}/../libs/atb/include")
+  target_link_directories(MllmAscendBackend PUBLIC "${PROJECT_SOURCE_DIR}/../libs/atb/lib")
+elseif(EXISTS "${PROJECT_SOURCE_DIR}/../libs/atb")
+  message(WARNING "ATB fallback path exists but include/lib are missing; skipping")
🤖 Prompt for AI Agents
In `mllm/backends/ascend/CMakeLists.txt`, around lines 37-40: the fallback block that configures ATB when EXISTS "${PROJECT_SOURCE_DIR}/../libs/atb" must guard against partial installs. Before calling target_include_directories and target_link_directories for the MllmAscendBackend target, check that both "${PROJECT_SOURCE_DIR}/../libs/atb/include" and "${PROJECT_SOURCE_DIR}/../libs/atb/lib" exist (with if(EXISTS ...) or if(IS_DIRECTORY ...)). Only add the include and link directories when both are present; otherwise emit a message(WARNING ...) and skip wiring the paths so CMake does not pick up invalid search paths.

else()
  message(WARNING "ATB_HOME_PATH not defined, ATB library will not be linked")
endif()
176 changes: 176 additions & 0 deletions mllm/backends/ascend/ops/AscendElewiseOps.cpp
@@ -106,4 +106,180 @@ void AscendAddOp::forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) {
  atb::DestroyOperation(op);
}

AscendSubOp::AscendSubOp(const aops::SubOpOptions& options) : aops::SubOp(options) {}

void AscendSubOp::setup(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) {
  BaseOp::setup(inputs, outputs);
}

void AscendSubOp::forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) {
  MLLM_RT_ASSERT_EQ(inputs.size(), 2);
  MLLM_RT_ASSERT_EQ(outputs.size(), 1);

  const auto& x = inputs[0];
  const auto& y = inputs[1];
  auto& z = outputs[0];

  if (x.dtype() != y.dtype() || x.dtype() != z.dtype()) {
    NYI("AscendSubOp currently requires x/y/z have same dtype");
  }
  if (x.numel() != y.numel() || x.numel() != z.numel()) {
    NYI("AscendSubOp demo only supports no-broadcast case (numel equal)");
  }

  atb::infer::ElewiseParam subParam;
  subParam.elewiseType = atb::infer::ElewiseParam::ELEWISE_SUB;

  atb::Operation* op = nullptr;
  auto st = atb::CreateOperation(subParam, &op);
  if (st != atb::NO_ERROR || op == nullptr) {
    MLLM_ERROR_EXIT(ExitCode::kAscendError, "ATB CreateOperation(ELEWISE_SUB) failed, status={}", static_cast<int>(st));
  }

  atb::Context* atb_ctx = getGlobalAtbContext();

  atb::Tensor atb_x;
  atb::Tensor atb_y;
  atb::Tensor atb_z;

  fillAtbTensorDesc(x, atb_x.desc);
  fillAtbTensorDesc(y, atb_y.desc);
  fillAtbTensorDesc(z, atb_z.desc);

  atb_x.deviceData = reinterpret_cast<uint8_t*>(x.ptr<void>());
  atb_x.dataSize = x.bytes();
  atb_y.deviceData = reinterpret_cast<uint8_t*>(y.ptr<void>());
  atb_y.dataSize = y.bytes();
  atb_z.deviceData = reinterpret_cast<uint8_t*>(z.ptr<void>());
  atb_z.dataSize = z.bytes();

  atb::SVector<atb::Tensor> inTensors;
  atb::SVector<atb::Tensor> outTensors;
  inTensors.push_back(atb_x);
  inTensors.push_back(atb_y);
  outTensors.push_back(atb_z);

  atb::VariantPack vp;
  vp.inTensors = inTensors;
  vp.outTensors = outTensors;

  uint64_t workspaceSize = 0;
  st = op->Setup(vp, workspaceSize, atb_ctx);
  if (st != atb::NO_ERROR) {
    MLLM_ERROR_EXIT(ExitCode::kAscendError, "ATB SubOp Setup failed, status={}", static_cast<int>(st));
  }

  void* workspace = nullptr;
  int workspace_block_id = -1;
  if (workspaceSize > 0) {
    auto& mem_mgr = getAscendMemoryManager();
    mem_mgr.allocateBlock(static_cast<uint32_t>(workspaceSize), workspace_block_id);
    mem_mgr.getBlockPtr(workspace_block_id, workspace);
  }
  {
    ASCEND_TIME_SCOPE("AscendSubOp::forward");
    st = op->Execute(vp, reinterpret_cast<uint8_t*>(workspace), workspaceSize, atb_ctx);
  }
  if (st != atb::NO_ERROR) {
    MLLM_ERROR_EXIT(ExitCode::kAscendError, "ATB SubOp Execute failed, status={}", static_cast<int>(st));
  }

  syncGlobalAtbStream();

  if (workspace_block_id != -1) {
    auto& mem_mgr = getAscendMemoryManager();
    mem_mgr.freeBlock(workspace_block_id);
  }

  atb::DestroyOperation(op);
}

AscendMulOp::AscendMulOp(const aops::MulOpOptions& options) : aops::MulOp(options) {}

void AscendMulOp::setup(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) {
  BaseOp::setup(inputs, outputs);
}

void AscendMulOp::forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) {
  MLLM_RT_ASSERT_EQ(inputs.size(), 2);
  MLLM_RT_ASSERT_EQ(outputs.size(), 1);

  const auto& x = inputs[0];
  const auto& y = inputs[1];
  auto& z = outputs[0];

  if (x.dtype() != y.dtype() || x.dtype() != z.dtype()) {
    NYI("AscendMulOp currently requires x/y/z have same dtype");
  }
  if (x.numel() != y.numel() || x.numel() != z.numel()) {
    NYI("AscendMulOp demo only supports no-broadcast case (numel equal)");
  }

  atb::infer::ElewiseParam mulParam;
  mulParam.elewiseType = atb::infer::ElewiseParam::ELEWISE_MUL;

  atb::Operation* op = nullptr;
  auto st = atb::CreateOperation(mulParam, &op);
  if (st != atb::NO_ERROR || op == nullptr) {
    MLLM_ERROR_EXIT(ExitCode::kAscendError, "ATB CreateOperation(ELEWISE_MUL) failed, status={}", static_cast<int>(st));
  }

  atb::Context* atb_ctx = getGlobalAtbContext();

  atb::Tensor atb_x;
  atb::Tensor atb_y;
  atb::Tensor atb_z;

  fillAtbTensorDesc(x, atb_x.desc);
  fillAtbTensorDesc(y, atb_y.desc);
  fillAtbTensorDesc(z, atb_z.desc);

  atb_x.deviceData = reinterpret_cast<uint8_t*>(x.ptr<void>());
  atb_x.dataSize = x.bytes();
  atb_y.deviceData = reinterpret_cast<uint8_t*>(y.ptr<void>());
  atb_y.dataSize = y.bytes();
  atb_z.deviceData = reinterpret_cast<uint8_t*>(z.ptr<void>());
  atb_z.dataSize = z.bytes();

  atb::SVector<atb::Tensor> inTensors;
  atb::SVector<atb::Tensor> outTensors;
  inTensors.push_back(atb_x);
  inTensors.push_back(atb_y);
  outTensors.push_back(atb_z);

  atb::VariantPack vp;
  vp.inTensors = inTensors;
  vp.outTensors = outTensors;

  uint64_t workspaceSize = 0;
  st = op->Setup(vp, workspaceSize, atb_ctx);
  if (st != atb::NO_ERROR) {
    MLLM_ERROR_EXIT(ExitCode::kAscendError, "ATB MulOp Setup failed, status={}", static_cast<int>(st));
  }

  void* workspace = nullptr;
  int workspace_block_id = -1;
  if (workspaceSize > 0) {
    auto& mem_mgr = getAscendMemoryManager();
    mem_mgr.allocateBlock(static_cast<uint32_t>(workspaceSize), workspace_block_id);
    mem_mgr.getBlockPtr(workspace_block_id, workspace);
  }
  {
    ASCEND_TIME_SCOPE("AscendMulOp::forward");
    st = op->Execute(vp, reinterpret_cast<uint8_t*>(workspace), workspaceSize, atb_ctx);
  }
  if (st != atb::NO_ERROR) {
    MLLM_ERROR_EXIT(ExitCode::kAscendError, "ATB MulOp Execute failed, status={}", static_cast<int>(st));
  }

  syncGlobalAtbStream();

  if (workspace_block_id != -1) {
    auto& mem_mgr = getAscendMemoryManager();
    mem_mgr.freeBlock(workspace_block_id);
  }

  atb::DestroyOperation(op);
}

} // namespace mllm::ascend
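
The Sub and Mul forward bodies above (and, judging from the hunk context, the existing Add forward) differ only in the ElewiseParam type and the log strings. A natural follow-up would be to fold the shared ATB plumbing (CreateOperation, tensor descriptions, Setup, workspace allocation, Execute, stream sync, cleanup) into one helper. The sketch below is illustrative only: the helper name runAtbBinaryElewise is hypothetical, and it assumes the project utilities already used in this diff (getGlobalAtbContext, fillAtbTensorDesc, getAscendMemoryManager, syncGlobalAtbStream, ASCEND_TIME_SCOPE) behave as shown.

// Hypothetical refactor sketch (not part of this PR): one runner shared by the
// Add/Sub/Mul forwards, built only from the calls already shown above.
static void runAtbBinaryElewise(atb::infer::ElewiseParam::ElewiseType elewise_type,
                                const Tensor& x, const Tensor& y, Tensor& z) {
  atb::infer::ElewiseParam param;
  param.elewiseType = elewise_type;

  atb::Operation* op = nullptr;
  auto st = atb::CreateOperation(param, &op);
  if (st != atb::NO_ERROR || op == nullptr) {
    MLLM_ERROR_EXIT(ExitCode::kAscendError, "ATB CreateOperation failed, status={}", static_cast<int>(st));
  }

  atb::Context* atb_ctx = getGlobalAtbContext();

  // Describe the device tensors for ATB: shape/dtype via fillAtbTensorDesc, plus raw pointers and sizes.
  atb::Tensor atb_x, atb_y, atb_z;
  fillAtbTensorDesc(x, atb_x.desc);
  fillAtbTensorDesc(y, atb_y.desc);
  fillAtbTensorDesc(z, atb_z.desc);
  atb_x.deviceData = reinterpret_cast<uint8_t*>(x.ptr<void>());
  atb_x.dataSize = x.bytes();
  atb_y.deviceData = reinterpret_cast<uint8_t*>(y.ptr<void>());
  atb_y.dataSize = y.bytes();
  atb_z.deviceData = reinterpret_cast<uint8_t*>(z.ptr<void>());
  atb_z.dataSize = z.bytes();

  atb::SVector<atb::Tensor> inTensors;
  atb::SVector<atb::Tensor> outTensors;
  inTensors.push_back(atb_x);
  inTensors.push_back(atb_y);
  outTensors.push_back(atb_z);

  atb::VariantPack vp;
  vp.inTensors = inTensors;
  vp.outTensors = outTensors;

  // Plan the kernel, grab scratch space if it asks for any, run, then release everything.
  uint64_t workspaceSize = 0;
  st = op->Setup(vp, workspaceSize, atb_ctx);
  if (st != atb::NO_ERROR) {
    MLLM_ERROR_EXIT(ExitCode::kAscendError, "ATB Setup failed, status={}", static_cast<int>(st));
  }

  void* workspace = nullptr;
  int workspace_block_id = -1;
  if (workspaceSize > 0) {
    auto& mem_mgr = getAscendMemoryManager();
    mem_mgr.allocateBlock(static_cast<uint32_t>(workspaceSize), workspace_block_id);
    mem_mgr.getBlockPtr(workspace_block_id, workspace);
  }
  {
    ASCEND_TIME_SCOPE("runAtbBinaryElewise");
    st = op->Execute(vp, reinterpret_cast<uint8_t*>(workspace), workspaceSize, atb_ctx);
  }
  if (st != atb::NO_ERROR) {
    MLLM_ERROR_EXIT(ExitCode::kAscendError, "ATB Execute failed, status={}", static_cast<int>(st));
  }

  syncGlobalAtbStream();

  if (workspace_block_id != -1) {
    getAscendMemoryManager().freeBlock(workspace_block_id);
  }
  atb::DestroyOperation(op);
}

// With such a helper, AscendSubOp::forward would keep its dtype/numel checks and end with:
//   runAtbBinaryElewise(atb::infer::ElewiseParam::ELEWISE_SUB, x, y, z);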
30 changes: 30 additions & 0 deletions mllm/backends/ascend/ops/AscendElewiseOps.hpp
@@ -24,4 +24,34 @@ class AscendAddOpFactory final : public TypedOpFactory<OpTypes::kAdd, aops::AddOpOptions> {
  }
};

class AscendSubOp final : public aops::SubOp {
public:
  explicit AscendSubOp(const aops::SubOpOptions& options);

  void setup(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) override;
  void forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) override;
};

class AscendSubOpFactory final : public TypedOpFactory<OpTypes::kSub, aops::SubOpOptions> {
public:
  std::shared_ptr<BaseOp> createOpImpl(const aops::SubOpOptions& options) override {
    return std::make_shared<AscendSubOp>(options);
  }
};

class AscendMulOp final : public aops::MulOp {
public:
  explicit AscendMulOp(const aops::MulOpOptions& options);

  void setup(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) override;
  void forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) override;
};

class AscendMulOpFactory final : public TypedOpFactory<OpTypes::kMul, aops::MulOpOptions> {
public:
  std::shared_ptr<BaseOp> createOpImpl(const aops::MulOpOptions& options) override {
    return std::make_shared<AscendMulOp>(options);
  }
};

} // namespace mllm::ascend
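
Wiring up another ATB element-wise op would repeat the same three pieces: an op class deriving from the matching aops base, a TypedOpFactory specialization, and a regOpFactory<...>() call in AscendBackend's constructor. A hypothetical division variant is sketched below; aops::DivOp, aops::DivOpOptions, and OpTypes::kDiv are assumptions for illustration, not symbols added by this PR.

// Hypothetical extension, not part of this PR. Assumes aops::DivOp,
// aops::DivOpOptions, and OpTypes::kDiv exist in the op registry.
class AscendDivOp final : public aops::DivOp {
public:
  explicit AscendDivOp(const aops::DivOpOptions& options);

  void setup(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) override;
  void forward(const std::vector<Tensor>& inputs, std::vector<Tensor>& outputs) override;
};

class AscendDivOpFactory final : public TypedOpFactory<OpTypes::kDiv, aops::DivOpOptions> {
public:
  std::shared_ptr<BaseOp> createOpImpl(const aops::DivOpOptions& options) override {
    return std::make_shared<AscendDivOp>(options);
  }
};

// ...plus, in AscendBackend::AscendBackend():
//   regOpFactory<AscendDivOpFactory>();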
70 changes: 70 additions & 0 deletions tests/ascend/AscendKernelTest.hpp
@@ -48,5 +48,75 @@ class AscendKernelTest : public KernelTest {
    }
    return true;
  }

  // Test Sub operation with different shapes
  bool SubFloat16Test(const std::vector<mllm::Tensor::shape_t>& shapes) {
    using namespace mllm; // NOLINT
    for (auto& shape : shapes) {
      // 1. Construct random FP16 inputs on CPU
      Tensor x_cpu = Tensor::random(shape, -3, 3, kFloat16, kCPU);
      Tensor y_cpu = Tensor::random(shape, -3, 3, kFloat16, kCPU);

      // 2. Compute reference result (FP16) on CPU
      Tensor ref_cpu = Tensor::zeros(shape, kFloat16, kCPU);
      {
        auto* x_ptr = x_cpu.ptr<mllm_fp16_t>();
        auto* y_ptr = y_cpu.ptr<mllm_fp16_t>();
        auto* r_ptr = ref_cpu.ptr<mllm_fp16_t>();
        auto num_elements = x_cpu.numel();
        for (size_t i = 0; i < num_elements; ++i) {
          r_ptr[i] = x_ptr[i] - y_ptr[i];
        }
      }

      // 3. Move inputs to Ascend and run Sub (z = x - y)
      auto x_ascend = x_cpu.to(kAscend);
      auto y_ascend = y_cpu.to(kAscend);
      auto z_ascend = x_ascend - y_ascend;

      // 4. Move result back to CPU and compare with reference using allClose
      auto z_cpu = z_ascend.to(kCPU);
      auto result = mllm::test::allClose(z_cpu, ref_cpu, 1e-2f, 1e-2f);
      if (!result.is_close) {
        return false;
      }
    }
    return true;
  }

  // Test Mul operation with different shapes
  bool MulFloat16Test(const std::vector<mllm::Tensor::shape_t>& shapes) {
    using namespace mllm; // NOLINT
    for (auto& shape : shapes) {
      // 1. Construct random FP16 inputs on CPU
      Tensor x_cpu = Tensor::random(shape, -3, 3, kFloat16, kCPU);
      Tensor y_cpu = Tensor::random(shape, -3, 3, kFloat16, kCPU);

      // 2. Compute reference result (FP16) on CPU
      Tensor ref_cpu = Tensor::zeros(shape, kFloat16, kCPU);
      {
        auto* x_ptr = x_cpu.ptr<mllm_fp16_t>();
        auto* y_ptr = y_cpu.ptr<mllm_fp16_t>();
        auto* r_ptr = ref_cpu.ptr<mllm_fp16_t>();
        auto num_elements = x_cpu.numel();
        for (size_t i = 0; i < num_elements; ++i) {
          r_ptr[i] = x_ptr[i] * y_ptr[i];
        }
      }

      // 3. Move inputs to Ascend and run Mul (z = x * y)
      auto x_ascend = x_cpu.to(kAscend);
      auto y_ascend = y_cpu.to(kAscend);
      auto z_ascend = x_ascend * y_ascend;

      // 4. Move result back to CPU and compare with reference using allClose
      auto z_cpu = z_ascend.to(kCPU);
      auto result = mllm::test::allClose(z_cpu, ref_cpu, 1e-2f, 1e-2f);
      if (!result.is_close) {
        return false;
      }
    }
    return true;
  }
};
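
SubFloat16Test and MulFloat16Test are copies of the same body with only the per-element reference operator and the device-side expression changed. If more element-wise kernels get covered, the shared part could be parameterized; the sketch below shows one way to do that using only the Tensor and allClose calls from the tests above. The helper name ElewiseFloat16Test and its lambda parameters are hypothetical, not part of this PR.

// Hypothetical shared body for the FP16 element-wise tests above (not in this PR).
// ref_op computes the per-element CPU reference; dev_op builds the Ascend expression.
template <typename RefOp, typename DevOp>
bool ElewiseFloat16Test(const std::vector<mllm::Tensor::shape_t>& shapes, RefOp ref_op, DevOp dev_op) {
  using namespace mllm; // NOLINT
  for (auto& shape : shapes) {
    Tensor x_cpu = Tensor::random(shape, -3, 3, kFloat16, kCPU);
    Tensor y_cpu = Tensor::random(shape, -3, 3, kFloat16, kCPU);

    // CPU reference, computed directly in FP16 like the loops above.
    Tensor ref_cpu = Tensor::zeros(shape, kFloat16, kCPU);
    auto* x_ptr = x_cpu.ptr<mllm_fp16_t>();
    auto* y_ptr = y_cpu.ptr<mllm_fp16_t>();
    auto* r_ptr = ref_cpu.ptr<mllm_fp16_t>();
    auto num_elements = x_cpu.numel();
    for (size_t i = 0; i < num_elements; ++i) {
      r_ptr[i] = ref_op(x_ptr[i], y_ptr[i]);
    }

    // Device result, moved back to CPU and compared with the same tolerances.
    auto z_cpu = dev_op(x_cpu.to(kAscend), y_cpu.to(kAscend)).to(kCPU);
    if (!mllm::test::allClose(z_cpu, ref_cpu, 1e-2f, 1e-2f).is_close) {
      return false;
    }
  }
  return true;
}

// Example usage for Sub (Mul is the same with '*'):
//   ElewiseFloat16Test(
//       shapes,
//       [](mllm_fp16_t a, mllm_fp16_t b) { return a - b; },
//       [](mllm::Tensor x, mllm::Tensor y) { return x - y; });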

35 changes: 35 additions & 0 deletions tests/ascend/KernelTest.cpp
@@ -25,6 +25,41 @@ TEST_F(AscendKernelTest, AddFloat16) {
            true);
}

//===----------------------------------------------------------------------===//
// Element wise SUB.
//
// FP16 (Ascend currently uses FP16)
//===----------------------------------------------------------------------===//
TEST_F(AscendKernelTest, SubFloat16) {
  EXPECT_EQ(SubFloat16Test({
                {2, 3},
                {1, 1},
                {4, 4},
                {8, 8},
                {16, 16},
                {32, 32},
            }),
            true);
}

//===----------------------------------------------------------------------===//
// Element wise MUL.
//
// FP16 (Ascend currently uses FP16)
//===----------------------------------------------------------------------===//
TEST_F(AscendKernelTest, MulFloat16) {
  EXPECT_EQ(MulFloat16Test({
                {2, 3},
                {1, 1},
                {4, 4},
                {8, 8},
                {16, 16},
                {32, 32},
            }),
            true);
}


int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
