Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
5e581b8
issue/450: change indexToReducedOffset() to indexToOffset in elementw…
Ziminli Sep 15, 2025
9ef02a1
issue/450: remove indexToReducedOffset() in all platforms
Ziminli Sep 15, 2025
9db54b8
issue/450: add the testcases that pinpoint the issue in infiniop-test
Ziminli Sep 15, 2025
15ac019
Issue/450 Fix Elementwise Striding Broadcasting Issue (#452)
PanZezhong1725 Sep 16, 2025
8651576
issue/428: merge rope_v2 into rope with algorithm selection
Ziminli Sep 7, 2025
f6e8476
issue/428: accommodate the changes to c/gguf tests
Ziminli Sep 7, 2025
9f0ae73
issue/428: update the rope implementation on Ascend, Cambricon, and K…
Ziminli Sep 8, 2025
f9d1662
Merge pull request #429 from InfiniTensor/issue/428_merge_rope_and_ro…
PanZezhong1725 Sep 16, 2025
b8609df
issue/434 hccl support bf16
Ceng23333 Sep 9, 2025
3bb0c93
fix rope_v2 compiling && update infiniccl_test
Ceng23333 Sep 10, 2025
b9dd000
Merge pull request #438 from InfiniTensor/issue/434-metax
PanZezhong1725 Sep 16, 2025
8c777f9
fix: disable topkrouter on Iluvatar GPU via ENABLE_NVIDIA_API macro
spike-zhu Sep 16, 2025
1f50740
fix: disable topkrouter on Iluvatar GPU via ENABLE_NVIDIA_API macro (…
PanZezhong1725 Sep 16, 2025
badccb8
issue/410 Feature: Add infinicore python package
voltjia Sep 16, 2025
94280d8
issue/434 - added bf16 support for Cambricon MLU
wooway777 Sep 16, 2025
6892a7f
issue/436: support kunlun rope U32
xgqdut2016 Sep 9, 2025
3bdd832
issue/436: 支持9g7b 4b模型
zhangyue207 Sep 17, 2025
6680a8c
issue/436:修补昆仑芯端到端推理遇到的问题 (#437)
PanZezhong1725 Sep 18, 2025
c15189b
issue/466: success kunlun rope NEOX
xgqdut2016 Sep 18, 2025
ade3b5d
Merge pull request #462 from InfiniTensor/issue/434-cambricon
PanZezhong1725 Sep 18, 2025
d0b7bf9
feat:hccl support bf16
bitzyz Sep 18, 2025
2a81c8b
Merge pull request #467 from InfiniTensor/issue/466
zhangyue207 Sep 18, 2025
3a91947
Issue/459 (#460)
wooway777 Sep 18, 2025
82b2a84
issue/458 add AWQ dequantization torch test and improve variable nami…
spike-zhu Sep 18, 2025
be117fe
fix: disable NVIDIA-dequantize on Iluvatar GPU via ENABLE_NVIDIA_API …
spike-zhu Sep 19, 2025
d3d982d
Merge pull request #470 from InfiniTensor/issue/469
PanZezhong1725 Sep 23, 2025
4217976
feat: rename Dequantize to DequantizeAWQ in nvidia gpu
spike-zhu Sep 23, 2025
6b903fd
Merge pull request #476 from InfiniTensor/issue/474
PanZezhong1725 Sep 24, 2025
718a126
add mul
TRM-coding Sep 24, 2025
6af2e42
issue/477 - Cambricon MLU NeoX
wooway777 Sep 25, 2025
3fb3b2f
stash
TRM-coding Sep 25, 2025
20a2dbd
Merge pull request #478 from InfiniTensor/issue/477
PanZezhong1725 Sep 25, 2025
3959c94
Issue/472: 接入昆仑芯通信库 (#479)
zhangyue207 Sep 25, 2025
5a196e0
add gemm,causal_softmax for opencl
TRM-coding Sep 26, 2025
53c4d53
add rope and random_sample
TRM-coding Sep 26, 2025
1cfb7ba
rearrange,swiglu
TRM-coding Sep 27, 2025
33794bc
fixed dequantized_awq
TRM-coding Sep 27, 2025
8fb4205
Merge remote-tracking branch 'infini_tensor/main' into opencl-trm
TRM-coding Sep 27, 2025
ec72705
merge infini_tensor/main
TRM-coding Sep 27, 2025
262a8ca
可以运行推理的opencl算子
TRM-coding Sep 28, 2025
8201655
update gemm add sub group
TRM-coding Oct 1, 2025
2b0f34b
update
TRM-coding Oct 3, 2025
a515d16
添加测试截图和完成工作说明
TRM-coding Oct 10, 2025
3ef1628
删除无用输出
TRM-coding Oct 10, 2025
26cfff1
修复误删
TRM-coding Oct 10, 2025
f2ef58c
缓存算子
TRM-coding Oct 10, 2025
7a6e2d7
update rearrange
TRM-coding Oct 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
xmake-version: latest

- name: Build & Install
run: python scripts/install.py --omp=y
run: python scripts/install.py --omp=y -y

- name: install python packages
run: |
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
global-include *
6 changes: 3 additions & 3 deletions include/infiniccl.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ struct InfinicclComm;

typedef struct InfinicclComm *infinicclComm_t;

__C __export infiniStatus_t infinicclCommInitAll(
INFINI_EXTERN_C __export infiniStatus_t infinicclCommInitAll(
infiniDevice_t device_type,
infinicclComm_t *comms,
int ndevice,
const int *device_ids);

__C __export infiniStatus_t infinicclCommDestroy(infinicclComm_t comm);
INFINI_EXTERN_C __export infiniStatus_t infinicclCommDestroy(infinicclComm_t comm);

__C __export infiniStatus_t infinicclAllReduce(
INFINI_EXTERN_C __export infiniStatus_t infinicclAllReduce(
void *sendbuf,
void *recvbuf,
size_t count,
Expand Down
3 changes: 2 additions & 1 deletion include/infinicore.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
#endif

#ifdef __cplusplus
#define __C extern "C"
// The former `#define __C extern "C"` conflicts with emmintrin.h; the __C macro is deprecated, use INFINI_EXTERN_C instead.
#define INFINI_EXTERN_C extern "C"
#include <cstddef>
#else
#define __C
Expand Down
6 changes: 6 additions & 0 deletions include/infinicore.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#ifndef __INFINICORE_API_HPP__
#define __INFINICORE_API_HPP__

#include "infinicore/tensor.hpp"

#endif
37 changes: 37 additions & 0 deletions include/infinicore/device.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#ifndef __INFINICORE_DEVICE_API_HPP__
#define __INFINICORE_DEVICE_API_HPP__

#include <cstdint>
#include <string>

namespace infinicore {

// Describes a device as a (type, index) pair.
// Only the declaration is visible here; the member definitions live in
// another translation unit.
class Device {
public:
// Integer type used for the device index.
using Index = std::size_t;

// Kinds of device that can be described.
// NOTE(review): the exact meaning of `meta` is not visible in this header —
// confirm against the implementation before relying on it.
enum class Type {
cpu,
cuda,
meta,
};

// Builds a device description from a type and an index.
// The index is optional and defaults to 0.
Device(const Type &type, const Index &index = 0);

// Returns a const reference to a device type (presumably the stored type_ —
// definition not visible here).
const Type &get_type() const;

// Returns a const reference to a device index (presumably the stored index_ —
// definition not visible here).
const Index &get_index() const;

// Returns a string form of this device; the format is defined by the
// out-of-view implementation.
std::string to_string() const;

// Returns a string form of a device type without needing a Device instance.
static std::string to_string(const Type &type);

private:
// Device kind supplied at construction.
Type type_;

// Device index supplied at construction.
Index index_;
};

} // namespace infinicore

#endif
22 changes: 22 additions & 0 deletions include/infinicore/dtype.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef __INFINICORE_DTYPE_API_HPP__
#define __INFINICORE_DTYPE_API_HPP__

// <string> is required by the to_string() declaration below; including it
// here keeps this header self-contained instead of relying on whichever
// header happens to be included before it.
#include <string>

#include <infinicore.h>

namespace infinicore {

// Element types exposed by the C++ API.
// Each enumerator takes its value from the matching INFINI_DTYPE_* constant
// provided by infinicore.h, so a DataType value and the corresponding C-level
// constant share the same numeric value.
enum class DataType {
    bfloat16 = INFINI_DTYPE_BF16,
    float16 = INFINI_DTYPE_F16,
    float32 = INFINI_DTYPE_F32,
    float64 = INFINI_DTYPE_F64,
    int32 = INFINI_DTYPE_I32,
    int64 = INFINI_DTYPE_I64,
    uint8 = INFINI_DTYPE_U8,
};

// Returns a string form of `dtype`; the exact text is defined by the
// implementation, which is not part of this header.
std::string to_string(const DataType &dtype);

} // namespace infinicore

#endif
39 changes: 39 additions & 0 deletions include/infinicore/tensor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef __INFINICORE_TENSOR_API_HPP__
#define __INFINICORE_TENSOR_API_HPP__

#include <vector>

#include "device.hpp"
#include "dtype.hpp"

namespace infinicore {

// Holds the metadata of a tensor: its shape, element data type and device.
// No data pointer or storage member is declared in this class.
// Only the declaration is visible here; the member definitions live in
// another translation unit.
class Tensor {
public:
// Unsigned type for the extent of one dimension.
using Size = std::size_t;

// Signed type for one stride value.
using Stride = std::ptrdiff_t;

// Sequence of extents, one Size per dimension.
using Shape = std::vector<Size>;

// Sequence of Stride values.
// NOTE(review): no member declared in this class uses Strides yet.
using Strides = std::vector<Stride>;

// Builds a tensor description from a shape, a data type and a device.
Tensor(const Shape &shape, const DataType &dtype, const Device &device);

// Returns a const reference to a shape (presumably the stored shape_ —
// definition not visible here).
const Shape &get_shape() const;

// Returns a const reference to a data type (presumably the stored dtype_ —
// definition not visible here).
const DataType &get_dtype() const;

// Returns a const reference to a device (presumably the stored device_ —
// definition not visible here).
const Device &get_device() const;

private:
// Shape supplied at construction.
Shape shape_;

// Element data type supplied at construction.
DataType dtype_;

// Device supplied at construction.
Device device_;
};

} // namespace infinicore

#endif
3 changes: 1 addition & 2 deletions include/infiniop.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h"
#include "infiniop/ops/conv.h"
#include "infiniop/ops/dequantize.h"
#include "infiniop/ops/dequantize_awq.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/mul.h"
#include "infiniop/ops/random_sample.h"
#include "infiniop/ops/rearrange.h"
#include "infiniop/ops/relu.h"
#include "infiniop/ops/rms_norm.h"
#include "infiniop/ops/rope.h"
#include "infiniop/ops/rope_v2.h"
#include "infiniop/ops/softplus.h"
#include "infiniop/ops/sub.h"
#include "infiniop/ops/swiglu.h"
Expand Down
4 changes: 2 additions & 2 deletions include/infiniop/handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ struct InfiniopHandle;

typedef struct InfiniopHandle *infiniopHandle_t;

__C __export infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr);
INFINI_EXTERN_C __export infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr);

__C __export infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle);
INFINI_EXTERN_C __export infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle);

#endif
4 changes: 2 additions & 2 deletions include/infiniop/operator_descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// Base descriptor for all operators
struct InfiniopDescriptor;

__C __export infiniStatus_t infiniopGetDescriptorDeviceType(const struct InfiniopDescriptor *desc_ptr, infiniDevice_t *device_type);
__C __export infiniStatus_t infiniopGetDescriptorDeviceId(const struct InfiniopDescriptor *desc_ptr, int *device_id);
INFINI_EXTERN_C __export infiniStatus_t infiniopGetDescriptorDeviceType(const struct InfiniopDescriptor *desc_ptr, infiniDevice_t *device_type);
INFINI_EXTERN_C __export infiniStatus_t infiniopGetDescriptorDeviceId(const struct InfiniopDescriptor *desc_ptr, int *device_id);

#endif //__INFINIOP_OPERATOR_DESCRIPTOR_API_H__
8 changes: 4 additions & 4 deletions include/infiniop/ops/add.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@

typedef struct InfiniopDescriptor *infiniopAddDescriptor_t;

__C __export infiniStatus_t infiniopCreateAddDescriptor(infiniopHandle_t handle,
INFINI_EXTERN_C __export infiniStatus_t infiniopCreateAddDescriptor(infiniopHandle_t handle,
infiniopAddDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c,
infiniopTensorDescriptor_t a,
infiniopTensorDescriptor_t b);

__C __export infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t desc, size_t *size);
INFINI_EXTERN_C __export infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopAdd(infiniopAddDescriptor_t desc,
INFINI_EXTERN_C __export infiniStatus_t infiniopAdd(infiniopAddDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *c,
const void *a,
const void *b,
void *stream);

__C __export infiniStatus_t infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc);
INFINI_EXTERN_C __export infiniStatus_t infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc);

#endif
8 changes: 4 additions & 4 deletions include/infiniop/ops/attention.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

typedef struct InfiniopDescriptor *infiniopAttentionDescriptor_t;

__C __export infiniStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t handle,
INFINI_EXTERN_C __export infiniStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t handle,
infiniopAttentionDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
infiniopTensorDescriptor_t q_desc,
Expand All @@ -17,9 +17,9 @@ __C __export infiniStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t h
infiniopTensorDescriptor_t v_cache_desc,
size_t pos);

__C __export infiniStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size);
INFINI_EXTERN_C __export infiniStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc,
INFINI_EXTERN_C __export infiniStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *out,
Expand All @@ -30,5 +30,5 @@ __C __export infiniStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc
void *v_cache,
void *stream);

__C __export infiniStatus_t infiniopDestroyAttentionDescriptor(infiniopAttentionDescriptor_t desc);
INFINI_EXTERN_C __export infiniStatus_t infiniopDestroyAttentionDescriptor(infiniopAttentionDescriptor_t desc);
#endif
8 changes: 4 additions & 4 deletions include/infiniop/ops/causal_softmax.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@

typedef struct InfiniopDescriptor *infiniopCausalSoftmaxDescriptor_t;

__C __export infiniStatus_t infiniopCreateCausalSoftmaxDescriptor(
INFINI_EXTERN_C __export infiniStatus_t infiniopCreateCausalSoftmaxDescriptor(
infiniopHandle_t handle,
infiniopCausalSoftmaxDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc,
infiniopTensorDescriptor_t x_desc);

__C __export infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, size_t *size);
INFINI_EXTERN_C __export infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopCausalSoftmax(
INFINI_EXTERN_C __export infiniStatus_t infiniopCausalSoftmax(
infiniopCausalSoftmaxDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);

__C __export infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc);
INFINI_EXTERN_C __export infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc);

#endif
8 changes: 4 additions & 4 deletions include/infiniop/ops/clip.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@

typedef struct InfiniopDescriptor *infiniopClipDescriptor_t;

__C __export infiniStatus_t infiniopCreateClipDescriptor(infiniopHandle_t handle,
INFINI_EXTERN_C __export infiniStatus_t infiniopCreateClipDescriptor(infiniopHandle_t handle,
infiniopClipDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
infiniopTensorDescriptor_t min_val,
infiniopTensorDescriptor_t max_val);

__C __export infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, size_t *size);
INFINI_EXTERN_C __export infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopClip(infiniopClipDescriptor_t desc,
INFINI_EXTERN_C __export infiniStatus_t infiniopClip(infiniopClipDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
Expand All @@ -23,6 +23,6 @@ __C __export infiniStatus_t infiniopClip(infiniopClipDescriptor_t desc,
const void *max_val,
void *stream);

__C __export infiniStatus_t infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc);
INFINI_EXTERN_C __export infiniStatus_t infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc);

#endif
8 changes: 4 additions & 4 deletions include/infiniop/ops/conv.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

typedef struct InfiniopDescriptor *infiniopConvDescriptor_t;

__C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle,
INFINI_EXTERN_C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle,
infiniopConvDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc,
infiniopTensorDescriptor_t x_desc,
Expand All @@ -16,10 +16,10 @@ __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle
void *dilations,
size_t n);

__C __export infiniStatus_t infiniopGetConvWorkspaceSize(infiniopConvDescriptor_t desc, size_t *size);
INFINI_EXTERN_C __export infiniStatus_t infiniopGetConvWorkspaceSize(infiniopConvDescriptor_t desc, size_t *size);

__C __export infiniStatus_t infiniopConv(infiniopConvDescriptor_t desc, void *workspace, size_t workspace_size, void *y, const void *x, const void *w, const void *bias, void *stream);
INFINI_EXTERN_C __export infiniStatus_t infiniopConv(infiniopConvDescriptor_t desc, void *workspace, size_t workspace_size, void *y, const void *x, const void *w, const void *bias, void *stream);

__C __export infiniStatus_t infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc);
INFINI_EXTERN_C __export infiniStatus_t infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc);

#endif
31 changes: 0 additions & 31 deletions include/infiniop/ops/dequantize.h

This file was deleted.

28 changes: 28 additions & 0 deletions include/infiniop/ops/dequantize_awq.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef __INFINIOP_DEQUANTIZE_AWQ_API_H__
#define __INFINIOP_DEQUANTIZE_AWQ_API_H__

#include "../operator_descriptor.h"

// Handle type for the DequantizeAWQ operator: a pointer to the shared
// InfiniopDescriptor struct, which is only forward-declared for API users.
typedef struct InfiniopDescriptor *infiniopDequantizeAWQDescriptor_t;

// Declares the constructor for a DequantizeAWQ operator descriptor.
//   handle        library handle
//   desc_ptr      pointer to a descriptor handle; presumably receives the
//                 created descriptor — confirm against the implementation
//   out_desc      tensor descriptor for the output
//   qweight_desc  tensor descriptor for the quantized weights
//   scales_desc   tensor descriptor for the scales
//   zeros_desc    tensor descriptor for the zero points
// Returns an infiniStatus_t status code.
// NOTE(review): required shapes/dtypes of the tensors are not visible in this
// header — see the backend implementations.
INFINI_EXTERN_C __export infiniStatus_t infiniopCreateDequantizeAWQDescriptor(infiniopHandle_t handle,
infiniopDequantizeAWQDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
infiniopTensorDescriptor_t qweight_desc,
infiniopTensorDescriptor_t scales_desc,
infiniopTensorDescriptor_t zeros_desc);

// Declares the workspace-size query for a DequantizeAWQ descriptor.
//   desc  descriptor to query
//   size  pointer to a size_t; presumably receives the workspace size in
//         bytes — confirm units against the implementation
// Returns an infiniStatus_t status code.
INFINI_EXTERN_C __export infiniStatus_t infiniopGetDequantizeAWQWorkspaceSize(infiniopDequantizeAWQDescriptor_t desc, size_t *size);

// Declares the entry point that runs the DequantizeAWQ operator.
//   desc            operator descriptor
//   workspace       caller-provided scratch buffer
//   workspace_size  size of `workspace`
//   out             output buffer (the only non-const data pointer)
//   qweight         quantized weights, read-only
//   scales          scales, read-only
//   zeros           zero points, read-only
//   stream          opaque execution-stream pointer; its concrete type is
//                   backend-specific and not visible here
// Returns an infiniStatus_t status code.
// NOTE(review): presumably `workspace_size` must be at least the value
// reported by infiniopGetDequantizeAWQWorkspaceSize — confirm.
INFINI_EXTERN_C __export infiniStatus_t infiniopDequantizeAWQ(infiniopDequantizeAWQDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *out,
const void *qweight,
const void *scales,
const void *zeros,
void *stream);

// Declares the destructor for a DequantizeAWQ operator descriptor.
//   desc  descriptor to destroy
// Returns an infiniStatus_t status code.
INFINI_EXTERN_C __export infiniStatus_t infiniopDestroyDequantizeAWQDescriptor(infiniopDequantizeAWQDescriptor_t desc);

#endif
Loading