diff --git a/.github/workflows/autopackage_linux_x86_64_cpu.yml b/.github/workflows/autopackage_linux_x86_64_cpu.yml index 76b725d..7031fd3 100644 --- a/.github/workflows/autopackage_linux_x86_64_cpu.yml +++ b/.github/workflows/autopackage_linux_x86_64_cpu.yml @@ -1,16 +1,9 @@ name: Publish Axono Package (Linux x86_64 For CPU) -# on: -# push: -# tags: -# - 'v*' - on: push: - branches: [ dev ] - pull_request: - branches: [ dev ] - workflow_dispatch: # 添加手动触发器 + tags: + - 'v*' jobs: build: diff --git a/.github/workflows/autopackage_linux_x86_64_cuda.yml b/.github/workflows/autopackage_linux_x86_64_cuda.yml index b38ba73..1689e9c 100644 --- a/.github/workflows/autopackage_linux_x86_64_cuda.yml +++ b/.github/workflows/autopackage_linux_x86_64_cuda.yml @@ -1,16 +1,9 @@ name: Publish Axono Package (Linux x86_64 For CUDA) -# on: -# push: -# tags: -# - 'v*' - on: - push: - branches: [ dev ] - pull_request: - branches: [ dev ] - workflow_dispatch: # 添加手动触发器 + push: + tags: + - 'v*' jobs: build: diff --git a/include/axono/core/module.h b/include/axono/core/module.h new file mode 100644 index 0000000..35706ad --- /dev/null +++ b/include/axono/core/module.h @@ -0,0 +1,16 @@ +#include "tensor.h" + +namespace axono::core { +class Module { +private: + std::unordered_map<std::string, Tensor> weights_; // 存储权重张量 +public: + void add_weight(const std::string& name, const Tensor& weight) { + weights_[name] = weight; + } + Tensor& get_weight(const std::string& name) { + return weights_.at(name); + } + auto& weights() { return weights_; } +}; +} diff --git a/include/axono/pybind/core/module.h b/include/axono/pybind/core/module.h new file mode 100644 index 0000000..a4b2d94 --- /dev/null +++ b/include/axono/pybind/core/module.h @@ -0,0 +1,23 @@ +#include <pybind11/pybind11.h> +#include <pybind11/stl.h> +#include "axono/core/module.h" + +namespace py = pybind11; + +void init_module(py::module &m) { + py::class_<axono::core::Module>(m, "Module") + .def(py::init<>(), "创建一个空的 Module 实例") + .def("add_weight", + &axono::core::Module::add_weight, + py::arg("name"), 
py::arg("weight"), + "向模块添加权重张量") + .def("get_weight", + &axono::core::Module::get_weight, + py::arg("name"), + py::return_value_policy::reference_internal, + "获取指定名称的权重张量") + .def("weights", + &axono::core::Module::weights, + py::return_value_policy::reference_internal, + "返回模块中所有权重的映射"); +} diff --git a/pyproject.toml b/pyproject.toml index bfbc9ee..f6e8737 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta" [project] name = "axono" -version = "0.1.0" +version = "0.2.0" description = "Deep learning framework with tensor operations and neural network modules" readme = "README.md" requires-python = ">=3.8" @@ -39,11 +39,7 @@ package-dir = { "" = "python" } packages = [ "axono", "axono.core", - "axono.data", - "axono.models", "axono.nn", - "axono.train", - "axono.viz", "axono.core.operators", "axono.core.ops" ] diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ce8fe75..c43ff7f 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -16,26 +16,26 @@ endif() list(FILTER CORE_SOURCES EXCLUDE REGEX "\\.ipynb_checkpoints/") -pybind11_add_module(axonolib +pybind11_add_module(libaxono src/pybind11_module.cpp ${CORE_SOURCES} ) if(WITH_CUDA) -target_compile_definitions(axonolib PRIVATE COMPILED_WITH_CUDA) +target_compile_definitions(libaxono PRIVATE COMPILED_WITH_CUDA) endif() -target_include_directories(axonolib PRIVATE +target_include_directories(libaxono PRIVATE ${CMAKE_SOURCE_DIR}/include ) -target_include_directories(axonolib PRIVATE +target_include_directories(libaxono PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include ) # Set output properties -set_target_properties(axonolib PROPERTIES - OUTPUT_NAME "axonolib" +set_target_properties(libaxono PROPERTIES + OUTPUT_NAME "libaxono" LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/python/axono/library" RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/python/axono/library" PREFIX "" diff --git a/python/axono/__init__.py b/python/axono/__init__.py index 
d8cbde8..9ab0438 100644 --- a/python/axono/__init__.py +++ b/python/axono/__init__.py @@ -15,7 +15,7 @@ from .core import DataType, Status, Tensor, operators -__version__ = "0.1.0" +__version__ = "0.2.0" __author__ = "ByteRainLab" __description__ = "High performance computing library for big data processing" diff --git a/python/axono/core/__init__.py b/python/axono/core/__init__.py index 16edd9a..2667185 100644 --- a/python/axono/core/__init__.py +++ b/python/axono/core/__init__.py @@ -15,7 +15,7 @@ library_path = os.path.dirname(os.path.dirname(__file__)) + "/library/" sys.path.append(library_path) -from axonolib import DataType, Status # noqa: E402 +from libaxono import DataType, Status # noqa: E402 from . import operators # noqa: E402 from .tensor import Tensor # noqa: E402 diff --git a/python/axono/core/operators/add.py b/python/axono/core/operators/add.py index 88039f1..6145158 100644 --- a/python/axono/core/operators/add.py +++ b/python/axono/core/operators/add.py @@ -13,7 +13,7 @@ Axono Add """ -from axonolib import add as _add +from libaxono import add as _add from ..tensor import Tensor diff --git a/python/axono/core/operators/matmul.py b/python/axono/core/operators/matmul.py index 3af5c25..8abaedc 100644 --- a/python/axono/core/operators/matmul.py +++ b/python/axono/core/operators/matmul.py @@ -13,7 +13,7 @@ Axono Matmul """ -from axonolib import matmul as _matmul +from libaxono import matmul as _matmul from ..tensor import Tensor diff --git a/python/axono/core/ops/relu.py b/python/axono/core/ops/relu.py index d11854e..cff9c15 100644 --- a/python/axono/core/ops/relu.py +++ b/python/axono/core/ops/relu.py @@ -13,8 +13,8 @@ core.ops.Relu() """ -from axonolib import relu as relu_op -from axonolib import relu_ as relu_op_ +from libaxono import relu as relu_op +from libaxono import relu_ as relu_op_ from ..tensor import Tensor diff --git a/python/axono/core/tensor.py b/python/axono/core/tensor.py index a7f4dc8..c43ef1c 100644 --- a/python/axono/core/tensor.py 
+++ b/python/axono/core/tensor.py @@ -17,8 +17,8 @@ import os import numpy as np -from axonolib import DataType, Status -from axonolib import Tensor as _Tensor +from libaxono import DataType, Status +from libaxono import Tensor as _Tensor default_device = os.getenv("axono_default_device", "cpu") diff --git a/python/axono/data/dataloader.py b/python/axono/data/dataloader.py deleted file mode 100644 index 62930a2..0000000 --- a/python/axono/data/dataloader.py +++ /dev/null @@ -1,151 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from typing import Any, Callable, Dict, List, Optional - -import numpy as np -from PIL import Image - -from ..core import Tensor - - -class Dataset: - def __init__(self): - self.transform = None - - def __getitem__(self, index: int) -> Dict[str, Any]: - raise NotImplementedError - - def __len__(self) -> int: - raise NotImplementedError - - def set_transform(self, transform: Callable): - self.transform = transform - - -class DataLoader: - def __init__( - self, - dataset: Dataset, - batch_size: int = 1, - shuffle: bool = False, - num_workers: int = 0, - ): - self.dataset = dataset - self.batch_size = batch_size - self.shuffle = shuffle - self.num_workers = num_workers - - self._indices = list(range(len(dataset))) - - def __iter__(self): - if self.shuffle: - np.random.shuffle(self._indices) - - for i in range(0, len(self._indices), self.batch_size): - batch_indices = self._indices[i : i + self.batch_size] - batch = self._collate_fn([self.dataset[idx] for idx in batch_indices]) - yield batch - - def __len__(self): - return (len(self.dataset) + self.batch_size - 1) // self.batch_size - - def _collate_fn(self, batch: List[Dict[str, Any]]) -> Dict[str, Tensor]: - """Convert a list of samples to a batch""" - elem = batch[0] - if isinstance(elem, dict): - return { - key: self._collate_fn([d[key] for d in batch]) - if isinstance(elem[key], (dict, list)) - else Tensor.stack([d[key] for d in batch]) - if isinstance(elem[key], Tensor) - else Tensor.from_numpy(np.stack([d[key] for d in batch])) - for key in elem - } - elif isinstance(elem, list): - return [self._collate_fn([d[i] for d in batch]) for i in range(len(elem))] - else: - raise TypeError(f"Unsupported batch element type: {type(elem)}") - - -class ImageFolder(Dataset): - def __init__(self, root: str, transform: Optional[Callable] = None): - super().__init__() - self.root = root - self.transform = transform - - # Scan directory for images and classes - self._scan_dir() - - def _scan_dir(self): - """Scan directory and 
build dataset index""" - import os - - self.classes = sorted( - [ - d - for d in os.listdir(self.root) - if os.path.isdir(os.path.join(self.root, d)) - ] - ) - - self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)} - - self.samples = [] - for target_class in self.classes: - class_path = os.path.join(self.root, target_class) - if not os.path.isdir(class_path): - continue - - for root, _, fnames in sorted(os.walk(class_path)): - for fname in sorted(fnames): - if self._is_image_file(fname): - path = os.path.join(root, fname) - item = (path, self.class_to_idx[target_class]) - self.samples.append(item) - - def _is_image_file(self, filename: str) -> bool: - """Check if a file is an image""" - img_extensions = ( - ".jpg", - ".jpeg", - ".png", - ".ppm", - ".bmp", - ".pgm", - ".tif", - ".tiff", - ) - return filename.lower().endswith(img_extensions) - - def __getitem__(self, index: int) -> Dict[str, Any]: - """ - Args: - index (int): Index - - Returns: - Dict containing: - 'inputs': Tensor image - 'targets': Class label - """ - path, target = self.samples[index] - - # Load image - with open(path, "rb") as f: - img = Image.open(f).convert("RGB") - - if self.transform is not None: - img = self.transform(img) - - return {"inputs": img, "targets": target} - - def __len__(self) -> int: - return len(self.samples) diff --git a/python/axono/data/transforms.py b/python/axono/data/transforms.py deleted file mode 100644 index 6764ec5..0000000 --- a/python/axono/data/transforms.py +++ /dev/null @@ -1,121 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import random -from typing import List, Tuple, Union - -import numpy as np -from PIL import Image - - -class Transform: - def __call__(self, img): - raise NotImplementedError - - -class Compose(Transform): - def __init__(self, transforms: List[Transform]): - self.transforms = transforms - - def __call__(self, img): - for t in self.transforms: - img = t(img) - return img - - -class Resize(Transform): - def __init__(self, size: Union[int, Tuple[int, int]]): - self.size = size if isinstance(size, tuple) else (size, size) - - def __call__(self, img: Image.Image) -> Image.Image: - return img.resize(self.size, Image.BILINEAR) - - -class RandomCrop(Transform): - def __init__(self, size: Union[int, Tuple[int, int]], padding: int = 0): - self.size = size if isinstance(size, tuple) else (size, size) - self.padding = padding - - def __call__(self, img: Image.Image) -> Image.Image: - if self.padding > 0: - img = pad(img, self.padding) - - w, h = img.size - th, tw = self.size - - if w == tw and h == th: - return img - - i = random.randint(0, h - th) - j = random.randint(0, w - tw) - return img.crop((j, i, j + tw, i + th)) - - -class RandomHorizontalFlip(Transform): - def __init__(self, p: float = 0.5): - self.p = p - - def __call__(self, img: Image.Image) -> Image.Image: - if random.random() < self.p: - return img.transpose(Image.FLIP_LEFT_RIGHT) - return img - - -class RandomRotation(Transform): - def __init__(self, degrees: Union[float, Tuple[float, float]]): - if isinstance(degrees, float): - degrees = (-degrees, degrees) - self.degrees = degrees - - def __call__(self, img: Image.Image) -> Image.Image: - angle = random.uniform(self.degrees[0], self.degrees[1]) - return img.rotate(angle, Image.BILINEAR, expand=False) - - -class Normalize(Transform): - def __init__(self, mean: List[float], std: List[float]): - self.mean = np.array(mean) - self.std = np.array(std) - - def 
__call__(self, img: np.ndarray) -> np.ndarray: - img = np.array(img).astype(np.float32) / 255.0 - img = (img - self.mean) / self.std - return img - - -class ToTensor(Transform): - def __call__(self, img: Union[Image.Image, np.ndarray]): - if isinstance(img, Image.Image): - img = np.array(img) - - # Handle PIL Image - if len(img.shape) == 2: - img = img[:, :, None] - - # Convert HWC to CHW format - img = img.transpose((2, 0, 1)) - return img - - -def pad(img: Image.Image, padding: int) -> Image.Image: - """Helper function to pad an image""" - if isinstance(padding, int): - padding = (padding, padding, padding, padding) - elif isinstance(padding, tuple) and len(padding) == 2: - padding = (padding[0], padding[1], padding[0], padding[1]) - - w, h = img.size - new_w = w + padding[0] + padding[2] - new_h = h + padding[1] + padding[3] - - result = Image.new(img.mode, (new_w, new_h), 0) - result.paste(img, (padding[0], padding[1])) - return result diff --git a/python/axono/models/container.py b/python/axono/models/container.py deleted file mode 100644 index 7136d4c..0000000 --- a/python/axono/models/container.py +++ /dev/null @@ -1,95 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from collections import OrderedDict -from typing import Dict, List, Optional - -from ..core import Tensor -from ..nn import Module - - -class Sequential(Module): - def __init__(self, layers: List[Module]): - super().__init__() - self.layers = layers - - # Register layers as submodules - for i, layer in enumerate(layers): - self.add_module(f"layer_{i}", layer) - - def forward(self, x: Tensor) -> Tensor: - for layer in self.layers: - x = layer(x) - return x - - def add_module(self, name: str, module: Optional[Module]): - if module is not None: - self._modules[name] = module - - -class ModuleList(Module): - def __init__(self, modules: List[Module] = None): - super().__init__() - self._modules = OrderedDict() - if modules is not None: - for i, module in enumerate(modules): - self.add_module(str(i), module) - - def append(self, module: Module): - self.add_module(str(len(self)), module) - return self - - def extend(self, modules: List[Module]): - for module in modules: - self.append(module) - return self - - def __len__(self): - return len(self._modules) - - def __iter__(self): - return iter(self._modules.values()) - - def __getitem__(self, idx): - return list(self._modules.values())[idx] - - -class ModuleDict(Module): - def __init__(self, modules: Dict[str, Module] = None): - super().__init__() - self._modules = OrderedDict() - if modules is not None: - for key, module in modules.items(): - self.add_module(key, module) - - def __getitem__(self, key: str) -> Module: - return self._modules[key] - - def __setitem__(self, key: str, module: Module): - self.add_module(key, module) - - def __delitem__(self, key: str): - del self._modules[key] - - def __len__(self): - return len(self._modules) - - def __iter__(self): - return iter(self._modules.values()) - - def keys(self): - return self._modules.keys() - - def items(self): - return self._modules.items() - - def values(self): - return self._modules.values() diff --git a/python/axono/models/sequential.py 
b/python/axono/models/sequential.py deleted file mode 100644 index 7d24860..0000000 --- a/python/axono/models/sequential.py +++ /dev/null @@ -1,219 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import List, Tuple - -from ..core import Tensor -from ..nn import BatchNorm2d, Conv2d, Dropout, Linear, MaxPool2d, Module, ReLU -from .container import Sequential - - -class CNN(Module): - """A simple Convolutional Neural Network model.""" - - def __init__( - self, - input_channels: int, - num_classes: int, - hidden_channels: List[int] = [32, 64], - device: str = "cpu", - ): - super().__init__() - - layers = [] - in_channels = input_channels - - # Add convolutional layers - for out_channels in hidden_channels: - layers.extend( - [ - Conv2d( - in_channels, - out_channels, - kernel_size=3, - padding=1, - device=device, - ), - BatchNorm2d(out_channels, device=device), - ReLU(), - MaxPool2d(kernel_size=2), - ] - ) - in_channels = out_channels - - self.features = Sequential(layers) - - # Calculate the size of flattened features - self.avgpool = None # Will be initialized in forward - - # Add classifier - self.classifier = Sequential( - [ - Dropout(0.5), - Linear(hidden_channels[-1] * 7 * 7, 512, device=device), - ReLU(), - Dropout(0.5), - Linear(512, num_classes, device=device), - ] - ) - - def forward(self, x: Tensor) -> Tensor: - x = self.features(x) - if self.avgpool is None: - self.avgpool = x.shape[2] // 7 - x = x.view(x.shape[0], -1) - x = self.classifier(x) 
- return x - - -class RNN(Module): - """A simple Recurrent Neural Network model.""" - - def __init__( - self, - input_size: int, - hidden_size: int, - num_layers: int = 1, - dropout: float = 0.0, - device: str = "cpu", - ): - super().__init__() - - self.hidden_size = hidden_size - self.num_layers = num_layers - - # Input gate - self.wx = Linear(input_size, hidden_size, device=device) - self.wh = Linear(hidden_size, hidden_size, device=device) - - # Additional layers - self.additional_layers = [] - for _ in range(num_layers - 1): - layer = Linear(hidden_size, hidden_size, device=device) - self.additional_layers.append(layer) - - self.dropout = Dropout(dropout) - self.activation = ReLU() - - def forward(self, x: Tensor, hidden: Tensor = None) -> Tuple[Tensor, Tensor]: - if hidden is None: - hidden = Tensor.zeros( - (self.num_layers, x.shape[0], self.hidden_size), device=x.device - ) - - outputs = [] - for t in range(x.shape[1]): - xt = x[:, t, :] - h = self.wx(xt) + self.wh(hidden[0]) - h = self.activation(h) - h = self.dropout(h) - - # Process additional layers - hidden_states = [h] - for i, layer in enumerate(self.additional_layers): - h = layer(h) + hidden[i + 1] - h = self.activation(h) - h = self.dropout(h) - hidden_states.append(h) - - hidden = Tensor.stack(hidden_states) - outputs.append(h) - - return Tensor.stack(outputs, dim=1), hidden - - -class LSTM(Module): - """Long Short-Term Memory network.""" - - def __init__( - self, - input_size: int, - hidden_size: int, - num_layers: int = 1, - dropout: float = 0.0, - device: str = "cpu", - ): - super().__init__() - - self.hidden_size = hidden_size - self.num_layers = num_layers - - # Gates for each layer - self.layers = [] - layer_input_size = input_size - for _ in range(num_layers): - layer = { - "forget": Linear( - layer_input_size + hidden_size, hidden_size, device=device - ), - "input": Linear( - layer_input_size + hidden_size, hidden_size, device=device - ), - "cell": Linear( - layer_input_size + hidden_size, 
hidden_size, device=device - ), - "output": Linear( - layer_input_size + hidden_size, hidden_size, device=device - ), - } - self.layers.append(layer) - layer_input_size = hidden_size - - self.dropout = Dropout(dropout) - - def forward( - self, x: Tensor, hidden: Tuple[Tensor, Tensor] = None - ) -> Tuple[Tensor, Tuple[Tensor, Tensor]]: - batch_size = x.shape[0] - seq_length = x.shape[1] - - if hidden is None: - h = Tensor.zeros( - (self.num_layers, batch_size, self.hidden_size), device=x.device - ) - c = Tensor.zeros( - (self.num_layers, batch_size, self.hidden_size), device=x.device - ) - hidden = (h, c) - - h, c = hidden - output_sequence = [] - - for t in range(seq_length): - xt = x[:, t, :] - - for layer in range(self.num_layers): - if layer > 0: - xt = self.dropout(xt) - - layer_h = h[layer] - layer_c = c[layer] - - # Concatenate input and hidden state - combined = Tensor.cat([xt, layer_h], dim=1) - - # Gate computations - forget_gate = self.layers[layer]["forget"](combined).sigmoid() - input_gate = self.layers[layer]["input"](combined).sigmoid() - cell_gate = self.layers[layer]["cell"](combined).tanh() - output_gate = self.layers[layer]["output"](combined).sigmoid() - - # Update cell and hidden state - layer_c = forget_gate * layer_c + input_gate * cell_gate - layer_h = output_gate * layer_c.tanh() - - c[layer] = layer_c - h[layer] = layer_h - xt = layer_h - - output_sequence.append(h[-1]) - - return Tensor.stack(output_sequence, dim=1), (h, c) diff --git a/python/axono/models/transformer.py b/python/axono/models/transformer.py deleted file mode 100644 index 2752f5c..0000000 --- a/python/axono/models/transformer.py +++ /dev/null @@ -1,161 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import Optional - -from ..core import Tensor -from ..nn import Dropout, LayerNorm, Linear, Module -from .container import Sequential - - -class MultiHeadAttention(Module): - def __init__( - self, - embed_dim: int, - num_heads: int, - dropout: float = 0.0, - bias: bool = True, - device: str = "cpu", - ): - super().__init__() - - if embed_dim % num_heads != 0: - raise ValueError( - f"embed_dim {embed_dim} not divisible by num_heads {num_heads}" - ) - - self.embed_dim = embed_dim - self.num_heads = num_heads - self.dropout = dropout - self.head_dim = embed_dim // num_heads - self.scaling = self.head_dim**-0.5 - - self.q_proj = Linear(embed_dim, embed_dim, bias=bias, device=device) - self.k_proj = Linear(embed_dim, embed_dim, bias=bias, device=device) - self.v_proj = Linear(embed_dim, embed_dim, bias=bias, device=device) - self.out_proj = Linear(embed_dim, embed_dim, bias=bias, device=device) - - self.dropout_layer = Dropout(dropout) - - def forward( - self, query: Tensor, key: Tensor, value: Tensor, mask: Optional[Tensor] = None - ) -> Tensor: - batch_size = query.shape[0] - - # Linear projections and reshape - q = self.q_proj(query).view(batch_size, -1, self.num_heads, self.head_dim) - k = self.k_proj(key).view(batch_size, -1, self.num_heads, self.head_dim) - v = self.v_proj(value).view(batch_size, -1, self.num_heads, self.head_dim) - - # Transpose for attention computation - q = q.transpose(1, 2) - k = k.transpose(1, 2) - v = v.transpose(1, 2) - - # Attention scores - attn_weights = (q @ k.transpose(-2, -1)) * self.scaling - - if mask is not 
None: - attn_weights = attn_weights.masked_fill(mask == 0, float("-inf")) - - attn_weights = attn_weights.softmax(dim=-1) - attn_weights = self.dropout_layer(attn_weights) - - # Attention output - attn_output = attn_weights @ v - - # Reshape and project output - attn_output = attn_output.transpose(1, 2).contiguous() - attn_output = attn_output.view(batch_size, -1, self.embed_dim) - attn_output = self.out_proj(attn_output) - - return attn_output - - -class TransformerEncoderLayer(Module): - def __init__( - self, - d_model: int, - nhead: int, - dim_feedforward: int = 2048, - dropout: float = 0.1, - device: str = "cpu", - ): - super().__init__() - - self.self_attn = MultiHeadAttention( - d_model, nhead, dropout=dropout, device=device - ) - - self.linear1 = Linear(d_model, dim_feedforward, device=device) - self.dropout = Dropout(dropout) - self.linear2 = Linear(dim_feedforward, d_model, device=device) - - self.norm1 = LayerNorm(d_model, device=device) - self.norm2 = LayerNorm(d_model, device=device) - self.dropout1 = Dropout(dropout) - self.dropout2 = Dropout(dropout) - - self.activation = self.gelu - - def forward(self, src: Tensor, mask: Optional[Tensor] = None) -> Tensor: - src2 = self.self_attn(src, src, src, mask=mask) - src = src + self.dropout1(src2) - src = self.norm1(src) - - src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) - src = src + self.dropout2(src2) - src = self.norm2(src) - - return src - - @staticmethod - def gelu(x: Tensor) -> Tensor: - return 0.5 * x * (1 + (x * 0.7978845608 * (1 + 0.044715 * x * x)).tanh()) - - -class Transformer(Module): - def __init__( - self, - d_model: int = 512, - nhead: int = 8, - num_encoder_layers: int = 6, - dim_feedforward: int = 2048, - dropout: float = 0.1, - device: str = "cpu", - ): - super().__init__() - - encoder_layers = [] - for _ in range(num_encoder_layers): - encoder_layers.append( - TransformerEncoderLayer( - d_model=d_model, - nhead=nhead, - dim_feedforward=dim_feedforward, - 
dropout=dropout, - device=device, - ) - ) - - self.encoder = Sequential(encoder_layers) - self.d_model = d_model - - self.reset_parameters() - - def reset_parameters(self): - for p in self.parameters(): - if p.dim() > 1: - # Initialize weights with scaled normal distribution - p.data.normal_(mean=0.0, std=0.02) - - def forward(self, src: Tensor, mask: Optional[Tensor] = None) -> Tensor: - return self.encoder(src) diff --git a/python/axono/nn/__init__.py b/python/axono/nn/__init__.py new file mode 100644 index 0000000..c9b7d89 --- /dev/null +++ b/python/axono/nn/__init__.py @@ -0,0 +1,4 @@ +from .module import Module +from .layers import Linear + +__all__ = ["Module", "Linear"] diff --git a/python/axono/nn/layers.py b/python/axono/nn/layers.py index 26b268a..bfb2fe8 100644 --- a/python/axono/nn/layers.py +++ b/python/axono/nn/layers.py @@ -1,74 +1,10 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from typing import Optional, Tuple, Union - +import os +from typing import Optional import numpy as np - from ..core import Tensor from .module import Module - -class Conv2d(Module): - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: Union[int, Tuple[int, int]], - stride: Union[int, Tuple[int, int]] = 1, - padding: Union[int, Tuple[int, int]] = 0, - bias: bool = True, - device: str = "cpu", - ): - super().__init__() - - if isinstance(kernel_size, int): - kernel_size = (kernel_size, kernel_size) - if isinstance(stride, int): - stride = (stride, stride) - if isinstance(padding, int): - padding = (padding, padding) - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.padding = padding - - # 初始化权重 - scale = np.sqrt(2.0 / (in_channels * kernel_size[0] * kernel_size[1])) - weight_data = np.random.normal( - 0, scale, (out_channels, in_channels, kernel_size[0], kernel_size[1]) - ) - - self._parameters["weight"] = Tensor.from_numpy(weight_data).to(device) - - if bias: - bias_data = np.zeros(out_channels) - self._parameters["bias"] = Tensor.from_numpy(bias_data).to(device) - else: - self._parameters["bias"] = None - - def forward(self, x: Tensor) -> Tensor: - # 使用CUDA kernel或优化的CPU实现 - from ..core.ops import conv2d - - return conv2d( - x, - self._parameters["weight"], - self._parameters.get("bias"), - self.stride, - self.padding, - ) - +device = os.getenv("axono_default_device", "cpu") class Linear(Module): def __init__( @@ -76,138 +12,42 @@ def __init__( in_features: int, out_features: int, bias: bool = True, - device: str = "cpu", + device: str = device ): super().__init__() + self._init_args = { + "in_features": in_features, + "out_features": out_features, + "bias": bias, + "device": device + } + + self.in_features = in_features + self.out_features = out_features + self.device = device - # 初始化权重 scale = np.sqrt(2.0 / in_features) - weight_data = 
np.random.normal(0, scale, (out_features, in_features)) - - self._parameters["weight"] = Tensor.from_numpy(weight_data).to(device) - + weight_data = np.random.normal( + loc=0.0, + scale=scale, + size=(out_features, in_features) + ).astype(np.float32) + weight_tensor = Tensor.from_numpy(weight_data).to(device) + print(1) + self.add_weight("weight", weight_tensor) if bias: - bias_data = np.zeros(out_features) - self._parameters["bias"] = Tensor.from_numpy(bias_data).to(device) + bias_data = np.zeros(out_features, dtype=np.float32) + bias_tensor = Tensor.from_numpy(bias_data).to(device) + self.add_weight("bias", bias_tensor) else: self._parameters["bias"] = None def forward(self, x: Tensor) -> Tensor: + """前向传播:y = x @ weight.T + bias(若启用)""" output = x @ self._parameters["weight"].T + + # 加上偏置(广播机制) if self._parameters["bias"] is not None: output = output + self._parameters["bias"] + return output - - -class BatchNorm2d(Module): - def __init__( - self, - num_features: int, - eps: float = 1e-5, - momentum: float = 0.1, - device: str = "cpu", - ): - super().__init__() - - self.num_features = num_features - self.eps = eps - self.momentum = momentum - - # 可学习参数 - self._parameters["weight"] = Tensor.from_numpy(np.ones(num_features)).to(device) - self._parameters["bias"] = Tensor.from_numpy(np.zeros(num_features)).to(device) - - # 运行时统计量 - self.register_buffer( - "running_mean", Tensor.from_numpy(np.zeros(num_features)).to(device) - ) - self.register_buffer( - "running_var", Tensor.from_numpy(np.ones(num_features)).to(device) - ) - - self.reset_parameters() - - def reset_parameters(self): - self.running_mean.fill_zero() - self.running_var.fill(1) - self._parameters["weight"].fill(1) - self._parameters["bias"].fill_zero() - - def forward(self, x: Tensor) -> Tensor: - if self.is_training: - # 计算批次统计量 - mean = x.mean(dim=(0, 2, 3)) - var = x.var(dim=(0, 2, 3), unbiased=False) - - # 更新运行时统计量 - self.running_mean = ( - 1 - self.momentum - ) * self.running_mean + self.momentum * 
mean - self.running_var = ( - 1 - self.momentum - ) * self.running_var + self.momentum * var - else: - mean = self.running_mean - var = self.running_var - - # 标准化 - x_normalized = (x - mean[None, :, None, None]) / ( - np.sqrt(var[None, :, None, None] + self.eps) - ) - - # 缩放和平移 - return ( - self._parameters["weight"][None, :, None, None] * x_normalized - + self._parameters["bias"][None, :, None, None] - ) - - -class ReLU(Module): - def forward(self, x: Tensor) -> Tensor: - from ..core.ops import relu - - return relu(x) - - -class MaxPool2d(Module): - def __init__( - self, - kernel_size: Union[int, Tuple[int, int]], - stride: Optional[Union[int, Tuple[int, int]]] = None, - padding: Union[int, Tuple[int, int]] = 0, - ): - super().__init__() - - if isinstance(kernel_size, int): - kernel_size = (kernel_size, kernel_size) - if stride is None: - stride = kernel_size - if isinstance(stride, int): - stride = (stride, stride) - if isinstance(padding, int): - padding = (padding, padding) - - self.kernel_size = kernel_size - self.stride = stride - self.padding = padding - - def forward(self, x: Tensor) -> Tensor: - from ..core.ops import max_pool2d - - return max_pool2d(x, self.kernel_size, self.stride, self.padding) - - -class Dropout(Module): - def __init__(self, p: float = 0.5): - super().__init__() - if p < 0 or p > 1: - raise ValueError("Dropout probability has to be between 0 and 1") - self.p = p - - def forward(self, x: Tensor) -> Tensor: - if self.is_training: - mask = Tensor.from_numpy( - (np.random.rand(*x.shape) > self.p).astype(np.float32) - ).to(x.device) - return x * mask / (1 - self.p) - return x diff --git a/python/axono/nn/module.py b/python/axono/nn/module.py index 8f17fe6..cb960d0 100644 --- a/python/axono/nn/module.py +++ b/python/axono/nn/module.py @@ -1,47 +1,49 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from abc import ABC, abstractmethod -from typing import List - +# python/axono/nn/module.py +from typing import Dict, List +from libaxono import Module as _Module +from libaxono import Tensor as _Tensor from ..core import Tensor - -class Module(ABC): +class Module: def __init__(self): - self._parameters = {} + self._parameters: Dict[str, Tensor] = {} + self._cpp_module = _Module() self._is_training = True + self._name = self.__class__.__name__ - @abstractmethod - def forward(self, x: Tensor) -> Tensor: - pass + def add_weight(self, name: str, tensor: Tensor) -> None: + self._parameters[name] = tensor + self._cpp_module.add_weight(name, tensor._tensor) - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) + def parameters(self) -> Dict[str, Tensor]: + for k, v in self._parameters.items(): + if type(v) == _Tensor: + self._parameters[k] = Tensor.from_raw(v) + return dict(self._parameters) - def train(self, mode: bool = True): + def train(self, mode: bool = True) -> "Module": self._is_training = mode return self - - def eval(self): - return self.train(False) - - @property - def is_training(self) -> bool: - return self._is_training - - def parameters(self) -> List[Tensor]: - return list(self._parameters.values()) - - def to(self, device: str) -> "Module": - for name, param in self._parameters.items(): - self._parameters[name] = param.to(device) - return self + def __repr__(self) -> str: + cls_name = self.__class__.__name__ + init_args = [] + if hasattr(self, '_init_args'): + init_args = [f"{k}={v}" for k, v in self._init_args.items()] + + if not 
hasattr(self, '_modules') or not self._modules: + if init_args: + return f"{cls_name}({', '.join(init_args)})" + else: + return f"{cls_name}()" + + lines = [f"{cls_name}("] + indent = " " + if init_args: + lines.append(f"{indent}{', '.join(init_args)},") + + for name, module in self._modules.items(): + submodule_repr = repr(module).replace("\n", f"\n{indent}") + lines.append(f"{indent}({name}): {submodule_repr}") + + lines.append(")") + return "\n".join(lines) diff --git a/python/axono/train/optimizer.py b/python/axono/train/optimizer.py deleted file mode 100644 index 5a87525..0000000 --- a/python/axono/train/optimizer.py +++ /dev/null @@ -1,122 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from typing import Any, Dict, List, Tuple - -from ..core import Tensor - - -class Optimizer: - def __init__(self, params: List[Tensor], lr: float = 0.01): - self.params = params - self.lr = lr - self._state: Dict[str, Any] = {} - - def step(self): - """Update parameters using gradients""" - raise NotImplementedError - - def zero_grad(self): - """Zero out parameter gradients""" - for param in self.params: - if param.grad is not None: - param.grad.fill_zero() - - -class SGD(Optimizer): - def __init__( - self, - params: List[Tensor], - lr: float = 0.01, - momentum: float = 0.0, - weight_decay: float = 0.0, - ): - super().__init__(params, lr) - self.momentum = momentum - self.weight_decay = weight_decay - - if momentum > 0: - self._state["momentum_buffer"] = [Tensor.zeros_like(p) for p in params] - - def step(self): - for i, param in enumerate(self.params): - if param.grad is None: - continue - - grad = param.grad - - if self.weight_decay != 0: - grad = grad + self.weight_decay * param - - if self.momentum > 0: - buf = self._state["momentum_buffer"][i] - buf = buf * self.momentum + grad - self._state["momentum_buffer"][i] = buf - grad = buf - - param -= self.lr * grad - - -class Adam(Optimizer): - def __init__( - self, - params: List[Tensor], - lr: float = 0.001, - betas: Tuple[float, float] = (0.9, 0.999), - eps: float = 1e-8, - weight_decay: float = 0.0, - ): - super().__init__(params, lr) - self.betas = betas - self.eps = eps - self.weight_decay = weight_decay - - self._state["step"] = 0 - self._state["exp_avg"] = [Tensor.zeros_like(p) for p in params] - self._state["exp_avg_sq"] = [Tensor.zeros_like(p) for p in params] - - def step(self): - self._state["step"] += 1 - - for i, param in enumerate(self.params): - if param.grad is None: - continue - - grad = param.grad - if self.weight_decay != 0: - grad = grad + self.weight_decay * param - - beta1, beta2 = self.betas - exp_avg = self._state["exp_avg"][i] - exp_avg_sq = self._state["exp_avg_sq"][i] - - # Update biased 
first moment estimate - exp_avg = beta1 * exp_avg + (1 - beta1) * grad - - # Update biased second raw moment estimate - exp_avg_sq = beta2 * exp_avg_sq + (1 - beta2) * grad * grad - - # Store updated moments - self._state["exp_avg"][i] = exp_avg - self._state["exp_avg_sq"][i] = exp_avg_sq - - # Bias correction - bias_correction1 = 1 - beta1 ** self._state["step"] - bias_correction2 = 1 - beta2 ** self._state["step"] - - # Compute bias-corrected moments - exp_avg_corrected = exp_avg / bias_correction1 - exp_avg_sq_corrected = exp_avg_sq / bias_correction2 - - # Update parameters - param -= ( - self.lr * exp_avg_corrected / (exp_avg_sq_corrected.sqrt() + self.eps) - ) diff --git a/python/axono/train/trainer.py b/python/axono/train/trainer.py deleted file mode 100644 index 00786f6..0000000 --- a/python/axono/train/trainer.py +++ /dev/null @@ -1,193 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import time -from typing import Any, Callable, Dict, Optional - -from ..core import Tensor -from ..nn import Module - - -class LossFunction: - @staticmethod - def mse_loss(pred: Tensor, target: Tensor) -> Tensor: - """Mean Squared Error Loss""" - return ((pred - target) ** 2).mean() - - @staticmethod - def cross_entropy_loss(pred: Tensor, target: Tensor) -> Tensor: - """Cross Entropy Loss""" - log_softmax = pred.log_softmax(dim=1) - return -(target * log_softmax).sum(dim=1).mean() - - @staticmethod - def bce_loss(pred: Tensor, target: Tensor) -> Tensor: - """Binary Cross Entropy Loss""" - return -(target * pred.log() + (1 - target) * (1 - pred).log()).mean() - - @staticmethod - def l1_loss(pred: Tensor, target: Tensor) -> Tensor: - """L1 Loss""" - return (pred - target).abs().mean() - - -class Trainer: - def __init__( - self, - model: Module, - optimizer: Any, # Will be implemented in optimizer.py - loss_fn: str = "cross_entropy", - device: str = "cpu", - callbacks: Optional[Dict[str, Callable]] = None, - ): - self.model = model.to(device) - self.optimizer = optimizer - self.device = device - self.callbacks = callbacks or {} - - # Set loss function - if isinstance(loss_fn, str): - if loss_fn == "mse": - self.loss_fn = LossFunction.mse_loss - elif loss_fn == "cross_entropy": - self.loss_fn = LossFunction.cross_entropy_loss - elif loss_fn == "bce": - self.loss_fn = LossFunction.bce_loss - elif loss_fn == "l1": - self.loss_fn = LossFunction.l1_loss - else: - raise ValueError(f"Unknown loss function: {loss_fn}") - else: - self.loss_fn = loss_fn - - def train_step(self, batch: Dict[str, Tensor]) -> Dict[str, float]: - """Single training step""" - self.model.train() - - # Move batch to device - inputs = batch["inputs"].to(self.device) - targets = batch["targets"].to(self.device) - - # Forward pass - outputs = self.model(inputs) - loss = self.loss_fn(outputs, targets) - - # Backward pass - self.optimizer.zero_grad() - loss.backward() - self.optimizer.step() - - return 
{"loss": loss.item()} - - def eval_step(self, batch: Dict[str, Tensor]) -> Dict[str, float]: - """Single evaluation step""" - self.model.eval() - - with Tensor.no_grad(): - # Move batch to device - inputs = batch["inputs"].to(self.device) - targets = batch["targets"].to(self.device) - - # Forward pass - outputs = self.model(inputs) - loss = self.loss_fn(outputs, targets) - - # Calculate accuracy - predictions = outputs.argmax(dim=1) - correct = (predictions == targets.argmax(dim=1)).sum() - accuracy = correct.item() / targets.shape[0] - - return {"loss": loss.item(), "accuracy": accuracy} - - def fit( - self, - train_loader: Any, # Will be implemented in data.py - valid_loader: Optional[Any] = None, - epochs: int = 10, - log_interval: int = 100, - ): - """Train the model""" - for epoch in range(epochs): - start_time = time.time() - train_metrics = [] - - # Training loop - for i, batch in enumerate(train_loader): - metrics = self.train_step(batch) - train_metrics.append(metrics) - - if i % log_interval == 0: - metrics_str = ", ".join(f"{k}: {v:.4f}" for k, v in metrics.items()) - print( - f"Epoch {epoch + 1}/{epochs} " - f"[{i}/{len(train_loader)}] {metrics_str}" - ) - - # Calculate average training metrics - train_avg_metrics = {} - for key in train_metrics[0].keys(): - train_avg_metrics[key] = sum(m[key] for m in train_metrics) / len( - train_metrics - ) - - # Validation loop - if valid_loader is not None: - valid_metrics = [] - for batch in valid_loader: - metrics = self.eval_step(batch) - valid_metrics.append(metrics) - - # Calculate average validation metrics - valid_avg_metrics = {} - for key in valid_metrics[0].keys(): - valid_avg_metrics[key] = sum(m[key] for m in valid_metrics) / len( - valid_metrics - ) - - # Log epoch metrics - epoch_time = time.time() - start_time - metrics_str = ", ".join( - f"train_{k}: {v:.4f}" for k, v in train_avg_metrics.items() - ) - if valid_loader is not None: - metrics_str += ", " + ", ".join( - f"valid_{k}: {v:.4f}" for k, v 
in valid_avg_metrics.items() - ) - print( - f"Epoch {epoch + 1}/{epochs} completed in {epoch_time:.2f}s. " - f"Metrics: {metrics_str}" - ) - - # Call callbacks - if "on_epoch_end" in self.callbacks: - self.callbacks["on_epoch_end"]( - epoch=epoch, - metrics={ - "train": train_avg_metrics, - "valid": valid_avg_metrics if valid_loader else None, - }, - ) - - def evaluate(self, test_loader: Any) -> Dict[str, float]: - """Evaluate the model""" - test_metrics = [] - for batch in test_loader: - metrics = self.eval_step(batch) - test_metrics.append(metrics) - - # Calculate average test metrics - test_avg_metrics = {} - for key in test_metrics[0].keys(): - test_avg_metrics[key] = sum(m[key] for m in test_metrics) / len( - test_metrics - ) - - return test_avg_metrics diff --git a/python/axono/viz/visualizer.py b/python/axono/viz/visualizer.py deleted file mode 100644 index 6c9d091..0000000 --- a/python/axono/viz/visualizer.py +++ /dev/null @@ -1,245 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from typing import Dict, List, Tuple - -import matplotlib.pyplot as plt -import numpy as np - -from ..core import Tensor -from ..nn import Module - - -class ModelVisualizer: - """Visualize model architecture and computational graph""" - - def __init__(self, model: Module): - self.model = model - self.graph = {} - self._build_graph() - - def _build_graph(self): - """Build computational graph from model""" - - def _add_module(module: Module, name: str = ""): - for child_name, child in module.named_children(): - child_full_name = f"{name}.{child_name}" if name else child_name - self.graph[child_full_name] = { - "type": type(child).__name__, - "params": { - name: tensor.shape for name, tensor in child.named_parameters() - }, - "children": [], - } - _add_module(child, child_full_name) - - _add_module(self.model) - - def plot(self, figsize: Tuple[int, int] = (12, 8)) -> None: - """Plot model architecture""" - import networkx as nx - - graph = nx.DiGraph() - pos = {} - labels = {} - - # Add nodes and edges - y_offset = 0 - for name, info in self.graph.items(): - graph.add_node(name) - pos[name] = (len(name.split(".")), y_offset) - labels[name] = f"{info['type']}\n{name}" - y_offset += 1 - - # Add edges between parent and child modules - parent = ".".join(name.split(".")[:-1]) - if parent in self.graph: - graph.add_edge(parent, name) - - plt.figure(figsize=figsize) - nx.draw( - graph, - pos, - labels=labels, - with_labels=True, - node_color="lightblue", - node_size=2000, - font_size=8, - font_weight="bold", - ) - plt.title("Model Architecture") - plt.tight_layout() - plt.show() - - def summary(self) -> None: - """Print model summary""" - total_params = 0 - trainable_params = 0 - - print("Model Summary:") - print("=" * 80) - print(f"{'Layer':<40} {'Output Shape':<20} {'Param #':<10}") - print("-" * 80) - - for name, module in self.model.named_modules(): - params = sum(p.numel() for p in module.parameters()) - trainable = sum(p.numel() for p in module.parameters() if 
p.requires_grad) - - if params > 0: - print(f"{name:<40} {str(module):<20} {params:<10,d}") - total_params += params - trainable_params += trainable - - print("=" * 80) - print(f"Total params: {total_params:,}") - print(f"Trainable params: {trainable_params:,}") - print(f"Non-trainable params: {total_params - trainable_params:,}") - - -class TrainingVisualizer: - """Visualize training progress and metrics""" - - def __init__(self): - self.history = { - "train": {"loss": [], "accuracy": []}, - "valid": {"loss": [], "accuracy": []}, - } - self.current_epoch = 0 - - def update(self, metrics: Dict[str, Dict[str, float]], epoch: int) -> None: - """Update training history with new metrics""" - self.current_epoch = epoch - - for split in ["train", "valid"]: - if split in metrics: - for metric, value in metrics[split].items(): - self.history[split][metric].append(value) - - def plot_metrics(self, figsize: Tuple[int, int] = (12, 4)) -> None: - """Plot training metrics""" - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize) - - epochs = range(1, self.current_epoch + 2) - - # Plot loss - ax1.plot(epochs, self.history["train"]["loss"], "b-", label="Training") - if self.history["valid"]["loss"]: - ax1.plot(epochs, self.history["valid"]["loss"], "r-", label="Validation") - ax1.set_title("Loss") - ax1.set_xlabel("Epoch") - ax1.set_ylabel("Loss") - ax1.legend() - ax1.grid(True) - - # Plot accuracy - ax2.plot(epochs, self.history["train"]["accuracy"], "b-", label="Training") - if self.history["valid"]["accuracy"]: - ax2.plot( - epochs, self.history["valid"]["accuracy"], "r-", label="Validation" - ) - ax2.set_title("Accuracy") - ax2.set_xlabel("Epoch") - ax2.set_ylabel("Accuracy") - ax2.legend() - ax2.grid(True) - - plt.tight_layout() - plt.show() - - -class FeatureVisualizer: - """Visualize model's feature maps and filters""" - - def __init__(self, model: Module): - self.model = model - self.hooks = [] - self.feature_maps = {} - - def _hook_fn(self, name: str): - def 
hook(module, input, output): - self.feature_maps[name] = output.to_numpy() - - return hook - - def register_hooks(self, layer_names: List[str]) -> None: - """Register forward hooks for specified layers""" - for name, module in self.model.named_modules(): - if name in layer_names: - hook = module.register_forward_hook(self._hook_fn(name)) - self.hooks.append(hook) - - def remove_hooks(self) -> None: - """Remove all registered hooks""" - for hook in self.hooks: - hook.remove() - self.hooks.clear() - - def plot_feature_maps( - self, - input_tensor: Tensor, - layer_name: str, - num_features: int = 16, - figsize: Tuple[int, int] = (12, 8), - ) -> None: - """Plot feature maps for a specific layer""" - # Forward pass to get feature maps - _ = self.model(input_tensor) - - if layer_name not in self.feature_maps: - raise ValueError(f"No feature maps found for layer {layer_name}") - - feature_maps = self.feature_maps[layer_name][0] # First batch only - num_features = min(num_features, feature_maps.shape[0]) - - # Plot feature maps - fig, axes = plt.subplots(4, num_features // 4, figsize=figsize) - axes = axes.ravel() - - for i in range(num_features): - axes[i].imshow(feature_maps[i], cmap="viridis") - axes[i].axis("off") - - plt.suptitle(f"Feature Maps - {layer_name}") - plt.tight_layout() - plt.show() - - def plot_filters( - self, layer_name: str, num_filters: int = 16, figsize: Tuple[int, int] = (12, 8) - ) -> None: - """Plot convolutional filters for a specific layer""" - for name, module in self.model.named_modules(): - if name == layer_name: - if not hasattr(module, "weight"): - raise ValueError(f"Layer {layer_name} has no weights") - - weights = module.weight.to_numpy() - num_filters = min(num_filters, weights.shape[0]) - - # Plot filters - fig, axes = plt.subplots(4, num_filters // 4, figsize=figsize) - axes = axes.ravel() - - for i in range(num_filters): - # For RGB filters, take mean across channels - if weights.shape[1] == 3: - filt = np.mean(weights[i], axis=0) - 
else: - filt = weights[i, 0] - - axes[i].imshow(filt, cmap="viridis") - axes[i].axis("off") - - plt.suptitle(f"Convolution Filters - {layer_name}") - plt.tight_layout() - plt.show() - return - - raise ValueError(f"Layer {layer_name} not found in model") diff --git a/python/src/pybind11_module.cpp b/python/src/pybind11_module.cpp index 31fa823..878a573 100644 --- a/python/src/pybind11_module.cpp +++ b/python/src/pybind11_module.cpp @@ -6,10 +6,11 @@ #include "axono/pybind/compute/operators/matmul.h" #include "axono/pybind/compute/ops/relu.h" #include "axono/pybind/core/tensor.h" +#include "axono/pybind/core/module.h" namespace py = pybind11; -PYBIND11_MODULE(axonolib, m) { +PYBIND11_MODULE(libaxono, m) { m.doc() = "Axono Library"; // 数据类型枚举 @@ -35,6 +36,7 @@ PYBIND11_MODULE(axonolib, m) { // 初始化 Tensor init_tensor(m); + init_module(m); init_matmul_operations(m); init_add_operations(m); init_relu_operations(m); diff --git a/python/tests/core/nn/test_module.py b/python/tests/core/nn/test_module.py new file mode 100644 index 0000000..f5f91d9 --- /dev/null +++ b/python/tests/core/nn/test_module.py @@ -0,0 +1,41 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import sys +import unittest + +import numpy as np + +sys.path.insert( + 0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) +) + +from axono.core import Tensor +from axono.nn import Module + +device = os.getenv("axono_default_device", "cpu") + + +class TestModule(unittest.TestCase): + """nn.Module 测试""" + def test_weight(self): + """测试权重功能""" + _Module = Module() + data = Tensor(shape=[1], device=device) + data.fill(1) + _Module.add_weight("weight", data) + # 测试填充 + _Module.parameters()["weight"].fill(2) + self.assertEqual(_Module.parameters()["weight"].shape, [1]) + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/src/core/tensor.cpp b/src/core/tensor.cpp index ad3f31c..a8d0994 100644 --- a/src/core/tensor.cpp +++ b/src/core/tensor.cpp @@ -75,55 +75,73 @@ Tensor::Tensor(const Tensor &other) device_(other.device_), num_elements_(other.num_elements_) { if (other.data_) { - // 根据设备类型初始化存储 - if (device_ == other.device_) { - // 相同设备,分配内存并拷贝 - InitializeStorage(); - if (device_.substr(0, 4) == "cuda") { - // CUDA设备间的拷贝 + // 总是重新分配存储 + InitializeStorage(); + + // 执行设备间拷贝 + if (is_cuda()) { #ifdef COMPILED_WITH_CUDA + if (other.is_cuda()) { cuda::detail::cuda_memcpy_d2d(data_.get(), other.data_.get(), num_bytes()); -#endif } else { - // CPU设备间的拷贝 - std::memcpy(data_.get(), other.data_.get(), num_bytes()); + cuda::detail::cuda_memcpy_h2d(data_.get(), other.data_.get(), + num_bytes()); } +#endif } else { - // 不同设备,需要转换 - InitializeStorage(); - if (other.device_.substr(0, 4) == "cuda" && - device_.substr(0, 3) == "cpu") { + if (other.is_cuda()) { #ifdef COMPILED_WITH_CUDA - // CUDA -> CPU cuda::detail::cuda_memcpy_d2h(data_.get(), other.data_.get(), num_bytes()); -#endif - } else if (other.device_.substr(0, 3) == "cpu" && - device_.substr(0, 4) == "cuda") { - // CPU -> CUDA -#ifdef COMPILED_WITH_CUDA - cuda::detail::cuda_memcpy_h2d(data_.get(), other.data_.get(), - num_bytes()); #endif } else { - // 其他情况 - throw 
std::runtime_error("Unsupported device copy"); + std::memcpy(data_.get(), other.data_.get(), num_bytes()); } } } } Tensor &Tensor::operator=(const Tensor &other) { if (this != &other) { + // 清理旧数据 + data_.reset(); + + // 更新所有成员变量 dtype_ = other.dtype_; shape_ = other.shape_; + device_ = other.device_; // 重要:更新设备信息! num_elements_ = other.num_elements_; - + if (other.data_) { + // 重新初始化存储 InitializeStorage(); - std::memcpy(data_.get(), other.data_.get(), num_bytes()); - } else { - data_.reset(); + + // 执行设备间正确的拷贝 + if (device_ == other.device_) { + if (is_cuda()) { +#ifdef COMPILED_WITH_CUDA + cuda::detail::cuda_memcpy_d2d(data_.get(), other.data_.get(), + num_bytes()); +#endif + } else { + std::memcpy(data_.get(), other.data_.get(), num_bytes()); + } + } else { + // 跨设备拷贝 + if (other.is_cuda() && !is_cuda()) { +#ifdef COMPILED_WITH_CUDA + cuda::detail::cuda_memcpy_d2h(data_.get(), other.data_.get(), + num_bytes()); +#endif + } else if (!other.is_cuda() && is_cuda()) { +#ifdef COMPILED_WITH_CUDA + cuda::detail::cuda_memcpy_h2d(data_.get(), other.data_.get(), + num_bytes()); +#endif + } else { + throw std::runtime_error("Unsupported device copy"); + } + } } } return *this; @@ -211,7 +229,7 @@ void Tensor::InitializeStorage() { size_t bytes = num_bytes(); if (bytes == 0) return; - if (device_.substr(0, 4) == "cuda") { + if (is_cuda()) { #ifdef COMPILED_WITH_CUDA data_ = cuda::detail::CudaAllocateStorage(bytes, device_); #endif