Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ if(MSVC)
$<$<CONFIG:Release>:NDEBUG>
)

# SIMD optimization option for MSVC (default: ON).
# When ON, /arch:AVX2 is added to the NNets compile options further below.
# NOTE(review): binaries built with /arch:AVX2 presumably require an
# AVX2-capable CPU at run time - confirm against the supported hardware.
option(ENABLE_SIMD_AVX2 "Enable AVX2 SIMD optimizations for MSVC" ON)

target_compile_options(NNets PRIVATE
/W3 # Warning level 3
$<$<CONFIG:Debug>:/Od> # Disable optimization for Debug
Expand All @@ -45,6 +48,12 @@ if(MSVC)
$<$<CONFIG:Release>:/Oy> # Enable frame pointer omission
)

# Apply the MSVC SIMD setting: ENABLE_SIMD_AVX2 turns on /arch:AVX2 for NNets
# and reports the choice in the configure log.
if(ENABLE_SIMD_AVX2)
  message(STATUS "SIMD: AVX2 enabled for MSVC")
  target_compile_options(NNets
    PRIVATE
      /arch:AVX2
  )
endif()

# Select the MSVC runtime library for NNets: "MultiThreaded" for every
# configuration, with "Debug" appended in the Debug configuration.
set_target_properties(NNets PROPERTIES
  MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>"
)
Expand All @@ -56,6 +65,11 @@ else()
strcpy_s=strcpy # Use standard strcpy instead of MSVC's strcpy_s
)

# SIMD optimization options for non-MSVC compilers.
# Only one of them takes effect, in this priority order:
#   1. ENABLE_SIMD_NATIVE (default ON) - adds -march=native
#   2. ENABLE_AVX         (default OFF) - adds -mavx -mavx2
#   3. ENABLE_SSE         (default OFF) - adds -msse -msse2 -msse4.1
# With all three OFF no SIMD flags are added.
# To use ENABLE_AVX or ENABLE_SSE, ENABLE_SIMD_NATIVE must be turned OFF.
option(ENABLE_SIMD_NATIVE "Enable SIMD optimizations for native architecture" ON)
option(ENABLE_AVX "Enable AVX SIMD optimizations explicitly" OFF)
option(ENABLE_SSE "Enable SSE SIMD optimizations explicitly" OFF)

target_compile_options(NNets PRIVATE
-Wall
-Wextra
Expand All @@ -66,6 +80,21 @@ else()
$<$<CONFIG:Debug>:-O0>
$<$<CONFIG:Release>:-O3>
)

# Add SIMD flags based on options.
# Precedence: ENABLE_SIMD_NATIVE > ENABLE_AVX > ENABLE_SSE > scalar build.
if(ENABLE_SIMD_NATIVE)
  # ENABLE_SIMD_NATIVE wins over the explicit options. Tell the user instead
  # of silently dropping what they asked for.
  if(ENABLE_AVX OR ENABLE_SSE)
    message(WARNING
      "SIMD: ENABLE_AVX/ENABLE_SSE are ignored while ENABLE_SIMD_NATIVE is ON; "
      "configure with -DENABLE_SIMD_NATIVE=OFF to use them")
  endif()

  # Not every compiler/target combination accepts -march=native, so probe the
  # flag first; an unsupported flag would otherwise break every compile step.
  include(CheckCXXCompilerFlag)
  check_cxx_compiler_flag(-march=native NNETS_COMPILER_SUPPORTS_MARCH_NATIVE)
  if(NNETS_COMPILER_SUPPORTS_MARCH_NATIVE)
    # Use -march=native for automatic detection of CPU SIMD capabilities
    target_compile_options(NNets PRIVATE -march=native)
    message(STATUS "SIMD: Using native architecture detection (-march=native)")
  else()
    message(WARNING
      "SIMD: the compiler does not accept -march=native; "
      "building without SIMD flags")
  endif()
elseif(ENABLE_AVX)
  target_compile_options(NNets PRIVATE -mavx -mavx2)
  message(STATUS "SIMD: AVX/AVX2 explicitly enabled")
elseif(ENABLE_SSE)
  target_compile_options(NNets PRIVATE -msse -msse2 -msse4.1)
  message(STATUS "SIMD: SSE/SSE2/SSE4.1 explicitly enabled")
else()
  message(STATUS "SIMD: No SIMD optimizations (scalar operations)")
endif()
endif()

# Set output directories
Expand Down Expand Up @@ -211,3 +240,19 @@ set_tests_properties(test_retraining_mode PROPERTIES
TIMEOUT 600
LABELS "retraining;training"
)

# Test 9: SIMD optimization test
# Runs cmake/test_simd_benchmark.cmake in script mode. The script executes
# NNets with SIMD enabled and then again with the --no-simd flag.
add_test(
  NAME test_simd_optimization
  COMMAND ${CMAKE_COMMAND}
    -DNNETS_EXE=$<TARGET_FILE:NNets>
    -DCONFIG_DIR=${CMAKE_SOURCE_DIR}/configs
    -DWORK_DIR=${CMAKE_BINARY_DIR}
    -P ${CMAKE_SOURCE_DIR}/cmake/test_simd_benchmark.cmake
  WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
)
# The script performs two sequential NNets runs and allows each of them up to
# 120 seconds. The CTest timeout therefore has to exceed 2 x 120 s; otherwise
# CTest kills the script before it can report which run failed.
set_tests_properties(test_simd_optimization PROPERTIES
  TIMEOUT 300
  LABELS "simd;performance"
)
49 changes: 49 additions & 0 deletions cmake/test_simd_benchmark.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# test_simd_benchmark.cmake
# Smoke test that checks the SIMD optimizations of the NNets executable work.
#
# Run in script mode with all three variables defined:
#   cmake -DNNETS_EXE=<NNets executable>
#         -DCONFIG_DIR=<directory containing simple.json>
#         -DWORK_DIR=<working directory for the runs>
#         -P test_simd_benchmark.cmake

# Fail early with a clear message when a required input is missing; an empty
# value would otherwise surface as a confusing execute_process() failure.
foreach(required_var IN ITEMS NNETS_EXE CONFIG_DIR WORK_DIR)
  if(NOT DEFINED ${required_var} OR "${${required_var}}" STREQUAL "")
    message(FATAL_ERROR
      "test_simd_benchmark.cmake: ${required_var} is not set; "
      "pass -D${required_var}=<value>")
  endif()
endforeach()

# Runs the single-threaded NNets benchmark on simple.json and aborts the test
# unless the process exits with status 0 and its stdout matches a pattern.
#
#   label            - name of the scenario, used in status/failure messages
#   expected_pattern - regular expression that must match the captured stdout
#   pattern_error    - failure text used when the pattern does not match
#   ARGN             - additional command-line arguments passed to NNets
function(nnets_check_simd_run label expected_pattern pattern_error)
  message(STATUS "Testing with ${label}...")
  execute_process(
    COMMAND "${NNETS_EXE}" -c "${CONFIG_DIR}/simple.json" -b --single-thread ${ARGN}
    WORKING_DIRECTORY "${WORK_DIR}"
    RESULT_VARIABLE run_result
    OUTPUT_VARIABLE run_stdout
    ERROR_VARIABLE run_stderr
    TIMEOUT 120
  )

  # run_result is the exit code, or a descriptive string (e.g. on timeout);
  # anything other than 0 is a failure. Report it together with both output
  # streams so a crash with empty stderr is still diagnosable.
  if(NOT run_result EQUAL 0)
    message(FATAL_ERROR
      "${label} test failed: ${run_stderr}\n"
      "exit status: ${run_result}\n"
      "--- stdout ---\n${run_stdout}")
  endif()

  if(NOT run_stdout MATCHES "${expected_pattern}")
    message(FATAL_ERROR
      "${pattern_error}\n"
      "--- stdout ---\n${run_stdout}")
  endif()

  message(STATUS "${label} test passed")
endfunction()

message(STATUS "=== SIMD Benchmark Test ===")

# Run benchmark with SIMD enabled (default) and check that SIMD info is displayed
nnets_check_simd_run(
  "SIMD enabled"
  "SIMD:"
  "SIMD info not displayed in output"
)

# Run benchmark with SIMD disabled and verify that the flag was honoured
nnets_check_simd_run(
  "SIMD disabled"
  "disabled via --no-simd"
  "--no-simd flag did not work properly"
  --no-simd
)

message(STATUS "=== SIMD Benchmark Test Complete ===")
155 changes: 155 additions & 0 deletions experiments/test_simd_micro.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*
* test_simd_micro.cpp - Микро-бенчмарк для тестирования SIMD операций
*
* Тестирует производительность SIMD-оптимизированных векторных операций
* на больших массивах для измерения реального ускорения.
*
* Компиляция:
* g++ -O3 -march=native -o test_simd_micro test_simd_micro.cpp
*
* Запуск:
* ./test_simd_micro
*/

#include <iostream>
#include <chrono>
#include <vector>
#include <cstdlib>
#include <cmath>
#include <iomanip>

// Global SIMD on/off flag (required by simd_ops.h, which is included below)
bool UseSIMD = true;

#include "../include/simd_ops.h"

using namespace std;
using namespace std::chrono;

// Number of repetitions per measurement, used to average the results
const int ITERATIONS = 1000;

// Array sizes (element counts) to benchmark
const int SIZES[] = {16, 48, 100, 256, 1000, 10000};
const int NUM_SIZES = sizeof(SIZES) / sizeof(SIZES[0]);

// Fill the first `size` elements of `arr` with pseudo-random values taken
// from rand() and scaled to the range [-1, 1].
void initRandom(float* arr, int size) {
    int idx = 0;
    while (idx < size) {
        arr[idx] = static_cast<float>(rand()) / RAND_MAX * 2.0f - 1.0f;
        ++idx;
    }
}

// Measure the addition operation: stores `useSIMD` in the global UseSIMD flag,
// calls op_add_simd(r, a, b, size) ITERATIONS times and returns the mean
// wall-clock time of one call in nanoseconds.
double benchmarkAdd(float* r, const float* a, const float* b, int size, bool useSIMD) {
    UseSIMD = useSIMD;

    const auto t0 = high_resolution_clock::now();
    int remaining = ITERATIONS;
    while (remaining-- > 0) {
        op_add_simd(r, a, b, size);
    }
    const auto t1 = high_resolution_clock::now();

    const auto totalNs = duration_cast<nanoseconds>(t1 - t0).count();
    return totalNs / (double)ITERATIONS;
}

// Measure the subtraction operation: stores `useSIMD` in the global UseSIMD
// flag, calls op_sub_simd(r, a, b, size) ITERATIONS times and returns the mean
// wall-clock time of one call in nanoseconds.
double benchmarkSub(float* r, const float* a, const float* b, int size, bool useSIMD) {
    UseSIMD = useSIMD;

    const auto t0 = high_resolution_clock::now();
    int remaining = ITERATIONS;
    while (remaining-- > 0) {
        op_sub_simd(r, a, b, size);
    }
    const auto t1 = high_resolution_clock::now();

    const auto totalNs = duration_cast<nanoseconds>(t1 - t0).count();
    return totalNs / (double)ITERATIONS;
}

// Measure the multiplication operation: stores `useSIMD` in the global UseSIMD
// flag, calls op_mul_simd(r, a, b, size) ITERATIONS times and returns the mean
// wall-clock time of one call in nanoseconds.
double benchmarkMul(float* r, const float* a, const float* b, int size, bool useSIMD) {
    UseSIMD = useSIMD;

    const auto t0 = high_resolution_clock::now();
    int remaining = ITERATIONS;
    while (remaining-- > 0) {
        op_mul_simd(r, a, b, size);
    }
    const auto t1 = high_resolution_clock::now();

    const auto totalNs = duration_cast<nanoseconds>(t1 - t0).count();
    return totalNs / (double)ITERATIONS;
}

// Check that two result arrays agree element by element.
//
// Returns true when, for each of the first `size` elements, the two values
// are either exactly equal or differ by at most 1e-6. Returns false at the
// first element that differs by more, or whose difference is NaN (so a NaN
// produced by one implementation is reported instead of slipping through).
// An empty range (size <= 0) is considered a match.
bool verifyResults(const float* r_simd, const float* r_scalar, int size) {
    const double kTolerance = 1e-6;
    for (int i = 0; i < size; i++) {
        // Exact equality first: identical infinities must still count as equal.
        if (r_simd[i] == r_scalar[i]) {
            continue;
        }
        const double diff = std::fabs(r_simd[i] - r_scalar[i]);
        // Written as !(diff <= tol) so that a NaN difference fails the check;
        // `diff > tol` is false for NaN and would accept it.
        if (!(diff <= kTolerance)) {
            return false;
        }
    }
    return true;
}

// Entry point of the micro-benchmark.
//
// For every array size in SIZES and every operation (add, sub, mul) it times
// the scalar and the SIMD variant, checks that both produce matching results
// and prints one row of a results table.
//
// Returns 0 when every SIMD result matched its scalar counterpart and 1 when
// at least one verification failed, so scripts can detect a broken SIMD path.
int main() {
    // One entry per benchmarked operation; replaces three copy-pasted sections.
    struct OpCase {
        const char* rowLabel;    // label printed in the results table
        const char* errorLabel;  // label printed in the verification error
        double (*bench)(float*, const float*, const float*, int, bool);
    };
    const OpCase ops[] = {
        {"ADD", "Add", benchmarkAdd},
        {"SUB", "Sub", benchmarkSub},
        {"MUL", "Mul", benchmarkMul},
    };

    cout << "=== SIMD Micro-Benchmark ===" << endl;
    cout << "SIMD Extension: " << getSIMDInfo() << endl;
    cout << "Iterations per test: " << ITERATIONS << endl;
    cout << endl;

    srand(42);  // Fixed seed for reproducibility

    cout << fixed << setprecision(2);
    cout << "| Size | Op | Scalar (ns) | SIMD (ns) | Speedup |" << endl;
    cout << "|---------|------|-------------|-----------|---------|" << endl;

    bool allVerified = true;

    for (int s = 0; s < NUM_SIZES; s++) {
        const int size = SIZES[s];

        // Allocate the input and output arrays
        vector<float> a(size), b(size), r_simd(size), r_scalar(size);

        // Initialize the inputs with random values
        initRandom(a.data(), size);
        initRandom(b.data(), size);

        for (const OpCase& op : ops) {
            // Scalar run first, then SIMD, each into its own output buffer
            const double scalarTime = op.bench(r_scalar.data(), a.data(), b.data(), size, false);
            const double simdTime = op.bench(r_simd.data(), a.data(), b.data(), size, true);
            const double speedup = scalarTime / simdTime;

            if (!verifyResults(r_simd.data(), r_scalar.data(), size)) {
                cout << "ERROR: " << op.errorLabel << " verification failed for size " << size << endl;
                allVerified = false;
            }

            cout << "| " << setw(7) << size << " | " << op.rowLabel << " | " << setw(11) << scalarTime
                 << " | " << setw(9) << simdTime << " | " << setw(6) << speedup << "x |" << endl;
        }
    }

    cout << endl;
    cout << "=== End Micro-Benchmark ===" << endl;

    // Report verification failures through the exit status as well
    return allVerified ? 0 : 1;
}
76 changes: 76 additions & 0 deletions experiments/test_simd_speedup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash
#
# test_simd_speedup.sh - Measure the speedup of vector operations with SIMD
#
# Runs the NNets benchmark twice - with SIMD enabled and with SIMD disabled
# (--no-simd) - and compares the reported training times.
#
# Usage:
#   ./experiments/test_simd_speedup.sh
#

set -e
# Without pipefail the exit status of `NNets ... | tee` would be tee's, and a
# crashing benchmark run would go unnoticed.
set -o pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
BUILD_DIR="$PROJECT_DIR/build"
CONFIG_FILE="$PROJECT_DIR/configs/benchmark.json"

# Set NNETS_EXE to the NNets executable inside the build directory.
# The executable can be in different locations depending on the CMake
# generator; the most specific location wins. Returns non-zero (leaving
# NNETS_EXE at the default location) when no candidate exists.
locate_nnets_exe() {
    local candidate
    for candidate in \
        "$BUILD_DIR/bin/Release/NNets" \
        "$BUILD_DIR/bin/NNets" \
        "$BUILD_DIR/NNets"; do
        if [ -f "$candidate" ]; then
            NNETS_EXE="$candidate"
            return 0
        fi
    done
    NNETS_EXE="$BUILD_DIR/NNets"
    return 1
}

# Run the single-threaded benchmark, showing its output and saving it to the
# log file given as $1. Remaining arguments are passed on to NNets.
run_benchmark() {
    local log_file="$1"
    shift
    if ! "$NNETS_EXE" -c "$CONFIG_FILE" -b --single-thread "$@" 2>&1 | tee "$log_file"; then
        echo "Error: NNets benchmark run failed" >&2
        exit 1
    fi
}

# Print the third field of the first "Training time:" line of log file $1
# (prints nothing when the log has no such line).
extract_training_time() {
    awk '/Training time:/ { print $3; exit }' "$1"
}

echo "=== SIMD Speedup Test ==="
echo ""
echo "Project directory: $PROJECT_DIR"
echo "Build directory: $BUILD_DIR"

# Build the project when no executable exists yet
if ! locate_nnets_exe; then
    echo "Building project..."
    mkdir -p "$BUILD_DIR"
    (
        cd "$BUILD_DIR"
        cmake .. -DCMAKE_BUILD_TYPE=Release
        cmake --build . --config Release -j "$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)"
    )

    # Look again after the build: the executable may have been written to a
    # bin/ subdirectory rather than the default location.
    if ! locate_nnets_exe; then
        echo "Error: Could not find NNets executable under $BUILD_DIR" >&2
        exit 1
    fi
fi

if [ ! -f "$CONFIG_FILE" ]; then
    echo "Error: Could not find benchmark config at $CONFIG_FILE" >&2
    exit 1
fi

# Keep the logs in a private temporary directory that is removed on every
# exit path, instead of fixed names in /tmp that concurrent runs would share.
LOG_DIR="$(mktemp -d)"
trap 'rm -rf "$LOG_DIR"' EXIT

echo ""
echo "Testing with config: $CONFIG_FILE"
echo ""

# Run benchmark with SIMD enabled (default)
echo "=== Test 1: SIMD Enabled (default) ==="
run_benchmark "$LOG_DIR/simd_enabled.log"
SIMD_TIME="$(extract_training_time "$LOG_DIR/simd_enabled.log")"
echo ""

# Run benchmark with SIMD disabled
echo "=== Test 2: SIMD Disabled (--no-simd) ==="
run_benchmark "$LOG_DIR/simd_disabled.log" --no-simd
NO_SIMD_TIME="$(extract_training_time "$LOG_DIR/simd_disabled.log")"
echo ""

# Calculate speedup
echo "=== SIMD Speedup Summary ==="
echo "SIMD Enabled: $SIMD_TIME ms"
echo "SIMD Disabled: $NO_SIMD_TIME ms"

# Accept integer and fractional timings; awk does the arithmetic so that
# neither an integer-only shell test nor the external `bc` tool is needed.
NUMBER_RE='^[0-9]+([.][0-9]+)?$'
if [[ "$SIMD_TIME" =~ $NUMBER_RE && "$NO_SIMD_TIME" =~ $NUMBER_RE ]] \
    && awk -v t="$SIMD_TIME" 'BEGIN { exit !(t > 0) }'; then
    SPEEDUP="$(awk -v on="$SIMD_TIME" -v off="$NO_SIMD_TIME" 'BEGIN { printf "%.2f", off / on }')"
    echo "Speedup: ${SPEEDUP}x"
else
    echo "Could not calculate speedup (missing timing data)"
fi

echo ""
echo "=== End SIMD Speedup Test ==="
Loading