Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
4 changes: 4 additions & 0 deletions nsy/cuda/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
nsy
nsy.o
nsy.cpp
nsy.cu.hip
46 changes: 46 additions & 0 deletions nsy/cuda/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Build script for nsy. Select the toolchain through CXX:
#   nvcc (default)  - NVIDIA GPUs, compiles the .cu sources directly
#   clang / clang++ - CPU build: sources are translated with hipify-clang
#   hipcc           - AMD GPUs: sources are translated with hipify-clang

# GNU make predefines CXX, so `ifndef CXX` is never true. Only fall back to
# nvcc when the value did not come from the environment, the command line or
# the makefile itself.
ifneq (,$(filter default undefined,$(origin CXX)))
CXX := nvcc
endif

CXXFLAGS := -O3 -Wall -Wno-pedantic
ifeq ($(CXX),nvcc)
# nvcc does not understand host warning flags; forward them to the host compiler.
CXXFLAGS := -Xcompiler "$(CXXFLAGS)"
endif
LDFLAGS :=
# Libraries go after the objects on the link line so the linker can resolve them.
LDLIBS :=

TARGET := nsy

CU_SOURCES := $(wildcard *.cu)
OBJECTS := $(CU_SOURCES:.cu=.o)

# Extension of the files that are actually handed to the compiler.
SRC_EXT := .cu
HIPIFY_FLAGS :=

ifneq (,$(filter $(CXX),clang clang++))
CXXFLAGS += -march=native -std=c++17
LDLIBS += -ltbb
SRC_EXT := .cpp
HIPIFY_FLAGS := --hip-kernel-execution-syntax
endif

ifeq ($(CXX),hipcc)
SRC_EXT := .cu.hip
endif

.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $^ $(LDLIBS)

# Translate each CUDA source to HIP when the selected toolchain needs it.
ifneq ($(SRC_EXT),.cu)
$(CU_SOURCES:.cu=$(SRC_EXT)): %$(SRC_EXT): %.cu
	hipify-clang $(HIPIFY_FLAGS) -o $@ $<
endif

# Static pattern rule: every object is built from its own source file.
$(OBJECTS): %.o: %$(SRC_EXT)
	$(CXX) $(CXXFLAGS) -c -o $@ $<

clean:
	rm -f $(TARGET) $(OBJECTS)
	rm -f $(CU_SOURCES:.cu=.cpp) $(CU_SOURCES:.cu=.cu.hip)
26 changes: 26 additions & 0 deletions nsy/cuda/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Build for CPU

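Requirements: clang compiler, [HIP-CPU](https://github.com/ROCm/HIP-CPU), `hipify-clang` (the Makefile uses it to translate the `.cu` source), and TBB (the build links with `-ltbb`)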

```
export CXX=clang++
make
```

# Build for NVIDIA GPUs

Requirements: NVIDIA CUDA compiler (`nvcc`)

```
export CXX=nvcc
make
```

# Build for AMD GPUs

Requirements: HIP C++ compiler (`hipcc`), AMD clang compiler, `hipify-clang`

```
export CXX=hipcc
make
```
39 changes: 17 additions & 22 deletions nsy/hip/nsy_hip.cu → nsy/cuda/nsy.cu
Original file line number Diff line number Diff line change
@@ -1,27 +1,21 @@
#ifdef _MSC_VER
#include "msvc_defines.h"
#endif
#include <hip/hip_runtime.h>
#include <cmath>
#include <cuda_runtime.h>
#include <iostream>
#include <locale>
#include <string>
#include <vector>

// Checks the result of a CUDA runtime call.
//
// `status` is evaluated exactly once: the argument is normally the API call
// itself, so expanding it a second time would repeat the call. On failure the
// error string and the call site are printed to stderr and the process exits
// with a non-zero status so callers and scripts can detect the error.
// The do/while wrapper makes the macro behave as a single statement.
#define CUDA_CHECK(status)                                                   \
  do {                                                                       \
    const cudaError_t cuda_check_status_ = (status);                         \
    if (cuda_check_status_ != cudaSuccess) {                                 \
      fprintf(stderr, "error: '%s' at %s:%d\n",                              \
              cudaGetErrorString(cuda_check_status_), __FILE__, __LINE__);   \
      exit(1);                                                               \
    }                                                                        \
  } while (0)

// Interleaves the input characters with `mark`.
//
// Expected launch: <<<2, dim3(dim, dim)>>>. Thread (x, y) of a block handles
// the flat element idx = x * dim + y:
//   - block 0 copies in[idx] into the even slot out[2 * idx];
//   - block 1 stores `mark` into the odd slot out[2 * idx + 1].
// `in` must hold dim * dim elements and `out` 2 * dim * dim elements.
__global__ void kernel(const wchar_t *in, wchar_t *out, const size_t dim,
                       const wchar_t mark) {
  // Ignore threads and blocks outside the expected launch shape so a
  // mismatched launch cannot index past the end of the buffers.
  if (threadIdx.x >= dim || threadIdx.y >= dim || blockIdx.x > 1) {
    return;
  }

  // Keep the index in the same width as `dim` instead of narrowing to int.
  const size_t idx = threadIdx.x * dim + threadIdx.y;
  if (blockIdx.x == 0) {
    out[idx * 2] = in[idx];
  } else {
    out[idx * 2 + 1] = mark;
  }
}

wchar_t get_mark(char *s) {
Expand All @@ -41,7 +35,7 @@ int main(int argc, char *argv[]) {
std::vector<wchar_t> str;

wchar_t c;
while ((c = std::wcin.get()) != WEOF) {
while ((c = std::wcin.get()) != (wchar_t)WEOF) {
if (c == ' ') {
continue;
}
Expand All @@ -55,21 +49,22 @@ int main(int argc, char *argv[]) {
const size_t input_size = sizeof(wchar_t) * dim * dim;
const size_t output_length = dim * 2 * dim;
const size_t output_size = sizeof(wchar_t) * output_length;
HIP_CHECK(hipMalloc((void **)&input, input_size));
HIP_CHECK(hipMalloc((void **)&output, output_size));
HIP_CHECK(hipMemcpy(input, str.data(), sizeof(wchar_t) * length,
hipMemcpyHostToDevice));
CUDA_CHECK(cudaMalloc((void **)&input, input_size));
CUDA_CHECK(cudaMalloc((void **)&output, output_size));
CUDA_CHECK(cudaMemcpy(input, str.data(), sizeof(wchar_t) * length,
cudaMemcpyHostToDevice));

const wchar_t mark = argc < 2 ? L'\xFF01' : get_mark(argv[1]);
kernel<<<2, dim3(dim, dim), 0, 0>>>(input, output, dim, mark);

auto result = new wchar_t[output_length + 2]; // mark ... \x0000
*result = mark;
HIP_CHECK(hipMemcpy(result + 1, output, output_size, hipMemcpyDeviceToHost));
result[length * 2 + 1] = 0;
CUDA_CHECK(
cudaMemcpy(result + 1, output, output_size, cudaMemcpyDeviceToHost));
result[length * 2 + 1] = L'\0';

HIP_CHECK(hipFree(input));
HIP_CHECK(hipFree(output));
CUDA_CHECK(cudaFree(input));
CUDA_CHECK(cudaFree(output));

std::wcout << result << L'\n';

Expand Down
3 changes: 0 additions & 3 deletions nsy/hip/.gitignore

This file was deleted.

20 changes: 0 additions & 20 deletions nsy/hip/Makefile

This file was deleted.

62 changes: 0 additions & 62 deletions nsy/hip/msvc_defines.h

This file was deleted.

25 changes: 0 additions & 25 deletions nsy/hip/nsy_hip.sln

This file was deleted.

106 changes: 0 additions & 106 deletions nsy/hip/nsy_hip.vcxproj

This file was deleted.

Loading