From 6b56b3d003a6ee732d7d7c72b56b711f79862a6f Mon Sep 17 00:00:00 2001 From: Ilya Panfilov Date: Sun, 22 Feb 2026 14:21:08 -0500 Subject: [PATCH 1/3] Always use V2 hipify. Make all hipify results consistent --- 3rdparty/hipify_torch | 2 +- build_tools/utils.py | 21 +++---------- hipify_custom_map.json | 4 +-- setup.py | 5 ++- tests/cpp/CMakeLists.txt | 31 ++++++++++++++++++- tests/cpp/operator/CMakeLists.txt | 24 +------------- tests/cpp/util/CMakeLists.txt | 16 +++++----- transformer_engine/common/CMakeLists.txt | 12 +++---- .../include/transformer_engine/multi_stream.h | 4 ++- .../common/util/handle_manager.h | 7 ----- .../common/util/rocm_cast_gated_kernels.cuh | 7 ++--- .../common/util/rocm_cast_kernels.cuh | 21 ++++++------- .../common/util/rocm_dequantize_kernels.cuh | 9 +++--- 13 files changed, 77 insertions(+), 86 deletions(-) diff --git a/3rdparty/hipify_torch b/3rdparty/hipify_torch index 3456cd19d..e2875e28a 160000 --- a/3rdparty/hipify_torch +++ b/3rdparty/hipify_torch @@ -1 +1 @@ -Subproject commit 3456cd19d4eb5e469317bfcfae1a89b7ab70f6c2 +Subproject commit e2875e28a397cf4ec13c7414d4387fe761f36e92 diff --git a/build_tools/utils.py b/build_tools/utils.py index 20d508a6e..c6da47d0f 100644 --- a/build_tools/utils.py +++ b/build_tools/utils.py @@ -502,30 +502,17 @@ def uninstall_te_wheel_packages(): ] ) -def detect_hipify_v2(): - try: - from torch.utils.hipify import __version__ - from packaging.version import Version - if Version(__version__) >= Version("2.0.0"): - return True - except Exception as e: - print("failed to detect pytorch hipify version, defaulting to version 1.0.0 behavior") - print(e) - return False - def hipify(base_dir, src_dir, sources, include_dirs): cwd = os.getcwd() - if detect_hipify_v2(): - hipify_module = importlib.import_module("3rdparty.hipify_torch.hipify_torch.v2.hipify_python") - else: - hipify_module = importlib.import_module("3rdparty.hipify_torch.hipify_torch.hipify_python") + hipify_module = importlib.import_module("3rdparty.hipify_torch.hipify_torch.v2.hipify_python") do_hipify = hipify_module.hipify + print(f"Run hipify on {src_dir}") hipify_result = do_hipify( project_directory=src_dir, output_directory=src_dir, - includes=["*"], - ignores=["*/amd_detail/*", "*/aotriton/*", "*/ck_fused_attn/*"], + includes=["*/common/*", str(src_dir)+"/*"], + ignores=["*/amd_detail/*", "*/aotriton/*", "*/ck_fused_attn/*", "*/rocshmem_api/*"], header_include_dirs=[d for d in include_dirs if Path(d).is_relative_to(base_dir)], custom_map_list=base_dir / "hipify_custom_map.json", extra_files=[], diff --git a/hipify_custom_map.json b/hipify_custom_map.json index 97824bbdb..35e6b7999 100644 --- a/hipify_custom_map.json +++ b/hipify_custom_map.json @@ -1,8 +1,8 @@ { "custom_map" : { "" : "", - "" : "\"amd_detail/hip_float8.h\"", - "util/cuda_runtime.h" : "util/hip_runtime.h", + "" : "\"common/amd_detail/hip_float8.h\"", + "cuda_runtime.h\"" : "hip_runtime.h\"", "ATen/cudnn/Handle.h" : "ATen/miopen/Handle.h", "CUfunc_cache" : "hipFuncCache_t", "" : "", diff --git a/setup.py b/setup.py index 2f3e3c2ab..cd5cacc70 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,10 @@ def run(self): if rocm_build(): print("Running hipification of installable headers for ROCm build...") common_headers_dir = current_file_path / "transformer_engine/common/include" - hipify(current_file_path, common_headers_dir, all_files_in_dir(common_headers_dir), []) + #TODO: some installable headers refer non installable headers (i.e not from common/include) + #so we need add extra include paths here to match hipification results with build process + hipify(current_file_path, common_headers_dir, all_files_in_dir(common_headers_dir), + [common_headers_dir, current_file_path / "transformer_engine"]) super().run() CMakeBuildExtension = get_build_ext(BuildExtension) diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index b71addebf..57cdb02fe 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -1,5 +1,5 @@ # This file was modified for portability to AMDGPU -# Copyright (c) 2022-2025, Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2022-2026, Advanced Micro Devices, Inc. All rights reserved. # Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # See LICENSE for license information. @@ -96,5 +96,34 @@ else() find_package(hip REQUIRED) endif() +if(USE_ROCM) + set(TE ${CMAKE_CURRENT_SOURCE_DIR}/../..) + set(THIRDPARTY ${TE}/3rdparty) + list(APPEND CMAKE_MODULE_PATH "${THIRDPARTY}/hipify_torch/cmake") + include(Hipify) + message(STATUS "CMAKE_MODULE_PATH: ${CMAKE_MODULE_PATH}") + + #hipify parameters should match those used in TE CMakeLists.txt + set(header_include_dir + ${TE}/transformer_engine/common/include + ${TE}/transformer_engine/common + ${TE}/transformer_engine) + + set(cuda_source_dir ${PROJECT_SOURCE_DIR} ) + message(STATUS "Run hipify on ${cuda_source_dir}") + hipify(CUDA_SOURCE_DIR ${cuda_source_dir} + HEADER_INCLUDE_DIR ${header_include_dir} + IGNORES "*/pytorch/csrc/*" + IGNORES "*/jax/csrc/*" + IGNORES "*/amd_detail/*" + IGNORES "*/aotriton/*" + IGNORES "*/ck_fused_attn/*" + IGNORES "*/rocshmem_api/*" + CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json" + NO_MATH_REPLACE + V2 + ) +endif() + add_subdirectory(operator) add_subdirectory(util) diff --git a/tests/cpp/operator/CMakeLists.txt b/tests/cpp/operator/CMakeLists.txt index ebee930a1..d4beeb87c 100644 --- a/tests/cpp/operator/CMakeLists.txt +++ b/tests/cpp/operator/CMakeLists.txt @@ -41,31 +41,9 @@ endif() if(USE_CUDA) add_executable(test_operator ${test_cuda_sources}) else() - message("${message_line}") - message(STATUS "CMAKE_CURRENT_SOURCE_DIR: ${CMAKE_CURRENT_SOURCE_DIR}") - message(STATUS "PROJECT_SOURCE_DIR: ${PROJECT_SOURCE_DIR}") - - set(TE ${CMAKE_CURRENT_SOURCE_DIR}/../../..) - set(THIRDPARTY ${TE}/3rdparty) - list(APPEND CMAKE_MODULE_PATH "${THIRDPARTY}/hipify_torch/cmake") - include(Hipify) - message(STATUS "CMAKE_MODULE_PATH: ${CMAKE_MODULE_PATH}") - - file(REAL_PATH ../../../transformer_engine/common/include header_include_dir1) - file(REAL_PATH ../../../transformer_engine/common header_include_dir2) - set(header_include_dir ${header_include_dir1} ${header_include_dir2}) - - message(STATUS "CUDA_SOURCE_DIR: ${PROJECT_SOURCE_DIR}") - message(STATUS "HEADER_INCLUDE_DIR: ${header_include_dir}") - set(cuda_source_dir ${PROJECT_SOURCE_DIR} ) - hipify(CUDA_SOURCE_DIR ${cuda_source_dir} - HEADER_INCLUDE_DIR ${header_include_dir} - CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json" - NO_MATH_REPLACE - ) get_hipified_list("${test_cuda_sources}" test_hip_sources) message("${message_line}") - message(STATUS "nvte tests hipified sources: ${test_hip_sources}") + message(STATUS "test_operator hipified sources: ${test_hip_sources}") add_executable(test_operator ${test_hip_sources}) endif() diff --git a/tests/cpp/util/CMakeLists.txt b/tests/cpp/util/CMakeLists.txt index 51c855a91..ea986bfa5 100644 --- a/tests/cpp/util/CMakeLists.txt +++ b/tests/cpp/util/CMakeLists.txt @@ -4,16 +4,18 @@ # # See LICENSE for license information. -if(USE_CUDA) -add_executable(test_util +list(APPEND test_cuda_sources test_nvrtc.cpp test_string.cpp ../test_common.cu) -else() -add_executable(test_util - test_nvrtc_hip.cpp - test_string.cpp - ../test_common.hip) +if(USE_CUDA) + add_executable(test_util ${test_cuda_sources}) +else() + get_hipified_list("${test_cuda_sources}" test_hip_sources) + message("${message_line}") + message(STATUS "test_util hipified sources: ${test_hip_sources}") + + add_executable(test_util ${test_hip_sources}) endif() find_package(OpenMP REQUIRED) diff --git a/transformer_engine/common/CMakeLists.txt b/transformer_engine/common/CMakeLists.txt index 50dcf90a0..ec0db3a66 100644 --- a/transformer_engine/common/CMakeLists.txt +++ b/transformer_engine/common/CMakeLists.txt @@ -1,5 +1,5 @@ # This file was modified for portability to AMDGPU -# Copyright (c) 2022-2025, Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2022-2026, Advanced Micro Devices, Inc. All rights reserved. # Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # See LICENSE for license information. @@ -211,13 +211,13 @@ else() include(Hipify) message(STATUS "CMAKE_MODULE_PATH: ${CMAKE_MODULE_PATH}") + # Parameters for hipify here should match hipify from build_tools/utils.py + # Because CMake hipify doesn't support INCLUDES we explicitly add pytorch and jax to IGNORES set(header_include_dir ${CMAKE_CURRENT_SOURCE_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}/util + ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}) - message(STATUS "HIPIFY CUDA_SOURCE_DIR: ${CMAKE_CURRENT_SOURCE_DIR}") - message(STATUS "HIPIFY HEADER_INCLUDE_DIR: ${header_include_dir}") + message(STATUS "Run hipify on ${CMAKE_CURRENT_SOURCE_DIR}") hipify(CUDA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} HEADER_INCLUDE_DIR ${header_include_dir} IGNORES "*/amd_detail/*" @@ -228,13 +228,13 @@ else() IGNORES "*/rocshmem_api/*" CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json" NO_MATH_REPLACE + V2 ) get_hipified_list("${transformer_engine_SOURCES}" te_hip_sources) message("${message_line}") message(STATUS "nvte hipified sources: ${te_hip_sources}") add_library(transformer_engine SHARED ${te_hip_sources}) - target_include_directories(transformer_engine PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") endif() target_include_directories(transformer_engine PUBLIC diff --git a/transformer_engine/common/include/transformer_engine/multi_stream.h b/transformer_engine/common/include/transformer_engine/multi_stream.h index e406a0786..ef786564a 100644 --- a/transformer_engine/common/include/transformer_engine/multi_stream.h +++ b/transformer_engine/common/include/transformer_engine/multi_stream.h @@ -1,4 +1,6 @@ /************************************************************************* + * This file was modified for portability to AMDGPU + * Copyright (c) 2026, Advanced Micro Devices, Inc. All rights reserved. * Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See LICENSE for license information. @@ -11,7 +13,7 @@ #ifndef TRANSFORMER_ENGINE_MULTI_STREAM_H #define TRANSFORMER_ENGINE_MULTI_STREAM_H -#include "cuda_runtime.h" +#include //system CUDA header #ifdef __cplusplus extern "C" { diff --git a/transformer_engine/common/util/handle_manager.h b/transformer_engine/common/util/handle_manager.h index a63cd61c3..adb2f5558 100644 --- a/transformer_engine/common/util/handle_manager.h +++ b/transformer_engine/common/util/handle_manager.h @@ -1,6 +1,4 @@ /************************************************************************* - * This file was modified for portability to AMDGPU - * Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. * Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See LICENSE for license information. @@ -11,13 +9,8 @@ #include -#ifndef __HIP_PLATFORM_AMD__ #include "cuda_runtime.h" #include "logging.h" -#else -#include "util/cuda_runtime.h" -#include "util/logging.h" -#endif namespace transformer_engine::detail { diff --git a/transformer_engine/common/util/rocm_cast_gated_kernels.cuh b/transformer_engine/common/util/rocm_cast_gated_kernels.cuh index a53fd51c5..387445a78 100644 --- a/transformer_engine/common/util/rocm_cast_gated_kernels.cuh +++ b/transformer_engine/common/util/rocm_cast_gated_kernels.cuh @@ -7,17 +7,16 @@ #pragma once #include -#include -#include +#include -#include "common.h" +#include "../common.h" #include "math.h" #include "ptx.cuh" #include "rocm_vectorized_2d.cuh" #include "transformer_engine/activation.h" #include "transformer_engine/cast.h" #include "vectorized_pointwise.h" -#include "utils.cuh" +#include "../utils.cuh" namespace transformer_engine { namespace gated_kernels { diff --git a/transformer_engine/common/util/rocm_cast_kernels.cuh b/transformer_engine/common/util/rocm_cast_kernels.cuh index eb0c9b94d..33c53e8e8 100644 --- a/transformer_engine/common/util/rocm_cast_kernels.cuh +++ b/transformer_engine/common/util/rocm_cast_kernels.cuh @@ -6,17 +6,16 @@ #pragma once #include -#include -#include +#include -#include "common.h" +#include "../common.h" #include "math.h" #include "ptx.cuh" #include "rocm_vectorized_2d.cuh" #include "transformer_engine/cast.h" -#include "transpose/cast_transpose.h" +#include "../transpose/cast_transpose.h" #include "vectorized_pointwise.h" -#include "utils.cuh" +#include "../utils.cuh" namespace transformer_engine { @@ -24,7 +23,7 @@ namespace transformer_engine { template void mxfp8_quantize(const Tensor &input, const Tensor *act_input, const Tensor *noop, - Tensor *output, Tensor *dbias, Tensor *workspace, cudaStream_t stream); + Tensor *output, Tensor *dbias, Tensor *workspace, hipStream_t stream); constexpr size_t MXFP8_CHUNK_DIM_Y = 64; @@ -401,15 +400,15 @@ __global__ void __launch_bounds__(MXFP8_THREADS_PER_CHUNK) // Forward declaration of functions defined in `cast_kernels.cuh` template void reduce_dbias(const float *workspace_ptr, Tensor *dbias, const size_t rows, const size_t cols, - cudaStream_t stream); + hipStream_t stream); template void CastVectorizedUnaryKernelLauncher(const Tensor &input, const Tensor *noop, Tensor *output, - cudaStream_t stream); + hipStream_t stream); template void CastVectorizedUnaryGradKernelLauncher(const Tensor &grad, const Tensor *input, Tensor *output, - cudaStream_t stream); + hipStream_t stream); constexpr size_t TILE_DIM = 32; template @@ -445,7 +444,7 @@ __global__ void partial_reduce_kernel(const DTypeReduce* input, float* partial_o template void reduce_dbias_rocm(const DTypeReduce *workspace_ptr, Tensor *dbias, const size_t rows, - const size_t cols, cudaStream_t stream, Tensor* partial_sum_workspace) { + const size_t cols, hipStream_t stream, Tensor* partial_sum_workspace) { dim3 block_dim_partial(TILE_DIM, TILE_DIM); dim3 grid_dim_partial(DIVUP(cols, TILE_DIM), DIVUP(rows, TILE_DIM)); @@ -464,7 +463,7 @@ template void fp8_quantize_rocm(const Tensor &input, const Tensor *act_input, const Tensor *noop, Tensor *output, Tensor *dbias, Tensor *workspace, - cudaStream_t stream) { + hipStream_t stream) { switch (output->scaling_mode) { case NVTE_DELAYED_TENSOR_SCALING: { const size_t rows = input.flat_first_dim(); diff --git a/transformer_engine/common/util/rocm_dequantize_kernels.cuh b/transformer_engine/common/util/rocm_dequantize_kernels.cuh index 398e4c0ad..0d020b5eb 100644 --- a/transformer_engine/common/util/rocm_dequantize_kernels.cuh +++ b/transformer_engine/common/util/rocm_dequantize_kernels.cuh @@ -7,19 +7,18 @@ #pragma once #include -#include -#include +#include #include -#include "common.h" +#include "../common.h" #include "math.h" #include "ptx.cuh" #include "rocm_vectorized_2d.cuh" #include "transformer_engine/activation.h" #include "transformer_engine/cast.h" -#include "transpose/cast_transpose.h" +#include "../transpose/cast_transpose.h" #include "transformer_engine/transpose.h" -#include "utils.cuh" +#include "../utils.cuh" #include "vectorized_pointwise.h" namespace transformer_engine { From 20694175426216d4a4fa010bf82fcb81cdfccea1 Mon Sep 17 00:00:00 2001 From: Ilya Panfilov Date: Wed, 25 Feb 2026 11:27:46 -0500 Subject: [PATCH 2/3] Consolidate hipify calls in one service module --- 3rdparty/hipify_torch | 2 +- .../hipify/custom_map.json | 3 +- build_tools/hipify/hipify.cmake | 55 +++++ build_tools/hipify/hipify.py | 200 ++++++++++++++++++ build_tools/jax.py | 8 +- build_tools/pytorch.py | 10 +- build_tools/utils.py | 62 ------ setup.py | 8 +- tests/cpp/CMakeLists.txt | 28 +-- tests/cpp/operator/CMakeLists.txt | 2 +- tests/cpp/util/CMakeLists.txt | 2 +- transformer_engine/common/CMakeLists.txt | 30 +-- .../common/aotriton/CMakeLists.txt | 1 + transformer_engine/common/util/rtc.cpp | 4 +- transformer_engine/jax/MANIFEST.in | 1 - transformer_engine/jax/setup.py | 12 +- transformer_engine/pytorch/MANIFEST.in | 1 - transformer_engine/pytorch/setup.py | 6 +- 18 files changed, 290 insertions(+), 145 deletions(-) rename hipify_custom_map.json => build_tools/hipify/custom_map.json (81%) create mode 100644 build_tools/hipify/hipify.cmake create mode 100644 build_tools/hipify/hipify.py diff --git a/3rdparty/hipify_torch b/3rdparty/hipify_torch index e2875e28a..ade152daf 160000 --- a/3rdparty/hipify_torch +++ b/3rdparty/hipify_torch @@ -1 +1 @@ -Subproject commit e2875e28a397cf4ec13c7414d4387fe761f36e92 +Subproject commit ade152dafe55b65363d2dbdec889bfe8932712d2 diff --git a/hipify_custom_map.json b/build_tools/hipify/custom_map.json similarity index 81% rename from hipify_custom_map.json rename to build_tools/hipify/custom_map.json index 35e6b7999..27c1fc7fe 100644 --- a/hipify_custom_map.json +++ b/build_tools/hipify/custom_map.json @@ -6,7 +6,8 @@ "ATen/cudnn/Handle.h" : "ATen/miopen/Handle.h", "CUfunc_cache" : "hipFuncCache_t", "" : "", - "cudaFuncSetAttribute(" : "hipFuncSetAttribute((const void*)" + "cudaFuncSetAttribute(" : "hipFuncSetAttribute((const void*)", + "cuda::getCurrentCUDAStream" : "hip::getCurrentHIPStreamMasqueradingAsCUDA" } } diff --git a/build_tools/hipify/hipify.cmake b/build_tools/hipify/hipify.cmake new file mode 100644 index 000000000..934964b2e --- /dev/null +++ b/build_tools/hipify/hipify.cmake @@ -0,0 +1,55 @@ +set(_HIPIFY_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}") + + +function(TE_Hipify SRC_DIR) + # Create result file + set(hipify_result "${CMAKE_BINARY_DIR}/hipify_result.json") + + # Call Python script + execute_process( + COMMAND python3 "${_HIPIFY_CMAKE_DIR}/hipify.py" hipify + --src-dir "${SRC_DIR}" + --hipify-result "${hipify_result}" + RESULT_VARIABLE script_result + ) + + if(NOT script_result EQUAL 0) + message(FATAL_ERROR "Python script failed with code ${script_result}") + endif() +endfunction() + + +function(TE_GetHipifiedSources SOURCE_LIST BASE_PATH OUTPUT_VARIABLE) + # Create a temporary file + string(RANDOM LENGTH 8 RANDOM_SUFFIX) + set(list_file "${CMAKE_BINARY_DIR}/source_list_${RANDOM_SUFFIX}.txt") + + # Write list to temp file + string(REPLACE ";" "\n" list_content "${SOURCE_LIST}") + file(WRITE "${list_file}" "${list_content}") + + set(hipify_result "${CMAKE_BINARY_DIR}/hipify_result.json") + + # Call Python script + execute_process( + COMMAND python3 "${_HIPIFY_CMAKE_DIR}/hipify.py" get_sources + --hipify-result "${hipify_result}" + --sources "${list_file}" + --base-path "${BASE_PATH}" + RESULT_VARIABLE script_result + ) + + if(NOT script_result EQUAL 0) + message(FATAL_ERROR "Python script failed with code ${script_result}") + endif() + + # Read result from output file + file(READ "${list_file}" result_content) + string(REPLACE "\n" ";" result_content "${result_content}") + + # Clean up temp files + file(REMOVE "${list_file}") + + # Set output variable in parent scope + set(${OUTPUT_VARIABLE} "${result_content}" PARENT_SCOPE) +endfunction() \ No newline at end of file diff --git a/build_tools/hipify/hipify.py b/build_tools/hipify/hipify.py new file mode 100644 index 000000000..e295c5be9 --- /dev/null +++ b/build_tools/hipify/hipify.py @@ -0,0 +1,200 @@ +# Copyright (c) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. +# License for AMD contributions = MIT. See LICENSE for more information + +import json +import os +from pathlib import Path +import shutil +import sys +from typing import Union, Optional + + +def do_hipify(te_root: Union[Path, str], src_dir: Union[Path, str], + common_headers_dir: Optional[Union[Path, str]] = None, + result_file: Optional[str] = None) -> dict: + """ + Converts TransformerEngine CUDA code to HIP code using the hipify_torch module. + This function runs the hipify transformation on source files in the specified TE directory, + converting CUDA-specific code to HIP-compatible code. It can optionally save the + transformation results to a JSON file. + Args: + te_root (Union[Path, str]): TE project root directory. + Used to located build tools and the hipify_torch module. + src_dir (Union[Path, str]): Source directory containing CUDA files to be hipified. + The hipified output will be written to the same directory. + common_headers_dir (Optional[Union[Path, str]]): directory containing common header + If not set {te_root}/transformer_engine will be used as default. + result_file (Optional[str]): Path to an optional JSON file where hipify results + will be saved. If not set, results are not persisted to disk. + Returns: + dict: A dictionary containing the hipify transformation results, including + details about converted files and any transformations applied. + Raises: + ImportError: If the hipify_torch module cannot be imported from the project root. + FileNotFoundError: If the source directory or required configuration files do not exist. + IOError: If there is an error writing to the result_file. + """ + te_root = Path(te_root).resolve() + hipify_root = te_root / "3rdparty" / "hipify_torch" + sys.path.insert(0, str(hipify_root)) + from hipify_torch.v2 import hipify_python as hipify_module + + common_headers_dir = (Path(common_headers_dir).resolve() if common_headers_dir else + te_root / "transformer_engine") + include_dirs = [common_headers_dir, + common_headers_dir / "common", + common_headers_dir / "common" / "include", + Path(src_dir).resolve()] + + print(f"Run hipify on {src_dir}") + + hipify_result = hipify_module.hipify( + project_directory=src_dir, + output_directory=src_dir, + includes=["*/common/*", str(Path(src_dir)/"*")], + ignores=["*/amd_detail/*", "*/aotriton/*", "*/ck_fused_attn/*", "*/rocshmem_api/*"], + header_include_dirs=include_dirs, + custom_map_list= te_root / "build_tools" / "hipify" / "custom_map.json", + extra_files=[], + is_pytorch_extension=True, + hipify_extra_files_only=False, + show_detailed=False, + no_math_replace=True) + + # Convert hipify objects to dictionaries for consistent behavior + hipify_result = {k: v.asdict() if hasattr(v, 'asdict') else v for k, v in hipify_result.items()} + + if result_file: + with open(result_file, 'w') as dict_file: + dict_file.write(json.dumps(hipify_result)) + + return hipify_result + + +def get_hipified_sources(hipify_result: Union[str, dict], sources: Union[list[Union[Path, str]], Path, str], + src_base_path: Union[Path, str]) -> Union[list[str], str]: + """ + Process and return hipified source file paths, updating the source list file if provided. + + This function takes hipify conversion results and a list of source files, then returns + the corresponding hipified file paths relative to the source base directory. If the sources + parameter points to a file, the file is updated with the hipified paths and the file path + is returned. Otherwise, a list of hipified paths is returned. + + Args: + hipify_result (Union[str, dict]): Either a file path to a JSON file containing hipify + conversion results as a dictionary, or a dictionary directly mapping original file + paths to their hipification results. Each result should have a `hipified_path` + attribute indicating the converted file path. + sources (Union[list[Union[Path, str]], Path, str]): Either a list of source file paths + (as strings or Path objects), or a file path (as string or Path) containing one + source file path per line. These are the original CUDA source files to be hipified. + src_base_path (Union[Path, str]): The base directory path used to compute relative + paths for the output. All returned paths will be relative to this directory. + + Returns: + Union[list[str], str]: If `sources` is a file path, returns the file path after updating + it with hipified source paths. If `sources` is a list, returns a list of strings + representing relative paths to hipified source files. Duplicate entries are removed + by converting to a set internally. + """ + if isinstance(hipify_result, str): + with open(hipify_result, 'r') as dict_file: + hipify_result = json.load(dict_file) + else: + hipify_result = dict(hipify_result) + + sources_fname = None + if isinstance(sources, (str, Path)): + sources_fname = os.path.abspath(str(sources)) + sources = [line.strip() for line in open(sources_fname).readlines() if line.strip()] + + # Because hipify output_directory == project_directory + # Original sources list may contain previous hipifying results that ends up with duplicated entries + # Keep unique entries only + hipified_sources = set() + for fname in sources: + if not os.path.isabs(fname): + fname = os.path.join(src_base_path, fname) + fname = os.path.abspath(str(fname)) + if fname in hipify_result: + file_result = hipify_result[fname] + if file_result['hipified_path'] is not None: + fname = hipify_result[fname]['hipified_path'] + hipified_sources.add(os.path.relpath(fname, str(src_base_path))) + + if sources_fname is not None: + with open(sources_fname, "w") as f: + for fname in hipified_sources: + f.write(fname + "\n") + return sources_fname + + return list(hipified_sources) + + +def hipify_sources(te_root: Union[Path, str], src_dir: Union[Path, str], + common_headers_dir: Optional[Union[Path, str]], + sources: Union[list[Union[Path, str]], Path, str], + src_base_path: Union[Path, str]) -> Union[list[str], str]: + """Hipify source files and return the list of hipified source paths. + """ + return get_hipified_sources(do_hipify(te_root, src_dir, common_headers_dir), + sources, src_base_path) + + +def copy_hipify_tools( + src_dir: Union[Path, str], + dst_dir: Union[Path, str], +) -> None: + """Copy necessary hipify tools from library root + src_dir should be the root or Transformer Engine repository. + """ + if bool(int(os.getenv("NVTE_RELEASE_BUILD", "0"))): + hipify_dir = src_dir / "3rdparty" / "hipify_torch" + hipify_copy = dst_dir / "3rdparty" / "hipify_torch" + if hipify_copy.exists(): + shutil.rmtree(hipify_copy) + shutil.copytree(hipify_dir, hipify_copy) + + +def clear_hipify_tools_copy( + dst_dir: Union[Path, str], +) -> None: + """Clear temporary copies of hipify tools + """ + hipify_copy = dst_dir / "3rdparty" + if hipify_copy.exists(): + shutil.rmtree(hipify_copy) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Hipify TE source files") + subparsers = parser.add_subparsers(dest="op", help="Operation to perform") + + parser_hipify = subparsers.add_parser("hipify", help="Run hipify on source directory") + parser_hipify.add_argument("--te-root", type=str, default=str(Path(__file__).parent.parent.parent), + help="Root directory of the transformer engine project") + parser_hipify.add_argument("--src-dir", type=str, required=True, + help="Source directory containing CUDA files to be hipified") + parser_hipify.add_argument("--hipify-result", type=str, required=True, + help="JSON file to save hipify results to") + + parser_sources = subparsers.add_parser("get_sources", help="Get hipified sources from hipify results") + parser_sources.add_argument("--hipify-result", type=str, required=True, + help="JSON file containing hipify results") + parser_sources.add_argument("--sources", type=str, required=True, + help="File containing list of source files to be updated with hipified paths") + parser_sources.add_argument("--base-path", type=str, default=None, dest="src_base_path", + help="Base path for computing relative paths of hipified sources") + + args = parser.parse_args() + if args.op == "hipify": + print(f"Hipifying sources in {args.src_dir} with TE root {args.te_root}, saving results to {args.hipify_result}") + do_hipify(args.te_root, args.src_dir, None, args.hipify_result) + elif args.op == "get_sources": + print(f"Getting hipified sources from {args.hipify_result} and updating {args.sources} with base path {args.src_base_path}") + get_hipified_sources(args.hipify_result, args.sources, args.src_base_path) + else: + raise ValueError(f"Unsupported operation: {args.op}") diff --git a/build_tools/jax.py b/build_tools/jax.py index a2f996eb2..e4f61d644 100644 --- a/build_tools/jax.py +++ b/build_tools/jax.py @@ -10,7 +10,7 @@ import setuptools -from .utils import rocm_build, rocm_path, hipify +from .utils import rocm_build, rocm_path from .utils import all_files_in_dir, get_cuda_include_dirs, debug_build_enabled from typing import List @@ -82,9 +82,9 @@ def setup_jax_extension( # If NVTE_RELEASE_BUILD is set, we assume not building but sources packaging # and we do not hipify the sources if rocm_build() and not bool(int(os.getenv("NVTE_RELEASE_BUILD", "0"))): - current_file_path = Path(__file__).parent.resolve() - base_dir = current_file_path.parent - sources = hipify(base_dir, csrc_source_files, sources, include_dirs) + from .hipify.hipify import hipify_sources as hipify + base_dir = Path(__file__).parent.parent.resolve() + sources = hipify(base_dir, csrc_source_files, common_header_files, sources, base_dir) # Compile flags cxx_flags = ["-O3"] diff --git a/build_tools/pytorch.py b/build_tools/pytorch.py index 55d2f8330..ed0c09adc 100644 --- a/build_tools/pytorch.py +++ b/build_tools/pytorch.py @@ -13,11 +13,7 @@ from .utils import ( rocm_build, rocm_path, - hipify, -) -from .utils import ( all_files_in_dir, - cuda_archs, cuda_version, get_cuda_include_dirs, debug_build_enabled, @@ -63,9 +59,9 @@ def setup_pytorch_extension( # If NVTE_RELEASE_BUILD is set, we assume not building but sources packaging # and we do not hipify the sources if rocm_build() and not bool(int(os.getenv("NVTE_RELEASE_BUILD", "0"))): - current_file_path = Path(__file__).parent.resolve() - base_dir = current_file_path.parent - sources = hipify(base_dir, csrc_source_files, sources, include_dirs) + from .hipify.hipify import hipify_sources as hipify + base_dir = Path(__file__).parent.parent.resolve() + sources = hipify(base_dir, csrc_source_files, common_header_files, sources, base_dir) # Compiler flags cxx_flags = ["-O3", "-fvisibility=hidden"] diff --git a/build_tools/utils.py b/build_tools/utils.py index c6da47d0f..2f924e894 100644 --- a/build_tools/utils.py +++ b/build_tools/utils.py @@ -451,34 +451,6 @@ def copy_common_headers( new_path.parent.mkdir(exist_ok=True, parents=True) shutil.copy(path, new_path) -def copy_hipify_tools( - src_dir: Union[Path, str], - dst_dir: Union[Path, str], -) -> None: - """Copy necessary hipify tools from library root - src_dir should be the root or Transformer Engine repository. - """ - if rocm_build() and bool(int(os.getenv("NVTE_RELEASE_BUILD", "0"))): - hipify_dir = src_dir / "3rdparty" / "hipify_torch" - hipify_copy = dst_dir / "3rdparty" / "hipify_torch" - if hipify_copy.exists(): - shutil.rmtree(hipify_copy) - shutil.copytree(hipify_dir, hipify_copy) - shutil.copy(src_dir / "hipify_custom_map.json", dst_dir / "hipify_custom_map.json") - - -def clear_hipify_tools_copy( - dst_dir: Union[Path, str], -) -> None: - """Clear temporary copies of hipify tools - """ - hipify_copy = dst_dir / "3rdparty" - if hipify_copy.exists(): - shutil.rmtree(hipify_copy) - map_copy = dst_dir / "hipify_custom_map.json" - if map_copy.exists(): - map_copy.unlink() - def install_and_import(package): """Install a package via pip (if not already installed) and import into globals.""" @@ -501,37 +473,3 @@ def uninstall_te_wheel_packages(): "transformer_engine_jax", ] ) - -def hipify(base_dir, src_dir, sources, include_dirs): - cwd = os.getcwd() - hipify_module = importlib.import_module("3rdparty.hipify_torch.hipify_torch.v2.hipify_python") - do_hipify = hipify_module.hipify - print(f"Run hipify on {src_dir}") - - hipify_result = do_hipify( - project_directory=src_dir, - output_directory=src_dir, - includes=["*/common/*", str(src_dir)+"/*"], - ignores=["*/amd_detail/*", "*/aotriton/*", "*/ck_fused_attn/*", "*/rocshmem_api/*"], - header_include_dirs=[d for d in include_dirs if Path(d).is_relative_to(base_dir)], - custom_map_list=base_dir / "hipify_custom_map.json", - extra_files=[], - is_pytorch_extension=True, - hipify_extra_files_only=False, - show_detailed=False, - no_math_replace=True) - - # Because hipify output_directory == project_directory - # Original sources list may contain previous hipifying results that ends up with duplicated entries - # Keep unique entries only - hipified_sources = set() - for fname in sources: - fname = os.path.abspath(str(fname)) - if fname in hipify_result: - file_result = hipify_result[fname] - if file_result.hipified_path is not None: - fname = hipify_result[fname].hipified_path - # setup() arguments must *always* be /-separated paths relative to the setup.py directory, - # *never* absolute paths - hipified_sources.add(os.path.relpath(fname, cwd)) - return list(hipified_sources) diff --git a/setup.py b/setup.py index cd5cacc70..cdc823b69 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,6 @@ from build_tools.utils import ( rocm_build, all_files_in_dir, - hipify, cuda_archs, cuda_version, get_frameworks, @@ -47,12 +46,9 @@ class HipifyMeta(egg_info): def run(self): if rocm_build(): + from build_tools.hipify.hipify import do_hipify print("Running hipification of installable headers for ROCm build...") - common_headers_dir = current_file_path / "transformer_engine/common/include" - #TODO: some installable headers refer non installable headers (i.e not from common/include) - #so we need add extra include paths here to match hipification results with build process - hipify(current_file_path, common_headers_dir, all_files_in_dir(common_headers_dir), - [common_headers_dir, current_file_path / "transformer_engine"]) + do_hipify(current_file_path, current_file_path / "transformer_engine/common/include") super().run() CMakeBuildExtension = get_build_ext(BuildExtension) diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index 57cdb02fe..c7aaac468 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -97,32 +97,8 @@ else() endif() if(USE_ROCM) - set(TE ${CMAKE_CURRENT_SOURCE_DIR}/../..) - set(THIRDPARTY ${TE}/3rdparty) - list(APPEND CMAKE_MODULE_PATH "${THIRDPARTY}/hipify_torch/cmake") - include(Hipify) - message(STATUS "CMAKE_MODULE_PATH: ${CMAKE_MODULE_PATH}") - - #hipify parameters should match those used in TE CMakeLists.txt - set(header_include_dir - ${TE}/transformer_engine/common/include - ${TE}/transformer_engine/common - ${TE}/transformer_engine) - - set(cuda_source_dir ${PROJECT_SOURCE_DIR} ) - message(STATUS "Run hipify on ${cuda_source_dir}") - hipify(CUDA_SOURCE_DIR ${cuda_source_dir} - HEADER_INCLUDE_DIR ${header_include_dir} - IGNORES "*/pytorch/csrc/*" - IGNORES "*/jax/csrc/*" - IGNORES "*/amd_detail/*" - IGNORES "*/aotriton/*" - IGNORES "*/ck_fused_attn/*" - IGNORES "*/rocshmem_api/*" - CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json" - NO_MATH_REPLACE - V2 - ) + include("${CMAKE_CURRENT_SOURCE_DIR}/../../build_tools/hipify/hipify.cmake") + TE_Hipify(${CMAKE_CURRENT_SOURCE_DIR}) endif() add_subdirectory(operator) diff --git a/tests/cpp/operator/CMakeLists.txt b/tests/cpp/operator/CMakeLists.txt index d4beeb87c..87f416df1 100644 --- a/tests/cpp/operator/CMakeLists.txt +++ b/tests/cpp/operator/CMakeLists.txt @@ -41,7 +41,7 @@ endif() if(USE_CUDA) add_executable(test_operator ${test_cuda_sources}) else() - get_hipified_list("${test_cuda_sources}" test_hip_sources) + TE_GetHipifiedSources("${test_cuda_sources}" ${CMAKE_CURRENT_SOURCE_DIR} test_hip_sources) message("${message_line}") message(STATUS "test_operator hipified sources: ${test_hip_sources}") diff --git a/tests/cpp/util/CMakeLists.txt b/tests/cpp/util/CMakeLists.txt index ea986bfa5..35459b74d 100644 --- a/tests/cpp/util/CMakeLists.txt +++ b/tests/cpp/util/CMakeLists.txt @@ -11,7 +11,7 @@ list(APPEND test_cuda_sources if(USE_CUDA) add_executable(test_util ${test_cuda_sources}) else() - get_hipified_list("${test_cuda_sources}" test_hip_sources) + TE_GetHipifiedSources("${test_cuda_sources}" ${CMAKE_CURRENT_SOURCE_DIR} test_hip_sources) message("${message_line}") message(STATUS "test_util hipified sources: ${test_hip_sources}") diff --git a/transformer_engine/common/CMakeLists.txt b/transformer_engine/common/CMakeLists.txt index ec0db3a66..5e2021960 100644 --- a/transformer_engine/common/CMakeLists.txt +++ b/transformer_engine/common/CMakeLists.txt @@ -205,32 +205,10 @@ else() amd_detail/system.cpp) # process source code files - set(TE ${CMAKE_CURRENT_SOURCE_DIR}/../..) - set(THIRDPARTY ${TE}/3rdparty) - list(APPEND CMAKE_MODULE_PATH "${THIRDPARTY}/hipify_torch/cmake") - include(Hipify) - message(STATUS "CMAKE_MODULE_PATH: ${CMAKE_MODULE_PATH}") - - # Parameters for hipify here should match hipify from build_tools/utils.py - # Because CMake hipify doesn't support INCLUDES we explicitly add pytorch and jax to IGNORES - set(header_include_dir - ${CMAKE_CURRENT_SOURCE_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${CMAKE_CURRENT_SOURCE_DIR}) - message(STATUS "Run hipify on ${CMAKE_CURRENT_SOURCE_DIR}") - hipify(CUDA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} - HEADER_INCLUDE_DIR ${header_include_dir} - IGNORES "*/amd_detail/*" - IGNORES "*/aotriton/*" - IGNORES "*/ck_fused_attn/*" - IGNORES "*/pytorch/csrc/*" - IGNORES "*/jax/csrc/*" - IGNORES "*/rocshmem_api/*" - CUSTOM_MAP_FILE "${TE}/hipify_custom_map.json" - NO_MATH_REPLACE - V2 - ) - get_hipified_list("${transformer_engine_SOURCES}" te_hip_sources) + include("${CMAKE_CURRENT_SOURCE_DIR}/../../build_tools/hipify/hipify.cmake") + TE_Hipify(${CMAKE_CURRENT_SOURCE_DIR}) + TE_GetHipifiedSources("${transformer_engine_SOURCES}" ${CMAKE_CURRENT_SOURCE_DIR} te_hip_sources) + message("${message_line}") message(STATUS "nvte hipified sources: ${te_hip_sources}") diff --git a/transformer_engine/common/aotriton/CMakeLists.txt b/transformer_engine/common/aotriton/CMakeLists.txt index 4ac780753..c4be66106 100644 --- a/transformer_engine/common/aotriton/CMakeLists.txt +++ b/transformer_engine/common/aotriton/CMakeLists.txt @@ -64,6 +64,7 @@ if(NOT DEFINED AOTRITON_PATH) # Build the AOTriton runtime from source with custom suffix to avoid # potential conflict with libaotriton as provided by PyTorch function(aotriton_build_from_source) + set(TE ${CMAKE_CURRENT_SOURCE_DIR}/../../..) get_git_commit(${TE}/3rdparty/aotriton AOTRITON_SHA) ExternalProject_Add(aotriton_external LIST_SEPARATOR "," diff --git a/transformer_engine/common/util/rtc.cpp b/transformer_engine/common/util/rtc.cpp index 0eeb9b4d6..b6aa382f6 100644 --- a/transformer_engine/common/util/rtc.cpp +++ b/transformer_engine/common/util/rtc.cpp @@ -1,6 +1,6 @@ /************************************************************************* * This file was modified for portability to AMDGPU - * Copyright (c) 2023-2025, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (c) 2023-2026, Advanced Micro Devices, Inc. All rights reserved. * Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * * See LICENSE for license information. @@ -187,7 +187,7 @@ void KernelManager::compile(const std::string& kernel_label, const std::string& #ifdef __HIP_PLATFORM_AMD__ constexpr int num_headers = 3; const char* headers[num_headers] = {string_code_utils_cuh, string_code_util_math_h, string_code_amd_detail_hip_float8_h}; - const char* include_names[num_headers] = {"utils_hip.cuh", "util/math.h", "amd_detail/hip_float8.h"}; + const char* include_names[num_headers] = {"utils_hip.cuh", "util/math.h", "common/amd_detail/hip_float8.h"}; #else constexpr int num_headers = 2; constexpr const char* headers[num_headers] = {string_code_utils_cuh, string_code_util_math_h}; diff --git a/transformer_engine/jax/MANIFEST.in b/transformer_engine/jax/MANIFEST.in index fdfea9fdc..4093ae065 100644 --- a/transformer_engine/jax/MANIFEST.in +++ b/transformer_engine/jax/MANIFEST.in @@ -2,4 +2,3 @@ recursive-include build_tools *.* recursive-include common_headers *.* recursive-include csrc *.* recursive-include 3rdparty *.* -include hipify_custom_map.json diff --git a/transformer_engine/jax/setup.py b/transformer_engine/jax/setup.py index b58d2df7f..33c5c011a 100644 --- a/transformer_engine/jax/setup.py +++ b/transformer_engine/jax/setup.py @@ -46,13 +46,15 @@ from build_tools.build_ext import get_build_ext -from build_tools.utils import ( rocm_build, copy_common_headers, copy_hipify_tools, - clear_hipify_tools_copy) +from build_tools.utils import rocm_build, copy_common_headers from build_tools.te_version import te_version from build_tools.jax import setup_jax_extension, install_requirements, test_requirements from pybind11.setup_helpers import build_ext as BuildExtension +if rocm_build(): + from build_tools.hipify.hipify import copy_hipify_tools, clear_hipify_tools_copy + os.environ["NVTE_PROJECT_BUILDING"] = "1" CMakeBuildExtension = get_build_ext(BuildExtension, True) @@ -90,7 +92,8 @@ # Extensions common_headers_dir = "common_headers" copy_common_headers(current_file_path.parent, str(current_file_path / common_headers_dir)) - copy_hipify_tools(current_file_path.parent.parent, current_file_path) + if rocm_build(): + copy_hipify_tools(current_file_path.parent.parent, current_file_path) ext_modules = [ setup_jax_extension( "csrc", current_file_path / "csrc", current_file_path / common_headers_dir @@ -110,4 +113,5 @@ if any(x in sys.argv for x in (".", "sdist", "bdist_wheel")): shutil.rmtree(common_headers_dir) shutil.rmtree("build_tools") - clear_hipify_tools_copy(current_file_path) + if rocm_build(): + clear_hipify_tools_copy(current_file_path) diff --git a/transformer_engine/pytorch/MANIFEST.in b/transformer_engine/pytorch/MANIFEST.in index fa4aefd74..b7edfbc0f 100644 --- a/transformer_engine/pytorch/MANIFEST.in +++ b/transformer_engine/pytorch/MANIFEST.in @@ -3,4 +3,3 @@ recursive-include common_headers *.* recursive-include csrc *.* recursive-include 3rdparty *.* recursive-include triton_kernels/gmm/configs *.json -include hipify_custom_map.json diff --git a/transformer_engine/pytorch/setup.py b/transformer_engine/pytorch/setup.py index e86873b12..634006c11 100644 --- a/transformer_engine/pytorch/setup.py +++ b/transformer_engine/pytorch/setup.py @@ -47,8 +47,7 @@ from build_tools.build_ext import get_build_ext -from build_tools.utils import ( - rocm_build, copy_common_headers, copy_hipify_tools, clear_hipify_tools_copy ) +from build_tools.utils import rocm_build, copy_common_headers from build_tools.te_version import te_version from build_tools.pytorch import ( setup_pytorch_extension, @@ -56,6 +55,9 @@ test_requirements, ) +if rocm_build(): + from build_tools.hipify.hipify import copy_hipify_tools, clear_hipify_tools_copy + os.environ["NVTE_PROJECT_BUILDING"] = "1" CMakeBuildExtension = get_build_ext(BuildExtension, True) From d255b17ebc263f84c2059b76a760d98803f2bd41 Mon Sep 17 00:00:00 2001 From: Ilya Panfilov Date: Wed, 25 Feb 2026 14:05:20 -0500 Subject: [PATCH 3/3] Copyright --- build_tools/hipify/hipify.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build_tools/hipify/hipify.cmake b/build_tools/hipify/hipify.cmake index 934964b2e..39f4e97b8 100644 --- a/build_tools/hipify/hipify.cmake +++ b/build_tools/hipify/hipify.cmake @@ -1,3 +1,7 @@ +# Copyright (c) 2026, Advanced Micro Devices, Inc. All rights reserved. +# +# See LICENSE for license information. + set(_HIPIFY_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}")