diff --git a/.github/workflows/cpp_clang_tidy.yml b/.github/workflows/cpp_clang_tidy.yml new file mode 100644 index 000000000000..172e0ffcdf69 --- /dev/null +++ b/.github/workflows/cpp_clang_tidy.yml @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Clang Tidy Check + +on: + workflow_run: + workflows: ["Velox Backend (x86)"] + types: + - completed + +env: + ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + +concurrency: + group: clang-tidy-${{ github.event.workflow_run.id }} + cancel-in-progress: true + +jobs: + clang-tidy-check: + if: ${{ github.event.workflow_run.conclusion == 'success' }} + runs-on: ubuntu-22.04 + container: apache/gluten:centos-8-jdk8 + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.workflow_run.head_sha }} + fetch-depth: 0 + - name: Detect CPP file changes + id: filter + run: | + BASE_SHA=$(python3 -c " + import json, os + with open(os.environ['GITHUB_EVENT_PATH']) as f: + event = json.load(f) + wr = event.get('workflow_run', {}) + prs = wr.get('pull_requests', []) + if prs: + print(prs[0]['base']['sha']) + else: + print(wr.get('before', '')) + ") + HEAD_SHA="${{ github.event.workflow_run.head_sha }}" + if git diff --name-only $BASE_SHA $HEAD_SHA | grep -E '^cpp/.*\.(cc|h)$'; then + echo "cpp_changed=true" >> $GITHUB_OUTPUT + else + echo "cpp_changed=false" >> $GITHUB_OUTPUT + fi + - name: Download artifact from in-progress workflow + if: steps.filter.outputs.cpp_changed == 'true' + uses: dawidd6/action-download-artifact@v3 + with: + github_token: ${{secrets.GITHUB_TOKEN}} + workflow: .github/workflows/velox_backend_x86.yml + name: velox-native-lib-centos-7-${{ github.event.workflow_run.head_sha }} + commit: ${{ github.event.workflow_run.head_sha }} + workflow_conclusion: "" + check_artifacts: true + search_artifacts: true + if_no_artifact_found: fail + - name: Check Clang Tidy + run: | + pip3 install regex + yum install glibc-devel glibc-headers + cd $GITHUB_WORKSPACE/ + yum install clang llvm llvm-devel -y + dnf install clang-tools-extra -y + clang-tidy --version + sed -i "s|/work|$(pwd)|g" cpp/build/compile_commands.json + python3 dev/check.py tidy commit --fix + diff --git a/.github/workflows/velox_backend_x86.yml b/.github/workflows/velox_backend_x86.yml index d2a9768ddeee..3f5a1efb2bba 100644 --- a/.github/workflows/velox_backend_x86.yml +++ b/.github/workflows/velox_backend_x86.yml @@ -92,7 +92,7 @@ jobs: - uses: actions/upload-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ + path: ./cpp/build/ if-no-files-found: error - uses: actions/upload-artifact@v4 with: @@ -139,7 +139,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ + path: ./cpp/build/ - name: Download All Arrow Jar Artifacts uses: actions/download-artifact@v4 with: @@ -216,7 +216,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ + path: ./cpp/build/ - name: Download All Arrow Jar Artifacts uses: actions/download-artifact@v4 with: @@ -291,7 +291,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ + path: ./cpp/build/ - name: Download All Arrow Jar Artifacts uses: actions/download-artifact@v4 with: @@ -359,7 +359,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ + path: ./cpp/build/ - name: Download All Arrow Jar Artifacts uses: actions/download-artifact@v4 with: @@ -474,7 +474,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ + path: ./cpp/build/ - name: Download All Arrow Jar Artifacts uses: actions/download-artifact@v4 with: @@ -517,7 +517,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ + path: ./cpp/build/ - name: Download All Arrow Jar Artifacts uses: actions/download-artifact@v4 with: @@ -568,7 +568,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases/ + path: ./cpp/build/ - name: Download All Arrow Jar Artifacts uses: actions/download-artifact@v4 with: @@ -640,7 +640,7 @@ jobs: - uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - uses: actions/download-artifact@v4 with: name: arrow-jars-centos-7-${{github.sha}} @@ -691,7 +691,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -728,7 +728,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -784,7 +784,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -827,7 +827,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -885,7 +885,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -929,7 +929,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -985,7 +985,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -1035,7 +1035,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -1077,7 +1077,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -1126,7 +1126,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -1167,7 +1167,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -1216,7 +1216,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -1342,7 +1342,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: @@ -1397,7 +1397,7 @@ jobs: uses: actions/download-artifact@v4 with: name: velox-native-lib-centos-7-${{github.sha}} - path: ./cpp/build/releases + path: ./cpp/build/ - name: Download Arrow Jars uses: actions/download-artifact@v4 with: diff --git a/cpp/core/memory/ArrowMemoryPool.h b/cpp/core/memory/ArrowMemoryPool.h index 9f3592ceec17..841244ec6b3d 100644 --- a/cpp/core/memory/ArrowMemoryPool.h +++ b/cpp/core/memory/ArrowMemoryPool.h @@ -17,10 +17,11 @@ #pragma once -#include "arrow/memory_pool.h" +#include "arrow/memory_pool.h" // IWYU pragma: keep -#include "MemoryAllocator.h" +#include "MemoryAllocator.h" // IWYU pragma: keep +// NOLINTNEXTLINE(cert-dcl58-cpp) namespace gluten { using ArrowMemoryPoolReleaser = std::function; @@ -28,12 +29,21 @@ using ArrowMemoryPoolReleaser = std::function; /// This pool was not tracked by Spark, should only used in test. class ArrowMemoryPool final : public arrow::MemoryPool { public: + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init, hicpp-member-init) explicit ArrowMemoryPool(AllocationListener* listener, ArrowMemoryPoolReleaser releaser = nullptr) : allocator_(std::make_unique(defaultMemoryAllocator().get(), listener)), releaser_(std::move(releaser)) {} ~ArrowMemoryPool() override; + ArrowMemoryPool(const ArrowMemoryPool&) = delete; + + ArrowMemoryPool& operator=(const ArrowMemoryPool&) = delete; + + ArrowMemoryPool(ArrowMemoryPool&&) = delete; + + ArrowMemoryPool& operator=(ArrowMemoryPool&&) = delete; + arrow::Status Allocate(int64_t size, int64_t alignment, uint8_t** out) override; arrow::Status Reallocate(int64_t oldSize, int64_t newSize, int64_t alignment, uint8_t** ptr) override; @@ -53,7 +63,8 @@ class ArrowMemoryPool final : public arrow::MemoryPool { MemoryAllocator* allocator() const; private: - std::unique_ptr allocator_; + std::unique_ptr allocator_ = nullptr; + ArrowMemoryPoolReleaser releaser_; }; diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 34a5fe1a2ea3..dc7382838672 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -248,6 +248,7 @@ function build_gluten_cpp { -DENABLE_HDFS=$ENABLE_HDFS \ -DENABLE_ABFS=$ENABLE_ABFS \ -DENABLE_GPU=$ENABLE_GPU \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DENABLE_ENHANCED_FEATURES=$ENABLE_ENHANCED_FEATURES" if [ $OS == 'Darwin' ]; then diff --git a/dev/check.py b/dev/check.py index e237ae250a0d..f1ec051c90d5 100755 --- a/dev/check.py +++ b/dev/check.py @@ -168,7 +168,7 @@ def header_command(commit, files, fix): def tidy_command(commit, files, fix): - files = [file for file in files if regex.match(r".*\.cpp$", file)] + files = [file for file in files if regex.match(r".*\.(cc|cpp|h)$", file)] if not files: return 0 @@ -177,7 +177,7 @@ def tidy_command(commit, files, fix): fix = "--fix" if fix == "fix" else "" status, stdout, stderr = util.run( - f"{SCRIPTS}/run-clang-tidy.py {commit} {fix} -", input=files + f"{SCRIPTS}/run-clang-tidy.py {commit} {fix} ", input=files ) if stdout != "": diff --git a/dev/run-clang-tidy.py b/dev/run-clang-tidy.py new file mode 100755 index 000000000000..e1298106b74b --- /dev/null +++ b/dev/run-clang-tidy.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import argparse +import os +import regex +import subprocess +import sys + + +class string(str): + def extract(self, rexp): + return regex.match(rexp, self).group(1) + + def json(self): + return json.loads(self, object_hook=attrdict) + + +CODE_CHECKS = """* + -abseil-* + -android-* + -cert-err58-cpp + -clang-analyzer-osx-* + -cppcoreguidelines-avoid-c-arrays + -cppcoreguidelines-avoid-magic-numbers + -cppcoreguidelines-pro-bounds-array-to-pointer-decay + -cppcoreguidelines-pro-bounds-pointer-arithmetic + -cppcoreguidelines-pro-type-reinterpret-cast + -cppcoreguidelines-pro-type-vararg + -fuchsia-* + -google-* + -hicpp-avoid-c-arrays + -hicpp-deprecated-headers + -hicpp-no-array-decay + -hicpp-use-equals-default + -hicpp-vararg + -llvmlibc-* + -llvm-header-guard + -llvm-include-order + -mpi-* + -misc-non-private-member-variables-in-classes + -misc-no-recursion + -misc-unused-parameters + -modernize-avoid-c-arrays + -modernize-deprecated-headers + -modernize-use-nodiscard + -modernize-use-trailing-return-type + -objc-* + -openmp-* + -readability-avoid-const-params-in-decls + -readability-convert-member-functions-to-static + -readability-magic-numbers + -zircon-* +""" + + +def run(command, compressed=False, **kwargs): + if "input" in kwargs: + input = kwargs["input"] + + if type(input) == list: + input = "\n".join(input) + "\n" + + kwargs["input"] = input.encode("utf-8") + reply = subprocess.run( + command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs + ) + + if compressed: + stdout = gzip.decompress(reply.stdout) + else: + stdout = reply.stdout + + stdout = ( + string(stdout.decode("utf-8", errors="ignore").strip()) + if stdout is not None + else "" + ) + stderr = ( + string(reply.stderr.decode("utf-8").strip()) if reply.stderr is not None else "" + ) + + if stderr != "": + print(stderr, file=sys.stderr) + + return reply.returncode, stdout, stderr + + +def check_list(check_string): + return ",".join([c.strip() for c in check_string.strip().splitlines() if c.strip()]) + + +CODE_CHECKS = check_list(CODE_CHECKS) + + +def check_output_has_warnings(output): + if not output: + return False + return bool(regex.search(r"(?i)\b(warning|error):", output)) + + +def ensure_compile_db(build_dir): + path = os.path.join(build_dir, "compile_commands.json") + return os.path.exists(path) + + +def tidy(args): + build_dir = "cpp/build/" + + if not ensure_compile_db(build_dir): + print("No compile_commands.json found in '{}'.".format(build_dir)) + return 1 + + fix = "--fix" if args.fix == "fix" else "" + files = args.files + + cmd = ( + "xargs clang-tidy -p={build} --format-style=file " + "--checks='{checks}' {fix} --quiet".format( + build=build_dir, + checks=CODE_CHECKS, + fix=fix, + ) + ) + + print("Running clang-tidy on {} file(s)...".format(len(files))) + status, stdout, stderr = run(cmd, input=files) + + combined_output = "" + if stdout: + combined_output += stdout + "\n" + if stderr: + combined_output += stderr + + if check_output_has_warnings(combined_output): + print("clang-tidy found warnings/errors.") + # Print a reasonably sized portion to avoid flooding logs; print all if small + if len(combined_output) > 20000: + print(combined_output[:20000]) + print("... (output truncated)") + else: + print(combined_output) + return 1 + + # Also check the return code (status) - clang-tidy may return non-zero for internal errors + if status != 0: + print("clang-tidy returned non-zero exit code:", status) + if combined_output: + print(combined_output) + return 1 + + print("clang-tidy completed successfully (no warnings).") + return 0 + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Run clang-tidy with project settings (compatible with older Python versions)" + ) + parser.add_argument("--fix", action="store_const", default="show", const="fix") + parser.add_argument("--commit", default="", help="Commit for check") + parser.add_argument("files", metavar="FILES", nargs="*", help="files to process") + return parser.parse_args() + + +def main(): + args = parse_args() + if not args.files: + args.files = [line.strip() for line in sys.stdin if line.strip()] + return tidy(args) + + +if __name__ == "__main__": + sys.exit(main())