diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml new file mode 100644 index 000000000..82f512e40 --- /dev/null +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -0,0 +1,183 @@ +name: Python Pybind cibuildwheel + +on: + push: + branches: [master, release-*] + pull_request: + branches: [master] + workflow_dispatch: + +jobs: + build_wheels: + name: cibuildwheel ${{ matrix.os }}/${{ matrix.arch }}/${{ matrix.flavor }}/${{ matrix.target }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-22.04] + # separate archs, so they use individual caches + arch: ["x86_64", "arm64"] + flavor: ["cpython", "pypy"] + # separate musl and many on linux, for mac we just skip one of those + target: ["many", "musl"] + exclude: + - os: ubuntu-22.04 + target: musl + flavor: pypy + steps: + - uses: actions/checkout@v4 + - name: Set up QEMU + if: ${{ (runner.os == 'Linux') && (matrix.arch == 'arm64') }} + uses: docker/setup-qemu-action@v3 + with: + platforms: all + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.11 + with: + key: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}-${{ matrix.flavor }}-python + + - name: Sets env for x86_64 + run: | + echo "CIBW_ARCHS_LINUX=auto64" >> $GITHUB_ENV + echo "CIBW_ARCHS_MACOS=x86_64" >> $GITHUB_ENV + if: matrix.arch == 'x86_64' + + - name: Sets env for arm64 + run: | + echo "CIBW_ARCHS_LINUX=aarch64" >> $GITHUB_ENV + echo "CIBW_ARCHS_MACOS=arm64" >> $GITHUB_ENV + if: matrix.arch == 'arm64' + + - name: Skip manylinux for musllinux target + if: ${{ (runner.os == 'Linux') && (matrix.target == 'musl') }} + run: | + echo "CIBW_SKIP=*manylinux*" >> $GITHUB_ENV + + - name: Skip musllinux for manylinux target + if: ${{ (runner.os == 'Linux') && (matrix.target == 'many') }} + run: | + echo "CIBW_SKIP=*musllinux*" >> $GITHUB_ENV + + - name: Skip pypy for cpython + if: ${{ matrix.flavor == 'cpython' }} + run: | + echo "CIBW_SKIP=${{ env.CIBW_SKIP }} pp*" >> 
$GITHUB_ENV + + - name: Skip cpython for pypy + if: ${{ matrix.flavor == 'pypy' }} + run: | + echo "CIBW_SKIP=${{ env.CIBW_SKIP }} cp*" >> $GITHUB_ENV + + - name: install mac dependencies + if: ${{ runner.os == 'macOS' }} + # 2nd command: workaround https://github.com/actions/setup-python/issues/577 + run: | + brew update && \ + brew list -1 | grep python | while read formula; do brew unlink $formula; brew link --overwrite $formula; done && \ + brew install ccache + + - name: install mac dependencies X86_64 + if: ${{ (runner.os == 'macOS') && (matrix.arch == 'x86_64') }} + run: | + brew update && \ + brew install zlib snappy boost + + - name: install mac dependencies arm64 + if: ${{ (runner.os == 'macOS') && (matrix.arch == 'arm64') }} + run: | + set -e + echo "MACOSX_DEPLOYMENT_TARGET=12.3.0" >> $GITHUB_ENV + echo "_CMAKE_PREFIX_PATH=${{ github.workspace }}/arm64-homebrew" >> $GITHUB_ENV + echo "CIBW_REPAIR_WHEEL_COMMAND_MACOS=DYLD_LIBRARY_PATH=${{ github.workspace }}/arm64-homebrew delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}" >> $GITHUB_ENV + mkdir arm64-homebrew && curl -L https://github.com/Homebrew/brew/tarball/master | tar xz --strip 1 -C arm64-homebrew + PACKAGES=( icu4c xz lz4 zstd zlib snappy boost ) + for PACKAGE in "${PACKAGES[@]}" + do + response=$(arm64-homebrew/bin/brew fetch --force --bottle-tag=arm64_sonoma $PACKAGE | grep Downloaded ) + download_path=$(echo $response | xargs -n 1 | tail -1) + arm64-homebrew/bin/brew reinstall -vd $download_path + done + arm64-homebrew/bin/brew config + ls /Users/runner/work/keyvi/keyvi/arm64-homebrew + + - name: Build python wheels for ${{ matrix.os }} on ${{ matrix.arch }} + uses: pypa/cibuildwheel@v2.17.0 + env: + # Skip CPython 3.6 and CPython 3.7 + CIBW_SKIP: ${{ env.CIBW_SKIP }} cp36-* cp37-* pp37-* + + # skip testing all python versions on linux arm, only test 3.12 + # skip tests on pypy, currently fails for indexer tests + CIBW_TEST_SKIP: "*p{38,39,310,311}-m*linux_aarch64 pp*" + 
+ # (many)linux custom docker images + CIBW_MANYLINUX_X86_64_IMAGE: "keyvidev/manylinux-builder-x86_64" + CIBW_MANYLINUX_AARCH64_IMAGE: "keyvidev/manylinux-builder-aarch64" + CIBW_MUSLLINUX_X86_64_IMAGE: "keyvidev/musllinux-builder-x86_64" + CIBW_MUSLLINUX_AARCH64_IMAGE: "keyvidev/musllinux-builder-aarch64" + + # ccache using path + CIBW_ENVIRONMENT_MACOS: PATH=/usr/local/opt/ccache/libexec:$PATH + CIBW_ENVIRONMENT_LINUX: PATH=/usr/local/bin:/usr/lib/ccache:$PATH CCACHE_DIR=/host${{ github.workspace }}/.ccache CCACHE_CONFIGPATH=/host/home/runner/.config/ccache/ccache.conf + + # python dependencies + CIBW_BEFORE_BUILD: pip install -r python/requirements.txt + + # testing + CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: > + python -m pytest {package}/tests + + # for debugging set this to 1,2 or 3 + # CIBW_BUILD_VERBOSITY: 2 + with: + package-dir: python-pybind + + - uses: actions/upload-artifact@v4 + with: + name: artifact-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.flavor }}-${{ matrix.target }} + path: ./wheelhouse/*.whl + + build_sdist: + name: sdist + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: install Linux deps + run: | + sudo apt-get update && \ + sudo apt-get install -y libsnappy-dev libzzip-dev zlib1g-dev libboost-all-dev ccache + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.11 + with: + key: ${{ runner.os }}-sdist-python + + - name: Build SDist + run: | + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cd python-pybind && \ + python -m pip install . 
&& \ + python setup.py sdist -d wheelhouse && \ + python -m pip install wheelhouse/*.tar.gz -v && \ + python -m pip install pytest && \ + python -m pytest tests && \ + python -m pip uninstall -y keyvi_pybind11 + + - uses: actions/upload-artifact@v4 + with: + name: artifact-sdist + path: python-pybind/wheelhouse/*.tar.gz + + upload_all: + needs: [build_wheels, build_sdist] + runs-on: ubuntu-latest + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + steps: + - uses: actions/download-artifact@v4 + with: + pattern: artifact-* + merge-multiple: true + path: dist + + - uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.pypi_password }} diff --git a/.gitignore b/.gitignore index 0c020fb18..e13d3cdcd 100644 --- a/.gitignore +++ b/.gitignore @@ -31,7 +31,7 @@ *.orig # cmake build dir -build/* +/*build* */cmake-build-debug/* build_dir_debug/ cmake-build-debug/ @@ -45,3 +45,11 @@ cmake-build-debug/ # vim swap files *.swp + +# python +*.egg-info + +# pybind build folder +python*/*build* +python*/dist +python*/.cache/ diff --git a/CMakeLists.txt b/CMakeLists.txt index d2e740b14..7ababae3d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.21) project(keyvi) #### Build Type @@ -6,14 +6,25 @@ if (CMAKE_BUILD_TYPE) string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) endif() +#### Options + +option(KEYVI_C_BINDINGS "Keyvi: Build C binding" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_PYTHON_BINDINGS "Keyvi: Build Python module" OFF) +option(KEYVI_TESTS "Keyvi: Build unit tests" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_BINARIES "Keyvi: Build binaries" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_CLANG_TIDY "Keyvi: Build with clang tidy" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_DOCS "Keyvi: Build docs" ${PROJECT_IS_TOP_LEVEL}) + #### Linting -find_program(CLANGTIDY clang-tidy) -if(CLANGTIDY) - message ("-- Found clang-tidy") - set(CMAKE_CXX_CLANG_TIDY 
clang-tidy; --extra-arg-before=-std=c++17) -else() - message ("-- clang-tidy not found") -endif() +if(KEYVI_CLANG_TIDY) + find_program(CLANGTIDY clang-tidy) + if(CLANGTIDY) + message ("-- Found clang-tidy") + set(CMAKE_CXX_CLANG_TIDY clang-tidy; --extra-arg-before=-std=c++17) + else() + message ("-- clang-tidy not found") + endif() +endif(KEYVI_CLANG_TIDY) #### Cmake modules set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/") @@ -141,67 +152,73 @@ string(REPLACE " " ";" _KEYVI_COMPILE_DEFINITIONS_LIST "${_KEYVI_COMPILE_DEFINIT #### Targets #### -# keyvicompiler -add_executable(keyvicompiler keyvi/bin/keyvicompiler/keyvicompiler.cpp) -target_link_libraries(keyvicompiler ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) -target_compile_options(keyvicompiler PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyvicompiler PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyvicompiler PRIVATE "$") +if(KEYVI_BINARIES) + # keyvicompiler + add_executable(keyvicompiler keyvi/bin/keyvicompiler/keyvicompiler.cpp) + target_link_libraries(keyvicompiler ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) + target_compile_options(keyvicompiler PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyvicompiler PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyvicompiler PRIVATE "$") -install (TARGETS keyvicompiler DESTINATION bin COMPONENT applications OPTIONAL) + install (TARGETS keyvicompiler DESTINATION bin COMPONENT applications OPTIONAL) -# keyviinspector -add_executable(keyviinspector keyvi/bin/keyviinspector/keyviinspector.cpp) -target_link_libraries(keyviinspector ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) -target_compile_options(keyviinspector PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyviinspector PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) 
-target_include_directories(keyviinspector PRIVATE "$") + # keyviinspector + add_executable(keyviinspector keyvi/bin/keyviinspector/keyviinspector.cpp) + target_link_libraries(keyviinspector ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) + target_compile_options(keyviinspector PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyviinspector PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyviinspector PRIVATE "$") -install (TARGETS keyviinspector DESTINATION bin COMPONENT applications OPTIONAL) + install (TARGETS keyviinspector DESTINATION bin COMPONENT applications OPTIONAL) -# keyvimerger -add_executable(keyvimerger keyvi/bin/keyvimerger/keyvimerger.cpp) -target_link_libraries(keyvimerger ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) -target_compile_options(keyvimerger PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyvimerger PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyvimerger PRIVATE "$") + # keyvimerger + add_executable(keyvimerger keyvi/bin/keyvimerger/keyvimerger.cpp) + target_link_libraries(keyvimerger ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) + target_compile_options(keyvimerger PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyvimerger PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyvimerger PRIVATE "$") -install (TARGETS keyvimerger DESTINATION bin COMPONENT applications) + install (TARGETS keyvimerger DESTINATION bin COMPONENT applications) +endif(KEYVI_BINARIES) # keyvi_c -add_library(keyvi_c SHARED keyvi/bin/keyvi_c/c_api.cpp) -target_link_libraries(keyvi_c ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) -target_compile_options(keyvi_c PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyvi_c PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyvi_c PRIVATE "$") +if(KEYVI_C_BINDINGS) 
+ add_library(keyvi_c SHARED keyvi/bin/keyvi_c/c_api.cpp) + target_link_libraries(keyvi_c ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) + target_compile_options(keyvi_c PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyvi_c PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyvi_c PRIVATE "$") +endif(KEYVI_C_BINDINGS) # unit tests -FILE(GLOB_RECURSE UNIT_TEST_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} keyvi/tests/keyvi/*.cpp) -add_executable(unit_test_all ${UNIT_TEST_SOURCES}) -target_link_libraries(unit_test_all ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) -target_compile_options(unit_test_all PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(unit_test_all PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(unit_test_all PRIVATE "$") -add_dependencies(unit_test_all keyvimerger) - -if (WIN32) - message(STATUS "zlib: ${ZLIB_LIBRARY_RELEASE}") - # copies the dlls required to run to the build folder - foreach(LIB ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE} ${Boost_FILESYSTEM_LIBRARY_RELEASE} ${ZLIB_LIBRARY_RELEASE}) - get_filename_component(UTF_BASE_NAME ${LIB} NAME_WE) - get_filename_component(UTF_PATH ${LIB} PATH) - if(EXISTS "${UTF_PATH}/${UTF_BASE_NAME}.dll") - add_custom_command(TARGET unit_test_all POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} - ) - # zlib might be stored in a different folder - elseif(EXISTS "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll") - add_custom_command(TARGET unit_test_all POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} - ) - endif() - endforeach() -endif (WIN32) +if(KEYVI_TESTS) + FILE(GLOB_RECURSE UNIT_TEST_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} keyvi/tests/keyvi/*.cpp) + add_executable(unit_test_all ${UNIT_TEST_SOURCES}) + 
target_link_libraries(unit_test_all ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) + target_compile_options(unit_test_all PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(unit_test_all PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(unit_test_all PRIVATE "$") + add_dependencies(unit_test_all keyvimerger) + + if (WIN32) + message(STATUS "zlib: ${ZLIB_LIBRARY_RELEASE}") + # copies the dlls required to run to the build folder + foreach(LIB ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE} ${Boost_FILESYSTEM_LIBRARY_RELEASE} ${ZLIB_LIBRARY_RELEASE}) + get_filename_component(UTF_BASE_NAME ${LIB} NAME_WE) + get_filename_component(UTF_PATH ${LIB} PATH) + if(EXISTS "${UTF_PATH}/${UTF_BASE_NAME}.dll") + add_custom_command(TARGET unit_test_all POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} + ) + # zlib might be stored in a different folder + elseif(EXISTS "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll") + add_custom_command(TARGET unit_test_all POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} + ) + endif() + endforeach() + endif (WIN32) +endif(KEYVI_TESTS) # bindings add_custom_target(bindings @@ -226,10 +243,14 @@ target_include_directories(keyvi INTERFACE "$ target_compile_definitions(keyvi INTERFACE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) target_link_libraries(keyvi INTERFACE ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES}) +if (KEYVI_PYTHON_BINDINGS) + add_subdirectory(python-pybind) +endif () + ### docs # don't run it as part of a non-toplevel build, e.g. 
python -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/sphinx-docs) +if(KEYVI_DOCS) find_package(Doxygen) find_package(Sphinx COMPONENTS breathe) @@ -255,4 +276,4 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/sphinx-docs) else() message ("-- Skip doc target, doxygen/sphinx not found") endif() -endif() +endif(KEYVI_DOCS) diff --git a/keyvi/check-style.sh b/keyvi/check-style.sh index 13ae3a6cb..d6b3964c3 100755 --- a/keyvi/check-style.sh +++ b/keyvi/check-style.sh @@ -10,7 +10,7 @@ else commit_range="upstream/master...HEAD" fi -infiles=`git diff --name-only --diff-filter=ACMRT $(echo ${commit_range} | sed 's/\.//') | grep -v "3rdparty" | grep -E "\.(cpp|h)$"` +infiles=`git diff --name-only --diff-filter=ACMRT $(echo ${commit_range} | sed 's/\.//') | grep -v "3rdparty" | grep -v "pybind11" | grep -E "\.(cpp|h)$"` clang_format_files=() cpplint_files=() diff --git a/keyvi/include/keyvi/dictionary/match.h b/keyvi/include/keyvi/dictionary/match.h index db8b2a684..9586f7987 100644 --- a/keyvi/include/keyvi/dictionary/match.h +++ b/keyvi/include/keyvi/dictionary/match.h @@ -58,7 +58,7 @@ namespace dictionary { #ifdef Py_PYTHON_H class attributes_visitor : public boost::static_visitor { public: - PyObject* operator()(int i) const { return PyInt_FromLong(i); } + PyObject* operator()(int i) const { return PyLong_FromLong(i); } PyObject* operator()(double i) const { return PyFloat_FromDouble(i); } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..b82c4641d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,74 @@ +[build-system] +requires = ["scikit-build-core>=0.10", "pybind11"] +build-backend = "scikit_build_core.build" + +[project] +name = "keyvi_scikit_core" +version = "0.6.3dev0" +dependencies = [ + "msgpack>=1.0.0", +] + +[tool.scikit-build] +wheel.expand-macos-universal-tags = true +minimum-version = "build-system.requires" + +[tool.scikit-build.cmake.define] +KEYVI_PYTHON_BINDINGS = "ON" +KEYVI_C_BINDINGS = "OFF" +KEYVI_TESTS = "OFF" +KEYVI_BINARIES = "OFF" 
+KEYVI_CLANG_TIDY = "OFF" +KEYVI_DOCS = "OFF" + +[tool.pytest.ini_options] +minversion = "8.0" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] +xfail_strict = true +log_cli_level = "INFO" +filterwarnings = [ + "error", + "ignore::pytest.PytestCacheWarning", +] +testpaths = ["tests"] + +[tool.cibuildwheel] +build-frontend = "build[uv]" +test-command = "pytest {project}/python/tests" +test-extras = ["test"] + +[tool.cibuildwheel.pyodide] +build-frontend = {name = "build", args = ["--exports", "whole_archive"]} + +[tool.ruff.lint] +extend-select = [ + "B", # flake8-bugbear + "I", # isort + "ARG", # flake8-unused-arguments + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "ICN", # flake8-import-conventions + "G", # flake8-logging-format + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "RET", # flake8-return + "RUF", # Ruff-specific + "SIM", # flake8-simplify + "T20", # flake8-print + "UP", # pyupgrade + "YTT", # flake8-2020 + "EXE", # flake8-executable + "NPY", # NumPy specific rules + "PD", # pandas-vet +] +ignore = [ + "PLR09", # Too many X + "PLR2004", # Magic comparison +] +isort.required-imports = ["from __future__ import annotations"] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["T20"] diff --git a/python-pybind/CMakeLists.txt b/python-pybind/CMakeLists.txt new file mode 100644 index 000000000..04a8bfd6c --- /dev/null +++ b/python-pybind/CMakeLists.txt @@ -0,0 +1,26 @@ +cmake_minimum_required(VERSION 3.15...3.27) + +# Scikit-build-core sets these values for you, or you can just hard-code the +# name and version. 
+project( + ${SKBUILD_PROJECT_NAME} + VERSION ${SKBUILD_PROJECT_VERSION} + LANGUAGES CXX) + +# Find the module development requirements (requires FindPython from 3.17 or +# scikit-build-core's built-in backport) +find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) +find_package(pybind11 CONFIG REQUIRED) + +# Add a library using FindPython's tooling (pybind11 also provides a helper like +# this) +FILE(GLOB_RECURSE KEYVI_PYBIND_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} src/*.cpp) +pybind11_add_module(keyvi_scikit_core ${KEYVI_PYBIND_SOURCES}) + +target_link_libraries(keyvi_scikit_core PRIVATE keyvi) + +# This is passing in the version as a define just as an example +target_compile_definitions(keyvi_scikit_core PRIVATE VERSION_INFO=${PROJECT_VERSION}) + +# The install directory is the output (wheel) directory +install(TARGETS keyvi_scikit_core DESTINATION .) diff --git a/python-pybind/src/.clang-format b/python-pybind/src/.clang-format new file mode 100644 index 000000000..ab84a2c7b --- /dev/null +++ b/python-pybind/src/.clang-format @@ -0,0 +1,12 @@ +--- +BasedOnStyle: Google +ColumnLimit: '120' +Language: Cpp +Standard: c++17 +TabWidth: '2' +UseTab: Never +ConstructorInitializerIndentWidth: 4 +AllowShortFunctionsOnASingleLine: Inline +IncludeBlocks: Preserve + +... diff --git a/python-pybind/src/CPPLINT.cfg b/python-pybind/src/CPPLINT.cfg new file mode 100644 index 000000000..6ed77f519 --- /dev/null +++ b/python-pybind/src/CPPLINT.cfg @@ -0,0 +1,3 @@ +linelength=120 +root=. +filter=-build/include_subdir,-whitespace/indent_namespace diff --git a/python-pybind/src/dictionary/py_dictionary.cpp b/python-pybind/src/dictionary/py_dictionary.cpp new file mode 100644 index 000000000..96403bd76 --- /dev/null +++ b/python-pybind/src/dictionary/py_dictionary.cpp @@ -0,0 +1,53 @@ +/* * keyvi - A key value store. 
+ * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include "keyvi/dictionary/dictionary.h" +#include "keyvi/dictionary/match.h" + +#include "py_match_iterator.h" + +namespace py = pybind11; +namespace kd = keyvi::dictionary; +namespace kpy = keyvi::pybind; + +void init_keyvi_dictionary(const py::module_ &m) { + m.doc() = R"pbdoc( + keyvi.dictionary + ----------------------- + + .. currentmodule:: keyvi.dictionary + + .. autosummary:: + :toctree: _generate + + )pbdoc"; + + py::class_(m, "Dictionary") + .def(py::init()) + .def("get", &kd::Dictionary::operator[], R"pbdoc( + Get an entry from the dictionary. + )pbdoc") + .def("search", &kd::Dictionary::Lookup) + .def("match", [](const kd::Dictionary &d, const std::string &key) { + auto m = d.Get(key); + return kpy::make_match_iterator(m.begin(), m.end()); + }); +} diff --git a/python-pybind/src/dictionary/py_match.cpp b/python-pybind/src/dictionary/py_match.cpp new file mode 100644 index 000000000..bedc21642 --- /dev/null +++ b/python-pybind/src/dictionary/py_match.cpp @@ -0,0 +1,54 @@ +/* * keyvi - A key value store. + * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include "keyvi/dictionary/dictionary.h" +#include "keyvi/dictionary/match.h" + +#include "py_match_iterator.h" + +namespace py = pybind11; +namespace kd = keyvi::dictionary; + +void init_keyvi_match(const py::module_ &m) { + py::module_ msgpack_ = py::module_::import("msgpack"); + + py::class_>(m, "Match") + .def(py::init<>()) + .def_property("start", &kd::Match::GetStart, &kd::Match::SetStart) + .def_property("end", &kd::Match::GetEnd, &kd::Match::SetEnd) + .def_property("score", &kd::Match::GetScore, &kd::Match::SetScore) + .def_property("matched_string", &kd::Match::GetMatchedString, &kd::Match::SetMatchedString) + .def_property_readonly("value", + [msgpack_](const kd::Match &m) -> py::object { + auto packed_value = m.GetMsgPackedValueAsString(); + if (packed_value.empty()) { + return py::none(); + } + return msgpack_.attr("loads")(py::bytes(packed_value)); + }) + .def("value_as_string", &kd::Match::GetValueAsString) + .def("raw_value_as_string", &kd::Match::GetRawValueAsString) + .def("__getitem__", &kd::Match::GetAttributePy) + // __setitem__ + // dumps loads + .def_property_readonly("weight", &kd::Match::GetWeight) + .def("__bool__", [](const kd::Match &m) -> bool { return !m.IsEmpty(); }); +} diff --git a/python-pybind/src/dictionary/py_match_iterator.h b/python-pybind/src/dictionary/py_match_iterator.h new file mode 100644 index 000000000..21500d931 --- /dev/null +++ b/python-pybind/src/dictionary/py_match_iterator.h @@ -0,0 +1,75 @@ +/* * keyvi - A key value store. 
+ * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DICTIONARY_PY_MATCH_ITERATOR_H_ +#define DICTIONARY_PY_MATCH_ITERATOR_H_ + +#include + +#include + +namespace keyvi { +namespace pybind { + +// adapted from pybind11.h +template +pybind11::iterator make_match_iterator_impl(Iterator first, Sentinel last, Extra &&...extra) { + using state = pybind11::detail::iterator_state; + if (!pybind11::detail::get_type_info(typeid(state), false)) { + pybind11::class_(pybind11::handle(), "iterator", pybind11::module_local()) + .def("__iter__", [](state &s) -> state & { return s; }) + .def( + "__next__", + [](state &s) -> ValueType { + { + // release GIL as incrementing the iterator can be expensive, e.g. for fuzzy match + pybind11::gil_scoped_release no_gil; + if (!s.first_or_done) { + ++s.it; + } else { + s.first_or_done = false; + } + if (s.it == s.end) { + s.first_or_done = true; + throw pybind11::stop_iteration(); + } + } + + return Access()(s.it); + }, + std::forward(extra)..., Policy) + .def("set_min_weight", [](state &s, const uint32_t min_weight) -> void { s.it.SetMinWeight(min_weight); }); + } + + return pybind11::cast(state{std::forward(first), std::forward(last), true}); +} + +/// Makes a python iterator from a first and past-the-end C++ InputIterator. +template ::result_type, + typename... 
Extra> +pybind11::typing::Iterator make_match_iterator(Iterator first, Sentinel last, Extra &&...extra) { + return make_match_iterator_impl, Policy, Iterator, Sentinel, ValueType, + Extra...>(std::forward(first), std::forward(last), + std::forward(extra)...); +} + +} /* namespace pybind */ +} /* namespace keyvi */ + +#endif // DICTIONARY_PY_MATCH_ITERATOR_H_ diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp new file mode 100644 index 000000000..402a381e7 --- /dev/null +++ b/python-pybind/src/py_keyvi.cpp @@ -0,0 +1,49 @@ +/* * keyvi - A key value store. + * + * Copyright 2015 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define STRINGIFY(x) #x +#define MACRO_STRINGIFY(x) STRINGIFY(x) + +namespace py = pybind11; + +void init_keyvi_dictionary(const py::module_ &); +void init_keyvi_match(const py::module_ &); + +PYBIND11_MODULE(keyvi_scikit_core, m) { + m.doc() = R"pbdoc( + keyvi - a key value store. + ----------------------- + + .. currentmodule:: keyvi + + .. 
autosummary:: + :toctree: _generate + + )pbdoc"; + + init_keyvi_match(m); + py::module keyvi_dictionary = m.def_submodule("dictionary", "keyvi_scikit_core.dictionary"); + init_keyvi_dictionary(keyvi_dictionary); + +#ifdef VERSION_INFO + m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); +#else + m.attr("__version__") = "dev"; +#endif +} diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py new file mode 100644 index 000000000..a2c217014 --- /dev/null +++ b/python-pybind/tests/match_object_test.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- +# Usage: py.test tests + +import keyvi_pybind11 as keyvi +#from test_tools import tmp_dictionary +import warnings + + +#from keyvi.compiler import ( +# JsonDictionaryCompiler, +# CompletionDictionaryCompiler, +# KeyOnlyDictionaryCompiler, +# StringDictionaryCompiler, +#) + + +""" def test_serialization(): + m = keyvi.Match() + m.start = 22 + m.end = 30 + m.score = 42 + d = m.dumps() + m2 = keyvi.Match.loads(d) + assert m2.start == 22 + assert m2.end == 30 + assert m2.score == 42 """ + + +""" def test_raw_serialization(): + c = JsonDictionaryCompiler({"memory_limit_mb": "10"}) + c.Add("abc", '{"a" : 2}') + c.Add("abd", '{"a" : 3}') + with tmp_dictionary(c, 'match_object_json.kv') as d: + m = d["abc"] + assert m.value_as_string() == '{"a":2}' + d = m.dumps() + m2 = keyvi.Match.loads(d) + assert m2.value_as_string() == '{"a":2}' + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert m.GetValueAsString() == '{"a":2}' + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) + """ + +""" def test_unicode_attributes(): + m = keyvi.Match() + m["küy"] = 22 + assert m["küy"] == 22 + m["k2"] = " 吃饭了吗" + m.score = 99 + assert m["k2"] == " 吃饭了吗" + assert m.score == 99.0 + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + m.SetAttribute("k2", "öäü") + assert m["k2"] == "öäü" + assert m.GetAttribute("k2") == 
"öäü" + assert len(w) == 2 + assert issubclass(w[0].category, DeprecationWarning) + assert issubclass(w[1].category, DeprecationWarning) """ + + +""" def test_bytes_attributes(): + m = keyvi.Match() + bytes_key = bytes(u"äöü".encode('utf-8')) + bytes_value = bytes(u"äöüöäü".encode('utf-8')) + m[bytes_key] = 22 + assert m[bytes_key] == 22 + m["k2"] = bytes_value + assert m["k2"] == "äöüöäü" + + +def test_double_attributes(): + m = keyvi.Match() + bytes_key = bytes("abc".encode('utf-8')) + m[bytes_key] = 42.0 + assert m[bytes_key] == 42.0 + + +def test_boolean_attributes(): + m = keyvi.Match() + bytes_key = bytes("def".encode('utf-8')) + m[bytes_key] = True + assert m[bytes_key] == True """ + + +def test_start(): + m = keyvi.Match() + m.start = 42 + assert m.start == 42 + + +def test_end(): + m = keyvi.Match() + m.end = 49 + assert m.end == 49 + + +def test_score(): + m = keyvi.Match() + m.score = 149 + assert m.score == 149 + + +""" def test_get_value(): + c = JsonDictionaryCompiler({"memory_limit_mb": "10"}) + c.Add("abc", '{"a" : 2}') + c.Add("abd", '{"a" : 3}') + with tmp_dictionary(c, 'match_object_json.kv') as d: + m = d["abc"] + assert m.value == {"a": 2} + m = d["abd"] + assert m.value == {"a": 3} """ + + +""" def test_get_value_int(): + c = CompletionDictionaryCompiler({"memory_limit_mb": "10"}) + c.Add("abc", 42) + c.Add("abd", 21) + with tmp_dictionary(c, 'match_object_int.kv') as d: + m = d["abc"] + assert m.value == 42 + m = d["abd"] + assert m.value == 21 + + +def test_get_value_key_only(): + c = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"}) + c.Add("abc") + c.Add("abd") + with tmp_dictionary(c, 'match_object_key_only.kv') as d: + m = d["abc"] + assert m.value == '' + m = d["abd"] + assert m.value == '' + + +def test_get_value_string(): + c = StringDictionaryCompiler({"memory_limit_mb": "10"}) + c.Add("abc", "aaaaa") + c.Add("abd", "bbbbb") + with tmp_dictionary(c, 'match_object_string.kv') as d: + m = d["abc"] + assert m.value == "aaaaa" + m = 
d["abd"] + assert m.value == "bbbbb" + """ + +def test_matched_string(): + m = keyvi.Match() + m.matched_string = "match" + assert m.matched_string == "match" + + +def test_bool_operator(): + m = keyvi.Match() + assert not m + m.end = 42 + assert m is not False + assert m