From 4d016175601d39ac494d90dea2e50b36356564e8 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sat, 5 Oct 2024 15:13:00 +0200 Subject: [PATCH 01/61] add pybind11 bindings add CI for pybind wrapper use macos-14 runner update tag disable macos-14 remove integration tests skip pp37 adapt sdist fix style fix style revert cibuildwheel changes try to fix sdist move keyvi source out of src install pytest add more match bindings move module import switch to scikit_build remove license file --- .../workflows/python-cibuildwheel-pybind.yml | 183 ++++++++++++++++++ .gitignore | 10 +- CMakeLists.txt | 145 ++++++++------ keyvi/check-style.sh | 2 +- pyproject.toml | 74 +++++++ python-pybind/CMakeLists.txt | 26 +++ python-pybind/src/.clang-format | 12 ++ python-pybind/src/CPPLINT.cfg | 3 + .../src/dictionary/py_dictionary.cpp | 53 +++++ python-pybind/src/dictionary/py_match.cpp | 54 ++++++ .../src/dictionary/py_match_iterator.h | 75 +++++++ python-pybind/src/py_keyvi.cpp | 49 +++++ python-pybind/tests/match_object_test.py | 161 +++++++++++++++ 13 files changed, 783 insertions(+), 64 deletions(-) create mode 100644 .github/workflows/python-cibuildwheel-pybind.yml create mode 100644 pyproject.toml create mode 100644 python-pybind/CMakeLists.txt create mode 100644 python-pybind/src/.clang-format create mode 100644 python-pybind/src/CPPLINT.cfg create mode 100644 python-pybind/src/dictionary/py_dictionary.cpp create mode 100644 python-pybind/src/dictionary/py_match.cpp create mode 100644 python-pybind/src/dictionary/py_match_iterator.h create mode 100644 python-pybind/src/py_keyvi.cpp create mode 100644 python-pybind/tests/match_object_test.py diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml new file mode 100644 index 000000000..82f512e40 --- /dev/null +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -0,0 +1,183 @@ +name: Python Pybind cibuildwheel + +on: + push: + branches: [master, release-*] + pull_request: 
+ branches: [master] + workflow_dispatch: + +jobs: + build_wheels: + name: cibuildwheel ${{ matrix.os }}/${{ matrix.arch }}/${{ matrix.flavor }}/${{ matrix.target }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-22.04] + # separate archs, so they use individual caches + arch: ["x86_64", "arm64"] + flavor: ["cpython", "pypy"] + # separate musl and many on linux, for mac we just skip one of those + target: ["many", "musl"] + exclude: + - os: ubuntu-22.04 + target: musl + flavor: pypy + steps: + - uses: actions/checkout@v4 + - name: Set up QEMU + if: ${{ (runner.os == 'Linux') && (matrix.arch == 'arm64') }} + uses: docker/setup-qemu-action@v3 + with: + platforms: all + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.11 + with: + key: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}-${{ matrix.flavor }}-python + + - name: Sets env for x86_64 + run: | + echo "CIBW_ARCHS_LINUX=auto64" >> $GITHUB_ENV + echo "CIBW_ARCHS_MACOS=x86_64" >> $GITHUB_ENV + if: matrix.arch == 'x86_64' + + - name: Sets env for arm64 + run: | + echo "CIBW_ARCHS_LINUX=aarch64" >> $GITHUB_ENV + echo "CIBW_ARCHS_MACOS=arm64" >> $GITHUB_ENV + if: matrix.arch == 'arm64' + + - name: Skip manylinux for musllinux target + if: ${{ (runner.os == 'Linux') && (matrix.target == 'musl') }} + run: | + echo "CIBW_SKIP=*manylinux*" >> $GITHUB_ENV + + - name: Skip musllinux for manylinux target + if: ${{ (runner.os == 'Linux') && (matrix.target == 'many') }} + run: | + echo "CIBW_SKIP=*musllinux*" >> $GITHUB_ENV + + - name: Skip pypy for cpython + if: ${{ matrix.flavor == 'cpython' }} + run: | + echo "CIBW_SKIP=${{ env.CIBW_SKIP }} pp*" >> $GITHUB_ENV + + - name: Skip cpython for pypy + if: ${{ matrix.flavor == 'pypy' }} + run: | + echo "CIBW_SKIP=${{ env.CIBW_SKIP }} cp*" >> $GITHUB_ENV + + - name: install mac dependencies + if: ${{ runner.os == 'macOS' }} + # 2nd command: workaround https://github.com/actions/setup-python/issues/577 + run: | + brew update && \ + brew list -1 | grep 
python | while read formula; do brew unlink $formula; brew link --overwrite $formula; done && \ + brew install ccache + + - name: install mac dependencies X86_64 + if: ${{ (runner.os == 'macOS') && (matrix.arch == 'x86_64') }} + run: | + brew update && \ + brew install zlib snappy boost + + - name: install mac dependencies arm64 + if: ${{ (runner.os == 'macOS') && (matrix.arch == 'arm64') }} + run: | + set -e + echo "MACOSX_DEPLOYMENT_TARGET=12.3.0" >> $GITHUB_ENV + echo "_CMAKE_PREFIX_PATH=${{ github.workspace }}/arm64-homebrew" >> $GITHUB_ENV + echo "CIBW_REPAIR_WHEEL_COMMAND_MACOS=DYLD_LIBRARY_PATH=${{ github.workspace }}/arm64-homebrew delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}" >> $GITHUB_ENV + mkdir arm64-homebrew && curl -L https://github.com/Homebrew/brew/tarball/master | tar xz --strip 1 -C arm64-homebrew + PACKAGES=( icu4c xz lz4 zstd zlib snappy boost ) + for PACKAGE in "${PACKAGES[@]}" + do + response=$(arm64-homebrew/bin/brew fetch --force --bottle-tag=arm64_sonoma $PACKAGE | grep Downloaded ) + download_path=$(echo $response | xargs -n 1 | tail -1) + arm64-homebrew/bin/brew reinstall -vd $download_path + done + arm64-homebrew/bin/brew config + ls /Users/runner/work/keyvi/keyvi/arm64-homebrew + + - name: Build python wheels for ${{ matrix.os }} on ${{ matrix.arch }} + uses: pypa/cibuildwheel@v2.17.0 + env: + # Skip CPython 3.6 and CPython 3.7 + CIBW_SKIP: ${{ env.CIBW_SKIP }} cp36-* cp37-* pp37-* + + # skip testing all python versions on linux arm, only test 3.12 + # skip tests on pypy, currently fails for indexer tests + CIBW_TEST_SKIP: "*p{38,39,310,311}-m*linux_aarch64 pp*" + + # (many)linux custom docker images + CIBW_MANYLINUX_X86_64_IMAGE: "keyvidev/manylinux-builder-x86_64" + CIBW_MANYLINUX_AARCH64_IMAGE: "keyvidev/manylinux-builder-aarch64" + CIBW_MUSLLINUX_X86_64_IMAGE: "keyvidev/musllinux-builder-x86_64" + CIBW_MUSLLINUX_AARCH64_IMAGE: "keyvidev/musllinux-builder-aarch64" + + # ccache using path + 
CIBW_ENVIRONMENT_MACOS: PATH=/usr/local/opt/ccache/libexec:$PATH + CIBW_ENVIRONMENT_LINUX: PATH=/usr/local/bin:/usr/lib/ccache:$PATH CCACHE_DIR=/host${{ github.workspace }}/.ccache CCACHE_CONFIGPATH=/host/home/runner/.config/ccache/ccache.conf + + # python dependencies + CIBW_BEFORE_BUILD: pip install -r python/requirements.txt + + # testing + CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: > + python -m pytest {package}/tests + + # for debugging set this to 1,2 or 3 + # CIBW_BUILD_VERBOSITY: 2 + with: + package-dir: python-pybind + + - uses: actions/upload-artifact@v4 + with: + name: artifact-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.flavor }}-${{ matrix.target }} + path: ./wheelhouse/*.whl + + build_sdist: + name: sdist + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: install Linux deps + run: | + sudo apt-get update && \ + sudo apt-get install -y libsnappy-dev libzzip-dev zlib1g-dev libboost-all-dev ccache + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.11 + with: + key: ${{ matrix.os }}-sdist-python + + - name: Build SDist + run: | + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cd python-pybind && \ + python -m pip install . 
&& \ + python setup.py sdist -d wheelhouse && \ + python -m pip install wheelhouse/*.tar.gz -v && \ + python -m pip install pytest && \ + python -m pytest tests && \ + python -m pip uninstall -y keyvi_pybind11 + + - uses: actions/upload-artifact@v4 + with: + name: artifact-sdist + path: python-pybind/wheelhouse/*.tar.gz + + upload_all: + needs: [build_wheels, build_sdist] + runs-on: ubuntu-latest + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + steps: + - uses: actions/download-artifact@v4 + with: + pattern: artifact-* + merge-multiple: true + path: dist + + - uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.pypi_password }} diff --git a/.gitignore b/.gitignore index 0c020fb18..e13d3cdcd 100644 --- a/.gitignore +++ b/.gitignore @@ -31,7 +31,7 @@ *.orig # cmake build dir -build/* +/*build* */cmake-build-debug/* build_dir_debug/ cmake-build-debug/ @@ -45,3 +45,11 @@ cmake-build-debug/ # vim swap files *.swp + +# python +*.egg-info + +# pybind build folder +python*/*build* +python*/dist +python*/.cache/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 452d3f09d..362c688b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.9) +cmake_minimum_required(VERSION 3.21) project(keyvi) #### Build Type @@ -6,14 +6,25 @@ if (CMAKE_BUILD_TYPE) string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER) endif() +#### Options + +option(KEYVI_C_BINDINGS "Keyvi: Build C binding" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_PYTHON_BINDINGS "Keyvi: Build Python module" OFF) +option(KEYVI_TESTS "Keyvi: Build unit tests" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_BINARIES "Keyvi: Build Python module" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_CLANG_TIDY "Keyvi: Build with clang tidy" ${PROJECT_IS_TOP_LEVEL}) +option(KEYVI_DOCS "Keyvi: Build docs" ${PROJECT_IS_TOP_LEVEL}) + #### Linting -find_program(CLANGTIDY clang-tidy) -if(CLANGTIDY) - message ("-- Found clang-tidy") - set(CMAKE_CXX_CLANG_TIDY 
clang-tidy; --extra-arg-before=-std=c++17) -else() - message ("-- clang-tidy not found") -endif() +if(KEYVI_CLANG_TIDY) + find_program(CLANGTIDY clang-tidy) + if(CLANGTIDY) + message ("-- Found clang-tidy") + set(CMAKE_CXX_CLANG_TIDY clang-tidy; --extra-arg-before=-std=c++17) + else() + message ("-- clang-tidy not found") + endif() +endif(KEYVI_CLANG_TIDY) #### Cmake modules set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/") @@ -154,67 +165,73 @@ string(REPLACE " " ";" _KEYVI_COMPILE_DEFINITIONS_LIST "${_KEYVI_COMPILE_DEFINIT #### Targets #### -# keyvicompiler -add_executable(keyvicompiler keyvi/bin/keyvicompiler/keyvicompiler.cpp) -target_link_libraries(keyvicompiler ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) -target_compile_options(keyvicompiler PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyvicompiler PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyvicompiler PRIVATE "$") +if(KEYVI_BINARIES) + # keyvicompiler + add_executable(keyvicompiler keyvi/bin/keyvicompiler/keyvicompiler.cpp) + target_link_libraries(keyvicompiler ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) + target_compile_options(keyvicompiler PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyvicompiler PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyvicompiler PRIVATE "$") -install (TARGETS keyvicompiler DESTINATION bin COMPONENT applications OPTIONAL) + install (TARGETS keyvicompiler DESTINATION bin COMPONENT applications OPTIONAL) -# keyviinspector -add_executable(keyviinspector keyvi/bin/keyviinspector/keyviinspector.cpp) -target_link_libraries(keyviinspector ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) -target_compile_options(keyviinspector PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyviinspector PRIVATE 
${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyviinspector PRIVATE "$") + # keyviinspector + add_executable(keyviinspector keyvi/bin/keyviinspector/keyviinspector.cpp) + target_link_libraries(keyviinspector ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) + target_compile_options(keyviinspector PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyviinspector PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyviinspector PRIVATE "$") -install (TARGETS keyviinspector DESTINATION bin COMPONENT applications OPTIONAL) + install (TARGETS keyviinspector DESTINATION bin COMPONENT applications OPTIONAL) -# keyvimerger -add_executable(keyvimerger keyvi/bin/keyvimerger/keyvimerger.cpp) -target_link_libraries(keyvimerger ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) -target_compile_options(keyvimerger PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyvimerger PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyvimerger PRIVATE "$") + # keyvimerger + add_executable(keyvimerger keyvi/bin/keyvimerger/keyvimerger.cpp) + target_link_libraries(keyvimerger ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) + target_compile_options(keyvimerger PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyvimerger PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyvimerger PRIVATE "$") -install (TARGETS keyvimerger DESTINATION bin COMPONENT applications) + install (TARGETS keyvimerger DESTINATION bin COMPONENT applications) +endif(KEYVI_BINARIES) # keyvi_c -add_library(keyvi_c SHARED keyvi/bin/keyvi_c/c_api.cpp) -target_link_libraries(keyvi_c ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) -target_compile_options(keyvi_c PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(keyvi_c 
PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(keyvi_c PRIVATE "$") +if(KEYVI_C_BINDINGS) + add_library(keyvi_c SHARED keyvi/bin/keyvi_c/c_api.cpp) + target_link_libraries(keyvi_c ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) + target_compile_options(keyvi_c PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(keyvi_c PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(keyvi_c PRIVATE "$") +endif(KEYVI_C_BINDINGS) # unit tests -FILE(GLOB_RECURSE UNIT_TEST_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} keyvi/tests/keyvi/*.cpp) -add_executable(unit_test_all ${UNIT_TEST_SOURCES}) -target_link_libraries(unit_test_all ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) -target_compile_options(unit_test_all PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) -target_compile_definitions(unit_test_all PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) -target_include_directories(unit_test_all PRIVATE "$") -add_dependencies(unit_test_all keyvimerger) - -if (WIN32) - message(STATUS "zlib: ${ZLIB_LIBRARY_RELEASE}") - # copies the dlls required to run to the build folder - foreach(LIB ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE} ${Boost_FILESYSTEM_LIBRARY_RELEASE} ${ZLIB_LIBRARY_RELEASE}) - get_filename_component(UTF_BASE_NAME ${LIB} NAME_WE) - get_filename_component(UTF_PATH ${LIB} PATH) - if(EXISTS "${UTF_PATH}/${UTF_BASE_NAME}.dll") - add_custom_command(TARGET unit_test_all POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} - ) - # zlib might be stored in a different folder - elseif(EXISTS "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll") - add_custom_command(TARGET unit_test_all POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} - ) - endif() - endforeach() -endif (WIN32) +if(KEYVI_TESTS) + FILE(GLOB_RECURSE 
UNIT_TEST_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} keyvi/tests/keyvi/*.cpp) + add_executable(unit_test_all ${UNIT_TEST_SOURCES}) + target_link_libraries(unit_test_all ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) + target_compile_options(unit_test_all PRIVATE ${_KEYVI_CXX_FLAGS_LIST}) + target_compile_definitions(unit_test_all PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) + target_include_directories(unit_test_all PRIVATE "$") + add_dependencies(unit_test_all keyvimerger) + + if (WIN32) + message(STATUS "zlib: ${ZLIB_LIBRARY_RELEASE}") + # copies the dlls required to run to the build folder + foreach(LIB ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE} ${Boost_FILESYSTEM_LIBRARY_RELEASE} ${ZLIB_LIBRARY_RELEASE}) + get_filename_component(UTF_BASE_NAME ${LIB} NAME_WE) + get_filename_component(UTF_PATH ${LIB} PATH) + if(EXISTS "${UTF_PATH}/${UTF_BASE_NAME}.dll") + add_custom_command(TARGET unit_test_all POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} + ) + # zlib might be stored in a different folder + elseif(EXISTS "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll") + add_custom_command(TARGET unit_test_all POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "${UTF_PATH}/../bin/${UTF_BASE_NAME}.dll" ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE} + ) + endif() + endforeach() + endif (WIN32) +endif(KEYVI_TESTS) # bindings add_custom_target(bindings @@ -239,10 +256,14 @@ target_include_directories(keyvi INTERFACE "$ target_compile_definitions(keyvi INTERFACE ${_KEYVI_COMPILE_DEFINITIONS_LIST}) target_link_libraries(keyvi INTERFACE ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES}) +if (KEYVI_PYTHON_BINDINGS) + add_subdirectory(python-pybind) +endif () + ### docs # don't run it as part of a non-toplevel build, e.g. 
python -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/sphinx-docs) +if(KEYVI_DOCS) find_package(Doxygen) find_package(Sphinx COMPONENTS breathe) @@ -268,4 +289,4 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/sphinx-docs) else() message ("-- Skip doc target, doxygen/sphinx not found") endif() -endif() +endif(KEYVI_DOCS) diff --git a/keyvi/check-style.sh b/keyvi/check-style.sh index 13ae3a6cb..d6b3964c3 100755 --- a/keyvi/check-style.sh +++ b/keyvi/check-style.sh @@ -10,7 +10,7 @@ else commit_range="upstream/master...HEAD" fi -infiles=`git diff --name-only --diff-filter=ACMRT $(echo ${commit_range} | sed 's/\.//') | grep -v "3rdparty" | grep -E "\.(cpp|h)$"` +infiles=`git diff --name-only --diff-filter=ACMRT $(echo ${commit_range} | sed 's/\.//') | grep -v "3rdparty" | grep -v "pybind11" | grep -E "\.(cpp|h)$"` clang_format_files=() cpplint_files=() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..b82c4641d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,74 @@ +[build-system] +requires = ["scikit-build-core>=0.10", "pybind11"] +build-backend = "scikit_build_core.build" + +[project] +name = "keyvi_scikit_core" +version = "0.6.3dev0" +dependencies = [ + "msgpack>=1.0.0", +] + +[tool.scikit-build] +wheel.expand-macos-universal-tags = true +minimum-version = "build-system.requires" + +[tool.scikit-build.cmake.define] +KEYVI_PYTHON_BINDINGS = "ON" +KEYVI_C_BINDINGS = "OFF" +KEYVI_TESTS = "OFF" +KEYVI_BINARIES = "OFF" +KEYVI_CLANG_TIDY = "OFF" +KEYVI_DOCS = "OFF" + +[tool.pytest.ini_options] +minversion = "8.0" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] +xfail_strict = true +log_cli_level = "INFO" +filterwarnings = [ + "error", + "ignore::pytest.PytestCacheWarning", +] +testpaths = ["tests"] + +[tool.cibuildwheel] +build-frontend = "build[uv]" +test-command = "pytest {project}/python/tests" +test-extras = ["test"] + +[tool.cibuildwheel.pyodide] +build-frontend = {name = "build", args = ["--exports", "whole_archive"]} + 
+[tool.ruff.lint] +extend-select = [ + "B", # flake8-bugbear + "I", # isort + "ARG", # flake8-unused-arguments + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "ICN", # flake8-import-conventions + "G", # flake8-logging-format + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "RET", # flake8-return + "RUF", # Ruff-specific + "SIM", # flake8-simplify + "T20", # flake8-print + "UP", # pyupgrade + "YTT", # flake8-2020 + "EXE", # flake8-executable + "NPY", # NumPy specific rules + "PD", # pandas-vet +] +ignore = [ + "PLR09", # Too many X + "PLR2004", # Magic comparison +] +isort.required-imports = ["from __future__ import annotations"] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["T20"] diff --git a/python-pybind/CMakeLists.txt b/python-pybind/CMakeLists.txt new file mode 100644 index 000000000..04a8bfd6c --- /dev/null +++ b/python-pybind/CMakeLists.txt @@ -0,0 +1,26 @@ +cmake_minimum_required(VERSION 3.15...3.27) + +# Scikit-build-core sets these values for you, or you can just hard-code the +# name and version. 
+project( + ${SKBUILD_PROJECT_NAME} + VERSION ${SKBUILD_PROJECT_VERSION} + LANGUAGES CXX) + +# Find the module development requirements (requires FindPython from 3.17 or +# scikit-build-core's built-in backport) +find_package(Python REQUIRED COMPONENTS Interpreter Development.Module) +find_package(pybind11 CONFIG REQUIRED) + +# Add a library using FindPython's tooling (pybind11 also provides a helper like +# this) +FILE(GLOB_RECURSE KEYVI_PYBIND_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} src/*.cpp) +pybind11_add_module(keyvi_scikit_core ${KEYVI_PYBIND_SOURCES}) + +target_link_libraries(keyvi_scikit_core PRIVATE keyvi) + +# This is passing in the version as a define just as an example +target_compile_definitions(keyvi_scikit_core PRIVATE VERSION_INFO=${PROJECT_VERSION}) + +# The install directory is the output (wheel) directory +install(TARGETS keyvi_scikit_core DESTINATION .) diff --git a/python-pybind/src/.clang-format b/python-pybind/src/.clang-format new file mode 100644 index 000000000..ab84a2c7b --- /dev/null +++ b/python-pybind/src/.clang-format @@ -0,0 +1,12 @@ +--- +BasedOnStyle: Google +ColumnLimit: '120' +Language: Cpp +Standard: c++17 +TabWidth: '2' +UseTab: Never +ConstructorInitializerIndentWidth: 4 +AllowShortFunctionsOnASingleLine: Inline +IncludeBlocks: Preserve + +... diff --git a/python-pybind/src/CPPLINT.cfg b/python-pybind/src/CPPLINT.cfg new file mode 100644 index 000000000..6ed77f519 --- /dev/null +++ b/python-pybind/src/CPPLINT.cfg @@ -0,0 +1,3 @@ +linelength=120 +root=. +filter=-build/include_subdir,-whitespace/indent_namespace diff --git a/python-pybind/src/dictionary/py_dictionary.cpp b/python-pybind/src/dictionary/py_dictionary.cpp new file mode 100644 index 000000000..96403bd76 --- /dev/null +++ b/python-pybind/src/dictionary/py_dictionary.cpp @@ -0,0 +1,53 @@ +/* * keyvi - A key value store. 
+ * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include "keyvi/dictionary/dictionary.h" +#include "keyvi/dictionary/match.h" + +#include "py_match_iterator.h" + +namespace py = pybind11; +namespace kd = keyvi::dictionary; +namespace kpy = keyvi::pybind; + +void init_keyvi_dictionary(const py::module_ &m) { + m.doc() = R"pbdoc( + keyvi.dictionary + ----------------------- + + .. currentmodule:: keyvi.dictionary + + .. autosummary:: + :toctree: _generate + + )pbdoc"; + + py::class_(m, "Dictionary") + .def(py::init()) + .def("get", &kd::Dictionary::operator[], R"pbdoc( + Get an entry from the dictionary. + )pbdoc") + .def("search", &kd::Dictionary::Lookup) + .def("match", [](const kd::Dictionary &d, const std::string &key) { + auto m = d.Get(key); + return kpy::make_match_iterator(m.begin(), m.end()); + }); +} diff --git a/python-pybind/src/dictionary/py_match.cpp b/python-pybind/src/dictionary/py_match.cpp new file mode 100644 index 000000000..bedc21642 --- /dev/null +++ b/python-pybind/src/dictionary/py_match.cpp @@ -0,0 +1,54 @@ +/* * keyvi - A key value store. + * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include "keyvi/dictionary/dictionary.h" +#include "keyvi/dictionary/match.h" + +#include "py_match_iterator.h" + +namespace py = pybind11; +namespace kd = keyvi::dictionary; + +void init_keyvi_match(const py::module_ &m) { + py::module_ msgpack_ = py::module_::import("msgpack"); + + py::class_>(m, "Match") + .def(py::init<>()) + .def_property("start", &kd::Match::GetStart, &kd::Match::SetStart) + .def_property("end", &kd::Match::GetEnd, &kd::Match::SetEnd) + .def_property("score", &kd::Match::GetScore, &kd::Match::SetScore) + .def_property("matched_string", &kd::Match::GetMatchedString, &kd::Match::SetMatchedString) + .def_property_readonly("value", + [msgpack_](const kd::Match &m) -> py::object {  // by value: outlives this function + auto packed_value = m.GetMsgPackedValueAsString(); + if (packed_value.empty()) { + return py::none(); + } + return msgpack_.attr("loads")(py::bytes(packed_value));  // binary payload + }) + .def("value_as_string", &kd::Match::GetValueAsString) + .def("raw_value_as_string", &kd::Match::GetRawValueAsString) + .def("__getitem__", &kd::Match::GetAttributePy) + // __setitem__ + // dumps loads + .def_property_readonly("weight", &kd::Match::GetWeight) + .def("__bool__", [](const kd::Match &m) -> bool { return !m.IsEmpty(); }); +} diff --git a/python-pybind/src/dictionary/py_match_iterator.h b/python-pybind/src/dictionary/py_match_iterator.h new file mode 100644 index 000000000..21500d931 --- /dev/null +++ b/python-pybind/src/dictionary/py_match_iterator.h @@ -0,0 +1,75 @@ +/* * keyvi - A key value store. 
+ * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DICTIONARY_PY_MATCH_ITERATOR_H_ +#define DICTIONARY_PY_MATCH_ITERATOR_H_ + +#include + +#include + +namespace keyvi { +namespace pybind { + +// adapted from pybind11.h +template +pybind11::iterator make_match_iterator_impl(Iterator first, Sentinel last, Extra &&...extra) { + using state = pybind11::detail::iterator_state; + if (!pybind11::detail::get_type_info(typeid(state), false)) { + pybind11::class_(pybind11::handle(), "iterator", pybind11::module_local()) + .def("__iter__", [](state &s) -> state & { return s; }) + .def( + "__next__", + [](state &s) -> ValueType { + { + // release GIL as incrementing the iterator can be expensive, e.g. for fuzzy match + pybind11::gil_scoped_release no_gil; + if (!s.first_or_done) { + ++s.it; + } else { + s.first_or_done = false; + } + if (s.it == s.end) { + s.first_or_done = true; + throw pybind11::stop_iteration(); + } + } + + return Access()(s.it); + }, + std::forward(extra)..., Policy) + .def("set_min_weight", [](state &s, const uint32_t min_weight) -> void { s.it.SetMinWeight(min_weight); }); + } + + return pybind11::cast(state{std::forward(first), std::forward(last), true}); +} + +/// Makes a python iterator from a first and past-the-end C++ InputIterator. +template ::result_type, + typename... 
Extra> +pybind11::typing::Iterator make_match_iterator(Iterator first, Sentinel last, Extra &&...extra) { + return make_match_iterator_impl, Policy, Iterator, Sentinel, ValueType, + Extra...>(std::forward(first), std::forward(last), + std::forward(extra)...); +} + +} /* namespace pybind */ +} /* namespace keyvi */ + +#endif // DICTIONARY_PY_MATCH_ITERATOR_H_ diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp new file mode 100644 index 000000000..402a381e7 --- /dev/null +++ b/python-pybind/src/py_keyvi.cpp @@ -0,0 +1,49 @@ +/* * keyvi - A key value store. + * + * Copyright 2015 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define STRINGIFY(x) #x +#define MACRO_STRINGIFY(x) STRINGIFY(x) + +namespace py = pybind11; + +void init_keyvi_dictionary(const py::module_ &); +void init_keyvi_match(const py::module_ &); + +PYBIND11_MODULE(keyvi_scikit_core, m) { + m.doc() = R"pbdoc( + keyvi - a key value store. + ----------------------- + + .. currentmodule:: keyvi + + .. 
autosummary:: + :toctree: _generate + + )pbdoc"; + + init_keyvi_match(m); + py::module keyvi_dictionary = m.def_submodule("dictionary", "keyvi_scikit_core.dictionary"); + init_keyvi_dictionary(keyvi_dictionary); + +#ifdef VERSION_INFO + m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); +#else + m.attr("__version__") = "dev"; +#endif +} diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py new file mode 100644 index 000000000..a2c217014 --- /dev/null +++ b/python-pybind/tests/match_object_test.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- +# Usage: py.test tests + +import keyvi_pybind11 as keyvi +#from test_tools import tmp_dictionary +import warnings + + +#from keyvi.compiler import ( +# JsonDictionaryCompiler, +# CompletionDictionaryCompiler, +# KeyOnlyDictionaryCompiler, +# StringDictionaryCompiler, +#) + + +""" def test_serialization(): + m = keyvi.Match() + m.start = 22 + m.end = 30 + m.score = 42 + d = m.dumps() + m2 = keyvi.Match.loads(d) + assert m2.start == 22 + assert m2.end == 30 + assert m2.score == 42 """ + + +""" def test_raw_serialization(): + c = JsonDictionaryCompiler({"memory_limit_mb": "10"}) + c.Add("abc", '{"a" : 2}') + c.Add("abd", '{"a" : 3}') + with tmp_dictionary(c, 'match_object_json.kv') as d: + m = d["abc"] + assert m.value_as_string() == '{"a":2}' + d = m.dumps() + m2 = keyvi.Match.loads(d) + assert m2.value_as_string() == '{"a":2}' + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert m.GetValueAsString() == '{"a":2}' + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) + """ + +""" def test_unicode_attributes(): + m = keyvi.Match() + m["küy"] = 22 + assert m["küy"] == 22 + m["k2"] = " 吃饭了吗" + m.score = 99 + assert m["k2"] == " 吃饭了吗" + assert m.score == 99.0 + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + m.SetAttribute("k2", "öäü") + assert m["k2"] == "öäü" + assert m.GetAttribute("k2") == 
"öäü" + assert len(w) == 2 + assert issubclass(w[0].category, DeprecationWarning) + assert issubclass(w[1].category, DeprecationWarning) """ + + +""" def test_bytes_attributes(): + m = keyvi.Match() + bytes_key = bytes(u"äöü".encode('utf-8')) + bytes_value = bytes(u"äöüöäü".encode('utf-8')) + m[bytes_key] = 22 + assert m[bytes_key] == 22 + m["k2"] = bytes_value + assert m["k2"] == "äöüöäü" + + +def test_double_attributes(): + m = keyvi.Match() + bytes_key = bytes("abc".encode('utf-8')) + m[bytes_key] = 42.0 + assert m[bytes_key] == 42.0 + + +def test_boolean_attributes(): + m = keyvi.Match() + bytes_key = bytes("def".encode('utf-8')) + m[bytes_key] = True + assert m[bytes_key] == True """ + + +def test_start(): + m = keyvi.Match() + m.start = 42 + assert m.start == 42 + + +def test_end(): + m = keyvi.Match() + m.end = 49 + assert m.end == 49 + + +def test_score(): + m = keyvi.Match() + m.score = 149 + assert m.score == 149 + + +""" def test_get_value(): + c = JsonDictionaryCompiler({"memory_limit_mb": "10"}) + c.Add("abc", '{"a" : 2}') + c.Add("abd", '{"a" : 3}') + with tmp_dictionary(c, 'match_object_json.kv') as d: + m = d["abc"] + assert m.value == {"a": 2} + m = d["abd"] + assert m.value == {"a": 3} """ + + +""" def test_get_value_int(): + c = CompletionDictionaryCompiler({"memory_limit_mb": "10"}) + c.Add("abc", 42) + c.Add("abd", 21) + with tmp_dictionary(c, 'match_object_int.kv') as d: + m = d["abc"] + assert m.value == 42 + m = d["abd"] + assert m.value == 21 + + +def test_get_value_key_only(): + c = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"}) + c.Add("abc") + c.Add("abd") + with tmp_dictionary(c, 'match_object_key_only.kv') as d: + m = d["abc"] + assert m.value == '' + m = d["abd"] + assert m.value == '' + + +def test_get_value_string(): + c = StringDictionaryCompiler({"memory_limit_mb": "10"}) + c.Add("abc", "aaaaa") + c.Add("abd", "bbbbb") + with tmp_dictionary(c, 'match_object_string.kv') as d: + m = d["abc"] + assert m.value == "aaaaa" + m = 
d["abd"] + assert m.value == "bbbbb" + """ + +def test_matched_string(): + m = keyvi.Match() + m.matched_string = "match" + assert m.matched_string == "match" + + +def test_bool_operator(): + m = keyvi.Match() + assert not m + m.end = 42 + assert not m is False + assert m From 7fba918a10daa5b59fdd2da0df2d24ef7417d186 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 10:52:08 +0100 Subject: [PATCH 02/61] build from root --- .github/workflows/python-cibuildwheel-pybind.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index 82f512e40..63c60d800 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -129,8 +129,6 @@ jobs: # for debugging set this to 1,2 or 3 # CIBW_BUILD_VERBOSITY: 2 - with: - package-dir: python-pybind - uses: actions/upload-artifact@v4 with: From dd71bc1d7fb2425c1bc88676f7cc0a11af77940a Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 11:01:11 +0100 Subject: [PATCH 03/61] fix test errors --- python/tests/dictionary/loading_test.py | 16 ++++++++-------- python/tests/index/merger_binary_test.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/python/tests/dictionary/loading_test.py b/python/tests/dictionary/loading_test.py index f92883e9b..2808b4061 100644 --- a/python/tests/dictionary/loading_test.py +++ b/python/tests/dictionary/loading_test.py @@ -39,14 +39,14 @@ def test_truncated_file_json(): c.write_to_file(os.path.join(tmp_dir,'truncation_test.kv')) size = os.path.getsize(os.path.join(tmp_dir, 'truncation_test.kv')) - fd_in = open(os.path.join(tmp_dir,'truncation_test.kv'), 'rb') - fd = open(os.path.join(tmp_dir,'truncation_test1.kv'), 'wb') - fd.write(fd_in.read(int(size/2))) - fd.close() - - fd2 = open(os.path.join(tmp_dir,'truncation_test2.kv'), 'wb') - fd2.write(fd_in.read(int(size-2))) - fd2.close() + with 
open(os.path.join(tmp_dir,'truncation_test.kv'), 'rb') as fd_in: + fd = open(os.path.join(tmp_dir,'truncation_test1.kv'), 'wb') + fd.write(fd_in.read(int(size/2))) + fd.close() + + fd2 = open(os.path.join(tmp_dir,'truncation_test2.kv'), 'wb') + fd2.write(fd_in.read(int(size-2))) + fd2.close() with pytest.raises(ValueError): d=Dictionary(os.path.join(tmp_dir, 'truncation_test1.kv')) diff --git a/python/tests/index/merger_binary_test.py b/python/tests/index/merger_binary_test.py index ad082268b..b894370e7 100644 --- a/python/tests/index/merger_binary_test.py +++ b/python/tests/index/merger_binary_test.py @@ -10,5 +10,5 @@ def test_merger_binary(): cmd = get_interpreter_executable() + b" " + os.path.join(get_package_root(), b"_pycore" , b"keyvimerger.py") + b" -h" - rc = subprocess.call(cmd, shell=True, stdout=open(os.devnull, 'w')) + rc = subprocess.call(cmd, shell=True) assert rc == 0 From caf0b8db040b79baefb46055b3a74bab0d9f7799 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 11:01:29 +0100 Subject: [PATCH 04/61] lower pytest minversion --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b82c4641d..e89e2bf12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ KEYVI_CLANG_TIDY = "OFF" KEYVI_DOCS = "OFF" [tool.pytest.ini_options] -minversion = "8.0" +minversion = "7.1.1" addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true log_cli_level = "INFO" From a98fa3a758ce0e57cca0089f4a1bb922013ea03c Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 11:10:24 +0100 Subject: [PATCH 05/61] remove pyodide --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e89e2bf12..52ef0cb20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,9 +37,6 @@ build-frontend = "build[uv]" test-command = "pytest {project}/python/tests" test-extras = ["test"] 
-[tool.cibuildwheel.pyodide] -build-frontend = {name = "build", args = ["--exports", "whole_archive"]} - [tool.ruff.lint] extend-select = [ "B", # flake8-bugbear From f954b5abbbf598dcb84f0a809d39e4646a78bc5e Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 11:19:02 +0100 Subject: [PATCH 06/61] fix deprecation error --- .../compiler/var_length_short_calculation_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/integration-tests/compiler/var_length_short_calculation_test.py b/python/integration-tests/compiler/var_length_short_calculation_test.py index 590c63017..c04f21abc 100644 --- a/python/integration-tests/compiler/var_length_short_calculation_test.py +++ b/python/integration-tests/compiler/var_length_short_calculation_test.py @@ -26,7 +26,7 @@ def test_input_output_keys(): output_keys_count = 0 with tmp_dictionary(compiler, 'var_length_short_test.kv') as d: - for _ in d.GetAllItems(): + for _ in d.items(): output_keys_count += 1 assert input_keys_count == output_keys_count From ad793397d1b7673c8c33da154fde38e4c848f535 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 11:25:59 +0100 Subject: [PATCH 07/61] switch to default build-frontend --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 52ef0cb20..e261df62f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,8 +33,8 @@ filterwarnings = [ testpaths = ["tests"] [tool.cibuildwheel] -build-frontend = "build[uv]" -test-command = "pytest {project}/python/tests" +build-frontend = "default" +test-command = "pytest {project}/python-pybind/tests" test-extras = ["test"] [tool.ruff.lint] From 80e0c3597060ee8eb545d51591178f4d9fc47328 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 11:36:12 +0100 Subject: [PATCH 08/61] fix sdist --- .github/workflows/python-cibuildwheel-pybind.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index 63c60d800..be149632f 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -9,7 +9,7 @@ on: jobs: build_wheels: - name: cibuildwheel ${{ matrix.os }}/${{ matrix.arch }}/${{ matrix.flavor }}/${{ matrix.target }} + name: pybind11-cibuildwheel ${{ matrix.os }}/${{ matrix.arch }}/${{ matrix.flavor }}/${{ matrix.target }} runs-on: ${{ matrix.os }} strategy: matrix: @@ -152,12 +152,11 @@ jobs: - name: Build SDist run: | export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" - cd python-pybind && \ python -m pip install . && \ python setup.py sdist -d wheelhouse && \ python -m pip install wheelhouse/*.tar.gz -v && \ python -m pip install pytest && \ - python -m pytest tests && \ + python -m pytest python-pybind11/tests && \ python -m pip uninstall -y keyvi_pybind11 - uses: actions/upload-artifact@v4 From 17de53cd3597743d46a2ff9e0d7523c38e7de0be Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 14:49:21 +0100 Subject: [PATCH 09/61] update cibuildwheel --- .github/workflows/python-cibuildwheel-pybind.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index be149632f..41f0c0ab9 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -100,7 +100,7 @@ jobs: ls /Users/runner/work/keyvi/keyvi/arm64-homebrew - name: Build python wheels for ${{ matrix.os }} on ${{ matrix.arch }} - uses: pypa/cibuildwheel@v2.17.0 + uses: pypa/cibuildwheel@v2.21.3 env: # Skip CPython 3.6 and CPython 3.7 CIBW_SKIP: ${{ env.CIBW_SKIP }} cp36-* cp37-* pp37-* From 9af17ebe57c5c2da1491b93cdcb918e17fd5bd60 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 15:14:14 +0100 Subject: [PATCH 10/61] repair sdist and 
tests --- .github/workflows/python-cibuildwheel-pybind.yml | 6 +++--- pyproject.toml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index 41f0c0ab9..be3c5ae27 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -153,8 +153,8 @@ jobs: run: | export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" python -m pip install . && \ - python setup.py sdist -d wheelhouse && \ - python -m pip install wheelhouse/*.tar.gz -v && \ + python -m build --sdist + python -m pip install dist/*.tar.gz -v && \ python -m pip install pytest && \ python -m pytest python-pybind11/tests && \ python -m pip uninstall -y keyvi_pybind11 @@ -162,7 +162,7 @@ jobs: - uses: actions/upload-artifact@v4 with: name: artifact-sdist - path: python-pybind/wheelhouse/*.tar.gz + path: dist/*.tar.gz upload_all: needs: [build_wheels, build_sdist] diff --git a/pyproject.toml b/pyproject.toml index e261df62f..d12bf5d47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,11 +30,11 @@ filterwarnings = [ "error", "ignore::pytest.PytestCacheWarning", ] -testpaths = ["tests"] +testpaths = ["python-pybind/tests"] [tool.cibuildwheel] build-frontend = "default" -test-command = "pytest {project}/python-pybind/tests" +test-command = "pytest python-pybind/tests" test-extras = ["test"] [tool.ruff.lint] From 66fcf9d9f8333234e23711298baacc604340664d Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 15:33:55 +0100 Subject: [PATCH 11/61] install ninja-build --- .github/workflows/python-cibuildwheel-pybind.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index be3c5ae27..be47def0e 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ 
b/.github/workflows/python-cibuildwheel-pybind.yml @@ -143,7 +143,7 @@ jobs: - name: install Linux deps run: | sudo apt-get update && \ - sudo apt-get install -y libsnappy-dev libzzip-dev zlib1g-dev libboost-all-dev ccache + sudo apt-get install -y libsnappy-dev libzzip-dev zlib1g-dev libboost-all-dev ccache ninja-build - name: ccache uses: hendrikmuhs/ccache-action@v1.2.11 with: From 820000fc823bca42a0c8946b35527b72c8165703 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 16:14:14 +0100 Subject: [PATCH 12/61] install build module --- .github/workflows/python-cibuildwheel-pybind.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index be47def0e..54d8fe980 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -152,6 +152,7 @@ jobs: - name: Build SDist run: | export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + python -m pip install --upgrade build python -m pip install . && \ python -m build --sdist python -m pip install dist/*.tar.gz -v && \ From b9c1248fc859270a354f710cb8e361583c2b0af1 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 24 Nov 2024 21:02:12 +0100 Subject: [PATCH 13/61] fix folder --- .github/workflows/python-cibuildwheel-pybind.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index 54d8fe980..9ddd1316a 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -152,12 +152,12 @@ jobs: - name: Build SDist run: | export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" - python -m pip install --upgrade build + python -m pip install --upgrade build && \ python -m pip install . 
&& \ python -m build --sdist python -m pip install dist/*.tar.gz -v && \ python -m pip install pytest && \ - python -m pytest python-pybind11/tests && \ + python -m pytest python-pybind/tests && \ python -m pip uninstall -y keyvi_pybind11 - uses: actions/upload-artifact@v4 From 273803652fe4394d6ff2c3dfc47c4ffa9bb3114b Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 25 Nov 2024 08:13:19 +0100 Subject: [PATCH 14/61] use pipx --- .github/workflows/python-cibuildwheel-pybind.yml | 6 ++---- python-pybind/tests/match_object_test.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index 9ddd1316a..80e4818c3 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -143,7 +143,7 @@ jobs: - name: install Linux deps run: | sudo apt-get update && \ - sudo apt-get install -y libsnappy-dev libzzip-dev zlib1g-dev libboost-all-dev ccache ninja-build + sudo apt-get install -y libsnappy-dev libzzip-dev zlib1g-dev libboost-all-dev ccache ninja-build pipx - name: ccache uses: hendrikmuhs/ccache-action@v1.2.11 with: @@ -152,9 +152,7 @@ jobs: - name: Build SDist run: | export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" - python -m pip install --upgrade build && \ - python -m pip install . 
&& \ - python -m build --sdist + pipx run build --sdist python -m pip install dist/*.tar.gz -v && \ python -m pip install pytest && \ python -m pytest python-pybind/tests && \ diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py index a2c217014..ff2391f21 100644 --- a/python-pybind/tests/match_object_test.py +++ b/python-pybind/tests/match_object_test.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Usage: py.test tests -import keyvi_pybind11 as keyvi +import keyvi_scikit_core as keyvi #from test_tools import tmp_dictionary import warnings From 090aa891fea8ca4d27840e1011d9a060920c6f2b Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 25 Nov 2024 08:27:39 +0100 Subject: [PATCH 15/61] remove env overrides --- .github/workflows/python-cibuildwheel-pybind.yml | 8 -------- pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index 80e4818c3..b67bef836 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -119,14 +119,6 @@ jobs: CIBW_ENVIRONMENT_MACOS: PATH=/usr/local/opt/ccache/libexec:$PATH CIBW_ENVIRONMENT_LINUX: PATH=/usr/local/bin:/usr/lib/ccache:$PATH CCACHE_DIR=/host${{ github.workspace }}/.ccache CCACHE_CONFIGPATH=/host/home/runner/.config/ccache/ccache.conf - # python dependencies - CIBW_BEFORE_BUILD: pip install -r python/requirements.txt - - # testing - CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: > - python -m pytest {package}/tests - # for debugging set this to 1,2 or 3 # CIBW_BUILD_VERBOSITY: 2 diff --git a/pyproject.toml b/pyproject.toml index d12bf5d47..34cea98dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,8 +34,8 @@ testpaths = ["python-pybind/tests"] [tool.cibuildwheel] build-frontend = "default" +test-requires = "pytest" test-command = "pytest python-pybind/tests" -test-extras = ["test"] 
[tool.ruff.lint] extend-select = [ From 5db710d4e464e86455a86f3d75fb2e40de55fea2 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 25 Nov 2024 08:32:01 +0100 Subject: [PATCH 16/61] re-add project dir --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 34cea98dc..d7e3eadd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ testpaths = ["python-pybind/tests"] [tool.cibuildwheel] build-frontend = "default" test-requires = "pytest" -test-command = "pytest python-pybind/tests" +test-command = "pytest {project}/python-pybind/tests" [tool.ruff.lint] extend-select = [ From 8da0bf11eb468cee7234759f256b02c40cecaa83 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 1 Dec 2024 13:14:32 +0100 Subject: [PATCH 17/61] add method --- .../src/dictionary/py_dictionary.cpp | 84 ++++++++++++++++++- 1 file changed, 80 insertions(+), 4 deletions(-) diff --git a/python-pybind/src/dictionary/py_dictionary.cpp b/python-pybind/src/dictionary/py_dictionary.cpp index 96403bd76..a7f34525e 100644 --- a/python-pybind/src/dictionary/py_dictionary.cpp +++ b/python-pybind/src/dictionary/py_dictionary.cpp @@ -42,12 +42,88 @@ void init_keyvi_dictionary(const py::module_ &m) { py::class_(m, "Dictionary") .def(py::init()) + .def( + "complete_fuzzy_multiword", + [](const kd::Dictionary &d, const std::string &query, const int32_t max_edit_distance, + const size_t minimum_exact_prefix = 0, const unsigned char multiword_separator = 0x1b) { + auto m = d.GetFuzzyMultiwordCompletion(query, max_edit_distance, minimum_exact_prefix, multiword_separator); + return kpy::make_match_iterator(m.begin(), m.end()); + }, + py::arg("query"), py::arg("max_edit_distance"), py::arg("minimum_exact_prefix") = 0, + py::arg("multiword_separator") = 0x1b, + R"pbdoc(Complete the given key to full matches after whitespace tokenizing, + allowing up to max_edit_distance distance(Levenshtein). 
+ In case the used dictionary supports inner weights, the + completer traverses the dictionary according to weights, + otherwise byte-order. + )pbdoc") + .def( + "complete_multiword", + [](const kd::Dictionary &d, const std::string &query, const unsigned char multiword_separator = 0x1b) { + auto m = d.GetMultiwordCompletion(query, multiword_separator); + return kpy::make_match_iterator(m.begin(), m.end()); + }, + py::arg("query"), py::arg("multiword_separator") = 0x1b, + R"pbdoc(Complete the given key to full matches after whitespace tokenizing + and return the top n completions. + In case the used dictionary supports inner weights, the + completer traverses the dictionary according to weights, + otherwise byte-order. + + Note, due to depth-first traversal the traverser + immediately yields results when it visits them. The results are + neither in order nor limited to n. It is up to the caller to resort + and truncate the lists of results. + Only the number of top completions is guaranteed. + )pbdoc") + .def( + "complete_prefix", + [](const kd::Dictionary &d, const std::string &query) { + auto m = d.GetPrefixCompletion(query); + return kpy::make_match_iterator(m.begin(), m.end()); + }, + py::arg("query"), + R"pbdoc(Complete the given key to full matches after whitespace tokenizing + and return the top n completions. + In case the used dictionary supports inner weights, the + completer traverses the dictionary according to weights, + otherwise byte-order. + + Note, due to depth-first traversal the traverser + immediately yields results when it visits them. The results are + neither in order nor limited to n. It is up to the caller to resort + and truncate the lists of results. + Only the number of top completions is guaranteed. 
+ )pbdoc") + .def( + "complete_prefix", + [](const kd::Dictionary &d, const std::string &query, size_t top_n) { + auto m = d.GetPrefixCompletion(query, top_n); + return kpy::make_match_iterator(m.begin(), m.end()); + }, + py::arg("query"), py::arg("top_n"), + R"pbdoc(Complete the given key to full matches after whitespace tokenizing + and return the top n completions. + In case the used dictionary supports inner weights, the + completer traverses the dictionary according to weights, + otherwise byte-order. + + Note, due to depth-first traversal the traverser + immediately yields results when it visits them. The results are + neither in order nor limited to n. It is up to the caller to resort + and truncate the lists of results. + Only the number of top completions is guaranteed. + )pbdoc") .def("get", &kd::Dictionary::operator[], R"pbdoc( Get an entry from the dictionary. )pbdoc") + // 'items', 'keys', 'manifest', 'match_fuzzy', 'match_near', + .def("match", + [](const kd::Dictionary &d, const std::string &key) { + auto m = d.Get(key); + return kpy::make_match_iterator(m.begin(), m.end()); + }) .def("search", &kd::Dictionary::Lookup) - .def("match", [](const kd::Dictionary &d, const std::string &key) { - auto m = d.Get(key); - return kpy::make_match_iterator(m.begin(), m.end()); - }); + // 'search_tokenized', 'statistics', 'values' + ; } From dc49e50263716ce1be4c94fd61e42f367fc3207f Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 1 Dec 2024 20:56:30 +0100 Subject: [PATCH 18/61] move todo --- python-pybind/src/dictionary/py_dictionary.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python-pybind/src/dictionary/py_dictionary.cpp b/python-pybind/src/dictionary/py_dictionary.cpp index a7f34525e..b738446e6 100644 --- a/python-pybind/src/dictionary/py_dictionary.cpp +++ b/python-pybind/src/dictionary/py_dictionary.cpp @@ -40,6 +40,8 @@ void init_keyvi_dictionary(const py::module_ &m) { )pbdoc"; + // TODO(hendrik): 'items', 'keys', 
'manifest', 'match_fuzzy', 'match_near', + // 'search_tokenized', 'statistics', 'values' py::class_(m, "Dictionary") .def(py::init()) .def( @@ -117,13 +119,10 @@ void init_keyvi_dictionary(const py::module_ &m) { .def("get", &kd::Dictionary::operator[], R"pbdoc( Get an entry from the dictionary. )pbdoc") - // 'items', 'keys', 'manifest', 'match_fuzzy', 'match_near', .def("match", [](const kd::Dictionary &d, const std::string &key) { auto m = d.Get(key); return kpy::make_match_iterator(m.begin(), m.end()); }) - .def("search", &kd::Dictionary::Lookup) - // 'search_tokenized', 'statistics', 'values' - ; + .def("search", &kd::Dictionary::Lookup); } From 21fe7e592d4ce3e3cc4dfc5e2f77f99a93cf801d Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 29 Dec 2024 14:04:18 +0100 Subject: [PATCH 19/61] add compiler --- .../src/compiler/py_dictionary_compilers.cpp | 55 +++++++++++++++++++ .../src/dictionary/py_dictionary.cpp | 2 +- python-pybind/src/py_keyvi.cpp | 3 + 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 python-pybind/src/compiler/py_dictionary_compilers.cpp diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp new file mode 100644 index 000000000..1041f67c1 --- /dev/null +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -0,0 +1,55 @@ +/* keyvi - A key value store. + * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "keyvi/dictionary/dictionary_types.h" + +namespace py = pybind11; +namespace kd = keyvi::dictionary; + +void init_keyvi_dictionary_compilers(const py::module_ &m) { + #define CREATE_COMPILER(compiler, name) \ + py::class_(m, name) \ + .def(py::init<>()) \ + .def("__enter__", [](compiler &c) { return &c; }) \ + .def("__exit__", [](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ + .def("__setitem__", &compiler::Add) \ + .def("add", &compiler::Add) \ + .def("compile", [](compiler &c, std::function progress_callback) { \ + if (progress_callback == nullptr) { \ + c.Compile(); \ + return; \ + } \ + auto progress_compiler_callback = [](size_t a, size_t b, void *user_data) { \ + auto py_callback = *reinterpret_cast*>(user_data); \ + py_callback(a,b); \ + }; \ + void* user_data = reinterpret_cast(&progress_callback); \ + c.Compile(progress_compiler_callback, user_data); \ + }, py::arg("progress_callback") = static_cast *>(nullptr)) \ + .def("set_manifest", &compiler::SetManifest) \ + .def("write_to_file", &compiler::WriteToFile); + + CREATE_COMPILER(kd::CompletionDictionaryCompiler, "CompletionDictionaryCompiler"); + + #undef CREATE_COMPILER +} + +//cdef void progress_compiler_callback(size_t a, size_t b, void* py_callback) noexcept with gil: +// (py_callback)(a, b) \ No newline at end of file diff --git a/python-pybind/src/dictionary/py_dictionary.cpp b/python-pybind/src/dictionary/py_dictionary.cpp index b738446e6..cf7d10e1c 100644 --- a/python-pybind/src/dictionary/py_dictionary.cpp +++ b/python-pybind/src/dictionary/py_dictionary.cpp @@ -1,4 +1,4 @@ -/* * keyvi - A key value store. +/* keyvi - A key value store. 
* * Copyright 2024 Hendrik Muhs * diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp index 402a381e7..f07ca7190 100644 --- a/python-pybind/src/py_keyvi.cpp +++ b/python-pybind/src/py_keyvi.cpp @@ -23,6 +23,7 @@ namespace py = pybind11; void init_keyvi_dictionary(const py::module_ &); +void init_keyvi_dictionary_compilers(const py::module_ &); void init_keyvi_match(const py::module_ &); PYBIND11_MODULE(keyvi_scikit_core, m) { @@ -40,6 +41,8 @@ PYBIND11_MODULE(keyvi_scikit_core, m) { init_keyvi_match(m); py::module keyvi_dictionary = m.def_submodule("dictionary", "keyvi_scikit_core.dictionary"); init_keyvi_dictionary(keyvi_dictionary); + py::module keyvi_compilers = m.def_submodule("compiler", "keyvi_scikit_core.compiler"); + init_keyvi_dictionary_compilers(keyvi_compilers); #ifdef VERSION_INFO m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); From b5e86b1230e492afa09b0eb105c8819a0bf689f3 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 29 Dec 2024 14:12:13 +0100 Subject: [PATCH 20/61] checkstyle --- .../src/compiler/py_dictionary_compilers.cpp | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp index 1041f67c1..a9e23d240 100644 --- a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -15,8 +15,8 @@ * limitations under the License. 
*/ -#include #include +#include #include "keyvi/dictionary/dictionary_types.h" @@ -24,32 +24,32 @@ namespace py = pybind11; namespace kd = keyvi::dictionary; void init_keyvi_dictionary_compilers(const py::module_ &m) { - #define CREATE_COMPILER(compiler, name) \ - py::class_(m, name) \ - .def(py::init<>()) \ - .def("__enter__", [](compiler &c) { return &c; }) \ - .def("__exit__", [](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ - .def("__setitem__", &compiler::Add) \ - .def("add", &compiler::Add) \ - .def("compile", [](compiler &c, std::function progress_callback) { \ - if (progress_callback == nullptr) { \ - c.Compile(); \ - return; \ - } \ - auto progress_compiler_callback = [](size_t a, size_t b, void *user_data) { \ - auto py_callback = *reinterpret_cast*>(user_data); \ - py_callback(a,b); \ - }; \ - void* user_data = reinterpret_cast(&progress_callback); \ - c.Compile(progress_compiler_callback, user_data); \ - }, py::arg("progress_callback") = static_cast *>(nullptr)) \ - .def("set_manifest", &compiler::SetManifest) \ - .def("write_to_file", &compiler::WriteToFile); +#define CREATE_COMPILER(compiler, name) \ + py::class_(m, name) \ + .def(py::init<>()) \ + .def("__enter__", [](compiler &c) { return &c; }) \ + .def("__exit__", [](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ + .def("__setitem__", &compiler::Add) \ + .def("add", &compiler::Add) \ + .def( \ + "compile", \ + [](compiler &c, std::function progress_callback) { \ + if (progress_callback == nullptr) { \ + c.Compile(); \ + return; \ + } \ + auto progress_compiler_callback = [](size_t a, size_t b, void *user_data) { \ + auto py_callback = *reinterpret_cast *>(user_data); \ + py_callback(a, b); \ + }; \ + void *user_data = reinterpret_cast(&progress_callback); \ + c.Compile(progress_compiler_callback, user_data); \ + }, \ + py::arg("progress_callback") = static_cast *>(nullptr)) \ + .def("set_manifest", &compiler::SetManifest) \ + 
.def("write_to_file", &compiler::WriteToFile); CREATE_COMPILER(kd::CompletionDictionaryCompiler, "CompletionDictionaryCompiler"); - #undef CREATE_COMPILER +#undef CREATE_COMPILER } - -//cdef void progress_compiler_callback(size_t a, size_t b, void* py_callback) noexcept with gil: -// (py_callback)(a, b) \ No newline at end of file From ab73173406406b11f9af544a49f383a7249228b0 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 29 Dec 2024 22:05:47 +0100 Subject: [PATCH 21/61] add more compilers and mergers --- .../src/compiler/py_dictionary_compilers.cpp | 71 +++++++++++++++++-- .../src/dictionary/py_dictionary.cpp | 1 + python-pybind/src/py_keyvi.cpp | 14 ++++ python-pybind/tests/match_object_test.py | 12 ++-- 4 files changed, 88 insertions(+), 10 deletions(-) diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp index a9e23d240..d1394b575 100644 --- a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -17,16 +17,21 @@ #include #include +#include + +#include +#include #include "keyvi/dictionary/dictionary_types.h" namespace py = pybind11; namespace kd = keyvi::dictionary; -void init_keyvi_dictionary_compilers(const py::module_ &m) { +void init_keyvi_dictionary_compilers(const py::module_ &module) { #define CREATE_COMPILER(compiler, name) \ - py::class_(m, name) \ + py::class_(module, name) \ .def(py::init<>()) \ + .def(py::init()) \ .def("__enter__", [](compiler &c) { return &c; }) \ .def("__exit__", [](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ .def("__setitem__", &compiler::Add) \ @@ -47,9 +52,67 @@ void init_keyvi_dictionary_compilers(const py::module_ &m) { }, \ py::arg("progress_callback") = static_cast *>(nullptr)) \ .def("set_manifest", &compiler::SetManifest) \ - .def("write_to_file", &compiler::WriteToFile); - + .def("write_to_file", &compiler::WriteToFile, py::call_guard()); 
+#define CREATE_SK_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init &>()) \ + .def(py::init &, const keyvi::util::parameters_t &>()) \ + .def("__enter__", [](compiler &c) { return &c; }) \ + .def("__exit__", [](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ + .def("__setitem__", &compiler::Add) \ + .def("add", &compiler::Add) \ + .def( \ + "compile", \ + [](compiler &c, std::function progress_callback) { \ + pybind11::gil_scoped_release release_gil; \ + if (progress_callback == nullptr) { \ + c.Compile(); \ + return; \ + } \ + auto progress_compiler_callback = [](size_t a, size_t b, void *user_data) { \ + auto py_callback = *reinterpret_cast *>(user_data); \ + pybind11::gil_scoped_acquire acquire_gil; \ + py_callback(a, b); \ + }; \ + void *user_data = reinterpret_cast(&progress_callback); \ + c.Compile(progress_compiler_callback, user_data); \ + }, \ + py::arg("progress_callback") = static_cast *>(nullptr)) \ + .def("set_manifest", &compiler::SetManifest) \ + .def("write_to_file", &compiler::WriteToFile, py::call_guard()); +#define CREATE_MERGER(merger, name) \ + py::class_(module, name) \ + .def(py::init<>()) \ + .def(py::init()) \ + .def("__enter__", [](merger &m) { return &m; }) \ + .def("__exit__", [](merger &m, void *exc_type, void *exc_value, void *traceback) { m.Merge(); }) \ + .def("add", &merger::Add) \ + .def("merge", \ + [](merger &m) { \ + pybind11::gil_scoped_release release_gil; \ + m.Merge(); \ + }) \ + .def("merge", \ + [](merger &m, const std::string &filename) { \ + pybind11::gil_scoped_release release_gil; \ + m.Merge(filename); \ + }) \ + .def("set_manifest", &merger::SetManifest) \ + .def("write_to_file", &merger::WriteToFile, py::call_guard()); CREATE_COMPILER(kd::CompletionDictionaryCompiler, "CompletionDictionaryCompiler"); + CREATE_COMPILER(kd::FloatVectorDictionaryCompiler, "FloatVectorDictionaryCompiler"); + CREATE_COMPILER(kd::IntDictionaryCompiler, "IntDictionaryCompiler"); + 
CREATE_COMPILER(kd::JsonDictionaryCompiler, "JsonDictionaryCompiler"); + CREATE_COMPILER(kd::KeyOnlyDictionaryCompiler, "KeyOnlyDictionaryCompiler"); + CREATE_COMPILER(kd::StringDictionaryCompiler, "StringDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyCompletionDictionaryCompiler, "SecondaryKeyCompletionDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyFloatVectorDictionaryCompiler, "SecondaryKeyFloatVectorDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyIntDictionaryCompiler, "SecondaryKeyIntDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyJsonDictionaryCompiler, "SecondaryKeyJsonDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyKeyOnlyDictionaryCompiler, "SecondaryKeyKeyOnlyDictionaryCompiler"); + CREATE_SK_COMPILER(kd::SecondaryKeyStringDictionaryCompiler, "SecondaryKeyStringDictionaryCompiler"); + CREATE_MERGER(kd::CompletionDictionaryMerger, "CompletionDictionaryMerger"); + CREATE_MERGER(kd::IntDictionaryMerger, "IntDictionaryMerger"); #undef CREATE_COMPILER } diff --git a/python-pybind/src/dictionary/py_dictionary.cpp b/python-pybind/src/dictionary/py_dictionary.cpp index cf7d10e1c..ece876b65 100644 --- a/python-pybind/src/dictionary/py_dictionary.cpp +++ b/python-pybind/src/dictionary/py_dictionary.cpp @@ -44,6 +44,7 @@ void init_keyvi_dictionary(const py::module_ &m) { // 'search_tokenized', 'statistics', 'values' py::class_(m, "Dictionary") .def(py::init()) + .def(py::init()) .def( "complete_fuzzy_multiword", [](const kd::Dictionary &d, const std::string &query, const int32_t max_edit_distance, diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp index f07ca7190..d10138cbb 100644 --- a/python-pybind/src/py_keyvi.cpp +++ b/python-pybind/src/py_keyvi.cpp @@ -17,10 +17,13 @@ #include +#include "keyvi/dictionary/fsa/internal/memory_map_flags.h" + #define STRINGIFY(x) #x #define MACRO_STRINGIFY(x) STRINGIFY(x) namespace py = pybind11; +namespace kd = keyvi::dictionary; void 
init_keyvi_dictionary(const py::module_ &); void init_keyvi_dictionary_compilers(const py::module_ &); @@ -38,6 +41,17 @@ PYBIND11_MODULE(keyvi_scikit_core, m) { )pbdoc"; + py::enum_(m, "loading_strategy_types") + .value("default_os", kd::loading_strategy_types::default_os) + .value("lazy", kd::loading_strategy_types::lazy) + .value("populate", kd::loading_strategy_types::populate) + .value("populate_key_part", kd::loading_strategy_types::populate_key_part) + .value("populate_lazy", kd::loading_strategy_types::populate_lazy) + .value("lazy_no_readahead", kd::loading_strategy_types::lazy_no_readahead) + .value("lazy_no_readahead_value_part", kd::loading_strategy_types::lazy_no_readahead_value_part) + .value("populate_key_part_no_readahead_value_part", + kd::loading_strategy_types::populate_key_part_no_readahead_value_part); + init_keyvi_match(m); py::module keyvi_dictionary = m.def_submodule("dictionary", "keyvi_scikit_core.dictionary"); init_keyvi_dictionary(keyvi_dictionary); diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py index ff2391f21..16e2e10e3 100644 --- a/python-pybind/tests/match_object_test.py +++ b/python-pybind/tests/match_object_test.py @@ -6,12 +6,12 @@ import warnings -#from keyvi.compiler import ( -# JsonDictionaryCompiler, -# CompletionDictionaryCompiler, -# KeyOnlyDictionaryCompiler, -# StringDictionaryCompiler, -#) +from keyvi_scikit_core.compiler import ( + JsonDictionaryCompiler, + CompletionDictionaryCompiler, + KeyOnlyDictionaryCompiler, + StringDictionaryCompiler, +) """ def test_serialization(): From 031b60bbf8b321501eff67ccf5ce15a5f118f19b Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Fri, 10 Jan 2025 23:11:01 +0100 Subject: [PATCH 22/61] fix msgpack unpacking --- pyproject.toml | 9 ++++ .../src/compiler/py_dictionary_compilers.cpp | 51 ++++++++++--------- .../src/dictionary/py_dictionary.cpp | 3 ++ python-pybind/src/dictionary/py_match.cpp | 9 +++- 
python-pybind/tests/match_object_test.py | 10 ++-- 5 files changed, 52 insertions(+), 30 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d7e3eadd3..74074d82d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,15 @@ dependencies = [ "msgpack>=1.0.0", ] +[project.optional-dependencies] +lint = [ + 'ruff>=0.5.0', +] +test = [ + 'pytest>=7.4.2', +] +dev = ['keyvi_scikit_core[lint,test]'] + [tool.scikit-build] wheel.expand-macos-universal-tags = true minimum-version = "build-system.requires" diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp index d1394b575..0f0774089 100644 --- a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -27,6 +27,20 @@ namespace py = pybind11; namespace kd = keyvi::dictionary; +template +inline void py_compile(Compiler *c, std::function progress_callback) { + if (progress_callback == nullptr) { + c->Compile(); + return; + } + auto progress_compiler_callback = [](size_t a, size_t b, void *user_data) { + auto py_callback = *reinterpret_cast *>(user_data); + py_callback(a, b); + }; + void *user_data = reinterpret_cast(&progress_callback); + c->Compile(progress_compiler_callback, user_data); +} + void init_keyvi_dictionary_compilers(const py::module_ &module) { #define CREATE_COMPILER(compiler, name) \ py::class_(module, name) \ @@ -35,24 +49,24 @@ void init_keyvi_dictionary_compilers(const py::module_ &module) { .def("__enter__", [](compiler &c) { return &c; }) \ .def("__exit__", [](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ .def("__setitem__", &compiler::Add) \ - .def("add", &compiler::Add) \ + .def("add", &compiler::Add) /* DEPRECATED */ \ + .def("Add", &compiler::Add) \ .def( \ "compile", \ [](compiler &c, std::function progress_callback) { \ - if (progress_callback == nullptr) { \ - c.Compile(); \ - return; \ - } \ - auto 
progress_compiler_callback = [](size_t a, size_t b, void *user_data) { \ - auto py_callback = *reinterpret_cast *>(user_data); \ - py_callback(a, b); \ - }; \ - void *user_data = reinterpret_cast(&progress_callback); \ - c.Compile(progress_compiler_callback, user_data); \ + py_compile(&c, progress_callback); \ + }, \ + py::arg("progress_callback") = \ + static_cast *>(nullptr)) /* DEPRECATED */ \ + .def( \ + "Compile", \ + [](compiler &c, std::function progress_callback) { \ + py_compile(&c, progress_callback); \ }, \ py::arg("progress_callback") = static_cast *>(nullptr)) \ .def("set_manifest", &compiler::SetManifest) \ - .def("write_to_file", &compiler::WriteToFile, py::call_guard()); + .def("write_to_file", &compiler::WriteToFile, py::call_guard()) /* DEPRECATED */ \ + .def("WriteToFile", &compiler::WriteToFile, py::call_guard()); #define CREATE_SK_COMPILER(compiler, name) \ py::class_(module, name) \ .def(py::init &>()) \ @@ -64,18 +78,7 @@ void init_keyvi_dictionary_compilers(const py::module_ &module) { .def( \ "compile", \ [](compiler &c, std::function progress_callback) { \ - pybind11::gil_scoped_release release_gil; \ - if (progress_callback == nullptr) { \ - c.Compile(); \ - return; \ - } \ - auto progress_compiler_callback = [](size_t a, size_t b, void *user_data) { \ - auto py_callback = *reinterpret_cast *>(user_data); \ - pybind11::gil_scoped_acquire acquire_gil; \ - py_callback(a, b); \ - }; \ - void *user_data = reinterpret_cast(&progress_callback); \ - c.Compile(progress_compiler_callback, user_data); \ + py_compile(&c, progress_callback); \ }, \ py::arg("progress_callback") = static_cast *>(nullptr)) \ .def("set_manifest", &compiler::SetManifest) \ diff --git a/python-pybind/src/dictionary/py_dictionary.cpp b/python-pybind/src/dictionary/py_dictionary.cpp index ece876b65..2333bb4a5 100644 --- a/python-pybind/src/dictionary/py_dictionary.cpp +++ b/python-pybind/src/dictionary/py_dictionary.cpp @@ -119,6 +119,9 @@ void init_keyvi_dictionary(const 
py::module_ &m) { )pbdoc") .def("get", &kd::Dictionary::operator[], R"pbdoc( Get an entry from the dictionary. + )pbdoc") + .def("__getitem__", &kd::Dictionary::operator[], R"pbdoc( + Get an entry from the dictionary. )pbdoc") .def("match", [](const kd::Dictionary &d, const std::string &key) { diff --git a/python-pybind/src/dictionary/py_match.cpp b/python-pybind/src/dictionary/py_match.cpp index bedc21642..f9b610676 100644 --- a/python-pybind/src/dictionary/py_match.cpp +++ b/python-pybind/src/dictionary/py_match.cpp @@ -27,6 +27,13 @@ namespace py = pybind11; namespace kd = keyvi::dictionary; +inline const py::object &get_msgpack_loads_func() { + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; + return storage + .call_once_and_store_result([]() -> py::object { return py::getattr(py::module_::import("msgpack"), "loads"); }) + .get_stored(); +} + void init_keyvi_match(const py::module_ &m) { py::module_ msgpack_ = py::module_::import("msgpack"); @@ -42,7 +49,7 @@ void init_keyvi_match(const py::module_ &m) { if (packed_value.empty()) { return py::none(); } - return msgpack_.attr("loads")(packed_value); + return get_msgpack_loads_func()(py::bytes(packed_value)); }) .def("value_as_string", &kd::Match::GetValueAsString) .def("raw_value_as_string", &kd::Match::GetRawValueAsString) diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py index 16e2e10e3..b1976a3be 100644 --- a/python-pybind/tests/match_object_test.py +++ b/python-pybind/tests/match_object_test.py @@ -2,7 +2,7 @@ # Usage: py.test tests import keyvi_scikit_core as keyvi -#from test_tools import tmp_dictionary +from test_tools import tmp_dictionary import warnings @@ -114,7 +114,7 @@ def test_score(): assert m.value == {"a": 3} """ -""" def test_get_value_int(): +def test_get_value_int(): c = CompletionDictionaryCompiler({"memory_limit_mb": "10"}) c.Add("abc", 42) c.Add("abd", 21) @@ -125,7 +125,7 @@ def test_score(): assert m.value == 21 -def 
test_get_value_key_only(): +""" def test_get_value_key_only(): c = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"}) c.Add("abc") c.Add("abd") @@ -134,7 +134,7 @@ def test_get_value_key_only(): assert m.value == '' m = d["abd"] assert m.value == '' - + """ def test_get_value_string(): c = StringDictionaryCompiler({"memory_limit_mb": "10"}) @@ -145,7 +145,7 @@ def test_get_value_string(): assert m.value == "aaaaa" m = d["abd"] assert m.value == "bbbbb" - """ + def test_matched_string(): m = keyvi.Match() From f9cd3dd904656865e4f88aa2ea528592ad99ff67 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Fri, 10 Jan 2025 23:26:34 +0100 Subject: [PATCH 23/61] add missing file --- python-pybind/tests/test_tools.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 python-pybind/tests/test_tools.py diff --git a/python-pybind/tests/test_tools.py b/python-pybind/tests/test_tools.py new file mode 100644 index 000000000..98bf058d4 --- /dev/null +++ b/python-pybind/tests/test_tools.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# some common tools for tests + +import contextlib +import os +import tempfile + +from keyvi_scikit_core.dictionary import Dictionary + + +@contextlib.contextmanager +def tmp_dictionary(compiler, file_name): + tmp_dir = tempfile.gettempdir() + fq_file_name = os.path.join(tmp_dir, file_name) + compiler.Compile() + compiler.WriteToFile(fq_file_name) + del compiler + d = Dictionary(fq_file_name) + yield d + del d + os.remove(fq_file_name) From ee397ddc6da6e0605a5735c8e53358de2617b708 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sat, 26 Jul 2025 08:32:04 +0200 Subject: [PATCH 24/61] update workflow with changes from latest master --- .../workflows/python-cibuildwheel-pybind.yml | 150 ++++++++---------- 1 file changed, 65 insertions(+), 85 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index b67bef836..81d81fc1e 100644 --- 
a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -2,50 +2,41 @@ name: Python Pybind cibuildwheel on: push: - branches: [master, release-*] + branches: [ master, release-* ] pull_request: - branches: [master] + branches: [ master ] + release: + types: [published] workflow_dispatch: jobs: build_wheels: - name: pybind11-cibuildwheel ${{ matrix.os }}/${{ matrix.arch }}/${{ matrix.flavor }}/${{ matrix.target }} + name: pybind-cibuildwheel ${{ matrix.os }}/${{ matrix.flavor }}/${{ matrix.target }} runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: - os: [ubuntu-22.04] - # separate archs, so they use individual caches - arch: ["x86_64", "arm64"] - flavor: ["cpython", "pypy"] + # macos-13: x86, macos-14: Arm64 + os: [ubuntu-22.04, ubuntu-24.04-arm, macos-13, macos-14] + # skip pypy, https://github.com/pypa/distutils/issues/283 + flavor: ['cpython'] # separate musl and many on linux, for mac we just skip one of those - target: ["many", "musl"] + target: [ 'many', 'musl' ] exclude: - - os: ubuntu-22.04 + - os: macos-13 target: musl - flavor: pypy + # skip pypy, https://github.com/pypa/distutils/issues/283 + #- os: ubuntu-22.04 + # target: musl + # flavor: pypy + #- os: macos-13 + # flavor: pypy steps: - uses: actions/checkout@v4 - - name: Set up QEMU - if: ${{ (runner.os == 'Linux') && (matrix.arch == 'arm64') }} - uses: docker/setup-qemu-action@v3 - with: - platforms: all - name: ccache uses: hendrikmuhs/ccache-action@v1.2.11 with: - key: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.target }}-${{ matrix.flavor }}-python - - - name: Sets env for x86_64 - run: | - echo "CIBW_ARCHS_LINUX=auto64" >> $GITHUB_ENV - echo "CIBW_ARCHS_MACOS=x86_64" >> $GITHUB_ENV - if: matrix.arch == 'x86_64' - - - name: Sets env for arm64 - run: | - echo "CIBW_ARCHS_LINUX=aarch64" >> $GITHUB_ENV - echo "CIBW_ARCHS_MACOS=arm64" >> $GITHUB_ENV - if: matrix.arch == 'arm64' + key: ${{ matrix.os }}-${{ matrix.target }}-${{ 
matrix.flavor }}-python-pybind - name: Skip manylinux for musllinux target if: ${{ (runner.os == 'Linux') && (matrix.target == 'musl') }} @@ -73,47 +64,36 @@ jobs: run: | brew update && \ brew list -1 | grep python | while read formula; do brew unlink $formula; brew link --overwrite $formula; done && \ - brew install ccache + brew install ccache zlib snappy boost@1.85 + brew link boost@1.85 - - name: install mac dependencies X86_64 - if: ${{ (runner.os == 'macOS') && (matrix.arch == 'x86_64') }} + - name: set mac deployment target X64 + if: runner.os == 'macOS' && runner.arch == 'X64' run: | - brew update && \ - brew install zlib snappy boost + echo "MACOSX_DEPLOYMENT_TARGET=13.0" >> $GITHUB_ENV - - name: install mac dependencies arm64 - if: ${{ (runner.os == 'macOS') && (matrix.arch == 'arm64') }} + - name: set mac deployment target arm64 + if: runner.os == 'macOS' && runner.arch == 'ARM64' run: | - set -e - echo "MACOSX_DEPLOYMENT_TARGET=12.3.0" >> $GITHUB_ENV - echo "_CMAKE_PREFIX_PATH=${{ github.workspace }}/arm64-homebrew" >> $GITHUB_ENV - echo "CIBW_REPAIR_WHEEL_COMMAND_MACOS=DYLD_LIBRARY_PATH=${{ github.workspace }}/arm64-homebrew delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}" >> $GITHUB_ENV - mkdir arm64-homebrew && curl -L https://github.com/Homebrew/brew/tarball/master | tar xz --strip 1 -C arm64-homebrew - PACKAGES=( icu4c xz lz4 zstd zlib snappy boost ) - for PACKAGE in "${PACKAGES[@]}" - do - response=$(arm64-homebrew/bin/brew fetch --force --bottle-tag=arm64_sonoma $PACKAGE | grep Downloaded ) - download_path=$(echo $response | xargs -n 1 | tail -1) - arm64-homebrew/bin/brew reinstall -vd $download_path - done - arm64-homebrew/bin/brew config - ls /Users/runner/work/keyvi/keyvi/arm64-homebrew - - - name: Build python wheels for ${{ matrix.os }} on ${{ matrix.arch }} - uses: pypa/cibuildwheel@v2.21.3 + echo "MACOSX_DEPLOYMENT_TARGET=14.0" >> $GITHUB_ENV + + - name: Build python wheels for ${{ matrix.os }} + uses: 
pypa/cibuildwheel@v3.0.1 env: - # Skip CPython 3.6 and CPython 3.7 - CIBW_SKIP: ${{ env.CIBW_SKIP }} cp36-* cp37-* pp37-* + # Skip CPython 3.{6, 7, 8} + CIBW_SKIP: ${{ env.CIBW_SKIP }} cp36-* cp37-* cp38-* + + # only build native packages + CIBW_ARCHS: native - # skip testing all python versions on linux arm, only test 3.12 # skip tests on pypy, currently fails for indexer tests - CIBW_TEST_SKIP: "*p{38,39,310,311}-m*linux_aarch64 pp*" + CIBW_TEST_SKIP: "pp*" # (many)linux custom docker images - CIBW_MANYLINUX_X86_64_IMAGE: "keyvidev/manylinux-builder-x86_64" - CIBW_MANYLINUX_AARCH64_IMAGE: "keyvidev/manylinux-builder-aarch64" - CIBW_MUSLLINUX_X86_64_IMAGE: "keyvidev/musllinux-builder-x86_64" - CIBW_MUSLLINUX_AARCH64_IMAGE: "keyvidev/musllinux-builder-aarch64" + CIBW_MANYLINUX_X86_64_IMAGE: 'keyvidev/manylinux-builder-x86_64' + CIBW_MANYLINUX_AARCH64_IMAGE: 'keyvidev/manylinux-builder-aarch64' + CIBW_MUSLLINUX_X86_64_IMAGE: 'keyvidev/musllinux-builder-x86_64' + CIBW_MUSLLINUX_AARCH64_IMAGE: 'keyvidev/musllinux-builder-aarch64' # ccache using path CIBW_ENVIRONMENT_MACOS: PATH=/usr/local/opt/ccache/libexec:$PATH @@ -124,41 +104,41 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: artifact-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.flavor }}-${{ matrix.target }} + name: artifact-${{ matrix.os }}-${{ matrix.flavor }}-${{ matrix.target }} path: ./wheelhouse/*.whl build_sdist: name: sdist runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - name: install Linux deps - run: | - sudo apt-get update && \ - sudo apt-get install -y libsnappy-dev libzzip-dev zlib1g-dev libboost-all-dev ccache ninja-build pipx - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.11 - with: - key: ${{ matrix.os }}-sdist-python - - - name: Build SDist - run: | - export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" - pipx run build --sdist - python -m pip install dist/*.tar.gz -v && \ - python -m pip install pytest && \ - python -m pytest python-pybind/tests 
&& \ - python -m pip uninstall -y keyvi_pybind11 - - - uses: actions/upload-artifact@v4 - with: - name: artifact-sdist - path: dist/*.tar.gz + - uses: actions/checkout@v4 + - name: install Linux deps + run: | + sudo apt-get update && \ + sudo apt-get install -y libsnappy-dev libzzip-dev zlib1g-dev libboost-all-dev ccache + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.11 + with: + key: ubuntu-sdist-python + + - name: Build SDist + run: | + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + pipx run build --sdist + python -m pip install dist/*.tar.gz -v && \ + python -m pip install pytest && \ + python -m pytest python-pybind/tests && \ + python -m pip uninstall -y keyvi_pybind11 + + - uses: actions/upload-artifact@v4 + with: + name: artifact-sdist + path: python/wheelhouse/*.tar.gz upload_all: needs: [build_wheels, build_sdist] runs-on: ubuntu-latest - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + if: github.event_name == 'release' steps: - uses: actions/download-artifact@v4 with: From a0ce94fcc6fa93e96f3973e3d1a5545241e3c27d Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sat, 26 Jul 2025 08:51:26 +0200 Subject: [PATCH 25/61] fix deprecation errors --- .../compiler/var_length_short_calculation_test.py | 2 +- python/integration-tests/dictionary/dictionary_leak_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/integration-tests/compiler/var_length_short_calculation_test.py b/python/integration-tests/compiler/var_length_short_calculation_test.py index c04f21abc..6ea7504ca 100644 --- a/python/integration-tests/compiler/var_length_short_calculation_test.py +++ b/python/integration-tests/compiler/var_length_short_calculation_test.py @@ -21,7 +21,7 @@ def test_input_output_keys(): k, v = line.split('\t') key = json.loads(k) value = json.loads(v) - compiler.Add(key, value) + compiler.add(key, value) input_keys_count += 1 output_keys_count = 0 diff --git 
a/python/integration-tests/dictionary/dictionary_leak_test.py b/python/integration-tests/dictionary/dictionary_leak_test.py index b2035f650..4d8b78c11 100644 --- a/python/integration-tests/dictionary/dictionary_leak_test.py +++ b/python/integration-tests/dictionary/dictionary_leak_test.py @@ -22,7 +22,7 @@ def memory_usage_ps(): def test_leak(): c = JsonDictionaryCompiler({"memory_limit_mb":"10"}) - c.Add("something", '["a" : 2]') + c.add("something", '["a" : 2]') with tmp_dictionary(c, 'near_simple.kv') as d: gc.collect() From ce5dc7c4b7b4e90090c2462ec4c3dc2a1267b6cc Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 27 Jul 2025 22:10:42 +0200 Subject: [PATCH 26/61] rename to keyvi2 --- pyproject.toml | 6 +++--- python-pybind/CMakeLists.txt | 8 ++++---- python-pybind/src/py_keyvi.cpp | 6 +++--- python-pybind/tests/match_object_test.py | 4 ++-- python-pybind/tests/test_tools.py | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 74074d82d..da09a3a3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,8 +3,8 @@ requires = ["scikit-build-core>=0.10", "pybind11"] build-backend = "scikit_build_core.build" [project] -name = "keyvi_scikit_core" -version = "0.6.3dev0" +name = "keyvi2" +version = "0.6.4dev0" dependencies = [ "msgpack>=1.0.0", ] @@ -16,7 +16,7 @@ lint = [ test = [ 'pytest>=7.4.2', ] -dev = ['keyvi_scikit_core[lint,test]'] +dev = ['keyvi2[lint,test]'] [tool.scikit-build] wheel.expand-macos-universal-tags = true diff --git a/python-pybind/CMakeLists.txt b/python-pybind/CMakeLists.txt index 04a8bfd6c..2acb54dfb 100644 --- a/python-pybind/CMakeLists.txt +++ b/python-pybind/CMakeLists.txt @@ -15,12 +15,12 @@ find_package(pybind11 CONFIG REQUIRED) # Add a library using FindPython's tooling (pybind11 also provides a helper like # this) FILE(GLOB_RECURSE KEYVI_PYBIND_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} src/*.cpp) -pybind11_add_module(keyvi_scikit_core ${KEYVI_PYBIND_SOURCES}) 
+pybind11_add_module(keyvi2 ${KEYVI_PYBIND_SOURCES}) -target_link_libraries(keyvi_scikit_core PRIVATE keyvi) +target_link_libraries(keyvi2 PRIVATE keyvi) # This is passing in the version as a define just as an example -target_compile_definitions(keyvi_scikit_core PRIVATE VERSION_INFO=${PROJECT_VERSION}) +target_compile_definitions(keyvi2 PRIVATE VERSION_INFO=${PROJECT_VERSION}) # The install directory is the output (wheel) directory -install(TARGETS keyvi_scikit_core DESTINATION .) +install(TARGETS keyvi2 DESTINATION .) diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp index d10138cbb..27f1ff1fb 100644 --- a/python-pybind/src/py_keyvi.cpp +++ b/python-pybind/src/py_keyvi.cpp @@ -29,7 +29,7 @@ void init_keyvi_dictionary(const py::module_ &); void init_keyvi_dictionary_compilers(const py::module_ &); void init_keyvi_match(const py::module_ &); -PYBIND11_MODULE(keyvi_scikit_core, m) { +PYBIND11_MODULE(keyvi2, m) { m.doc() = R"pbdoc( keyvi - a key value store. ----------------------- @@ -53,9 +53,9 @@ PYBIND11_MODULE(keyvi_scikit_core, m) { kd::loading_strategy_types::populate_key_part_no_readahead_value_part); init_keyvi_match(m); - py::module keyvi_dictionary = m.def_submodule("dictionary", "keyvi_scikit_core.dictionary"); + py::module keyvi_dictionary = m.def_submodule("dictionary", "keyvi2.dictionary"); init_keyvi_dictionary(keyvi_dictionary); - py::module keyvi_compilers = m.def_submodule("compiler", "keyvi_scikit_core.compiler"); + py::module keyvi_compilers = m.def_submodule("compiler", "keyvi2.compiler"); init_keyvi_dictionary_compilers(keyvi_compilers); #ifdef VERSION_INFO diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py index b1976a3be..ef33e1833 100644 --- a/python-pybind/tests/match_object_test.py +++ b/python-pybind/tests/match_object_test.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- # Usage: py.test tests -import keyvi_scikit_core as keyvi +import keyvi2 as keyvi from test_tools 
import tmp_dictionary import warnings -from keyvi_scikit_core.compiler import ( +from keyvi2.compiler import ( JsonDictionaryCompiler, CompletionDictionaryCompiler, KeyOnlyDictionaryCompiler, diff --git a/python-pybind/tests/test_tools.py b/python-pybind/tests/test_tools.py index 98bf058d4..233d86ff1 100644 --- a/python-pybind/tests/test_tools.py +++ b/python-pybind/tests/test_tools.py @@ -5,7 +5,7 @@ import os import tempfile -from keyvi_scikit_core.dictionary import Dictionary +from keyvi2.dictionary import Dictionary @contextlib.contextmanager From e52481b1f5602f7a4d7678f0e6e4d807ff9deb7c Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Wed, 30 Jul 2025 08:45:50 +0200 Subject: [PATCH 27/61] implement dumps and loads --- python-pybind/src/dictionary/py_match.cpp | 95 ++++++++++++++++++++++- python-pybind/tests/match_object_test.py | 42 ++++------ 2 files changed, 107 insertions(+), 30 deletions(-) diff --git a/python-pybind/src/dictionary/py_match.cpp b/python-pybind/src/dictionary/py_match.cpp index f9b610676..5d839c7df 100644 --- a/python-pybind/src/dictionary/py_match.cpp +++ b/python-pybind/src/dictionary/py_match.cpp @@ -19,6 +19,8 @@ #include +#include "msgpack.hpp" + #include "keyvi/dictionary/dictionary.h" #include "keyvi/dictionary/match.h" @@ -53,9 +55,96 @@ void init_keyvi_match(const py::module_ &m) { }) .def("value_as_string", &kd::Match::GetValueAsString) .def("raw_value_as_string", &kd::Match::GetRawValueAsString) - .def("__get_item__", &kd::Match::GetAttributePy) - // __setitem__ - // dumps loads + .def("__getitem__", [](kd::Match &m, const std::string &key) { + return m.GetAttribute(key); + }) + .def("__setitem__", &kd::Match::SetAttribute) + .def("__setitem__", &kd::Match::SetAttribute) + .def("__setitem__", &kd::Match::SetAttribute) + .def("__setitem__", &kd::Match::SetAttribute) + .def("dumps", + [](const kd::Match &m) -> py::bytes { + bool do_pack_rest = false; + msgpack::sbuffer msgpack_buffer; + msgpack::packer packer(&msgpack_buffer); + 
const double score = m.GetScore(); + const size_t end = m.GetEnd(); + const size_t start = m.GetStart(); + const std::string matched_string = m.GetMatchedString(); + const std::string raw_value = m.GetRawValueAsString(); + + const size_t array_size = score > 0 ? 5 + : end > 0 ? 4 + : start > 0 ? 3 + : matched_string.size() > 0 ? 2 + : raw_value.size() > 0 ? 1 + : 0; + packer.pack_array(array_size); + + if (array_size > 0) { + packer.pack(raw_value); + } + if (array_size > 1) { + packer.pack(matched_string); + } + if (array_size > 2) { + packer.pack(start); + } + if (array_size > 3) { + packer.pack(end); + } + if (array_size > 4) { + packer.pack(score); + } + + return py::bytes(msgpack_buffer.data(), msgpack_buffer.size()); + }) + .def_static("loads", + [](const std::string_view &serialized_match) -> kd::Match { + kd::Match match; + msgpack::object_handle handle = msgpack::unpack(serialized_match.data(), serialized_match.size()); + msgpack::object obj = handle.get(); + + // Ensure it's an array + if (obj.type != msgpack::type::ARRAY) { + throw std::invalid_argument("not a serialized match"); + } + + // Get the array elements + const msgpack::object *array = obj.via.array.ptr; + uint32_t size = obj.via.array.size; + + if (size > 5) { + throw std::invalid_argument("not a serialized match, unexpected number of elements"); + } + + std::string matched_string, value; + double score; + size_t start, end; + + try { + switch (size) { + case 5: + array[4].convert(score); + match.SetScore(score); + case 4: + array[3].convert(end); + match.SetEnd(end); + case 3: + array[2].convert(start); + match.SetStart(start); + case 2: + array[1].convert(matched_string); + match.SetMatchedString(matched_string); + case 1: + array[0].convert(value); + match.SetRawValue(value); + } + } catch (const msgpack::type_error &e) { + throw std::invalid_argument("not a serialized match, unexpected element types"); + } + return match; + }) .def_property_readonly("weight", &kd::Match::GetWeight) 
.def("__bool__", [](const kd::Match &m) -> bool { return !m.IsEmpty(); }); } diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py index ef33e1833..1997abd26 100644 --- a/python-pybind/tests/match_object_test.py +++ b/python-pybind/tests/match_object_test.py @@ -14,7 +14,7 @@ ) -""" def test_serialization(): +def test_serialization(): m = keyvi.Match() m.start = 22 m.end = 30 @@ -23,27 +23,22 @@ m2 = keyvi.Match.loads(d) assert m2.start == 22 assert m2.end == 30 - assert m2.score == 42 """ + assert m2.score == 42 -""" def test_raw_serialization(): +def test_raw_serialization(): c = JsonDictionaryCompiler({"memory_limit_mb": "10"}) c.Add("abc", '{"a" : 2}') c.Add("abd", '{"a" : 3}') - with tmp_dictionary(c, 'match_object_json.kv') as d: + with tmp_dictionary(c, "match_object_json.kv") as d: m = d["abc"] assert m.value_as_string() == '{"a":2}' d = m.dumps() m2 = keyvi.Match.loads(d) assert m2.value_as_string() == '{"a":2}' - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - assert m.GetValueAsString() == '{"a":2}' - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - """ -""" def test_unicode_attributes(): + +def test_unicode_attributes(): m = keyvi.Match() m["küy"] = 22 assert m["küy"] == 22 @@ -51,20 +46,12 @@ m.score = 99 assert m["k2"] == " 吃饭了吗" assert m.score == 99.0 - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - m.SetAttribute("k2", "öäü") - assert m["k2"] == "öäü" - assert m.GetAttribute("k2") == "öäü" - assert len(w) == 2 - assert issubclass(w[0].category, DeprecationWarning) - assert issubclass(w[1].category, DeprecationWarning) """ -""" def test_bytes_attributes(): +def test_bytes_attributes(): m = keyvi.Match() - bytes_key = bytes(u"äöü".encode('utf-8')) - bytes_value = bytes(u"äöüöäü".encode('utf-8')) + bytes_key = bytes("äöü".encode("utf-8")) + bytes_value = bytes("äöüöäü".encode("utf-8")) m[bytes_key] = 22 assert 
m[bytes_key] == 22 m["k2"] = bytes_value @@ -73,16 +60,16 @@ def test_double_attributes(): m = keyvi.Match() - bytes_key = bytes("abc".encode('utf-8')) + bytes_key = bytes("abc".encode("utf-8")) m[bytes_key] = 42.0 assert m[bytes_key] == 42.0 def test_boolean_attributes(): m = keyvi.Match() - bytes_key = bytes("def".encode('utf-8')) + bytes_key = bytes("def".encode("utf-8")) m[bytes_key] = True - assert m[bytes_key] == True """ + assert m[bytes_key] == True def test_start(): @@ -118,7 +105,7 @@ def test_get_value_int(): c = CompletionDictionaryCompiler({"memory_limit_mb": "10"}) c.Add("abc", 42) c.Add("abd", 21) - with tmp_dictionary(c, 'match_object_int.kv') as d: + with tmp_dictionary(c, "match_object_int.kv") as d: m = d["abc"] assert m.value == 42 m = d["abd"] @@ -136,11 +123,12 @@ def test_get_value_int(): assert m.value == '' """ + def test_get_value_string(): c = StringDictionaryCompiler({"memory_limit_mb": "10"}) c.Add("abc", "aaaaa") c.Add("abd", "bbbbb") - with tmp_dictionary(c, 'match_object_string.kv') as d: + with tmp_dictionary(c, "match_object_string.kv") as d: m = d["abc"] assert m.value == "aaaaa" m = d["abd"] From afce66ace7fcf43444148c2af5d3c7e50cf8b537 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Wed, 30 Jul 2025 17:42:41 +0200 Subject: [PATCH 28/61] switch to std::variant --- keyvi/bin/keyviinspector/keyviinspector.cpp | 21 ----------- .../completion/forward_backward_completion.h | 17 ++++----- .../dictionary/fsa/internal/ivalue_store.h | 4 +- keyvi/include/keyvi/dictionary/match.h | 37 ++++++++++--------- .../keyvi/index/internal/index_settings.h | 18 ++++----- .../keyvi/dictionary/dictionary_test.cpp | 18 +++++---- 6 files changed, 50 insertions(+), 65 deletions(-) diff --git a/keyvi/bin/keyviinspector/keyviinspector.cpp b/keyvi/bin/keyviinspector/keyviinspector.cpp index 485a1ff19..8bd2595f0 100644 --- a/keyvi/bin/keyviinspector/keyviinspector.cpp +++ b/keyvi/bin/keyviinspector/keyviinspector.cpp @@ -26,7 +26,6 @@ #include #include 
-#include #include #include #include @@ -59,25 +58,6 @@ void dump(const std::string& input, const std::string& output, bool keys_only = out_stream.close(); } -void dump_with_attributes(const std::string& input, const std::string& output) { - keyvi::dictionary::fsa::automata_t const automata(new keyvi::dictionary::fsa::Automata(input)); - keyvi::dictionary::fsa::EntryIterator it(automata); - keyvi::dictionary::fsa::EntryIterator const end_it = keyvi::dictionary::fsa::EntryIterator(); - - std::ofstream out_stream(output); - - while (it != end_it) { - it.WriteKey(out_stream); - - out_stream << "\t"; - - out_stream << it.GetValueAsAttributeVector()->at("value"); - out_stream << "\n"; - ++it; - } - out_stream.close(); -} - void print_statistics(const std::string& input) { keyvi::dictionary::fsa::automata_t const automata(new keyvi::dictionary::fsa::Automata(input)); std::cout << automata->GetStatistics() << '\n'; @@ -121,7 +101,6 @@ int main(int argc, char** argv) { output_file = vm["output-file"].as(); dump(input_file, output_file, key_only); - // dump_with_attributes (input_file, output_file); return 0; } diff --git a/keyvi/include/keyvi/dictionary/completion/forward_backward_completion.h b/keyvi/include/keyvi/dictionary/completion/forward_backward_completion.h index 9b023a707..4346eaa7e 100644 --- a/keyvi/include/keyvi/dictionary/completion/forward_backward_completion.h +++ b/keyvi/include/keyvi/dictionary/completion/forward_backward_completion.h @@ -33,7 +33,6 @@ #include #include -#include #include "keyvi/dictionary/completion/prefix_completion.h" #include "keyvi/dictionary/util/bounded_priority_queue.h" @@ -72,8 +71,8 @@ class ForwardBackwardCompletion final { util::BoundedPriorityQueue best_scores(2 * number_of_results); std::vector results; - for (auto match : forward_completions_.GetCompletions(query, number_of_results)) { - uint32_t weight = boost::lexical_cast(match->GetAttribute("weight")); + for (auto& match : forward_completions_.GetCompletions(query, 
number_of_results)) { + uint32_t weight = match->GetWeight(); // put the weight into the priority queue best_scores.Put(weight); @@ -109,8 +108,8 @@ class ForwardBackwardCompletion final { best_scores.Back()); uint32_t last_weight = 0; - for (auto match : backward_completions_.GetCompletions(phrase.c_str(), number_of_results)) { - uint32_t weight = boost::lexical_cast(match->GetAttribute("weight")); + for (auto& match : backward_completions_.GetCompletions(phrase.c_str(), number_of_results)) { + uint32_t weight = match->GetWeight(); if (weight < best_scores.Back()) { TRACE("Skip Backward, score to low %d", weight); @@ -160,7 +159,7 @@ class ForwardBackwardCompletion final { // reuse results vector results.clear(); - for (auto match : backward_completions_.GetCompletions(phrase.c_str(), number_of_results)) { + for (auto& match : backward_completions_.GetCompletions(phrase.c_str(), number_of_results)) { std::string matched_string = match->GetMatchedString(); std::reverse(matched_string.begin(), matched_string.end()); // if the original query had a space at the end, this result should as well @@ -168,7 +167,7 @@ class ForwardBackwardCompletion final { matched_string.append(" "); } - uint32_t weight = boost::lexical_cast(match->GetAttribute("weight")); + uint32_t weight = match->GetWeight(); match->SetScore(weight); match->SetMatchedString(matched_string); @@ -195,9 +194,9 @@ class ForwardBackwardCompletion final { } // match forward with this - for (auto match_forward : + for (auto& match_forward : forward_completions_.GetCompletions(m->GetMatchedString().c_str(), number_of_results)) { - uint32_t weight = boost::lexical_cast(match_forward->GetAttribute("weight")); + uint32_t weight = match_forward->GetWeight(); if (weight < best_scores.Back()) { TRACE("Skip Backward forward, score to low %d", weight); diff --git a/keyvi/include/keyvi/dictionary/fsa/internal/ivalue_store.h b/keyvi/include/keyvi/dictionary/fsa/internal/ivalue_store.h index 909cf91c5..5b3a3be92 100644 
--- a/keyvi/include/keyvi/dictionary/fsa/internal/ivalue_store.h +++ b/keyvi/include/keyvi/dictionary/fsa/internal/ivalue_store.h @@ -27,11 +27,11 @@ #include #include +#include #include #include #include -#include #include "keyvi/compression/compression_selector.h" #include "keyvi/dictionary/dictionary_merger_fwd.h" @@ -85,7 +85,7 @@ struct ValueStoreComponents {}; */ class IValueStoreReader { public: - typedef boost::container::flat_map> attributes_raw_t; + typedef boost::container::flat_map> attributes_raw_t; typedef std::shared_ptr attributes_t; /** diff --git a/keyvi/include/keyvi/dictionary/match.h b/keyvi/include/keyvi/dictionary/match.h index 0fe922107..e53767b0e 100644 --- a/keyvi/include/keyvi/dictionary/match.h +++ b/keyvi/include/keyvi/dictionary/match.h @@ -28,9 +28,9 @@ #include #include #include +#include #include -#include #include "keyvi/compression/compression_strategy.h" #include "keyvi/dictionary/fsa/automata.h" @@ -56,21 +56,8 @@ keyvi::dictionary::match_t FirstFilteredMatch(const MatcherT&, const DeletedT&); } // namespace index namespace dictionary { -#ifdef Py_PYTHON_H -class attributes_visitor : public boost::static_visitor { - public: - PyObject* operator()(int i) const { return PyLong_FromLong(i); } - - PyObject* operator()(double i) const { return PyFloat_FromDouble(i); } - - PyObject* operator()(bool i) const { return i ? 
Py_True : Py_False; } - - PyObject* operator()(const std::string& str) const { return PyUnicode_FromString(str.c_str()); } -}; -#endif - struct Match { - typedef std::shared_ptr>> + typedef std::shared_ptr>> attributes_t; Match(size_t a, size_t b, const std::string& matched_item, uint32_t score = 0, uint32_t weight = 0) @@ -139,11 +126,27 @@ struct Match { #ifdef Py_PYTHON_H PyObject* GetAttributePy(const std::string& key) { auto result = GetAttribute(key); - return boost::apply_visitor(attributes_visitor(), result); + + return std::visit( + [](auto&& arg) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return arg ? Py_True : Py_False; + } else if constexpr (std::is_same_v) { + return PyLong_FromLong(arg); + } else if constexpr (std::is_same_v) { + return PyFloat_FromDouble(arg); + } else if constexpr (std::is_same_v) { + return PyUnicode_FromString(arg.c_str()); + } + // not reachable + throw std::runtime_error("unexpected attribute type"); + }, + result); } #endif - const boost::variant& GetAttribute(const std::string& key) { + const std::variant& GetAttribute(const std::string& key) { // lazy creation if (!attributes_) { if (fsa_) { diff --git a/keyvi/include/keyvi/index/internal/index_settings.h b/keyvi/include/keyvi/index/internal/index_settings.h index 6617cda4d..0ea52a13e 100644 --- a/keyvi/include/keyvi/index/internal/index_settings.h +++ b/keyvi/include/keyvi/index/internal/index_settings.h @@ -25,9 +25,9 @@ #define KEYVI_INDEX_INTERNAL_INDEX_SETTINGS_H_ #include +#include #include - -#include +#include #include "keyvi/index/constants.h" #include "keyvi/index/internal/index_auto_config.h" @@ -77,24 +77,24 @@ class IndexSettings final { } } - const std::string& GetKeyviMergerBin() const { return boost::get(settings_.at(KEYVIMERGER_BIN)); } + const std::string& GetKeyviMergerBin() const { return std::get(settings_.at(KEYVIMERGER_BIN)); } - const size_t GetMaxSegments() const { return boost::get(settings_.at(INDEX_MAX_SEGMENTS)); } + const 
size_t GetMaxSegments() const { return std::get(settings_.at(INDEX_MAX_SEGMENTS)); } const size_t GetSegmentCompileKeyThreshold() const { - return boost::get(settings_.at(SEGMENT_COMPILE_KEY_THRESHOLD)); + return std::get(settings_.at(SEGMENT_COMPILE_KEY_THRESHOLD)); } - const size_t GetMaxConcurrentMerges() const { return boost::get(settings_.at(MAX_CONCURRENT_MERGES)); } + const size_t GetMaxConcurrentMerges() const { return std::get(settings_.at(MAX_CONCURRENT_MERGES)); } - const size_t GetRefreshInterval() const { return boost::get(settings_.at(INDEX_REFRESH_INTERVAL)); } + const size_t GetRefreshInterval() const { return std::get(settings_.at(INDEX_REFRESH_INTERVAL)); } const size_t GetSegmentExternalMergeKeyThreshold() const { - return boost::get(settings_.at(SEGMENT_EXTERNAL_MERGE_KEY_THRESHOLD)); + return std::get(settings_.at(SEGMENT_EXTERNAL_MERGE_KEY_THRESHOLD)); } private: - std::unordered_map> settings_; + std::unordered_map> settings_; }; } /* namespace internal */ diff --git a/keyvi/tests/keyvi/dictionary/dictionary_test.cpp b/keyvi/tests/keyvi/dictionary/dictionary_test.cpp index 207981e8c..8354b260a 100644 --- a/keyvi/tests/keyvi/dictionary/dictionary_test.cpp +++ b/keyvi/tests/keyvi/dictionary/dictionary_test.cpp @@ -23,6 +23,10 @@ * Author: hendrik */ +#include +#include +#include + #include #include @@ -71,7 +75,7 @@ BOOST_AUTO_TEST_CASE(DictGet) { bool matched = false; for (auto m : d->Get("test")) { BOOST_CHECK_EQUAL("test", m->GetMatchedString()); - BOOST_CHECK_EQUAL(std::string("22"), boost::get(m->GetAttribute("weight"))); + BOOST_CHECK_EQUAL(std::string("22"), std::get(m->GetAttribute("weight"))); matched = true; } BOOST_CHECK(matched); @@ -84,7 +88,7 @@ BOOST_AUTO_TEST_CASE(DictGet) { BOOST_CHECK(!matched); auto m = (*d)["test"]; - BOOST_CHECK_EQUAL("22", boost::get(m->GetAttribute("weight"))); + BOOST_CHECK_EQUAL("22", std::get(m->GetAttribute("weight"))); } BOOST_AUTO_TEST_CASE(DictLookup) { @@ -99,7 +103,7 @@ 
BOOST_AUTO_TEST_CASE(DictLookup) { bool matched = false; for (auto m : d->Lookup("nude")) { BOOST_CHECK_EQUAL("nude", m->GetMatchedString()); - BOOST_CHECK_EQUAL("22", boost::get(m->GetAttribute("weight"))); + BOOST_CHECK_EQUAL("22", std::get(m->GetAttribute("weight"))); matched = true; } BOOST_CHECK(matched); @@ -107,7 +111,7 @@ BOOST_AUTO_TEST_CASE(DictLookup) { matched = false; for (auto m : d->Lookup("nude ")) { BOOST_CHECK_EQUAL("nude", m->GetMatchedString()); - BOOST_CHECK_EQUAL("22", boost::get(m->GetAttribute("weight"))); + BOOST_CHECK_EQUAL("22", std::get(m->GetAttribute("weight"))); matched = true; } BOOST_CHECK(matched); @@ -115,7 +119,7 @@ BOOST_AUTO_TEST_CASE(DictLookup) { matched = false; for (auto m : d->Lookup("nude at work")) { BOOST_CHECK_EQUAL("nude", m->GetMatchedString()); - BOOST_CHECK_EQUAL("22", boost::get(m->GetAttribute("weight"))); + BOOST_CHECK_EQUAL("22", std::get(m->GetAttribute("weight"))); matched = true; } BOOST_CHECK(matched); @@ -207,7 +211,7 @@ BOOST_AUTO_TEST_CASE(DictGetZerobyte) { bool matched = false; for (auto m : d->Get(std::string("\0test", 5))) { BOOST_CHECK_EQUAL(std::string("\0test", 5), m->GetMatchedString()); - BOOST_CHECK_EQUAL(std::string("22"), boost::get(m->GetAttribute("weight"))); + BOOST_CHECK_EQUAL(std::string("22"), std::get(m->GetAttribute("weight"))); matched = true; } BOOST_CHECK(matched); @@ -220,7 +224,7 @@ BOOST_AUTO_TEST_CASE(DictGetZerobyte) { BOOST_CHECK(!matched); auto m = (*d)[std::string("\0test", 5)]; - BOOST_CHECK_EQUAL("22", boost::get(m->GetAttribute("weight"))); + BOOST_CHECK_EQUAL("22", std::get(m->GetAttribute("weight"))); } BOOST_AUTO_TEST_CASE(DictGetPrefixCompletion) { From c8b6ccdbf6ac22fbdef8179e9c97b48a99a44c29 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Wed, 30 Jul 2025 23:18:09 +0200 Subject: [PATCH 29/61] fix attribute access --- .../src/compiler/py_dictionary_compilers.cpp | 53 +++++++++---------- python-pybind/src/dictionary/py_match.cpp | 5 +- 
python-pybind/tests/match_object_test.py | 11 ++-- 3 files changed, 33 insertions(+), 36 deletions(-) diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp index 0f0774089..254908f75 100644 --- a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -42,47 +42,44 @@ inline void py_compile(Compiler *c, std::function(module, name) \ - .def(py::init<>()) \ - .def(py::init()) \ - .def("__enter__", [](compiler &c) { return &c; }) \ +#define CREATE_COMPILER_COMMON(compiler) \ + .def("__enter__", [](compiler &c) { return &c; }) \ .def("__exit__", [](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ .def("__setitem__", &compiler::Add) \ - .def("add", &compiler::Add) /* DEPRECATED */ \ - .def("Add", &compiler::Add) \ .def( \ "compile", \ [](compiler &c, std::function progress_callback) { \ py_compile(&c, progress_callback); \ }, \ - py::arg("progress_callback") = \ - static_cast *>(nullptr)) /* DEPRECATED */ \ - .def( \ - "Compile", \ - [](compiler &c, std::function progress_callback) { \ - py_compile(&c, progress_callback); \ - }, \ py::arg("progress_callback") = static_cast *>(nullptr)) \ - .def("set_manifest", &compiler::SetManifest) \ - .def("write_to_file", &compiler::WriteToFile, py::call_guard()) /* DEPRECATED */ \ - .def("WriteToFile", &compiler::WriteToFile, py::call_guard()); -#define CREATE_SK_COMPILER(compiler, name) \ - py::class_(module, name) \ - .def(py::init &>()) \ - .def(py::init &, const keyvi::util::parameters_t &>()) \ - .def("__enter__", [](compiler &c) { return &c; }) \ - .def("__exit__", [](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ - .def("__setitem__", &compiler::Add) \ - .def("add", &compiler::Add) \ .def( \ - "compile", \ + "Compile", /* DEPRECATED */ \ [](compiler &c, std::function progress_callback) { \ py_compile(&c, progress_callback); 
\ }, \ py::arg("progress_callback") = static_cast *>(nullptr)) \ .def("set_manifest", &compiler::SetManifest) \ - .def("write_to_file", &compiler::WriteToFile, py::call_guard()); + .def("write_to_file", &compiler::WriteToFile, py::call_guard()) \ + .def("WriteToFile", &compiler::WriteToFile, py::call_guard()) /* DEPRECATED */ +#define CREATE_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init<>()) \ + .def(py::init()) \ + CREATE_COMPILER_COMMON(compiler) \ + .def("add", &compiler::Add) /* DEPRECATED */ \ + .def("Add", &compiler::Add); +#define CREATE_KEY_ONLY_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init<>()) \ + .def(py::init()) CREATE_COMPILER_COMMON(compiler) \ + .def("add", &compiler::Add) /* DEPRECATED */ \ + .def("Add", [](compiler &c, const std::string &key) { c.Add(key); }); +#define CREATE_SK_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init &>()) \ + .def(py::init &, const keyvi::util::parameters_t &>()) \ + CREATE_COMPILER_COMMON(compiler) \ + .def("add", &compiler::Add); #define CREATE_MERGER(merger, name) \ py::class_(module, name) \ .def(py::init<>()) \ @@ -106,7 +103,7 @@ void init_keyvi_dictionary_compilers(const py::module_ &module) { CREATE_COMPILER(kd::FloatVectorDictionaryCompiler, "FloatVectorDictionaryCompiler"); CREATE_COMPILER(kd::IntDictionaryCompiler, "IntDictionaryCompiler"); CREATE_COMPILER(kd::JsonDictionaryCompiler, "JsonDictionaryCompiler"); - CREATE_COMPILER(kd::KeyOnlyDictionaryCompiler, "KeyOnlyDictionaryCompiler"); + CREATE_KEY_ONLY_COMPILER(kd::KeyOnlyDictionaryCompiler, "KeyOnlyDictionaryCompiler"); CREATE_COMPILER(kd::StringDictionaryCompiler, "StringDictionaryCompiler"); CREATE_SK_COMPILER(kd::SecondaryKeyCompletionDictionaryCompiler, "SecondaryKeyCompletionDictionaryCompiler"); CREATE_SK_COMPILER(kd::SecondaryKeyFloatVectorDictionaryCompiler, "SecondaryKeyFloatVectorDictionaryCompiler"); diff --git a/python-pybind/src/dictionary/py_match.cpp 
b/python-pybind/src/dictionary/py_match.cpp index 5d839c7df..1d8f18f0f 100644 --- a/python-pybind/src/dictionary/py_match.cpp +++ b/python-pybind/src/dictionary/py_match.cpp @@ -15,10 +15,11 @@ * limitations under the License. */ -#include - #include +#include +#include + #include "msgpack.hpp" #include "keyvi/dictionary/dictionary.h" diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py index 1997abd26..a7eceff93 100644 --- a/python-pybind/tests/match_object_test.py +++ b/python-pybind/tests/match_object_test.py @@ -90,7 +90,7 @@ def test_score(): assert m.score == 149 -""" def test_get_value(): +def test_get_value(): c = JsonDictionaryCompiler({"memory_limit_mb": "10"}) c.Add("abc", '{"a" : 2}') c.Add("abd", '{"a" : 3}') @@ -98,7 +98,7 @@ def test_score(): m = d["abc"] assert m.value == {"a": 2} m = d["abd"] - assert m.value == {"a": 3} """ + assert m.value == {"a": 3} def test_get_value_int(): @@ -112,16 +112,15 @@ def test_get_value_int(): assert m.value == 21 -""" def test_get_value_key_only(): +def test_get_value_key_only(): c = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"}) c.Add("abc") c.Add("abd") with tmp_dictionary(c, 'match_object_key_only.kv') as d: m = d["abc"] - assert m.value == '' + assert m.value is None m = d["abd"] - assert m.value == '' - """ + assert m.value is None def test_get_value_string(): From 3956b868873b7f39dbe2d3572c966c3f2ca346d2 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Thu, 31 Jul 2025 12:00:01 +0200 Subject: [PATCH 30/61] WIP --- .../src/compiler/py_dictionary_compilers.cpp | 4 + python-pybind/tests/match_object_test.py | 130 ++++++++++++++++-- python-pybind/tests/test_tools.py | 4 +- 3 files changed, 123 insertions(+), 15 deletions(-) diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp index 254908f75..88f9f5c98 100644 --- a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ 
b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -55,6 +55,10 @@ void init_keyvi_dictionary_compilers(const py::module_ &module) { .def( \ "Compile", /* DEPRECATED */ \ [](compiler &c, std::function progress_callback) { \ + py::module_ warnings = py::module_::import("warnings"); \ + warnings.attr("warn")( \ + "Compile is deprecated and will be removed in a future version. Use compile instead.", \ + py::module_::import("builtins").attr("DeprecationWarning"), 2); \ py_compile(&c, progress_callback); \ }, \ py::arg("progress_callback") = static_cast *>(nullptr)) \ diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py index a7eceff93..aa65a90cc 100644 --- a/python-pybind/tests/match_object_test.py +++ b/python-pybind/tests/match_object_test.py @@ -2,9 +2,12 @@ # Usage: py.test tests import keyvi2 as keyvi +import msgpack from test_tools import tmp_dictionary import warnings - +import zlib +import snappy +import zstd from keyvi2.compiler import ( JsonDictionaryCompiler, @@ -28,14 +31,20 @@ def test_serialization(): def test_raw_serialization(): c = JsonDictionaryCompiler({"memory_limit_mb": "10"}) - c.Add("abc", '{"a" : 2}') - c.Add("abd", '{"a" : 3}') + c.add("abc", '{"a" : 2}') + c.add("abd", '{"a" : 3}') with tmp_dictionary(c, "match_object_json.kv") as d: m = d["abc"] assert m.value_as_string() == '{"a":2}' d = m.dumps() m2 = keyvi.Match.loads(d) assert m2.value_as_string() == '{"a":2}' + assert msgpack.loads(m.msgpacked_value_as_string()) == {"a": 2} + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert m.GetValueAsString() == '{"a":2}' + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) def test_unicode_attributes(): @@ -46,6 +55,14 @@ def test_unicode_attributes(): m.score = 99 assert m["k2"] == " 吃饭了吗" assert m.score == 99.0 + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + m.SetAttribute("k2", "öäü") + assert 
m["k2"] == "öäü" + assert m.GetAttribute("k2") == "öäü" + assert len(w) == 2 + assert issubclass(w[0].category, DeprecationWarning) + assert issubclass(w[1].category, DeprecationWarning) def test_bytes_attributes(): @@ -76,72 +93,159 @@ def test_start(): m = keyvi.Match() m.start = 42 assert m.start == 42 + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + m.SetStart(44) + assert m.start == 44 + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) def test_end(): m = keyvi.Match() m.end = 49 assert m.end == 49 + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + m.SetEnd(55) + assert m.end == 55 + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) def test_score(): m = keyvi.Match() m.score = 149 assert m.score == 149 + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + m.SetScore(155) + assert m.score == 155 + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) def test_get_value(): c = JsonDictionaryCompiler({"memory_limit_mb": "10"}) - c.Add("abc", '{"a" : 2}') - c.Add("abd", '{"a" : 3}') - with tmp_dictionary(c, 'match_object_json.kv') as d: + c.add("abc", '{"a" : 2}') + c.add("abd", '{"a" : 3}') + with tmp_dictionary(c, "match_object_json.kv") as d: m = d["abc"] assert m.value == {"a": 2} m = d["abd"] assert m.value == {"a": 3} + assert msgpack.loads(m.msgpacked_value_as_string()) == {"a": 3} + assert msgpack.loads( + zlib.decompress( + m.msgpacked_value_as_string(keyvi.CompressionAlgorithm.ZLIB_COMPRESSION) + ) + ) == {"a": 3} + assert msgpack.loads( + snappy.decompress( + m.msgpacked_value_as_string( + keyvi.CompressionAlgorithm.SNAPPY_COMPRESSION + ) + ) + ) == {"a": 3} + assert msgpack.loads( + zstd.decompress( + m.msgpacked_value_as_string(keyvi.CompressionAlgorithm.ZSTD_COMPRESSION) + ) + ) == {"a": 3} + assert msgpack.loads( + 
m.msgpacked_value_as_string(keyvi.CompressionAlgorithm.NO_COMPRESSION) + ) == {"a": 3} def test_get_value_int(): c = CompletionDictionaryCompiler({"memory_limit_mb": "10"}) - c.Add("abc", 42) - c.Add("abd", 21) + c.add("abc", 42) + c.add("abd", 21) with tmp_dictionary(c, "match_object_int.kv") as d: m = d["abc"] assert m.value == 42 m = d["abd"] assert m.value == 21 + assert msgpack.loads(m.msgpacked_value_as_string()) == 21 + assert ( + msgpack.loads( + zlib.decompress( + m.msgpacked_value_as_string( + keyvi.CompressionAlgorithm.ZLIB_COMPRESSION + ) + ) + ) + == 21 + ) def test_get_value_key_only(): c = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"}) - c.Add("abc") - c.Add("abd") - with tmp_dictionary(c, 'match_object_key_only.kv') as d: + c.add("abc") + c.add("abd") + with tmp_dictionary(c, "match_object_key_only.kv") as d: m = d["abc"] assert m.value is None m = d["abd"] assert m.value is None + assert msgpack.loads(m.msgpacked_value_as_string()) is None + assert ( + msgpack.loads( + zlib.decompress( + m.msgpacked_value_as_string( + keyvi.CompressionAlgorithm.ZLIB_COMPRESSION + ) + ) + ) + is None + ) def test_get_value_string(): c = StringDictionaryCompiler({"memory_limit_mb": "10"}) - c.Add("abc", "aaaaa") - c.Add("abd", "bbbbb") + c.add("abc", "aaaaa") + c.add("abd", "bbbbb") + c.add("abe", "{}") with tmp_dictionary(c, "match_object_string.kv") as d: m = d["abc"] assert m.value == "aaaaa" m = d["abd"] assert m.value == "bbbbb" + assert msgpack.loads(m.msgpacked_value_as_string()) == "bbbbb" + assert ( + msgpack.loads( + zlib.decompress( + m.msgpacked_value_as_string( + keyvi.CompressionAlgorithm.ZLIB_COMPRESSION + ) + ) + ) + == "bbbbb" + ) + m = d["abe"] + # gh#333: keyvi < 0.6.4 returned a dictionary instead of a string + assert m.value == "{}" + assert isinstance(m.value, str) def test_matched_string(): m = keyvi.Match() m.matched_string = "match" assert m.matched_string == "match" + with warnings.catch_warnings(record=True) as w: + 
warnings.simplefilter("always") + m.SetMatchedString("other_match") + assert m.matched_string == "other_match" + assert len(w) == 1 + assert issubclass(w[-1].category, DeprecationWarning) def test_bool_operator(): m = keyvi.Match() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert m.IsEmpty() + assert issubclass(w[-1].category, DeprecationWarning) assert not m m.end = 42 assert not m is False diff --git a/python-pybind/tests/test_tools.py b/python-pybind/tests/test_tools.py index 233d86ff1..9f80f3c30 100644 --- a/python-pybind/tests/test_tools.py +++ b/python-pybind/tests/test_tools.py @@ -12,8 +12,8 @@ def tmp_dictionary(compiler, file_name): tmp_dir = tempfile.gettempdir() fq_file_name = os.path.join(tmp_dir, file_name) - compiler.Compile() - compiler.WriteToFile(fq_file_name) + compiler.compile() + compiler.write_to_file(fq_file_name) del compiler d = Dictionary(fq_file_name) yield d From 562cd781b3426d50fb0dd4984fc0f0856f71ed47 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Wed, 10 Sep 2025 17:32:45 +0200 Subject: [PATCH 31/61] update workflow --- .../workflows/python-cibuildwheel-pybind.yml | 42 ++++--------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index 81d81fc1e..eb91e90b7 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -25,16 +25,12 @@ jobs: exclude: - os: macos-13 target: musl - # skip pypy, https://github.com/pypa/distutils/issues/283 - #- os: ubuntu-22.04 - # target: musl - # flavor: pypy - #- os: macos-13 - # flavor: pypy + - os: macos-14 + target: musl steps: - uses: actions/checkout@v4 - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.11 + uses: hendrikmuhs/ccache-action@v1.2.18 with: key: ${{ matrix.os }}-${{ matrix.target }}-${{ matrix.flavor }}-python-pybind @@ -64,8 +60,7 @@ jobs: run: | brew update && 
\ brew list -1 | grep python | while read formula; do brew unlink $formula; brew link --overwrite $formula; done && \ - brew install ccache zlib snappy boost@1.85 - brew link boost@1.85 + brew install ccache zlib snappy boost - name: set mac deployment target X64 if: runner.os == 'macOS' && runner.arch == 'X64' @@ -80,8 +75,8 @@ jobs: - name: Build python wheels for ${{ matrix.os }} uses: pypa/cibuildwheel@v3.0.1 env: - # Skip CPython 3.{6, 7, 8} - CIBW_SKIP: ${{ env.CIBW_SKIP }} cp36-* cp37-* cp38-* + # Skip CPython 3.8 + CIBW_SKIP: ${{ env.CIBW_SKIP }} cp38-* # only build native packages CIBW_ARCHS: native @@ -111,15 +106,15 @@ jobs: name: sdist runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: install Linux deps run: | sudo apt-get update && \ sudo apt-get install -y libsnappy-dev libzzip-dev zlib1g-dev libboost-all-dev ccache - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.11 + uses: hendrikmuhs/ccache-action@v1.2.18 with: - key: ubuntu-sdist-python + key: ubuntu-sdist-python-pybind - name: Build SDist run: | @@ -130,22 +125,3 @@ jobs: python -m pytest python-pybind/tests && \ python -m pip uninstall -y keyvi_pybind11 - - uses: actions/upload-artifact@v4 - with: - name: artifact-sdist - path: python/wheelhouse/*.tar.gz - - upload_all: - needs: [build_wheels, build_sdist] - runs-on: ubuntu-latest - if: github.event_name == 'release' - steps: - - uses: actions/download-artifact@v4 - with: - pattern: artifact-* - merge-multiple: true - path: dist - - - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.pypi_password }} From f552c4f71b6b55704dc06148743b3e5795ec5a6d Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 29 Sep 2025 21:42:19 +0200 Subject: [PATCH 32/61] style fixes --- .../src/compiler/py_dictionary_compilers.cpp | 23 ++++++++++--------- python-pybind/src/dictionary/py_match.cpp | 9 ++++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git 
a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp index 88f9f5c98..5bea802dd 100644 --- a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -65,18 +65,19 @@ void init_keyvi_dictionary_compilers(const py::module_ &module) { .def("set_manifest", &compiler::SetManifest) \ .def("write_to_file", &compiler::WriteToFile, py::call_guard()) \ .def("WriteToFile", &compiler::WriteToFile, py::call_guard()) /* DEPRECATED */ -#define CREATE_COMPILER(compiler, name) \ - py::class_(module, name) \ - .def(py::init<>()) \ - .def(py::init()) \ - CREATE_COMPILER_COMMON(compiler) \ - .def("add", &compiler::Add) /* DEPRECATED */ \ +#define CREATE_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init<>()) \ + .def(py::init()) /* init with params */ \ + CREATE_COMPILER_COMMON(compiler) \ + .def("add", &compiler::Add) /* DEPRECATED */ \ .def("Add", &compiler::Add); -#define CREATE_KEY_ONLY_COMPILER(compiler, name) \ - py::class_(module, name) \ - .def(py::init<>()) \ - .def(py::init()) CREATE_COMPILER_COMMON(compiler) \ - .def("add", &compiler::Add) /* DEPRECATED */ \ +#define CREATE_KEY_ONLY_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init<>()) \ + .def(py::init()) /* init with params */ \ + CREATE_COMPILER_COMMON(compiler) \ + .def("add", &compiler::Add) /* DEPRECATED */ \ .def("Add", [](compiler &c, const std::string &key) { c.Add(key); }); #define CREATE_SK_COMPILER(compiler, name) \ py::class_(module, name) \ diff --git a/python-pybind/src/dictionary/py_match.cpp b/python-pybind/src/dictionary/py_match.cpp index 1d8f18f0f..19f13653b 100644 --- a/python-pybind/src/dictionary/py_match.cpp +++ b/python-pybind/src/dictionary/py_match.cpp @@ -15,11 +15,12 @@ * limitations under the License. 
*/ -#include - #include #include +#include +#include + #include "msgpack.hpp" #include "keyvi/dictionary/dictionary.h" @@ -56,9 +57,7 @@ void init_keyvi_match(const py::module_ &m) { }) .def("value_as_string", &kd::Match::GetValueAsString) .def("raw_value_as_string", &kd::Match::GetRawValueAsString) - .def("__getitem__", [](kd::Match &m, const std::string &key) { - return m.GetAttribute(key); - }) + .def("__getitem__", [](kd::Match &m, const std::string &key) { return m.GetAttribute(key); }) .def("__setitem__", &kd::Match::SetAttribute) .def("__setitem__", &kd::Match::SetAttribute) .def("__setitem__", &kd::Match::SetAttribute) From fe64e1c9a698bd7e1dfc4b5cedeeb52dcd0bffe0 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 29 Sep 2025 21:42:30 +0200 Subject: [PATCH 33/61] free-threading --- python-pybind/src/py_keyvi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp index 27f1ff1fb..69adea7ce 100644 --- a/python-pybind/src/py_keyvi.cpp +++ b/python-pybind/src/py_keyvi.cpp @@ -29,7 +29,7 @@ void init_keyvi_dictionary(const py::module_ &); void init_keyvi_dictionary_compilers(const py::module_ &); void init_keyvi_match(const py::module_ &); -PYBIND11_MODULE(keyvi2, m) { +PYBIND11_MODULE(keyvi2, m, py::mod_gil_not_used()) { m.doc() = R"pbdoc( keyvi - a key value store. 
----------------------- From d30a1fd40309214da04ea94eb4d1c6393c23e54d Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 30 Sep 2025 20:56:31 +0200 Subject: [PATCH 34/61] update workflow --- .github/workflows/python-cibuildwheel-pybind.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index eb91e90b7..b949a5e1b 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -16,19 +16,19 @@ jobs: strategy: fail-fast: false matrix: - # macos-13: x86, macos-14: Arm64 - os: [ubuntu-22.04, ubuntu-24.04-arm, macos-13, macos-14] + # macos-15-intel: x86, macos-14: Arm64 + os: [ubuntu-22.04, ubuntu-24.04-arm, macos-15-intel, macos-14] # skip pypy, https://github.com/pypa/distutils/issues/283 flavor: ['cpython'] # separate musl and many on linux, for mac we just skip one of those target: [ 'many', 'musl' ] exclude: - - os: macos-13 + - os: macos-15-intel target: musl - os: macos-14 target: musl steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: ccache uses: hendrikmuhs/ccache-action@v1.2.18 with: @@ -65,7 +65,7 @@ jobs: - name: set mac deployment target X64 if: runner.os == 'macOS' && runner.arch == 'X64' run: | - echo "MACOSX_DEPLOYMENT_TARGET=13.0" >> $GITHUB_ENV + echo "MACOSX_DEPLOYMENT_TARGET=15.0" >> $GITHUB_ENV - name: set mac deployment target arm64 if: runner.os == 'macOS' && runner.arch == 'ARM64' @@ -73,7 +73,7 @@ jobs: echo "MACOSX_DEPLOYMENT_TARGET=14.0" >> $GITHUB_ENV - name: Build python wheels for ${{ matrix.os }} - uses: pypa/cibuildwheel@v3.0.1 + uses: pypa/cibuildwheel@v3.2.0 env: # Skip CPython 3.8 CIBW_SKIP: ${{ env.CIBW_SKIP }} cp38-* From dcb80f9b536b940b3f3c9132bf1eb51d617da990 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 30 Sep 2025 21:08:23 +0200 Subject: [PATCH 35/61] update pyproject.toml --- pyproject.toml | 5 +++-- 1 file 
changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index da09a3a3c..d82174f52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build" [project] name = "keyvi2" -version = "0.6.4dev0" +version = "0.7.2dev0" dependencies = [ "msgpack>=1.0.0", ] @@ -14,7 +14,8 @@ lint = [ 'ruff>=0.5.0', ] test = [ - 'pytest>=7.4.2', + 'pytest>=8.4.2', + 'python-snappy>=0.7.3', ] dev = ['keyvi2[lint,test]'] From 311170ddac9559c61244aefb780b4ded6c3c35e7 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 30 Sep 2025 22:07:32 +0200 Subject: [PATCH 36/61] move logic to pyproject.toml --- .github/workflows/python-cibuildwheel-pybind.yml | 6 ------ pyproject.toml | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index b949a5e1b..165d43080 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -84,12 +84,6 @@ jobs: # skip tests on pypy, currently fails for indexer tests CIBW_TEST_SKIP: "pp*" - # (many)linux custom docker images - CIBW_MANYLINUX_X86_64_IMAGE: 'keyvidev/manylinux-builder-x86_64' - CIBW_MANYLINUX_AARCH64_IMAGE: 'keyvidev/manylinux-builder-aarch64' - CIBW_MUSLLINUX_X86_64_IMAGE: 'keyvidev/musllinux-builder-x86_64' - CIBW_MUSLLINUX_AARCH64_IMAGE: 'keyvidev/musllinux-builder-aarch64' - # ccache using path CIBW_ENVIRONMENT_MACOS: PATH=/usr/local/opt/ccache/libexec:$PATH CIBW_ENVIRONMENT_LINUX: PATH=/usr/local/bin:/usr/lib/ccache:$PATH CCACHE_DIR=/host${{ github.workspace }}/.ccache CCACHE_CONFIGPATH=/host/home/runner/.config/ccache/ccache.conf diff --git a/pyproject.toml b/pyproject.toml index d82174f52..d49892ddd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ dependencies = [ "msgpack>=1.0.0", ] -[project.optional-dependencies] +[dependency-groups] lint = [ 'ruff>=0.5.0', ] @@ 
-17,7 +17,12 @@ test = [ 'pytest>=8.4.2', 'python-snappy>=0.7.3', ] -dev = ['keyvi2[lint,test]'] +dev = [ + { include-group = "test" }, + { include-group = "lint" }, + "tox", +] + [tool.scikit-build] wheel.expand-macos-universal-tags = true @@ -44,8 +49,12 @@ testpaths = ["python-pybind/tests"] [tool.cibuildwheel] build-frontend = "default" -test-requires = "pytest" +test-groups = ["test"] test-command = "pytest {project}/python-pybind/tests" +manylinux-x86_64-image = "keyvidev/manylinux-builder-x86_64" +musllinux-x86_64-image = "keyvidev/musllinux-builder-x86_64" +manylinux-aarch64-image = "keyvidev/manylinux-builder-aarch64" +musllinux-aarch64-image = "keyvidev/musllinux-builder-aarch64" [tool.ruff.lint] extend-select = [ From 3995a55832506101a36ff2b4ce75ccad2787a5f2 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 30 Sep 2025 22:33:25 +0200 Subject: [PATCH 37/61] fix versions --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d49892ddd..abfcadf13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,8 +14,9 @@ lint = [ 'ruff>=0.5.0', ] test = [ - 'pytest>=8.4.2', + 'pytest>=8.3.5', 'python-snappy>=0.7.3', + 'zstd>=1.5.7.2' ] dev = [ { include-group = "test" }, From c2cea874e96aed3ce85a8d263411c1d2511a6579 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Wed, 1 Oct 2025 11:28:59 +0200 Subject: [PATCH 38/61] add msgpacked_value_as_string: WIP --- python-pybind/src/dictionary/py_match.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python-pybind/src/dictionary/py_match.cpp b/python-pybind/src/dictionary/py_match.cpp index 19f13653b..87dbb40e1 100644 --- a/python-pybind/src/dictionary/py_match.cpp +++ b/python-pybind/src/dictionary/py_match.cpp @@ -57,6 +57,13 @@ void init_keyvi_match(const py::module_ &m) { }) .def("value_as_string", &kd::Match::GetValueAsString) .def("raw_value_as_string", &kd::Match::GetRawValueAsString) + .def( + "msgpacked_value_as_string", + 
[](const kd::Match &m, const keyvi::compression::CompressionAlgorithm compression_algorithm = + keyvi::compression::CompressionAlgorithm::NO_COMPRESSION) -> py::str { + return m.GetMsgPackedValueAsString(compression_algorithm); + }, + py::arg("compression_algorithm") = 0) .def("__getitem__", [](kd::Match &m, const std::string &key) { return m.GetAttribute(key); }) .def("__setitem__", &kd::Match::SetAttribute) .def("__setitem__", &kd::Match::SetAttribute) From a02899c246dfb7898deaa6cbd02b8a2188f401d3 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Thu, 9 Oct 2025 08:51:07 +0200 Subject: [PATCH 39/61] add msgpacked_value_as_string --- python-pybind/src/dictionary/py_match.cpp | 7 +++---- python-pybind/src/py_keyvi.cpp | 9 +++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/python-pybind/src/dictionary/py_match.cpp b/python-pybind/src/dictionary/py_match.cpp index 87dbb40e1..c91ad1e82 100644 --- a/python-pybind/src/dictionary/py_match.cpp +++ b/python-pybind/src/dictionary/py_match.cpp @@ -59,11 +59,10 @@ void init_keyvi_match(const py::module_ &m) { .def("raw_value_as_string", &kd::Match::GetRawValueAsString) .def( "msgpacked_value_as_string", - [](const kd::Match &m, const keyvi::compression::CompressionAlgorithm compression_algorithm = - keyvi::compression::CompressionAlgorithm::NO_COMPRESSION) -> py::str { - return m.GetMsgPackedValueAsString(compression_algorithm); + [](const kd::Match &m, const keyvi::compression::CompressionAlgorithm compression_algorithm) -> py::bytes { + return py::bytes(m.GetMsgPackedValueAsString(compression_algorithm)); }, - py::arg("compression_algorithm") = 0) + py::arg("compression_algorithm") = keyvi::compression::CompressionAlgorithm::NO_COMPRESSION) .def("__getitem__", [](kd::Match &m, const std::string &key) { return m.GetAttribute(key); }) .def("__setitem__", &kd::Match::SetAttribute) .def("__setitem__", &kd::Match::SetAttribute) diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp 
index 69adea7ce..2b80aed3e 100644 --- a/python-pybind/src/py_keyvi.cpp +++ b/python-pybind/src/py_keyvi.cpp @@ -15,8 +15,10 @@ * limitations under the License. */ +#include #include +#include "keyvi/compression/compression_algorithm.h" #include "keyvi/dictionary/fsa/internal/memory_map_flags.h" #define STRINGIFY(x) #x @@ -40,6 +42,13 @@ PYBIND11_MODULE(keyvi2, m, py::mod_gil_not_used()) { :toctree: _generate )pbdoc"; + py::native_enum(m, "CompressionAlgorithm", "enum.Enum", + "Compression algorithm used for packing values") + .value("NO_COMPRESSION", keyvi::compression::CompressionAlgorithm::NO_COMPRESSION) + .value("ZLIB_COMPRESSION", keyvi::compression::CompressionAlgorithm::ZLIB_COMPRESSION) + .value("SNAPPY_COMPRESSION", keyvi::compression::CompressionAlgorithm::SNAPPY_COMPRESSION) + .value("ZSTD_COMPRESSION", keyvi::compression::CompressionAlgorithm::ZSTD_COMPRESSION) + .finalize(); py::enum_(m, "loading_strategy_types") .value("default_os", kd::loading_strategy_types::default_os) From 675b412fb9dc5fbc9b34c1c641b56d4b17e2c2aa Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 14 Oct 2025 21:59:07 +0200 Subject: [PATCH 40/61] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index abfcadf13..e816517cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build" [project] name = "keyvi2" -version = "0.7.2dev0" +version = "0.7.4dev0" dependencies = [ "msgpack>=1.0.0", ] From 5c3873800d629405774dd862b3c92f3bbdc7267f Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Fri, 17 Oct 2025 22:19:06 +0200 Subject: [PATCH 41/61] fix add for key only compiler --- python-pybind/src/compiler/py_dictionary_compilers.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp index 5bea802dd..ae7646c8d 100644 --- 
a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -70,14 +70,14 @@ void init_keyvi_dictionary_compilers(const py::module_ &module) { .def(py::init<>()) \ .def(py::init()) /* init with params */ \ CREATE_COMPILER_COMMON(compiler) \ - .def("add", &compiler::Add) /* DEPRECATED */ \ + .def("add", &compiler::Add) \ .def("Add", &compiler::Add); #define CREATE_KEY_ONLY_COMPILER(compiler, name) \ py::class_(module, name) \ .def(py::init<>()) \ .def(py::init()) /* init with params */ \ CREATE_COMPILER_COMMON(compiler) \ - .def("add", &compiler::Add) /* DEPRECATED */ \ + .def("add", [](compiler &c, const std::string &key) { c.Add(key); }) \ .def("Add", [](compiler &c, const std::string &key) { c.Add(key); }); #define CREATE_SK_COMPILER(compiler, name) \ py::class_(module, name) \ From ed41395a790493dd29fdc631f271bf251c398f97 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sat, 25 Oct 2025 22:25:21 +0200 Subject: [PATCH 42/61] update workflow --- .../workflows/python-cibuildwheel-pybind.yml | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index 165d43080..6bcfef1a8 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -3,8 +3,24 @@ name: Python Pybind cibuildwheel on: push: branches: [ master, release-* ] + paths-ignore: + - '.github/workflows/docs.yml' + - '.github/workflows/keyvi.yml' + - '.github/workflows/python-dockerimages-**.yml' + - '.github/workflows/rust**.yml' + - 'docker/**' + - 'doc/**' + - 'rust/**' pull_request: branches: [ master ] + paths-ignore: + - '.github/workflows/docs.yml' + - '.github/workflows/keyvi.yml' + - '.github/workflows/python-dockerimages-**.yml' + - '.github/workflows/rust**.yml' + - 'docker/**' + - 'doc/**' + - 'rust/**' release: types: [published] workflow_dispatch: @@ 
-27,6 +43,7 @@ jobs: target: musl - os: macos-14 target: musl + steps: - uses: actions/checkout@v5 - name: ccache @@ -34,12 +51,14 @@ jobs: with: key: ${{ matrix.os }}-${{ matrix.target }}-${{ matrix.flavor }}-python-pybind - - name: Skip manylinux for musllinux target + - name: Setup musllinux build if: ${{ (runner.os == 'Linux') && (matrix.target == 'musl') }} + # workaround: set CXX to g++, so it does not pick clang++ for python 3.14 builds run: | echo "CIBW_SKIP=*manylinux*" >> $GITHUB_ENV + echo "CIBW_ENVIRONMENT_LINUX=CXX=g++" >> $GITHUB_ENV - - name: Skip musllinux for manylinux target + - name: Setup manylinux build if: ${{ (runner.os == 'Linux') && (matrix.target == 'many') }} run: | echo "CIBW_SKIP=*musllinux*" >> $GITHUB_ENV @@ -62,7 +81,7 @@ jobs: brew list -1 | grep python | while read formula; do brew unlink $formula; brew link --overwrite $formula; done && \ brew install ccache zlib snappy boost - - name: set mac deployment target X64 + - name: set mac deployment target if: runner.os == 'macOS' && runner.arch == 'X64' run: | echo "MACOSX_DEPLOYMENT_TARGET=15.0" >> $GITHUB_ENV @@ -73,7 +92,7 @@ jobs: echo "MACOSX_DEPLOYMENT_TARGET=14.0" >> $GITHUB_ENV - name: Build python wheels for ${{ matrix.os }} - uses: pypa/cibuildwheel@v3.2.0 + uses: pypa/cibuildwheel@v3.2.1 env: # Skip CPython 3.8 CIBW_SKIP: ${{ env.CIBW_SKIP }} cp38-* @@ -84,9 +103,9 @@ jobs: # skip tests on pypy, currently fails for indexer tests CIBW_TEST_SKIP: "pp*" - # ccache using path - CIBW_ENVIRONMENT_MACOS: PATH=/usr/local/opt/ccache/libexec:$PATH - CIBW_ENVIRONMENT_LINUX: PATH=/usr/local/bin:/usr/lib/ccache:$PATH CCACHE_DIR=/host${{ github.workspace }}/.ccache CCACHE_CONFIGPATH=/host/home/runner/.config/ccache/ccache.conf + # prefix ccache in path + CIBW_ENVIRONMENT_MACOS: ${{ env.CIBW_ENVIRONMENT_MACOS }} PATH=/usr/local/opt/ccache/libexec:/opt/homebrew/opt/ccache/libexec:$PATH + CIBW_ENVIRONMENT_LINUX: ${{ env.CIBW_ENVIRONMENT_LINUX }} PATH=/usr/local/bin:/usr/lib/ccache:$PATH 
CCACHE_DIR=/host${{ github.workspace }}/.ccache CCACHE_CONFIGPATH=/host/home/runner/.config/ccache/ccache.conf # for debugging set this to 1,2 or 3 # CIBW_BUILD_VERBOSITY: 2 @@ -118,4 +137,3 @@ jobs: python -m pip install pytest && \ python -m pytest python-pybind/tests && \ python -m pip uninstall -y keyvi_pybind11 - From a66a9e92cd5c83124312004266a64e4150af4703 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sat, 25 Oct 2025 22:28:25 +0200 Subject: [PATCH 43/61] format --- .../src/compiler/py_dictionary_compilers.cpp | 100 +++++++++--------- .../src/dictionary/py_dictionary.cpp | 16 +-- python-pybind/src/dictionary/py_match.cpp | 20 ++-- .../src/dictionary/py_match_iterator.h | 10 +- python-pybind/src/py_keyvi.cpp | 6 +- 5 files changed, 76 insertions(+), 76 deletions(-) diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp index ae7646c8d..987e9393e 100644 --- a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -28,77 +28,77 @@ namespace py = pybind11; namespace kd = keyvi::dictionary; template -inline void py_compile(Compiler *c, std::function progress_callback) { +inline void py_compile(Compiler* c, std::function progress_callback) { if (progress_callback == nullptr) { c->Compile(); return; } - auto progress_compiler_callback = [](size_t a, size_t b, void *user_data) { - auto py_callback = *reinterpret_cast *>(user_data); + auto progress_compiler_callback = [](size_t a, size_t b, void* user_data) { + auto py_callback = *reinterpret_cast*>(user_data); py_callback(a, b); }; - void *user_data = reinterpret_cast(&progress_callback); + void* user_data = reinterpret_cast(&progress_callback); c->Compile(progress_compiler_callback, user_data); } -void init_keyvi_dictionary_compilers(const py::module_ &module) { -#define CREATE_COMPILER_COMMON(compiler) \ - .def("__enter__", [](compiler &c) { return &c; }) \ - .def("__exit__", 
[](compiler &c, void *exc_type, void *exc_value, void *traceback) { c.Compile(); }) \ - .def("__setitem__", &compiler::Add) \ - .def( \ - "compile", \ - [](compiler &c, std::function progress_callback) { \ - py_compile(&c, progress_callback); \ - }, \ - py::arg("progress_callback") = static_cast *>(nullptr)) \ - .def( \ - "Compile", /* DEPRECATED */ \ - [](compiler &c, std::function progress_callback) { \ - py::module_ warnings = py::module_::import("warnings"); \ - warnings.attr("warn")( \ - "Compile is deprecated and will be removed in a future version. Use compile instead.", \ - py::module_::import("builtins").attr("DeprecationWarning"), 2); \ - py_compile(&c, progress_callback); \ - }, \ - py::arg("progress_callback") = static_cast *>(nullptr)) \ - .def("set_manifest", &compiler::SetManifest) \ - .def("write_to_file", &compiler::WriteToFile, py::call_guard()) \ +void init_keyvi_dictionary_compilers(const py::module_& module) { +#define CREATE_COMPILER_COMMON(compiler) \ + .def("__enter__", [](compiler& c) { return &c; }) \ + .def("__exit__", [](compiler& c, void* exc_type, void* exc_value, void* traceback) { c.Compile(); }) \ + .def("__setitem__", &compiler::Add) \ + .def( \ + "compile", \ + [](compiler& c, std::function progress_callback) { \ + py_compile(&c, progress_callback); \ + }, \ + py::arg("progress_callback") = static_cast*>(nullptr)) \ + .def( \ + "Compile", /* DEPRECATED */ \ + [](compiler& c, std::function progress_callback) { \ + py::module_ warnings = py::module_::import("warnings"); \ + warnings.attr("warn")( \ + "Compile is deprecated and will be removed in a future version. 
Use compile instead.", \ + py::module_::import("builtins").attr("DeprecationWarning"), 2); \ + py_compile(&c, progress_callback); \ + }, \ + py::arg("progress_callback") = static_cast*>(nullptr)) \ + .def("set_manifest", &compiler::SetManifest) \ + .def("write_to_file", &compiler::WriteToFile, py::call_guard()) \ .def("WriteToFile", &compiler::WriteToFile, py::call_guard()) /* DEPRECATED */ -#define CREATE_COMPILER(compiler, name) \ - py::class_(module, name) \ - .def(py::init<>()) \ - .def(py::init()) /* init with params */ \ - CREATE_COMPILER_COMMON(compiler) \ - .def("add", &compiler::Add) \ +#define CREATE_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init<>()) \ + .def(py::init()) /* init with params */ \ + CREATE_COMPILER_COMMON(compiler) \ + .def("add", &compiler::Add) \ .def("Add", &compiler::Add); -#define CREATE_KEY_ONLY_COMPILER(compiler, name) \ - py::class_(module, name) \ - .def(py::init<>()) \ - .def(py::init()) /* init with params */ \ - CREATE_COMPILER_COMMON(compiler) \ - .def("add", [](compiler &c, const std::string &key) { c.Add(key); }) \ - .def("Add", [](compiler &c, const std::string &key) { c.Add(key); }); -#define CREATE_SK_COMPILER(compiler, name) \ - py::class_(module, name) \ - .def(py::init &>()) \ - .def(py::init &, const keyvi::util::parameters_t &>()) \ - CREATE_COMPILER_COMMON(compiler) \ +#define CREATE_KEY_ONLY_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init<>()) \ + .def(py::init()) /* init with params */ \ + CREATE_COMPILER_COMMON(compiler) \ + .def("add", [](compiler& c, const std::string& key) { c.Add(key); }) \ + .def("Add", [](compiler& c, const std::string& key) { c.Add(key); }); +#define CREATE_SK_COMPILER(compiler, name) \ + py::class_(module, name) \ + .def(py::init&>()) \ + .def(py::init&, const keyvi::util::parameters_t&>()) \ + CREATE_COMPILER_COMMON(compiler) \ .def("add", &compiler::Add); #define CREATE_MERGER(merger, name) \ py::class_(module, name) \ .def(py::init<>()) 
\ - .def(py::init()) \ - .def("__enter__", [](merger &m) { return &m; }) \ - .def("__exit__", [](merger &m, void *exc_type, void *exc_value, void *traceback) { m.Merge(); }) \ + .def(py::init()) \ + .def("__enter__", [](merger& m) { return &m; }) \ + .def("__exit__", [](merger& m, void* exc_type, void* exc_value, void* traceback) { m.Merge(); }) \ .def("add", &merger::Add) \ .def("merge", \ - [](merger &m) { \ + [](merger& m) { \ pybind11::gil_scoped_release release_gil; \ m.Merge(); \ }) \ .def("merge", \ - [](merger &m, const std::string &filename) { \ + [](merger& m, const std::string& filename) { \ pybind11::gil_scoped_release release_gil; \ m.Merge(filename); \ }) \ diff --git a/python-pybind/src/dictionary/py_dictionary.cpp b/python-pybind/src/dictionary/py_dictionary.cpp index 2333bb4a5..28c117c15 100644 --- a/python-pybind/src/dictionary/py_dictionary.cpp +++ b/python-pybind/src/dictionary/py_dictionary.cpp @@ -28,7 +28,7 @@ namespace py = pybind11; namespace kd = keyvi::dictionary; namespace kpy = keyvi::pybind; -void init_keyvi_dictionary(const py::module_ &m) { +void init_keyvi_dictionary(const py::module_& m) { m.doc() = R"pbdoc( keyvi.dictionary ----------------------- @@ -43,11 +43,11 @@ void init_keyvi_dictionary(const py::module_ &m) { // TODO(hendrik): 'items', 'keys', 'manifest', 'match_fuzzy', 'match_near', // 'search_tokenized', 'statistics', 'values' py::class_(m, "Dictionary") - .def(py::init()) - .def(py::init()) + .def(py::init()) + .def(py::init()) .def( "complete_fuzzy_multiword", - [](const kd::Dictionary &d, const std::string &query, const int32_t max_edit_distance, + [](const kd::Dictionary& d, const std::string& query, const int32_t max_edit_distance, const size_t minimum_exact_prefix = 0, const unsigned char multiword_separator = 0x1b) { auto m = d.GetFuzzyMultiwordCompletion(query, max_edit_distance, minimum_exact_prefix, multiword_separator); return kpy::make_match_iterator(m.begin(), m.end()); @@ -62,7 +62,7 @@ void 
init_keyvi_dictionary(const py::module_ &m) { )pbdoc") .def( "complete_multiword", - [](const kd::Dictionary &d, const std::string &query, const unsigned char multiword_separator = 0x1b) { + [](const kd::Dictionary& d, const std::string& query, const unsigned char multiword_separator = 0x1b) { auto m = d.GetMultiwordCompletion(query, multiword_separator); return kpy::make_match_iterator(m.begin(), m.end()); }, @@ -81,7 +81,7 @@ void init_keyvi_dictionary(const py::module_ &m) { )pbdoc") .def( "complete_prefix", - [](const kd::Dictionary &d, const std::string &query) { + [](const kd::Dictionary& d, const std::string& query) { auto m = d.GetPrefixCompletion(query); return kpy::make_match_iterator(m.begin(), m.end()); }, @@ -100,7 +100,7 @@ void init_keyvi_dictionary(const py::module_ &m) { )pbdoc") .def( "complete_prefix", - [](const kd::Dictionary &d, const std::string &query, size_t top_n) { + [](const kd::Dictionary& d, const std::string& query, size_t top_n) { auto m = d.GetPrefixCompletion(query, top_n); return kpy::make_match_iterator(m.begin(), m.end()); }, @@ -124,7 +124,7 @@ void init_keyvi_dictionary(const py::module_ &m) { Get an entry from the dictionary. 
)pbdoc") .def("match", - [](const kd::Dictionary &d, const std::string &key) { + [](const kd::Dictionary& d, const std::string& key) { auto m = d.Get(key); return kpy::make_match_iterator(m.begin(), m.end()); }) diff --git a/python-pybind/src/dictionary/py_match.cpp b/python-pybind/src/dictionary/py_match.cpp index c91ad1e82..00f63f259 100644 --- a/python-pybind/src/dictionary/py_match.cpp +++ b/python-pybind/src/dictionary/py_match.cpp @@ -31,14 +31,14 @@ namespace py = pybind11; namespace kd = keyvi::dictionary; -inline const py::object &get_msgpack_loads_func() { +inline const py::object& get_msgpack_loads_func() { PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store storage; return storage .call_once_and_store_result([]() -> py::object { return py::getattr(py::module_::import("msgpack"), "loads"); }) .get_stored(); } -void init_keyvi_match(const py::module_ &m) { +void init_keyvi_match(const py::module_& m) { py::module_ msgpack_ = py::module_::import("msgpack"); py::class_>(m, "Match") @@ -48,7 +48,7 @@ void init_keyvi_match(const py::module_ &m) { .def_property("score", &kd::Match::GetScore, &kd::Match::SetScore) .def_property("matched_string", &kd::Match::GetMatchedString, &kd::Match::SetMatchedString) .def_property_readonly("value", - [&msgpack_](const kd::Match &m) -> py::object { + [&msgpack_](const kd::Match& m) -> py::object { auto packed_value = m.GetMsgPackedValueAsString(); if (packed_value.empty()) { return py::none(); @@ -59,17 +59,17 @@ void init_keyvi_match(const py::module_ &m) { .def("raw_value_as_string", &kd::Match::GetRawValueAsString) .def( "msgpacked_value_as_string", - [](const kd::Match &m, const keyvi::compression::CompressionAlgorithm compression_algorithm) -> py::bytes { + [](const kd::Match& m, const keyvi::compression::CompressionAlgorithm compression_algorithm) -> py::bytes { return py::bytes(m.GetMsgPackedValueAsString(compression_algorithm)); }, py::arg("compression_algorithm") = 
keyvi::compression::CompressionAlgorithm::NO_COMPRESSION) - .def("__getitem__", [](kd::Match &m, const std::string &key) { return m.GetAttribute(key); }) + .def("__getitem__", [](kd::Match& m, const std::string& key) { return m.GetAttribute(key); }) .def("__setitem__", &kd::Match::SetAttribute) .def("__setitem__", &kd::Match::SetAttribute) .def("__setitem__", &kd::Match::SetAttribute) .def("__setitem__", &kd::Match::SetAttribute) .def("dumps", - [](const kd::Match &m) -> py::bytes { + [](const kd::Match& m) -> py::bytes { bool do_pack_rest = false; msgpack::sbuffer msgpack_buffer; msgpack::packer packer(&msgpack_buffer); @@ -106,7 +106,7 @@ void init_keyvi_match(const py::module_ &m) { return py::bytes(msgpack_buffer.data(), msgpack_buffer.size()); }) .def_static("loads", - [](const std::string_view &serialized_match) -> kd::Match { + [](const std::string_view& serialized_match) -> kd::Match { kd::Match match; msgpack::object_handle handle = msgpack::unpack(serialized_match.data(), serialized_match.size()); msgpack::object obj = handle.get(); @@ -117,7 +117,7 @@ void init_keyvi_match(const py::module_ &m) { } // Get the array elements - const msgpack::object *array = obj.via.array.ptr; + const msgpack::object* array = obj.via.array.ptr; uint32_t size = obj.via.array.size; if (size > 5) { @@ -146,11 +146,11 @@ void init_keyvi_match(const py::module_ &m) { array[0].convert(value); match.SetRawValue(value); } - } catch (const msgpack::type_error &e) { + } catch (const msgpack::type_error& e) { throw std::invalid_argument("not a serialized match, unexpected element types"); } return match; }) .def_property_readonly("weight", &kd::Match::GetWeight) - .def("__bool__", [](const kd::Match &m) -> bool { return !m.IsEmpty(); }); + .def("__bool__", [](const kd::Match& m) -> bool { return !m.IsEmpty(); }); } diff --git a/python-pybind/src/dictionary/py_match_iterator.h b/python-pybind/src/dictionary/py_match_iterator.h index 21500d931..d1e6c29c7 100644 --- 
a/python-pybind/src/dictionary/py_match_iterator.h +++ b/python-pybind/src/dictionary/py_match_iterator.h @@ -28,14 +28,14 @@ namespace pybind { // adapted from pybind11.h template -pybind11::iterator make_match_iterator_impl(Iterator first, Sentinel last, Extra &&...extra) { +pybind11::iterator make_match_iterator_impl(Iterator first, Sentinel last, Extra&&... extra) { using state = pybind11::detail::iterator_state; if (!pybind11::detail::get_type_info(typeid(state), false)) { pybind11::class_(pybind11::handle(), "iterator", pybind11::module_local()) - .def("__iter__", [](state &s) -> state & { return s; }) + .def("__iter__", [](state& s) -> state& { return s; }) .def( "__next__", - [](state &s) -> ValueType { + [](state& s) -> ValueType { { // release GIL as incrementing the iterator can be expensive, e.g. for fuzzy match pybind11::gil_scoped_release no_gil; @@ -53,7 +53,7 @@ pybind11::iterator make_match_iterator_impl(Iterator first, Sentinel last, Extra return Access()(s.it); }, std::forward(extra)..., Policy) - .def("set_min_weight", [](state &s, const uint32_t min_weight) -> void { s.it.SetMinWeight(min_weight); }); + .def("set_min_weight", [](state& s, const uint32_t min_weight) -> void { s.it.SetMinWeight(min_weight); }); } return pybind11::cast(state{std::forward(first), std::forward(last), true}); @@ -63,7 +63,7 @@ pybind11::iterator make_match_iterator_impl(Iterator first, Sentinel last, Extra template ::result_type, typename... Extra> -pybind11::typing::Iterator make_match_iterator(Iterator first, Sentinel last, Extra &&...extra) { +pybind11::typing::Iterator make_match_iterator(Iterator first, Sentinel last, Extra&&... 
extra) { return make_match_iterator_impl, Policy, Iterator, Sentinel, ValueType, Extra...>(std::forward(first), std::forward(last), std::forward(extra)...); diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp index 2b80aed3e..49e135dfa 100644 --- a/python-pybind/src/py_keyvi.cpp +++ b/python-pybind/src/py_keyvi.cpp @@ -27,9 +27,9 @@ namespace py = pybind11; namespace kd = keyvi::dictionary; -void init_keyvi_dictionary(const py::module_ &); -void init_keyvi_dictionary_compilers(const py::module_ &); -void init_keyvi_match(const py::module_ &); +void init_keyvi_dictionary(const py::module_&); +void init_keyvi_dictionary_compilers(const py::module_&); +void init_keyvi_match(const py::module_&); PYBIND11_MODULE(keyvi2, m, py::mod_gil_not_used()) { m.doc() = R"pbdoc( From dd1af35ef73ec35250611731c015cd3194d730cc Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sat, 25 Oct 2025 22:36:48 +0200 Subject: [PATCH 44/61] test NOLINT block --- python-pybind/src/compiler/py_dictionary_compilers.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp index 987e9393e..ac3ef5c12 100644 --- a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -15,9 +15,11 @@ * limitations under the License. 
*/ +// NOLINTBEGIN #include #include #include +// NOLINTEND #include #include From 8018d0338066a0d25d1dfefeabf2d42cbbc1674e Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sat, 25 Oct 2025 22:57:22 +0200 Subject: [PATCH 45/61] try to make clang-tidy happy --- .github/workflows/clang-tidy-review.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/clang-tidy-review.yml b/.github/workflows/clang-tidy-review.yml index 9d5e28739..0045dccf4 100644 --- a/.github/workflows/clang-tidy-review.yml +++ b/.github/workflows/clang-tidy-review.yml @@ -18,7 +18,7 @@ jobs: with: split_workflow: true build_dir: build - apt_packages: "libsnappy-dev, libzzip-dev, zlib1g-dev, libboost-all-dev, libzstd-dev" + apt_packages: "libsnappy-dev, libzzip-dev, zlib1g-dev, libboost-all-dev, libzstd-dev, pybind11-dev" clang_tidy_checks: '' cmake_command: "cmake -Bbuild -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ." From 18c5a389488c0925c3590edc325ed581ddd3ab79 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sat, 25 Oct 2025 23:38:20 +0200 Subject: [PATCH 46/61] exclude python-pybind for clang-tidy --- .github/workflows/clang-tidy-review.yml | 3 ++- .gitignore | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/clang-tidy-review.yml b/.github/workflows/clang-tidy-review.yml index 0045dccf4..01a771db5 100644 --- a/.github/workflows/clang-tidy-review.yml +++ b/.github/workflows/clang-tidy-review.yml @@ -18,7 +18,8 @@ jobs: with: split_workflow: true build_dir: build - apt_packages: "libsnappy-dev, libzzip-dev, zlib1g-dev, libboost-all-dev, libzstd-dev, pybind11-dev" + exclude: 'python-pybind/*' + apt_packages: "libsnappy-dev, libzzip-dev, zlib1g-dev, libboost-all-dev, libzstd-dev" clang_tidy_checks: '' cmake_command: "cmake -Bbuild -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ." 
diff --git a/.gitignore b/.gitignore index e13d3cdcd..d1e88a552 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,7 @@ cmake-build-debug/ # python *.egg-info +.venv # pybind build folder python*/*build* From 5b818a0e0c6e05c210950afde474baf4e6c6c453 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 26 Oct 2025 08:57:25 +0100 Subject: [PATCH 47/61] Revert "test NOLINT block" This reverts commit dd1af35ef73ec35250611731c015cd3194d730cc. --- python-pybind/src/compiler/py_dictionary_compilers.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp b/python-pybind/src/compiler/py_dictionary_compilers.cpp index ac3ef5c12..987e9393e 100644 --- a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -15,11 +15,9 @@ * limitations under the License. */ -// NOLINTBEGIN #include #include #include -// NOLINTEND #include #include From 43d866b6c9ec769e509d0861b9e9e497c83aa80d Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 26 Oct 2025 20:31:36 +0100 Subject: [PATCH 48/61] disable free threading --- .github/workflows/python-cibuildwheel.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-cibuildwheel.yml b/.github/workflows/python-cibuildwheel.yml index 084544d62..fb1bb68f6 100644 --- a/.github/workflows/python-cibuildwheel.yml +++ b/.github/workflows/python-cibuildwheel.yml @@ -94,8 +94,8 @@ jobs: - name: Build python wheels for ${{ matrix.os }} uses: pypa/cibuildwheel@v3.2.1 env: - # Skip CPython 3.8 - CIBW_SKIP: ${{ env.CIBW_SKIP }} cp38-* + # Skip CPython 3.8 and free threading + CIBW_SKIP: ${{ env.CIBW_SKIP }} cp38-* cp3??t-* # only build native packages CIBW_ARCHS: native From 16930bed0017a491f0c7c9ce6916f90eed428002 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 26 Oct 2025 22:20:27 +0100 Subject: [PATCH 49/61] implement keyvi module overwrite in test --- python/tests/conftest.py | 13 
+++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 python/tests/conftest.py diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 000000000..9a6258514 --- /dev/null +++ b/python/tests/conftest.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +import importlib +import os +import sys + + +# patch keyvi imports to an alternative module for testing purposes +if keyvi2_module_name := os.getenv("KEYVI_MODULE_OVERWRITE"): + for sub in ("", ".dictionary", ".compiler", ".completion"): + sub_module_name = "keyvi" + sub + keyvi2_sub_module_name = keyvi2_module_name + sub + sys.modules[sub_module_name] = importlib.import_module(keyvi2_sub_module_name) From ff7584c9fdf1e4515cbfd9eac01ec3e867356a77 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 26 Oct 2025 22:20:41 +0100 Subject: [PATCH 50/61] add completion stub --- python-pybind/src/py_keyvi.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp index 49e135dfa..ab54abf9d 100644 --- a/python-pybind/src/py_keyvi.cpp +++ b/python-pybind/src/py_keyvi.cpp @@ -66,6 +66,7 @@ PYBIND11_MODULE(keyvi2, m, py::mod_gil_not_used()) { init_keyvi_dictionary(keyvi_dictionary); py::module keyvi_compilers = m.def_submodule("compiler", "keyvi2.compiler"); init_keyvi_dictionary_compilers(keyvi_compilers); + py::module keyvi_completion = m.def_submodule("completion", "keyvi2.completion"); #ifdef VERSION_INFO m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); From 7cfa6442a99bb7f1c362d493e0af1a2e3e92b792 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Sun, 26 Oct 2025 22:22:47 +0100 Subject: [PATCH 51/61] fix import --- python/tests/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 9a6258514..5b82e3ffd 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -4,7 +4,6 @@ import os import sys - # patch keyvi imports to an 
alternative module for testing purposes if keyvi2_module_name := os.getenv("KEYVI_MODULE_OVERWRITE"): for sub in ("", ".dictionary", ".compiler", ".completion"): From 076c1aa6e0c4d711ef164ac9d2b4d546ee6bd425 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 17 Nov 2025 23:11:33 +0100 Subject: [PATCH 52/61] add placeholders for missing submodules --- .github/workflows/python-cibuildwheel-pybind.yml | 4 ++-- pyproject.toml | 6 +++++- python-pybind/src/compiler/py_dictionary_compilers.cpp | 3 +++ python-pybind/src/dictionary/py_dictionary.cpp | 3 +++ python-pybind/src/py_keyvi.cpp | 7 +++++++ python/tests/conftest.py | 2 +- 6 files changed, 21 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-cibuildwheel-pybind.yml b/.github/workflows/python-cibuildwheel-pybind.yml index 6bcfef1a8..bbbbe0251 100644 --- a/.github/workflows/python-cibuildwheel-pybind.yml +++ b/.github/workflows/python-cibuildwheel-pybind.yml @@ -135,5 +135,5 @@ jobs: pipx run build --sdist python -m pip install dist/*.tar.gz -v && \ python -m pip install pytest && \ - python -m pytest python-pybind/tests && \ - python -m pip uninstall -y keyvi_pybind11 + KEYVI_SKIP_TEST_DEPRECATIONS=1 KEYVI_MODULE_OVERWRITE=keyvi2 python -m pytest python/tests && \ + python -m pip uninstall -y keyvi2 diff --git a/pyproject.toml b/pyproject.toml index e816517cc..43e89cbdf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,10 @@ KEYVI_CLANG_TIDY = "OFF" KEYVI_DOCS = "OFF" [tool.pytest.ini_options] +env = [ + "KEYVI_SKIP_TEST_DEPRECATIONS = 1", + "KEYVI_MODULE_OVERWRITE=keyvi2" +] minversion = "7.1.1" addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true @@ -46,7 +50,7 @@ filterwarnings = [ "error", "ignore::pytest.PytestCacheWarning", ] -testpaths = ["python-pybind/tests"] +testpaths = ["python/tests"] [tool.cibuildwheel] build-frontend = "default" diff --git a/python-pybind/src/compiler/py_dictionary_compilers.cpp 
b/python-pybind/src/compiler/py_dictionary_compilers.cpp index 987e9393e..a9a2db88d 100644 --- a/python-pybind/src/compiler/py_dictionary_compilers.cpp +++ b/python-pybind/src/compiler/py_dictionary_compilers.cpp @@ -118,6 +118,9 @@ void init_keyvi_dictionary_compilers(const py::module_& module) { CREATE_SK_COMPILER(kd::SecondaryKeyStringDictionaryCompiler, "SecondaryKeyStringDictionaryCompiler"); CREATE_MERGER(kd::CompletionDictionaryMerger, "CompletionDictionaryMerger"); CREATE_MERGER(kd::IntDictionaryMerger, "IntDictionaryMerger"); + CREATE_MERGER(kd::JsonDictionaryMerger, "JsonDictionaryMerger"); + CREATE_MERGER(kd::KeyOnlyDictionaryMerger, "KeyOnlyDictionaryMerger"); + CREATE_MERGER(kd::StringDictionaryMerger, "StringDictionaryMerger"); #undef CREATE_COMPILER } diff --git a/python-pybind/src/dictionary/py_dictionary.cpp b/python-pybind/src/dictionary/py_dictionary.cpp index 28c117c15..e81ee33f8 100644 --- a/python-pybind/src/dictionary/py_dictionary.cpp +++ b/python-pybind/src/dictionary/py_dictionary.cpp @@ -21,6 +21,7 @@ #include "keyvi/dictionary/dictionary.h" #include "keyvi/dictionary/match.h" +#include "keyvi/dictionary/secondary_key_dictionary.h" #include "py_match_iterator.h" @@ -129,4 +130,6 @@ void init_keyvi_dictionary(const py::module_& m) { return kpy::make_match_iterator(m.begin(), m.end()); }) .def("search", &kd::Dictionary::Lookup); + + py::class_(m, "SecondaryKeyDictionary"); } diff --git a/python-pybind/src/py_keyvi.cpp b/python-pybind/src/py_keyvi.cpp index ab54abf9d..486f7b00b 100644 --- a/python-pybind/src/py_keyvi.cpp +++ b/python-pybind/src/py_keyvi.cpp @@ -30,6 +30,8 @@ namespace kd = keyvi::dictionary; void init_keyvi_dictionary(const py::module_&); void init_keyvi_dictionary_compilers(const py::module_&); void init_keyvi_match(const py::module_&); +void init_keyvi_completion(const py::module_&); +void init_keyvi_index(const py::module_&); PYBIND11_MODULE(keyvi2, m, py::mod_gil_not_used()) { m.doc() = R"pbdoc( @@ -67,6 +69,11 @@ 
PYBIND11_MODULE(keyvi2, m, py::mod_gil_not_used()) { py::module keyvi_compilers = m.def_submodule("compiler", "keyvi2.compiler"); init_keyvi_dictionary_compilers(keyvi_compilers); py::module keyvi_completion = m.def_submodule("completion", "keyvi2.completion"); + init_keyvi_completion(keyvi_completion); + py::module keyvi_index = m.def_submodule("index", "keyvi2.index"); + init_keyvi_index(keyvi_index); + py::module keyvi_util = m.def_submodule("util", "keyvi2.util"); + py::module keyvi_vector = m.def_submodule("vector", "keyvi2.vector"); #ifdef VERSION_INFO m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 5b82e3ffd..a57fa35a1 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -6,7 +6,7 @@ # patch keyvi imports to an alternative module for testing purposes if keyvi2_module_name := os.getenv("KEYVI_MODULE_OVERWRITE"): - for sub in ("", ".dictionary", ".compiler", ".completion"): + for sub in ("", ".dictionary", ".compiler", ".completion", ".index", ".util", ".vector"): sub_module_name = "keyvi" + sub keyvi2_sub_module_name = keyvi2_module_name + sub sys.modules[sub_module_name] = importlib.import_module(keyvi2_sub_module_name) From d986c40b80ff5b01f4da58a34edda5efe5953ed0 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 17 Nov 2025 23:11:54 +0100 Subject: [PATCH 53/61] disable 2 tests --- python/tests/index/merger_binary_test.py | 10 +++++----- python/tests/utils/jump_consistent_hash_test.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/tests/index/merger_binary_test.py b/python/tests/index/merger_binary_test.py index b894370e7..a2e8a64c8 100644 --- a/python/tests/index/merger_binary_test.py +++ b/python/tests/index/merger_binary_test.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- # Usage: py.test tests -from keyvi._core import get_package_root, get_interpreter_executable +#from keyvi._core import get_package_root, 
get_interpreter_executable import os.path import subprocess import os -def test_merger_binary(): - cmd = get_interpreter_executable() + b" " + os.path.join(get_package_root(), b"_pycore" , b"keyvimerger.py") + b" -h" - rc = subprocess.call(cmd, shell=True) - assert rc == 0 +#def test_merger_binary(): +# cmd = get_interpreter_executable() + b" " + os.path.join(get_package_root(), b"_pycore" , b"keyvimerger.py") + b" -h" +# rc = subprocess.call(cmd, shell=True) +# assert rc == 0 diff --git a/python/tests/utils/jump_consistent_hash_test.py b/python/tests/utils/jump_consistent_hash_test.py index 934b6d242..49246a7f2 100644 --- a/python/tests/utils/jump_consistent_hash_test.py +++ b/python/tests/utils/jump_consistent_hash_test.py @@ -3,11 +3,11 @@ import sys -from keyvi.util import JumpConsistentHashString +#from keyvi.util import JumpConsistentHashString -def test_jump_consistent_hash(): - assert JumpConsistentHashString('some string', 117) == 60 +#def test_jump_consistent_hash(): +# assert JumpConsistentHashString('some string', 117) == 60 # test unicode on Python 2 only - if sys.version_info[0] == 2: - assert JumpConsistentHashString(u'some string', 117) == 60 +# if sys.version_info[0] == 2: +# assert JumpConsistentHashString(u'some string', 117) == 60 From df6de22f9ab9b35fd7903829e7fb49dda2235513 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 17 Nov 2025 23:20:52 +0100 Subject: [PATCH 54/61] add pytest-env --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 43e89cbdf..2f2554d83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ lint = [ ] test = [ 'pytest>=8.3.5', + 'pytest-env>=1.2.0', 'python-snappy>=0.7.3', 'zstd>=1.5.7.2' ] From 24fccdb07124b947d785f3290ab306f3085ead6c Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 17 Nov 2025 23:37:51 +0100 Subject: [PATCH 55/61] downgrade pytest-env --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pyproject.toml b/pyproject.toml index 2f2554d83..076724a66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ lint = [ ] test = [ 'pytest>=8.3.5', - 'pytest-env>=1.2.0', + 'pytest-env>=1.1.5', 'python-snappy>=0.7.3', 'zstd>=1.5.7.2' ] From a8e49a5caf8a814920aad91318ef63d0a8a0ae2f Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Mon, 17 Nov 2025 23:49:28 +0100 Subject: [PATCH 56/61] add missing files --- .../src/completion/py_completion.cpp | 37 +++++++++++++++++++ python-pybind/src/index/py_index.cpp | 34 +++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 python-pybind/src/completion/py_completion.cpp create mode 100644 python-pybind/src/index/py_index.cpp diff --git a/python-pybind/src/completion/py_completion.cpp b/python-pybind/src/completion/py_completion.cpp new file mode 100644 index 000000000..1066ab557 --- /dev/null +++ b/python-pybind/src/completion/py_completion.cpp @@ -0,0 +1,37 @@ +/* keyvi - A key value store. + * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include "keyvi/dictionary/completion/forward_backward_completion.h" +#include "keyvi/dictionary/completion/multiword_completion.h" +#include "keyvi/dictionary/completion/prefix_completion.h" +#include "keyvi/dictionary/match.h" + +//#include "../py_match_iterator.h" + +namespace py = pybind11; +namespace kdc = keyvi::dictionary::completion; +//namespace kpy = keyvi::pybind; + +void init_keyvi_completion(const py::module_& module) { + py::class_(module, "ForwardBackwardCompletion"); + py::class_(module, "MultiWordCompletion"); + py::class_(module, "PrefixCompletion"); +} diff --git a/python-pybind/src/index/py_index.cpp b/python-pybind/src/index/py_index.cpp new file mode 100644 index 000000000..8b89d4c67 --- /dev/null +++ b/python-pybind/src/index/py_index.cpp @@ -0,0 +1,34 @@ +/* keyvi - A key value store. + * + * Copyright 2024 Hendrik Muhs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include "keyvi/index/index.h" +#include "keyvi/index/read_only_index.h" + +//#include "../py_match_iterator.h" + +namespace py = pybind11; +namespace ki = keyvi::index; +//namespace kpy = keyvi::pybind; + +void init_keyvi_index(const py::module_& module) { + py::class_(module, "Index"); + py::class_(module, "ReadOnlyIndex"); +} From 4444d29f20ee96e7c897f4a8d727df06c0bab565 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 18 Nov 2025 20:09:12 +0100 Subject: [PATCH 57/61] remove separate test files --- python-pybind/tests/match_object_test.py | 252 ----------------------- python-pybind/tests/test_tools.py | 21 -- 2 files changed, 273 deletions(-) delete mode 100644 python-pybind/tests/match_object_test.py delete mode 100644 python-pybind/tests/test_tools.py diff --git a/python-pybind/tests/match_object_test.py b/python-pybind/tests/match_object_test.py deleted file mode 100644 index aa65a90cc..000000000 --- a/python-pybind/tests/match_object_test.py +++ /dev/null @@ -1,252 +0,0 @@ -# -*- coding: utf-8 -*- -# Usage: py.test tests - -import keyvi2 as keyvi -import msgpack -from test_tools import tmp_dictionary -import warnings -import zlib -import snappy -import zstd - -from keyvi2.compiler import ( - JsonDictionaryCompiler, - CompletionDictionaryCompiler, - KeyOnlyDictionaryCompiler, - StringDictionaryCompiler, -) - - -def test_serialization(): - m = keyvi.Match() - m.start = 22 - m.end = 30 - m.score = 42 - d = m.dumps() - m2 = keyvi.Match.loads(d) - assert m2.start == 22 - assert m2.end == 30 - assert m2.score == 42 - - -def test_raw_serialization(): - c = JsonDictionaryCompiler({"memory_limit_mb": "10"}) - c.add("abc", '{"a" : 2}') - c.add("abd", '{"a" : 3}') - with tmp_dictionary(c, "match_object_json.kv") as d: - m = d["abc"] - assert m.value_as_string() == '{"a":2}' - d = m.dumps() - m2 = keyvi.Match.loads(d) - assert m2.value_as_string() == '{"a":2}' - assert msgpack.loads(m.msgpacked_value_as_string()) == {"a": 2} - with 
warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - assert m.GetValueAsString() == '{"a":2}' - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - - -def test_unicode_attributes(): - m = keyvi.Match() - m["küy"] = 22 - assert m["küy"] == 22 - m["k2"] = " 吃饭了吗" - m.score = 99 - assert m["k2"] == " 吃饭了吗" - assert m.score == 99.0 - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - m.SetAttribute("k2", "öäü") - assert m["k2"] == "öäü" - assert m.GetAttribute("k2") == "öäü" - assert len(w) == 2 - assert issubclass(w[0].category, DeprecationWarning) - assert issubclass(w[1].category, DeprecationWarning) - - -def test_bytes_attributes(): - m = keyvi.Match() - bytes_key = bytes("äöü".encode("utf-8")) - bytes_value = bytes("äöüöäü".encode("utf-8")) - m[bytes_key] = 22 - assert m[bytes_key] == 22 - m["k2"] = bytes_value - assert m["k2"] == "äöüöäü" - - -def test_double_attributes(): - m = keyvi.Match() - bytes_key = bytes("abc".encode("utf-8")) - m[bytes_key] = 42.0 - assert m[bytes_key] == 42.0 - - -def test_boolean_attributes(): - m = keyvi.Match() - bytes_key = bytes("def".encode("utf-8")) - m[bytes_key] = True - assert m[bytes_key] == True - - -def test_start(): - m = keyvi.Match() - m.start = 42 - assert m.start == 42 - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - m.SetStart(44) - assert m.start == 44 - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - - -def test_end(): - m = keyvi.Match() - m.end = 49 - assert m.end == 49 - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - m.SetEnd(55) - assert m.end == 55 - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - - -def test_score(): - m = keyvi.Match() - m.score = 149 - assert m.score == 149 - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - m.SetScore(155) - assert m.score == 
155 - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - - -def test_get_value(): - c = JsonDictionaryCompiler({"memory_limit_mb": "10"}) - c.add("abc", '{"a" : 2}') - c.add("abd", '{"a" : 3}') - with tmp_dictionary(c, "match_object_json.kv") as d: - m = d["abc"] - assert m.value == {"a": 2} - m = d["abd"] - assert m.value == {"a": 3} - assert msgpack.loads(m.msgpacked_value_as_string()) == {"a": 3} - assert msgpack.loads( - zlib.decompress( - m.msgpacked_value_as_string(keyvi.CompressionAlgorithm.ZLIB_COMPRESSION) - ) - ) == {"a": 3} - assert msgpack.loads( - snappy.decompress( - m.msgpacked_value_as_string( - keyvi.CompressionAlgorithm.SNAPPY_COMPRESSION - ) - ) - ) == {"a": 3} - assert msgpack.loads( - zstd.decompress( - m.msgpacked_value_as_string(keyvi.CompressionAlgorithm.ZSTD_COMPRESSION) - ) - ) == {"a": 3} - assert msgpack.loads( - m.msgpacked_value_as_string(keyvi.CompressionAlgorithm.NO_COMPRESSION) - ) == {"a": 3} - - -def test_get_value_int(): - c = CompletionDictionaryCompiler({"memory_limit_mb": "10"}) - c.add("abc", 42) - c.add("abd", 21) - with tmp_dictionary(c, "match_object_int.kv") as d: - m = d["abc"] - assert m.value == 42 - m = d["abd"] - assert m.value == 21 - assert msgpack.loads(m.msgpacked_value_as_string()) == 21 - assert ( - msgpack.loads( - zlib.decompress( - m.msgpacked_value_as_string( - keyvi.CompressionAlgorithm.ZLIB_COMPRESSION - ) - ) - ) - == 21 - ) - - -def test_get_value_key_only(): - c = KeyOnlyDictionaryCompiler({"memory_limit_mb": "10"}) - c.add("abc") - c.add("abd") - with tmp_dictionary(c, "match_object_key_only.kv") as d: - m = d["abc"] - assert m.value is None - m = d["abd"] - assert m.value is None - assert msgpack.loads(m.msgpacked_value_as_string()) is None - assert ( - msgpack.loads( - zlib.decompress( - m.msgpacked_value_as_string( - keyvi.CompressionAlgorithm.ZLIB_COMPRESSION - ) - ) - ) - is None - ) - - -def test_get_value_string(): - c = StringDictionaryCompiler({"memory_limit_mb": 
"10"}) - c.add("abc", "aaaaa") - c.add("abd", "bbbbb") - c.add("abe", "{}") - with tmp_dictionary(c, "match_object_string.kv") as d: - m = d["abc"] - assert m.value == "aaaaa" - m = d["abd"] - assert m.value == "bbbbb" - assert msgpack.loads(m.msgpacked_value_as_string()) == "bbbbb" - assert ( - msgpack.loads( - zlib.decompress( - m.msgpacked_value_as_string( - keyvi.CompressionAlgorithm.ZLIB_COMPRESSION - ) - ) - ) - == "bbbbb" - ) - m = d["abe"] - # gh#333: keyvi < 0.6.4 returned a dictionary instead of a string - assert m.value == "{}" - assert isinstance(m.value, str) - - -def test_matched_string(): - m = keyvi.Match() - m.matched_string = "match" - assert m.matched_string == "match" - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - m.SetMatchedString("other_match") - assert m.matched_string == "other_match" - assert len(w) == 1 - assert issubclass(w[-1].category, DeprecationWarning) - - -def test_bool_operator(): - m = keyvi.Match() - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - assert m.IsEmpty() - assert issubclass(w[-1].category, DeprecationWarning) - assert not m - m.end = 42 - assert not m is False - assert m diff --git a/python-pybind/tests/test_tools.py b/python-pybind/tests/test_tools.py deleted file mode 100644 index 9f80f3c30..000000000 --- a/python-pybind/tests/test_tools.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- -# some common tools for tests - -import contextlib -import os -import tempfile - -from keyvi2.dictionary import Dictionary - - -@contextlib.contextmanager -def tmp_dictionary(compiler, file_name): - tmp_dir = tempfile.gettempdir() - fq_file_name = os.path.join(tmp_dir, file_name) - compiler.compile() - compiler.write_to_file(fq_file_name) - del compiler - d = Dictionary(fq_file_name) - yield d - del d - os.remove(fq_file_name) From 199e6d2a2fa3dfc5ffb60f92fdefdb85db98422b Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 18 Nov 2025 20:29:40 
+0100 Subject: [PATCH 58/61] ruff fixes --- python/tests/index/merger_binary_test.py | 9 +++------ python/tests/utils/jump_consistent_hash_test.py | 3 +-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/python/tests/index/merger_binary_test.py b/python/tests/index/merger_binary_test.py index a2e8a64c8..d1499f6a2 100644 --- a/python/tests/index/merger_binary_test.py +++ b/python/tests/index/merger_binary_test.py @@ -1,14 +1,11 @@ -# -*- coding: utf-8 -*- # Usage: py.test tests -#from keyvi._core import get_package_root, get_interpreter_executable +# from keyvi._core import get_package_root, get_interpreter_executable +from __future__ import annotations -import os.path -import subprocess -import os -#def test_merger_binary(): +# def test_merger_binary(): # cmd = get_interpreter_executable() + b" " + os.path.join(get_package_root(), b"_pycore" , b"keyvimerger.py") + b" -h" # rc = subprocess.call(cmd, shell=True) # assert rc == 0 diff --git a/python/tests/utils/jump_consistent_hash_test.py b/python/tests/utils/jump_consistent_hash_test.py index 49246a7f2..43e8fa783 100644 --- a/python/tests/utils/jump_consistent_hash_test.py +++ b/python/tests/utils/jump_consistent_hash_test.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- # Usage: py.test tests +from __future__ import annotations -import sys #from keyvi.util import JumpConsistentHashString From 45dbae161a85c1cce16583e7392bacc357aa55e3 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 18 Nov 2025 20:31:25 +0100 Subject: [PATCH 59/61] pre-commit --- python-pybind/src/completion/py_completion.cpp | 10 +++++----- python-pybind/src/index/py_index.cpp | 8 ++++---- python/tests/conftest.py | 10 +++++++++- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/python-pybind/src/completion/py_completion.cpp b/python-pybind/src/completion/py_completion.cpp index 1066ab557..114fbe59d 100644 --- a/python-pybind/src/completion/py_completion.cpp +++ b/python-pybind/src/completion/py_completion.cpp @@ -24,14 
+24,14 @@ #include "keyvi/dictionary/completion/prefix_completion.h" #include "keyvi/dictionary/match.h" -//#include "../py_match_iterator.h" +// #include "../py_match_iterator.h" namespace py = pybind11; namespace kdc = keyvi::dictionary::completion; -//namespace kpy = keyvi::pybind; +// namespace kpy = keyvi::pybind; void init_keyvi_completion(const py::module_& module) { - py::class_(module, "ForwardBackwardCompletion"); - py::class_(module, "MultiWordCompletion"); - py::class_(module, "PrefixCompletion"); + py::class_(module, "ForwardBackwardCompletion"); + py::class_(module, "MultiWordCompletion"); + py::class_(module, "PrefixCompletion"); } diff --git a/python-pybind/src/index/py_index.cpp b/python-pybind/src/index/py_index.cpp index 8b89d4c67..63731871f 100644 --- a/python-pybind/src/index/py_index.cpp +++ b/python-pybind/src/index/py_index.cpp @@ -22,13 +22,13 @@ #include "keyvi/index/index.h" #include "keyvi/index/read_only_index.h" -//#include "../py_match_iterator.h" +// #include "../py_match_iterator.h" namespace py = pybind11; namespace ki = keyvi::index; -//namespace kpy = keyvi::pybind; +// namespace kpy = keyvi::pybind; void init_keyvi_index(const py::module_& module) { - py::class_(module, "Index"); - py::class_(module, "ReadOnlyIndex"); + py::class_(module, "Index"); + py::class_(module, "ReadOnlyIndex"); } diff --git a/python/tests/conftest.py b/python/tests/conftest.py index a57fa35a1..ac8bb3a10 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -6,7 +6,15 @@ # patch keyvi imports to an alternative module for testing purposes if keyvi2_module_name := os.getenv("KEYVI_MODULE_OVERWRITE"): - for sub in ("", ".dictionary", ".compiler", ".completion", ".index", ".util", ".vector"): + for sub in ( + "", + ".dictionary", + ".compiler", + ".completion", + ".index", + ".util", + ".vector", + ): sub_module_name = "keyvi" + sub keyvi2_sub_module_name = keyvi2_module_name + sub sys.modules[sub_module_name] = 
importlib.import_module(keyvi2_sub_module_name) From 700cf8d5eb717a7cac743759c939598edbc8350c Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 18 Nov 2025 20:34:30 +0100 Subject: [PATCH 60/61] pre-commit again --- python/tests/index/merger_binary_test.py | 1 - python/tests/utils/jump_consistent_hash_test.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/python/tests/index/merger_binary_test.py b/python/tests/index/merger_binary_test.py index d1499f6a2..84f437806 100644 --- a/python/tests/index/merger_binary_test.py +++ b/python/tests/index/merger_binary_test.py @@ -4,7 +4,6 @@ from __future__ import annotations - # def test_merger_binary(): # cmd = get_interpreter_executable() + b" " + os.path.join(get_package_root(), b"_pycore" , b"keyvimerger.py") + b" -h" # rc = subprocess.call(cmd, shell=True) diff --git a/python/tests/utils/jump_consistent_hash_test.py b/python/tests/utils/jump_consistent_hash_test.py index 43e8fa783..83e1257b5 100644 --- a/python/tests/utils/jump_consistent_hash_test.py +++ b/python/tests/utils/jump_consistent_hash_test.py @@ -2,11 +2,11 @@ from __future__ import annotations -#from keyvi.util import JumpConsistentHashString +# from keyvi.util import JumpConsistentHashString -#def test_jump_consistent_hash(): +# def test_jump_consistent_hash(): # assert JumpConsistentHashString('some string', 117) == 60 - # test unicode on Python 2 only +# test unicode on Python 2 only # if sys.version_info[0] == 2: # assert JumpConsistentHashString(u'some string', 117) == 60 From 5ba857f18e2baf56e9a4e64a84c2ca42798d8d9c Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 18 Nov 2025 20:45:49 +0100 Subject: [PATCH 61/61] set config-file --- .github/workflows/python-cibuildwheel.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python-cibuildwheel.yml b/.github/workflows/python-cibuildwheel.yml index 3cd1ec7b9..e8da35054 100644 --- a/.github/workflows/python-cibuildwheel.yml +++ 
b/.github/workflows/python-cibuildwheel.yml @@ -125,6 +125,7 @@ jobs: # for debugging set this to 1,2 or 3 # CIBW_BUILD_VERBOSITY: 2 with: + config-file: "{package}/pyproject.toml" package-dir: python - uses: actions/upload-artifact@v5