diff --git a/.github/workflows/ci-freebsd.yml b/.github/workflows/ci-freebsd.yml
index 2b6a42286c8..6b815923f7a 100644
--- a/.github/workflows/ci-freebsd.yml
+++ b/.github/workflows/ci-freebsd.yml
@@ -113,6 +113,9 @@ jobs:
               devel/pkgconf \
               ftp/curl \
               graphics/libdrm \
+              graphics/shaderc \
+              graphics/vulkan-headers \
+              graphics/vulkan-loader \
               graphics/wayland \
               lang/python314 \
               multimedia/libva \
diff --git a/cmake/compile_definitions/linux.cmake b/cmake/compile_definitions/linux.cmake
index 89769acf3e1..c40c5460b3b 100644
--- a/cmake/compile_definitions/linux.cmake
+++ b/cmake/compile_definitions/linux.cmake
@@ -120,6 +120,79 @@ if(LIBVA_FOUND)
             "${CMAKE_SOURCE_DIR}/src/platform/linux/vaapi.cpp")
 endif()
 
+# vulkan video encoding (via FFmpeg)
+# Needs libvulkan plus a GLSL -> SPIR-V compiler on the build host; configuration
+# stops with FATAL_ERROR when any prerequisite is missing.
+if(${SUNSHINE_ENABLE_VULKAN})
+    # use Vulkan headers from build-deps submodule (system headers may be too old, e.g. Ubuntu 22.04)
+    set(VULKAN_HEADERS_DIR "${CMAKE_SOURCE_DIR}/third-party/build-deps/third-party/FFmpeg/Vulkan-Headers/include")
+    if(NOT EXISTS "${VULKAN_HEADERS_DIR}/vulkan/vulkan.h")
+        message(FATAL_ERROR "Vulkan headers not found in build-deps submodule")
+    endif()
+
+    find_library(VULKAN_LIBRARY NAMES vulkan vulkan-1)
+    if(NOT VULKAN_LIBRARY)
+        message(FATAL_ERROR "libvulkan not found")
+    endif()
+
+    # prefer glslc, fall back to glslangValidator
+    find_program(GLSLC_EXECUTABLE glslc)
+    if(NOT GLSLC_EXECUTABLE)
+        find_program(GLSLANG_EXECUTABLE glslangValidator)
+    endif()
+    if(NOT GLSLC_EXECUTABLE AND NOT GLSLANG_EXECUTABLE)
+        message(FATAL_ERROR "Vulkan shader compiler not found (need glslc or glslangValidator)")
+    endif()
+
+    list(APPEND SUNSHINE_DEFINITIONS SUNSHINE_BUILD_VULKAN=1)
+    include_directories(SYSTEM "${VULKAN_HEADERS_DIR}")
+    list(APPEND PLATFORM_LIBRARIES ${VULKAN_LIBRARY})
+    list(APPEND PLATFORM_TARGET_FILES
+            "${CMAKE_SOURCE_DIR}/src/platform/linux/vulkan_encode.h"
+            "${CMAKE_SOURCE_DIR}/src/platform/linux/vulkan_encode.cpp")
+
+    # compile GLSL -> SPIR-V -> C include at build time
+    set(VULKAN_SHADER_DIR "${CMAKE_BINARY_DIR}/generated-src/shaders")
+    set(VULKAN_SHADER_SOURCE "${SUNSHINE_SOURCE_ASSETS_DIR}/linux/assets/shaders/vulkan/rgb2yuv.comp")
+    set(VULKAN_SHADER_SPV "${VULKAN_SHADER_DIR}/rgb2yuv.spv")
+    set(VULKAN_SHADER_DATA "${VULKAN_SHADER_DIR}/rgb2yuv.spv.inc")
+    set(VULKAN_SHADER_CONVERTER "${CMAKE_SOURCE_DIR}/cmake/scripts/binary_to_c.cmake")
+
+    file(MAKE_DIRECTORY "${VULKAN_SHADER_DIR}")
+
+    # both compilers write the same output file; only the command line differs
+    if(GLSLC_EXECUTABLE)
+        set(VULKAN_SHADER_COMPILER_NAME "glslc")
+        set(VULKAN_SHADER_COMPILE_COMMAND
+                "${GLSLC_EXECUTABLE}" -O "${VULKAN_SHADER_SOURCE}" -o "${VULKAN_SHADER_SPV}")
+    else()
+        set(VULKAN_SHADER_COMPILER_NAME "glslangValidator")
+        set(VULKAN_SHADER_COMPILE_COMMAND
+                "${GLSLANG_EXECUTABLE}" -V -o "${VULKAN_SHADER_SPV}" "${VULKAN_SHADER_SOURCE}")
+    endif()
+
+    add_custom_command(
+            OUTPUT "${VULKAN_SHADER_SPV}"
+            COMMAND ${VULKAN_SHADER_COMPILE_COMMAND}
+            DEPENDS "${VULKAN_SHADER_SOURCE}"
+            COMMENT "Compiling Vulkan shader rgb2yuv.comp (${VULKAN_SHADER_COMPILER_NAME})"
+            VERBATIM)
+
+    # the converter script is listed in DEPENDS so that editing it regenerates the include
+    add_custom_command(
+            OUTPUT "${VULKAN_SHADER_DATA}"
+            COMMAND "${CMAKE_COMMAND}" "-DSPV_FILE=${VULKAN_SHADER_SPV}" "-DOUT_FILE=${VULKAN_SHADER_DATA}"
+                -P "${VULKAN_SHADER_CONVERTER}"
+            DEPENDS "${VULKAN_SHADER_SPV}" "${VULKAN_SHADER_CONVERTER}"
+            COMMENT "Generating C include from rgb2yuv.spv"
+            VERBATIM)
+
+    add_custom_target(vulkan_shaders
+            DEPENDS "${VULKAN_SHADER_DATA}"
+            COMMENT "Vulkan shader compilation")
+    list(APPEND SUNSHINE_TARGET_DEPENDENCIES vulkan_shaders)
+endif()
+
 # wayland
 if(${SUNSHINE_ENABLE_WAYLAND})
     find_package(Wayland REQUIRED)
diff --git a/cmake/prep/options.cmake b/cmake/prep/options.cmake
index 67d9a568a43..6ce5b1ab0e0 100644
--- a/cmake/prep/options.cmake
+++ b/cmake/prep/options.cmake
@@ -58,6 +58,8 @@ elseif(UNIX) # Linux
             "Enable KMS grab if available." ON)
     option(SUNSHINE_ENABLE_VAAPI
             "Enable building vaapi specific code." ON)
+    option(SUNSHINE_ENABLE_VULKAN
+            "Enable Vulkan video encoding." ON)
     option(SUNSHINE_ENABLE_WAYLAND
             "Enable building wayland specific code."
ON)
     option(SUNSHINE_ENABLE_X11
diff --git a/cmake/scripts/binary_to_c.cmake b/cmake/scripts/binary_to_c.cmake
new file mode 100644
index 00000000000..b1406477e45
--- /dev/null
+++ b/cmake/scripts/binary_to_c.cmake
@@ -0,0 +1,56 @@
+# binary_to_c.cmake - Convert a binary file to a C uint32_t initializer list.
+#
+# Usage (script mode):
+#   cmake -DSPV_FILE=<input> -DOUT_FILE=<output> -P binary_to_c.cmake
+#
+# Input: SPV_FILE - path to SPIR-V binary
+# Output: OUT_FILE - path to write C initializer (e.g. {0x07230203, ...})
+#
+# The input is read as little-endian 32-bit words, eight words per output line.
+# An unset argument or an input whose size is not a multiple of 4 bytes aborts
+# with FATAL_ERROR instead of silently dropping the trailing bytes.
+
+foreach(_arg IN ITEMS SPV_FILE OUT_FILE)
+  if("${${_arg}}" STREQUAL "")
+    message(FATAL_ERROR "binary_to_c.cmake: ${_arg} is not set (pass -D${_arg}=<path>)")
+  endif()
+endforeach()
+
+file(READ "${SPV_FILE}" data HEX)
+string(LENGTH "${data}" hex_len)
+# two hex digits per byte, four bytes per word
+math(EXPR num_bytes "${hex_len} / 2")
+math(EXPR extra_bytes "${num_bytes} % 4")
+if(NOT extra_bytes EQUAL 0)
+  message(FATAL_ERROR "binary_to_c.cmake: ${SPV_FILE} is ${num_bytes} bytes, not a multiple of 4")
+endif()
+math(EXPR num_words "${num_bytes} / 4")
+math(EXPR last "${num_words} - 1")
+
+set(_out "{")
+set(_idx 0)
+# an empty input gives last = -1, so the loop body never runs and "{}" is written
+while(_idx LESS_EQUAL last)
+  math(EXPR off "${_idx} * 8")
+  math(EXPR off1 "${off} + 2")
+  math(EXPR off2 "${off} + 4")
+  math(EXPR off3 "${off} + 6")
+  string(SUBSTRING "${data}" ${off} 2 b0)
+  string(SUBSTRING "${data}" ${off1} 2 b1)
+  string(SUBSTRING "${data}" ${off2} 2 b2)
+  string(SUBSTRING "${data}" ${off3} 2 b3)
+  # little-endian to uint32_t
+  string(APPEND _out "0x${b3}${b2}${b1}${b0}")
+  if(NOT _idx EQUAL last)
+    string(APPEND _out ",")
+  endif()
+  # start a new output line after every eighth word
+  math(EXPR _col "(${_idx} + 1) % 8")
+  if(_col EQUAL 0)
+    string(APPEND _out "\n")
+  endif()
+  math(EXPR _idx "${_idx} + 1")
+endwhile()
+string(APPEND _out "}\n")
+
+file(WRITE "${OUT_FILE}" "${_out}")
diff --git a/docs/configuration.md b/docs/configuration.md
index 97f08576cd1..c661e955e1b 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -2155,6 +2155,11 @@ editing the `conf` file in a text editor. Use the examples as reference.
 vaapi Use VA-API (AMD, Intel) + + vulkan + Use Vulkan encoder (AMD, Intel, NVIDIA). + @note{Applies to Linux only.} + software Encoding occurs on the CPU
@@ -2920,6 +2925,101 @@ editing the `conf` file in a text editor. Use the examples as reference.
 +## Vulkan Encoder + +### vk_tune + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Description + Encoder tuning preset. Low latency modes reduce encoding delay at the cost of quality. + @note{This option only applies when using Vulkan [encoder](#encoder).} +
Default@code{} + 2 + @endcode
Example@code{} + vk_tune = 1 + @endcode
Options0 (driver default)Let the driver decide
1 (hq)High Quality
2 (ll)Low Latency
3 (ull)Ultra Low Latency
4 (lossless)Lossless
+ +### vk_rc_mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Description + Rate control mode for encoding. Auto lets the driver decide. + @note{This option only applies when using Vulkan [encoder](#encoder).} +
Default@code{} + 4 + @endcode
Example@code{} + vk_rc_mode = 2 + @endcode
Options0Auto (driver decides)
1CQP (Constant QP)
2CBR (Constant Bitrate)
4VBR (Variable Bitrate)
+ ## Software Encoder ### sw_preset diff --git a/packaging/linux/Arch/PKGBUILD b/packaging/linux/Arch/PKGBUILD index 4ed601620e1..03f5b17f922 100644 --- a/packaging/linux/Arch/PKGBUILD +++ b/packaging/linux/Arch/PKGBUILD @@ -50,6 +50,7 @@ depends=( 'openssl' 'opus' 'udev' + 'vulkan-icd-loader' 'which' ) @@ -65,6 +66,7 @@ makedepends=( 'npm' 'python-jinja' # required by the glad OpenGL/EGL loader generator 'python-setuptools' # required for glad OpenGL/EGL loader generated, v2.0.0 + 'shaderc' ) checkdepends=( diff --git a/packaging/linux/copr/Sunshine.spec b/packaging/linux/copr/Sunshine.spec index 5b42be37f28..5a0e5366602 100644 --- a/packaging/linux/copr/Sunshine.spec +++ b/packaging/linux/copr/Sunshine.spec @@ -44,6 +44,7 @@ BuildRequires: openssl-devel BuildRequires: pipewire-devel BuildRequires: rpm-build BuildRequires: systemd-rpm-macros +BuildRequires: vulkan-loader-devel BuildRequires: wget BuildRequires: which @@ -51,6 +52,7 @@ BuildRequires: which # Fedora-specific BuildRequires BuildRequires: appstream # BuildRequires: boost-devel >= 1.86.0 +BuildRequires: glslc BuildRequires: libappstream-glib %if 0%{fedora} > 43 # needed for npm from nvm @@ -91,6 +93,7 @@ BuildRequires: npm BuildRequires: python311 BuildRequires: python311-Jinja2 BuildRequires: python311-setuptools +BuildRequires: shaderc BuildRequires: udev # for unit tests BuildRequires: xvfb-run @@ -157,6 +160,7 @@ Requires: libX11 >= 1.7.3.1 Requires: numactl-libs >= 2.0.14 Requires: openssl >= 3.0.2 Requires: pulseaudio-libs >= 10.0 +Requires: vulkan-loader %endif %if 0%{?suse_version} @@ -173,6 +177,7 @@ Requires: libX11-6 Requires: libnuma1 Requires: libopenssl3 Requires: libpulse0 +Requires: vulkan-loader %endif %description diff --git a/packaging/sunshine.rb b/packaging/sunshine.rb index 5ff104944fe..fdde9350d1b 100644 --- a/packaging/sunshine.rb +++ b/packaging/sunshine.rb @@ -96,7 +96,9 @@ class Sunshine < Formula depends_on "pango" depends_on "pipewire" depends_on "pulseaudio" + depends_on 
"shaderc" depends_on "systemd" + depends_on "vulkan-loader" depends_on "wayland" # Jinja2 is required at build time by the glad OpenGL/EGL loader generator (Linux only). diff --git a/scripts/linux_build.sh b/scripts/linux_build.sh index e950bd4d011..b0c7eef2943 100755 --- a/scripts/linux_build.sh +++ b/scripts/linux_build.sh @@ -194,7 +194,9 @@ function add_arch_deps() { 'opus' 'python-jinja' # glad OpenGL/EGL loader generator 'python-setuptools' # glad OpenGL/EGL loader generated, v2.0.0 + 'shaderc' 'udev' + 'vulkan-icd-loader' 'wayland' ) @@ -249,6 +251,8 @@ function add_debian_based_deps() { "libxfixes-dev" # X11 "libxrandr-dev" # X11 "libxtst-dev" # X11 + "libvulkan-dev" # Vulkan + "glslang-tools" # Vulkan shader compiler "ninja-build" "npm" # web-ui "python3-jinja2" # glad OpenGL/EGL loader generator @@ -332,6 +336,8 @@ function add_fedora_deps() { "python3-jinja2" # glad OpenGL/EGL loader generator "python3-setuptools" # glad OpenGL/EGL loader generated, v2.0.0 "rpm-build" # if you want to build an RPM binary package + "vulkan-loader-devel" + "glslc" "wget" # necessary for cuda install with `run` file "which" # necessary for cuda install with `run` file "xorg-x11-server-Xvfb" # necessary for headless unit testing diff --git a/src/config.cpp b/src/config.cpp index 47475a04b4d..83e279d28a6 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -486,6 +486,11 @@ namespace config { false, // strict_rc_buffer }, // vaapi + { + 2, // vk.tune (default: ll - low latency) + 4, // vk.rc_mode (default: vbr) + }, + {}, // capture {}, // encoder {}, // adapter_name @@ -1137,6 +1142,9 @@ namespace config { bool_f(vars, "vaapi_strict_rc_buffer", video.vaapi.strict_rc_buffer); + int_f(vars, "vk_tune", video.vk.tune); + int_f(vars, "vk_rc_mode", video.vk.rc_mode); + string_f(vars, "capture", video.capture); string_f(vars, "encoder", video.encoder); string_f(vars, "adapter_name", video.adapter_name); diff --git a/src/config.h b/src/config.h index 6e4f001b707..44ade5a3685 100644 
--- a/src/config.h +++ b/src/config.h @@ -80,6 +80,11 @@ namespace config { bool strict_rc_buffer; } vaapi; + struct { + int tune; // 0=default, 1=hq, 2=ll, 3=ull, 4=lossless + int rc_mode; // 0=driver, 1=cqp, 2=cbr, 4=vbr + } vk; + std::string capture; std::string encoder; std::string adapter_name; diff --git a/src/platform/common.h b/src/platform/common.h index e334acf377b..a114af709e3 100644 --- a/src/platform/common.h +++ b/src/platform/common.h @@ -232,6 +232,7 @@ namespace platf { dxgi, ///< DXGI cuda, ///< CUDA videotoolbox, ///< VideoToolbox + vulkan, ///< Vulkan unknown ///< Unknown }; diff --git a/src/platform/linux/graphics.h b/src/platform/linux/graphics.h index 9a62dfce1f5..286a700bf25 100644 --- a/src/platform/linux/graphics.h +++ b/src/platform/linux/graphics.h @@ -305,6 +305,9 @@ namespace egl { // Increment sequence when new rgb_t needs to be created std::uint64_t sequence; + // Frame is vertically flipped (GL convention) + bool y_invert {false}; + // PipeWire metadata std::optional pts; std::optional seq; diff --git a/src/platform/linux/kmsgrab.cpp b/src/platform/linux/kmsgrab.cpp index c395791cea6..11571c7c361 100644 --- a/src/platform/linux/kmsgrab.cpp +++ b/src/platform/linux/kmsgrab.cpp @@ -27,6 +27,7 @@ #include "src/utility.h" #include "src/video.h" #include "vaapi.h" +#include "vulkan_encode.h" #include "wayland.h" using namespace std::literals; @@ -960,10 +961,7 @@ namespace platf { } else if (plane->fb_id != captured_cursor.fb_id) { BOOST_LOG(debug) << "Refreshing cursor image after FB changed"sv; cursor_dirty = true; - } else if (*prop_src_x != captured_cursor.prop_src_x || - *prop_src_y != captured_cursor.prop_src_y || - *prop_src_w != captured_cursor.prop_src_w || - *prop_src_h != captured_cursor.prop_src_h) { + } else if (*prop_src_x != captured_cursor.prop_src_x || *prop_src_y != captured_cursor.prop_src_y || *prop_src_w != captured_cursor.prop_src_w || *prop_src_h != captured_cursor.prop_src_h) { BOOST_LOG(debug) << "Refreshing 
cursor image after source dimensions changed"sv; cursor_dirty = true; } @@ -1379,6 +1377,12 @@ namespace platf { } #endif +#ifdef SUNSHINE_BUILD_VULKAN + if (mem_type == mem_type_e::vulkan) { + return vk::make_avcodec_encode_device_vram(width, height, img_offset_x, img_offset_y); + } +#endif + #ifdef SUNSHINE_BUILD_CUDA if (mem_type == mem_type_e::cuda) { return cuda::make_avcodec_gl_encode_device(width, height, img_offset_x, img_offset_y); @@ -1524,7 +1528,7 @@ namespace platf { } // namespace kms std::shared_ptr kms_display(mem_type_e hwdevice_type, const std::string &display_name, const ::video::config_t &config) { - if (hwdevice_type == mem_type_e::vaapi || hwdevice_type == mem_type_e::cuda) { + if (hwdevice_type == mem_type_e::vaapi || hwdevice_type == mem_type_e::cuda || hwdevice_type == mem_type_e::vulkan) { auto disp = std::make_shared(hwdevice_type); if (!disp->init(display_name, config)) { diff --git a/src/platform/linux/misc.cpp b/src/platform/linux/misc.cpp index b8421f60d55..ba40a802873 100644 --- a/src/platform/linux/misc.cpp +++ b/src/platform/linux/misc.cpp @@ -250,8 +250,7 @@ namespace platf { if (!interface_name.empty()) { // Find the AF_LINK entry for this interface to get MAC address for (auto pos = ifaddrs.get(); pos != nullptr; pos = pos->ifa_next) { - if (pos->ifa_addr && pos->ifa_addr->sa_family == AF_LINK && - interface_name == pos->ifa_name) { + if (pos->ifa_addr && pos->ifa_addr->sa_family == AF_LINK && interface_name == pos->ifa_name) { auto sdl = (struct sockaddr_dl *) pos->ifa_addr; auto mac = (unsigned char *) LLADDR(sdl); @@ -1087,6 +1086,9 @@ namespace platf { // https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30039 set_env("AMD_DEBUG", "lowlatencyenc"); + // enable Vulkan video extensions for AMD RADV + set_env("RADV_PERFTEST", "video_encode"); + // These are allowed to fail. 
gbm::init(); diff --git a/src/platform/linux/portalgrab.cpp b/src/platform/linux/portalgrab.cpp index 32ee0741933..badb4be2936 100644 --- a/src/platform/linux/portalgrab.cpp +++ b/src/platform/linux/portalgrab.cpp @@ -29,6 +29,7 @@ #include "src/platform/common.h" #include "src/video.h" #include "vaapi.h" +#include "vulkan_encode.h" #include "wayland.h" #if !defined(__FreeBSD__) @@ -877,6 +878,7 @@ namespace portal { // On hybrid GPU systems (Intel+NVIDIA), DMA-BUFs come from the Intel GPU and cannot // be imported into CUDA, so we fall back to memory buffers in that case. bool use_dmabuf = n_dmabuf_infos > 0 && (mem_type == platf::mem_type_e::vaapi || + mem_type == platf::mem_type_e::vulkan || (mem_type == platf::mem_type_e::cuda && display_is_nvidia)); if (use_dmabuf) { for (int i = 0; i < n_dmabuf_infos; i++) { @@ -1269,8 +1271,7 @@ namespace portal { } // Check previous logical dimensions - if (previous_width.load() == width && - previous_height.load() == height) { + if (previous_width.load() == width && previous_height.load() == height) { if (capture_running.load()) { { std::scoped_lock lock(pipewire.frame_mutex()); @@ -1283,8 +1284,7 @@ namespace portal { previous_height.store(height); } - if (negotiated_w > 0 && negotiated_h > 0 && - (negotiated_w != width || negotiated_h != height)) { + if (negotiated_w > 0 && negotiated_h > 0 && (negotiated_w != width || negotiated_h != height)) { BOOST_LOG(info) << "Using negotiated resolution "sv << negotiated_w << "x" << negotiated_h; @@ -1429,6 +1429,12 @@ namespace portal { } #endif +#ifdef SUNSHINE_BUILD_VULKAN + if (mem_type == platf::mem_type_e::vulkan && n_dmabuf_infos > 0) { + return vk::make_avcodec_encode_device_vram(width, height, 0, 0); + } +#endif + #ifdef SUNSHINE_BUILD_CUDA if (mem_type == platf::mem_type_e::cuda) { if (display_is_nvidia && n_dmabuf_infos > 0) { @@ -1612,7 +1618,7 @@ namespace portal { namespace platf { std::shared_ptr portal_display(mem_type_e hwdevice_type, const std::string 
&display_name, const video::config_t &config) { using enum platf::mem_type_e; - if (hwdevice_type != system && hwdevice_type != vaapi && hwdevice_type != cuda) { + if (hwdevice_type != system && hwdevice_type != vaapi && hwdevice_type != cuda && hwdevice_type != vulkan) { BOOST_LOG(error) << "Could not initialize display with the given hw device type."sv; return nullptr; } diff --git a/src/platform/linux/vulkan_encode.cpp b/src/platform/linux/vulkan_encode.cpp new file mode 100644 index 00000000000..3113e11fd48 --- /dev/null +++ b/src/platform/linux/vulkan_encode.cpp @@ -0,0 +1,975 @@ +/** + * @file src/platform/linux/vulkan_encode.cpp + * @brief Vulkan-native encoder: DMA-BUF -> Vulkan compute (RGB->YUV) -> Vulkan Video encode. + * No EGL/GL dependency — all GPU work stays in a single Vulkan queue. + */ +#include +#include +#include +#include +#if defined(__FreeBSD__) + #include +#else + #include +#endif +#include +#include + +extern "C" { +#include +#include +#include +} + +#include "graphics.h" +#include "src/config.h" +#include "src/logging.h" +#include "src/video_colorspace.h" +#include "vulkan_encode.h" + +// SPIR-V data generated at build time +static const std::vector rgb2yuv_comp_spv_data +#include "shaders/rgb2yuv.spv.inc" + ; +static const size_t rgb2yuv_comp_spv_size = rgb2yuv_comp_spv_data.size() * sizeof(uint32_t); + +using namespace std::literals; + +namespace vk { + + // Match a DRI render node path to a Vulkan device index via VK_EXT_physical_device_drm. + // Returns the index as a string (e.g. "1"), or empty string if no match. 
+ static std::string find_vulkan_index_for_render_node(const char *render_path) { + struct stat node_stat; + if (stat(render_path, &node_stat) < 0) { + return {}; + } + + auto target_major = major(node_stat.st_rdev); + auto target_minor = minor(node_stat.st_rdev); + + VkApplicationInfo app = {VK_STRUCTURE_TYPE_APPLICATION_INFO}; + app.apiVersion = VK_API_VERSION_1_1; + VkInstanceCreateInfo ci = {VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO}; + ci.pApplicationInfo = &app; + VkInstance inst = VK_NULL_HANDLE; + if (vkCreateInstance(&ci, nullptr, &inst) != VK_SUCCESS) { + return {}; + } + + uint32_t count = 0; + vkEnumeratePhysicalDevices(inst, &count, nullptr); + std::vector devs(count); + vkEnumeratePhysicalDevices(inst, &count, devs.data()); + + std::string result; + for (uint32_t i = 0; i < count; i++) { + VkPhysicalDeviceDrmPropertiesEXT drm = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT}; + VkPhysicalDeviceProperties2 props2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; + props2.pNext = &drm; + vkGetPhysicalDeviceProperties2(devs[i], &props2); + if (drm.hasRender && drm.renderMajor == (int64_t) target_major && drm.renderMinor == (int64_t) target_minor) { + result = std::to_string(i); + break; + } + } + vkDestroyInstance(inst, nullptr); + return result; + } + + static int create_vulkan_hwdevice(AVBufferRef **hw_device_buf) { + // Resolve render device path to Vulkan device index + if (auto render_path = config::video.adapter_name.empty() ? 
"/dev/dri/renderD128" : config::video.adapter_name; render_path[0] == '/') { + if (auto idx = find_vulkan_index_for_render_node(render_path.c_str()); !idx.empty() && av_hwdevice_ctx_create(hw_device_buf, AV_HWDEVICE_TYPE_VULKAN, idx.c_str(), nullptr, 0) >= 0) { + return 0; + } + } else { + // Non-path: treat as device name substring or numeric index + if (av_hwdevice_ctx_create(hw_device_buf, AV_HWDEVICE_TYPE_VULKAN, render_path.c_str(), nullptr, 0) >= 0) { + return 0; + } + } + // Final fallback: let FFmpeg pick default + if (av_hwdevice_ctx_create(hw_device_buf, AV_HWDEVICE_TYPE_VULKAN, nullptr, nullptr, 0) >= 0) { + return 0; + } + return -1; + } + + struct PushConstants { + std::array color_vec_y; + std::array color_vec_u; + std::array color_vec_v; + std::array range_y; + std::array range_uv; + std::array src_offset; + std::array src_size; + std::array dst_size; + std::array cursor_pos; + std::array cursor_size; + int32_t y_invert; + }; + +// Helper to check VkResult +#define VK_CHECK(expr) \ + do { \ + VkResult _r = (expr); \ + if (_r != VK_SUCCESS) { \ + BOOST_LOG(error) << #expr << " failed: " << _r; \ + return -1; \ + } \ + } while (0) +#define VK_CHECK_BOOL(expr) \ + do { \ + VkResult _r = (expr); \ + if (_r != VK_SUCCESS) { \ + BOOST_LOG(error) << #expr << " failed: " << _r; \ + return false; \ + } \ + } while (0) + + class vk_vram_t: public platf::avcodec_encode_device_t { + public: + ~vk_vram_t() override { + cleanup_pipeline(); + } + + int init(int in_width, int in_height, int in_offset_x = 0, int in_offset_y = 0) { + width = in_width; + height = in_height; + offset_x = in_offset_x; + offset_y = in_offset_y; + this->data = (void *) &init_hw_device; + return 0; + } + + void init_codec_options(AVCodecContext *ctx, AVDictionary **options) override { + // When VBR mode is selected (rc_mode=4), don't pin rc_min_rate to the target bitrate. 
+ // Having rc_min_rate == rc_max_rate == bit_rate in VBR mode prevents the encoder from + // undershooting on simple frames, which builds up headroom that causes large overshoots + // on complex frames. + if (config::video.vk.rc_mode == 4) { + ctx->rc_min_rate = 0; + } + } + + int set_frame(AVFrame *new_frame, AVBufferRef *hw_frames_ctx_buf) override { + this->hwframe.reset(new_frame); + this->frame = new_frame; + this->hw_frames_ctx = hw_frames_ctx_buf; + + auto *frames_ctx = (AVHWFramesContext *) hw_frames_ctx_buf->data; + auto *dev_ctx = (AVHWDeviceContext *) frames_ctx->device_ref->data; + vk_dev.ctx = (AVVulkanDeviceContext *) dev_ctx->hwctx; + vk_dev.dev = vk_dev.ctx->act_dev; + vk_dev.phys_dev = vk_dev.ctx->phys_dev; + is_10bit = (frames_ctx->sw_format == AV_PIX_FMT_P010); + + { + VkPhysicalDeviceProperties p; + vkGetPhysicalDeviceProperties(vk_dev.phys_dev, &p); + BOOST_LOG(info) << "Vulkan encode using GPU: " << p.deviceName; + } + + // Find a compute-capable queue family from FFmpeg's context + vk_dev.compute_qf = -1; + for (int i = 0; i < vk_dev.ctx->nb_qf; i++) { + if (vk_dev.ctx->qf[i].flags & VK_QUEUE_COMPUTE_BIT) { + vk_dev.compute_qf = vk_dev.ctx->qf[i].idx; + break; + } + } + if (vk_dev.compute_qf < 0) { + BOOST_LOG(error) << "No compute queue family in Vulkan device"sv; + return -1; + } + + vkGetDeviceQueue(vk_dev.dev, vk_dev.compute_qf, 0, &vk_dev.compute_queue); + + // Load extension functions + vk_dev.getMemoryFdProperties = (PFN_vkGetMemoryFdPropertiesKHR) + vkGetDeviceProcAddr(vk_dev.dev, "vkGetMemoryFdPropertiesKHR"); + + if (!create_compute_pipeline()) { + return -1; + } + if (!create_command_resources()) { + return -1; + } + + return 0; + } + + void apply_colorspace() override { + auto *colors = video::color_vectors_from_colorspace(colorspace, true); + if (colors) { + memcpy(push.color_vec_y.data(), colors->color_vec_y, sizeof(push.color_vec_y)); + memcpy(push.color_vec_u.data(), colors->color_vec_u, sizeof(push.color_vec_u)); + 
memcpy(push.color_vec_v.data(), colors->color_vec_v, sizeof(push.color_vec_v)); + memcpy(push.range_y.data(), colors->range_y, sizeof(push.range_y)); + memcpy(push.range_uv.data(), colors->range_uv, sizeof(push.range_uv)); + } + } + + void init_hwframes(AVHWFramesContext *frames) override { + frames->initial_pool_size = 4; + auto *vk_frames = (AVVulkanFramesContext *) frames->hwctx; + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; + vk_frames->usage = (VkImageUsageFlagBits) (VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT); + } + + int convert(platf::img_t &img) override { + auto &descriptor = (egl::img_descriptor_t &) img; + + // Get encoder target frame + if (!frame->buf[0]) { + if (av_hwframe_get_buffer(hw_frames_ctx, frame, 0) < 0) { + BOOST_LOG(error) << "Failed to get Vulkan frame"sv; + return -1; + } + } + + // Import new DMA-BUF as VkImage when capture sequence changes + if (descriptor.sequence == 0) { + // Dummy frame — clear the target + return 0; + } + + if (descriptor.sequence > sequence) { + sequence = descriptor.sequence; + if (!import_dmabuf(descriptor.sd)) { + BOOST_LOG(error) << "Failed to import DMA-BUF"sv; + return -1; + } + descriptors_dirty = true; + } + + if (src.image == VK_NULL_HANDLE) { + return -1; + } + + // Setup Y/UV image views for the encoder target (once) + if (!target.views_created) { + if (!create_target_views()) { + return -1; + } + target.views_created = true; + descriptors_dirty = true; + } + + // Update descriptor set only when source or target changed + if (descriptors_dirty) { + update_descriptors(); + descriptors_dirty = false; + } + + if (descriptor.data && descriptor.serial != cursor_serial) { + cursor_serial = descriptor.serial; + if (!create_cursor_image(descriptor.src_w, descriptor.src_h, descriptor.data)) { + return -1; + } + update_descriptors(); + descriptors_dirty = false; + } + + // Fill push constants + push.src_offset[0] = offset_x; + push.src_offset[1] = offset_y; + 
push.src_size[0] = width; + push.src_size[1] = height; + push.dst_size[0] = frame->width; + push.dst_size[1] = frame->height; + push.y_invert = descriptor.y_invert ? 1 : 0; + + if (descriptor.data) { + float scale_x = (float) frame->width / width; + float scale_y = (float) frame->height / height; + push.cursor_pos[0] = (int32_t) ((descriptor.x - offset_x) * scale_x); + push.cursor_pos[1] = (int32_t) ((descriptor.y - offset_y) * scale_y); + push.cursor_size[0] = (int32_t) (descriptor.width * scale_x); + push.cursor_size[1] = (int32_t) (descriptor.height * scale_y); + } else { + push.cursor_size[0] = 0; + } + + // Record and submit compute dispatch + return dispatch_compute(); + } + + private: + bool create_compute_pipeline() { + // Shader module + VkShaderModuleCreateInfo shader_ci = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO}; + shader_ci.codeSize = rgb2yuv_comp_spv_size; + shader_ci.pCode = rgb2yuv_comp_spv_data.data(); + VK_CHECK_BOOL(vkCreateShaderModule(vk_dev.dev, &shader_ci, nullptr, &compute.shader_module)); + + // Descriptor set layout: binding 0=sampler, 1=Y storage, 2=UV storage, 3=cursor sampler + std::array bindings = {}; + bindings[0] = {0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}; + bindings[1] = {1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}; + bindings[2] = {2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}; + bindings[3] = {3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr}; + + VkDescriptorSetLayoutCreateInfo ds_layout_ci = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + ds_layout_ci.bindingCount = bindings.size(); + ds_layout_ci.pBindings = bindings.data(); + VK_CHECK_BOOL(vkCreateDescriptorSetLayout(vk_dev.dev, &ds_layout_ci, nullptr, &compute.ds_layout)); + + // Push constant range + VkPushConstantRange pc_range = {VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstants)}; + + 
VkPipelineLayoutCreateInfo pl_ci = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; + pl_ci.setLayoutCount = 1; + pl_ci.pSetLayouts = &compute.ds_layout; + pl_ci.pushConstantRangeCount = 1; + pl_ci.pPushConstantRanges = &pc_range; + VK_CHECK_BOOL(vkCreatePipelineLayout(vk_dev.dev, &pl_ci, nullptr, &compute.pipeline_layout)); + + // Compute pipeline + VkComputePipelineCreateInfo comp_ci = {VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO}; + comp_ci.stage = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO}; + comp_ci.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; + comp_ci.stage.module = compute.shader_module; + comp_ci.stage.pName = "main"; + comp_ci.layout = compute.pipeline_layout; + VK_CHECK_BOOL(vkCreateComputePipelines(vk_dev.dev, VK_NULL_HANDLE, 1, &comp_ci, nullptr, &compute.pipeline)); + + // Descriptor pool + std::array pool_sizes = {{ + {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 2}, + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2}, + }}; + VkDescriptorPoolCreateInfo pool_ci = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO}; + pool_ci.maxSets = 1; + pool_ci.poolSizeCount = pool_sizes.size(); + pool_ci.pPoolSizes = pool_sizes.data(); + VK_CHECK_BOOL(vkCreateDescriptorPool(vk_dev.dev, &pool_ci, nullptr, &compute.desc_pool)); + + VkDescriptorSetAllocateInfo alloc_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + alloc_info.descriptorPool = compute.desc_pool; + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &compute.ds_layout; + VK_CHECK_BOOL(vkAllocateDescriptorSets(vk_dev.dev, &alloc_info, &compute.desc_set)); + + // Sampler for source image + VkSamplerCreateInfo sampler_ci = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO}; + sampler_ci.magFilter = VK_FILTER_LINEAR; + sampler_ci.minFilter = VK_FILTER_LINEAR; + sampler_ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + sampler_ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + VK_CHECK_BOOL(vkCreateSampler(vk_dev.dev, &sampler_ci, nullptr, &compute.sampler)); + + if 
(!create_cursor_image(1, 1, nullptr)) { + return false; + } + + return true; + } + + bool create_command_resources() { + VkCommandPoolCreateInfo pool_ci = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; + pool_ci.queueFamilyIndex = vk_dev.compute_qf; + pool_ci.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VK_CHECK_BOOL(vkCreateCommandPool(vk_dev.dev, &pool_ci, nullptr, &cmd.pool)); + + VkCommandBufferAllocateInfo alloc_ci = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; + alloc_ci.commandPool = cmd.pool; + alloc_ci.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + alloc_ci.commandBufferCount = CMD_RING_SIZE; + VK_CHECK_BOOL(vkAllocateCommandBuffers(vk_dev.dev, &alloc_ci, cmd.ring.data())); + + return true; + } + + static VkFormat drm_fourcc_to_vk_format(uint32_t fourcc) { + switch (fourcc) { + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + return VK_FORMAT_B8G8R8A8_UNORM; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + return VK_FORMAT_R8G8B8A8_UNORM; + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_ARGB2101010: + return VK_FORMAT_A2R10G10B10_UNORM_PACK32; + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ABGR2101010: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + default: + BOOST_LOG(warning) << "Unknown DRM fourcc 0x" << std::hex << fourcc << std::dec << ", assuming B8G8R8A8"; + return VK_FORMAT_B8G8R8A8_UNORM; + } + } + + bool import_dmabuf(const egl::surface_descriptor_t &sd) { + destroy_src_image(); + + int fd = dup(sd.fds[0]); + if (fd < 0) { + return false; + } + + // Query memory requirements for this DMA-BUF + VkMemoryFdPropertiesKHR fd_props = {VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR}; + if (vk_dev.getMemoryFdProperties) { + vk_dev.getMemoryFdProperties(vk_dev.dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, fd, &fd_props); + } + + // Create VkImage for the DMA-BUF + VkExternalMemoryImageCreateInfo ext_ci = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO}; + ext_ci.handleTypes = 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + + std::array drm_layouts = {}; + VkImageDrmFormatModifierExplicitCreateInfoEXT drm_ci = { + VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT + }; + VkImageTiling tiling; + + if (sd.modifier != DRM_FORMAT_MOD_INVALID) { + int plane_count = 0; + for (int i = 0; i < 4 && sd.fds[i] >= 0; ++i) { + drm_layouts[i].offset = sd.offsets[i]; + drm_layouts[i].rowPitch = sd.pitches[i]; + plane_count++; + } + drm_ci.drmFormatModifier = sd.modifier; + drm_ci.drmFormatModifierPlaneCount = plane_count; + drm_ci.pPlaneLayouts = drm_layouts.data(); + ext_ci.pNext = &drm_ci; + tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; + } else { + tiling = VK_IMAGE_TILING_LINEAR; + } + + auto vk_format = drm_fourcc_to_vk_format(sd.fourcc); + + VkImageCreateInfo img_ci = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; + img_ci.pNext = &ext_ci; + img_ci.imageType = VK_IMAGE_TYPE_2D; + img_ci.format = vk_format; + img_ci.extent = {(uint32_t) sd.width, (uint32_t) sd.height, 1}; + img_ci.mipLevels = 1; + img_ci.arrayLayers = 1; + img_ci.samples = VK_SAMPLE_COUNT_1_BIT; + img_ci.tiling = tiling; + img_ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + img_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + auto res = vkCreateImage(vk_dev.dev, &img_ci, nullptr, &src.image); + if (res != VK_SUCCESS) { + close(fd); + BOOST_LOG(error) << "vkCreateImage for DMA-BUF failed: " << res + << " (modifier=0x" << std::hex << sd.modifier << std::dec + << ", pitch=" << sd.pitches[0] << ", offset=" << sd.offsets[0] << ")"; + return false; + } + + // Bind imported DMA-BUF memory + VkMemoryRequirements mem_req; + vkGetImageMemoryRequirements(vk_dev.dev, src.image, &mem_req); + + VkImportMemoryFdInfoKHR import_fd = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR}; + import_fd.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + import_fd.fd = fd; // Vulkan takes ownership + + VkMemoryAllocateInfo alloc_info = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + 
alloc_info.pNext = &import_fd; + alloc_info.allocationSize = mem_req.size; + alloc_info.memoryTypeIndex = find_memory_type( + fd_props.memoryTypeBits ? fd_props.memoryTypeBits : mem_req.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + ); + + VkDeviceMemory src_mem = VK_NULL_HANDLE; + res = vkAllocateMemory(vk_dev.dev, &alloc_info, nullptr, &src_mem); + if (res != VK_SUCCESS) { + BOOST_LOG(error) << "vkAllocateMemory for DMA-BUF failed: " << res; + vkDestroyImage(vk_dev.dev, src.image, nullptr); + src.image = VK_NULL_HANDLE; + return false; + } + + vkBindImageMemory(vk_dev.dev, src.image, src_mem, 0); + + // Create image view (Vulkan sampling always returns RGBA order regardless of memory layout) + VkImageViewCreateInfo view_ci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + view_ci.image = src.image; + view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_ci.format = vk_format; + view_ci.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VK_CHECK_BOOL(vkCreateImageView(vk_dev.dev, &view_ci, nullptr, &src.view)); + + src.mem = src_mem; + return true; + } + + bool create_cursor_image(int w, int h, const uint8_t *pixels) { + destroy_cursor_image(); + + VkImageCreateInfo img_ci = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO}; + img_ci.imageType = VK_IMAGE_TYPE_2D; + img_ci.format = VK_FORMAT_B8G8R8A8_UNORM; + img_ci.extent = {(uint32_t) w, (uint32_t) h, 1}; + img_ci.mipLevels = 1; + img_ci.arrayLayers = 1; + img_ci.samples = VK_SAMPLE_COUNT_1_BIT; + img_ci.tiling = VK_IMAGE_TILING_LINEAR; + img_ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + img_ci.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + VK_CHECK_BOOL(vkCreateImage(vk_dev.dev, &img_ci, nullptr, &cursor.image)); + + VkMemoryRequirements mem_req; + vkGetImageMemoryRequirements(vk_dev.dev, cursor.image, &mem_req); + VkMemoryAllocateInfo alloc = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + alloc.allocationSize = mem_req.size; + alloc.memoryTypeIndex = find_memory_type(mem_req.memoryTypeBits, 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + VK_CHECK_BOOL(vkAllocateMemory(vk_dev.dev, &alloc, nullptr, &cursor.mem)); + VK_CHECK_BOOL(vkBindImageMemory(vk_dev.dev, cursor.image, cursor.mem, 0)); + + if (pixels) { + void *mapped; + VK_CHECK_BOOL(vkMapMemory(vk_dev.dev, cursor.mem, 0, VK_WHOLE_SIZE, 0, &mapped)); + VkImageSubresource subres = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0}; + VkSubresourceLayout layout; + vkGetImageSubresourceLayout(vk_dev.dev, cursor.image, &subres, &layout); + for (int y = 0; y < h; y++) { + memcpy((uint8_t *) mapped + layout.offset + y * layout.rowPitch, pixels + y * w * 4, w * 4); + } + vkUnmapMemory(vk_dev.dev, cursor.mem); + } + + VkImageViewCreateInfo view_ci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + view_ci.image = cursor.image; + view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_ci.format = VK_FORMAT_B8G8R8A8_UNORM; + view_ci.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + VK_CHECK_BOOL(vkCreateImageView(vk_dev.dev, &view_ci, nullptr, &cursor.view)); + + cursor.needs_transition = true; + descriptors_dirty = true; + return true; + } + + void destroy_cursor_image() { + if (cursor.view) { + vkDestroyImageView(vk_dev.dev, cursor.view, nullptr); + cursor.view = VK_NULL_HANDLE; + } + if (cursor.image) { + vkDestroyImage(vk_dev.dev, cursor.image, nullptr); + cursor.image = VK_NULL_HANDLE; + } + if (cursor.mem) { + vkFreeMemory(vk_dev.dev, cursor.mem, nullptr); + cursor.mem = VK_NULL_HANDLE; + } + } + + bool create_target_views() { + auto *vk_frame = (AVVkFrame *) frame->data[0]; + if (!vk_frame) { + return false; + } + + auto y_fmt = is_10bit ? VK_FORMAT_R16_UNORM : VK_FORMAT_R8_UNORM; + auto uv_fmt = is_10bit ? 
VK_FORMAT_R16G16_UNORM : VK_FORMAT_R8G8_UNORM; + + // Detect multiplane vs multi-image layout + int num_imgs = 0; + for (int i = 0; i < AV_NUM_DATA_POINTERS && vk_frame->img[i]; i++) { + num_imgs++; + } + + if (num_imgs == 1) { + // Single multiplane image — create plane views + VkImageViewCreateInfo view_ci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + view_ci.image = vk_frame->img[0]; + view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + + // Y plane + view_ci.format = y_fmt; + view_ci.subresourceRange = {VK_IMAGE_ASPECT_PLANE_0_BIT, 0, 1, 0, 1}; + VK_CHECK_BOOL(vkCreateImageView(vk_dev.dev, &view_ci, nullptr, &target.y_view)); + + // UV plane + view_ci.format = uv_fmt; + view_ci.subresourceRange = {VK_IMAGE_ASPECT_PLANE_1_BIT, 0, 1, 0, 1}; + VK_CHECK_BOOL(vkCreateImageView(vk_dev.dev, &view_ci, nullptr, &target.uv_view)); + } else { + // Separate images per plane + VkImageViewCreateInfo view_ci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; + view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; + view_ci.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + + view_ci.image = vk_frame->img[0]; + view_ci.format = y_fmt; + VK_CHECK_BOOL(vkCreateImageView(vk_dev.dev, &view_ci, nullptr, &target.y_view)); + + view_ci.image = vk_frame->img[1]; + view_ci.format = uv_fmt; + VK_CHECK_BOOL(vkCreateImageView(vk_dev.dev, &view_ci, nullptr, &target.uv_view)); + } + return true; + } + + void update_descriptors() { + VkDescriptorImageInfo src_info = {compute.sampler, src.view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + VkDescriptorImageInfo y_info = {VK_NULL_HANDLE, target.y_view, VK_IMAGE_LAYOUT_GENERAL}; + VkDescriptorImageInfo uv_info = {VK_NULL_HANDLE, target.uv_view, VK_IMAGE_LAYOUT_GENERAL}; + VkDescriptorImageInfo cursor_info = {compute.sampler, cursor.view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}; + + std::array writes = {}; + writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, compute.desc_set, 0, 0, 1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &src_info, nullptr, 
nullptr}; + writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, compute.desc_set, 1, 0, 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &y_info, nullptr, nullptr}; + writes[2] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, compute.desc_set, 2, 0, 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &uv_info, nullptr, nullptr}; + writes[3] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, compute.desc_set, 3, 0, 1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &cursor_info, nullptr, nullptr}; + vkUpdateDescriptorSets(vk_dev.dev, writes.size(), writes.data(), 0, nullptr); + } + + int dispatch_compute() { + auto *vk_frame = (AVVkFrame *) frame->data[0]; + int num_imgs = 0; + for (int i = 0; i < AV_NUM_DATA_POINTERS && vk_frame->img[i]; i++) { + num_imgs++; + } + + // Rotate to next command buffer. With CMD_RING_SIZE slots, the buffer + // we're about to reuse was submitted CMD_RING_SIZE frames ago. + // At 60fps that's ~50ms for a <1ms compute dispatch — always complete. + // No fences, no semaphore waits, no CPU blocking. 
+ auto cmd_buf = cmd.ring[cmd.ring_idx]; + cmd.ring_idx = (cmd.ring_idx + 1) % CMD_RING_SIZE; + + VkCommandBufferBeginInfo begin_ci = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + begin_ci.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VK_CHECK(vkBeginCommandBuffer(cmd_buf, &begin_ci)); + + // Transition source image to SHADER_READ_ONLY + VkImageMemoryBarrier src_barrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; + src_barrier.srcAccessMask = 0; + src_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + src_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + src_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + src_barrier.image = src.image; + src_barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + src_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL; + src_barrier.dstQueueFamilyIndex = vk_dev.compute_qf; + + vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &src_barrier); + + // Transition cursor image if needed + if (cursor.needs_transition) { + VkImageMemoryBarrier cursor_barrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; + cursor_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + cursor_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + cursor_barrier.oldLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + cursor_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + cursor_barrier.image = cursor.image; + cursor_barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + cursor_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + cursor_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &cursor_barrier); + cursor.needs_transition = false; + } + + // Transition target planes to GENERAL for storage writes + std::array dst_barriers = {}; + int num_dst_barriers = (num_imgs == 1) ? 
1 : 2; + for (int i = 0; i < num_dst_barriers; i++) { + dst_barriers[i] = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; + dst_barriers[i].srcAccessMask = target.initialized ? VK_ACCESS_SHADER_READ_BIT : 0; + dst_barriers[i].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + dst_barriers[i].oldLayout = target.initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED; + dst_barriers[i].newLayout = VK_IMAGE_LAYOUT_GENERAL; + dst_barriers[i].image = vk_frame->img[num_imgs == 1 ? 0 : i]; + dst_barriers[i].subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + dst_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + dst_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + } + + vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, num_dst_barriers, dst_barriers.data()); + + // Bind pipeline and dispatch + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline); + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline_layout, 0, 1, &compute.desc_set, 0, nullptr); + vkCmdPushConstants(cmd_buf, compute.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstants), &push); + + uint32_t gx = (frame->width + 15) / 16; + uint32_t gy = (frame->height + 15) / 16; + vkCmdDispatch(cmd_buf, gx, gy, 1); + + VK_CHECK(vkEndCommandBuffer(cmd_buf)); + + // Submit with timeline semaphore signaling for FFmpeg + VkTimelineSemaphoreSubmitInfo timeline_info = {VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO}; + std::array wait_sems = {}; + std::array signal_sems = {}; + std::array wait_vals = {}; + std::array signal_vals = {}; + std::array wait_stages = {}; + int sem_count = 0; + + for (int i = 0; i < AV_NUM_DATA_POINTERS && vk_frame->sem[i]; i++) { + wait_sems[sem_count] = vk_frame->sem[i]; + wait_vals[sem_count] = vk_frame->sem_value[i]; + wait_stages[sem_count] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + + signal_sems[sem_count] = 
vk_frame->sem[i]; + signal_vals[sem_count] = vk_frame->sem_value[i] + 1; + vk_frame->sem_value[i]++; + sem_count++; + } + + timeline_info.waitSemaphoreValueCount = sem_count; + timeline_info.pWaitSemaphoreValues = wait_vals.data(); + timeline_info.signalSemaphoreValueCount = sem_count; + timeline_info.pSignalSemaphoreValues = signal_vals.data(); + + VkSubmitInfo submit = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + submit.pNext = &timeline_info; + submit.waitSemaphoreCount = sem_count; + submit.pWaitSemaphores = wait_sems.data(); + submit.pWaitDstStageMask = wait_stages.data(); + submit.commandBufferCount = 1; + submit.pCommandBuffers = &cmd_buf; + submit.signalSemaphoreCount = sem_count; + submit.pSignalSemaphores = signal_sems.data(); + + // Lock the queue (FFmpeg requires this) + vk_dev.ctx->lock_queue( + (AVHWDeviceContext *) ((AVHWFramesContext *) hw_frames_ctx->data)->device_ref->data, + vk_dev.compute_qf, + 0 + ); + auto res = vkQueueSubmit(vk_dev.compute_queue, 1, &submit, VK_NULL_HANDLE); + vk_dev.ctx->unlock_queue( + (AVHWDeviceContext *) ((AVHWFramesContext *) hw_frames_ctx->data)->device_ref->data, + vk_dev.compute_qf, + 0 + ); + + if (res != VK_SUCCESS) { + BOOST_LOG(error) << "vkQueueSubmit failed: " << res; + return -1; + } + + // Update frame layouts for FFmpeg + for (int i = 0; i < AV_NUM_DATA_POINTERS && vk_frame->img[i]; i++) { + vk_frame->layout[i] = VK_IMAGE_LAYOUT_GENERAL; + vk_frame->access[i] = VK_ACCESS_SHADER_WRITE_BIT; + } + + target.initialized = true; + + return 0; + } + + uint32_t find_memory_type(uint32_t type_bits, VkMemoryPropertyFlags props) { + VkPhysicalDeviceMemoryProperties mem_props; + vkGetPhysicalDeviceMemoryProperties(vk_dev.phys_dev, &mem_props); + for (uint32_t i = 0; i < mem_props.memoryTypeCount; i++) { + if ((type_bits & (1 << i)) && (mem_props.memoryTypes[i].propertyFlags & props) == props) { + return i; + } + } + // Fallback: any matching type bit + for (uint32_t i = 0; i < mem_props.memoryTypeCount; i++) { + if (type_bits & 
(1 << i)) { + return i; + } + } + return 0; + } + + void destroy_src_image() { + if (src.image) { + // Defer destruction — the GPU may still be using this image. + // By the time we wrap around (4 frames later), it's guaranteed done. + auto &slot = defer_ring[defer_idx]; + if (slot.view) { + vkDestroyImageView(vk_dev.dev, slot.view, nullptr); + } + if (slot.image) { + vkDestroyImage(vk_dev.dev, slot.image, nullptr); + } + if (slot.mem) { + vkFreeMemory(vk_dev.dev, slot.mem, nullptr); + } + slot = src; + defer_idx = (defer_idx + 1) % DEFER_RING_SIZE; + } + src = {}; + } + + void cleanup_pipeline() { + if (!vk_dev.dev) { + return; + } + vkDeviceWaitIdle(vk_dev.dev); + destroy_src_image(); + // Flush deferred destroys + for (auto &slot : defer_ring) { + if (slot.view) { + vkDestroyImageView(vk_dev.dev, slot.view, nullptr); + } + if (slot.image) { + vkDestroyImage(vk_dev.dev, slot.image, nullptr); + } + if (slot.mem) { + vkFreeMemory(vk_dev.dev, slot.mem, nullptr); + } + slot = {}; + } + if (target.y_view) { + vkDestroyImageView(vk_dev.dev, target.y_view, nullptr); + } + if (target.uv_view) { + vkDestroyImageView(vk_dev.dev, target.uv_view, nullptr); + } + destroy_cursor_image(); + if (cmd.pool) { + vkDestroyCommandPool(vk_dev.dev, cmd.pool, nullptr); + } + if (compute.sampler) { + vkDestroySampler(vk_dev.dev, compute.sampler, nullptr); + } + if (compute.desc_pool) { + vkDestroyDescriptorPool(vk_dev.dev, compute.desc_pool, nullptr); + } + if (compute.pipeline) { + vkDestroyPipeline(vk_dev.dev, compute.pipeline, nullptr); + } + if (compute.pipeline_layout) { + vkDestroyPipelineLayout(vk_dev.dev, compute.pipeline_layout, nullptr); + } + if (compute.ds_layout) { + vkDestroyDescriptorSetLayout(vk_dev.dev, compute.ds_layout, nullptr); + } + if (compute.shader_module) { + vkDestroyShaderModule(vk_dev.dev, compute.shader_module, nullptr); + } + } + + static int init_hw_device(platf::avcodec_encode_device_t *, AVBufferRef **hw_device_buf) { + return 
create_vulkan_hwdevice(hw_device_buf); + } + + // Dimensions + int width = 0; + int height = 0; + int offset_x = 0; + int offset_y = 0; + bool is_10bit = false; + AVBufferRef *hw_frames_ctx = nullptr; + frame_t hwframe; + std::uint64_t sequence = 0; + + // Vulkan device (from FFmpeg) + struct vk_device_t { + VkDevice dev = VK_NULL_HANDLE; + VkPhysicalDevice phys_dev = VK_NULL_HANDLE; + AVVulkanDeviceContext *ctx = nullptr; + int compute_qf = -1; + VkQueue compute_queue = VK_NULL_HANDLE; + PFN_vkGetMemoryFdPropertiesKHR getMemoryFdProperties = nullptr; + }; + + vk_device_t vk_dev = {}; + + // Compute pipeline + struct compute_pipeline_t { + VkShaderModule shader_module = VK_NULL_HANDLE; + VkDescriptorSetLayout ds_layout = VK_NULL_HANDLE; + VkPipelineLayout pipeline_layout = VK_NULL_HANDLE; + VkPipeline pipeline = VK_NULL_HANDLE; + VkDescriptorPool desc_pool = VK_NULL_HANDLE; + VkDescriptorSet desc_set = VK_NULL_HANDLE; + VkSampler sampler = VK_NULL_HANDLE; + }; + + compute_pipeline_t compute = {}; + + // Command submission — ring of buffers to avoid reuse while in-flight. + // No CPU waits: by the time we wrap around, the old submission is long done. 
+ static constexpr int CMD_RING_SIZE = 3; + + struct cmd_submission_t { + VkCommandPool pool = VK_NULL_HANDLE; + std::array ring = {}; + int ring_idx = 0; + }; + + cmd_submission_t cmd = {}; + + // Source DMA-BUF image with deferred destruction + struct src_image_t { + VkImage image = VK_NULL_HANDLE; + VkDeviceMemory mem = VK_NULL_HANDLE; + VkImageView view = VK_NULL_HANDLE; + }; + + src_image_t src = {}; + static constexpr int DEFER_RING_SIZE = 4; + std::array defer_ring = {}; + int defer_idx = 0; + + // Target NV12 plane views + struct target_state_t { + VkImageView y_view = VK_NULL_HANDLE; + VkImageView uv_view = VK_NULL_HANDLE; + bool views_created = false; + bool initialized = false; + }; + + target_state_t target = {}; + + bool descriptors_dirty = false; + + // Cursor image + struct { + VkImage image = VK_NULL_HANDLE; + VkDeviceMemory mem = VK_NULL_HANDLE; + VkImageView view = VK_NULL_HANDLE; + bool needs_transition = false; + } cursor = {}; + + unsigned long cursor_serial = 0; + + // Push constants (color matrix) + PushConstants push = {}; + }; + + // Free functions + + int vulkan_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *, AVBufferRef **hw_device_buf) { + return create_vulkan_hwdevice(hw_device_buf); + } + + bool validate() { + if (!avcodec_find_encoder_by_name("h264_vulkan") && !avcodec_find_encoder_by_name("hevc_vulkan")) { + return false; + } + AVBufferRef *dev = nullptr; + if (create_vulkan_hwdevice(&dev) < 0) { + return false; + } + av_buffer_unref(&dev); + return true; + } + + std::unique_ptr make_avcodec_encode_device_vram(int w, int h, int offset_x, int offset_y) { + auto dev = std::make_unique(); + if (dev->init(w, h, offset_x, offset_y) < 0) { + return nullptr; + } + return dev; + } + + std::unique_ptr make_avcodec_encode_device_ram(int, int) { + return nullptr; + } + +} // namespace vk diff --git a/src/platform/linux/vulkan_encode.h b/src/platform/linux/vulkan_encode.h new file mode 100644 index 00000000000..db887f504c7 
--- /dev/null +++ b/src/platform/linux/vulkan_encode.h @@ -0,0 +1,36 @@ +/** + * @file src/platform/linux/vulkan_encode.h + * @brief Declarations for FFmpeg Vulkan Video encoder. + */ +#pragma once + +#include "src/platform/common.h" + +extern "C" struct AVBufferRef; + +namespace vk { + + /** + * @brief Initialize Vulkan hardware device for FFmpeg encoding. + * @param encode_device The encode device (vk_t). + * @param hw_device_buf Output hardware device buffer. + * @return 0 on success, negative on error. + */ + int vulkan_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device, AVBufferRef **hw_device_buf); + + /** + * @brief Create a Vulkan encode device for RAM capture. + */ + std::unique_ptr make_avcodec_encode_device_ram(int width, int height); + + /** + * @brief Create a Vulkan encode device for VRAM capture. + */ + std::unique_ptr make_avcodec_encode_device_vram(int width, int height, int offset_x, int offset_y); + + /** + * @brief Check if FFmpeg Vulkan Video encoding is available. 
+ */ + bool validate(); + +} // namespace vk diff --git a/src/platform/linux/wayland.cpp b/src/platform/linux/wayland.cpp index 03765629e40..6d6195ffce2 100644 --- a/src/platform/linux/wayland.cpp +++ b/src/platform/linux/wayland.cpp @@ -374,7 +374,7 @@ namespace wl { } // Create GBM buffer - current_bo = gbm_bo_create(gbm_device, dmabuf_info.width, dmabuf_info.height, dmabuf_info.format, GBM_BO_USE_RENDERING); + current_bo = gbm_bo_create(gbm_device, dmabuf_info.width, dmabuf_info.height, dmabuf_info.format, GBM_BO_USE_RENDERING | GBM_BO_USE_LINEAR); if (!current_bo) { BOOST_LOG(error) << "Failed to create GBM buffer"sv; zwlr_screencopy_frame_v1_destroy(frame); diff --git a/src/platform/linux/wayland.h b/src/platform/linux/wayland.h index 286c247bb52..d765950f887 100644 --- a/src/platform/linux/wayland.h +++ b/src/platform/linux/wayland.h @@ -69,6 +69,7 @@ namespace wl { std::array frames; frame_t *current_frame; zwlr_screencopy_frame_v1_listener listener; + bool y_invert {false}; private: bool init_gbm(); @@ -95,7 +96,6 @@ namespace wl { struct gbm_device *gbm_device {nullptr}; struct gbm_bo *current_bo {nullptr}; struct wl_buffer *current_wl_buffer {nullptr}; - bool y_invert {false}; }; class monitor_t { diff --git a/src/platform/linux/wlgrab.cpp b/src/platform/linux/wlgrab.cpp index 848e5121cab..917ea8614bb 100644 --- a/src/platform/linux/wlgrab.cpp +++ b/src/platform/linux/wlgrab.cpp @@ -11,6 +11,7 @@ #include "src/platform/common.h" #include "src/video.h" #include "vaapi.h" +#include "vulkan_encode.h" #include "wayland.h" using namespace std::literals; @@ -348,6 +349,7 @@ namespace wl { img->sd = current_frame->sd; img->frame_timestamp = current_frame->frame_timestamp; + img->y_invert = dmabuf.y_invert; // Prevent dmabuf from closing the file descriptors. 
std::fill_n(current_frame->sd.fds, 4, -1); @@ -377,6 +379,12 @@ namespace wl { } #endif +#ifdef SUNSHINE_BUILD_VULKAN + if (mem_type == platf::mem_type_e::vulkan) { + return vk::make_avcodec_encode_device_vram(width, height, 0, 0); + } +#endif + #ifdef SUNSHINE_BUILD_CUDA if (mem_type == platf::mem_type_e::cuda) { return cuda::make_avcodec_gl_encode_device(width, height, 0, 0); @@ -398,12 +406,12 @@ namespace wl { namespace platf { std::shared_ptr wl_display(mem_type_e hwdevice_type, const std::string &display_name, const video::config_t &config) { - if (hwdevice_type != platf::mem_type_e::system && hwdevice_type != platf::mem_type_e::vaapi && hwdevice_type != platf::mem_type_e::cuda) { + if (hwdevice_type != platf::mem_type_e::system && hwdevice_type != platf::mem_type_e::vaapi && hwdevice_type != platf::mem_type_e::cuda && hwdevice_type != platf::mem_type_e::vulkan) { BOOST_LOG(error) << "[wlgrab] Could not initialize display with the given hw device type."sv; return nullptr; } - if (hwdevice_type == platf::mem_type_e::vaapi || hwdevice_type == platf::mem_type_e::cuda) { + if (hwdevice_type == platf::mem_type_e::vaapi || hwdevice_type == platf::mem_type_e::cuda || hwdevice_type == platf::mem_type_e::vulkan) { auto wlr = std::make_shared(); if (wlr->init(hwdevice_type, display_name, config)) { return nullptr; diff --git a/src/video.cpp b/src/video.cpp index 6b2b635277c..67234bc112d 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -3,6 +3,7 @@ * @brief Definitions for video. 
*/ // standard includes +#include #include #include #include @@ -122,6 +123,7 @@ namespace video { util::Either vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *); util::Either cuda_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *); util::Either vt_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *); + util::Either vulkan_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *); class avcodec_software_encode_device_t: public platf::avcodec_encode_device_t { public: @@ -1012,7 +1014,78 @@ namespace video { // RC buffer size will be set in platform code if supported LIMITED_GOP_SIZE | PARALLEL_ENCODING | NO_RC_BUF_LIMIT }; -#endif + + #ifdef SUNSHINE_BUILD_VULKAN + encoder_t vulkan { + "vulkan"sv, + std::make_unique( + AV_HWDEVICE_TYPE_VULKAN, + AV_HWDEVICE_TYPE_NONE, + AV_PIX_FMT_VULKAN, + AV_PIX_FMT_NV12, + AV_PIX_FMT_P010, + AV_PIX_FMT_NONE, + AV_PIX_FMT_NONE, + vulkan_init_avcodec_hardware_input_buffer + ), + { + // AV1 + { + {"idr_interval"s, std::numeric_limits::max()}, + {"tune"s, &config::video.vk.tune}, + {"rc_mode"s, &config::video.vk.rc_mode}, + {"units"s, 0}, + {"usage"s, "stream"s}, + {"content"s, "rendered"s}, + {"async_depth"s, 1}, + }, + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options + {}, // Fallback options + "av1_vulkan"s, + }, + { + // HEVC + { + {"idr_interval"s, std::numeric_limits::max()}, + {"tune"s, &config::video.vk.tune}, + {"rc_mode"s, &config::video.vk.rc_mode}, + {"units"s, 0}, + {"usage"s, "stream"s}, + {"content"s, "rendered"s}, + {"async_depth"s, 1}, + }, + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options + {}, // Fallback options + "hevc_vulkan"s, + }, + { + // H.264 + { + {"idr_interval"s, std::numeric_limits::max()}, + {"tune"s, &config::video.vk.tune}, + {"rc_mode"s, &config::video.vk.rc_mode}, 
+ {"units"s, 0}, + {"usage"s, "stream"s}, + {"content"s, "rendered"s}, + {"async_depth"s, 1}, + }, + {}, // SDR-specific options + {}, // HDR-specific options + {}, // YUV444 SDR-specific options + {}, // YUV444 HDR-specific options + {}, // Fallback options + "h264_vulkan"s, + }, + LIMITED_GOP_SIZE | PARALLEL_ENCODING + }; + #endif // SUNSHINE_BUILD_VULKAN +#endif // linux #ifdef __APPLE__ encoder_t videotoolbox { @@ -1092,6 +1165,9 @@ namespace video { &mediafoundation, #endif #if defined(__linux__) || defined(linux) || defined(__linux) || defined(__FreeBSD__) + #ifdef SUNSHINE_BUILD_VULKAN + &vulkan, + #endif &vaapi, #endif #ifdef __APPLE__ @@ -2677,9 +2753,7 @@ namespace video { // Test 4:4:4 HDR first. If 4:4:4 is supported, 4:2:0 should also be supported. config.chromaSamplingType = 1; - if ((encoder.flags & YUV444_SUPPORT) && - disp->is_codec_supported(encoder_codec_name, config) && - validate_config(disp, encoder, config) >= 0) { + if ((encoder.flags & YUV444_SUPPORT) && disp->is_codec_supported(encoder_codec_name, config) && validate_config(disp, encoder, config) >= 0) { flag_map[encoder_t::DYNAMIC_RANGE] = true; flag_map[encoder_t::YUV444] = true; return; @@ -2689,8 +2763,7 @@ namespace video { // Test 4:2:0 HDR config.chromaSamplingType = 0; - if (disp->is_codec_supported(encoder_codec_name, config) && - validate_config(disp, encoder, config) >= 0) { + if (disp->is_codec_supported(encoder_codec_name, config) && validate_config(disp, encoder, config) >= 0) { flag_map[encoder_t::DYNAMIC_RANGE] = true; } else { flag_map[encoder_t::DYNAMIC_RANGE] = false; @@ -2798,15 +2871,13 @@ namespace video { } // Skip it if it doesn't support the specified codec at all - if ((active_hevc_mode >= 2 && !encoder->hevc[encoder_t::PASSED]) || - (active_av1_mode >= 2 && !encoder->av1[encoder_t::PASSED])) { + if ((active_hevc_mode >= 2 && !encoder->hevc[encoder_t::PASSED]) || (active_av1_mode >= 2 && !encoder->av1[encoder_t::PASSED])) { pos++; continue; } // Skip it if it 
doesn't support HDR on the specified codec - if ((active_hevc_mode == 3 && !encoder->hevc[encoder_t::DYNAMIC_RANGE]) || - (active_av1_mode == 3 && !encoder->av1[encoder_t::DYNAMIC_RANGE])) { + if ((active_hevc_mode == 3 && !encoder->hevc[encoder_t::DYNAMIC_RANGE]) || (active_av1_mode == 3 && !encoder->av1[encoder_t::DYNAMIC_RANGE])) { pos++; continue; } @@ -2935,6 +3006,43 @@ namespace video { return hw_device_buf; } +#ifdef SUNSHINE_BUILD_VULKAN + using vulkan_init_avcodec_hardware_input_buffer_fn = int (*)(platf::avcodec_encode_device_t *encode_device, AVBufferRef **hw_device_buf); + + util::Either vulkan_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device) { + avcodec_buffer_t hw_device_buf; + + if (encode_device && encode_device->data) { + if (((vulkan_init_avcodec_hardware_input_buffer_fn) encode_device->data)(encode_device, &hw_device_buf)) { + return -1; + } + return hw_device_buf; + } + + // Try render device path first (like VAAPI does), then fallback to device indices + auto render_device = config::video.adapter_name.empty() ? 
"/dev/dri/renderD128" : config::video.adapter_name.c_str(); + + auto status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_VULKAN, render_device, nullptr, 0); + if (status >= 0) { + BOOST_LOG(info) << "Using Vulkan device: "sv << render_device; + return hw_device_buf; + } + + // Fallback: try device indices for multi-GPU systems + const std::array devices = {"1", "0", "2", "3"}; + for (auto device : devices) { + status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_VULKAN, device, nullptr, 0); + if (status >= 0) { + BOOST_LOG(info) << "Using Vulkan device index: "sv << device; + return hw_device_buf; + } + } + + BOOST_LOG(error) << "Failed to create a Vulkan device"sv; + return -1; + } +#endif + util::Either cuda_init_avcodec_hardware_input_buffer(platf::avcodec_encode_device_t *encode_device) { avcodec_buffer_t hw_device_buf; @@ -3032,6 +3140,10 @@ namespace video { return platf::mem_type_e::dxgi; case AV_HWDEVICE_TYPE_VAAPI: return platf::mem_type_e::vaapi; +#ifdef SUNSHINE_BUILD_VULKAN + case AV_HWDEVICE_TYPE_VULKAN: + return platf::mem_type_e::vulkan; +#endif case AV_HWDEVICE_TYPE_CUDA: return platf::mem_type_e::cuda; case AV_HWDEVICE_TYPE_NONE: diff --git a/src_assets/common/assets/web/config.html b/src_assets/common/assets/web/config.html index b391e3e7470..b90a62145e3 100644 --- a/src_assets/common/assets/web/config.html +++ b/src_assets/common/assets/web/config.html @@ -332,6 +332,14 @@

{{ $t('config.configuration') }}

"vaapi_strict_rc_buffer": "disabled", }, }, + { + id: "vulkan", + name: "Vulkan Encoder", + options: { + "vk_tune": 2, + "vk_rc_mode": 4, + }, + }, { id: "sw", name: "Software Encoder", @@ -383,7 +391,7 @@

{{ $t('config.configuration') }}

var app = document.getElementById("app"); if (this.platform === "windows") { this.tabs = this.tabs.filter((el) => { - return el.id !== "vt" && el.id !== "vaapi"; + return el.id !== "vt" && el.id !== "vaapi" && el.id !== "vulkan"; }); } if (this.platform === "freebsd" || this.platform === "linux") { @@ -393,7 +401,7 @@

{{ $t('config.configuration') }}

} if (this.platform === "macos") { this.tabs = this.tabs.filter((el) => { - return el.id !== "amd" && el.id !== "nv" && el.id !== "qsv" && el.id !== "vaapi"; + return el.id !== "amd" && el.id !== "nv" && el.id !== "qsv" && el.id !== "vaapi" && el.id !== "vulkan"; }); } @@ -437,6 +445,7 @@

{{ $t('config.configuration') }}

'qsv': 'Gpu', 'vaapi': 'Gpu', 'vt': 'Gpu', + 'vulkan': 'Gpu', 'sw': 'Cpu', }; return iconMap[tabId] || 'Settings'; diff --git a/src_assets/common/assets/web/configs/tabs/Advanced.vue b/src_assets/common/assets/web/configs/tabs/Advanced.vue index d63d095f2d9..d0b13adf3f7 100644 --- a/src_assets/common/assets/web/configs/tabs/Advanced.vue +++ b/src_assets/common/assets/web/configs/tabs/Advanced.vue @@ -97,11 +97,13 @@ const config = ref(props.config)