Skip to content

Commit 50f740d

Browse files
refactor(nodes): use VA-API Image API for encoders, drop GBM encoder path
Replace GBM buffer allocation (GbmVideoFrame + GBM_BO_USE_HW_VIDEO_ENCODER) with direct VA surface creation + Image API upload (vaCreateImage/vaPutImage) for both AV1 and H264 VA-API encoders. This bypasses the GBM NV12 allocation that Mesa's iris driver rejects on Intel Tiger Lake, eliminating the need for the vendored GbmUsage::Linear and GbmUsage::Separated workarounds.

Changes:
- Add open_va_display() helper (VA-only, no GBM device needed)
- Add write_nv12_to_va_surface() with bounds-check error handling (#291)
- Encoder type aliases use Surface<()> instead of GbmVideoFrame
- Encoder structs drop gbm/gbm_usage fields
- Encoder::encode() creates VA surfaces and uploads via Image API
- Revert vendored gbm_video_frame.rs to upstream (drop Linear/Separated)
- Simplify decoder alloc callbacks to GbmUsage::Decode only
- Update Cargo.toml vendor comment (now only for display_resolution #292)

Decoders remain GBM-backed (GBM_BO_USE_HW_VIDEO_DECODER works on all tested hardware including Tiger Lake).

Signed-off-by: StreamKit Devin <devin@streamkit.dev>
Co-Authored-By: Claudio Costa <cstcld91@gmail.com>
1 parent 243541e commit 50f740d

File tree

4 files changed

+242
-407
lines changed

4 files changed

+242
-407
lines changed

Cargo.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,10 @@ must_use_candidate = "allow"
112112
doc_markdown = "allow"
113113

114114
# ---------------------------------------------------------------------------
115-
# Patched cros-codecs: adds GbmUsage::Linear for drivers where neither
116-
# GBM_BO_USE_HW_VIDEO_DECODER nor GBM_BO_USE_HW_VIDEO_ENCODER is supported
117-
# (e.g. Mesa iris on Intel Tiger Lake with Mesa 23.x).
118-
# Remove this patch once upstream cros-codecs ships the Linear variant.
115+
# Patched cros-codecs: adds display_resolution field to AV1 EncoderConfig
116+
# so the frame header signals render_width/render_height when the visible
117+
# area differs from the superblock-aligned coded dimensions (fixes #292).
118+
# Remove this patch once upstream cros-codecs ships display_resolution.
119119
# ---------------------------------------------------------------------------
120120
[patch.crates-io]
121121
cros-codecs = { path = "vendor/cros-codecs" }

crates/nodes/src/video/vaapi_av1.rs

Lines changed: 168 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,9 @@ use cros_codecs::encoder::{
6161
FrameMetadata as CrosFrameMetadata, PredictionStructure, RateControl, Tunings, VideoEncoder,
6262
};
6363
use cros_codecs::libva;
64-
use cros_codecs::video_frame::gbm_video_frame::{
65-
GbmDevice, GbmExternalBufferDescriptor, GbmUsage, GbmVideoFrame,
66-
};
67-
use cros_codecs::video_frame::{ReadMapping, VideoFrame as CrosVideoFrame, WriteMapping};
68-
use cros_codecs::{Fourcc as CrosFourcc, FrameLayout, PlaneLayout, Resolution as CrosResolution};
64+
use cros_codecs::video_frame::gbm_video_frame::{GbmDevice, GbmUsage, GbmVideoFrame};
65+
use cros_codecs::video_frame::{ReadMapping, VideoFrame as CrosVideoFrame};
66+
use cros_codecs::{FrameLayout, PlaneLayout, Resolution as CrosResolution};
6967

7068
use super::encoder_trait::{self, EncodedPacket, EncoderNodeRunner, StandardVideoEncoder};
7169
use super::HwAccelMode;
@@ -163,6 +161,136 @@ pub(super) fn open_va_and_gbm(
163161
Ok((display, gbm, path))
164162
}
165163

164+
/// Open a VA display without a GBM device.
165+
///
166+
/// Used by encoder paths that pass VA surfaces directly to the encoder,
167+
/// bypassing GBM buffer allocation entirely. This avoids the
168+
/// `GBM_BO_USE_HW_VIDEO_ENCODER` flag that Mesa's iris driver does not
169+
/// support for NV12 on some hardware (e.g. Intel Tiger Lake).
170+
pub(super) fn open_va_display(
171+
render_device: Option<&String>,
172+
) -> Result<(Rc<libva::Display>, String), String> {
173+
let path = resolve_render_device(render_device);
174+
let display = libva::Display::open_drm_display(&path)
175+
.map_err(|e| format!("failed to open VA display on {path}: {e}"))?;
176+
Ok((display, path))
177+
}
178+
179+
/// Write NV12 (or I420→NV12) data from a StreamKit [`VideoFrame`] into a VA
180+
/// surface using the VA-API Image API.
181+
///
182+
/// Uses `vaCreateImage` + `vaMapBuffer` to obtain a writable mapping, writes
183+
/// NV12 data respecting the driver's internal pitches/offsets, then drops the
184+
/// [`Image`] which flushes the data back via `vaPutImage`.
185+
///
186+
/// Returns `(pitches, offsets)` — the per-plane stride and byte-offset arrays
187+
/// from the `VAImage`, needed to build the [`FrameLayout`] for the encoder.
188+
pub(super) fn write_nv12_to_va_surface(
189+
display: &Rc<libva::Display>,
190+
surface: &libva::Surface<()>,
191+
frame: &VideoFrame,
192+
) -> Result<([usize; 2], [usize; 2]), String> {
193+
let nv12_fourcc_val: u32 = nv12_fourcc().into();
194+
let image_fmts = display
195+
.query_image_formats()
196+
.map_err(|e| format!("failed to query VA image formats: {e}"))?;
197+
let image_fmt = image_fmts
198+
.into_iter()
199+
.find(|f| f.fourcc == nv12_fourcc_val)
200+
.ok_or("VA driver does not support NV12 image format")?;
201+
202+
let mut image = libva::Image::create_from(surface, image_fmt, surface.size(), surface.size())
203+
.map_err(|e| format!("failed to create VA image for NV12 upload: {e}"))?;
204+
205+
let va_image = *image.image();
206+
let y_pitch = va_image.pitches[0] as usize;
207+
let uv_pitch = va_image.pitches[1] as usize;
208+
let y_offset = va_image.offsets[0] as usize;
209+
let uv_offset = va_image.offsets[1] as usize;
210+
211+
let dest = image.as_mut();
212+
let src = frame.data.as_ref().as_ref();
213+
let w = frame.width as usize;
214+
let h = frame.height as usize;
215+
216+
match frame.pixel_format {
217+
PixelFormat::Nv12 => {
218+
// Y plane.
219+
for row in 0..h {
220+
let s = row * w;
221+
let d = y_offset + row * y_pitch;
222+
if s + w > src.len() || d + w > dest.len() {
223+
return Err(format!(
224+
"NV12 Y row copy out of bounds: src[{}..{}] (len {}), dest[{}..{}] (len {})",
225+
s, s + w, src.len(), d, d + w, dest.len()
226+
));
227+
}
228+
dest[d..d + w].copy_from_slice(&src[s..s + w]);
229+
}
230+
// UV plane (already interleaved in NV12).
231+
let uv_h = h.div_ceil(2);
232+
let chroma_w = w.div_ceil(2) * 2;
233+
let src_uv = &src[w * h..];
234+
for row in 0..uv_h {
235+
let s = row * chroma_w;
236+
let d = uv_offset + row * uv_pitch;
237+
if s + chroma_w > src_uv.len() || d + chroma_w > dest.len() {
238+
return Err(format!(
239+
"NV12 UV row copy out of bounds: src[{}..{}] (len {}), dest[{}..{}] (len {})",
240+
s, s + chroma_w, src_uv.len(), d, d + chroma_w, dest.len()
241+
));
242+
}
243+
dest[d..d + chroma_w].copy_from_slice(&src_uv[s..s + chroma_w]);
244+
}
245+
},
246+
PixelFormat::I420 => {
247+
// Y plane — same as NV12.
248+
for row in 0..h {
249+
let s = row * w;
250+
let d = y_offset + row * y_pitch;
251+
if s + w > src.len() || d + w > dest.len() {
252+
return Err(format!(
253+
"I420 Y row copy out of bounds: src[{}..{}] (len {}), dest[{}..{}] (len {})",
254+
s, s + w, src.len(), d, d + w, dest.len()
255+
));
256+
}
257+
dest[d..d + w].copy_from_slice(&src[s..s + w]);
258+
}
259+
// I420 → NV12: interleave U and V into a single UV plane.
260+
let uv_w = w.div_ceil(2);
261+
let uv_h = h.div_ceil(2);
262+
let u_start = w * h;
263+
let v_start = u_start + uv_w * uv_h;
264+
for row in 0..uv_h {
265+
for col in 0..uv_w {
266+
let u_idx = u_start + row * uv_w + col;
267+
let v_idx = v_start + row * uv_w + col;
268+
let d = uv_offset + row * uv_pitch + col * 2;
269+
if u_idx >= src.len() || v_idx >= src.len() || d + 1 >= dest.len() {
270+
return Err(format!(
271+
"I420 UV interleave out of bounds: u_idx={u_idx}, v_idx={v_idx} \
272+
(src len {}), dst_idx={d} (dest len {})",
273+
src.len(),
274+
dest.len()
275+
));
276+
}
277+
dest[d] = src[u_idx];
278+
dest[d + 1] = src[v_idx];
279+
}
280+
}
281+
},
282+
other => {
283+
drop(image);
284+
return Err(format!("write_nv12_to_va_surface: unsupported pixel format {other:?}"));
285+
},
286+
}
287+
288+
// Drop `image` to flush data back to the surface via vaPutImage.
289+
drop(image);
290+
291+
Ok(([y_pitch, uv_pitch], [y_offset, uv_offset]))
292+
}
293+
166294
/// Copy NV12 plane data from a GBM read-mapping into a flat `Vec<u8>` suitable
167295
/// for a packed StreamKit [`VideoFrame`].
168296
///
@@ -576,26 +704,7 @@ fn vaapi_av1_decode_loop(
576704
let ch = coded_height;
577705
let mut alloc_cb = move || {
578706
let res = CrosResolution { width: cw, height: ch };
579-
gbm_ref
580-
.clone()
581-
.new_frame(nv12_fourcc(), res.clone(), res.clone(), GbmUsage::Decode)
582-
.or_else(|_| {
583-
gbm_ref.clone().new_frame(
584-
nv12_fourcc(),
585-
res.clone(),
586-
res.clone(),
587-
GbmUsage::Linear,
588-
)
589-
})
590-
.or_else(|_| {
591-
gbm_ref.clone().new_frame(
592-
nv12_fourcc(),
593-
res.clone(),
594-
res,
595-
GbmUsage::Separated,
596-
)
597-
})
598-
.ok()
707+
gbm_ref.clone().new_frame(nv12_fourcc(), res.clone(), res, GbmUsage::Decode).ok()
599708
};
600709

601710
let mut made_progress = false;
@@ -877,20 +986,17 @@ impl EncoderNodeRunner for VaapiAv1EncoderNode {
877986
// Encoder — internal codec wrapper
878987
// ---------------------------------------------------------------------------
879988

880-
/// Type alias for the VA-API AV1 encoder using GBM-backed video frames.
989+
/// Type alias for the VA-API AV1 encoder using direct VA surfaces.
881990
///
882-
/// The `GbmVideoFrame` handle satisfies the `VideoFrame` trait bound
883-
/// required by `StatelessEncoder::new_vaapi()`. At runtime, GBM buffer
884-
/// allocation uses `GBM_BO_USE_HW_VIDEO_ENCODER` when supported, and
885-
/// falls back to `GBM_BO_USE_HW_VIDEO_DECODER` on drivers where the
886-
/// encoder flag is unsupported (e.g. Mesa iris on Intel Tiger Lake).
991+
/// Bypasses GBM buffer allocation entirely — input frames are uploaded to
992+
/// VA surfaces via the VA-API Image API and passed straight through to the
993+
/// encoder backend. This avoids the `GBM_BO_USE_HW_VIDEO_ENCODER` flag
994+
/// which Mesa's iris driver does not support for NV12 on some hardware
995+
/// (e.g. Intel Tiger Lake with Mesa 23.x).
887996
type CrosVaapiAv1Encoder = StatelessEncoder<
888997
cros_codecs::encoder::av1::AV1,
889-
GbmVideoFrame,
890-
cros_codecs::backend::vaapi::encoder::VaapiBackend<
891-
GbmExternalBufferDescriptor,
892-
libva::Surface<GbmExternalBufferDescriptor>,
893-
>,
998+
libva::Surface<()>,
999+
cros_codecs::backend::vaapi::encoder::VaapiBackend<(), libva::Surface<()>>,
8941000
>;
8951001

8961002
/// Internal encoder state wrapping the cros-codecs `StatelessEncoder`.
@@ -900,12 +1006,6 @@ type CrosVaapiAv1Encoder = StatelessEncoder<
9001006
struct VaapiAv1Encoder {
9011007
encoder: CrosVaapiAv1Encoder,
9021008
display: Rc<libva::Display>,
903-
gbm: Arc<GbmDevice>,
904-
/// GBM buffer usage flag. Defaults to `Encode` (optimal tiling for the
905-
/// encoder HW), but falls back to `Decode` on drivers where
906-
/// `GBM_BO_USE_HW_VIDEO_ENCODER` is unsupported (e.g. Mesa iris on
907-
/// Intel Tiger Lake with Mesa 23.x).
908-
gbm_usage: GbmUsage,
9091009
width: u32,
9101010
height: u32,
9111011
coded_width: u32,
@@ -918,61 +1018,12 @@ impl StandardVideoEncoder for VaapiAv1Encoder {
9181018
const CODEC_NAME: &'static str = "VA-API AV1";
9191019

9201020
fn new_encoder(width: u32, height: u32, config: &Self::Config) -> Result<Self, String> {
921-
let (display, gbm, path) = open_va_and_gbm(config.render_device.as_ref())?;
1021+
let (display, path) = open_va_display(config.render_device.as_ref())?;
9221022
tracing::info!(device = %path, width, height, "VA-API AV1 encoder opening");
9231023

9241024
let coded_width = align_up_u32(width, AV1_SB_SIZE);
9251025
let coded_height = align_up_u32(height, AV1_SB_SIZE);
9261026

927-
// Probe GBM encoder buffer support.
928-
//
929-
// Three-level fallback:
930-
// 1. GBM_BO_USE_HW_VIDEO_ENCODER — optimal tiling for the encoder HW
931-
// 2. GBM_BO_USE_HW_VIDEO_DECODER — decoder-tiled, still HW-friendly
932-
// 3. GBM_BO_USE_LINEAR — universally supported, no tiling
933-
//
934-
// Mesa iris on Intel Tiger Lake (Mesa ≤ 23.x) rejects both HW_VIDEO
935-
// flags for NV12 contiguous allocation; only LINEAR succeeds.
936-
let gbm_usage = {
937-
let probe_res = CrosResolution { width: coded_width, height: coded_height };
938-
let try_alloc = |usage: GbmUsage| {
939-
Arc::clone(&gbm).new_frame(
940-
nv12_fourcc(),
941-
probe_res.clone(),
942-
probe_res.clone(),
943-
usage,
944-
)
945-
};
946-
947-
if try_alloc(GbmUsage::Encode).is_ok() {
948-
tracing::debug!("GBM encoder buffer allocation OK (HW_VIDEO_ENCODER)");
949-
GbmUsage::Encode
950-
} else if try_alloc(GbmUsage::Decode).is_ok() {
951-
tracing::warn!(
952-
"GBM_BO_USE_HW_VIDEO_ENCODER unsupported on this driver; \
953-
falling back to GBM_BO_USE_HW_VIDEO_DECODER for encoder input buffers"
954-
);
955-
GbmUsage::Decode
956-
} else if try_alloc(GbmUsage::Linear).is_ok() {
957-
tracing::warn!(
958-
"GBM HW_VIDEO_ENCODER and HW_VIDEO_DECODER both unsupported; \
959-
falling back to GBM_BO_USE_LINEAR for encoder input buffers"
960-
);
961-
GbmUsage::Linear
962-
} else if try_alloc(GbmUsage::Separated).is_ok() {
963-
tracing::warn!(
964-
"GBM rejects NV12 fourcc with all usage flags; \
965-
falling back to per-plane R8 allocation (GbmUsage::Separated)"
966-
);
967-
GbmUsage::Separated
968-
} else {
969-
return Err(format!(
970-
"GBM cannot allocate NV12 {coded_width}×{coded_height} buffers \
971-
with any supported usage flag (tried Encode, Decode, Linear, Separated)"
972-
));
973-
}
974-
};
975-
9761027
// Pass display_resolution so the AV1 frame header sets
9771028
// render_width/render_height to the visible area, not the
9781029
// superblock-aligned coded dimensions (fixes #292).
@@ -1013,21 +1064,10 @@ impl StandardVideoEncoder for VaapiAv1Encoder {
10131064
coded_width,
10141065
coded_height,
10151066
quality = config.quality,
1016-
gbm_usage = ?gbm_usage,
10171067
"VA-API AV1 encoder created"
10181068
);
10191069

1020-
Ok(Self {
1021-
encoder,
1022-
display,
1023-
gbm,
1024-
gbm_usage,
1025-
width,
1026-
height,
1027-
coded_width,
1028-
coded_height,
1029-
frame_count: 0,
1030-
})
1070+
Ok(Self { encoder, display, width, height, coded_width, coded_height, frame_count: 0 })
10311071
}
10321072

10331073
fn encode(
@@ -1041,41 +1081,44 @@ impl StandardVideoEncoder for VaapiAv1Encoder {
10411081
.into());
10421082
}
10431083

1044-
// Allocate a GBM frame and write NV12 data into it.
1045-
let visible_res = CrosResolution { width: self.width, height: self.height };
1046-
let coded_res = CrosResolution { width: self.coded_width, height: self.coded_height };
1047-
let mut gbm_frame = Arc::clone(&self.gbm)
1048-
.new_frame(nv12_fourcc(), visible_res, coded_res, self.gbm_usage.clone())
1049-
.map_err(|e| format!("failed to allocate GBM frame for encoding: {e}"))?;
1050-
1051-
// Write NV12 (or I420→NV12) data into the GBM buffer.
1052-
let pitches = gbm_frame.get_plane_pitch();
1053-
{
1054-
let mapping = gbm_frame
1055-
.map_mut()
1056-
.map_err(|e| format!("failed to map GBM frame for writing: {e}"))?;
1057-
write_nv12_to_mapping(mapping.as_ref(), frame, &pitches)?;
1058-
}
1084+
// Create a VA surface and upload NV12 data via the Image API.
1085+
// This bypasses GBM buffer allocation (GBM_BO_USE_HW_VIDEO_ENCODER),
1086+
// which Mesa's iris driver does not support for NV12 on all hardware.
1087+
let nv12_fourcc_val: u32 = nv12_fourcc().into();
1088+
let mut surfaces = self
1089+
.display
1090+
.create_surfaces(
1091+
libva::VA_RT_FORMAT_YUV420,
1092+
Some(nv12_fourcc_val),
1093+
self.coded_width,
1094+
self.coded_height,
1095+
Some(libva::UsageHint::USAGE_HINT_ENCODER),
1096+
vec![()],
1097+
)
1098+
.map_err(|e| format!("failed to create VA surface for encoding: {e}"))?;
1099+
let surface =
1100+
surfaces.pop().ok_or_else(|| "create_surfaces returned empty vec".to_string())?;
1101+
1102+
// Write frame data into the VA surface.
1103+
let (pitches, offsets) = write_nv12_to_va_surface(&self.display, &surface, frame)?;
10591104

10601105
let is_keyframe = metadata.as_ref().and_then(|m| m.keyframe).unwrap_or(false);
10611106
let timestamp = metadata.as_ref().and_then(|m| m.timestamp_us).unwrap_or(self.frame_count);
10621107

1063-
// Build the frame layout from the GBM buffer's pitches.
1064-
let plane_sizes = gbm_frame.get_plane_size();
10651108
let frame_layout = FrameLayout {
1066-
format: (nv12_fourcc(), 0),
1109+
format: (nv12_fourcc(), 0), // DRM_FORMAT_MOD_LINEAR
10671110
size: CrosResolution { width: self.coded_width, height: self.coded_height },
10681111
planes: vec![
1069-
PlaneLayout { buffer_index: 0, offset: 0, stride: pitches[0] },
1070-
PlaneLayout { buffer_index: 0, offset: plane_sizes[0], stride: pitches[1] },
1112+
PlaneLayout { buffer_index: 0, offset: offsets[0], stride: pitches[0] },
1113+
PlaneLayout { buffer_index: 0, offset: offsets[1], stride: pitches[1] },
10711114
],
10721115
};
10731116

10741117
let cros_meta =
10751118
CrosFrameMetadata { timestamp, layout: frame_layout, force_keyframe: is_keyframe };
10761119

10771120
self.encoder
1078-
.encode(cros_meta, gbm_frame)
1121+
.encode(cros_meta, surface)
10791122
.map_err(|e| format!("VA-API AV1 encode error: {e}"))?;
10801123

10811124
self.frame_count += 1;

0 commit comments

Comments
 (0)