Skip to content

Commit 9bd780c

Browse files
fix(nodes): derive codec metadata from actual encoder/decoder output
- VA-API H.264 encoder: parse NAL unit types to detect IDR keyframes instead of cloning input metadata (which blindly propagated video::colorbars keyframe=true to every packet). Also derive timestamp from CodedBitstreamBuffer metadata. flush_encoder() now emits proper metadata instead of None.

- VA-API AV1 encoder: scan OBU headers for Sequence Header to detect keyframes instead of cloning input metadata. Same timestamp and flush_encoder() fixes as H.264.

- VA-API H.264 decoder: use DecodedHandle::timestamp() for output frame metadata instead of cloning the current input packet's metadata. Fixes incorrect timestamps on B-frame reordered output.

- Vulkan Video decoder: always replace timestamp_us with the decoder output PTS (not just when input metadata had no timestamp). Create fresh metadata from decoder PTS when input had none.

- NV-AV1 encoder: remove frame-ordinal next_pts that was incorrectly used as timestamp_us when input had no metadata. Downstream consumers expect real microsecond timestamps, not counters.

- VA-API NV12 helpers: add bounds checks on UV-plane slice access to prevent panics on short buffers. Add up-front buffer size validation in write_nv12_to_mapping. Add debug_assert on plane count in dma_frame_layout. Fix codec-specific error message in shared helper.

Fixes keyframe flag propagation for downstream transport::rtmp, containers::mp4, and transport::moq::peer consumers. Fixes timestamp ordering for H.264 streams with B-frame reordering.

Signed-off-by: StreamKit Devin <devin@streamkit.dev>
Co-Authored-By: Claudio Costa <cstcld91@gmail.com>
1 parent 1d08b2c commit 9bd780c

File tree

4 files changed

+261
-41
lines changed

4 files changed

+261
-41
lines changed

crates/nodes/src/video/nv_av1.rs

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,6 @@ impl EncoderNodeRunner for NvAv1EncoderNode {
513513

514514
struct NvAv1Encoder {
515515
encoder: shiguredo_nvcodec::Encoder,
516-
next_pts: i64,
517516
}
518517

519518
impl StandardVideoEncoder for NvAv1Encoder {
@@ -558,7 +557,7 @@ impl StandardVideoEncoder for NvAv1Encoder {
558557
"NVENC AV1 encoder created"
559558
);
560559

561-
Ok(Self { encoder, next_pts: 0 })
560+
Ok(Self { encoder })
562561
}
563562

564563
fn encode(
@@ -609,11 +608,8 @@ impl NvAv1Encoder {
609608
);
610609
let data = Bytes::from(encoded.into_data());
611610

612-
let pts = self.next_pts;
613-
self.next_pts += 1;
614-
615611
let meta = remaining_metadata.take();
616-
let output_metadata = merge_keyframe_metadata(meta, is_keyframe, pts);
612+
let output_metadata = merge_keyframe_metadata(meta, is_keyframe);
617613

618614
packets.push(EncodedPacket { data, metadata: Some(output_metadata) });
619615
}
@@ -664,16 +660,16 @@ fn i420_to_nv12_buffer(frame: &VideoFrame) -> Vec<u8> {
664660
nv12
665661
}
666662

663+
/// Build output metadata from the input metadata (if any) with the correct
664+
/// keyframe flag from the encoder output. When input metadata is `None`
665+
/// (e.g. during `flush_encoder`), the timestamp is left as `None` rather
666+
/// than using a frame ordinal — downstream consumers expect real
667+
/// microsecond timestamps, not counters.
667668
#[allow(clippy::missing_const_for_fn)] // map_or with closures is not yet stable in const fn
668-
fn merge_keyframe_metadata(
669-
metadata: Option<PacketMetadata>,
670-
keyframe: bool,
671-
pts: i64,
672-
) -> PacketMetadata {
669+
fn merge_keyframe_metadata(metadata: Option<PacketMetadata>, keyframe: bool) -> PacketMetadata {
673670
metadata.map_or(
674671
PacketMetadata {
675-
#[allow(clippy::cast_sign_loss)]
676-
timestamp_us: if pts >= 0 { Some(pts as u64) } else { None },
672+
timestamp_us: None,
677673
duration_us: None,
678674
sequence: None,
679675
keyframe: Some(keyframe),

crates/nodes/src/video/vaapi_av1.rs

Lines changed: 147 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,14 @@ pub(super) fn upload_nv12_to_dma_frame(
313313
}
314314
let uv_h = h.div_ceil(2);
315315
let chroma_row_bytes = w.div_ceil(2) * 2;
316-
let src_uv = &src[w * h..];
316+
let y_size = w * h;
317+
if src.len() < y_size {
318+
return Err(format!(
319+
"NV12 source buffer too short for UV plane: {len} < {y_size} (Y size)",
320+
len = src.len(),
321+
));
322+
}
323+
let src_uv = &src[y_size..];
317324
for row in 0..uv_h {
318325
let s = row * chroma_row_bytes;
319326
let d = uv_offset_img + row * uv_pitch;
@@ -413,6 +420,16 @@ fn dma_frame_layout(
413420
coded_width: u32,
414421
coded_height: u32,
415422
) -> FrameLayout {
423+
// This layout computation assumes a single contiguous buffer object
424+
// (all planes share buffer_index 0). Multi-BO configurations would
425+
// need per-plane buffer_index values derived from the DMA-BUF handles.
426+
// NV12 has exactly 2 planes (Y + interleaved UV).
427+
debug_assert!(
428+
CrosVideoFrame::num_planes(dma_frame) == 2,
429+
"dma_frame_layout expects 2-plane NV12 but frame has {} planes",
430+
CrosVideoFrame::num_planes(dma_frame),
431+
);
432+
416433
let pitches = CrosVideoFrame::get_plane_pitch(dma_frame);
417434
let sizes = CrosVideoFrame::get_plane_size(dma_frame);
418435

@@ -511,18 +528,21 @@ pub(super) fn read_nv12_from_mapping(
511528
let mut data = vec![0u8; y_size + uv_size];
512529

513530
// Y plane.
514-
if !planes.is_empty() {
515-
let y_stride = plane_pitches.first().copied().unwrap_or(w);
516-
if y_stride == w {
517-
let copy_len = y_size.min(planes[0].len());
518-
data[..copy_len].copy_from_slice(&planes[0][..copy_len]);
519-
} else {
520-
for row in 0..h {
521-
let dst_off = row * w;
522-
let src_off = row * y_stride;
523-
if src_off + w <= planes[0].len() && dst_off + w <= y_size {
524-
data[dst_off..dst_off + w].copy_from_slice(&planes[0][src_off..src_off + w]);
525-
}
531+
if planes.is_empty() {
532+
tracing::warn!(width, height, "read_nv12_from_mapping: mapping returned no planes");
533+
return data;
534+
}
535+
536+
let y_stride = plane_pitches.first().copied().unwrap_or(w);
537+
if y_stride == w {
538+
let copy_len = y_size.min(planes[0].len());
539+
data[..copy_len].copy_from_slice(&planes[0][..copy_len]);
540+
} else {
541+
for row in 0..h {
542+
let dst_off = row * w;
543+
let src_off = row * y_stride;
544+
if src_off + w <= planes[0].len() && dst_off + w <= y_size {
545+
data[dst_off..dst_off + w].copy_from_slice(&planes[0][src_off..src_off + w]);
526546
}
527547
}
528548
}
@@ -567,6 +587,21 @@ pub(super) fn write_nv12_to_mapping(
567587
let h = frame.height as usize;
568588
let src = frame.data.as_ref().as_ref();
569589

590+
// Validate minimum source buffer size up-front so we return an error
591+
// rather than silently producing a partial (black-row) frame.
592+
let expected_min = match frame.pixel_format {
593+
PixelFormat::Nv12 => w * h + w.div_ceil(2) * 2 * h.div_ceil(2),
594+
PixelFormat::I420 => w * h + 2 * (w.div_ceil(2) * h.div_ceil(2)),
595+
_ => 0, // handled by the match-arm error below
596+
};
597+
if expected_min > 0 && src.len() < expected_min {
598+
return Err(format!(
599+
"source frame buffer too short for {:?} {w}×{h}: {len} < {expected_min}",
600+
frame.pixel_format,
601+
len = src.len(),
602+
));
603+
}
604+
570605
match frame.pixel_format {
571606
PixelFormat::Nv12 => {
572607
let y_size = w * h;
@@ -597,6 +632,12 @@ pub(super) fn write_nv12_to_mapping(
597632
if planes.len() > 1 {
598633
let uv_stride = plane_pitches.get(1).copied().unwrap_or(chroma_w);
599634
let mut uv_plane = planes[1].borrow_mut();
635+
if src.len() < y_size {
636+
return Err(format!(
637+
"NV12 source buffer too short for UV plane: {len} < {y_size} (Y size)",
638+
len = src.len(),
639+
));
640+
}
600641
let src_uv = &src[y_size..];
601642
if uv_stride == chroma_w {
602643
let n = uv_size.min(uv_plane.len()).min(src_uv.len());
@@ -656,7 +697,7 @@ pub(super) fn write_nv12_to_mapping(
656697
},
657698
_ => {
658699
return Err(format!(
659-
"VA-API AV1 encoder requires NV12 or I420 input, got {:?}",
700+
"VA-API encoder requires NV12 or I420 input, got {:?}",
660701
frame.pixel_format
661702
));
662703
},
@@ -1284,9 +1325,12 @@ impl StandardVideoEncoder for VaapiAv1Encoder {
12841325
loop {
12851326
match self.encoder.poll() {
12861327
Ok(Some(coded)) => {
1328+
let is_key = av1_bitstream_is_keyframe(&coded.bitstream);
1329+
let out_meta =
1330+
build_av1_encoder_output_metadata(metadata.clone(), is_key, &coded);
12871331
packets.push(EncodedPacket {
12881332
data: Bytes::from(coded.bitstream),
1289-
metadata: metadata.clone(),
1333+
metadata: Some(out_meta),
12901334
});
12911335
},
12921336
Ok(None) => break,
@@ -1304,8 +1348,12 @@ impl StandardVideoEncoder for VaapiAv1Encoder {
13041348
loop {
13051349
match self.encoder.poll() {
13061350
Ok(Some(coded)) => {
1307-
packets
1308-
.push(EncodedPacket { data: Bytes::from(coded.bitstream), metadata: None });
1351+
let is_key = av1_bitstream_is_keyframe(&coded.bitstream);
1352+
let out_meta = build_av1_encoder_output_metadata(None, is_key, &coded);
1353+
packets.push(EncodedPacket {
1354+
data: Bytes::from(coded.bitstream),
1355+
metadata: Some(out_meta),
1356+
});
13091357
},
13101358
Ok(None) => break,
13111359
Err(e) => return Err(format!("VA-API AV1 encoder poll error: {e}")),
@@ -1320,6 +1368,88 @@ impl StandardVideoEncoder for VaapiAv1Encoder {
13201368
}
13211369
}
13221370

1371+
// ---------------------------------------------------------------------------
1372+
// Encoder metadata helpers
1373+
// ---------------------------------------------------------------------------
1374+
1375+
use cros_codecs::encoder::CodedBitstreamBuffer;
1376+
1377+
/// Returns `true` if the AV1 bitstream contains a Sequence Header OBU
1378+
/// (type 1), which the cros-codecs AV1 encoder emits only at keyframes.
1379+
///
1380+
/// The `CodedBitstreamBuffer::metadata.force_keyframe` only reflects the
1381+
/// *input* request — not the actual coded frame type. Scanning for a
1382+
/// Sequence Header OBU gives ground truth because the cros-codecs AV1
1383+
/// predictor emits one exclusively for keyframes (both forced and periodic).
1384+
fn av1_bitstream_is_keyframe(bitstream: &[u8]) -> bool {
1385+
const OBU_SEQUENCE_HEADER: u8 = 1;
1386+
1387+
let mut pos = 0;
1388+
while pos < bitstream.len() {
1389+
let header = bitstream[pos];
1390+
let obu_type = (header >> 3) & 0x0F;
1391+
let has_extension = (header >> 2) & 1 != 0;
1392+
let has_size = (header >> 1) & 1 != 0;
1393+
pos += 1;
1394+
1395+
if obu_type == OBU_SEQUENCE_HEADER {
1396+
return true;
1397+
}
1398+
1399+
if has_extension {
1400+
if pos >= bitstream.len() {
1401+
break;
1402+
}
1403+
pos += 1;
1404+
}
1405+
1406+
if has_size {
1407+
// Parse LEB128 size field.
1408+
let mut size: usize = 0;
1409+
for shift in 0..8 {
1410+
if pos >= bitstream.len() {
1411+
return false;
1412+
}
1413+
let byte = bitstream[pos];
1414+
pos += 1;
1415+
size |= usize::from(byte & 0x7F) << (shift * 7);
1416+
if byte & 0x80 == 0 {
1417+
break;
1418+
}
1419+
}
1420+
pos += size;
1421+
} else {
1422+
// Without a size field the OBU extends to the end.
1423+
break;
1424+
}
1425+
}
1426+
false
1427+
}
1428+
1429+
/// Build output `PacketMetadata` from the coded bitstream rather than
1430+
/// blindly cloning the input metadata. The keyframe flag is determined
1431+
/// by OBU-header inspection and the timestamp comes from the coded
1432+
/// buffer's `FrameMetadata`.
1433+
fn build_av1_encoder_output_metadata(
1434+
input_meta: Option<PacketMetadata>,
1435+
is_keyframe: bool,
1436+
coded: &CodedBitstreamBuffer,
1437+
) -> PacketMetadata {
1438+
match input_meta {
1439+
Some(mut m) => {
1440+
m.keyframe = Some(is_keyframe);
1441+
m.timestamp_us = Some(coded.metadata.timestamp);
1442+
m
1443+
},
1444+
None => PacketMetadata {
1445+
timestamp_us: Some(coded.metadata.timestamp),
1446+
duration_us: None,
1447+
sequence: None,
1448+
keyframe: Some(is_keyframe),
1449+
},
1450+
}
1451+
}
1452+
13231453
// ---------------------------------------------------------------------------
13241454
// Registration
13251455
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)