Skip to content

Commit d348d7c

Browse files
authored
Handle H265 encoders that produce multi-slice IDR (#844)
1 parent e5311e9 commit d348d7c

File tree

2 files changed

+618
-40
lines changed

2 files changed

+618
-40
lines changed

readersampleprovider.go

Lines changed: 166 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package lksdk
1616

1717
import (
18+
"bytes"
1819
"context"
1920
"encoding/binary"
2021
"fmt"
@@ -42,6 +43,8 @@ const (
4243
defaultG711SamplesPerFrame = 160
4344
)
4445

46+
// annexBStartCode is the 4-byte start code {0, 0, 0, 1} that
// h265AccessUnitBuilder writes in front of each NAL unit when it joins
// several NAL units into a single buffer.
var annexBStartCode = [...]byte{0, 0, 0, 1}
47+
4548
// ---------------------------------
4649

4750
type H26xStreamingFormat int
@@ -94,6 +97,8 @@ type ReaderSampleProvider struct {
9497

9598
// for h265
9699
h265reader *h265reader.H265Reader
100+
// pending H265 NAL when we detect a new access unit
101+
h265PendingNAL *h265reader.NAL
97102

98103
// for ogg
99104
oggReader *oggreader.OggReader
@@ -385,77 +390,108 @@ func (p *ReaderSampleProvider) NextSample(ctx context.Context) (media.Sample, er
385390

386391
case webrtc.MimeTypeH265:
387392
var (
388-
isFrame bool
389-
needPrefix bool
393+
// haveVCL tracks whether we've started a picture (any VCL NAL seen).
394+
haveVCL bool
395+
builder h265AccessUnitBuilder
390396
)
391397

392398
for {
393-
nal, err := p.h265reader.NextNAL()
399+
nal, err := p.nextH265NAL()
394400
if err != nil {
401+
if err == io.EOF && haveVCL && len(sample.Data) > 0 {
402+
// Flush the last access unit at EOF.
403+
break
404+
}
395405
return sample, err
396406
}
397407

408+
if haveVCL {
409+
// Once we've started a picture, detect the next access-unit boundary.
410+
if nal.NalUnitType < 32 {
411+
// VCL: split when first_slice_segment_in_pic_flag starts a new picture.
412+
if isFirstSlice, ok := h265FirstSliceInPic(nal.Data); !ok || isFirstSlice {
413+
// If we can't parse the flag, err on the side of splitting.
414+
p.h265PendingNAL = nal
415+
break
416+
}
417+
} else {
418+
// Non-VCL after VCL belongs to the next access unit.
419+
switch nal.NalUnitType {
420+
case 40: // suffix SEI, ignore
421+
continue
422+
default:
423+
// Prefix SEI / VPS / SPS / PPS and other non-VCL NALs begin the next access unit.
424+
p.h265PendingNAL = nal
425+
}
426+
break
427+
}
428+
}
429+
398430
if nal.NalUnitType == 39 { // prefix SEI
399431
if p.appendUserTimestamp {
400432
if ts, ok := parseH265SEIUserTimestamp(nal.Data); ok {
401433
p.pendingUserTimestampUs = ts
402434
p.hasPendingUserTimestamp = true
403435
}
404436
}
405-
// If SEI, clear the data and do not return a frame (try next NAL)
406-
sample.Data = nil
407-
sample.Duration = 0
408-
return sample, nil
437+
// If SEI and no frame yet, skip it unless we're only holding param sets.
438+
if !haveVCL && len(sample.Data) == 0 {
439+
sample.Data = nil
440+
sample.Duration = 0
441+
return sample, nil
442+
}
443+
continue
409444
}
410445

411446
if nal.NalUnitType == 40 { // suffix SEI
412-
// Ignore suffix SEI entirely (do not parse or append).
413-
sample.Data = nil
414-
sample.Duration = 0
415-
return sample, nil
447+
// Ignore suffix SEI entirely.
448+
if !haveVCL && len(sample.Data) == 0 {
449+
sample.Data = nil
450+
sample.Duration = 0
451+
return sample, nil
452+
}
453+
continue
416454
}
417455

418-
// aggregate vps,sps,pps into a single AP packet (chrome requires this)
456+
// Aggregate VPS/SPS/PPS before the next access unit.
457+
// 32: VPS, 33: SPS, 34: PPS
419458
if nal.NalUnitType == 32 || nal.NalUnitType == 33 || nal.NalUnitType == 34 {
420-
sample.Data = append(sample.Data, []byte{0, 0, 0, 1}...) // add NAL prefix
421-
sample.Data = append(sample.Data, nal.Data...)
422-
needPrefix = true
459+
builder.AppendAnnexB(nal.Data)
460+
sample.Data = builder.Bytes()
423461
continue
424462
}
425463

426-
if needPrefix {
427-
sample.Data = append(sample.Data, []byte{0, 0, 0, 1}...) // add NAL prefix
428-
sample.Data = append(sample.Data, nal.Data...)
429-
} else {
430-
sample.Data = nal.Data
431-
}
432-
433-
if !isFrame {
434-
isFrame = nal.NalUnitType < 32
435-
}
464+
// Append this NAL to the current access unit payload.
465+
builder.Append(nal.Data)
466+
sample.Data = builder.Bytes()
436467

437-
if !isFrame {
468+
if nal.NalUnitType < 32 {
469+
haveVCL = true
470+
} else if !haveVCL {
438471
// return it without duration
439472
return sample, nil
440473
}
474+
}
441475

442-
// Attach the LKTS trailer to the encoded frame payload when enabled.
443-
// If we didn't see a preceding timestamp, we still append a trailer with
444-
// a zero timestamp.
445-
if p.appendUserTimestamp {
446-
ts := int64(0)
447-
if p.hasPendingUserTimestamp {
448-
ts = p.pendingUserTimestampUs
449-
p.hasPendingUserTimestamp = false
450-
p.pendingUserTimestampUs = 0
451-
}
452-
sample.Data = appendUserTimestampTrailer(sample.Data, ts)
453-
}
476+
if !haveVCL {
477+
return sample, nil
478+
}
454479

455-
sample.Duration = defaultH265FrameDuration
456-
break
480+
// Attach the LKTS trailer to the encoded frame payload when enabled.
481+
// If we didn't see a preceding timestamp, we still append a trailer with
482+
// a zero timestamp.
483+
if p.appendUserTimestamp {
484+
ts := int64(0)
485+
if p.hasPendingUserTimestamp {
486+
ts = p.pendingUserTimestampUs
487+
p.hasPendingUserTimestamp = false
488+
p.pendingUserTimestampUs = 0
489+
}
490+
sample.Data = appendUserTimestampTrailer(sample.Data, ts)
457491
}
458492

493+
sample.Duration = defaultH265FrameDuration
494+
459495
case webrtc.MimeTypeVP8, webrtc.MimeTypeVP9, webrtc.MimeTypeAV1:
460496
frame, header, err := p.ivfReader.ParseNextFrame()
461497
if err != nil {
@@ -691,3 +727,93 @@ func detectWavFormat(r io.Reader) (*wavReader, string, error) {
691727

692728
return wavReader, mime, nil
693729
}
730+
731+
func (p *ReaderSampleProvider) nextH265NAL() (*h265reader.NAL, error) {
732+
if p.h265PendingNAL != nil {
733+
// Consume the buffered NAL that starts the next access unit.
734+
nal := p.h265PendingNAL
735+
p.h265PendingNAL = nil
736+
return nal, nil
737+
}
738+
return p.h265reader.NextNAL()
739+
}
740+
741+
// h265FirstSliceInPic reads first_slice_segment_in_pic_flag, i.e. the most
// significant bit of the byte that follows the 2-byte NAL header.
//
// The first result is the flag value; the second reports whether nalData was
// long enough to contain it. When it is too short the function returns
// (true, false).
func h265FirstSliceInPic(nalData []byte) (bool, bool) {
	const nalHeaderLen = 2

	if len(nalData) <= nalHeaderLen {
		return true, false
	}
	return nalData[nalHeaderLen]>>7 == 1, true
}
748+
749+
// h265AccessUnitBuilder keeps a lone NAL as a raw slice and only allocates an
750+
// owned Annex-B buffer once we need to join multiple NAL units together.
751+
type h265AccessUnitBuilder struct {
752+
rawFirstNAL []byte
753+
data bytes.Buffer
754+
}
755+
756+
func (b *h265AccessUnitBuilder) Append(nalData []byte) {
757+
// Preserve the single-NAL fast path without copying until another NAL forces
758+
// us to materialize Annex-B framing.
759+
if b.rawFirstNAL == nil && b.data.Len() == 0 {
760+
b.rawFirstNAL = nalData
761+
return
762+
}
763+
764+
b.materializeAnnexB(len(nalData) + len(annexBStartCode))
765+
_, _ = b.data.Write(annexBStartCode[:])
766+
_, _ = b.data.Write(nalData)
767+
}
768+
769+
func (b *h265AccessUnitBuilder) AppendAnnexB(nalData []byte) {
770+
b.materializeAnnexB(len(nalData) + len(annexBStartCode))
771+
_, _ = b.data.Write(annexBStartCode[:])
772+
_, _ = b.data.Write(nalData)
773+
}
774+
775+
func (b *h265AccessUnitBuilder) Bytes() []byte {
776+
if b.rawFirstNAL != nil {
777+
return b.rawFirstNAL
778+
}
779+
return b.data.Bytes()
780+
}
781+
782+
func (b *h265AccessUnitBuilder) Len() int {
783+
if b.rawFirstNAL != nil {
784+
return len(b.rawFirstNAL)
785+
}
786+
return b.data.Len()
787+
}
788+
789+
func (b *h265AccessUnitBuilder) materializeAnnexB(extra int) {
790+
if b.rawFirstNAL == nil && b.data.Len() != 0 {
791+
if extra > 0 {
792+
b.grow(extra)
793+
}
794+
return
795+
}
796+
797+
needed := extra
798+
if b.rawFirstNAL != nil {
799+
// When the second NAL arrives, convert the saved raw NAL into
800+
// Annex-B form exactly once before appending new data.
801+
needed += len(annexBStartCode) + len(b.rawFirstNAL)
802+
}
803+
804+
var data bytes.Buffer
805+
data.Grow(needed)
806+
if b.rawFirstNAL != nil {
807+
_, _ = data.Write(annexBStartCode[:])
808+
_, _ = data.Write(b.rawFirstNAL)
809+
b.rawFirstNAL = nil
810+
}
811+
b.data = data
812+
}
813+
814+
func (b *h265AccessUnitBuilder) grow(extra int) {
815+
if b.data.Cap()-b.data.Len() >= extra {
816+
return
817+
}
818+
b.data.Grow(extra)
819+
}

0 commit comments

Comments
 (0)