|
15 | 15 | package lksdk |
16 | 16 |
|
17 | 17 | import ( |
| 18 | + "bytes" |
18 | 19 | "context" |
19 | 20 | "encoding/binary" |
20 | 21 | "fmt" |
@@ -42,6 +43,8 @@ const ( |
42 | 43 | defaultG711SamplesPerFrame = 160 |
43 | 44 | ) |
44 | 45 |
|
// annexBStartCode is the 4-byte Annex-B start code written before each NAL
// unit when several NALs are framed into a single access-unit payload.
var annexBStartCode = [4]byte{0x00, 0x00, 0x00, 0x01}
| 47 | + |
45 | 48 | // --------------------------------- |
46 | 49 |
|
47 | 50 | type H26xStreamingFormat int |
@@ -94,6 +97,8 @@ type ReaderSampleProvider struct { |
94 | 97 |
|
95 | 98 | // for h265 |
96 | 99 | h265reader *h265reader.H265Reader |
| 100 | + // pending H265 NAL when we detect a new access unit |
| 101 | + h265PendingNAL *h265reader.NAL |
97 | 102 |
|
98 | 103 | // for ogg |
99 | 104 | oggReader *oggreader.OggReader |
@@ -385,77 +390,108 @@ func (p *ReaderSampleProvider) NextSample(ctx context.Context) (media.Sample, er |
385 | 390 |
|
386 | 391 | case webrtc.MimeTypeH265: |
387 | 392 | var ( |
388 | | - isFrame bool |
389 | | - needPrefix bool |
| 393 | + // haveVCL tracks whether we've started a picture (any VCL NAL seen). |
| 394 | + haveVCL bool |
| 395 | + builder h265AccessUnitBuilder |
390 | 396 | ) |
391 | 397 |
|
392 | 398 | for { |
393 | | - nal, err := p.h265reader.NextNAL() |
| 399 | + nal, err := p.nextH265NAL() |
394 | 400 | if err != nil { |
| 401 | + if err == io.EOF && haveVCL && len(sample.Data) > 0 { |
| 402 | + // Flush the last access unit at EOF. |
| 403 | + break |
| 404 | + } |
395 | 405 | return sample, err |
396 | 406 | } |
397 | 407 |
|
| 408 | + if haveVCL { |
| 409 | + // Once we've started a picture, detect the next access-unit boundary. |
| 410 | + if nal.NalUnitType < 32 { |
| 411 | + // VCL: split when first_slice_segment_in_pic_flag starts a new picture. |
| 412 | + if isFirstSlice, ok := h265FirstSliceInPic(nal.Data); !ok || isFirstSlice { |
| 413 | + // If we can't parse the flag, err on the side of splitting. |
| 414 | + p.h265PendingNAL = nal |
| 415 | + break |
| 416 | + } |
| 417 | + } else { |
| 418 | + // Non-VCL after VCL belongs to the next access unit. |
| 419 | + switch nal.NalUnitType { |
| 420 | + case 40: // suffix SEI, ignore |
| 421 | + continue |
| 422 | + default: |
| 423 | + // Prefix SEI / VPS / SPS / PPS and other non-VCL NALs begin the next access unit. |
| 424 | + p.h265PendingNAL = nal |
| 425 | + } |
| 426 | + break |
| 427 | + } |
| 428 | + } |
| 429 | + |
398 | 430 | if nal.NalUnitType == 39 { // prefix SEI |
399 | 431 | if p.appendUserTimestamp { |
400 | 432 | if ts, ok := parseH265SEIUserTimestamp(nal.Data); ok { |
401 | 433 | p.pendingUserTimestampUs = ts |
402 | 434 | p.hasPendingUserTimestamp = true |
403 | 435 | } |
404 | 436 | } |
405 | | - // If SEI, clear the data and do not return a frame (try next NAL) |
406 | | - sample.Data = nil |
407 | | - sample.Duration = 0 |
408 | | - return sample, nil |
| 437 | + // If SEI and no frame yet, skip it unless we're only holding param sets. |
| 438 | + if !haveVCL && len(sample.Data) == 0 { |
| 439 | + sample.Data = nil |
| 440 | + sample.Duration = 0 |
| 441 | + return sample, nil |
| 442 | + } |
| 443 | + continue |
409 | 444 | } |
410 | 445 |
|
411 | 446 | if nal.NalUnitType == 40 { // suffix SEI |
412 | | - // Ignore suffix SEI entirely (do not parse or append). |
413 | | - sample.Data = nil |
414 | | - sample.Duration = 0 |
415 | | - return sample, nil |
| 447 | + // Ignore suffix SEI entirely. |
| 448 | + if !haveVCL && len(sample.Data) == 0 { |
| 449 | + sample.Data = nil |
| 450 | + sample.Duration = 0 |
| 451 | + return sample, nil |
| 452 | + } |
| 453 | + continue |
416 | 454 | } |
417 | 455 |
|
418 | | - // aggregate vps,sps,pps into a single AP packet (chrome requires this) |
| 456 | + // Aggregate VPS/SPS/PPS before the next access unit. |
| 457 | + // 32: VPS, 33: SPS, 34: PPS |
419 | 458 | if nal.NalUnitType == 32 || nal.NalUnitType == 33 || nal.NalUnitType == 34 { |
420 | | - sample.Data = append(sample.Data, []byte{0, 0, 0, 1}...) // add NAL prefix |
421 | | - sample.Data = append(sample.Data, nal.Data...) |
422 | | - needPrefix = true |
| 459 | + builder.AppendAnnexB(nal.Data) |
| 460 | + sample.Data = builder.Bytes() |
423 | 461 | continue |
424 | 462 | } |
425 | 463 |
|
426 | | - if needPrefix { |
427 | | - sample.Data = append(sample.Data, []byte{0, 0, 0, 1}...) // add NAL prefix |
428 | | - sample.Data = append(sample.Data, nal.Data...) |
429 | | - } else { |
430 | | - sample.Data = nal.Data |
431 | | - } |
432 | | - |
433 | | - if !isFrame { |
434 | | - isFrame = nal.NalUnitType < 32 |
435 | | - } |
| 464 | + // Append this NAL to the current access unit payload. |
| 465 | + builder.Append(nal.Data) |
| 466 | + sample.Data = builder.Bytes() |
436 | 467 |
|
437 | | - if !isFrame { |
| 468 | + if nal.NalUnitType < 32 { |
| 469 | + haveVCL = true |
| 470 | + } else if !haveVCL { |
438 | 471 | // return it without duration |
439 | 472 | return sample, nil |
440 | 473 | } |
| 474 | + } |
441 | 475 |
|
442 | | - // Attach the LKTS trailer to the encoded frame payload when enabled. |
443 | | - // If we didn't see a preceding timestamp, we still append a trailer with |
444 | | - // a zero timestamp. |
445 | | - if p.appendUserTimestamp { |
446 | | - ts := int64(0) |
447 | | - if p.hasPendingUserTimestamp { |
448 | | - ts = p.pendingUserTimestampUs |
449 | | - p.hasPendingUserTimestamp = false |
450 | | - p.pendingUserTimestampUs = 0 |
451 | | - } |
452 | | - sample.Data = appendUserTimestampTrailer(sample.Data, ts) |
453 | | - } |
| 476 | + if !haveVCL { |
| 477 | + return sample, nil |
| 478 | + } |
454 | 479 |
|
455 | | - sample.Duration = defaultH265FrameDuration |
456 | | - break |
| 480 | + // Attach the LKTS trailer to the encoded frame payload when enabled. |
| 481 | + // If we didn't see a preceding timestamp, we still append a trailer with |
| 482 | + // a zero timestamp. |
| 483 | + if p.appendUserTimestamp { |
| 484 | + ts := int64(0) |
| 485 | + if p.hasPendingUserTimestamp { |
| 486 | + ts = p.pendingUserTimestampUs |
| 487 | + p.hasPendingUserTimestamp = false |
| 488 | + p.pendingUserTimestampUs = 0 |
| 489 | + } |
| 490 | + sample.Data = appendUserTimestampTrailer(sample.Data, ts) |
457 | 491 | } |
458 | 492 |
|
| 493 | + sample.Duration = defaultH265FrameDuration |
| 494 | + |
459 | 495 | case webrtc.MimeTypeVP8, webrtc.MimeTypeVP9, webrtc.MimeTypeAV1: |
460 | 496 | frame, header, err := p.ivfReader.ParseNextFrame() |
461 | 497 | if err != nil { |
@@ -691,3 +727,93 @@ func detectWavFormat(r io.Reader) (*wavReader, string, error) { |
691 | 727 |
|
692 | 728 | return wavReader, mime, nil |
693 | 729 | } |
| 730 | + |
| 731 | +func (p *ReaderSampleProvider) nextH265NAL() (*h265reader.NAL, error) { |
| 732 | + if p.h265PendingNAL != nil { |
| 733 | + // Consume the buffered NAL that starts the next access unit. |
| 734 | + nal := p.h265PendingNAL |
| 735 | + p.h265PendingNAL = nil |
| 736 | + return nal, nil |
| 737 | + } |
| 738 | + return p.h265reader.NextNAL() |
| 739 | +} |
| 740 | + |
// h265FirstSliceInPic reports whether a VCL NAL starts a new picture by
// reading first_slice_segment_in_pic_flag, the most significant bit of the
// first byte after the 2-byte NAL unit header. The second result is false
// when nalData is too short to carry that byte; in that case the first
// result defaults to true so callers err on the side of splitting.
func h265FirstSliceInPic(nalData []byte) (bool, bool) {
	const nalHeaderLen = 2
	if len(nalData) <= nalHeaderLen {
		return true, false
	}
	return nalData[nalHeaderLen]&0x80 != 0, true
}
| 748 | + |
| 749 | +// h265AccessUnitBuilder keeps a lone NAL as a raw slice and only allocates an |
| 750 | +// owned Annex-B buffer once we need to join multiple NAL units together. |
| 751 | +type h265AccessUnitBuilder struct { |
| 752 | + rawFirstNAL []byte |
| 753 | + data bytes.Buffer |
| 754 | +} |
| 755 | + |
| 756 | +func (b *h265AccessUnitBuilder) Append(nalData []byte) { |
| 757 | + // Preserve the single-NAL fast path without copying until another NAL forces |
| 758 | + // us to materialize Annex-B framing. |
| 759 | + if b.rawFirstNAL == nil && b.data.Len() == 0 { |
| 760 | + b.rawFirstNAL = nalData |
| 761 | + return |
| 762 | + } |
| 763 | + |
| 764 | + b.materializeAnnexB(len(nalData) + len(annexBStartCode)) |
| 765 | + _, _ = b.data.Write(annexBStartCode[:]) |
| 766 | + _, _ = b.data.Write(nalData) |
| 767 | +} |
| 768 | + |
| 769 | +func (b *h265AccessUnitBuilder) AppendAnnexB(nalData []byte) { |
| 770 | + b.materializeAnnexB(len(nalData) + len(annexBStartCode)) |
| 771 | + _, _ = b.data.Write(annexBStartCode[:]) |
| 772 | + _, _ = b.data.Write(nalData) |
| 773 | +} |
| 774 | + |
| 775 | +func (b *h265AccessUnitBuilder) Bytes() []byte { |
| 776 | + if b.rawFirstNAL != nil { |
| 777 | + return b.rawFirstNAL |
| 778 | + } |
| 779 | + return b.data.Bytes() |
| 780 | +} |
| 781 | + |
| 782 | +func (b *h265AccessUnitBuilder) Len() int { |
| 783 | + if b.rawFirstNAL != nil { |
| 784 | + return len(b.rawFirstNAL) |
| 785 | + } |
| 786 | + return b.data.Len() |
| 787 | +} |
| 788 | + |
| 789 | +func (b *h265AccessUnitBuilder) materializeAnnexB(extra int) { |
| 790 | + if b.rawFirstNAL == nil && b.data.Len() != 0 { |
| 791 | + if extra > 0 { |
| 792 | + b.grow(extra) |
| 793 | + } |
| 794 | + return |
| 795 | + } |
| 796 | + |
| 797 | + needed := extra |
| 798 | + if b.rawFirstNAL != nil { |
| 799 | + // When the second NAL arrives, convert the saved raw NAL into |
| 800 | + // Annex-B form exactly once before appending new data. |
| 801 | + needed += len(annexBStartCode) + len(b.rawFirstNAL) |
| 802 | + } |
| 803 | + |
| 804 | + var data bytes.Buffer |
| 805 | + data.Grow(needed) |
| 806 | + if b.rawFirstNAL != nil { |
| 807 | + _, _ = data.Write(annexBStartCode[:]) |
| 808 | + _, _ = data.Write(b.rawFirstNAL) |
| 809 | + b.rawFirstNAL = nil |
| 810 | + } |
| 811 | + b.data = data |
| 812 | +} |
| 813 | + |
| 814 | +func (b *h265AccessUnitBuilder) grow(extra int) { |
| 815 | + if b.data.Cap()-b.data.Len() >= extra { |
| 816 | + return |
| 817 | + } |
| 818 | + b.data.Grow(extra) |
| 819 | +} |
0 commit comments