diff --git a/src/demux/flv-demuxer.js b/src/demux/flv-demuxer.js index f3681e7..9342c16 100644 --- a/src/demux/flv-demuxer.js +++ b/src/demux/flv-demuxer.js @@ -26,6 +26,7 @@ import H265Parser from './h265-parser.js'; import buffersAreEqual from '../utils/typedarray-equality.ts'; import AV1OBUParser from './av1-parser.ts'; import ExpGolomb from './exp-golomb.js'; +import {FrameType, SliceHeaderParser} from './slice-header-parser.js'; function Swap16(src) { return (((src >>> 8) & 0xFF) | @@ -46,12 +47,20 @@ function ReadBig32(array, index) { (array[index + 3])); } +function isMacOS() { + if (navigator.userAgentData?.platform === "macOS") return true; + + return false; +} class FLVDemuxer { constructor(probeData, config) { this.TAG = 'FLVDemuxer'; + this._isMacOS = isMacOS(); + Log.d(this.TAG, 'isMacOS: ' + this._isMacOS); + this._config = config; this._onError = null; @@ -81,6 +90,16 @@ class FLVDemuxer { this._audioMetadata = null; this._videoMetadata = null; + this._h264SpsInfo = null; + this._h264MaxFrameNum = -1; + this._h264HasBFrame = false; + this._h264DroppingFrame = false; + this._h264LastVideoFrame = -1; + this._h264LastVideoFrameDts = -1; + this._h264LastVideoFramePts = -1; + this._h264LastIFrameDts = -1; + this._h264MinGopDuration = -1; + this._naluLengthSize = 4; this._timestampBase = 0; // int32, in milliseconds this._timescale = 1000; @@ -363,6 +382,15 @@ class FLVDemuxer { offset += 11 + dataSize + 4; // tagBody + dataSize + prevTagSize } + if (this._hasAudio && this._hasVideo && !this._audioInitialMetadataDispatched) { + // both audio & video, but audio initial meta data still not dispatched + let samples = this._videoTrack.samples; + if (samples.length > 0 && samples[samples.length - 1].dts > samples[0].dts + 3000) { + Log.d(this.TAG, 'we need regard it as video only, last sample: ' + samples[samples.length - 1].dts + ', first sample: ' + samples[0].dts); + this._hasAudio = false; + } + } + // dispatch parsed frames to consumer (typically, the remuxer) if (this._isInitialMetadataDispatched()) { if (this._dispatch && (this._audioTrack.length || this._videoTrack.length)) { @@ -1321,6 +1349,11 @@ class FLVDemuxer { continue; } + if (Object.keys(config).length === 0) continue; + + this._h264SpsInfo = config; + this._h264MaxFrameNum = Math.pow(2, config.log2_max_frame_num_minus4 + 4); + meta.codecWidth = config.codec_size.width; meta.codecHeight = config.codec_size.height; meta.presentWidth = config.present_size.width; @@ -1686,11 +1719,130 @@ class FLVDemuxer { cts: cts, pts: (dts + cts) }; + if (keyframe) { avcSample.fileposition = tagPosition; + if (this._h264LastIFrameDts !== -1) { + let gopDuration = dts - this._h264LastIFrameDts; + if (gopDuration > 0 && gopDuration < 50000) { + // valid GOP duration 0~50s + if (this._h264MinGopDuration === -1 || gopDuration < this._h264MinGopDuration) { + this._h264MinGopDuration = gopDuration; + Log.v(this.TAG, 'GOP minimum duration: ' + this._h264MinGopDuration); + } + } + } + + this._h264LastIFrameDts = dts; + } + + let dropThisFrame = false; + let slice = units[0].data.slice(lengthSize, units[0].data.length - lengthSize); + let result = SliceHeaderParser.parseSliceHeader(slice, this._h264SpsInfo); + if (result.success === true) { + Log.d(this.TAG, 'video sample, dts: ' + dts + ', size: ' + length + ', frame_type: ' + result.data.frame_type + + ', frame_num: ' + result.data.frame_num); + + if (result.data.frame_type === FrameType.FrameType_B) { + this._h264HasBFrame = true; + } + + // drop frame only for no B Frame case + if (this._h264HasBFrame === false) { + if (result.data.frame_type === FrameType.FrameType_I) { + // I frame + if (this._h264DroppingFrame) { + // after dropping frames, we need modify the timestamp for I frame + // since if non I frame has big duration, MSE will hang + let meta = this._videoMetadata; + if (meta) { + Log.w(this.TAG, 'dropping meet I frame, need stop dropping, last video frame dts: ' + + this._h264LastVideoFrameDts + ', I frame dts: ' + dts + ', ref duration: ' + meta.refSampleDuration); + avcSample.dts = this._h264LastVideoFrameDts + meta.refSampleDuration; + avcSample.pts = this._h264LastVideoFramePts + meta.refSampleDuration; + } else { + Log.w(this.TAG, 'dropping meet I frame, need stop dropping, last video frame dts: ' + + this._h264LastVideoFrameDts + ', I frame dts: ' + dts + ', ref duration: nil'); + avcSample.dts = this._h264LastVideoFrameDts + 40; + avcSample.pts = this._h264LastVideoFramePts + 40; + } + } + this._h264DroppingFrame = false; + this._h264LastVideoFrame = result.data.frame_num; + this._h264LastVideoFrameDts = dts; + this._h264LastVideoFramePts = (dts + cts); + } else { + // not I frame + if (this._h264DroppingFrame) { + dropThisFrame = true; + } else { + // if not dropping frame, we need judge if need start dropping + // 1, first frame; + // 2, normal case; + // 3, normal frame_num overflow case; + if ((this._h264LastVideoFrame === -1) || + (this._h264LastVideoFrame + 1 === result.data.frame_num) || + ((this._h264LastVideoFrame + 1) === this._h264MaxFrameNum && result.data.frame_num === 0)) { + this._h264LastVideoFrame = result.data.frame_num; + this._h264LastVideoFrameDts = dts; + this._h264LastVideoFramePts = (dts + cts); + } else if (this._h264LastVideoFrame === result.data.frame_num) { + // if same frame_num, we need judge timestamp + if (this._h264MinGopDuration !== -1 && dts > this._h264LastVideoFrameDts + this._h264MinGopDuration / 2) { + // maybe cross GOP + Log.w(this.TAG, 'frame_num not continuous(cross GOP): ' + this._h264LastVideoFrame + '(' + this._h264LastVideoFrameDts + '), ' + + result.data.frame_num + '(' + dts + '), need dropping...'); + this._h264DroppingFrame = true; + dropThisFrame = true; + } else { + // in same GOP, it is normal case since non-reference frame will use the same frame_num as previous reference frame + this._h264LastVideoFrame = result.data.frame_num; + this._h264LastVideoFrameDts = dts; + this._h264LastVideoFramePts = (dts + cts); + } + } else { + if (this._isMacOS) { + Log.w(this.TAG, 'frame_num not continuous: ' + this._h264LastVideoFrame + '(' + this._h264LastVideoFrameDts + '), ' + + result.data.frame_num + '(' + dts + '), need dropping...'); + this._h264DroppingFrame = true; + dropThisFrame = true; + } else { + if (this._h264LastIFrameDts !== -1 && this._h264MinGopDuration !== -1 && + dts >= this._h264LastIFrameDts + this._h264MinGopDuration) { + // only I frame dropped, we need drop frames to next I frame + // it seems MSE can handle P frames dropped case + Log.w(this.TAG, 'frame_num not continuous: ' + this._h264LastVideoFrame + '(' + this._h264LastVideoFrameDts + '), ' + + result.data.frame_num + '(' + dts + '), and I frame seems dropped, last I frame ' + this._h264LastIFrameDts + + ', gop duration: ' + this._h264MinGopDuration + ', need dropping...'); + this._h264DroppingFrame = true; + dropThisFrame = true; + } else { + Log.w(this.TAG, 'frame_num not continuous: ' + this._h264LastVideoFrame + '(' + this._h264LastVideoFrameDts + '), ' + + result.data.frame_num + '(' + dts + '), but I frame not dropped, last I frame ' + this._h264LastIFrameDts + + ', gop duration: ' + this._h264MinGopDuration + ', just warning.'); + this._h264LastVideoFrame = result.data.frame_num; + this._h264LastVideoFrameDts = dts; + this._h264LastVideoFramePts = (dts + cts); + } + } + } + } + } + } else { + // B frame detected, disable drop frame mechanism + this._h264DroppingFrame = false; + this._h264LastVideoFrame = -1; + } + } else { + Log.w(this.TAG, 'parse slice fail, video sample, dts: ' + dts + ', size: ' + length + ', keyframe: ' + keyframe); + dropThisFrame = true; + } + + if (!dropThisFrame) { + // Log.d(this.TAG, 'video sample, dts: ' + dts + ', size: ' + length + ', keyframe: ' + keyframe); + track.samples.push(avcSample); + track.length += length; } - track.samples.push(avcSample); - track.length += length; } } diff --git a/src/demux/slice-header-parser.js b/src/demux/slice-header-parser.js new file mode 100644 index 0000000..0f35910 --- /dev/null +++ b/src/demux/slice-header-parser.js @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2016 Bilibili. All Rights Reserved. + * + * @author East Zhou + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Log from '../utils/logger.js'; +import ExpGolomb from './exp-golomb.js'; + +export const FrameType = { + FrameType_U: 0, // unknown + FrameType_I: 1, // I frame + FrameType_P: 2, // P frame + FrameType_B: 3 // B frame +}; + +export class SliceHeaderParser { + static parseSliceHeader(uint8array, sps_info) { + do { + if (!sps_info) { + Log.e('SliceHeaderParser', 'missing sps or pps!'); + break; + } + + try { + let gb = new ExpGolomb(uint8array); + + let nalu_header = gb.readByte(); + if (nalu_header & 0x80) // forbidden_zero_bit:在H.264规范中规定了这一位必须为0。 + break; + + if (!(nalu_header & 0x60)) // 取00~11,似乎指示这个NALU的重要性, 如00的NALU解码器可以丢弃它而不影响图像的回放。 + break; + + let unitType = nalu_header & 0x1F; + gb.readUEG(); // first_mb_in_slice + let slice_type = gb.readUEG(); // slice_type + let slice_type_i = (slice_type % 5 === 2); + let slice_type_p = (slice_type % 5 === 0); + let slice_type_b = (slice_type % 5 === 1); + let slice_type_si = (slice_type % 5 === 4); + let slice_type_sp = (slice_type % 5 === 3); + + let frame_type = FrameType.FrameType_I; + if (slice_type_p || slice_type_sp) { + frame_type = FrameType.FrameType_P; + } else if (slice_type_b) { + frame_type = FrameType.FrameType_B; + } + + gb.readUEG(); // pic_parameter_set_id + + if (sps_info.separate_colour_plane_flag) { + gb.readBits(16); // colour_plane_id + } + + let frame_num = gb.readBits(sps_info.log2_max_frame_num_minus4 + 4); + + return { + success: true, + data: { + frame_type: frame_type, + frame_num: frame_num + } + }; + } catch (error) { + Log.e('SliceHeaderParser', error.message); + break; + } + } while (0); + + return { + success: false, + data: { + } + }; + } +} diff --git a/src/demux/sps-parser.js b/src/demux/sps-parser.js index 0cb1d8b..0c08eac 100644 --- a/src/demux/sps-parser.js +++ b/src/demux/sps-parser.js @@ -16,6 +16,7 @@ * limitations under the License. */ +import Log from '../utils/logger.js'; import ExpGolomb from './exp-golomb.js'; class SPSParser { @@ -56,13 +57,14 @@ class SPSParser { gb.readByte(); let profile_idc = gb.readByte(); // profile_idc - gb.readByte(); // constraint_set_flags[5] + reserved_zero[3] + gb.readByte(); // constraint_set_flags[6] + reserved_zero[2] let level_idc = gb.readByte(); // level_idc gb.readUEG(); // seq_parameter_set_id let profile_string = SPSParser.getProfileString(profile_idc); let level_string = SPSParser.getLevelString(level_idc); let chroma_format_idc = 1; + let separate_colour_plane_flag = 0; let chroma_format = 420; let chroma_format_table = [0, 420, 422, 444]; let bit_depth_luma = 8; @@ -72,10 +74,15 @@ class SPSParser { profile_idc === 244 || profile_idc === 44 || profile_idc === 83 || profile_idc === 86 || profile_idc === 118 || profile_idc === 128 || profile_idc === 138 || profile_idc === 144) { - chroma_format_idc = gb.readUEG(); + if (chroma_format_idc > 3) { + Log.e('SPSParser', 'illegal chroma format idc: ' + chroma_format_idc); + return { + }; + } + if (chroma_format_idc === 3) { - gb.readBits(1); // separate_colour_plane_flag + separate_colour_plane_flag = gb.readBits(1); // separate_colour_plane_flag } if (chroma_format_idc <= 3) { chroma_format = chroma_format_table[chroma_format_idc]; @@ -97,12 +104,14 @@ class SPSParser { } } } - gb.readUEG(); // log2_max_frame_num_minus4 - let pic_order_cnt_type = gb.readUEG(); + let log2_max_frame_num_minus4 = gb.readUEG(); // log2_max_frame_num_minus4 + let pic_order_cnt_type = gb.readUEG(); // pic_order_cnt_type + let log2_max_pic_order_cnt_lsb_minus_4 = 0; + let delta_pic_order_always_zero_flag = 0; if (pic_order_cnt_type === 0) { - gb.readUEG(); // log2_max_pic_order_cnt_lsb_minus_4 + log2_max_pic_order_cnt_lsb_minus_4 = gb.readUEG(); // log2_max_pic_order_cnt_lsb_minus_4 } else if (pic_order_cnt_type === 1) { - gb.readBits(1); // delta_pic_order_always_zero_flag + delta_pic_order_always_zero_flag = gb.readBits(1); // delta_pic_order_always_zero_flag gb.readSEG(); // offset_for_non_ref_pic gb.readSEG(); // offset_for_top_to_bottom_field let num_ref_frames_in_pic_order_cnt_cycle = gb.readUEG(); @@ -212,9 +221,14 @@ class SPSParser { profile_string, // baseline, high, high10, ... level_string, // 3, 3.1, 4, 4.1, 5, 5.1, ... chroma_format_idc, + separate_colour_plane_flag, bit_depth: bit_depth_luma, // 8bit, 10bit, ... bit_depth_luma, bit_depth_chroma, + log2_max_frame_num_minus4, + pic_order_cnt_type, + log2_max_pic_order_cnt_lsb_minus_4, + delta_pic_order_always_zero_flag, ref_frames, chroma_format, // 4:2:0, 4:2:2, ... chroma_format_string: SPSParser.getChromaFormatString(chroma_format), @@ -293,6 +307,11 @@ class SPSParser { } } + static getChromaFormat(chroma_format_idc) { + const chroma_format_table = [0, 420, 422, 444]; + return chroma_format_table[chroma_format_idc]; + } + } export default SPSParser; \ No newline at end of file diff --git a/src/remux/mp4-generator.js b/src/remux/mp4-generator.js index fa608a7..dafe265 100644 --- a/src/remux/mp4-generator.js +++ b/src/remux/mp4-generator.js @@ -24,22 +24,17 @@ class MP4 { MP4.types = { avc1: [], avcC: [], btrt: [], dinf: [], dref: [], esds: [], ftyp: [], hdlr: [], - hvc1: [], hvcC: [], av01: [], av1C: [], mdat: [], mdhd: [], mdia: [], mfhd: [], minf: [], moof: [], moov: [], mp4a: [], mvex: [], mvhd: [], sdtp: [], stbl: [], stco: [], stsc: [], stsd: [], stsz: [], stts: [], tfdt: [], tfhd: [], traf: [], trak: [], trun: [], trex: [], tkhd: [], - vmhd: [], smhd: [], chnl: [], - '.mp3': [], - Opus: [], dOps: [], fLaC: [], dfLa: [], - ipcm: [], pcmC: [], - 'ac-3': [], dac3: [], 'ec-3': [], dec3: [], + vmhd: [], smhd: [], '.mp3': [] }; for (let name in MP4.types) { - if (MP4.types.hasOwnProperty(name)) { + if (Object.prototype.hasOwnProperty.call(MP4.types, name)) { MP4.types[name] = [ name.charCodeAt(0), name.charCodeAt(1), @@ -316,8 +311,8 @@ class MP4 { MP4.box(MP4.types.stsc, MP4.constants.STSC), // Sample-To-Chunk MP4.box(MP4.types.stsz, MP4.constants.STSZ), // Sample size MP4.box(MP4.types.stco, MP4.constants.STCO) // Chunk offset - ); - return result; + ); + return result; } // Sample description box @@ -325,23 +320,9 @@ class MP4 { if (meta.type === 'audio') { if (meta.codec === 'mp3') { return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.mp3(meta)); - } else if (meta.codec === 'ac-3') { - return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.ac3(meta)); - } else if (meta.codec === 'ec-3') { - return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.ec3(meta)); - } else if(meta.codec === 'opus') { - return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.Opus(meta)); - } else if (meta.codec == 'flac') { - return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.fLaC(meta)); - } else if (meta.codec == 'ipcm') { - return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.ipcm(meta)); } // else: aac -> mp4a return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.mp4a(meta)); - } else if (meta.type === 'video' && meta.codec.startsWith('hvc1')) { - return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.hvc1(meta)); - } else if (meta.type === 'video' && meta.codec.startsWith('av01')) { - return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.av01(meta)); } else { return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.avc1(meta)); } @@ -387,46 +368,6 @@ class MP4 { return MP4.box(MP4.types.mp4a, data, MP4.esds(meta)); } - static ac3(meta) { - let channelCount = meta.channelCount; - let sampleRate = meta.audioSampleRate; - - let data = new Uint8Array([ - 0x00, 0x00, 0x00, 0x00, // reserved(4) - 0x00, 0x00, 0x00, 0x01, // reserved(2) + data_reference_index(2) - 0x00, 0x00, 0x00, 0x00, // reserved: 2 * 4 bytes - 0x00, 0x00, 0x00, 0x00, - 0x00, channelCount, // channelCount(2) - 0x00, 0x10, // sampleSize(2) - 0x00, 0x00, 0x00, 0x00, // reserved(4) - (sampleRate >>> 8) & 0xFF, // Audio sample rate - (sampleRate) & 0xFF, - 0x00, 0x00 - ]); - - return MP4.box(MP4.types['ac-3'], data, MP4.box(MP4.types.dac3, new Uint8Array(meta.config))); - } - - static ec3(meta) { - let channelCount = meta.channelCount; - let sampleRate = meta.audioSampleRate; - - let data = new Uint8Array([ - 0x00, 0x00, 0x00, 0x00, // reserved(4) - 0x00, 0x00, 0x00, 0x01, // reserved(2) + data_reference_index(2) - 0x00, 0x00, 0x00, 0x00, // reserved: 2 * 4 bytes - 0x00, 0x00, 0x00, 0x00, - 0x00, channelCount, // channelCount(2) - 0x00, 0x10, // sampleSize(2) - 0x00, 0x00, 0x00, 0x00, // reserved(4) - (sampleRate >>> 8) & 0xFF, // Audio sample rate - (sampleRate) & 0xFF, - 0x00, 0x00 - ]); - - return MP4.box(MP4.types['ec-3'], data, MP4.box(MP4.types.dec3, new Uint8Array(meta.config))); - } - static esds(meta) { let config = meta.config || []; let configSize = config.length; @@ -457,176 +398,6 @@ class MP4 { return MP4.box(MP4.types.esds, data); } - static Opus(meta) { - let channelCount = meta.channelCount; - let sampleRate = meta.audioSampleRate; - - let data = new Uint8Array([ - 0x00, 0x00, 0x00, 0x00, // reserved(4) - 0x00, 0x00, 0x00, 0x01, // reserved(2) + data_reference_index(2) - 0x00, 0x00, 0x00, 0x00, // reserved: 2 * 4 bytes - 0x00, 0x00, 0x00, 0x00, - 0x00, channelCount, // channelCount(2) - 0x00, 0x10, // sampleSize(2) - 0x00, 0x00, 0x00, 0x00, // reserved(4) - (sampleRate >>> 8) & 0xFF, // Audio sample rate - (sampleRate) & 0xFF, - 0x00, 0x00 - ]); - - return MP4.box(MP4.types.Opus, data, MP4.dOps(meta)); - } - - static dOps(meta) { - let channelCount = meta.channelCount; - let channelConfigCode = meta.channelConfigCode; - let sampleRate = meta.audioSampleRate; - - if (meta.config) { - return MP4.box(MP4.types.dOps, meta.config); - } - - let mapping = []; - switch (channelConfigCode) { - case 0x01: - case 0x02: - mapping = [0x0]; - break; - case 0x00: // dualmono - mapping = [0xFF, 1, 1, 0, 1]; - break; - case 0x80: // dualmono - mapping = [0xFF, 2, 0, 0, 1]; - break; - case 0x03: - mapping = [0x01, 2, 1, 0, 2, 1]; - break; - case 0x04: - mapping = [0x01, 2, 2, 0, 1, 2, 3]; - break; - case 0x05: - mapping = [0x01, 3, 2, 0, 4, 1, 2, 3]; - break; - case 0x06: - mapping = [0x01, 4, 2, 0, 4, 1, 2, 3, 5]; - break; - case 0x07: - mapping = [0x01, 4, 2, 0, 4, 1, 2, 3, 5, 6]; - break; - case 0x08: - mapping = [0x01, 5, 3, 0, 6, 1, 2, 3, 4, 5, 7]; - break; - case 0x82: - mapping = [0x01, 1, 2, 0, 1]; - break; - case 0x83: - mapping = [0x01, 1, 3, 0, 1, 2]; - break; - case 0x84: - mapping = [0x01, 1, 4, 0, 1, 2, 3]; - break; - case 0x85: - mapping = [0x01, 1, 5, 0, 1, 2, 3, 4]; - break; - case 0x86: - mapping = [0x01, 1, 6, 0, 1, 2, 3, 4, 5]; - break; - case 0x87: - mapping = [0x01, 1, 7, 0, 1, 2, 3, 4, 5, 6]; - break; - case 0x88: - mapping = [0x01, 1, 8, 0, 1, 2, 3, 4, 5, 6, 7]; - break; - } - - let data = new Uint8Array([ - 0x00, // Version (1) - channelCount, // OutputChannelCount: 2 - 0x00, 0x00, // PreSkip: 2 - (sampleRate >>> 24) & 0xFF, // Audio sample rate: 4 - (sampleRate >>> 17) & 0xFF, - (sampleRate >>> 8) & 0xFF, - (sampleRate >>> 0) & 0xFF, - 0x00, 0x00, // Global Gain : 2 - ... mapping - ]); - return MP4.box(MP4.types.dOps, data); - } - - static fLaC(meta) { - let channelCount = meta.channelCount; - let sampleRate = Math.min(meta.audioSampleRate, 65535); - let sampleSize = meta.sampleSize; - - let data = new Uint8Array([ - 0x00, 0x00, 0x00, 0x00, // reserved(4) - 0x00, 0x00, 0x00, 0x01, // reserved(2) + data_reference_index(2) - 0x00, 0x00, 0x00, 0x00, // reserved: 2 * 4 bytes - 0x00, 0x00, 0x00, 0x00, - 0x00, channelCount, // channelCount(2) - 0x00, (sampleSize), // sampleSize(2) - 0x00, 0x00, 0x00, 0x00, // reserved(4) - (sampleRate >>> 8) & 0xFF, // Audio sample rate - (sampleRate) & 0xFF, - 0x00, 0x00 - ]); - - return MP4.box(MP4.types.fLaC, data, MP4.dfLa(meta)); - } - - static dfLa(meta) { - let data = new Uint8Array([ - 0x00, 0x00, 0x00, 0x00, // version, flag - ... meta.config - ]); - return MP4.box(MP4.types.dfLa, data); - } - - static ipcm(meta) { - let channelCount = meta.channelCount; - let sampleRate = Math.min(meta.audioSampleRate, 65535); - let sampleSize = meta.sampleSize; - - let data = new Uint8Array([ - 0x00, 0x00, 0x00, 0x00, // reserved(4) - 0x00, 0x00, 0x00, 0x01, // reserved(2) + data_reference_index(2) - 0x00, 0x00, 0x00, 0x00, // reserved: 2 * 4 bytes - 0x00, 0x00, 0x00, 0x00, - 0x00, channelCount, // channelCount(2) - 0x00, (sampleSize), // sampleSize(2) - 0x00, 0x00, 0x00, 0x00, // reserved(4) - (sampleRate >>> 8) & 0xFF, // Audio sample rate - (sampleRate) & 0xFF, - 0x00, 0x00 - ]); - - if (meta.channelCount === 1) { - return MP4.box(MP4.types.ipcm, data, MP4.pcmC(meta)); - } else { - return MP4.box(MP4.types.ipcm, data, MP4.chnl(meta), MP4.pcmC(meta)); - } - } - - static chnl(meta) { - let data = new Uint8Array([ - 0x00, 0x00, 0x00, 0x00, // version, flag - 0x01, // Channel Based Layout - meta.channelCount, // AudioConfiguration - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // omittedChannelsMap - ]); - return MP4.box(MP4.types.chnl, data); - } - - static pcmC(meta) { - let littleEndian = meta.littleEndian ? 0x01 : 0x00 - let sampleSize = meta.sampleSize; - let data = new Uint8Array([ - 0x00, 0x00, 0x00, 0x00, // version, flag - littleEndian, sampleSize - ]); - return MP4.box(MP4.types.pcmC, data); - } - static avc1(meta) { let avcc = meta.avcc; let width = meta.codecWidth, height = meta.codecHeight; @@ -661,74 +432,6 @@ class MP4 { return MP4.box(MP4.types.avc1, data, MP4.box(MP4.types.avcC, avcc)); } - static hvc1(meta) { - let hvcc = meta.hvcc; - let width = meta.codecWidth, height = meta.codecHeight; - - let data = new Uint8Array([ - 0x00, 0x00, 0x00, 0x00, // reserved(4) - 0x00, 0x00, 0x00, 0x01, // reserved(2) + data_reference_index(2) - 0x00, 0x00, 0x00, 0x00, // pre_defined(2) + reserved(2) - 0x00, 0x00, 0x00, 0x00, // pre_defined: 3 * 4 bytes - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - (width >>> 8) & 0xFF, // width: 2 bytes - (width) & 0xFF, - (height >>> 8) & 0xFF, // height: 2 bytes - (height) & 0xFF, - 0x00, 0x48, 0x00, 0x00, // horizresolution: 4 bytes - 0x00, 0x48, 0x00, 0x00, // vertresolution: 4 bytes - 0x00, 0x00, 0x00, 0x00, // reserved: 4 bytes - 0x00, 0x01, // frame_count - 0x0A, // strlen - 0x78, 0x71, 0x71, 0x2F, // compressorname: 32 bytes - 0x66, 0x6C, 0x76, 0x2E, - 0x6A, 0x73, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, - 0x00, 0x18, // depth - 0xFF, 0xFF // pre_defined = -1 - ]); - return MP4.box(MP4.types.hvc1, data, MP4.box(MP4.types.hvcC, hvcc)); - } - - static av01(meta) { - let av1c = meta.av1c; - let width = meta.codecWidth || 192, height = meta.codecHeight || 108; - - let data = new Uint8Array([ - 0x00, 0x00, 0x00, 0x00, // reserved(4) - 0x00, 0x00, 0x00, 0x01, // reserved(2) + data_reference_index(2) - 0x00, 0x00, 0x00, 0x00, // pre_defined(2) + reserved(2) - 0x00, 0x00, 0x00, 0x00, // pre_defined: 3 * 4 bytes - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - (width >>> 8) & 0xFF, // width: 2 bytes - (width) & 0xFF, - (height >>> 8) & 0xFF, // height: 2 bytes - (height) & 0xFF, - 0x00, 0x48, 0x00, 0x00, // horizresolution: 4 bytes - 0x00, 0x48, 0x00, 0x00, // vertresolution: 4 bytes - 0x00, 0x00, 0x00, 0x00, // reserved: 4 bytes - 0x00, 0x01, // frame_count - 0x0A, // strlen - 0x78, 0x71, 0x71, 0x2F, // compressorname: 32 bytes - 0x66, 0x6C, 0x76, 0x2E, - 0x6A, 0x73, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, - 0x00, 0x18, // depth - 0xFF, 0xFF // pre_defined = -1 - ]); - return MP4.box(MP4.types.av01, data, MP4.box(MP4.types.av1C, av1c)); - } - // Movie Extends box static mvex(meta) { return MP4.box(MP4.types.mvex, MP4.trex(meta)); @@ -795,7 +498,7 @@ class MP4 { // Sample Dependency Type box static sdtp(track) { - let samples = track.samples || []; + let samples = track.mp4Samples || []; let sampleCount = samples.length; let data = new Uint8Array(4 + sampleCount); // 0~4 bytes: version(0) & flags @@ -811,7 +514,7 @@ class MP4 { // Track fragment run box static trun(track, offset) { - let samples = track.samples || []; + let samples = track.mp4Samples || []; let sampleCount = samples.length; let dataSize = 12 + 16 * sampleCount; let data = new Uint8Array(dataSize); diff --git a/src/remux/mp4-remuxer.js b/src/remux/mp4-remuxer.js index 8803357..f061254 100644 --- a/src/remux/mp4-remuxer.js +++ b/src/remux/mp4-remuxer.js @@ -20,8 +20,8 @@ import Log from '../utils/logger.js'; import MP4 from './mp4-generator.js'; import AAC from './aac-silent.js'; import Browser from '../utils/browser.js'; -import { SampleInfo, MediaSegmentInfo, MediaSegmentInfoList } from '../core/media-segment-info.js'; -import { IllegalStateException } from '../utils/exception.js'; +import {SampleInfo, MediaSegmentInfo, MediaSegmentInfoList} from '../core/media-segment-info.js'; +import {IllegalStateException} from '../utils/exception.js'; // Fragmented mp4 remuxer @@ -38,9 +38,8 @@ class MP4Remuxer { this._audioDtsBase = Infinity; this._videoDtsBase = Infinity; this._audioNextDts = undefined; - this._videoNextDts = undefined; - this._audioStashedLastSample = null; - this._videoStashedLastSample = null; + this._audioLastDts = undefined; + this._videoLastDts = undefined; this._audioMeta = null; this._videoMeta = null; @@ -119,12 +118,11 @@ class MP4Remuxer { } insertDiscontinuity() { - this._audioNextDts = this._videoNextDts = undefined; + this._audioNextDts = undefined; + this._audioLastDts = this._videoLastDts = undefined; } seek(originalDts) { - this._audioStashedLastSample = null; - this._videoStashedLastSample = null; this._videoSegmentInfoList.clear(); this._audioSegmentInfoList.clear(); } @@ -136,12 +134,8 @@ class MP4Remuxer { if (!this._dtsBaseInited) { this._calculateDtsBase(audioTrack, videoTrack); } - if (videoTrack) { - this._remuxVideo(videoTrack); - } - if (audioTrack) { - this._remuxAudio(audioTrack); - } + this._remuxVideo(videoTrack); + this._remuxAudio(audioTrack); } _onTrackMetadataReceived(type, metadata) { @@ -186,70 +180,36 @@ class MP4Remuxer { return; } - if (audioTrack && audioTrack.samples && audioTrack.samples.length) { + if (audioTrack.samples && audioTrack.samples.length) { this._audioDtsBase = audioTrack.samples[0].dts; } - if (videoTrack && videoTrack.samples && videoTrack.samples.length) { + if (videoTrack.samples && videoTrack.samples.length) { this._videoDtsBase = videoTrack.samples[0].dts; } this._dtsBase = Math.min(this._audioDtsBase, this._videoDtsBase); - this._dtsBaseInited = true; - } - - getTimestampBase() { - if (!this._dtsBaseInited) { - return undefined; - } - return this._dtsBase; - } - - flushStashedSamples() { - let videoSample = this._videoStashedLastSample; - let audioSample = this._audioStashedLastSample; - - let videoTrack = { - type: 'video', - id: 1, - sequenceNumber: 0, - samples: [], - length: 0 - }; - - if (videoSample != null) { - videoTrack.samples.push(videoSample); - videoTrack.length = videoSample.length; - } - let audioTrack = { - type: 'audio', - id: 2, - sequenceNumber: 0, - samples: [], - length: 0 - }; + Log.v(this.TAG, 'base DTS initialized: ' + this._dtsBase + ' (audio ' + this._audioDtsBase + + ', video ' + this._videoDtsBase + ')'); - if (audioSample != null) { - audioTrack.samples.push(audioSample); - audioTrack.length = audioSample.length; - } - - this._videoStashedLastSample = null; - this._audioStashedLastSample = null; - - this._remuxVideo(videoTrack, true); - this._remuxAudio(audioTrack, true); + this._dtsBaseInited = true; } - _remuxAudio(audioTrack, force) { + _remuxAudio(audioTrack) { if (this._audioMeta == null) { return; } let track = audioTrack; let samples = track.samples; - let dtsCorrection = undefined; - let firstDts = -1, lastDts = -1, lastPts = -1; + let sampleLength = samples.length; + // we will reserve the last sample for accurate sample duration + if (!samples || sampleLength <= 1) { + return; + } + + let lastSample = samples[sampleLength - 1]; + let firstDts = -1, lastDts = -1; let refSampleDuration = this._audioMeta.refSampleDuration; let mpegRawTrack = this._audioMeta.codec === 'mp3' && this._mp3UseMpegAudio; @@ -257,15 +217,6 @@ class MP4Remuxer { let insertPrefixSilentFrame = false; - if (!samples || samples.length === 0) { - return; - } - if (samples.length === 1 && !force) { - // If [sample count in current batch] === 1 && (force != true) - // Ignore and keep in demuxer's queue - return; - } // else if (force === true) do remux - let offset = 0; let mdatbox = null; let mdatBytes = 0; @@ -274,67 +225,25 @@ class MP4Remuxer { if (mpegRawTrack) { // for raw mpeg buffer offset = 0; - mdatBytes = track.length; + mdatBytes = track.length - lastSample.unit.byteLength; } else { // for fmp4 mdat box offset = 8; // size + type - mdatBytes = 8 + track.length; - } - - - let lastSample = null; - - // Pop the lastSample and waiting for stash - if (samples.length > 1) { - lastSample = samples.pop(); - mdatBytes -= lastSample.length; - } - - // Insert [stashed lastSample in the previous batch] to the front - if (this._audioStashedLastSample != null) { - let sample = this._audioStashedLastSample; - this._audioStashedLastSample = null; - samples.unshift(sample); - mdatBytes += sample.length; + mdatBytes = 8 + track.length - lastSample.unit.byteLength; } - // Stash the lastSample of current batch, waiting for next batch - if (lastSample != null) { - this._audioStashedLastSample = lastSample; - } - - let firstSampleOriginalDts = samples[0].dts - this._dtsBase; - // calculate dtsCorrection - if (this._audioNextDts) { - dtsCorrection = firstSampleOriginalDts - this._audioNextDts; - } else { // this._audioNextDts == undefined - if (this._audioSegmentInfoList.isEmpty()) { - dtsCorrection = 0; - if (this._fillSilentAfterSeek && !this._videoSegmentInfoList.isEmpty()) { - if (this._audioMeta.originalCodec !== 'mp3') { - insertPrefixSilentFrame = true; - } - } - } else { - let lastSample = this._audioSegmentInfoList.getLastSampleBefore(firstSampleOriginalDts); - if (lastSample != null) { - let distance = (firstSampleOriginalDts - (lastSample.originalDts + lastSample.duration)); - if (distance <= 3) { - distance = 0; - } - let expectedDts = lastSample.dts + lastSample.duration + distance; - dtsCorrection = firstSampleOriginalDts - expectedDts; - } else { // lastSample == null, cannot found - dtsCorrection = 0; - } - } + if (this._audioNextDts == undefined && + this._audioSegmentInfoList.isEmpty() && + this._fillSilentAfterSeek && !this._videoSegmentInfoList.isEmpty() && + this._audioMeta.originalCodec !== 'mp3') { + insertPrefixSilentFrame = true; } if (insertPrefixSilentFrame) { // align audio segment beginDts to match with current video segment's beginDts - let firstSampleDts = firstSampleOriginalDts - dtsCorrection; + let firstSampleDts = firstSampleOriginalDts; let videoSegment = this._videoSegmentInfoList.getLastSegmentBefore(firstSampleOriginalDts); if (videoSegment != null && videoSegment.beginDts < firstSampleDts) { let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount); @@ -342,7 +251,7 @@ class MP4Remuxer { let dts = videoSegment.beginDts; let silentFrameDuration = firstSampleDts - videoSegment.beginDts; Log.v(this.TAG, `InsertPrefixSilentAudio: dts: ${dts}, duration: ${silentFrameDuration}`); - samples.unshift({ unit: silentUnit, dts: dts, pts: dts }); + samples.unshift({unit: silentUnit, dts: dts, pts: dts}); mdatBytes += silentUnit.byteLength; } // silentUnit == null: Cannot generate, skip } else { @@ -353,113 +262,96 @@ class MP4Remuxer { let mp4Samples = []; // Correct dts for each sample, and calculate sample duration. Then output to mp4Samples - for (let i = 0; i < samples.length; i++) { + for (let i = 0; i < sampleLength - 1; i++) { let sample = samples[i]; let unit = sample.unit; let originalDts = sample.dts - this._dtsBase; let dts = originalDts; - let needFillSilentFrames = false; - let silentFrames = null; - let sampleDuration = 0; - if (originalDts < -0.001) { - continue; //pass the first sample with the invalid dts - } - - if (this._audioMeta.codec !== 'mp3' && refSampleDuration != null) { - // for AAC codec, we need to keep dts increase based on refSampleDuration - let curRefDts = originalDts; - const maxAudioFramesDrift = 3; - if (this._audioNextDts) { - curRefDts = this._audioNextDts; - } + //let myDate = new Date(); + //Log.d(this.TAG, 'audio sample, dts: ' + dts + ', size: ' + sample.unit.byteLength + ', time: ' + myDate.getTime()); - dtsCorrection = originalDts - curRefDts; - if (dtsCorrection <= -maxAudioFramesDrift * refSampleDuration) { - // If we're overlapping by more than maxAudioFramesDrift number of frame, drop this sample - Log.w(this.TAG, `Dropping 1 audio frame (originalDts: ${originalDts} ms ,curRefDts: ${curRefDts} ms) due to dtsCorrection: ${dtsCorrection} ms overlap.`); - continue; + if (this._audioLastDts) { + if (dts <= this._audioLastDts) { + Log.d(this.TAG, 'audio dts rollback, we need re-adjust it, ' + dts + ', ' + this._audioLastDts); + dts = this._audioLastDts + 1; } - else if (dtsCorrection >= maxAudioFramesDrift * refSampleDuration && this._fillAudioTimestampGap && !Browser.safari) { - // Silent frame generation, if large timestamp gap detected && config.fixAudioTimestampGap - needFillSilentFrames = true; - // We need to insert silent frames to fill timestamp gap - let frameCount = Math.floor(dtsCorrection / refSampleDuration); - Log.w(this.TAG, 'Large audio timestamp gap detected, may cause AV sync to drift. ' + - 'Silent frames will be generated to avoid unsync.\n' + - `originalDts: ${originalDts} ms, curRefDts: ${curRefDts} ms, ` + - `dtsCorrection: ${Math.round(dtsCorrection)} ms, generate: ${frameCount} frames`); - - - dts = Math.floor(curRefDts); - sampleDuration = Math.floor(curRefDts + refSampleDuration) - dts; - - let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount); - if (silentUnit == null) { - Log.w(this.TAG, 'Unable to generate silent frame for ' + - `${this._audioMeta.originalCodec} with ${this._audioMeta.channelCount} channels, repeat last frame`); - // Repeat last frame - silentUnit = unit; - } - silentFrames = []; - - for (let j = 0; j < frameCount; j++) { - curRefDts = curRefDts + refSampleDuration; - let intDts = Math.floor(curRefDts); // change to integer - let intDuration = Math.floor(curRefDts + refSampleDuration) - intDts; - let frame = { - dts: intDts, - pts: intDts, - cts: 0, - unit: silentUnit, - size: silentUnit.byteLength, - duration: intDuration, // wait for next sample - originalDts: originalDts, - flags: { - isLeading: 0, - dependsOn: 1, - isDependedOn: 0, - hasRedundancy: 0 - } - }; - silentFrames.push(frame); - mdatBytes += frame.size; + } + this._audioLastDts = dts; - } + if (firstDts === -1) { + firstDts = dts; + } - this._audioNextDts = curRefDts + refSampleDuration; + let nextDts = samples[i + 1].dts - this._dtsBase; + let sampleDuration = (nextDts > dts) ? (nextDts - dts) : 1; - } else { + let needFillSilentFrames = false; + let silentFrames = null; - dts = Math.floor(curRefDts); - sampleDuration = Math.floor(curRefDts + refSampleDuration) - dts; - this._audioNextDts = curRefDts + refSampleDuration; + // Silent frame generation, if large timestamp gap detected && config.fixAudioTimestampGap + if (sampleDuration > refSampleDuration * 1.5 && this._audioMeta.codec !== 'mp3' && this._fillAudioTimestampGap && !Browser.safari) { + // We need to insert silent frames to fill timestamp gap + Log.w(this.TAG, 'need insert silent audio frames, sampleDuration: ' + sampleDuration + ', ' + + 'refSampleDuration: ' + refSampleDuration); + needFillSilentFrames = true; + let delta = Math.abs(sampleDuration - refSampleDuration); + let frameCount = Math.ceil(delta / refSampleDuration); + let currentDts = dts + refSampleDuration; // Notice: in float + + Log.w(this.TAG, 'Large audio timestamp gap detected, may cause AV sync to drift. ' + + 'Silent frames will be generated to avoid unsync.\n' + + `dts: ${dts + sampleDuration} ms, expected: ${dts + Math.round(refSampleDuration)} ms, ` + + `delta: ${Math.round(delta)} ms, generate: ${frameCount} frames`); + let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount); + if (silentUnit == null) { + Log.w(this.TAG, 'Unable to generate silent frame for ' + + `${this._audioMeta.originalCodec} with ${this._audioMeta.channelCount} channels, repeat last frame`); + // Repeat last frame + silentUnit = unit; } - } else { - // keep the original dts calculate algorithm for mp3 - dts = originalDts - dtsCorrection; - - - if (i !== samples.length - 1) { - let nextDts = samples[i + 1].dts - this._dtsBase - dtsCorrection; - sampleDuration = nextDts - dts; - } else { // the last sample - if (lastSample != null) { // use stashed sample's dts to calculate sample duration - let nextDts = lastSample.dts - this._dtsBase - dtsCorrection; - sampleDuration = nextDts - dts; - } else if (mp4Samples.length >= 1) { // use second last sample duration - sampleDuration = mp4Samples[mp4Samples.length - 1].duration; - } else { // the only one sample, use reference sample duration - sampleDuration = Math.floor(refSampleDuration); + silentFrames = []; + + for (let j = 0; j < frameCount; j++) { + let intDts = Math.round(currentDts); // round to integer + if (silentFrames.length > 0) { + // Set previous frame sample duration + let previousFrame = silentFrames[silentFrames.length - 1]; + previousFrame.duration = intDts - previousFrame.dts; } + let frame = { + dts: intDts, + pts: intDts, + cts: 0, + unit: silentUnit, + size: silentUnit.byteLength, + duration: 0, // wait for next sample + originalDts: originalDts, + flags: { + isLeading: 0, + dependsOn: 1, + isDependedOn: 0, + hasRedundancy: 0 + } + }; + silentFrames.push(frame); + mdatBytes += unit.byteLength; + currentDts += refSampleDuration; } - this._audioNextDts = dts + sampleDuration; - } - if (firstDts === -1) { - firstDts = dts; + // last frame: align end time to next frame dts + let lastFrame = silentFrames[silentFrames.length - 1]; + lastFrame.duration = dts + sampleDuration - lastFrame.dts; + + // silentFrames.forEach((frame) => { + // Log.w(this.TAG, `SilentAudio: dts: ${frame.dts}, duration: ${frame.duration}`); + // }); + + // Set correct sample duration for current frame + sampleDuration = Math.round(refSampleDuration); } + mp4Samples.push({ dts: dts, pts: dts, @@ -476,19 +368,14 @@ class MP4Remuxer { } }); + // Log.d(this.TAG, 'mp4 audio sample, dts: ' + dts + ', size: ' + sample.unit.byteLength + ', duration: ' + sampleDuration + ', odts: ' + originalDts); + if (needFillSilentFrames) { // Silent frames should be inserted after wrong-duration frame mp4Samples.push.apply(mp4Samples, silentFrames); } } - if (mp4Samples.length === 0) { - //no samples need to remux - track.samples = []; - track.length = 0; - return; - } - // allocate mdatbox if (mpegRawTrack) { // allocate for raw mpeg buffer @@ -514,7 +401,7 @@ class MP4Remuxer { let latest = mp4Samples[mp4Samples.length - 1]; lastDts = latest.dts + latest.duration; - //this._audioNextDts = lastDts; + this._audioNextDts = lastDts; // fill media segment info & add to info list let info = new MediaSegmentInfo(); @@ -525,20 +412,20 @@ class MP4Remuxer { info.originalBeginDts = mp4Samples[0].originalDts; info.originalEndDts = latest.originalDts + latest.duration; info.firstSample = new SampleInfo(mp4Samples[0].dts, - mp4Samples[0].pts, - mp4Samples[0].duration, - mp4Samples[0].originalDts, - false); + mp4Samples[0].pts, + mp4Samples[0].duration, + mp4Samples[0].originalDts, + false); info.lastSample = new SampleInfo(latest.dts, - latest.pts, - latest.duration, - latest.originalDts, - false); + latest.pts, + latest.duration, + latest.originalDts, + false); if (!this._isLive) { this._audioSegmentInfoList.append(info); } - track.samples = mp4Samples; + track.mp4Samples = mp4Samples; track.sequenceNumber++; let moofbox = null; @@ -551,8 +438,9 @@ class MP4Remuxer { moofbox = MP4.moof(track, firstDts); } - track.samples = []; - track.length = 0; + track.length = lastSample.unit.byteLength; + track.samples.splice(0, sampleLength - 1); + track.mp4Samples = []; let segment = { type: 'audio', @@ -570,108 +458,64 @@ class MP4Remuxer { this._onMediaSegment('audio', segment); } - _remuxVideo(videoTrack, force) { + _remuxVideo(videoTrack) { if (this._videoMeta == null) { return; } let track = videoTrack; let samples = track.samples; - let dtsCorrection = undefined; + let sampleLength = samples.length; let firstDts = -1, lastDts = -1; let firstPts = -1, lastPts = -1; - if (!samples || samples.length === 0) { + // we will reserve the last sample for accurate sample duration + if (!samples || sampleLength <= 1) { return; } - if (samples.length === 1 && !force) { - // If [sample count in current batch] === 1 && (force != true) - // Ignore and keep in demuxer's queue - return; - } // else if (force === true) do remux + let lastSample = samples[sampleLength - 1]; let offset = 8; - let mdatbox = null; - let mdatBytes = 8 + videoTrack.length; - - - let lastSample = null; - - // Pop the lastSample and waiting for stash - if (samples.length > 1) { - lastSample = samples.pop(); - mdatBytes -= lastSample.length; - } - - // Insert [stashed lastSample in the previous batch] to the front - if (this._videoStashedLastSample != null) { - let sample = this._videoStashedLastSample; - this._videoStashedLastSample = null; - samples.unshift(sample); - mdatBytes += sample.length; - } - - // Stash the lastSample of current batch, waiting for next batch - if (lastSample != null) { - this._videoStashedLastSample = lastSample; - } - - - let firstSampleOriginalDts = samples[0].dts - this._dtsBase; - - // calculate dtsCorrection - if (this._videoNextDts) { - dtsCorrection = firstSampleOriginalDts - this._videoNextDts; - } else { // this._videoNextDts == undefined - if (this._videoSegmentInfoList.isEmpty()) { - dtsCorrection = 0; - } else { - let lastSample = this._videoSegmentInfoList.getLastSampleBefore(firstSampleOriginalDts); - if (lastSample != null) { - let distance = (firstSampleOriginalDts - (lastSample.originalDts + lastSample.duration)); - if (distance <= 3) { - distance = 0; - } - let expectedDts = lastSample.dts + lastSample.duration + distance; - dtsCorrection = firstSampleOriginalDts - expectedDts; - } else { // lastSample == null, cannot found - dtsCorrection = 0; - } - } - } + let mdatBytes = 8 + track.length - lastSample.length; + let mdatbox = new Uint8Array(mdatBytes); + mdatbox[0] = (mdatBytes >>> 24) & 0xFF; + mdatbox[1] = (mdatBytes >>> 16) & 0xFF; + mdatbox[2] = (mdatBytes >>> 8) & 0xFF; + mdatbox[3] = (mdatBytes) & 0xFF; + mdatbox.set(MP4.types.mdat, 4); let info = new MediaSegmentInfo(); let mp4Samples = []; // Correct dts for each sample, and calculate sample duration. Then output to mp4Samples - for (let i = 0; i < samples.length; i++) { + for (let i = 0; i < sampleLength - 1; i++) { let sample = samples[i]; let originalDts = sample.dts - this._dtsBase; let isKeyframe = sample.isKeyframe; - let dts = originalDts - dtsCorrection; + let dts = originalDts; let cts = sample.cts; let pts = dts + cts; + // let myDate = new Date(); + // Log.d(this.TAG, 'video sample, dts: ' + dts + ', size: ' + sample.length + ', time: ' + myDate.getTime()); + + if (this._videoLastDts) { + if (dts <= this._videoLastDts) { + Log.d(this.TAG, 'video dts rollback, we need re-adjust it, ' + dts + ', ' + this._videoLastDts); + let dtsOffset = this._videoLastDts + 1 - dts; + dts = this._videoLastDts + 1; + pts += dtsOffset; + } + } + this._videoLastDts = dts; + if (firstDts === -1) { firstDts = dts; firstPts = pts; } - let sampleDuration = 0; - - if (i !== samples.length - 1) { - let nextDts = samples[i + 1].dts - this._dtsBase - dtsCorrection; - sampleDuration = nextDts - dts; - } else { // the last sample - if (lastSample != null) { // use stashed sample's dts to calculate sample duration - let nextDts = lastSample.dts - this._dtsBase - dtsCorrection; - sampleDuration = nextDts - dts; - } else if (mp4Samples.length >= 1) { // use second last sample duration - sampleDuration = mp4Samples[mp4Samples.length - 1].duration; - } else { // the only one sample, use reference sample duration - sampleDuration = Math.floor(this._videoMeta.refSampleDuration); - } - } + let nextDts = samples[i + 1].dts - this._dtsBase; + let sampleDuration = (nextDts > dts) ? (nextDts - dts) : 1; if (isKeyframe) { let syncPoint = new SampleInfo(dts, pts, sampleDuration, sample.dts, true); @@ -696,15 +540,9 @@ class MP4Remuxer { isNonSync: isKeyframe ? 0 : 1 } }); - } - // allocate mdatbox - mdatbox = new Uint8Array(mdatBytes); - mdatbox[0] = (mdatBytes >>> 24) & 0xFF; - mdatbox[1] = (mdatBytes >>> 16) & 0xFF; - mdatbox[2] = (mdatBytes >>> 8) & 0xFF; - mdatbox[3] = (mdatBytes) & 0xFF; - mdatbox.set(MP4.types.mdat, 4); + // Log.d(this.TAG, 'mp4 video sample, dts: ' + dts + ', pts: ' + pts + ', cts: ' + cts + ', size: ' + sample.length + ', duration: ' + sampleDuration + ', odts: ' + originalDts); + } // Write samples into mdatbox for (let i = 0; i < mp4Samples.length; i++) { @@ -720,7 +558,6 @@ class MP4Remuxer { let latest = mp4Samples[mp4Samples.length - 1]; lastDts = latest.dts + latest.duration; lastPts = latest.pts + latest.duration; - this._videoNextDts = lastDts; // fill media segment info & add to info list info.beginDts = firstDts; @@ -730,20 +567,20 @@ class MP4Remuxer { info.originalBeginDts = mp4Samples[0].originalDts; info.originalEndDts = latest.originalDts + latest.duration; info.firstSample = new SampleInfo(mp4Samples[0].dts, - mp4Samples[0].pts, - mp4Samples[0].duration, - mp4Samples[0].originalDts, - mp4Samples[0].isKeyframe); + mp4Samples[0].pts, + mp4Samples[0].duration, + mp4Samples[0].originalDts, + mp4Samples[0].isKeyframe); info.lastSample = new SampleInfo(latest.dts, - latest.pts, - latest.duration, - latest.originalDts, - latest.isKeyframe); + latest.pts, + latest.duration, + latest.originalDts, + latest.isKeyframe); if (!this._isLive) { this._videoSegmentInfoList.append(info); } - track.samples = mp4Samples; + track.mp4Samples = mp4Samples; track.sequenceNumber++; // workaround for chrome < 50: force first sample as a random access point @@ -755,8 +592,11 @@ class MP4Remuxer { } let moofbox = MP4.moof(track, firstDts); - track.samples = []; - track.length = 0; + + // reserve the last sample + track.length = lastSample.length; + track.samples.splice(0, sampleLength - 1); + track.mp4Samples = []; this._onMediaSegment('video', { type: 'video', @@ -775,4 +615,4 @@ class MP4Remuxer { } -export default MP4Remuxer; +export default MP4Remuxer; \ No newline at end of file