diff --git a/src/demux/flv-demuxer.js b/src/demux/flv-demuxer.js
index f3681e7..9342c16 100644
--- a/src/demux/flv-demuxer.js
+++ b/src/demux/flv-demuxer.js
@@ -26,6 +26,7 @@ import H265Parser from './h265-parser.js';
 import buffersAreEqual from '../utils/typedarray-equality.ts';
 import AV1OBUParser from './av1-parser.ts';
 import ExpGolomb from './exp-golomb.js';
+import {FrameType, SliceHeaderParser} from './slice-header-parser.js';
 
 function Swap16(src) {
     return (((src >>> 8) & 0xFF) |
@@ -46,12 +47,20 @@ function ReadBig32(array, index) {
             (array[index + 3]));
 }
 
+function isMacOS() {
+    // userAgentData is not exposed by every browser (e.g. Safari, Firefox), so also check navigator.platform
+    const nav = (typeof navigator !== 'undefined') ? navigator : {};
+    return nav.userAgentData?.platform === 'macOS' || /^Mac/.test(nav.platform || '');
+}
 
 class FLVDemuxer {
 
     constructor(probeData, config) {
         this.TAG = 'FLVDemuxer';
 
+        this._isMacOS = isMacOS();
+        Log.d(this.TAG, 'isMacOS: ' + this._isMacOS);
+
         this._config = config;
 
         this._onError = null;
@@ -81,6 +90,16 @@ class FLVDemuxer {
         this._audioMetadata = null;
         this._videoMetadata = null;
 
+        this._h264SpsInfo = null;
+        this._h264MaxFrameNum = -1;
+        this._h264HasBFrame = false;
+        this._h264DroppingFrame = false;
+        this._h264LastVideoFrame = -1;
+        this._h264LastVideoFrameDts = -1;
+        this._h264LastVideoFramePts = -1;
+        this._h264LastIFrameDts = -1;
+        this._h264MinGopDuration = -1;
+
         this._naluLengthSize = 4;
         this._timestampBase = 0;  // int32, in milliseconds
         this._timescale = 1000;
@@ -363,6 +382,15 @@ class FLVDemuxer {
             offset += 11 + dataSize + 4;  // tagBody + dataSize + prevTagSize
         }
 
+        if (this._hasAudio && this._hasVideo && !this._audioInitialMetadataDispatched) {
+            // both audio & video, but audio initial meta data still not dispatched
+            let samples = this._videoTrack.samples;
+            if (samples.length > 0 && samples[samples.length - 1].dts > samples[0].dts + 3000) {
+                Log.d(this.TAG, 'we need regard it as video only, last sample: ' + samples[samples.length - 1].dts + ', first sample: ' + samples[0].dts);
+                this._hasAudio = false;
+            }
+        }
+
         // dispatch parsed frames to consumer (typically, the remuxer)
         if (this._isInitialMetadataDispatched()) {
             if (this._dispatch && (this._audioTrack.length || this._videoTrack.length)) {
@@ -1321,6 +1349,11 @@ class FLVDemuxer {
                 continue;
             }
 
+            if (Object.keys(config).length === 0) continue;
+
+            this._h264SpsInfo = config;
+            this._h264MaxFrameNum = Math.pow(2, config.log2_max_frame_num_minus4 + 4);
+
             meta.codecWidth = config.codec_size.width;
             meta.codecHeight = config.codec_size.height;
             meta.presentWidth = config.present_size.width;
@@ -1686,11 +1719,130 @@ class FLVDemuxer {
                 cts: cts,
                 pts: (dts + cts)
             };
+
             if (keyframe) {
                 avcSample.fileposition = tagPosition;
+                if (this._h264LastIFrameDts !== -1) {
+                    let gopDuration = dts - this._h264LastIFrameDts;
+                    if (gopDuration > 0 && gopDuration < 50000) {
+                        // valid GOP duration 0~50s
+                        if (this._h264MinGopDuration === -1 || gopDuration < this._h264MinGopDuration) {
+                            this._h264MinGopDuration = gopDuration;
+                            Log.v(this.TAG, 'GOP minimum duration: ' + this._h264MinGopDuration);
+                        }
+                    }
+                }
+
+                this._h264LastIFrameDts = dts;
+            }
+
+            let dropThisFrame = false;
+            let slice = units[0].data.slice(lengthSize, units[0].data.length - lengthSize);
+            let result = SliceHeaderParser.parseSliceHeader(slice, this._h264SpsInfo);
+            if (result.success === true) {
+                Log.d(this.TAG, 'video sample, dts: ' + dts + ', size: ' + length + ', frame_type: ' + result.data.frame_type + 
+                    ', frame_num: ' + result.data.frame_num);
+
+                if (result.data.frame_type === FrameType.FrameType_B) {
+                    this._h264HasBFrame = true;
+                }
+
+                // drop frame only for no B Frame case
+                if (this._h264HasBFrame === false) {
+                    if (result.data.frame_type === FrameType.FrameType_I) {
+                        // I frame
+                        if (this._h264DroppingFrame) {
+                            // after dropping frames, we need modify the timestamp for I frame
+                            // since if non I frame has big duration, MSE will hang
+                            let meta = this._videoMetadata;
+                            if (meta) {
+                                Log.w(this.TAG, 'dropping meet I frame, need stop dropping, last video frame dts: ' + 
+                                    this._h264LastVideoFrameDts + ', I frame dts: ' + dts + ', ref duration: ' + meta.refSampleDuration);
+                                avcSample.dts = this._h264LastVideoFrameDts + meta.refSampleDuration;
+                                avcSample.pts = this._h264LastVideoFramePts + meta.refSampleDuration;
+                            } else {
+                                Log.w(this.TAG, 'dropping meet I frame, need stop dropping, last video frame dts: ' + 
+                                    this._h264LastVideoFrameDts + ', I frame dts: ' + dts + ', ref duration: nil');
+                                avcSample.dts = this._h264LastVideoFrameDts + 40;
+                                avcSample.pts = this._h264LastVideoFramePts + 40;
+                            }
+                        }
+                        this._h264DroppingFrame = false;
+                        this._h264LastVideoFrame = result.data.frame_num;
+                        this._h264LastVideoFrameDts = dts;
+                        this._h264LastVideoFramePts = (dts + cts);
+                    } else {
+                        // not I frame
+                        if (this._h264DroppingFrame) {
+                            dropThisFrame = true;
+                        } else {
+                            // if not dropping frame, we need judge if need start dropping
+                            // 1, first frame;
+                            // 2, normal case;
+                            // 3, normal frame_num overflow case;
+                            if ((this._h264LastVideoFrame === -1) || 
+                                (this._h264LastVideoFrame + 1 === result.data.frame_num) || 
+                                ((this._h264LastVideoFrame + 1) === this._h264MaxFrameNum && result.data.frame_num === 0)) {
+                                this._h264LastVideoFrame = result.data.frame_num;
+                                this._h264LastVideoFrameDts = dts;
+                                this._h264LastVideoFramePts = (dts + cts);
+                            } else if (this._h264LastVideoFrame === result.data.frame_num) {
+                                // if same frame_num, we need judge timestamp
+                                if (this._h264MinGopDuration !== -1 && dts > this._h264LastVideoFrameDts + this._h264MinGopDuration / 2) {
+                                    // maybe cross GOP
+                                    Log.w(this.TAG, 'frame_num not continuous(cross GOP): ' + this._h264LastVideoFrame + '(' + this._h264LastVideoFrameDts + '), ' + 
+                                        result.data.frame_num + '(' + dts + '), need dropping...');
+                                    this._h264DroppingFrame = true;
+                                    dropThisFrame = true;
+                                } else {
+                                    // in same GOP, it is normal case since non-reference frame will use the same frame_num as previous reference frame
+                                    this._h264LastVideoFrame = result.data.frame_num;
+                                    this._h264LastVideoFrameDts = dts;
+                                    this._h264LastVideoFramePts = (dts + cts);
+                                }
+                            } else {
+                                if (this._isMacOS) {
+                                    Log.w(this.TAG, 'frame_num not continuous: ' + this._h264LastVideoFrame + '(' + this._h264LastVideoFrameDts + '), ' + 
+                                        result.data.frame_num + '(' + dts + '), need dropping...');
+                                    this._h264DroppingFrame = true;
+                                    dropThisFrame = true;
+                                } else {
+                                    if (this._h264LastIFrameDts !== -1 && this._h264MinGopDuration !== -1 && 
+                                        dts >= this._h264LastIFrameDts + this._h264MinGopDuration) {
+                                        // only I frame dropped, we need drop frames to next I frame
+                                        // it seems MSE can handle P frames dropped case
+                                        Log.w(this.TAG, 'frame_num not continuous: ' + this._h264LastVideoFrame + '(' + this._h264LastVideoFrameDts + '), ' + 
+                                            result.data.frame_num + '(' + dts + '), and I frame seems dropped, last I frame ' + this._h264LastIFrameDts + 
+                                            ', gop duration: ' + this._h264MinGopDuration + ', need dropping...');
+                                        this._h264DroppingFrame = true;
+                                        dropThisFrame = true;
+                                    } else {
+                                        Log.w(this.TAG, 'frame_num not continuous: ' + this._h264LastVideoFrame + '(' + this._h264LastVideoFrameDts + '), ' + 
+                                            result.data.frame_num + '(' + dts + '), but I frame not dropped, last I frame ' + this._h264LastIFrameDts + 
+                                            ', gop duration: ' + this._h264MinGopDuration + ', just warning.');
+                                        this._h264LastVideoFrame = result.data.frame_num;
+                                        this._h264LastVideoFrameDts = dts;
+                                        this._h264LastVideoFramePts = (dts + cts);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                } else {
+                    // B frame detected, disable drop frame mechanism
+                    this._h264DroppingFrame = false;
+                    this._h264LastVideoFrame = -1;
+                }
+            } else {
+                Log.w(this.TAG, 'parse slice fail, video sample, dts: ' + dts + ', size: ' + length + ', keyframe: ' + keyframe);
+                dropThisFrame = true;
+            }
+
+            if (!dropThisFrame) {
+                // Log.d(this.TAG, 'video sample, dts: ' + dts + ', size: ' + length + ', keyframe: ' + keyframe);
+                track.samples.push(avcSample);
+                track.length += length;
             }
-            track.samples.push(avcSample);
-            track.length += length;
         }
     }
 
diff --git a/src/demux/slice-header-parser.js b/src/demux/slice-header-parser.js
new file mode 100644
index 0000000..0f35910
--- /dev/null
+++ b/src/demux/slice-header-parser.js
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2016 Bilibili. All Rights Reserved.
+ *
+ * @author East Zhou <zrdong@ulucu.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import Log from '../utils/logger.js';
+import ExpGolomb from './exp-golomb.js';
+
+export const FrameType = {  // picture types reported by SliceHeaderParser.parseSliceHeader
+    FrameType_U: 0,     // unknown; declared for completeness, parseSliceHeader never reports it
+    FrameType_I: 1,     // I frame; also the result for any slice type that is neither P/SP nor B
+    FrameType_P: 2,     // P frame; reported for P and SP slices
+    FrameType_B: 3      // B frame
+};
+
+export class SliceHeaderParser {
+    /**
+     * Parses the leading fields of an H.264 slice header, up to and including frame_num.
+     * @param {Uint8Array} uint8array - NAL unit bytes; the first byte is read as the NAL
+     *     header. NOTE(review): bytes are consumed as-is, emulation-prevention bytes are
+     *     not removed here - confirm ExpGolomb or the caller does not need to handle them.
+     * @param {Object} sps_info - Parsed SPS; `separate_colour_plane_flag` and
+     *     `log2_max_frame_num_minus4` are read from it.
+     * @returns {{success: boolean, data: Object}} On success `data` carries `frame_type`
+     *     (a FrameType value) and `frame_num`. On failure (missing SPS, rejected NAL header,
+     *     or an exception while reading bits) `success` is false and `data` is empty.
+     */
+    static parseSliceHeader(uint8array, sps_info) {
+        const failure = {success: false, data: {}};
+
+        if (!sps_info) {
+            Log.e('SliceHeaderParser', 'missing sps or pps!');
+            return failure;
+        }
+
+        try {
+            let gb = new ExpGolomb(uint8array);
+
+            let nalu_header = gb.readByte();
+            if (nalu_header & 0x80) {
+                // forbidden_zero_bit: the H.264 specification requires this bit to be 0
+                return failure;
+            }
+            if (!(nalu_header & 0x60)) {
+                // nal_ref_idc (0..3) appears to indicate the importance of the NALU; with 0 a
+                // decoder can discard it without affecting playback, so it is rejected here
+                return failure;
+            }
+
+            gb.readUEG();   // first_mb_in_slice
+            // slice_type: 0 = P, 1 = B, 2 = I, 3 = SP, 4 = SI; values 5..9 repeat 0..4
+            let slice_type = gb.readUEG() % 5;
+
+            // SP slices are reported as P; everything that is neither P/SP nor B (I, SI) as I
+            let frame_type = FrameType.FrameType_I;
+            if (slice_type === 0 || slice_type === 3) {
+                frame_type = FrameType.FrameType_P;
+            } else if (slice_type === 1) {
+                frame_type = FrameType.FrameType_B;
+            }
+
+            gb.readUEG();   // pic_parameter_set_id
+
+            if (sps_info.separate_colour_plane_flag) {
+                // colour_plane_id is a 2-bit field, u(2); reading more bits would shift frame_num
+                gb.readBits(2);
+            }
+
+            let frame_num = gb.readBits(sps_info.log2_max_frame_num_minus4 + 4);
+
+            return {success: true, data: {frame_type: frame_type, frame_num: frame_num}};
+        } catch (error) {
+            Log.e('SliceHeaderParser', error.message);
+            return failure;
+        }
+    }
+}
diff --git a/src/demux/sps-parser.js b/src/demux/sps-parser.js
index 0cb1d8b..0c08eac 100644
--- a/src/demux/sps-parser.js
+++ b/src/demux/sps-parser.js
@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 
+import Log from '../utils/logger.js';
 import ExpGolomb from './exp-golomb.js';
 
 class SPSParser {
@@ -56,13 +57,14 @@ class SPSParser {
 
         gb.readByte();
         let profile_idc = gb.readByte();  // profile_idc
-        gb.readByte();  // constraint_set_flags[5] + reserved_zero[3]
+        gb.readByte();  // constraint_set_flags[6] + reserved_zero[2]
         let level_idc = gb.readByte();  // level_idc
         gb.readUEG();  // seq_parameter_set_id
 
         let profile_string = SPSParser.getProfileString(profile_idc);
         let level_string = SPSParser.getLevelString(level_idc);
         let chroma_format_idc = 1;
+        let separate_colour_plane_flag = 0;
         let chroma_format = 420;
         let chroma_format_table = [0, 420, 422, 444];
         let bit_depth_luma = 8;
@@ -72,10 +74,15 @@ class SPSParser {
             profile_idc === 244 || profile_idc === 44 || profile_idc === 83 ||
             profile_idc === 86 || profile_idc === 118 || profile_idc === 128 ||
             profile_idc === 138 || profile_idc === 144) {
-
             chroma_format_idc = gb.readUEG();
+            if (chroma_format_idc > 3) {
+                Log.e('SPSParser', 'illegal chroma format idc: ' + chroma_format_idc);
+                return {
+                };
+            }
+
             if (chroma_format_idc === 3) {
-                gb.readBits(1);  // separate_colour_plane_flag
+                separate_colour_plane_flag = gb.readBits(1);  // separate_colour_plane_flag
             }
             if (chroma_format_idc <= 3) {
                 chroma_format = chroma_format_table[chroma_format_idc];
@@ -97,12 +104,14 @@ class SPSParser {
                 }
             }
         }
-        gb.readUEG();  // log2_max_frame_num_minus4
-        let pic_order_cnt_type = gb.readUEG();
+        let log2_max_frame_num_minus4 = gb.readUEG();  // log2_max_frame_num_minus4
+        let pic_order_cnt_type = gb.readUEG();  // pic_order_cnt_type
+        let log2_max_pic_order_cnt_lsb_minus_4 = 0;
+        let delta_pic_order_always_zero_flag = 0;
         if (pic_order_cnt_type === 0) {
-            gb.readUEG();  // log2_max_pic_order_cnt_lsb_minus_4
+            log2_max_pic_order_cnt_lsb_minus_4 = gb.readUEG();  // log2_max_pic_order_cnt_lsb_minus_4
         } else if (pic_order_cnt_type === 1) {
-            gb.readBits(1);  // delta_pic_order_always_zero_flag
+            delta_pic_order_always_zero_flag = gb.readBits(1);  // delta_pic_order_always_zero_flag
             gb.readSEG();  // offset_for_non_ref_pic
             gb.readSEG();  // offset_for_top_to_bottom_field
             let num_ref_frames_in_pic_order_cnt_cycle = gb.readUEG();
@@ -212,9 +221,14 @@ class SPSParser {
             profile_string,  // baseline, high, high10, ...
             level_string,  // 3, 3.1, 4, 4.1, 5, 5.1, ...
             chroma_format_idc,
+            separate_colour_plane_flag,
             bit_depth: bit_depth_luma,  // 8bit, 10bit, ...
             bit_depth_luma,
             bit_depth_chroma,
+            log2_max_frame_num_minus4,
+            pic_order_cnt_type,
+            log2_max_pic_order_cnt_lsb_minus_4,
+            delta_pic_order_always_zero_flag,
             ref_frames,
             chroma_format,  // 4:2:0, 4:2:2, ...
             chroma_format_string: SPSParser.getChromaFormatString(chroma_format),
@@ -293,6 +307,11 @@ class SPSParser {
         }
     }
 
+    static getChromaFormat(chroma_format_idc) {
+        // Table lookup: idc 0..3 maps to 0 / 420 / 422 / 444; any other index yields undefined.
+        return [0, 420, 422, 444][chroma_format_idc];
+    }
+
 }
 
 export default SPSParser;
\ No newline at end of file
diff --git a/src/remux/mp4-generator.js b/src/remux/mp4-generator.js
index fa608a7..dafe265 100644
--- a/src/remux/mp4-generator.js
+++ b/src/remux/mp4-generator.js
@@ -24,22 +24,17 @@ class MP4 {
         MP4.types = {
             avc1: [], avcC: [], btrt: [], dinf: [],
             dref: [], esds: [], ftyp: [], hdlr: [],
-            hvc1: [], hvcC: [], av01: [], av1C: [],
             mdat: [], mdhd: [], mdia: [], mfhd: [],
             minf: [], moof: [], moov: [], mp4a: [],
             mvex: [], mvhd: [], sdtp: [], stbl: [],
             stco: [], stsc: [], stsd: [], stsz: [],
             stts: [], tfdt: [], tfhd: [], traf: [],
             trak: [], trun: [], trex: [], tkhd: [],
-            vmhd: [], smhd: [], chnl: [],
-            '.mp3': [],
-            Opus: [], dOps: [], fLaC: [], dfLa: [],
-            ipcm: [], pcmC: [],
-            'ac-3': [], dac3: [], 'ec-3': [], dec3: [],
+            vmhd: [], smhd: [], '.mp3': []
         };
 
         for (let name in MP4.types) {
-            if (MP4.types.hasOwnProperty(name)) {
+            if (Object.prototype.hasOwnProperty.call(MP4.types, name)) {
                 MP4.types[name] = [
                     name.charCodeAt(0),
                     name.charCodeAt(1),
@@ -316,8 +311,8 @@ class MP4 {
             MP4.box(MP4.types.stsc, MP4.constants.STSC),  // Sample-To-Chunk
             MP4.box(MP4.types.stsz, MP4.constants.STSZ),  // Sample size
             MP4.box(MP4.types.stco, MP4.constants.STCO)   // Chunk offset
-        );
-        return result;
+        ); 
+        return result; 
     }
 
     // Sample description box
@@ -325,23 +320,9 @@ class MP4 {
         if (meta.type === 'audio') {
             if (meta.codec === 'mp3') {
                 return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.mp3(meta));
-            } else if (meta.codec === 'ac-3') {
-                return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.ac3(meta));
-            } else if (meta.codec === 'ec-3') {
-                return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.ec3(meta));
-            } else if(meta.codec === 'opus') {
-                return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.Opus(meta));
-            } else if (meta.codec == 'flac') {
-                return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.fLaC(meta));
-            } else if (meta.codec == 'ipcm') {
-                return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.ipcm(meta));
             }
             // else: aac -> mp4a
             return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.mp4a(meta));
-        } else if (meta.type === 'video' && meta.codec.startsWith('hvc1')) {
-            return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.hvc1(meta));
-        } else if (meta.type === 'video' && meta.codec.startsWith('av01')) {
-            return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.av01(meta));
         } else {
             return MP4.box(MP4.types.stsd, MP4.constants.STSD_PREFIX, MP4.avc1(meta));
         }
@@ -387,46 +368,6 @@ class MP4 {
         return MP4.box(MP4.types.mp4a, data, MP4.esds(meta));
     }
 
-    static ac3(meta) {
-        let channelCount = meta.channelCount;
-        let sampleRate = meta.audioSampleRate;
-
-        let data = new Uint8Array([
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            0x00, 0x00, 0x00, 0x01,  // reserved(2) + data_reference_index(2)
-            0x00, 0x00, 0x00, 0x00,  // reserved: 2 * 4 bytes
-            0x00, 0x00, 0x00, 0x00,
-            0x00, channelCount,      // channelCount(2)
-            0x00, 0x10,              // sampleSize(2)
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            (sampleRate >>> 8) & 0xFF,  // Audio sample rate
-            (sampleRate) & 0xFF,
-            0x00, 0x00
-        ]);
-
-        return MP4.box(MP4.types['ac-3'], data, MP4.box(MP4.types.dac3, new Uint8Array(meta.config)));
-    }
-
-    static ec3(meta) {
-        let channelCount = meta.channelCount;
-        let sampleRate = meta.audioSampleRate;
-
-        let data = new Uint8Array([
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            0x00, 0x00, 0x00, 0x01,  // reserved(2) + data_reference_index(2)
-            0x00, 0x00, 0x00, 0x00,  // reserved: 2 * 4 bytes
-            0x00, 0x00, 0x00, 0x00,
-            0x00, channelCount,      // channelCount(2)
-            0x00, 0x10,              // sampleSize(2)
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            (sampleRate >>> 8) & 0xFF,  // Audio sample rate
-            (sampleRate) & 0xFF,
-            0x00, 0x00
-        ]);
-
-        return MP4.box(MP4.types['ec-3'], data, MP4.box(MP4.types.dec3, new Uint8Array(meta.config)));
-    }
-
     static esds(meta) {
         let config = meta.config || [];
         let configSize = config.length;
@@ -457,176 +398,6 @@ class MP4 {
         return MP4.box(MP4.types.esds, data);
     }
 
-    static Opus(meta) {
-        let channelCount = meta.channelCount;
-        let sampleRate = meta.audioSampleRate;
-
-        let data = new Uint8Array([
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            0x00, 0x00, 0x00, 0x01,  // reserved(2) + data_reference_index(2)
-            0x00, 0x00, 0x00, 0x00,  // reserved: 2 * 4 bytes
-            0x00, 0x00, 0x00, 0x00,
-            0x00, channelCount, // channelCount(2)
-            0x00, 0x10,              // sampleSize(2)
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            (sampleRate >>> 8) & 0xFF,  // Audio sample rate
-            (sampleRate) & 0xFF,
-            0x00, 0x00
-        ]);
-
-        return MP4.box(MP4.types.Opus, data, MP4.dOps(meta));
-    }
-
-    static dOps(meta) {
-        let channelCount = meta.channelCount;
-        let channelConfigCode = meta.channelConfigCode;
-        let sampleRate = meta.audioSampleRate;
-
-        if (meta.config) {
-            return MP4.box(MP4.types.dOps, meta.config);
-        }
-
-        let mapping = [];
-        switch (channelConfigCode) {
-            case 0x01:
-            case 0x02:
-                mapping = [0x0];
-                break;
-            case 0x00: // dualmono
-                mapping = [0xFF, 1, 1, 0, 1];
-                break;
-            case 0x80: // dualmono
-                mapping = [0xFF, 2, 0, 0, 1];
-                break;
-            case 0x03:
-                mapping = [0x01, 2, 1, 0, 2, 1];
-                break;
-            case 0x04:
-                mapping = [0x01, 2, 2, 0, 1, 2, 3];
-                break;
-            case 0x05:
-                mapping = [0x01, 3, 2, 0, 4, 1, 2, 3];
-                break;
-            case 0x06:
-                mapping = [0x01, 4, 2, 0, 4, 1, 2, 3, 5];
-                break;
-            case 0x07:
-                mapping = [0x01, 4, 2, 0, 4, 1, 2, 3, 5, 6];
-                break;
-            case 0x08:
-                mapping = [0x01, 5, 3, 0, 6, 1, 2, 3, 4, 5, 7];
-                break;
-            case 0x82:
-                mapping = [0x01, 1, 2, 0, 1];
-                break;
-            case 0x83:
-                mapping = [0x01, 1, 3, 0, 1, 2];
-                break;
-            case 0x84:
-                mapping = [0x01, 1, 4, 0, 1, 2, 3];
-                break;
-            case 0x85:
-                mapping = [0x01, 1, 5, 0, 1, 2, 3, 4];
-                break;
-            case 0x86:
-                mapping = [0x01, 1, 6, 0, 1, 2, 3, 4, 5];
-                break;
-            case 0x87:
-                mapping = [0x01, 1, 7, 0, 1, 2, 3, 4, 5, 6];
-                break;
-            case 0x88:
-                mapping = [0x01, 1, 8, 0, 1, 2, 3, 4, 5, 6, 7];
-                break;
-        }
-
-        let data = new Uint8Array([
-            0x00,         // Version (1)
-            channelCount, // OutputChannelCount: 2
-            0x00, 0x00,   // PreSkip: 2
-            (sampleRate >>> 24) & 0xFF,  // Audio sample rate: 4
-            (sampleRate >>> 17) & 0xFF,
-            (sampleRate >>>  8) & 0xFF,
-            (sampleRate >>>  0) & 0xFF,
-            0x00, 0x00,  // Global Gain : 2
-            ... mapping
-        ]);
-        return MP4.box(MP4.types.dOps, data);
-    }
-
-    static fLaC(meta) {
-        let channelCount = meta.channelCount;
-        let sampleRate = Math.min(meta.audioSampleRate, 65535);
-        let sampleSize = meta.sampleSize;
-
-        let data = new Uint8Array([
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            0x00, 0x00, 0x00, 0x01,  // reserved(2) + data_reference_index(2)
-            0x00, 0x00, 0x00, 0x00,  // reserved: 2 * 4 bytes
-            0x00, 0x00, 0x00, 0x00,
-            0x00, channelCount, // channelCount(2)
-            0x00, (sampleSize), // sampleSize(2)
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            (sampleRate >>> 8) & 0xFF,  // Audio sample rate
-            (sampleRate) & 0xFF,
-            0x00, 0x00
-        ]);
-
-        return MP4.box(MP4.types.fLaC, data, MP4.dfLa(meta));
-    }
-
-    static dfLa(meta) {
-        let data = new Uint8Array([
-            0x00, 0x00, 0x00, 0x00, // version, flag
-            ... meta.config
-        ]);
-        return MP4.box(MP4.types.dfLa, data);
-    }
-
-    static ipcm(meta) {
-        let channelCount = meta.channelCount;
-        let sampleRate = Math.min(meta.audioSampleRate, 65535);
-        let sampleSize = meta.sampleSize;
-
-        let data = new Uint8Array([
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            0x00, 0x00, 0x00, 0x01,  // reserved(2) + data_reference_index(2)
-            0x00, 0x00, 0x00, 0x00,  // reserved: 2 * 4 bytes
-            0x00, 0x00, 0x00, 0x00,
-            0x00, channelCount, // channelCount(2)
-            0x00, (sampleSize), // sampleSize(2)
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            (sampleRate >>> 8) & 0xFF,  // Audio sample rate
-            (sampleRate) & 0xFF,
-            0x00, 0x00
-        ]);
-
-        if (meta.channelCount === 1) {
-            return MP4.box(MP4.types.ipcm, data, MP4.pcmC(meta));
-        } else {
-            return MP4.box(MP4.types.ipcm, data, MP4.chnl(meta), MP4.pcmC(meta));
-        }
-    }
-
-    static chnl(meta) {
-        let data = new Uint8Array([
-            0x00, 0x00, 0x00, 0x00, // version, flag
-            0x01, // Channel Based Layout
-            meta.channelCount, // AudioConfiguration
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // omittedChannelsMap
-        ]);
-        return MP4.box(MP4.types.chnl, data);
-    }
-
-    static pcmC(meta) {
-        let littleEndian = meta.littleEndian ? 0x01 : 0x00
-        let sampleSize = meta.sampleSize;
-        let data = new Uint8Array([
-            0x00, 0x00, 0x00, 0x00, // version, flag
-            littleEndian, sampleSize
-        ]);
-        return MP4.box(MP4.types.pcmC, data);
-    }
-
     static avc1(meta) {
         let avcc = meta.avcc;
         let width = meta.codecWidth, height = meta.codecHeight;
@@ -661,74 +432,6 @@ class MP4 {
         return MP4.box(MP4.types.avc1, data, MP4.box(MP4.types.avcC, avcc));
     }
 
-    static hvc1(meta) {
-        let hvcc = meta.hvcc;
-        let width = meta.codecWidth, height = meta.codecHeight;
-
-        let data = new Uint8Array([
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            0x00, 0x00, 0x00, 0x01,  // reserved(2) + data_reference_index(2)
-            0x00, 0x00, 0x00, 0x00,  // pre_defined(2) + reserved(2)
-            0x00, 0x00, 0x00, 0x00,  // pre_defined: 3 * 4 bytes
-            0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00,
-            (width >>> 8) & 0xFF,    // width: 2 bytes
-            (width) & 0xFF,
-            (height >>> 8) & 0xFF,   // height: 2 bytes
-            (height) & 0xFF,
-            0x00, 0x48, 0x00, 0x00,  // horizresolution: 4 bytes
-            0x00, 0x48, 0x00, 0x00,  // vertresolution: 4 bytes
-            0x00, 0x00, 0x00, 0x00,  // reserved: 4 bytes
-            0x00, 0x01,              // frame_count
-            0x0A,                    // strlen
-            0x78, 0x71, 0x71, 0x2F,  // compressorname: 32 bytes
-            0x66, 0x6C, 0x76, 0x2E,
-            0x6A, 0x73, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00,
-            0x00, 0x18,              // depth
-            0xFF, 0xFF               // pre_defined = -1
-        ]);
-        return MP4.box(MP4.types.hvc1, data, MP4.box(MP4.types.hvcC, hvcc));
-    }
-
-    static av01(meta) {
-        let av1c = meta.av1c;
-        let width = meta.codecWidth || 192, height = meta.codecHeight || 108;
-
-        let data = new Uint8Array([
-            0x00, 0x00, 0x00, 0x00,  // reserved(4)
-            0x00, 0x00, 0x00, 0x01,  // reserved(2) + data_reference_index(2)
-            0x00, 0x00, 0x00, 0x00,  // pre_defined(2) + reserved(2)
-            0x00, 0x00, 0x00, 0x00,  // pre_defined: 3 * 4 bytes
-            0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00,
-            (width >>> 8) & 0xFF,    // width: 2 bytes
-            (width) & 0xFF,
-            (height >>> 8) & 0xFF,   // height: 2 bytes
-            (height) & 0xFF,
-            0x00, 0x48, 0x00, 0x00,  // horizresolution: 4 bytes
-            0x00, 0x48, 0x00, 0x00,  // vertresolution: 4 bytes
-            0x00, 0x00, 0x00, 0x00,  // reserved: 4 bytes
-            0x00, 0x01,              // frame_count
-            0x0A,                    // strlen
-            0x78, 0x71, 0x71, 0x2F,  // compressorname: 32 bytes
-            0x66, 0x6C, 0x76, 0x2E,
-            0x6A, 0x73, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00, 0x00,
-            0x00, 0x00, 0x00,
-            0x00, 0x18,              // depth
-            0xFF, 0xFF               // pre_defined = -1
-        ]);
-        return MP4.box(MP4.types.av01, data, MP4.box(MP4.types.av1C, av1c));
-    }
-
     // Movie Extends box
     static mvex(meta) {
         return MP4.box(MP4.types.mvex, MP4.trex(meta));
@@ -795,7 +498,7 @@ class MP4 {
 
     // Sample Dependency Type box
     static sdtp(track) {
-        let samples = track.samples || [];
+        let samples = track.mp4Samples || [];
         let sampleCount = samples.length;
         let data = new Uint8Array(4 + sampleCount);
         // 0~4 bytes: version(0) & flags
@@ -811,7 +514,7 @@ class MP4 {
 
     // Track fragment run box
     static trun(track, offset) {
-        let samples = track.samples || [];
+        let samples = track.mp4Samples || [];
         let sampleCount = samples.length;
         let dataSize = 12 + 16 * sampleCount;
         let data = new Uint8Array(dataSize);
diff --git a/src/remux/mp4-remuxer.js b/src/remux/mp4-remuxer.js
index 8803357..f061254 100644
--- a/src/remux/mp4-remuxer.js
+++ b/src/remux/mp4-remuxer.js
@@ -20,8 +20,8 @@ import Log from '../utils/logger.js';
 import MP4 from './mp4-generator.js';
 import AAC from './aac-silent.js';
 import Browser from '../utils/browser.js';
-import { SampleInfo, MediaSegmentInfo, MediaSegmentInfoList } from '../core/media-segment-info.js';
-import { IllegalStateException } from '../utils/exception.js';
+import {SampleInfo, MediaSegmentInfo, MediaSegmentInfoList} from '../core/media-segment-info.js';
+import {IllegalStateException} from '../utils/exception.js';
 
 
 // Fragmented mp4 remuxer
@@ -38,9 +38,8 @@ class MP4Remuxer {
         this._audioDtsBase = Infinity;
         this._videoDtsBase = Infinity;
         this._audioNextDts = undefined;
-        this._videoNextDts = undefined;
-        this._audioStashedLastSample = null;
-        this._videoStashedLastSample = null;
+        this._audioLastDts = undefined;
+        this._videoLastDts = undefined;
 
         this._audioMeta = null;
         this._videoMeta = null;
@@ -119,12 +118,11 @@ class MP4Remuxer {
     }
 
     insertDiscontinuity() {
-        this._audioNextDts = this._videoNextDts = undefined;
+        this._audioNextDts = undefined;
+        this._audioLastDts = this._videoLastDts = undefined;
     }
 
     seek(originalDts) {
-        this._audioStashedLastSample = null;
-        this._videoStashedLastSample = null;
         this._videoSegmentInfoList.clear();
         this._audioSegmentInfoList.clear();
     }
@@ -136,12 +134,8 @@ class MP4Remuxer {
         if (!this._dtsBaseInited) {
             this._calculateDtsBase(audioTrack, videoTrack);
         }
-        if (videoTrack) {
-            this._remuxVideo(videoTrack);
-        }
-        if (audioTrack) {
-            this._remuxAudio(audioTrack);
-        }
+        this._remuxVideo(videoTrack);
+        this._remuxAudio(audioTrack);
     }
 
     _onTrackMetadataReceived(type, metadata) {
@@ -186,70 +180,36 @@ class MP4Remuxer {
             return;
         }
 
-        if (audioTrack && audioTrack.samples && audioTrack.samples.length) {
+        if (audioTrack.samples && audioTrack.samples.length) {
             this._audioDtsBase = audioTrack.samples[0].dts;
         }
-        if (videoTrack && videoTrack.samples && videoTrack.samples.length) {
+        if (videoTrack.samples && videoTrack.samples.length) {
             this._videoDtsBase = videoTrack.samples[0].dts;
         }
 
         this._dtsBase = Math.min(this._audioDtsBase, this._videoDtsBase);
-        this._dtsBaseInited = true;
-    }
-
-    getTimestampBase() {
-        if (!this._dtsBaseInited) {
-            return undefined;
-        }
-        return this._dtsBase;
-    }
-
-    flushStashedSamples() {
-        let videoSample = this._videoStashedLastSample;
-        let audioSample = this._audioStashedLastSample;
-
-        let videoTrack = {
-            type: 'video',
-            id: 1,
-            sequenceNumber: 0,
-            samples: [],
-            length: 0
-        };
-
-        if (videoSample != null) {
-            videoTrack.samples.push(videoSample);
-            videoTrack.length = videoSample.length;
-        }
 
-        let audioTrack = {
-            type: 'audio',
-            id: 2,
-            sequenceNumber: 0,
-            samples: [],
-            length: 0
-        };
+        Log.v(this.TAG, 'base DTS initialized: ' + this._dtsBase + ' (audio ' + this._audioDtsBase +
+            ', video ' + this._videoDtsBase + ')');
 
-        if (audioSample != null) {
-            audioTrack.samples.push(audioSample);
-            audioTrack.length = audioSample.length;
-        }
-
-        this._videoStashedLastSample = null;
-        this._audioStashedLastSample = null;
-
-        this._remuxVideo(videoTrack, true);
-        this._remuxAudio(audioTrack, true);
+        this._dtsBaseInited = true;
     }
 
-    _remuxAudio(audioTrack, force) {
+    _remuxAudio(audioTrack) {
         if (this._audioMeta == null) {
             return;
         }
 
         let track = audioTrack;
         let samples = track.samples;
-        let dtsCorrection = undefined;
-        let firstDts = -1, lastDts = -1, lastPts = -1;
+        // Reserve the last sample (its duration needs the next dts); guard `samples` before reading its length.
+        let sampleLength = samples ? samples.length : 0;
+        if (sampleLength <= 1) {
+            return;
+        }
+
+        let lastSample = samples[sampleLength - 1];
+        let firstDts = -1, lastDts = -1;
         let refSampleDuration = this._audioMeta.refSampleDuration;
 
         let mpegRawTrack = this._audioMeta.codec === 'mp3' && this._mp3UseMpegAudio;
@@ -257,15 +217,6 @@ class MP4Remuxer {
 
         let insertPrefixSilentFrame = false;
 
-        if (!samples || samples.length === 0) {
-            return;
-        }
-        if (samples.length === 1 && !force) {
-            // If [sample count in current batch] === 1 && (force != true)
-            // Ignore and keep in demuxer's queue
-            return;
-        }  // else if (force === true) do remux
-
         let offset = 0;
         let mdatbox = null;
         let mdatBytes = 0;
@@ -274,67 +225,25 @@ class MP4Remuxer {
         if (mpegRawTrack) {
             // for raw mpeg buffer
             offset = 0;
-            mdatBytes = track.length;
+            mdatBytes = track.length - lastSample.unit.byteLength;
         } else {
             // for fmp4 mdat box
             offset = 8;  // size + type
-            mdatBytes = 8 + track.length;
-        }
-
-
-        let lastSample = null;
-
-        // Pop the lastSample and waiting for stash
-        if (samples.length > 1) {
-            lastSample = samples.pop();
-            mdatBytes -= lastSample.length;
-        }
-
-        // Insert [stashed lastSample in the previous batch] to the front
-        if (this._audioStashedLastSample != null) {
-            let sample = this._audioStashedLastSample;
-            this._audioStashedLastSample = null;
-            samples.unshift(sample);
-            mdatBytes += sample.length;
+            mdatBytes = 8 + track.length - lastSample.unit.byteLength;
         }
 
-        // Stash the lastSample of current batch, waiting for next batch
-        if (lastSample != null) {
-            this._audioStashedLastSample = lastSample;
-        }
-
-
         let firstSampleOriginalDts = samples[0].dts - this._dtsBase;
 
-        // calculate dtsCorrection
-        if (this._audioNextDts) {
-            dtsCorrection = firstSampleOriginalDts - this._audioNextDts;
-        } else {  // this._audioNextDts == undefined
-            if (this._audioSegmentInfoList.isEmpty()) {
-                dtsCorrection = 0;
-                if (this._fillSilentAfterSeek && !this._videoSegmentInfoList.isEmpty()) {
-                    if (this._audioMeta.originalCodec !== 'mp3') {
-                        insertPrefixSilentFrame = true;
-                    }
-                }
-            } else {
-                let lastSample = this._audioSegmentInfoList.getLastSampleBefore(firstSampleOriginalDts);
-                if (lastSample != null) {
-                    let distance = (firstSampleOriginalDts - (lastSample.originalDts + lastSample.duration));
-                    if (distance <= 3) {
-                        distance = 0;
-                    }
-                    let expectedDts = lastSample.dts + lastSample.duration + distance;
-                    dtsCorrection = firstSampleOriginalDts - expectedDts;
-                } else { // lastSample == null, cannot found
-                    dtsCorrection = 0;
-                }
-            }
+        if (this._audioNextDts == undefined &&
+            this._audioSegmentInfoList.isEmpty() &&
+            this._fillSilentAfterSeek && !this._videoSegmentInfoList.isEmpty() &&
+            this._audioMeta.originalCodec !== 'mp3') {
+            insertPrefixSilentFrame = true;
         }
 
         if (insertPrefixSilentFrame) {
             // align audio segment beginDts to match with current video segment's beginDts
-            let firstSampleDts = firstSampleOriginalDts - dtsCorrection;
+            let firstSampleDts = firstSampleOriginalDts;
             let videoSegment = this._videoSegmentInfoList.getLastSegmentBefore(firstSampleOriginalDts);
             if (videoSegment != null && videoSegment.beginDts < firstSampleDts) {
                 let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount);
@@ -342,7 +251,7 @@ class MP4Remuxer {
                     let dts = videoSegment.beginDts;
                     let silentFrameDuration = firstSampleDts - videoSegment.beginDts;
                     Log.v(this.TAG, `InsertPrefixSilentAudio: dts: ${dts}, duration: ${silentFrameDuration}`);
-                    samples.unshift({ unit: silentUnit, dts: dts, pts: dts });
+                    sampleLength = samples.unshift({unit: silentUnit, dts: dts, pts: dts});  // unshift returns the new length; keeps loop/splice bounds in step with the inserted frame
                     mdatBytes += silentUnit.byteLength;
                 }  // silentUnit == null: Cannot generate, skip
             } else {
@@ -353,113 +262,96 @@ class MP4Remuxer {
         let mp4Samples = [];
 
         // Correct dts for each sample, and calculate sample duration. Then output to mp4Samples
-        for (let i = 0; i < samples.length; i++) {
+        for (let i = 0; i < sampleLength - 1; i++) {
             let sample = samples[i];
             let unit = sample.unit;
             let originalDts = sample.dts - this._dtsBase;
             let dts = originalDts;
-            let needFillSilentFrames = false;
-            let silentFrames = null;
-            let sampleDuration = 0;
 
-            if (originalDts < -0.001) {
-                continue; //pass the first sample with the invalid dts
-            }
-
-            if (this._audioMeta.codec !== 'mp3' && refSampleDuration != null) {
-                // for AAC codec, we need to keep dts increase based on refSampleDuration
-                let curRefDts = originalDts;
-                const maxAudioFramesDrift = 3;
-                if (this._audioNextDts) {
-                    curRefDts = this._audioNextDts;
-                }
+            //let myDate = new Date();
+            //Log.d(this.TAG, 'audio sample, dts: ' + dts + ', size: ' + sample.unit.byteLength + ', time: ' + myDate.getTime());
 
-                dtsCorrection = originalDts - curRefDts;
-                if (dtsCorrection <= -maxAudioFramesDrift * refSampleDuration) {
-                    // If we're overlapping by more than maxAudioFramesDrift number of frame, drop this sample
-                    Log.w(this.TAG, `Dropping 1 audio frame (originalDts: ${originalDts} ms ,curRefDts: ${curRefDts} ms)  due to dtsCorrection: ${dtsCorrection} ms overlap.`);
-                    continue;
+            if (this._audioLastDts) {
+                if (dts <= this._audioLastDts) {
+                    Log.d(this.TAG, 'audio dts rollback, we need re-adjust it, ' + dts + ', ' + this._audioLastDts);
+                    dts = this._audioLastDts + 1;
                 }
-                else if (dtsCorrection >= maxAudioFramesDrift * refSampleDuration && this._fillAudioTimestampGap && !Browser.safari) {
-                    // Silent frame generation, if large timestamp gap detected && config.fixAudioTimestampGap
-                    needFillSilentFrames = true;
-                    // We need to insert silent frames to fill timestamp gap
-                    let frameCount = Math.floor(dtsCorrection / refSampleDuration);
-                    Log.w(this.TAG, 'Large audio timestamp gap detected, may cause AV sync to drift. ' +
-                        'Silent frames will be generated to avoid unsync.\n' +
-                        `originalDts: ${originalDts} ms, curRefDts: ${curRefDts} ms, ` +
-                        `dtsCorrection: ${Math.round(dtsCorrection)} ms, generate: ${frameCount} frames`);
-
-
-                    dts = Math.floor(curRefDts);
-                    sampleDuration = Math.floor(curRefDts + refSampleDuration) - dts;
-
-                    let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount);
-                    if (silentUnit == null) {
-                        Log.w(this.TAG, 'Unable to generate silent frame for ' +
-                            `${this._audioMeta.originalCodec} with ${this._audioMeta.channelCount} channels, repeat last frame`);
-                        // Repeat last frame
-                        silentUnit = unit;
-                    }
-                    silentFrames = [];
-
-                    for (let j = 0; j < frameCount; j++) {
-                        curRefDts = curRefDts + refSampleDuration;
-                        let intDts = Math.floor(curRefDts);  // change to integer
-                        let intDuration = Math.floor(curRefDts + refSampleDuration) - intDts;
-                        let frame = {
-                            dts: intDts,
-                            pts: intDts,
-                            cts: 0,
-                            unit: silentUnit,
-                            size: silentUnit.byteLength,
-                            duration: intDuration,  // wait for next sample
-                            originalDts: originalDts,
-                            flags: {
-                                isLeading: 0,
-                                dependsOn: 1,
-                                isDependedOn: 0,
-                                hasRedundancy: 0
-                            }
-                        };
-                        silentFrames.push(frame);
-                        mdatBytes += frame.size;
+            }
+            this._audioLastDts = dts;
 
-                    }
+            if (firstDts === -1) {
+                firstDts = dts;
+            }
 
-                    this._audioNextDts = curRefDts + refSampleDuration;
+            let nextDts = samples[i + 1].dts - this._dtsBase;
+            let sampleDuration = (nextDts > dts) ? (nextDts - dts) : 1;
 
-                } else {
+            let needFillSilentFrames = false;
+            let silentFrames = null;
 
-                    dts = Math.floor(curRefDts);
-                    sampleDuration = Math.floor(curRefDts + refSampleDuration) - dts;
-                    this._audioNextDts = curRefDts + refSampleDuration;
+            // Silent frame generation, if large timestamp gap detected && config.fixAudioTimestampGap
+            if (sampleDuration > refSampleDuration * 1.5 && this._audioMeta.codec !== 'mp3' && this._fillAudioTimestampGap && !Browser.safari) {
+                // We need to insert silent frames to fill timestamp gap
+                Log.w(this.TAG, 'need insert silent audio frames, sampleDuration: ' + sampleDuration + ', ' +
+                    'refSampleDuration: ' + refSampleDuration);
+                needFillSilentFrames = true;
+                let delta = Math.abs(sampleDuration - refSampleDuration);
+                let frameCount = Math.ceil(delta / refSampleDuration);
+                let currentDts = dts + refSampleDuration;  // Notice: in float
+
+                Log.w(this.TAG, 'Large audio timestamp gap detected, may cause AV sync to drift. ' +
+                                'Silent frames will be generated to avoid unsync.\n' +
+                                `dts: ${dts + sampleDuration} ms, expected: ${dts + Math.round(refSampleDuration)} ms, ` +
+                                `delta: ${Math.round(delta)} ms, generate: ${frameCount} frames`);
 
+                let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount);
+                if (silentUnit == null) {
+                    Log.w(this.TAG, 'Unable to generate silent frame for ' +
+                                    `${this._audioMeta.originalCodec} with ${this._audioMeta.channelCount} channels, repeat last frame`);
+                    // Repeat last frame
+                    silentUnit = unit;
                 }
-            } else {
-                // keep the original dts calculate algorithm for mp3
-                dts = originalDts - dtsCorrection;
-
-
-                if (i !== samples.length - 1) {
-                    let nextDts = samples[i + 1].dts - this._dtsBase - dtsCorrection;
-                    sampleDuration = nextDts - dts;
-                } else {  // the last sample
-                    if (lastSample != null) {  // use stashed sample's dts to calculate sample duration
-                        let nextDts = lastSample.dts - this._dtsBase - dtsCorrection;
-                        sampleDuration = nextDts - dts;
-                    } else if (mp4Samples.length >= 1) {  // use second last sample duration
-                        sampleDuration = mp4Samples[mp4Samples.length - 1].duration;
-                    } else {  // the only one sample, use reference sample duration
-                        sampleDuration = Math.floor(refSampleDuration);
+                silentFrames = [];
+
+                for (let j = 0; j < frameCount; j++) {
+                    let intDts = Math.round(currentDts);  // round to integer
+                    if (silentFrames.length > 0) {
+                        // Set previous frame sample duration
+                        let previousFrame = silentFrames[silentFrames.length - 1];
+                        previousFrame.duration = intDts - previousFrame.dts;
                     }
+                    let frame = {
+                        dts: intDts,
+                        pts: intDts,
+                        cts: 0,
+                        unit: silentUnit,
+                        size: silentUnit.byteLength,
+                        duration: 0,  // wait for next sample
+                        originalDts: originalDts,
+                        flags: {
+                            isLeading: 0,
+                            dependsOn: 1,
+                            isDependedOn: 0,
+                            hasRedundancy: 0
+                        }
+                    };
+                    silentFrames.push(frame);
+                    mdatBytes += frame.size;  // bytes actually written for this silent frame (silentUnit), not the current sample's size
+                    currentDts += refSampleDuration;
                 }
-                this._audioNextDts = dts + sampleDuration;
-            }
 
-            if (firstDts === -1) {
-                firstDts = dts;
+                // last frame: align end time to next frame dts
+                let lastFrame = silentFrames[silentFrames.length - 1];
+                lastFrame.duration = dts + sampleDuration - lastFrame.dts;
+
+                // silentFrames.forEach((frame) => {
+                //     Log.w(this.TAG, `SilentAudio: dts: ${frame.dts}, duration: ${frame.duration}`);
+                // });
+
+                // Set correct sample duration for current frame
+                sampleDuration = Math.round(refSampleDuration);
             }
+
             mp4Samples.push({
                 dts: dts,
                 pts: dts,
@@ -476,19 +368,14 @@ class MP4Remuxer {
                 }
             });
 
+            // Log.d(this.TAG, 'mp4 audio sample, dts: ' + dts + ', size: ' + sample.unit.byteLength + ', duration: ' + sampleDuration + ', odts: ' + originalDts);
+
             if (needFillSilentFrames) {
                 // Silent frames should be inserted after wrong-duration frame
                 mp4Samples.push.apply(mp4Samples, silentFrames);
             }
         }
 
-        if (mp4Samples.length === 0) {
-            //no samples need to remux
-            track.samples = [];
-            track.length = 0;
-            return;
-        }
-
         // allocate mdatbox
         if (mpegRawTrack) {
             // allocate for raw mpeg buffer
@@ -514,7 +401,7 @@ class MP4Remuxer {
 
         let latest = mp4Samples[mp4Samples.length - 1];
         lastDts = latest.dts + latest.duration;
-        //this._audioNextDts = lastDts;
+        this._audioNextDts = lastDts;
 
         // fill media segment info & add to info list
         let info = new MediaSegmentInfo();
@@ -525,20 +412,20 @@ class MP4Remuxer {
         info.originalBeginDts = mp4Samples[0].originalDts;
         info.originalEndDts = latest.originalDts + latest.duration;
         info.firstSample = new SampleInfo(mp4Samples[0].dts,
-                                          mp4Samples[0].pts,
-                                          mp4Samples[0].duration,
-                                          mp4Samples[0].originalDts,
-                                          false);
+            mp4Samples[0].pts,
+            mp4Samples[0].duration,
+            mp4Samples[0].originalDts,
+            false);
         info.lastSample = new SampleInfo(latest.dts,
-                                         latest.pts,
-                                         latest.duration,
-                                         latest.originalDts,
-                                         false);
+            latest.pts,
+            latest.duration,
+            latest.originalDts,
+            false);
         if (!this._isLive) {
             this._audioSegmentInfoList.append(info);
         }
 
-        track.samples = mp4Samples;
+        track.mp4Samples = mp4Samples;
         track.sequenceNumber++;
 
         let moofbox = null;
@@ -551,8 +438,9 @@ class MP4Remuxer {
             moofbox = MP4.moof(track, firstDts);
         }
 
-        track.samples = [];
-        track.length = 0;
+        track.length = lastSample.unit.byteLength;
+        track.samples.splice(0, sampleLength - 1);
+        track.mp4Samples = [];
 
         let segment = {
             type: 'audio',
@@ -570,108 +458,64 @@ class MP4Remuxer {
         this._onMediaSegment('audio', segment);
     }
 
-    _remuxVideo(videoTrack, force) {
+    _remuxVideo(videoTrack) {
         if (this._videoMeta == null) {
             return;
         }
 
         let track = videoTrack;
         let samples = track.samples;
-        let dtsCorrection = undefined;
+        let sampleLength = samples ? samples.length : 0;  // null-safe: the guard below also tests `samples`
         let firstDts = -1, lastDts = -1;
         let firstPts = -1, lastPts = -1;
 
-        if (!samples || samples.length === 0) {
+        // we will reserve the last sample for accurate sample duration
+        if (!samples || sampleLength <= 1) {
             return;
         }
-        if (samples.length === 1 && !force) {
-            // If [sample count in current batch] === 1 && (force != true)
-            // Ignore and keep in demuxer's queue
-            return;
-        }  // else if (force === true) do remux
 
+        let lastSample = samples[sampleLength - 1];
         let offset = 8;
-        let mdatbox = null;
-        let mdatBytes = 8 + videoTrack.length;
-
-
-        let lastSample = null;
-
-        // Pop the lastSample and waiting for stash
-        if (samples.length > 1) {
-            lastSample = samples.pop();
-            mdatBytes -= lastSample.length;
-        }
-
-        // Insert [stashed lastSample in the previous batch] to the front
-        if (this._videoStashedLastSample != null) {
-            let sample = this._videoStashedLastSample;
-            this._videoStashedLastSample = null;
-            samples.unshift(sample);
-            mdatBytes += sample.length;
-        }
-
-        // Stash the lastSample of current batch, waiting for next batch
-        if (lastSample != null) {
-            this._videoStashedLastSample = lastSample;
-        }
-
-
-        let firstSampleOriginalDts = samples[0].dts - this._dtsBase;
-
-        // calculate dtsCorrection
-        if (this._videoNextDts) {
-            dtsCorrection = firstSampleOriginalDts - this._videoNextDts;
-        } else {  // this._videoNextDts == undefined
-            if (this._videoSegmentInfoList.isEmpty()) {
-                dtsCorrection = 0;
-            } else {
-                let lastSample = this._videoSegmentInfoList.getLastSampleBefore(firstSampleOriginalDts);
-                if (lastSample != null) {
-                    let distance = (firstSampleOriginalDts - (lastSample.originalDts + lastSample.duration));
-                    if (distance <= 3) {
-                        distance = 0;
-                    }
-                    let expectedDts = lastSample.dts + lastSample.duration + distance;
-                    dtsCorrection = firstSampleOriginalDts - expectedDts;
-                } else { // lastSample == null, cannot found
-                    dtsCorrection = 0;
-                }
-            }
-        }
+        let mdatBytes = 8 + track.length - lastSample.length;
+        let mdatbox = new Uint8Array(mdatBytes);
+        mdatbox[0] = (mdatBytes >>> 24) & 0xFF;
+        mdatbox[1] = (mdatBytes >>> 16) & 0xFF;
+        mdatbox[2] = (mdatBytes >>>  8) & 0xFF;
+        mdatbox[3] = (mdatBytes) & 0xFF;
+        mdatbox.set(MP4.types.mdat, 4);
 
         let info = new MediaSegmentInfo();
         let mp4Samples = [];
 
         // Correct dts for each sample, and calculate sample duration. Then output to mp4Samples
-        for (let i = 0; i < samples.length; i++) {
+        for (let i = 0; i < sampleLength - 1; i++) {
             let sample = samples[i];
             let originalDts = sample.dts - this._dtsBase;
             let isKeyframe = sample.isKeyframe;
-            let dts = originalDts - dtsCorrection;
+            let dts = originalDts;
             let cts = sample.cts;
             let pts = dts + cts;
 
+            // let myDate = new Date();
+            // Log.d(this.TAG, 'video sample, dts: ' + dts + ', size: ' + sample.length + ', time: ' + myDate.getTime());
+
+            if (this._videoLastDts) {
+                if (dts <= this._videoLastDts) {
+                    Log.d(this.TAG, 'video dts rollback, we need re-adjust it, ' + dts + ', ' + this._videoLastDts);
+                    let dtsOffset = this._videoLastDts + 1 - dts;
+                    dts = this._videoLastDts + 1;
+                    pts += dtsOffset;
+                }
+            }
+            this._videoLastDts = dts;
+
             if (firstDts === -1) {
                 firstDts = dts;
                 firstPts = pts;
             }
 
-            let sampleDuration = 0;
-
-            if (i !== samples.length - 1) {
-                let nextDts = samples[i + 1].dts - this._dtsBase - dtsCorrection;
-                sampleDuration = nextDts - dts;
-            } else {  // the last sample
-                if (lastSample != null) {  // use stashed sample's dts to calculate sample duration
-                    let nextDts = lastSample.dts - this._dtsBase - dtsCorrection;
-                    sampleDuration = nextDts - dts;
-                } else if (mp4Samples.length >= 1) {  // use second last sample duration
-                    sampleDuration = mp4Samples[mp4Samples.length - 1].duration;
-                } else {  // the only one sample, use reference sample duration
-                    sampleDuration = Math.floor(this._videoMeta.refSampleDuration);
-                }
-            }
+            let nextDts = samples[i + 1].dts - this._dtsBase;
+            let sampleDuration = (nextDts > dts) ? (nextDts - dts) : 1;
 
             if (isKeyframe) {
                 let syncPoint = new SampleInfo(dts, pts, sampleDuration, sample.dts, true);
@@ -696,15 +540,9 @@ class MP4Remuxer {
                     isNonSync: isKeyframe ? 0 : 1
                 }
             });
-        }
 
-        // allocate mdatbox
-        mdatbox = new Uint8Array(mdatBytes);
-        mdatbox[0] = (mdatBytes >>> 24) & 0xFF;
-        mdatbox[1] = (mdatBytes >>> 16) & 0xFF;
-        mdatbox[2] = (mdatBytes >>>  8) & 0xFF;
-        mdatbox[3] = (mdatBytes) & 0xFF;
-        mdatbox.set(MP4.types.mdat, 4);
+            // Log.d(this.TAG, 'mp4 video sample, dts: ' + dts + ', pts: ' + pts + ', cts: ' + cts + ', size: ' + sample.length + ', duration: ' + sampleDuration + ', odts: ' + originalDts);
+        }
 
         // Write samples into mdatbox
         for (let i = 0; i < mp4Samples.length; i++) {
@@ -720,7 +558,6 @@ class MP4Remuxer {
         let latest = mp4Samples[mp4Samples.length - 1];
         lastDts = latest.dts + latest.duration;
         lastPts = latest.pts + latest.duration;
-        this._videoNextDts = lastDts;
 
         // fill media segment info & add to info list
         info.beginDts = firstDts;
@@ -730,20 +567,20 @@ class MP4Remuxer {
         info.originalBeginDts = mp4Samples[0].originalDts;
         info.originalEndDts = latest.originalDts + latest.duration;
         info.firstSample = new SampleInfo(mp4Samples[0].dts,
-                                          mp4Samples[0].pts,
-                                          mp4Samples[0].duration,
-                                          mp4Samples[0].originalDts,
-                                          mp4Samples[0].isKeyframe);
+            mp4Samples[0].pts,
+            mp4Samples[0].duration,
+            mp4Samples[0].originalDts,
+            mp4Samples[0].isKeyframe);
         info.lastSample = new SampleInfo(latest.dts,
-                                         latest.pts,
-                                         latest.duration,
-                                         latest.originalDts,
-                                         latest.isKeyframe);
+            latest.pts,
+            latest.duration,
+            latest.originalDts,
+            latest.isKeyframe);
         if (!this._isLive) {
             this._videoSegmentInfoList.append(info);
         }
 
-        track.samples = mp4Samples;
+        track.mp4Samples = mp4Samples;
         track.sequenceNumber++;
 
         // workaround for chrome < 50: force first sample as a random access point
@@ -755,8 +592,11 @@ class MP4Remuxer {
         }
 
         let moofbox = MP4.moof(track, firstDts);
-        track.samples = [];
-        track.length = 0;
+
+        // reserve the last sample
+        track.length = lastSample.length;
+        track.samples.splice(0, sampleLength - 1);
+        track.mp4Samples = [];
 
         this._onMediaSegment('video', {
             type: 'video',
@@ -775,4 +615,4 @@ class MP4Remuxer {
 
 }
 
-export default MP4Remuxer;
+export default MP4Remuxer;
\ No newline at end of file