Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Custom ignored
.vscode
*.mp4
*.mov
# But keep test media fixtures in git
!test-media/*.mp4
!test-media/*.mp3
!test-media/*.mov
build/*
docs/_build
.DS_Store
Expand Down
4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ else(MSVC)
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
#set(CMAKE_CUDA_FLAGS "-std=c++11 ${CMAKE_CUDA_FLAGS}")
# Append -rdynamic to the executable link flags on every platform except Apple.
# The append must live only inside this guard: an additional unconditional
# append would apply the flag on Apple as well and duplicate it everywhere else.
if(NOT APPLE)
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
endif()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
message(STATUS "Build in Debug mode")
set(CMAKE_C_FLAGS "-O0 -g -Wall -fPIC ${CMAKE_C_FLAGS}")
Expand Down
16 changes: 14 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,24 @@
Table of contents
=================

- [FFmpeg Compatibility](#ffmpeg-compatibility)
- [Benchmark](#preliminary-benchmark)
- [Installation](#installation)
- [Usage](#usage)
- [Bridges for Deep Learning frameworks](#bridges-for-deep-learning-frameworks)

## FFmpeg Compatibility

Decord was originally written against the FFmpeg 4.x API. This fork adds support for newer FFmpeg releases by wrapping the affected code in preprocessor guards keyed on the FFmpeg library major versions:

| FFmpeg Version | Status | Notes |
|---|---|---|
| 4.x | Supported | Original target API |
| 5.x | Supported | `const AVCodec*`/`const AVInputFormat*` signatures, `bsf.h` include |
| 6.x | Supported | `ch_layout` API replaces `channels`/`channel_layout` |
| 7.x | Supported | `av_packet_side_data_get`, gated `avcodec_close`, pixel format string changes in filter graph, SAR sanitization |
| 8.x | Supported | Tested on 8.0.1 |

## Preliminary benchmark

Decord is good at handling random access patterns, which is rather common during neural network training.
Expand Down Expand Up @@ -57,8 +70,7 @@ Supported platforms:
Install the system packages for building the shared library, for Debian/Ubuntu users, run:

```bash
# official PPA comes with ffmpeg 2.8, which lacks tons of features, we use ffmpeg 4.0 here
sudo add-apt-repository ppa:jonathonf/ffmpeg-4 # for ubuntu20.04 official PPA is already version 4.2, you may skip this step
# FFmpeg 4.x through 8.x are supported (see FFmpeg Compatibility above)
sudo apt-get update
sudo apt-get install -y build-essential python3-dev python3-setuptools make cmake
sudo apt-get install -y ffmpeg libavcodec-dev libavfilter-dev libavformat-dev libavutil-dev
Expand Down
1 change: 1 addition & 0 deletions python/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include decord/libdecord.so
30 changes: 30 additions & 0 deletions src/audio/audio_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,11 @@ namespace decord {
pCodecParameters = tempCodecParameters;
originalSampleRate = tempCodecParameters->sample_rate;
if (targetSampleRate == -1) targetSampleRate = originalSampleRate;
#if LIBAVCODEC_VERSION_MAJOR >= 60
numChannels = tempCodecParameters->ch_layout.nb_channels;
#else
numChannels = tempCodecParameters->channels;
#endif
break;
}
}
Expand All @@ -148,7 +152,9 @@ namespace decord {
if (codecOpenRet < 0) {
char errstr[200];
av_strerror(codecOpenRet, errstr, 200);
#if LIBAVCODEC_VERSION_MAJOR < 61
avcodec_close(pCodecContext);
#endif
avcodec_free_context(&pCodecContext);
avformat_close_input(&pFormatContext);
LOG(FATAL) << "ERROR open codec through avcodec_open2: " << errstr;
Expand Down Expand Up @@ -210,7 +216,9 @@ namespace decord {
// clean up
av_frame_free(&pFrame);
av_packet_free(&pPacket);
#if LIBAVCODEC_VERSION_MAJOR < 61
avcodec_close(pCodecContext);
#endif
swr_close(swr);
swr_free(&swr);
avcodec_free_context(&pCodecContext);
Expand All @@ -229,7 +237,11 @@ namespace decord {
// allocate resample buffer
float** outBuffer;
int outLinesize = 0;
#if LIBAVCODEC_VERSION_MAJOR >= 60
int outNumChannels = mono ? 1 : pFrame->ch_layout.nb_channels;
#else
int outNumChannels = av_get_channel_layout_nb_channels(mono ? AV_CH_LAYOUT_MONO : pFrame->channel_layout);
#endif
numChannels = outNumChannels;
int outNumSamples = av_rescale_rnd(pFrame->nb_samples,
this->targetSampleRate, pFrame->sample_rate, AV_ROUND_UP);
Expand Down Expand Up @@ -281,11 +293,29 @@ namespace decord {
if (!this->swr) {
LOG(FATAL) << "ERROR Failed to allocate resample context";
}
#if LIBAVCODEC_VERSION_MAJOR >= 60
    // Configure the resampler's input/output channel layouts via the
    // AVChannelLayout API.
    //
    // Both structs MUST be zero-initialized: av_channel_layout_copy()
    // uninitializes its destination before copying, and running that on an
    // indeterminate stack struct can free a garbage pointer.
    AVChannelLayout in_ch_layout = {};
    AVChannelLayout out_ch_layout = {};

    av_channel_layout_copy(&in_ch_layout, &pCodecContext->ch_layout);
    if (in_ch_layout.nb_channels == 0) {
        // The codec context reports no channels: fall back to the default
        // one-channel layout.
        av_channel_layout_default(&in_ch_layout, 1);
    }
    av_opt_set_chlayout(this->swr, "in_chlayout", &in_ch_layout, 0);

    // Output is either forced to one channel (mono) or mirrors the input.
    if (mono) {
        av_channel_layout_default(&out_ch_layout, 1);
    } else {
        av_channel_layout_copy(&out_ch_layout, &in_ch_layout);
    }
    av_opt_set_chlayout(this->swr, "out_chlayout", &out_ch_layout, 0);

    // Release the local layouts now that both options have been set.
    av_channel_layout_uninit(&in_ch_layout);
    av_channel_layout_uninit(&out_ch_layout);
#else
    // Legacy bitmask API: derive a default layout from the channel count when
    // the codec context does not provide one.
    if (pCodecContext->channel_layout == 0) {
        pCodecContext->channel_layout = av_get_default_channel_layout( pCodecContext->channels );
    }
    av_opt_set_channel_layout(this->swr, "in_channel_layout", pCodecContext->channel_layout, 0);
    av_opt_set_channel_layout(this->swr, "out_channel_layout", mono ? AV_CH_LAYOUT_MONO : pCodecContext->channel_layout, 0);
#endif
av_opt_set_int(this->swr, "in_sample_rate", pCodecContext->sample_rate, 0);
av_opt_set_int(this->swr, "out_sample_rate", this->targetSampleRate, 0);
av_opt_set_sample_fmt(this->swr, "in_sample_fmt", pCodecContext->sample_fmt, 0);
Expand Down
3 changes: 3 additions & 0 deletions src/video/ffmpeg/ffmpeg_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
extern "C" {
#endif
#include <libavcodec/avcodec.h>
#if LIBAVCODEC_VERSION_MAJOR >= 59
#include <libavcodec/bsf.h>
#endif
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavfilter/avfilter.h>
Expand Down
47 changes: 30 additions & 17 deletions src/video/ffmpeg/filter_graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
#include "filter_graph.h"

#include <dmlc/logging.h>
extern "C" {
#include <libavutil/pixdesc.h>
}

namespace decord {
namespace ffmpeg {
Expand Down Expand Up @@ -36,41 +39,51 @@ void FFMPEGFilterGraph::Init(std::string filters_descr, AVCodecContext *dec_ctx)
CHECK(buffersink) << "Error: no buffersink";
AVFilterInOut *outputs = avfilter_inout_alloc();
AVFilterInOut *inputs = avfilter_inout_alloc();
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24 , AV_PIX_FMT_NONE };
// AVBufferSinkParams *buffersink_params;

filter_graph_.reset(avfilter_graph_alloc());
/* set threads to 1, details see https://github.com/dmlc/decord/pull/63 */
//LOG(INFO) << "Original GraphFilter nb_threads: " << filter_graph_->nb_threads;
filter_graph_->nb_threads = 1;
/* buffer video source: the decoded frames from the decoder will be inserted here. */
std::snprintf(args, sizeof(args),
// Sanitize sample_aspect_ratio: a zero denominator causes inf which FFmpeg 7+ rejects
int sar_num = dec_ctx->sample_aspect_ratio.num;
int sar_den = dec_ctx->sample_aspect_ratio.den;
if (sar_den == 0) {
sar_num = 1;
sar_den = 1;
}
#if LIBAVFILTER_VERSION_MAJOR >= 10
// FFmpeg 7+: pix_fmt option uses AV_OPT_TYPE_PIXEL_FMT, requiring a format name string
const char *pix_fmt_name = av_get_pix_fmt_name(dec_ctx->pix_fmt);
if (!pix_fmt_name) pix_fmt_name = "yuv420p";
std::snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:pixel_aspect=%d/%d",
dec_ctx->width, dec_ctx->height, pix_fmt_name,
dec_ctx->time_base.num, dec_ctx->time_base.den,
sar_num, sar_den);
#else
std::snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt,
dec_ctx->time_base.num, dec_ctx->time_base.den,
dec_ctx->sample_aspect_ratio.num, dec_ctx->sample_aspect_ratio.den);
// std::snprintf(args, sizeof(args),
// "video_size=%dx%d:pix_fmt=%d",
// dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt);
sar_num, sar_den);
#endif

// LOG(INFO) << "filter args: " << args;

// AVFilterContext *buffersrc_ctx;
// AVFilterContext *buffersink_ctx;
CHECK_GE(avfilter_graph_create_filter(&buffersrc_ctx_, buffersrc, "in",
args, NULL, filter_graph_.get()), 0) << "Cannot create buffer source";

// LOG(INFO) << "create filter src";

/* buffer video sink: to terminate the filter chain. */
// buffersink_params = av_buffersink_params_alloc();
// buffersink_params->pixel_fmts = pix_fmts;
CHECK_GE(avfilter_graph_create_filter(&buffersink_ctx_, buffersink, "out",
NULL, NULL, filter_graph_.get()), 0) << "Cannot create buffer sink";
// av_free(buffersink_params);
// LOG(INFO) << "create filter sink";
// CHECK_GE(av_opt_set_bin(buffersink_ctx_, "pix_fmts", (uint8_t *)&pix_fmts, sizeof(AV_PIX_FMT_RGB24), AV_OPT_SEARCH_CHILDREN), 0) << "Set bin error";
#if LIBAVFILTER_VERSION_MAJOR < 10
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24 , AV_PIX_FMT_NONE };
CHECK_GE(av_opt_set_int_list(buffersink_ctx_, "pix_fmts", pix_fmts, AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN), 0) << "Set output pixel format error.";
#else
// FFmpeg 7+: pix_fmts is no longer a runtime option on buffersink,
// so enforce output format via the filter chain instead.
filters_descr += ",format=rgb24";
#endif

// LOG(INFO) << "create filter set opt";
/* Endpoints for the filter graph. */
Expand Down
4 changes: 2 additions & 2 deletions src/video/nvcodec/cuda_threaded_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace decord {
namespace cuda {
using namespace runtime;

CUThreadedDecoder::CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, AVInputFormat *iformat)
CUThreadedDecoder::CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, const AVInputFormat *iformat)
: device_id_(device_id), stream_({device_id, false}), device_{}, ctx_{}, parser_{}, decoder_{},
pkt_queue_{}, frame_queue_{},
run_(false), frame_count_(0), draining_(false),
Expand Down Expand Up @@ -70,7 +70,7 @@ CUThreadedDecoder::CUThreadedDecoder(int device_id, AVCodecParameters *codecpar,
}
}

void CUThreadedDecoder::InitBitStreamFilter(AVCodecParameters *codecpar, AVInputFormat *iformat) {
void CUThreadedDecoder::InitBitStreamFilter(AVCodecParameters *codecpar, const AVInputFormat *iformat) {
const char* bsf_name = nullptr;
if (AV_CODEC_ID_H264 == codecpar->codec_id) {
// H.264
Expand Down
4 changes: 2 additions & 2 deletions src/video/nvcodec/cuda_threaded_decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class CUThreadedDecoder final : public ThreadedDecoderInterface {
using FrameOrderQueuePtr = std::unique_ptr<FrameOrderQueue>;

public:
CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, AVInputFormat *iformat);
CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, const AVInputFormat *iformat);
void SetCodecContext(AVCodecContext *dec_ctx, int width = -1, int height = -1, int rotation = 0);
bool Initialized() const;
void Start();
Expand All @@ -70,7 +70,7 @@ class CUThreadedDecoder final : public ThreadedDecoderInterface {
void LaunchThreadImpl();
void RecordInternalError(std::string message);
void CheckErrorStatus();
void InitBitStreamFilter(AVCodecParameters *codecpar, AVInputFormat *iformat);
void InitBitStreamFilter(AVCodecParameters *codecpar, const AVInputFormat *iformat);

int device_id_;
CUStream stream_;
Expand Down
11 changes: 10 additions & 1 deletion src/video/video_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ VideoReader::~VideoReader(){

void VideoReader::SetVideoStream(int stream_nb) {
if (!fmt_ctx_) return;
// av_find_best_stream() takes `const AVCodec **` starting with libavformat 59
// (FFmpeg 5.0) and `AVCodec **` before that, so the constness of `dec` has to
// follow the library version for the `&dec` argument below to compile on both.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
const AVCodec *dec;
#else
AVCodec *dec;
#endif
int st_nb = av_find_best_stream(fmt_ctx_.get(), AVMEDIA_TYPE_VIDEO, stream_nb, -1, &dec, 0);
// LOG(INFO) << "find best stream: " << st_nb;
CHECK_GE(st_nb, 0) << "ERROR cannot find video stream with wanted index: " << stream_nb;
Expand Down Expand Up @@ -554,9 +554,18 @@ double VideoReader::GetRotation() const {
if (rotate && *rotate->value && strcmp(rotate->value, "0"))
theta = atof(rotate->value);

#if LIBAVFORMAT_VERSION_MAJOR >= 61
const AVPacketSideData *sd = av_packet_side_data_get(
active_st->codecpar->coded_side_data,
active_st->codecpar->nb_coded_side_data,
AV_PKT_DATA_DISPLAYMATRIX);
if (sd && !theta)
theta = -av_display_rotation_get((const int32_t*) sd->data);
#else
uint8_t* displaymatrix = av_stream_get_side_data(active_st, AV_PKT_DATA_DISPLAYMATRIX, NULL);
if (displaymatrix && !theta)
theta = -av_display_rotation_get((int32_t*) displaymatrix);
#endif

theta = std::fmod(theta, 360);
if(theta < 0) theta += 360;
Expand Down
Binary file not shown.
Binary file added test-media/big_buck_bunny.mp4
Binary file not shown.
74 changes: 74 additions & 0 deletions test-media/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Shared fixtures for decord tests."""
import os
import pytest
import numpy as np

from decord import VideoReader, AudioReader, AVReader, cpu


# Directory containing this conftest.py; the media fixtures sit next to it.
MEDIA_DIR = os.path.dirname(__file__)
# Further fixtures are addressed relative to MEDIA_DIR, in ../tests/test_data.
TEST_DATA_DIR = os.path.join(MEDIA_DIR, '..', 'tests', 'test_data')

# ---- Files addressed relative to MEDIA_DIR ----

# Primary test clip, big_buck_bunny.mp4:
#   video: 1440 frames, 640x360, ~24fps, ~60s
#   audio: AAC stereo at 22050 Hz
BBB_PATH = os.path.join(MEDIA_DIR, 'big_buck_bunny.mp4')

# Audio-only MP3: ~878s, mono, 44100 Hz
MP3_PATH = os.path.join(MEDIA_DIR, '26_Universität_Wien_Informationen-Audioversion-ElevenLabs_20260123_final.mp3')

# Small clip for the video-only fixtures (stored under ../examples)
PANCAKE_PATH = os.path.join(MEDIA_DIR, '..', 'examples', 'flipping_a_pancake.mkv')

# ---- Files under TEST_DATA_DIR ----

# Deliberately corrupted video
CORRUPTED_PATH = os.path.join(TEST_DATA_DIR, 'corrupted.mp4')

# Video with unordered PTS
UNORDERED_PATH = os.path.join(TEST_DATA_DIR, 'unordered.mov')

# Rotation test videos, keyed by rotation value -> video_<rotation>.mov
ROTATION_VIDEOS = dict(
    (angle, os.path.join(TEST_DATA_DIR, f'video_{angle}.mov'))
    for angle in (0, 90, 180, 270)
)

# Video-only file (no audio stream)
VIDEO_ONLY_PATH = os.path.join(TEST_DATA_DIR, 'video_0.mov')

# Device context shared by every reader fixture in this module: decord's cpu(0).
CTX = cpu(0)


@pytest.fixture
def bbb_video():
    """Return a ``VideoReader`` opened on ``BBB_PATH`` using the shared ``CTX``."""
    reader = VideoReader(BBB_PATH, ctx=CTX)
    return reader


@pytest.fixture
def bbb_audio():
    """Return an ``AudioReader`` on ``BBB_PATH`` with ``mono=True``."""
    reader = AudioReader(BBB_PATH, mono=True, ctx=CTX)
    return reader


@pytest.fixture
def bbb_audio_stereo():
    """Return an ``AudioReader`` on ``BBB_PATH`` with ``mono=False`` (stereo)."""
    reader = AudioReader(BBB_PATH, mono=False, ctx=CTX)
    return reader


@pytest.fixture
def bbb_av():
    """Return an ``AVReader`` opened on ``BBB_PATH`` using the shared ``CTX``."""
    reader = AVReader(BBB_PATH, ctx=CTX)
    return reader


@pytest.fixture
def pancake_video():
    """Return a ``VideoReader`` opened on ``PANCAKE_PATH`` (flipping_a_pancake.mkv)."""
    reader = VideoReader(PANCAKE_PATH, ctx=CTX)
    return reader


@pytest.fixture
def mp3_audio():
    """Return an ``AudioReader`` on ``MP3_PATH`` with ``mono=True`` (mono, 44100 Hz file)."""
    reader = AudioReader(MP3_PATH, mono=True, ctx=CTX)
    return reader
Loading