diff --git a/.gitignore b/.gitignore index 4b1f945d..f16afce9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,11 @@ # Custom ignored .vscode +*.mp4 +*.mov +# But keep test media fixtures in git +!test-media/*.mp4 +!test-media/*.mp3 +!test-media/*.mov build/* docs/_build .DS_Store diff --git a/CMakeLists.txt b/CMakeLists.txt index b121d24a..9060a3bd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,7 +60,9 @@ else(MSVC) include(CheckCXXCompilerFlag) check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) #set(CMAKE_CUDA_FLAGS "-std=c++11 ${CMAKE_CUDA_FLAGS}") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") + if(NOT APPLE) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") + endif() if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") message(STATUS "Build in Debug mode") set(CMAKE_C_FLAGS "-O0 -g -Wall -fPIC ${CMAKE_C_FLAGS}") diff --git a/README.md b/README.md index 376305a2..f1f6c8d3 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,24 @@ Table of contents ================= +- [FFmpeg Compatibility](#ffmpeg-compatibility) - [Benchmark](#preliminary-benchmark) - [Installation](#installation) - [Usage](#usage) - [Bridge for Deep Learning frameworks](#bridges-for-deep-learning-frameworks) +## FFmpeg Compatibility + +Decord was originally written against FFmpeg 4.x APIs. This fork includes patches to support newer FFmpeg versions with version-gated preprocessor guards: + +| FFmpeg Version | Status | Notes | +|---|---|---| +| 4.x | Supported | Original target API | +| 5.x | Supported | `const AVCodec*`/`const AVInputFormat*` signatures, `bsf.h` include | +| 6.x | Supported | `ch_layout` API replaces `channels`/`channel_layout` | +| 7.x | Supported | `av_packet_side_data_get`, gated `avcodec_close`, pixel format string changes in filter graph, SAR sanitization | +| 8.x | Supported | Tested on 8.0.1 | + ## Preliminary benchmark Decord is good at handling random access patterns, which is rather common during neural network training. 
@@ -57,8 +70,7 @@ Supported platforms: Install the system packages for building the shared library, for Debian/Ubuntu users, run: ```bash -# official PPA comes with ffmpeg 2.8, which lacks tons of features, we use ffmpeg 4.0 here -sudo add-apt-repository ppa:jonathonf/ffmpeg-4 # for ubuntu20.04 official PPA is already version 4.2, you may skip this step +# FFmpeg 4.x through 8.x are supported (see FFmpeg Compatibility above) sudo apt-get update sudo apt-get install -y build-essential python3-dev python3-setuptools make cmake sudo apt-get install -y ffmpeg libavcodec-dev libavfilter-dev libavformat-dev libavutil-dev diff --git a/python/MANIFEST.in b/python/MANIFEST.in new file mode 100644 index 00000000..0597ee4a --- /dev/null +++ b/python/MANIFEST.in @@ -0,0 +1 @@ +include decord/libdecord.so diff --git a/src/audio/audio_reader.cc b/src/audio/audio_reader.cc index be706f10..19f4e13b 100644 --- a/src/audio/audio_reader.cc +++ b/src/audio/audio_reader.cc @@ -128,7 +128,11 @@ namespace decord { pCodecParameters = tempCodecParameters; originalSampleRate = tempCodecParameters->sample_rate; if (targetSampleRate == -1) targetSampleRate = originalSampleRate; +#if LIBAVCODEC_VERSION_MAJOR >= 60 + numChannels = tempCodecParameters->ch_layout.nb_channels; +#else numChannels = tempCodecParameters->channels; +#endif break; } } @@ -148,7 +152,9 @@ namespace decord { if (codecOpenRet < 0) { char errstr[200]; av_strerror(codecOpenRet, errstr, 200); +#if LIBAVCODEC_VERSION_MAJOR < 61 avcodec_close(pCodecContext); +#endif avcodec_free_context(&pCodecContext); avformat_close_input(&pFormatContext); LOG(FATAL) << "ERROR open codec through avcodec_open2: " << errstr; @@ -210,7 +216,9 @@ namespace decord { // clean up av_frame_free(&pFrame); av_packet_free(&pPacket); +#if LIBAVCODEC_VERSION_MAJOR < 61 avcodec_close(pCodecContext); +#endif swr_close(swr); swr_free(&swr); avcodec_free_context(&pCodecContext); @@ -229,7 +237,11 @@ namespace decord { // allocate resample buffer float** 
outBuffer; int outLinesize = 0; +#if LIBAVCODEC_VERSION_MAJOR >= 60 + int outNumChannels = mono ? 1 : pFrame->ch_layout.nb_channels; +#else int outNumChannels = av_get_channel_layout_nb_channels(mono ? AV_CH_LAYOUT_MONO : pFrame->channel_layout); +#endif numChannels = outNumChannels; int outNumSamples = av_rescale_rnd(pFrame->nb_samples, this->targetSampleRate, pFrame->sample_rate, AV_ROUND_UP); @@ -281,11 +293,29 @@ namespace decord { if (!this->swr) { LOG(FATAL) << "ERROR Failed to allocate resample context"; } +#if LIBAVCODEC_VERSION_MAJOR >= 60 + AVChannelLayout in_ch_layout = {}; // zero-init: av_channel_layout_copy() uninits dst before copying + av_channel_layout_copy(&in_ch_layout, &pCodecContext->ch_layout); + if (in_ch_layout.nb_channels == 0) { + av_channel_layout_default(&in_ch_layout, 1); + } + av_opt_set_chlayout(this->swr, "in_chlayout", &in_ch_layout, 0); + AVChannelLayout out_ch_layout = {}; // zero-init: the non-mono branch copies into it + if (mono) { + av_channel_layout_default(&out_ch_layout, 1); + } else { + av_channel_layout_copy(&out_ch_layout, &in_ch_layout); + } + av_opt_set_chlayout(this->swr, "out_chlayout", &out_ch_layout, 0); + av_channel_layout_uninit(&in_ch_layout); + av_channel_layout_uninit(&out_ch_layout); +#else if (pCodecContext->channel_layout == 0) { pCodecContext->channel_layout = av_get_default_channel_layout( pCodecContext->channels ); } av_opt_set_channel_layout(this->swr, "in_channel_layout", pCodecContext->channel_layout, 0); av_opt_set_channel_layout(this->swr, "out_channel_layout", mono ? 
AV_CH_LAYOUT_MONO : pCodecContext->channel_layout, 0); +#endif av_opt_set_int(this->swr, "in_sample_rate", pCodecContext->sample_rate, 0); av_opt_set_int(this->swr, "out_sample_rate", this->targetSampleRate, 0); av_opt_set_sample_fmt(this->swr, "in_sample_fmt", pCodecContext->sample_fmt, 0); diff --git a/src/video/ffmpeg/ffmpeg_common.h b/src/video/ffmpeg/ffmpeg_common.h index b0b973f9..aa6c420f 100644 --- a/src/video/ffmpeg/ffmpeg_common.h +++ b/src/video/ffmpeg/ffmpeg_common.h @@ -21,6 +21,9 @@ extern "C" { #endif #include +#if LIBAVCODEC_VERSION_MAJOR >= 59 +#include +#endif #include #include #include diff --git a/src/video/ffmpeg/filter_graph.cc b/src/video/ffmpeg/filter_graph.cc index 1913c9c2..fca868d8 100644 --- a/src/video/ffmpeg/filter_graph.cc +++ b/src/video/ffmpeg/filter_graph.cc @@ -7,6 +7,9 @@ #include "filter_graph.h" #include +extern "C" { +#include +} namespace decord { namespace ffmpeg { @@ -36,7 +39,6 @@ void FFMPEGFilterGraph::Init(std::string filters_descr, AVCodecContext *dec_ctx) CHECK(buffersink) << "Error: no buffersink"; AVFilterInOut *outputs = avfilter_inout_alloc(); AVFilterInOut *inputs = avfilter_inout_alloc(); - enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24 , AV_PIX_FMT_NONE }; // AVBufferSinkParams *buffersink_params; filter_graph_.reset(avfilter_graph_alloc()); @@ -44,33 +46,44 @@ void FFMPEGFilterGraph::Init(std::string filters_descr, AVCodecContext *dec_ctx) //LOG(INFO) << "Original GraphFilter nb_threads: " << filter_graph_->nb_threads; filter_graph_->nb_threads = 1; /* buffer video source: the decoded frames from the decoder will be inserted here. 
*/ - std::snprintf(args, sizeof(args), + // Sanitize sample_aspect_ratio: a zero denominator causes inf which FFmpeg 7+ rejects + int sar_num = dec_ctx->sample_aspect_ratio.num; + int sar_den = dec_ctx->sample_aspect_ratio.den; + if (sar_den == 0) { + sar_num = 1; + sar_den = 1; + } +#if LIBAVFILTER_VERSION_MAJOR >= 10 + // FFmpeg 7+: pix_fmt option uses AV_OPT_TYPE_PIXEL_FMT, requiring a format name string + const char *pix_fmt_name = av_get_pix_fmt_name(dec_ctx->pix_fmt); + if (!pix_fmt_name) pix_fmt_name = "yuv420p"; + std::snprintf(args, sizeof(args), + "video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:pixel_aspect=%d/%d", + dec_ctx->width, dec_ctx->height, pix_fmt_name, + dec_ctx->time_base.num, dec_ctx->time_base.den, + sar_num, sar_den); +#else + std::snprintf(args, sizeof(args), "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d", dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt, dec_ctx->time_base.num, dec_ctx->time_base.den, - dec_ctx->sample_aspect_ratio.num, dec_ctx->sample_aspect_ratio.den); - // std::snprintf(args, sizeof(args), - // "video_size=%dx%d:pix_fmt=%d", - // dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt); + sar_num, sar_den); +#endif - // LOG(INFO) << "filter args: " << args; - - // AVFilterContext *buffersrc_ctx; - // AVFilterContext *buffersink_ctx; CHECK_GE(avfilter_graph_create_filter(&buffersrc_ctx_, buffersrc, "in", args, NULL, filter_graph_.get()), 0) << "Cannot create buffer source"; - // LOG(INFO) << "create filter src"; - /* buffer video sink: to terminate the filter chain. 
*/ - // buffersink_params = av_buffersink_params_alloc(); - // buffersink_params->pixel_fmts = pix_fmts; CHECK_GE(avfilter_graph_create_filter(&buffersink_ctx_, buffersink, "out", NULL, NULL, filter_graph_.get()), 0) << "Cannot create buffer sink"; - // av_free(buffersink_params); - // LOG(INFO) << "create filter sink"; - // CHECK_GE(av_opt_set_bin(buffersink_ctx_, "pix_fmts", (uint8_t *)&pix_fmts, sizeof(AV_PIX_FMT_RGB24), AV_OPT_SEARCH_CHILDREN), 0) << "Set bin error"; +#if LIBAVFILTER_VERSION_MAJOR < 10 + enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24 , AV_PIX_FMT_NONE }; CHECK_GE(av_opt_set_int_list(buffersink_ctx_, "pix_fmts", pix_fmts, AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN), 0) << "Set output pixel format error."; +#else + // FFmpeg 7+: pix_fmts is no longer a runtime option on buffersink, + // so enforce output format via the filter chain instead. + filters_descr += ",format=rgb24"; +#endif // LOG(INFO) << "create filter set opt"; /* Endpoints for the filter graph. */ diff --git a/src/video/nvcodec/cuda_threaded_decoder.cc b/src/video/nvcodec/cuda_threaded_decoder.cc index 62bc7ee4..957a90dc 100644 --- a/src/video/nvcodec/cuda_threaded_decoder.cc +++ b/src/video/nvcodec/cuda_threaded_decoder.cc @@ -17,7 +17,7 @@ namespace decord { namespace cuda { using namespace runtime; -CUThreadedDecoder::CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, AVInputFormat *iformat) +CUThreadedDecoder::CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, const AVInputFormat *iformat) : device_id_(device_id), stream_({device_id, false}), device_{}, ctx_{}, parser_{}, decoder_{}, pkt_queue_{}, frame_queue_{}, run_(false), frame_count_(0), draining_(false), @@ -70,7 +70,7 @@ CUThreadedDecoder::CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, } } -void CUThreadedDecoder::InitBitStreamFilter(AVCodecParameters *codecpar, AVInputFormat *iformat) { +void CUThreadedDecoder::InitBitStreamFilter(AVCodecParameters *codecpar, const AVInputFormat 
*iformat) { const char* bsf_name = nullptr; if (AV_CODEC_ID_H264 == codecpar->codec_id) { // H.264 diff --git a/src/video/nvcodec/cuda_threaded_decoder.h b/src/video/nvcodec/cuda_threaded_decoder.h index d7e6fcd2..61958a12 100644 --- a/src/video/nvcodec/cuda_threaded_decoder.h +++ b/src/video/nvcodec/cuda_threaded_decoder.h @@ -46,7 +46,7 @@ class CUThreadedDecoder final : public ThreadedDecoderInterface { using FrameOrderQueuePtr = std::unique_ptr; public: - CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, AVInputFormat *iformat); + CUThreadedDecoder(int device_id, AVCodecParameters *codecpar, const AVInputFormat *iformat); void SetCodecContext(AVCodecContext *dec_ctx, int width = -1, int height = -1, int rotation = 0); bool Initialized() const; void Start(); @@ -70,7 +70,7 @@ class CUThreadedDecoder final : public ThreadedDecoderInterface { void LaunchThreadImpl(); void RecordInternalError(std::string message); void CheckErrorStatus(); - void InitBitStreamFilter(AVCodecParameters *codecpar, AVInputFormat *iformat); + void InitBitStreamFilter(AVCodecParameters *codecpar, const AVInputFormat *iformat); int device_id_; CUStream stream_; diff --git a/src/video/video_reader.cc b/src/video/video_reader.cc index af4858d2..fa08cb1c 100644 --- a/src/video/video_reader.cc +++ b/src/video/video_reader.cc @@ -145,7 +145,11 @@ VideoReader::~VideoReader(){ void VideoReader::SetVideoStream(int stream_nb) { if (!fmt_ctx_) return; +#if LIBAVFORMAT_VERSION_MAJOR >= 59 // av_find_best_stream() takes const AVCodec** only from FFmpeg 5.0 on + const AVCodec *dec; +#else AVCodec *dec; +#endif int st_nb = av_find_best_stream(fmt_ctx_.get(), AVMEDIA_TYPE_VIDEO, stream_nb, -1, &dec, 0); // LOG(INFO) << "find best stream: " << st_nb; CHECK_GE(st_nb, 0) << "ERROR cannot find video stream with wanted index: " << stream_nb; @@ -554,9 +558,18 @@ double VideoReader::GetRotation() const { if (rotate && *rotate->value && strcmp(rotate->value, "0")) theta = atof(rotate->value); +#if LIBAVFORMAT_VERSION_MAJOR >= 61 + const AVPacketSideData *sd = av_packet_side_data_get( + 
active_st->codecpar->coded_side_data, + active_st->codecpar->nb_coded_side_data, + AV_PKT_DATA_DISPLAYMATRIX); + if (sd && !theta) + theta = -av_display_rotation_get((const int32_t*) sd->data); +#else uint8_t* displaymatrix = av_stream_get_side_data(active_st, AV_PKT_DATA_DISPLAYMATRIX, NULL); if (displaymatrix && !theta) theta = -av_display_rotation_get((int32_t*) displaymatrix); +#endif theta = std::fmod(theta, 360); if(theta < 0) theta += 360; diff --git "a/test-media/26_Universit\303\244t_Wien_Informationen-Audioversion-ElevenLabs_20260123_final.mp3" "b/test-media/26_Universit\303\244t_Wien_Informationen-Audioversion-ElevenLabs_20260123_final.mp3" new file mode 100644 index 00000000..59877886 Binary files /dev/null and "b/test-media/26_Universit\303\244t_Wien_Informationen-Audioversion-ElevenLabs_20260123_final.mp3" differ diff --git a/test-media/big_buck_bunny.mp4 b/test-media/big_buck_bunny.mp4 new file mode 100644 index 00000000..81d11df5 Binary files /dev/null and b/test-media/big_buck_bunny.mp4 differ diff --git a/test-media/conftest.py b/test-media/conftest.py new file mode 100644 index 00000000..2a7a8481 --- /dev/null +++ b/test-media/conftest.py @@ -0,0 +1,74 @@ +"""Shared fixtures for decord tests.""" +import os +import pytest +import numpy as np + +from decord import VideoReader, AudioReader, AVReader, cpu + + +MEDIA_DIR = os.path.dirname(__file__) +TEST_DATA_DIR = os.path.join(MEDIA_DIR, '..', 'tests', 'test_data') + +# Primary test file: big_buck_bunny.mp4 +# - 1440 video frames, 640x360, ~24fps, ~60s +# - AAC stereo audio at 22050 Hz +BBB_PATH = os.path.join(MEDIA_DIR, 'big_buck_bunny.mp4') + +# Small test video (video-only fixtures) +PANCAKE_PATH = os.path.join(MEDIA_DIR, '..', 'examples', 'flipping_a_pancake.mkv') + +# MP3 audio-only file: ~878s, mono, 44100 Hz +MP3_PATH = os.path.join(MEDIA_DIR, '26_Universität_Wien_Informationen-Audioversion-ElevenLabs_20260123_final.mp3') + +# Corrupted video +CORRUPTED_PATH = os.path.join(TEST_DATA_DIR, 
'corrupted.mp4') + +# Rotation test videos +ROTATION_VIDEOS = { + rot: os.path.join(TEST_DATA_DIR, f'video_{rot}.mov') + for rot in [0, 90, 180, 270] +} + +# Unordered PTS video +UNORDERED_PATH = os.path.join(TEST_DATA_DIR, 'unordered.mov') + +# Video-only file (no audio stream) +VIDEO_ONLY_PATH = os.path.join(TEST_DATA_DIR, 'video_0.mov') + +CTX = cpu(0) + + +@pytest.fixture +def bbb_video(): + """VideoReader for big_buck_bunny.mp4.""" + return VideoReader(BBB_PATH, ctx=CTX) + + +@pytest.fixture +def bbb_audio(): + """AudioReader for big_buck_bunny.mp4 (mono).""" + return AudioReader(BBB_PATH, ctx=CTX, mono=True) + + +@pytest.fixture +def bbb_audio_stereo(): + """AudioReader for big_buck_bunny.mp4 (stereo).""" + return AudioReader(BBB_PATH, ctx=CTX, mono=False) + + +@pytest.fixture +def bbb_av(): + """AVReader for big_buck_bunny.mp4.""" + return AVReader(BBB_PATH, ctx=CTX) + + +@pytest.fixture +def pancake_video(): + """VideoReader for flipping_a_pancake.mkv.""" + return VideoReader(PANCAKE_PATH, ctx=CTX) + + +@pytest.fixture +def mp3_audio(): + """AudioReader for the MP3 test file (mono, 44100 Hz).""" + return AudioReader(MP3_PATH, ctx=CTX, mono=True) diff --git a/test-media/test_audio_reader.py b/test-media/test_audio_reader.py new file mode 100644 index 00000000..ade12845 --- /dev/null +++ b/test-media/test_audio_reader.py @@ -0,0 +1,277 @@ +"""Tests for decord.AudioReader using big_buck_bunny.mp4.""" +import io + +import numpy as np +import pytest + +from decord import AudioReader, cpu +from decord.base import DECORDError + +from conftest import BBB_PATH, MP3_PATH, VIDEO_ONLY_PATH, CTX + + +# --------------------------------------------------------------------------- +# Basic properties +# --------------------------------------------------------------------------- + +class TestAudioReaderProperties: + def test_mono_shape(self, bbb_audio): + assert bbb_audio.shape[0] == 1 # mono = 1 channel + + def test_mono_sample_count(self, bbb_audio): + num_samples = 
bbb_audio.shape[1] + # ~60s at 22050 Hz -> ~1.3M samples + assert 1_200_000 < num_samples < 1_400_000 + + def test_stereo_shape(self, bbb_audio_stereo): + assert bbb_audio_stereo.shape[0] == 2 # stereo = 2 channels + + def test_stereo_same_sample_count(self, bbb_audio, bbb_audio_stereo): + """Mono and stereo should have same number of samples per channel.""" + assert bbb_audio.shape[1] == bbb_audio_stereo.shape[1] + + def test_len_returns_num_channels(self, bbb_audio): + assert len(bbb_audio) == 1 + + def test_len_stereo(self, bbb_audio_stereo): + assert len(bbb_audio_stereo) == 2 + + def test_duration(self, bbb_audio): + dur = bbb_audio.duration() + assert 59.0 < dur < 61.0 # ~60 seconds + + +# --------------------------------------------------------------------------- +# Indexing +# --------------------------------------------------------------------------- + +class TestAudioReaderIndexing: + def test_index_first_sample(self, bbb_audio): + sample = bbb_audio[0] + assert sample.asnumpy().shape == (1,) + + def test_index_last_sample(self, bbb_audio): + sample = bbb_audio[-1] + assert sample.asnumpy().shape == (1,) + + def test_negative_index(self, bbb_audio): + n = bbb_audio.shape[1] + sample_pos = bbb_audio[n - 1].asnumpy() + sample_neg = bbb_audio[-1].asnumpy() + assert np.allclose(sample_pos, sample_neg) + + def test_out_of_bounds_raises(self, bbb_audio): + with pytest.raises(IndexError): + bbb_audio[bbb_audio.shape[1]] + + def test_stereo_index(self, bbb_audio_stereo): + sample = bbb_audio_stereo[0] + assert sample.asnumpy().shape == (2,) + + +# --------------------------------------------------------------------------- +# Slicing +# --------------------------------------------------------------------------- + +class TestAudioReaderSlicing: + def test_slice_range(self, bbb_audio): + samples = bbb_audio[100:200] + assert samples.asnumpy().shape == (1, 100) + + def test_slice_all(self, bbb_audio): + samples = bbb_audio[:] + assert samples.asnumpy().shape[0] == 1 
+ assert samples.asnumpy().shape[1] == bbb_audio.shape[1] + + def test_slice_negative(self, bbb_audio): + samples = bbb_audio[-100:-50] + assert samples.asnumpy().shape == (1, 50) + + def test_slice_stereo(self, bbb_audio_stereo): + samples = bbb_audio_stereo[0:1000] + assert samples.asnumpy().shape == (2, 1000) + + +# --------------------------------------------------------------------------- +# Batch access +# --------------------------------------------------------------------------- + +class TestAudioReaderBatch: + def test_get_batch(self, bbb_audio): + indices = [0, 100, 200, 300, 400] + batch = bbb_audio.get_batch(indices) + assert batch.asnumpy().shape == (1, 5) + + def test_get_batch_negative_indices(self, bbb_audio): + batch = bbb_audio.get_batch([-1, -2, -3]) + assert batch.asnumpy().shape == (1, 3) + + def test_get_batch_stereo(self, bbb_audio_stereo): + batch = bbb_audio_stereo.get_batch([0, 1000, 2000]) + assert batch.asnumpy().shape == (2, 3) + + +# --------------------------------------------------------------------------- +# Resampling +# --------------------------------------------------------------------------- + +class TestAudioReaderResample: + def test_resample_lower(self): + ar = AudioReader(BBB_PATH, ctx=CTX, sample_rate=11025, mono=True) + original = AudioReader(BBB_PATH, ctx=CTX, mono=True) + # Resampled should have roughly half the samples + ratio = original.shape[1] / ar.shape[1] + assert 1.8 < ratio < 2.2 + + def test_resample_higher(self): + ar = AudioReader(BBB_PATH, ctx=CTX, sample_rate=44100, mono=True) + original = AudioReader(BBB_PATH, ctx=CTX, mono=True) + # Resampled should have roughly double the samples + ratio = ar.shape[1] / original.shape[1] + assert 1.8 < ratio < 2.2 + + def test_resample_preserves_channels(self): + ar = AudioReader(BBB_PATH, ctx=CTX, sample_rate=11025, mono=False) + assert ar.shape[0] == 2 # still stereo + + +# --------------------------------------------------------------------------- +# Channel 
conversion +# --------------------------------------------------------------------------- + +class TestAudioReaderChannels: + def test_stereo_to_mono(self): + ar_stereo = AudioReader(BBB_PATH, ctx=CTX, mono=False) + ar_mono = AudioReader(BBB_PATH, ctx=CTX, mono=True) + assert ar_stereo.shape[0] == 2 + assert ar_mono.shape[0] == 1 + # Same number of samples per channel + assert ar_stereo.shape[1] == ar_mono.shape[1] + + +# --------------------------------------------------------------------------- +# Audio values +# --------------------------------------------------------------------------- + +class TestAudioReaderValues: + def test_samples_are_float(self, bbb_audio): + samples = bbb_audio[0:100].asnumpy() + assert samples.dtype == np.float32 or samples.dtype == np.float64 + + def test_samples_in_reasonable_range(self, bbb_audio): + """Audio samples should be in a reasonable floating point range.""" + samples = bbb_audio[10000:20000].asnumpy() + assert np.all(np.abs(samples) < 10.0) # normalized audio + + def test_not_all_zeros(self, bbb_audio): + """Audio should not be entirely silent.""" + # Sample from middle of file where there should be audio + samples = bbb_audio[100000:200000].asnumpy() + assert np.any(samples != 0) + + +# --------------------------------------------------------------------------- +# Padding +# --------------------------------------------------------------------------- + +class TestAudioReaderPadding: + def test_add_padding(self, bbb_audio): + original_samples = bbb_audio.shape[1] + num_padding = bbb_audio.add_padding() + # After padding, total samples should increase by padding amount + # (add_padding modifies internal array, not shape property) + assert num_padding >= 0 + + +# --------------------------------------------------------------------------- +# BytesIO / file-like objects +# --------------------------------------------------------------------------- + +class TestAudioReaderBytesIO: + def test_read_from_bytes_io(self): + with 
open(BBB_PATH, 'rb') as f: + ar = AudioReader(f, ctx=CTX, mono=True) + assert ar.shape[0] == 1 + assert ar.shape[1] > 0 + + def test_bytes_io_matches_file(self, bbb_audio): + with open(BBB_PATH, 'rb') as f: + ar_bio = AudioReader(f, ctx=CTX, mono=True) + file_samples = bbb_audio[0:1000].asnumpy() + bio_samples = ar_bio[0:1000].asnumpy() + assert np.allclose(file_samples, bio_samples) + + +# --------------------------------------------------------------------------- +# Error handling +# --------------------------------------------------------------------------- + +class TestAudioReaderErrors: + def test_no_audio_stream_raises(self): + """Opening a video-only file with AudioReader should raise.""" + with pytest.raises(DECORDError): + AudioReader(VIDEO_ONLY_PATH, ctx=CTX) + + +# --------------------------------------------------------------------------- +# Resource cleanup +# --------------------------------------------------------------------------- + +class TestAudioReaderCleanup: + def test_del(self): + ar = AudioReader(BBB_PATH, ctx=CTX) + _ = ar[0] + del ar # should not raise + + def test_multiple_readers(self): + ar1 = AudioReader(BBB_PATH, ctx=CTX, mono=True) + ar2 = AudioReader(BBB_PATH, ctx=CTX, mono=True) + assert ar1.shape == ar2.shape + + +# --------------------------------------------------------------------------- +# MP3 format support +# --------------------------------------------------------------------------- + +class TestAudioReaderMP3: + def test_mp3_loads(self, mp3_audio): + assert mp3_audio.shape[0] == 1 # mono + assert mp3_audio.shape[1] > 0 + + def test_mp3_duration(self, mp3_audio): + dur = mp3_audio.duration() + # ~878 seconds + assert 870 < dur < 890 + + def test_mp3_sample_count(self, mp3_audio): + # ~878s at 44100 Hz -> ~38.7M samples + n = mp3_audio.shape[1] + assert 38_000_000 < n < 40_000_000 + + def test_mp3_indexing(self, mp3_audio): + sample = mp3_audio[0] + assert sample.asnumpy().shape == (1,) + + def test_mp3_slicing(self, 
mp3_audio): + samples = mp3_audio[1000:2000] + assert samples.asnumpy().shape == (1, 1000) + + def test_mp3_not_silent(self, mp3_audio): + # Sample from well into the file + samples = mp3_audio[500000:600000].asnumpy() + assert np.any(samples != 0) + + def test_mp3_samples_are_float(self, mp3_audio): + samples = mp3_audio[0:100].asnumpy() + assert samples.dtype in (np.float32, np.float64) + + def test_mp3_resample(self): + ar = AudioReader(MP3_PATH, ctx=CTX, sample_rate=22050, mono=True) + original = AudioReader(MP3_PATH, ctx=CTX, mono=True) + ratio = original.shape[1] / ar.shape[1] + assert 1.8 < ratio < 2.2 + + def test_mp3_bytes_io(self, mp3_audio): + with open(MP3_PATH, 'rb') as f: + ar_bio = AudioReader(f, ctx=CTX, mono=True) + assert ar_bio.shape == mp3_audio.shape diff --git a/test-media/test_av_reader.py b/test-media/test_av_reader.py new file mode 100644 index 00000000..675ee8bc --- /dev/null +++ b/test-media/test_av_reader.py @@ -0,0 +1,174 @@ +"""Tests for decord.AVReader using big_buck_bunny.mp4.""" +import numpy as np +import pytest + +from decord import AVReader, cpu +from decord.base import DECORDError + +from conftest import BBB_PATH, VIDEO_ONLY_PATH, CTX + + +# --------------------------------------------------------------------------- +# Basic properties +# --------------------------------------------------------------------------- + +class TestAVReaderProperties: + def test_frame_count(self, bbb_av): + assert len(bbb_av) == 1440 + + def test_single_index_returns_tuple(self, bbb_av): + result = bbb_av[0] + assert isinstance(result, tuple) + assert len(result) == 2 + audio, video = result + # Video frame shape + assert video.shape == (360, 640, 3) + + def test_audio_has_samples(self, bbb_av): + audio, video = bbb_av[0] + arr = audio.asnumpy() + assert arr.shape[0] == 1 # mono (default) + assert arr.shape[1] > 0 # has samples + + +# --------------------------------------------------------------------------- +# Single frame access +# 
--------------------------------------------------------------------------- + +class TestAVReaderFrameAccess: + def test_first_frame(self, bbb_av): + audio, video = bbb_av[0] + assert video.shape == (360, 640, 3) + + def test_mid_frame(self, bbb_av): + audio, video = bbb_av[720] + assert video.shape == (360, 640, 3) + + def test_last_frame(self, bbb_av): + audio, video = bbb_av[1439] + assert video.shape == (360, 640, 3) + + def test_negative_index(self, bbb_av): + audio, video = bbb_av[-1] + assert video.shape == (360, 640, 3) + + def test_out_of_bounds_raises(self, bbb_av): + with pytest.raises(IndexError): + bbb_av[1440] + + +# --------------------------------------------------------------------------- +# Slice access +# --------------------------------------------------------------------------- + +class TestAVReaderSlice: + def test_slice_returns_tuple(self, bbb_av): + result = bbb_av[0:5] + assert isinstance(result, tuple) + assert len(result) == 2 + + def test_slice_video_shape(self, bbb_av): + audio_list, video = bbb_av[0:5] + assert video.shape == (5, 360, 640, 3) + + def test_slice_audio_is_list(self, bbb_av): + audio_list, video = bbb_av[0:5] + assert isinstance(audio_list, list) + assert len(audio_list) == 5 + + def test_slice_audio_entries_have_samples(self, bbb_av): + audio_list, video = bbb_av[10:15] + for audio in audio_list: + arr = audio.asnumpy() + assert arr.shape[0] == 1 # mono + assert arr.shape[1] > 0 + + +# --------------------------------------------------------------------------- +# Batch access +# --------------------------------------------------------------------------- + +class TestAVReaderBatch: + def test_get_batch(self, bbb_av): + result = bbb_av.get_batch([0, 100, 500]) + assert isinstance(result, tuple) + audio_list, video = result + assert video.shape == (3, 360, 640, 3) + assert len(audio_list) == 3 + + def test_get_batch_negative_indices(self, bbb_av): + audio_list, video = bbb_av.get_batch([-1, 0, 1]) + assert video.shape[0] == 
3 + + def test_get_batch_single(self, bbb_av): + audio_list, video = bbb_av.get_batch([500]) + assert video.shape[0] == 1 + assert len(audio_list) == 1 + + +# --------------------------------------------------------------------------- +# Audio-video synchronization +# --------------------------------------------------------------------------- + +class TestAVReaderSync: + def test_consecutive_frames_have_consecutive_audio(self, bbb_av): + """Audio for consecutive frames should cover consecutive time ranges.""" + audio_list, video = bbb_av[100:105] + # Each frame's audio should have a reasonable number of samples + for audio in audio_list: + n_samples = audio.asnumpy().shape[1] + # At 44100 Hz and ~24fps, expect ~1837 samples per frame + # Allow wide tolerance for edge cases + assert 500 < n_samples < 5000 + + def test_audio_not_silent_during_video(self, bbb_av): + """Audio in the middle of the video should not be all zeros.""" + audio, video = bbb_av[720] + arr = audio.asnumpy() + # The bunny video has audio throughout + # (might be near-silent at some frames, so just check it's not exactly zero) + # Use a range to be safe + audio_list, _ = bbb_av[700:740] + combined = np.concatenate([a.asnumpy() for a in audio_list], axis=1) + assert np.any(combined != 0) + + +# --------------------------------------------------------------------------- +# BytesIO / file-like objects +# --------------------------------------------------------------------------- + +class TestAVReaderBytesIO: + def test_read_from_bytes_io(self): + with open(BBB_PATH, 'rb') as f: + av = AVReader(f, ctx=CTX) + assert len(av) == 1440 + + def test_bytes_io_matches_file(self, bbb_av): + with open(BBB_PATH, 'rb') as f: + av_bio = AVReader(f, ctx=CTX) + audio1, video1 = bbb_av[50] + audio2, video2 = av_bio[50] + assert np.allclose(audio1.asnumpy(), audio2.asnumpy()) + assert np.allclose(video1.asnumpy(), video2.asnumpy()) + + +# --------------------------------------------------------------------------- +# 
Error handling +# --------------------------------------------------------------------------- + +class TestAVReaderErrors: + def test_no_audio_stream_raises(self): + """Opening a video-only file with AVReader should raise.""" + with pytest.raises(DECORDError): + AVReader(VIDEO_ONLY_PATH, ctx=CTX) + + +# --------------------------------------------------------------------------- +# Resource cleanup +# --------------------------------------------------------------------------- + +class TestAVReaderCleanup: + def test_del(self): + av = AVReader(BBB_PATH, ctx=CTX) + _ = av[0] + del av # should not raise diff --git a/test-media/test_bridge.py b/test-media/test_bridge.py new file mode 100644 index 00000000..b6b2553f --- /dev/null +++ b/test-media/test_bridge.py @@ -0,0 +1,88 @@ +"""Tests for decord framework bridge integration.""" +import numpy as np +import pytest + +import decord +from decord import VideoReader, cpu +from decord.bridge import set_bridge, reset_bridge + +from conftest import BBB_PATH, CTX + + +def _torch_available(): + try: + import torch + return True + except ImportError: + return False + + +# --------------------------------------------------------------------------- +# Native bridge (default) +# --------------------------------------------------------------------------- + +class TestNativeBridge: + def test_default_returns_decord_ndarray(self): + vr = VideoReader(BBB_PATH, ctx=CTX) + frame = vr[0] + assert isinstance(frame, decord.nd.NDArray) + + def test_ndarray_to_numpy(self): + vr = VideoReader(BBB_PATH, ctx=CTX) + frame = vr[0] + arr = frame.asnumpy() + assert isinstance(arr, np.ndarray) + assert arr.dtype == np.uint8 + + def test_ndarray_shape(self): + vr = VideoReader(BBB_PATH, ctx=CTX) + frame = vr[0] + assert frame.shape == (360, 640, 3) + + +# --------------------------------------------------------------------------- +# PyTorch bridge +# --------------------------------------------------------------------------- + +class TestTorchBridge: + 
@pytest.fixture(autouse=True) + def _reset_bridge(self): + yield + reset_bridge() + + @pytest.mark.skipif( + not _torch_available(), + reason="PyTorch not installed" + ) + def test_torch_bridge_returns_tensor(self): + import torch + vr = VideoReader(BBB_PATH, ctx=CTX) + set_bridge('torch') + frame = vr[0] + assert isinstance(frame, torch.Tensor) + + @pytest.mark.skipif( + not _torch_available(), + reason="PyTorch not installed" + ) + def test_torch_bridge_shape(self): + import torch + vr = VideoReader(BBB_PATH, ctx=CTX) + set_bridge('torch') + frame = vr[0] + assert frame.shape == (360, 640, 3) + + @pytest.mark.skipif( + not _torch_available(), + reason="PyTorch not installed" + ) + def test_torch_bridge_context_manager(self): + import torch + from decord.bridge import use_torch + vr = VideoReader(BBB_PATH, ctx=CTX) + with use_torch(): + frame = vr[0] + assert isinstance(frame, torch.Tensor) + # After context, should be back to native + frame2 = vr[0] + assert isinstance(frame2, decord.nd.NDArray) diff --git a/test-media/test_logging.py b/test-media/test_logging.py new file mode 100644 index 00000000..46f63a6e --- /dev/null +++ b/test-media/test_logging.py @@ -0,0 +1,21 @@ +"""Tests for decord logging module.""" +import decord +from decord import logging + + +class TestLogging: + def test_set_level_quiet(self): + logging.set_level(logging.QUIET) + + def test_set_level_error(self): + logging.set_level(logging.ERROR) + + def test_set_level_warning(self): + logging.set_level(logging.WARNING) + + def test_set_level_info(self): + logging.set_level(logging.INFO) + + def test_restore_default(self): + """Restore to ERROR level (the default set in __init__).""" + logging.set_level(logging.ERROR) diff --git a/test-media/test_ndarray.py b/test-media/test_ndarray.py new file mode 100644 index 00000000..be786ffc --- /dev/null +++ b/test-media/test_ndarray.py @@ -0,0 +1,46 @@ +"""Tests for decord NDArray and context functions.""" +import numpy as np +import pytest + +import 
decord +from decord import cpu, gpu +from decord import nd + + +class TestContext: + def test_cpu_context(self): + ctx = cpu(0) + assert ctx.device_type == 1 # kDLCPU + + def test_cpu_default_id(self): + ctx = cpu() + assert ctx.device_id == 0 + + def test_gpu_context(self): + ctx = gpu(0) + assert ctx.device_type == 2 # kDLGPU + + +class TestNDArray: + def test_from_numpy(self): + arr = np.random.rand(10, 10).astype(np.float32) + nd_arr = nd.array(arr) + assert nd_arr.shape == (10, 10) + + def test_to_numpy(self): + arr = np.random.rand(5, 5).astype(np.float32) + nd_arr = nd.array(arr) + result = nd_arr.asnumpy() + assert isinstance(result, np.ndarray) + assert np.allclose(arr, result) + + def test_roundtrip_int(self): + arr = np.array([1, 2, 3, 4, 5], dtype=np.int64) + nd_arr = nd.array(arr) + result = nd_arr.asnumpy() + assert np.array_equal(arr, result) + + def test_shape_property(self): + arr = np.zeros((3, 4, 5), dtype=np.float32) + nd_arr = nd.array(arr) + assert nd_arr.shape == (3, 4, 5) diff --git a/test-media/test_video_reader.py b/test-media/test_video_reader.py new file mode 100644 index 00000000..498cfe89 --- /dev/null +++ b/test-media/test_video_reader.py @@ -0,0 +1,351 @@ +"""Tests for decord.VideoReader using big_buck_bunny.mp4.""" +import io +import random + +import numpy as np +import pytest + +from decord import VideoReader, cpu +from decord.base import DECORDError + +from conftest import BBB_PATH, PANCAKE_PATH, CORRUPTED_PATH, ROTATION_VIDEOS, UNORDERED_PATH, CTX + + +# --------------------------------------------------------------------------- +# Basic properties +# --------------------------------------------------------------------------- + +class TestVideoReaderProperties: + def test_frame_count(self, bbb_video): + assert len(bbb_video) == 1440 + + def test_frame_shape(self, bbb_video): + frame = bbb_video[0] + assert frame.shape == (360, 640, 3) + + def test_avg_fps(self, bbb_video): + fps = bbb_video.get_avg_fps() + assert 23.0 < fps < 
25.0 # ~23.96 fps + + def test_key_indices_returns_list(self, bbb_video): + keys = bbb_video.get_key_indices() + assert isinstance(keys, list) + assert len(keys) > 0 + # First keyframe should be frame 0 + assert keys[0] == 0 + # All indices should be valid + for k in keys: + assert 0 <= k < len(bbb_video) + + def test_key_indices_are_sorted(self, bbb_video): + keys = bbb_video.get_key_indices() + assert keys == sorted(keys) + + def test_pancake_frame_count(self, pancake_video): + assert len(pancake_video) == 310 + + +# --------------------------------------------------------------------------- +# Single frame access +# --------------------------------------------------------------------------- + +class TestVideoReaderFrameAccess: + def test_first_frame(self, bbb_video): + frame = bbb_video[0] + arr = frame.asnumpy() + assert arr.dtype == np.uint8 + assert arr.shape == (360, 640, 3) + + def test_last_frame(self, bbb_video): + frame = bbb_video[len(bbb_video) - 1] + assert frame.shape == (360, 640, 3) + + def test_negative_index(self, bbb_video): + frame_pos = bbb_video[len(bbb_video) - 1] + frame_neg = bbb_video[-1] + assert np.array_equal(frame_pos.asnumpy(), frame_neg.asnumpy()) + + def test_out_of_bounds_raises(self, bbb_video): + with pytest.raises(IndexError): + bbb_video[1440] + with pytest.raises(IndexError): + bbb_video[-1441] + + def test_pixel_values_in_range(self, bbb_video): + frame = bbb_video[100].asnumpy() + assert frame.min() >= 0 + assert frame.max() <= 255 + + def test_different_frames_differ(self, bbb_video): + """Frames far apart should have different content.""" + frame_a = bbb_video[0].asnumpy() + frame_b = bbb_video[720].asnumpy() # ~halfway + assert not np.array_equal(frame_a, frame_b) + + +# --------------------------------------------------------------------------- +# Sequential reading +# --------------------------------------------------------------------------- + +class TestVideoReaderSequential: + def test_read_first_30_frames(self, 
bbb_video): + for i in range(30): + frame = bbb_video[i] + assert frame.shape == (360, 640, 3) + + def test_sequential_consistency(self, bbb_video): + """Reading frame N twice should give same result.""" + frame1 = bbb_video[50].asnumpy() + frame2 = bbb_video[50].asnumpy() + assert np.array_equal(frame1, frame2) + + +# --------------------------------------------------------------------------- +# Slice access +# --------------------------------------------------------------------------- + +class TestVideoReaderSlice: + def test_slice_all(self, pancake_video): + """Slice all frames from the smaller pancake video.""" + frames = pancake_video[:] + assert frames.shape[0] == 310 + + def test_slice_range(self, bbb_video): + frames = bbb_video[10:20] + assert frames.shape == (10, 360, 640, 3) + + def test_slice_with_step(self, bbb_video): + frames = bbb_video[0:100:10] + assert frames.shape[0] == 10 + + def test_slice_from_start(self, bbb_video): + frames = bbb_video[:5] + assert frames.shape[0] == 5 + + def test_slice_to_end(self, bbb_video): + frames = bbb_video[1435:] + assert frames.shape[0] == 5 + + def test_slice_negative(self, bbb_video): + frames = bbb_video[-5:] + assert frames.shape[0] == 5 + + +# --------------------------------------------------------------------------- +# Batch access +# --------------------------------------------------------------------------- + +class TestVideoReaderBatch: + def test_get_batch_sequential(self, bbb_video): + indices = list(range(10)) + frames = bbb_video.get_batch(indices) + assert frames.shape == (10, 360, 640, 3) + + def test_get_batch_random(self, bbb_video): + random.seed(42) + indices = random.sample(range(1440), 20) + frames = bbb_video.get_batch(indices) + assert frames.shape == (20, 360, 640, 3) + + def test_get_batch_single(self, bbb_video): + frames = bbb_video.get_batch([500]) + assert frames.shape == (1, 360, 640, 3) + + def test_get_batch_duplicates(self, bbb_video): + """Duplicate indices should return 
duplicate frames.""" + frames = bbb_video.get_batch([100, 100, 100]) + assert frames.shape[0] == 3 + arr = frames.asnumpy() + assert np.array_equal(arr[0], arr[1]) + assert np.array_equal(arr[1], arr[2]) + + def test_get_batch_negative_indices(self, bbb_video): + frames = bbb_video.get_batch([-1, -2, -3]) + assert frames.shape[0] == 3 + + def test_batch_matches_individual(self, bbb_video): + """Batch result should match individual frame reads.""" + indices = [0, 100, 500, 1000] + batch = bbb_video.get_batch(indices).asnumpy() + for i, idx in enumerate(indices): + individual = bbb_video[idx].asnumpy() + assert np.array_equal(batch[i], individual) + + +# --------------------------------------------------------------------------- +# Seeking +# --------------------------------------------------------------------------- + +class TestVideoReaderSeeking: + def test_seek(self, bbb_video): + bbb_video.seek(100) + frame = bbb_video.next() + assert frame.shape == (360, 640, 3) + + def test_seek_accurate(self, bbb_video): + bbb_video.seek_accurate(100) + frame = bbb_video.next() + assert frame.shape == (360, 640, 3) + + def test_seek_to_start(self, bbb_video): + bbb_video.seek(0) + frame = bbb_video.next() + assert frame.shape == (360, 640, 3) + + def test_seek_to_near_end(self, bbb_video): + bbb_video.seek_accurate(1439) + frame = bbb_video.next() + assert frame.shape == (360, 640, 3) + + def test_skip_frames(self, bbb_video): + bbb_video.seek(0) + bbb_video.skip_frames(5) + frame = bbb_video.next() + assert frame.shape == (360, 640, 3) + + +# --------------------------------------------------------------------------- +# Frame timestamps +# --------------------------------------------------------------------------- + +class TestVideoReaderTimestamps: + def test_timestamp_shape(self, bbb_video): + ts = bbb_video.get_frame_timestamp(range(10)) + assert ts.shape == (10, 2) + + def test_first_frame_starts_at_zero(self, bbb_video): + ts = bbb_video.get_frame_timestamp([0]) + 
assert ts[0, 0] == pytest.approx(0.0, abs=0.01) + + def test_timestamps_are_monotonic(self, bbb_video): + ts = bbb_video.get_frame_timestamp(range(100)) + starts = ts[:, 0] + assert all(starts[i] <= starts[i + 1] for i in range(len(starts) - 1)) + + def test_timestamps_match_fps(self, bbb_video): + """Frame interval should roughly match 1/fps.""" + ts = bbb_video.get_frame_timestamp(range(10)) + fps = bbb_video.get_avg_fps() + expected_interval = 1.0 / fps + for i in range(1, 10): + actual_interval = ts[i, 0] - ts[i - 1, 0] + assert actual_interval == pytest.approx(expected_interval, rel=0.1) + + def test_last_frame_timestamp_reasonable(self, bbb_video): + ts = bbb_video.get_frame_timestamp([1439]) + # ~60 second video, last frame should be near 60s + assert 58.0 < ts[0, 0] < 61.0 + + +# --------------------------------------------------------------------------- +# Resize +# --------------------------------------------------------------------------- + +class TestVideoReaderResize: + def test_resize(self): + vr = VideoReader(BBB_PATH, ctx=CTX, width=320, height=180) + frame = vr[0] + assert frame.shape == (180, 320, 3) + + def test_resize_width_only(self): + vr = VideoReader(BBB_PATH, ctx=CTX, width=320) + frame = vr[0] + assert frame.shape[2] == 3 + assert frame.shape[1] == 320 + + def test_resize_height_only(self): + vr = VideoReader(BBB_PATH, ctx=CTX, height=180) + frame = vr[0] + assert frame.shape[0] == 180 + + +# --------------------------------------------------------------------------- +# BytesIO / file-like objects +# --------------------------------------------------------------------------- + +class TestVideoReaderBytesIO: + def test_read_from_bytes_io(self): + with open(BBB_PATH, 'rb') as f: + vr = VideoReader(f, ctx=CTX) + assert len(vr) == 1440 + + def test_bytes_io_matches_file(self, bbb_video): + with open(BBB_PATH, 'rb') as f: + vr_bio = VideoReader(f, ctx=CTX) + frame_file = bbb_video[50].asnumpy().astype('float') + frame_bio = 
vr_bio[50].asnumpy().astype('float') + assert np.mean(np.abs(frame_file - frame_bio)) < 2 + + +# --------------------------------------------------------------------------- +# Rotation handling +# --------------------------------------------------------------------------- + +class TestVideoReaderRotation: + def test_landscape_no_rotation(self): + vr = VideoReader(ROTATION_VIDEOS[0], ctx=CTX) + assert vr[0].shape == (320, 568, 3) + + def test_landscape_180_rotation(self): + vr = VideoReader(ROTATION_VIDEOS[180], ctx=CTX) + assert vr[0].shape == (320, 568, 3) + + def test_portrait_90_rotation(self): + vr = VideoReader(ROTATION_VIDEOS[90], ctx=CTX) + assert vr[0].shape == (568, 320, 3) + + def test_portrait_270_rotation(self): + vr = VideoReader(ROTATION_VIDEOS[270], ctx=CTX) + assert vr[0].shape == (568, 320, 3) + + def test_rotated_with_resize(self): + vr = VideoReader(ROTATION_VIDEOS[90], ctx=CTX, height=300, width=200) + assert vr[0].shape == (300, 200, 3) + + +# --------------------------------------------------------------------------- +# Corrupted video +# --------------------------------------------------------------------------- + +class TestVideoReaderCorrupted: + def test_corrupted_batch_raises(self): + vr = VideoReader(CORRUPTED_PATH, ctx=CTX) + with pytest.raises(DECORDError): + vr.get_batch(range(40)) + + +# --------------------------------------------------------------------------- +# Unordered PTS +# --------------------------------------------------------------------------- + +class TestVideoReaderUnorderedPTS: + def test_unordered_timestamps_sorted(self): + vr = VideoReader(UNORDERED_PATH, ctx=CTX) + ts = vr.get_frame_timestamp(range(4)) + starts = ts[:, 0] + assert all(starts[i] <= starts[i + 1] for i in range(len(starts) - 1)) + + def test_unordered_timestamps_values(self): + vr = VideoReader(UNORDERED_PATH, ctx=CTX) + ts = vr.get_frame_timestamp(range(4)) + assert np.allclose(ts[:, 0], [0.0, 0.03125, 0.0625, 0.09375]) + + +# 
--------------------------------------------------------------------------- +# Resource cleanup +# --------------------------------------------------------------------------- + +class TestVideoReaderCleanup: + def test_del(self): + vr = VideoReader(BBB_PATH, ctx=CTX) + _ = vr[0] + del vr # should not raise + + def test_multiple_readers(self): + """Multiple simultaneous readers should not conflict.""" + vr1 = VideoReader(BBB_PATH, ctx=CTX) + vr2 = VideoReader(BBB_PATH, ctx=CTX) + assert len(vr1) == len(vr2) + f1 = vr1[100].asnumpy() + f2 = vr2[100].asnumpy() + assert np.array_equal(f1, f2) diff --git a/tests/python/unittests/test_audio_reader.py b/tests/python/unittests/test_audio_reader.py index 90826c3e..3a1d1521 100644 --- a/tests/python/unittests/test_audio_reader.py +++ b/tests/python/unittests/test_audio_reader.py @@ -6,44 +6,44 @@ CTX = cpu(0) def get_single_channel_reader(): - return AudioReader(os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'count_down.mov'), CTX) + return AudioReader(os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'big_buck_bunny.mp4'), CTX) def get_double_channels_reader(): - return AudioReader(os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'sample-mov-file.mov'), CTX, mono=False) + return AudioReader(os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'file_example_MOV_1280_1_4MB.mov'), CTX, mono=False) def get_resampled_reader(): - return AudioReader(os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'count_down.mov'), CTX, 4410) + return AudioReader(os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'big_buck_bunny.mp4'), CTX, 4410) def get_channel_change_reader(): - return AudioReader(os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'sample-mov-file.mov'), CTX) + return AudioReader(os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'file_example_MOV_1280_1_4MB.mov'), CTX) def 
test_single_channel_audio_reader(): ar = get_single_channel_reader() - assert ar.shape() == (1, 482240) + assert ar.shape == (1, 1326080) def test_double_channels_audio_reader(): ar = get_double_channels_reader() - assert ar.shape() == (2, 5555200) + assert ar.shape == (2, 1465344) def test_no_audio_stream(): from nose.tools import assert_raises assert_raises(DECORDError, AudioReader, os.path.join(os.path.dirname(__file__), '..', '..', 'test_data', 'video_0.mov'), CTX) def test_bytes_io(): - fn = os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'count_down.mov') + fn = os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'big_buck_bunny.mp4') with open(fn, 'rb') as f: ar = AudioReader(f) - assert ar.shape() == (1, 482240) + assert ar.shape == (1, 1326080) ar2 = get_single_channel_reader() assert np.allclose(ar[10].asnumpy(), ar2[10].asnumpy()) def test_resample(): ar = get_resampled_reader() - assert ar.shape() == (1, 48224) + assert ar.shape == (1, 265216) def test_channel_change(): ar = get_channel_change_reader() - assert ar.shape() == (1, 5555200) + assert ar.shape == (1, 1465344) def test_index(): ar = get_double_channels_reader() @@ -65,7 +65,7 @@ def test_get_info(): def test_add_padding(): ar = get_single_channel_reader() - num_channels = ar.shape()[0] + num_channels = ar.shape[0] num_padding = ar.add_padding() assert np.array_equal(ar[:num_padding].asnumpy(), np.zeros((num_channels, num_padding))) diff --git a/tests/python/unittests/test_av_reader.py b/tests/python/unittests/test_av_reader.py index ccafadb1..481dc46b 100644 --- a/tests/python/unittests/test_av_reader.py +++ b/tests/python/unittests/test_av_reader.py @@ -6,17 +6,17 @@ CTX = cpu(0) def get_normal_av_reader(): - return AVReader('/Users/weisy/Developer/yinweisu/decord/tests/cpp/audio/count_down.mov', CTX) + return AVReader(os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'big_buck_bunny.mp4'), CTX) def test_normal_av_reader(): av = 
get_normal_av_reader() - assert len(av) == 328 + assert len(av) == 1440 def test_bytes_io(): - fn = os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'count_down.mov') + fn = os.path.join(os.path.dirname(__file__), '..', '..', 'cpp', 'audio', 'big_buck_bunny.mp4') with open(fn, 'rb') as f: av = AVReader(f) - assert len(av) == 328 + assert len(av) == 1440 av2 = get_normal_av_reader() audio, video = av[10] audio2, video2 = av2[10]