diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index af19028f3..f91be9a89 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -42,6 +42,10 @@ git clone git@github.com:pytorch/torchcodec.git
 # Or, using https instead of ssh: git clone https://github.com/pytorch/torchcodec.git
 cd torchcodec
 
+# Optional, but recommended: define a persistent build directory, which speeds
+# up subsequent builds.
+export TORCHCODEC_CMAKE_BUILD_DIR="${PWD}/build"
+
 pip install -e ".[dev]" --no-build-isolation -vv
 # Or, for cuda support: ENABLE_CUDA=1 pip install -e ".[dev]" --no-build-isolation -vv
 ```
diff --git a/setup.py b/setup.py
index 5074ef478..2efccf982 100644
--- a/setup.py
+++ b/setup.py
@@ -126,12 +126,20 @@ def _build_all_extensions_with_cmake(self):
             f"-DTORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR={torchcodec_disable_compile_warning_as_error}",
         ]
 
+        # Let developers opt into a persistent CMake build directory through
+        # TORCHCODEC_CMAKE_BUILD_DIR. An unset *or empty* variable keeps the
+        # build_temp value already set on this command.
+        self.build_temp = os.getenv("TORCHCODEC_CMAKE_BUILD_DIR") or self.build_temp
+        print(f"Using {self.build_temp = }", flush=True)
         Path(self.build_temp).mkdir(parents=True, exist_ok=True)
 
+        print("Calling cmake (configure)", flush=True)
         subprocess.check_call(
             ["cmake", str(_ROOT_DIR)] + cmake_args, cwd=self.build_temp
         )
+        print("Calling cmake --build", flush=True)
         subprocess.check_call(["cmake", "--build", "."], cwd=self.build_temp)
+        print("Calling cmake --install", flush=True)
         subprocess.check_call(["cmake", "--install", "."], cwd=self.build_temp)
 
     def copy_extensions_to_source(self):
diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp
index 1da00484f..857617a51 100644
--- a/src/torchcodec/_core/CudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/CudaDeviceInterface.cpp
@@ -227,14 +227,42 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
   NppiSize oSizeROI = {width, height};
   Npp8u* input[2] = {avFrame->data[0], avFrame->data[1]};
 
+  // Conversion matrix taken from https://mymusing.co/bt-709-yuv-to-rgb-conversion-color/
+  // The -128 offset is needed to first center the U and V channels around 0
+  static const Npp32f bt709ColorTwist[3][4] = {
+    {1.0f, 0.0f, 1.5748f, 0.0f},
+    {1.0f, -0.187324f, -0.468124f, -128.0f},
+    {1.0f, 1.8556f, 0.0f, -128.0f}
+  };
+
   NppStatus status;
   if (avFrame->colorspace == AVColorSpace::AVCOL_SPC_BT709) {
-    status = nppiNV12ToRGB_709CSC_8u_P2C3R(
-        input,
-        avFrame->linesize[0],
-        static_cast<Npp8u*>(dst.data_ptr()),
-        dst.stride(0),
-        oSizeROI);
+    if (avFrame->color_range == AVColorRange::AVCOL_RANGE_JPEG) {
+      // BT.709 full range using custom ColorTwist to match libswscale
+      // Create NPP stream context for the _Ctx function
+      NppStreamContext nppStreamCtx;
+      nppGetStreamContext(&nppStreamCtx);
+
+      // ColorTwist function expects step arrays for planar input format
+      int srcStep[2] = {avFrame->linesize[0], avFrame->linesize[1]};
+
+      status = nppiNV12ToRGB_8u_ColorTwist32f_P2C3R_Ctx(
+          input,
+          srcStep,
+          static_cast<Npp8u*>(dst.data_ptr()),
+          dst.stride(0),
+          oSizeROI,
+          bt709ColorTwist,
+          nppStreamCtx);
+    } else {
+      // BT.709 studio range
+      status = nppiNV12ToRGB_709CSC_8u_P2C3R(
+          input,
+          avFrame->linesize[0],
+          static_cast<Npp8u*>(dst.data_ptr()),
+          dst.stride(0),
+          oSizeROI);
+    }
   } else {
     status = nppiNV12ToRGB_8u_P2C3R(
         input,
diff --git a/test/resources/full_range_709.mp4 b/test/resources/full_range_709.mp4
new file mode 100644
index 000000000..004028190
Binary files /dev/null and b/test/resources/full_range_709.mp4 differ
diff --git a/test/test_decoders.py b/test/test_decoders.py
index dcf9a1585..b95a911bd 100644
--- a/test/test_decoders.py
+++ b/test/test_decoders.py
@@ -25,6 +25,7 @@
     assert_frames_equal,
     AV1_VIDEO,
     cpu_and_cuda,
+    FULL_COLOR_RANGE,
     get_ffmpeg_major_version,
     H265_VIDEO,
     in_fbcode,
@@ -32,6 +33,7 @@
     NASA_AUDIO_MP3,
     NASA_AUDIO_MP3_44100,
     NASA_VIDEO,
+    needs_cuda,
     SINE_MONO_S16,
     SINE_MONO_S32,
     SINE_MONO_S32_44100,
@@ -1138,6 +1140,18 @@ def test_pts_to_dts_fallback(self, seek_mode):
         with pytest.raises(AssertionError, match="not equal"):
             torch.testing.assert_close(decoder[0], decoder[10])
 
+    @needs_cuda
+    def test_full_range_bt709_video(self):
+        """Check that CUDA decoding of a full-range BT.709 video matches CPU."""
+        decoder_gpu = VideoDecoder(FULL_COLOR_RANGE.path, device="cuda")
+        decoder_cpu = VideoDecoder(FULL_COLOR_RANGE.path, device="cpu")
+
+        # Note the indices are not monotonic: frame 5 is requested after 20.
+        for frame_index in (0, 10, 20, 5):
+            gpu_frame = decoder_gpu.get_frame_at(frame_index).data.cpu()
+            cpu_frame = decoder_cpu.get_frame_at(frame_index).data
+            torch.testing.assert_close(gpu_frame, cpu_frame, rtol=0, atol=2)
+
 
 class TestAudioDecoder:
     @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3, SINE_MONO_S32))
diff --git a/test/utils.py b/test/utils.py
index c83a0f241..c7160d594 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -609,3 +609,12 @@ def sample_format(self) -> str:
         },
     },
 )
+
+FULL_COLOR_RANGE = TestVideo(
+    filename="full_range_709.mp4",
+    default_stream_index=0,
+    stream_infos={
+        0: TestVideoStreamInfo(width=1280, height=720, num_color_channels=3),
+    },
+    frames={0: {}},  # Not needed for now
+)