FFmpeg/tests/checkasm/sw_range_convert.c
Ramiro Polla 384fe39623 swscale/range_convert: fix mpeg ranges in yuv range conversion for non-8-bit pixel formats
There is an issue with the constants used in YUV to YUV range conversion,
where the upper bound is not respected when converting to mpeg range.

With this commit, the constants are calculated at runtime, depending on
the bit depth. This approach also allows us to more easily understand how
the constants are derived.

For bit depths <= 14, the number of fixed point bits has been set to 14
for all conversions, to simplify the code.
For bit depths > 14, the number of fixed point bits has been raised and
set to 18, to allow for the conversion to be accurate enough for the mpeg
range to be respected.

The convert functions now take the conversion constants (coeff and offset)
as function arguments.
For bit depths <= 14, coeff is unsigned 16-bit and offset is 32-bit.
For bit depths > 14, coeff is unsigned 32-bit and offset is 64-bit.

x86_64:
chrRangeFromJpeg8_1920_c:    2127.4   2125.0  (1.00x)
chrRangeFromJpeg16_1920_c:   2325.2   2127.2  (1.09x)
chrRangeToJpeg8_1920_c:      3166.9   3168.7  (1.00x)
chrRangeToJpeg16_1920_c:     2152.4   3164.8  (0.68x)
lumRangeFromJpeg8_1920_c:    1263.0   1302.5  (0.97x)
lumRangeFromJpeg16_1920_c:   1080.5   1299.2  (0.83x)
lumRangeToJpeg8_1920_c:      1886.8   2112.2  (0.89x)
lumRangeToJpeg16_1920_c:     1077.0   1906.5  (0.56x)

aarch64 A55:
chrRangeFromJpeg8_1920_c:   28835.2  28835.6  (1.00x)
chrRangeFromJpeg16_1920_c:  28839.8  32680.8  (0.88x)
chrRangeToJpeg8_1920_c:     23074.7  23075.4  (1.00x)
chrRangeToJpeg16_1920_c:    17318.9  24996.0  (0.69x)
lumRangeFromJpeg8_1920_c:   15389.7  15384.5  (1.00x)
lumRangeFromJpeg16_1920_c:  15388.2  17306.7  (0.89x)
lumRangeToJpeg8_1920_c:     19227.8  19226.6  (1.00x)
lumRangeToJpeg16_1920_c:    15387.0  21146.3  (0.73x)

aarch64 A76:
chrRangeFromJpeg8_1920_c:    6324.4   6268.1  (1.01x)
chrRangeFromJpeg16_1920_c:   6339.9  11521.5  (0.55x)
chrRangeToJpeg8_1920_c:      9656.0   9612.8  (1.00x)
chrRangeToJpeg16_1920_c:     6340.4  11651.8  (0.54x)
lumRangeFromJpeg8_1920_c:    4422.0   4420.8  (1.00x)
lumRangeFromJpeg16_1920_c:   4420.9   5762.0  (0.77x)
lumRangeToJpeg8_1920_c:      5949.1   5977.5  (1.00x)
lumRangeToJpeg16_1920_c:     4446.8   5946.2  (0.75x)

NOTE: all simd optimizations for range_convert have been disabled.
      they will be re-enabled when they are fixed for each architecture.

NOTE2: the same issue still exists in rgb2yuv conversions, which is not
       addressed in this commit.
2024-12-05 21:10:29 +01:00

234 lines
8.9 KiB
C

/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <string.h>
#include "libavutil/common.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libavutil/mem_internal.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "checkasm.h"
/* Pixel formats iterated by the range-conversion checks in this file; each
 * check reads the component bit depth from the format's descriptor. */
static const enum AVPixelFormat pixel_formats[] = {
AV_PIX_FMT_YUV444P,
AV_PIX_FMT_YUV444P9,
AV_PIX_FMT_YUV444P10,
AV_PIX_FMT_YUV444P12,
AV_PIX_FMT_YUV444P14,
AV_PIX_FMT_YUV444P16,
};
/**
 * Fill two buffers with the same sequence of random samples.
 *
 * Each sample is a random value masked to bit_depth bits and then shifted
 * left by 15 - bit_depth (bit_depth <= 14) or 19 - bit_depth (otherwise).
 * When bit_depth is 16 the buffers are written as int32_t elements,
 * otherwise as int16_t elements.
 *
 * @param buf0      first buffer to fill
 * @param buf1      second buffer, receives the same values as buf0
 * @param bit_depth number of significant bits per sample
 * @param width     number of samples to write into each buffer
 */
static void randomize_buffers(int16_t *buf0, int16_t *buf1, int bit_depth, int width)
{
    const int use_32bit  = bit_depth == 16;
    const int value_mask = (1 << bit_depth) - 1;
    const int shift      = bit_depth <= 14 ? 15 - bit_depth : 19 - bit_depth;
    int32_t *wide0 = (int32_t *) buf0;
    int32_t *wide1 = (int32_t *) buf1;

    for (int x = 0; x < width; x++) {
        const int32_t raw    = rnd() & value_mask;
        const int32_t sample = raw << shift;

        if (use_32bit) {
            wide0[x] = sample;
            wide1[x] = sample;
            continue;
        }
        /* narrow storage: the shifted value is truncated to int16_t */
        buf0[x] = sample;
        buf1[x] = sample;
    }
}
/**
 * Check c->lumConvertRange for every entry of pixel_formats[] at two widths.
 *
 * For each format the context is reconfigured (src/dst format, dstBpc) and
 * ff_sws_init_scale() is called again; call_ref() and call_new() are then run
 * on identical input and their outputs must match byte for byte.
 *
 * @param from written to sws->src_range, with sws->dst_range set to !from.
 *             Non-zero names the test "lumRangeFromJpeg", zero names it
 *             "lumRangeToJpeg". The output bounds check is only performed
 *             when from is zero.
 */
static void check_lumConvertRange(int from)
{
    const char *func_str = from ? "lumRangeFromJpeg" : "lumRangeToJpeg";
#define LARGEST_INPUT_SIZE 1920
    static const int input_sizes[] = {8, LARGEST_INPUT_SIZE};
    SwsContext *sws;
    SwsInternal *c;

    /* Declared in int16_t units but doubled, so that the same storage can
     * hold LARGEST_INPUT_SIZE int32_t samples in the 16-bit case. */
    LOCAL_ALIGNED_32(int16_t, dst0, [LARGEST_INPUT_SIZE * 2]);
    LOCAL_ALIGNED_32(int16_t, dst1, [LARGEST_INPUT_SIZE * 2]);
    int32_t *dst0_32 = (int32_t *) dst0;
    int32_t *dst1_32 = (int32_t *) dst1;

    declare_func(void, int16_t *dst, int width,
                       uint32_t coeff, int64_t offset);

    /* Everything below dereferences the context, so give up early when it
     * could not be allocated or initialised instead of using it anyway. */
    sws = sws_alloc_context();
    if (!sws) {
        fail();
        return;
    }
    if (sws_init_context(sws, NULL, NULL) < 0) {
        fail();
        sws_freeContext(sws);
        return;
    }

    c = sws_internal(sws);
    sws->src_range = from;
    sws->dst_range = !from;

    for (int pfi = 0; pfi < FF_ARRAY_ELEMS(pixel_formats); pfi++) {
        enum AVPixelFormat pix_fmt = pixel_formats[pfi];
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
        int bit_depth = desc->comp[0].depth;
        /* 16-bit samples are handled as int32_t, all others as int16_t */
        int sample_size = bit_depth == 16 ? sizeof(int32_t) : sizeof(int16_t);
        /* samples are stored left-shifted by this amount */
        int src_shift = bit_depth <= 14 ? 15 - bit_depth : 19 - bit_depth;
        /* luma limits: 16..235 scaled to the bit depth, and full-range max */
        int mpeg_min = 16 << (bit_depth - 8);
        int mpeg_max = 235 << (bit_depth - 8);
        int jpeg_max = (1 << bit_depth) - 1;

        sws->src_format = pix_fmt;
        sws->dst_format = pix_fmt;
        c->dstBpc = bit_depth;
        ff_sws_init_scale(c);

        for (int dstWi = 0; dstWi < FF_ARRAY_ELEMS(input_sizes); dstWi++) {
            int width = input_sizes[dstWi];
            if (check_func(c->lumConvertRange, "%s%d_%d", func_str, bit_depth, width)) {
                randomize_buffers(dst0, dst1, bit_depth, width);
                /* Overwrite the first samples with known values: in the
                 * to-JPEG direction samples 0 and 1 hold the MPEG limits so
                 * the output bounds can be verified after the call; sample 2
                 * is always -1 (presumably to exercise a negative input). */
                if (bit_depth == 16) {
                    if (!from) {
                        dst1_32[0] = dst0_32[0] = mpeg_min << src_shift;
                        dst1_32[1] = dst0_32[1] = mpeg_max << src_shift;
                    }
                    dst1_32[2] = dst0_32[2] = -1;
                } else {
                    if (!from) {
                        dst1[0] = dst0[0] = mpeg_min << src_shift;
                        dst1[1] = dst0[1] = mpeg_max << src_shift;
                    }
                    dst1[2] = dst0[2] = -1;
                }
                call_ref(dst0, width,
                         c->lumConvertRange_coeff, c->lumConvertRange_offset);
                call_new(dst1, width,
                         c->lumConvertRange_coeff, c->lumConvertRange_offset);
                if (memcmp(dst0, dst1, width * sample_size))
                    fail();
                if (!from) {
                    /* check that the mpeg range is respected: the MPEG
                     * minimum must not end up above 0 and the MPEG maximum
                     * must land exactly on the full-range maximum */
                    if (bit_depth == 16) {
                        if ((dst1_32[0] >> src_shift) > 0 || (dst1_32[1] >> src_shift) != jpeg_max)
                            fail();
                    } else {
                        if ((dst1[0] >> src_shift) > 0 || (dst1[1] >> src_shift) != jpeg_max)
                            fail();
                    }
                }
                /* benchmark only the largest width, and only 8 and 16 bit */
                if (width == LARGEST_INPUT_SIZE && (bit_depth == 8 || bit_depth == 16))
                    bench_new(dst1, width,
                              c->lumConvertRange_coeff, c->lumConvertRange_offset);
            }
        }
    }

    sws_freeContext(sws);
}
#undef LARGEST_INPUT_SIZE
/**
 * Check c->chrConvertRange for every entry of pixel_formats[] at two widths.
 *
 * For each format the context is reconfigured (src/dst format, dstBpc) and
 * ff_sws_init_scale() is called again; call_ref() and call_new() are then run
 * on identical U and V input and both output planes must match byte for byte.
 * Only the U plane is seeded with known values and range-checked; the V
 * plane is only compared against the reference output.
 *
 * @param from written to sws->src_range, with sws->dst_range set to !from.
 *             Non-zero names the test "chrRangeFromJpeg", zero names it
 *             "chrRangeToJpeg". The output bounds check is only performed
 *             when from is zero.
 */
static void check_chrConvertRange(int from)
{
    const char *func_str = from ? "chrRangeFromJpeg" : "chrRangeToJpeg";
#define LARGEST_INPUT_SIZE 1920
    static const int input_sizes[] = {8, LARGEST_INPUT_SIZE};
    SwsContext *sws;
    SwsInternal *c;

    /* Declared in int16_t units but doubled, so that the same storage can
     * hold LARGEST_INPUT_SIZE int32_t samples in the 16-bit case. */
    LOCAL_ALIGNED_32(int16_t, dstU0, [LARGEST_INPUT_SIZE * 2]);
    LOCAL_ALIGNED_32(int16_t, dstV0, [LARGEST_INPUT_SIZE * 2]);
    LOCAL_ALIGNED_32(int16_t, dstU1, [LARGEST_INPUT_SIZE * 2]);
    LOCAL_ALIGNED_32(int16_t, dstV1, [LARGEST_INPUT_SIZE * 2]);
    int32_t *dstU0_32 = (int32_t *) dstU0;
    int32_t *dstU1_32 = (int32_t *) dstU1;

    declare_func(void, int16_t *dstU, int16_t *dstV, int width,
                       uint32_t coeff, int64_t offset);

    /* Everything below dereferences the context, so give up early when it
     * could not be allocated or initialised instead of using it anyway. */
    sws = sws_alloc_context();
    if (!sws) {
        fail();
        return;
    }
    if (sws_init_context(sws, NULL, NULL) < 0) {
        fail();
        sws_freeContext(sws);
        return;
    }

    c = sws_internal(sws);
    sws->src_range = from;
    sws->dst_range = !from;

    for (int pfi = 0; pfi < FF_ARRAY_ELEMS(pixel_formats); pfi++) {
        enum AVPixelFormat pix_fmt = pixel_formats[pfi];
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
        int bit_depth = desc->comp[0].depth;
        /* 16-bit samples are handled as int32_t, all others as int16_t */
        int sample_size = bit_depth == 16 ? sizeof(int32_t) : sizeof(int16_t);
        /* samples are stored left-shifted by this amount */
        int src_shift = bit_depth <= 14 ? 15 - bit_depth : 19 - bit_depth;
        /* chroma limits: 16..240 scaled to the bit depth, and full-range max */
        int mpeg_min = 16 << (bit_depth - 8);
        int mpeg_max = 240 << (bit_depth - 8);
        int jpeg_max = (1 << bit_depth) - 1;

        sws->src_format = pix_fmt;
        sws->dst_format = pix_fmt;
        c->dstBpc = bit_depth;
        ff_sws_init_scale(c);

        for (int dstWi = 0; dstWi < FF_ARRAY_ELEMS(input_sizes); dstWi++) {
            int width = input_sizes[dstWi];
            if (check_func(c->chrConvertRange, "%s%d_%d", func_str, bit_depth, width)) {
                randomize_buffers(dstU0, dstU1, bit_depth, width);
                randomize_buffers(dstV0, dstV1, bit_depth, width);
                /* Overwrite the first U samples with known values: in the
                 * to-JPEG direction samples 0 and 1 hold the MPEG limits so
                 * the output bounds can be verified after the call; sample 2
                 * is always -1 (presumably to exercise a negative input). */
                if (bit_depth == 16) {
                    if (!from) {
                        dstU1_32[0] = dstU0_32[0] = mpeg_min << src_shift;
                        dstU1_32[1] = dstU0_32[1] = mpeg_max << src_shift;
                    }
                    dstU1_32[2] = dstU0_32[2] = -1;
                } else {
                    if (!from) {
                        dstU1[0] = dstU0[0] = mpeg_min << src_shift;
                        dstU1[1] = dstU0[1] = mpeg_max << src_shift;
                    }
                    dstU1[2] = dstU0[2] = -1;
                }
                call_ref(dstU0, dstV0, width,
                         c->chrConvertRange_coeff, c->chrConvertRange_offset);
                call_new(dstU1, dstV1, width,
                         c->chrConvertRange_coeff, c->chrConvertRange_offset);
                if (memcmp(dstU0, dstU1, width * sample_size) ||
                    memcmp(dstV0, dstV1, width * sample_size))
                    fail();
                if (!from) {
                    /* check that the mpeg range is respected: the MPEG
                     * minimum must not end up above 0 and the MPEG maximum
                     * must land exactly on the full-range maximum */
                    if (bit_depth == 16) {
                        if ((dstU1_32[0] >> src_shift) > 0 || (dstU1_32[1] >> src_shift) != jpeg_max)
                            fail();
                    } else {
                        if ((dstU1[0] >> src_shift) > 0 || (dstU1[1] >> src_shift) != jpeg_max)
                            fail();
                    }
                }
                /* benchmark only the largest width, and only 8 and 16 bit */
                if (width == LARGEST_INPUT_SIZE && (bit_depth == 8 || bit_depth == 16))
                    bench_new(dstU1, dstV1, width,
                              c->chrConvertRange_coeff, c->chrConvertRange_offset);
            }
        }
    }

    sws_freeContext(sws);
}
#undef LARGEST_INPUT_SIZE
/**
 * Entry point of the range-conversion checks.
 *
 * Runs the luma check followed by the chroma check, first with from == 1
 * (the "...FromJpeg" names) and then with from == 0 (the "...ToJpeg" names),
 * issuing one report() after each check.
 */
void checkasm_check_sw_range_convert(void)
{
    for (int from = 1; from >= 0; from--) {
        check_lumConvertRange(from);
        report(from ? "lumRangeFromJpeg" : "lumRangeToJpeg");

        check_chrConvertRange(from);
        report(from ? "chrRangeFromJpeg" : "chrRangeToJpeg");
    }
}