From 7175544c0bab30c12c24a2c440bff40a28ea83d3 Mon Sep 17 00:00:00 2001 From: Nuo Mi Date: Tue, 20 Aug 2024 21:22:36 +0800 Subject: [PATCH] checkasm: add vvc_bdof test apply_bdof_8_8x16_c: 5776.5 apply_bdof_8_8x16_avx2: 396.2 apply_bdof_8_16x8_c: 5722.0 apply_bdof_8_16x8_avx2: 216.0 apply_bdof_8_16x16_c: 11213.2 apply_bdof_8_16x16_avx2: 434.5 apply_bdof_10_8x16_c: 5657.7 apply_bdof_10_8x16_avx2: 1096.0 apply_bdof_10_16x8_c: 5531.7 apply_bdof_10_16x8_avx2: 212.5 apply_bdof_10_16x16_c: 11043.7 apply_bdof_10_16x16_avx2: 1252.7 apply_bdof_12_8x16_c: 5680.0 apply_bdof_12_8x16_avx2: 1096.5 apply_bdof_12_16x8_c: 5646.2 apply_bdof_12_16x8_avx2: 624.5 apply_bdof_12_16x16_c: 11076.0 apply_bdof_12_16x16_avx2: 1241.5 --- tests/checkasm/vvc_mc.c | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c index 62fa6aa7d0..754cf19065 100644 --- a/tests/checkasm/vvc_mc.c +++ b/tests/checkasm/vvc_mc.c @@ -64,6 +64,14 @@ static const int sizes[] = { 2, 4, 8, 16, 32, 64, 128 }; randomize_buffers(buf0, buf1, size, mask); \ } while (0) +#define randomize_prof_src(buf0, buf1, size) \ + do { \ + const int shift = 14 - bit_depth; \ + const int mask16 = 0x3fff >> shift << shift; \ + uint32_t mask = (mask16 << 16) | mask16; \ + randomize_buffers(buf0, buf1, size, mask); \ + } while (0) + static void check_put_vvc_luma(void) { LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]); @@ -382,6 +390,47 @@ static void check_dmvr(void) report("dmvr"); } +#define BDOF_BLOCK_SIZE 16 +#define BDOF_SRC_SIZE (MAX_PB_SIZE* (BDOF_BLOCK_SIZE + 2)) +#define BDOF_SRC_OFFSET (MAX_PB_SIZE + 1) +#define BDOF_DST_SIZE (BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE * 2) +static void check_bdof(void) +{ + LOCAL_ALIGNED_32(uint8_t, dst0, [BDOF_DST_SIZE]); + LOCAL_ALIGNED_32(uint8_t, dst1, [BDOF_DST_SIZE]); + LOCAL_ALIGNED_32(uint16_t, src00, [BDOF_SRC_SIZE]); + LOCAL_ALIGNED_32(uint16_t, src01, [BDOF_SRC_SIZE]); + LOCAL_ALIGNED_32(uint16_t, src10, [BDOF_SRC_SIZE]); + LOCAL_ALIGNED_32(uint16_t, src11, [BDOF_SRC_SIZE]); + + VVCDSPContext c; + declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, int block_w, int block_h); + + for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) { + const int dst_stride = BDOF_BLOCK_SIZE * SIZEOF_PIXEL; + + ff_vvc_dsp_init(&c, bit_depth); + randomize_prof_src(src00, src10, BDOF_SRC_SIZE); + randomize_prof_src(src01, src11, BDOF_SRC_SIZE); + for (int h = 8; h <= 16; h *= 2) { + for (int w = 8; w <= 16; w *= 2) { + if (w * h < 128) + continue; + if (check_func(c.inter.apply_bdof, "apply_bdof_%d_%dx%d", bit_depth, w, h)) { + memset(dst0, 0, BDOF_DST_SIZE); + memset(dst1, 0, BDOF_DST_SIZE); + call_ref(dst0, dst_stride, src00 + BDOF_SRC_OFFSET, src01 + BDOF_SRC_OFFSET, w, h); + call_new(dst1, dst_stride, src10 + BDOF_SRC_OFFSET, src11 + BDOF_SRC_OFFSET, w, h); + if (memcmp(dst0, dst1, BDOF_DST_SIZE)) + fail(); + bench_new(dst0, dst_stride, src00 + BDOF_SRC_OFFSET, src01 + BDOF_SRC_OFFSET, w, h); + } + } + } + } + report("apply_bdof"); +} + static void check_vvc_sad(void) { const int bit_depth = 10; @@ -422,6 +471,7 @@ static void check_vvc_sad(void) void checkasm_check_vvc_mc(void) { check_dmvr(); + check_bdof(); check_vvc_sad(); check_put_vvc_luma(); check_put_vvc_luma_uni();