forked from FFmpeg/FFmpeg
diracdsp: add dequantization SIMD
Currently unused, to be used in the following commits. Signed-off-by: Rostislav Pehlivanov <rpehlivanov@obe.tv>
This commit is contained in:
parent
244d22452c
commit
80721cc1ff
4 changed files with 71 additions and 0 deletions
|
@ -189,6 +189,27 @@ static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride,
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * DEQUANT_SUBBAND(PX) generates dequant_subband_<PX>_c(), the plain C
 * dequantizer for coefficients stored as type PX.
 *
 *   src    - input coefficients, tightly packed: tot_h values of type PX
 *            per row, rows directly following each other
 *   dst    - output coefficients; consecutive rows are `stride` bytes apart
 *   stride - distance in bytes between two output rows
 *   qf, qs - multiplier and offset applied to each coefficient magnitude
 *   tot_v  - number of rows to process
 *   tot_h  - number of coefficients per row
 *
 * Every non-zero coefficient c is replaced by
 *     sign(c) * ((|c| * qf + qs) >> 2)
 * and a zero coefficient stays zero.
 *
 * The magnitude is computed in unsigned arithmetic so that very large
 * products wrap modulo 2^32 instead of overflowing a signed int, which is
 * undefined behaviour in C; this also covers c == INT_MIN, whose absolute
 * value is not representable as an int.
 * NOTE(review): assumes qf and qs are non-negative - confirm against callers.
 */
#define DEQUANT_SUBBAND(PX)                                                                \
static void dequant_subband_ ## PX ## _c(uint8_t *src, uint8_t *dst, ptrdiff_t stride,     \
                                         const int qf, const int qs, int tot_v, int tot_h) \
{                                                                                          \
    int i, y;                                                                              \
    for (y = 0; y < tot_v; y++) {                                                          \
        PX *src_r = (PX *)src, *dst_r = (PX *)dst;                                         \
        for (i = 0; i < tot_h; i++) {                                                      \
            const PX c = *src_r++;                                                         \
            /* |c| without signed negation, well-defined for every value of c */           \
            const unsigned mag = c < 0 ? -(unsigned)c : (unsigned)c;                       \
            const PX q = (PX)((mag * (unsigned)qf + (unsigned)qs) >> 2);                   \
            /* restore the sign; zero must stay zero even though qs >> 2 may not be */     \
            *dst_r++ = c > 0 ? q : c < 0 ? (PX)-q : 0;                                     \
        }                                                                                  \
        /* input rows are packed, output rows are `stride` bytes apart */                  \
        src += tot_h * (int)sizeof(PX);                                                    \
        dst += stride;                                                                     \
    }                                                                                      \
}

DEQUANT_SUBBAND(int16_t)
DEQUANT_SUBBAND(int32_t)
|
||||
|
||||
#define PIXFUNC(PFX, WIDTH) \
|
||||
c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \
|
||||
c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \
|
||||
|
@ -214,6 +235,9 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c)
|
|||
c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c;
|
||||
c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c;
|
||||
|
||||
c->dequant_subband[0] = c->dequant_subband[2] = dequant_subband_int16_t_c;
|
||||
c->dequant_subband[1] = c->dequant_subband[3] = dequant_subband_int32_t_c;
|
||||
|
||||
PIXFUNC(put, 8);
|
||||
PIXFUNC(put, 16);
|
||||
PIXFUNC(put, 32);
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#define AVCODEC_DIRACDSP_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
typedef void (*dirac_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int h);
|
||||
typedef void (*dirac_biweight_func)(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, int weightd, int weights, int h);
|
||||
|
@ -46,6 +47,9 @@ typedef struct {
|
|||
void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/);
|
||||
void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
|
||||
|
||||
    /* [0], [1]: int16_t and int32_t versions, asm if available, C otherwise;
     * [2], [3]: int16_t and int32_t versions, always C */
|
||||
void (*dequant_subband[4])(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
|
||||
|
||||
dirac_weight_func weight_dirac_pixels_tab[3];
|
||||
dirac_biweight_func biweight_dirac_pixels_tab[3];
|
||||
} DiracDSPContext;
|
||||
|
|
|
@ -263,3 +263,40 @@ ADD_RECT sse2
|
|||
HPEL_FILTER sse2
|
||||
ADD_OBMC 32, sse2
|
||||
ADD_OBMC 16, sse2
|
||||
|
||||
INIT_XMM sse4

; void dequant_subband_32(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
;
; Dequantizes tot_v rows of tot_h 32-bit coefficients:
;     dst[i] = sign(src[i]) * ((|src[i]| * qf + qs) >> 2)
; Input rows are packed (tot_h dwords each); output rows are `stride` bytes
; apart. Four coefficients are handled per iteration, so when tot_h is not a
; multiple of 4 the last iteration of a row still loads and stores a full
; vector, i.e. up to 3 dwords beyond tot_h.
cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
    movd    m2, qfd
    movd    m3, qsd
    SPLATD  m2                      ; presumably broadcasts qf to all four dword lanes (project macro - confirm)
    SPLATD  m3                      ; likewise for qs
    ; qf/qs now live in m2/m3, so their argument registers r3/r4 are reused
    ; as scratch. tot_h is a 32-bit int: the dword move clears whatever the
    ; upper half of the register held.
    mov     r4d, tot_hd             ; r4 = saved row width
    mov     r3, dstq                ; r3 = start of current output row

.loop_v:
    mov     tot_hq, r4              ; reload per-row element counter
    mov     dstq, r3

.loop_h:
    movu    m0, [srcq]

    pabsd   m1, m0                  ; |c|
    pmulld  m1, m2                  ; |c| * qf   (low 32 bits)
    paddd   m1, m3                  ; + qs
    psrld   m1, 2                   ; >> 2, logical shift on the magnitude
    psignd  m1, m0                  ; reapply sign of c; lanes where c == 0 become 0

    movu    [dstq], m1

    add     srcq, mmsize
    add     dstq, mmsize
    sub     tot_hq, 4
    jg      .loop_h

    ; tot_hq is now 0..-3: the number of dwords read past the end of the
    ; packed row. Step src back so the next row starts at the right place.
    lea     srcq, [srcq + 4*tot_hq]

    add     r3, strideq
    dec     tot_vd
    jg      .loop_v

    RET
|
||||
|
|
|
@ -46,6 +46,8 @@ void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src,
|
|||
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
||||
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
|
||||
|
||||
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
|
||||
|
||||
#if HAVE_YASM
|
||||
|
||||
#define HPEL_FILTER(MMSIZE, EXT) \
|
||||
|
@ -184,4 +186,8 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
|
|||
c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
|
||||
c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
|
||||
}
|
||||
|
||||
if (EXTERNAL_SSE4(mm_flags)) {
|
||||
c->dequant_subband[1] = ff_dequant_subband_32_sse4;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue