forked from FFmpeg/FFmpeg
float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil
This commit is contained in:
parent
4e4dd71730
commit
cb5042d02c
9 changed files with 82 additions and 75 deletions
|
@ -154,8 +154,6 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0,
|
||||||
const float *src1, const float *win, int len);
|
const float *src1, const float *win, int len);
|
||||||
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
|
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
|
||||||
int len);
|
int len);
|
||||||
void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
|
|
||||||
int len);
|
|
||||||
void ff_butterflies_float_neon(float *v1, float *v2, int len);
|
void ff_butterflies_float_neon(float *v1, float *v2, int len);
|
||||||
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
|
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
|
||||||
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
|
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
|
||||||
|
@ -329,7 +327,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
||||||
|
|
||||||
c->vector_fmul_window = ff_vector_fmul_window_neon;
|
c->vector_fmul_window = ff_vector_fmul_window_neon;
|
||||||
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
|
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
|
||||||
c->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
|
|
||||||
c->butterflies_float = ff_butterflies_float_neon;
|
c->butterflies_float = ff_butterflies_float_neon;
|
||||||
c->scalarproduct_float = ff_scalarproduct_float_neon;
|
c->scalarproduct_float = ff_scalarproduct_float_neon;
|
||||||
c->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
|
c->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
|
||||||
|
|
|
@ -682,54 +682,6 @@ NOVFP vdup.32 q8, r2
|
||||||
.unreq len
|
.unreq len
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
function ff_vector_fmac_scalar_neon, export=1
|
|
||||||
VFP len .req r2
|
|
||||||
VFP acc .req r3
|
|
||||||
NOVFP len .req r3
|
|
||||||
NOVFP acc .req r2
|
|
||||||
VFP vdup.32 q15, d0[0]
|
|
||||||
NOVFP vdup.32 q15, r2
|
|
||||||
bics r12, len, #15
|
|
||||||
mov acc, r0
|
|
||||||
beq 3f
|
|
||||||
vld1.32 {q0}, [r1,:128]!
|
|
||||||
vld1.32 {q8}, [acc,:128]!
|
|
||||||
vld1.32 {q1}, [r1,:128]!
|
|
||||||
vld1.32 {q9}, [acc,:128]!
|
|
||||||
1: vmla.f32 q8, q0, q15
|
|
||||||
vld1.32 {q2}, [r1,:128]!
|
|
||||||
vld1.32 {q10}, [acc,:128]!
|
|
||||||
vmla.f32 q9, q1, q15
|
|
||||||
vld1.32 {q3}, [r1,:128]!
|
|
||||||
vld1.32 {q11}, [acc,:128]!
|
|
||||||
vmla.f32 q10, q2, q15
|
|
||||||
vst1.32 {q8}, [r0,:128]!
|
|
||||||
vmla.f32 q11, q3, q15
|
|
||||||
vst1.32 {q9}, [r0,:128]!
|
|
||||||
subs r12, r12, #16
|
|
||||||
beq 2f
|
|
||||||
vld1.32 {q0}, [r1,:128]!
|
|
||||||
vld1.32 {q8}, [acc,:128]!
|
|
||||||
vst1.32 {q10}, [r0,:128]!
|
|
||||||
vld1.32 {q1}, [r1,:128]!
|
|
||||||
vld1.32 {q9}, [acc,:128]!
|
|
||||||
vst1.32 {q11}, [r0,:128]!
|
|
||||||
b 1b
|
|
||||||
2: vst1.32 {q10}, [r0,:128]!
|
|
||||||
vst1.32 {q11}, [r0,:128]!
|
|
||||||
ands len, len, #15
|
|
||||||
it eq
|
|
||||||
bxeq lr
|
|
||||||
3: vld1.32 {q0}, [r1,:128]!
|
|
||||||
vld1.32 {q8}, [acc,:128]!
|
|
||||||
vmla.f32 q8, q0, q15
|
|
||||||
vst1.32 {q8}, [r0,:128]!
|
|
||||||
subs len, len, #4
|
|
||||||
bgt 3b
|
|
||||||
bx lr
|
|
||||||
.unreq len
|
|
||||||
endfunc
|
|
||||||
|
|
||||||
function ff_butterflies_float_neon, export=1
|
function ff_butterflies_float_neon, export=1
|
||||||
1: vld1.32 {q0},[r0,:128]
|
1: vld1.32 {q0},[r0,:128]
|
||||||
vld1.32 {q1},[r1,:128]
|
vld1.32 {q1},[r1,:128]
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "libavutil/common.h"
|
#include "libavutil/common.h"
|
||||||
|
#include "libavutil/float_dsp.h"
|
||||||
#include "libavutil/intmath.h"
|
#include "libavutil/intmath.h"
|
||||||
#include "libavutil/intreadwrite.h"
|
#include "libavutil/intreadwrite.h"
|
||||||
#include "libavutil/mathematics.h"
|
#include "libavutil/mathematics.h"
|
||||||
|
@ -383,7 +384,7 @@ typedef struct {
|
||||||
int profile;
|
int profile;
|
||||||
|
|
||||||
int debug_flag; ///< used for suppressing repeated error messages output
|
int debug_flag; ///< used for suppressing repeated error messages output
|
||||||
DSPContext dsp;
|
AVFloatDSPContext fdsp;
|
||||||
FFTContext imdct;
|
FFTContext imdct;
|
||||||
SynthFilterContext synth;
|
SynthFilterContext synth;
|
||||||
DCADSPContext dcadsp;
|
DCADSPContext dcadsp;
|
||||||
|
@ -1865,8 +1866,8 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data,
|
||||||
float *back_chan = s->samples + s->channel_order_tab[s->xch_base_channel] * 256;
|
float *back_chan = s->samples + s->channel_order_tab[s->xch_base_channel] * 256;
|
||||||
float *lt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 2] * 256;
|
float *lt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 2] * 256;
|
||||||
float *rt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 1] * 256;
|
float *rt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 1] * 256;
|
||||||
s->dsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
|
s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
|
||||||
s->dsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
|
s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
|
if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
|
||||||
|
@ -1908,7 +1909,7 @@ static av_cold int dca_decode_init(AVCodecContext *avctx)
|
||||||
s->avctx = avctx;
|
s->avctx = avctx;
|
||||||
dca_init_vlcs();
|
dca_init_vlcs();
|
||||||
|
|
||||||
ff_dsputil_init(&s->dsp, avctx);
|
avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
|
||||||
ff_mdct_init(&s->imdct, 6, 1, 1.0);
|
ff_mdct_init(&s->imdct, 6, 1, 1.0);
|
||||||
ff_synth_filter_init(&s->synth);
|
ff_synth_filter_init(&s->synth);
|
||||||
ff_dcadsp_init(&s->dcadsp);
|
ff_dcadsp_init(&s->dcadsp);
|
||||||
|
|
|
@ -2401,14 +2401,6 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
|
||||||
dst[i] = src[i] * mul;
|
dst[i] = src[i] * mul;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
|
|
||||||
int len)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < len; i++)
|
|
||||||
dst[i] += src[i] * mul;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void butterflies_float_c(float *restrict v1, float *restrict v2,
|
static void butterflies_float_c(float *restrict v1, float *restrict v2,
|
||||||
int len)
|
int len)
|
||||||
{
|
{
|
||||||
|
@ -2904,7 +2896,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
||||||
c->butterflies_float = butterflies_float_c;
|
c->butterflies_float = butterflies_float_c;
|
||||||
c->butterflies_float_interleave = butterflies_float_interleave_c;
|
c->butterflies_float_interleave = butterflies_float_interleave_c;
|
||||||
c->vector_fmul_scalar = vector_fmul_scalar_c;
|
c->vector_fmul_scalar = vector_fmul_scalar_c;
|
||||||
c->vector_fmac_scalar = vector_fmac_scalar_c;
|
|
||||||
|
|
||||||
c->shrink[0]= av_image_copy_plane;
|
c->shrink[0]= av_image_copy_plane;
|
||||||
c->shrink[1]= ff_shrink22;
|
c->shrink[1]= ff_shrink22;
|
||||||
|
|
|
@ -416,17 +416,6 @@ typedef struct DSPContext {
|
||||||
*/
|
*/
|
||||||
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
|
void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
|
||||||
int len);
|
int len);
|
||||||
/**
|
|
||||||
* Multiply a vector of floats by a scalar float and add to
|
|
||||||
* destination vector. Source and destination vectors must
|
|
||||||
* overlap exactly or not at all.
|
|
||||||
* @param dst result vector, 16-byte aligned
|
|
||||||
* @param src input vector, 16-byte aligned
|
|
||||||
* @param mul scalar value
|
|
||||||
* @param len length of vector, multiple of 4
|
|
||||||
*/
|
|
||||||
void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
|
|
||||||
int len);
|
|
||||||
/**
|
/**
|
||||||
* Calculate the scalar product of two vectors of floats.
|
* Calculate the scalar product of two vectors of floats.
|
||||||
* @param v1 first vector, 16-byte aligned
|
* @param v1 first vector, 16-byte aligned
|
||||||
|
|
|
@ -26,7 +26,11 @@
|
||||||
|
|
||||||
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
|
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
|
||||||
|
|
||||||
|
void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
|
||||||
|
int len);
|
||||||
|
|
||||||
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
|
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
|
||||||
{
|
{
|
||||||
fdsp->vector_fmul = ff_vector_fmul_neon;
|
fdsp->vector_fmul = ff_vector_fmul_neon;
|
||||||
|
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1
|
||||||
3: vst1.32 {d16-d19},[r0,:128]!
|
3: vst1.32 {d16-d19},[r0,:128]!
|
||||||
bx lr
|
bx lr
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
function ff_vector_fmac_scalar_neon, export=1
|
||||||
|
VFP len .req r2
|
||||||
|
VFP acc .req r3
|
||||||
|
NOVFP len .req r3
|
||||||
|
NOVFP acc .req r2
|
||||||
|
VFP vdup.32 q15, d0[0]
|
||||||
|
NOVFP vdup.32 q15, r2
|
||||||
|
bics r12, len, #15
|
||||||
|
mov acc, r0
|
||||||
|
beq 3f
|
||||||
|
vld1.32 {q0}, [r1,:128]!
|
||||||
|
vld1.32 {q8}, [acc,:128]!
|
||||||
|
vld1.32 {q1}, [r1,:128]!
|
||||||
|
vld1.32 {q9}, [acc,:128]!
|
||||||
|
1: vmla.f32 q8, q0, q15
|
||||||
|
vld1.32 {q2}, [r1,:128]!
|
||||||
|
vld1.32 {q10}, [acc,:128]!
|
||||||
|
vmla.f32 q9, q1, q15
|
||||||
|
vld1.32 {q3}, [r1,:128]!
|
||||||
|
vld1.32 {q11}, [acc,:128]!
|
||||||
|
vmla.f32 q10, q2, q15
|
||||||
|
vst1.32 {q8}, [r0,:128]!
|
||||||
|
vmla.f32 q11, q3, q15
|
||||||
|
vst1.32 {q9}, [r0,:128]!
|
||||||
|
subs r12, r12, #16
|
||||||
|
beq 2f
|
||||||
|
vld1.32 {q0}, [r1,:128]!
|
||||||
|
vld1.32 {q8}, [acc,:128]!
|
||||||
|
vst1.32 {q10}, [r0,:128]!
|
||||||
|
vld1.32 {q1}, [r1,:128]!
|
||||||
|
vld1.32 {q9}, [acc,:128]!
|
||||||
|
vst1.32 {q11}, [r0,:128]!
|
||||||
|
b 1b
|
||||||
|
2: vst1.32 {q10}, [r0,:128]!
|
||||||
|
vst1.32 {q11}, [r0,:128]!
|
||||||
|
ands len, len, #15
|
||||||
|
it eq
|
||||||
|
bxeq lr
|
||||||
|
3: vld1.32 {q0}, [r1,:128]!
|
||||||
|
vld1.32 {q8}, [acc,:128]!
|
||||||
|
vmla.f32 q8, q0, q15
|
||||||
|
vst1.32 {q8}, [r0,:128]!
|
||||||
|
subs len, len, #4
|
||||||
|
bgt 3b
|
||||||
|
bx lr
|
||||||
|
.unreq len
|
||||||
|
endfunc
|
||||||
|
|
|
@ -28,9 +28,18 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1,
|
||||||
dst[i] = src0[i] * src1[i];
|
dst[i] = src0[i] * src1[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
|
||||||
|
int len)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < len; i++)
|
||||||
|
dst[i] += src[i] * mul;
|
||||||
|
}
|
||||||
|
|
||||||
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
|
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
|
||||||
{
|
{
|
||||||
fdsp->vector_fmul = vector_fmul_c;
|
fdsp->vector_fmul = vector_fmul_c;
|
||||||
|
fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
|
||||||
|
|
||||||
#if ARCH_ARM
|
#if ARCH_ARM
|
||||||
ff_float_dsp_init_arm(fdsp);
|
ff_float_dsp_init_arm(fdsp);
|
||||||
|
|
|
@ -35,6 +35,22 @@ typedef struct AVFloatDSPContext {
|
||||||
*/
|
*/
|
||||||
void (*vector_fmul)(float *dst, const float *src0, const float *src1,
|
void (*vector_fmul)(float *dst, const float *src0, const float *src1,
|
||||||
int len);
|
int len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Multiply a vector of floats by a scalar float and add to
|
||||||
|
* destination vector. Source and destination vectors must
|
||||||
|
* overlap exactly or not at all.
|
||||||
|
*
|
||||||
|
* @param dst result vector
|
||||||
|
* constraints: 16-byte aligned
|
||||||
|
* @param src input vector
|
||||||
|
* constraints: 16-byte aligned
|
||||||
|
* @param mul scalar value
|
||||||
|
* @param len length of vector
|
||||||
|
* constraints: multiple of 4
|
||||||
|
*/
|
||||||
|
void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
|
||||||
|
int len);
|
||||||
} AVFloatDSPContext;
|
} AVFloatDSPContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Add table
Reference in a new issue