forked from FFmpeg/FFmpeg
swscale: split yuv2packedX_altivec into smaller functions.
This will likely lead to a considerable performance boost, since it removes a branch from the inner loop. Part of the Great Evil Plan to simplify swscale.
This commit is contained in:
parent
0d994b2f45
commit
dc179ec819
3 changed files with 46 additions and 14 deletions
|
@ -414,10 +414,14 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
|
||||||
|
|
||||||
/* The following list of supported dstFormat values should
|
/* The following list of supported dstFormat values should
|
||||||
* match what's found in the body of ff_yuv2packedX_altivec() */
|
* match what's found in the body of ff_yuv2packedX_altivec() */
|
||||||
if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf &&
|
if (!(c->flags & (SWS_BITEXACT | SWS_FULL_CHR_H_INT)) && !c->alpPixBuf) {
|
||||||
(c->dstFormat==PIX_FMT_ABGR || c->dstFormat==PIX_FMT_BGRA ||
|
switch (c->dstFormat) {
|
||||||
c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
|
case PIX_FMT_ABGR: c->yuv2packedX = ff_yuv2abgr_X_altivec; break;
|
||||||
c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)) {
|
case PIX_FMT_BGRA: c->yuv2packedX = ff_yuv2bgra_X_altivec; break;
|
||||||
c->yuv2packedX = ff_yuv2packedX_altivec;
|
case PIX_FMT_ARGB: c->yuv2packedX = ff_yuv2argb_X_altivec; break;
|
||||||
|
case PIX_FMT_RGBA: c->yuv2packedX = ff_yuv2rgba_X_altivec; break;
|
||||||
|
case PIX_FMT_BGR24: c->yuv2packedX = ff_yuv2bgr24_X_altivec; break;
|
||||||
|
case PIX_FMT_RGB24: c->yuv2packedX = ff_yuv2rgb24_X_altivec; break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -626,13 +626,13 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
static av_always_inline void
|
||||||
ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
|
ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
|
||||||
const int16_t **lumSrc, int lumFilterSize,
|
const int16_t **lumSrc, int lumFilterSize,
|
||||||
const int16_t *chrFilter, const int16_t **chrUSrc,
|
const int16_t *chrFilter, const int16_t **chrUSrc,
|
||||||
const int16_t **chrVSrc, int chrFilterSize,
|
const int16_t **chrVSrc, int chrFilterSize,
|
||||||
const int16_t **alpSrc, uint8_t *dest,
|
const int16_t **alpSrc, uint8_t *dest,
|
||||||
int dstW, int dstY)
|
int dstW, int dstY, enum PixelFormat target)
|
||||||
{
|
{
|
||||||
int i,j;
|
int i,j;
|
||||||
vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
|
vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
|
||||||
|
@ -706,7 +706,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
|
||||||
G = vec_packclp (G0,G1);
|
G = vec_packclp (G0,G1);
|
||||||
B = vec_packclp (B0,B1);
|
B = vec_packclp (B0,B1);
|
||||||
|
|
||||||
switch(c->dstFormat) {
|
switch(target) {
|
||||||
case PIX_FMT_ABGR: out_abgr (R,G,B,out); break;
|
case PIX_FMT_ABGR: out_abgr (R,G,B,out); break;
|
||||||
case PIX_FMT_BGRA: out_bgra (R,G,B,out); break;
|
case PIX_FMT_BGRA: out_bgra (R,G,B,out); break;
|
||||||
case PIX_FMT_RGBA: out_rgba (R,G,B,out); break;
|
case PIX_FMT_RGBA: out_rgba (R,G,B,out); break;
|
||||||
|
@ -785,7 +785,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
|
||||||
B = vec_packclp (B0,B1);
|
B = vec_packclp (B0,B1);
|
||||||
|
|
||||||
nout = (vector unsigned char *)scratch;
|
nout = (vector unsigned char *)scratch;
|
||||||
switch(c->dstFormat) {
|
switch(target) {
|
||||||
case PIX_FMT_ABGR: out_abgr (R,G,B,nout); break;
|
case PIX_FMT_ABGR: out_abgr (R,G,B,nout); break;
|
||||||
case PIX_FMT_BGRA: out_bgra (R,G,B,nout); break;
|
case PIX_FMT_BGRA: out_bgra (R,G,B,nout); break;
|
||||||
case PIX_FMT_RGBA: out_rgba (R,G,B,nout); break;
|
case PIX_FMT_RGBA: out_rgba (R,G,B,nout); break;
|
||||||
|
@ -803,3 +803,23 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
|
||||||
|
void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
|
||||||
|
const int16_t **lumSrc, int lumFilterSize, \
|
||||||
|
const int16_t *chrFilter, const int16_t **chrUSrc, \
|
||||||
|
const int16_t **chrVSrc, int chrFilterSize, \
|
||||||
|
const int16_t **alpSrc, uint8_t *dest, \
|
||||||
|
int dstW, int dstY) \
|
||||||
|
{ \
|
||||||
|
ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
|
||||||
|
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
|
||||||
|
alpSrc, dest, dstW, dstY, pixfmt); \
|
||||||
|
}
|
||||||
|
|
||||||
|
YUV2PACKEDX_WRAPPER(abgr, PIX_FMT_ABGR);
|
||||||
|
YUV2PACKEDX_WRAPPER(bgra, PIX_FMT_BGRA);
|
||||||
|
YUV2PACKEDX_WRAPPER(argb, PIX_FMT_ARGB);
|
||||||
|
YUV2PACKEDX_WRAPPER(rgba, PIX_FMT_RGBA);
|
||||||
|
YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24);
|
||||||
|
YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24);
|
||||||
|
|
|
@ -24,11 +24,19 @@
|
||||||
#ifndef PPC_YUV2RGB_ALTIVEC_H
|
#ifndef PPC_YUV2RGB_ALTIVEC_H
|
||||||
#define PPC_YUV2RGB_ALTIVEC_H 1
|
#define PPC_YUV2RGB_ALTIVEC_H 1
|
||||||
|
|
||||||
void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
|
#define YUV2PACKEDX_HEADER(suffix) \
|
||||||
const int16_t **lumSrc, int lumFilterSize,
|
void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
|
||||||
const int16_t *chrFilter, const int16_t **chrUSrc,
|
const int16_t **lumSrc, int lumFilterSize, \
|
||||||
const int16_t **chrVSrc, int chrFilterSize,
|
const int16_t *chrFilter, const int16_t **chrUSrc, \
|
||||||
const int16_t **alpSrc, uint8_t *dest,
|
const int16_t **chrVSrc, int chrFilterSize, \
|
||||||
|
const int16_t **alpSrc, uint8_t *dest, \
|
||||||
int dstW, int dstY);
|
int dstW, int dstY);
|
||||||
|
|
||||||
|
YUV2PACKEDX_HEADER(abgr);
|
||||||
|
YUV2PACKEDX_HEADER(bgra);
|
||||||
|
YUV2PACKEDX_HEADER(argb);
|
||||||
|
YUV2PACKEDX_HEADER(rgba);
|
||||||
|
YUV2PACKEDX_HEADER(rgb24);
|
||||||
|
YUV2PACKEDX_HEADER(bgr24);
|
||||||
|
|
||||||
#endif /* PPC_YUV2RGB_ALTIVEC_H */
|
#endif /* PPC_YUV2RGB_ALTIVEC_H */
|
||||||
|
|
Loading…
Add table
Reference in a new issue