fixed a bug in the tmp buffer
fixed the color range for yuv fixed the width %8!=0 bug (another 1% speed loss) Originally committed as revision 2286 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
parent
0f25d72b3f
commit
658a85f26f
3 changed files with 103 additions and 8 deletions
|
@ -122,7 +122,7 @@ static uint64_t temp3=0;
|
||||||
static uint64_t temp4=0;
|
static uint64_t temp4=0;
|
||||||
static uint64_t temp5=0;
|
static uint64_t temp5=0;
|
||||||
static uint64_t pQPb=0;
|
static uint64_t pQPb=0;
|
||||||
static uint8_t tempBlock[16*16];
|
static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
|
||||||
|
|
||||||
int hFlatnessThreshold= 56 - 16;
|
int hFlatnessThreshold= 56 - 16;
|
||||||
int vFlatnessThreshold= 56 - 16;
|
int vFlatnessThreshold= 56 - 16;
|
||||||
|
@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01;
|
||||||
|
|
||||||
int maxAllowedY=255;
|
int maxAllowedY=255;
|
||||||
//FIXME can never make a movie´s black brighter (anyone needs that?)
|
//FIXME can never make a movie´s black brighter (anyone needs that?)
|
||||||
int minAllowedY=0;
|
int minAllowedY=16;
|
||||||
|
|
||||||
#ifdef TIMING
|
#ifdef TIMING
|
||||||
static inline long long rdtsc()
|
static inline long long rdtsc()
|
||||||
|
@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
static uint8_t *tempDst= NULL;
|
static uint8_t *tempDst= NULL;
|
||||||
static uint8_t *tempSrc= NULL;
|
static uint8_t *tempSrc= NULL;
|
||||||
|
|
||||||
|
/* Temporary buffers for handling the last block */
|
||||||
|
static uint8_t *tempDstBlock= NULL;
|
||||||
|
static uint8_t *tempSrcBlock= NULL;
|
||||||
|
|
||||||
|
uint8_t *dstBlockPtrBackup;
|
||||||
|
uint8_t *srcBlockPtrBackup;
|
||||||
|
|
||||||
#ifdef TIMING
|
#ifdef TIMING
|
||||||
long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
|
long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
|
||||||
sumTime= rdtsc();
|
sumTime= rdtsc();
|
||||||
|
@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
{
|
{
|
||||||
tempDst= (uint8_t*)memalign(8, 1024*24);
|
tempDst= (uint8_t*)memalign(8, 1024*24);
|
||||||
tempSrc= (uint8_t*)memalign(8, 1024*24);
|
tempSrc= (uint8_t*)memalign(8, 1024*24);
|
||||||
|
tempDstBlock= (uint8_t*)memalign(8, 1024*24);
|
||||||
|
tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!yHistogram)
|
if(!yHistogram)
|
||||||
|
@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
int i;
|
int i;
|
||||||
yHistogram= (uint64_t*)malloc(8*256);
|
yHistogram= (uint64_t*)malloc(8*256);
|
||||||
for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
|
for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
|
||||||
|
|
||||||
|
if(mode & FULL_Y_RANGE)
|
||||||
|
{
|
||||||
|
maxAllowedY=255;
|
||||||
|
minAllowedY=0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!isColor)
|
if(!isColor)
|
||||||
|
@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
srcBlock= tempSrc;
|
srcBlock= tempSrc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// From this point on it is guranteed that we can read and write 16 lines downward
|
||||||
// finish 1 block before the next otherwise we´ll might have a problem
|
// finish 1 block before the next otherwise we´ll might have a problem
|
||||||
// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
|
// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
|
||||||
for(x=0; x<width; x+=BLOCK_SIZE)
|
for(x=0; x<width; x+=BLOCK_SIZE)
|
||||||
|
@ -2545,6 +2561,23 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
|
|
||||||
if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
|
if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
|
||||||
|
|
||||||
|
//can we mess with a 8x16 block, if not use a temp buffer, yes again
|
||||||
|
if(x+7 >= width)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
dstBlockPtrBackup= dstBlock;
|
||||||
|
srcBlockPtrBackup= srcBlock;
|
||||||
|
|
||||||
|
for(i=0;i<BLOCK_SIZE*2; i++)
|
||||||
|
{
|
||||||
|
memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
|
||||||
|
memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
|
||||||
|
}
|
||||||
|
|
||||||
|
dstBlock= tempDstBlock;
|
||||||
|
srcBlock= tempSrcBlock;
|
||||||
|
}
|
||||||
|
|
||||||
blockCopy(dstBlock + dstStride*5, dstStride,
|
blockCopy(dstBlock + dstStride*5, dstStride,
|
||||||
srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
|
srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
|
||||||
|
|
||||||
|
@ -2593,7 +2626,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check if we have a previous block to deblock it with dstBlock */
|
/* check if we have a previous block to deblock it with dstBlock */
|
||||||
if(x - 8 >= 0 && x<width)
|
if(x - 8 >= 0)
|
||||||
{
|
{
|
||||||
#ifdef MORE_TIMING
|
#ifdef MORE_TIMING
|
||||||
T0= rdtsc();
|
T0= rdtsc();
|
||||||
|
@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
dering(dstBlock - stride*9 + width-9, stride, QP);
|
dering(dstBlock - stride*9 + width-9, stride, QP);
|
||||||
//FIXME dering filter will not be applied to last block (bottom right)
|
//FIXME dering filter will not be applied to last block (bottom right)
|
||||||
|
|
||||||
|
/* did we use a tmp-block buffer */
|
||||||
|
if(x+7 >= width)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
dstBlock= dstBlockPtrBackup;
|
||||||
|
srcBlock= srcBlockPtrBackup;
|
||||||
|
|
||||||
|
for(i=0;i<BLOCK_SIZE*2; i++)
|
||||||
|
{
|
||||||
|
memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dstBlock+=8;
|
dstBlock+=8;
|
||||||
srcBlock+=8;
|
srcBlock+=8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* did we use a tmp buffer */
|
/* did we use a tmp buffer */
|
||||||
if(y+15 > height)
|
if(y+15 >= height)
|
||||||
{
|
{
|
||||||
uint8_t *dstBlock= &(dst[y*dstStride]);
|
uint8_t *dstBlock= &(dst[y*dstStride]);
|
||||||
memcpy(dstBlock, tempDst, dstStride*(height-y) );
|
memcpy(dstBlock, tempDst, dstStride*(height-y) );
|
||||||
|
|
|
@ -46,6 +46,9 @@
|
||||||
#define H_RK1_FILTER 0x1000 // 4096 (not implemented yet)
|
#define H_RK1_FILTER 0x1000 // 4096 (not implemented yet)
|
||||||
#define H_X1_FILTER 0x2000 // 8192
|
#define H_X1_FILTER 0x2000 // 8192
|
||||||
|
|
||||||
|
// select between full y range (255-0) or standart one (
|
||||||
|
#define FULL_Y_RANGE 0x8000 // 32768
|
||||||
|
|
||||||
//Deinterlacing Filters
|
//Deinterlacing Filters
|
||||||
#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536
|
#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536
|
||||||
#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072
|
#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072
|
||||||
|
|
|
@ -122,7 +122,7 @@ static uint64_t temp3=0;
|
||||||
static uint64_t temp4=0;
|
static uint64_t temp4=0;
|
||||||
static uint64_t temp5=0;
|
static uint64_t temp5=0;
|
||||||
static uint64_t pQPb=0;
|
static uint64_t pQPb=0;
|
||||||
static uint8_t tempBlock[16*16];
|
static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
|
||||||
|
|
||||||
int hFlatnessThreshold= 56 - 16;
|
int hFlatnessThreshold= 56 - 16;
|
||||||
int vFlatnessThreshold= 56 - 16;
|
int vFlatnessThreshold= 56 - 16;
|
||||||
|
@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01;
|
||||||
|
|
||||||
int maxAllowedY=255;
|
int maxAllowedY=255;
|
||||||
//FIXME can never make a movie´s black brighter (anyone needs that?)
|
//FIXME can never make a movie´s black brighter (anyone needs that?)
|
||||||
int minAllowedY=0;
|
int minAllowedY=16;
|
||||||
|
|
||||||
#ifdef TIMING
|
#ifdef TIMING
|
||||||
static inline long long rdtsc()
|
static inline long long rdtsc()
|
||||||
|
@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
static uint8_t *tempDst= NULL;
|
static uint8_t *tempDst= NULL;
|
||||||
static uint8_t *tempSrc= NULL;
|
static uint8_t *tempSrc= NULL;
|
||||||
|
|
||||||
|
/* Temporary buffers for handling the last block */
|
||||||
|
static uint8_t *tempDstBlock= NULL;
|
||||||
|
static uint8_t *tempSrcBlock= NULL;
|
||||||
|
|
||||||
|
uint8_t *dstBlockPtrBackup;
|
||||||
|
uint8_t *srcBlockPtrBackup;
|
||||||
|
|
||||||
#ifdef TIMING
|
#ifdef TIMING
|
||||||
long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
|
long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
|
||||||
sumTime= rdtsc();
|
sumTime= rdtsc();
|
||||||
|
@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
{
|
{
|
||||||
tempDst= (uint8_t*)memalign(8, 1024*24);
|
tempDst= (uint8_t*)memalign(8, 1024*24);
|
||||||
tempSrc= (uint8_t*)memalign(8, 1024*24);
|
tempSrc= (uint8_t*)memalign(8, 1024*24);
|
||||||
|
tempDstBlock= (uint8_t*)memalign(8, 1024*24);
|
||||||
|
tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!yHistogram)
|
if(!yHistogram)
|
||||||
|
@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
int i;
|
int i;
|
||||||
yHistogram= (uint64_t*)malloc(8*256);
|
yHistogram= (uint64_t*)malloc(8*256);
|
||||||
for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
|
for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
|
||||||
|
|
||||||
|
if(mode & FULL_Y_RANGE)
|
||||||
|
{
|
||||||
|
maxAllowedY=255;
|
||||||
|
minAllowedY=0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!isColor)
|
if(!isColor)
|
||||||
|
@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
srcBlock= tempSrc;
|
srcBlock= tempSrc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// From this point on it is guranteed that we can read and write 16 lines downward
|
||||||
// finish 1 block before the next otherwise we´ll might have a problem
|
// finish 1 block before the next otherwise we´ll might have a problem
|
||||||
// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
|
// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
|
||||||
for(x=0; x<width; x+=BLOCK_SIZE)
|
for(x=0; x<width; x+=BLOCK_SIZE)
|
||||||
|
@ -2545,6 +2561,23 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
|
|
||||||
if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
|
if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
|
||||||
|
|
||||||
|
//can we mess with a 8x16 block, if not use a temp buffer, yes again
|
||||||
|
if(x+7 >= width)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
dstBlockPtrBackup= dstBlock;
|
||||||
|
srcBlockPtrBackup= srcBlock;
|
||||||
|
|
||||||
|
for(i=0;i<BLOCK_SIZE*2; i++)
|
||||||
|
{
|
||||||
|
memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
|
||||||
|
memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
|
||||||
|
}
|
||||||
|
|
||||||
|
dstBlock= tempDstBlock;
|
||||||
|
srcBlock= tempSrcBlock;
|
||||||
|
}
|
||||||
|
|
||||||
blockCopy(dstBlock + dstStride*5, dstStride,
|
blockCopy(dstBlock + dstStride*5, dstStride,
|
||||||
srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
|
srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
|
||||||
|
|
||||||
|
@ -2593,7 +2626,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check if we have a previous block to deblock it with dstBlock */
|
/* check if we have a previous block to deblock it with dstBlock */
|
||||||
if(x - 8 >= 0 && x<width)
|
if(x - 8 >= 0)
|
||||||
{
|
{
|
||||||
#ifdef MORE_TIMING
|
#ifdef MORE_TIMING
|
||||||
T0= rdtsc();
|
T0= rdtsc();
|
||||||
|
@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
dering(dstBlock - stride*9 + width-9, stride, QP);
|
dering(dstBlock - stride*9 + width-9, stride, QP);
|
||||||
//FIXME dering filter will not be applied to last block (bottom right)
|
//FIXME dering filter will not be applied to last block (bottom right)
|
||||||
|
|
||||||
|
/* did we use a tmp-block buffer */
|
||||||
|
if(x+7 >= width)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
dstBlock= dstBlockPtrBackup;
|
||||||
|
srcBlock= srcBlockPtrBackup;
|
||||||
|
|
||||||
|
for(i=0;i<BLOCK_SIZE*2; i++)
|
||||||
|
{
|
||||||
|
memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dstBlock+=8;
|
dstBlock+=8;
|
||||||
srcBlock+=8;
|
srcBlock+=8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* did we use a tmp buffer */
|
/* did we use a tmp buffer */
|
||||||
if(y+15 > height)
|
if(y+15 >= height)
|
||||||
{
|
{
|
||||||
uint8_t *dstBlock= &(dst[y*dstStride]);
|
uint8_t *dstBlock= &(dst[y*dstStride]);
|
||||||
memcpy(dstBlock, tempDst, dstStride*(height-y) );
|
memcpy(dstBlock, tempDst, dstStride*(height-y) );
|
||||||
|
|
Loading…
Add table
Reference in a new issue