diff --git a/configure b/configure index c9eedaf974..865851d9c1 100755 --- a/configure +++ b/configure @@ -3064,7 +3064,6 @@ fi if enabled small; then add_cflags $size_cflags - optimizations="small" elif enabled optimizations; then add_cflags $speed_cflags else @@ -3204,6 +3203,7 @@ if enabled sparc; then fi echo "debug symbols ${debug-no}" echo "strip symbols ${stripping-no}" +echo "optimize for size ${small-no}" echo "optimizations ${optimizations-no}" echo "static ${static-no}" echo "shared ${shared-no}" diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 5170a559b0..aed5219649 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -1682,7 +1682,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_ty uint64_t tr_high; if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ const int topright_avail= (h->topright_samples_available<mb_y || linesize <= block_offset[i]); if(!topright_avail){ if (pixel_shift) { tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL; diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index de57b58d00..7579cb16ae 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -15,6 +15,7 @@ YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \ x86/h264_idct.o \ x86/h264_idct_10bit.o \ x86/h264_weight.o \ + x86/h264_weight_10bit.o \ YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \ x86/h264_intrapred_10bit.o diff --git a/libavcodec/x86/h264_weight_10bit.asm b/libavcodec/x86/h264_weight_10bit.asm new file mode 100644 index 0000000000..1c58d72d94 --- /dev/null +++ b/libavcodec/x86/h264_weight_10bit.asm @@ -0,0 +1,321 @@ +;***************************************************************************** +;* MMX/SSE2/AVX-optimized 10-bit H.264 weighted prediction code +;***************************************************************************** +;* Copyright (C) 2005-2011 x264 project +;* +;* Authors: Daniel Kang +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +SECTION_RODATA 32 + +pw_pixel_max: times 8 dw ((1 << 10)-1) +sq_1: dq 1 + dq 0 + +cextern pw_1 + +SECTION .text + +;----------------------------------------------------------------------------- +; void h264_weight(uint8_t *dst, int stride, int log2_denom, +; int weight, int offset); +;----------------------------------------------------------------------------- +%ifdef ARCH_X86_32 +DECLARE_REG_TMP 2 +%else +DECLARE_REG_TMP 10 +%endif + +%macro WEIGHT_PROLOGUE 1 + mov t0, %1 +.prologue + PROLOGUE 0,5,8 + movifnidn r0, r0mp + movifnidn r1d, r1m + movifnidn r3d, r3m + movifnidn r4d, r4m +%endmacro + +%macro WEIGHT_SETUP 1 + mova m0, [pw_1] + movd m2, r2m + pslld m0, m2 ; 1<h264_idct8_add4 = ff_h264_idct8_add4_10_sse2; #endif + c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_10_sse2; + c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_10_sse2; + c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_10_sse2; + c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_10_sse2; + c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_10_sse2; + c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_10_sse2; + c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_10_sse2; + c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_10_sse2; + + c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_10_sse2; + c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_10_sse2; + c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_10_sse2; + c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_10_sse2; + c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_10_sse2; + c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_10_sse2; + c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_10_sse2; + c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_10_sse2; + c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_sse2; c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_sse2; #if HAVE_ALIGNED_STACK @@ -463,6 +505,25 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; #endif } + if (mm_flags&AV_CPU_FLAG_SSE4) { + c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_10_sse4; + c->weight_h264_pixels_tab[1] = ff_h264_weight_16x8_10_sse4; + c->weight_h264_pixels_tab[2] = ff_h264_weight_8x16_10_sse4; + c->weight_h264_pixels_tab[3] = ff_h264_weight_8x8_10_sse4; + c->weight_h264_pixels_tab[4] = ff_h264_weight_8x4_10_sse4; + c->weight_h264_pixels_tab[5] = ff_h264_weight_4x8_10_sse4; + c->weight_h264_pixels_tab[6] = ff_h264_weight_4x4_10_sse4; + c->weight_h264_pixels_tab[7] = ff_h264_weight_4x2_10_sse4; + + c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_10_sse4; + c->biweight_h264_pixels_tab[1] = ff_h264_biweight_16x8_10_sse4; + c->biweight_h264_pixels_tab[2] = ff_h264_biweight_8x16_10_sse4; + c->biweight_h264_pixels_tab[3] = ff_h264_biweight_8x8_10_sse4; + c->biweight_h264_pixels_tab[4] = ff_h264_biweight_8x4_10_sse4; + c->biweight_h264_pixels_tab[5] = ff_h264_biweight_4x8_10_sse4; + c->biweight_h264_pixels_tab[6] = ff_h264_biweight_4x4_10_sse4; + c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_10_sse4; + } #if HAVE_AVX if (mm_flags&AV_CPU_FLAG_AVX) { c->h264_idct_dc_add = diff --git a/libavformat/isom.c b/libavformat/isom.c index 33a448da34..09ee23bdfd 100644 --- a/libavformat/isom.c +++ b/libavformat/isom.c @@ -246,7 +246,7 @@ const AVCodecTag codec_movaudio_tags[] = { { CODEC_ID_AC3, MKTAG('a', 'c', '-', '3') }, /* ETSI TS 102 366 Annex F */ { CODEC_ID_AC3, MKTAG('s', 'a', 'c', '3') }, /* Nero Recode */ { CODEC_ID_DTS, MKTAG('d', 't', 's', 'c') }, /* mp4ra.org */ - { CODEC_ID_DTS, MKTAG('D', 'T', 'S', ' ') }, /* non standard */ + { CODEC_ID_DTS, MKTAG('D', 'T', 'S', ' ') }, /* non-standard */ { CODEC_ID_AMR_NB, MKTAG('s', 'a', 'm', 'r') }, /* AMR-NB 3gp */ { CODEC_ID_AMR_WB, MKTAG('s', 'a', 'w', 'b') }, /* AMR-WB 3gp */ diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c index 4b26ba67c2..870636ea05 100644 --- a/libswscale/bfin/swscale_bfin.c +++ b/libswscale/bfin/swscale_bfin.c @@ -78,8 +78,6 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i void ff_bfin_get_unscaled_swscale(SwsContext *c) { - SwsFunc swScale = c->swScale; - if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_UYVY422) { av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n"); c->swScale = uyvytoyv12_unscaled; diff --git a/tests/codec-regression.sh b/tests/codec-regression.sh index b210231aea..981cc2f74c 100755 --- a/tests/codec-regression.sh +++ b/tests/codec-regression.sh @@ -11,8 +11,6 @@ set -e eval do_$test=y -rm -f "$logfile" - # generate reference for quality check if [ -n "$do_vref" ]; then do_ffmpeg $raw_ref -f image2 -vcodec pgmyuv -i $raw_src -an -f rawvideo @@ -280,14 +278,14 @@ fi if [ -n "$do_mp2" ] ; then do_audio_encoding mp2.mp2 do_audio_decoding -$tiny_psnr $pcm_dst $pcm_ref 2 1924 >> $logfile +$tiny_psnr $pcm_dst $pcm_ref 2 1924 fi if [ -n "$do_ac3_fixed" ] ; then do_audio_encoding ac3.rm "-vn -acodec ac3_fixed" # binaries configured with --disable-sse decode ac3 differently #do_audio_decoding -#$tiny_psnr $pcm_dst $pcm_ref 2 1024 >> $logfile +#$tiny_psnr $pcm_dst $pcm_ref 2 1024 fi if [ -n "$do_g726" ] ; then @@ -333,12 +331,12 @@ fi if [ -n "$do_wmav1" ] ; then do_audio_encoding wmav1.asf "-acodec wmav1" do_ffmpeg_nomd5 $pcm_dst $DEC_OPTS -i $target_path/$file -f wav -$tiny_psnr $pcm_dst $pcm_ref 2 8192 >> $logfile +$tiny_psnr $pcm_dst $pcm_ref 2 8192 fi if [ -n "$do_wmav2" ] ; then do_audio_encoding wmav2.asf "-acodec wmav2" do_ffmpeg_nomd5 $pcm_dst $DEC_OPTS -i $target_path/$file -f wav -$tiny_psnr $pcm_dst $pcm_ref 2 8192 >> $logfile +$tiny_psnr $pcm_dst $pcm_ref 2 8192 fi #if [ -n "$do_vorbis" ] ; then diff --git a/tests/fate-run.sh b/tests/fate-run.sh index 366145d22f..2c744b880f 100755 --- a/tests/fate-run.sh +++ b/tests/fate-run.sh @@ -76,9 +76,6 @@ pcm(){ regtest(){ t="${test#$2-}" ref=${base}/ref/$2/$t - cleanfiles="$cleanfiles $outfile $errfile" - outfile=tests/data/regression/$2/$t - errfile=tests/data/$t.$2.err ${base}/${1}-regression.sh $t $2 $3 "$target_exec" "$target_path" "$threads" "$thread_type" } diff --git a/tests/lavf-regression.sh b/tests/lavf-regression.sh index 39e752b3c6..d7e684032c 100755 --- a/tests/lavf-regression.sh +++ b/tests/lavf-regression.sh @@ -31,9 +31,9 @@ do_image_formats() mkdir -p "$outfile" file=${outfile}%02d.$1 run_ffmpeg $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src $2 $ENC_OPTS $3 -t 0.5 -y -qscale 10 $target_path/$file - do_md5sum ${outfile}02.$1 >> $logfile + do_md5sum ${outfile}02.$1 do_ffmpeg_crc $file $DEC_OPTS $3 -i $target_path/$file - wc -c ${outfile}02.$1 >> $logfile + wc -c ${outfile}02.$1 } do_audio_only() @@ -43,8 +43,6 @@ do_audio_only() do_ffmpeg_crc $file $DEC_OPTS $4 -i $target_path/$file } -rm -f "$logfile" - if [ -n "$do_avi" ] ; then do_lavf avi fi diff --git a/tests/lavfi-regression.sh b/tests/lavfi-regression.sh index 0322134163..a5a06e1d00 100755 --- a/tests/lavfi-regression.sh +++ b/tests/lavfi-regression.sh @@ -11,15 +11,13 @@ set -e eval do_$test=y -rm -f "$logfile" - do_video_filter() { label=$1 filters=$2 shift 2 - printf '%-20s' $label >>$logfile + printf '%-20s' $label run_ffmpeg $DEC_OPTS -f image2 -vcodec pgmyuv -i $raw_src \ - $ENC_OPTS -vf "$filters" -vcodec rawvideo $* -f nut md5: >>$logfile + $ENC_OPTS -vf "$filters" -vcodec rawvideo $* -f nut md5: } do_lavfi() { diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh index e57cdf111e..979157bcf9 100755 --- a/tests/regression-funcs.sh +++ b/tests/regression-funcs.sh @@ -15,10 +15,7 @@ datadir="./tests/data" target_datadir="${target_path}/${datadir}" this="$test.$test_ref" -logdir="$datadir/regression/$test_ref" -logfile="$logdir/$test" outfile="$datadir/$test_ref/" -errfile="$datadir/$this.err" # various files ffmpeg="$target_exec ${target_path}/ffmpeg" @@ -37,12 +34,8 @@ trap 'rm -f -- $cleanfiles' EXIT mkdir -p "$datadir" mkdir -p "$outfile" -mkdir -p "$logdir" - -(exec >&3) 2>/dev/null || exec 3>&2 [ "${V-0}" -gt 0 ] && echov=echov || echov=: -[ "${V-0}" -gt 1 ] || exec 2>$errfile echov(){ echo "$@" >&3 @@ -67,13 +60,13 @@ do_ffmpeg() shift set -- $* ${target_path}/$f run_ffmpeg $* - do_md5sum $f >> $logfile + do_md5sum $f if [ $f = $raw_dst ] ; then - $tiny_psnr $f $raw_ref >> $logfile + $tiny_psnr $f $raw_ref elif [ $f = $pcm_dst ] ; then - $tiny_psnr $f $pcm_ref 2 >> $logfile + $tiny_psnr $f $pcm_ref 2 else - wc -c $f >> $logfile + wc -c $f fi } @@ -84,11 +77,11 @@ do_ffmpeg_nomd5() set -- $* ${target_path}/$f run_ffmpeg $* if [ $f = $raw_dst ] ; then - $tiny_psnr $f $raw_ref >> $logfile + $tiny_psnr $f $raw_ref elif [ $f = $pcm_dst ] ; then - $tiny_psnr $f $pcm_ref 2 >> $logfile + $tiny_psnr $f $pcm_ref 2 else - wc -c $f >> $logfile + wc -c $f fi } @@ -97,7 +90,7 @@ do_ffmpeg_crc() f="$1" shift run_ffmpeg $* -f crc "$target_crcfile" - echo "$f $(cat $crcfile)" >> $logfile + echo "$f $(cat $crcfile)" } do_video_decoding()