diff --git a/libavcodec/arm/vp9mc_neon.S b/libavcodec/arm/vp9mc_neon.S index bd8cda7c30..2ec729bb31 100644 --- a/libavcodec/arm/vp9mc_neon.S +++ b/libavcodec/arm/vp9mc_neon.S @@ -279,11 +279,13 @@ function \type\()_8tap_\size\()h_\idx1\idx2 sub r1, r1, r5 .endif @ size >= 16 loads two qwords and increments r2, - @ for size 4/8 it's enough with one qword and no - @ postincrement + @ size 4 loads 1 d word, increments r2 and loads 1 32-bit lane + @ for size 8 it's enough with one qword and no postincrement .if \size >= 16 sub r3, r3, r5 sub r3, r3, #8 +.elseif \size == 4 + sub r3, r3, #8 .endif @ Load the filter vector vld1.16 {q0}, [r12,:128] @@ -295,9 +297,14 @@ function \type\()_8tap_\size\()h_\idx1\idx2 .if \size >= 16 vld1.8 {d18, d19, d20}, [r2]! vld1.8 {d24, d25, d26}, [r7]! -.else +.elseif \size == 8 vld1.8 {q9}, [r2] vld1.8 {q12}, [r7] +.else @ size == 4 + vld1.8 {d18}, [r2]! + vld1.8 {d24}, [r7]! + vld1.32 {d19[0]}, [r2] + vld1.32 {d25[0]}, [r7] .endif vmovl.u8 q8, d18 vmovl.u8 q9, d19