Mercurial > libavcodec.hg
diff x86/vp8dsp.asm @ 12340:2d15f62f4f8a libavcodec
VP8: move zeroing of luma DC block into the WHT
Lets us do the zeroing in asm instead of C.
Also makes it consistent with the way the regular iDCT code does it.
| author | darkshikari |
|---|---|
| date | Mon, 02 Aug 2010 20:18:09 +0000 |
| parents | 435319d67bd8 |
| children | 4f13b2ded34d |
line wrap: on
line diff
--- a/x86/vp8dsp.asm Mon Aug 02 09:44:53 2010 +0000
+++ b/x86/vp8dsp.asm Mon Aug 02 20:18:09 2010 +0000
@@ -1186,12 +1186,23 @@
     SWAP %1, %4, %3
 %endmacro
 
-INIT_MMX
-cglobal vp8_luma_dc_wht_mmx, 2,3
+%macro VP8_DC_WHT 1
+cglobal vp8_luma_dc_wht_%1, 2,3
     movq m0, [r1]
     movq m1, [r1+8]
     movq m2, [r1+16]
     movq m3, [r1+24]
+%ifidn %1, sse
+    xorps xmm0, xmm0
+    movaps [r1+ 0], xmm0
+    movaps [r1+16], xmm0
+%else
+    pxor m4, m4
+    movq [r1+ 0], m4
+    movq [r1+ 8], m4
+    movq [r1+16], m4
+    movq [r1+24], m4
+%endif
     HADAMARD4_1D 0, 1, 2, 3
     TRANSPOSE4x4W 0, 1, 2, 3, 4
     paddw m0, [pw_3]
@@ -1203,6 +1214,11 @@
     SCATTER_WHT 0, 1, 0
     SCATTER_WHT 2, 3, 2
     RET
+%endmacro
+
+INIT_MMX
+VP8_DC_WHT mmx
+VP8_DC_WHT sse
 
 ;-----------------------------------------------------------------------------
 ; void vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim);
