comparison libpostproc/postprocess.c @ 166:ec349ac7869b libavcodec

1% speedup
author michael
date Tue, 20 Nov 2001 20:46:39 +0000
parents ea3b49451497
children 2d97f0157a79
comparison
legend: equal | deleted | inserted | replaced
left column: 165:ea3b49451497 (parent) | right column: 166:ec349ac7869b (this revision)
3084 #endif 3084 #endif
3085 if(levelFix) 3085 if(levelFix)
3086 { 3086 {
3087 #ifdef HAVE_MMX 3087 #ifdef HAVE_MMX
3088 asm volatile( 3088 asm volatile(
3089 "leal (%2,%2), %%eax \n\t" 3089 "leal (%0,%2), %%eax \n\t"
3090 "leal (%3,%3), %%ebx \n\t" 3090 "leal (%1,%3), %%ebx \n\t"
3091 "movq packedYOffset, %%mm2 \n\t" 3091 "movq packedYOffset, %%mm2 \n\t"
3092 "movq packedYScale, %%mm3 \n\t" 3092 "movq packedYScale, %%mm3 \n\t"
3093 "pxor %%mm4, %%mm4 \n\t" 3093 "pxor %%mm4, %%mm4 \n\t"
3094 3094
3095 #define SCALED_CPY \ 3095 #define SCALED_CPY(src1, src2, dst1, dst2) \
3096 "movq (%0), %%mm0 \n\t"\ 3096 "movq " #src1 ", %%mm0 \n\t"\
3097 "movq (%0), %%mm5 \n\t"\ 3097 "movq " #src1 ", %%mm5 \n\t"\
3098 "punpcklbw %%mm4, %%mm0 \n\t"\ 3098 "punpcklbw %%mm4, %%mm0 \n\t"\
3099 "punpckhbw %%mm4, %%mm5 \n\t"\ 3099 "punpckhbw %%mm4, %%mm5 \n\t"\
3100 "psubw %%mm2, %%mm0 \n\t"\ 3100 "psubw %%mm2, %%mm0 \n\t"\
3101 "psubw %%mm2, %%mm5 \n\t"\ 3101 "psubw %%mm2, %%mm5 \n\t"\
3102 "movq (%0,%2), %%mm1 \n\t"\ 3102 "movq " #src2 ", %%mm1 \n\t"\
3103 "psllw $6, %%mm0 \n\t"\ 3103 "psllw $6, %%mm0 \n\t"\
3104 "psllw $6, %%mm5 \n\t"\ 3104 "psllw $6, %%mm5 \n\t"\
3105 "pmulhw %%mm3, %%mm0 \n\t"\ 3105 "pmulhw %%mm3, %%mm0 \n\t"\
3106 "movq (%0,%2), %%mm6 \n\t"\ 3106 "movq " #src2 ", %%mm6 \n\t"\
3107 "pmulhw %%mm3, %%mm5 \n\t"\ 3107 "pmulhw %%mm3, %%mm5 \n\t"\
3108 "punpcklbw %%mm4, %%mm1 \n\t"\ 3108 "punpcklbw %%mm4, %%mm1 \n\t"\
3109 "punpckhbw %%mm4, %%mm6 \n\t"\ 3109 "punpckhbw %%mm4, %%mm6 \n\t"\
3110 "psubw %%mm2, %%mm1 \n\t"\ 3110 "psubw %%mm2, %%mm1 \n\t"\
3111 "psubw %%mm2, %%mm6 \n\t"\ 3111 "psubw %%mm2, %%mm6 \n\t"\
3112 "psllw $6, %%mm1 \n\t"\ 3112 "psllw $6, %%mm1 \n\t"\
3113 "psllw $6, %%mm6 \n\t"\ 3113 "psllw $6, %%mm6 \n\t"\
3114 "pmulhw %%mm3, %%mm1 \n\t"\ 3114 "pmulhw %%mm3, %%mm1 \n\t"\
3115 "pmulhw %%mm3, %%mm6 \n\t"\ 3115 "pmulhw %%mm3, %%mm6 \n\t"\
3116 "addl %%eax, %0 \n\t"\
3117 "packuswb %%mm5, %%mm0 \n\t"\ 3116 "packuswb %%mm5, %%mm0 \n\t"\
3118 "packuswb %%mm6, %%mm1 \n\t"\ 3117 "packuswb %%mm6, %%mm1 \n\t"\
3119 "movq %%mm0, (%1) \n\t"\ 3118 "movq %%mm0, " #dst1 " \n\t"\
3120 "movq %%mm1, (%1, %3) \n\t"\ 3119 "movq %%mm1, " #dst2 " \n\t"\
3121 3120
3122 SCALED_CPY 3121 SCALED_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
3123 "addl %%ebx, %1 \n\t" 3122 SCALED_CPY((%0, %2, 2), (%%eax, %2, 2), (%1, %3, 2), (%%ebx, %3, 2))
3124 SCALED_CPY 3123 SCALED_CPY((%0, %2, 4), (%%eax, %2, 4), (%1, %3, 4), (%%ebx, %3, 4))
3125 "addl %%ebx, %1 \n\t" 3124 "leal (%%eax,%2,4), %%eax \n\t"
3126 SCALED_CPY 3125 "leal (%%ebx,%3,4), %%ebx \n\t"
3127 "addl %%ebx, %1 \n\t" 3126 SCALED_CPY((%%eax, %2), (%%eax, %2, 2), (%%ebx, %3), (%%ebx, %3, 2))
3128 SCALED_CPY 3127
3129 3128
3130 : "+r"(src), 3129 : : "r"(src),
3131 "+r"(dst) 3130 "r"(dst),
3132 :"r" (srcStride), 3131 "r" (srcStride),
3133 "r" (dstStride) 3132 "r" (dstStride)
3134 : "%eax", "%ebx" 3133 : "%eax", "%ebx"
3135 ); 3134 );
3136 #else 3135 #else
3137 for(i=0; i<8; i++) 3136 for(i=0; i<8; i++)
3141 } 3140 }
3142 else 3141 else
3143 { 3142 {
3144 #ifdef HAVE_MMX 3143 #ifdef HAVE_MMX
3145 asm volatile( 3144 asm volatile(
3146 "pushl %0 \n\t" 3145 "leal (%0,%2), %%eax \n\t"
3147 "pushl %1 \n\t" 3146 "leal (%1,%3), %%ebx \n\t"
3148 "leal (%2,%2), %%eax \n\t" 3147
3149 "leal (%3,%3), %%ebx \n\t" 3148 #define SIMPLE_CPY(src1, src2, dst1, dst2) \
3150 3149 "movq " #src1 ", %%mm0 \n\t"\
3151 #define SIMPLE_CPY \ 3150 "movq " #src2 ", %%mm1 \n\t"\
3152 "movq (%0), %%mm0 \n\t"\ 3151 "movq %%mm0, " #dst1 " \n\t"\
3153 "movq (%0,%2), %%mm1 \n\t"\ 3152 "movq %%mm1, " #dst2 " \n\t"\
3154 "movq %%mm0, (%1) \n\t"\ 3153
3155 "movq %%mm1, (%1, %3) \n\t"\ 3154 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
3156 3155 SIMPLE_CPY((%0, %2, 2), (%%eax, %2, 2), (%1, %3, 2), (%%ebx, %3, 2))
3157 SIMPLE_CPY 3156 SIMPLE_CPY((%0, %2, 4), (%%eax, %2, 4), (%1, %3, 4), (%%ebx, %3, 4))
3158 "addl %%eax, %0 \n\t" 3157 "leal (%%eax,%2,4), %%eax \n\t"
3159 "addl %%ebx, %1 \n\t" 3158 "leal (%%ebx,%3,4), %%ebx \n\t"
3160 SIMPLE_CPY 3159 SIMPLE_CPY((%%eax, %2), (%%eax, %2, 2), (%%ebx, %3), (%%ebx, %3, 2))
3161 "addl %%eax, %0 \n\t" 3160
3162 "addl %%ebx, %1 \n\t"
3163 SIMPLE_CPY
3164 "addl %%eax, %0 \n\t"
3165 "addl %%ebx, %1 \n\t"
3166 SIMPLE_CPY
3167
3168 "popl %1 \n\t"
3169 "popl %0 \n\t"
3170 : : "r" (src), 3161 : : "r" (src),
3171 "r" (dst), 3162 "r" (dst),
3172 "r" (srcStride), 3163 "r" (srcStride),
3173 "r" (dstStride) 3164 "r" (dstStride)
3174 : "%eax", "%ebx" 3165 : "%eax", "%ebx"