Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 166:ec349ac7869b libavcodec
1% speedup
| author | michael |
|---|---|
| date | Tue, 20 Nov 2001 20:46:39 +0000 |
| parents | ea3b49451497 |
| children | 2d97f0157a79 |
comparison legend: equal | deleted | inserted | replaced
| 165:ea3b49451497 | 166:ec349ac7869b |
|---|---|
| 3084 #endif | 3084 #endif |
| 3085 if(levelFix) | 3085 if(levelFix) |
| 3086 { | 3086 { |
| 3087 #ifdef HAVE_MMX | 3087 #ifdef HAVE_MMX |
| 3088 asm volatile( | 3088 asm volatile( |
| 3089 "leal (%2,%2), %%eax \n\t" | 3089 "leal (%0,%2), %%eax \n\t" |
| 3090 "leal (%3,%3), %%ebx \n\t" | 3090 "leal (%1,%3), %%ebx \n\t" |
| 3091 "movq packedYOffset, %%mm2 \n\t" | 3091 "movq packedYOffset, %%mm2 \n\t" |
| 3092 "movq packedYScale, %%mm3 \n\t" | 3092 "movq packedYScale, %%mm3 \n\t" |
| 3093 "pxor %%mm4, %%mm4 \n\t" | 3093 "pxor %%mm4, %%mm4 \n\t" |
| 3094 | 3094 |
| 3095 #define SCALED_CPY \ | 3095 #define SCALED_CPY(src1, src2, dst1, dst2) \ |
| 3096 "movq (%0), %%mm0 \n\t"\ | 3096 "movq " #src1 ", %%mm0 \n\t"\ |
| 3097 "movq (%0), %%mm5 \n\t"\ | 3097 "movq " #src1 ", %%mm5 \n\t"\ |
| 3098 "punpcklbw %%mm4, %%mm0 \n\t"\ | 3098 "punpcklbw %%mm4, %%mm0 \n\t"\ |
| 3099 "punpckhbw %%mm4, %%mm5 \n\t"\ | 3099 "punpckhbw %%mm4, %%mm5 \n\t"\ |
| 3100 "psubw %%mm2, %%mm0 \n\t"\ | 3100 "psubw %%mm2, %%mm0 \n\t"\ |
| 3101 "psubw %%mm2, %%mm5 \n\t"\ | 3101 "psubw %%mm2, %%mm5 \n\t"\ |
| 3102 "movq (%0,%2), %%mm1 \n\t"\ | 3102 "movq " #src2 ", %%mm1 \n\t"\ |
| 3103 "psllw $6, %%mm0 \n\t"\ | 3103 "psllw $6, %%mm0 \n\t"\ |
| 3104 "psllw $6, %%mm5 \n\t"\ | 3104 "psllw $6, %%mm5 \n\t"\ |
| 3105 "pmulhw %%mm3, %%mm0 \n\t"\ | 3105 "pmulhw %%mm3, %%mm0 \n\t"\ |
| 3106 "movq (%0,%2), %%mm6 \n\t"\ | 3106 "movq " #src2 ", %%mm6 \n\t"\ |
| 3107 "pmulhw %%mm3, %%mm5 \n\t"\ | 3107 "pmulhw %%mm3, %%mm5 \n\t"\ |
| 3108 "punpcklbw %%mm4, %%mm1 \n\t"\ | 3108 "punpcklbw %%mm4, %%mm1 \n\t"\ |
| 3109 "punpckhbw %%mm4, %%mm6 \n\t"\ | 3109 "punpckhbw %%mm4, %%mm6 \n\t"\ |
| 3110 "psubw %%mm2, %%mm1 \n\t"\ | 3110 "psubw %%mm2, %%mm1 \n\t"\ |
| 3111 "psubw %%mm2, %%mm6 \n\t"\ | 3111 "psubw %%mm2, %%mm6 \n\t"\ |
| 3112 "psllw $6, %%mm1 \n\t"\ | 3112 "psllw $6, %%mm1 \n\t"\ |
| 3113 "psllw $6, %%mm6 \n\t"\ | 3113 "psllw $6, %%mm6 \n\t"\ |
| 3114 "pmulhw %%mm3, %%mm1 \n\t"\ | 3114 "pmulhw %%mm3, %%mm1 \n\t"\ |
| 3115 "pmulhw %%mm3, %%mm6 \n\t"\ | 3115 "pmulhw %%mm3, %%mm6 \n\t"\ |
| 3116 "addl %%eax, %0 \n\t"\ | |
| 3117 "packuswb %%mm5, %%mm0 \n\t"\ | 3116 "packuswb %%mm5, %%mm0 \n\t"\ |
| 3118 "packuswb %%mm6, %%mm1 \n\t"\ | 3117 "packuswb %%mm6, %%mm1 \n\t"\ |
| 3119 "movq %%mm0, (%1) \n\t"\ | 3118 "movq %%mm0, " #dst1 " \n\t"\ |
| 3120 "movq %%mm1, (%1, %3) \n\t"\ | 3119 "movq %%mm1, " #dst2 " \n\t"\ |
| 3121 | 3120 |
| 3122 SCALED_CPY | 3121 SCALED_CPY((%0) , (%0, %2) , (%1) , (%1, %3)) |
| 3123 "addl %%ebx, %1 \n\t" | 3122 SCALED_CPY((%0, %2, 2), (%%eax, %2, 2), (%1, %3, 2), (%%ebx, %3, 2)) |
| 3124 SCALED_CPY | 3123 SCALED_CPY((%0, %2, 4), (%%eax, %2, 4), (%1, %3, 4), (%%ebx, %3, 4)) |
| 3125 "addl %%ebx, %1 \n\t" | 3124 "leal (%%eax,%2,4), %%eax \n\t" |
| 3126 SCALED_CPY | 3125 "leal (%%ebx,%3,4), %%ebx \n\t" |
| 3127 "addl %%ebx, %1 \n\t" | 3126 SCALED_CPY((%%eax, %2), (%%eax, %2, 2), (%%ebx, %3), (%%ebx, %3, 2)) |
| 3128 SCALED_CPY | 3127 |
| 3129 | 3128 |
| 3130 : "+r"(src), | 3129 : : "r"(src), |
| 3131 "+r"(dst) | 3130 "r"(dst), |
| 3132 :"r" (srcStride), | 3131 "r" (srcStride), |
| 3133 "r" (dstStride) | 3132 "r" (dstStride) |
| 3134 : "%eax", "%ebx" | 3133 : "%eax", "%ebx" |
| 3135 ); | 3134 ); |
| 3136 #else | 3135 #else |
| 3137 for(i=0; i<8; i++) | 3136 for(i=0; i<8; i++) |
| 3141 } | 3140 } |
| 3142 else | 3141 else |
| 3143 { | 3142 { |
| 3144 #ifdef HAVE_MMX | 3143 #ifdef HAVE_MMX |
| 3145 asm volatile( | 3144 asm volatile( |
| 3146 "pushl %0 \n\t" | 3145 "leal (%0,%2), %%eax \n\t" |
| 3147 "pushl %1 \n\t" | 3146 "leal (%1,%3), %%ebx \n\t" |
| 3148 "leal (%2,%2), %%eax \n\t" | 3147 |
| 3149 "leal (%3,%3), %%ebx \n\t" | 3148 #define SIMPLE_CPY(src1, src2, dst1, dst2) \ |
| 3150 | 3149 "movq " #src1 ", %%mm0 \n\t"\ |
| 3151 #define SIMPLE_CPY \ | 3150 "movq " #src2 ", %%mm1 \n\t"\ |
| 3152 "movq (%0), %%mm0 \n\t"\ | 3151 "movq %%mm0, " #dst1 " \n\t"\ |
| 3153 "movq (%0,%2), %%mm1 \n\t"\ | 3152 "movq %%mm1, " #dst2 " \n\t"\ |
| 3154 "movq %%mm0, (%1) \n\t"\ | 3153 |
| 3155 "movq %%mm1, (%1, %3) \n\t"\ | 3154 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3)) |
| 3156 | 3155 SIMPLE_CPY((%0, %2, 2), (%%eax, %2, 2), (%1, %3, 2), (%%ebx, %3, 2)) |
| 3157 SIMPLE_CPY | 3156 SIMPLE_CPY((%0, %2, 4), (%%eax, %2, 4), (%1, %3, 4), (%%ebx, %3, 4)) |
| 3158 "addl %%eax, %0 \n\t" | 3157 "leal (%%eax,%2,4), %%eax \n\t" |
| 3159 "addl %%ebx, %1 \n\t" | 3158 "leal (%%ebx,%3,4), %%ebx \n\t" |
| 3160 SIMPLE_CPY | 3159 SIMPLE_CPY((%%eax, %2), (%%eax, %2, 2), (%%ebx, %3), (%%ebx, %3, 2)) |
| 3161 "addl %%eax, %0 \n\t" | 3160 |
| 3162 "addl %%ebx, %1 \n\t" | |
| 3163 SIMPLE_CPY | |
| 3164 "addl %%eax, %0 \n\t" | |
| 3165 "addl %%ebx, %1 \n\t" | |
| 3166 SIMPLE_CPY | |
| 3167 | |
| 3168 "popl %1 \n\t" | |
| 3169 "popl %0 \n\t" | |
| 3170 : : "r" (src), | 3161 : : "r" (src), |
| 3171 "r" (dst), | 3162 "r" (dst), |
| 3172 "r" (srcStride), | 3163 "r" (srcStride), |
| 3173 "r" (dstStride) | 3164 "r" (dstStride) |
| 3174 : "%eax", "%ebx" | 3165 : "%eax", "%ebx" |
