Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 119:b2f0e40866b1 libavcodec
optimizations (+2% speedup)
| author | michael |
|---|---|
| date | Tue, 23 Oct 2001 10:29:48 +0000 |
| parents | 3dd1950ac98d |
| children | b0b89f5d0288 |
comparison
equal
deleted
inserted
replaced
| 118:3dd1950ac98d | 119:b2f0e40866b1 |
|---|---|
| 212 static inline int isVertDC(uint8_t src[], int stride){ | 212 static inline int isVertDC(uint8_t src[], int stride){ |
| 213 int numEq= 0; | 213 int numEq= 0; |
| 214 int y; | 214 int y; |
| 215 src+= stride*4; // src points to begin of the 8x8 Block | 215 src+= stride*4; // src points to begin of the 8x8 Block |
| 216 #ifdef HAVE_MMX | 216 #ifdef HAVE_MMX |
| 217 asm volatile( | 217 asm volatile( |
| 218 "pushl %1\n\t" | 218 "leal (%1, %2), %%eax \n\t" |
| 219 "leal (%%eax, %2, 4), %%ebx \n\t" | |
| 220 // 0 1 2 3 4 5 6 7 8 9 | |
| 221 // %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2 | |
| 219 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F | 222 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F |
| 220 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D | 223 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D |
| 221 "movq (%1), %%mm0 \n\t" | 224 "movq (%1), %%mm0 \n\t" |
| 222 "addl %2, %1 \n\t" | 225 "movq (%%eax), %%mm1 \n\t" |
| 223 "movq (%1), %%mm1 \n\t" | |
| 224 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference | 226 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference |
| 225 "paddb %%mm7, %%mm0 \n\t" | 227 "paddb %%mm7, %%mm0 \n\t" |
| 226 "pcmpgtb %%mm6, %%mm0 \n\t" | 228 "pcmpgtb %%mm6, %%mm0 \n\t" |
| 227 | 229 |
| 228 "addl %2, %1 \n\t" | 230 "movq (%%eax,%2), %%mm2 \n\t" |
| 229 "movq (%1), %%mm2 \n\t" | |
| 230 "psubb %%mm2, %%mm1 \n\t" | 231 "psubb %%mm2, %%mm1 \n\t" |
| 231 "paddb %%mm7, %%mm1 \n\t" | 232 "paddb %%mm7, %%mm1 \n\t" |
| 232 "pcmpgtb %%mm6, %%mm1 \n\t" | 233 "pcmpgtb %%mm6, %%mm1 \n\t" |
| 233 "paddb %%mm1, %%mm0 \n\t" | 234 "paddb %%mm1, %%mm0 \n\t" |
| 234 | 235 |
| 235 "addl %2, %1 \n\t" | 236 "movq (%%eax, %2, 2), %%mm1 \n\t" |
| 236 "movq (%1), %%mm1 \n\t" | |
| 237 "psubb %%mm1, %%mm2 \n\t" | 237 "psubb %%mm1, %%mm2 \n\t" |
| 238 "paddb %%mm7, %%mm2 \n\t" | 238 "paddb %%mm7, %%mm2 \n\t" |
| 239 "pcmpgtb %%mm6, %%mm2 \n\t" | 239 "pcmpgtb %%mm6, %%mm2 \n\t" |
| 240 "paddb %%mm2, %%mm0 \n\t" | 240 "paddb %%mm2, %%mm0 \n\t" |
| 241 | 241 |
| 242 "addl %2, %1 \n\t" | 242 "movq (%1, %2, 4), %%mm2 \n\t" |
| 243 "movq (%1), %%mm2 \n\t" | |
| 244 "psubb %%mm2, %%mm1 \n\t" | 243 "psubb %%mm2, %%mm1 \n\t" |
| 245 "paddb %%mm7, %%mm1 \n\t" | 244 "paddb %%mm7, %%mm1 \n\t" |
| 246 "pcmpgtb %%mm6, %%mm1 \n\t" | 245 "pcmpgtb %%mm6, %%mm1 \n\t" |
| 247 "paddb %%mm1, %%mm0 \n\t" | 246 "paddb %%mm1, %%mm0 \n\t" |
| 248 | 247 |
| 249 "addl %2, %1 \n\t" | 248 "movq (%%ebx), %%mm1 \n\t" |
| 250 "movq (%1), %%mm1 \n\t" | |
| 251 "psubb %%mm1, %%mm2 \n\t" | 249 "psubb %%mm1, %%mm2 \n\t" |
| 252 "paddb %%mm7, %%mm2 \n\t" | 250 "paddb %%mm7, %%mm2 \n\t" |
| 253 "pcmpgtb %%mm6, %%mm2 \n\t" | 251 "pcmpgtb %%mm6, %%mm2 \n\t" |
| 254 "paddb %%mm2, %%mm0 \n\t" | 252 "paddb %%mm2, %%mm0 \n\t" |
| 255 | 253 |
| 256 "addl %2, %1 \n\t" | 254 "movq (%%ebx, %2), %%mm2 \n\t" |
| 257 "movq (%1), %%mm2 \n\t" | |
| 258 "psubb %%mm2, %%mm1 \n\t" | 255 "psubb %%mm2, %%mm1 \n\t" |
| 259 "paddb %%mm7, %%mm1 \n\t" | 256 "paddb %%mm7, %%mm1 \n\t" |
| 260 "pcmpgtb %%mm6, %%mm1 \n\t" | 257 "pcmpgtb %%mm6, %%mm1 \n\t" |
| 261 "paddb %%mm1, %%mm0 \n\t" | 258 "paddb %%mm1, %%mm0 \n\t" |
| 262 | 259 |
| 263 "addl %2, %1 \n\t" | 260 "movq (%%ebx, %2, 2), %%mm1 \n\t" |
| 264 "movq (%1), %%mm1 \n\t" | |
| 265 "psubb %%mm1, %%mm2 \n\t" | 261 "psubb %%mm1, %%mm2 \n\t" |
| 266 "paddb %%mm7, %%mm2 \n\t" | 262 "paddb %%mm7, %%mm2 \n\t" |
| 267 "pcmpgtb %%mm6, %%mm2 \n\t" | 263 "pcmpgtb %%mm6, %%mm2 \n\t" |
| 268 "paddb %%mm2, %%mm0 \n\t" | 264 "paddb %%mm2, %%mm0 \n\t" |
| 269 | 265 |
| 275 "psrlq $16, %%mm0 \n\t" | 271 "psrlq $16, %%mm0 \n\t" |
| 276 "paddb %%mm1, %%mm0 \n\t" | 272 "paddb %%mm1, %%mm0 \n\t" |
| 277 "movq %%mm0, %%mm1 \n\t" | 273 "movq %%mm0, %%mm1 \n\t" |
| 278 "psrlq $32, %%mm0 \n\t" | 274 "psrlq $32, %%mm0 \n\t" |
| 279 "paddb %%mm1, %%mm0 \n\t" | 275 "paddb %%mm1, %%mm0 \n\t" |
| 280 "popl %1\n\t" | |
| 281 "movd %%mm0, %0 \n\t" | 276 "movd %%mm0, %0 \n\t" |
| 282 : "=r" (numEq) | 277 : "=r" (numEq) |
| 283 : "r" (src), "r" (stride) | 278 : "r" (src), "r" (stride) |
| 284 ); | 279 ); |
| 285 // printf("%d\n", numEq); | 280 |
| 286 numEq= (256 - (numEq & 0xFF)) &0xFF; | 281 numEq= (256 - numEq) &0xFF; |
| 287 | |
| 288 // int asmEq= numEq; | |
| 289 // numEq=0; | |
| 290 // uint8_t *temp= src; | |
| 291 | 282 |
| 292 #else | 283 #else |
| 293 for(y=0; y<BLOCK_SIZE-1; y++) | 284 for(y=0; y<BLOCK_SIZE-1; y++) |
| 294 { | 285 { |
| 295 if(((src[0] - src[0+stride] + 1)&0xFFFF) < 3) numEq++; | 286 if(((src[0] - src[0+stride] + 1)&0xFFFF) < 3) numEq++; |
| 2489 int i; | 2480 int i; |
| 2490 if(levelFix) | 2481 if(levelFix) |
| 2491 { | 2482 { |
| 2492 #ifdef HAVE_MMX | 2483 #ifdef HAVE_MMX |
| 2493 asm volatile( | 2484 asm volatile( |
| 2494 "pushl %0 \n\t" | |
| 2495 "pushl %1 \n\t" | |
| 2496 "leal (%2,%2), %%eax \n\t" | 2485 "leal (%2,%2), %%eax \n\t" |
| 2497 "leal (%3,%3), %%ebx \n\t" | 2486 "leal (%3,%3), %%ebx \n\t" |
| 2498 "movq packedYOffset, %%mm2 \n\t" | 2487 "movq packedYOffset, %%mm2 \n\t" |
| 2499 "movq packedYScale, %%mm3 \n\t" | 2488 "movq packedYScale, %%mm3 \n\t" |
| 2500 "pxor %%mm4, %%mm4 \n\t" | 2489 "pxor %%mm4, %%mm4 \n\t" |
| 2532 "addl %%ebx, %1 \n\t" | 2521 "addl %%ebx, %1 \n\t" |
| 2533 SCALED_CPY | 2522 SCALED_CPY |
| 2534 "addl %%ebx, %1 \n\t" | 2523 "addl %%ebx, %1 \n\t" |
| 2535 SCALED_CPY | 2524 SCALED_CPY |
| 2536 | 2525 |
| 2537 "popl %1 \n\t" | 2526 : "+r"(src), |
| 2538 "popl %0 \n\t" | 2527 "+r"(dst) |
| 2539 : : "r" (src), | 2528 :"r" (srcStride), |
| 2540 "r" (dst), | |
| 2541 "r" (srcStride), | |
| 2542 "r" (dstStride) | 2529 "r" (dstStride) |
| 2543 : "%eax", "%ebx" | 2530 : "%eax", "%ebx" |
| 2544 ); | 2531 ); |
| 2545 #else | 2532 #else |
| 2546 for(i=0; i<numLines; i++) | 2533 for(i=0; i<numLines; i++) |
