Mercurial > libavcodec.hg
comparison libpostproc/postprocess.c @ 163:32e7f17a04a7 libavcodec
faster mmx2 / 3dnow deblocking filter
brightness_debug (draws luminance histogram & autodetected white/black level)
| author | michael |
|---|---|
| date | Mon, 19 Nov 2001 22:20:30 +0000 |
| parents | d1a4f4ca7178 |
| children | dedb3aef2bee |
comparison
equal
deleted
inserted
replaced
| 162:de80712db90b | 163:32e7f17a04a7 |
|---|---|
| 19 /* | 19 /* |
| 20 C MMX MMX2 3DNow | 20 C MMX MMX2 3DNow |
| 21 isVertDC Ec Ec | 21 isVertDC Ec Ec |
| 22 isVertMinMaxOk Ec Ec | 22 isVertMinMaxOk Ec Ec |
| 23 doVertLowPass E e e | 23 doVertLowPass E e e |
| 24 doVertDefFilter Ec Ec Ec | 24 doVertDefFilter Ec Ec e e |
| 25 isHorizDC Ec Ec | 25 isHorizDC Ec Ec |
| 26 isHorizMinMaxOk a E | 26 isHorizMinMaxOk a E |
| 27 doHorizLowPass E e e | 27 doHorizLowPass E e e |
| 28 doHorizDefFilter Ec Ec Ec | 28 doHorizDefFilter Ec Ec e e |
| 29 deRing E e e* | 29 deRing E e e* |
| 30 Vertical RKAlgo1 E a a | 30 Vertical RKAlgo1 E a a |
| 31 Horizontal RKAlgo1 a a | 31 Horizontal RKAlgo1 a a |
| 32 Vertical X1# a E E | 32 Vertical X1# a E E |
| 33 Horizontal X1# a E E | 33 Horizontal X1# a E E |
| 61 border remover | 61 border remover |
| 62 optimize c versions | 62 optimize c versions |
| 63 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks | 63 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks |
| 64 smart blur | 64 smart blur |
| 65 ... | 65 ... |
| 66 | |
| 67 Notes: | |
| 68 */ | 66 */ |
| 69 | 67 |
| 70 //Changelog: use the CVS log | 68 //Changelog: use the CVS log |
| 71 | 69 |
| 72 #include "../config.h" | 70 #include "../config.h" |
| 78 #include <malloc.h> | 76 #include <malloc.h> |
| 79 #endif | 77 #endif |
| 80 //#undef HAVE_MMX2 | 78 //#undef HAVE_MMX2 |
| 81 //#define HAVE_3DNOW | 79 //#define HAVE_3DNOW |
| 82 //#undef HAVE_MMX | 80 //#undef HAVE_MMX |
| 81 //#define DEBUG_BRIGHTNESS | |
| 83 #include "postprocess.h" | 82 #include "postprocess.h" |
| 84 | 83 |
| 85 #define MIN(a,b) ((a) > (b) ? (b) : (a)) | 84 #define MIN(a,b) ((a) > (b) ? (b) : (a)) |
| 86 #define MAX(a,b) ((a) < (b) ? (b) : (a)) | 85 #define MAX(a,b) ((a) < (b) ? (b) : (a)) |
| 87 #define ABS(a) ((a) > 0 ? (a) : (-(a))) | 86 #define ABS(a) ((a) > 0 ? (a) : (-(a))) |
| 1065 } | 1064 } |
| 1066 | 1065 |
| 1067 | 1066 |
| 1068 static inline void doVertDefFilter(uint8_t src[], int stride, int QP) | 1067 static inline void doVertDefFilter(uint8_t src[], int stride, int QP) |
| 1069 { | 1068 { |
| 1070 #ifdef HAVE_MMX | 1069 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
| 1070 /* | |
| 1071 uint8_t tmp[16]; | |
| 1072 const int l1= stride; | |
| 1073 const int l2= stride + l1; | |
| 1074 const int l3= stride + l2; | |
| 1075 const int l4= (int)tmp - (int)src - stride*3; | |
| 1076 const int l5= (int)tmp - (int)src - stride*3 + 8; | |
| 1077 const int l6= stride*3 + l3; | |
| 1078 const int l7= stride + l6; | |
| 1079 const int l8= stride + l7; | |
| 1080 | |
| 1081 memcpy(tmp, src+stride*7, 8); | |
| 1082 memcpy(tmp+8, src+stride*8, 8); | |
| 1083 */ | |
| 1071 src+= stride*4; | 1084 src+= stride*4; |
| 1072 //FIXME try pmul for *5 stuff | 1085 asm volatile( |
| 1073 // src[0]=0; | 1086 |
| 1087 #if 0 //slightly more accurate and slightly slower | |
| 1088 "pxor %%mm7, %%mm7 \n\t" // 0 | |
| 1089 "leal (%0, %1), %%eax \n\t" | |
| 1090 "leal (%%eax, %1, 4), %%ebx \n\t" | |
| 1091 // 0 1 2 3 4 5 6 7 | |
| 1092 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ebx+%1 ebx+2%1 | |
| 1093 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 | |
| 1094 | |
| 1095 | |
| 1096 "movq (%0, %1, 2), %%mm0 \n\t" // l2 | |
| 1097 "movq (%0), %%mm1 \n\t" // l0 | |
| 1098 "movq %%mm0, %%mm2 \n\t" // l2 | |
| 1099 PAVGB(%%mm7, %%mm0) // ~l2/2 | |
| 1100 PAVGB(%%mm1, %%mm0) // ~(l2 + 2l0)/4 | |
| 1101 PAVGB(%%mm2, %%mm0) // ~(5l2 + 2l0)/8 | |
| 1102 | |
| 1103 "movq (%%eax), %%mm1 \n\t" // l1 | |
| 1104 "movq (%%eax, %1, 2), %%mm3 \n\t" // l3 | |
| 1105 "movq %%mm1, %%mm4 \n\t" // l1 | |
| 1106 PAVGB(%%mm7, %%mm1) // ~l1/2 | |
| 1107 PAVGB(%%mm3, %%mm1) // ~(l1 + 2l3)/4 | |
| 1108 PAVGB(%%mm4, %%mm1) // ~(5l1 + 2l3)/8 | |
| 1109 | |
| 1110 "movq %%mm0, %%mm4 \n\t" // ~(5l2 + 2l0)/8 | |
| 1111 "psubusb %%mm1, %%mm0 \n\t" | |
| 1112 "psubusb %%mm4, %%mm1 \n\t" | |
| 1113 "por %%mm0, %%mm1 \n\t" // ~|2l0 - 5l1 + 5l2 - 2l3|/8 | |
| 1114 // mm1= |lenergy|, mm2= l2, mm3= l3, mm7=0 | |
| 1115 | |
| 1116 "movq (%0, %1, 4), %%mm0 \n\t" // l4 | |
| 1117 "movq %%mm0, %%mm4 \n\t" // l4 | |
| 1118 PAVGB(%%mm7, %%mm0) // ~l4/2 | |
| 1119 PAVGB(%%mm2, %%mm0) // ~(l4 + 2l2)/4 | |
| 1120 PAVGB(%%mm4, %%mm0) // ~(5l4 + 2l2)/8 | |
| 1121 | |
| 1122 "movq (%%ebx), %%mm2 \n\t" // l5 | |
| 1123 "movq %%mm3, %%mm5 \n\t" // l3 | |
| 1124 PAVGB(%%mm7, %%mm3) // ~l3/2 | |
| 1125 PAVGB(%%mm2, %%mm3) // ~(l3 + 2l5)/4 | |
| 1126 PAVGB(%%mm5, %%mm3) // ~(5l3 + 2l5)/8 | |
| 1127 | |
| 1128 "movq %%mm0, %%mm6 \n\t" // ~(5l4 + 2l2)/8 | |
| 1129 "psubusb %%mm3, %%mm0 \n\t" | |
| 1130 "psubusb %%mm6, %%mm3 \n\t" | |
| 1131 "por %%mm0, %%mm3 \n\t" // ~|2l2 - 5l3 + 5l4 - 2l5|/8 | |
| 1132 "pcmpeqb %%mm7, %%mm0 \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5) | |
| 1133 // mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0 | |
| 1134 | |
| 1135 "movq (%%ebx, %1), %%mm6 \n\t" // l6 | |
| 1136 "movq %%mm6, %%mm5 \n\t" // l6 | |
| 1137 PAVGB(%%mm7, %%mm6) // ~l6/2 | |
| 1138 PAVGB(%%mm4, %%mm6) // ~(l6 + 2l4)/4 | |
| 1139 PAVGB(%%mm5, %%mm6) // ~(5l6 + 2l4)/8 | |
| 1140 | |
| 1141 "movq (%%ebx, %1, 2), %%mm5 \n\t" // l7 | |
| 1142 "movq %%mm2, %%mm4 \n\t" // l5 | |
| 1143 PAVGB(%%mm7, %%mm2) // ~l5/2 | |
| 1144 PAVGB(%%mm5, %%mm2) // ~(l5 + 2l7)/4 | |
| 1145 PAVGB(%%mm4, %%mm2) // ~(5l5 + 2l7)/8 | |
| 1146 | |
| 1147 "movq %%mm6, %%mm4 \n\t" // ~(5l6 + 2l4)/8 | |
| 1148 "psubusb %%mm2, %%mm6 \n\t" | |
| 1149 "psubusb %%mm4, %%mm2 \n\t" | |
| 1150 "por %%mm6, %%mm2 \n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8 | |
| 1151 // mm0= SIGN(menergy), mm1= |lenergy|/8, mm2= |renergy|/8, mm3= |menergy|/8, mm7=0 | |
| 1152 | |
| 1153 | |
| 1154 PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8 | |
| 1155 "movq pQPb, %%mm4 \n\t" // QP //FIXME QP+1 ? | |
| 1156 "paddusb b01, %%mm4 \n\t" | |
| 1157 "pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP | |
| 1158 "psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8 | |
| 1159 "pand %%mm4, %%mm3 \n\t" | |
| 1160 | |
| 1161 "movq %%mm3, %%mm1 \n\t" | |
| 1162 // "psubusb b01, %%mm3 \n\t" | |
| 1163 PAVGB(%%mm7, %%mm3) | |
| 1164 PAVGB(%%mm7, %%mm3) | |
| 1165 "paddusb %%mm1, %%mm3 \n\t" | |
| 1166 // "paddusb b01, %%mm3 \n\t" | |
| 1167 | |
| 1168 "movq (%%eax, %1, 2), %%mm6 \n\t" //l3 | |
| 1169 "movq (%0, %1, 4), %%mm5 \n\t" //l4 | |
| 1170 "movq (%0, %1, 4), %%mm4 \n\t" //l4 | |
| 1171 "psubusb %%mm6, %%mm5 \n\t" | |
| 1172 "psubusb %%mm4, %%mm6 \n\t" | |
| 1173 "por %%mm6, %%mm5 \n\t" // |l3-l4| | |
| 1174 "pcmpeqb %%mm7, %%mm6 \n\t" // SIGN(l3-l4) | |
| 1175 "pxor %%mm6, %%mm0 \n\t" | |
| 1176 "pand %%mm0, %%mm3 \n\t" | |
| 1177 PMINUB(%%mm5, %%mm3, %%mm0) | |
| 1178 | |
| 1179 "psubusb b01, %%mm3 \n\t" | |
| 1180 PAVGB(%%mm7, %%mm3) | |
| 1181 | |
| 1182 "movq (%%eax, %1, 2), %%mm0 \n\t" | |
| 1183 "movq (%0, %1, 4), %%mm2 \n\t" | |
| 1184 "pxor %%mm6, %%mm0 \n\t" | |
| 1185 "pxor %%mm6, %%mm2 \n\t" | |
| 1186 "psubb %%mm3, %%mm0 \n\t" | |
| 1187 "paddb %%mm3, %%mm2 \n\t" | |
| 1188 "pxor %%mm6, %%mm0 \n\t" | |
| 1189 "pxor %%mm6, %%mm2 \n\t" | |
| 1190 "movq %%mm0, (%%eax, %1, 2) \n\t" | |
| 1191 "movq %%mm2, (%0, %1, 4) \n\t" | |
| 1192 #endif | |
| 1193 | |
| 1194 "leal (%0, %1), %%eax \n\t" | |
| 1195 "pcmpeqb %%mm6, %%mm6 \n\t" // -1 | |
| 1196 // 0 1 2 3 4 5 6 7 | |
| 1197 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ebx+%1 ebx+2%1 | |
| 1198 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 | |
| 1199 | |
| 1200 | |
| 1201 "movq (%%eax, %1, 2), %%mm1 \n\t" // l3 | |
| 1202 "movq (%0, %1, 4), %%mm0 \n\t" // l4 | |
| 1203 "pxor %%mm6, %%mm1 \n\t" // -l3-1 | |
| 1204 PAVGB(%%mm1, %%mm0) // -q+128 = (l4-l3+256)/2 | |
| 1205 // mm1=-l3-1, mm0=128-q | |
| 1206 | |
| 1207 "movq (%%eax, %1, 4), %%mm2 \n\t" // l5 | |
| 1208 "movq (%%eax, %1), %%mm3 \n\t" // l2 | |
| 1209 "pxor %%mm6, %%mm2 \n\t" // -l5-1 | |
| 1210 "movq %%mm2, %%mm5 \n\t" // -l5-1 | |
| 1211 "movq b80, %%mm4 \n\t" // 128 | |
| 1212 "leal (%%eax, %1, 4), %%ebx \n\t" | |
| 1213 PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2 | |
| 1214 PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128 | |
| 1215 PAVGB(%%mm2, %%mm4) // ~(l2-l5)/4 +(l4-l3)/8 + 128 | |
| 1216 PAVGB(%%mm0, %%mm4) // ~(l2-l5)/8 +5(l4-l3)/16 + 128 | |
| 1217 // mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1 | |
| 1218 | |
| 1219 "movq (%%eax), %%mm2 \n\t" // l1 | |
| 1220 "pxor %%mm6, %%mm2 \n\t" // -l1-1 | |
| 1221 PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2 | |
| 1222 PAVGB((%0), %%mm1) // (l0-l3+256)/2 | |
| 1223 "movq b80, %%mm3 \n\t" // 128 | |
| 1224 PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128 | |
| 1225 PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128 | |
| 1226 PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128 | |
| 1227 // mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1 | |
| 1228 | |
| 1229 PAVGB((%%ebx, %1), %%mm5) // (l6-l5+256)/2 | |
| 1230 "movq (%%ebx, %1, 2), %%mm1 \n\t" // l7 | |
| 1231 "pxor %%mm6, %%mm1 \n\t" // -l7-1 | |
| 1232 PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2 | |
| 1233 "movq b80, %%mm2 \n\t" // 128 | |
| 1234 PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128 | |
| 1235 PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128 | |
| 1236 PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128 | |
| 1237 // mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128 | |
| 1238 | |
| 1239 "movq b00, %%mm1 \n\t" // 0 | |
| 1240 "movq b00, %%mm5 \n\t" // 0 | |
| 1241 "psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16 | |
| 1242 "psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16 | |
| 1243 PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16| | |
| 1244 PMAXUB(%%mm5, %%mm3) // 128 + |lenergy/16| | |
| 1245 PMINUB(%%mm2, %%mm3, %%mm1) // 128 + MIN(|lenergy|,|renergy|)/16 | |
| 1246 | |
| 1247 // mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128 | |
| 1248 | |
| 1249 "movq b00, %%mm7 \n\t" // 0 | |
| 1250 "movq pQPb, %%mm2 \n\t" // QP | |
| 1251 PAVGB(%%mm6, %%mm2) // 128 + QP/2 | |
| 1252 "psubb %%mm6, %%mm2 \n\t" | |
| 1253 | |
| 1254 "movq %%mm4, %%mm1 \n\t" | |
| 1255 "pcmpgtb %%mm7, %%mm1 \n\t" // SIGN(menergy) | |
| 1256 "pxor %%mm1, %%mm4 \n\t" | |
| 1257 "psubb %%mm1, %%mm4 \n\t" // 128 + |menergy|/16 | |
| 1258 "pcmpgtb %%mm4, %%mm2 \n\t" // |menergy|/16 < QP/2 | |
| 1259 "psubusb %%mm3, %%mm4 \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16 | |
| 1260 // mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16 | |
| 1261 | |
| 1262 "movq %%mm4, %%mm3 \n\t" // d | |
| 1263 "psubusb b01, %%mm4 \n\t" | |
| 1264 PAVGB(%%mm7, %%mm4) // d/32 | |
| 1265 PAVGB(%%mm7, %%mm4) // (d + 32)/64 | |
| 1266 "paddb %%mm3, %%mm4 \n\t" // 5d/64 | |
| 1267 "pand %%mm2, %%mm4 \n\t" | |
| 1268 | |
| 1269 "movq b80, %%mm5 \n\t" // 128 | |
| 1270 "psubb %%mm0, %%mm5 \n\t" // q | |
| 1271 "paddsb %%mm6, %%mm5 \n\t" // fix bad rounding | |
| 1272 "pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q) | |
| 1273 "pxor %%mm7, %%mm5 \n\t" | |
| 1274 | |
| 1275 PMINUB(%%mm5, %%mm4, %%mm3) // MIN(|q|, 5d/64) | |
| 1276 "pxor %%mm1, %%mm7 \n\t" // SIGN(d*q) | |
| 1277 | |
| 1278 "pand %%mm7, %%mm4 \n\t" | |
| 1279 "movq (%%eax, %1, 2), %%mm0 \n\t" | |
| 1280 "movq (%0, %1, 4), %%mm2 \n\t" | |
| 1281 "pxor %%mm1, %%mm0 \n\t" | |
| 1282 "pxor %%mm1, %%mm2 \n\t" | |
| 1283 "paddb %%mm4, %%mm0 \n\t" | |
| 1284 "psubb %%mm4, %%mm2 \n\t" | |
| 1285 "pxor %%mm1, %%mm0 \n\t" | |
| 1286 "pxor %%mm1, %%mm2 \n\t" | |
| 1287 "movq %%mm0, (%%eax, %1, 2) \n\t" | |
| 1288 "movq %%mm2, (%0, %1, 4) \n\t" | |
| 1289 | |
| 1290 : | |
| 1291 : "r" (src), "r" (stride) | |
| 1292 : "%eax", "%ebx" | |
| 1293 ); | |
| 1294 | |
| 1295 /* | |
| 1296 { | |
| 1297 int x; | |
| 1298 src-= stride; | |
| 1299 for(x=0; x<BLOCK_SIZE; x++) | |
| 1300 { | |
| 1301 const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]); | |
| 1302 if(ABS(middleEnergy)< 8*QP) | |
| 1303 { | |
| 1304 const int q=(src[l4] - src[l5])/2; | |
| 1305 const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]); | |
| 1306 const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]); | |
| 1307 | |
| 1308 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); | |
| 1309 d= MAX(d, 0); | |
| 1310 | |
| 1311 d= (5*d + 32) >> 6; | |
| 1312 d*= SIGN(-middleEnergy); | |
| 1313 | |
| 1314 if(q>0) | |
| 1315 { | |
| 1316 d= d<0 ? 0 : d; | |
| 1317 d= d>q ? q : d; | |
| 1318 } | |
| 1319 else | |
| 1320 { | |
| 1321 d= d>0 ? 0 : d; | |
| 1322 d= d<q ? q : d; | |
| 1323 } | |
| 1324 | |
| 1325 src[l4]-= d; | |
| 1326 src[l5]+= d; | |
| 1327 } | |
| 1328 src++; | |
| 1329 } | |
| 1330 src-=8; | |
| 1331 for(x=0; x<8; x++) | |
| 1332 { | |
| 1333 int y; | |
| 1334 for(y=4; y<6; y++) | |
| 1335 { | |
| 1336 int d= src[x+y*stride] - tmp[x+(y-4)*8]; | |
| 1337 int ad= ABS(d); | |
| 1338 static int max=0; | |
| 1339 static int sum=0; | |
| 1340 static int num=0; | |
| 1341 static int bias=0; | |
| 1342 | |
| 1343 if(max<ad) max=ad; | |
| 1344 sum+= ad>3 ? 1 : 0; | |
| 1345 if(ad>3) | |
| 1346 { | |
| 1347 src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255; | |
| 1348 } | |
| 1349 if(y==4) bias+=d; | |
| 1350 num++; | |
| 1351 if(num%1000000 == 0) | |
| 1352 { | |
| 1353 printf(" %d %d %d %d\n", num, sum, max, bias); | |
| 1354 } | |
| 1355 } | |
| 1356 } | |
| 1357 } | |
| 1358 */ | |
| 1359 #elif defined (HAVE_MMX) | |
| 1360 src+= stride*4; | |
| 1361 | |
| 1074 asm volatile( | 1362 asm volatile( |
| 1075 "pxor %%mm7, %%mm7 \n\t" | 1363 "pxor %%mm7, %%mm7 \n\t" |
| 1076 "leal (%0, %1), %%eax \n\t" | 1364 "leal (%0, %1), %%eax \n\t" |
| 1077 "leal (%%eax, %1, 4), %%ebx \n\t" | 1365 "leal (%%eax, %1, 4), %%ebx \n\t" |
| 1078 // 0 1 2 3 4 5 6 7 | 1366 // 0 1 2 3 4 5 6 7 |
| 3959 if(y+15 >= height) | 4247 if(y+15 >= height) |
| 3960 { | 4248 { |
| 3961 uint8_t *dstBlock= &(dst[y*dstStride]); | 4249 uint8_t *dstBlock= &(dst[y*dstStride]); |
| 3962 memcpy(dstBlock, tempDst + dstStride, dstStride*(height-y) ); | 4250 memcpy(dstBlock, tempDst + dstStride, dstStride*(height-y) ); |
| 3963 } | 4251 } |
| 3964 } | 4252 /* |
| 4253 for(x=0; x<width; x+=32) | |
| 4254 { | |
| 4255 int i; | |
| 4256 i+= + dstBlock[x + 7*dstStride] + dstBlock[x + 8*dstStride] | |
| 4257 + dstBlock[x + 9*dstStride] + dstBlock[x +10*dstStride] | |
| 4258 + dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride] | |
| 4259 + dstBlock[x +13*dstStride] + dstBlock[x +14*dstStride] | |
| 4260 + dstBlock[x +15*dstStride]; | |
| 4261 } | |
| 4262 */ } | |
| 3965 #ifdef HAVE_3DNOW | 4263 #ifdef HAVE_3DNOW |
| 3966 asm volatile("femms"); | 4264 asm volatile("femms"); |
| 3967 #elif defined (HAVE_MMX) | 4265 #elif defined (HAVE_MMX) |
| 3968 asm volatile("emms"); | 4266 asm volatile("emms"); |
| 3969 #endif | 4267 #endif |
| 3975 printf("cpy:%4dk, vert:%4dk, horiz:%4dk, sum:%4dk, diff:%4dk, color: %d/%d \r", | 4273 printf("cpy:%4dk, vert:%4dk, horiz:%4dk, sum:%4dk, diff:%4dk, color: %d/%d \r", |
| 3976 (int)(memcpyTime/1000), (int)(vertTime/1000), (int)(horizTime/1000), | 4274 (int)(memcpyTime/1000), (int)(vertTime/1000), (int)(horizTime/1000), |
| 3977 (int)(sumTime/1000), (int)((sumTime-memcpyTime-vertTime-horizTime)/1000) | 4275 (int)(sumTime/1000), (int)((sumTime-memcpyTime-vertTime-horizTime)/1000) |
| 3978 , black, white); | 4276 , black, white); |
| 3979 #endif | 4277 #endif |
| 4278 #ifdef DEBUG_BRIGHTNESS | |
| 4279 if(!isColor) | |
| 4280 { | |
| 4281 int max=1; | |
| 4282 int i; | |
| 4283 for(i=0; i<256; i++) | |
| 4284 if(yHistogram[i] > max) max=yHistogram[i]; | |
| 4285 | |
| 4286 for(i=1; i<256; i++) | |
| 4287 { | |
| 4288 int x; | |
| 4289 int start=yHistogram[i-1]/(max/256+1); | |
| 4290 int end=yHistogram[i]/(max/256+1); | |
| 4291 int inc= end > start ? 1 : -1; | |
| 4292 for(x=start; x!=end+inc; x+=inc) | |
| 4293 dst[ i*dstStride + x]+=128; | |
| 4294 } | |
| 4295 | |
| 4296 for(i=0; i<100; i+=2) | |
| 4297 { | |
| 4298 dst[ (white)*dstStride + i]+=128; | |
| 4299 dst[ (black)*dstStride + i]+=128; | |
| 4300 } | |
| 4301 | |
| 4302 } | |
| 4303 #endif | |
| 4304 | |
| 3980 } | 4305 } |
