comparison: postprocess_altivec_template.c @ 100:b944f0b99b23 (libpostproc)
Remove declarations after statements from vertClassify_altivec
| field | value |
|---|---|
| author | lu_zero |
| date | Sun, 23 Mar 2008 15:33:24 +0000 |
| parents | fa0ecbc87f51 |
| children | db57626d7d76 |
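
This changeset addresses C89's requirement that all block-local declarations precede the first statement of a block (builds of this era commonly enforced that with gcc's `-Wdeclaration-after-statement`). A minimal sketch of the pattern being fixed, using hypothetical names rather than code from this file:

```c
/* C99 permits declarations after statements; C89 (and gcc's
 * -Wdeclaration-after-statement) does not. Hypothetical example. */
int sum_two_rows_c99(const unsigned char *src, int stride)
{
    int acc = 0;
    acc += src[0];       /* first statement of the block */
    int j1 = stride;     /* declaration after a statement: C89 error */
    return acc + src[j1];
}

/* The C89-clean form this patch switches to: hoist every
 * declaration ahead of the first statement. */
int sum_two_rows_c89(const unsigned char *src, int stride)
{
    int acc = 0;
    int j1 = stride;     /* declared before any statement */
    acc += src[0];
    return acc + src[j1];
}
```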
| 99:fa0ecbc87f51 | 100:b944f0b99b23 |
|---|---|
| 82 const vector signed short mask = vec_splat_s16(1); | 82 const vector signed short mask = vec_splat_s16(1); |
| 83 vector signed int v_numEq = vec_splat_s32(0); | 83 vector signed int v_numEq = vec_splat_s32(0); |
| 84 vector signed short v_data = vec_ld(0, data); | 84 vector signed short v_data = vec_ld(0, data); |
| 85 vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3, | 85 vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3, |
| 86 v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7; | 86 v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7; |
| 87 //FIXME avoid this mess if possible | |
| 88 register int j0 = 0, | |
| 89 j1 = stride, | |
| 90 j2 = 2 * stride, | |
| 91 j3 = 3 * stride, | |
| 92 j4 = 4 * stride, | |
| 93 j5 = 5 * stride, | |
| 94 j6 = 6 * stride, | |
| 95 j7 = 7 * stride; | |
| 96 vector unsigned char v_srcA0, v_srcA1, v_srcA2, v_srcA3, | |
| 97 v_srcA4, v_srcA5, v_srcA6, v_srcA7; | |
| 87 | 98 |
| 88 v_dcOffset = vec_splat(v_data, 0); | 99 v_dcOffset = vec_splat(v_data, 0); |
| 89 v_dcThreshold = (vector unsigned short)vec_splat(v_data, 1); | 100 v_dcThreshold = (vector unsigned short)vec_splat(v_data, 1); |
| 90 v2QP = vec_splat(v_data, 2); | 101 v2QP = vec_splat(v_data, 2); |
| 91 v4QP = (vector unsigned short)vec_splat(v_data, 3); | 102 v4QP = (vector unsigned short)vec_splat(v_data, 3); |
| 92 | 103 |
| 93 src2 += stride * 4; | 104 src2 += stride * 4; |
| 94 | 105 |
| 95 | 106 |
| 96 #define LOAD_LINE(i) \ | 107 #define LOAD_LINE(i) \ |
| 97 register int j##i = i * stride; \ | 108 { \ |
| 98 vector unsigned char perm##i = vec_lvsl(j##i, src2); \ | 109 vector unsigned char perm##i = vec_lvsl(j##i, src2); \ |
| 99 const vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \ | |
| 100 vector unsigned char v_srcA2##i; \ | 110 vector unsigned char v_srcA2##i; \ |
| | 111 vector unsigned char v_srcA1##i = vec_ld(j##i, src2); \ |
| 101 if (two_vectors) \ | 112 if (two_vectors) \ |
| 102 v_srcA2##i = vec_ld(j##i + 16, src2); \ | 113 v_srcA2##i = vec_ld(j##i + 16, src2); \ |
| 103 const vector unsigned char v_srcA##i = \ | 114 v_srcA##i = \ |
| 104 vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \ | 115 vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \ |
| 105 v_srcAss##i = \ | 116 v_srcAss##i = \ |
| 106 (vector signed short)vec_mergeh((vector signed char)zero, \ | 117 (vector signed short)vec_mergeh((vector signed char)zero, \ |
| 107 (vector signed char)v_srcA##i) | 118 (vector signed char)v_srcA##i); } |
| 108 | 119 |
| 109 #define LOAD_LINE_ALIGNED(i) \ | 120 #define LOAD_LINE_ALIGNED(i) \ |
| 110 register int j##i = i * stride; \ | 121 v_srcA##i = vec_ld(j##i, src2); \ |
| 111 const vector unsigned char v_srcA##i = vec_ld(j##i, src2); \ | |
| 112 v_srcAss##i = \ | 122 v_srcAss##i = \ |
| 113 (vector signed short)vec_mergeh((vector signed char)zero, \ | 123 (vector signed short)vec_mergeh((vector signed char)zero, \ |
| 114 (vector signed char)v_srcA##i) | 124 (vector signed char)v_srcA##i) |
| 115 | 125 |
| 116 /* Special-casing the aligned case is worthwhile, as all calls from | 126 /* Special-casing the aligned case is worthwhile, as all calls from |
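
In the `LOAD_LINE` hunk above, the patch hoists the per-row offsets `j0`..`j7` and the destination vectors `v_srcA0`..`v_srcA7` out of the macro, then wraps what remains of the macro body in `{ ... }` so its leftover temporaries (`perm##i`, `v_srcA1##i`, `v_srcA2##i`) open a fresh scope, where declaring them first is again legal. A generic sketch of that brace-scoping trick (hypothetical macro, not this file's code):

```c
/* A macro expanded after earlier statements cannot introduce C89
 * declarations at that point... unless its body opens its own block. */
#define SCALE_OK(dst, src, k) {          \
    int tmp = (src) * (k);  /* first thing in a fresh scope: legal */ \
    (dst) = tmp; }

void scale_pair(int *out, const int *in)
{
    int a, b;
    out[0] = 0;             /* a statement precedes the expansions */
    SCALE_OK(a, in[0], 2)   /* fine: the braces start a new scope  */
    SCALE_OK(b, in[1], 2)
    out[1] = a + b;
}
```

The second hunk, where the `v_numEq` accumulation moves out of `ITER`:

| 99:fa0ecbc87f51 | 100:b944f0b99b23 |
|---|---|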
| 144 const vector signed short v_sum##i = \ | 154 const vector signed short v_sum##i = \ |
| 145 vec_add(v_diff##i, v_dcOffset); \ | 155 vec_add(v_diff##i, v_dcOffset); \ |
| 146 const vector signed short v_comp##i = \ | 156 const vector signed short v_comp##i = \ |
| 147 (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \ | 157 (vector signed short)vec_cmplt((vector unsigned short)v_sum##i, \ |
| 148 v_dcThreshold); \ | 158 v_dcThreshold); \ |
| 149 const vector signed short v_part##i = vec_and(mask, v_comp##i); \ | 159 const vector signed short v_part##i = vec_and(mask, v_comp##i); |
| 150 v_numEq = vec_sum4s(v_part##i, v_numEq); | 160 |
| 151 | 161 { |
| 152 ITER(0, 1); | 162 ITER(0, 1) |
| 153 ITER(1, 2); | 163 ITER(1, 2) |
| 154 ITER(2, 3); | 164 ITER(2, 3) |
| 155 ITER(3, 4); | 165 ITER(3, 4) |
| 156 ITER(4, 5); | 166 ITER(4, 5) |
| 157 ITER(5, 6); | 167 ITER(5, 6) |
| 158 ITER(6, 7); | 168 ITER(6, 7) |
| | 169 |
| | 170 v_numEq = vec_sum4s(v_part0, v_numEq); |
| | 171 v_numEq = vec_sum4s(v_part1, v_numEq); |
| | 172 v_numEq = vec_sum4s(v_part2, v_numEq); |
| | 173 v_numEq = vec_sum4s(v_part3, v_numEq); |
| | 174 v_numEq = vec_sum4s(v_part4, v_numEq); |
| | 175 v_numEq = vec_sum4s(v_part5, v_numEq); |
| | 176 v_numEq = vec_sum4s(v_part6, v_numEq); |
| | 177 } |
| | 178 |
| 159 #undef ITER | 179 #undef ITER |
| 160 | 180 |
| 161 v_numEq = vec_sums(v_numEq, zero); | 181 v_numEq = vec_sums(v_numEq, zero); |
| 162 | 182 |
| 163 v_numEq = vec_splat(v_numEq, 3); | 183 v_numEq = vec_splat(v_numEq, 3); |
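
After `#undef ITER`, the function finishes with a horizontal reduction: each `vec_sum4s` call folds the two 16-bit halves of every 32-bit lane of a `v_part` vector into the matching lane of `v_numEq` (with saturation), `vec_sums` then collapses the four lanes into element 3, and `vec_splat(v_numEq, 3)` broadcasts that count to every lane. A scalar stand-in for the same arithmetic (plain C with saturation ignored; not the AltiVec code itself):

```c
#include <stdint.h>

/* Scalar model of the reduction: vec_sum4s(signed short, signed int)
 * adds the two 16-bit halves of each 32-bit lane into that lane,
 * and vec_sums totals the four lanes (the value vec_splat broadcasts). */
static int32_t reduce_parts(const int16_t part[7][8])
{
    int32_t lanes[4] = {0, 0, 0, 0};
    int i, w;

    for (i = 0; i < 7; i++)          /* the seven vec_sum4s calls     */
        for (w = 0; w < 4; w++)      /* two halfwords per 32-bit lane */
            lanes[w] += part[i][2 * w] + part[i][2 * w + 1];

    return lanes[0] + lanes[1] + lanes[2] + lanes[3];  /* vec_sums */
}
```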
