Mercurial > libavcodec.hg
annotate h264_loopfilter.c @ 11104:bb877c9cb102 libavcodec
Detect spatial direct MBs partitioned smaller than 16x16 that can be partitioned
as 16x16 (except ones changing interlacing relative to the colocated MB).
20 cycles slower during MV generation
175 cycles faster during MC
| author | michael |
|---|---|
| date | Mon, 08 Feb 2010 16:23:05 +0000 |
| parents | 1b8e11679883 |
| children | d9725977b699 |
| rev | line source |
|---|---|
| 10854 | 1 /* |
| 2 * H.26L/H.264/AVC/JVT/14496-10/... loop filter | |
| 3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
| 4 * | |
| 5 * This file is part of FFmpeg. | |
| 6 * | |
| 7 * FFmpeg is free software; you can redistribute it and/or | |
| 8 * modify it under the terms of the GNU Lesser General Public | |
| 9 * License as published by the Free Software Foundation; either | |
| 10 * version 2.1 of the License, or (at your option) any later version. | |
| 11 * | |
| 12 * FFmpeg is distributed in the hope that it will be useful, | |
| 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 15 * Lesser General Public License for more details. | |
| 16 * | |
| 17 * You should have received a copy of the GNU Lesser General Public | |
| 18 * License along with FFmpeg; if not, write to the Free Software | |
| 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
| 20 */ | |
| 21 | |
| 22 /** | |
| 23 * @file libavcodec/h264_loopfilter.c | |
| 24 * H.264 / AVC / MPEG4 part10 loop filter. | |
| 25 * @author Michael Niedermayer <michaelni@gmx.at> | |
| 26 */ | |
| 27 | |
| 28 #include "internal.h" | |
| 29 #include "dsputil.h" | |
| 30 #include "avcodec.h" | |
| 31 #include "mpegvideo.h" | |
| 32 #include "h264.h" | |
| 33 #include "mathops.h" | |
| 34 #include "rectangle.h" | |
| 35 | |
| 36 //#undef NDEBUG | |
| 37 #include <assert.h> | |
| 38 | |
/* Deblocking filter (p153) */
/**
 * Alpha edge threshold lookup, 3*52 entries.
 *
 * In this file it is indexed with qp + h->slice_alpha_c0_offset.
 * Layout: 52 zero entries, then the 52 threshold values, then 52 entries
 * held at 255 (presumably so that an offset index never needs clamping).
 */
static const uint8_t alpha_table[52*3] = {
    /* entries 0..51: zero */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* entries 52..103: threshold values */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  4,  4,  5,  6,  7,  8,  9, 10, 12, 13,
     15, 17, 20, 22, 25, 28, 32, 36, 40, 45, 50, 56, 63,
     71, 80, 90,101,113,127,144,162,182,203,226,255,255,
    /* entries 104..155: held at the maximum */
    255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,
};
/**
 * Beta edge threshold lookup, 3*52 entries.
 *
 * In this file it is indexed with qp + h->slice_beta_offset.
 * Layout: 52 zero entries, then the 52 threshold values, then 52 entries
 * held at 18 (presumably so that an offset index never needs clamping).
 */
static const uint8_t beta_table[52*3] = {
    /* entries 0..51: zero */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* entries 52..103: threshold values */
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  2,  2,  2,  3,  3,  3,  3,  4,  4,  4,
      6,  6,  7,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12,
     12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18,
    /* entries 104..155: held at the maximum */
     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
};
/**
 * tc0 clipping lookup, 3*52 rows of 4 columns.
 *
 * The row is selected with the same index used for alpha_table and the
 * column with the boundary strength bS (0..3). Column 0 is written as -1,
 * which is stored as 255 because the element type is uint8_t.
 * Layout: 52 padding rows, the 52 value rows, then 52 rows repeating the
 * last value row.
 */
static const uint8_t tc0_table[52*3][4] = {
    /* rows 0..51: padding */
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    /* rows 52..103: value rows */
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
    {-1, 0, 0, 0 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 },
    {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
    {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 },
    {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 2, 3 },
    {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
    {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 },
    {-1, 4, 5, 7 }, {-1, 4, 5, 8 }, {-1, 4, 6, 9 }, {-1, 5, 7,10 },
    {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
    {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
    /* rows 104..155: last value row repeated */
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
    {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
};
| 101 | |
|
11042
74f672a1f763
Change wrapper functions to always inline, they are faster now that way.
michael
parents:
11041
diff
changeset
|
102 static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) { |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
103 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
| 10960 | 104 const int alpha = alpha_table[index_a]; |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
105 const int beta = beta_table[qp + h->slice_beta_offset]; |
| 10854 | 106 if (alpha ==0 || beta == 0) return; |
| 107 | |
| 108 if( bS[0] < 4 ) { | |
| 109 int8_t tc[4]; | |
| 10960 | 110 tc[0] = tc0_table[index_a][bS[0]]; |
| 111 tc[1] = tc0_table[index_a][bS[1]]; | |
| 112 tc[2] = tc0_table[index_a][bS[2]]; | |
| 113 tc[3] = tc0_table[index_a][bS[3]]; | |
| 10854 | 114 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); |
| 115 } else { | |
| 116 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); | |
| 117 } | |
| 118 } | |
|
11042
74f672a1f763
Change wrapper functions to always inline, they are faster now that way.
michael
parents:
11041
diff
changeset
|
119 static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
120 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
| 10960 | 121 const int alpha = alpha_table[index_a]; |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
122 const int beta = beta_table[qp + h->slice_beta_offset]; |
| 10854 | 123 if (alpha ==0 || beta == 0) return; |
| 124 | |
| 125 if( bS[0] < 4 ) { | |
| 126 int8_t tc[4]; | |
| 10960 | 127 tc[0] = tc0_table[index_a][bS[0]]+1; |
| 128 tc[1] = tc0_table[index_a][bS[1]]+1; | |
| 129 tc[2] = tc0_table[index_a][bS[2]]+1; | |
| 130 tc[3] = tc0_table[index_a][bS[3]]+1; | |
| 10854 | 131 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); |
| 132 } else { | |
| 133 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); | |
| 134 } | |
| 135 } | |
| 136 | |
|
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
137 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { |
| 10854 | 138 int i; |
| 10970 | 139 int index_a = qp + h->slice_alpha_c0_offset; |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
140 int alpha = alpha_table[index_a]; |
|
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
141 int beta = beta_table[qp + h->slice_beta_offset]; |
|
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
142 for( i = 0; i < 8; i++, pix += stride) { |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
143 const int bS_index = (i >> 1) * bsi; |
| 10854 | 144 |
| 145 if( bS[bS_index] == 0 ) { | |
| 146 continue; | |
| 147 } | |
| 148 | |
| 149 if( bS[bS_index] < 4 ) { | |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
150 const int tc0 = tc0_table[index_a][bS[bS_index]]; |
| 10854 | 151 const int p0 = pix[-1]; |
| 152 const int p1 = pix[-2]; | |
| 153 const int p2 = pix[-3]; | |
| 154 const int q0 = pix[0]; | |
| 155 const int q1 = pix[1]; | |
| 156 const int q2 = pix[2]; | |
| 157 | |
| 158 if( FFABS( p0 - q0 ) < alpha && | |
| 159 FFABS( p1 - p0 ) < beta && | |
| 160 FFABS( q1 - q0 ) < beta ) { | |
| 161 int tc = tc0; | |
| 162 int i_delta; | |
| 163 | |
| 164 if( FFABS( p2 - p0 ) < beta ) { | |
| 10970 | 165 if(tc0) |
| 10854 | 166 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); |
| 167 tc++; | |
| 168 } | |
| 169 if( FFABS( q2 - q0 ) < beta ) { | |
| 10970 | 170 if(tc0) |
| 10854 | 171 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); |
| 172 tc++; | |
| 173 } | |
| 174 | |
| 175 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); | |
| 176 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ | |
| 177 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ | |
| 178 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); | |
| 179 } | |
| 180 }else{ | |
| 181 const int p0 = pix[-1]; | |
| 182 const int p1 = pix[-2]; | |
| 183 const int p2 = pix[-3]; | |
| 184 | |
| 185 const int q0 = pix[0]; | |
| 186 const int q1 = pix[1]; | |
| 187 const int q2 = pix[2]; | |
| 188 | |
| 189 if( FFABS( p0 - q0 ) < alpha && | |
| 190 FFABS( p1 - p0 ) < beta && | |
| 191 FFABS( q1 - q0 ) < beta ) { | |
| 192 | |
| 193 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ | |
| 194 if( FFABS( p2 - p0 ) < beta) | |
| 195 { | |
| 196 const int p3 = pix[-4]; | |
| 197 /* p0', p1', p2' */ | |
| 198 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; | |
| 199 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; | |
| 200 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; | |
| 201 } else { | |
| 202 /* p0' */ | |
| 203 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | |
| 204 } | |
| 205 if( FFABS( q2 - q0 ) < beta) | |
| 206 { | |
| 207 const int q3 = pix[3]; | |
| 208 /* q0', q1', q2' */ | |
| 209 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; | |
| 210 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; | |
| 211 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; | |
| 212 } else { | |
| 213 /* q0' */ | |
| 214 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | |
| 215 } | |
| 216 }else{ | |
| 217 /* p0', q0' */ | |
| 218 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; | |
| 219 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; | |
| 220 } | |
| 221 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); | |
| 222 } | |
| 223 } | |
| 224 } | |
| 225 } | |
|
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
226 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { |
| 10854 | 227 int i; |
| 10970 | 228 int index_a = qp + h->slice_alpha_c0_offset; |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
229 int alpha = alpha_table[index_a]; |
|
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
230 int beta = beta_table[qp + h->slice_beta_offset]; |
|
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
231 for( i = 0; i < 4; i++, pix += stride) { |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
232 const int bS_index = i*bsi; |
| 10854 | 233 |
| 234 if( bS[bS_index] == 0 ) { | |
| 235 continue; | |
| 236 } | |
| 237 | |
| 238 if( bS[bS_index] < 4 ) { | |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
239 const int tc = tc0_table[index_a][bS[bS_index]] + 1; |
| 10854 | 240 const int p0 = pix[-1]; |
| 241 const int p1 = pix[-2]; | |
| 242 const int q0 = pix[0]; | |
| 243 const int q1 = pix[1]; | |
| 244 | |
| 245 if( FFABS( p0 - q0 ) < alpha && | |
| 246 FFABS( p1 - p0 ) < beta && | |
| 247 FFABS( q1 - q0 ) < beta ) { | |
| 248 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); | |
| 249 | |
| 250 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ | |
| 251 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ | |
| 252 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); | |
| 253 } | |
| 254 }else{ | |
| 255 const int p0 = pix[-1]; | |
| 256 const int p1 = pix[-2]; | |
| 257 const int q0 = pix[0]; | |
| 258 const int q1 = pix[1]; | |
| 259 | |
| 260 if( FFABS( p0 - q0 ) < alpha && | |
| 261 FFABS( p1 - p0 ) < beta && | |
| 262 FFABS( q1 - q0 ) < beta ) { | |
| 263 | |
| 264 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ | |
| 265 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ | |
| 266 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); | |
| 267 } | |
| 268 } | |
| 269 } | |
| 270 } | |
| 271 | |
|
11042
74f672a1f763
Change wraper functions to always inline, they are faster now that way.
michael
parents:
11041
diff
changeset
|
272 static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
273 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
| 10960 | 274 const int alpha = alpha_table[index_a]; |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
275 const int beta = beta_table[qp + h->slice_beta_offset]; |
| 10854 | 276 if (alpha ==0 || beta == 0) return; |
| 277 | |
| 278 if( bS[0] < 4 ) { | |
| 279 int8_t tc[4]; | |
| 10960 | 280 tc[0] = tc0_table[index_a][bS[0]]; |
| 281 tc[1] = tc0_table[index_a][bS[1]]; | |
| 282 tc[2] = tc0_table[index_a][bS[2]]; | |
| 283 tc[3] = tc0_table[index_a][bS[3]]; | |
| 10854 | 284 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); |
| 285 } else { | |
| 286 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); | |
| 287 } | |
| 288 } | |
| 289 | |
|
11042
74f672a1f763
Change wraper functions to always inline, they are faster now that way.
michael
parents:
11041
diff
changeset
|
290 static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
291 const unsigned int index_a = qp + h->slice_alpha_c0_offset; |
| 10960 | 292 const int alpha = alpha_table[index_a]; |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
293 const int beta = beta_table[qp + h->slice_beta_offset]; |
| 10854 | 294 if (alpha ==0 || beta == 0) return; |
| 295 | |
| 296 if( bS[0] < 4 ) { | |
| 297 int8_t tc[4]; | |
| 10960 | 298 tc[0] = tc0_table[index_a][bS[0]]+1; |
| 299 tc[1] = tc0_table[index_a][bS[1]]+1; | |
| 300 tc[2] = tc0_table[index_a][bS[2]]+1; | |
| 301 tc[3] = tc0_table[index_a][bS[3]]+1; | |
| 10854 | 302 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); |
| 303 } else { | |
| 304 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); | |
| 305 } | |
| 306 } | |
| 307 | |
| 308 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { | |
| 309 MpegEncContext * const s = &h->s; | |
|
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
310 int mb_xy; |
|
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
311 int mb_type, left_type; |
| 10854 | 312 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; |
| 313 | |
| 314 mb_xy = h->mb_xy; | |
| 315 | |
|
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
316 if(!h->top_type || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { |
| 10854 | 317 ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); |
| 318 return; | |
| 319 } | |
| 320 assert(!FRAME_MBAFF); | |
|
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
321 left_type= h->left_type[0]; |
| 10854 | 322 |
| 323 mb_type = s->current_picture.mb_type[mb_xy]; | |
| 324 qp = s->current_picture.qscale_table[mb_xy]; | |
| 325 qp0 = s->current_picture.qscale_table[mb_xy-1]; | |
| 326 qp1 = s->current_picture.qscale_table[h->top_mb_xy]; | |
| 327 qpc = get_chroma_qp( h, 0, qp ); | |
| 328 qpc0 = get_chroma_qp( h, 0, qp0 ); | |
| 329 qpc1 = get_chroma_qp( h, 0, qp1 ); | |
| 330 qp0 = (qp + qp0 + 1) >> 1; | |
| 331 qp1 = (qp + qp1 + 1) >> 1; | |
| 332 qpc0 = (qpc + qpc0 + 1) >> 1; | |
| 333 qpc1 = (qpc + qpc1 + 1) >> 1; | |
|
10979
776dba50775c
Move +52 from the loop filter to the alpha/beta offsets in the context.
michael
parents:
10973
diff
changeset
|
334 qp_thresh = 15+52 - h->slice_alpha_c0_offset; |
| 10854 | 335 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh && |
| 336 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh) | |
| 337 return; | |
| 338 | |
| 339 if( IS_INTRA(mb_type) ) { | |
| 340 int16_t bS4[4] = {4,4,4,4}; | |
| 341 int16_t bS3[4] = {3,3,3,3}; | |
| 342 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4; | |
|
11045
5f712d734793
factorize first filter call out, this makes the code somewhat
michael
parents:
11042
diff
changeset
|
343 if(left_type) |
|
5f712d734793
factorize first filter call out, this makes the code somewhat
michael
parents:
11042
diff
changeset
|
344 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); |
| 10854 | 345 if( IS_8x8DCT(mb_type) ) { |
| 10960 | 346 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); |
| 347 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); | |
| 348 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); | |
| 10854 | 349 } else { |
| 10960 | 350 filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h); |
| 351 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); | |
| 352 filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h); | |
| 353 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); | |
| 354 filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h); | |
| 355 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); | |
| 356 filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h); | |
| 10854 | 357 } |
|
11034
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
358 if(left_type){ |
|
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
359 filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); |
|
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
360 filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h); |
|
fd5921186064
Make the fast loop filter path work with unavailable left MBs.
michael
parents:
11033
diff
changeset
|
361 } |
| 10960 | 362 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); |
| 363 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); | |
| 364 filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); | |
| 365 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); | |
| 366 filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); | |
| 367 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); | |
| 10854 | 368 return; |
| 369 } else { | |
|
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
370 DECLARE_ALIGNED_8(int16_t, bS)[2][4][4]; |
| 10854 | 371 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; |
| 372 int edges; | |
| 373 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { | |
| 374 edges = 4; | |
| 375 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL; | |
| 376 } else { | |
| 11057 | 377 int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0; |
|
11058
af4b724b99d5
Replace h->left_type[0] by the local variable for it we have.
michael
parents:
11057
diff
changeset
|
378 int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0; |
| 11056 | 379 int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1; |
| 380 edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; | |
| 10854 | 381 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, |
|
10984
541acd292c48
Remove all uses of slice_type* from the loop filter, also remove its
michael
parents:
10979
diff
changeset
|
382 h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); |
| 10854 | 383 } |
|
11058
af4b724b99d5
Replace h->left_type[0] by the local variable for it we have.
michael
parents:
11057
diff
changeset
|
384 if( IS_INTRA(left_type) ) |
| 10854 | 385 bSv[0][0] = 0x0004000400040004ULL; |
|
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
386 if( IS_INTRA(h->top_type) ) |
| 10854 | 387 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL; |
| 388 | |
| 389 #define FILTER(hv,dir,edge)\ | |
| 390 if(bSv[dir][edge]) {\ | |
| 10960 | 391 filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\ |
| 10854 | 392 if(!(edge&1)) {\ |
| 10960 | 393 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ |
| 394 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ | |
| 10854 | 395 }\ |
| 396 } | |
|
11045
5f712d734793
factorize first filter call out, this makes the code somewhat
michael
parents:
11042
diff
changeset
|
397 if(left_type) |
|
5f712d734793
factorize first filter call out, this makes the code somewhat
michael
parents:
11042
diff
changeset
|
398 FILTER(v,0,0); |
| 10854 | 399 if( edges == 1 ) { |
| 400 FILTER(h,1,0); | |
| 401 } else if( IS_8x8DCT(mb_type) ) { | |
| 402 FILTER(v,0,2); | |
| 403 FILTER(h,1,0); | |
| 404 FILTER(h,1,2); | |
| 405 } else { | |
| 406 FILTER(v,0,1); | |
| 407 FILTER(v,0,2); | |
| 408 FILTER(v,0,3); | |
| 409 FILTER(h,1,0); | |
| 410 FILTER(h,1,1); | |
| 411 FILTER(h,1,2); | |
| 412 FILTER(h,1,3); | |
| 413 } | |
| 414 #undef FILTER | |
| 415 } | |
| 416 } | |
| 417 | |
| 11037 | 418 static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){ |
| 11038 | 419 int v; |
| 11037 | 420 |
|
11061
40b84645b089
Split first reference list comparission from mv comparission.
michael
parents:
11058
diff
changeset
|
421 v= h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx]; |
|
40b84645b089
Split first reference list comparission from mv comparission.
michael
parents:
11058
diff
changeset
|
422 if(!v && h->ref_cache[0][b_idx]!=-1) |
|
40b84645b089
Split first reference list comparission from mv comparission.
michael
parents:
11058
diff
changeset
|
423 v= h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U | |
|
40b84645b089
Split first reference list comparission from mv comparission.
michael
parents:
11058
diff
changeset
|
424 FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit; |
| 11039 | 425 |
| 426 if(h->list_count==2){ | |
| 427 if(!v) | |
| 11041 | 428 v = h->ref_cache[1][b_idx] != h->ref_cache[1][bn_idx] | |
| 429 h->mv_cache[1][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U | | |
| 430 FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit; | |
| 11037 | 431 |
| 11041 | 432 if(v){ |
| 433 if(h->ref_cache[0][b_idx] != h->ref_cache[1][bn_idx] | | |
| 434 h->ref_cache[1][b_idx] != h->ref_cache[0][bn_idx]) | |
| 435 return 1; | |
| 436 return | |
| 437 h->mv_cache[0][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U | | |
| 438 FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit | | |
| 439 h->mv_cache[1][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U | | |
| 440 FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit; | |
| 441 } | |
| 11039 | 442 } |
| 11037 | 443 |
| 444 return v; | |
| 445 } | |
| 10854 | 446 |
| 447 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) { | |
| 448 MpegEncContext * const s = &h->s; | |
| 449 int edge; | |
| 450 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; | |
|
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
451 const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type; |
| 10854 | 452 |
| 453 // how often to recheck mv-based bS when iterating between edges | |
|
10958
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
454 static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1}, |
|
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
455 {0,3,1,1,3,3,3,3}}; |
|
304db572a69a
Make calculation of mask_edge free of branches, faster of course but probably
michael
parents:
10950
diff
changeset
|
456 const int mask_edge = mask_edge_tab[dir][(mb_type>>3)&7]; |
|
10973
214adf5e303b
Set edges based on cbp and mv partitioning, not just skiped MBs.
michael
parents:
10970
diff
changeset
|
457 const int edges = mask_edge== 3 && !(h->cbp&15) ? 1 : 4; |
|
214adf5e303b
Set edges based on cbp and mv partitioning, not just skiped MBs.
michael
parents:
10970
diff
changeset
|
458 |
| 10854 | 459 // how often to recheck mv-based bS when iterating along each edge |
| 460 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)); | |
| 11033 | 461 |
| 11036 | 462 if(mbm_type && !first_vertical_edge_done){ |
| 10854 | 463 |
| 11033 | 464 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
465 && IS_INTERLACED(mbm_type&~mb_type) |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
466 ) { |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
467 // This is a special case in the norm where the filtering must |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
468 // be done twice (one each of the field) even if we are in a |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
469 // frame macroblock. |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
470 // |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
471 unsigned int tmp_linesize = 2 * linesize; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
472 unsigned int tmp_uvlinesize = 2 * uvlinesize; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
473 int mbn_xy = mb_xy - 2 * s->mb_stride; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
474 int j; |
| 10854 | 475 |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
476 for(j=0; j<2; j++, mbn_xy += s->mb_stride){ |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
477 DECLARE_ALIGNED_8(int16_t, bS)[4]; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
478 int qp; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
479 if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) { |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
480 *(uint64_t*)bS= 0x0003000300030003ULL; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
481 } else { |
| 11087 | 482 if(!CABAC && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])){ |
| 483 bS[0]= 1+((h->cbp_table[mbn_xy] & 4)||h->non_zero_count_cache[scan8[0]+0]); | |
| 484 bS[1]= 1+((h->cbp_table[mbn_xy] & 4)||h->non_zero_count_cache[scan8[0]+1]); | |
| 485 bS[2]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+2]); | |
| 486 bS[3]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+3]); | |
| 487 }else{ | |
| 488 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8; | |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
489 int i; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
490 for( i = 0; i < 4; i++ ) { |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
491 bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
492 } |
| 11087 | 493 } |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
494 } |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
495 // Do not use s->qscale as luma quantizer because it has not the same |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
496 // value in IPCM macroblocks. |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
497 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
498 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
499 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
500 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h ); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
501 filter_mb_edgech( &img_cb[j*uvlinesize], tmp_uvlinesize, bS, |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
502 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
503 filter_mb_edgech( &img_cr[j*uvlinesize], tmp_uvlinesize, bS, |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
504 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
505 } |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
506 }else{ |
|
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
507 DECLARE_ALIGNED_8(int16_t, bS)[4]; |
|
10949
4c9b8e3065ee
Simplify/Optimize another of the mbaff loop filter cases.
michael
parents:
10948
diff
changeset
|
508 int qp; |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
509 |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
510 if( IS_INTRA(mb_type|mbm_type)) { |
| 10947 | 511 *(uint64_t*)bS= 0x0003000300030003ULL; |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
512 if ( (!IS_INTERLACED(mb_type|mbm_type)) |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
513 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0)) |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
514 ) |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
515 *(uint64_t*)bS= 0x0004000400040004ULL; |
| 10854 | 516 } else { |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
517 int i, l; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
518 int mv_done; |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
519 |
|
11064
86ec27e828b5
Get rid of a check in one direction that cant be true in it in that part
michael
parents:
11061
diff
changeset
|
520 if( dir && FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) { |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
521 *(uint64_t*)bS= 0x0001000100010001ULL; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
522 mv_done = 1; |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
523 } |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
524 else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
525 int b_idx= 8 + 4; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
526 int bn_idx= b_idx - (dir ? 8:1); |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
527 |
| 11037 | 528 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, 8 + 4, bn_idx, mvy_limit); |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
529 mv_done = 1; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
530 } |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
531 else |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
532 mv_done = 0; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
533 |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
534 for( i = 0; i < 4; i++ ) { |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
535 int x = dir == 0 ? 0 : i; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
536 int y = dir == 0 ? i : 0; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
537 int b_idx= 8 + 4 + x + 8*y; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
538 int bn_idx= b_idx - (dir ? 8:1); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
539 |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
540 if( h->non_zero_count_cache[b_idx] | |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
541 h->non_zero_count_cache[bn_idx] ) { |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
542 bS[i] = 2; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
543 } |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
544 else if(!mv_done) |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
545 { |
| 11037 | 546 bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit); |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
547 } |
|
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
548 } |
|
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
549 } |
|
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
550 |
|
11035
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
551 /* Filter edge */ |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
552 // Do not use s->qscale as luma quantizer because it has not the same |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
553 // value in IPCM macroblocks. |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
554 if(bS[0]+bS[1]+bS[2]+bS[3]){ |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
555 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbm_xy] + 1 ) >> 1; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
556 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
557 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
558 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
559 if( dir == 0 ) { |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
560 filter_mb_edgev( &img_y[0], linesize, bS, qp, h ); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
561 { |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
562 int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
563 filter_mb_edgecv( &img_cb[0], uvlinesize, bS, qp, h); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
564 if(h->pps.chroma_qp_diff) |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
565 qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
566 filter_mb_edgecv( &img_cr[0], uvlinesize, bS, qp, h); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
567 } |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
568 } else { |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
569 filter_mb_edgeh( &img_y[0], linesize, bS, qp, h ); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
570 { |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
571 int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
572 filter_mb_edgech( &img_cb[0], uvlinesize, bS, qp, h); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
573 if(h->pps.chroma_qp_diff) |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
574 qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
575 filter_mb_edgech( &img_cr[0], uvlinesize, bS, qp, h); |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
576 } |
|
4debec8a15fa
Cosmetics, mostly indention, 2 or so new fixme comments that i was to lazy
michael
parents:
11034
diff
changeset
|
577 } |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
578 } |
|
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
579 } |
| 11033 | 580 } |
| 581 | |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
582 /* Calculate bS */ |
|
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
583 for( edge = 1; edge < edges; edge++ ) { |
|
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
584 DECLARE_ALIGNED_8(int16_t, bS)[4]; |
| 10854 | 585 int qp; |
| 586 | |
|
10969
2e8fbfc278d5
Optmize 8x8dct check used to skip some borders in the loop filter.
michael
parents:
10961
diff
changeset
|
587 if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) |
| 10854 | 588 continue; |
| 589 | |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
590 if( IS_INTRA(mb_type)) { |
|
10945
0d93bbc17950
Simplify and optimize intra code in h264_loopfilter.c
michael
parents:
10942
diff
changeset
|
591 *(uint64_t*)bS= 0x0003000300030003ULL; |
| 10854 | 592 } else { |
| 593 int i, l; | |
| 594 int mv_done; | |
| 595 | |
| 596 if( edge & mask_edge ) { | |
| 10947 | 597 *(uint64_t*)bS= 0; |
| 10854 | 598 mv_done = 1; |
| 599 } | |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
600 else if( mask_par0 ) { |
| 10854 | 601 int b_idx= 8 + 4 + edge * (dir ? 8:1); |
| 602 int bn_idx= b_idx - (dir ? 8:1); | |
| 603 | |
| 11037 | 604 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, b_idx, bn_idx, mvy_limit); |
| 10854 | 605 mv_done = 1; |
| 606 } | |
| 607 else | |
| 608 mv_done = 0; | |
| 609 | |
| 610 for( i = 0; i < 4; i++ ) { | |
| 611 int x = dir == 0 ? edge : i; | |
| 612 int y = dir == 0 ? i : edge; | |
| 613 int b_idx= 8 + 4 + x + 8*y; | |
| 614 int bn_idx= b_idx - (dir ? 8:1); | |
| 615 | |
| 616 if( h->non_zero_count_cache[b_idx] | | |
| 617 h->non_zero_count_cache[bn_idx] ) { | |
| 618 bS[i] = 2; | |
| 619 } | |
| 620 else if(!mv_done) | |
| 621 { | |
| 11037 | 622 bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit); |
| 10854 | 623 } |
| 624 } | |
| 625 | |
| 626 if(bS[0]+bS[1]+bS[2]+bS[3] == 0) | |
| 627 continue; | |
| 628 } | |
| 629 | |
| 630 /* Filter edge */ | |
| 631 // Do not use s->qscale as luma quantizer because it has not the same | |
| 632 // value in IPCM macroblocks. | |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
633 qp = s->current_picture.qscale_table[mb_xy]; |
| 10906 | 634 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); |
| 10854 | 635 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); |
| 10904 | 636 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } |
| 10854 | 637 if( dir == 0 ) { |
| 10960 | 638 filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h ); |
| 10854 | 639 if( (edge&1) == 0 ) { |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
640 filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, h->chroma_qp[0], h); |
|
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
641 filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, h->chroma_qp[1], h); |
| 10854 | 642 } |
| 643 } else { | |
| 10960 | 644 filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h ); |
| 10854 | 645 if( (edge&1) == 0 ) { |
|
11032
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
646 filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h); |
|
01bd040f8607
Unroll main loop so the edge==0 case is seperate.
michael
parents:
11026
diff
changeset
|
647 filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h); |
| 10854 | 648 } |
| 649 } | |
| 650 } | |
| 651 } | |
| 652 | |
| 653 void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { | |
| 654 MpegEncContext * const s = &h->s; | |
| 655 const int mb_xy= mb_x + mb_y*s->mb_stride; | |
| 656 const int mb_type = s->current_picture.mb_type[mb_xy]; | |
| 657 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4; | |
| 658 int first_vertical_edge_done = 0; | |
| 659 av_unused int dir; | |
| 10906 | 660 int list; |
| 10854 | 661 |
| 662 if (FRAME_MBAFF | |
| 663 // and current and left pair do not have the same interlaced type | |
|
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
664 && IS_INTERLACED(mb_type^h->left_type[0]) |
| 11026 | 665 // and left mb is in available to us |
|
11013
5e5d44c920b6
Simplify loop filter a little by using top/left_type.
michael
parents:
10984
diff
changeset
|
666 && h->left_type[0]) { |
| 10854 | 667 /* First vertical edge is different in MBAFF frames |
| 668 * There are 8 different bS to compute and 2 different Qp | |
| 669 */ | |
|
10961
34a65026fa06
Move array specifiers outside DECLARE_ALIGNED() invocations
mru
parents:
10960
diff
changeset
|
670 DECLARE_ALIGNED_8(int16_t, bS)[8]; |
| 10854 | 671 int qp[2]; |
| 672 int bqp[2]; | |
| 673 int rqp[2]; | |
| 674 int mb_qp, mbn0_qp, mbn1_qp; | |
| 675 int i; | |
| 676 first_vertical_edge_done = 1; | |
| 677 | |
| 678 if( IS_INTRA(mb_type) ) | |
| 10947 | 679 *(uint64_t*)&bS[0]= |
| 680 *(uint64_t*)&bS[4]= 0x0004000400040004ULL; | |
| 10854 | 681 else { |
|
11025
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
682 static const uint8_t offset[2][2][8]={ |
|
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
683 { |
|
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
684 {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1}, |
|
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
685 {7+8*2, 7+8*2, 7+8*2, 7+8*2, 7+8*3, 7+8*3, 7+8*3, 7+8*3}, |
|
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
686 },{ |
|
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
687 {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, |
|
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
688 {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, |
|
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
689 } |
|
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
690 }; |
|
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
691 const uint8_t *off= offset[MB_FIELD][mb_y&1]; |
| 10854 | 692 for( i = 0; i < 8; i++ ) { |
|
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
693 int j= MB_FIELD ? i>>2 : i&1; |
|
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
694 int mbn_xy = h->left_mb_xy[j]; |
|
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
695 int mbn_type= h->left_type[j]; |
| 10854 | 696 |
|
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
697 if( IS_INTRA( mbn_type ) ) |
| 10854 | 698 bS[i] = 4; |
|
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
699 else{ |
|
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
700 bS[i] = 1 + !!(h->non_zero_count_cache[12+8*(i>>1)] | |
|
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
701 ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ? |
| 10854 | 702 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) |
| 703 : | |
|
11025
cd1f5f6a2e45
Use table to speedup access to non_zero_count in MBAFF with differing interlacing.
michael
parents:
11021
diff
changeset
|
704 h->non_zero_count[mbn_xy][ off[i] ])); |
|
11021
2bc05f2fc993
Optimize loop filtering of the left edge in MBAFF.
michael
parents:
11016
diff
changeset
|
705 } |
| 10854 | 706 } |
| 707 } | |
| 708 | |
| 709 mb_qp = s->current_picture.qscale_table[mb_xy]; | |
|
11015
d844c58b985a
Use left_mb_xy from fill_caches instead of recalculating it.
michael
parents:
11013
diff
changeset
|
710 mbn0_qp = s->current_picture.qscale_table[h->left_mb_xy[0]]; |
|
d844c58b985a
Use left_mb_xy from fill_caches instead of recalculating it.
michael
parents:
11013
diff
changeset
|
711 mbn1_qp = s->current_picture.qscale_table[h->left_mb_xy[1]]; |
| 10854 | 712 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1; |
| 713 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) + | |
| 714 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1; | |
| 715 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) + | |
| 716 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1; | |
| 717 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1; | |
| 718 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) + | |
| 719 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1; | |
| 720 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) + | |
| 721 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1; | |
| 722 | |
| 723 /* Filter edge */ | |
| 724 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize); | |
| 725 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } | |
|
10924
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
726 if(MB_FIELD){ |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
727 filter_mb_mbaff_edgev ( h, img_y , linesize, bS , 1, qp [0] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
728 filter_mb_mbaff_edgev ( h, img_y + 8* linesize, linesize, bS+4, 1, qp [1] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
729 filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
730 filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
731 filter_mb_mbaff_edgecv( h, img_cr, uvlinesize, bS , 1, rqp[0] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
732 filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
733 }else{ |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
734 filter_mb_mbaff_edgev ( h, img_y , 2* linesize, bS , 2, qp [0] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
735 filter_mb_mbaff_edgev ( h, img_y + linesize, 2* linesize, bS+1, 2, qp [1] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
736 filter_mb_mbaff_edgecv( h, img_cb, 2*uvlinesize, bS , 2, bqp[0] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
737 filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
738 filter_mb_mbaff_edgecv( h, img_cr, 2*uvlinesize, bS , 2, rqp[0] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
739 filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] ); |
|
fb0307a3355e
Rather call filter_mb_mbaff_edge*v() more often than do extra calculations
michael
parents:
10922
diff
changeset
|
740 } |
| 10854 | 741 } |
| 742 | |
| 743 #if CONFIG_SMALL | |
| 744 for( dir = 0; dir < 2; dir++ ) | |
| 745 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir); | |
| 746 #else | |
| 747 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0); | |
| 748 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1); | |
| 749 #endif | |
| 750 } |
