Mercurial > libavcodec.hg
annotate h264_mvpred.h @ 12510:ef2f2db5b7be libavcodec
Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
code directly also and remove loop setup. 20% faster in function, 0.8% overall.
See "[PATCH] unroll loop in h264_idct_add8_sse2()" thread on ML.
| author | rbultje |
|---|---|
| date | Fri, 24 Sep 2010 14:05:45 +0000 |
| parents | 7dd2a45249a9 |
| children |
| rev | line source |
|---|---|
| 1168 | 1 /* |
| 10864 | 2 * H.26L/H.264/AVC/JVT/14496-10/... motion vector prediction |
| 1168 | 3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> |
| 4 * | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
5 * This file is part of FFmpeg. |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
6 * |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
| 1168 | 8 * modify it under the terms of the GNU Lesser General Public |
| 9 * License as published by the Free Software Foundation; either | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
| 1168 | 11 * |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
| 1168 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 15 * Lesser General Public License for more details. | |
| 16 * | |
| 17 * You should have received a copy of the GNU Lesser General Public | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3927
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
|
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3029
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 1168 | 20 */ |
| 2967 | 21 |
| 1168 | 22 /** |
|
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11293
diff
changeset
|
23 * @file |
| 10864 | 24 * H.264 / AVC / MPEG4 part10 motion vector prediction. |
| 1168 | 25 * @author Michael Niedermayer <michaelni@gmx.at> |
| 26 */ | |
| 27 | |
|
10882
5d34ab807e91
Add forgotton multiple inclusion guards to h264_mvpred.h.
michael
parents:
10864
diff
changeset
|
28 #ifndef AVCODEC_H264_MVPRED_H |
|
5d34ab807e91
Add forgotton multiple inclusion guards to h264_mvpred.h.
michael
parents:
10864
diff
changeset
|
29 #define AVCODEC_H264_MVPRED_H |
|
5d34ab807e91
Add forgotton multiple inclusion guards to h264_mvpred.h.
michael
parents:
10864
diff
changeset
|
30 |
|
9012
15a3df8c01fd
More approved hunks for VAAPI & our new and cleaner hwaccel API.
michael
parents:
9004
diff
changeset
|
31 #include "internal.h" |
| 1168 | 32 #include "avcodec.h" |
| 4975 | 33 #include "h264.h" |
|
1908
e20fd60b215c
h264 - progressive I frame CABAC support patch by (Laurent Aimar <fenrir at via dot ecp dot fr>)
michael
parents:
1899
diff
changeset
|
34 |
|
3284
a224d9752912
don't force asserts in release builds. 2% faster h264.
lorenm
parents:
3219
diff
changeset
|
35 //#undef NDEBUG |
| 1168 | 36 #include <assert.h> |
| 37 | |
| 10864 | 38 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){ |
| 39 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ]; | |
| 40 MpegEncContext *s = &h->s; | |
| 7481 | 41 |
| 10864 | 42 /* there is no consistent mapping of mvs to neighboring locations that will |
| 43 * make mbaff happy, so we can't move all this logic to fill_caches */ | |
| 44 if(FRAME_MBAFF){ | |
| 8443 | 45 |
|
11292
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
46 #define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)\ |
|
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
47 const int xy = XY, y4 = Y4;\ |
|
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
48 const int mb_type = mb_types[xy+(y4>>2)*s->mb_stride];\ |
| 10864 | 49 if(!USES_LIST(mb_type,list))\ |
| 50 return LIST_NOT_USED;\ | |
|
11292
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
51 mv = s->current_picture_ptr->motion_val[list][h->mb2b_xy[xy]+3 + y4*h->b_stride];\ |
| 10864 | 52 h->mv_cache[list][scan8[0]-2][0] = mv[0];\ |
| 53 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\ | |
|
11292
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
54 return s->current_picture_ptr->ref_index[list][4*xy+1 + (y4&~1)] REF_OP; |
| 2967 | 55 |
| 10864 | 56 if(topright_ref == PART_NOT_AVAILABLE |
|
10933
3a7fa91fa168
Remove 2 checks from fetch_diagonal_mv() that apparently serve no purpose.
michael
parents:
10932
diff
changeset
|
57 && i >= scan8[0]+8 && (i&7)==4 |
| 10864 | 58 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){ |
|
10932
5f0ef177bbac
Move a few things into a deeper if() in fetch_diagonal_mv().
michael
parents:
10931
diff
changeset
|
59 const uint32_t *mb_types = s->current_picture_ptr->mb_type; |
|
5f0ef177bbac
Move a few things into a deeper if() in fetch_diagonal_mv().
michael
parents:
10931
diff
changeset
|
60 const int16_t *mv; |
| 11203 | 61 AV_ZERO32(h->mv_cache[list][scan8[0]-2]); |
|
10932
5f0ef177bbac
Move a few things into a deeper if() in fetch_diagonal_mv().
michael
parents:
10931
diff
changeset
|
62 *C = h->mv_cache[list][scan8[0]-2]; |
|
5f0ef177bbac
Move a few things into a deeper if() in fetch_diagonal_mv().
michael
parents:
10931
diff
changeset
|
63 |
| 10864 | 64 if(!MB_FIELD |
| 11293 | 65 && IS_INTERLACED(h->left_type[0])){ |
| 66 SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5)); | |
|
11292
411ab09ada91
Get rid of mb2b8_xy and b8_stride, change arrays organized based on b8_stride to
michael
parents:
11203
diff
changeset
|
67 assert(h->left_mb_xy[0] == h->left_mb_xy[1]); |
| 10864 | 68 } |
| 69 if(MB_FIELD | |
| 11293 | 70 && !IS_INTERLACED(h->left_type[0])){ |
| 10864 | 71 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK. |
| 11293 | 72 SET_DIAG_MV(/2, <<1, h->left_mb_xy[i>=36], ((i>>2))&3); |
|
2551
615995277bc5
MBAFF I slice no deblocking patch by (Loic >>lll+ffmpeg m4x org<<)
michael
parents:
2548
diff
changeset
|
73 } |
|
615995277bc5
MBAFF I slice no deblocking patch by (Loic >>lll+ffmpeg m4x org<<)
michael
parents:
2548
diff
changeset
|
74 } |
| 10864 | 75 #undef SET_DIAG_MV |
| 1168 | 76 } |
| 77 | |
| 10864 | 78 if(topright_ref != PART_NOT_AVAILABLE){ |
| 79 *C= h->mv_cache[list][ i - 8 + part_width ]; | |
| 80 return topright_ref; | |
| 81 }else{ | |
| 82 tprintf(s->avctx, "topright MV not available\n"); | |
| 83 | |
| 84 *C= h->mv_cache[list][ i - 8 - 1 ]; | |
| 85 return h->ref_cache[list][ i - 8 - 1 ]; | |
| 86 } | |
| 87 } | |
| 88 | |
| 89 /** | |
| 90 * gets the predicted MV. | |
| 91 * @param n the block index | |
| 92 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4) | |
| 93 * @param mx the x component of the predicted motion vector | |
| 94 * @param my the y component of the predicted motion vector | |
| 95 */ | |
| 96 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){ | |
| 97 const int index8= scan8[n]; | |
| 98 const int top_ref= h->ref_cache[list][ index8 - 8 ]; | |
| 99 const int left_ref= h->ref_cache[list][ index8 - 1 ]; | |
| 100 const int16_t * const A= h->mv_cache[list][ index8 - 1 ]; | |
| 101 const int16_t * const B= h->mv_cache[list][ index8 - 8 ]; | |
| 102 const int16_t * C; | |
| 103 int diagonal_ref, match_count; | |
| 3316 | 104 |
| 10864 | 105 assert(part_width==1 || part_width==2 || part_width==4); |
| 106 | |
| 107 /* mv_cache | |
| 108 B . . A T T T T | |
| 109 U . . L . . , . | |
| 110 U . . L . . . . | |
| 111 U . . L . . , . | |
| 112 . . . L . . . . | |
| 113 */ | |
| 114 | |
| 115 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width); | |
| 116 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); | |
| 117 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count); | |
| 118 if(match_count > 1){ //most common | |
| 119 *mx= mid_pred(A[0], B[0], C[0]); | |
| 120 *my= mid_pred(A[1], B[1], C[1]); | |
| 121 }else if(match_count==1){ | |
| 122 if(left_ref==ref){ | |
| 123 *mx= A[0]; | |
| 124 *my= A[1]; | |
| 125 }else if(top_ref==ref){ | |
| 126 *mx= B[0]; | |
| 127 *my= B[1]; | |
| 128 }else{ | |
| 129 *mx= C[0]; | |
| 130 *my= C[1]; | |
| 3316 | 131 } |
| 2449 | 132 }else{ |
| 10864 | 133 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ |
| 134 *mx= A[0]; | |
| 135 *my= A[1]; | |
| 7532 | 136 }else{ |
| 10864 | 137 *mx= mid_pred(A[0], B[0], C[0]); |
| 138 *my= mid_pred(A[1], B[1], C[1]); | |
|
2551
615995277bc5
MBAFF I slice no deblocking patch by (Loic >>lll+ffmpeg m4x org<<)
michael
parents:
2548
diff
changeset
|
139 } |
|
615995277bc5
MBAFF I slice no deblocking patch by (Loic >>lll+ffmpeg m4x org<<)
michael
parents:
2548
diff
changeset
|
140 } |
|
615995277bc5
MBAFF I slice no deblocking patch by (Loic >>lll+ffmpeg m4x org<<)
michael
parents:
2548
diff
changeset
|
141 |
| 10864 | 142 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); |
| 1168 | 143 } |
| 144 | |
| 145 /** | |
| 146 * gets the directionally predicted 16x8 MV. | |
| 147 * @param n the block index | |
| 148 * @param mx the x component of the predicted motion vector | |
| 149 * @param my the y component of the predicted motion vector | |
| 150 */ | |
| 151 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ | |
| 152 if(n==0){ | |
| 153 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ]; | |
| 154 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; | |
| 155 | |
| 4600 | 156 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); |
| 2967 | 157 |
| 1168 | 158 if(top_ref == ref){ |
| 159 *mx= B[0]; | |
| 160 *my= B[1]; | |
| 161 return; | |
| 162 } | |
| 163 }else{ | |
| 164 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ]; | |
| 165 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ]; | |
| 2967 | 166 |
| 4600 | 167 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); |
| 1168 | 168 |
| 169 if(left_ref == ref){ | |
| 170 *mx= A[0]; | |
| 171 *my= A[1]; | |
| 172 return; | |
| 173 } | |
| 174 } | |
| 175 | |
| 176 //RARE | |
| 177 pred_motion(h, n, 4, list, ref, mx, my); | |
| 178 } | |
| 179 | |
| 180 /** | |
| 181 * gets the directionally predicted 8x16 MV. | |
| 182 * @param n the block index | |
| 183 * @param mx the x component of the predicted motion vector | |
| 184 * @param my the y component of the predicted motion vector | |
| 185 */ | |
| 186 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ | |
| 187 if(n==0){ | |
| 188 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ]; | |
| 189 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; | |
| 2967 | 190 |
| 4600 | 191 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); |
| 1168 | 192 |
| 193 if(left_ref == ref){ | |
| 194 *mx= A[0]; | |
| 195 *my= A[1]; | |
| 196 return; | |
| 197 } | |
| 198 }else{ | |
| 1169 | 199 const int16_t * C; |
| 200 int diagonal_ref; | |
| 201 | |
| 202 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2); | |
| 2967 | 203 |
| 4600 | 204 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); |
| 1168 | 205 |
| 2967 | 206 if(diagonal_ref == ref){ |
| 1168 | 207 *mx= C[0]; |
| 208 *my= C[1]; | |
| 209 return; | |
| 210 } | |
| 211 } | |
| 212 | |
| 213 //RARE | |
| 214 pred_motion(h, n, 2, list, ref, mx, my); | |
| 215 } | |
| 216 | |
| 217 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){ | |
| 218 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ]; | |
| 219 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ]; | |
| 220 | |
| 4600 | 221 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); |
| 1168 | 222 |
| 223 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE | |
| 11203 | 224 || !( top_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 8 ])) |
| 225 || !(left_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 1 ]))){ | |
| 2967 | 226 |
| 1168 | 227 *mx = *my = 0; |
| 228 return; | |
| 229 } | |
| 2967 | 230 |
| 1168 | 231 pred_motion(h, 0, 4, 0, 0, mx, my); |
| 232 | |
| 233 return; | |
| 234 } | |
|
10882
5d34ab807e91
Add forgotton multiple inclusion guards to h264_mvpred.h.
michael
parents:
10864
diff
changeset
|
235 |
|
5d34ab807e91
Add forgotton multiple inclusion guards to h264_mvpred.h.
michael
parents:
10864
diff
changeset
|
236 #endif /* AVCODEC_H264_MVPRED_H */ |
