Mercurial > libavcodec.hg
annotate snow.c @ 6474:e6995f3fbf7f libavcodec
cosmetics: Normalize {} placement after for, while, if.
| author | diego |
|---|---|
| date | Sat, 08 Mar 2008 20:24:24 +0000 |
| parents | 5154ab444372 |
| children | e9431bea10dc |
| rev | line source |
|---|---|
| 2138 | 1 /* |
| 2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> | |
| 3 * | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
4 * This file is part of FFmpeg. |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
5 * |
|
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
| 2138 | 7 * modify it under the terms of the GNU Lesser General Public |
| 8 * License as published by the Free Software Foundation; either | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
| 2138 | 10 * |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
| 2138 | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 14 * Lesser General Public License for more details. | |
| 15 * | |
| 16 * You should have received a copy of the GNU Lesser General Public | |
|
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
|
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3035
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 2138 | 19 */ |
| 20 | |
| 21 #include "avcodec.h" | |
| 22 #include "dsputil.h" | |
|
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
23 #include "snow.h" |
| 2335 | 24 |
| 25 #include "rangecoder.h" | |
| 2138 | 26 |
| 27 #include "mpegvideo.h" | |
| 28 | |
| 29 #undef NDEBUG | |
| 30 #include <assert.h> | |
| 31 | |
| 32 static const int8_t quant3[256]={ | |
| 33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, | |
| 49 }; | |
| 50 static const int8_t quant3b[256]={ | |
| 51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
| 67 }; | |
| 2596 | 68 static const int8_t quant3bA[256]={ |
| 69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
| 85 }; | |
| 2138 | 86 static const int8_t quant5[256]={ |
| 87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
| 88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
| 89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
| 90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
| 91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
| 92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
| 93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
| 94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
| 95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
| 96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
| 97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
| 98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
| 99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
| 100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
| 101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
| 102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, | |
| 103 }; | |
| 104 static const int8_t quant7[256]={ | |
| 105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
| 106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
| 107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, | |
| 108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
| 109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
| 110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
| 111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
| 112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
| 113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
| 114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
| 115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
| 116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
| 117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
| 118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, | |
| 119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
| 120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, | |
| 121 }; | |
| 122 static const int8_t quant9[256]={ | |
| 123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
| 124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
| 132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
| 133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
| 134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
| 135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
| 136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
| 137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, | |
| 138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, | |
| 139 }; | |
| 140 static const int8_t quant11[256]={ | |
| 141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, | |
| 142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
| 144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
| 145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
| 146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
| 147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
| 148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
| 149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
| 150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
| 151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
| 152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
| 153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
| 154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, | |
| 155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
| 156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, | |
| 157 }; | |
| 158 static const int8_t quant13[256]={ | |
| 159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
| 161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
| 162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
| 163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
| 164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
| 165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
| 166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
| 167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
| 168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
| 169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
| 170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
| 171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, | |
| 172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
| 173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
| 174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, | |
| 175 }; | |
| 176 | |
| 177 #if 0 //64*cubic | |
| 178 static const uint8_t obmc32[1024]={ | |
|
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 2138 | 211 //error:0.000022 |
| 212 }; | |
| 213 static const uint8_t obmc16[256]={ | |
|
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
| 2138 | 230 //error:0.000033 |
| 231 }; | |
| 232 #elif 1 // 64*linear | |
| 233 static const uint8_t obmc32[1024]={ | |
| 3206 | 234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, |
| 235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
| 236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
| 237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
| 238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
| 239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
| 240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
| 241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
| 242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
| 243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
| 244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
| 245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
| 246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
| 247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
| 248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
| 249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
| 250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
| 251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
| 252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
| 253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
| 254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
| 255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
| 256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
| 257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
| 258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
| 259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
| 260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
| 261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
| 262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
| 263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
| 264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
| 265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, | |
| 2138 | 266 //error:0.000020 |
| 267 }; | |
| 268 static const uint8_t obmc16[256]={ | |
| 3206 | 269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, |
| 270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
| 271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
| 272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
| 273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
| 274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
| 275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
| 276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
| 277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
| 278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
| 279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
| 280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
| 281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
| 282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
| 283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
| 284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, | |
| 2138 | 285 //error:0.000015 |
| 286 }; | |
| 287 #else //64*cos | |
| 288 static const uint8_t obmc32[1024]={ | |
|
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 2138 | 321 //error:0.000022 |
| 322 }; | |
| 323 static const uint8_t obmc16[256]={ | |
|
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
324 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
325 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
326 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
327 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
328 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
329 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
330 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
331 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
333 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
334 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
335 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
336 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
337 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
338 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, |
|
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
339 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
| 2138 | 340 //error:0.000022 |
| 341 }; | |
| 5910 | 342 #endif /* 0 */ |
| 2138 | 343 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
344 //linear *64 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
345 static const uint8_t obmc8[64]={ |
| 3206 | 346 4, 12, 20, 28, 28, 20, 12, 4, |
| 347 12, 36, 60, 84, 84, 60, 36, 12, | |
| 348 20, 60,100,140,140,100, 60, 20, | |
| 349 28, 84,140,196,196,140, 84, 28, | |
| 350 28, 84,140,196,196,140, 84, 28, | |
| 351 20, 60,100,140,140,100, 60, 20, | |
| 352 12, 36, 60, 84, 84, 60, 36, 12, | |
| 353 4, 12, 20, 28, 28, 20, 12, 4, | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
354 //error:0.000000 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
355 }; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
356 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
357 //linear *64 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
358 static const uint8_t obmc4[16]={ |
| 3206 | 359 16, 48, 48, 16, |
| 360 48,144,144, 48, | |
| 361 48,144,144, 48, | |
| 362 16, 48, 48, 16, | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
363 //error:0.000000 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
364 }; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
365 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
366 static const uint8_t *obmc_tab[4]={ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
367 obmc32, obmc16, obmc8, obmc4 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
368 }; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
369 |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; |
|
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
371 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
372 typedef struct BlockNode{ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
373 int16_t mx; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
374 int16_t my; |
| 3314 | 375 uint8_t ref; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
376 uint8_t color[3]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
377 uint8_t type; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
378 //#define TYPE_SPLIT 1 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
379 #define BLOCK_INTRA 1 |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
380 #define BLOCK_OPT 2 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
381 //#define TYPE_NOCOLOR 4 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
382 uint8_t level; //FIXME merge into type? |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
383 }BlockNode; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
384 |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
385 static const BlockNode null_block= { //FIXME add border maybe |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
386 .color= {128,128,128}, |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
387 .mx= 0, |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
388 .my= 0, |
| 3314 | 389 .ref= 0, |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
390 .type= 0, |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
391 .level= 0, |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
392 }; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
393 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
394 #define LOG2_MB_SIZE 4 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
395 #define MB_SIZE (1<<LOG2_MB_SIZE) |
| 5575 | 396 #define ENCODER_EXTRA_BITS 4 |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
397 #define HTAPS_MAX 8 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
398 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
399 typedef struct x_and_coeff{ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
400 int16_t x; |
| 2596 | 401 uint16_t coeff; |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
402 } x_and_coeff; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
403 |
| 2138 | 404 typedef struct SubBand{ |
| 405 int level; | |
| 406 int stride; | |
| 407 int width; | |
| 408 int height; | |
| 6412 | 409 int qlog; ///< log(qscale)/log[2^(1/6)] |
| 2138 | 410 DWTELEM *buf; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
411 IDWTELEM *ibuf; |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
412 int buf_x_offset; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
413 int buf_y_offset; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
414 int stride_line; ///< Stride measured in lines, not pixels. |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
415 x_and_coeff * x_coeff; |
| 2138 | 416 struct SubBand *parent; |
| 417 uint8_t state[/*7*2*/ 7 + 512][32]; | |
| 418 }SubBand; | |
| 419 | |
| 420 typedef struct Plane{ | |
| 421 int width; | |
| 422 int height; | |
| 423 SubBand band[MAX_DECOMPOSITIONS][4]; | |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
424 |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
425 int htaps; |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
426 int8_t hcoeff[HTAPS_MAX/2]; |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
427 int diag_mc; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
428 int fast_mc; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
429 |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
430 int last_htaps; |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
431 int8_t last_hcoeff[HTAPS_MAX/2]; |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
432 int last_diag_mc; |
| 2138 | 433 }Plane; |
| 434 | |
| 435 typedef struct SnowContext{ | |
| 6412 | 436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
| 2138 | 437 |
| 438 AVCodecContext *avctx; | |
| 2335 | 439 RangeCoder c; |
| 2138 | 440 DSPContext dsp; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
441 AVFrame new_picture; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
442 AVFrame input_picture; ///< new_picture with the internal linesizes |
| 2138 | 443 AVFrame current_picture; |
| 3314 | 444 AVFrame last_picture[MAX_REF_FRAMES]; |
|
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; |
| 2138 | 446 AVFrame mconly_picture; |
| 447 // uint8_t q_context[16]; | |
| 448 uint8_t header_state[32]; | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
449 uint8_t block_state[128 + 32*128]; |
| 2138 | 450 int keyframe; |
| 2199 | 451 int always_reset; |
| 2138 | 452 int version; |
| 453 int spatial_decomposition_type; | |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
454 int last_spatial_decomposition_type; |
| 2138 | 455 int temporal_decomposition_type; |
| 456 int spatial_decomposition_count; | |
|
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
457 int last_spatial_decomposition_count; |
| 2138 | 458 int temporal_decomposition_count; |
| 3314 | 459 int max_ref_frames; |
| 460 int ref_frames; | |
| 461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; | |
| 462 uint32_t *ref_scores[MAX_REF_FRAMES]; | |
| 2138 | 463 DWTELEM *spatial_dwt_buffer; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
464 IDWTELEM *spatial_idwt_buffer; |
| 2138 | 465 int colorspace_type; |
| 466 int chroma_h_shift; | |
| 467 int chroma_v_shift; | |
| 468 int spatial_scalability; | |
| 469 int qlog; | |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
470 int last_qlog; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
471 int lambda; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
472 int lambda2; |
| 3313 | 473 int pass1_rc; |
| 2138 | 474 int mv_scale; |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
475 int last_mv_scale; |
| 2138 | 476 int qbias; |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
477 int last_qbias; |
| 2138 | 478 #define QBIAS_SHIFT 3 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
479 int b_width; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
480 int b_height; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
481 int block_max_depth; |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
482 int last_block_max_depth; |
| 2138 | 483 Plane plane[MAX_PLANES]; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
484 BlockNode *block; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
485 #define ME_CACHE_SIZE 1024 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
486 int me_cache[ME_CACHE_SIZE]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
487 int me_cache_generation; |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
488 slice_buffer sb; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
489 |
| 6412 | 490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
| 2138 | 491 }SnowContext; |
| 492 | |
| 2562 | 493 typedef struct { |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
494 IDWTELEM *b0; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
495 IDWTELEM *b1; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
496 IDWTELEM *b2; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
497 IDWTELEM *b3; |
| 2562 | 498 int y; |
| 499 } dwt_compose_t; | |
| 500 | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
501 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
502 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
503 |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
504 static void iterative_me(SnowContext *s); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
505 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
507 { |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
508 int i; |
| 2967 | 509 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
510 buf->base_buffer = base_buffer; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
511 buf->line_count = line_count; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
512 buf->line_width = line_width; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
513 buf->data_count = max_allocated_lines; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); |
| 2967 | 516 |
|
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
517 for(i = 0; i < max_allocated_lines; i++){ |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
518 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
519 } |
| 2967 | 520 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
521 buf->data_stack_top = max_allocated_lines - 1; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
522 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
523 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
524 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
525 { |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
526 int offset; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
527 IDWTELEM * buffer; |
| 2967 | 528 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
529 assert(buf->data_stack_top >= 0); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
530 // assert(!buf->line[line]); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
531 if (buf->line[line]) |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
532 return buf->line[line]; |
| 2967 | 533 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
534 offset = buf->line_width * line; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
535 buffer = buf->data_stack[buf->data_stack_top]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
536 buf->data_stack_top--; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
537 buf->line[line] = buffer; |
| 2967 | 538 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
539 return buffer; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
540 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
541 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
542 static void slice_buffer_release(slice_buffer * buf, int line) |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
543 { |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
544 int offset; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
545 IDWTELEM * buffer; |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
546 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
547 assert(line >= 0 && line < buf->line_count); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
548 assert(buf->line[line]); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
549 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
550 offset = buf->line_width * line; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
551 buffer = buf->line[line]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
552 buf->data_stack_top++; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
553 buf->data_stack[buf->data_stack_top] = buffer; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
554 buf->line[line] = NULL; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
555 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
556 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
557 static void slice_buffer_flush(slice_buffer * buf) |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
558 { |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
559 int i; |
|
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
560 for(i = 0; i < buf->line_count; i++){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
561 if (buf->line[i]) |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
562 slice_buffer_release(buf, i); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
563 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
564 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
565 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
566 static void slice_buffer_destroy(slice_buffer * buf) |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
567 { |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
568 int i; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
569 slice_buffer_flush(buf); |
| 2967 | 570 |
|
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
571 for(i = buf->data_count - 1; i >= 0; i--){ |
| 3190 | 572 av_freep(&buf->data_stack[i]); |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
573 } |
| 3190 | 574 av_freep(&buf->data_stack); |
| 575 av_freep(&buf->line); | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
576 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
577 |
| 2979 | 578 #ifdef __sgi |
|
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
579 // Avoid a name clash on SGI IRIX |
| 2979 | 580 #undef qexp |
|
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
581 #endif |
| 2246 | 582 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 |
| 2600 | 583 static uint8_t qexp[QROOT]; |
| 2138 | 584 |
| 585 static inline int mirror(int v, int m){ | |
| 2998 | 586 while((unsigned)v > (unsigned)m){ |
| 587 v=-v; | |
| 588 if(v<0) v+= 2*m; | |
| 589 } | |
| 590 return v; | |
| 2138 | 591 } |
| 592 | |
| 2335 | 593 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ |
| 2138 | 594 int i; |
| 595 | |
| 596 if(v){ | |
| 4001 | 597 const int a= FFABS(v); |
| 2138 | 598 const int e= av_log2(a); |
| 599 #if 1 | |
| 2967 | 600 const int el= FFMIN(e, 10); |
| 2335 | 601 put_rac(c, state+0, 0); |
| 2138 | 602 |
| 603 for(i=0; i<el; i++){ | |
| 2335 | 604 put_rac(c, state+1+i, 1); //1..10 |
| 2138 | 605 } |
| 606 for(; i<e; i++){ | |
| 2335 | 607 put_rac(c, state+1+9, 1); //1..10 |
| 2138 | 608 } |
| 2335 | 609 put_rac(c, state+1+FFMIN(i,9), 0); |
| 2138 | 610 |
| 611 for(i=e-1; i>=el; i--){ | |
| 2335 | 612 put_rac(c, state+22+9, (a>>i)&1); //22..31 |
| 2138 | 613 } |
| 614 for(; i>=0; i--){ | |
| 2335 | 615 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
| 2138 | 616 } |
| 617 | |
| 618 if(is_signed) | |
| 2335 | 619 put_rac(c, state+11 + el, v < 0); //11..21 |
| 2138 | 620 #else |
| 2967 | 621 |
| 2335 | 622 put_rac(c, state+0, 0); |
| 2138 | 623 if(e<=9){ |
| 624 for(i=0; i<e; i++){ | |
| 2335 | 625 put_rac(c, state+1+i, 1); //1..10 |
| 2138 | 626 } |
| 2335 | 627 put_rac(c, state+1+i, 0); |
| 2138 | 628 |
| 629 for(i=e-1; i>=0; i--){ | |
| 2335 | 630 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
| 2138 | 631 } |
| 632 | |
| 633 if(is_signed) | |
| 2335 | 634 put_rac(c, state+11 + e, v < 0); //11..21 |
| 2138 | 635 }else{ |
| 636 for(i=0; i<e; i++){ | |
| 2335 | 637 put_rac(c, state+1+FFMIN(i,9), 1); //1..10 |
| 2138 | 638 } |
| 2335 | 639 put_rac(c, state+1+FFMIN(i,9), 0); |
| 2138 | 640 |
| 641 for(i=e-1; i>=0; i--){ | |
| 2335 | 642 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 |
| 2138 | 643 } |
| 644 | |
| 645 if(is_signed) | |
| 2335 | 646 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 |
| 2138 | 647 } |
| 5910 | 648 #endif /* 1 */ |
| 2138 | 649 }else{ |
| 2335 | 650 put_rac(c, state+0, 1); |
| 2138 | 651 } |
| 652 } | |
| 653 | |
| 2335 | 654 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ |
| 655 if(get_rac(c, state+0)) | |
| 2138 | 656 return 0; |
| 657 else{ | |
|
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
658 int i, e, a; |
|
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
659 e= 0; |
| 2335 | 660 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 |
|
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
661 e++; |
| 2138 | 662 } |
| 663 | |
|
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
664 a= 1; |
|
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
665 for(i=e-1; i>=0; i--){ |
| 2335 | 666 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 |
|
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
667 } |
|
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
668 |
| 2335 | 669 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 |
| 2138 | 670 return -a; |
| 671 else | |
| 672 return a; | |
| 673 } | |
| 674 } | |
| 675 | |
| 2335 | 676 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
677 int i; |
|
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
678 int r= log2>=0 ? 1<<log2 : 1; |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
679 |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
680 assert(v>=0); |
|
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
681 assert(log2>=-4); |
|
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
682 |
|
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
683 while(v >= r){ |
| 2335 | 684 put_rac(c, state+4+log2, 1); |
|
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
685 v -= r; |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
686 log2++; |
|
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
687 if(log2>0) r+=r; |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
688 } |
| 2335 | 689 put_rac(c, state+4+log2, 0); |
| 2967 | 690 |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
691 for(i=log2-1; i>=0; i--){ |
| 2335 | 692 put_rac(c, state+31-i, (v>>i)&1); |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
693 } |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
694 } |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
695 |
| 2335 | 696 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
697 int i; |
|
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
698 int r= log2>=0 ? 1<<log2 : 1; |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
699 int v=0; |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
700 |
|
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
701 assert(log2>=-4); |
|
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
702 |
| 2335 | 703 while(get_rac(c, state+4+log2)){ |
|
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
704 v+= r; |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
705 log2++; |
|
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
706 if(log2>0) r+=r; |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
707 } |
| 2967 | 708 |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
709 for(i=log2-1; i>=0; i--){ |
| 2335 | 710 v+= get_rac(c, state+31-i)<<i; |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
711 } |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
712 |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
713 return v; |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
714 } |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
715 |
| 5627 | 716 static av_always_inline void |
| 717 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
| 718 int dst_step, int src_step, int ref_step, | |
| 719 int width, int mul, int add, int shift, | |
| 720 int highpass, int inverse){ | |
| 2138 | 721 const int mirror_left= !highpass; |
| 722 const int mirror_right= (width&1) ^ highpass; | |
| 723 const int w= (width>>1) - 1 + (highpass & width); | |
| 724 int i; | |
| 725 | |
| 726 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) | |
| 727 if(mirror_left){ | |
| 728 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); | |
| 729 dst += dst_step; | |
| 730 src += src_step; | |
| 731 } | |
| 2967 | 732 |
| 2138 | 733 for(i=0; i<w; i++){ |
| 5627 | 734 dst[i*dst_step] = |
| 735 LIFT(src[i*src_step], | |
| 736 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
| 737 inverse); | |
| 2138 | 738 } |
| 2967 | 739 |
| 2138 | 740 if(mirror_right){ |
| 5627 | 741 dst[w*dst_step] = |
| 742 LIFT(src[w*src_step], | |
| 743 ((mul*2*ref[w*ref_step]+add)>>shift), | |
| 744 inverse); | |
| 2138 | 745 } |
| 746 } | |
| 747 | |
| 5627 | 748 static av_always_inline void |
| 749 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
| 750 int dst_step, int src_step, int ref_step, | |
| 751 int width, int mul, int add, int shift, | |
| 752 int highpass, int inverse){ | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
753 const int mirror_left= !highpass; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
754 const int mirror_right= (width&1) ^ highpass; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
755 const int w= (width>>1) - 1 + (highpass & width); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
756 int i; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
757 |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
758 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
759 if(mirror_left){ |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
760 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
761 dst += dst_step; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
762 src += src_step; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
763 } |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
764 |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
765 for(i=0; i<w; i++){ |
| 5627 | 766 dst[i*dst_step] = |
| 767 LIFT(src[i*src_step], | |
| 768 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
| 769 inverse); | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
770 } |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
771 |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
772 if(mirror_right){ |
| 5627 | 773 dst[w*dst_step] = |
| 774 LIFT(src[w*src_step], | |
| 775 ((mul*2*ref[w*ref_step]+add)>>shift), | |
| 776 inverse); | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
777 } |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
778 } |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
779 |
|
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
780 #ifndef liftS |
| 5627 | 781 static av_always_inline void |
| 782 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
| 783 int dst_step, int src_step, int ref_step, | |
| 784 int width, int mul, int add, int shift, | |
| 785 int highpass, int inverse){ | |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
786 const int mirror_left= !highpass; |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
787 const int mirror_right= (width&1) ^ highpass; |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
788 const int w= (width>>1) - 1 + (highpass & width); |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
789 int i; |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
790 |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
791 assert(shift == 4); |
| 5627 | 792 #define LIFTS(src, ref, inv) \ |
| 793 ((inv) ? \ | |
| 794 (src) + (((ref) + 4*(src))>>shift): \ | |
| 795 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
796 if(mirror_left){ |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
797 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
798 dst += dst_step; |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
799 src += src_step; |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
800 } |
| 2967 | 801 |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
802 for(i=0; i<w; i++){ |
| 5627 | 803 dst[i*dst_step] = |
| 804 LIFTS(src[i*src_step], | |
| 805 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
| 806 inverse); | |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
807 } |
| 2967 | 808 |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
809 if(mirror_right){ |
| 5627 | 810 dst[w*dst_step] = |
| 811 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
812 } |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
813 } |
| 5627 | 814 static av_always_inline void |
| 815 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
| 816 int dst_step, int src_step, int ref_step, | |
| 817 int width, int mul, int add, int shift, | |
| 818 int highpass, int inverse){ | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
819 const int mirror_left= !highpass; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
820 const int mirror_right= (width&1) ^ highpass; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
821 const int w= (width>>1) - 1 + (highpass & width); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
822 int i; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
823 |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
824 assert(shift == 4); |
| 5627 | 825 #define LIFTS(src, ref, inv) \ |
| 826 ((inv) ? \ | |
| 827 (src) + (((ref) + 4*(src))>>shift): \ | |
| 828 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
829 if(mirror_left){ |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
830 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
831 dst += dst_step; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
832 src += src_step; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
833 } |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
834 |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
835 for(i=0; i<w; i++){ |
| 5627 | 836 dst[i*dst_step] = |
| 837 LIFTS(src[i*src_step], | |
| 838 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
| 839 inverse); | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
840 } |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
841 |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
842 if(mirror_right){ |
| 5627 | 843 dst[w*dst_step] = |
| 844 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
845 } |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
846 } |
| 6413 | 847 #endif /* ! liftS */ |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
848 |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
849 static void horizontal_decompose53i(DWTELEM *b, int width){ |
|
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
850 DWTELEM temp[width]; |
| 2138 | 851 const int width2= width>>1; |
| 2893 | 852 int x; |
| 2138 | 853 const int w2= (width+1)>>1; |
| 854 | |
| 855 for(x=0; x<width2; x++){ | |
| 856 temp[x ]= b[2*x ]; | |
| 857 temp[x+w2]= b[2*x + 1]; | |
| 858 } | |
| 859 if(width&1) | |
| 860 temp[x ]= b[2*x ]; | |
| 861 #if 0 | |
| 2893 | 862 { |
| 863 int A1,A2,A3,A4; | |
| 2138 | 864 A2= temp[1 ]; |
| 865 A4= temp[0 ]; | |
| 866 A1= temp[0+width2]; | |
| 867 A1 -= (A2 + A4)>>1; | |
| 868 A4 += (A1 + 1)>>1; | |
| 869 b[0+width2] = A1; | |
| 870 b[0 ] = A4; | |
| 871 for(x=1; x+1<width2; x+=2){ | |
| 872 A3= temp[x+width2]; | |
| 873 A4= temp[x+1 ]; | |
| 874 A3 -= (A2 + A4)>>1; | |
| 875 A2 += (A1 + A3 + 2)>>2; | |
| 876 b[x+width2] = A3; | |
| 877 b[x ] = A2; | |
| 878 | |
| 879 A1= temp[x+1+width2]; | |
| 880 A2= temp[x+2 ]; | |
| 881 A1 -= (A2 + A4)>>1; | |
| 882 A4 += (A1 + A3 + 2)>>2; | |
| 883 b[x+1+width2] = A1; | |
| 884 b[x+1 ] = A4; | |
| 885 } | |
| 886 A3= temp[width-1]; | |
| 887 A3 -= A2; | |
| 888 A2 += (A1 + A3 + 2)>>2; | |
| 889 b[width -1] = A3; | |
| 890 b[width2-1] = A2; | |
| 2893 | 891 } |
| 2967 | 892 #else |
| 2138 | 893 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); |
| 894 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); | |
| 6413 | 895 #endif /* 0 */ |
| 2138 | 896 } |
| 897 | |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
898 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
| 2138 | 899 int i; |
| 2967 | 900 |
| 2138 | 901 for(i=0; i<width; i++){ |
| 902 b1[i] -= (b0[i] + b2[i])>>1; | |
| 903 } | |
| 904 } | |
| 905 | |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
906 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
| 2138 | 907 int i; |
| 2967 | 908 |
| 2138 | 909 for(i=0; i<width; i++){ |
| 910 b1[i] += (b0[i] + b2[i] + 2)>>2; | |
| 911 } | |
| 912 } | |
| 913 | |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
914 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ |
| 2198 | 915 int y; |
| 2138 | 916 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; |
| 917 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; | |
| 2967 | 918 |
| 2138 | 919 for(y=-2; y<height; y+=2){ |
| 920 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
| 921 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
| 922 | |
| 2998 | 923 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); |
| 924 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); | |
| 6405 | 925 |
| 2998 | 926 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); |
| 927 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); | |
| 2967 | 928 |
| 2138 | 929 b0=b2; |
| 930 b1=b3; | |
| 931 } | |
| 932 } | |
| 933 | |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
934 static void horizontal_decompose97i(DWTELEM *b, int width){ |
|
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
935 DWTELEM temp[width]; |
| 2138 | 936 const int w2= (width+1)>>1; |
| 937 | |
|
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
938 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); |
|
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
939 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); |
|
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
940 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); |
| 2138 | 941 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); |
| 942 } | |
| 943 | |
| 944 | |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
945 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
| 2138 | 946 int i; |
| 2967 | 947 |
| 2138 | 948 for(i=0; i<width; i++){ |
| 949 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
| 950 } | |
| 951 } | |
| 952 | |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
953 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
| 2138 | 954 int i; |
| 2967 | 955 |
| 2138 | 956 for(i=0; i<width; i++){ |
| 957 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
| 958 } | |
| 959 } | |
| 960 | |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
961 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
| 2138 | 962 int i; |
| 2967 | 963 |
| 2138 | 964 for(i=0; i<width; i++){ |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
965 #ifdef liftS |
| 2138 | 966 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
967 #else |
|
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
968 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
969 #endif |
| 2138 | 970 } |
| 971 } | |
| 972 | |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
973 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
| 2138 | 974 int i; |
| 2967 | 975 |
| 2138 | 976 for(i=0; i<width; i++){ |
| 977 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
| 978 } | |
| 979 } | |
| 980 | |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
981 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ |
| 2198 | 982 int y; |
| 2138 | 983 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; |
| 984 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; | |
| 985 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; | |
| 986 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; | |
| 2967 | 987 |
| 2138 | 988 for(y=-4; y<height; y+=2){ |
| 989 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
| 990 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
| 991 | |
| 2998 | 992 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); |
| 993 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); | |
| 6405 | 994 |
| 2998 | 995 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); |
| 996 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); | |
| 997 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); | |
| 998 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); | |
| 2138 | 999 |
| 1000 b0=b2; | |
| 1001 b1=b3; | |
| 1002 b2=b4; | |
| 1003 b3=b5; | |
| 1004 } | |
| 1005 } | |
| 1006 | |
|
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
1007 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
| 2138 | 1008 int level; |
| 2967 | 1009 |
| 2164 | 1010 for(level=0; level<decomposition_count; level++){ |
| 1011 switch(type){ | |
| 3326 | 1012 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; |
| 1013 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; | |
| 2138 | 1014 } |
| 1015 } | |
| 1016 } | |
| 1017 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1018 static void horizontal_compose53i(IDWTELEM *b, int width){ |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1019 IDWTELEM temp[width]; |
| 2138 | 1020 const int width2= width>>1; |
| 1021 const int w2= (width+1)>>1; | |
| 2893 | 1022 int x; |
| 2138 | 1023 |
| 1024 #if 0 | |
| 2893 | 1025 int A1,A2,A3,A4; |
| 2138 | 1026 A2= temp[1 ]; |
| 1027 A4= temp[0 ]; | |
| 1028 A1= temp[0+width2]; | |
| 1029 A1 -= (A2 + A4)>>1; | |
| 1030 A4 += (A1 + 1)>>1; | |
| 1031 b[0+width2] = A1; | |
| 1032 b[0 ] = A4; | |
| 1033 for(x=1; x+1<width2; x+=2){ | |
| 1034 A3= temp[x+width2]; | |
| 1035 A4= temp[x+1 ]; | |
| 1036 A3 -= (A2 + A4)>>1; | |
| 1037 A2 += (A1 + A3 + 2)>>2; | |
| 1038 b[x+width2] = A3; | |
| 1039 b[x ] = A2; | |
| 1040 | |
| 1041 A1= temp[x+1+width2]; | |
| 1042 A2= temp[x+2 ]; | |
| 1043 A1 -= (A2 + A4)>>1; | |
| 1044 A4 += (A1 + A3 + 2)>>2; | |
| 1045 b[x+1+width2] = A1; | |
| 1046 b[x+1 ] = A4; | |
| 1047 } | |
| 1048 A3= temp[width-1]; | |
| 1049 A3 -= A2; | |
| 1050 A2 += (A1 + A3 + 2)>>2; | |
| 1051 b[width -1] = A3; | |
| 1052 b[width2-1] = A2; | |
| 2967 | 1053 #else |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1054 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1055 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); |
| 5910 | 1056 #endif /* 0 */ |
| 2138 | 1057 for(x=0; x<width2; x++){ |
| 1058 b[2*x ]= temp[x ]; | |
| 1059 b[2*x + 1]= temp[x+w2]; | |
| 1060 } | |
| 1061 if(width&1) | |
| 1062 b[2*x ]= temp[x ]; | |
| 1063 } | |
| 1064 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1065 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
| 2138 | 1066 int i; |
| 2967 | 1067 |
| 2138 | 1068 for(i=0; i<width; i++){ |
| 1069 b1[i] += (b0[i] + b2[i])>>1; | |
| 1070 } | |
| 1071 } | |
| 1072 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1073 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
| 2138 | 1074 int i; |
| 2967 | 1075 |
| 2138 | 1076 for(i=0; i<width; i++){ |
| 1077 b1[i] -= (b0[i] + b2[i] + 2)>>2; | |
| 1078 } | |
| 1079 } | |
| 1080 | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1081 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1082 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1083 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1084 cs->y = -1; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1085 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1086 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1087 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){ |
| 2562 | 1088 cs->b0 = buffer + mirror(-1-1, height-1)*stride; |
| 1089 cs->b1 = buffer + mirror(-1 , height-1)*stride; | |
| 1090 cs->y = -1; | |
| 1091 } | |
| 1092 | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1093 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1094 int y= cs->y; |
| 2967 | 1095 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1096 IDWTELEM *b0= cs->b0; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1097 IDWTELEM *b1= cs->b1; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1098 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1099 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1100 |
| 2998 | 1101 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
| 1102 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
| 6405 | 1103 |
| 2998 | 1104 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
| 1105 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1106 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1107 cs->b0 = b2; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1108 cs->b1 = b3; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1109 cs->y += 2; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1110 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1111 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1112 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){ |
| 2562 | 1113 int y= cs->y; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1114 IDWTELEM *b0= cs->b0; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1115 IDWTELEM *b1= cs->b1; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1116 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1117 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; |
| 2138 | 1118 |
| 2998 | 1119 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
| 1120 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
| 6405 | 1121 |
| 2998 | 1122 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
| 1123 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
| 2138 | 1124 |
| 2562 | 1125 cs->b0 = b2; |
| 1126 cs->b1 = b3; | |
| 1127 cs->y += 2; | |
| 1128 } | |
| 1129 | |
|
5989
382aa8b243c5
prevent warnings about functions being possibly unused
michael
parents:
5988
diff
changeset
|
1130 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ |
|
5988
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1131 dwt_compose_t cs; |
|
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1132 spatial_compose53i_init(&cs, buffer, height, stride); |
|
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1133 while(cs.y <= height) |
|
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1134 spatial_compose53i_dy(&cs, buffer, width, height, stride); |
|
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1135 } |
|
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1136 |
| 2967 | 1137 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1138 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1139 IDWTELEM temp[width]; |
| 2138 | 1140 const int w2= (width+1)>>1; |
| 1141 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1142 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); |
|
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
1143 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1144 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1145 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); |
| 2138 | 1146 } |
| 1147 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1148 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
| 2138 | 1149 int i; |
| 2967 | 1150 |
| 2138 | 1151 for(i=0; i<width; i++){ |
| 1152 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
| 1153 } | |
| 1154 } | |
| 1155 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1156 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
| 2138 | 1157 int i; |
| 2967 | 1158 |
| 2138 | 1159 for(i=0; i<width; i++){ |
| 1160 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
| 1161 } | |
| 1162 } | |
| 1163 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1164 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
| 2138 | 1165 int i; |
| 2967 | 1166 |
| 2138 | 1167 for(i=0; i<width; i++){ |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1168 #ifdef liftS |
| 2138 | 1169 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1170 #else |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1171 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1172 #endif |
| 2138 | 1173 } |
| 1174 } | |
| 1175 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1176 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
| 2138 | 1177 int i; |
| 2967 | 1178 |
| 2138 | 1179 for(i=0; i<width; i++){ |
| 1180 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
| 1181 } | |
| 1182 } | |
| 1183 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1184 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ |
| 2592 | 1185 int i; |
| 2967 | 1186 |
| 2592 | 1187 for(i=0; i<width; i++){ |
| 1188 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; | |
| 1189 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; | |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1190 #ifdef liftS |
| 2592 | 1191 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; |
|
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1192 #else |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1193 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; |
|
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1194 #endif |
| 2592 | 1195 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; |
| 1196 } | |
| 1197 } | |
| 1198 | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1199 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1200 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1201 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1202 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1203 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1204 cs->y = -3; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1205 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1206 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1207 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){ |
| 2562 | 1208 cs->b0 = buffer + mirror(-3-1, height-1)*stride; |
| 1209 cs->b1 = buffer + mirror(-3 , height-1)*stride; | |
| 1210 cs->b2 = buffer + mirror(-3+1, height-1)*stride; | |
| 1211 cs->b3 = buffer + mirror(-3+2, height-1)*stride; | |
| 1212 cs->y = -3; | |
| 1213 } | |
| 2138 | 1214 |
|
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1215 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1216 int y = cs->y; |
| 2967 | 1217 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1218 IDWTELEM *b0= cs->b0; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1219 IDWTELEM *b1= cs->b1; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1220 IDWTELEM *b2= cs->b2; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1221 IDWTELEM *b3= cs->b3; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1222 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1223 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); |
| 2967 | 1224 |
| 2592 | 1225 if(y>0 && y+4<height){ |
|
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1226 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
| 2592 | 1227 }else{ |
| 2998 | 1228 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
| 1229 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
| 1230 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
| 1231 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
| 2592 | 1232 } |
| 6405 | 1233 |
|
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1234 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1235 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1236 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1237 cs->b0=b2; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1238 cs->b1=b3; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1239 cs->b2=b4; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1240 cs->b3=b5; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1241 cs->y += 2; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1242 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1243 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1244 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){ |
| 2562 | 1245 int y = cs->y; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1246 IDWTELEM *b0= cs->b0; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1247 IDWTELEM *b1= cs->b1; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1248 IDWTELEM *b2= cs->b2; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1249 IDWTELEM *b3= cs->b3; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1250 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1251 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; |
| 2138 | 1252 |
| 2998 | 1253 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
| 1254 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
| 1255 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
| 1256 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
| 6405 | 1257 |
|
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1258 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1259 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); |
| 2562 | 1260 |
| 1261 cs->b0=b2; | |
| 1262 cs->b1=b3; | |
| 1263 cs->b2=b4; | |
| 1264 cs->b3=b5; | |
| 1265 cs->y += 2; | |
| 1266 } | |
| 1267 | |
|
5989
382aa8b243c5
prevent warnings about functions being possibly unused
michael
parents:
5988
diff
changeset
|
1268 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ |
|
5988
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1269 dwt_compose_t cs; |
|
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1270 spatial_compose97i_init(&cs, buffer, height, stride); |
|
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1271 while(cs.y <= height) |
|
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1272 spatial_compose97i_dy(&cs, buffer, width, height, stride); |
|
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1273 } |
|
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1274 |
| 3075 | 1275 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1276 int level; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1277 for(level=decomposition_count-1; level>=0; level--){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1278 switch(type){ |
| 3326 | 1279 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; |
| 1280 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1281 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1282 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1283 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1284 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1285 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
| 2562 | 1286 int level; |
| 1287 for(level=decomposition_count-1; level>=0; level--){ | |
| 1288 switch(type){ | |
| 3326 | 1289 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; |
| 1290 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
| 2562 | 1291 } |
| 1292 } | |
| 1293 } | |
| 1294 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1295 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ |
| 2562 | 1296 const int support = type==1 ? 3 : 5; |
| 1297 int level; | |
| 1298 if(type==2) return; | |
| 1299 | |
| 1300 for(level=decomposition_count-1; level>=0; level--){ | |
| 1301 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
| 1302 switch(type){ | |
| 3326 | 1303 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
| 6414 | 1304 break; |
| 3326 | 1305 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
| 6414 | 1306 break; |
| 2562 | 1307 } |
| 1308 } | |
| 2138 | 1309 } |
| 1310 } | |
| 1311 | |
|
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1312 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1313 const int support = type==1 ? 3 : 5; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1314 int level; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1315 if(type==2) return; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1316 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1317 for(level=decomposition_count-1; level>=0; level--){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1318 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1319 switch(type){ |
| 3326 | 1320 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
| 6414 | 1321 break; |
| 3326 | 1322 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
| 6414 | 1323 break; |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1324 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1325 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1326 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1327 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1328 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1329 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
| 2562 | 1330 dwt_compose_t cs[MAX_DECOMPOSITIONS]; |
| 1331 int y; | |
| 1332 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
| 1333 for(y=0; y<height; y+=4) | |
| 1334 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
| 2138 | 1335 } |
| 1336 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1337 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1338 const int w= b->width; |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1339 const int h= b->height; |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1340 int x, y; |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1341 |
| 2138 | 1342 if(1){ |
| 1343 int run=0; | |
| 2149 | 1344 int runs[w*h]; |
| 2138 | 1345 int run_index=0; |
|
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1346 int max_index; |
| 2967 | 1347 |
| 2138 | 1348 for(y=0; y<h; y++){ |
| 1349 for(x=0; x<w; x++){ | |
|
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1350 int v, p=0; |
| 2144 | 1351 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
| 2149 | 1352 v= src[x + y*stride]; |
| 2138 | 1353 |
| 1354 if(y){ | |
| 2149 | 1355 t= src[x + (y-1)*stride]; |
| 2138 | 1356 if(x){ |
| 2149 | 1357 lt= src[x - 1 + (y-1)*stride]; |
| 2138 | 1358 } |
| 1359 if(x + 1 < w){ | |
| 2149 | 1360 rt= src[x + 1 + (y-1)*stride]; |
| 2138 | 1361 } |
| 1362 } | |
| 1363 if(x){ | |
| 2149 | 1364 l= src[x - 1 + y*stride]; |
| 2144 | 1365 /*if(x > 1){ |
| 1366 if(orientation==1) ll= src[y + (x-2)*stride]; | |
| 1367 else ll= src[x - 2 + y*stride]; | |
| 2138 | 1368 }*/ |
| 1369 } | |
|
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1370 if(parent){ |
| 2149 | 1371 int px= x>>1; |
| 1372 int py= y>>1; | |
| 2967 | 1373 if(px<b->parent->width && py<b->parent->height) |
|
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1374 p= parent[px + py*2*stride]; |
|
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1375 } |
|
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1376 if(!(/*ll|*/l|lt|t|rt|p)){ |
| 2138 | 1377 if(v){ |
| 1378 runs[run_index++]= run; | |
| 1379 run=0; | |
| 1380 }else{ | |
| 1381 run++; | |
| 1382 } | |
| 1383 } | |
| 1384 } | |
| 1385 } | |
|
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1386 max_index= run_index; |
| 2138 | 1387 runs[run_index++]= run; |
| 1388 run_index=0; | |
| 1389 run= runs[run_index++]; | |
| 1390 | |
|
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1391 put_symbol2(&s->c, b->state[30], max_index, 0); |
|
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1392 if(run_index <= max_index) |
|
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1393 put_symbol2(&s->c, b->state[1], run, 3); |
| 2967 | 1394 |
| 2138 | 1395 for(y=0; y<h; y++){ |
|
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
1396 if(s->c.bytestream_end - s->c.bytestream < w*40){ |
| 2422 | 1397 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
| 1398 return -1; | |
| 1399 } | |
| 2138 | 1400 for(x=0; x<w; x++){ |
|
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1401 int v, p=0; |
| 2144 | 1402 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
| 2149 | 1403 v= src[x + y*stride]; |
| 2138 | 1404 |
| 1405 if(y){ | |
| 2149 | 1406 t= src[x + (y-1)*stride]; |
| 2138 | 1407 if(x){ |
| 2149 | 1408 lt= src[x - 1 + (y-1)*stride]; |
| 2138 | 1409 } |
| 1410 if(x + 1 < w){ | |
| 2149 | 1411 rt= src[x + 1 + (y-1)*stride]; |
| 2138 | 1412 } |
| 1413 } | |
| 1414 if(x){ | |
| 2149 | 1415 l= src[x - 1 + y*stride]; |
| 2144 | 1416 /*if(x > 1){ |
| 1417 if(orientation==1) ll= src[y + (x-2)*stride]; | |
| 1418 else ll= src[x - 2 + y*stride]; | |
| 2138 | 1419 }*/ |
| 1420 } | |
|
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1421 if(parent){ |
| 2149 | 1422 int px= x>>1; |
| 1423 int py= y>>1; | |
| 2967 | 1424 if(px<b->parent->width && py<b->parent->height) |
|
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1425 p= parent[px + py*2*stride]; |
|
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1426 } |
|
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1427 if(/*ll|*/l|lt|t|rt|p){ |
| 4001 | 1428 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
| 2144 | 1429 |
| 2335 | 1430 put_rac(&s->c, &b->state[0][context], !!v); |
| 2138 | 1431 }else{ |
| 1432 if(!run){ | |
| 1433 run= runs[run_index++]; | |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1434 |
|
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1435 if(run_index <= max_index) |
|
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1436 put_symbol2(&s->c, b->state[1], run, 3); |
| 2138 | 1437 assert(v); |
| 1438 }else{ | |
| 1439 run--; | |
| 1440 assert(!v); | |
| 1441 } | |
| 1442 } | |
| 1443 if(v){ | |
| 4001 | 1444 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
| 1445 int l2= 2*FFABS(l) + (l<0); | |
| 1446 int t2= 2*FFABS(t) + (t<0); | |
| 1447 | |
| 1448 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); | |
| 2596 | 1449 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); |
| 2138 | 1450 } |
| 1451 } | |
| 1452 } | |
| 1453 } | |
| 2422 | 1454 return 0; |
| 2138 | 1455 } |
| 1456 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1457 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1458 // encode_subband_qtree(s, b, src, parent, stride, orientation); |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1459 // encode_subband_z0run(s, b, src, parent, stride, orientation); |
| 2422 | 1460 return encode_subband_c0run(s, b, src, parent, stride, orientation); |
|
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1461 // encode_subband_dzr(s, b, src, parent, stride, orientation); |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1462 } |
|
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1463 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1464 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
| 2138 | 1465 const int w= b->width; |
| 1466 const int h= b->height; | |
| 1467 int x,y; | |
| 2967 | 1468 |
| 2138 | 1469 if(1){ |
|
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1470 int run, runs; |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1471 x_and_coeff *xc= b->x_coeff; |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1472 x_and_coeff *prev_xc= NULL; |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1473 x_and_coeff *prev2_xc= xc; |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1474 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1475 x_and_coeff *prev_parent_xc= parent_xc; |
| 2138 | 1476 |
|
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1477 runs= get_symbol2(&s->c, b->state[30], 0); |
|
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1478 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
|
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1479 else run= INT_MAX; |
|
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1480 |
| 2138 | 1481 for(y=0; y<h; y++){ |
| 2193 | 1482 int v=0; |
| 1483 int lt=0, t=0, rt=0; | |
| 1484 | |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1485 if(y && prev_xc->x == 0){ |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1486 rt= prev_xc->coeff; |
| 2193 | 1487 } |
| 2138 | 1488 for(x=0; x<w; x++){ |
| 2193 | 1489 int p=0; |
| 1490 const int l= v; | |
| 2967 | 1491 |
| 2193 | 1492 lt= t; t= rt; |
| 1493 | |
| 2194 | 1494 if(y){ |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1495 if(prev_xc->x <= x) |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1496 prev_xc++; |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1497 if(prev_xc->x == x + 1) |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1498 rt= prev_xc->coeff; |
| 2194 | 1499 else |
| 1500 rt=0; | |
| 1501 } | |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1502 if(parent_xc){ |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1503 if(x>>1 > parent_xc->x){ |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1504 parent_xc++; |
| 2192 | 1505 } |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1506 if(x>>1 == parent_xc->x){ |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1507 p= parent_xc->coeff; |
| 2194 | 1508 } |
|
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1509 } |
|
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1510 if(/*ll|*/l|lt|t|rt|p){ |
| 4001 | 1511 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); |
| 2144 | 1512 |
| 2335 | 1513 v=get_rac(&s->c, &b->state[0][context]); |
| 2605 | 1514 if(v){ |
| 1515 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); | |
| 1516 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); | |
| 2967 | 1517 |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1518 xc->x=x; |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1519 (xc++)->coeff= v; |
| 2605 | 1520 } |
| 2138 | 1521 }else{ |
| 1522 if(!run){ | |
|
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1523 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
|
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1524 else run= INT_MAX; |
| 2605 | 1525 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); |
| 1526 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); | |
| 2967 | 1527 |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1528 xc->x=x; |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1529 (xc++)->coeff= v; |
| 2138 | 1530 }else{ |
| 2606 | 1531 int max_run; |
| 2138 | 1532 run--; |
| 1533 v=0; | |
| 2191 | 1534 |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1535 if(y) max_run= FFMIN(run, prev_xc->x - x - 2); |
| 2606 | 1536 else max_run= FFMIN(run, w-x-1); |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1537 if(parent_xc) |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1538 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); |
| 2606 | 1539 x+= max_run; |
| 1540 run-= max_run; | |
| 2138 | 1541 } |
| 1542 } | |
| 2192 | 1543 } |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1544 (xc++)->x= w+1; //end marker |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1545 prev_xc= prev2_xc; |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1546 prev2_xc= xc; |
| 2967 | 1547 |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1548 if(parent_xc){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1549 if(y&1){ |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1550 while(parent_xc->x != parent->width+1) |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1551 parent_xc++; |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1552 parent_xc++; |
|
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1553 prev_parent_xc= parent_xc; |
| 2192 | 1554 }else{ |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1555 parent_xc= prev_parent_xc; |
| 2138 | 1556 } |
| 1557 } | |
| 1558 } | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1559 |
|
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1560 (xc++)->x= w+1; //end marker |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1561 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1562 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1563 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1564 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1565 const int w= b->width; |
| 2893 | 1566 int y; |
| 4594 | 1567 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
| 2600 | 1568 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1569 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1570 int new_index = 0; |
| 2967 | 1571 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1572 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1573 qadd= 0; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1574 qmul= 1<<QEXPSHIFT; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1575 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1576 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1577 /* If we are on the second or later slice, restore our index. */ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1578 if (start_y != 0) |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1579 new_index = save_state[0]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1580 |
| 2967 | 1581 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1582 for(y=start_y; y<h; y++){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1583 int x = 0; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1584 int v; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1585 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1586 memset(line, 0, b->width*sizeof(IDWTELEM)); |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1587 v = b->x_coeff[new_index].coeff; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1588 x = b->x_coeff[new_index++].x; |
|
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
1589 while(x < w){ |
| 2596 | 1590 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; |
| 1591 register int u= -(v&1); | |
| 1592 line[x] = (t^u) - u; | |
| 1593 | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1594 v = b->x_coeff[new_index].coeff; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1595 x = b->x_coeff[new_index++].x; |
| 2138 | 1596 } |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1597 } |
| 2967 | 1598 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1599 /* Save our variables for the next slice. */ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1600 save_state[0] = new_index; |
| 2967 | 1601 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1602 return; |
| 2138 | 1603 } |
| 1604 | |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
1605 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts |
| 2138 | 1606 int plane_index, level, orientation; |
| 1607 | |
| 2199 | 1608 for(plane_index=0; plane_index<3; plane_index++){ |
|
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
1609 for(level=0; level<MAX_DECOMPOSITIONS; level++){ |
| 2138 | 1610 for(orientation=level ? 1:0; orientation<4; orientation++){ |
| 2335 | 1611 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); |
| 2138 | 1612 } |
| 1613 } | |
| 1614 } | |
| 2335 | 1615 memset(s->header_state, MID_STATE, sizeof(s->header_state)); |
| 1616 memset(s->block_state, MID_STATE, sizeof(s->block_state)); | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1617 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1618 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1619 static int alloc_blocks(SnowContext *s){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1620 int w= -((-s->avctx->width )>>LOG2_MB_SIZE); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1621 int h= -((-s->avctx->height)>>LOG2_MB_SIZE); |
| 2967 | 1622 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1623 s->b_width = w; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1624 s->b_height= h; |
| 2967 | 1625 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1626 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1627 return 0; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1628 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1629 |
| 2335 | 1630 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ |
| 1631 uint8_t *bytestream= d->bytestream; | |
| 1632 uint8_t *bytestream_start= d->bytestream_start; | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1633 *d= *s; |
| 2335 | 1634 d->bytestream= bytestream; |
| 1635 d->bytestream_start= bytestream_start; | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1636 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1637 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1638 //near copy & paste from dsputil, FIXME |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1639 static int pix_sum(uint8_t * pix, int line_size, int w) |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1640 { |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1641 int s, i, j; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1642 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1643 s = 0; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1644 for (i = 0; i < w; i++) { |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1645 for (j = 0; j < w; j++) { |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1646 s += pix[0]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1647 pix ++; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1648 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1649 pix += line_size - w; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1650 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1651 return s; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1652 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1653 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1654 //near copy & paste from dsputil, FIXME |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1655 static int pix_norm1(uint8_t * pix, int line_size, int w) |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1656 { |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1657 int s, i, j; |
| 4179 | 1658 uint32_t *sq = ff_squareTbl + 256; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1659 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1660 s = 0; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1661 for (i = 0; i < w; i++) { |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1662 for (j = 0; j < w; j ++) { |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1663 s += sq[pix[0]]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1664 pix ++; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1665 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1666 pix += line_size - w; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1667 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1668 return s; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1669 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1670 |
| 3314 | 1671 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1672 const int w= s->b_width << s->block_max_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1673 const int rem_depth= s->block_max_depth - level; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1674 const int index= (x + y*w) << rem_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1675 const int block_w= 1<<rem_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1676 BlockNode block; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1677 int i,j; |
| 2967 | 1678 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1679 block.color[0]= l; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1680 block.color[1]= cb; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1681 block.color[2]= cr; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1682 block.mx= mx; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1683 block.my= my; |
| 3314 | 1684 block.ref= ref; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1685 block.type= type; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1686 block.level= level; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1687 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1688 for(j=0; j<block_w; j++){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1689 for(i=0; i<block_w; i++){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1690 s->block[index + i + j*w]= block; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1691 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1692 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1693 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1694 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1695 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1696 const int offset[3]= { |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1697 y*c-> stride + x, |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1698 ((y*c->uvstride + x)>>1), |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1699 ((y*c->uvstride + x)>>1), |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1700 }; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1701 int i; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1702 for(i=0; i<3; i++){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1703 c->src[0][i]= src [i]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1704 c->ref[0][i]= ref [i] + offset[i]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1705 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1706 assert(!ref_index); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1707 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1708 |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1709 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, |
|
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1710 const BlockNode *left, const BlockNode *top, const BlockNode *tr){ |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1711 if(s->ref_frames == 1){ |
|
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1712 *mx = mid_pred(left->mx, top->mx, tr->mx); |
|
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1713 *my = mid_pred(left->my, top->my, tr->my); |
|
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1714 }else{ |
|
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1715 const int *scale = scale_mv_ref[ref]; |
|
4407
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1716 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8, |
|
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1717 (top ->mx * scale[top ->ref] + 128) >>8, |
|
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1718 (tr ->mx * scale[tr ->ref] + 128) >>8); |
|
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1719 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, |
|
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1720 (top ->my * scale[top ->ref] + 128) >>8, |
|
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1721 (tr ->my * scale[tr ->ref] + 128) >>8); |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1722 } |
|
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1723 } |
|
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1724 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1725 //FIXME copy&paste |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1726 #define P_LEFT P[1] |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1727 #define P_TOP P[2] |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1728 #define P_TOPRIGHT P[3] |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1729 #define P_MEDIAN P[4] |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1730 #define P_MV1 P[9] |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1731 #define FLAG_QPEL 1 //must be 1 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1732 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1733 static int encode_q_branch(SnowContext *s, int level, int x, int y){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1734 uint8_t p_buffer[1024]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1735 uint8_t i_buffer[1024]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1736 uint8_t p_state[sizeof(s->block_state)]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1737 uint8_t i_state[sizeof(s->block_state)]; |
| 2335 | 1738 RangeCoder pc, ic; |
| 1739 uint8_t *pbbak= s->c.bytestream; | |
| 1740 uint8_t *pbbak_start= s->c.bytestream_start; | |
| 5082 | 1741 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1742 const int w= s->b_width << s->block_max_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1743 const int h= s->b_height << s->block_max_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1744 const int rem_depth= s->block_max_depth - level; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1745 const int index= (x + y*w) << rem_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1746 const int block_w= 1<<(LOG2_MB_SIZE - level); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1747 int trx= (x+1)<<rem_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1748 int try= (y+1)<<rem_depth; |
|
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1749 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1750 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1751 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1752 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1753 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1754 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1755 int pl = left->color[0]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1756 int pcb= left->color[1]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1757 int pcr= left->color[2]; |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1758 int pmx, pmy; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1759 int mx=0, my=0; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1760 int l,cr,cb; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1761 const int stride= s->current_picture.linesize[0]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1762 const int uvstride= s->current_picture.linesize[1]; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1763 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1764 s->input_picture.data[1] + (x + y*uvstride)*block_w/2, |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1765 s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1766 int P[10][2]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1767 int16_t last_mv[3][2]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1768 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1769 const int shift= 1+qpel; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1770 MotionEstContext *c= &s->m.me; |
| 3314 | 1771 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
| 4001 | 1772 int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
| 1773 int my_context= av_log2(2*FFABS(left->my - top->my)); | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1774 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
| 3314 | 1775 int ref, best_ref, ref_score, ref_mx, ref_my; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1776 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1777 assert(sizeof(s->block_state) >= 256); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1778 if(s->keyframe){ |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1779 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1780 return 0; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1781 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1782 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1783 // clip predictors / edge ? |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1784 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1785 P_LEFT[0]= left->mx; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1786 P_LEFT[1]= left->my; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1787 P_TOP [0]= top->mx; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1788 P_TOP [1]= top->my; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1789 P_TOPRIGHT[0]= tr->mx; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1790 P_TOPRIGHT[1]= tr->my; |
| 2967 | 1791 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1792 last_mv[0][0]= s->block[index].mx; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1793 last_mv[0][1]= s->block[index].my; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1794 last_mv[1][0]= right->mx; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1795 last_mv[1][1]= right->my; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1796 last_mv[2][0]= bottom->mx; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1797 last_mv[2][1]= bottom->my; |
| 2967 | 1798 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1799 s->m.mb_stride=2; |
| 2967 | 1800 s->m.mb_x= |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1801 s->m.mb_y= 0; |
|
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1802 c->skip= 0; |
|
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1803 |
|
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1804 assert(c-> stride == stride); |
|
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1805 assert(c->uvstride == uvstride); |
| 2967 | 1806 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1807 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1808 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1809 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1810 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; |
| 2967 | 1811 |
| 2206 | 1812 c->xmin = - x*block_w - 16+2; |
| 1813 c->ymin = - y*block_w - 16+2; | |
| 1814 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
| 1815 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1816 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1817 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); |
| 2967 | 1818 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1819 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1820 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1821 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1822 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1823 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1824 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1825 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1826 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1827 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1828 if (!y) { |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1829 c->pred_x= P_LEFT[0]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1830 c->pred_y= P_LEFT[1]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1831 } else { |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1832 c->pred_x = P_MEDIAN[0]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1833 c->pred_y = P_MEDIAN[1]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1834 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1835 |
| 3314 | 1836 score= INT_MAX; |
| 1837 best_ref= 0; | |
| 1838 for(ref=0; ref<s->ref_frames; ref++){ | |
| 1839 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); | |
| 1840 | |
| 1841 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, | |
| 1842 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); | |
| 1843 | |
| 1844 assert(ref_mx >= c->xmin); | |
| 1845 assert(ref_mx <= c->xmax); | |
| 1846 assert(ref_my >= c->ymin); | |
| 1847 assert(ref_my <= c->ymax); | |
| 1848 | |
|
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1849 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); |
| 3314 | 1850 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); |
| 1851 ref_score+= 2*av_log2(2*ref)*c->penalty_factor; | |
| 1852 if(s->ref_mvs[ref]){ | |
| 1853 s->ref_mvs[ref][index][0]= ref_mx; | |
| 1854 s->ref_mvs[ref][index][1]= ref_my; | |
| 1855 s->ref_scores[ref][index]= ref_score; | |
| 1856 } | |
| 1857 if(score > ref_score){ | |
| 1858 score= ref_score; | |
| 1859 best_ref= ref; | |
| 1860 mx= ref_mx; | |
| 1861 my= ref_my; | |
| 1862 } | |
| 1863 } | |
| 5127 | 1864 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2 |
| 2967 | 1865 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1866 // subpel search |
| 5085 | 1867 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1868 pc= s->c; |
| 2335 | 1869 pc.bytestream_start= |
| 1870 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1871 memcpy(p_state, s->block_state, sizeof(s->block_state)); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1872 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1873 if(level!=s->block_max_depth) |
| 2335 | 1874 put_rac(&pc, &p_state[4 + s_context], 1); |
| 1875 put_rac(&pc, &p_state[1 + left->type + top->type], 0); | |
| 3314 | 1876 if(s->ref_frames > 1) |
| 1877 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); | |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1878 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); |
| 3314 | 1879 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); |
| 1880 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); | |
| 2335 | 1881 p_len= pc.bytestream - pc.bytestream_start; |
| 5082 | 1882 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1883 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1884 block_s= block_w*block_w; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1885 sum = pix_sum(current_data[0], stride, block_w); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1886 l= (sum + block_s/2)/block_s; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1887 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; |
| 2967 | 1888 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1889 block_s= block_w*block_w>>2; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1890 sum = pix_sum(current_data[1], uvstride, block_w>>1); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1891 cb= (sum + block_s/2)/block_s; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1892 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1893 sum = pix_sum(current_data[2], uvstride, block_w>>1); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1894 cr= (sum + block_s/2)/block_s; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1895 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1896 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1897 ic= s->c; |
| 2335 | 1898 ic.bytestream_start= |
| 1899 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1900 memcpy(i_state, s->block_state, sizeof(s->block_state)); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1901 if(level!=s->block_max_depth) |
| 2335 | 1902 put_rac(&ic, &i_state[4 + s_context], 1); |
| 1903 put_rac(&ic, &i_state[1 + left->type + top->type], 1); | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1904 put_symbol(&ic, &i_state[32], l-pl , 1); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1905 put_symbol(&ic, &i_state[64], cb-pcb, 1); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1906 put_symbol(&ic, &i_state[96], cr-pcr, 1); |
| 2335 | 1907 i_len= ic.bytestream - ic.bytestream_start; |
| 5082 | 1908 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1909 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1910 // assert(score==256*256*256*64-1); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1911 assert(iscore < 255*255*256 + s->lambda2*10); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1912 assert(iscore >= 0); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1913 assert(l>=0 && l<=255); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1914 assert(pl>=0 && pl<=255); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1915 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1916 if(level==0){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1917 int varc= iscore >> 8; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1918 int vard= score >> 8; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1919 if (vard <= 64 || vard < varc) |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1920 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1921 else |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1922 c->scene_change_score+= s->m.qscale; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1923 } |
| 2967 | 1924 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1925 if(level!=s->block_max_depth){ |
| 2335 | 1926 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1927 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1928 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1929 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1930 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1931 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead |
| 2967 | 1932 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1933 if(score2 < score && score2 < iscore) |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1934 return score2; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1935 } |
| 2967 | 1936 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1937 if(iscore < score){ |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1938 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
| 2335 | 1939 memcpy(pbbak, i_buffer, i_len); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1940 s->c= ic; |
| 2335 | 1941 s->c.bytestream_start= pbbak_start; |
| 1942 s->c.bytestream= pbbak + i_len; | |
| 3314 | 1943 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1944 memcpy(s->block_state, i_state, sizeof(s->block_state)); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1945 return iscore; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1946 }else{ |
| 2335 | 1947 memcpy(pbbak, p_buffer, p_len); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1948 s->c= pc; |
| 2335 | 1949 s->c.bytestream_start= pbbak_start; |
| 1950 s->c.bytestream= pbbak + p_len; | |
| 3314 | 1951 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1952 memcpy(s->block_state, p_state, sizeof(s->block_state)); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1953 return score; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1954 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1955 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1956 |
|
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
1957 static av_always_inline int same_block(BlockNode *a, BlockNode *b){ |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1958 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1959 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1960 }else{ |
| 3314 | 1961 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1962 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1963 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1964 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1965 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1966 const int w= s->b_width << s->block_max_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1967 const int rem_depth= s->block_max_depth - level; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1968 const int index= (x + y*w) << rem_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1969 int trx= (x+1)<<rem_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1970 BlockNode *b= &s->block[index]; |
|
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1971 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1972 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1973 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1974 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1975 int pl = left->color[0]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1976 int pcb= left->color[1]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1977 int pcr= left->color[2]; |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1978 int pmx, pmy; |
| 3314 | 1979 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
| 4001 | 1980 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; |
| 1981 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1982 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1983 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1984 if(s->keyframe){ |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1985 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1986 return; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1987 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1988 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1989 if(level!=s->block_max_depth){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1990 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ |
|
2995
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
1991 put_rac(&s->c, &s->block_state[4 + s_context], 1); |
|
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
1992 }else{ |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1993 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1994 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1995 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1996 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1997 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1998 return; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1999 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2000 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2001 if(b->type & BLOCK_INTRA){ |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2002 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2003 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2004 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2005 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2006 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); |
| 3314 | 2007 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2008 }else{ |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2009 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2010 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); |
| 3314 | 2011 if(s->ref_frames > 1) |
| 2012 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2013 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2014 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); |
| 3314 | 2015 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2016 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2017 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2018 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2019 static void decode_q_branch(SnowContext *s, int level, int x, int y){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2020 const int w= s->b_width << s->block_max_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2021 const int rem_depth= s->block_max_depth - level; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2022 const int index= (x + y*w) << rem_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2023 int trx= (x+1)<<rem_depth; |
|
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2024 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2025 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2026 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2027 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2028 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
| 2967 | 2029 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2030 if(s->keyframe){ |
| 3314 | 2031 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2032 return; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2033 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2034 |
| 2335 | 2035 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ |
| 4332 | 2036 int type, mx, my; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2037 int l = left->color[0]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2038 int cb= left->color[1]; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2039 int cr= left->color[2]; |
| 3314 | 2040 int ref = 0; |
| 2041 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); | |
| 4001 | 2042 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); |
| 2043 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); | |
| 2967 | 2044 |
| 2335 | 2045 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2046 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2047 if(type){ |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2048 pred_mv(s, &mx, &my, 0, left, top, tr); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2049 l += get_symbol(&s->c, &s->block_state[32], 1); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2050 cb+= get_symbol(&s->c, &s->block_state[64], 1); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2051 cr+= get_symbol(&s->c, &s->block_state[96], 1); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2052 }else{ |
| 3314 | 2053 if(s->ref_frames > 1) |
| 2054 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); | |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2055 pred_mv(s, &mx, &my, ref, left, top, tr); |
| 3314 | 2056 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); |
| 2057 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2058 } |
| 3314 | 2059 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2060 }else{ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2061 decode_q_branch(s, level+1, 2*x+0, 2*y+0); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2062 decode_q_branch(s, level+1, 2*x+1, 2*y+0); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2063 decode_q_branch(s, level+1, 2*x+0, 2*y+1); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2064 decode_q_branch(s, level+1, 2*x+1, 2*y+1); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2065 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2066 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2067 |
|
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2068 static void encode_blocks(SnowContext *s, int search){ |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2069 int x, y; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2070 int w= s->b_width; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2071 int h= s->b_height; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2072 |
|
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2073 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2074 iterative_me(s); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2075 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2076 for(y=0; y<h; y++){ |
|
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
2077 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit |
| 2422 | 2078 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
| 2079 return; | |
| 2080 } | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2081 for(x=0; x<w; x++){ |
|
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2082 if(s->avctx->me_method == ME_ITER || !search) |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2083 encode_q_branch2(s, 0, x, y); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2084 else |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2085 encode_q_branch (s, 0, x, y); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2086 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2087 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2088 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2089 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2090 static void decode_blocks(SnowContext *s){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2091 int x, y; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2092 int w= s->b_width; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2093 int h= s->b_height; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2094 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2095 for(y=0; y<h; y++){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2096 for(x=0; x<w; x++){ |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2097 decode_q_branch(s, 0, x, y); |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2098 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2099 } |
| 2138 | 2100 } |
| 2101 | |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2102 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ |
| 5648 | 2103 const static uint8_t weight[64]={ |
| 2104 8,7,6,5,4,3,2,1, | |
| 2105 7,7,0,0,0,0,0,1, | |
| 2106 6,0,6,0,0,0,2,0, | |
| 2107 5,0,0,5,0,3,0,0, | |
| 2108 4,0,0,0,4,0,0,0, | |
| 2109 3,0,0,5,0,3,0,0, | |
| 2110 2,0,6,0,0,0,2,0, | |
| 2111 1,7,0,0,0,0,0,1, | |
| 2112 }; | |
| 2113 | |
| 2114 const static uint8_t brane[256]={ | |
| 2115 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, | |
| 2116 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, | |
| 2117 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, | |
| 2118 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, | |
| 2119 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, | |
| 2120 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, | |
| 2121 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, | |
| 2122 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, | |
| 2123 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, | |
| 2124 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, | |
| 2125 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, | |
| 2126 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, | |
| 2127 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, | |
| 2128 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, | |
| 2129 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, | |
| 2130 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, | |
| 2131 }; | |
| 2132 | |
| 2133 const static uint8_t needs[16]={ | |
| 2134 0,1,0,0, | |
| 2135 2,4,2,0, | |
| 2136 0,1,0,0, | |
| 2137 15 | |
| 2138 }; | |
| 2139 | |
| 2140 int x, y, b, r, l; | |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2141 int16_t tmpIt [64*(32+HTAPS_MAX)]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2142 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; |
| 5648 | 2143 int16_t *tmpI= tmpIt; |
| 2144 uint8_t *tmp2= tmp2t[0]; | |
| 6205 | 2145 const uint8_t *hpel[11]; |
| 5648 | 2146 assert(dx<16 && dy<16); |
| 2147 r= brane[dx + 16*dy]&15; | |
| 2148 l= brane[dx + 16*dy]>>4; | |
| 2149 | |
| 2150 b= needs[l] | needs[r]; | |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2151 if(p && !p->diag_mc) |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2152 b= 15; |
| 5648 | 2153 |
| 2154 if(b&5){ | |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2155 for(y=0; y < b_h+HTAPS_MAX-1; y++){ |
| 5649 | 2156 for(x=0; x < b_w; x++){ |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2157 int a_1=src[x + HTAPS_MAX/2-4]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2158 int a0= src[x + HTAPS_MAX/2-3]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2159 int a1= src[x + HTAPS_MAX/2-2]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2160 int a2= src[x + HTAPS_MAX/2-1]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2161 int a3= src[x + HTAPS_MAX/2+0]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2162 int a4= src[x + HTAPS_MAX/2+1]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2163 int a5= src[x + HTAPS_MAX/2+2]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2164 int a6= src[x + HTAPS_MAX/2+3]; |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2165 int am=0; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2166 if(!p || p->fast_mc){ |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2167 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2168 tmpI[x]= am; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2169 am= (am+16)>>5; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2170 }else{ |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2171 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2172 tmpI[x]= am; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2173 am= (am+32)>>6; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2174 } |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2175 |
| 5649 | 2176 if(am&(~255)) am= ~(am>>31); |
| 2177 tmp2[x]= am; | |
| 2178 } | |
| 2179 tmpI+= 64; | |
| 2180 tmp2+= stride; | |
| 2181 src += stride; | |
| 2138 | 2182 } |
| 5649 | 2183 src -= stride*y; |
| 5648 | 2184 } |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2185 src += HTAPS_MAX/2 - 1; |
| 5648 | 2186 tmp2= tmp2t[1]; |
| 2187 | |
| 2188 if(b&2){ | |
| 5649 | 2189 for(y=0; y < b_h; y++){ |
| 2190 for(x=0; x < b_w+1; x++){ | |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2191 int a_1=src[x + (HTAPS_MAX/2-4)*stride]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2192 int a0= src[x + (HTAPS_MAX/2-3)*stride]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2193 int a1= src[x + (HTAPS_MAX/2-2)*stride]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2194 int a2= src[x + (HTAPS_MAX/2-1)*stride]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2195 int a3= src[x + (HTAPS_MAX/2+0)*stride]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2196 int a4= src[x + (HTAPS_MAX/2+1)*stride]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2197 int a5= src[x + (HTAPS_MAX/2+2)*stride]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2198 int a6= src[x + (HTAPS_MAX/2+3)*stride]; |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2199 int am=0; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2200 if(!p || p->fast_mc) |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2201 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2202 else |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2203 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2204 |
| 5649 | 2205 if(am&(~255)) am= ~(am>>31); |
| 2206 tmp2[x]= am; | |
| 2207 } | |
| 2208 src += stride; | |
| 2209 tmp2+= stride; | |
| 5648 | 2210 } |
| 5649 | 2211 src -= stride*y; |
| 5648 | 2212 } |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2213 src += stride*(HTAPS_MAX/2 - 1); |
| 5648 | 2214 tmp2= tmp2t[2]; |
| 2215 tmpI= tmpIt; | |
| 2216 if(b&4){ | |
| 2217 for(y=0; y < b_h; y++){ | |
| 2218 for(x=0; x < b_w; x++){ | |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2219 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2220 int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2221 int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2222 int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2223 int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2224 int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2225 int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2226 int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2227 int am=0; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2228 if(!p || p->fast_mc) |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2229 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2230 else |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2231 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; |
| 5648 | 2232 if(am&(~255)) am= ~(am>>31); |
| 2233 tmp2[x]= am; | |
| 2234 } | |
| 2235 tmpI+= 64; | |
| 2236 tmp2+= stride; | |
| 2138 | 2237 } |
| 5648 | 2238 } |
| 2239 | |
| 2240 hpel[ 0]= src; | |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2241 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1); |
| 5648 | 2242 hpel[ 2]= src + 1; |
| 2243 | |
| 2244 hpel[ 4]= tmp2t[1]; | |
| 2245 hpel[ 5]= tmp2t[2]; | |
| 2246 hpel[ 6]= tmp2t[1] + 1; | |
| 2247 | |
| 2248 hpel[ 8]= src + stride; | |
| 2249 hpel[ 9]= hpel[1] + stride; | |
| 2250 hpel[10]= hpel[8] + 1; | |
| 2251 | |
| 2252 if(b==15){ | |
| 6205 | 2253 const uint8_t *src1= hpel[dx/8 + dy/8*4 ]; |
| 2254 const uint8_t *src2= hpel[dx/8 + dy/8*4+1]; | |
| 2255 const uint8_t *src3= hpel[dx/8 + dy/8*4+4]; | |
| 2256 const uint8_t *src4= hpel[dx/8 + dy/8*4+5]; | |
| 5648 | 2257 dx&=7; |
| 2258 dy&=7; | |
| 2259 for(y=0; y < b_h; y++){ | |
| 2260 for(x=0; x < b_w; x++){ | |
| 2261 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ | |
| 2262 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; | |
| 2263 } | |
| 2264 src1+=stride; | |
| 2265 src2+=stride; | |
| 2266 src3+=stride; | |
| 2267 src4+=stride; | |
| 2268 dst +=stride; | |
| 2269 } | |
| 2270 }else{ | |
| 6205 | 2271 const uint8_t *src1= hpel[l]; |
| 2272 const uint8_t *src2= hpel[r]; | |
| 5648 | 2273 int a= weight[((dx&7) + (8*(dy&7)))]; |
| 2274 int b= 8-a; | |
| 2275 for(y=0; y < b_h; y++){ | |
| 2276 for(x=0; x < b_w; x++){ | |
| 2277 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; | |
| 2278 } | |
| 2279 src1+=stride; | |
| 2280 src2+=stride; | |
| 2281 dst +=stride; | |
| 2282 } | |
| 2138 | 2283 } |
| 2284 } | |
| 2285 | |
| 2286 #define mca(dx,dy,b_w)\ | |
| 5254 | 2287 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2288 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ |
| 2138 | 2289 assert(h==b_w);\ |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2290 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ |
| 2138 | 2291 } |
| 2292 | |
| 2293 mca( 0, 0,16) | |
| 2294 mca( 8, 0,16) | |
| 2295 mca( 0, 8,16) | |
| 2296 mca( 8, 8,16) | |
|
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2297 mca( 0, 0,8) |
|
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2298 mca( 8, 0,8) |
|
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2299 mca( 0, 8,8) |
|
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2300 mca( 8, 8,8) |
| 2138 | 2301 |
| 3314 | 2302 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2303 if(block->type & BLOCK_INTRA){ |
| 2206 | 2304 int x, y; |
| 3018 | 2305 const int color = block->color[plane_index]; |
| 2306 const int color4= color*0x01010101; | |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2307 if(b_w==32){ |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2308 for(y=0; y < b_h; y++){ |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2309 *(uint32_t*)&dst[0 + y*stride]= color4; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2310 *(uint32_t*)&dst[4 + y*stride]= color4; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2311 *(uint32_t*)&dst[8 + y*stride]= color4; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2312 *(uint32_t*)&dst[12+ y*stride]= color4; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2313 *(uint32_t*)&dst[16+ y*stride]= color4; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2314 *(uint32_t*)&dst[20+ y*stride]= color4; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2315 *(uint32_t*)&dst[24+ y*stride]= color4; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2316 *(uint32_t*)&dst[28+ y*stride]= color4; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2317 } |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2318 }else if(b_w==16){ |
| 3018 | 2319 for(y=0; y < b_h; y++){ |
| 2320 *(uint32_t*)&dst[0 + y*stride]= color4; | |
| 2321 *(uint32_t*)&dst[4 + y*stride]= color4; | |
| 2322 *(uint32_t*)&dst[8 + y*stride]= color4; | |
| 2323 *(uint32_t*)&dst[12+ y*stride]= color4; | |
| 2324 } | |
| 2325 }else if(b_w==8){ | |
| 2326 for(y=0; y < b_h; y++){ | |
| 2327 *(uint32_t*)&dst[0 + y*stride]= color4; | |
| 2328 *(uint32_t*)&dst[4 + y*stride]= color4; | |
| 2329 } | |
| 2330 }else if(b_w==4){ | |
| 2331 for(y=0; y < b_h; y++){ | |
| 2332 *(uint32_t*)&dst[0 + y*stride]= color4; | |
| 2333 } | |
| 2334 }else{ | |
| 2335 for(y=0; y < b_h; y++){ | |
| 2336 for(x=0; x < b_w; x++){ | |
| 2337 dst[x + y*stride]= color; | |
| 2338 } | |
| 2138 | 2339 } |
| 2340 } | |
| 2341 }else{ | |
| 3314 | 2342 uint8_t *src= s->last_picture[block->ref].data[plane_index]; |
| 2206 | 2343 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; |
| 2344 int mx= block->mx*scale; | |
| 2345 int my= block->my*scale; | |
| 2223 | 2346 const int dx= mx&15; |
| 2347 const int dy= my&15; | |
|
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2348 const int tab_index= 3 - (b_w>>2) + (b_w>>4); |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2349 sx += (mx>>4) - (HTAPS_MAX/2-1); |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2350 sy += (my>>4) - (HTAPS_MAX/2-1); |
| 2206 | 2351 src += sx + sy*stride; |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2352 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2) |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2353 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ |
|
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2354 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h); |
| 2206 | 2355 src= tmp + MB_SIZE; |
| 2138 | 2356 } |
| 3189 | 2357 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); |
| 2358 // assert(!(b_w&(b_w-1))); | |
| 3018 | 2359 assert(b_w>1 && b_h>1); |
|
6206
ea591bc7522a
add parenthesis, fix warning: snow.c:2413: warning: suggest parentheses around && within ||
bcoudurier
parents:
6205
diff
changeset
|
2360 assert((tab_index>=0 && tab_index<4) || b_w==32); |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2361 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2362 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2363 else if(b_w==32){ |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2364 int y; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2365 for(y=0; y<b_h; y+=16){ |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2366 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2367 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2368 } |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2369 }else if(b_w==b_h) |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2370 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); |
| 3018 | 2371 else if(b_w==2*b_h){ |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2372 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2373 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); |
| 3018 | 2374 }else{ |
| 2375 assert(2*b_w==b_h); | |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2376 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2377 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); |
| 3018 | 2378 } |
| 2138 | 2379 } |
| 2380 } | |
| 2381 | |
|
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2382 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
|
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2383 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2384 int y, x; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2385 IDWTELEM * dst; |
|
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2386 for(y=0; y<b_h; y++){ |
| 5409 | 2387 //FIXME ugly misuse of obmc_stride |
|
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2388 const uint8_t *obmc1= obmc + y*obmc_stride; |
|
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2389 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
|
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2390 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
|
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2391 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
|
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2392 dst = slice_buffer_get_line(sb, src_y + y); |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2393 for(x=0; x<b_w; x++){ |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2394 int v= obmc1[x] * block[3][x + y*src_stride] |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2395 +obmc2[x] * block[2][x + y*src_stride] |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2396 +obmc3[x] * block[1][x + y*src_stride] |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2397 +obmc4[x] * block[0][x + y*src_stride]; |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2398 |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2399 v <<= 8 - LOG2_OBMC_MAX; |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2400 if(FRAC_BITS != 8){ |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2401 v >>= 8 - FRAC_BITS; |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2402 } |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2403 if(add){ |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2404 v += dst[x + src_x]; |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2405 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2406 if(v&(~255)) v= ~(v>>31); |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2407 dst8[x + y*src_stride] = v; |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2408 }else{ |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2409 dst[x + src_x] -= v; |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2410 } |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2411 } |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2412 } |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2413 } |
|
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2414 |
| 6412 | 2415 //FIXME name cleanup (b_w, block_w, b_width stuff) |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2416 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2417 const int b_width = s->b_width << s->block_max_depth; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2418 const int b_height= s->b_height << s->block_max_depth; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2419 const int b_stride= b_width; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2420 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2421 BlockNode *rt= lt+1; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2422 BlockNode *lb= lt+b_stride; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2423 BlockNode *rb= lb+1; |
| 2967 | 2424 uint8_t *block[4]; |
| 2842 | 2425 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; |
| 2426 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align | |
| 2427 uint8_t *ptmp; | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2428 int x,y; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2429 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2430 if(b_x<0){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2431 lt= rt; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2432 lb= rb; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2433 }else if(b_x + 1 >= b_width){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2434 rt= lt; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2435 rb= lb; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2436 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2437 if(b_y<0){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2438 lt= lb; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2439 rt= rb; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2440 }else if(b_y + 1 >= b_height){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2441 lb= lt; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2442 rb= rt; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2443 } |
| 2967 | 2444 |
| 6412 | 2445 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2446 obmc -= src_x; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2447 b_w += src_x; |
|
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2448 if(!sliced && !offset_dst) |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2449 dst -= src_x; |
| 2206 | 2450 src_x=0; |
| 2451 }else if(src_x + b_w > w){ | |
| 2452 b_w = w - src_x; | |
| 2453 } | |
| 2454 if(src_y<0){ | |
| 2455 obmc -= src_y*obmc_stride; | |
| 2456 b_h += src_y; | |
|
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2457 if(!sliced && !offset_dst) |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2458 dst -= src_y*dst_stride; |
| 2206 | 2459 src_y=0; |
| 2460 }else if(src_y + b_h> h){ | |
| 2461 b_h = h - src_y; | |
| 2462 } | |
| 2967 | 2463 |
| 2206 | 2464 if(b_w<=0 || b_h<=0) return; |
| 2465 | |
| 6414 | 2466 assert(src_stride > 2*MB_SIZE + 5); |
| 2467 | |
|
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2468 if(!sliced && offset_dst) |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2469 dst += src_x + src_y*dst_stride; |
|
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2470 dst8+= src_x + src_y*src_stride; |
| 2206 | 2471 // src += src_x + src_y*src_stride; |
| 2472 | |
| 2842 | 2473 ptmp= tmp + 3*tmp_step; |
| 2474 block[0]= ptmp; | |
| 2475 ptmp+=tmp_step; | |
| 3314 | 2476 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); |
| 2206 | 2477 |
| 2478 if(same_block(lt, rt)){ | |
| 2479 block[1]= block[0]; | |
| 2480 }else{ | |
| 2842 | 2481 block[1]= ptmp; |
| 2482 ptmp+=tmp_step; | |
| 3314 | 2483 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); |
| 2206 | 2484 } |
| 2967 | 2485 |
| 2206 | 2486 if(same_block(lt, lb)){ |
| 2487 block[2]= block[0]; | |
| 2488 }else if(same_block(rt, lb)){ | |
| 2489 block[2]= block[1]; | |
| 2490 }else{ | |
| 2842 | 2491 block[2]= ptmp; |
| 2492 ptmp+=tmp_step; | |
| 3314 | 2493 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); |
| 2206 | 2494 } |
| 2495 | |
| 2496 if(same_block(lt, rb) ){ | |
| 2497 block[3]= block[0]; | |
| 2498 }else if(same_block(rt, rb)){ | |
| 2499 block[3]= block[1]; | |
| 2500 }else if(same_block(lb, rb)){ | |
| 2501 block[3]= block[2]; | |
| 2502 }else{ | |
| 2842 | 2503 block[3]= ptmp; |
| 3314 | 2504 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); |
| 2206 | 2505 } |
| 2506 #if 0 | |
| 2507 for(y=0; y<b_h; y++){ | |
| 2508 for(x=0; x<b_w; x++){ | |
| 2509 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
| 2510 if(add) dst[x + y*dst_stride] += v; | |
| 2511 else dst[x + y*dst_stride] -= v; | |
| 2512 } | |
| 2513 } | |
| 2514 for(y=0; y<b_h; y++){ | |
| 2515 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
| 2516 for(x=0; x<b_w; x++){ | |
| 2517 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
| 2518 if(add) dst[x + y*dst_stride] += v; | |
| 2519 else dst[x + y*dst_stride] -= v; | |
| 2520 } | |
| 2521 } | |
| 2522 for(y=0; y<b_h; y++){ | |
| 2523 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
| 2524 for(x=0; x<b_w; x++){ | |
| 2525 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
| 2526 if(add) dst[x + y*dst_stride] += v; | |
| 2527 else dst[x + y*dst_stride] -= v; | |
| 2528 } | |
| 2529 } | |
| 2530 for(y=0; y<b_h; y++){ | |
| 2531 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
| 2532 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
| 2533 for(x=0; x<b_w; x++){ | |
| 2534 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
| 2535 if(add) dst[x + y*dst_stride] += v; | |
| 2536 else dst[x + y*dst_stride] -= v; | |
| 2537 } | |
| 2538 } | |
| 2539 #else | |
|
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2540 if(sliced){ |
|
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2541 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
|
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2542 }else |
| 2206 | 2543 for(y=0; y<b_h; y++){ |
| 5409 | 2544 //FIXME ugly misuse of obmc_stride |
|
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2545 const uint8_t *obmc1= obmc + y*obmc_stride; |
|
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2546 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
|
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2547 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
|
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2548 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
| 2206 | 2549 for(x=0; x<b_w; x++){ |
| 2550 int v= obmc1[x] * block[3][x + y*src_stride] | |
| 2551 +obmc2[x] * block[2][x + y*src_stride] | |
| 2552 +obmc3[x] * block[1][x + y*src_stride] | |
| 2553 +obmc4[x] * block[0][x + y*src_stride]; | |
| 2967 | 2554 |
|
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2555 v <<= 8 - LOG2_OBMC_MAX; |
| 2246 | 2556 if(FRAC_BITS != 8){ |
| 2557 v >>= 8 - FRAC_BITS; | |
| 2558 } | |
|
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2559 if(add){ |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2560 v += dst[x + y*dst_stride]; |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2561 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2562 if(v&(~255)) v= ~(v>>31); |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2563 dst8[x + y*src_stride] = v; |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2564 }else{ |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2565 dst[x + y*dst_stride] -= v; |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2566 } |
| 2206 | 2567 } |
| 2568 } | |
| 5910 | 2569 #endif /* 0 */ |
| 2206 | 2570 } |
| 2571 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2572 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2573 Plane *p= &s->plane[plane_index]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2574 const int mb_w= s->b_width << s->block_max_depth; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2575 const int mb_h= s->b_height << s->block_max_depth; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2576 int x, y, mb_x; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2577 int block_size = MB_SIZE >> s->block_max_depth; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2578 int block_w = plane_index ? block_size/2 : block_size; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2579 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2580 int obmc_stride= plane_index ? block_size : 2*block_size; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2581 int ref_stride= s->current_picture.linesize[plane_index]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2582 uint8_t *dst8= s->current_picture.data[plane_index]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2583 int w= p->width; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2584 int h= p->height; |
| 2967 | 2585 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2586 if(s->keyframe || (s->avctx->debug&512)){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2587 if(mb_y==mb_h) |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2588 return; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2589 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2590 if(add){ |
|
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2591 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2592 // DWTELEM * line = slice_buffer_get_line(sb, y); |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2593 IDWTELEM * line = sb->line[y]; |
|
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2594 for(x=0; x<w; x++){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2595 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2596 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2597 v >>= FRAC_BITS; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2598 if(v&(~255)) v= ~(v>>31); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2599 dst8[x + y*ref_stride]= v; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2600 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2601 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2602 }else{ |
|
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2603 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2604 // DWTELEM * line = slice_buffer_get_line(sb, y); |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2605 IDWTELEM * line = sb->line[y]; |
|
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2606 for(x=0; x<w; x++){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2607 line[x] -= 128 << FRAC_BITS; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2608 // buf[x + y*w]-= 128<<FRAC_BITS; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2609 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2610 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2611 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2612 |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2613 return; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2614 } |
| 2967 | 2615 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2616 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
|
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2617 add_yblock(s, 1, sb, old_buffer, dst8, obmc, |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2618 block_w*mb_x - block_w/2, |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2619 block_w*mb_y - block_w/2, |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2620 block_w, block_w, |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2621 w, h, |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2622 w, ref_stride, obmc_stride, |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2623 mb_x - 1, mb_y - 1, |
|
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2624 add, 0, plane_index); |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2625 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2626 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2627 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2628 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ |
| 2138 | 2629 Plane *p= &s->plane[plane_index]; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2630 const int mb_w= s->b_width << s->block_max_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2631 const int mb_h= s->b_height << s->block_max_depth; |
| 2562 | 2632 int x, y, mb_x; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2633 int block_size = MB_SIZE >> s->block_max_depth; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2634 int block_w = plane_index ? block_size/2 : block_size; |
| 2206 | 2635 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2636 const int obmc_stride= plane_index ? block_size : 2*block_size; |
|
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2637 int ref_stride= s->current_picture.linesize[plane_index]; |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2638 uint8_t *dst8= s->current_picture.data[plane_index]; |
| 2138 | 2639 int w= p->width; |
| 2640 int h= p->height; | |
| 2967 | 2641 |
| 2206 | 2642 if(s->keyframe || (s->avctx->debug&512)){ |
| 2562 | 2643 if(mb_y==mb_h) |
| 2644 return; | |
| 2645 | |
|
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2646 if(add){ |
|
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2647 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
|
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2648 for(x=0; x<w; x++){ |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2649 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2650 v >>= FRAC_BITS; |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2651 if(v&(~255)) v= ~(v>>31); |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2652 dst8[x + y*ref_stride]= v; |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2653 } |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2654 } |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2655 }else{ |
|
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2656 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
|
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2657 for(x=0; x<w; x++){ |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2658 buf[x + y*w]-= 128<<FRAC_BITS; |
|
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2659 } |
| 2206 | 2660 } |
| 2138 | 2661 } |
| 2206 | 2662 |
| 2663 return; | |
| 2138 | 2664 } |
| 2967 | 2665 |
| 6414 | 2666 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
| 2667 add_yblock(s, 0, NULL, buf, dst8, obmc, | |
| 2668 block_w*mb_x - block_w/2, | |
| 2669 block_w*mb_y - block_w/2, | |
| 2670 block_w, block_w, | |
| 2671 w, h, | |
| 2672 w, ref_stride, obmc_stride, | |
| 2673 mb_x - 1, mb_y - 1, | |
| 2674 add, 1, plane_index); | |
| 2675 } | |
| 2562 | 2676 } |
| 2677 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2678 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ |
| 2562 | 2679 const int mb_h= s->b_height << s->block_max_depth; |
| 2680 int mb_y; | |
| 2681 for(mb_y=0; mb_y<=mb_h; mb_y++) | |
| 2682 predict_slice(s, buf, plane_index, add, mb_y); | |
| 2138 | 2683 } |
| 2684 | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2685 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2686 int i, x2, y2; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2687 Plane *p= &s->plane[plane_index]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2688 const int block_size = MB_SIZE >> s->block_max_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2689 const int block_w = plane_index ? block_size/2 : block_size; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2690 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2691 const int obmc_stride= plane_index ? block_size : 2*block_size; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2692 const int ref_stride= s->current_picture.linesize[plane_index]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2693 uint8_t *src= s-> input_picture.data[plane_index]; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2694 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2695 const int b_stride = s->b_width << s->block_max_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2696 const int w= p->width; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2697 const int h= p->height; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2698 int index= mb_x + mb_y*b_stride; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2699 BlockNode *b= &s->block[index]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2700 BlockNode backup= *b; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2701 int ab=0; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2702 int aa=0; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2703 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2704 b->type|= BLOCK_INTRA; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2705 b->color[plane_index]= 0; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2706 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2707 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2708 for(i=0; i<4; i++){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2709 int mb_x2= mb_x + (i &1) - 1; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2710 int mb_y2= mb_y + (i>>1) - 1; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2711 int x= block_w*mb_x2 + block_w/2; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2712 int y= block_w*mb_y2 + block_w/2; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2713 |
|
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2714 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2715 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2716 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2717 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2718 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2719 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2720 int obmc_v= obmc[index]; |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2721 int d; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2722 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2723 if(x<0) obmc_v += obmc[index + block_w]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2724 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2725 if(x+block_w>w) obmc_v += obmc[index - block_w]; |
| 6412 | 2726 //FIXME precalculate this or simplify it somehow else |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2727 |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2728 d = -dst[index] + (1<<(FRAC_BITS-1)); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2729 dst[index] = d; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2730 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; |
| 6412 | 2731 aa += obmc_v * obmc_v; //FIXME precalculate this |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2732 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2733 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2734 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2735 *b= backup; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2736 |
| 5127 | 2737 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2738 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2739 |
| 3051 | 2740 static inline int get_block_bits(SnowContext *s, int x, int y, int w){ |
| 2741 const int b_stride = s->b_width << s->block_max_depth; | |
| 2742 const int b_height = s->b_height<< s->block_max_depth; | |
| 2743 int index= x + y*b_stride; | |
|
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2744 const BlockNode *b = &s->block[index]; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2745 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2746 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2747 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; |
|
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2748 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; |
| 3051 | 2749 int dmx, dmy; |
| 4001 | 2750 // int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
| 2751 // int my_context= av_log2(2*FFABS(left->my - top->my)); | |
| 3051 | 2752 |
| 2753 if(x<0 || x>=b_stride || y>=b_height) | |
| 2754 return 0; | |
| 2755 /* | |
| 2756 1 0 0 | |
| 2757 01X 1-2 1 | |
| 2758 001XX 3-6 2-3 | |
| 2759 0001XXX 7-14 4-7 | |
| 2760 00001XXXX 15-30 8-15 | |
| 2761 */ | |
| 2762 //FIXME try accurate rate | |
| 6412 | 2763 //FIXME intra and inter predictors if surrounding blocks are not the same type |
| 3051 | 2764 if(b->type & BLOCK_INTRA){ |
| 4001 | 2765 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) |
| 2766 + av_log2(2*FFABS(left->color[1] - b->color[1])) | |
| 2767 + av_log2(2*FFABS(left->color[2] - b->color[2]))); | |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2768 }else{ |
|
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2769 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); |
|
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2770 dmx-= b->mx; |
|
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2771 dmy-= b->my; |
| 4001 | 2772 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda |
| 2773 + av_log2(2*FFABS(dmy)) | |
| 3314 | 2774 + av_log2(2*b->ref)); |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2775 } |
| 3051 | 2776 } |
| 2777 | |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2778 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2779 Plane *p= &s->plane[plane_index]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2780 const int block_size = MB_SIZE >> s->block_max_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2781 const int block_w = plane_index ? block_size/2 : block_size; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2782 const int obmc_stride= plane_index ? block_size : 2*block_size; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2783 const int ref_stride= s->current_picture.linesize[plane_index]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2784 uint8_t *dst= s->current_picture.data[plane_index]; |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2785 uint8_t *src= s-> input_picture.data[plane_index]; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2786 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2787 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2788 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)]; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2789 const int b_stride = s->b_width << s->block_max_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2790 const int b_height = s->b_height<< s->block_max_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2791 const int w= p->width; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2792 const int h= p->height; |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2793 int distortion; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2794 int rate= 0; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2795 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2796 int sx= block_w*mb_x - block_w/2; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2797 int sy= block_w*mb_y - block_w/2; |
| 3206 | 2798 int x0= FFMAX(0,-sx); |
| 2799 int y0= FFMAX(0,-sy); | |
| 2800 int x1= FFMIN(block_w*2, w-sx); | |
| 2801 int y1= FFMIN(block_w*2, h-sy); | |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2802 int i,x,y; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2803 |
| 3314 | 2804 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2805 |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2806 for(y=y0; y<y1; y++){ |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2807 const uint8_t *obmc1= obmc_edged + y*obmc_stride; |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2808 const IDWTELEM *pred1 = pred + y*obmc_stride; |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2809 uint8_t *cur1 = cur + y*ref_stride; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2810 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2811 for(x=x0; x<x1; x++){ |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2812 #if FRAC_BITS >= LOG2_OBMC_MAX |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2813 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2814 #else |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2815 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2816 #endif |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2817 v = (v + pred1[x]) >> FRAC_BITS; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2818 if(v&(~255)) v= ~(v>>31); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2819 dst1[x] = v; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2820 } |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2821 } |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2822 |
| 3206 | 2823 /* copy the regions where obmc[] = (uint8_t)256 */ |
| 2824 if(LOG2_OBMC_MAX == 8 | |
| 2825 && (mb_x == 0 || mb_x == b_stride-1) | |
| 2826 && (mb_y == 0 || mb_y == b_height-1)){ | |
| 2827 if(mb_x == 0) | |
| 2828 x1 = block_w; | |
| 2829 else | |
| 2830 x0 = block_w; | |
| 2831 if(mb_y == 0) | |
| 2832 y1 = block_w; | |
| 2833 else | |
| 2834 y0 = block_w; | |
| 2835 for(y=y0; y<y1; y++) | |
| 2836 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); | |
| 2837 } | |
| 2838 | |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2839 if(block_w==16){ |
|
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2840 /* FIXME rearrange dsputil to fit 32x32 cmp functions */ |
|
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2841 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ |
| 6412 | 2842 /* FIXME cmps overlap but do not cover the wavelet's whole support. |
| 2843 * So improving the score of one block is not strictly guaranteed | |
| 2844 * to improve the score of the whole frame, thus iterative motion | |
| 2845 * estimation does not always converge. */ | |
|
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2846 if(s->avctx->me_cmp == FF_CMP_W97) |
| 4197 | 2847 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
|
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2848 else if(s->avctx->me_cmp == FF_CMP_W53) |
| 4197 | 2849 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
|
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2850 else{ |
|
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2851 distortion = 0; |
|
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2852 for(i=0; i<4; i++){ |
|
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2853 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; |
|
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2854 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); |
|
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2855 } |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2856 } |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2857 }else{ |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2858 assert(block_w==8); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2859 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2860 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2861 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2862 if(plane_index==0){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2863 for(i=0; i<4; i++){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2864 /* ..RRr |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2865 * .RXx. |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2866 * rxx.. |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2867 */ |
| 3051 | 2868 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2869 } |
| 3057 | 2870 if(mb_x == b_stride-2) |
| 2871 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1); | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2872 } |
| 3051 | 2873 return distortion + rate*penalty_factor; |
| 2874 } | |
| 2875 | |
| 2876 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ | |
| 2877 int i, y2; | |
| 2878 Plane *p= &s->plane[plane_index]; | |
| 2879 const int block_size = MB_SIZE >> s->block_max_depth; | |
| 2880 const int block_w = plane_index ? block_size/2 : block_size; | |
| 2881 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
| 2882 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
| 2883 const int ref_stride= s->current_picture.linesize[plane_index]; | |
| 2884 uint8_t *dst= s->current_picture.data[plane_index]; | |
| 2885 uint8_t *src= s-> input_picture.data[plane_index]; | |
| 6426 | 2886 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst |
| 2887 // const has only been removed from zero_dst to suppress a warning | |
| 2888 static IDWTELEM zero_dst[4096]; //FIXME | |
| 3051 | 2889 const int b_stride = s->b_width << s->block_max_depth; |
| 2890 const int w= p->width; | |
| 2891 const int h= p->height; | |
| 2892 int distortion= 0; | |
| 2893 int rate= 0; | |
| 2894 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | |
| 2895 | |
| 2896 for(i=0; i<9; i++){ | |
| 2897 int mb_x2= mb_x + (i%3) - 1; | |
| 2898 int mb_y2= mb_y + (i/3) - 1; | |
| 2899 int x= block_w*mb_x2 + block_w/2; | |
| 2900 int y= block_w*mb_y2 + block_w/2; | |
| 2901 | |
|
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2902 add_yblock(s, 0, NULL, zero_dst, dst, obmc, |
| 3051 | 2903 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); |
| 2904 | |
| 2905 //FIXME find a cleaner/simpler way to skip the outside stuff | |
| 2906 for(y2= y; y2<0; y2++) | |
| 2907 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
| 2908 for(y2= h; y2<y+block_w; y2++) | |
| 2909 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
| 2910 if(x<0){ | |
| 2911 for(y2= y; y2<y+block_w; y2++) | |
| 2912 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); | |
| 2913 } | |
| 2914 if(x+block_w > w){ | |
| 2915 for(y2= y; y2<y+block_w; y2++) | |
| 2916 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); | |
| 2917 } | |
| 2918 | |
| 2919 assert(block_w== 8 || block_w==16); | |
| 2920 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); | |
| 2921 } | |
| 2922 | |
| 2923 if(plane_index==0){ | |
| 2924 BlockNode *b= &s->block[mb_x+mb_y*b_stride]; | |
| 2925 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); | |
| 2926 | |
| 2927 /* ..RRRr | |
| 2928 * .RXXx. | |
| 2929 * .RXXx. | |
| 2930 * rxxx. | |
| 2931 */ | |
| 2932 if(merged) | |
| 2933 rate = get_block_bits(s, mb_x, mb_y, 2); | |
| 2934 for(i=merged?4:0; i<9; i++){ | |
| 2935 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; | |
| 2936 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); | |
| 2937 } | |
| 2938 } | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2939 return distortion + rate*penalty_factor; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2940 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2941 |
|
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2942 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2943 const int b_stride= s->b_width << s->block_max_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2944 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2945 BlockNode backup= *block; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2946 int rd, index, value; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2947 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2948 assert(mb_x>=0 && mb_y>=0); |
| 2994 | 2949 assert(mb_x<b_stride); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2950 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2951 if(intra){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2952 block->color[0] = p[0]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2953 block->color[1] = p[1]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2954 block->color[2] = p[2]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2955 block->type |= BLOCK_INTRA; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2956 }else{ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2957 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); |
| 3314 | 2958 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2959 if(s->me_cache[index] == value) |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2960 return 0; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2961 s->me_cache[index]= value; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2962 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2963 block->mx= p[0]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2964 block->my= p[1]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2965 block->type &= ~BLOCK_INTRA; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2966 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2967 |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2968 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2969 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2970 //FIXME chroma |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2971 if(rd < *best_rd){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2972 *best_rd= rd; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2973 return 1; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2974 }else{ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2975 *block= backup; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2976 return 0; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2977 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2978 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2979 |
| 6412 | 2980 /* special case for int[2] args we discard afterwards, |
| 2981 * fixes compilation problem with gcc 2.95 */ | |
|
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2982 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ |
|
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2983 int p[2] = {p0, p1}; |
| 3197 | 2984 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); |
|
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2985 } |
|
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2986 |
|
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2987 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ |
| 3051 | 2988 const int b_stride= s->b_width << s->block_max_depth; |
| 2989 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | |
| 2990 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; | |
| 2991 int rd, index, value; | |
| 2992 | |
| 2993 assert(mb_x>=0 && mb_y>=0); | |
| 2994 assert(mb_x<b_stride); | |
| 2995 assert(((mb_x|mb_y)&1) == 0); | |
| 2996 | |
| 2997 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); | |
| 3314 | 2998 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); |
| 3051 | 2999 if(s->me_cache[index] == value) |
| 3000 return 0; | |
| 3001 s->me_cache[index]= value; | |
| 3002 | |
| 3003 block->mx= p0; | |
| 3004 block->my= p1; | |
| 3314 | 3005 block->ref= ref; |
| 3051 | 3006 block->type &= ~BLOCK_INTRA; |
| 3007 block[1]= block[b_stride]= block[b_stride+1]= *block; | |
| 3008 | |
| 3009 rd= get_4block_rd(s, mb_x, mb_y, 0); | |
| 3010 | |
| 3011 //FIXME chroma | |
| 3012 if(rd < *best_rd){ | |
| 3013 *best_rd= rd; | |
| 3014 return 1; | |
| 3015 }else{ | |
| 3016 block[0]= backup[0]; | |
| 3017 block[1]= backup[1]; | |
| 3018 block[b_stride]= backup[2]; | |
| 3019 block[b_stride+1]= backup[3]; | |
| 3020 return 0; | |
| 3021 } | |
| 3022 } | |
| 3023 | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3024 static void iterative_me(SnowContext *s){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3025 int pass, mb_x, mb_y; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3026 const int b_width = s->b_width << s->block_max_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3027 const int b_height= s->b_height << s->block_max_depth; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3028 const int b_stride= b_width; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3029 int color[3]; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3030 |
|
3194
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3031 { |
|
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3032 RangeCoder r = s->c; |
|
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3033 uint8_t state[sizeof(s->block_state)]; |
|
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3034 memcpy(state, s->block_state, sizeof(s->block_state)); |
|
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3035 for(mb_y= 0; mb_y<s->b_height; mb_y++) |
|
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3036 for(mb_x= 0; mb_x<s->b_width; mb_x++) |
|
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3037 encode_q_branch(s, 0, mb_x, mb_y); |
|
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3038 s->c = r; |
|
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3039 memcpy(s->block_state, state, sizeof(s->block_state)); |
|
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3040 } |
|
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3041 |
|
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
3042 for(pass=0; pass<25; pass++){ |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3043 int change= 0; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3044 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3045 for(mb_y= 0; mb_y<b_height; mb_y++){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3046 for(mb_x= 0; mb_x<b_width; mb_x++){ |
| 3314 | 3047 int dia_change, i, j, ref; |
| 3048 int best_rd= INT_MAX, ref_rd; | |
| 3049 BlockNode backup, ref_b; | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3050 const int index= mb_x + mb_y * b_stride; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3051 BlockNode *block= &s->block[index]; |
| 3324 | 3052 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; |
| 3053 BlockNode *lb = mb_x ? &s->block[index -1] : NULL; | |
| 3054 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL; | |
| 3055 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; | |
| 3056 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; | |
| 3057 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; | |
| 3058 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; | |
| 3059 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; | |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3060 const int b_w= (MB_SIZE >> s->block_max_depth); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3061 uint8_t obmc_edged[b_w*2][b_w*2]; |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3062 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3063 if(pass && (block->type & BLOCK_OPT)) |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3064 continue; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3065 block->type |= BLOCK_OPT; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3066 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3067 backup= *block; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3068 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3069 if(!s->me_cache_generation) |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3070 memset(s->me_cache, 0, sizeof(s->me_cache)); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3071 s->me_cache_generation += 1<<22; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3072 |
| 6412 | 3073 //FIXME precalculate |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3074 { |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3075 int x, y; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3076 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3077 if(mb_x==0) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3078 for(y=0; y<b_w*2; y++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3079 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3080 if(mb_x==b_stride-1) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3081 for(y=0; y<b_w*2; y++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3082 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3083 if(mb_y==0){ |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3084 for(x=0; x<b_w*2; x++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3085 obmc_edged[0][x] += obmc_edged[b_w-1][x]; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3086 for(y=1; y<b_w; y++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3087 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3088 } |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3089 if(mb_y==b_height-1){ |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3090 for(x=0; x<b_w*2; x++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3091 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3092 for(y=b_w; y<b_w*2-1; y++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3093 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3094 } |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3095 } |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3096 |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3097 //skip stuff outside the picture |
|
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
3098 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){ |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3099 uint8_t *src= s-> input_picture.data[0]; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3100 uint8_t *dst= s->current_picture.data[0]; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3101 const int stride= s->current_picture.linesize[0]; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3102 const int block_w= MB_SIZE >> s->block_max_depth; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3103 const int sx= block_w*mb_x - block_w/2; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3104 const int sy= block_w*mb_y - block_w/2; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3105 const int w= s->plane[0].width; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3106 const int h= s->plane[0].height; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3107 int y; |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3108 |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3109 for(y=sy; y<0; y++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3110 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3111 for(y=h; y<sy+block_w*2; y++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3112 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3113 if(sx<0){ |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3114 for(y=sy; y<sy+block_w*2; y++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3115 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3116 } |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3117 if(sx+block_w*2 > w){ |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3118 for(y=sy; y<sy+block_w*2; y++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3119 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3120 } |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3121 } |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3122 |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3123 // intra(black) = neighbors' contribution to the current block |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3124 for(i=0; i<3; i++) |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3125 color[i]= get_dc(s, mb_x, mb_y, i); |
|
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3126 |
| 5127 | 3127 // get previous score (cannot be cached due to OBMC) |
| 3057 | 3128 if(pass > 0 && (block->type&BLOCK_INTRA)){ |
| 3129 int color0[3]= {block->color[0], block->color[1], block->color[2]}; | |
| 3130 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd); | |
| 3131 }else | |
| 3197 | 3132 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd); |
| 3133 | |
| 3314 | 3134 ref_b= *block; |
| 3135 ref_rd= best_rd; | |
| 3136 for(ref=0; ref < s->ref_frames; ref++){ | |
| 3137 int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; | |
| 3138 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold | |
| 3139 continue; | |
| 3140 block->ref= ref; | |
| 3141 best_rd= INT_MAX; | |
| 3142 | |
| 3143 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd); | |
| 3144 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd); | |
| 3324 | 3145 if(tb) |
| 3314 | 3146 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd); |
| 3324 | 3147 if(lb) |
| 3314 | 3148 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd); |
| 3324 | 3149 if(rb) |
| 3314 | 3150 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd); |
| 3324 | 3151 if(bb) |
| 3314 | 3152 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); |
| 3153 | |
| 3154 /* fullpel ME */ | |
| 6412 | 3155 //FIXME avoid subpel interpolation / round to nearest integer |
| 3314 | 3156 do{ |
| 3157 dia_change=0; | |
| 3158 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ | |
| 3159 for(j=0; j<i; j++){ | |
| 3160 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
| 3161 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
| 3162 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
| 3163 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
| 3164 } | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3165 } |
| 3314 | 3166 }while(dia_change); |
| 3167 /* subpel ME */ | |
| 3168 do{ | |
| 3169 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; | |
| 3170 dia_change=0; | |
| 3171 for(i=0; i<8; i++) | |
| 3172 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd); | |
| 3173 }while(dia_change); | |
| 3174 //FIXME or try the standard 2 pass qpel or similar | |
| 3175 | |
| 3176 mvr[0][0]= block->mx; | |
| 3177 mvr[0][1]= block->my; | |
| 3178 if(ref_rd > best_rd){ | |
| 3179 ref_rd= best_rd; | |
| 3180 ref_b= *block; | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3181 } |
| 3314 | 3182 } |
| 3183 best_rd= ref_rd; | |
| 3184 *block= ref_b; | |
| 2998 | 3185 #if 1 |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3186 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3187 //FIXME RD style color selection |
| 2998 | 3188 #endif |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3189 if(!same_block(block, &backup)){ |
| 3324 | 3190 if(tb ) tb ->type &= ~BLOCK_OPT; |
| 3191 if(lb ) lb ->type &= ~BLOCK_OPT; | |
| 3192 if(rb ) rb ->type &= ~BLOCK_OPT; | |
| 3193 if(bb ) bb ->type &= ~BLOCK_OPT; | |
| 3194 if(tlb) tlb->type &= ~BLOCK_OPT; | |
| 3195 if(trb) trb->type &= ~BLOCK_OPT; | |
| 3196 if(blb) blb->type &= ~BLOCK_OPT; | |
| 3197 if(brb) brb->type &= ~BLOCK_OPT; | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3198 change ++; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3199 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3200 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3201 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3202 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3203 if(!change) |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3204 break; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3205 } |
| 3051 | 3206 |
| 3207 if(s->block_max_depth == 1){ | |
| 3208 int change= 0; | |
| 3209 for(mb_y= 0; mb_y<b_height; mb_y+=2){ | |
| 3210 for(mb_x= 0; mb_x<b_width; mb_x+=2){ | |
| 3324 | 3211 int i; |
| 3051 | 3212 int best_rd, init_rd; |
| 3213 const int index= mb_x + mb_y * b_stride; | |
| 3214 BlockNode *b[4]; | |
| 3215 | |
| 3216 b[0]= &s->block[index]; | |
| 3217 b[1]= b[0]+1; | |
| 3218 b[2]= b[0]+b_stride; | |
| 3219 b[3]= b[2]+1; | |
| 3220 if(same_block(b[0], b[1]) && | |
| 3221 same_block(b[0], b[2]) && | |
| 3222 same_block(b[0], b[3])) | |
| 3223 continue; | |
| 3224 | |
| 3225 if(!s->me_cache_generation) | |
| 3226 memset(s->me_cache, 0, sizeof(s->me_cache)); | |
| 3227 s->me_cache_generation += 1<<22; | |
| 3228 | |
| 3229 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); | |
| 3230 | |
| 3314 | 3231 //FIXME more multiref search? |
| 3051 | 3232 check_4block_inter(s, mb_x, mb_y, |
| 3233 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, | |
| 3314 | 3234 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); |
| 3051 | 3235 |
| 3236 for(i=0; i<4; i++) | |
| 3237 if(!(b[i]->type&BLOCK_INTRA)) | |
| 3314 | 3238 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); |
| 3051 | 3239 |
| 3240 if(init_rd != best_rd) | |
| 3241 change++; | |
| 3242 } | |
| 3243 } | |
| 3244 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | |
| 3245 } | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3246 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3247 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3248 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ |
| 2138 | 3249 const int w= b->width; |
| 3250 const int h= b->height; | |
| 4594 | 3251 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
| 5575 | 3252 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); |
| 2150 | 3253 int x,y, thres1, thres2; |
| 2138 | 3254 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3255 if(s->qlog == LOSSLESS_QLOG){ |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3256 for(y=0; y<h; y++) |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3257 for(x=0; x<w; x++) |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3258 dst[x + y*stride]= src[x + y*stride]; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3259 return; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3260 } |
| 2967 | 3261 |
| 2138 | 3262 bias= bias ? 0 : (3*qmul)>>3; |
| 2150 | 3263 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; |
| 3264 thres2= 2*thres1; | |
| 2967 | 3265 |
| 2138 | 3266 if(!bias){ |
| 3267 for(y=0; y<h; y++){ | |
| 3268 for(x=0; x<w; x++){ | |
| 2150 | 3269 int i= src[x + y*stride]; |
| 2967 | 3270 |
| 2150 | 3271 if((unsigned)(i+thres1) > thres2){ |
| 3272 if(i>=0){ | |
| 3273 i<<= QEXPSHIFT; | |
| 3274 i/= qmul; //FIXME optimize | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3275 dst[x + y*stride]= i; |
| 2150 | 3276 }else{ |
| 3277 i= -i; | |
| 3278 i<<= QEXPSHIFT; | |
| 3279 i/= qmul; //FIXME optimize | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3280 dst[x + y*stride]= -i; |
| 2150 | 3281 } |
| 3282 }else | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3283 dst[x + y*stride]= 0; |
| 2138 | 3284 } |
| 3285 } | |
| 3286 }else{ | |
| 3287 for(y=0; y<h; y++){ | |
| 3288 for(x=0; x<w; x++){ | |
| 2967 | 3289 int i= src[x + y*stride]; |
| 3290 | |
| 2150 | 3291 if((unsigned)(i+thres1) > thres2){ |
| 3292 if(i>=0){ | |
| 3293 i<<= QEXPSHIFT; | |
| 3294 i= (i + bias) / qmul; //FIXME optimize | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3295 dst[x + y*stride]= i; |
| 2150 | 3296 }else{ |
| 3297 i= -i; | |
| 3298 i<<= QEXPSHIFT; | |
| 3299 i= (i + bias) / qmul; //FIXME optimize | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3300 dst[x + y*stride]= -i; |
| 2150 | 3301 } |
| 3302 }else | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3303 dst[x + y*stride]= 0; |
| 2138 | 3304 } |
| 3305 } | |
| 3306 } | |
| 3307 } | |
| 3308 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3309 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3310 const int w= b->width; |
| 4594 | 3311 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
| 2600 | 3312 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3313 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3314 int x,y; |
| 2967 | 3315 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3316 if(s->qlog == LOSSLESS_QLOG) return; |
| 2967 | 3317 |
|
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3318 for(y=start_y; y<end_y; y++){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3319 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3320 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3321 for(x=0; x<w; x++){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3322 int i= line[x]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3323 if(i<0){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3324 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3325 }else if(i>0){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3326 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3327 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3328 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3329 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3330 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3331 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3332 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ |
| 2138 | 3333 const int w= b->width; |
| 3334 const int h= b->height; | |
| 4594 | 3335 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
| 2600 | 3336 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
| 2138 | 3337 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
| 3338 int x,y; | |
| 2967 | 3339 |
| 2161 | 3340 if(s->qlog == LOSSLESS_QLOG) return; |
| 2967 | 3341 |
| 2138 | 3342 for(y=0; y<h; y++){ |
| 3343 for(x=0; x<w; x++){ | |
| 3344 int i= src[x + y*stride]; | |
| 3345 if(i<0){ | |
| 3346 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
| 3347 }else if(i>0){ | |
| 3348 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
| 3349 } | |
| 3350 } | |
| 3351 } | |
| 3352 } | |
| 3353 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3354 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
| 2138 | 3355 const int w= b->width; |
| 3356 const int h= b->height; | |
| 3357 int x,y; | |
| 2967 | 3358 |
| 2138 | 3359 for(y=h-1; y>=0; y--){ |
| 3360 for(x=w-1; x>=0; x--){ | |
| 3361 int i= x + y*stride; | |
| 2967 | 3362 |
| 2138 | 3363 if(x){ |
| 3364 if(use_median){ | |
| 3365 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
| 3366 else src[i] -= src[i - 1]; | |
| 3367 }else{ | |
| 3368 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
| 3369 else src[i] -= src[i - 1]; | |
| 3370 } | |
| 3371 }else{ | |
| 3372 if(y) src[i] -= src[i - stride]; | |
| 3373 } | |
| 3374 } | |
| 3375 } | |
| 3376 } | |
| 3377 | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3378 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3379 const int w= b->width; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3380 int x,y; |
| 2967 | 3381 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3382 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3383 IDWTELEM * prev; |
| 2967 | 3384 |
|
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3385 if (start_y != 0) |
|
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3386 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
| 2967 | 3387 |
|
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3388 for(y=start_y; y<end_y; y++){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3389 prev = line; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3390 // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3391 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3392 for(x=0; x<w; x++){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3393 if(x){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3394 if(use_median){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3395 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3396 else line[x] += line[x - 1]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3397 }else{ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3398 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3399 else line[x] += line[x - 1]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3400 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3401 }else{ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3402 if(y) line[x] += prev[x]; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3403 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3404 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3405 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3406 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3407 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3408 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
| 2138 | 3409 const int w= b->width; |
| 3410 const int h= b->height; | |
| 3411 int x,y; | |
| 2967 | 3412 |
| 2138 | 3413 for(y=0; y<h; y++){ |
| 3414 for(x=0; x<w; x++){ | |
| 3415 int i= x + y*stride; | |
| 2967 | 3416 |
| 2138 | 3417 if(x){ |
| 3418 if(use_median){ | |
| 3419 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
| 3420 else src[i] += src[i - 1]; | |
| 3421 }else{ | |
| 3422 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
| 3423 else src[i] += src[i - 1]; | |
| 3424 } | |
| 3425 }else{ | |
| 3426 if(y) src[i] += src[i - stride]; | |
| 3427 } | |
| 3428 } | |
| 3429 } | |
| 3430 } | |
| 3431 | |
|
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3432 static void encode_qlogs(SnowContext *s){ |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3433 int plane_index, level, orientation; |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3434 |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3435 for(plane_index=0; plane_index<2; plane_index++){ |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3436 for(level=0; level<s->spatial_decomposition_count; level++){ |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3437 for(orientation=level ? 1:0; orientation<4; orientation++){ |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3438 if(orientation==2) continue; |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3439 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3440 } |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3441 } |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3442 } |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3443 } |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3444 |
| 2138 | 3445 static void encode_header(SnowContext *s){ |
|
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3446 int plane_index, i; |
| 2967 | 3447 uint8_t kstate[32]; |
| 3448 | |
| 3449 memset(kstate, MID_STATE, sizeof(kstate)); | |
| 2138 | 3450 |
| 2335 | 3451 put_rac(&s->c, kstate, s->keyframe); |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3452 if(s->keyframe || s->always_reset){ |
| 2199 | 3453 reset_contexts(s); |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3454 s->last_spatial_decomposition_type= |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3455 s->last_qlog= |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3456 s->last_qbias= |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3457 s->last_mv_scale= |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3458 s->last_block_max_depth= 0; |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3459 for(plane_index=0; plane_index<2; plane_index++){ |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3460 Plane *p= &s->plane[plane_index]; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3461 p->last_htaps=0; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3462 p->last_diag_mc=0; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3463 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff)); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3464 } |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3465 } |
| 2138 | 3466 if(s->keyframe){ |
| 3467 put_symbol(&s->c, s->header_state, s->version, 0); | |
| 2335 | 3468 put_rac(&s->c, s->header_state, s->always_reset); |
| 2138 | 3469 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); |
| 3470 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); | |
| 3471 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); | |
| 3472 put_symbol(&s->c, s->header_state, s->colorspace_type, 0); | |
| 3473 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); | |
| 3474 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); | |
| 2335 | 3475 put_rac(&s->c, s->header_state, s->spatial_scalability); |
| 3476 // put_rac(&s->c, s->header_state, s->rate_scalability); | |
| 3314 | 3477 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0); |
| 2138 | 3478 |
|
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3479 encode_qlogs(s); |
| 2138 | 3480 } |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3481 |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3482 if(!s->keyframe){ |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3483 int update_mc=0; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3484 for(plane_index=0; plane_index<2; plane_index++){ |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3485 Plane *p= &s->plane[plane_index]; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3486 update_mc |= p->last_htaps != p->htaps; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3487 update_mc |= p->last_diag_mc != p->diag_mc; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3488 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3489 } |
|
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3490 put_rac(&s->c, s->header_state, update_mc); |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3491 if(update_mc){ |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3492 for(plane_index=0; plane_index<2; plane_index++){ |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3493 Plane *p= &s->plane[plane_index]; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3494 put_rac(&s->c, s->header_state, p->diag_mc); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3495 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3496 for(i= p->htaps/2; i; i--) |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3497 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3498 } |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3499 } |
|
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3500 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3501 put_rac(&s->c, s->header_state, 1); |
|
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3502 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3503 encode_qlogs(s); |
|
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3504 }else |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3505 put_rac(&s->c, s->header_state, 0); |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3506 } |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3507 |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3508 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3509 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3510 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3511 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3512 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3513 |
| 6144 | 3514 } |
| 3515 | |
| 3516 static void update_last_header_values(SnowContext *s){ | |
| 3517 int plane_index; | |
| 3518 | |
| 3519 if(!s->keyframe){ | |
| 3520 for(plane_index=0; plane_index<2; plane_index++){ | |
| 3521 Plane *p= &s->plane[plane_index]; | |
| 3522 p->last_diag_mc= p->diag_mc; | |
| 3523 p->last_htaps = p->htaps; | |
| 3524 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); | |
| 3525 } | |
| 3526 } | |
| 3527 | |
| 6414 | 3528 s->last_spatial_decomposition_type = s->spatial_decomposition_type; |
| 3529 s->last_qlog = s->qlog; | |
| 3530 s->last_qbias = s->qbias; | |
| 3531 s->last_mv_scale = s->mv_scale; | |
| 3532 s->last_block_max_depth = s->block_max_depth; | |
| 3533 s->last_spatial_decomposition_count = s->spatial_decomposition_count; | |
| 2138 | 3534 } |
| 3535 | |
|
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3536 static void decode_qlogs(SnowContext *s){ |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3537 int plane_index, level, orientation; |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3538 |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3539 for(plane_index=0; plane_index<3; plane_index++){ |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3540 for(level=0; level<s->spatial_decomposition_count; level++){ |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3541 for(orientation=level ? 1:0; orientation<4; orientation++){ |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3542 int q; |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3543 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3544 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3545 else q= get_symbol(&s->c, s->header_state, 1); |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3546 s->plane[plane_index].band[level][orientation].qlog= q; |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3547 } |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3548 } |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3549 } |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3550 } |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3551 |
| 2138 | 3552 static int decode_header(SnowContext *s){ |
|
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3553 int plane_index; |
| 2335 | 3554 uint8_t kstate[32]; |
| 2138 | 3555 |
| 2967 | 3556 memset(kstate, MID_STATE, sizeof(kstate)); |
| 2335 | 3557 |
| 3558 s->keyframe= get_rac(&s->c, kstate); | |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3559 if(s->keyframe || s->always_reset){ |
| 2199 | 3560 reset_contexts(s); |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3561 s->spatial_decomposition_type= |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3562 s->qlog= |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3563 s->qbias= |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3564 s->mv_scale= |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3565 s->block_max_depth= 0; |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3566 } |
| 2138 | 3567 if(s->keyframe){ |
| 3568 s->version= get_symbol(&s->c, s->header_state, 0); | |
| 3569 if(s->version>0){ | |
| 3570 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version); | |
| 3571 return -1; | |
| 3572 } | |
| 2335 | 3573 s->always_reset= get_rac(&s->c, s->header_state); |
| 2138 | 3574 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); |
| 3575 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
| 3576 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
| 3577 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); | |
| 3578 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); | |
| 3579 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); | |
| 2335 | 3580 s->spatial_scalability= get_rac(&s->c, s->header_state); |
| 3581 // s->rate_scalability= get_rac(&s->c, s->header_state); | |
| 3314 | 3582 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1; |
| 2138 | 3583 |
|
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3584 decode_qlogs(s); |
| 2138 | 3585 } |
| 2967 | 3586 |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3587 if(!s->keyframe){ |
|
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3588 if(get_rac(&s->c, s->header_state)){ |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3589 for(plane_index=0; plane_index<2; plane_index++){ |
| 5807 | 3590 int htaps, i, sum=0; |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3591 Plane *p= &s->plane[plane_index]; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3592 p->diag_mc= get_rac(&s->c, s->header_state); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3593 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; |
|
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
3594 if((unsigned)htaps > HTAPS_MAX || htaps==0) |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3595 return -1; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3596 p->htaps= htaps; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3597 for(i= htaps/2; i; i--){ |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3598 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3599 sum += p->hcoeff[i]; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3600 } |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3601 p->hcoeff[0]= 32-sum; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3602 } |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3603 s->plane[2].diag_mc= s->plane[1].diag_mc; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3604 s->plane[2].htaps = s->plane[1].htaps; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3605 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3606 } |
|
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3607 if(get_rac(&s->c, s->header_state)){ |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3608 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3609 decode_qlogs(s); |
|
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3610 } |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3611 } |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3612 |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3613 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); |
|
5588
effa59ca89b3
we only have 2 wavelets, the 3rd was just for experimentation ...
michael
parents:
5587
diff
changeset
|
3614 if(s->spatial_decomposition_type > 1){ |
| 2138 | 3615 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); |
| 3616 return -1; | |
| 3617 } | |
| 2967 | 3618 |
|
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3619 s->qlog += get_symbol(&s->c, s->header_state, 1); |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3620 s->mv_scale += get_symbol(&s->c, s->header_state, 1); |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3621 s->qbias += get_symbol(&s->c, s->header_state, 1); |
|
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3622 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); |
|
3303
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3206
diff
changeset
|
3623 if(s->block_max_depth > 1 || s->block_max_depth < 0){ |
| 2952 | 3624 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); |
| 3625 s->block_max_depth= 0; | |
| 3626 return -1; | |
| 3627 } | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3628 |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3629 return 0; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3630 } |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3631 |
| 3075 | 3632 static void init_qexp(void){ |
| 2600 | 3633 int i; |
| 3634 double v=128; | |
| 3635 | |
| 3636 for(i=0; i<QROOT; i++){ | |
| 3637 qexp[i]= lrintf(v); | |
| 2967 | 3638 v *= pow(2, 1.0 / QROOT); |
| 2600 | 3639 } |
| 3640 } | |
| 3641 | |
| 2138 | 3642 static int common_init(AVCodecContext *avctx){ |
| 3643 SnowContext *s = avctx->priv_data; | |
| 3644 int width, height; | |
|
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3645 int i, j; |
| 2138 | 3646 |
| 3647 s->avctx= avctx; | |
| 2967 | 3648 |
| 2138 | 3649 dsputil_init(&s->dsp, avctx); |
| 3650 | |
| 3651 #define mcf(dx,dy)\ | |
| 3652 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |
| 3653 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ | |
|
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3654 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ |
|
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3655 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ |
|
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3656 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ |
|
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3657 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; |
| 2138 | 3658 |
| 3659 mcf( 0, 0) | |
| 3660 mcf( 4, 0) | |
| 3661 mcf( 8, 0) | |
| 3662 mcf(12, 0) | |
| 3663 mcf( 0, 4) | |
| 3664 mcf( 4, 4) | |
| 3665 mcf( 8, 4) | |
| 3666 mcf(12, 4) | |
| 3667 mcf( 0, 8) | |
| 3668 mcf( 4, 8) | |
| 3669 mcf( 8, 8) | |
| 3670 mcf(12, 8) | |
| 3671 mcf( 0,12) | |
| 3672 mcf( 4,12) | |
| 3673 mcf( 8,12) | |
| 3674 mcf(12,12) | |
| 3675 | |
| 3676 #define mcfh(dx,dy)\ | |
| 3677 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ | |
| 3678 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ | |
|
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3679 mc_block_hpel ## dx ## dy ## 16;\ |
|
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3680 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ |
|
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3681 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ |
|
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3682 mc_block_hpel ## dx ## dy ## 8; |
| 2138 | 3683 |
| 3684 mcfh(0, 0) | |
| 3685 mcfh(8, 0) | |
| 3686 mcfh(0, 8) | |
| 3687 mcfh(8, 8) | |
| 2600 | 3688 |
| 3689 if(!qexp[0]) | |
| 3690 init_qexp(); | |
| 3691 | |
| 2138 | 3692 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); |
| 2967 | 3693 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3694 width= s->avctx->width; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3695 height= s->avctx->height; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3696 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3697 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); |
| 6412 | 3698 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here |
| 2967 | 3699 |
|
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3700 for(i=0; i<MAX_REF_FRAMES; i++) |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3701 for(j=0; j<MAX_REF_FRAMES; j++) |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3702 scale_mv_ref[i][j] = 256*(i+1)/(j+1); |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3703 |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3704 s->avctx->get_buffer(s->avctx, &s->mconly_picture); |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3705 |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3706 return 0; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3707 } |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3708 |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3709 static int common_init_after_header(AVCodecContext *avctx){ |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3710 SnowContext *s = avctx->priv_data; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3711 int plane_index, level, orientation; |
| 2967 | 3712 |
| 3713 for(plane_index=0; plane_index<3; plane_index++){ | |
| 2138 | 3714 int w= s->avctx->width; |
| 3715 int h= s->avctx->height; | |
| 3716 | |
| 3717 if(plane_index){ | |
| 3718 w>>= s->chroma_h_shift; | |
| 3719 h>>= s->chroma_v_shift; | |
| 3720 } | |
| 3721 s->plane[plane_index].width = w; | |
| 3722 s->plane[plane_index].height= h; | |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3723 |
| 2138 | 3724 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
| 3725 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 3726 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
| 2967 | 3727 |
| 2138 | 3728 b->buf= s->spatial_dwt_buffer; |
| 3729 b->level= level; | |
| 3730 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | |
| 3731 b->width = (w + !(orientation&1))>>1; | |
| 3732 b->height= (h + !(orientation>1))>>1; | |
| 2967 | 3733 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3734 b->stride_line = 1 << (s->spatial_decomposition_count - level); |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3735 b->buf_x_offset = 0; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3736 b->buf_y_offset = 0; |
| 2967 | 3737 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3738 if(orientation&1){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3739 b->buf += (w+1)>>1; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3740 b->buf_x_offset = (w+1)>>1; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3741 } |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3742 if(orientation>1){ |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3743 b->buf += b->stride>>1; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3744 b->buf_y_offset = b->stride_line >> 1; |
|
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3745 } |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3746 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); |
| 2967 | 3747 |
| 2138 | 3748 if(level) |
| 3749 b->parent= &s->plane[plane_index].band[level-1][orientation]; | |
|
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3750 //FIXME avoid this realloc |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3751 av_freep(&b->x_coeff); |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3752 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); |
| 2138 | 3753 } |
| 3754 w= (w+1)>>1; | |
| 3755 h= (h+1)>>1; | |
| 3756 } | |
| 3757 } | |
| 2967 | 3758 |
| 2138 | 3759 return 0; |
| 3760 } | |
| 3761 | |
|
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3762 static int qscale2qlog(int qscale){ |
|
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3763 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) |
|
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3764 + 61*QROOT/8; //<64 >60 |
|
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3765 } |
|
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3766 |
|
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3767 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) |
| 3313 | 3768 { |
| 6412 | 3769 /* Estimate the frame's complexity as a sum of weighted dwt coefficients. |
| 3313 | 3770 * FIXME we know exact mv bits at this point, |
| 3771 * but ratecontrol isn't set up to include them. */ | |
| 3772 uint32_t coef_sum= 0; | |
|
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3773 int level, orientation, delta_qlog; |
| 3313 | 3774 |
| 3775 for(level=0; level<s->spatial_decomposition_count; level++){ | |
| 3776 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 3777 SubBand *b= &s->plane[0].band[level][orientation]; | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3778 IDWTELEM *buf= b->ibuf; |
| 3313 | 3779 const int w= b->width; |
| 3780 const int h= b->height; | |
| 3781 const int stride= b->stride; | |
| 4594 | 3782 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); |
| 3313 | 3783 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
| 3784 const int qdiv= (1<<16)/qmul; | |
| 3785 int x, y; | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3786 //FIXME this is ugly |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3787 for(y=0; y<h; y++) |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3788 for(x=0; x<w; x++) |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3789 buf[x+y*stride]= b->buf[x+y*stride]; |
| 3313 | 3790 if(orientation==0) |
| 3791 decorrelate(s, b, buf, stride, 1, 0); | |
| 3792 for(y=0; y<h; y++) | |
| 3793 for(x=0; x<w; x++) | |
| 3794 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; | |
| 3795 } | |
| 3796 } | |
| 3797 | |
| 3798 /* ugly, ratecontrol just takes a sqrt again */ | |
| 3799 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; | |
| 3800 assert(coef_sum < INT_MAX); | |
| 3801 | |
| 3802 if(pict->pict_type == I_TYPE){ | |
| 3803 s->m.current_picture.mb_var_sum= coef_sum; | |
| 3804 s->m.current_picture.mc_mb_var_sum= 0; | |
| 3805 }else{ | |
| 3806 s->m.current_picture.mc_mb_var_sum= coef_sum; | |
| 3807 s->m.current_picture.mb_var_sum= 0; | |
| 3808 } | |
| 3809 | |
| 3810 pict->quality= ff_rate_estimate_qscale(&s->m, 1); | |
| 3766 | 3811 if (pict->quality < 0) |
|
4011
5bce97c30a69
-1 is a valid return value in ratecontrol_1pass() -> 100l for takis
michael
parents:
4001
diff
changeset
|
3812 return INT_MIN; |
| 3313 | 3813 s->lambda= pict->quality * 3/2; |
|
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3814 delta_qlog= qscale2qlog(pict->quality) - s->qlog; |
|
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3815 s->qlog+= delta_qlog; |
|
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3816 return delta_qlog; |
| 3313 | 3817 } |
| 2138 | 3818 |
| 5909 | 3819 static void calculate_visual_weight(SnowContext *s, Plane *p){ |
| 2138 | 3820 int width = p->width; |
| 3821 int height= p->height; | |
| 2198 | 3822 int level, orientation, x, y; |
| 2138 | 3823 |
| 3824 for(level=0; level<s->spatial_decomposition_count; level++){ | |
| 3825 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 3826 SubBand *b= &p->band[level][orientation]; | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3827 IDWTELEM *ibuf= b->ibuf; |
| 2138 | 3828 int64_t error=0; |
| 2967 | 3829 |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3830 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3831 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3832 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); |
| 2138 | 3833 for(y=0; y<height; y++){ |
| 3834 for(x=0; x<width; x++){ | |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3835 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; |
| 2138 | 3836 error += d*d; |
| 3837 } | |
| 3838 } | |
| 3839 | |
| 3840 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); | |
| 3841 } | |
| 3842 } | |
| 3843 } | |
| 3844 | |
| 5702 | 3845 #define QUANTIZE2 0 |
| 3846 | |
| 3847 #if QUANTIZE2==1 | |
| 3848 #define Q2_STEP 8 | |
| 3849 | |
| 3850 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ | |
| 3851 SubBand *b= &p->band[level][orientation]; | |
| 3852 int x, y; | |
| 3853 int xo=0; | |
| 3854 int yo=0; | |
| 3855 int step= 1 << (s->spatial_decomposition_count - level); | |
| 3856 | |
| 3857 if(orientation&1) | |
| 3858 xo= step>>1; | |
| 3859 if(orientation&2) | |
| 3860 yo= step>>1; | |
| 3861 | |
| 6412 | 3862 //FIXME bias for nonzero ? |
| 5702 | 3863 //FIXME optimize |
| 3864 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); | |
| 3865 for(y=0; y<p->height; y++){ | |
| 3866 for(x=0; x<p->width; x++){ | |
| 3867 int sx= (x-xo + step/2) / step / Q2_STEP; | |
| 3868 int sy= (y-yo + step/2) / step / Q2_STEP; | |
| 3869 int v= r0[x + y*p->width] - r1[x + y*p->width]; | |
| 3870 assert(sx>=0 && sy>=0 && sx < score_stride); | |
| 3871 v= ((v+8)>>4)<<4; | |
| 3872 score[sx + sy*score_stride] += v*v; | |
| 3873 assert(score[sx + sy*score_stride] >= 0); | |
| 3874 } | |
| 3875 } | |
| 3876 } | |
| 3877 | |
| 3878 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){ | |
| 3879 int level, orientation; | |
| 3880 | |
| 3881 for(level=0; level<s->spatial_decomposition_count; level++){ | |
| 3882 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 3883 SubBand *b= &p->band[level][orientation]; | |
| 3884 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer); | |
| 3885 | |
| 3886 dequantize(s, b, dst, b->stride); | |
| 3887 } | |
| 3888 } | |
| 3889 } | |
| 3890 | |
| 3891 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ | |
| 3892 int level, orientation, ys, xs, x, y, pass; | |
| 3893 IDWTELEM best_dequant[height * stride]; | |
| 3894 IDWTELEM idwt2_buffer[height * stride]; | |
| 3895 const int score_stride= (width + 10)/Q2_STEP; | |
| 3896 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
| 3897 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
| 3898 int threshold= (s->m.lambda * s->m.lambda) >> 6; | |
| 3899 | |
| 3900 //FIXME pass the copy cleanly ? | |
| 3901 | |
| 3902 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM)); | |
| 3903 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count); | |
| 3904 | |
| 3905 for(level=0; level<s->spatial_decomposition_count; level++){ | |
| 3906 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 3907 SubBand *b= &p->band[level][orientation]; | |
| 3908 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
| 3909 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); | |
| 6412 | 3910 assert(src == b->buf); // code does not depend on this but it is true currently |
| 5702 | 3911 |
| 3912 quantize(s, b, dst, src, b->stride, s->qbias); | |
| 3913 } | |
| 3914 } | |
| 3915 for(pass=0; pass<1; pass++){ | |
| 3916 if(s->qbias == 0) //keyframe | |
| 3917 continue; | |
| 3918 for(level=0; level<s->spatial_decomposition_count; level++){ | |
| 3919 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 3920 SubBand *b= &p->band[level][orientation]; | |
| 3921 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer); | |
| 3922 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
| 3923 | |
| 3924 for(ys= 0; ys<Q2_STEP; ys++){ | |
| 3925 for(xs= 0; xs<Q2_STEP; xs++){ | |
| 3926 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
| 3927 dequantize_all(s, p, idwt2_buffer, width, height); | |
| 3928 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
| 3929 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
| 3930 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
| 3931 for(y=ys; y<b->height; y+= Q2_STEP){ | |
| 3932 for(x=xs; x<b->width; x+= Q2_STEP){ | |
| 3933 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; | |
| 3934 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; | |
| 6412 | 3935 //FIXME try more than just -- |
| 5702 | 3936 } |
| 3937 } | |
| 3938 dequantize_all(s, p, idwt2_buffer, width, height); | |
| 3939 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
| 3940 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
| 3941 for(y=ys; y<b->height; y+= Q2_STEP){ | |
| 3942 for(x=xs; x<b->width; x+= Q2_STEP){ | |
| 3943 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride; | |
| 3944 if(score[score_idx] <= best_score[score_idx] + threshold){ | |
| 3945 best_score[score_idx]= score[score_idx]; | |
| 3946 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++; | |
| 3947 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--; | |
| 3948 //FIXME copy instead | |
| 3949 } | |
| 3950 } | |
| 3951 } | |
| 3952 } | |
| 3953 } | |
| 3954 } | |
| 3955 } | |
| 3956 } | |
| 6412 | 3957 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end |
| 5702 | 3958 } |
| 3959 | |
| 5910 | 3960 #endif /* QUANTIZE2==1 */ |
| 5702 | 3961 |
| 2138 | 3962 static int encode_init(AVCodecContext *avctx) |
| 3963 { | |
| 3964 SnowContext *s = avctx->priv_data; | |
| 2198 | 3965 int plane_index; |
| 2138 | 3966 |
|
2658
d1609cfeb1d0
#defines for strict_std_compliance and split between inofficial extensions and non standarized things
michael
parents:
2635
diff
changeset
|
3967 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ |
| 6412 | 3968 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n" |
| 3969 "Use vstrict=-2 / -strict -2 to use it anyway.\n"); | |
| 2151 | 3970 return -1; |
| 3971 } | |
| 2967 | 3972 |
|
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3973 if(avctx->prediction_method == DWT_97 |
|
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3974 && (avctx->flags & CODEC_FLAG_QSCALE) |
|
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3975 && avctx->global_quality == 0){ |
| 6412 | 3976 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); |
|
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3977 return -1; |
|
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3978 } |
|
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3979 |
|
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3980 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3981 |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3982 s->chroma_h_shift= 1; //FIXME XXX |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3983 s->chroma_v_shift= 1; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3984 |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3985 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3986 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3987 |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3988 for(plane_index=0; plane_index<3; plane_index++){ |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3989 s->plane[plane_index].diag_mc= 1; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3990 s->plane[plane_index].htaps= 6; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3991 s->plane[plane_index].hcoeff[0]= 40; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3992 s->plane[plane_index].hcoeff[1]= -10; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3993 s->plane[plane_index].hcoeff[2]= 2; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3994 s->plane[plane_index].fast_mc= 1; |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3995 } |
|
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3996 |
| 2138 | 3997 common_init(avctx); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3998 alloc_blocks(s); |
| 2967 | 3999 |
| 2138 | 4000 s->version=0; |
| 2967 | 4001 |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4002 s->m.avctx = avctx; |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4003 s->m.flags = avctx->flags; |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4004 s->m.bit_rate= avctx->bit_rate; |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4005 |
| 2138 | 4006 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); |
| 4007 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
| 4008 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4009 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); |
| 2138 | 4010 h263_encode_init(&s->m); //mv_penalty |
| 4011 | |
| 3314 | 4012 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); |
| 4013 | |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4014 if(avctx->flags&CODEC_FLAG_PASS1){ |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4015 if(!avctx->stats_out) |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4016 avctx->stats_out = av_mallocz(256); |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4017 } |
|
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4018 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4019 if(ff_rate_control_init(&s->m) < 0) |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4020 return -1; |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4021 } |
| 3313 | 4022 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4023 |
| 2138 | 4024 avctx->coded_frame= &s->current_picture; |
| 4025 switch(avctx->pix_fmt){ | |
| 4026 // case PIX_FMT_YUV444P: | |
| 4027 // case PIX_FMT_YUV422P: | |
| 4028 case PIX_FMT_YUV420P: | |
| 4029 case PIX_FMT_GRAY8: | |
| 4030 // case PIX_FMT_YUV411P: | |
| 4031 // case PIX_FMT_YUV410P: | |
| 4032 s->colorspace_type= 0; | |
| 4033 break; | |
|
4494
ce643a22f049
Replace deprecated PIX_FMT names by the newer variants.
diego
parents:
4436
diff
changeset
|
4034 /* case PIX_FMT_RGB32: |
| 2138 | 4035 s->colorspace= 1; |
| 4036 break;*/ | |
| 4037 default: | |
| 5908 | 4038 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n"); |
| 2138 | 4039 return -1; |
| 4040 } | |
| 4041 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | |
| 4042 s->chroma_h_shift= 1; | |
| 4043 s->chroma_v_shift= 1; | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4044 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4045 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4046 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4047 |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4048 s->avctx->get_buffer(s->avctx, &s->input_picture); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4049 |
| 3314 | 4050 if(s->avctx->me_method == ME_ITER){ |
| 4051 int i; | |
| 4052 int size= s->b_width * s->b_height << 2*s->block_max_depth; | |
| 4053 for(i=0; i<s->max_ref_frames; i++){ | |
| 4054 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); | |
| 4055 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); | |
| 4056 } | |
| 4057 } | |
| 4058 | |
| 2138 | 4059 return 0; |
| 4060 } | |
| 4061 | |
| 5924 | 4062 #define USE_HALFPEL_PLANE 0 |
| 4063 | |
|
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4064 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4065 int p,x,y; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4066 |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4067 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE)); |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4068 |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4069 for(p=0; p<3; p++){ |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4070 int is_chroma= !!p; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4071 int w= s->avctx->width >>is_chroma; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4072 int h= s->avctx->height >>is_chroma; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4073 int ls= frame->linesize[p]; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4074 uint8_t *src= frame->data[p]; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4075 |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4076 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4077 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4078 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4079 |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4080 halfpel[0][p]= src; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4081 for(y=0; y<h; y++){ |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4082 for(x=0; x<w; x++){ |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4083 int i= y*ls + x; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4084 |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4085 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4086 } |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4087 } |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4088 for(y=0; y<h; y++){ |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4089 for(x=0; x<w; x++){ |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4090 int i= y*ls + x; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4091 |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4092 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4093 } |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4094 } |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4095 src= halfpel[1][p]; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4096 for(y=0; y<h; y++){ |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4097 for(x=0; x<w; x++){ |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4098 int i= y*ls + x; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4099 |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4100 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4101 } |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4102 } |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4103 |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4104 //FIXME border! |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4105 } |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4106 } |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4107 |
| 2138 | 4108 static int frame_start(SnowContext *s){ |
| 4109 AVFrame tmp; | |
| 2187 | 4110 int w= s->avctx->width; //FIXME round up to x16 ? |
| 4111 int h= s->avctx->height; | |
| 2138 | 4112 |
| 2187 | 4113 if(s->current_picture.data[0]){ |
| 6437 | 4114 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); |
| 4115 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); | |
| 4116 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); | |
| 2187 | 4117 } |
| 4118 | |
| 3314 | 4119 tmp= s->last_picture[s->max_ref_frames-1]; |
| 4120 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); | |
|
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4121 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); |
| 5924 | 4122 if(USE_HALFPEL_PLANE && s->current_picture.data[0]) |
|
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4123 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); |
| 3314 | 4124 s->last_picture[0]= s->current_picture; |
| 2138 | 4125 s->current_picture= tmp; |
| 2967 | 4126 |
| 3314 | 4127 if(s->keyframe){ |
| 4128 s->ref_frames= 0; | |
| 4129 }else{ | |
| 4130 int i; | |
| 4131 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) | |
| 4132 if(i && s->last_picture[i-1].key_frame) | |
| 4133 break; | |
| 4134 s->ref_frames= i; | |
| 4135 } | |
| 4136 | |
| 2138 | 4137 s->current_picture.reference= 1; |
| 4138 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ | |
| 4139 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
| 4140 return -1; | |
| 4141 } | |
| 2967 | 4142 |
| 3314 | 4143 s->current_picture.key_frame= s->keyframe; |
| 4144 | |
| 2138 | 4145 return 0; |
| 4146 } | |
| 4147 | |
| 4148 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ | |
| 4149 SnowContext *s = avctx->priv_data; | |
| 2335 | 4150 RangeCoder * const c= &s->c; |
| 2138 | 4151 AVFrame *pict = data; |
| 4152 const int width= s->avctx->width; | |
| 4153 const int height= s->avctx->height; | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4154 int level, orientation, plane_index, i, y; |
|
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4155 uint8_t rc_header_bak[sizeof(s->header_state)]; |
|
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4156 uint8_t rc_block_bak[sizeof(s->block_state)]; |
| 2138 | 4157 |
| 2335 | 4158 ff_init_range_encoder(c, buf, buf_size); |
| 4159 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
| 2967 | 4160 |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4161 for(i=0; i<3; i++){ |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4162 int shift= !!i; |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4163 for(y=0; y<(height>>shift); y++) |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4164 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]], |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4165 &pict->data[i][y * pict->linesize[i]], |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4166 width>>shift); |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4167 } |
|
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4168 s->new_picture = *pict; |
| 2138 | 4169 |
| 3313 | 4170 s->m.picture_number= avctx->frame_number; |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4171 if(avctx->flags&CODEC_FLAG_PASS2){ |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4172 s->m.pict_type = |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4173 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4174 s->keyframe= pict->pict_type==FF_I_TYPE; |
| 3766 | 4175 if(!(avctx->flags&CODEC_FLAG_QSCALE)) { |
| 3193 | 4176 pict->quality= ff_rate_estimate_qscale(&s->m, 0); |
| 3766 | 4177 if (pict->quality < 0) |
| 4178 return -1; | |
| 4179 } | |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4180 }else{ |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4181 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; |
| 3313 | 4182 s->m.pict_type= |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4183 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4184 } |
| 2967 | 4185 |
| 3313 | 4186 if(s->pass1_rc && avctx->frame_number == 0) |
| 4187 pict->quality= 2*FF_QP2LAMBDA; | |
| 2161 | 4188 if(pict->quality){ |
|
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4189 s->qlog= qscale2qlog(pict->quality); |
| 3313 | 4190 s->lambda = pict->quality * 3/2; |
| 4191 } | |
| 4192 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ | |
| 2161 | 4193 s->qlog= LOSSLESS_QLOG; |
| 3313 | 4194 s->lambda = 0; |
| 6412 | 4195 }//else keep previous frame's qlog until after motion estimation |
| 2138 | 4196 |
| 4197 frame_start(s); | |
| 4198 | |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4199 s->m.current_picture_ptr= &s->m.current_picture; |
| 2138 | 4200 if(pict->pict_type == P_TYPE){ |
| 4201 int block_width = (width +15)>>4; | |
| 4202 int block_height= (height+15)>>4; | |
| 4203 int stride= s->current_picture.linesize[0]; | |
| 2967 | 4204 |
| 2138 | 4205 assert(s->current_picture.data[0]); |
| 3314 | 4206 assert(s->last_picture[0].data[0]); |
| 2967 | 4207 |
| 2138 | 4208 s->m.avctx= s->avctx; |
| 4209 s->m.current_picture.data[0]= s->current_picture.data[0]; | |
| 3314 | 4210 s->m. last_picture.data[0]= s->last_picture[0].data[0]; |
| 2138 | 4211 s->m. new_picture.data[0]= s-> input_picture.data[0]; |
| 4212 s->m. last_picture_ptr= &s->m. last_picture; | |
| 4213 s->m.linesize= | |
| 4214 s->m. last_picture.linesize[0]= | |
| 4215 s->m. new_picture.linesize[0]= | |
| 4216 s->m.current_picture.linesize[0]= stride; | |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4217 s->m.uvlinesize= s->current_picture.linesize[1]; |
| 2138 | 4218 s->m.width = width; |
| 4219 s->m.height= height; | |
| 4220 s->m.mb_width = block_width; | |
| 4221 s->m.mb_height= block_height; | |
| 4222 s->m.mb_stride= s->m.mb_width+1; | |
| 4223 s->m.b8_stride= 2*s->m.mb_width+1; | |
| 4224 s->m.f_code=1; | |
| 4225 s->m.pict_type= pict->pict_type; | |
| 4226 s->m.me_method= s->avctx->me_method; | |
| 4227 s->m.me.scene_change_score=0; | |
| 4228 s->m.flags= s->avctx->flags; | |
| 4229 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; | |
| 4230 s->m.out_format= FMT_H263; | |
| 4231 s->m.unrestricted_mv= 1; | |
| 4232 | |
| 3313 | 4233 s->m.lambda = s->lambda; |
| 2138 | 4234 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4235 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; |
|
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4236 |
| 2138 | 4237 s->m.dsp= s->dsp; //move |
| 4238 ff_init_me(&s->m); | |
|
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4239 s->dsp= s->m.dsp; |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4240 } |
| 2967 | 4241 |
|
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4242 if(s->pass1_rc){ |
|
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4243 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); |
|
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4244 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); |
|
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4245 } |
|
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4246 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4247 redo_frame: |
| 2967 | 4248 |
|
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4249 if(pict->pict_type == I_TYPE) |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4250 s->spatial_decomposition_count= 5; |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4251 else |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4252 s->spatial_decomposition_count= 5; |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4253 |
| 3313 | 4254 s->m.pict_type = pict->pict_type; |
| 2138 | 4255 s->qbias= pict->pict_type == P_TYPE ? 2 : 0; |
| 4256 | |
|
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4257 common_init_after_header(avctx); |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4258 |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4259 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4260 for(plane_index=0; plane_index<3; plane_index++){ |
| 5909 | 4261 calculate_visual_weight(s, &s->plane[plane_index]); |
|
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4262 } |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4263 } |
|
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4264 |
| 2138 | 4265 encode_header(s); |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4266 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
|
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4267 encode_blocks(s, 1); |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4268 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; |
| 2967 | 4269 |
| 2138 | 4270 for(plane_index=0; plane_index<3; plane_index++){ |
| 4271 Plane *p= &s->plane[plane_index]; | |
| 4272 int w= p->width; | |
| 4273 int h= p->height; | |
| 4274 int x, y; | |
| 2198 | 4275 // int bits= put_bits_count(&s->c.pb); |
| 2138 | 4276 |
| 6414 | 4277 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ |
| 4278 //FIXME optimize | |
| 4279 if(pict->data[plane_index]) //FIXME gray hack | |
| 4280 for(y=0; y<h; y++){ | |
| 4281 for(x=0; x<w; x++){ | |
| 4282 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; | |
| 4283 } | |
| 4284 } | |
| 4285 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); | |
| 4286 | |
| 4287 if( plane_index==0 | |
| 4288 && pict->pict_type == P_TYPE | |
| 4289 && !(avctx->flags&CODEC_FLAG_PASS2) | |
| 4290 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ | |
| 4291 ff_init_range_encoder(c, buf, buf_size); | |
| 4292 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
| 4293 pict->pict_type= FF_I_TYPE; | |
| 4294 s->keyframe=1; | |
| 4295 s->current_picture.key_frame=1; | |
| 4296 goto redo_frame; | |
| 2138 | 4297 } |
| 6414 | 4298 |
| 4299 if(s->qlog == LOSSLESS_QLOG){ | |
| 4300 for(y=0; y<h; y++){ | |
| 4301 for(x=0; x<w; x++){ | |
| 4302 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; | |
| 4303 } | |
| 2161 | 4304 } |
| 6414 | 4305 }else{ |
| 4306 for(y=0; y<h; y++){ | |
| 4307 for(x=0; x<w; x++){ | |
| 4308 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS; | |
| 4309 } | |
| 5575 | 4310 } |
| 4311 } | |
| 6414 | 4312 |
| 4313 /* if(QUANTIZE2) | |
| 4314 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type); | |
| 4315 else*/ | |
| 4316 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
| 4317 | |
| 4318 if(s->pass1_rc && plane_index==0){ | |
| 4319 int delta_qlog = ratecontrol_1pass(s, pict); | |
| 4320 if (delta_qlog <= INT_MIN) | |
| 4321 return -1; | |
| 4322 if(delta_qlog){ | |
| 4323 //reordering qlog in the bitstream would eliminate this reset | |
| 4324 ff_init_range_encoder(c, buf, buf_size); | |
| 4325 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); | |
| 4326 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); | |
| 4327 encode_header(s); | |
| 4328 encode_blocks(s, 0); | |
| 2161 | 4329 } |
| 4330 } | |
| 6414 | 4331 |
| 4332 for(level=0; level<s->spatial_decomposition_count; level++){ | |
| 4333 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 4334 SubBand *b= &p->band[level][orientation]; | |
| 4335 | |
| 4336 if(!QUANTIZE2) | |
| 4337 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); | |
| 4338 if(orientation==0) | |
| 4339 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0); | |
| 4340 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); | |
| 4341 assert(b->parent==NULL || b->parent->stride == b->stride*2); | |
| 4342 if(orientation==0) | |
| 4343 correlate(s, b, b->ibuf, b->stride, 1, 0); | |
| 4344 } | |
| 4345 } | |
| 4346 | |
| 4347 for(level=0; level<s->spatial_decomposition_count; level++){ | |
| 4348 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 4349 SubBand *b= &p->band[level][orientation]; | |
| 4350 | |
| 4351 dequantize(s, b, b->ibuf, b->stride); | |
| 4352 } | |
| 4353 } | |
| 4354 | |
| 4355 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
| 4356 if(s->qlog == LOSSLESS_QLOG){ | |
| 4357 for(y=0; y<h; y++){ | |
| 4358 for(x=0; x<w; x++){ | |
| 4359 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; | |
| 4360 } | |
| 4361 } | |
| 4362 } | |
| 4363 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
| 4364 }else{ | |
|
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4365 //ME/MC only |
|
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4366 if(pict->pict_type == I_TYPE){ |
|
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4367 for(y=0; y<h; y++){ |
|
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4368 for(x=0; x<w; x++){ |
|
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4369 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= |
|
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4370 pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
|
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4371 } |
|
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4372 } |
|
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4373 }else{ |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4374 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); |
|
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4375 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
|
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4376 } |
| 6414 | 4377 } |
| 2138 | 4378 if(s->avctx->flags&CODEC_FLAG_PSNR){ |
| 4379 int64_t error= 0; | |
| 2967 | 4380 |
| 6414 | 4381 if(pict->data[plane_index]) //FIXME gray hack |
| 4382 for(y=0; y<h; y++){ | |
| 4383 for(x=0; x<w; x++){ | |
| 4384 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; | |
| 4385 error += d*d; | |
| 4386 } | |
| 2138 | 4387 } |
| 4388 s->avctx->error[plane_index] += error; | |
| 2232 | 4389 s->current_picture.error[plane_index] = error; |
| 2138 | 4390 } |
| 6414 | 4391 |
| 2138 | 4392 } |
| 4393 | |
| 6144 | 4394 update_last_header_values(s); |
| 4395 | |
|
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4396 if(s->last_picture[s->max_ref_frames-1].data[0]){ |
| 3314 | 4397 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); |
|
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4398 for(i=0; i<9; i++) |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4399 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4400 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4401 } |
| 2138 | 4402 |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4403 s->current_picture.coded_picture_number = avctx->frame_number; |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4404 s->current_picture.pict_type = pict->pict_type; |
|
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4405 s->current_picture.quality = pict->quality; |
| 3313 | 4406 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
| 4407 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; | |
| 4408 s->m.current_picture.display_picture_number = | |
| 4409 s->m.current_picture.coded_picture_number = avctx->frame_number; | |
| 4410 s->m.current_picture.quality = pict->quality; | |
| 4411 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); | |
| 4412 if(s->pass1_rc) | |
| 3766 | 4413 if (ff_rate_estimate_qscale(&s->m, 0) < 0) |
| 4414 return -1; | |
| 3313 | 4415 if(avctx->flags&CODEC_FLAG_PASS1) |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4416 ff_write_pass1_stats(&s->m); |
| 3313 | 4417 s->m.last_pict_type = s->m.pict_type; |
|
4123
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4418 avctx->frame_bits = s->m.frame_bits; |
|
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4419 avctx->mv_bits = s->m.mv_bits; |
|
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4420 avctx->misc_bits = s->m.misc_bits; |
|
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4421 avctx->p_tex_bits = s->m.p_tex_bits; |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4422 |
| 2138 | 4423 emms_c(); |
| 2967 | 4424 |
| 2335 | 4425 return ff_rac_terminate(c); |
| 2138 | 4426 } |
| 4427 | |
| 4428 static void common_end(SnowContext *s){ | |
| 3314 | 4429 int plane_index, level, orientation, i; |
| 2192 | 4430 |
| 2138 | 4431 av_freep(&s->spatial_dwt_buffer); |
|
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4432 av_freep(&s->spatial_idwt_buffer); |
| 2138 | 4433 |
| 2967 | 4434 av_freep(&s->m.me.scratchpad); |
| 2138 | 4435 av_freep(&s->m.me.map); |
| 4436 av_freep(&s->m.me.score_map); | |
|
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4437 av_freep(&s->m.obmc_scratchpad); |
| 2967 | 4438 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4439 av_freep(&s->block); |
| 2192 | 4440 |
| 3314 | 4441 for(i=0; i<MAX_REF_FRAMES; i++){ |
| 4442 av_freep(&s->ref_mvs[i]); | |
| 4443 av_freep(&s->ref_scores[i]); | |
| 4444 if(s->last_picture[i].data[0]) | |
| 4445 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); | |
| 4446 } | |
| 4447 | |
| 2967 | 4448 for(plane_index=0; plane_index<3; plane_index++){ |
| 2192 | 4449 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
| 4450 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 4451 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
| 2967 | 4452 |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4453 av_freep(&b->x_coeff); |
| 2192 | 4454 } |
| 4455 } | |
| 4456 } | |
| 2138 | 4457 } |
| 4458 | |
| 4459 static int encode_end(AVCodecContext *avctx) | |
| 4460 { | |
| 4461 SnowContext *s = avctx->priv_data; | |
| 4462 | |
| 4463 common_end(s); | |
|
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4464 av_free(avctx->stats_out); |
| 2138 | 4465 |
| 4466 return 0; | |
| 4467 } | |
| 4468 | |
| 4469 static int decode_init(AVCodecContext *avctx) | |
| 4470 { | |
| 2635 | 4471 avctx->pix_fmt= PIX_FMT_YUV420P; |
| 2138 | 4472 |
| 4473 common_init(avctx); | |
| 2967 | 4474 |
| 2138 | 4475 return 0; |
| 4476 } | |
| 4477 | |
| 6255 | 4478 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){ |
| 2138 | 4479 SnowContext *s = avctx->priv_data; |
| 2335 | 4480 RangeCoder * const c= &s->c; |
| 2138 | 4481 int bytes_read; |
| 4482 AVFrame *picture = data; | |
|
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4483 int level, orientation, plane_index, i; |
| 2138 | 4484 |
| 2335 | 4485 ff_init_range_decoder(c, buf, buf_size); |
| 4486 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
| 2138 | 4487 |
| 4488 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P | |
| 5668 | 4489 if(decode_header(s)<0) |
| 4490 return -1; | |
|
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4491 common_init_after_header(avctx); |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4492 |
|
5662
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4493 // realloc slice buffer for the case that spatial_decomposition_count changed |
|
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4494 slice_buffer_destroy(&s->sb); |
|
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4495 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer); |
|
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4496 |
|
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4497 for(plane_index=0; plane_index<3; plane_index++){ |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4498 Plane *p= &s->plane[plane_index]; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4499 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4500 && p->hcoeff[1]==-10 |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4501 && p->hcoeff[2]==2; |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4502 } |
|
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4503 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4504 if(!s->block) alloc_blocks(s); |
| 2138 | 4505 |
| 4506 frame_start(s); | |
| 6412 | 4507 //keyframe flag duplication mess FIXME |
| 2138 | 4508 if(avctx->debug&FF_DEBUG_PICT_INFO) |
| 4509 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); | |
| 2967 | 4510 |
|
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4511 decode_blocks(s); |
| 2138 | 4512 |
| 4513 for(plane_index=0; plane_index<3; plane_index++){ | |
| 4514 Plane *p= &s->plane[plane_index]; | |
| 4515 int w= p->width; | |
| 4516 int h= p->height; | |
| 4517 int x, y; | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4518 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ |
| 2967 | 4519 |
| 6414 | 4520 if(s->avctx->debug&2048){ |
| 4521 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | |
| 4522 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
| 4523 | |
| 4524 for(y=0; y<h; y++){ | |
| 4525 for(x=0; x<w; x++){ | |
| 4526 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; | |
| 4527 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; | |
| 4528 } | |
| 2138 | 4529 } |
| 4530 } | |
| 6414 | 4531 |
| 4532 { | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4533 for(level=0; level<s->spatial_decomposition_count; level++){ |
|
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4534 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4535 SubBand *b= &p->band[level][orientation]; |
| 6414 | 4536 unpack_coeffs(s, b, b->parent, orientation); |
| 4537 } | |
| 4538 } | |
| 4539 } | |
| 4540 | |
| 4541 { | |
| 4542 const int mb_h= s->b_height << s->block_max_depth; | |
| 4543 const int block_size = MB_SIZE >> s->block_max_depth; | |
| 4544 const int block_w = plane_index ? block_size/2 : block_size; | |
| 4545 int mb_y; | |
| 4546 dwt_compose_t cs[MAX_DECOMPOSITIONS]; | |
| 4547 int yd=0, yq=0; | |
| 4548 int y; | |
| 4549 int end_y; | |
| 4550 | |
| 4551 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
| 4552 for(mb_y=0; mb_y<=mb_h; mb_y++){ | |
| 4553 | |
| 4554 int slice_starty = block_w*mb_y; | |
| 4555 int slice_h = block_w*(mb_y+1); | |
| 4556 if (!(s->keyframe || s->avctx->debug&512)){ | |
| 4557 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); | |
| 4558 slice_h -= (block_w >> 1); | |
| 4559 } | |
| 4560 | |
| 4561 for(level=0; level<s->spatial_decomposition_count; level++){ | |
| 4562 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 4563 SubBand *b= &p->band[level][orientation]; | |
| 4564 int start_y; | |
| 4565 int end_y; | |
| 4566 int our_mb_start = mb_y; | |
| 4567 int our_mb_end = (mb_y + 1); | |
| 4568 const int extra= 3; | |
| 4569 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); | |
| 4570 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); | |
| 4571 if (!(s->keyframe || s->avctx->debug&512)){ | |
| 4572 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
| 4573 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
|
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4574 } |
| 6414 | 4575 start_y = FFMIN(b->height, start_y); |
| 4576 end_y = FFMIN(b->height, end_y); | |
| 4577 | |
| 4578 if (start_y != end_y){ | |
| 4579 if (orientation == 0){ | |
| 4580 SubBand * correlate_band = &p->band[0][0]; | |
| 4581 int correlate_end_y = FFMIN(b->height, end_y + 1); | |
| 4582 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); | |
| 4583 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); | |
| 4584 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); | |
| 4585 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); | |
| 4586 } | |
| 4587 else | |
| 4588 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | |
| 4589 } | |
|
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4590 } |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4591 } |
| 6414 | 4592 |
| 4593 for(; yd<slice_h; yd+=4){ | |
| 4594 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | |
| 4595 } | |
| 4596 | |
| 4597 if(s->qlog == LOSSLESS_QLOG){ | |
| 4598 for(; yq<slice_h && yq<h; yq++){ | |
| 4599 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); | |
| 4600 for(x=0; x<w; x++){ | |
| 4601 line[x] <<= FRAC_BITS; | |
| 4602 } | |
| 2161 | 4603 } |
| 4604 } | |
| 6414 | 4605 |
| 4606 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); | |
| 4607 | |
| 4608 y = FFMIN(p->height, slice_starty); | |
| 4609 end_y = FFMIN(p->height, slice_h); | |
| 4610 while(y < end_y) | |
| 4611 slice_buffer_release(&s->sb, y++); | |
| 2161 | 4612 } |
| 2562 | 4613 |
| 6414 | 4614 slice_buffer_flush(&s->sb); |
| 4615 } | |
| 4616 | |
| 2138 | 4617 } |
| 2967 | 4618 |
| 2138 | 4619 emms_c(); |
| 4620 | |
|
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4621 if(s->last_picture[s->max_ref_frames-1].data[0]){ |
| 3314 | 4622 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); |
|
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4623 for(i=0; i<9; i++) |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4624 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4625 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); |
|
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4626 } |
| 2138 | 4627 |
| 6414 | 4628 if(!(s->avctx->debug&2048)) |
| 4629 *picture= s->current_picture; | |
| 4630 else | |
| 4631 *picture= s->mconly_picture; | |
| 2967 | 4632 |
| 2138 | 4633 *data_size = sizeof(AVFrame); |
| 2967 | 4634 |
| 2335 | 4635 bytes_read= c->bytestream - c->bytestream_start; |
| 4636 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME | |
| 2138 | 4637 |
| 4638 return bytes_read; | |
| 4639 } | |
| 4640 | |
| 4641 static int decode_end(AVCodecContext *avctx) | |
| 4642 { | |
| 4643 SnowContext *s = avctx->priv_data; | |
| 4644 | |
|
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4645 slice_buffer_destroy(&s->sb); |
| 2967 | 4646 |
| 2138 | 4647 common_end(s); |
| 4648 | |
| 4649 return 0; | |
| 4650 } | |
| 4651 | |
| 4652 AVCodec snow_decoder = { | |
| 4653 "snow", | |
| 4654 CODEC_TYPE_VIDEO, | |
| 4655 CODEC_ID_SNOW, | |
| 4656 sizeof(SnowContext), | |
| 4657 decode_init, | |
| 4658 NULL, | |
| 4659 decode_end, | |
| 4660 decode_frame, | |
| 4661 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/, | |
| 4662 NULL | |
| 4663 }; | |
| 4664 | |
| 5224 | 4665 #ifdef CONFIG_SNOW_ENCODER |
| 2138 | 4666 AVCodec snow_encoder = { |
| 4667 "snow", | |
| 4668 CODEC_TYPE_VIDEO, | |
| 4669 CODEC_ID_SNOW, | |
| 4670 sizeof(SnowContext), | |
| 4671 encode_init, | |
| 4672 encode_frame, | |
| 4673 encode_end, | |
| 4674 }; | |
|
2408
a6e4da1c28ee
Disable encoders patch by (Gianluigi Tiesi <mplayer netfarm it>)
michael
parents:
2368
diff
changeset
|
4675 #endif |
| 2138 | 4676 |
| 4677 | |
|
6164
ecaf5226e9b0
Consistently use TEST as the preprocessor condition to enable test code.
diego
parents:
6144
diff
changeset
|
4678 #ifdef TEST |
| 2138 | 4679 #undef malloc |
| 4680 #undef free | |
| 4681 #undef printf | |
|
5380
389366aa3458
Fix the self tests which are contained in some codecs and are using random().
takis
parents:
5254
diff
changeset
|
4682 #undef random |
| 2138 | 4683 |
| 5934 | 4684 int main(void){ |
| 2138 | 4685 int width=256; |
| 4686 int height=256; | |
| 4687 int buffer[2][width*height]; | |
| 4688 SnowContext s; | |
| 4689 int i; | |
| 4690 s.spatial_decomposition_count=6; | |
| 4691 s.spatial_decomposition_type=1; | |
| 2967 | 4692 |
| 2138 | 4693 printf("testing 5/3 DWT\n"); |
| 4694 for(i=0; i<width*height; i++) | |
| 4695 buffer[0][i]= buffer[1][i]= random()%54321 - 12345; | |
| 2967 | 4696 |
| 2951 | 4697 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
| 4698 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
| 2967 | 4699 |
| 2138 | 4700 for(i=0; i<width*height; i++) |
| 4701 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); | |
| 4702 | |
| 4703 printf("testing 9/7 DWT\n"); | |
| 4704 s.spatial_decomposition_type=0; | |
| 4705 for(i=0; i<width*height; i++) | |
| 4706 buffer[0][i]= buffer[1][i]= random()%54321 - 12345; | |
| 2967 | 4707 |
| 2951 | 4708 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
| 4709 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
| 2967 | 4710 |
| 2138 | 4711 for(i=0; i<width*height; i++) |
| 4001 | 4712 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); |
| 2967 | 4713 |
| 2951 | 4714 #if 0 |
| 2138 | 4715 printf("testing AC coder\n"); |
| 4716 memset(s.header_state, 0, sizeof(s.header_state)); | |
| 2335 | 4717 ff_init_range_encoder(&s.c, buffer[0], 256*256); |
| 2138 | 4718 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
| 2967 | 4719 |
| 2138 | 4720 for(i=-256; i<256; i++){ |
| 4001 | 4721 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); |
| 2138 | 4722 } |
| 2335 | 4723 ff_rac_terminate(&s.c); |
| 2138 | 4724 |
| 4725 memset(s.header_state, 0, sizeof(s.header_state)); | |
| 2335 | 4726 ff_init_range_decoder(&s.c, buffer[0], 256*256); |
| 2138 | 4727 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
| 2967 | 4728 |
| 2138 | 4729 for(i=-256; i<256; i++){ |
| 4730 int j; | |
| 4731 j= get_symbol(&s.c, s.header_state, 1); | |
| 4001 | 4732 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); |
| 2138 | 4733 } |
| 2951 | 4734 #endif |
| 6414 | 4735 { |
| 4736 int level, orientation, x, y; | |
| 4737 int64_t errors[8][4]; | |
| 4738 int64_t g=0; | |
| 4739 | |
| 4740 memset(errors, 0, sizeof(errors)); | |
| 4741 s.spatial_decomposition_count=3; | |
| 4742 s.spatial_decomposition_type=0; | |
| 4743 for(level=0; level<s.spatial_decomposition_count; level++){ | |
| 4744 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
| 4745 int w= width >> (s.spatial_decomposition_count-level); | |
| 4746 int h= height >> (s.spatial_decomposition_count-level); | |
| 4747 int stride= width << (s.spatial_decomposition_count-level); | |
| 4748 DWTELEM *buf= buffer[0]; | |
| 4749 int64_t error=0; | |
| 4750 | |
| 4751 if(orientation&1) buf+=w; | |
| 4752 if(orientation>1) buf+=stride>>1; | |
| 4753 | |
| 4754 memset(buffer[0], 0, sizeof(int)*width*height); | |
| 4755 buf[w/2 + h/2*stride]= 256*256; | |
| 4756 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
| 4757 for(y=0; y<height; y++){ | |
| 4758 for(x=0; x<width; x++){ | |
| 4759 int64_t d= buffer[0][x + y*width]; | |
| 4760 error += d*d; | |
| 4761 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d); | |
| 4762 } | |
| 4763 if(FFABS(height/2-y)<9 && level==2) printf("\n"); | |
| 2138 | 4764 } |
| 6414 | 4765 error= (int)(sqrt(error)+0.5); |
| 4766 errors[level][orientation]= error; | |
| 4767 if(g) g=ff_gcd(g, error); | |
| 4768 else g= error; | |
| 2138 | 4769 } |
| 4770 } | |
| 6414 | 4771 printf("static int const visual_weight[][4]={\n"); |
| 4772 for(level=0; level<s.spatial_decomposition_count; level++){ | |
| 4773 printf(" {"); | |
| 4774 for(orientation=0; orientation<4; orientation++){ | |
| 4775 printf("%8"PRId64",", errors[level][orientation]/g); | |
| 4776 } | |
| 4777 printf("},\n"); | |
| 2138 | 4778 } |
| 6414 | 4779 printf("};\n"); |
| 4780 { | |
| 2138 | 4781 int level=2; |
| 4782 int w= width >> (s.spatial_decomposition_count-level); | |
| 6168 | 4783 //int h= height >> (s.spatial_decomposition_count-level); |
| 2138 | 4784 int stride= width << (s.spatial_decomposition_count-level); |
| 4785 DWTELEM *buf= buffer[0]; | |
| 4786 int64_t error=0; | |
| 4787 | |
| 4788 buf+=w; | |
| 4789 buf+=stride>>1; | |
| 2967 | 4790 |
| 2138 | 4791 memset(buffer[0], 0, sizeof(int)*width*height); |
| 4792 #if 1 | |
| 4793 for(y=0; y<height; y++){ | |
| 4794 for(x=0; x<width; x++){ | |
| 4795 int tab[4]={0,2,3,1}; | |
| 4796 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)]; | |
| 4797 } | |
| 4798 } | |
| 2951 | 4799 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
| 2138 | 4800 #else |
| 4801 for(y=0; y<h; y++){ | |
| 4802 for(x=0; x<w; x++){ | |
| 4803 buf[x + y*stride ]=169; | |
| 4804 buf[x + y*stride-w]=64; | |
| 4805 } | |
| 4806 } | |
| 2951 | 4807 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
| 2138 | 4808 #endif |
| 4809 for(y=0; y<height; y++){ | |
| 4810 for(x=0; x<width; x++){ | |
| 4811 int64_t d= buffer[0][x + y*width]; | |
| 4812 error += d*d; | |
|
4122
daae66c03857
Replace most of the %lld and %llx by their (cleaner) PRI*64 counterparts.
diego
parents:
4011
diff
changeset
|
4813 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d); |
| 2138 | 4814 } |
| 4001 | 4815 if(FFABS(height/2-y)<9) printf("\n"); |
| 2138 | 4816 } |
| 6414 | 4817 } |
| 4818 | |
| 2138 | 4819 } |
| 4820 return 0; | |
| 4821 } | |
|
6164
ecaf5226e9b0
Consistently use TEST as the preprocessor condition to enable test code.
diego
parents:
6144
diff
changeset
|
4822 #endif /* TEST */ |
