Mercurial > geeqie
annotate src/similar.c @ 1802:956aab097ea7
added 2010 to copyright text
| author | nadvornik |
|---|---|
| date | Tue, 16 Feb 2010 21:18:03 +0000 |
| parents | a6f9ba6fd751 |
| children |
| rev | line source |
|---|---|
| 9 | 1 /* |
| 196 | 2 * Geeqie |
| 9 | 3 * (C) 2004 John Ellis |
| 1802 | 4 * Copyright (C) 2008 - 2010 The Geeqie Team |
| 9 | 5 * |
| 6 * Author: John Ellis | |
| 7 * | |
| 8 * This software is released under the GNU General Public License (GNU GPL). | |
| 9 * Please read the included file COPYING for more information. | |
| 10 * This software comes with no warranty of any kind, use at your own risk! | |
| 11 */ | |
| 12 | |
| 13 | |
| 281 | 14 #include "main.h" |
| 9 | 15 #include "similar.h" |
| 16 | |
| 17 /* | |
| 18 * These functions are intended to find images with similar color content. For | |
| 19 * example when an image was saved at different compression levels or dimensions | |
| 20 * (scaled down/up) the contents are similar, but these files do not match by file | |
| 21 * size, dimensions, or checksum. | |
| 22 * | |
| 23 * These functions create a 32 x 32 array for each color channel (red, green, blue). | |
| 24 * The array represents the average color of each corresponding part of the | |
| 25 * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid. | |
| 26 * Each grid is then processed for the average color value, this is what | |
| 27 * is stored in the array) | |
| 28 * | |
| 29 * To compare two images, generate a ImageSimilarityData for each image, then | |
| 30 * pass them to the compare function. The return value is the percent match | |
| 31 * of the two images. (for this, simple comparisons are used, basically the return | |
| 32 * is an average of the corresponding array differences) | |
| 33 * | |
| 34 * for image_sim_compare(), the return is 0.0 to 1.0: | |
| 35 * 1.0 for exact matches (an image is compared to itself) | |
| 36 * 0.0 for exact opposite images (compare an all black to an all white image) | |
| 37 * generally only a match of > 0.85 are significant at all, and >.95 is useful to | |
| 38 * find images that have been re-saved to other formats, dimensions, or compression. | |
| 39 */ | |
| 40 | |
| 41 | |
| 42 /* | |
| 43 * The experimental (alternate) algorithm is only for testing of new techniques to | |
| 44 * improve the result, and hopes to reduce false positives. | |
| 45 */ | |
| 46 | |
|
1420
3a9fb1b52559
Use gboolean where applicable, for the sake of consistency.
zas_
parents:
1284
diff
changeset
|
47 static gboolean alternate_enabled = FALSE; |
| 9 | 48 |
|
1420
3a9fb1b52559
Use gboolean where applicable, for the sake of consistency.
zas_
parents:
1284
diff
changeset
|
49 void image_sim_alternate_set(gboolean enable) |
| 9 | 50 { |
| 51 alternate_enabled = enable; | |
| 52 } | |
| 53 | |
|
1420
3a9fb1b52559
Use gboolean where applicable, for the sake of consistency.
zas_
parents:
1284
diff
changeset
|
54 gboolean image_sim_alternate_enabled(void) |
| 9 | 55 { |
| 56 return alternate_enabled; | |
| 57 } | |
| 58 | |
| 59 ImageSimilarityData *image_sim_new(void) | |
| 60 { | |
| 61 ImageSimilarityData *sd = g_new0(ImageSimilarityData, 1); | |
| 62 | |
| 63 return sd; | |
| 64 } | |
| 65 | |
| 66 void image_sim_free(ImageSimilarityData *sd) | |
| 67 { | |
| 68 g_free(sd); | |
| 69 } | |
| 70 | |
| 1003 | 71 static gint image_sim_channel_eq_sort_cb(gconstpointer a, gconstpointer b) |
| 9 | 72 { |
| 1002 | 73 gint *pa = (gpointer)a; |
| 74 gint *pb = (gpointer)b; | |
| 9 | 75 if (pa[1] < pb[1]) return -1; |
| 76 if (pa[1] > pb[1]) return 1; | |
| 77 return 0; | |
| 78 } | |
| 79 | |
| 80 static void image_sim_channel_equal(guint8 *pix, gint len) | |
| 81 { | |
| 82 gint *buf; | |
| 83 gint i; | |
| 84 gint p; | |
| 85 | |
| 86 buf = g_new0(gint, len * 2); | |
| 87 | |
| 88 p = 0; | |
| 89 for (i = 0; i < len; i++) | |
| 90 { | |
| 91 buf[p] = i; | |
| 92 p++; | |
| 93 buf[p] = (gint)pix[i]; | |
| 94 p++; | |
| 95 } | |
| 96 | |
|
512
f9bf33be53ff
Remove whitespace between function name and first parenthesis for the sake of consistency.
zas_
parents:
475
diff
changeset
|
97 qsort(buf, len, sizeof(gint) * 2, image_sim_channel_eq_sort_cb); |
| 9 | 98 |
| 99 p = 0; | |
| 100 for (i = 0; i < len; i++) | |
| 101 { | |
| 102 gint n; | |
| 103 | |
| 104 n = buf[p]; | |
| 927 | 105 p += 2; |
| 9 | 106 pix[n] = (guint8)(255 * i / len); |
| 107 } | |
| 108 | |
| 109 g_free(buf); | |
| 110 } | |
| 111 | |
| 112 static void image_sim_channel_norm(guint8 *pix, gint len) | |
| 113 { | |
| 927 | 114 guint8 l, h, delta; |
| 9 | 115 gint i; |
| 116 gdouble scale; | |
| 117 | |
| 118 l = h = pix[0]; | |
| 119 | |
| 120 for (i = 0; i < len; i++) | |
| 121 { | |
| 122 if (pix[i] < l) l = pix[i]; | |
| 123 if (pix[i] > h) h = pix[i]; | |
| 124 } | |
| 125 | |
| 927 | 126 delta = h - l; |
| 127 scale = (delta != 0) ? 255.0 / (gdouble)(delta) : 1.0; | |
| 9 | 128 |
| 129 for (i = 0; i < len; i++) | |
| 130 { | |
| 131 pix[i] = (guint8)((gdouble)(pix[i] - l) * scale); | |
| 132 } | |
| 133 } | |
| 134 | |
| 135 /* | |
| 136 * define these to enable various components of the experimental compare functions | |
| 137 * | |
| 138 * Convert the thumbprint to greyscale (ignore all color information when comparing) | |
| 139 * #define ALTERNATE_USES_GREYSCALE 1 | |
| 140 * | |
| 141 * Take into account the difference in change from one pixel to the next | |
| 142 * #define ALTERNATE_INCLUDE_COMPARE_CHANGE 1 | |
| 143 */ | |
| 144 | |
| 145 void image_sim_alternate_processing(ImageSimilarityData *sd) | |
| 146 { | |
| 147 #ifdef ALTERNATE_USES_GREYSCALE | |
| 148 gint i; | |
| 149 #endif | |
| 150 | |
| 151 if (!alternate_enabled) return; | |
| 442 | 152 |
| 9 | 153 image_sim_channel_norm(sd->avg_r, sizeof(sd->avg_r)); |
| 154 image_sim_channel_norm(sd->avg_g, sizeof(sd->avg_g)); | |
| 155 image_sim_channel_norm(sd->avg_b, sizeof(sd->avg_b)); | |
| 156 | |
| 157 image_sim_channel_equal(sd->avg_r, sizeof(sd->avg_r)); | |
| 158 image_sim_channel_equal(sd->avg_g, sizeof(sd->avg_g)); | |
| 159 image_sim_channel_equal(sd->avg_b, sizeof(sd->avg_b)); | |
| 160 | |
| 161 #ifdef ALTERNATE_USES_GREYSCALE | |
| 162 for (i = 0; i < sizeof(sd->avg_r); i++) | |
| 163 { | |
| 164 guint8 n; | |
| 442 | 165 |
| 9 | 166 n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3); |
| 167 sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n; | |
| 168 } | |
| 169 #endif | |
| 170 } | |
| 171 | |
| 172 void image_sim_fill_data(ImageSimilarityData *sd, GdkPixbuf *pixbuf) | |
| 173 { | |
| 174 gint w, h; | |
| 175 gint rs; | |
| 176 guchar *pix; | |
| 1446 | 177 gboolean has_alpha; |
| 9 | 178 gint p_step; |
| 179 | |
| 180 guchar *p; | |
| 181 gint i; | |
| 182 gint j; | |
| 927 | 183 gint x_inc, y_inc, xy_inc; |
| 9 | 184 gint xs, ys; |
| 185 | |
|
1420
3a9fb1b52559
Use gboolean where applicable, for the sake of consistency.
zas_
parents:
1284
diff
changeset
|
186 gboolean x_small = FALSE; /* if less than 32 w or h, set TRUE */ |
|
3a9fb1b52559
Use gboolean where applicable, for the sake of consistency.
zas_
parents:
1284
diff
changeset
|
187 gboolean y_small = FALSE; |
| 9 | 188 |
| 189 if (!sd || !pixbuf) return; | |
| 190 | |
| 191 w = gdk_pixbuf_get_width(pixbuf); | |
| 192 h = gdk_pixbuf_get_height(pixbuf); | |
| 193 rs = gdk_pixbuf_get_rowstride(pixbuf); | |
| 194 pix = gdk_pixbuf_get_pixels(pixbuf); | |
| 195 has_alpha = gdk_pixbuf_get_has_alpha(pixbuf); | |
| 196 | |
| 197 p_step = has_alpha ? 4 : 3; | |
| 198 x_inc = w / 32; | |
| 199 y_inc = h / 32; | |
| 200 | |
| 201 if (x_inc < 1) | |
| 202 { | |
| 203 x_inc = 1; | |
| 204 x_small = TRUE; | |
| 205 } | |
| 206 if (y_inc < 1) | |
| 207 { | |
| 208 y_inc = 1; | |
| 209 y_small = TRUE; | |
| 210 } | |
| 211 | |
| 927 | 212 xy_inc = x_inc * y_inc; |
| 213 | |
| 9 | 214 j = 0; |
| 215 | |
| 216 for (ys = 0; ys < 32; ys++) | |
| 217 { | |
| 218 if (y_small) j = (gdouble)h / 32 * ys; | |
| 219 | |
| 220 i = 0; | |
| 221 | |
| 222 for (xs = 0; xs < 32; xs++) | |
| 223 { | |
| 224 gint x, y; | |
| 225 gint r, g, b; | |
| 226 gint t; | |
| 927 | 227 guchar *xpos; |
| 9 | 228 |
| 229 if (x_small) i = (gdouble)w / 32 * xs; | |
| 230 | |
| 231 r = g = b = 0; | |
| 927 | 232 xpos = pix + (i * p_step); |
| 9 | 233 |
| 234 for (y = j; y < j + y_inc; y++) | |
| 235 { | |
| 927 | 236 p = xpos + (y * rs); |
| 9 | 237 for (x = i; x < i + x_inc; x++) |
| 238 { | |
| 239 r += *p; p++; | |
| 240 g += *p; p++; | |
| 241 b += *p; p++; | |
| 242 if (has_alpha) p++; | |
| 243 } | |
| 244 } | |
| 245 | |
| 927 | 246 r /= xy_inc; |
| 247 g /= xy_inc; | |
| 248 b /= xy_inc; | |
| 9 | 249 |
| 250 t = ys * 32 + xs; | |
| 251 sd->avg_r[t] = r; | |
| 252 sd->avg_g[t] = g; | |
| 253 sd->avg_b[t] = b; | |
| 254 | |
| 255 i += x_inc; | |
| 256 } | |
| 257 | |
| 258 j += y_inc; | |
| 259 } | |
| 260 | |
| 261 sd->filled = TRUE; | |
| 262 } | |
| 263 | |
| 264 ImageSimilarityData *image_sim_new_from_pixbuf(GdkPixbuf *pixbuf) | |
| 265 { | |
| 266 ImageSimilarityData *sd; | |
| 267 | |
| 268 sd = image_sim_new(); | |
| 269 image_sim_fill_data(sd, pixbuf); | |
| 270 | |
| 271 return sd; | |
| 272 } | |
| 273 | |
| 274 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE | |
| 275 static gdouble alternate_image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min) | |
| 276 { | |
| 277 gint sim; | |
| 278 gint i; | |
| 279 gint j; | |
| 280 gint ld; | |
| 281 | |
| 282 if (!a || !b || !a->filled || !b->filled) return 0.0; | |
| 283 | |
| 284 min = 1.0 - min; | |
| 285 sim = 0.0; | |
| 286 ld = 0; | |
| 287 | |
| 927 | 288 for (j = 0; j < 1024; j += 32) |
| 9 | 289 { |
| 290 for (i = j; i < j + 32; i++) | |
| 291 { | |
| 292 gint cr, cg, cb; | |
| 293 gint cd; | |
| 294 | |
| 295 cr = abs(a->avg_r[i] - b->avg_r[i]); | |
| 296 cg = abs(a->avg_g[i] - b->avg_g[i]); | |
| 297 cb = abs(a->avg_b[i] - b->avg_b[i]); | |
| 298 | |
| 299 cd = cr + cg + cb; | |
| 300 sim += cd + abs(cd - ld); | |
| 301 ld = cd / 3; | |
| 302 } | |
| 303 /* check for abort, if so return 0.0 */ | |
| 304 if ((gdouble)sim / (255.0 * 1024.0 * 4.0) > min) return 0.0; | |
| 305 } | |
| 306 | |
| 307 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 4.0)) ); | |
| 308 } | |
| 309 #endif | |
| 310 | |
| 311 gdouble image_sim_compare(ImageSimilarityData *a, ImageSimilarityData *b) | |
| 312 { | |
| 313 gint sim; | |
| 314 gint i; | |
| 315 | |
| 316 if (!a || !b || !a->filled || !b->filled) return 0.0; | |
| 317 | |
| 318 sim = 0.0; | |
| 319 | |
| 320 for (i = 0; i < 1024; i++) | |
| 321 { | |
| 322 sim += abs(a->avg_r[i] - b->avg_r[i]); | |
| 323 sim += abs(a->avg_g[i] - b->avg_g[i]); | |
| 324 sim += abs(a->avg_b[i] - b->avg_b[i]); | |
| 325 } | |
| 326 | |
| 327 return 1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)); | |
| 328 } | |
| 329 | |
| 330 /* this uses a cutoff point so that it can abort early when it gets to | |
| 331 * a point that can simply no longer make the cut-off point. | |
| 332 */ | |
| 333 gdouble image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min) | |
| 334 { | |
| 335 gint sim; | |
| 336 gint i; | |
| 337 gint j; | |
| 338 | |
| 339 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE | |
| 340 if (alternate_enabled) return alternate_image_sim_compare_fast(a, b, min); | |
| 341 #endif | |
| 342 | |
| 343 if (!a || !b || !a->filled || !b->filled) return 0.0; | |
| 344 | |
| 345 min = 1.0 - min; | |
| 346 sim = 0.0; | |
| 347 | |
| 927 | 348 for (j = 0; j < 1024; j += 32) |
| 9 | 349 { |
| 350 for (i = j; i < j + 32; i++) | |
| 351 { | |
| 352 sim += abs(a->avg_r[i] - b->avg_r[i]); | |
| 353 sim += abs(a->avg_g[i] - b->avg_g[i]); | |
| 354 sim += abs(a->avg_b[i] - b->avg_b[i]); | |
| 355 } | |
| 356 /* check for abort, if so return 0.0 */ | |
| 357 if ((gdouble)sim / (255.0 * 1024.0 * 3.0) > min) return 0.0; | |
| 358 } | |
| 359 | |
| 360 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)) ); | |
| 361 } | |
|
1055
1646720364cf
Adding a vim modeline to all files - patch by Klaus Ethgen
nadvornik
parents:
1003
diff
changeset
|
362 /* vim: set shiftwidth=8 softtabstop=0 cindent cinoptions={1s: */ |
