Mercurial > pidgin
annotate src/html.c @ 7094:2343c3aa1dec
[gaim-migrate @ 7659]
grab_url() and parse_url() are gone, replaced with gaim_url_fetch() and
gaim_url_parse(). They were also moved to util.[ch].
committer: Tailor Script <tailor@pidgin.im>
| author | Christian Hammond <chipx86@chipx86.com> |
|---|---|
| date | Wed, 01 Oct 2003 03:01:25 +0000 |
| parents | 67c4e9d39242 |
| children |
| rev | line source |
|---|---|
| 1 | 1 /* |
| 2 * gaim | |
| 3 * | |
| 4 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net> | |
| 5176 | 5 * 2003, Nathan Walp <faceprint@faceprint.com> |
| 1 | 6 * |
| 7 * This program is free software; you can redistribute it and/or modify | |
| 8 * it under the terms of the GNU General Public License as published by | |
| 9 * the Free Software Foundation; either version 2 of the License, or | |
| 10 * (at your option) any later version. | |
| 11 * | |
| 12 * This program is distributed in the hope that it will be useful, | |
| 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 15 * GNU General Public License for more details. | |
| 16 * | |
| 17 * You should have received a copy of the GNU General Public License | |
| 18 * along with this program; if not, write to the Free Software | |
| 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 20 * | |
| 21 */ | |
|
5872
059d95c67cda
[gaim-migrate @ 6304]
Christian Hammond <chipx86@chipx86.com>
parents:
5681
diff
changeset
|
22 #include "internal.h" |
| 3630 | 23 |
|
5872
059d95c67cda
[gaim-migrate @ 6304]
Christian Hammond <chipx86@chipx86.com>
parents:
5681
diff
changeset
|
24 #include "debug.h" |
|
6115
11bedb793a44
[gaim-migrate @ 6578]
Christian Hammond <chipx86@chipx86.com>
parents:
5940
diff
changeset
|
25 #include "html.h" |
|
1092
a930439f29b1
[gaim-migrate @ 1102]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1088
diff
changeset
|
26 #include "proxy.h" |
| 1 | 27 |
|
5872
059d95c67cda
[gaim-migrate @ 6304]
Christian Hammond <chipx86@chipx86.com>
parents:
5681
diff
changeset
|
28 #include "gaim.h" |
|
3717
988485669631
[gaim-migrate @ 3850]
Herman Bloggs <hermanator12002@yahoo.com>
parents:
3630
diff
changeset
|
29 |
|
4359
5fb47ec9bfe4
[gaim-migrate @ 4625]
Christian Hammond <chipx86@chipx86.com>
parents:
4335
diff
changeset
|
30 gchar *strip_html(const gchar *text) |
| 1 | 31 { |
|
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
32 int i, j, k; |
| 1 | 33 int visible = 1; |
|
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
34 gchar *text2 = g_strdup(text); |
|
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
35 |
| 4757 | 36 if(!text) |
| 37 return NULL; | |
| 4503 | 38 |
|
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
39 for (i = 0, j = 0; text2[i]; i++) { |
|
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
40 if (text2[i] == '<') { |
|
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
41 k = i + 1; |
| 4777 | 42 if(g_ascii_isspace(text2[k])) { |
| 43 visible = 1; | |
| 44 } else { | |
| 45 while (text2[k]) { | |
| 46 if (text2[k] == '<') { | |
| 47 visible = 1; | |
| 48 break; | |
| 49 } | |
| 50 if (text2[k] == '>') { | |
| 51 visible = 0; | |
| 52 break; | |
| 53 } | |
| 54 k++; | |
|
1883
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
55 } |
|
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
56 } |
|
060161a5d5f8
[gaim-migrate @ 1893]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1881
diff
changeset
|
57 } else if (text2[i] == '>' && !visible) { |
| 1 | 58 visible = 1; |
| 59 continue; | |
| 60 } | |
| 4473 | 61 if (text2[i] == '&' && strncasecmp(text2+i,""",6) == 0) { |
| 62 text2[j++] = '\"'; | |
| 63 i = i+5; | |
| 64 continue; | |
| 65 } | |
|
1250
b5783215b245
[gaim-migrate @ 1260]
Eric Warmenhoven <eric@warmenhoven.org>
parents:
1092
diff
changeset
|
66 if (visible) { |
| 1 | 67 text2[j++] = text2[i]; |
| 68 } | |
| 69 } | |
| 70 text2[j] = '\0'; | |
| 71 return text2; | |
| 72 } | |
| 73 | |
/*
 * One entry on html_to_xhtml()'s stack of currently open tags.
 *
 * Both members point at string literals and are never written through or
 * freed, hence const.
 */
struct gaim_parse_tag {
	const char *src_tag;  /* tag name as written in the input, e.g. "b" */
	const char *dest_tag; /* tag name emitted in the XHTML, e.g. "strong" */
};
| 78 | |
/*
 * ALLOW_TAG_ALT(x, y): if the input at `c` is the opening tag <x ...>, <x>
 * or <x/>, emit it into `xhtml` renamed to <y> and move `c` past it.
 * x and y must be string literals.
 *
 * This is only usable inside html_to_xhtml()'s parsing loop: it reads and
 * advances `c`, appends to the GStrings `xhtml` and `plain`, pushes a
 * struct gaim_parse_tag onto `tags` for tags that are not self-closing, and
 * finishes with `continue`. That `continue` is why the body cannot be
 * wrapped in do { } while(0).
 *
 * With attributes ("<x "): quoted attribute values are re-emitted with their
 * contents markup-escaped, everything else is copied verbatim. If no '>' is
 * found, or another '<' appears outside quotes first, the text is not a tag:
 * a single "&lt;" is written to `xhtml`, '<' to `plain`, and parsing resumes
 * right after the '<'.
 *
 * The expansion is one if / else-if statement, so it stays correct as the
 * body of an unbraced `if`.
 */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
						const char *o = c + strlen("<" x); \
						const char *p = NULL, *q = NULL, *r = NULL; \
						GString *innards = g_string_new(""); \
						while(*o) { \
							if(!q && (*o == '\"' || *o == '\'') ) { \
								q = o; \
							} else if(q) { \
								if(*o == *q) { \
									char *unescaped = g_strndup(q+1, o-q-1); \
									char *escaped = g_markup_escape_text(unescaped, -1); \
									g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
									g_free(unescaped); \
									g_free(escaped); \
									q = NULL; \
								} \
							} else if(*o == '<') { \
								r = o; \
							} else if(*o == '>') { \
								p = o; \
								break; \
							} else { \
								innards = g_string_append_c(innards, *o); \
							} \
							o++; \
						} \
						if(p && !r) { \
							if(*(p-1) != '/') { \
								struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \
								pt->src_tag = x; \
								pt->dest_tag = y; \
								tags = g_list_prepend(tags, pt); \
							} \
							xhtml = g_string_append(xhtml, "<" y); \
							xhtml = g_string_append(xhtml, innards->str); \
							xhtml = g_string_append_c(xhtml, '>'); \
							c = p + 1; \
						} else { \
							xhtml = g_string_append(xhtml, "&lt;"); \
							plain = g_string_append_c(plain, '<'); \
							c++; \
						} \
						g_string_free(innards, TRUE); \
						continue; \
					} else if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
							(*(c+strlen("<" x)) == '>' || \
							 !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
						xhtml = g_string_append(xhtml, "<" y); \
						c += strlen("<" x); \
						if(*c != '/') { \
							struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \
							pt->src_tag = x; \
							pt->dest_tag = y; \
							tags = g_list_prepend(tags, pt); \
							xhtml = g_string_append_c(xhtml, '>'); \
						} else { \
							xhtml = g_string_append(xhtml, "/>");\
						} \
						c = strchr(c, '>') + 1; \
						continue; \
					}
/* ALLOW_TAG(x): allow tag <x> and emit it under the same name. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
| 145 | |
| 5110 | 146 void html_to_xhtml(const char *html, char **xhtml_out, char **plain_out) { |
| 5093 | 147 GString *xhtml = g_string_new(""); |
| 5110 | 148 GString *plain = g_string_new(""); |
| 5093 | 149 GList *tags = NULL, *tag; |
| 5141 | 150 const char *c = html; |
| 5176 | 151 |
| 152 while(c && *c) { | |
| 5141 | 153 if(*c == '<') { |
| 5093 | 154 if(*(c+1) == '/') { /* closing tag */ |
| 155 tag = tags; | |
| 156 while(tag) { | |
| 5104 | 157 struct gaim_parse_tag *pt = tag->data; |
| 158 if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { | |
| 159 c += strlen(pt->src_tag) + 3; | |
| 5093 | 160 break; |
| 161 } | |
| 162 tag = tag->next; | |
| 163 } | |
| 164 if(tag) { | |
| 165 while(tags) { | |
| 5104 | 166 struct gaim_parse_tag *pt = tags->data; |
| 167 g_string_append_printf(xhtml, "</%s>", pt->dest_tag); | |
| 5093 | 168 if(tags == tag) |
| 169 break; | |
| 5104 | 170 tags = g_list_remove(tags, pt); |
| 171 g_free(pt); | |
| 5093 | 172 } |
| 5104 | 173 g_free(tag->data); |
| 5093 | 174 tags = g_list_remove(tags, tag->data); |
| 175 } else { | |
| 176 /* we tried to close a tag we never opened! escape it | |
| 177 * and move on */ | |
| 178 xhtml = g_string_append(xhtml, "<"); | |
| 5110 | 179 plain = g_string_append_c(plain, '<'); |
| 5093 | 180 c++; |
| 181 } | |
| 182 } else { /* opening tag */ | |
| 183 ALLOW_TAG("a"); | |
| 5101 | 184 ALLOW_TAG_ALT("b", "strong"); |
| 5093 | 185 ALLOW_TAG("blockquote"); |
| 5101 | 186 ALLOW_TAG_ALT("bold", "strong"); |
| 5093 | 187 ALLOW_TAG("cite"); |
| 188 ALLOW_TAG("div"); | |
| 189 ALLOW_TAG("em"); | |
| 190 ALLOW_TAG("h1"); | |
| 191 ALLOW_TAG("h2"); | |
| 192 ALLOW_TAG("h3"); | |
| 193 ALLOW_TAG("h4"); | |
| 194 ALLOW_TAG("h5"); | |
| 195 ALLOW_TAG("h6"); | |
| 7014 | 196 /* we only allow html to start the message */ |
| 197 if(c == html) | |
| 198 ALLOW_TAG("html"); | |
| 5101 | 199 ALLOW_TAG_ALT("i", "em"); |
| 200 ALLOW_TAG_ALT("italic", "em"); | |
| 5093 | 201 ALLOW_TAG("li"); |
| 202 ALLOW_TAG("ol"); | |
| 203 ALLOW_TAG("p"); | |
| 204 ALLOW_TAG("pre"); | |
| 205 ALLOW_TAG("q"); | |
| 206 ALLOW_TAG("span"); | |
| 207 ALLOW_TAG("strong"); | |
| 208 ALLOW_TAG("ul"); | |
| 209 | |
| 5174 | 210 /* we skip <HR> because it's not legal in XHTML-IM. However, |
| 211 * we still want to send something sensible, so we put a | |
| 212 * linebreak in its place. <BR> also needs special handling | |
| 213 * because putting a </BR> to close it would just be dumb. */ | |
| 214 if((!g_ascii_strncasecmp(c, "<br", 3) | |
| 215 || !g_ascii_strncasecmp(c, "<hr", 3)) | |
| 216 && (*(c+3) == '>' || | |
| 217 !g_ascii_strncasecmp(c+3, "/>", 2) || | |
| 218 !g_ascii_strncasecmp(c+3, " />", 3))) { | |
| 219 c = strchr(c, '>') + 1; | |
| 220 xhtml = g_string_append(xhtml, "<br/>"); | |
| 221 if(*c != '\n') | |
| 222 plain = g_string_append_c(plain, '\n'); | |
| 223 continue; | |
| 224 } | |
| 225 if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) { | |
| 5104 | 226 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); |
| 227 pt->src_tag = *(c+2) == '>' ? "u" : "underline"; | |
| 228 pt->dest_tag = "span"; | |
| 229 tags = g_list_prepend(tags, pt); | |
| 230 c = strchr(c, '>') + 1; | |
| 231 xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>"); | |
| 232 continue; | |
| 233 } | |
| 5174 | 234 if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) { |
| 5104 | 235 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); |
| 236 pt->src_tag = *(c+2) == '>' ? "s" : "strike"; | |
| 237 pt->dest_tag = "span"; | |
| 238 tags = g_list_prepend(tags, pt); | |
| 239 c = strchr(c, '>') + 1; | |
| 240 xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>"); | |
| 241 continue; | |
| 242 } | |
| 243 if(!g_ascii_strncasecmp(c, "<sub>", 5)) { | |
| 244 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
| 245 pt->src_tag = "sub"; | |
| 246 pt->dest_tag = "span"; | |
| 247 tags = g_list_prepend(tags, pt); | |
| 248 c = strchr(c, '>') + 1; | |
| 249 xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>"); | |
| 250 continue; | |
| 251 } | |
| 252 if(!g_ascii_strncasecmp(c, "<sup>", 5)) { | |
| 253 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
| 254 pt->src_tag = "sup"; | |
| 255 pt->dest_tag = "span"; | |
| 256 tags = g_list_prepend(tags, pt); | |
| 257 c = strchr(c, '>') + 1; | |
| 258 xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>"); | |
| 259 continue; | |
| 260 } | |
| 5107 | 261 if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) { |
| 262 const char *p = c; | |
| 263 GString *style = g_string_new(""); | |
| 264 struct gaim_parse_tag *pt; | |
| 265 while(*p && *p != '>') { | |
| 266 if(!g_ascii_strncasecmp(p, "color=", strlen("color="))) { | |
| 267 const char *q = p + strlen("color="); | |
| 268 GString *color = g_string_new(""); | |
| 269 if(*q == '\'' || *q == '\"') | |
| 270 q++; | |
| 271 while(*q && *q != '\"' && *q != '\'' && *q != ' ') { | |
| 272 color = g_string_append_c(color, *q); | |
| 273 q++; | |
| 274 } | |
| 275 g_string_append_printf(style, "color: %s; ", color->str); | |
| 276 g_string_free(color, TRUE); | |
| 277 p = q; | |
| 278 } else if(!g_ascii_strncasecmp(p, "face=", strlen("face="))) { | |
| 279 const char *q = p + strlen("face="); | |
| 280 gboolean space_allowed = FALSE; | |
| 281 GString *face = g_string_new(""); | |
| 282 if(*q == '\'' || *q == '\"') { | |
| 283 space_allowed = TRUE; | |
| 284 q++; | |
| 285 } | |
| 286 while(*q && *q != '\"' && *q != '\'' && (space_allowed || *q != ' ')) { | |
| 287 face = g_string_append_c(face, *q); | |
| 288 q++; | |
| 289 } | |
| 290 g_string_append_printf(style, "font-family: %s; ", face->str); | |
| 291 g_string_free(face, TRUE); | |
| 292 p = q; | |
| 293 } else if(!g_ascii_strncasecmp(p, "size=", strlen("size="))) { | |
| 294 const char *q = p + strlen("size="); | |
| 295 int sz; | |
| 296 const char *size = "medium"; | |
| 297 if(*q == '\'' || *q == '\"') | |
| 298 q++; | |
| 299 sz = atoi(q); | |
| 300 if(sz < 3) | |
| 301 size = "smaller"; | |
| 302 else if(sz > 3) | |
| 303 size = "larger"; | |
| 304 g_string_append_printf(style, "font-size: %s; ", size); | |
| 305 p = q; | |
| 306 } | |
| 307 p++; | |
| 308 } | |
| 309 c = strchr(c, '>') + 1; | |
| 310 pt = g_new0(struct gaim_parse_tag, 1); | |
| 311 pt->src_tag = "font"; | |
| 312 pt->dest_tag = "span"; | |
| 313 tags = g_list_prepend(tags, pt); | |
| 314 xhtml = g_string_append(xhtml, "<span"); | |
| 315 if(style->len) | |
| 316 g_string_append_printf(xhtml, " style='%s'", style->str); | |
| 317 xhtml = g_string_append_c(xhtml, '>'); | |
| 318 g_string_free(style, TRUE); | |
| 319 continue; | |
| 320 } | |
| 321 if(!g_ascii_strncasecmp(c, "<body ", 6)) { | |
| 322 const char *p = c; | |
| 323 gboolean did_something = FALSE; | |
| 324 while(*p && *p != '>') { | |
| 325 if(!g_ascii_strncasecmp(p, "bgcolor=", strlen("bgcolor="))) { | |
| 326 const char *q = p + strlen("bgcolor="); | |
| 327 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
| 328 GString *color = g_string_new(""); | |
| 329 if(*q == '\'' || *q == '\"') | |
| 330 q++; | |
| 331 while(*q && *q != '\"' && *q != '\'' && *q != ' ') { | |
| 332 color = g_string_append_c(color, *q); | |
| 333 q++; | |
| 334 } | |
| 335 g_string_append_printf(xhtml, "<span style='background: %s;'>", color->str); | |
| 336 g_string_free(color, TRUE); | |
| 337 c = strchr(c, '>') + 1; | |
| 338 pt->src_tag = "body"; | |
| 339 pt->dest_tag = "span"; | |
| 340 tags = g_list_prepend(tags, pt); | |
| 341 did_something = TRUE; | |
| 342 break; | |
| 343 } | |
| 344 p++; | |
| 345 } | |
| 346 if(did_something) continue; | |
| 347 } | |
| 348 /* this has to come after the special case for bgcolor */ | |
| 349 ALLOW_TAG("body"); | |
| 5093 | 350 if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) { |
| 351 char *p = strstr(c + strlen("<!--"), "-->"); | |
| 352 if(p) { | |
| 353 xhtml = g_string_append(xhtml, "<!--"); | |
| 354 c += strlen("<!--"); | |
| 355 continue; | |
| 356 } | |
| 357 } | |
| 358 | |
| 359 xhtml = g_string_append(xhtml, "<"); | |
| 5110 | 360 plain = g_string_append_c(plain, '<'); |
| 5093 | 361 c++; |
| 362 } | |
| 363 } else { | |
| 364 xhtml = g_string_append_c(xhtml, *c); | |
| 5110 | 365 plain = g_string_append_c(plain, *c); |
| 5093 | 366 c++; |
| 367 } | |
| 368 } | |
| 369 tag = tags; | |
| 370 while(tag) { | |
| 371 g_string_append_printf(xhtml, "</%s>", (char *)tag->data); | |
| 372 tag = tag->next; | |
| 373 } | |
| 374 g_list_free(tags); | |
| 5110 | 375 if(xhtml_out) |
| 376 *xhtml_out = g_strdup(xhtml->str); | |
| 377 if(plain_out) | |
| 378 *plain_out = g_strdup(plain->str); | |
| 5093 | 379 g_string_free(xhtml, TRUE); |
| 5110 | 380 g_string_free(plain, TRUE); |
| 5093 | 381 } |
| 6514 | 382 |
|
7094
2343c3aa1dec
[gaim-migrate @ 7659]
Christian Hammond <chipx86@chipx86.com>
parents:
7014
diff
changeset
|
/**
 * Extracts one field from @original and appends it to @add_to as HTML.
 *
 * The field is the text between (@start_tok followed by @skip characters)
 * and the next occurrence of @end_tok. On success the function appends
 *   "<b>display_name:</b> value<br>\n"
 * or, when @islink is non-zero,
 *   "<b>display_name:</b> <br><a href=\"PREFIXvalue\">PREFIXvalue</a><br>\n"
 * where PREFIX is @link_prefix (omitted when NULL).
 *
 * Nothing is appended and 0 is returned when a required argument is NULL,
 * @start_tok or @end_tok is not found, @skip points outside @original, the
 * field starts with @check_value (if non-zero), or the field starts with
 * @no_value_tok (if non-NULL).
 *
 * @add_to must already hold a NUL-terminated string and have room for the
 * appended text; this function cannot check the destination size.
 *
 * @return 1 if the field was appended, 0 otherwise.
 */
int info_extract_field(const char *original, char *add_to, char *start_tok,
		int skip, char *end_tok, char check_value, char *no_value_tok,
		char *display_name, int islink, char *link_prefix)
{
	const char *p, *q;
	size_t len;

	if (!original || !add_to || !start_tok ||
			!end_tok || !display_name)
		return 0;

	p = strstr(original, start_tok);
	if (!p)
		return 0;
	p += strlen(start_tok);

	/* Refuse a skip that would leave the string in either direction. */
	if (skip > 0 && memchr(p, '\0', (size_t)skip) != NULL)
		return 0;
	if (skip < 0 && (p - original) + skip < 0)
		return 0;
	p += skip;

	if (check_value && *p == check_value)
		return 0;

	q = strstr(p, end_tok);
	if (!q)
		return 0;

	if (no_value_tok && strncmp(p, no_value_tok, strlen(no_value_tok)) == 0)
		return 0;

	/* The value is appended straight from the source with strncat(), so its
	 * length is not limited by any intermediate buffer. */
	len = (size_t)(q - p);

	strcat(add_to, "<b>");
	strcat(add_to, display_name);
	strcat(add_to, ":</b> ");
	if (islink) {
		strcat(add_to, "<br><a href=\"");
		if (link_prefix)
			strcat(add_to, link_prefix);
		strncat(add_to, p, len);
		strcat(add_to, "\">");
		if (link_prefix)
			strcat(add_to, link_prefix);
		strncat(add_to, p, len);
		strcat(add_to, "</a>");
	} else {
		strncat(add_to, p, len);
	}
	strcat(add_to, "<br>\n");

	return 1;
}
