Mercurial > pidgin
comparison src/util.c @ 7095:c8bf2da398e3
[gaim-migrate @ 7660]
html.[ch] is gone. Everything inside was namespaced and put in util.[ch].
One less ugly part of gaim in the tree.
committer: Tailor Script <tailor@pidgin.im>
| author | Christian Hammond <chipx86@chipx86.com> |
|---|---|
| date | Wed, 01 Oct 2003 03:43:18 +0000 |
| parents | 2343c3aa1dec |
| children | 8130adad8b7a |
comparison
equal
deleted
inserted
replaced
| 7094:2343c3aa1dec | 7095:c8bf2da398e3 |
|---|---|
| 1 /* | 1 /* |
| 2 * @file util.h Utility Functions | 2 * @file util.h Utility Functions |
| 3 * @ingroup core | 3 * @ingroup core |
| 4 * | 4 * |
| 5 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net> | 5 * Copyright (C) 1998-1999 Mark Spencer <markster@marko.net> |
| 6 * Copyright (C) 2003 Christian Hammond <chipx86@gnupdate.org> | 6 * 2003 Christian Hammond <chipx86@gnupdate.org> |
| 7 * 2003 Nathan Walp <faceprint@faceprint.com> | |
| 7 * | 8 * |
| 8 * This program is free software; you can redistribute it and/or modify | 9 * This program is free software; you can redistribute it and/or modify |
| 9 * it under the terms of the GNU General Public License as published by | 10 * it under the terms of the GNU General Public License as published by |
| 10 * the Free Software Foundation; either version 2 of the License, or | 11 * the Free Software Foundation; either version 2 of the License, or |
| 11 * (at your option) any later version. | 12 * (at your option) any later version. |
| 1167 | 1168 |
| 1168 return found; | 1169 return found; |
| 1169 } | 1170 } |
| 1170 | 1171 |
| 1171 gboolean | 1172 gboolean |
| 1173 gaim_markup_extract_info_field(const char *str, char *dest_buffer, | |
| 1174 const char *start_token, int skip, | |
| 1175 const char *end_token, char check_value, | |
| 1176 const char *no_value_token, | |
| 1177 const char *display_name, gboolean is_link, | |
| 1178 const char *link_prefix) | |
| 1179 { | |
| 1180 const char *p, *q; | |
| 1181 char buf[1024]; | |
| 1182 | |
| 1183 g_return_val_if_fail(str != NULL, FALSE); | |
| 1184 g_return_val_if_fail(dest_buffer != NULL, FALSE); | |
| 1185 g_return_val_if_fail(start_token != NULL, FALSE); | |
| 1186 g_return_val_if_fail(end_token != NULL, FALSE); | |
| 1187 g_return_val_if_fail(display_name != NULL, FALSE); | |
| 1188 | |
| 1189 p = strstr(str, start_token); | |
| 1190 | |
| 1191 if (p == NULL) | |
| 1192 return FALSE; | |
| 1193 | |
| 1194 p += strlen(start_token) + skip; | |
| 1195 | |
| 1196 if (check_value != '\0' && *p == check_value) | |
| 1197 return FALSE; | |
| 1198 | |
| 1199 q = strstr(p, end_token); | |
| 1200 | |
| 1201 if (q != NULL && (!no_value_token || | |
| 1202 (no_value_token && strncmp(p, no_value_token, | |
| 1203 strlen(no_value_token))))) | |
| 1204 { | |
| 1205 strcat(dest_buffer, "<b>"); | |
| 1206 strcat(dest_buffer, display_name); | |
| 1207 strcat(dest_buffer, ":</b> "); | |
| 1208 | |
| 1209 if (is_link) | |
| 1210 { | |
| 1211 strcat(dest_buffer, "<br><a href=\""); | |
| 1212 memcpy(buf, p, q - p); | |
| 1213 buf[q - p] = '\0'; | |
| 1214 | |
| 1215 if (link_prefix) | |
| 1216 strcat(dest_buffer, link_prefix); | |
| 1217 | |
| 1218 strcat(dest_buffer, buf); | |
| 1219 strcat(dest_buffer, "\">"); | |
| 1220 | |
| 1221 if (link_prefix) | |
| 1222 strcat(dest_buffer, link_prefix); | |
| 1223 | |
| 1224 strcat(dest_buffer, buf); | |
| 1225 strcat(dest_buffer, "</a>"); | |
| 1226 } | |
| 1227 else | |
| 1228 { | |
| 1229 memcpy(buf, p, q - p); | |
| 1230 buf[q - p] = '\0'; | |
| 1231 strcat(dest_buffer, buf); | |
| 1232 } | |
| 1233 | |
| 1234 strcat(dest_buffer, "<br>\n"); | |
| 1235 | |
| 1236 return TRUE; | |
| 1237 } | |
| 1238 | |
| 1239 return FALSE; | |
| 1240 } | |
| 1241 | |
| 1242 struct gaim_parse_tag { | |
| 1243 char *src_tag; | |
| 1244 char *dest_tag; | |
| 1245 }; | |
| 1246 | |
| 1247 #define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \ | |
| 1248 const char *o = c + strlen("<" x); \ | |
| 1249 const char *p = NULL, *q = NULL, *r = NULL; \ | |
| 1250 GString *innards = g_string_new(""); \ | |
| 1251 while(o && *o) { \ | |
| 1252 if(!q && (*o == '\"' || *o == '\'') ) { \ | |
| 1253 q = o; \ | |
| 1254 } else if(q) { \ | |
| 1255 if(*o == *q) { \ | |
| 1256 char *unescaped = g_strndup(q+1, o-q-1); \ | |
| 1257 char *escaped = g_markup_escape_text(unescaped, -1); \ | |
| 1258 g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \ | |
| 1259 g_free(unescaped); \ | |
| 1260 g_free(escaped); \ | |
| 1261 q = NULL; \ | |
| 1262 } else if(*c == '\\') { \ | |
| 1263 o++; \ | |
| 1264 } \ | |
| 1265 } else if(*o == '<') { \ | |
| 1266 r = o; \ | |
| 1267 } else if(*o == '>') { \ | |
| 1268 p = o; \ | |
| 1269 break; \ | |
| 1270 } else { \ | |
| 1271 innards = g_string_append_c(innards, *o); \ | |
| 1272 } \ | |
| 1273 o++; \ | |
| 1274 } \ | |
| 1275 if(p && !r) { \ | |
| 1276 if(*(p-1) != '/') { \ | |
| 1277 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \ | |
| 1278 pt->src_tag = x; \ | |
| 1279 pt->dest_tag = y; \ | |
| 1280 tags = g_list_prepend(tags, pt); \ | |
| 1281 } \ | |
| 1282 xhtml = g_string_append(xhtml, "<" y); \ | |
| 1283 c += strlen("<" x ); \ | |
| 1284 xhtml = g_string_append(xhtml, innards->str); \ | |
| 1285 xhtml = g_string_append_c(xhtml, '>'); \ | |
| 1286 c = p + 1; \ | |
| 1287 } else { \ | |
| 1288 xhtml = g_string_append(xhtml, "&lt;"); \ | |
| 1289 plain = g_string_append_c(plain, '<'); \ | |
| 1290 c++; \ | |
| 1291 } \ | |
| 1292 g_string_free(innards, TRUE); \ | |
| 1293 continue; \ | |
| 1294 } \ | |
| 1295 if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \ | |
| 1296 (*(c+strlen("<" x)) == '>' || \ | |
| 1297 !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \ | |
| 1298 xhtml = g_string_append(xhtml, "<" y); \ | |
| 1299 c += strlen("<" x); \ | |
| 1300 if(*c != '/') { \ | |
| 1301 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); \ | |
| 1302 pt->src_tag = x; \ | |
| 1303 pt->dest_tag = y; \ | |
| 1304 tags = g_list_prepend(tags, pt); \ | |
| 1305 xhtml = g_string_append_c(xhtml, '>'); \ | |
| 1306 } else { \ | |
| 1307 xhtml = g_string_append(xhtml, "/>");\ | |
| 1308 } \ | |
| 1309 c = strchr(c, '>') + 1; \ | |
| 1310 continue; \ | |
| 1311 } | |
| 1312 #define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x) | |
| 1313 void | |
| 1314 gaim_markup_html_to_xhtml(const char *html, char **xhtml_out, | |
| 1315 char **plain_out) | |
| 1316 { | |
| 1317 GString *xhtml = g_string_new(""); | |
| 1318 GString *plain = g_string_new(""); | |
| 1319 GList *tags = NULL, *tag; | |
| 1320 const char *c = html; | |
| 1321 | |
| 1322 while(c && *c) { | |
| 1323 if(*c == '<') { | |
| 1324 if(*(c+1) == '/') { /* closing tag */ | |
| 1325 tag = tags; | |
| 1326 while(tag) { | |
| 1327 struct gaim_parse_tag *pt = tag->data; | |
| 1328 if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') { | |
| 1329 c += strlen(pt->src_tag) + 3; | |
| 1330 break; | |
| 1331 } | |
| 1332 tag = tag->next; | |
| 1333 } | |
| 1334 if(tag) { | |
| 1335 while(tags) { | |
| 1336 struct gaim_parse_tag *pt = tags->data; | |
| 1337 g_string_append_printf(xhtml, "</%s>", pt->dest_tag); | |
| 1338 if(tags == tag) | |
| 1339 break; | |
| 1340 tags = g_list_remove(tags, pt); | |
| 1341 g_free(pt); | |
| 1342 } | |
| 1343 g_free(tag->data); | |
| 1344 tags = g_list_remove(tags, tag->data); | |
| 1345 } else { | |
| 1346 /* we tried to close a tag we never opened! escape it | |
| 1347 * and move on */ | |
| 1348 xhtml = g_string_append(xhtml, "&lt;"); | |
| 1349 plain = g_string_append_c(plain, '<'); | |
| 1350 c++; | |
| 1351 } | |
| 1352 } else { /* opening tag */ | |
| 1353 ALLOW_TAG("a"); | |
| 1354 ALLOW_TAG_ALT("b", "strong"); | |
| 1355 ALLOW_TAG("blockquote"); | |
| 1356 ALLOW_TAG_ALT("bold", "strong"); | |
| 1357 ALLOW_TAG("cite"); | |
| 1358 ALLOW_TAG("div"); | |
| 1359 ALLOW_TAG("em"); | |
| 1360 ALLOW_TAG("h1"); | |
| 1361 ALLOW_TAG("h2"); | |
| 1362 ALLOW_TAG("h3"); | |
| 1363 ALLOW_TAG("h4"); | |
| 1364 ALLOW_TAG("h5"); | |
| 1365 ALLOW_TAG("h6"); | |
| 1366 /* we only allow html to start the message */ | |
| 1367 if(c == html) | |
| 1368 ALLOW_TAG("html"); | |
| 1369 ALLOW_TAG_ALT("i", "em"); | |
| 1370 ALLOW_TAG_ALT("italic", "em"); | |
| 1371 ALLOW_TAG("li"); | |
| 1372 ALLOW_TAG("ol"); | |
| 1373 ALLOW_TAG("p"); | |
| 1374 ALLOW_TAG("pre"); | |
| 1375 ALLOW_TAG("q"); | |
| 1376 ALLOW_TAG("span"); | |
| 1377 ALLOW_TAG("strong"); | |
| 1378 ALLOW_TAG("ul"); | |
| 1379 | |
| 1380 /* we skip <HR> because it's not legal in XHTML-IM. However, | |
| 1381 * we still want to send something sensible, so we put a | |
| 1382 * linebreak in its place. <BR> also needs special handling | |
| 1383 * because putting a </BR> to close it would just be dumb. */ | |
| 1384 if((!g_ascii_strncasecmp(c, "<br", 3) | |
| 1385 || !g_ascii_strncasecmp(c, "<hr", 3)) | |
| 1386 && (*(c+3) == '>' || | |
| 1387 !g_ascii_strncasecmp(c+3, "/>", 2) || | |
| 1388 !g_ascii_strncasecmp(c+3, " />", 3))) { | |
| 1389 c = strchr(c, '>') + 1; | |
| 1390 xhtml = g_string_append(xhtml, "<br/>"); | |
| 1391 if(*c != '\n') | |
| 1392 plain = g_string_append_c(plain, '\n'); | |
| 1393 continue; | |
| 1394 } | |
| 1395 if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) { | |
| 1396 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
| 1397 pt->src_tag = *(c+2) == '>' ? "u" : "underline"; | |
| 1398 pt->dest_tag = "span"; | |
| 1399 tags = g_list_prepend(tags, pt); | |
| 1400 c = strchr(c, '>') + 1; | |
| 1401 xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>"); | |
| 1402 continue; | |
| 1403 } | |
| 1404 if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) { | |
| 1405 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
| 1406 pt->src_tag = *(c+2) == '>' ? "s" : "strike"; | |
| 1407 pt->dest_tag = "span"; | |
| 1408 tags = g_list_prepend(tags, pt); | |
| 1409 c = strchr(c, '>') + 1; | |
| 1410 xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>"); | |
| 1411 continue; | |
| 1412 } | |
| 1413 if(!g_ascii_strncasecmp(c, "<sub>", 5)) { | |
| 1414 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
| 1415 pt->src_tag = "sub"; | |
| 1416 pt->dest_tag = "span"; | |
| 1417 tags = g_list_prepend(tags, pt); | |
| 1418 c = strchr(c, '>') + 1; | |
| 1419 xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>"); | |
| 1420 continue; | |
| 1421 } | |
| 1422 if(!g_ascii_strncasecmp(c, "<sup>", 5)) { | |
| 1423 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
| 1424 pt->src_tag = "sup"; | |
| 1425 pt->dest_tag = "span"; | |
| 1426 tags = g_list_prepend(tags, pt); | |
| 1427 c = strchr(c, '>') + 1; | |
| 1428 xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>"); | |
| 1429 continue; | |
| 1430 } | |
| 1431 if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) { | |
| 1432 const char *p = c; | |
| 1433 GString *style = g_string_new(""); | |
| 1434 struct gaim_parse_tag *pt; | |
| 1435 while(*p && *p != '>') { | |
| 1436 if(!g_ascii_strncasecmp(p, "color=", strlen("color="))) { | |
| 1437 const char *q = p + strlen("color="); | |
| 1438 GString *color = g_string_new(""); | |
| 1439 if(*q == '\'' || *q == '\"') | |
| 1440 q++; | |
| 1441 while(*q && *q != '\"' && *q != '\'' && *q != ' ') { | |
| 1442 color = g_string_append_c(color, *q); | |
| 1443 q++; | |
| 1444 } | |
| 1445 g_string_append_printf(style, "color: %s; ", color->str); | |
| 1446 g_string_free(color, TRUE); | |
| 1447 p = q; | |
| 1448 } else if(!g_ascii_strncasecmp(p, "face=", strlen("face="))) { | |
| 1449 const char *q = p + strlen("face="); | |
| 1450 gboolean space_allowed = FALSE; | |
| 1451 GString *face = g_string_new(""); | |
| 1452 if(*q == '\'' || *q == '\"') { | |
| 1453 space_allowed = TRUE; | |
| 1454 q++; | |
| 1455 } | |
| 1456 while(*q && *q != '\"' && *q != '\'' && (space_allowed || *q != ' ')) { | |
| 1457 face = g_string_append_c(face, *q); | |
| 1458 q++; | |
| 1459 } | |
| 1460 g_string_append_printf(style, "font-family: %s; ", face->str); | |
| 1461 g_string_free(face, TRUE); | |
| 1462 p = q; | |
| 1463 } else if(!g_ascii_strncasecmp(p, "size=", strlen("size="))) { | |
| 1464 const char *q = p + strlen("size="); | |
| 1465 int sz; | |
| 1466 const char *size = "medium"; | |
| 1467 if(*q == '\'' || *q == '\"') | |
| 1468 q++; | |
| 1469 sz = atoi(q); | |
| 1470 if(sz < 3) | |
| 1471 size = "smaller"; | |
| 1472 else if(sz > 3) | |
| 1473 size = "larger"; | |
| 1474 g_string_append_printf(style, "font-size: %s; ", size); | |
| 1475 p = q; | |
| 1476 } | |
| 1477 p++; | |
| 1478 } | |
| 1479 c = strchr(c, '>') + 1; | |
| 1480 pt = g_new0(struct gaim_parse_tag, 1); | |
| 1481 pt->src_tag = "font"; | |
| 1482 pt->dest_tag = "span"; | |
| 1483 tags = g_list_prepend(tags, pt); | |
| 1484 xhtml = g_string_append(xhtml, "<span"); | |
| 1485 if(style->len) | |
| 1486 g_string_append_printf(xhtml, " style='%s'", style->str); | |
| 1487 xhtml = g_string_append_c(xhtml, '>'); | |
| 1488 g_string_free(style, TRUE); | |
| 1489 continue; | |
| 1490 } | |
| 1491 if(!g_ascii_strncasecmp(c, "<body ", 6)) { | |
| 1492 const char *p = c; | |
| 1493 gboolean did_something = FALSE; | |
| 1494 while(*p && *p != '>') { | |
| 1495 if(!g_ascii_strncasecmp(p, "bgcolor=", strlen("bgcolor="))) { | |
| 1496 const char *q = p + strlen("bgcolor="); | |
| 1497 struct gaim_parse_tag *pt = g_new0(struct gaim_parse_tag, 1); | |
| 1498 GString *color = g_string_new(""); | |
| 1499 if(*q == '\'' || *q == '\"') | |
| 1500 q++; | |
| 1501 while(*q && *q != '\"' && *q != '\'' && *q != ' ') { | |
| 1502 color = g_string_append_c(color, *q); | |
| 1503 q++; | |
| 1504 } | |
| 1505 g_string_append_printf(xhtml, "<span style='background: %s;'>", color->str); | |
| 1506 g_string_free(color, TRUE); | |
| 1507 c = strchr(c, '>') + 1; | |
| 1508 pt->src_tag = "body"; | |
| 1509 pt->dest_tag = "span"; | |
| 1510 tags = g_list_prepend(tags, pt); | |
| 1511 did_something = TRUE; | |
| 1512 break; | |
| 1513 } | |
| 1514 p++; | |
| 1515 } | |
| 1516 if(did_something) continue; | |
| 1517 } | |
| 1518 /* this has to come after the special case for bgcolor */ | |
| 1519 ALLOW_TAG("body"); | |
| 1520 if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) { | |
| 1521 char *p = strstr(c + strlen("<!--"), "-->"); | |
| 1522 if(p) { | |
| 1523 xhtml = g_string_append(xhtml, "<!--"); | |
| 1524 c += strlen("<!--"); | |
| 1525 continue; | |
| 1526 } | |
| 1527 } | |
| 1528 | |
| 1529 xhtml = g_string_append(xhtml, "&lt;"); | |
| 1530 plain = g_string_append_c(plain, '<'); | |
| 1531 c++; | |
| 1532 } | |
| 1533 } else { | |
| 1534 xhtml = g_string_append_c(xhtml, *c); | |
| 1535 plain = g_string_append_c(plain, *c); | |
| 1536 c++; | |
| 1537 } | |
| 1538 } | |
| 1539 tag = tags; | |
| 1540 while(tag) { | |
| 1541 g_string_append_printf(xhtml, "</%s>", (char *)tag->data); | |
| 1542 tag = tag->next; | |
| 1543 } | |
| 1544 g_list_free(tags); | |
| 1545 if(xhtml_out) | |
| 1546 *xhtml_out = g_strdup(xhtml->str); | |
| 1547 if(plain_out) | |
| 1548 *plain_out = g_strdup(plain->str); | |
| 1549 g_string_free(xhtml, TRUE); | |
| 1550 g_string_free(plain, TRUE); | |
| 1551 } | |
| 1552 | |
| 1553 char * | |
| 1554 gaim_markup_strip_html(const char *str) | |
| 1555 { | |
| 1556 int i, j, k; | |
| 1557 gboolean visible = TRUE; | |
| 1558 gchar *str2; | |
| 1559 | |
| 1560 g_return_val_if_fail(str != NULL, NULL); | |
| 1561 | |
| 1562 str2 = g_strdup(str); | |
| 1563 | |
| 1564 for (i = 0, j = 0; str2[i]; i++) | |
| 1565 { | |
| 1566 if (str2[i] == '<') | |
| 1567 { | |
| 1568 k = i + 1; | |
| 1569 | |
| 1570 if(g_ascii_isspace(str2[k])) | |
| 1571 visible = TRUE; | |
| 1572 else | |
| 1573 { | |
| 1574 while (str2[k]) | |
| 1575 { | |
| 1576 if (str2[k] == '<') | |
| 1577 { | |
| 1578 visible = TRUE; | |
| 1579 break; | |
| 1580 } | |
| 1581 | |
| 1582 if (str2[k] == '>') | |
| 1583 { | |
| 1584 visible = FALSE; | |
| 1585 break; | |
| 1586 } | |
| 1587 | |
| 1588 k++; | |
| 1589 } | |
| 1590 } | |
| 1591 } | |
| 1592 else if (str2[i] == '>' && !visible) | |
| 1593 { | |
| 1594 visible = TRUE; | |
| 1595 continue; | |
| 1596 } | |
| 1597 | |
| 1598 if (str2[i] == '&' && strncasecmp(str2 + i, "&quot;", 6) == 0) | |
| 1599 { | |
| 1600 str2[j++] = '\"'; | |
| 1601 i = i + 5; | |
| 1602 continue; | |
| 1603 } | |
| 1604 | |
| 1605 if (visible) | |
| 1606 str2[j++] = str2[i]; | |
| 1607 } | |
| 1608 | |
| 1609 str2[j] = '\0'; | |
| 1610 | |
| 1611 return str2; | |
| 1612 } | |
| 1613 | |
| 1614 gboolean | |
| 1172 gaim_url_parse(const char *url, char **ret_host, int *ret_port, | 1615 gaim_url_parse(const char *url, char **ret_host, int *ret_port, |
| 1173 char **ret_path) | 1616 char **ret_path) |
| 1174 { | 1617 { |
| 1175 char scan_info[255]; | 1618 char scan_info[255]; |
| 1176 char port_str[5]; | 1619 char port_str[5]; |
