comparison src/util.c @ 8561:2d4ccd94e298

[gaim-migrate @ 9305] "In the IRC tooltip, there's a line "Channel:". In 0.75, this seems to have been merged with the "_Channel:" line. In English, this works because underscores in the tooltip are removed before being displayed. However, in Chinese and Japanese, the translation of "_Channel:" looks like "Channel (_C):" and this translated text does not make any sense in the tooltip. The tooltip thus should not use the "_Channel:" string. Otherwise the tooltip output would look very strange in certain locales (at least in Chinese and Japanese)." --Ambrose C. LI, who continues: "This second patch should be better. It correctly undoes the space character typically present before the left parenthesis, and adds some checks so that it should not corrupt multibyte UTF-8 characters. However, this has not been tested a lot. UTF-8 handling is also not an area I am familiar with. I don't know whether the C library has existing functions to handle the UTF-8 things." I'm assuming we have time to test this before 0.77. committer: Tailor Script <tailor@pidgin.im>
author Luke Schierer <lschiere@pidgin.im>
date Fri, 02 Apr 2004 06:18:14 +0000
parents 848dfa9fe9d4
children 599d6ac9bbfe
comparison
equal deleted inserted replaced
8560:832fd9b754d0 8561:2d4ccd94e298
2473 2473
2474 char *gaim_text_strip_mnemonic(const char *in) 2474 char *gaim_text_strip_mnemonic(const char *in)
2475 { 2475 {
2476 char *out; 2476 char *out;
2477 char *a; 2477 char *a;
2478 char *a0;
2478 const char *b; 2479 const char *b;
2479 2480
2480 g_return_val_if_fail(in != NULL, NULL); 2481 g_return_val_if_fail(in != NULL, NULL);
2481 2482
2482 out = g_malloc(strlen(in)+1); 2483 out = g_malloc(strlen(in)+1);
2483 a = out; 2484 a = out;
2484 b = in; 2485 b = in;
2485 2486
2487 a0 = a; /* The last non-space char seen so far, or the first char */
2488
2486 while(*b) { 2489 while(*b) {
2487 if(*b == '_') { 2490 if(*b == '_') {
2488 if(*(b+1) == '_') { 2491 if(a > out && b > in && *(b-1) == '(' && *(b+1) && !(*(b+1) & 0x80) && *(b+2) == ')') {
2492 /* Detected CJK style shortcut (Bug 875311) */
2493 a = a0; /* undo the left parenthesis */
2494 b += 3; /* and skip the whole mess */
2495 } else if(*(b+1) == '_') {
2489 *(a++) = '_'; 2496 *(a++) = '_';
2490 b += 2; 2497 b += 2;
2498 a0 = a;
2491 } else { 2499 } else {
2492 b++; 2500 b++;
2493 } 2501 }
2502 /* We don't want to corrupt the middle of UTF-8 characters */
2503 } else if (!(*b & 0x80)) { /* other 1-byte char */
2504 if (*b != ' ')
2505 a0 = a;
2506 *(a++) = *(b++);
2494 } else { 2507 } else {
2495 *(a++) = *(b++); 2508 /* Multibyte utf8 char, don't look for _ inside these */
2509 int n = 0;
2510 int i;
2511 if ((*b & 0xe0) == 0xc0) {
2512 n = 2;
2513 } else if ((*b & 0xf0) == 0xe0) {
2514 n = 3;
2515 } else if ((*b & 0xf8) == 0xf0) {
2516 n = 4;
2517 } else if ((*b & 0xfc) == 0xf8) {
2518 n = 5;
2519 } else if ((*b & 0xfe) == 0xfc) {
2520 n = 6;
2521 } else { /* Illegal utf8 */
2522 n = 1;
2523 }
2524 a0 = a; /* unless we want to delete CJK spaces too */
2525 for (i = 0; i < n && *b; i += 1) {
2526 *(a++) = *(b++);
2527 }
2496 } 2528 }
2497 } 2529 }
2498 *a = '\0'; 2530 *a = '\0';
2499 2531
2500 return out; 2532 return out;