comparison src/coding.c @ 83541:694bbb62a75d

Merged from emacs@sv.gnu.org Patches applied: * emacs@sv.gnu.org/emacs--devo--0--patch-371 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-372 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-373 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-374 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-375 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-376 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-377 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-378 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-379 Merge from erc--emacs--21 * emacs@sv.gnu.org/emacs--devo--0--patch-380 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-381 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-382 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-383 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-384 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-385 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-386 Update from erc--emacs--22 * emacs@sv.gnu.org/emacs--devo--0--patch-387 Fix ERC bug introduced in last patch * emacs@sv.gnu.org/emacs--devo--0--patch-388 Update from erc--emacs--22 * emacs@sv.gnu.org/emacs--devo--0--patch-389 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-390 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-391 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-392 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-393 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-394 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-395 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-396 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-397 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-398 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-399 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-400 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-401 
Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-402 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-403 Rcirc update from Ryan Yeske * emacs@sv.gnu.org/emacs--devo--0--patch-404 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-405 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-406 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-407 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-408 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-409 Update from CVS * emacs@sv.gnu.org/emacs--devo--0--patch-410 Merge from gnus--rel--5.10 * emacs@sv.gnu.org/emacs--devo--0--patch-411 Miscellaneous tq-related fixes. * emacs@sv.gnu.org/emacs--devo--0--patch-412 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-121 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-122 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-123 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-124 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-125 Update from CVS * emacs@sv.gnu.org/gnus--rel--5.10--patch-126 Merge from emacs--devo--0 * emacs@sv.gnu.org/gnus--rel--5.10--patch-127 Update from CVS git-archimport-id: lorentey@elte.hu--2004/emacs--multi-tty--0--patch-581
author Karoly Lorentey <lorentey@elte.hu>
date Sat, 14 Oct 2006 16:56:21 +0000
parents 02e39decdc84 94e4795b333d
children 2d56e13fd23d
comparison
equal deleted inserted replaced
83540:0c89a85addc3 83541:694bbb62a75d
217 c2 = *src++; \ 217 c2 = *src++; \
218 } while (0) 218 } while (0)
219 219
220 220
221 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte 221 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
222 form if MULTIBYTEP is nonzero. */ 222 form if MULTIBYTEP is nonzero. In addition, if SRC is not less
223 223 than SRC_END, return with RET. */
224 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep) \ 224
225 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep, ret) \
225 do { \ 226 do { \
226 if (src >= src_end) \ 227 if (src >= src_end) \
227 { \ 228 { \
228 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \ 229 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
229 goto label_end_of_loop; \ 230 return ret; \
230 } \ 231 } \
231 c1 = *src++; \ 232 c1 = *src++; \
232 if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \ 233 if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \
233 c1 = *src++ - 0x20; \ 234 c1 = *src++ - 0x20; \
234 } while (0) 235 } while (0)
626 struct coding_system dummy_coding; 627 struct coding_system dummy_coding;
627 struct coding_system *coding = &dummy_coding; 628 struct coding_system *coding = &dummy_coding;
628 629
629 while (1) 630 while (1)
630 { 631 {
631 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 632 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
632 633 CODING_CATEGORY_MASK_EMACS_MULE);
633 if (composing) 634 if (composing)
634 { 635 {
635 if (c < 0xA0) 636 if (c < 0xA0)
636 composing = 0; 637 composing = 0;
637 else if (c == 0xA0) 638 else if (c == 0xA0)
638 { 639 {
639 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 640 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
640 c &= 0x7F; 641 c &= 0x7F;
641 } 642 }
642 else 643 else
643 c -= 0x20; 644 c -= 0x20;
644 } 645 }
663 return 0; 664 return 0;
664 src = src_base + bytes; 665 src = src_base + bytes;
665 } 666 }
666 } 667 }
667 } 668 }
668 label_end_of_loop:
669 return CODING_CATEGORY_MASK_EMACS_MULE;
670 } 669 }
671 670
672 671
673 /* Record the starting position START and METHOD of one composition. */ 672 /* Record the starting position START and METHOD of one composition. */
674 673
1419 struct coding_system dummy_coding; 1418 struct coding_system dummy_coding;
1420 struct coding_system *coding = &dummy_coding; 1419 struct coding_system *coding = &dummy_coding;
1421 Lisp_Object safe_chars; 1420 Lisp_Object safe_chars;
1422 1421
1423 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; 1422 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1424 while (mask && src < src_end) 1423 while (mask)
1425 { 1424 {
1426 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 1425 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
1427 retry: 1426 retry:
1428 switch (c) 1427 switch (c)
1429 { 1428 {
1430 case ISO_CODE_ESC: 1429 case ISO_CODE_ESC:
1431 if (inhibit_iso_escape_detection) 1430 if (inhibit_iso_escape_detection)
1432 break; 1431 break;
1433 single_shifting = 0; 1432 single_shifting = 0;
1434 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 1433 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
1435 if (c >= '(' && c <= '/') 1434 if (c >= '(' && c <= '/')
1436 { 1435 {
1437 /* Designation sequence for a charset of dimension 1. */ 1436 /* Designation sequence for a charset of dimension 1. */
1438 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); 1437 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, mask & mask_found);
1439 if (c1 < ' ' || c1 >= 0x80 1438 if (c1 < ' ' || c1 >= 0x80
1440 || (charset = iso_charset_table[0][c >= ','][c1]) < 0) 1439 || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1441 /* Invalid designation sequence. Just ignore. */ 1440 /* Invalid designation sequence. Just ignore. */
1442 break; 1441 break;
1443 reg[(c - '(') % 4] = charset; 1442 reg[(c - '(') % 4] = charset;
1444 } 1443 }
1445 else if (c == '$') 1444 else if (c == '$')
1446 { 1445 {
1447 /* Designation sequence for a charset of dimension 2. */ 1446 /* Designation sequence for a charset of dimension 2. */
1448 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 1447 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
1449 if (c >= '@' && c <= 'B') 1448 if (c >= '@' && c <= 'B')
1450 /* Designation for JISX0208.1978, GB2312, or JISX0208. */ 1449 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
1451 reg[0] = charset = iso_charset_table[1][0][c]; 1450 reg[0] = charset = iso_charset_table[1][0][c];
1452 else if (c >= '(' && c <= '/') 1451 else if (c >= '(' && c <= '/')
1453 { 1452 {
1454 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); 1453 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep,
1454 mask & mask_found);
1455 if (c1 < ' ' || c1 >= 0x80 1455 if (c1 < ' ' || c1 >= 0x80
1456 || (charset = iso_charset_table[1][c >= ','][c1]) < 0) 1456 || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1457 /* Invalid designation sequence. Just ignore. */ 1457 /* Invalid designation sequence. Just ignore. */
1458 break; 1458 break;
1459 reg[(c - '(') % 4] = charset; 1459 reg[(c - '(') % 4] = charset;
1624 int i = 1; 1624 int i = 1;
1625 1625
1626 c = -1; 1626 c = -1;
1627 while (src < src_end) 1627 while (src < src_end)
1628 { 1628 {
1629 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 1629 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
1630 mask & mask_found);
1630 if (c < 0xA0) 1631 if (c < 0xA0)
1631 break; 1632 break;
1632 i++; 1633 i++;
1633 } 1634 }
1634 1635
1642 } 1643 }
1643 } 1644 }
1644 break; 1645 break;
1645 } 1646 }
1646 } 1647 }
1647 label_end_of_loop:
1648 return (mask & mask_found); 1648 return (mask & mask_found);
1649 } 1649 }
1650 1650
1651 /* Decode a character of which charset is CHARSET, the 1st position 1651 /* Decode a character of which charset is CHARSET, the 1st position
1652 code is C1, the 2nd position code is C2, and return the decoded 1652 code is C1, the 2nd position code is C2, and return the decoded
2913 struct coding_system dummy_coding; 2913 struct coding_system dummy_coding;
2914 struct coding_system *coding = &dummy_coding; 2914 struct coding_system *coding = &dummy_coding;
2915 2915
2916 while (1) 2916 while (1)
2917 { 2917 {
2918 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2918 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_SJIS);
2919 if (c < 0x80) 2919 if (c < 0x80)
2920 continue; 2920 continue;
2921 if (c == 0x80 || c == 0xA0 || c > 0xEF) 2921 if (c == 0x80 || c == 0xA0 || c > 0xEF)
2922 return 0; 2922 return 0;
2923 if (c <= 0x9F || c >= 0xE0) 2923 if (c <= 0x9F || c >= 0xE0)
2924 { 2924 {
2925 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2925 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
2926 if (c < 0x40 || c == 0x7F || c > 0xFC) 2926 if (c < 0x40 || c == 0x7F || c > 0xFC)
2927 return 0; 2927 return 0;
2928 } 2928 }
2929 } 2929 }
2930 label_end_of_loop:
2931 return CODING_CATEGORY_MASK_SJIS;
2932 } 2930 }
2933 2931
2934 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2932 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2935 Check if a text is encoded in BIG5. If it is, return 2933 Check if a text is encoded in BIG5. If it is, return
2936 CODING_CATEGORY_MASK_BIG5, else return 0. */ 2934 CODING_CATEGORY_MASK_BIG5, else return 0. */
2945 struct coding_system dummy_coding; 2943 struct coding_system dummy_coding;
2946 struct coding_system *coding = &dummy_coding; 2944 struct coding_system *coding = &dummy_coding;
2947 2945
2948 while (1) 2946 while (1)
2949 { 2947 {
2950 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2948 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_BIG5);
2951 if (c < 0x80) 2949 if (c < 0x80)
2952 continue; 2950 continue;
2953 if (c < 0xA1 || c > 0xFE) 2951 if (c < 0xA1 || c > 0xFE)
2954 return 0; 2952 return 0;
2955 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2953 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
2956 if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE) 2954 if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2957 return 0; 2955 return 0;
2958 } 2956 }
2959 label_end_of_loop:
2960 return CODING_CATEGORY_MASK_BIG5;
2961 } 2957 }
2962 2958
2963 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2959 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2964 Check if a text is encoded in UTF-8. If it is, return 2960 Check if a text is encoded in UTF-8. If it is, return
2965 CODING_CATEGORY_MASK_UTF_8, else return 0. */ 2961 CODING_CATEGORY_MASK_UTF_8, else return 0. */
2983 struct coding_system dummy_coding; 2979 struct coding_system dummy_coding;
2984 struct coding_system *coding = &dummy_coding; 2980 struct coding_system *coding = &dummy_coding;
2985 2981
2986 while (1) 2982 while (1)
2987 { 2983 {
2988 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2984 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_UTF_8);
2989 if (UTF_8_1_OCTET_P (c)) 2985 if (UTF_8_1_OCTET_P (c))
2990 continue; 2986 continue;
2991 else if (UTF_8_2_OCTET_LEADING_P (c)) 2987 else if (UTF_8_2_OCTET_LEADING_P (c))
2992 seq_maybe_bytes = 1; 2988 seq_maybe_bytes = 1;
2993 else if (UTF_8_3_OCTET_LEADING_P (c)) 2989 else if (UTF_8_3_OCTET_LEADING_P (c))
3001 else 2997 else
3002 return 0; 2998 return 0;
3003 2999
3004 do 3000 do
3005 { 3001 {
3006 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 3002 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
3007 if (!UTF_8_EXTRA_OCTET_P (c)) 3003 if (!UTF_8_EXTRA_OCTET_P (c))
3008 return 0; 3004 return 0;
3009 seq_maybe_bytes--; 3005 seq_maybe_bytes--;
3010 } 3006 }
3011 while (seq_maybe_bytes > 0); 3007 while (seq_maybe_bytes > 0);
3012 } 3008 }
3013
3014 label_end_of_loop:
3015 return CODING_CATEGORY_MASK_UTF_8;
3016 } 3009 }
3017 3010
3018 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 3011 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3019 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or 3012 Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
3020 Little Endian (otherwise). If it is, return 3013 Little Endian (otherwise). If it is, return
3039 unsigned char c1, c2; 3032 unsigned char c1, c2;
3040 /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */ 3033 /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */
3041 struct coding_system dummy_coding; 3034 struct coding_system dummy_coding;
3042 struct coding_system *coding = &dummy_coding; 3035 struct coding_system *coding = &dummy_coding;
3043 3036
3044 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); 3037 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, 0);
3045 ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep); 3038 ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep, 0);
3046 3039
3047 if ((c1 == 0xFF) && (c2 == 0xFE)) 3040 if ((c1 == 0xFF) && (c2 == 0xFE))
3048 return CODING_CATEGORY_MASK_UTF_16_LE; 3041 return CODING_CATEGORY_MASK_UTF_16_LE;
3049 else if ((c1 == 0xFE) && (c2 == 0xFF)) 3042 else if ((c1 == 0xFE) && (c2 == 0xFF))
3050 return CODING_CATEGORY_MASK_UTF_16_BE; 3043 return CODING_CATEGORY_MASK_UTF_16_BE;
3051
3052 label_end_of_loop:
3053 return 0; 3044 return 0;
3054 } 3045 }
3055 3046
3056 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". 3047 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3057 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */ 3048 If SJIS_P is 1, decode SJIS text, else decode BIG5 text. */
3316 return 0; 3307 return 0;
3317 3308
3318 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; 3309 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3319 while (1) 3310 while (1)
3320 { 3311 {
3321 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 3312 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_CCL);
3322 if (! valid[c]) 3313 if (! valid[c])
3323 return 0; 3314 return 0;
3324 } 3315 }
3325 label_end_of_loop:
3326 return CODING_CATEGORY_MASK_CCL;
3327 } 3316 }
3328 3317
3329 3318
3330 /*** 6. End-of-line handlers ***/ 3319 /*** 6. End-of-line handlers ***/
3331 3320
7545 return Qnil; 7534 return Qnil;
7546 if (! NILP (Fcoding_system_p (val))) 7535 if (! NILP (Fcoding_system_p (val)))
7547 return Fcons (val, val); 7536 return Fcons (val, val);
7548 if (! NILP (Ffboundp (val))) 7537 if (! NILP (Ffboundp (val)))
7549 { 7538 {
7550 val = safe_call1 (val, Flist (nargs, args)); 7539 /* We use call1 rather than safe_call1
7540 so as to get bug reports about functions called here
7541 which don't handle the current interface. */
7542 val = call1 (val, Flist (nargs, args));
7551 if (CONSP (val)) 7543 if (CONSP (val))
7552 return val; 7544 return val;
7553 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val))) 7545 if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7554 return Fcons (val, val); 7546 return Fcons (val, val);
7555 } 7547 }
7969 If VAL is a coding system, it is used for both decoding and encoding 7961 If VAL is a coding system, it is used for both decoding and encoding
7970 the file contents. 7962 the file contents.
7971 If VAL is a cons of coding systems, the car part is used for decoding, 7963 If VAL is a cons of coding systems, the car part is used for decoding,
7972 and the cdr part is used for encoding. 7964 and the cdr part is used for encoding.
7973 If VAL is a function symbol, the function must return a coding system 7965 If VAL is a function symbol, the function must return a coding system
7974 or a cons of coding systems which are used as above. The function gets 7966 or a cons of coding systems which are used as above. The function is
7975 the arguments with which `find-operation-coding-system' was called. 7967 called with an argument that is a list of the arguments with which
7968 `find-operation-coding-system' was called.
7976 7969
7977 See also the function `find-operation-coding-system' 7970 See also the function `find-operation-coding-system'
7978 and the variable `auto-coding-alist'. */); 7971 and the variable `auto-coding-alist'. */);
7979 Vfile_coding_system_alist = Qnil; 7972 Vfile_coding_system_alist = Qnil;
7980 7973