comparison src/coding.c @ 34531:37f85e931855

(ONE_MORE_BYTE_CHECK_MULTIBYTE): New macro. (detect_coding_emacs_mule, detect_coding_iso2022) (detect_coding_sjis, detect_coding_big5, detect_coding_utf_8) (detect_coding_utf_16, detect_coding_ccl): Make them static. New argument MULTIBYTEP. Callers changed. (detect_coding_mask, detect_coding_system): New argument MULTIBYTEP. Callers changed.
author Kenichi Handa <handa@m17n.org>
date Wed, 13 Dec 2000 23:24:37 +0000
parents 78561a43cdd1
children aa667988f2b0
comparison
equal deleted inserted replaced
34530:8739ed222334 34531:37f85e931855
110 110
111 These functions check if a text between SRC and SRC_END is encoded 111 These functions check if a text between SRC and SRC_END is encoded
112 in the coding system category XXX. Each returns an integer value in 112 in the coding system category XXX. Each returns an integer value in
113 which appropriate flag bits for the category XXX is set. The flag 113 which appropriate flag bits for the category XXX is set. The flag
114 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the 114 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the
115 template of these functions. */ 115 template of these functions. If MULTIBYTEP is nonzero, 8-bit codes
116 of the range 0x80..0x9F are in multibyte form. */
116 #if 0 117 #if 0
117 int 118 int
118 detect_coding_emacs_mule (src, src_end) 119 detect_coding_emacs_mule (src, src_end, multibytep)
119 unsigned char *src, *src_end; 120 unsigned char *src, *src_end;
121 int multibytep;
120 { 122 {
121 ... 123 ...
122 } 124 }
123 #endif 125 #endif
124 126
207 } \ 209 } \
208 c1 = *src++; \ 210 c1 = *src++; \
209 c2 = *src++; \ 211 c2 = *src++; \
210 } while (0) 212 } while (0)
211 213
214
215 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
216 form if MULTIBYTEP is nonzero. */
217
218 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep) \
219 do { \
220 if (src >= src_end) \
221 { \
222 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
223 goto label_end_of_loop; \
224 } \
225 c1 = *src++; \
226 if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \
227 c1 = *src++ - 0x20; \
228 } while (0)
212 229
213 /* Set C to the next character at the source text pointed by `src'. 230 /* Set C to the next character at the source text pointed by `src'.
214 If there are not enough characters in the source, jump to 231 If there are not enough characters in the source, jump to
215 `label_end_of_loop'. The caller should set variables `coding' 232 `label_end_of_loop'. The caller should set variables `coding'
216 `src', `src_end', and `translation_table' to appropriate pointers 233 `src', `src_end', and `translation_table' to appropriate pointers
534 551
535 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 552 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
536 Check if a text is encoded in Emacs' internal format. If it is, 553 Check if a text is encoded in Emacs' internal format. If it is,
537 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */ 554 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
538 555
539 int 556 static int
540 detect_coding_emacs_mule (src, src_end) 557 detect_coding_emacs_mule (src, src_end, multibytep)
541 unsigned char *src, *src_end; 558 unsigned char *src, *src_end;
559 int multibytep;
542 { 560 {
543 unsigned char c; 561 unsigned char c;
544 int composing = 0; 562 int composing = 0;
545 /* Dummy for ONE_MORE_BYTE. */ 563 /* Dummy for ONE_MORE_BYTE. */
546 struct coding_system dummy_coding; 564 struct coding_system dummy_coding;
547 struct coding_system *coding = &dummy_coding; 565 struct coding_system *coding = &dummy_coding;
548 566
549 while (1) 567 while (1)
550 { 568 {
551 ONE_MORE_BYTE (c); 569 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
552 570
553 if (composing) 571 if (composing)
554 { 572 {
555 if (c < 0xA0) 573 if (c < 0xA0)
556 composing = 0; 574 composing = 0;
557 else if (c == 0xA0) 575 else if (c == 0xA0)
558 { 576 {
559 ONE_MORE_BYTE (c); 577 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
560 c &= 0x7F; 578 c &= 0x7F;
561 } 579 }
562 else 580 else
563 c -= 0x20; 581 c -= 0x20;
564 } 582 }
879 CODING_CATEGORY_MASK_ISO_7_ELSE 897 CODING_CATEGORY_MASK_ISO_7_ELSE
880 CODING_CATEGORY_MASK_ISO_8_ELSE 898 CODING_CATEGORY_MASK_ISO_8_ELSE
881 are set. If a code which should never appear in ISO2022 is found, 899 are set. If a code which should never appear in ISO2022 is found,
882 returns 0. */ 900 returns 0. */
883 901
884 int 902 static int
885 detect_coding_iso2022 (src, src_end) 903 detect_coding_iso2022 (src, src_end, multibytep)
886 unsigned char *src, *src_end; 904 unsigned char *src, *src_end;
905 int multibytep;
887 { 906 {
888 int mask = CODING_CATEGORY_MASK_ISO; 907 int mask = CODING_CATEGORY_MASK_ISO;
889 int mask_found = 0; 908 int mask_found = 0;
890 int reg[4], shift_out = 0, single_shifting = 0; 909 int reg[4], shift_out = 0, single_shifting = 0;
891 int c, c1, i, charset; 910 int c, c1, i, charset;
895 Lisp_Object safe_chars; 914 Lisp_Object safe_chars;
896 915
897 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; 916 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
898 while (mask && src < src_end) 917 while (mask && src < src_end)
899 { 918 {
900 ONE_MORE_BYTE (c); 919 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
901 switch (c) 920 switch (c)
902 { 921 {
903 case ISO_CODE_ESC: 922 case ISO_CODE_ESC:
904 if (inhibit_iso_escape_detection) 923 if (inhibit_iso_escape_detection)
905 break; 924 break;
906 single_shifting = 0; 925 single_shifting = 0;
907 ONE_MORE_BYTE (c); 926 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
908 if (c >= '(' && c <= '/') 927 if (c >= '(' && c <= '/')
909 { 928 {
910 /* Designation sequence for a charset of dimension 1. */ 929 /* Designation sequence for a charset of dimension 1. */
911 ONE_MORE_BYTE (c1); 930 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
912 if (c1 < ' ' || c1 >= 0x80 931 if (c1 < ' ' || c1 >= 0x80
913 || (charset = iso_charset_table[0][c >= ','][c1]) < 0) 932 || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
914 /* Invalid designation sequence. Just ignore. */ 933 /* Invalid designation sequence. Just ignore. */
915 break; 934 break;
916 reg[(c - '(') % 4] = charset; 935 reg[(c - '(') % 4] = charset;
917 } 936 }
918 else if (c == '$') 937 else if (c == '$')
919 { 938 {
920 /* Designation sequence for a charset of dimension 2. */ 939 /* Designation sequence for a charset of dimension 2. */
921 ONE_MORE_BYTE (c); 940 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
922 if (c >= '@' && c <= 'B') 941 if (c >= '@' && c <= 'B')
923 /* Designation for JISX0208.1978, GB2312, or JISX0208. */ 942 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
924 reg[0] = charset = iso_charset_table[1][0][c]; 943 reg[0] = charset = iso_charset_table[1][0][c];
925 else if (c >= '(' && c <= '/') 944 else if (c >= '(' && c <= '/')
926 { 945 {
927 ONE_MORE_BYTE (c1); 946 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
928 if (c1 < ' ' || c1 >= 0x80 947 if (c1 < ' ' || c1 >= 0x80
929 || (charset = iso_charset_table[1][c >= ','][c1]) < 0) 948 || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
930 /* Invalid designation sequence. Just ignore. */ 949 /* Invalid designation sequence. Just ignore. */
931 break; 950 break;
932 reg[(c - '(') % 4] = charset; 951 reg[(c - '(') % 4] = charset;
1072 && mask & CODING_CATEGORY_MASK_ISO_8_2) 1091 && mask & CODING_CATEGORY_MASK_ISO_8_2)
1073 { 1092 {
1074 int i = 1; 1093 int i = 1;
1075 while (src < src_end) 1094 while (src < src_end)
1076 { 1095 {
1077 ONE_MORE_BYTE (c); 1096 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1078 if (c < 0xA0) 1097 if (c < 0xA0)
1079 break; 1098 break;
1080 i++; 1099 i++;
1081 } 1100 }
1082 1101
2290 2309
2291 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2310 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2292 Check if a text is encoded in SJIS. If it is, return 2311 Check if a text is encoded in SJIS. If it is, return
2293 CODING_CATEGORY_MASK_SJIS, else return 0. */ 2312 CODING_CATEGORY_MASK_SJIS, else return 0. */
2294 2313
2295 int 2314 static int
2296 detect_coding_sjis (src, src_end) 2315 detect_coding_sjis (src, src_end, multibytep)
2297 unsigned char *src, *src_end; 2316 unsigned char *src, *src_end;
2317 int multibytep;
2298 { 2318 {
2299 int c; 2319 int c;
2300 /* Dummy for ONE_MORE_BYTE. */ 2320 /* Dummy for ONE_MORE_BYTE. */
2301 struct coding_system dummy_coding; 2321 struct coding_system dummy_coding;
2302 struct coding_system *coding = &dummy_coding; 2322 struct coding_system *coding = &dummy_coding;
2303 2323
2304 while (1) 2324 while (1)
2305 { 2325 {
2306 ONE_MORE_BYTE (c); 2326 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2307 if (c >= 0x81) 2327 if (c >= 0x81)
2308 { 2328 {
2309 if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF)) 2329 if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
2310 { 2330 {
2311 ONE_MORE_BYTE (c); 2331 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2312 if (c < 0x40 || c == 0x7F || c > 0xFC) 2332 if (c < 0x40 || c == 0x7F || c > 0xFC)
2313 return 0; 2333 return 0;
2314 } 2334 }
2315 else if (c > 0xDF) 2335 else if (c > 0xDF)
2316 return 0; 2336 return 0;
2322 2342
2323 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2343 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2324 Check if a text is encoded in BIG5. If it is, return 2344 Check if a text is encoded in BIG5. If it is, return
2325 CODING_CATEGORY_MASK_BIG5, else return 0. */ 2345 CODING_CATEGORY_MASK_BIG5, else return 0. */
2326 2346
2327 int 2347 static int
2328 detect_coding_big5 (src, src_end) 2348 detect_coding_big5 (src, src_end, multibytep)
2329 unsigned char *src, *src_end; 2349 unsigned char *src, *src_end;
2350 int multibytep;
2330 { 2351 {
2331 int c; 2352 int c;
2332 /* Dummy for ONE_MORE_BYTE. */ 2353 /* Dummy for ONE_MORE_BYTE. */
2333 struct coding_system dummy_coding; 2354 struct coding_system dummy_coding;
2334 struct coding_system *coding = &dummy_coding; 2355 struct coding_system *coding = &dummy_coding;
2335 2356
2336 while (1) 2357 while (1)
2337 { 2358 {
2338 ONE_MORE_BYTE (c); 2359 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2339 if (c >= 0xA1) 2360 if (c >= 0xA1)
2340 { 2361 {
2341 ONE_MORE_BYTE (c); 2362 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2342 if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) 2363 if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2343 return 0; 2364 return 0;
2344 } 2365 }
2345 } 2366 }
2346 label_end_of_loop: 2367 label_end_of_loop:
2357 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0) 2378 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2358 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0) 2379 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2359 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) 2380 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2360 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC) 2381 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2361 2382
2362 int 2383 static int
2363 detect_coding_utf_8 (src, src_end) 2384 detect_coding_utf_8 (src, src_end, multibytep)
2364 unsigned char *src, *src_end; 2385 unsigned char *src, *src_end;
2386 int multibytep;
2365 { 2387 {
2366 unsigned char c; 2388 unsigned char c;
2367 int seq_maybe_bytes; 2389 int seq_maybe_bytes;
2368 /* Dummy for ONE_MORE_BYTE. */ 2390 /* Dummy for ONE_MORE_BYTE. */
2369 struct coding_system dummy_coding; 2391 struct coding_system dummy_coding;
2370 struct coding_system *coding = &dummy_coding; 2392 struct coding_system *coding = &dummy_coding;
2371 2393
2372 while (1) 2394 while (1)
2373 { 2395 {
2374 ONE_MORE_BYTE (c); 2396 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2375 if (UTF_8_1_OCTET_P (c)) 2397 if (UTF_8_1_OCTET_P (c))
2376 continue; 2398 continue;
2377 else if (UTF_8_2_OCTET_LEADING_P (c)) 2399 else if (UTF_8_2_OCTET_LEADING_P (c))
2378 seq_maybe_bytes = 1; 2400 seq_maybe_bytes = 1;
2379 else if (UTF_8_3_OCTET_LEADING_P (c)) 2401 else if (UTF_8_3_OCTET_LEADING_P (c))
2387 else 2409 else
2388 return 0; 2410 return 0;
2389 2411
2390 do 2412 do
2391 { 2413 {
2392 ONE_MORE_BYTE (c); 2414 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2393 if (!UTF_8_EXTRA_OCTET_P (c)) 2415 if (!UTF_8_EXTRA_OCTET_P (c))
2394 return 0; 2416 return 0;
2395 seq_maybe_bytes--; 2417 seq_maybe_bytes--;
2396 } 2418 }
2397 while (seq_maybe_bytes > 0); 2419 while (seq_maybe_bytes > 0);
2415 (((val) & 0xD800) == 0xD800) 2437 (((val) & 0xD800) == 0xD800)
2416 2438
2417 #define UTF_16_LOW_SURROGATE_P(val) \ 2439 #define UTF_16_LOW_SURROGATE_P(val) \
2418 (((val) & 0xDC00) == 0xDC00) 2440 (((val) & 0xDC00) == 0xDC00)
2419 2441
2420 int 2442 static int
2421 detect_coding_utf_16 (src, src_end) 2443 detect_coding_utf_16 (src, src_end, multibytep)
2422 unsigned char *src, *src_end; 2444 unsigned char *src, *src_end;
2445 int multibytep;
2423 { 2446 {
2424 unsigned char c1, c2; 2447 unsigned char c1, c2;
2425 /* Dummy for TWO_MORE_BYTES. */ 2448 /* Dummy for TWO_MORE_BYTES. */
2426 struct coding_system dummy_coding; 2449 struct coding_system dummy_coding;
2427 struct coding_system *coding = &dummy_coding; 2450 struct coding_system *coding = &dummy_coding;
2428 2451
2429 TWO_MORE_BYTES (c1, c2); 2452 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
2453 ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
2430 2454
2431 if ((c1 == 0xFF) && (c2 == 0xFE)) 2455 if ((c1 == 0xFF) && (c2 == 0xFE))
2432 return CODING_CATEGORY_MASK_UTF_16_LE; 2456 return CODING_CATEGORY_MASK_UTF_16_LE;
2433 else if ((c1 == 0xFE) && (c2 == 0xFF)) 2457 else if ((c1 == 0xFE) && (c2 == 0xFF))
2434 return CODING_CATEGORY_MASK_UTF_16_BE; 2458 return CODING_CATEGORY_MASK_UTF_16_BE;
2675 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2699 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2676 Check if a text is encoded in a coding system of which 2700 Check if a text is encoded in a coding system of which
2677 encoder/decoder are written in CCL program. If it is, return 2701 encoder/decoder are written in CCL program. If it is, return
2678 CODING_CATEGORY_MASK_CCL, else return 0. */ 2702 CODING_CATEGORY_MASK_CCL, else return 0. */
2679 2703
2680 int 2704 static int
2681 detect_coding_ccl (src, src_end) 2705 detect_coding_ccl (src, src_end, multibytep)
2682 unsigned char *src, *src_end; 2706 unsigned char *src, *src_end;
2707 int multibytep;
2683 { 2708 {
2684 unsigned char *valid; 2709 unsigned char *valid;
2685 int c; 2710 int c;
2686 /* Dummy for ONE_MORE_BYTE. */ 2711 /* Dummy for ONE_MORE_BYTE. */
2687 struct coding_system dummy_coding; 2712 struct coding_system dummy_coding;
2692 return 0; 2717 return 0;
2693 2718
2694 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; 2719 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
2695 while (1) 2720 while (1)
2696 { 2721 {
2697 ONE_MORE_BYTE (c); 2722 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2698 if (! valid[c]) 2723 if (! valid[c])
2699 return 0; 2724 return 0;
2700 } 2725 }
2701 label_end_of_loop: 2726 label_end_of_loop:
2702 return CODING_CATEGORY_MASK_CCL; 2727 return CODING_CATEGORY_MASK_CCL;
3482 If it detects possible coding systems, return an integer in which 3507 If it detects possible coding systems, return an integer in which
3483 appropriate flag bits are set. Flag bits are defined by macros 3508 appropriate flag bits are set. Flag bits are defined by macros
3484 CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL, 3509 CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL,
3485 it should point the table `coding_priorities'. In that case, only 3510 it should point the table `coding_priorities'. In that case, only
3486 the flag bit for a coding system of the highest priority is set in 3511 the flag bit for a coding system of the highest priority is set in
3487 the returned value. 3512 the returned value. If MULTIBYTEP is nonzero, 8-bit codes of the
3513 range 0x80..0x9F are in multibyte form.
3488 3514
3489 How many ASCII characters are at the head is returned as *SKIP. */ 3515 How many ASCII characters are at the head is returned as *SKIP. */
3490 3516
3491 static int 3517 static int
3492 detect_coding_mask (source, src_bytes, priorities, skip) 3518 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
3493 unsigned char *source; 3519 unsigned char *source;
3494 int src_bytes, *priorities, *skip; 3520 int src_bytes, *priorities, *skip;
3521 int multibytep;
3495 { 3522 {
3496 register unsigned char c; 3523 register unsigned char c;
3497 unsigned char *src = source, *src_end = source + src_bytes; 3524 unsigned char *src = source, *src_end = source + src_bytes;
3498 unsigned int mask, utf16_examined_p, iso2022_examined_p; 3525 unsigned int mask, utf16_examined_p, iso2022_examined_p;
3499 int i, idx; 3526 int i, idx;
3517 Now, try to find in which coding system the text is encoded. */ 3544 Now, try to find in which coding system the text is encoded. */
3518 if (c < 0x80) 3545 if (c < 0x80)
3519 { 3546 {
3520 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */ 3547 /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3521 /* C is an ISO2022 specific control code of C0. */ 3548 /* C is an ISO2022 specific control code of C0. */
3522 mask = detect_coding_iso2022 (src, src_end); 3549 mask = detect_coding_iso2022 (src, src_end, multibytep);
3523 if (mask == 0) 3550 if (mask == 0)
3524 { 3551 {
3525 /* No valid ISO2022 code follows C. Try again. */ 3552 /* No valid ISO2022 code follows C. Try again. */
3526 src++; 3553 src++;
3527 if (c == ISO_CODE_ESC) 3554 if (c == ISO_CODE_ESC)
3541 } 3568 }
3542 } 3569 }
3543 else 3570 else
3544 { 3571 {
3545 int try; 3572 int try;
3573
3574 if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
3575 c = *src++ - 0x20;
3546 3576
3547 if (c < 0xA0) 3577 if (c < 0xA0)
3548 { 3578 {
3549 /* C is the first byte of SJIS character code, 3579 /* C is the first byte of SJIS character code,
3550 or a leading-code of Emacs' internal format (emacs-mule), 3580 or a leading-code of Emacs' internal format (emacs-mule),
3600 { 3630 {
3601 mask |= detect_coding_iso2022 (src, src_end); 3631 mask |= detect_coding_iso2022 (src, src_end);
3602 iso2022_examined_p = 1; 3632 iso2022_examined_p = 1;
3603 } 3633 }
3604 else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS) 3634 else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3605 mask |= detect_coding_sjis (src, src_end); 3635 mask |= detect_coding_sjis (src, src_end, multibytep);
3606 else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8) 3636 else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
3607 mask |= detect_coding_utf_8 (src, src_end); 3637 mask |= detect_coding_utf_8 (src, src_end, multibytep);
3608 else if (!utf16_examined_p 3638 else if (!utf16_examined_p
3609 && (priorities[i] & try & 3639 && (priorities[i] & try &
3610 CODING_CATEGORY_MASK_UTF_16_BE_LE)) 3640 CODING_CATEGORY_MASK_UTF_16_BE_LE))
3611 { 3641 {
3612 mask |= detect_coding_utf_16 (src, src_end); 3642 mask |= detect_coding_utf_16 (src, src_end, multibytep);
3613 utf16_examined_p = 1; 3643 utf16_examined_p = 1;
3614 } 3644 }
3615 else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5) 3645 else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3616 mask |= detect_coding_big5 (src, src_end); 3646 mask |= detect_coding_big5 (src, src_end, multibytep);
3617 else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE) 3647 else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3618 mask |= detect_coding_emacs_mule (src, src_end); 3648 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
3619 else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL) 3649 else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
3620 mask |= detect_coding_ccl (src, src_end); 3650 mask |= detect_coding_ccl (src, src_end, multibytep);
3621 else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT) 3651 else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3622 mask |= CODING_CATEGORY_MASK_RAW_TEXT; 3652 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
3623 else if (priorities[i] & CODING_CATEGORY_MASK_BINARY) 3653 else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3624 mask |= CODING_CATEGORY_MASK_BINARY; 3654 mask |= CODING_CATEGORY_MASK_BINARY;
3625 if (mask & priorities[i]) 3655 if (mask & priorities[i])
3626 return priorities[i]; 3656 return priorities[i];
3627 } 3657 }
3628 return CODING_CATEGORY_MASK_RAW_TEXT; 3658 return CODING_CATEGORY_MASK_RAW_TEXT;
3629 } 3659 }
3630 if (try & CODING_CATEGORY_MASK_ISO) 3660 if (try & CODING_CATEGORY_MASK_ISO)
3631 mask |= detect_coding_iso2022 (src, src_end); 3661 mask |= detect_coding_iso2022 (src, src_end, multibytep);
3632 if (try & CODING_CATEGORY_MASK_SJIS) 3662 if (try & CODING_CATEGORY_MASK_SJIS)
3633 mask |= detect_coding_sjis (src, src_end); 3663 mask |= detect_coding_sjis (src, src_end, multibytep);
3634 if (try & CODING_CATEGORY_MASK_BIG5) 3664 if (try & CODING_CATEGORY_MASK_BIG5)
3635 mask |= detect_coding_big5 (src, src_end); 3665 mask |= detect_coding_big5 (src, src_end, multibytep);
3636 if (try & CODING_CATEGORY_MASK_UTF_8) 3666 if (try & CODING_CATEGORY_MASK_UTF_8)
3637 mask |= detect_coding_utf_8 (src, src_end); 3667 mask |= detect_coding_utf_8 (src, src_end, multibytep);
3638 if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE) 3668 if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
3639 mask |= detect_coding_utf_16 (src, src_end); 3669 mask |= detect_coding_utf_16 (src, src_end, multibytep);
3640 if (try & CODING_CATEGORY_MASK_EMACS_MULE) 3670 if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3641 mask |= detect_coding_emacs_mule (src, src_end); 3671 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
3642 if (try & CODING_CATEGORY_MASK_CCL) 3672 if (try & CODING_CATEGORY_MASK_CCL)
3643 mask |= detect_coding_ccl (src, src_end); 3673 mask |= detect_coding_ccl (src, src_end, multibytep);
3644 } 3674 }
3645 return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY); 3675 return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3646 } 3676 }
3647 3677
3648 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded. 3678 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3657 unsigned int idx; 3687 unsigned int idx;
3658 int skip, mask, i; 3688 int skip, mask, i;
3659 Lisp_Object val; 3689 Lisp_Object val;
3660 3690
3661 val = Vcoding_category_list; 3691 val = Vcoding_category_list;
3662 mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip); 3692 mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip, 0);
3663 coding->heading_ascii = skip; 3693 coding->heading_ascii = skip;
3664 3694
3665 if (!mask) return; 3695 if (!mask) return;
3666 3696
3667 /* We found a single coding system of the highest priority in MASK. */ 3697 /* We found a single coding system of the highest priority in MASK. */
5605 while (1) 5635 while (1)
5606 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil)); 5636 Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
5607 } 5637 }
5608 5638
5609 Lisp_Object 5639 Lisp_Object
5610 detect_coding_system (src, src_bytes, highest) 5640 detect_coding_system (src, src_bytes, highest, multibytep)
5611 unsigned char *src; 5641 unsigned char *src;
5612 int src_bytes, highest; 5642 int src_bytes, highest;
5643 int multibytep;
5613 { 5644 {
5614 int coding_mask, eol_type; 5645 int coding_mask, eol_type;
5615 Lisp_Object val, tmp; 5646 Lisp_Object val, tmp;
5616 int dummy; 5647 int dummy;
5617 5648
5618 coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy); 5649 coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
5619 eol_type = detect_eol_type (src, src_bytes, &dummy); 5650 eol_type = detect_eol_type (src, src_bytes, &dummy);
5620 if (eol_type == CODING_EOL_INCONSISTENT) 5651 if (eol_type == CODING_EOL_INCONSISTENT)
5621 eol_type = CODING_EOL_UNDECIDED; 5652 eol_type = CODING_EOL_UNDECIDED;
5622 5653
5623 if (!coding_mask) 5654 if (!coding_mask)
5696 if (from < GPT && to >= GPT) 5727 if (from < GPT && to >= GPT)
5697 move_gap_both (to, to_byte); 5728 move_gap_both (to, to_byte);
5698 5729
5699 return detect_coding_system (BYTE_POS_ADDR (from_byte), 5730 return detect_coding_system (BYTE_POS_ADDR (from_byte),
5700 to_byte - from_byte, 5731 to_byte - from_byte,
5701 !NILP (highest)); 5732 !NILP (highest),
5733 !NILP (current_buffer
5734 ->enable_multibyte_characters));
5702 } 5735 }
5703 5736
5704 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string, 5737 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
5705 1, 2, 0, 5738 1, 2, 0,
5706 "Detect coding system of the text in STRING.\n\ 5739 "Detect coding system of the text in STRING.\n\
5717 { 5750 {
5718 CHECK_STRING (string, 0); 5751 CHECK_STRING (string, 0);
5719 5752
5720 return detect_coding_system (XSTRING (string)->data, 5753 return detect_coding_system (XSTRING (string)->data,
5721 STRING_BYTES (XSTRING (string)), 5754 STRING_BYTES (XSTRING (string)),
5722 !NILP (highest)); 5755 !NILP (highest),
5756 STRING_MULTIBYTE (string));
5723 } 5757 }
5724 5758
5725 /* Return an intersection of lists L1 and L2. */ 5759 /* Return an intersection of lists L1 and L2. */
5726 5760
5727 static Lisp_Object 5761 static Lisp_Object