Mercurial > emacs
comparison src/coding.c @ 17835:f36ffb6f1208
Name change through the code:
coding-category-internal => coding-category-emacs-mule,
XXX_coding_internal => XXX_coding_emacs_mule,
coding_type_internal => coding_type_emacs_mule,
coding_type_automatic => coding_type_undecided,
CODING_CATEGORY_MASK_INTERNAL => CODING_CATEGORY_MASK_EMACS_MULE,
CODING_CATEGORY_IDX_INTERNAL => CODING_CATEGORY_IDX_EMACS_MULE,
CODING_EOL_AUTOMATIC => CODING_EOL_UNDECIDED.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Fri, 16 May 1997 00:43:29 +0000 |
| parents | 92f042f73be2 |
| children | a14261786239 |
comparison
equal
deleted
inserted
replaced
| 17834:e154b943bbba | 17835:f36ffb6f1208 |
|---|---|
| 21 Boston, MA 02111-1307, USA. */ | 21 Boston, MA 02111-1307, USA. */ |
| 22 | 22 |
| 23 /*** TABLE OF CONTENTS *** | 23 /*** TABLE OF CONTENTS *** |
| 24 | 24 |
| 25 1. Preamble | 25 1. Preamble |
| 26 2. Emacs' internal format handlers | 26 2. Emacs' internal format (emacs-mule) handlers |
| 27 3. ISO2022 handlers | 27 3. ISO2022 handlers |
| 28 4. Shift-JIS and BIG5 handlers | 28 4. Shift-JIS and BIG5 handlers |
| 29 5. End-of-line handlers | 29 5. End-of-line handlers |
| 30 6. C library functions | 30 6. C library functions |
| 31 7. Emacs Lisp library functions | 31 7. Emacs Lisp library functions |
| 36 /*** GENERAL NOTE on CODING SYSTEM *** | 36 /*** GENERAL NOTE on CODING SYSTEM *** |
| 37 | 37 |
| 38 Coding system is an encoding mechanism of one or more character | 38 Coding system is an encoding mechanism of one or more character |
| 39 sets. Here's a list of coding systems which Emacs can handle. When | 39 sets. Here's a list of coding systems which Emacs can handle. When |
| 40 we say "decode", it means converting some other coding system to | 40 we say "decode", it means converting some other coding system to |
| 41 Emacs' internal format, and when we say "encode", it means | 41 Emacs' internal format (emacs-mule), and when we say "encode", |
| 42 converting Emacs' internal format to some other coding system. | 42 it means converting the coding system emacs-mule to some other |
| 43 | 43 coding system. |
| 44 0. Emacs' internal format | 44 |
| | 45 0. Emacs' internal format (emacs-mule) |
| 45 | 46 |
| 46 Emacs itself holds a multi-lingual character in a buffer and a string | 47 Emacs itself holds a multi-lingual character in a buffer and a string |
| 47 in a special format. Details are described in the section 2. | 48 in a special format. Details are described in the section 2. |
| 48 | 49 |
| 49 1. ISO2022 | 50 1. ISO2022 |
| 104 which appropriate flag bits for the category XXX is set. The flag | 105 which appropriate flag bits for the category XXX is set. The flag |
| 105 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the | 106 bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the |
| 106 template of these functions. */ | 107 template of these functions. */ |
| 107 #if 0 | 108 #if 0 |
| 108 int | 109 int |
| 109 detect_coding_internal (src, src_end) | 110 detect_coding_emacs_mule (src, src_end) |
| 110 unsigned char *src, *src_end; | 111 unsigned char *src, *src_end; |
| 111 { | 112 { |
| 112 ... | 113 ... |
| 113 } | 114 } |
| 114 #endif | 115 #endif |
| 115 | 116 |
| 116 /*** GENERAL NOTES on `decode_coding_XXX ()' functions *** | 117 /*** GENERAL NOTES on `decode_coding_XXX ()' functions *** |
| 117 | 118 |
| 118 These functions decode SRC_BYTES length text at SOURCE encoded in | 119 These functions decode SRC_BYTES length text at SOURCE encoded in |
| 119 CODING to Emacs' internal format. The resulting text goes to a | 120 CODING to Emacs' internal format (emacs-mule). The resulting text |
| 120 place pointed by DESTINATION, the length of which should not exceed | 121 goes to a place pointed by DESTINATION, the length of which should |
| 121 DST_BYTES. The bytes actually processed is returned as *CONSUMED. | 122 not exceed DST_BYTES. The bytes actually processed is returned as |
| 122 The return value is the length of the decoded text. Below is a | 123 *CONSUMED. The return value is the length of the decoded text. |
| 123 template of these functions. */ | 124 Below is a template of these functions. */ |
| 124 #if 0 | 125 #if 0 |
| 125 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed) | 126 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed) |
| 126 struct coding_system *coding; | 127 struct coding_system *coding; |
| 127 unsigned char *source, *destination; | 128 unsigned char *source, *destination; |
| 128 int src_bytes, dst_bytes; | 129 int src_bytes, dst_bytes; |
| 132 } | 133 } |
| 133 #endif | 134 #endif |
| 134 | 135 |
| 135 /*** GENERAL NOTES on `encode_coding_XXX ()' functions *** | 136 /*** GENERAL NOTES on `encode_coding_XXX ()' functions *** |
| 136 | 137 |
| 137 These functions encode SRC_BYTES length text at SOURCE of Emacs | 138 These functions encode SRC_BYTES length text at SOURCE of Emacs' |
| 138 internal format to CODING. The resulting text goes to a place | 139 internal format (emacs-mule) to CODING. The resulting text goes to |
| 139 pointed by DESTINATION, the length of which should not exceed | 140 a place pointed by DESTINATION, the length of which should not |
| 140 DST_BYTES. The bytes actually processed is returned as *CONSUMED. | 141 exceed DST_BYTES. The bytes actually processed is returned as |
| 141 The return value is the length of the encoded text. Below is a | 142 *CONSUMED. The return value is the length of the encoded text. |
| 142 template of these functions. */ | 143 Below is a template of these functions. */ |
| 143 #if 0 | 144 #if 0 |
| 144 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed) | 145 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed) |
| 145 struct coding_system *coding; | 146 struct coding_system *coding; |
| 146 unsigned char *source, *destination; | 147 unsigned char *source, *destination; |
| 147 int src_bytes, dst_bytes; | 148 int src_bytes, dst_bytes; |
| 291 /* Table of coding-systems currently assigned to each coding-category. */ | 292 /* Table of coding-systems currently assigned to each coding-category. */ |
| 292 Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX]; | 293 Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX]; |
| 293 | 294 |
| 294 /* Table of names of symbol for each coding-category. */ | 295 /* Table of names of symbol for each coding-category. */ |
| 295 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = { | 296 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = { |
| 296 "coding-category-internal", | 297 "coding-category-emacs-mule", |
| 297 "coding-category-sjis", | 298 "coding-category-sjis", |
| 298 "coding-category-iso-7", | 299 "coding-category-iso-7", |
| 299 "coding-category-iso-8-1", | 300 "coding-category-iso-8-1", |
| 300 "coding-category-iso-8-2", | 301 "coding-category-iso-8-2", |
| 301 "coding-category-iso-else", | 302 "coding-category-iso-else", |
| 315 | 316 |
| 316 /* Alist of charsets vs revision number. */ | 317 /* Alist of charsets vs revision number. */ |
| 317 Lisp_Object Vcharset_revision_alist; | 318 Lisp_Object Vcharset_revision_alist; |
| 318 | 319 |
| 319 | 320 |
| 320 /*** 2. Emacs internal format handlers ***/ | 321 /*** 2. Emacs internal format (emacs-mule) handlers ***/ |
| 321 | 322 |
| 322 /* Emacs' internal format for encoding multiple character sets is a | 323 /* Emacs' internal format for encoding multiple character sets is a |
| 323 kind of multi-byte encoding, i.e. encoding a character by a sequence | 324 kind of multi-byte encoding, i.e. encoding a character by a sequence |
| 324 of one-byte codes of variable length. ASCII characters and control | 325 of one-byte codes of variable length. ASCII characters and control |
| 325 characters (e.g. `tab', `newline') are represented by one-byte as | 326 characters (e.g. `tab', `newline') are represented by one-byte as |
| 362 return 0; \ | 363 return 0; \ |
| 363 } while (0) | 364 } while (0) |
| 364 | 365 |
| 365 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 366 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 366 Check if a text is encoded in Emacs' internal format. If it is, | 367 Check if a text is encoded in Emacs' internal format. If it is, |
| 367 return CODING_CATEGORY_MASK_INTERNAL, else return 0. */ | 368 return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */ |
| 368 | 369 |
| 369 int | 370 int |
| 370 detect_coding_internal (src, src_end) | 371 detect_coding_emacs_mule (src, src_end) |
| 371 unsigned char *src, *src_end; | 372 unsigned char *src, *src_end; |
| 372 { | 373 { |
| 373 unsigned char c; | 374 unsigned char c; |
| 374 int composing = 0; | 375 int composing = 0; |
| 375 | 376 |
| 421 default: | 422 default: |
| 422 label_end_of_switch: | 423 label_end_of_switch: |
| 423 break; | 424 break; |
| 424 } | 425 } |
| 425 } | 426 } |
| 426 return CODING_CATEGORY_MASK_INTERNAL; | 427 return CODING_CATEGORY_MASK_EMACS_MULE; |
| 427 } | 428 } |
| 428 | 429 |
| 429 | 430 |
| 430 /*** 3. ISO2022 handlers ***/ | 431 /*** 3. ISO2022 handlers ***/ |
| 431 | 432 |
| 1455 if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL) | 1456 if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL) |
| 1456 bcopy (coding->spec.iso2022.initial_designation, | 1457 bcopy (coding->spec.iso2022.initial_designation, |
| 1457 coding->spec.iso2022.current_designation, | 1458 coding->spec.iso2022.current_designation, |
| 1458 sizeof coding->spec.iso2022.initial_designation); | 1459 sizeof coding->spec.iso2022.initial_designation); |
| 1459 if (coding->eol_type == CODING_EOL_LF | 1460 if (coding->eol_type == CODING_EOL_LF |
| 1460 || coding->eol_type == CODING_EOL_AUTOMATIC) | 1461 || coding->eol_type == CODING_EOL_UNDECIDED) |
| 1461 *dst++ = ISO_CODE_LF; | 1462 *dst++ = ISO_CODE_LF; |
| 1462 else if (coding->eol_type == CODING_EOL_CRLF) | 1463 else if (coding->eol_type == CODING_EOL_CRLF) |
| 1463 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; | 1464 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; |
| 1464 else | 1465 else |
| 1465 *dst++ = ISO_CODE_CR; | 1466 *dst++ = ISO_CODE_CR; |
| 1812 } | 1813 } |
| 1813 /* fall down to treat '\r' as '\n' ... */ | 1814 /* fall down to treat '\r' as '\n' ... */ |
| 1814 | 1815 |
| 1815 case EMACS_linefeed_code: | 1816 case EMACS_linefeed_code: |
| 1816 if (coding->eol_type == CODING_EOL_LF | 1817 if (coding->eol_type == CODING_EOL_LF |
| 1817 || coding->eol_type == CODING_EOL_AUTOMATIC) | 1818 || coding->eol_type == CODING_EOL_UNDECIDED) |
| 1818 *dst++ = '\n'; | 1819 *dst++ = '\n'; |
| 1819 else if (coding->eol_type == CODING_EOL_CRLF) | 1820 else if (coding->eol_type == CODING_EOL_CRLF) |
| 1820 *dst++ = '\r', *dst++ = '\n'; | 1821 *dst++ = '\r', *dst++ = '\n'; |
| 1821 else | 1822 else |
| 1822 *dst++ = '\r'; | 1823 *dst++ = '\r'; |
| 1968 return 0; | 1969 return 0; |
| 1969 | 1970 |
| 1970 switch (coding->eol_type) | 1971 switch (coding->eol_type) |
| 1971 { | 1972 { |
| 1972 case CODING_EOL_LF: | 1973 case CODING_EOL_LF: |
| 1973 case CODING_EOL_AUTOMATIC: | 1974 case CODING_EOL_UNDECIDED: |
| 1974 produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes; | 1975 produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes; |
| 1975 bcopy (source, destination, produced); | 1976 bcopy (source, destination, produced); |
| 1976 if (coding->selective) | 1977 if (coding->selective) |
| 1977 { | 1978 { |
| 1978 int i = produced; | 1979 int i = produced; |
| 2034 from that symbol. | 2035 from that symbol. |
| 2035 | 2036 |
| 2036 `element[0]' contains information to be set in `coding->type'. The | 2037 `element[0]' contains information to be set in `coding->type'. The |
| 2037 value and its meaning is as follows: | 2038 value and its meaning is as follows: |
| 2038 | 2039 |
| 2039 0 -- coding_system_internal | 2040 0 -- coding_type_emacs_mule |
| 2040 1 -- coding_system_sjis | 2041 1 -- coding_type_sjis |
| 2041 2 -- coding_system_iso2022 | 2042 2 -- coding_type_iso2022 |
| 2042 3 -- coding_system_big5 | 2043 3 -- coding_type_big5 |
| 2043 4 -- coding_system_ccl | 2044 4 -- coding_type_ccl encoder/decoder written in CCL |
| 2044 nil -- coding_system_no_conversion | 2045 nil -- coding_type_no_conversion |
| 2045 t -- coding_system_automatic | 2046 t -- coding_type_undecided (automatic conversion on decoding, |
| | 2047 no-conversion on encoding) |
| 2046 | 2048 |
| 2047 `element[4]' contains information to be set in `coding->flags' and | 2049 `element[4]' contains information to be set in `coding->flags' and |
| 2048 `coding->spec'. The meaning varies by `coding->type'. | 2050 `coding->spec'. The meaning varies by `coding->type'. |
| 2049 | 2051 |
| 2050 If `coding->type' is `coding_type_iso2022', element[4] is a vector | 2052 If `coding->type' is `coding_type_iso2022', element[4] is a vector |
| 2125 if (!VECTORP (coding_system) | 2127 if (!VECTORP (coding_system) |
| 2126 || XVECTOR (coding_system)->size != 5) | 2128 || XVECTOR (coding_system)->size != 5) |
| 2127 goto label_invalid_coding_system; | 2129 goto label_invalid_coding_system; |
| 2128 | 2130 |
| 2129 if (VECTORP (eol_type)) | 2131 if (VECTORP (eol_type)) |
| 2130 coding->eol_type = CODING_EOL_AUTOMATIC; | 2132 coding->eol_type = CODING_EOL_UNDECIDED; |
| 2131 else if (XFASTINT (eol_type) == 1) | 2133 else if (XFASTINT (eol_type) == 1) |
| 2132 coding->eol_type = CODING_EOL_CRLF; | 2134 coding->eol_type = CODING_EOL_CRLF; |
| 2133 else if (XFASTINT (eol_type) == 2) | 2135 else if (XFASTINT (eol_type) == 2) |
| 2134 coding->eol_type = CODING_EOL_CR; | 2136 coding->eol_type = CODING_EOL_CR; |
| 2135 else | 2137 else |
| 2137 | 2139 |
| 2138 type = XVECTOR (coding_system)->contents[0]; | 2140 type = XVECTOR (coding_system)->contents[0]; |
| 2139 switch (XFASTINT (type)) | 2141 switch (XFASTINT (type)) |
| 2140 { | 2142 { |
| 2141 case 0: | 2143 case 0: |
| 2142 coding->type = coding_type_internal; | 2144 coding->type = coding_type_emacs_mule; |
| 2143 break; | 2145 break; |
| 2144 | 2146 |
| 2145 case 1: | 2147 case 1: |
| 2146 coding->type = coding_type_sjis; | 2148 coding->type = coding_type_sjis; |
| 2147 break; | 2149 break; |
| 2307 coding->require_flushing = 1; | 2309 coding->require_flushing = 1; |
| 2308 break; | 2310 break; |
| 2309 | 2311 |
| 2310 default: | 2312 default: |
| 2311 if (EQ (type, Qt)) | 2313 if (EQ (type, Qt)) |
| 2312 coding->type = coding_type_automatic; | 2314 coding->type = coding_type_undecided; |
| 2313 else | 2315 else |
| 2314 coding->type = coding_type_no_conversion; | 2316 coding->type = coding_type_no_conversion; |
| 2315 break; | 2317 break; |
| 2316 } | 2318 } |
| 2317 return 0; | 2319 return 0; |
| 2328 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, | 2330 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, |
| 2329 it's impossible to distinguish some coding systems accurately | 2331 it's impossible to distinguish some coding systems accurately |
| 2330 because they use the same range of codes. So, at first, coding | 2332 because they use the same range of codes. So, at first, coding |
| 2331 systems are categorized into 7, those are: | 2333 systems are categorized into 7, those are: |
| 2332 | 2334 |
| 2333 o coding-category-internal | 2335 o coding-category-emacs-mule |
| 2334 | 2336 |
| 2335 The category for a coding system which has the same code range | 2337 The category for a coding system which has the same code range |
| 2336 as Emacs' internal format. Assigned the coding-system (Lisp | 2338 as Emacs' internal format. Assigned the coding-system (Lisp |
| 2337 symbol) `internal' by default. | 2339 symbol) `emacs-mule' by default. |
| 2338 | 2340 |
| 2339 o coding-category-sjis | 2341 o coding-category-sjis |
| 2340 | 2342 |
| 2341 The category for a coding system which has the same code range | 2343 The category for a coding system which has the same code range |
| 2342 as SJIS. Assigned the coding-system (Lisp | 2344 as SJIS. Assigned the coding-system (Lisp |
| 2437 /* C is an ISO2022 specific control code of C1, | 2439 /* C is an ISO2022 specific control code of C1, |
| 2438 or the first byte of SJIS's 2-byte character code, | 2440 or the first byte of SJIS's 2-byte character code, |
| 2439 or a leading code of Emacs. */ | 2441 or a leading code of Emacs. */ |
| 2440 mask = (detect_coding_iso2022 (src, src_end) | 2442 mask = (detect_coding_iso2022 (src, src_end) |
| 2441 | detect_coding_sjis (src, src_end) | 2443 | detect_coding_sjis (src, src_end) |
| 2442 | detect_coding_internal (src, src_end)); | 2444 | detect_coding_emacs_mule (src, src_end)); |
| 2443 | 2445 |
| 2444 else if (c < 0xA0) | 2446 else if (c < 0xA0) |
| 2445 /* C is the first byte of SJIS character code, | 2447 /* C is the first byte of SJIS character code, |
| 2446 or a leading-code of Emacs. */ | 2448 or a leading-code of Emacs. */ |
| 2447 mask = (detect_coding_sjis (src, src_end) | 2449 mask = (detect_coding_sjis (src, src_end) |
| 2448 | detect_coding_internal (src, src_end)); | 2450 | detect_coding_emacs_mule (src, src_end)); |
| 2449 | 2451 |
| 2450 else | 2452 else |
| 2451 /* C is a character of ISO2022 in graphic plane right, | 2453 /* C is a character of ISO2022 in graphic plane right, |
| 2452 or a SJIS's 1-byte character code (i.e. JISX0201), | 2454 or a SJIS's 1-byte character code (i.e. JISX0201), |
| 2453 or the first byte of BIG5's 2-byte code. */ | 2455 or the first byte of BIG5's 2-byte code. */ |
| 2509 setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding); | 2511 setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding); |
| 2510 } | 2512 } |
| 2511 | 2513 |
| 2512 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC | 2514 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC |
| 2513 is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF, | 2515 is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF, |
| 2514 CODING_EOL_CR, and CODING_EOL_AUTOMATIC. */ | 2516 CODING_EOL_CR, and CODING_EOL_UNDECIDED. */ |
| 2515 | 2517 |
| 2516 int | 2518 int |
| 2517 detect_eol_type (src, src_bytes) | 2519 detect_eol_type (src, src_bytes) |
| 2518 unsigned char *src; | 2520 unsigned char *src; |
| 2519 int src_bytes; | 2521 int src_bytes; |
| 2532 return CODING_EOL_CRLF; | 2534 return CODING_EOL_CRLF; |
| 2533 else | 2535 else |
| 2534 return CODING_EOL_CR; | 2536 return CODING_EOL_CR; |
| 2535 } | 2537 } |
| 2536 } | 2538 } |
| 2537 return CODING_EOL_AUTOMATIC; | 2539 return CODING_EOL_UNDECIDED; |
| 2538 } | 2540 } |
| 2539 | 2541 |
| 2540 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC | 2542 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC |
| 2541 is encoded. If it detects an appropriate format of end-of-line, it | 2543 is encoded. If it detects an appropriate format of end-of-line, it |
| 2542 sets the information in *CODING. */ | 2544 sets the information in *CODING. */ |
| 2548 int src_bytes; | 2550 int src_bytes; |
| 2549 { | 2551 { |
| 2550 Lisp_Object val; | 2552 Lisp_Object val; |
| 2551 int eol_type = detect_eol_type (src, src_bytes); | 2553 int eol_type = detect_eol_type (src, src_bytes); |
| 2552 | 2554 |
| 2553 if (eol_type == CODING_EOL_AUTOMATIC) | 2555 if (eol_type == CODING_EOL_UNDECIDED) |
| 2554 /* We found no end-of-line in the source text. */ | 2556 /* We found no end-of-line in the source text. */ |
| 2555 return; | 2557 return; |
| 2556 | 2558 |
| 2557 val = Fget (coding->symbol, Qeol_type); | 2559 val = Fget (coding->symbol, Qeol_type); |
| 2558 if (VECTORP (val) && XVECTOR (val)->size == 3) | 2560 if (VECTORP (val) && XVECTOR (val)->size == 3) |
| 2576 { | 2578 { |
| 2577 *consumed = 0; | 2579 *consumed = 0; |
| 2578 return 0; | 2580 return 0; |
| 2579 } | 2581 } |
| 2580 | 2582 |
| 2581 if (coding->type == coding_type_automatic) | 2583 if (coding->type == coding_type_undecided) |
| 2582 detect_coding (coding, source, src_bytes); | 2584 detect_coding (coding, source, src_bytes); |
| 2583 | 2585 |
| 2584 if (coding->eol_type == CODING_EOL_AUTOMATIC) | 2586 if (coding->eol_type == CODING_EOL_UNDECIDED) |
| 2585 detect_eol (coding, source, src_bytes); | 2587 detect_eol (coding, source, src_bytes); |
| 2586 | 2588 |
| 2587 coding->carryover_size = 0; | 2589 coding->carryover_size = 0; |
| 2588 switch (coding->type) | 2590 switch (coding->type) |
| 2589 { | 2591 { |
| 2592 produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes; | 2594 produced = (src_bytes > dst_bytes) ? dst_bytes : src_bytes; |
| 2593 bcopy (source, destination, produced); | 2595 bcopy (source, destination, produced); |
| 2594 *consumed = produced; | 2596 *consumed = produced; |
| 2595 break; | 2597 break; |
| 2596 | 2598 |
| 2597 case coding_type_internal: | 2599 case coding_type_emacs_mule: |
| 2598 case coding_type_automatic: | 2600 case coding_type_undecided: |
| 2599 if (coding->eol_type == CODING_EOL_LF | 2601 if (coding->eol_type == CODING_EOL_LF |
| 2600 || coding->eol_type == CODING_EOL_AUTOMATIC) | 2602 || coding->eol_type == CODING_EOL_UNDECIDED) |
| 2601 goto label_no_conversion; | 2603 goto label_no_conversion; |
| 2602 produced = decode_eol (coding, source, destination, | 2604 produced = decode_eol (coding, source, destination, |
| 2603 src_bytes, dst_bytes, consumed); | 2605 src_bytes, dst_bytes, consumed); |
| 2604 break; | 2606 break; |
| 2605 | 2607 |
| 2657 } | 2659 } |
| 2658 } | 2660 } |
| 2659 *consumed = produced; | 2661 *consumed = produced; |
| 2660 break; | 2662 break; |
| 2661 | 2663 |
| 2662 case coding_type_internal: | 2664 case coding_type_emacs_mule: |
| 2663 case coding_type_automatic: | 2665 case coding_type_undecided: |
| 2664 if (coding->eol_type == CODING_EOL_LF | 2666 if (coding->eol_type == CODING_EOL_LF |
| 2665 || coding->eol_type == CODING_EOL_AUTOMATIC) | 2667 || coding->eol_type == CODING_EOL_UNDECIDED) |
| 2666 goto label_no_conversion; | 2668 goto label_no_conversion; |
| 2667 produced = encode_eol (coding, source, destination, | 2669 produced = encode_eol (coding, source, destination, |
| 2668 src_bytes, dst_bytes, consumed); | 2670 src_bytes, dst_bytes, consumed); |
| 2669 break; | 2671 break; |
| 2670 | 2672 |
| 2833 | 2835 |
| 2834 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, | 2836 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, |
| 2835 2, 2, 0, | 2837 2, 2, 0, |
| 2836 "Detect coding-system of the text in the region between START and END.\n\ | 2838 "Detect coding-system of the text in the region between START and END.\n\ |
| 2837 Return a list of possible coding-systems ordered by priority.\n\ | 2839 Return a list of possible coding-systems ordered by priority.\n\ |
| 2838 If only ASCII characters are found, it returns `automatic-conversion'\n\ | 2840 If only ASCII characters are found, it returns `undecided'\n\ |
| 2839 or its subsidiary coding-system according to a detected end-of-line format.") | 2841 or its subsidiary coding-system according to a detected end-of-line format.") |
| 2840 (b, e) | 2842 (b, e) |
| 2841 Lisp_Object b, e; | 2843 Lisp_Object b, e; |
| 2842 { | 2844 { |
| 2843 int coding_mask, eol_type; | 2845 int coding_mask, eol_type; |
| 2851 coding_mask = detect_coding_mask (POS_ADDR (beg), end - beg); | 2853 coding_mask = detect_coding_mask (POS_ADDR (beg), end - beg); |
| 2852 eol_type = detect_eol_type (POS_ADDR (beg), end - beg); | 2854 eol_type = detect_eol_type (POS_ADDR (beg), end - beg); |
| 2853 | 2855 |
| 2854 if (coding_mask == CODING_CATEGORY_MASK_ANY) | 2856 if (coding_mask == CODING_CATEGORY_MASK_ANY) |
| 2855 { | 2857 { |
| 2856 val = intern ("automatic-conversion"); | 2858 val = intern ("undecided"); |
| 2857 if (eol_type != CODING_EOL_AUTOMATIC) | 2859 if (eol_type != CODING_EOL_UNDECIDED) |
| 2858 { | 2860 { |
| 2859 Lisp_Object val2 = Fget (val, Qeol_type); | 2861 Lisp_Object val2 = Fget (val, Qeol_type); |
| 2860 if (VECTORP (val2)) | 2862 if (VECTORP (val2)) |
| 2861 val = XVECTOR (val2)->contents[eol_type]; | 2863 val = XVECTOR (val2)->contents[eol_type]; |
| 2862 } | 2864 } |
| 2882 coding-systems. */ | 2884 coding-systems. */ |
| 2883 val2 = val; | 2885 val2 = val; |
| 2884 val = Qnil; | 2886 val = Qnil; |
| 2885 for (; !NILP (val2); val2 = XCONS (val2)->cdr) | 2887 for (; !NILP (val2); val2 = XCONS (val2)->cdr) |
| 2886 { | 2888 { |
| 2887 if (eol_type == CODING_EOL_AUTOMATIC) | 2889 if (eol_type == CODING_EOL_UNDECIDED) |
| 2888 val = Fcons (XCONS (val2)->car, val); | 2890 val = Fcons (XCONS (val2)->car, val); |
| 2889 else | 2891 else |
| 2890 { | 2892 { |
| 2891 Lisp_Object val3 = Fget (XCONS (val2)->car, Qeol_type); | 2893 Lisp_Object val3 = Fget (XCONS (val2)->car, Qeol_type); |
| 2892 if (VECTORP (val3)) | 2894 if (VECTORP (val3)) |
| 2912 int encodep; | 2914 int encodep; |
| 2913 { | 2915 { |
| 2914 register unsigned char *beg_addr = *begp, *end_addr = *endp; | 2916 register unsigned char *beg_addr = *begp, *end_addr = *endp; |
| 2915 | 2917 |
| 2916 if (coding->eol_type != CODING_EOL_LF | 2918 if (coding->eol_type != CODING_EOL_LF |
| 2917 && coding->eol_type != CODING_EOL_AUTOMATIC) | 2919 && coding->eol_type != CODING_EOL_UNDECIDED) |
| 2918 /* Since we anyway have to convert end-of-line format, it is not | 2920 /* Since we anyway have to convert end-of-line format, it is not |
| 2919 worth skipping at most 100 bytes or so. */ | 2921 worth skipping at most 100 bytes or so. */ |
| 2920 return; | 2922 return; |
| 2921 | 2923 |
| 2922 if (encodep) /* for encoding */ | 2924 if (encodep) /* for encoding */ |
| 2923 { | 2925 { |
| 2924 switch (coding->type) | 2926 switch (coding->type) |
| 2925 { | 2927 { |
| 2926 case coding_type_no_conversion: | 2928 case coding_type_no_conversion: |
| 2927 case coding_type_internal: | 2929 case coding_type_emacs_mule: |
| 2928 case coding_type_automatic: | 2930 case coding_type_undecided: |
| 2929 /* We need no conversion. */ | 2931 /* We need no conversion. */ |
| 2930 *begp = *endp; | 2932 *begp = *endp; |
| 2931 return; | 2933 return; |
| 2932 case coding_type_ccl: | 2934 case coding_type_ccl: |
| 2933 /* We can't skip any data. */ | 2935 /* We can't skip any data. */ |
| 2960 { | 2962 { |
| 2961 case coding_type_no_conversion: | 2963 case coding_type_no_conversion: |
| 2962 /* We need no conversion. */ | 2964 /* We need no conversion. */ |
| 2963 *begp = *endp; | 2965 *begp = *endp; |
| 2964 return; | 2966 return; |
| 2965 case coding_type_internal: | 2967 case coding_type_emacs_mule: |
| 2966 if (coding->eol_type == CODING_EOL_LF) | 2968 if (coding->eol_type == CODING_EOL_LF) |
| 2967 { | 2969 { |
| 2968 /* We need no conversion. */ | 2970 /* We need no conversion. */ |
| 2969 *begp = *endp; | 2971 *begp = *endp; |
| 2970 return; | 2972 return; |
| 3459 | 3461 |
| 3460 init_coding_once () | 3462 init_coding_once () |
| 3461 { | 3463 { |
| 3462 int i; | 3464 int i; |
| 3463 | 3465 |
| 3464 /* Emacs internal format specific initialize routine. */ | 3466 /* Emacs' internal format specific initialize routine. */ |
| 3465 for (i = 0; i <= 0x20; i++) | 3467 for (i = 0; i <= 0x20; i++) |
| 3466 emacs_code_class[i] = EMACS_control_code; | 3468 emacs_code_class[i] = EMACS_control_code; |
| 3467 emacs_code_class[0x0A] = EMACS_linefeed_code; | 3469 emacs_code_class[0x0A] = EMACS_linefeed_code; |
| 3468 emacs_code_class[0x0D] = EMACS_carriage_return_code; | 3470 emacs_code_class[0x0D] = EMACS_carriage_return_code; |
| 3469 for (i = 0x21 ; i < 0x7F; i++) | 3471 for (i = 0x21 ; i < 0x7F; i++) |
