Mercurial > emacs
comparison src/coding.c @ 36087:05ef3feab39b
Doc and message fixes.
| author | Dave Love <fx@gnu.org> |
|---|---|
| date | Wed, 14 Feb 2001 23:50:05 +0000 |
| parents | b511edc4a348 |
| children | 27b5c760df31 |
comparison
equal
deleted
inserted
replaced
| 36086:60d14f3aae0a | 36087:05ef3feab39b |
|---|---|
| 257 c = translate_char (translation_table, c, -1, 0, 0); \ | 257 c = translate_char (translation_table, c, -1, 0, 0); \ |
| 258 src += bytes; \ | 258 src += bytes; \ |
| 259 } while (0) | 259 } while (0) |
| 260 | 260 |
| 261 | 261 |
| 262 /* Produce a multibyte form of characater C to `dst'. Jump to | 262 /* Produce a multibyte form of character C to `dst'. Jump to |
| 263 `label_end_of_loop' if there's not enough space at `dst'. | 263 `label_end_of_loop' if there's not enough space at `dst'. |
| 264 | 264 |
| 265 If we are now in the middle of a composition sequence, the decoded | 265 If we are now in the middle of a composition sequence, the decoded |
| 266 character may be ALTCHAR (for the current composition). In that | 266 character may be ALTCHAR (for the current composition). In that |
| 267 case, the character goes to coding->cmp_data->data instead of | 267 case, the character goes to coding->cmp_data->data instead of |
| 459 /* Table of pointers to coding systems corresponding to each coding | 459 /* Table of pointers to coding systems corresponding to each coding |
| 460 categories. */ | 460 categories. */ |
| 461 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX]; | 461 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX]; |
| 462 | 462 |
| 463 /* Table of coding category masks. Nth element is a mask for a coding | 463 /* Table of coding category masks. Nth element is a mask for a coding |
| 464 cateogry of which priority is Nth. */ | 464 category of which priority is Nth. */ |
| 465 static | 465 static |
| 466 int coding_priorities[CODING_CATEGORY_IDX_MAX]; | 466 int coding_priorities[CODING_CATEGORY_IDX_MAX]; |
| 467 | 467 |
| 468 /* Flag to tell if we look up translation table on character code | 468 /* Flag to tell if we look up translation table on character code |
| 469 conversion. */ | 469 conversion. */ |
| 567 BYTES is 0x20 plus a byte length of this composition data, | 567 BYTES is 0x20 plus a byte length of this composition data, |
| 568 | 568 |
| 569 CHARS is 0x20 plus a number of characters composed by this | 569 CHARS is 0x20 plus a number of characters composed by this |
| 570 data, | 570 data, |
| 571 | 571 |
| 572 COMPONENTs are characters of multibye form or composition | 572 COMPONENTs are characters of multibyte form or composition |
| 573 rules encoded by two-byte of ASCII codes. | 573 rules encoded by two-byte of ASCII codes. |
| 574 | 574 |
| 575 In addition, for backward compatibility, the following formats are | 575 In addition, for backward compatibility, the following formats are |
| 576 also recognized as composition data on decoding. | 576 also recognized as composition data on decoding. |
| 577 | 577 |
| 675 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ | 675 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ |
| 676 (coding->cmp_data->data[coding->cmp_data->used++] = component) | 676 (coding->cmp_data->data[coding->cmp_data->used++] = component) |
| 677 | 677 |
| 678 | 678 |
| 679 /* Get one byte from a data pointed by SRC and increment SRC. If SRC | 679 /* Get one byte from a data pointed by SRC and increment SRC. If SRC |
| 680 is not less than SRC_END, return -1 without inccrementing SRC. */ | 680 is not less than SRC_END, return -1 without incrementing SRC. */ |
| 681 | 681 |
| 682 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++) | 682 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++) |
| 683 | 683 |
| 684 | 684 |
| 685 /* Decode a character represented as a component of composition | 685 /* Decode a character represented as a component of composition |
| 772 int dst_bytes; | 772 int dst_bytes; |
| 773 { | 773 { |
| 774 unsigned char *dst = *destination; | 774 unsigned char *dst = *destination; |
| 775 int method, data_len, nchars; | 775 int method, data_len, nchars; |
| 776 unsigned char *src_base = src++; | 776 unsigned char *src_base = src++; |
| 777 /* Store compoments of composition. */ | 777 /* Store components of composition. */ |
| 778 int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH]; | 778 int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH]; |
| 779 int ncomponent; | 779 int ncomponent; |
| 780 /* Store multibyte form of characters to be composed. This is for | 780 /* Store multibyte form of characters to be composed. This is for |
| 781 Emacs 20 style composition sequence. */ | 781 Emacs 20 style composition sequence. */ |
| 782 unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH]; | 782 unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH]; |
| 1135 ISO2022 provides many mechanisms to encode several character sets | 1135 ISO2022 provides many mechanisms to encode several character sets |
| 1136 in 7-bit and 8-bit environments. For 7-bit environments, all text | 1136 in 7-bit and 8-bit environments. For 7-bit environments, all text |
| 1137 is encoded using bytes less than 128. This may make the encoded | 1137 is encoded using bytes less than 128. This may make the encoded |
| 1138 text a little bit longer, but the text passes more easily through | 1138 text a little bit longer, but the text passes more easily through |
| 1139 several types of gateway, some of which strip off the MSB (Most | 1139 several types of gateway, some of which strip off the MSB (Most |
| 1140 Signigant Bit). | 1140 Significant Bit). |
| 1141 | 1141 |
| 1142 There are two kinds of character sets: control character sets and | 1142 There are two kinds of character sets: control character sets and |
| 1143 graphic character sets. The former contain control characters such | 1143 graphic character sets. The former contain control characters such |
| 1144 as `newline' and `escape' to provide control functions (control | 1144 as `newline' and `escape' to provide control functions (control |
| 1145 functions are also provided by escape sequences). The latter | 1145 functions are also provided by escape sequences). The latter |
| 1256 '(' can be omitted. We refer to this as "short-form" hereafter. | 1256 '(' can be omitted. We refer to this as "short-form" hereafter. |
| 1257 | 1257 |
| 1258 Now you may notice that there are a lot of ways of encoding the | 1258 Now you may notice that there are a lot of ways of encoding the |
| 1259 same multilingual text in ISO2022. Actually, there exist many | 1259 same multilingual text in ISO2022. Actually, there exist many |
| 1260 coding systems such as Compound Text (used in X11's inter client | 1260 coding systems such as Compound Text (used in X11's inter client |
| 1261 communication), ISO-2022-JP (used in Japanese internet), ISO-2022-KR | 1261 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR |
| 1262 (used in Korean internet), EUC (Extended UNIX Code, used in Asian | 1262 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian |
| 1263 localized platforms), and all of these are variants of ISO2022. | 1263 localized platforms), and all of these are variants of ISO2022. |
| 1264 | 1264 |
| 1265 In addition to the above, Emacs handles two more kinds of escape | 1265 In addition to the above, Emacs handles two more kinds of escape |
| 1266 sequences: ISO6429's direction specification and Emacs' private | 1266 sequences: ISO6429's direction specification and Emacs' private |
| 1267 sequence for specifying character composition. | 1267 sequence for specifying character composition. |
| 1291 Here's a list of example usages of these composition escape | 1291 Here's a list of example usages of these composition escape |
| 1292 sequences (categorized by `enum composition_method'). | 1292 sequences (categorized by `enum composition_method'). |
| 1293 | 1293 |
| 1294 COMPOSITION_RELATIVE: | 1294 COMPOSITION_RELATIVE: |
| 1295 ESC 0 CHAR [ CHAR ] ESC 1 | 1295 ESC 0 CHAR [ CHAR ] ESC 1 |
| 1296 COMPOSITOIN_WITH_RULE: | 1296 COMPOSITION_WITH_RULE: |
| 1297 ESC 2 CHAR [ RULE CHAR ] ESC 1 | 1297 ESC 2 CHAR [ RULE CHAR ] ESC 1 |
| 1298 COMPOSITION_WITH_ALTCHARS: | 1298 COMPOSITION_WITH_ALTCHARS: |
| 1299 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 | 1299 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 |
| 1300 COMPOSITION_WITH_RULE_ALTCHARS: | 1300 COMPOSITION_WITH_RULE_ALTCHARS: |
| 1301 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ | 1301 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ |
| 1626 { \ | 1626 { \ |
| 1627 /* This is surely the start of a composition. We must be sure \ | 1627 /* This is surely the start of a composition. We must be sure \ |
| 1628 that coding->cmp_data has enough space to store the \ | 1628 that coding->cmp_data has enough space to store the \ |
| 1629 information about the composition. If not, terminate the \ | 1629 information about the composition. If not, terminate the \ |
| 1630 current decoding loop, allocate one more memory block for \ | 1630 current decoding loop, allocate one more memory block for \ |
| 1631 coding->cmp_data in the calller, then start the decoding \ | 1631 coding->cmp_data in the caller, then start the decoding \ |
| 1632 loop again. We can't allocate memory here directly because \ | 1632 loop again. We can't allocate memory here directly because \ |
| 1633 it may cause buffer/string relocation. */ \ | 1633 it may cause buffer/string relocation. */ \ |
| 1634 if (!coding->cmp_data \ | 1634 if (!coding->cmp_data \ |
| 1635 || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \ | 1635 || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \ |
| 1636 >= COMPOSITION_DATA_SIZE)) \ | 1636 >= COMPOSITION_DATA_SIZE)) \ |
| 1658 coding->composition_rule_follows = 0; \ | 1658 coding->composition_rule_follows = 0; \ |
| 1659 } \ | 1659 } \ |
| 1660 } \ | 1660 } \ |
| 1661 } while (0) | 1661 } while (0) |
| 1662 | 1662 |
| 1663 /* Handle compositoin end sequence ESC 1. */ | 1663 /* Handle composition end sequence ESC 1. */ |
| 1664 | 1664 |
| 1665 #define DECODE_COMPOSITION_END(c1) \ | 1665 #define DECODE_COMPOSITION_END(c1) \ |
| 1666 do { \ | 1666 do { \ |
| 1667 if (coding->composing == COMPOSITION_DISABLED) \ | 1667 if (coding->composing == COMPOSITION_DISABLED) \ |
| 1668 { \ | 1668 { \ |
| 1957 case '[': /* specification of direction */ | 1957 case '[': /* specification of direction */ |
| 1958 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) | 1958 if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION) |
| 1959 goto label_invalid_code; | 1959 goto label_invalid_code; |
| 1960 /* For the moment, nested direction is not supported. | 1960 /* For the moment, nested direction is not supported. |
| 1961 So, `coding->mode & CODING_MODE_DIRECTION' zero means | 1961 So, `coding->mode & CODING_MODE_DIRECTION' zero means |
| 1962 left-to-right, and nozero means right-to-left. */ | 1962 left-to-right, and nonzero means right-to-left. */ |
| 1963 ONE_MORE_BYTE (c1); | 1963 ONE_MORE_BYTE (c1); |
| 1964 switch (c1) | 1964 switch (c1) |
| 1965 { | 1965 { |
| 1966 case ']': /* end of the current direction */ | 1966 case ']': /* end of the current direction */ |
| 1967 coding->mode &= ~CODING_MODE_DIRECTION; | 1967 coding->mode &= ~CODING_MODE_DIRECTION; |
| 2044 | 2044 |
| 2045 /* | 2045 /* |
| 2046 It is not enough to say just "ISO2022" on encoding, we have to | 2046 It is not enough to say just "ISO2022" on encoding, we have to |
| 2047 specify more details. In Emacs, each ISO2022 coding system | 2047 specify more details. In Emacs, each ISO2022 coding system |
| 2048 variant has the following specifications: | 2048 variant has the following specifications: |
| 2049 1. Initial designation to G0 thru G3. | 2049 1. Initial designation to G0 through G3. |
| 2050 2. Allows short-form designation? | 2050 2. Allows short-form designation? |
| 2051 3. ASCII should be designated to G0 before control characters? | 2051 3. ASCII should be designated to G0 before control characters? |
| 2052 4. ASCII should be designated to G0 at end of line? | 2052 4. ASCII should be designated to G0 at end of line? |
| 2053 5. 7-bit environment or 8-bit environment? | 2053 5. 7-bit environment or 8-bit environment? |
| 2054 6. Use locking-shift? | 2054 6. Use locking-shift? |
| 2538 else if (COMPOSING_P (coding)) | 2538 else if (COMPOSING_P (coding)) |
| 2539 { | 2539 { |
| 2540 /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS */ | 2540 /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHARS */ |
| 2541 if (coding->cmp_data_index == coding->cmp_data_start + data[0]) | 2541 if (coding->cmp_data_index == coding->cmp_data_start + data[0]) |
| 2542 /* We have consumed components of the composition. | 2542 /* We have consumed components of the composition. |
| 2543 What follows in SRC is the compositions's base | 2543 What follows in SRC is the composition's base |
| 2544 text. */ | 2544 text. */ |
| 2545 ENCODE_COMPOSITION_FAKE_START (coding); | 2545 ENCODE_COMPOSITION_FAKE_START (coding); |
| 2546 else | 2546 else |
| 2547 { | 2547 { |
| 2548 int c = cmp_data->data[coding->cmp_data_index++]; | 2548 int c = cmp_data->data[coding->cmp_data_index++]; |
| 3221 return; | 3221 return; |
| 3222 } | 3222 } |
| 3223 | 3223 |
| 3224 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode | 3224 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode |
| 3225 format of end-of-line according to `coding->eol_type'. It also | 3225 format of end-of-line according to `coding->eol_type'. It also |
| 3226 converts multibyte form 8-bit characers to unibyte if | 3226 converts multibyte form 8-bit characters to unibyte if |
| 3227 CODING->src_multibyte is nonzero. If `coding->mode & | 3227 CODING->src_multibyte is nonzero. If `coding->mode & |
| 3228 CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text | 3228 CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text |
| 3229 also means end-of-line. */ | 3229 also means end-of-line. */ |
| 3230 | 3230 |
| 3231 static void | 3231 static void |
| 3452 /* Get values of coding system properties: | 3452 /* Get values of coding system properties: |
| 3453 `post-read-conversion', `pre-write-conversion', | 3453 `post-read-conversion', `pre-write-conversion', |
| 3454 `translation-table-for-decode', `translation-table-for-encode'. */ | 3454 `translation-table-for-decode', `translation-table-for-encode'. */ |
| 3455 plist = XVECTOR (coding_spec)->contents[3]; | 3455 plist = XVECTOR (coding_spec)->contents[3]; |
| 3456 /* Pre & post conversion functions should be disabled if | 3456 /* Pre & post conversion functions should be disabled if |
| 3457 inhibit_pre_post_conversion is nozero. This is the case that a code | 3457 inhibit_pre_post_conversion is nonzero. This is the case that a code |
| 3458 conversion function is called while those functions are running. */ | 3458 conversion function is called while those functions are running. */ |
| 3459 if (! inhibit_pre_post_conversion) | 3459 if (! inhibit_pre_post_conversion) |
| 3460 { | 3460 { |
| 3461 coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion); | 3461 coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion); |
| 3462 coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion); | 3462 coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion); |
| 3847 symbol) `japanese-iso-8bit' by default. | 3847 symbol) `japanese-iso-8bit' by default. |
| 3848 | 3848 |
| 3849 o coding-category-iso-7-else | 3849 o coding-category-iso-7-else |
| 3850 | 3850 |
| 3851 The category for a coding system which has the same code range | 3851 The category for a coding system which has the same code range |
| 3852 as ISO2022 of 7-bit environemnt but uses locking shift or | 3852 as ISO2022 of 7-bit environment but uses locking shift or |
| 3853 single shift functions. Assigned the coding-system (Lisp | 3853 single shift functions. Assigned the coding-system (Lisp |
| 3854 symbol) `iso-2022-7bit-lock' by default. | 3854 symbol) `iso-2022-7bit-lock' by default. |
| 3855 | 3855 |
| 3856 o coding-category-iso-8-else | 3856 o coding-category-iso-8-else |
| 3857 | 3857 |
| 3858 The category for a coding system which has the same code range | 3858 The category for a coding system which has the same code range |
| 3859 as ISO2022 of 8-bit environemnt but uses locking shift or | 3859 as ISO2022 of 8-bit environment but uses locking shift or |
| 3860 single shift functions. Assigned the coding-system (Lisp | 3860 single shift functions. Assigned the coding-system (Lisp |
| 3861 symbol) `iso-2022-8bit-ss2' by default. | 3861 symbol) `iso-2022-8bit-ss2' by default. |
| 3862 | 3862 |
| 3863 o coding-category-big5 | 3863 o coding-category-big5 |
| 3864 | 3864 |
| 4680 if (coding->eol_type == CODING_EOL_UNDECIDED | 4680 if (coding->eol_type == CODING_EOL_UNDECIDED |
| 4681 && coding->type != coding_type_ccl) | 4681 && coding->type != coding_type_ccl) |
| 4682 { | 4682 { |
| 4683 detect_eol (coding, source, src_bytes); | 4683 detect_eol (coding, source, src_bytes); |
| 4684 /* We had better recover the original eol format if we | 4684 /* We had better recover the original eol format if we |
| 4685 encounter an inconsitent eol format while decoding. */ | 4685 encounter an inconsistent eol format while decoding. */ |
| 4686 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; | 4686 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; |
| 4687 } | 4687 } |
| 4688 | 4688 |
| 4689 coding->produced = coding->produced_char = 0; | 4689 coding->produced = coding->produced_char = 0; |
| 4690 coding->consumed = coding->consumed_char = 0; | 4690 coding->consumed = coding->consumed_char = 0; |
| 4974 break; | 4974 break; |
| 4975 | 4975 |
| 4976 case CODING_CATEGORY_IDX_ISO_7: | 4976 case CODING_CATEGORY_IDX_ISO_7: |
| 4977 case CODING_CATEGORY_IDX_ISO_7_TIGHT: | 4977 case CODING_CATEGORY_IDX_ISO_7_TIGHT: |
| 4978 { | 4978 { |
| 4979 /* We can skip all charactes at the tail except for 8-bit | 4979 /* We can skip all characters at the tail except for 8-bit |
| 4980 codes and ESC and the following 2-byte at the tail. */ | 4980 codes and ESC and the following 2-byte at the tail. */ |
| 4981 unsigned char *eight_bit = NULL; | 4981 unsigned char *eight_bit = NULL; |
| 4982 | 4982 |
| 4983 if (eol_conversion) | 4983 if (eol_conversion) |
| 4984 while (begp < endp | 4984 while (begp < endp |
| 5063 int i; | 5063 int i; |
| 5064 for (i = 0; i < 128; i++) | 5064 for (i = 0; i < 128; i++) |
| 5065 if (!NILP (CHAR_TABLE_REF (translation_table, i))) | 5065 if (!NILP (CHAR_TABLE_REF (translation_table, i))) |
| 5066 break; | 5066 break; |
| 5067 if (i < 128) | 5067 if (i < 128) |
| 5068 /* Some ASCII character should be tranlsated. We give up | 5068 /* Some ASCII character should be translated. We give up |
| 5069 shrinking. */ | 5069 shrinking. */ |
| 5070 return; | 5070 return; |
| 5071 } | 5071 } |
| 5072 | 5072 |
| 5073 if (str) | 5073 if (str) |
| 5229 coding->cmp_data = coding->cmp_data->prev; | 5229 coding->cmp_data = coding->cmp_data->prev; |
| 5230 coding->cmp_data_start = 0; | 5230 coding->cmp_data_start = 0; |
| 5231 } | 5231 } |
| 5232 | 5232 |
| 5233 /* Reflect the saved information about compositions to OBJ. | 5233 /* Reflect the saved information about compositions to OBJ. |
| 5234 CODING->cmp_data points to a memory block for the informaiton. OBJ | 5234 CODING->cmp_data points to a memory block for the information. OBJ |
| 5235 is a buffer or a string, defaults to the current buffer. */ | 5235 is a buffer or a string, defaults to the current buffer. */ |
| 5236 | 5236 |
| 5237 void | 5237 void |
| 5238 coding_restore_composition (coding, obj) | 5238 coding_restore_composition (coding, obj) |
| 5239 struct coding_system *coding; | 5239 struct coding_system *coding; |
| 5288 If REPLACE is nonzero, we do various things as if the original text | 5288 If REPLACE is nonzero, we do various things as if the original text |
| 5289 is deleted and a new text is inserted. See the comments in | 5289 is deleted and a new text is inserted. See the comments in |
| 5290 replace_range (insdel.c) to know what we are doing. | 5290 replace_range (insdel.c) to know what we are doing. |
| 5291 | 5291 |
| 5292 If REPLACE is zero, it is assumed that the source text is unibyte. | 5292 If REPLACE is zero, it is assumed that the source text is unibyte. |
| 5293 Otherwize, it is assumed that the source text is multibyte. */ | 5293 Otherwise, it is assumed that the source text is multibyte. */ |
| 5294 | 5294 |
| 5295 int | 5295 int |
| 5296 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | 5296 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) |
| 5297 int from, from_byte, to, to_byte, encodep, replace; | 5297 int from, from_byte, to, to_byte, encodep, replace; |
| 5298 struct coding_system *coding; | 5298 struct coding_system *coding; |
| 5367 { | 5367 { |
| 5368 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte); | 5368 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte); |
| 5369 if (coding->eol_type == CODING_EOL_UNDECIDED) | 5369 if (coding->eol_type == CODING_EOL_UNDECIDED) |
| 5370 coding->eol_type = CODING_EOL_LF; | 5370 coding->eol_type = CODING_EOL_LF; |
| 5371 /* We had better recover the original eol format if we | 5371 /* We had better recover the original eol format if we |
| 5372 encounter an inconsitent eol format while decoding. */ | 5372 encounter an inconsistent eol format while decoding. */ |
| 5373 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; | 5373 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; |
| 5374 } | 5374 } |
| 5375 } | 5375 } |
| 5376 | 5376 |
| 5377 /* Now we convert the text. */ | 5377 /* Now we convert the text. */ |
| 5457 from += head_skip; | 5457 from += head_skip; |
| 5458 to -= tail_skip; | 5458 to -= tail_skip; |
| 5459 len -= total_skip; len_byte -= total_skip; | 5459 len -= total_skip; len_byte -= total_skip; |
| 5460 } | 5460 } |
| 5461 | 5461 |
| 5462 /* For converion, we must put the gap before the text in addition to | 5462 /* For conversion, we must put the gap before the text in addition to |
| 5463 making the gap larger for efficient decoding. The required gap | 5463 making the gap larger for efficient decoding. The required gap |
| 5464 size starts from 2000 which is the magic number used in make_gap. | 5464 size starts from 2000 which is the magic number used in make_gap. |
| 5465 But, after one batch of conversion, it will be incremented if we | 5465 But, after one batch of conversion, it will be incremented if we |
| 5466 find that it is not enough. */ | 5466 find that it is not enough. */ |
| 5467 require = 2000; | 5467 require = 2000; |
| 5627 break; | 5627 break; |
| 5628 } | 5628 } |
| 5629 if (first) | 5629 if (first) |
| 5630 { | 5630 { |
| 5631 /* We have just done the first batch of conversion which was | 5631 /* We have just done the first batch of conversion which was |
| 5632 stoped because of insufficient gap. Let's reconsider the | 5632 stopped because of insufficient gap. Let's reconsider the |
| 5633 required gap size (i.e. SRC - DST) now. | 5633 required gap size (i.e. SRC - DST) now. |
| 5634 | 5634 |
| 5635 We have converted ORIG bytes (== coding->consumed) into | 5635 We have converted ORIG bytes (== coding->consumed) into |
| 5636 NEW bytes (coding->produced). To convert the remaining | 5636 NEW bytes (coding->produced). To convert the remaining |
| 5637 LEN bytes, we may need REQUIRE bytes of gap, where: | 5637 LEN bytes, we may need REQUIRE bytes of gap, where: |
| 5676 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; | 5676 GPT -= inserted_byte; GPT_BYTE -= inserted_byte; |
| 5677 } | 5677 } |
| 5678 inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte); | 5678 inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte); |
| 5679 } | 5679 } |
| 5680 | 5680 |
| 5681 /* If we have shrinked the conversion area, adjust it now. */ | 5681 /* If we shrank the conversion area, adjust it now. */ |
| 5682 if (total_skip > 0) | 5682 if (total_skip > 0) |
| 5683 { | 5683 { |
| 5684 if (tail_skip > 0) | 5684 if (tail_skip > 0) |
| 5685 safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip); | 5685 safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip); |
| 5686 inserted += total_skip; inserted_byte += total_skip; | 5686 inserted += total_skip; inserted_byte += total_skip; |
| 5827 saved_coding_symbol = coding->symbol; | 5827 saved_coding_symbol = coding->symbol; |
| 5828 detect_eol (coding, XSTRING (str)->data, to_byte); | 5828 detect_eol (coding, XSTRING (str)->data, to_byte); |
| 5829 if (coding->eol_type == CODING_EOL_UNDECIDED) | 5829 if (coding->eol_type == CODING_EOL_UNDECIDED) |
| 5830 coding->eol_type = CODING_EOL_LF; | 5830 coding->eol_type = CODING_EOL_LF; |
| 5831 /* We had better recover the original eol format if we | 5831 /* We had better recover the original eol format if we |
| 5832 encounter an inconsitent eol format while decoding. */ | 5832 encounter an inconsistent eol format while decoding. */ |
| 5833 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; | 5833 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; |
| 5834 } | 5834 } |
| 5835 } | 5835 } |
| 5836 | 5836 |
| 5837 if (coding->type == coding_type_no_conversion | 5837 if (coding->type == coding_type_no_conversion |
| 6493 } | 6493 } |
| 6494 | 6494 |
| 6495 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, | 6495 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, |
| 6496 2, 3, 0, | 6496 2, 3, 0, |
| 6497 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\ | 6497 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\ |
| 6498 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\ | 6498 Optional arg NOCOPY non-nil means it is OK to return STRING itself\n\ |
| 6499 if the decoding operation is trivial.\n\ | 6499 if the decoding operation is trivial.\n\ |
| 6500 This function sets `last-coding-system-used' to the precise coding system\n\ | 6500 This function sets `last-coding-system-used' to the precise coding system\n\ |
| 6501 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\ | 6501 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\ |
| 6502 not fully specified.)") | 6502 not fully specified.)") |
| 6503 (string, coding_system, nocopy) | 6503 (string, coding_system, nocopy) |
| 6507 } | 6507 } |
| 6508 | 6508 |
| 6509 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string, | 6509 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string, |
| 6510 2, 3, 0, | 6510 2, 3, 0, |
| 6511 "Encode STRING to CODING-SYSTEM, and return the result.\n\ | 6511 "Encode STRING to CODING-SYSTEM, and return the result.\n\ |
| 6512 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\ | 6512 Optional arg NOCOPY non-nil means it is OK to return STRING itself\n\ |
| 6513 if the encoding operation is trivial.\n\ | 6513 if the encoding operation is trivial.\n\ |
| 6514 This function sets `last-coding-system-used' to the precise coding system\n\ | 6514 This function sets `last-coding-system-used' to the precise coding system\n\ |
| 6515 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\ | 6515 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\ |
| 6516 not fully specified.)") | 6516 not fully specified.)") |
| 6517 (string, coding_system, nocopy) | 6517 (string, coding_system, nocopy) |
| 6676 { | 6676 { |
| 6677 CHECK_SYMBOL (coding_system, 0); | 6677 CHECK_SYMBOL (coding_system, 0); |
| 6678 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); | 6678 setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); |
| 6679 /* We had better not send unsafe characters to terminal. */ | 6679 /* We had better not send unsafe characters to terminal. */ |
| 6680 terminal_coding.flags |= CODING_FLAG_ISO_SAFE; | 6680 terminal_coding.flags |= CODING_FLAG_ISO_SAFE; |
| 6681 /* Characer composition should be disabled. */ | 6681 /* Character composition should be disabled. */ |
| 6682 terminal_coding.composing = COMPOSITION_DISABLED; | 6682 terminal_coding.composing = COMPOSITION_DISABLED; |
| 6683 /* Error notification should be suppressed. */ | 6683 /* Error notification should be suppressed. */ |
| 6684 terminal_coding.suppress_error = 1; | 6684 terminal_coding.suppress_error = 1; |
| 6685 terminal_coding.src_multibyte = 1; | 6685 terminal_coding.src_multibyte = 1; |
| 6686 terminal_coding.dst_multibyte = 0; | 6686 terminal_coding.dst_multibyte = 0; |
| 6694 Lisp_Object coding_system; | 6694 Lisp_Object coding_system; |
| 6695 { | 6695 { |
| 6696 CHECK_SYMBOL (coding_system, 0); | 6696 CHECK_SYMBOL (coding_system, 0); |
| 6697 setup_coding_system (Fcheck_coding_system (coding_system), | 6697 setup_coding_system (Fcheck_coding_system (coding_system), |
| 6698 &safe_terminal_coding); | 6698 &safe_terminal_coding); |
| 6699 /* Characer composition should be disabled. */ | 6699 /* Character composition should be disabled. */ |
| 6700 safe_terminal_coding.composing = COMPOSITION_DISABLED; | 6700 safe_terminal_coding.composing = COMPOSITION_DISABLED; |
| 6701 /* Error notification should be suppressed. */ | 6701 /* Error notification should be suppressed. */ |
| 6702 terminal_coding.suppress_error = 1; | 6702 terminal_coding.suppress_error = 1; |
| 6703 safe_terminal_coding.src_multibyte = 1; | 6703 safe_terminal_coding.src_multibyte = 1; |
| 6704 safe_terminal_coding.dst_multibyte = 0; | 6704 safe_terminal_coding.dst_multibyte = 0; |
| 6719 (coding_system) | 6719 (coding_system) |
| 6720 Lisp_Object coding_system; | 6720 Lisp_Object coding_system; |
| 6721 { | 6721 { |
| 6722 CHECK_SYMBOL (coding_system, 0); | 6722 CHECK_SYMBOL (coding_system, 0); |
| 6723 setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding); | 6723 setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding); |
| 6724 /* Characer composition should be disabled. */ | 6724 /* Character composition should be disabled. */ |
| 6725 keyboard_coding.composing = COMPOSITION_DISABLED; | 6725 keyboard_coding.composing = COMPOSITION_DISABLED; |
| 6726 return Qnil; | 6726 return Qnil; |
| 6727 } | 6727 } |
| 6728 | 6728 |
| 6729 DEFUN ("keyboard-coding-system", | 6729 DEFUN ("keyboard-coding-system", |
| 6775 if (nargs < 2) | 6775 if (nargs < 2) |
| 6776 error ("Too few arguments"); | 6776 error ("Too few arguments"); |
| 6777 operation = args[0]; | 6777 operation = args[0]; |
| 6778 if (!SYMBOLP (operation) | 6778 if (!SYMBOLP (operation) |
| 6779 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx))) | 6779 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx))) |
| 6780 error ("Invalid first arguement"); | 6780 error ("Invalid first argument"); |
| 6781 if (nargs < 1 + XINT (target_idx)) | 6781 if (nargs < 1 + XINT (target_idx)) |
| 6782 error ("Too few arguments for operation: %s", | 6782 error ("Too few arguments for operation: %s", |
| 6783 XSYMBOL (operation)->name->data); | 6783 XSYMBOL (operation)->name->data); |
| 6784 target = args[XINT (target_idx) + 1]; | 6784 target = args[XINT (target_idx) + 1]; |
| 6785 if (!(STRINGP (target) | 6785 if (!(STRINGP (target) |
| 6786 || (EQ (operation, Qopen_network_stream) && INTEGERP (target)))) | 6786 || (EQ (operation, Qopen_network_stream) && INTEGERP (target)))) |
| 6787 error ("Invalid %dth argument", XINT (target_idx) + 1); | 6787 error ("Invalid argument %d", XINT (target_idx) + 1); |
| 6788 | 6788 |
| 6789 chain = ((EQ (operation, Qinsert_file_contents) | 6789 chain = ((EQ (operation, Qinsert_file_contents) |
| 6790 || EQ (operation, Qwrite_region)) | 6790 || EQ (operation, Qwrite_region)) |
| 6791 ? Vfile_coding_system_alist | 6791 ? Vfile_coding_system_alist |
| 6792 : (EQ (operation, Qopen_network_stream) | 6792 : (EQ (operation, Qopen_network_stream) |
| 7248 "Table for translating characters while decoding."); | 7248 "Table for translating characters while decoding."); |
| 7249 Vstandard_translation_table_for_decode = Qnil; | 7249 Vstandard_translation_table_for_decode = Qnil; |
| 7250 | 7250 |
| 7251 DEFVAR_LISP ("standard-translation-table-for-encode", | 7251 DEFVAR_LISP ("standard-translation-table-for-encode", |
| 7252 &Vstandard_translation_table_for_encode, | 7252 &Vstandard_translation_table_for_encode, |
| 7253 "Table for translationg characters while encoding."); | 7253 "Table for translating characters while encoding."); |
| 7254 Vstandard_translation_table_for_encode = Qnil; | 7254 Vstandard_translation_table_for_encode = Qnil; |
| 7255 | 7255 |
| 7256 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist, | 7256 DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist, |
| 7257 "Alist of charsets vs revision numbers.\n\ | 7257 "Alist of charsets vs revision numbers.\n\ |
| 7258 While encoding, if a charset (car part of an element) is found,\n\ | 7258 While encoding, if a charset (car part of an element) is found,\n\ |
| 7259 designate it with the escape sequence identifing revision (cdr part of the element)."); | 7259 designate it with the escape sequence identifying revision (cdr part of the element)."); |
| 7260 Vcharset_revision_alist = Qnil; | 7260 Vcharset_revision_alist = Qnil; |
| 7261 | 7261 |
| 7262 DEFVAR_LISP ("default-process-coding-system", | 7262 DEFVAR_LISP ("default-process-coding-system", |
| 7263 &Vdefault_process_coding_system, | 7263 &Vdefault_process_coding_system, |
| 7264 "Cons of coding systems used for process I/O by default.\n\ | 7264 "Cons of coding systems used for process I/O by default.\n\ |
