comparison src/coding.c @ 110983:b87d8337c695

Fix typos in comments
author Kenichi Handa <handa@m17n.org>
date Tue, 12 Oct 2010 21:52:05 +0900
parents c234b2db847c
children b8fde5ef9e14 913b779aa4ee
Side-by-side comparison
Legend: equal | deleted | inserted | replaced
Left column: 110982:2b3bece0553a | Right column: 110983:b87d8337c695
165 int found = 0; 165 int found = 0;
166 ...; 166 ...;
167 167
168 while (1) 168 while (1)
169 { 169 {
170 /* Get one byte from the source. If the souce is exausted, jump 170 /* Get one byte from the source. If the source is exhausted, jump
171 to no_more_source:. */ 171 to no_more_source:. */
172 ONE_MORE_BYTE (c); 172 ONE_MORE_BYTE (c);
173 173
174 if (! __C_conforms_to_XXX___ (c)) 174 if (! __C_conforms_to_XXX___ (c))
175 break; 175 break;
179 /* The byte sequence is invalid for XXX. */ 179 /* The byte sequence is invalid for XXX. */
180 detect_info->rejected |= CATEGORY_MASK_XXX; 180 detect_info->rejected |= CATEGORY_MASK_XXX;
181 return 0; 181 return 0;
182 182
183 no_more_source: 183 no_more_source:
184 /* The source exausted successfully. */ 184 /* The source exhausted successfully. */
185 detect_info->found |= found; 185 detect_info->found |= found;
186 return 1; 186 return 1;
187 } 187 }
188 #endif 188 #endif
189 189
535 535
536 /* If set, designation sequence should be placed at beginning of line 536 /* If set, designation sequence should be placed at beginning of line
537 on output. */ 537 on output. */
538 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400 538 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
539 539
540 /* If set, do not encode unsafe charactes on output. */ 540 /* If set, do not encode unsafe characters on output. */
541 #define CODING_ISO_FLAG_SAFE 0x0800 541 #define CODING_ISO_FLAG_SAFE 0x0800
542 542
543 /* If set, extra latin codes (128..159) are accepted as a valid code 543 /* If set, extra latin codes (128..159) are accepted as a valid code
544 on input. */ 544 on input. */
545 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000 545 #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000
691 /* List of symbols `coding-category-xxx' ordered by priority. This 691 /* List of symbols `coding-category-xxx' ordered by priority. This
692 variable is exposed to Emacs Lisp. */ 692 variable is exposed to Emacs Lisp. */
693 static Lisp_Object Vcoding_category_list; 693 static Lisp_Object Vcoding_category_list;
694 694
695 /* Table of coding categories (Lisp symbols). This variable is for 695 /* Table of coding categories (Lisp symbols). This variable is for
696 internal use oly. */ 696 internal use only. */
697 static Lisp_Object Vcoding_category_table; 697 static Lisp_Object Vcoding_category_table;
698 698
699 /* Table of coding-categories ordered by priority. */ 699 /* Table of coding-categories ordered by priority. */
700 static enum coding_category coding_priorities[coding_category_max]; 700 static enum coding_category coding_priorities[coding_category_max];
701 701
823 produced_chars++; \ 823 produced_chars++; \
824 *dst++ = (c); \ 824 *dst++ = (c); \
825 } while (0) 825 } while (0)
826 826
827 827
828 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2. */ 828 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2. */
829 829
830 #define EMIT_TWO_ASCII_BYTES(c1, c2) \ 830 #define EMIT_TWO_ASCII_BYTES(c1, c2) \
831 do { \ 831 do { \
832 produced_chars += 2; \ 832 produced_chars += 2; \
833 *dst++ = (c1), *dst++ = (c2); \ 833 *dst++ = (c1), *dst++ = (c2); \
1239 old-style emacs-mule encoding, or 0 for the other kind of 1239 old-style emacs-mule encoding, or 0 for the other kind of
1240 composition. 1240 composition.
1241 1241
1242 METHOD is one of enum composition_method. 1242 METHOD is one of enum composition_method.
1243 1243
1244 Optionnal COMPOSITION-COMPONENTS are characters and composition 1244 Optional COMPOSITION-COMPONENTS are characters and composition
1245 rules. 1245 rules.
1246 1246
1247 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID 1247 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1248 follows. 1248 follows.
1249 1249
1952 BYTES is 0xA0 plus a byte length of this composition data, 1952 BYTES is 0xA0 plus a byte length of this composition data,
1953 1953
1954 CHARS is 0xA0 plus a number of characters composed by this 1954 CHARS is 0xA0 plus a number of characters composed by this
1955 data, 1955 data,
1956 1956
1957 COMPONENTs are characters of multibye form or composition 1957 COMPONENTs are characters of multibyte form or composition
1958 rules encoded by two-byte of ASCII codes. 1958 rules encoded by two-byte of ASCII codes.
1959 1959
1960 In addition, for backward compatibility, the following formats are 1960 In addition, for backward compatibility, the following formats are
1961 also recognized as composition data on decoding. 1961 also recognized as composition data on decoding.
1962 1962
2453 { 2453 {
2454 const unsigned char *src = coding->source + coding->consumed; 2454 const unsigned char *src = coding->source + coding->consumed;
2455 const unsigned char *src_end = coding->source + coding->src_bytes; 2455 const unsigned char *src_end = coding->source + coding->src_bytes;
2456 const unsigned char *src_base; 2456 const unsigned char *src_base;
2457 int *charbuf = coding->charbuf + coding->charbuf_used; 2457 int *charbuf = coding->charbuf + coding->charbuf_used;
2458 /* We may produce two annocations (charset and composition) in one 2458 /* We may produce two annotations (charset and composition) in one
2459 loop and one more charset annocation at the end. */ 2459 loop and one more charset annotation at the end. */
2460 int *charbuf_end 2460 int *charbuf_end
2461 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); 2461 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2462 int consumed_chars = 0, consumed_chars_base; 2462 int consumed_chars = 0, consumed_chars_base;
2463 int multibytep = coding->src_multibyte; 2463 int multibytep = coding->src_multibyte;
2464 Lisp_Object attrs, charset_list; 2464 Lisp_Object attrs, charset_list;
2530 { 2530 {
2531 int nchars, nbytes; 2531 int nchars, nbytes;
2532 /* emacs_mule_char can load a charset map from a file, which 2532 /* emacs_mule_char can load a charset map from a file, which
2533 allocates a large structure and might cause buffer text 2533 allocates a large structure and might cause buffer text
2534 to be relocated as result. Thus, we need to remember the 2534 to be relocated as result. Thus, we need to remember the
2535 original pointer to buffer text, and fixup all related 2535 original pointer to buffer text, and fix up all related
2536 pointers after the call. */ 2536 pointers after the call. */
2537 const unsigned char *orig = coding->source; 2537 const unsigned char *orig = coding->source;
2538 EMACS_INT offset; 2538 EMACS_INT offset;
2539 2539
2540 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id, 2540 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2557 consumed_chars = consumed_chars_base + nchars; 2557 consumed_chars = consumed_chars_base + nchars;
2558 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR) 2558 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2559 cmp_status->ncomps -= nchars; 2559 cmp_status->ncomps -= nchars;
2560 } 2560 }
2561 2561
2562 /* Now if C >= 0, we found a normally encoded characer, if C < 2562 /* Now if C >= 0, we found a normally encoded character, if C <
2563 0, we found an old-style composition component character or 2563 0, we found an old-style composition component character or
2564 rule. */ 2564 rule. */
2565 2565
2566 if (cmp_status->state == COMPOSING_NO) 2566 if (cmp_status->state == COMPOSING_NO)
2567 { 2567 {
3070 ASET (attrs, coding_attr_safe_charsets, safe_charsets); 3070 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3071 } 3071 }
3072 3072
3073 3073
3074 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 3074 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3075 Check if a text is encoded in one of ISO-2022 based codig systems. 3075 Check if a text is encoded in one of ISO-2022 based coding systems.
3076 If it is, return 1, else return 0. */ 3076 If it is, return 1, else return 0. */
3077 3077
3078 static int 3078 static int
3079 detect_coding_iso_2022 (coding, detect_info) 3079 detect_coding_iso_2022 (coding, detect_info)
3080 struct coding_system *coding; 3080 struct coding_system *coding;
3482 } 3482 }
3483 cmp_status->state = COMPOSING_NO; 3483 cmp_status->state = COMPOSING_NO;
3484 return new_chars; 3484 return new_chars;
3485 } 3485 }
3486 3486
3487 /* If characers are under composition, finish the composition. */ 3487 /* If characters are under composition, finish the composition. */
3488 #define MAYBE_FINISH_COMPOSITION() \ 3488 #define MAYBE_FINISH_COMPOSITION() \
3489 do { \ 3489 do { \
3490 if (cmp_status->state != COMPOSING_NO) \ 3490 if (cmp_status->state != COMPOSING_NO) \
3491 char_offset += finish_composition (charbuf, cmp_status); \ 3491 char_offset += finish_composition (charbuf, cmp_status); \
3492 } while (0) 3492 } while (0)
3589 { 3589 {
3590 const unsigned char *src = coding->source + coding->consumed; 3590 const unsigned char *src = coding->source + coding->consumed;
3591 const unsigned char *src_end = coding->source + coding->src_bytes; 3591 const unsigned char *src_end = coding->source + coding->src_bytes;
3592 const unsigned char *src_base; 3592 const unsigned char *src_base;
3593 int *charbuf = coding->charbuf + coding->charbuf_used; 3593 int *charbuf = coding->charbuf + coding->charbuf_used;
3594 /* We may produce two annocations (charset and composition) in one 3594 /* We may produce two annotations (charset and composition) in one
3595 loop and one more charset annocation at the end. */ 3595 loop and one more charset annotation at the end. */
3596 int *charbuf_end 3596 int *charbuf_end
3597 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); 3597 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3598 int consumed_chars = 0, consumed_chars_base; 3598 int consumed_chars = 0, consumed_chars_base;
3599 int multibytep = coding->src_multibyte; 3599 int multibytep = coding->src_multibyte;
3600 /* Charsets invoked to graphic plane 0 and 1 respectively. */ 3600 /* Charsets invoked to graphic plane 0 and 1 respectively. */
3892 case '[': /* specification of direction */ 3892 case '[': /* specification of direction */
3893 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)) 3893 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3894 goto invalid_code; 3894 goto invalid_code;
3895 /* For the moment, nested direction is not supported. 3895 /* For the moment, nested direction is not supported.
3896 So, `coding->mode & CODING_MODE_DIRECTION' zero means 3896 So, `coding->mode & CODING_MODE_DIRECTION' zero means
3897 left-to-right, and nozero means right-to-left. */ 3897 left-to-right, and nonzero means right-to-left. */
3898 ONE_MORE_BYTE (c1); 3898 ONE_MORE_BYTE (c1);
3899 switch (c1) 3899 switch (c1)
3900 { 3900 {
3901 case ']': /* end of the current direction */ 3901 case ']': /* end of the current direction */
3902 coding->mode &= ~CODING_MODE_DIRECTION; 3902 coding->mode &= ~CODING_MODE_DIRECTION;
4805 { 4805 {
4806 const unsigned char *src = coding->source + coding->consumed; 4806 const unsigned char *src = coding->source + coding->consumed;
4807 const unsigned char *src_end = coding->source + coding->src_bytes; 4807 const unsigned char *src_end = coding->source + coding->src_bytes;
4808 const unsigned char *src_base; 4808 const unsigned char *src_base;
4809 int *charbuf = coding->charbuf + coding->charbuf_used; 4809 int *charbuf = coding->charbuf + coding->charbuf_used;
4810 /* We may produce one charset annocation in one loop and one more at 4810 /* We may produce one charset annotation in one loop and one more at
4811 the end. */ 4811 the end. */
4812 int *charbuf_end 4812 int *charbuf_end
4813 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 4813 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4814 int consumed_chars = 0, consumed_chars_base; 4814 int consumed_chars = 0, consumed_chars_base;
4815 int multibytep = coding->src_multibyte; 4815 int multibytep = coding->src_multibyte;
4924 { 4924 {
4925 const unsigned char *src = coding->source + coding->consumed; 4925 const unsigned char *src = coding->source + coding->consumed;
4926 const unsigned char *src_end = coding->source + coding->src_bytes; 4926 const unsigned char *src_end = coding->source + coding->src_bytes;
4927 const unsigned char *src_base; 4927 const unsigned char *src_base;
4928 int *charbuf = coding->charbuf + coding->charbuf_used; 4928 int *charbuf = coding->charbuf + coding->charbuf_used;
4929 /* We may produce one charset annocation in one loop and one more at 4929 /* We may produce one charset annotation in one loop and one more at
4930 the end. */ 4930 the end. */
4931 int *charbuf_end 4931 int *charbuf_end
4932 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 4932 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4933 int consumed_chars = 0, consumed_chars_base; 4933 int consumed_chars = 0, consumed_chars_base;
4934 int multibytep = coding->src_multibyte; 4934 int multibytep = coding->src_multibyte;
5590 { 5590 {
5591 const unsigned char *src = coding->source + coding->consumed; 5591 const unsigned char *src = coding->source + coding->consumed;
5592 const unsigned char *src_end = coding->source + coding->src_bytes; 5592 const unsigned char *src_end = coding->source + coding->src_bytes;
5593 const unsigned char *src_base; 5593 const unsigned char *src_base;
5594 int *charbuf = coding->charbuf + coding->charbuf_used; 5594 int *charbuf = coding->charbuf + coding->charbuf_used;
5595 /* We may produce one charset annocation in one loop and one more at 5595 /* We may produce one charset annotation in one loop and one more at
5596 the end. */ 5596 the end. */
5597 int *charbuf_end 5597 int *charbuf_end
5598 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 5598 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5599 int consumed_chars = 0, consumed_chars_base; 5599 int consumed_chars = 0, consumed_chars_base;
5600 int multibytep = coding->src_multibyte; 5600 int multibytep = coding->src_multibyte;
6201 symbol) `japanese-iso-8bit' by default. 6201 symbol) `japanese-iso-8bit' by default.
6202 6202
6203 o coding-category-iso-7-else 6203 o coding-category-iso-7-else
6204 6204
6205 The category for a coding system which has the same code range 6205 The category for a coding system which has the same code range
6206 as ISO2022 of 7-bit environemnt but uses locking shift or 6206 as ISO2022 of 7-bit environment but uses locking shift or
6207 single shift functions. Assigned the coding-system (Lisp 6207 single shift functions. Assigned the coding-system (Lisp
6208 symbol) `iso-2022-7bit-lock' by default. 6208 symbol) `iso-2022-7bit-lock' by default.
6209 6209
6210 o coding-category-iso-8-else 6210 o coding-category-iso-8-else
6211 6211
6212 The category for a coding system which has the same code range 6212 The category for a coding system which has the same code range
6213 as ISO2022 of 8-bit environemnt but uses locking shift or 6213 as ISO2022 of 8-bit environment but uses locking shift or
6214 single shift functions. Assigned the coding-system (Lisp 6214 single shift functions. Assigned the coding-system (Lisp
6215 symbol) `iso-2022-8bit-ss2' by default. 6215 symbol) `iso-2022-8bit-ss2' by default.
6216 6216
6217 o coding-category-big5 6217 o coding-category-big5
6218 6218
7633 7633
7634 /* 1 iff Vcode_conversion_reused_workbuf is already in use. */ 7634 /* 1 iff Vcode_conversion_reused_workbuf is already in use. */
7635 static int reused_workbuf_in_use; 7635 static int reused_workbuf_in_use;
7636 7636
7637 7637
7638 /* Return a working buffer of code convesion. MULTIBYTE specifies the 7638 /* Return a working buffer of code conversion. MULTIBYTE specifies the
7639 multibyteness of returning buffer. */ 7639 multibyteness of returning buffer. */
7640 7640
7641 static Lisp_Object 7641 static Lisp_Object
7642 make_conversion_work_buffer (multibyte) 7642 make_conversion_work_buffer (multibyte)
7643 int multibyte; 7643 int multibyte;
8296 } 8296 }
8297 8297
8298 8298
8299 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If 8299 /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
8300 HIGHEST is nonzero, return the coding system of the highest 8300 HIGHEST is nonzero, return the coding system of the highest
8301 priority among the detected coding systems. Otherwize return a 8301 priority among the detected coding systems. Otherwise return a
8302 list of detected coding systems sorted by their priorities. If 8302 list of detected coding systems sorted by their priorities. If
8303 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct 8303 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8304 multibyte form but contains only ASCII and eight-bit chars. 8304 multibyte form but contains only ASCII and eight-bit chars.
8305 Otherwise, the bytes are raw bytes. 8305 Otherwise, the bytes are raw bytes.
8306 8306
9421 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1)); 9421 struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9422 CHECK_SYMBOL (coding_system); 9422 CHECK_SYMBOL (coding_system);
9423 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding); 9423 setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9424 /* We had better not send unsafe characters to terminal. */ 9424 /* We had better not send unsafe characters to terminal. */
9425 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING; 9425 terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9426 /* Characer composition should be disabled. */ 9426 /* Character composition should be disabled. */
9427 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; 9427 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9428 terminal_coding->src_multibyte = 1; 9428 terminal_coding->src_multibyte = 1;
9429 terminal_coding->dst_multibyte = 0; 9429 terminal_coding->dst_multibyte = 0;
9430 return Qnil; 9430 return Qnil;
9431 } 9431 }
9438 Lisp_Object coding_system; 9438 Lisp_Object coding_system;
9439 { 9439 {
9440 CHECK_SYMBOL (coding_system); 9440 CHECK_SYMBOL (coding_system);
9441 setup_coding_system (Fcheck_coding_system (coding_system), 9441 setup_coding_system (Fcheck_coding_system (coding_system),
9442 &safe_terminal_coding); 9442 &safe_terminal_coding);
9443 /* Characer composition should be disabled. */ 9443 /* Character composition should be disabled. */
9444 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; 9444 safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9445 safe_terminal_coding.src_multibyte = 1; 9445 safe_terminal_coding.src_multibyte = 1;
9446 safe_terminal_coding.dst_multibyte = 0; 9446 safe_terminal_coding.dst_multibyte = 0;
9447 return Qnil; 9447 return Qnil;
9448 } 9448 }
9475 if (NILP (coding_system)) 9475 if (NILP (coding_system))
9476 coding_system = Qno_conversion; 9476 coding_system = Qno_conversion;
9477 else 9477 else
9478 Fcheck_coding_system (coding_system); 9478 Fcheck_coding_system (coding_system);
9479 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t)); 9479 setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9480 /* Characer composition should be disabled. */ 9480 /* Character composition should be disabled. */
9481 TERMINAL_KEYBOARD_CODING (t)->common_flags 9481 TERMINAL_KEYBOARD_CODING (t)->common_flags
9482 &= ~CODING_ANNOTATE_COMPOSITION_MASK; 9482 &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9483 return Qnil; 9483 return Qnil;
9484 } 9484 }
9485 9485
9852 If Nth element is a number NUM, N is the first byte of a 9852 If Nth element is a number NUM, N is the first byte of a
9853 charset whose ID is NUM. 9853 charset whose ID is NUM.
9854 9854
9855 If Nth element is a list of charset IDs, N is the first byte 9855 If Nth element is a list of charset IDs, N is the first byte
9856 of one of them. The list is sorted by dimensions of the 9856 of one of them. The list is sorted by dimensions of the
9857 charsets. A charset of smaller dimension comes firtst. */ 9857 charsets. A charset of smaller dimension comes first. */
9858 val = Fmake_vector (make_number (256), Qnil); 9858 val = Fmake_vector (make_number (256), Qnil);
9859 9859
9860 for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) 9860 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9861 { 9861 {
9862 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail))); 9862 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));