comparison src/coding.c @ 22226:557fac086b1b

(ascii_skip_code): New variable. (detect_coding_mask): Skip ASCII codes at the head according to ascii_skip_code. (coding_priorities): New variable. (Fset_coding_priority_internal): New function. (detect_coding): Call detect_coding_mask with coding_priorities as arg PRIORITIES. (init_coding_once): Initialize ascii_skip_code. (syms_of_coding): Declare set-coding-priority-internal as a Lisp function.
author Kenichi Handa <handa@m17n.org>
date Mon, 25 May 1998 08:08:07 +0000
parents fc4aaf1b1772
children 1deb7d79af00
comparison
equal deleted inserted replaced
22225:6f56af1aab96 22226:557fac086b1b
361 "coding-category-big5", 361 "coding-category-big5",
362 "coding-category-raw-text", 362 "coding-category-raw-text",
363 "coding-category-binary" 363 "coding-category-binary"
364 }; 364 };
365 365
366 /* Table pointers to coding systems corresponding to each coding 366 /* Table of pointers to coding systems corresponding to each coding
367 categories. */ 367 categories. */
368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX]; 368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
369
370 /* Table of coding category masks. Nth element is a mask for a coding
371 category whose priority is Nth. */
372 static
373 int coding_priorities[CODING_CATEGORY_IDX_MAX];
369 374
370 /* Flag to tell if we look up translation table on character code 375 /* Flag to tell if we look up translation table on character code
371 conversion. */ 376 conversion. */
372 Lisp_Object Venable_character_translation; 377 Lisp_Object Venable_character_translation;
373 /* Standard translation table to look up on decoding (reading). */ 378 /* Standard translation table to look up on decoding (reading). */
3165 highest priority. Priorities of categories are also specified by a 3170 highest priority. Priorities of categories are also specified by a
3166 user in a Lisp variable `coding-category-list'. 3171 user in a Lisp variable `coding-category-list'.
3167 3172
3168 */ 3173 */
3169 3174
3175 static
3176 int ascii_skip_code[256];
3177
3170 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded. 3178 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3171 If it detects possible coding systems, return an integer in which 3179 If it detects possible coding systems, return an integer in which
3172 appropriate flag bits are set. Flag bits are defined by macros 3180 appropriate flag bits are set. Flag bits are defined by macros
3173 CODING_CATEGORY_MASK_XXX in `coding.h'. 3181 CODING_CATEGORY_MASK_XXX in `coding.h'.
3174 3182
3179 unsigned char *source; 3187 unsigned char *source;
3180 int src_bytes, *priorities, *skip; 3188 int src_bytes, *priorities, *skip;
3181 { 3189 {
3182 register unsigned char c; 3190 register unsigned char c;
3183 unsigned char *src = source, *src_end = source + src_bytes; 3191 unsigned char *src = source, *src_end = source + src_bytes;
3184 unsigned int mask = (CODING_CATEGORY_MASK_ISO_7BIT 3192 unsigned int mask;
3185 | CODING_CATEGORY_MASK_ISO_SHIFT);
3186 int i; 3193 int i;
3187 3194
3188 /* At first, skip all ASCII characters and control characters except 3195 /* At first, skip all ASCII characters and control characters except
3189 for three ISO2022 specific control characters. */ 3196 for three ISO2022 specific control characters. */
3197 ascii_skip_code[ISO_CODE_SO] = 0;
3198 ascii_skip_code[ISO_CODE_SI] = 0;
3199 ascii_skip_code[ISO_CODE_ESC] = 0;
3200
3190 label_loop_detect_coding: 3201 label_loop_detect_coding:
3191 while (src < src_end) 3202 while (src < src_end && ascii_skip_code[*src]) src++;
3192 {
3193 c = *src;
3194 if (c >= 0x80
3195 || ((mask & CODING_CATEGORY_MASK_ISO_7BIT)
3196 && c == ISO_CODE_ESC)
3197 || ((mask & CODING_CATEGORY_MASK_ISO_SHIFT)
3198 && (c == ISO_CODE_SI || c == ISO_CODE_SO)))
3199 break;
3200 src++;
3201 }
3202 *skip = src - source; 3203 *skip = src - source;
3203 3204
3204 if (src >= src_end) 3205 if (src >= src_end)
3205 /* We found nothing other than ASCII. There's nothing to do. */ 3206 /* We found nothing other than ASCII. There's nothing to do. */
3206 return 0; 3207 return 0;
3214 mask = detect_coding_iso2022 (src, src_end); 3215 mask = detect_coding_iso2022 (src, src_end);
3215 if (mask == 0) 3216 if (mask == 0)
3216 { 3217 {
3217 /* No valid ISO2022 code follows C. Try again. */ 3218 /* No valid ISO2022 code follows C. Try again. */
3218 src++; 3219 src++;
3219 mask = (c != ISO_CODE_ESC 3220 if (c == ISO_CODE_ESC)
3220 ? CODING_CATEGORY_MASK_ISO_7BIT 3221 ascii_skip_code[ISO_CODE_ESC] = 1;
3221 : CODING_CATEGORY_MASK_ISO_SHIFT); 3222 else
3223 ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3222 goto label_loop_detect_coding; 3224 goto label_loop_detect_coding;
3223 } 3225 }
3224 if (priorities) 3226 if (priorities)
3225 goto label_return_highest_only; 3227 goto label_return_highest_only;
3226 } 3228 }
3310 unsigned char *src; 3312 unsigned char *src;
3311 int src_bytes; 3313 int src_bytes;
3312 { 3314 {
3313 unsigned int idx; 3315 unsigned int idx;
3314 int skip, mask, i; 3316 int skip, mask, i;
3315 int priorities[CODING_CATEGORY_IDX_MAX];
3316 Lisp_Object val = Vcoding_category_list; 3317 Lisp_Object val = Vcoding_category_list;
3317 3318
3318 i = 0; 3319 mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3319 while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
3320 {
3321 if (! SYMBOLP (XCONS (val)->car))
3322 break;
3323 idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
3324 if (idx >= CODING_CATEGORY_IDX_MAX)
3325 break;
3326 priorities[i++] = (1 << idx);
3327 val = XCONS (val)->cdr;
3328 }
3329 /* If coding-category-list is valid and contains all coding
3330 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
3331 the following code saves Emacs from crashing. */
3332 while (i < CODING_CATEGORY_IDX_MAX)
3333 priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
3334
3335 mask = detect_coding_mask (src, src_bytes, priorities, &skip);
3336 coding->heading_ascii = skip; 3320 coding->heading_ascii = skip;
3337 3321
3338 if (!mask) return; 3322 if (!mask) return;
3339 3323
3340 /* We found a single coding system of the highest priority in MASK. */ 3324 /* We found a single coding system of the highest priority in MASK. */
4985 coding_system_table[i]); 4969 coding_system_table[i]);
4986 } 4970 }
4987 return Qnil; 4971 return Qnil;
4988 } 4972 }
4989 4973
4974 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
4975 Sset_coding_priority_internal, 0, 0, 0,
4976 "Update internal database for the current value of `coding-category-list'.\n\
4977 This function is internal use only.")
4978 ()
4979 {
4980 int i = 0, idx; /* I counts the slots of coding_priorities filled so far. */
4981 Lisp_Object val = Vcoding_category_list;
4982
4983 while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX) /* Stop at the end of the list or when every slot is filled. */
4984 {
4985 if (! SYMBOLP (XCONS (val)->car)) /* A non-symbol element ends the scan. */
4986 break;
4987 idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
4988 if (idx >= CODING_CATEGORY_IDX_MAX) /* NOTE(review): IDX is a signed int, so a negative value is not rejected here -- confirm XFASTINT cannot yield one. */
4989 break;
4990 coding_priorities[i++] = (1 << idx); /* Store the mask bit of this category at priority I. */
4991 val = XCONS (val)->cdr;
4992 }
4993 /* If coding-category-list is valid and contains all coding
4994 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not,
4995 the remaining slots are filled with the raw-text mask, which saves Emacs from crashing. */
4996 while (i < CODING_CATEGORY_IDX_MAX)
4997 coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
4998
4999 return Qnil;
5000 }
5001
4990 #endif /* emacs */ 5002 #endif /* emacs */
4991 5003
4992 5004
4993 /*** 8. Post-amble ***/ 5005 /*** 8. Post-amble ***/
4994 5006
5039 setup_coding_system (Qnil, &keyboard_coding); 5051 setup_coding_system (Qnil, &keyboard_coding);
5040 setup_coding_system (Qnil, &terminal_coding); 5052 setup_coding_system (Qnil, &terminal_coding);
5041 setup_coding_system (Qnil, &safe_terminal_coding); 5053 setup_coding_system (Qnil, &safe_terminal_coding);
5042 5054
5043 bzero (coding_system_table, sizeof coding_system_table); 5055 bzero (coding_system_table, sizeof coding_system_table);
5056
5057 bzero (ascii_skip_code, sizeof ascii_skip_code);
5058 for (i = 0; i < 128; i++)
5059 ascii_skip_code[i] = 1;
5044 5060
5045 #if defined (MSDOS) || defined (WINDOWSNT) 5061 #if defined (MSDOS) || defined (WINDOWSNT)
5046 system_eol_type = CODING_EOL_CRLF; 5062 system_eol_type = CODING_EOL_CRLF;
5047 #else 5063 #else
5048 system_eol_type = CODING_EOL_LF; 5064 system_eol_type = CODING_EOL_LF;
5178 defsubr (&Sterminal_coding_system); 5194 defsubr (&Sterminal_coding_system);
5179 defsubr (&Sset_keyboard_coding_system_internal); 5195 defsubr (&Sset_keyboard_coding_system_internal);
5180 defsubr (&Skeyboard_coding_system); 5196 defsubr (&Skeyboard_coding_system);
5181 defsubr (&Sfind_operation_coding_system); 5197 defsubr (&Sfind_operation_coding_system);
5182 defsubr (&Supdate_iso_coding_systems); 5198 defsubr (&Supdate_iso_coding_systems);
5199 defsubr (&Sset_coding_priority_internal);
5183 5200
5184 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list, 5201 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5185 "List of coding systems.\n\ 5202 "List of coding systems.\n\
5186 \n\ 5203 \n\
5187 Do not alter the value of this variable manually. This variable should be\n\ 5204 Do not alter the value of this variable manually. This variable should be\n\