Mercurial > emacs
comparison src/coding.c @ 22226:557fac086b1b
(ascii_skip_code): New variable.
(detect_coding_mask): Skip ASCII codes at the head according to
ascii_skip_code.
(coding_priorities): New variable.
(Fset_coding_priority_internal): New function.
(detect_coding): Call detect_coding_mask with coding_priorities as
arg PRIORITIES.
(init_coding_once): Initialize ascii_skip_code.
(syms_of_coding): Declare set-coding-priority-internal as a Lisp
function.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Mon, 25 May 1998 08:08:07 +0000 |
| parents | fc4aaf1b1772 |
| children | 1deb7d79af00 |
comparison
equal
deleted
inserted
replaced
| 22225:6f56af1aab96 | 22226:557fac086b1b |
|---|---|
| 361 "coding-category-big5", | 361 "coding-category-big5", |
| 362 "coding-category-raw-text", | 362 "coding-category-raw-text", |
| 363 "coding-category-binary" | 363 "coding-category-binary" |
| 364 }; | 364 }; |
| 365 | 365 |
| 366 /* Table pointers to coding systems corresponding to each coding | 366 /* Table of pointers to coding systems corresponding to each coding |
| 367 categories. */ | 367 categories. */ |
| 368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX]; | 368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX]; |
| 369 | |
| 370 /* Table of coding category masks. Nth element is a mask for a coding | |
| 371 category of which priority is Nth. */ | |
| 372 static | |
| 373 int coding_priorities[CODING_CATEGORY_IDX_MAX]; | |
| 369 | 374 |
| 370 /* Flag to tell if we look up translation table on character code | 375 /* Flag to tell if we look up translation table on character code |
| 371 conversion. */ | 376 conversion. */ |
| 372 Lisp_Object Venable_character_translation; | 377 Lisp_Object Venable_character_translation; |
| 373 /* Standard translation table to look up on decoding (reading). */ | 378 /* Standard translation table to look up on decoding (reading). */ |
| 3165 highest priority. Priorities of categories are also specified by a | 3170 highest priority. Priorities of categories are also specified by a |
| 3166 user in a Lisp variable `coding-category-list'. | 3171 user in a Lisp variable `coding-category-list'. |
| 3167 | 3172 |
| 3168 */ | 3173 */ |
| 3169 | 3174 |
| 3175 static | |
| 3176 int ascii_skip_code[256]; | |
| 3177 | |
| 3170 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded. | 3178 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded. |
| 3171 If it detects possible coding systems, return an integer in which | 3179 If it detects possible coding systems, return an integer in which |
| 3172 appropriate flag bits are set. Flag bits are defined by macros | 3180 appropriate flag bits are set. Flag bits are defined by macros |
| 3173 CODING_CATEGORY_MASK_XXX in `coding.h'. | 3181 CODING_CATEGORY_MASK_XXX in `coding.h'. |
| 3174 | 3182 |
| 3179 unsigned char *source; | 3187 unsigned char *source; |
| 3180 int src_bytes, *priorities, *skip; | 3188 int src_bytes, *priorities, *skip; |
| 3181 { | 3189 { |
| 3182 register unsigned char c; | 3190 register unsigned char c; |
| 3183 unsigned char *src = source, *src_end = source + src_bytes; | 3191 unsigned char *src = source, *src_end = source + src_bytes; |
| 3184 unsigned int mask = (CODING_CATEGORY_MASK_ISO_7BIT | 3192 unsigned int mask; |
| 3185 | CODING_CATEGORY_MASK_ISO_SHIFT); | |
| 3186 int i; | 3193 int i; |
| 3187 | 3194 |
| 3188 /* At first, skip all ASCII characters and control characters except | 3195 /* At first, skip all ASCII characters and control characters except |
| 3189 for three ISO2022 specific control characters. */ | 3196 for three ISO2022 specific control characters. */ |
| 3197 ascii_skip_code[ISO_CODE_SO] = 0; | |
| 3198 ascii_skip_code[ISO_CODE_SI] = 0; | |
| 3199 ascii_skip_code[ISO_CODE_ESC] = 0; | |
| 3200 | |
| 3190 label_loop_detect_coding: | 3201 label_loop_detect_coding: |
| 3191 while (src < src_end) | 3202 while (src < src_end && ascii_skip_code[*src]) src++; |
| 3192 { | |
| 3193 c = *src; | |
| 3194 if (c >= 0x80 | |
| 3195 || ((mask & CODING_CATEGORY_MASK_ISO_7BIT) | |
| 3196 && c == ISO_CODE_ESC) | |
| 3197 || ((mask & CODING_CATEGORY_MASK_ISO_SHIFT) | |
| 3198 && (c == ISO_CODE_SI || c == ISO_CODE_SO))) | |
| 3199 break; | |
| 3200 src++; | |
| 3201 } | |
| 3202 *skip = src - source; | 3203 *skip = src - source; |
| 3203 | 3204 |
| 3204 if (src >= src_end) | 3205 if (src >= src_end) |
| 3205 /* We found nothing other than ASCII. There's nothing to do. */ | 3206 /* We found nothing other than ASCII. There's nothing to do. */ |
| 3206 return 0; | 3207 return 0; |
| 3214 mask = detect_coding_iso2022 (src, src_end); | 3215 mask = detect_coding_iso2022 (src, src_end); |
| 3215 if (mask == 0) | 3216 if (mask == 0) |
| 3216 { | 3217 { |
| 3217 /* No valid ISO2022 code follows C. Try again. */ | 3218 /* No valid ISO2022 code follows C. Try again. */ |
| 3218 src++; | 3219 src++; |
| 3219 mask = (c != ISO_CODE_ESC | 3220 if (c == ISO_CODE_ESC) |
| 3220 ? CODING_CATEGORY_MASK_ISO_7BIT | 3221 ascii_skip_code[ISO_CODE_ESC] = 1; |
| 3221 : CODING_CATEGORY_MASK_ISO_SHIFT); | 3222 else |
| 3223 ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1; | |
| 3222 goto label_loop_detect_coding; | 3224 goto label_loop_detect_coding; |
| 3223 } | 3225 } |
| 3224 if (priorities) | 3226 if (priorities) |
| 3225 goto label_return_highest_only; | 3227 goto label_return_highest_only; |
| 3226 } | 3228 } |
| 3310 unsigned char *src; | 3312 unsigned char *src; |
| 3311 int src_bytes; | 3313 int src_bytes; |
| 3312 { | 3314 { |
| 3313 unsigned int idx; | 3315 unsigned int idx; |
| 3314 int skip, mask, i; | 3316 int skip, mask, i; |
| 3315 int priorities[CODING_CATEGORY_IDX_MAX]; | |
| 3316 Lisp_Object val = Vcoding_category_list; | 3317 Lisp_Object val = Vcoding_category_list; |
| 3317 | 3318 |
| 3318 i = 0; | 3319 mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip); |
| 3319 while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX) | |
| 3320 { | |
| 3321 if (! SYMBOLP (XCONS (val)->car)) | |
| 3322 break; | |
| 3323 idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index)); | |
| 3324 if (idx >= CODING_CATEGORY_IDX_MAX) | |
| 3325 break; | |
| 3326 priorities[i++] = (1 << idx); | |
| 3327 val = XCONS (val)->cdr; | |
| 3328 } | |
| 3329 /* If coding-category-list is valid and contains all coding | |
| 3330 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not, | |
| 3331 the following code saves Emacs from crashing. */ | |
| 3332 while (i < CODING_CATEGORY_IDX_MAX) | |
| 3333 priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT; | |
| 3334 | |
| 3335 mask = detect_coding_mask (src, src_bytes, priorities, &skip); | |
| 3336 coding->heading_ascii = skip; | 3320 coding->heading_ascii = skip; |
| 3337 | 3321 |
| 3338 if (!mask) return; | 3322 if (!mask) return; |
| 3339 | 3323 |
| 3340 /* We found a single coding system of the highest priority in MASK. */ | 3324 /* We found a single coding system of the highest priority in MASK. */ |
| 4985 coding_system_table[i]); | 4969 coding_system_table[i]); |
| 4986 } | 4970 } |
| 4987 return Qnil; | 4971 return Qnil; |
| 4988 } | 4972 } |
| 4989 | 4973 |
| 4974 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal, | |
| 4975 Sset_coding_priority_internal, 0, 0, 0, | |
| 4976 "Update internal database for the current value of `coding-category-list'.\n\ | |
| 4977 This function is internal use only.") | |
| 4978 () | |
| 4979 { | |
| 4980 int i = 0, idx; | |
| 4981 Lisp_Object val = Vcoding_category_list; | |
| 4982 | |
| 4983 while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX) | |
| 4984 { | |
| 4985 if (! SYMBOLP (XCONS (val)->car)) | |
| 4986 break; | |
| 4987 idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index)); | |
| 4988 if (idx >= CODING_CATEGORY_IDX_MAX) | |
| 4989 break; | |
| 4990 coding_priorities[i++] = (1 << idx); | |
| 4991 val = XCONS (val)->cdr; | |
| 4992 } | |
| 4993 /* If coding-category-list is valid and contains all coding | |
| 4994 categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not, | |
| 4995 the following code saves Emacs from crashing. */ | |
| 4996 while (i < CODING_CATEGORY_IDX_MAX) | |
| 4997 coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT; | |
| 4998 | |
| 4999 return Qnil; | |
| 5000 } | |
| 5001 | |
| 4990 #endif /* emacs */ | 5002 #endif /* emacs */ |
| 4991 | 5003 |
| 4992 | 5004 |
| 4993 /*** 8. Post-amble ***/ | 5005 /*** 8. Post-amble ***/ |
| 4994 | 5006 |
| 5039 setup_coding_system (Qnil, &keyboard_coding); | 5051 setup_coding_system (Qnil, &keyboard_coding); |
| 5040 setup_coding_system (Qnil, &terminal_coding); | 5052 setup_coding_system (Qnil, &terminal_coding); |
| 5041 setup_coding_system (Qnil, &safe_terminal_coding); | 5053 setup_coding_system (Qnil, &safe_terminal_coding); |
| 5042 | 5054 |
| 5043 bzero (coding_system_table, sizeof coding_system_table); | 5055 bzero (coding_system_table, sizeof coding_system_table); |
| 5056 | |
| 5057 bzero (ascii_skip_code, sizeof ascii_skip_code); | |
| 5058 for (i = 0; i < 128; i++) | |
| 5059 ascii_skip_code[i] = 1; | |
| 5044 | 5060 |
| 5045 #if defined (MSDOS) || defined (WINDOWSNT) | 5061 #if defined (MSDOS) || defined (WINDOWSNT) |
| 5046 system_eol_type = CODING_EOL_CRLF; | 5062 system_eol_type = CODING_EOL_CRLF; |
| 5047 #else | 5063 #else |
| 5048 system_eol_type = CODING_EOL_LF; | 5064 system_eol_type = CODING_EOL_LF; |
| 5178 defsubr (&Sterminal_coding_system); | 5194 defsubr (&Sterminal_coding_system); |
| 5179 defsubr (&Sset_keyboard_coding_system_internal); | 5195 defsubr (&Sset_keyboard_coding_system_internal); |
| 5180 defsubr (&Skeyboard_coding_system); | 5196 defsubr (&Skeyboard_coding_system); |
| 5181 defsubr (&Sfind_operation_coding_system); | 5197 defsubr (&Sfind_operation_coding_system); |
| 5182 defsubr (&Supdate_iso_coding_systems); | 5198 defsubr (&Supdate_iso_coding_systems); |
| 5199 defsubr (&Sset_coding_priority_internal); | |
| 5183 | 5200 |
| 5184 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list, | 5201 DEFVAR_LISP ("coding-system-list", &Vcoding_system_list, |
| 5185 "List of coding systems.\n\ | 5202 "List of coding systems.\n\ |
| 5186 \n\ | 5203 \n\ |
| 5187 Do not alter the value of this variable manually. This variable should be\n\ | 5204 Do not alter the value of this variable manually. This variable should be\n\ |
