Mercurial > emacs
comparison src/coding.c @ 101040:92b6c5b767f8
Fix Bug #876:
(inhibit_null_byte_detection): New variable.
(detect_coding, detect_coding_system): Don't pay attention to null bytes if
inhibit_null_byte_detection is non-zero.
(syms_of_coding) <inhibit-null-byte-detection>: Declare and document.
<inhibit-iso-escape-detection>: Doc fix.
| author | Eli Zaretskii <eliz@gnu.org> |
|---|---|
| date | Sat, 10 Jan 2009 10:40:45 +0000 |
| parents | e038c1a8307c |
| children | 674e67257137 |
comparison
equal
deleted
inserted
replaced
| 101039:246092ea8477 | 101040:92b6c5b767f8 |
|---|---|
| 377 /* Flag to inhibit code conversion of end-of-line format. */ | 377 /* Flag to inhibit code conversion of end-of-line format. */ |
| 378 int inhibit_eol_conversion; | 378 int inhibit_eol_conversion; |
| 379 | 379 |
| 380 /* Flag to inhibit ISO2022 escape sequence detection. */ | 380 /* Flag to inhibit ISO2022 escape sequence detection. */ |
| 381 int inhibit_iso_escape_detection; | 381 int inhibit_iso_escape_detection; |
| 382 | |
| 383 /* Flag to inhibit detection of binary files through null bytes. */ | |
| 384 int inhibit_null_byte_detection; | |
| 382 | 385 |
| 383 /* Flag to make buffer-file-coding-system inherit from process-coding. */ | 386 /* Flag to make buffer-file-coding-system inherit from process-coding. */ |
| 384 int inherit_process_coding_system; | 387 int inherit_process_coding_system; |
| 385 | 388 |
| 386 /* Coding system to be used to encode text for terminal display when | 389 /* Coding system to be used to encode text for terminal display when |
| 5904 } | 5907 } |
| 5905 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE; | 5908 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE; |
| 5906 break; | 5909 break; |
| 5907 } | 5910 } |
| 5908 } | 5911 } |
| 5909 else if (! c) | 5912 else if (! c && !inhibit_null_byte_detection) |
| 5910 { | 5913 { |
| 5911 null_byte_found = 1; | 5914 null_byte_found = 1; |
| 5912 if (eight_bit_found) | 5915 if (eight_bit_found) |
| 5913 break; | 5916 break; |
| 5914 } | 5917 } |
| 7788 } | 7791 } |
| 7789 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE; | 7792 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE; |
| 7790 break; | 7793 break; |
| 7791 } | 7794 } |
| 7792 } | 7795 } |
| 7793 else if (! c) | 7796 else if (! c && !inhibit_null_byte_detection) |
| 7794 { | 7797 { |
| 7795 null_byte_found = 1; | 7798 null_byte_found = 1; |
| 7796 if (eight_bit_found) | 7799 if (eight_bit_found) |
| 7797 break; | 7800 break; |
| 7798 } | 7801 } |
| 10293 | 10296 |
| 10294 | 10297 |
| 10295 DEFVAR_BOOL ("inhibit-iso-escape-detection", | 10298 DEFVAR_BOOL ("inhibit-iso-escape-detection", |
| 10296 &inhibit_iso_escape_detection, | 10299 &inhibit_iso_escape_detection, |
| 10297 doc: /* | 10300 doc: /* |
| 10298 If non-nil, Emacs ignores ISO2022's escape sequence on code detection. | 10301 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection. |
| 10299 | 10302 |
| 10300 By default, on reading a file, Emacs tries to detect how the text is | 10303 When Emacs reads text, it tries to detect how the text is encoded. |
| 10301 encoded. This code detection is sensitive to escape sequences. If | 10304 This code detection is sensitive to escape sequences. If Emacs sees |
| 10302 the sequence is valid as ISO2022, the code is determined as one of | 10305 a valid ISO-2022 escape sequence, it assumes the text is encoded in one |
| 10303 the ISO2022 encodings, and the file is decoded by the corresponding | 10306 of the ISO-2022 encodings, and decodes text by the corresponding coding |
| 10304 coding system (e.g. `iso-2022-7bit'). | 10307 system (e.g. `iso-2022-7bit'). |
| 10305 | 10308 |
| 10306 However, there may be a case that you want to read escape sequences in | 10309 However, there may be a case that you want to read escape sequences in |
| 10307 a file as is. In such a case, you can set this variable to non-nil. | 10310 a file as is. In such a case, you can set this variable to non-nil. |
| 10308 Then, as the code detection ignores any escape sequences, no file is | 10311 Then the code detection will ignore any escape sequences, and no text is |
| 10309 detected as encoded in some ISO2022 encoding. The result is that all | 10312 detected as encoded in some ISO-2022 encoding. The result is that all |
| 10310 escape sequences become visible in a buffer. | 10313 escape sequences become visible in a buffer. |
| 10311 | 10314 |
| 10312 The default value is nil, and it is strongly recommended not to change | 10315 The default value is nil, and it is strongly recommended not to change |
| 10313 it. That is because many Emacs Lisp source files that contain | 10316 it. That is because many Emacs Lisp source files that contain |
| 10314 non-ASCII characters are encoded by the coding system `iso-2022-7bit' | 10317 non-ASCII characters are encoded by the coding system `iso-2022-7bit' |
| 10315 in Emacs's distribution, and they won't be decoded correctly on | 10318 in Emacs's distribution, and they won't be decoded correctly on |
| 10316 reading if you suppress escape sequence detection. | 10319 reading if you suppress escape sequence detection. |
| 10317 | 10320 |
| 10318 The other way to read escape sequences in a file without decoding is | 10321 The other way to read escape sequences in a file without decoding is |
| 10319 to explicitly specify some coding system that doesn't use ISO2022's | 10322 to explicitly specify some coding system that doesn't use ISO-2022 |
| 10320 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */); | 10323 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */); |
| 10321 inhibit_iso_escape_detection = 0; | 10324 inhibit_iso_escape_detection = 0; |
| 10325 | |
| 10326 DEFVAR_BOOL ("inhibit-null-byte-detection", | |
| 10327 &inhibit_null_byte_detection, | |
| 10328 doc: /* If non-nil, Emacs ignores null bytes on code detection. | |
| 10329 By default, Emacs treats text containing null bytes as binary data, and does not attempt to | |
| 10330 decode it. The effect is as if you specified `no-conversion' for | |
| 10331 reading that text. | |
| 10332 | |
| 10333 Set this to non-nil when regular text happens to include null bytes. | |
| 10334 Examples are Index nodes of Info files and null-byte delimited output | |
| 10335 from GNU Find and GNU Grep. Emacs will then ignore the null bytes and | |
| 10336 decode text as usual. */); | |
| 10337 inhibit_null_byte_detection = 0; | |
| 10322 | 10338 |
| 10323 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input, | 10339 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input, |
| 10324 doc: /* Char table for translating self-inserting characters. | 10340 doc: /* Char table for translating self-inserting characters. |
| 10325 This is applied to the result of input methods, not their input. | 10341 This is applied to the result of input methods, not their input. |
| 10326 See also `keyboard-translate-table'. */); | 10342 See also `keyboard-translate-table'. */); |
