comparison src/coding.c @ 101040:92b6c5b767f8

Fix Bug #876: (inhibit_null_byte_detection): New variable. (detect_coding, detect_coding_system): Don't pay attention to null bytes if inhibit_null_byte_detection is non-zero. (syms_of_coding) <inhibit-null-byte-detection>: Declare and document. <inhibit-iso-escape-detection>: Doc fix.
author Eli Zaretskii <eliz@gnu.org>
date Sat, 10 Jan 2009 10:40:45 +0000
parents e038c1a8307c
children 674e67257137
comparison
equal deleted inserted replaced
101039:246092ea8477 101040:92b6c5b767f8
377 /* Flag to inhibit code conversion of end-of-line format. */ 377 /* Flag to inhibit code conversion of end-of-line format. */
378 int inhibit_eol_conversion; 378 int inhibit_eol_conversion;
379 379
380 /* Flag to inhibit ISO2022 escape sequence detection. */ 380 /* Flag to inhibit ISO2022 escape sequence detection. */
381 int inhibit_iso_escape_detection; 381 int inhibit_iso_escape_detection;
382
383 /* Flag to inhibit detection of binary files through null bytes. */
384 int inhibit_null_byte_detection;
382 385
383 /* Flag to make buffer-file-coding-system inherit from process-coding. */ 386 /* Flag to make buffer-file-coding-system inherit from process-coding. */
384 int inherit_process_coding_system; 387 int inherit_process_coding_system;
385 388
386 /* Coding system to be used to encode text for terminal display when 389 /* Coding system to be used to encode text for terminal display when
5904 } 5907 }
5905 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE; 5908 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
5906 break; 5909 break;
5907 } 5910 }
5908 } 5911 }
5909 else if (! c) 5912 else if (! c && !inhibit_null_byte_detection)
5910 { 5913 {
5911 null_byte_found = 1; 5914 null_byte_found = 1;
5912 if (eight_bit_found) 5915 if (eight_bit_found)
5913 break; 5916 break;
5914 } 5917 }
7788 } 7791 }
7789 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE; 7792 detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
7790 break; 7793 break;
7791 } 7794 }
7792 } 7795 }
7793 else if (! c) 7796 else if (! c && !inhibit_null_byte_detection)
7794 { 7797 {
7795 null_byte_found = 1; 7798 null_byte_found = 1;
7796 if (eight_bit_found) 7799 if (eight_bit_found)
7797 break; 7800 break;
7798 } 7801 }
10293 10296
10294 10297
10295 DEFVAR_BOOL ("inhibit-iso-escape-detection", 10298 DEFVAR_BOOL ("inhibit-iso-escape-detection",
10296 &inhibit_iso_escape_detection, 10299 &inhibit_iso_escape_detection,
10297 doc: /* 10300 doc: /*
10298 If non-nil, Emacs ignores ISO2022's escape sequence on code detection. 10301 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10299 10302
10300 By default, on reading a file, Emacs tries to detect how the text is 10303 When Emacs reads text, it tries to detect how the text is encoded.
10301 encoded. This code detection is sensitive to escape sequences. If 10304 This code detection is sensitive to escape sequences. If Emacs sees
10302 the sequence is valid as ISO2022, the code is determined as one of 10305 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10303 the ISO2022 encodings, and the file is decoded by the corresponding 10306 of the ISO-2022 encodings, and decodes text by the corresponding coding
10304 coding system (e.g. `iso-2022-7bit'). 10307 system (e.g. `iso-2022-7bit').
10305 10308
10306 However, there may be a case that you want to read escape sequences in 10309 However, there may be a case that you want to read escape sequences in
10307 a file as is. In such a case, you can set this variable to non-nil. 10310 a file as is. In such a case, you can set this variable to non-nil.
10308 Then, as the code detection ignores any escape sequences, no file is 10311 Then the code detection will ignore any escape sequences, and no text is
10309 detected as encoded in some ISO2022 encoding. The result is that all 10312 detected as encoded in some ISO-2022 encoding. The result is that all
10310 escape sequences become visible in a buffer. 10313 escape sequences become visible in a buffer.
10311 10314
10312 The default value is nil, and it is strongly recommended not to change 10315 The default value is nil, and it is strongly recommended not to change
10313 it. That is because many Emacs Lisp source files that contain 10316 it. That is because many Emacs Lisp source files that contain
10314 non-ASCII characters are encoded by the coding system `iso-2022-7bit' 10317 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10315 in Emacs's distribution, and they won't be decoded correctly on 10318 in Emacs's distribution, and they won't be decoded correctly on
10316 reading if you suppress escape sequence detection. 10319 reading if you suppress escape sequence detection.
10317 10320
10318 The other way to read escape sequences in a file without decoding is 10321 The other way to read escape sequences in a file without decoding is
10319 to explicitly specify some coding system that doesn't use ISO2022's 10322 to explicitly specify some coding system that doesn't use ISO-2022
10320 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */); 10323 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument]. */);
10321 inhibit_iso_escape_detection = 0; 10324 inhibit_iso_escape_detection = 0;
10325
10326 DEFVAR_BOOL ("inhibit-null-byte-detection",
10327 &inhibit_null_byte_detection,
10328 doc: /* If non-nil, Emacs ignores null bytes on code detection.
10329 By default, Emacs treats text that contains null bytes as binary data,
10330 and does not attempt to decode it. The effect is as if you specified
10331 `no-conversion' for reading that text.
10332
10333 Set this to non-nil when regular text happens to include null bytes.
10334 Examples are Index nodes of Info files and null-byte delimited output
10335 from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
10336 decode text as usual. */);
10337 inhibit_null_byte_detection = 0;
10322 10338
10323 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input, 10339 DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10324 doc: /* Char table for translating self-inserting characters. 10340 doc: /* Char table for translating self-inserting characters.
10325 This is applied to the result of input methods, not their input. 10341 This is applied to the result of input methods, not their input.
10326 See also `keyboard-translate-table'. */); 10342 See also `keyboard-translate-table'. */);