emacs: src/coding.c comparison

comparison src/coding.c @ 89686:9bfefb13fe83

(Qinsufficient_source, Qinconsistent_eol) (Qinvalid_source, Qinterrupted, Qinsufficient_memory): New variables. (Vlast_code_conversion_error): New variable. (syms_of_coding): DEFSYM or DEFVAR_LISP them. (ONE_MORE_BYTE): Record the error, if any, instead of signaling an error. If a non-ASCII multibyte char is found, return the negative value of the code. All callers changed to check it. (ONE_MORE_BYTE_NO_CHECK): Likewise. (record_conversion_result): New function. All code setting coding->result is changed to call this function. (detect_coding_utf_8): Don't use the local variable incomplete. (decode_coding_utf_8): Likewise. (emacs_mule_char): Change the second arg to `const'. (detect_coding_emacs_mule): Don't use the local variable incomplete. (detect_coding_sjis): Likewise. (detect_coding_big5): Likewise. (decode_coding): Fix flushing out of unprocessed data. (make_conversion_work_buffer): Fix making of a work buffer. (decode_coding_object): Return coding->dst_object.

author	Kenichi Handa <handa@m17n.org>
date	Mon, 29 Dec 2003 07:52:49 +0000
parents	cf1ff36f92dc
children	d8fcefca5cf6

comparison

equal deleted inserted replaced

-:8970a5ea5efc
+:9bfefb13fe83
 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 Lisp_Object Qstart_process, Qopen_network_stream;
 Lisp_Object Qtarget_idx;
+Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
+Lisp_Object Qinterrupted, Qinsufficient_memory;
 int coding_system_require_warning;
 Lisp_Object Vselect_safe_coding_system_function;
 /* Mnemonic string for each format of end-of-line.  */
 Lisp_Object Vcoding_system_for_read;
 /* Coding-system for writing files and sending data to process.  */
 Lisp_Object Vcoding_system_for_write;
 /* Coding-system actually used in the latest I/O.  */
 Lisp_Object Vlast_coding_system_used;
+/* Set to non-nil when an error is detected during code conversion.  */
+Lisp_Object Vlast_code_conversion_error;
 /* A vector of length 256 which contains information about special
 Latin codes (especially for dealing with Microsoft codes).  */
 Lisp_Object Vlatin_extra_code_table;
 /* Flag to inhibit code conversion of end-of-line format.  */
 /* Two special coding systems.  */
 Lisp_Object Vsjis_coding_system;
 Lisp_Object Vbig5_coding_system;
+static void record_conversion_result (struct coding_system *coding,
+				      enum coding_result_code result);
 static int detect_coding_utf_8 P_ ((struct coding_system *,
 				    struct coding_detection_info *info));
 static void decode_coding_utf_8 P_ ((struct coding_system *));
 static int encode_coding_utf_8 P_ ((struct coding_system *));
 } while (0)
 /* Safely get one byte from the source text pointed by SRC which ends
 at SRC_END, and set C to that byte.  If there are not enough bytes
-in the source, it jumps to `no_more_source'.  The caller
+in the source, it jumps to `no_more_source'.  If multibytep is
-should declare and set these variables appropriately in advance:
+nonzero, and a multibyte character is found at SRC, set C to the
-	src, src_end, multibytep
+negative value of the character code.  The caller should declare
-*/
+and set these variables appropriately in advance:
+	src, src_end, multibytep */
-#define ONE_MORE_BYTE(c)					\
-do {								\
+#define ONE_MORE_BYTE(c)				\
-if (src == src_end)						\
+do {							\
-{								\
+if (src == src_end)					\
-	if (src_base < src)					\
+{							\
-	  coding->result = CODING_RESULT_INSUFFICIENT_SRC;	\
+	if (src_base < src)				\
-	goto no_more_source;					\
+	  record_conversion_result			\
-}								\
+	    (coding, CODING_RESULT_INSUFFICIENT_SRC);	\
-c = *src++;							\
+	goto no_more_source;				\
-if (multibytep && (c & 0x80))				\
+}							\
-{								\
+c = *src++;						\
-	if ((c & 0xFE) != 0xC0)					\
+if (multibytep && (c & 0x80))			\
-	  error ("Undecodable char found");			\
+{							\
-	c = ((c & 1) << 6) | *src++;				\
+	if ((c & 0xFE) == 0xC0)				\
-}								\
+	  c = ((c & 1) << 6) | *src++;			\
-consumed_chars++;						\
+	else						\
+	  {						\
+	    c = - string_char (--src, &src, NULL);	\
+	    record_conversion_result			\
+	      (coding, CODING_RESULT_INVALID_SRC);	\
+	  }						\
+}							\
+consumed_chars++;					\
 } while (0)
-#define ONE_MORE_BYTE_NO_CHECK(c)		\
+#define ONE_MORE_BYTE_NO_CHECK(c)			\
-do {						\
+do {							\
-c = *src++;					\
+c = *src++;						\
-if (multibytep && (c & 0x80))		\
+if (multibytep && (c & 0x80))			\
-{						\
+{							\
-	if ((c & 0xFE) != 0xC0)			\
+	if ((c & 0xFE) == 0xC0)				\
-	  error ("Undecodable char found");	\
+	  c = ((c & 1) << 6) | *src++;			\
-	c = ((c & 1) << 6) | *src++;		\
+	else						\
-}						\
+	  {						\
-consumed_chars++;				\
+	    c = - string_char (--src, &src, NULL);	\
+	    record_conversion_result			\
+	      (coding, CODING_RESULT_INVALID_SRC);	\
+	  }						\
+}							\
+consumed_chars++;					\
 } while (0)
 /* Store a byte C in the place pointed by DST and increment DST to the
 next free point, and increment PRODUCED_CHARS.  The caller should
 EMIT_TWO_BYTES (c1, c2);			\
 EMIT_TWO_BYTES (c3, c4);			\
 } while (0)
+static void
+record_conversion_result (struct coding_system *coding,
+			  enum coding_result_code result)
+{ /* Store RESULT in CODING->result and, for the error codes listed
+     in the switch below, also set Vlast_code_conversion_error to the
+     matching symbol.  */
+coding->result = result;
+switch (result) /* A RESULT with no case here (callers in this file
+                   also pass CODING_RESULT_SUCCESS and
+                   CODING_RESULT_INSUFFICIENT_DST) leaves
+                   Vlast_code_conversion_error unchanged.  */
+{
+case CODING_RESULT_INSUFFICIENT_SRC:
+Vlast_code_conversion_error = Qinsufficient_source;
+break;
+case CODING_RESULT_INCONSISTENT_EOL:
+Vlast_code_conversion_error = Qinconsistent_eol;
+break;
+case CODING_RESULT_INVALID_SRC:
+Vlast_code_conversion_error = Qinvalid_source;
+break;
+case CODING_RESULT_INTERRUPT:
+Vlast_code_conversion_error = Qinterrupted;
+break;
+case CODING_RESULT_INSUFFICIENT_MEM:
+Vlast_code_conversion_error = Qinsufficient_memory;
+break;
+}
+}
 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 do {									     \
 charset_map_loaded = 0;						     \
 c = DECODE_CHAR (charset, code);					     \
 if (charset_map_loaded)						     \
 if (BUFFERP (coding->dst_object))
 coding_alloc_by_making_gap (coding, nbytes);
 else
 coding_alloc_by_realloc (coding, nbytes);
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding_set_destination (coding);
 dst = coding->destination + offset;
 return dst;
 }
 static int
 detect_coding_utf_8 (coding, detect_info)
 struct coding_system *coding;
 struct coding_detection_info *detect_info;
 {
-const unsigned char *src = coding->source, *src_base = src;
+const unsigned char *src = coding->source, *src_base;
 const unsigned char *src_end = coding->source + coding->src_bytes;
 int multibytep = coding->src_multibyte;
 int consumed_chars = 0;
 int found = 0;
-int incomplete;
 detect_info->checked |= CATEGORY_MASK_UTF_8;
 /* A coding system of this category is always ASCII compatible.  */
 src += coding->head_ascii;
 while (1)
 {
 int c, c1, c2, c3, c4;
-incomplete = 0;
+src_base = src;
 ONE_MORE_BYTE (c);
-if (UTF_8_1_OCTET_P (c))
+if (c < 0 || UTF_8_1_OCTET_P (c))
 	continue;
-incomplete = 1;
 ONE_MORE_BYTE (c1);
-if (! UTF_8_EXTRA_OCTET_P (c1))
+if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
 	break;
 if (UTF_8_2_OCTET_LEADING_P (c))
 	{
 	  found = CATEGORY_MASK_UTF_8;
 	  continue;
 	}
 ONE_MORE_BYTE (c2);
-if (! UTF_8_EXTRA_OCTET_P (c2))
+if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
 	break;
 if (UTF_8_3_OCTET_LEADING_P (c))
 	{
 	  found = CATEGORY_MASK_UTF_8;
 	  continue;
 	}
 ONE_MORE_BYTE (c3);
-if (! UTF_8_EXTRA_OCTET_P (c3))
+if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
 	break;
 if (UTF_8_4_OCTET_LEADING_P (c))
 	{
 	  found = CATEGORY_MASK_UTF_8;
 	  continue;
 	}
 ONE_MORE_BYTE (c4);
-if (! UTF_8_EXTRA_OCTET_P (c4))
+if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
 	break;
 if (UTF_8_5_OCTET_LEADING_P (c))
 	{
 	  found = CATEGORY_MASK_UTF_8;
 	  continue;
 }
 detect_info->rejected |= CATEGORY_MASK_UTF_8;
 return 0;
 no_more_source:
-if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
 {
 detect_info->rejected |= CATEGORY_MASK_UTF_8;
 return 0;
 }
 detect_info->found |= found;
 if (charbuf >= charbuf_end)
 	break;
 ONE_MORE_BYTE (c1);
-if (UTF_8_1_OCTET_P(c1))
+if (c1 < 0)
+	{
+	  c = - c1;
+	}
+else if (UTF_8_1_OCTET_P(c1))
 	{
 	  c = c1;
 	}
 else
 	{
 	  ONE_MORE_BYTE (c2);
-	  if (! UTF_8_EXTRA_OCTET_P (c2))
+	  if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
 	    goto invalid_code;
 	  if (UTF_8_2_OCTET_LEADING_P (c1))
 	    {
 	      c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
 	      /* Reject overlong sequences here and below.  Encoders
 		goto invalid_code;
 	    }
 	  else
 	    {
 	      ONE_MORE_BYTE (c3);
-	      if (! UTF_8_EXTRA_OCTET_P (c3))
+	      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
 		goto invalid_code;
 	      if (UTF_8_3_OCTET_LEADING_P (c1))
 		{
 		  c = (((c1 & 0xF) << 12)
 		       | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
 		    goto invalid_code;
 		}
 	      else
 		{
 		  ONE_MORE_BYTE (c4);
-		  if (! UTF_8_EXTRA_OCTET_P (c4))
+		  if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
 		    goto invalid_code;
 		  if (UTF_8_4_OCTET_LEADING_P (c1))
 		    {
 		    c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
 			 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
 		      goto invalid_code;
 		    }
 		  else
 		    {
 		      ONE_MORE_BYTE (c5);
-		      if (! UTF_8_EXTRA_OCTET_P (c5))
+		      if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
 			goto invalid_code;
 		      if (UTF_8_5_OCTET_LEADING_P (c1))
 			{
 			  c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
 			       | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
 	  c = *charbuf++;
 	  dst += CHAR_STRING (c, dst);
 	  produced_chars++;
 	}
 }
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding->produced_char += produced_chars;
 coding->produced = dst - coding->destination;
 return 0;
 }
 			     | CATEGORY_MASK_UTF_16_AUTO);
 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
 				| CATEGORY_MASK_UTF_16_BE_NOSIG
 				| CATEGORY_MASK_UTF_16_LE_NOSIG);
 }
-else
+else if (c1 >= 0 && c2 >= 0)
 {
 unsigned char b1[256], b2[256];
 int b1_variants = 1, b2_variants = 1;
 int n;
 bzero (b1, 256), bzero (b2, 256);
 b1[c1]++, b2[c2]++;
 for (n = 0; n < 256 && src < src_end; n++)
 	{
+	  src_base = src;
 	  ONE_MORE_BYTE (c1);
 	  ONE_MORE_BYTE (c2);
+	  if (c1 < 0 || c2 < 0)
+	    break;
 	  if (! b1[c1++]) b1_variants++;
 	  if (! b2[c2++]) b2_variants++;
 	}
 if (b1_variants < b2_variants)
 	detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
 if (charbuf + 2 >= charbuf_end)
 	break;
 ONE_MORE_BYTE (c1);
+if (c1 < 0)
+	{
+	  *charbuf++ = -c1;
+	  continue;
+	}
 ONE_MORE_BYTE (c2);
+if (c2 < 0)
+	{
+	  *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
+	  *charbuf++ = -c2;
+	  continue;
+	}
 c = (endian == utf_16_big_endian
 	   ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
 if (surrogate)
 	{
 	  if (! UTF_16_LOW_SURROGATE_P (c))
 	    EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
 	  else
 	    EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
 	}
 }
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding->produced = dst - coding->destination;
 coding->produced_char += produced_chars;
 return 0;
 }
 char emacs_mule_bytes[256];
 int
 emacs_mule_char (coding, src, nbytes, nchars, id)
 struct coding_system *coding;
-unsigned char *src;
+const unsigned char *src;
 int *nbytes, *nchars, *id;
 {
 const unsigned char *src_end = coding->source + coding->src_bytes;
 const unsigned char *src_base = src;
 int multibytep = coding->src_multibyte;
 unsigned code;
 int c;
 int consumed_chars = 0;
 ONE_MORE_BYTE (c);
-switch (emacs_mule_bytes[c])
+if (c < 0)
 {
-case 2:
+c = -c;
-if (! (charset = emacs_mule_charset[c]))
+charset = emacs_mule_charset[0];
-	goto invalid_code;
+}
-ONE_MORE_BYTE (c);
+else
-code = c & 0x7F;
+{
-break;
+switch (emacs_mule_bytes[c])
+	{
-case 3:
+	case 2:
-if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
-	  || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
-	{
-	  ONE_MORE_BYTE (c);
 	  if (! (charset = emacs_mule_charset[c]))
 	    goto invalid_code;
 	  ONE_MORE_BYTE (c);
+	  if (c < 0)
+	    goto invalid_code;
 	  code = c & 0x7F;
-	}
+	  break;
-else
-	{
+	case 3:
-	  if (! (charset = emacs_mule_charset[c]))
+	  if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
+	      || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
+	    {
+	      ONE_MORE_BYTE (c);
+	      if (c < 0 || ! (charset = emacs_mule_charset[c]))
+		goto invalid_code;
+	      ONE_MORE_BYTE (c);
+	      if (c < 0)
+		goto invalid_code;
+	      code = c & 0x7F;
+	    }
+	  else
+	    {
+	      if (! (charset = emacs_mule_charset[c]))
+		goto invalid_code;
+	      ONE_MORE_BYTE (c);
+	      if (c < 0)
+		goto invalid_code;
+	      code = (c & 0x7F) << 8;
+	      ONE_MORE_BYTE (c);
+	      if (c < 0)
+		goto invalid_code;
+	      code |= c & 0x7F;
+	    }
+	  break;
+	case 4:
+	  ONE_MORE_BYTE (c);
+	  if (c < 0 || ! (charset = emacs_mule_charset[c]))
 	    goto invalid_code;
 	  ONE_MORE_BYTE (c);
+	  if (c < 0)
+	    goto invalid_code;
 	  code = (c & 0x7F) << 8;
 	  ONE_MORE_BYTE (c);
+	  if (c < 0)
+	    goto invalid_code;
 	  code |= c & 0x7F;
-	}
+	  break;
-break;
+	case 1:
-case 4:
+	  code = c;
-ONE_MORE_BYTE (c);
+	  charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
-if (! (charset = emacs_mule_charset[c]))
+				     ? charset_ascii : charset_eight_bit);
+	  break;
+	default:
+	  abort ();
+	}
+c = DECODE_CHAR (charset, code);
+if (c < 0)
 	goto invalid_code;
-ONE_MORE_BYTE (c);
+}
-code = (c & 0x7F) << 8;
-ONE_MORE_BYTE (c);
-code |= c & 0x7F;
-break;
-case 1:
-code = c;
-charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
-				 ? charset_ascii : charset_eight_bit);
-break;
-default:
-abort ();
-}
-c = DECODE_CHAR (charset, code);
-if (c < 0)
-goto invalid_code;
 *nbytes = src - src_base;
 *nchars = consumed_chars;
 if (id)
 *id = charset->id;
 return c;
 static int
 detect_coding_emacs_mule (coding, detect_info)
 struct coding_system *coding;
 struct coding_detection_info *detect_info;
 {
-const unsigned char *src = coding->source, *src_base = src;
+const unsigned char *src = coding->source, *src_base;
 const unsigned char *src_end = coding->source + coding->src_bytes;
 int multibytep = coding->src_multibyte;
 int consumed_chars = 0;
 int c;
 int found = 0;
-int incomplete;
 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
 /* A coding system of this category is always ASCII compatible.  */
 src += coding->head_ascii;
 while (1)
 {
-incomplete = 0;
+src_base = src;
 ONE_MORE_BYTE (c);
-incomplete = 1;
+if (c < 0)
+	continue;
 if (c == 0x80)
 	{
 	  /* Perhaps the start of a composite character.  We simply skip
 	     it because analyzing it is too heavy for detecting.  But,
 	     at least, we check that the composite character
 }
 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
 return 0;
 no_more_source:
-if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
 {
 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
 return 0;
 }
 detect_info->found |= found;
 int from, to;							\
 int consumed_chars_limit;						\
 int nbytes, nchars;							\
 									\
 ONE_MORE_BYTE (c);							\
+if (c < 0)								\
+goto invalid_code;						\
 nbytes = c - 0xA0;							\
 if (nbytes < 3)							\
 goto invalid_code;						\
 ONE_MORE_BYTE (c);							\
+if (c < 0)								\
+goto invalid_code;						\
 nchars = c - 0xA0;							\
 from = coding->produced + char_offset;				\
 to = from + nchars;							\
 ADD_COMPOSITION_DATA (charbuf, from, to, method);			\
 consumed_chars_limit = consumed_chars_base + nbytes;		\
 if (charbuf >= charbuf_end)
 	break;
 ONE_MORE_BYTE (c);
+if (c < 0)
-if (c < 0x80)
+	{
+	  *charbuf++ = -c;
+	  char_offset++;
+	}
+else if (c < 0x80)
 	{
 	  *charbuf++ = c;
 	  char_offset++;
 	}
 else if (c == 0x80)
 	{
 	  ONE_MORE_BYTE (c);
+	  if (c < 0)
+	    goto invalid_code;
 	  if (c - 0xF2 >= COMPOSITION_RELATIVE
 	      && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
 	    DECODE_EMACS_MULE_21_COMPOSITION (c);
 	  else if (c < 0xC0)
 	    DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
 	      EMIT_ONE_BYTE (code >> 8);
 	      EMIT_ONE_BYTE (code & 0xFF);
 	    }
 	}
 }
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding->produced_char += produced_chars;
 coding->produced = dst - coding->destination;
 return 0;
 }
 /* A coding system of this category is always ASCII compatible.  */
 src += coding->head_ascii;
 while (rejected != CATEGORY_MASK_ISO)
 {
+src_base = src;
 ONE_MORE_BYTE (c);
 switch (c)
 	{
 	case ISO_CODE_ESC:
 	  if (inhibit_iso_escape_detection)
 	      & CODING_ISO_FLAG_SINGLE_SHIFT)
 	    found |= CATEGORY_MASK_ISO_8_2;
 	  goto check_extra_latin;
 	default:
+	  if (c < 0)
+	    continue;
 	  if (c < 0x80)
 	    {
 	      single_shifting = 0;
 	      break;
 	    }
 if (charbuf >= charbuf_end)
 	break;
 ONE_MORE_BYTE (c1);
+if (c1 < 0)
+	goto invalid_code;
 /* We produce at most one character.  */
 switch (iso_code_class [c1])
 	{
 	case ISO_0x20_or_0x7F:
 invalid_code:
 MAYBE_FINISH_COMPOSITION ();
 src = src_base;
 consumed_chars = consumed_chars_base;
 ONE_MORE_BYTE (c);
-*charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+*charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
 char_offset++;
 coding->errors++;
 continue;
 break_loop:
 && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
 {
 ASSURE_DESTINATION (safe_room);
 ENCODE_RESET_PLANE_AND_REGISTER ();
 }
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 CODING_ISO_BOL (coding) = bol_designation;
 coding->produced_char += produced_chars;
 coding->produced = dst - coding->destination;
 return 0;
 }
 static int
 detect_coding_sjis (coding, detect_info)
 struct coding_system *coding;
 struct coding_detection_info *detect_info;
 {
-const unsigned char *src = coding->source, *src_base = src;
+const unsigned char *src = coding->source, *src_base;
 const unsigned char *src_end = coding->source + coding->src_bytes;
 int multibytep = coding->src_multibyte;
 int consumed_chars = 0;
 int found = 0;
 int c;
-int incomplete;
 detect_info->checked |= CATEGORY_MASK_SJIS;
 /* A coding system of this category is always ASCII compatible.  */
 src += coding->head_ascii;
 while (1)
 {
-incomplete = 0;
+src_base = src;
 ONE_MORE_BYTE (c);
-incomplete = 1;
 if (c < 0x80)
 	continue;
 if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
 	{
 	  ONE_MORE_BYTE (c);
 }
 detect_info->rejected |= CATEGORY_MASK_SJIS;
 return 0;
 no_more_source:
-if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
 {
 detect_info->rejected |= CATEGORY_MASK_SJIS;
 return 0;
 }
 detect_info->found |= found;
 static int
 detect_coding_big5 (coding, detect_info)
 struct coding_system *coding;
 struct coding_detection_info *detect_info;
 {
-const unsigned char *src = coding->source, *src_base = src;
+const unsigned char *src = coding->source, *src_base;
 const unsigned char *src_end = coding->source + coding->src_bytes;
 int multibytep = coding->src_multibyte;
 int consumed_chars = 0;
 int found = 0;
 int c;
-int incomplete;
 detect_info->checked |= CATEGORY_MASK_BIG5;
 /* A coding system of this category is always ASCII compatible.  */
 src += coding->head_ascii;
 while (1)
 {
-incomplete = 0;
+src_base = src;
 ONE_MORE_BYTE (c);
-incomplete = 1;
 if (c < 0x80)
 	continue;
 if (c >= 0xA1)
 	{
 	  ONE_MORE_BYTE (c);
 }
 detect_info->rejected |= CATEGORY_MASK_BIG5;
 return 0;
 no_more_source:
-if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
+if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
 {
 detect_info->rejected |= CATEGORY_MASK_BIG5;
 return 0;
 }
 detect_info->found |= found;
 if (charbuf >= charbuf_end)
 	break;
 ONE_MORE_BYTE (c);
+if (c < 0)
+	goto invalid_code;
 if (c < 0x80)
 	charset = charset_roman;
 else
 	{
 	  if (c >= 0xF0)
 invalid_code:
 src = src_base;
 consumed_chars = consumed_chars_base;
 ONE_MORE_BYTE (c);
-*charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+*charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
 char_offset++;
 coding->errors++;
 }
 no_more_source:
 if (charbuf >= charbuf_end)
 	break;
 ONE_MORE_BYTE (c);
+if (c < 0)
+	goto invalid_code;
 if (c < 0x80)
 	charset = charset_roman;
 else
 	{
 	  /* BIG5 -> Big5 */
 invalid_code:
 src = src_base;
 consumed_chars = consumed_chars_base;
 ONE_MORE_BYTE (c);
-*charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+*charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
 char_offset++;
 coding->errors++;
 }
 no_more_source:
 	    EMIT_ONE_BYTE (code | 0x80);
 	  else
 	    EMIT_ONE_ASCII_BYTE (code & 0x7F);
 	}
 }
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding->produced_char += produced_chars;
 coding->produced = dst - coding->destination;
 return 0;
 }
 	    }
 	  else
 	    EMIT_ONE_ASCII_BYTE (code & 0x7F);
 	}
 }
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding->produced_char += produced_chars;
 coding->produced = dst - coding->destination;
 return 0;
 }
 static int
 detect_coding_ccl (coding, detect_info)
 struct coding_system *coding;
 struct coding_detection_info *detect_info;
 {
-const unsigned char *src = coding->source, *src_base = src;
+const unsigned char *src = coding->source, *src_base;
 const unsigned char *src_end = coding->source + coding->src_bytes;
 int multibytep = coding->src_multibyte;
 int consumed_chars = 0;
 int found = 0;
 unsigned char *valids = CODING_CCL_VALIDS (coding);
 src += head_ascii;
 while (1)
 {
 int c;
+src_base = src;
 ONE_MORE_BYTE (c);
-if (! valids[c])
+if (c < 0 || ! valids[c])
 	break;
 if ((valids[c] > 1))
 	found = CATEGORY_MASK_CCL;
 }
 detect_info->rejected |= CATEGORY_MASK_CCL;
 }
 switch (ccl.status)
 {
 case CCL_STAT_SUSPEND_BY_SRC:
-coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
 break;
 case CCL_STAT_SUSPEND_BY_DST:
 break;
 case CCL_STAT_QUIT:
 case CCL_STAT_INVALID_CMD:
-coding->result = CODING_RESULT_INTERRUPT;
+record_conversion_result (coding, CODING_RESULT_INTERRUPT);
 break;
 default:
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 break;
 }
 coding->consumed_char += consumed_chars;
 coding->consumed = src - coding->source;
 coding->charbuf_used = charbuf - coding->charbuf;
 }
 switch (ccl.status)
 {
 case CCL_STAT_SUSPEND_BY_SRC:
-coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
 break;
 case CCL_STAT_SUSPEND_BY_DST:
-coding->result = CODING_RESULT_INSUFFICIENT_DST;
+record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
 break;
 case CCL_STAT_QUIT:
 case CCL_STAT_INVALID_CMD:
-coding->result = CODING_RESULT_INTERRUPT;
+record_conversion_result (coding, CODING_RESULT_INTERRUPT);
 break;
 default:
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 break;
 }
 coding->produced_char += produced_chars;
 coding->produced = dst - coding->destination;
 struct coding_system *coding;
 {
 coding->chars_at_source = 1;
 coding->consumed_char = 0;
 coding->consumed = 0;
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 }
 static int
 encode_coding_raw_text (coding)
 struct coding_system *coding;
 	  while (charbuf < charbuf_end && dst < dst_end)
 	    *dst++ = *charbuf++;
 	  produced_chars = dst - (coding->destination + coding->dst_bytes);
 	}
 }
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding->produced_char += produced_chars;
 coding->produced = dst - coding->destination;
 return 0;
 }
 static int
 detect_coding_charset (coding, detect_info)
 struct coding_system *coding;
 struct coding_detection_info *detect_info;
 {
-const unsigned char *src = coding->source, *src_base = src;
+const unsigned char *src = coding->source, *src_base;
 const unsigned char *src_end = coding->source + coding->src_bytes;
 int multibytep = coding->src_multibyte;
 int consumed_chars = 0;
 Lisp_Object attrs, valids;
 int found = 0;
 while (1)
 {
 int c;
+src_base = src;
 ONE_MORE_BYTE (c);
+if (c < 0)
+	continue;
 if (NILP (AREF (valids, c)))
 	break;
 if (c >= 0x80)
 	found = CATEGORY_MASK_CHARSET;
 }
 if (charbuf >= charbuf_end)
 	break;
 ONE_MORE_BYTE (c);
+if (c < 0)
+	goto invalid_code;
 code = c;
 val = AREF (valids, c);
 if (NILP (val))
 	goto invalid_code;
 invalid_code:
 src = src_base;
 consumed_chars = consumed_chars_base;
 ONE_MORE_BYTE (c);
-*charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
+*charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
 char_offset++;
 coding->errors++;
 }
 no_more_source:
 	      EMIT_ONE_BYTE (c);
 	    }
 	}
 }
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding->produced_char += produced_chars;
 coding->produced = dst - coding->destination;
 return 0;
 }
 		    {
 		      if (EQ (eol_type, Qdos))
 			{
 			  if (src == src_end)
 			    {
-			      coding->result = CODING_RESULT_INSUFFICIENT_SRC;
+			      record_conversion_result
+				(coding, CODING_RESULT_INSUFFICIENT_SRC);
 			      goto no_more_source;
 			    }
 			  if (*src == '\n')
 			    c = *src++;
 			}
 	  break;							\
 	size >>= 1;							\
 }									\
 if (! coding->charbuf)						\
 {									\
-	coding->result = CODING_RESULT_INSUFFICIENT_MEM;		\
+	record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
 	return coding->result;						\
 }									\
 coding->charbuf_size = size;					\
 } while (0)
 }
 coding->consumed = coding->consumed_char = 0;
 coding->produced = coding->produced_char = 0;
 coding->chars_at_source = 0;
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding->errors = 0;
 ALLOC_CONVERSION_WORK_AREA (coding);
 attrs = CODING_ID_ATTRS (coding->id);
 if (coding->mode & CODING_MODE_LAST_BLOCK)
 	{
 	  /* Flush out unprocessed data as binary chars.  We are sure
 	     that the amount of data is less than the size of
 	     coding->charbuf.  */
+	  coding->charbuf_used = 0;
 	  while (nbytes-- > 0)
 	    {
 	      int c = *src++;
 	      coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
 	= ! NILP (current_buffer->enable_multibyte_characters);
 }
 coding->consumed = coding->consumed_char = 0;
 coding->produced = coding->produced_char = 0;
-coding->result = CODING_RESULT_SUCCESS;
+record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding->errors = 0;
 ALLOC_CONVERSION_WORK_AREA (coding);
 do {
 {
 Lisp_Object name, workbuf;
 struct buffer *current;
 if (reused_workbuf_in_use++)
-name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
+{
+name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
+workbuf = Fget_buffer_create (name);
+}
 else
-name = Vcode_conversion_workbuf_name;
+{
-workbuf = Fget_buffer_create (name);
+name = Vcode_conversion_workbuf_name;
+workbuf = Fget_buffer_create (name);
+if (NILP (Vcode_conversion_reused_workbuf))
+	Vcode_conversion_reused_workbuf = workbuf;
+}
 current = current_buffer;
 set_buffer_internal (XBUFFER (workbuf));
 Ferase_buffer ();
 current_buffer->undo_list = Qt;
 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
 	{
 	  destination
 	    = (unsigned char *) xrealloc (destination, coding->produced);
 	  if (! destination)
 	    {
-	      coding->result = CODING_RESULT_INSUFFICIENT_DST;
+	      record_conversion_result (coding,
+					CODING_RESULT_INSUFFICIENT_DST);
 	      unbind_to (count, Qnil);
 	      return;
 	    }
 	  if (BEGV < GPT && GPT < BEGV + coding->produced_char)
 	    move_gap_both (BEGV, BEGV_BYTE);
 else
 	TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
 			  saved_pt_byte + (coding->produced - bytes));
 }
-unbind_to (count, Qnil);
+unbind_to (count, coding->dst_object);
 }
 void
 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
 			  dst_object);
 if (! norecord)
 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
-if (coding.result != CODING_RESULT_SUCCESS)
-error ("Code conversion error: %d", coding.result);
 return (BUFFERP (dst_object)
 	  ? make_number (coding.produced_char)
 	  : coding.dst_object);
 }
 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
 else
 decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
 if (! norecord)
 Vlast_coding_system_used = CODING_ID_NAME (coding.id);
-if (coding.result != CODING_RESULT_SUCCESS)
-error ("Code conversion error: %d", coding.result);
 return (BUFFERP (dst_object)
 	  ? make_number (coding.produced_char)
 	  : coding.dst_object);
 }
 ASET (Vcoding_category_table, coding_category_raw_text,
 	intern ("coding-category-raw-text"));
 ASET (Vcoding_category_table, coding_category_undecided,
 	intern ("coding-category-undecided"));
+DEFSYM (Qinsufficient_source, "insufficient-source");
+DEFSYM (Qinconsistent_eol, "inconsistent-eol");
+DEFSYM (Qinvalid_source, "invalid-source");
+DEFSYM (Qinterrupted, "interrupted");
+DEFSYM (Qinsufficient_memory, "insufficient-memory");
 defsubr (&Scoding_system_p);
 defsubr (&Sread_coding_system);
 defsubr (&Sread_non_nil_coding_system);
 defsubr (&Scheck_coding_system);
 defsubr (&Sdetect_coding_region);
 DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
 	       doc: /*
 Coding system used in the latest file or process I/O.  */);
 Vlast_coding_system_used = Qnil;
+DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
+	       doc: /*
+Error status of the last code conversion.
+When an error was detected in the last code conversion, this variable
+is set to one of the following symbols.
+`insufficient-source'
+`inconsistent-eol'
+`invalid-source'
+`interrupted'
+`insufficient-memory'
+When no error was detected, the value doesn't change.  So, to check
+the error status of a code conversion by this variable, you must
+explicitly set this variable to nil before performing code
+conversion.  */);
+Vlast_code_conversion_error = Qnil;
 DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
 	       doc: /*
 *Non-nil means always inhibit code conversion of end-of-line format.
 See info node `Coding Systems' and info node `Text and Binary' concerning
 such conversion.  */);

Mercurial > emacs

comparison src/coding.c @ 89686:9bfefb13fe83