emacs: src/coding.c comparison

comparison src/coding.c @ 92992:49c4ea77b83a

(CHAR_STRING_ADVANCE_NO_UNIFY) (STRING_CHAR_ADVANCE_NO_UNIFY): New macros. (coding_alloc_by_making_gap): Fix the way to preserve data in the gap. (alloc_destination): Fix the 2nd arg to coding_alloc_by_making_gap. (encode_coding_utf_8): Use CHAR_STRING_ADVANCE_NO_UNIFY instead of CHAR_STRING_ADVANCE. (produce_chars): Fix for the case that the source and the destination are the same buffer. Use CHAR_STRING_ADVANCE_NO_UNIFY instead of CHAR_STRING_ADVANCE. (consume_chars): Use STRING_CHAR_ADVANCE_NO_UNIFY instead of STRING_CHAR_ADVANCE.

author	Kenichi Handa <handa@m17n.org>
date	Sun, 16 Mar 2008 01:24:55 +0000
parents	5f5f07a5c076
children	212fa666680e

comparison

equal deleted inserted replaced

-:24a6717aed7f
+:49c4ea77b83a
 	dst_end = coding->destination + coding->dst_bytes;	\
 }								\
 } while (0)
+/* Store multibyte form of the character C in P, and advance P to the
+end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
+never calls MAYBE_UNIFY_CHAR.
+
+C is compared against MAX_1_BYTE_CHAR ... MAX_5_BYTE_CHAR in turn to
+select a 1- to 5-byte form; any larger value is handed to BYTE8_STRING
+after subtracting 0x3FFF80.  C and P are each expanded several times,
+so neither argument may have side effects, and P must be a modifiable
+pointer lvalue.  Every use of C is parenthesized so that an expression
+argument (for instance one built with `|') keeps its meaning inside
+the shifts and masks.  */
+#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)	\
+do {						\
+if ((c) <= MAX_1_BYTE_CHAR)			\
+*(p)++ = (c);				\
+else if ((c) <= MAX_2_BYTE_CHAR)		\
+*(p)++ = (0xC0 | ((c) >> 6)),		\
+	*(p)++ = (0x80 | ((c) & 0x3F));		\
+else if ((c) <= MAX_3_BYTE_CHAR)		\
+*(p)++ = (0xE0 | ((c) >> 12)),		\
+	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),	\
+	*(p)++ = (0x80 | ((c) & 0x3F));		\
+else if ((c) <= MAX_4_BYTE_CHAR)		\
+*(p)++ = (0xF0 | ((c) >> 18)),		\
+	*(p)++ = (0x80 | (((c) >> 12) & 0x3F)),	\
+	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),	\
+	*(p)++ = (0x80 | ((c) & 0x3F));		\
+else if ((c) <= MAX_5_BYTE_CHAR)		\
+*(p)++ = 0xF8,				\
+	*(p)++ = (0x80 | (((c) >> 18) & 0x0F)),	\
+	*(p)++ = (0x80 | (((c) >> 12) & 0x3F)),	\
+	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),	\
+	*(p)++ = (0x80 | ((c) & 0x3F));		\
+else					\
+(p) += BYTE8_STRING ((c) - 0x3FFF80, p);	\
+} while (0)
+/* Return the character code of character whose multibyte form is at
+P, and advance P to the end of the multibyte form.  This is like
+STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.
+
+The length of the form is chosen from the bits of the first byte:
+1 byte if 0x80 is clear, else 2 bytes if 0x20 is clear, else 3 bytes
+if 0x10 is clear, else 4 bytes if 0x08 is clear, else 5 bytes.  For
+the multi-byte forms P is advanced first and the bytes are then read
+back through negative indices.  In the 5-byte case the first byte
+contributes no bits to the result.  A 2-byte form whose first byte is
+below 0xC2 additionally has 0x3FFF80 ORed into the result; presumably
+this mirrors the BYTE8_STRING branch of CHAR_STRING_ADVANCE_NO_UNIFY
+(NOTE(review): confirm against the BYTE8_STRING definition).
+
+P is expanded many times, so it must be a pointer lvalue without side
+effects.  The macro does not check that the trailing bytes exist or
+are well formed.  */
+#define STRING_CHAR_ADVANCE_NO_UNIFY(p)				\
+(!((p)[0] & 0x80)						\
+? *(p)++							\
+: ! ((p)[0] & 0x20)						\
+? ((p) += 2,							\
+((((p)[-2] & 0x1F) << 6)					\
+| ((p)[-1] & 0x3F)					\
+| ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))	\
+: ! ((p)[0] & 0x10)						\
+? ((p) += 3,							\
+((((p)[-3] & 0x0F) << 12)					\
+| (((p)[-2] & 0x3F) << 6)				\
+| ((p)[-1] & 0x3F)))					\
+: ! ((p)[0] & 0x08)						\
+? ((p) += 4,							\
+((((p)[-4] & 0xF) << 18)					\
+| (((p)[-3] & 0x3F) << 12)				\
+| (((p)[-2] & 0x3F) << 6)				\
+| ((p)[-1] & 0x3F)))					\
+: ((p) += 5,							\
+((((p)[-4] & 0x3F) << 18)					\
+| (((p)[-3] & 0x3F) << 12)				\
+| (((p)[-2] & 0x3F) << 6)				\
+| ((p)[-1] & 0x3F))))
 static void
 coding_set_source (coding)
 struct coding_system *coding;
 {
 						    coding->dst_bytes + bytes);
 coding->dst_bytes += bytes;
 }
 static void
-coding_alloc_by_making_gap (coding, offset, bytes)
+coding_alloc_by_making_gap (coding, gap_head_used, bytes)
 struct coding_system *coding;
-EMACS_INT offset, bytes;
+EMACS_INT gap_head_used, bytes;
 {
-if (BUFFERP (coding->dst_object)
+if (EQ (coding->src_object, coding->dst_object))
-&& EQ (coding->src_object, coding->dst_object))
+{
-{
+/* The gap may contain the produced data at the head and not-yet
-EMACS_INT add = offset + (coding->src_bytes - coding->consumed);
+	 consumed data at the tail.  To preserve those data, we at
+	 first make the gap size to zero, then increase the gap
-GPT += offset, GPT_BYTE += offset;
+	 size.  */
-GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
+EMACS_INT add = GAP_SIZE;
+GPT += gap_head_used, GPT_BYTE += gap_head_used;
+GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 make_gap (bytes);
 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
-GPT -= offset, GPT_BYTE -= offset;
+GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
 }
 else
 {
 Lisp_Object this_buffer;
 unsigned char *dst;
 {
 EMACS_INT offset = dst - coding->destination;
 if (BUFFERP (coding->dst_object))
-coding_alloc_by_making_gap (coding, offset, nbytes);
+{
+struct buffer *buf = XBUFFER (coding->dst_object);
+coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
+}
 else
 coding_alloc_by_realloc (coding, nbytes);
 record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding_set_destination (coding);
 dst = coding->destination + offset;
 	      c = CHAR_TO_BYTE8 (c);
 	      EMIT_ONE_BYTE (c);
 	    }
 	  else
 	    {
-	      CHAR_STRING_ADVANCE (c, pend);
+	      CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
 	      for (p = str; p < pend; p++)
 		EMIT_ONE_BYTE (*p);
 	    }
 	}
 }
 	  ASSURE_DESTINATION (safe_room);
 	  c = *charbuf++;
 	  if (CHAR_BYTE8_P (c))
 	    *dst++ = CHAR_TO_BYTE8 (c);
 	  else
-	    dst += CHAR_STRING (c, dst);
+	    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
 	  produced_chars++;
 	}
 }
 record_conversion_result (coding, CODING_RESULT_SUCCESS);
 coding->produced_char += produced_chars;
 {
 /* Source characters are in coding->charbuf.  */
 int *buf = coding->charbuf;
 int *buf_end = buf + coding->charbuf_used;
-if (BUFFERP (coding->src_object)
+if (EQ (coding->src_object, coding->dst_object))
-	  && EQ (coding->src_object, coding->dst_object))
+	{
-	dst_end = ((unsigned char *) coding->source) + coding->consumed;
+	  coding_set_source (coding);
+	  dst_end = ((unsigned char *) coding->source) + coding->consumed;
+	}
 while (buf < buf_end)
 	{
 	  int c = *buf, i;
 		{
 		  dst = alloc_destination (coding,
 					   buf_end - buf
 					   + MAX_MULTIBYTE_LENGTH * to_nchars,
 					   dst);
-		  dst_end = coding->destination + coding->dst_bytes;
+		  if (EQ (coding->src_object, coding->dst_object))
+		    {
+		      coding_set_source (coding);
+		      dst_end = ((unsigned char *) coding->source) + coding->consumed;
+		    }
+		  else
+		    dst_end = coding->destination + coding->dst_bytes;
 		}
 	      for (i = 0; i < to_nchars; i++)
 		{
 		  if (i > 0)
 		    c = XINT (AREF (trans, i));
 		  if (coding->dst_multibyte
 		      || ! CHAR_BYTE8_P (c))
-		    CHAR_STRING_ADVANCE (c, dst);
+		    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
 		  else
 		    *dst++ = CHAR_TO_BYTE8 (c);
 		}
 	      produced_chars += to_nchars;
 	      *buf++ = to_nchars;
 {
 /* Source characters are at coding->source.  */
 const unsigned char *src = coding->source;
 const unsigned char *src_end = src + coding->consumed;
+if (EQ (coding->dst_object, coding->src_object))
+	dst_end = (unsigned char *) src;
 if (coding->src_multibyte != coding->dst_multibyte)
 	{
 	  if (coding->src_multibyte)
 	    {
 	      int multibytep = 1;
 						   dst);
 			  dst_end = coding->destination + coding->dst_bytes;
 			  coding_set_source (coding);
 			  src = coding->source + offset;
 			  src_end = coding->source + coding->src_bytes;
+			  if (EQ (coding->src_object, coding->dst_object))
+			    dst_end = (unsigned char *) src;
 			}
 		    }
 		  *dst++ = c;
 		  produced_chars++;
 		}
 		    if (EQ (coding->src_object, coding->dst_object))
 		      dst_end = (unsigned char *) src;
 		    if (dst >= dst_end - 1)
 		      {
 			EMACS_INT offset = src - coding->source;
+			EMACS_INT more_bytes;
-			dst = alloc_destination (coding, src_end - src + 2,
-						 dst);
+			if (EQ (coding->src_object, coding->dst_object))
+			  more_bytes = ((src_end - src) / 2) + 2;
+			else
+			  more_bytes = src_end - src + 2;
+			dst = alloc_destination (coding, more_bytes, dst);
 			dst_end = coding->destination + coding->dst_bytes;
 			coding_set_source (coding);
 			src = coding->source + offset;
 			src_end = coding->source + coding->src_bytes;
+			if (EQ (coding->src_object, coding->dst_object))
+			  dst_end = (unsigned char *) src;
 		      }
 		  }
 		EMIT_ONE_BYTE (c);
 	      }
 	}
 	  EMACS_INT bytes;
 	  if (coding->encoder == encode_coding_raw_text)
 	    c = *src++, pos++;
 	  else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
-	    c = STRING_CHAR_ADVANCE (src), pos += bytes;
+	    c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
 	  else
 	    c = BYTE8_TO_CHAR (*src), src++, pos++;
 	}
 else
-	c = STRING_CHAR_ADVANCE (src), pos++;
+	c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
 	c = '\n';
 if (! EQ (eol_type, Qunix))
 	{
 	  if (c == '\n')

Mercurial > emacs

comparison src/coding.c @ 92992:49c4ea77b83a