emacs: src/coding.c comparison

comparison src/coding.c @ 23325:bbd06336cd0c

(check_composing_code): If the current composing sequence doesn't end properly, return -1. (DECODE_CHARACTER_ASCII): Update coding->composed_chars. (DECODE_CHARACTER_DIMENSION1): Likewise. (decode_coding_iso2022): Check validity of a composing sequence. (code_convert_string): If the length of text to be converted is shrunk to zero, don't perform code conversion. (shrink_decoding_region): Fix previous change.

author	Kenichi Handa <handa@m17n.org>
date	Sat, 26 Sep 1998 04:20:48 +0000
parents	86a8b8566369
children	2da87b489590

comparison

equal deleted inserted replaced

-:4c5f12c6041c
+:bbd06336cd0c
 point to an appropriate area and the variable `coding' to point to
 the coding-system of the currently decoding text in advance.  */
 /* Decode one ASCII character C.  */
-#define DECODE_CHARACTER_ASCII(c)				\
+#define DECODE_CHARACTER_ASCII(c)		\
-do {								\
+do {						\
-if (COMPOSING_P (coding->composing))			\
+if (COMPOSING_P (coding->composing))	\
-*dst++ = 0xA0, *dst++ = (c) | 0x80;			\
+{						\
-else							\
+	*dst++ = 0xA0, *dst++ = (c) | 0x80;	\
-{								\
+	coding->composed_chars++;		\
-	*dst++ = (c);						\
+}						\
-	coding->produced_char++;				\
+else					\
-}								\
+{						\
+	*dst++ = (c);				\
+	coding->produced_char++;		\
+}						\
 } while (0)
 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 position-code is C.  */
 #define DECODE_CHARACTER_DIMENSION1(charset, c)				\
 do {									\
 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);	\
 if (COMPOSING_P (coding->composing))				\
-*dst++ = leading_code + 0x20;					\
+{									\
+	*dst++ = leading_code + 0x20;					\
+	coding->composed_chars++;					\
+}									\
 else								\
 {									\
 	*dst++ = leading_code;						\
 	coding->produced_char++;					\
 }									\
 	    }
 	  else
 	    invalid_code_found = 1;
 	}
 }
-return (invalid_code_found
+return (invalid_code_found ? src - src_start : -1);
-	  ? src - src_start
-	  : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
 }
 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 int
 if (!NILP (Venable_character_translation) && NILP (translation_table))
 translation_table = Vstandard_translation_table_for_decode;
 coding->produced_char = 0;
+coding->composed_chars = 0;
 coding->fake_multibyte = 0;
 while (src < src_end && (dst_bytes
 			   ? (dst < adjusted_dst_end)
 			   : (dst < src - 6)))
 {
 		if (result1 == 0)
 		  {
 		    coding->composing = (c1 == '0'
 					 ? COMPOSING_NO_RULE_HEAD
 					 : COMPOSING_WITH_RULE_HEAD);
-		    coding->produced_char++;
+		    coding->composed_chars = 0;
 		  }
 		else if (result1 > 0)
 		  {
 		    if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
 		      {
 			bcopy (src_base, dst, result1 + 2);
 			src += result1;
 			dst += result1 + 2;
 			coding->produced_char += result1 + 2;
+			coding->fake_multibyte = 1;
 		      }
 		    else
 		      {
 			result = CODING_FINISH_INSUFFICIENT_DST;
 			goto label_end_of_loop_2;
 		  goto label_end_of_loop;
 	      }
 	      break;
 	    case '1':		/* end composing */
+	      if (coding->composed_chars > 0)
+		{
+		  if (coding->composed_chars == 1)
+		    {
+		      unsigned char *this_char_start = dst;
+		      int this_bytes;
+		      /* Only one character is in the composing
+			 sequence.  Make it a normal character.  */
+		      while (*--this_char_start != LEADING_CODE_COMPOSITION);
+		      dst = (this_char_start
+			     + (coding->composing == COMPOSING_NO_RULE_TAIL
+				? 1 : 2));
+		      *dst -= 0x20;
+		      if (*dst == 0x80)
+			*++dst &= 0x7F;
+		      this_bytes = BYTES_BY_CHAR_HEAD (*dst);
+		      while (this_bytes--) *this_char_start++ = *dst++;
+		      dst = this_char_start;
+		    }
+		  coding->produced_char++;
+		}
 	      coding->composing = COMPOSING_NO;
 	      break;
 	    case '[':		/* specification of direction */
 	      if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
 	    endp++;
 	  break;
 	case CODING_CATEGORY_IDX_ISO_7:
 	case CODING_CATEGORY_IDX_ISO_7_TIGHT:
-	  /* We can skip all characters at the tail except for ESC and
+	  {
-the following 2-byte at the tail.  */
+	    /* We can skip all characters at the tail except for 8-bit
-	  if (eol_conversion)
+	       codes and ESC and the following 2-byte at the tail.  */
-	    while (begp < endp
+	    unsigned char *eight_bit = NULL;
-		   && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
-	      endp--;
+	    if (eol_conversion)
-	  else
+	      while (begp < endp
-	    while (begp < endp
+		     && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
-		   && (c = endp[-1]) != ISO_CODE_ESC)
+		{
-	      endp--;
+		  if (!eight_bit && c & 0x80) eight_bit = endp;
-	  /* Do not consider LF as ascii if preceded by CR, since that
+		  endp--;
-confuses eol decoding. */
+		}
-	  if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
+	    else
-	    endp++;
+	      while (begp < endp
-	  if (begp < endp && endp[-1] == ISO_CODE_ESC)
+		     && (c = endp[-1]) != ISO_CODE_ESC)
-	    {
+		{
-	      if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
+		  if (!eight_bit && c & 0x80) eight_bit = endp;
-		/* This is an ASCII designation sequence.  We can
+		  endp--;
-surely skip the tail.  */
+		}
-		endp += 2;
+	    /* Do not consider LF as ascii if preceded by CR, since that
-	      else
+	       confuses eol decoding. */
-		/* Hmmm, we can't skip the tail.  */
+	    if (begp < endp && endp < endp_orig
-		endp = endp_orig;
+		&& endp[-1] == '\r' && endp[0] == '\n')
-	    }
+	      endp++;
+	    if (begp < endp && endp[-1] == ISO_CODE_ESC)
+	      {
+		if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
+		  /* This is an ASCII designation sequence.  We can
+		     surely skip the tail.  But, if we have
+		     encountered an 8-bit code, skip only the codes
+		     after that.  */
+		  endp = eight_bit ? eight_bit : endp + 2;
+		else
+		  /* Hmmm, we can't skip the tail.  */
+		  endp = endp_orig;
+	      }
+	    else if (eight_bit)
+	      endp = eight_bit;
+	  }
 	}
 }
 *beg += begp - begp_orig;
 *end += endp - endp_orig;
 return;
 if (encodep)
 	shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
 else
 	shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
 }
-if (from == to_byte
+if (from == to_byte)
-&& ! (coding->mode & CODING_MODE_LAST_BLOCK
-	    && CODING_REQUIRE_FLUSHING (coding)))
 return (nocopy ? str : Fcopy_sequence (str));
 if (encodep)
 len = encoding_buffer_size (coding, to_byte - from);
 else

Mercurial > emacs

comparison src/coding.c @ 23325:bbd06336cd0c