diff src/composite.c @ 108552:03ab6621f67d

Fix bidi-composition interaction.
author Kenichi Handa <handa@etlken>
date Fri, 14 May 2010 13:14:23 +0900
parents 973b5bc5fcfe
children dce075eda1bf
line wrap: on
line diff
--- a/src/composite.c	Mon May 10 12:37:03 2010 +0900
+++ b/src/composite.c	Fri May 14 13:14:23 2010 +0900
@@ -1018,12 +1018,29 @@
   return unbind_to (count, Qnil);
 }
 
+static Lisp_Object _work_val;
+static int _work_char;
+
+/* 1 iff the character C is composable.  */
+#define CHAR_COMPOSABLE_P(C)						\
+  ((C) == 0x200C || (C) == 0x200D					\
+   || (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)),	\
+       (SYMBOLP (_work_val)						\
+	&& (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C'	\
+	&& _work_char != 'Z')))
 
 /* Update cmp_it->stop_pos to the next position after CHARPOS (and
    BYTEPOS) where character composition may happen.  If BYTEPOS is
-   negative, compute it.  If it is a static composition, set
-   cmp_it->ch to -1.  Otherwise, set cmp_it->ch to the character that
-   triggers a automatic composition.  */
+   negative, compute it.  ENDPOS is a limit of searching.  If it is
+   less than CHARPOS, search backward to ENDPOS+1 assuming that
+   set_iterator_to_next works in reverse order.  In this case, if a
+   composition closest to CHARPOS is found, set cmp_it->stop_pos to
+   the last character of the composition.
+
+   If no composition is found, set cmp_it->ch to -2.  If a static
+   composition is found, set cmp_it->ch to -1.  Otherwise, set
+   cmp_it->ch to the character that triggers the automatic
+   composition.  */
 
 void
 composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string)
@@ -1036,60 +1053,200 @@
   /* This is from forward_to_next_line_start in xdisp.c.  */
   const int MAX_NEWLINE_DISTANCE = 500;
 
-  if (endpos > charpos + MAX_NEWLINE_DISTANCE)
-    endpos = charpos + MAX_NEWLINE_DISTANCE;
-  cmp_it->stop_pos = endpos;
+  if (charpos < endpos)
+    {
+      if (endpos > charpos + MAX_NEWLINE_DISTANCE)
+	endpos = charpos + MAX_NEWLINE_DISTANCE;
+    }
+  else if (endpos < charpos)
+    {
+      /* We search backward for a position to check composition.  */
+      if (endpos < 0)
+	{
+	  /* But we don't know where to stop the searching.  */
+	  endpos = NILP (string) ? BEGV - 1 : -1;
+	  /* Usually we don't reach ENDPOS because we stop searching
+	     at an uncomposable character (NL, LRE, etc).  */
+	}
+    }
   cmp_it->id = -1;
   cmp_it->ch = -2;
-  if (find_composition (charpos, endpos, &start, &end, &prop, string)
+  cmp_it->reversed_p = 0;
+  cmp_it->stop_pos = endpos;
+  if (charpos == endpos)
+    return;
+  /* FIXME: Bidi is not yet handled well in static composition.  */
+  if (charpos < endpos
+      && find_composition (charpos, endpos, &start, &end, &prop, string)
       && COMPOSITION_VALID_P (start, end, prop))
     {
       cmp_it->stop_pos = endpos = start;
       cmp_it->ch = -1;
     }
-  if (NILP (string) && PT > charpos && PT < endpos)
-    cmp_it->stop_pos = PT;
+  if (NILP (string))
+    {
+      /* A composition never strides over PT.  */
+      if (PT > charpos)
+	{
+	  if (PT < endpos)
+	    cmp_it->stop_pos = endpos = PT;
+	}
+      else if (PT < charpos && PT > endpos)
+	{
+	  cmp_it->stop_pos = endpos = PT - 1;
+	}
+    }
   if (NILP (current_buffer->enable_multibyte_characters)
       || NILP (Vauto_composition_mode))
     return;
   if (bytepos < 0)
     {
-      if (STRINGP (string))
+      if (NILP (string))
+	bytepos = CHAR_TO_BYTE (charpos);
+      else
 	bytepos = string_char_to_byte (string, charpos);
-      else
-	bytepos = CHAR_TO_BYTE (charpos);
     }
 
   start = charpos;
-  while (charpos < endpos)
+  if (charpos < endpos)
     {
-      if (STRINGP (string))
-	FETCH_STRING_CHAR_ADVANCE (c, string, charpos, bytepos);
+      /* Forward search.  */
+      while (charpos < endpos)
+	{
+	  if (STRINGP (string))
+	    FETCH_STRING_CHAR_ADVANCE (c, string, charpos, bytepos);
+	  else
+	    FETCH_CHAR_ADVANCE (c, charpos, bytepos);
+	  if (c == '\n')
+	    {
+	      cmp_it->ch = -2;
+	      break;
+	    }
+	  val = CHAR_TABLE_REF (Vcomposition_function_table, c);
+	  if (! NILP (val))
+	    {
+	      Lisp_Object elt;
+
+	      for (; CONSP (val); val = XCDR (val))
+		{
+		  elt = XCAR (val);
+		  if (VECTORP (elt) && ASIZE (elt) == 3
+		      && NATNUMP (AREF (elt, 1))
+		      && charpos - 1 - XFASTINT (AREF (elt, 1)) >= start)
+		    break;
+		}
+	      if (CONSP (val))
+		{
+		  cmp_it->lookback = XFASTINT (AREF (elt, 1));
+		  cmp_it->stop_pos = charpos - 1 - cmp_it->lookback;
+		  cmp_it->ch = c;
+		  return;
+		}
+	    }
+	}
+    }
+  else
+    {
+      /* Search backward for a pattern that may be composed and the
+	 position of (possibly) the last character of the match is
+	 closest to (but not after) START.  The reason for the last
+	 character is that set_iterator_to_next works in reverse order
+	 and, thus we must stop at the last character for composition
+	 check.  */
+      unsigned char *p;
+      int len;
+      /* limit byte position used in fast_looking_at.  This is the
+	 byte position of the next character of START. */
+      EMACS_INT limit;
+
+      if (NILP (string))
+	p = BYTE_POS_ADDR (bytepos);
       else
-	FETCH_CHAR_ADVANCE (c, charpos, bytepos);
-      if (c == '\n')
-	{
-	  cmp_it->ch = -2;
-	  break;
-	}
-      val = CHAR_TABLE_REF (Vcomposition_function_table, c);
-      if (! NILP (val))
+	p = SDATA (string) + bytepos;
+      c = STRING_CHAR_AND_LENGTH (p, len);
+      limit = bytepos + len;
+      while (CHAR_COMPOSABLE_P (c))
 	{
-	  Lisp_Object elt;
-
-	  for (; CONSP (val); val = XCDR (val))
+	  for (val = CHAR_TABLE_REF (Vcomposition_function_table, c);
+	       CONSP (val); val = XCDR (val))
 	    {
-	      elt = XCAR (val);
-	      if (VECTORP (elt) && ASIZE (elt) == 3 && NATNUMP (AREF (elt, 1))
-		  && charpos - 1 - XFASTINT (AREF (elt, 1)) >= start)
-		break;
+	      Lisp_Object elt = XCAR (val);
+	      int back, len;
+
+	      if (VECTORP (elt) && ASIZE (elt) == 3
+		  && NATNUMP (AREF (elt, 1))
+		  && charpos - (back = XFASTINT (AREF (elt, 1))) > endpos)
+		{
+		  EMACS_INT cpos = charpos - back, bpos;
+
+		  if (back == 0)
+		    bpos = bytepos;
+		  else
+		    bpos = (NILP (string) ? CHAR_TO_BYTE (cpos)
+			    : string_char_to_byte (string, cpos));
+		  if (STRINGP (AREF (elt, 0)))
+		    len = fast_looking_at (AREF (elt, 0), cpos, bpos,
+					   start + 1, limit, string);
+		  else
+		    len = 1;
+		  if (len > 0)
+		    {
+		      /* Make CPOS points the last character of match.
+			 Note that LEN is byte-length.  */
+		      bpos += len;
+		      if (NILP (string))
+			cpos = BYTE_TO_CHAR (bpos) - 1;
+		      else
+			cpos = string_byte_to_char (string, bpos) - 1;
+		      back = cpos - (charpos - back);
+		      if (cmp_it->stop_pos < cpos
+			  || (cmp_it->stop_pos == cpos
+			      && cmp_it->lookback < back))
+			{
+			  cmp_it->stop_pos = cpos;
+			  cmp_it->ch = c;
+			  cmp_it->lookback = back;
+			}
+		    }
+		}
 	    }
-	  if (CONSP (val))
+	  if (charpos - 1 == endpos)
+	    break;
+	  if (STRINGP (string))
+	    {
+	      p--, bytepos--;
+	      while (! CHAR_HEAD_P (*p))
+		p--, bytepos--;
+	      charpos--;
+	    }
+	  else
 	    {
-	      cmp_it->lookback = XFASTINT (AREF (elt, 1));
-	      cmp_it->stop_pos = charpos - 1 - cmp_it->lookback;
-	      cmp_it->ch = c;
-	      return;
+	      DEC_BOTH (charpos, bytepos);
+	      p = BYTE_POS_ADDR (bytepos);
+	    }
+	  c = STRING_CHAR (p);
+	}
+      if (cmp_it->ch >= 0)
+	/* We found a position to check.  */
+	return;
+      /* Skip all uncomposable characters.  */
+      if (NILP (string))
+	{
+	  while (charpos - 1 > endpos && ! CHAR_COMPOSABLE_P (c))
+	    {
+	      DEC_BOTH (charpos, bytepos);
+	      c = FETCH_MULTIBYTE_CHAR (bytepos);
+	    }
+	}
+      else
+	{
+	  while (charpos - 1 > endpos && ! CHAR_COMPOSABLE_P (c))
+	    {
+	      p--;
+	      while (! CHAR_HEAD_P (*p))
+		p--;
+	      charpos--;
+	      c = STRING_CHAR (p);
 	    }
 	}
     }
@@ -1104,8 +1261,8 @@
    string.  In that case, FACE must not be NULL.
 
    If the character is composed, setup members of CMP_IT (id, nglyphs,
-   and from), and return 1.  Otherwise, update CMP_IT->stop_pos, and
-   return 0.  */
+   from, to, reversed_p), and return 1.  Otherwise, update
+   CMP_IT->stop_pos, and return 0.  */
 
 int
 composition_reseat_it (cmp_it, charpos, bytepos, endpos, w, face, string)
@@ -1115,13 +1272,29 @@
      struct face *face;
      Lisp_Object string;
 {
-  if (NILP (string) && charpos < PT && PT < endpos)
-    endpos = PT;
+  if (endpos <= charpos)
+    {
+      if (NILP (string))
+	{
+	  if (endpos < 0)
+	    endpos = BEGV;
+	  if (endpos < PT && PT < charpos) 
+	    endpos = PT;
+	}
+      else if (endpos < 0)
+	endpos = 0;
+    }
+  else
+    {
+      if (NILP (string) && charpos < PT && PT < endpos)
+	endpos = PT;
+    }
 
   if (cmp_it->ch == -2)
     {
       composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string);
-      if (cmp_it->ch == -2)
+      if (cmp_it->stop_pos != charpos)
+	/* The current position is not composed.  */
 	return 0;
     }
 
@@ -1145,18 +1318,46 @@
       int i;
 
       val = CHAR_TABLE_REF (Vcomposition_function_table, cmp_it->ch);
-      for (; CONSP (val); val = XCDR (val))
+      if (charpos < endpos)
 	{
-	  elt = XCAR (val);
-	  if (cmp_it->lookback == XFASTINT (AREF (elt, 1)))
-	    break;
+	  for (; CONSP (val); val = XCDR (val))
+	    {
+	      elt = XCAR (val);
+	      if (cmp_it->lookback == XFASTINT (AREF (elt, 1)))
+		break;
+	    }
+	  if (NILP (val))
+	    goto no_composition;
+
+	  val = autocmp_chars (val, charpos, bytepos, endpos, w, face, string);
+	  if (! composition_gstring_p (val))
+	    goto no_composition;
+	  cmp_it->reversed_p = 0;
 	}
-      if (NILP (val))
-	goto no_composition;
+      else
+	{
+	  EMACS_INT saved_charpos = charpos, saved_bytepos = bytepos;
 
-      val = autocmp_chars (val, charpos, bytepos, endpos, w, face, string);
-      if (! composition_gstring_p (val))
-	goto no_composition;
+	  if (cmp_it->lookback > 0)
+	    {
+	      charpos -= cmp_it->lookback;
+	      if (charpos < endpos)
+		goto no_composition;
+	      if (STRINGP (string))
+		bytepos = string_char_to_byte (string, charpos);
+	      else
+		bytepos = CHAR_TO_BYTE (charpos);
+	    }
+	  val = autocmp_chars (val, charpos, bytepos, saved_charpos + 1,
+			       w, face, string);
+	  if (! composition_gstring_p (val)
+	      || charpos + LGSTRING_CHAR_LEN (val) <= saved_charpos)
+	    {
+	      charpos = saved_charpos, bytepos = saved_bytepos;
+	      goto no_composition;
+	    }
+	  cmp_it->reversed_p = 1;
+	}
       if (NILP (LGSTRING_ID (val)))
 	val = composition_gstring_put_cache (val, -1);
       cmp_it->id = XINT (LGSTRING_ID (val));
@@ -1164,22 +1365,40 @@
 	if (NILP (LGSTRING_GLYPH (val, i)))
 	  break;
       cmp_it->nglyphs = i;
+      cmp_it->from = 0;
+      cmp_it->to = i;
     }
   else
     goto no_composition;
-  cmp_it->from = 0;
   return 1;
 
  no_composition:
-  charpos++;
-  if (STRINGP (string))
-    bytepos += MULTIBYTE_LENGTH_NO_CHECK (SDATA (string) + bytepos);
+  if (charpos == endpos)
+    return 0;
+  if (charpos < endpos)
+    {
+      charpos++;
+      if (STRINGP (string))
+	bytepos += MULTIBYTE_LENGTH_NO_CHECK (SDATA (string) + bytepos);
+      else
+	INC_POS (bytepos);
+    }
   else
-    INC_POS (bytepos);
+    {
+      charpos--;
+      /* BYTEPOS is calculated in composition_compute_stop_pos */
+      bytepos = -1;
+    }
   composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string);
   return 0;
 }
 
+/* Update nchars, nbytes, and width of the current grapheme cluster
+   which is identified by CMP_IT->from (if the composition is static
+   or automatic in l2r context) or CMPT_IT->to (if the composition is
+   automatic in r2l context).  In addition, in the former case, update
+   CMP_IT->to, and in the latter case, update CMP_IT->from.  */
+
 int
 composition_update_it (cmp_it, charpos, bytepos, string)
      struct composition_it *cmp_it;
@@ -1215,7 +1434,7 @@
 	  cmp_it->nchars = LGSTRING_CHAR_LEN (gstring);
 	  cmp_it->width = 0;
 	}
-      else
+      else if (! cmp_it->reversed_p)
 	{
 	  Lisp_Object glyph = LGSTRING_GLYPH (gstring, cmp_it->from);
 	  int from = LGLYPH_FROM (glyph);
@@ -1234,6 +1453,33 @@
 		cmp_it->width += CHAR_WIDTH (LGLYPH_CHAR (glyph));
 	    }
 	}
+      else
+	{
+	  int from_idx = cmp_it->to - 1;
+	  Lisp_Object glyph = LGSTRING_GLYPH (gstring, from_idx);
+	  int from = LGLYPH_FROM (glyph);
+
+	  c = XINT (LGSTRING_CHAR (gstring, from));
+	  cmp_it->nchars = LGLYPH_TO (glyph) - from + 1;
+	  cmp_it->width = (LGLYPH_WIDTH (glyph) > 0
+			   ? CHAR_WIDTH (LGLYPH_CHAR (glyph)) : 0);
+	  for (from_idx--; from_idx >= 0; from_idx--)
+	    {
+	      glyph = LGSTRING_GLYPH (gstring, from_idx);
+	      if (LGLYPH_FROM (glyph) != from)
+		break;
+	      if (LGLYPH_WIDTH (glyph) > 0)
+		cmp_it->width += CHAR_WIDTH (LGLYPH_CHAR (glyph));
+	    }
+	  cmp_it->from = from_idx + 1;
+	  charpos -= cmp_it->nchars - 1;
+	  bytepos += CHAR_BYTES (c);
+	  if (STRINGP (string))
+	    cmp_it->nbytes = bytepos - string_char_to_byte (string, charpos);
+	  else
+	    cmp_it->nbytes = bytepos - CHAR_TO_BYTE (charpos);
+	  return c;
+	}
     }
 
   charpos += cmp_it->nchars;
@@ -1279,17 +1525,6 @@
     (POSITION).pos--;				\
   } while (0)
 
-static Lisp_Object _work_val;
-static int _work_char;
-
-/* 1 iff the character C is composable.  */
-#define CHAR_COMPOSABLE_P(C)						\
-  ((C) == 0x200C || (C) == 0x200D					\
-   || (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)),	\
-       (SYMBOLP (_work_val)						\
-	&& (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C'	\
-	&& _work_char != 'Z')))
-
 /* This is like find_composition, but find an automatic composition
    instead.  If found, set *GSTRING to the glyph-string representing
    the composition, and return 1.  Otherwise, return 0.  */