diff src/bidi.c @ 110412:f67e6d84ff96

Fix bug #7038 with cursor motion in paragraphs w/o strong characters. xdisp.c (Fcurrent_bidi_paragraph_direction): Call bidi_paragraph_init with NO_DEFAULT_P non-zero. bidi.c (bidi_paragraph_init): Accept an additional argument NO_DEFAULT_P; all callers changed. If NO_DEFAULT_P is non-zero, search back until a paragraph with a strong directional character is found, and use that to determine paragraph's base direction. dispextern.h (bidi_paragraph_init): Update prototype.
author Eli Zaretskii <eliz@gnu.org>
date Sat, 18 Sep 2010 13:59:53 +0200
parents 7d60b9e85349
children 446e46eafccb
line wrap: on
line diff
--- a/src/bidi.c	Sat Sep 18 10:21:30 2010 +0000
+++ b/src/bidi.c	Sat Sep 18 13:59:53 2010 +0200
@@ -583,18 +583,26 @@
   return pos_byte;
 }
 
-/* Determine the direction, a.k.a. base embedding level, of the
+/* Determine the base direction, a.k.a. base embedding level, of the
    paragraph we are about to iterate through.  If DIR is either L2R or
    R2L, just use that.  Otherwise, determine the paragraph direction
-   from the first strong character of the paragraph.
+   from the first strong directional character of the paragraph.
 
-   Note that this gives the paragraph separator the same direction as
-   the preceding paragraph, even though Emacs generally views the
-   separartor as not belonging to any paragraph.  */
+   NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
+   has no strong directional characters and both DIR and
+   bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
+   in the buffer until a paragraph is found with a strong character,
+   or until hitting BEGV.  In the latter case, fall back to L2R.  This
+   flag is used in current-bidi-paragraph-direction.
+
+   Note that this function gives the paragraph separator the same
+   direction as the preceding paragraph, even though Emacs generally
+   views the separator as not belonging to any paragraph.  */
 void
-bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
+bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
 {
   EMACS_INT bytepos = bidi_it->bytepos;
+  EMACS_INT pstartbyte;
 
   /* Special case for an empty buffer. */
   if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
@@ -643,49 +651,75 @@
 
       /* We are either at the beginning of a paragraph or in the
 	 middle of it.  Find where this paragraph starts.  */
-      bytepos = bidi_find_paragraph_start (pos, bytepos);
-
+      pstartbyte = bidi_find_paragraph_start (pos, bytepos);
       bidi_it->separator_limit = -1;
       bidi_it->new_paragraph = 0;
-      ch = FETCH_CHAR (bytepos);
-      ch_len = CHAR_BYTES (ch);
-      pos = BYTE_TO_CHAR (bytepos);
-      type = bidi_get_type (ch, NEUTRAL_DIR);
+
+      /* The following loop is run more than once only if NO_DEFAULT_P
+	 is non-zero.  */
+      do {
+	bytepos = pstartbyte;
+	ch = FETCH_CHAR (bytepos);
+	ch_len = CHAR_BYTES (ch);
+	pos = BYTE_TO_CHAR (bytepos);
+	type = bidi_get_type (ch, NEUTRAL_DIR);
 
-      for (pos++, bytepos += ch_len;
-	   /* NOTE: UAX#9 says to search only for L, AL, or R types of
-	      characters, and ignore RLE, RLO, LRE, and LRO.  However,
-	      I'm not sure it makes sense to omit those 4; should try
-	      with and without that to see the effect.  */
-	   (bidi_get_category (type) != STRONG)
-	     || (bidi_ignore_explicit_marks_for_paragraph_level
-		 && (type == RLE || type == RLO
-		     || type == LRE || type == LRO));
-	   type = bidi_get_type (ch, NEUTRAL_DIR))
-	{
-	  if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
-	    break;
-	  if (bytepos >= ZV_BYTE)
-	    {
-	      /* Pretend there's a paragraph separator at end of buffer.  */
-	      type = NEUTRAL_B;
+	for (pos++, bytepos += ch_len;
+	     /* NOTE: UAX#9 says to search only for L, AL, or R types
+		of characters, and ignore RLE, RLO, LRE, and LRO.
+		However, I'm not sure it makes sense to omit those 4;
+		should try with and without that to see the effect.  */
+	     (bidi_get_category (type) != STRONG)
+	       || (bidi_ignore_explicit_marks_for_paragraph_level
+		   && (type == RLE || type == RLO
+		       || type == LRE || type == LRO));
+	     type = bidi_get_type (ch, NEUTRAL_DIR))
+	  {
+	    if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
 	      break;
-	    }
-	  FETCH_CHAR_ADVANCE (ch, pos, bytepos);
-	}
-      if (type == STRONG_R || type == STRONG_AL) /* P3 */
-	bidi_it->paragraph_dir = R2L;
-      else if (type == STRONG_L)
-	bidi_it->paragraph_dir = L2R;
+	    if (bytepos >= ZV_BYTE)
+	      {
+		/* Pretend there's a paragraph separator at end of
+		   buffer.  */
+		type = NEUTRAL_B;
+		break;
+	      }
+	    FETCH_CHAR_ADVANCE (ch, pos, bytepos);
+	  }
+	if (type == STRONG_R || type == STRONG_AL) /* P3 */
+	  bidi_it->paragraph_dir = R2L;
+	else if (type == STRONG_L)
+	  bidi_it->paragraph_dir = L2R;
+	if (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
+	  {
+	    /* If this paragraph is at BEGV, default to L2R.  */
+	    if (pstartbyte == BEGV_BYTE)
+	      bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
+	    else
+	      {
+		EMACS_INT prevpbyte = pstartbyte;
+		EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
+
+		/* Find the beginning of the previous paragraph, if any.  */
+		while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
+		  {
+		    p--;
+		    pbyte = CHAR_TO_BYTE (p);
+		    prevpbyte = bidi_find_paragraph_start (p, pbyte);
+		  }
+		pstartbyte = prevpbyte;
+	      }
+	  }
+      } while (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
     }
   else
     abort ();
 
   /* Contrary to UAX#9 clause P3, we only default the paragraph
      direction to L2R if we have no previous usable paragraph
-     direction.  */
+     direction.  This is allowed by the HL1 clause.  */
   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
-    bidi_it->paragraph_dir = L2R; /* P3 and ``higher protocols'' */
+    bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
   if (bidi_it->paragraph_dir == R2L)
     bidi_it->level_stack[0].level = 1;
   else