diff src/syntax.c @ 89943:4c90ffeb71c5

Revision: miles@gnu.org--gnu-2004/emacs--unicode--0--patch-15 Merge from emacs--cvs-trunk--0 Patches applied: * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-218 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-220 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-221 Restore deleted tagline in etc/TUTORIAL.ru * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-222 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-228 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-229 Remove TeX output files from the archive * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-230 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-247 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-248 src/lisp.h (CYCLE_CHECK): Macro moved from xfaces.c * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-249 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-256 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-258 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-263 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-264 Update from CVS: lispref/display.texi: emacs -> Emacs. 
* miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-265 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-274 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-275 Update from CVS: man/makefile.w32-in: Revert last change * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-276 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-295 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-296 Allow restarting an existing debugger session that's exited * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-297 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-299 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-300 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-327 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-328 Update from CVS: src/.gdbinit (xsymbol): Fix last change. * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-329 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-344 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-345 Tweak source regexps so that building in place won't cause problems * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-346 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-351 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-352 Update from CVS: lisp/flymake.el: New file. * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-353 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-361 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-362 Support " [...]" style defaults in minibuffer-electric-default-mode * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-363 (read-number): Use canonical format for default in prompt. 
* miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-364 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-367 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-368 Improve display-supports-face-attributes-p on non-ttys * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-369 Rewrite face-differs-from-default-p * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-370 Move `display-supports-face-attributes-p' entirely into C code * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-371 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-372 Simplify face-differs-from-default-p; don't consider :stipple. * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-373 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-374 (tty_supports_face_attributes_p): Ensure attributes differ from default * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-375 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-376 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-377 (Fdisplay_supports_face_attributes_p): Work around bootstrapping problem * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-378 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-380 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-381 Face merging cleanups * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-382 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-384 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-385 src/xfaces.c (push_named_merge_point): Return 0 if a cycle is detected * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-386 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-395 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-396 Tweak arch tagging to make build/install-in-place less annoying * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-397 Work around vc-arch problems when building eshell * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-398 Tweak 
permissions * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-399 Tweak directory permissions * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-400 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-401 More build-in-place tweaking of arch tagging * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-402 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-403 Yet more build-in-place tweaking of arch tagging * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-404 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-409 Update from CVS * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-410 Make sure image types are initialized for lookup too * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-411 - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-416 Update from CVS
author Miles Bader <miles@gnu.org>
date Mon, 28 Jun 2004 07:56:49 +0000
parents 68c22ea6027c 6725db0f57d5
children d8411455de48
line wrap: on
line diff
--- a/src/syntax.c	Sat May 29 02:17:09 2004 +0000
+++ b/src/syntax.c	Mon Jun 28 07:56:49 2004 +0000
@@ -26,6 +26,7 @@
 #include "buffer.h"
 #include "character.h"
 #include "keymap.h"
+#include "regex.h"
 
 /* Make syntax table lookup grant data in gl_state.  */
 #define SYNTAX_ENTRY_VIA_PROPERTY
@@ -97,12 +98,13 @@
 static int find_defun_start P_ ((int, int));
 static int back_comment P_ ((int, int, int, int, int, int *, int *));
 static int char_quoted P_ ((int, int));
-static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object));
+static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object, int));
 static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
 static Lisp_Object scan_lists P_ ((int, int, int, int));
 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
 				    int, int, int, int,
 				    int, Lisp_Object, int));
+static int in_classes P_ ((int, Lisp_Object));
 
 
 struct gl_state_s gl_state;		/* Global state of syntax parser.  */
@@ -293,8 +295,11 @@
 
   while (bytepos >= beg)
     {
+      int c;
+
       UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
-      code = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (bytepos));
+      c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
+      code = SYNTAX (c);
       if (! (code == Scharquote || code == Sescape))
 	break;
 
@@ -381,12 +386,16 @@
   gl_state.use_global = 0;
   while (PT > BEGV)
     {
+      int c;
+
       /* Open-paren at start of line means we may have found our
 	 defun-start.  */
-      if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
+      c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
+      if (SYNTAX (c) == Sopen)
 	{
 	  SETUP_SYNTAX_TABLE (PT + 1, -1);	/* Try again... */
-	  if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen)
+	  c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
+	  if (SYNTAX (c) == Sopen)
 	    break;
 	  /* Now fallback to the default value.  */
 	  gl_state.current_syntax_table = current_buffer->syntax_table;
@@ -955,7 +964,7 @@
 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
   "cSet syntax for character: \nsSet syntax for %s to: ",
        doc: /* Set syntax for character CHAR according to string NEWENTRY.
-The syntax is changed only for table SYNTAX_TABLE, which defaults to
+The syntax is changed only for table SYNTAX-TABLE, which defaults to
  the current buffer's syntax table.
 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
 in the range MIN and MAX are changed.
@@ -1339,13 +1348,13 @@
  (but not as the end of a range; quoting is never needed there).
 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
 With arg "^a-zA-Z", skips nonletters stopping before first letter.
-Returns the distance traveled, either zero or positive.
-Note that char classes, e.g. `[:alpha:]', are not currently supported;
-they will be treated as literals.  */)
+Char classes, e.g. `[:alpha:]', are supported.
+
+Returns the distance traveled, either zero or positive.  */)
      (string, lim)
      Lisp_Object string, lim;
 {
-  return skip_chars (1, string, lim);
+  return skip_chars (1, string, lim, 1);
 }
 
 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
@@ -1355,7 +1364,7 @@
      (string, lim)
      Lisp_Object string, lim;
 {
-  return skip_chars (0, string, lim);
+  return skip_chars (0, string, lim, 1);
 }
 
 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
@@ -1383,9 +1392,10 @@
 }
 
 static Lisp_Object
-skip_chars (forwardp, string, lim)
+skip_chars (forwardp, string, lim, handle_iso_classes)
      int forwardp;
      Lisp_Object string, lim;
+     int handle_iso_classes;
 {
   register unsigned int c;
   unsigned char fastmap[0400];
@@ -1403,8 +1413,10 @@
   int size_byte;
   const unsigned char *str;
   int len;
+  Lisp_Object iso_classes;
 
   CHECK_STRING (string);
+  iso_classes = Qnil;
 
   if (NILP (lim))
     XSETINT (lim, forwardp ? ZV : BEGV);
@@ -1448,6 +1460,42 @@
 	{
 	  c = str[i_byte++];
 
+	  if (handle_iso_classes && c == '['
+	      && i_byte < size_byte
+	      && str[i_byte] == ':')
+	    {
+	      const unsigned char *class_beg = str + i_byte + 1;
+	      const unsigned char *class_end = class_beg;
+	      const unsigned char *class_limit = str + size_byte - 2;
+	      /* Leave room for the null.	 */
+	      unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
+	      re_wctype_t cc;
+
+	      if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
+		class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
+
+	      while (class_end < class_limit
+		     && *class_end >= 'a' && *class_end <= 'z')
+		class_end++;
+
+	      if (class_end == class_beg
+		  || *class_end != ':' || class_end[1] != ']')
+		goto not_a_class_name;
+
+	      bcopy (class_beg, class_name, class_end - class_beg);
+	      class_name[class_end - class_beg] = 0;
+
+	      cc = re_wctype (class_name);
+	      if (cc == 0)
+		error ("Invalid ISO C character class");
+
+	      iso_classes = Fcons (make_number (cc), iso_classes);
+
+	      i_byte = class_end + 2 - str;
+	      continue;
+	    }
+
+	not_a_class_name:
 	  if (c == '\\')
 	    {
 	      if (i_byte == size_byte)
@@ -1534,6 +1582,42 @@
 	  c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
 	  i_byte += len;
 
+	  if (handle_iso_classes && c == '['
+	      && i_byte < size_byte
+	      && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
+	    {
+	      const unsigned char *class_beg = str + i_byte + 1;
+	      const unsigned char *class_end = class_beg;
+	      const unsigned char *class_limit = str + size_byte - 2;
+	      /* Leave room for the null.	 */
+	      unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
+	      re_wctype_t cc;
+
+	      if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
+		class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
+
+	      while (class_end < class_limit
+		     && *class_end >= 'a' && *class_end <= 'z')
+		class_end++;
+
+	      if (class_end == class_beg
+		  || *class_end != ':' || class_end[1] != ']')
+		goto not_a_class_name_multibyte;
+
+	      bcopy (class_beg, class_name, class_end - class_beg);
+	      class_name[class_end - class_beg] = 0;
+
+	      cc = re_wctype (class_name);
+	      if (cc == 0)
+		error ("Invalid ISO C character class");
+
+	      iso_classes = Fcons (make_number (cc), iso_classes);
+
+	      i_byte = class_end + 2 - str;
+	      continue;
+	    }
+
+	not_a_class_name_multibyte:
 	  if (c == '\\')
 	    {
 	      if (i_byte == size_byte)
@@ -1643,13 +1727,13 @@
 
     if (forwardp)
       {
- 	endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
- 	stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
+	endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
+	stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
       }
     else
       {
- 	endp = CHAR_POS_ADDR (XINT (lim));
- 	stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
+	endp = CHAR_POS_ADDR (XINT (lim));
+	stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
       }
 
     immediate_quit = 1;
@@ -1667,9 +1751,17 @@
 		  p = GAP_END_ADDR;
 		  stop = endp;
 		}
+	      c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
+	      if (! NILP (iso_classes) && in_classes (c, iso_classes))
+		{
+		  if (negate)
+		    break;
+		  else
+		    goto fwd_ok;
+		}
+
 	      if (! fastmap[*p])
 		break;
-	      c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
 	      if (! ASCII_CHAR_P (c))
 		{
 		  /* As we are looking at a multibyte character, we
@@ -1686,6 +1778,7 @@
 		  if (!(negate ^ (i < n_char_ranges)))
 		    break;
 		}
+	    fwd_ok:
 	      p += nbytes, pos++, pos_byte += nbytes;
 	    }
 	else
@@ -1698,8 +1791,18 @@
 		  p = GAP_END_ADDR;
 		  stop = endp;
 		}
+
+	      if (!NILP (iso_classes) && in_classes (*p, iso_classes))
+		{
+		  if (negate)
+		    break;
+		  else
+		    goto fwd_unibyte_ok;
+		}
+
 	      if (!fastmap[*p])
 		break;
+	    fwd_unibyte_ok:
 	      p++, pos++, pos_byte++;
 	    }
       }
@@ -1719,9 +1822,18 @@
 		}
 	      prev_p = p;
 	      while (--p >= stop && ! CHAR_HEAD_P (*p));
+	      c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
+
+	      if (! NILP (iso_classes) && in_classes (c, iso_classes))
+		{
+		  if (negate)
+		    break;
+		  else
+		    goto back_ok;
+		}
+
 	      if (! fastmap[*p])
 		break;
-	      c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
 	      if (! ASCII_CHAR_P (c))
 		{
 		  /* See the comment in the previous similar code.  */
@@ -1731,6 +1843,7 @@
 		  if (!(negate ^ (i < n_char_ranges)))
 		    break;
 		}
+	    back_ok:
 	      pos--, pos_byte -= prev_p - p;
 	    }
 	else
@@ -1743,8 +1856,18 @@
 		  p = GPT_ADDR;
 		  stop = endp;
 		}
+
+	      if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
+		{
+		  if (negate)
+		    break;
+		  else
+		    goto back_unibyte_ok;
+		}
+
 	      if (!fastmap[p[-1]])
 		break;
+	    back_unibyte_ok:
 	      p--, pos--, pos_byte--;
 	    }
       }
@@ -1927,6 +2050,30 @@
     return make_number (PT - start_point);
   }
 }
+
+/* Return 1 if character C belongs to at least one of the ISO classes
+   in the list ISO_CLASSES, else 0.  Each class is represented by an
+   integer which is its type according to re_wctype.  */
+
+static int
+in_classes (c, iso_classes)
+     int c;
+     Lisp_Object iso_classes;
+{
+  Lisp_Object tail;
+
+  /* One matching class settles the answer, so stop at the first hit
+     instead of testing C against the remainder of the list.  */
+  for (tail = iso_classes; ! NILP (tail); tail = XCDR (tail))
+    {
+      Lisp_Object elt;
+
+      elt = XCAR (tail);
+      if (re_iswctype (c, XFASTINT (elt)))
+	return 1;
+    }
+  return 0;
+}
 
 /* Jump over a comment, assuming we are at the beginning of one.
    FROM is the current position.
@@ -2310,7 +2457,8 @@
 	  INC_BOTH (from, from_byte);
 	  UPDATE_SYNTAX_TABLE_FORWARD (from);
 	  if (from < stop && comstart_first
-	      && SYNTAX_COMSTART_SECOND (FETCH_CHAR_AS_MULTIBYTE (from_byte))
+	      && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
+		  SYNTAX_COMSTART_SECOND (c))
 	      && parse_sexp_ignore_comments)
 	    {
 	      /* we have encountered a comment start sequence and we
@@ -2636,7 +2784,7 @@
 	   Fcons (build_string ("Unbalanced parentheses"),
 		  Fcons (make_number (last_good),
 			 Fcons (make_number (from), Qnil))));
-
+  abort ();
   /* NOTREACHED */
 }
 
@@ -2776,8 +2924,8 @@
 #define INC_FROM				\
 do { prev_from = from;				\
      prev_from_byte = from_byte; 		\
-     prev_from_syntax				\
-       = SYNTAX_WITH_FLAGS (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte)); \
+     temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte);	\
+     prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
      INC_BOTH (from, from_byte);		\
      if (from < end)				\
        UPDATE_SYNTAX_TABLE_FORWARD (from);	\
@@ -2852,7 +3000,8 @@
   curlevel->last = -1;
 
   SETUP_SYNTAX_TABLE (prev_from, 1);
-  prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
+  temp = FETCH_CHAR (prev_from_byte);
+  prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
   UPDATE_SYNTAX_TABLE_FORWARD (from);
 
   /* Enter the loop at a place appropriate for initial state.  */
@@ -2931,7 +3080,8 @@
 	  while (from < end)
 	    {
 	      /* Some compilers can't handle this inside the switch.  */
-	      temp = SYNTAX (FETCH_CHAR_AS_MULTIBYTE (from_byte));
+	      temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
+	      temp = SYNTAX (temp);
 	      switch (temp)
 		{
 		case Scharquote: