diff lisp/progmodes/cc-awk.el @ 51714:bc91cbf50c24

Updated CC Mode to version 5.30.
author Martin Stjernholm <mast@lysator.liu.se>
date Thu, 03 Jul 2003 12:30:59 +0000
parents
children 695cf19ef79e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lisp/progmodes/cc-awk.el	Thu Jul 03 12:30:59 2003 +0000
@@ -0,0 +1,905 @@
+;;; cc-awk.el --- AWK specific code within cc-mode.
+
+;; Copyright (C) 1988,94,96,2000,01,02,03  Free Software Foundation, Inc.
+
+;; Author: Alan Mackenzie (originally based on awk-mode.el)
+;; Maintainer: FSF
+;; Keywords: AWK, cc-mode, unix, languages
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;;; Commentary:
+
+;; This file contains (most of) the adaptations to cc-mode required for the
+;; integration of AWK Mode.
+;; It is organised thusly:
+;;   1. The AWK Mode syntax table.
+;;   2. Indentation calculation stuff ("c-awk-NL-prop text-property").
+;;   3. Syntax-table property/font-locking stuff, but not including the
+;;      font-lock-keywords setting.
+;;   4. The AWK Mode before/after-change-functions.
+;;   5. AWK Mode specific versions of commands like beginning-of-defun.
+;; The AWK Mode keymap, abbreviation table, and the mode function itself are
+;; in cc-mode.el.
+
+;;; Code:
+
+(eval-when-compile
+  (let ((load-path
+	 (if (and (boundp 'byte-compile-dest-file)
+		  (stringp byte-compile-dest-file))
+	     (cons (file-name-directory byte-compile-dest-file) load-path)
+	   load-path)))
+    (load "cc-bytecomp" nil t)))
+
+(cc-require 'cc-defs)
+
+;; Silence the byte compiler.
+(cc-bytecomp-defvar font-lock-mode)	; Checked with boundp before use.
+
+;; Some functions in cc-engine that are used below.  There's a cyclic
+;; dependency so it can't be required here.  (Perhaps some functions
+;; could be moved to cc-engine to avoid it.)
+(cc-bytecomp-defun c-backward-token-1)
+(cc-bytecomp-defun c-beginning-of-statement-1)
+(cc-bytecomp-defun c-backward-sws)
+
+(defvar awk-mode-syntax-table
+  (let ((table (make-syntax-table)))
+    ;; Backslash is the escape character.
+    (modify-syntax-entry ?\\ "\\" table)
+    ;; A comment is opened by # and closed by newline, carriage return or
+    ;; form feed.
+    (modify-syntax-entry ?\# "<   " table)
+    (mapcar (function (lambda (eol) (modify-syntax-entry eol ">   " table)))
+            '(?\n ?\r ?\f))
+    ;; All the operator characters are punctuation.  This includes /, which
+    ;; can delimit regexps or be a division operator: by default we assume
+    ;; that it is a division sign, and fix the regexp operator cases with
+    ;; `font-lock-syntactic-keywords'.  (ACM 2002/4/27.)  The single quote
+    ;; is punctuation too.
+    (mapcar (function (lambda (op) (modify-syntax-entry op "." table)))
+            '(?/ ?* ?+ ?- ?= ?% ?< ?> ?& ?| ?\'))
+    ;; Underscore is a symbol constituent.
+    (modify-syntax-entry ?_ "_" table)
+    table)
+  "Syntax table in use in AWK Mode buffers.")
+
+;; ACM, 2002/5/29:
+;; 
+;; The next section of code is about determining whether or not an AWK
+;; statement is complete or not.  We use this to indent the following line.
+;; The determination is pretty straightforward in C, where a statement ends
+;; with either a ; or a }.  Only "while" really gives any trouble there, since
+;; it might be the end of a do-while.  In AWK, on the other hand, semicolons
+;; are rarely used, and EOLs _usually_ act as "virtual semicolons".  In
+;; addition, we have the complexity of escaped EOLs.  The core of this
+;; analysis is in the middle of the function
+;; c-awk-calculate-NL-prop-prev-line, about 130 lines lower down.
+;;
+;; To avoid continually repeating this expensive analysis, we "cache" its
+;; result in a text-property, c-awk-NL-prop, whose value for a line is set on
+;; the EOL (if any) which terminates that line.  Should the property be
+;; required for the very last line (which has no EOL), it is calculated as
+;; required but not cached.  The c-awk-NL-prop property should be thought of
+;; as only really valid immediately after a buffer change, not a permanently
+;; set property.  (By contrast, the syntax-table text properties (set by an
+;; after-change function) must be constantly updated for the mode to work
+;; properly).
+;;
+;; The valid values for c-awk-NL-prop are:
+;;
+;; nil The property is not currently set for this line.
+;; '#' There is NO statement on this line (at most a comment), and no open
+;;     statement from a previous line which could have been completed on this
+;;     line.
+;; '{' There is an unfinished statement on this (or a previous) line which
+;;     doesn't require \s to continue onto another line, e.g. the line ends
+;;     with {, or the && operator, or "if (condition)".  Note that even if the
+;;     newline is redundantly escaped, it remains a '{' line.
+;; '\' There is an escaped newline at the end of this line and this '\' is
+;;     essential to the syntax of the program.  (i.e. if it had been a
+;;     frivolous \, it would have been ignored and the line been given one of
+;;     the other property values.)
+;; ';' A statement is completed as the last thing (aside from ws) on the line -
+;;     i.e. there is (at least part of) a statement on this line, and the last
+;;     statement on the line is complete, OR (2002/10/25) the line is
+;;     content-free but terminates a statement from the preceding (continued)
+;;     line (which has property \).
+;;
+;; This set of values has been chosen so that the property's value on a line
+;; is completely determined by the contents of the line and the property on
+;; the previous line, EXCEPT for where a "while" might be the closing
+;; statement of a do-while.
+
+(defun c-awk-after-if-for-while-condition-p (&optional do-lim)
+  ;; Is point immediately after the closing ) of the condition in
+  ;; "if (...)", "for (...)" or "while (...)"?
+  ;;
+  ;; The ) which ends "do ... while (<condition>)" does NOT count: this
+  ;; predicate exists essentially to decide whether the next line needs to
+  ;; be indented as a continuation.
+  ;;
+  ;; DO-LIM is handed to c-beginning-of-statement-1 and bounds how far back
+  ;; we look for the "do" of a possible do-while.
+  (when (eq (char-before) ?\))
+    (save-excursion
+      (let ((open-paren (c-safe (scan-lists (point) -1 0))))
+        (when open-paren
+          (goto-char open-paren)        ; back over "(...)"
+          (c-backward-token-1)          ; BOB isn't a problem.
+          (cond
+           ((looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)"))
+           ;; A "while" only counts if it doesn't close a do-while.
+           ((looking-at "while\\>\\([^_]\\|$\\)")
+            (not (eq (c-beginning-of-statement-1 do-lim)
+                     'beginning)))))))))
+
+(defun c-awk-after-function-decl-param-list ()
+  ;; Is point immediately after the ) which closes the parameter list in
+  ;; "function foo (bar)" (or "func foo (bar)")?
+  (if (eq (char-before) ?\))
+      (save-excursion
+        (let ((open-paren (c-safe (scan-lists (point) -1 0))))
+          (if (null open-paren)
+              nil                       ; unbalanced parens: not a param list.
+            (goto-char open-paren)      ; back over "(...)"
+            (c-backward-token-1)        ; BOB isn't a problem
+            ;; There must be an identifier here, preceded by the keyword.
+            (when (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
+              (c-backward-token-1)
+              (looking-at "func\\(tion\\)?\\>")))))))
+
+;; 2002/11/8:  FIXME!  Check c-backward-token-1/2 for success (0 return code).
+(defun c-awk-after-continue-token ()
+;; Is point just after a token which allows the statement to carry on onto
+;; the next line without an escaping backslash?  Such tokens are , { ? :
+;; && || do and else.
+  (save-excursion
+    (c-backward-token-1)              ; FIXME 2002/10/27.  What if this fails?
+    ;; c-backward-token-1 only steps over the second character of && or ||,
+    ;; so move back over the first one by hand.
+    (when (and (not (bobp)) (looking-at "[&|]"))
+      (backward-char))
+    (looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>")))
+
+(defun c-awk-after-rbrace-or-statement-semicolon ()
+  ;; Is point just after a } or after a ; which terminates a statement?
+  ;; The ;s which separate the control clauses of a "for (..;..;..)" are not
+  ;; statement terminators and don't count.
+  (let ((prev-char (char-before)))
+    (cond
+     ((eq prev-char ?\}) t)
+     ((eq prev-char ?\;)
+      (save-excursion
+        (let ((encl-pos (c-safe (scan-lists (point) -1 1))))
+          (when encl-pos
+            (goto-char encl-pos)        ; go back to the containing opener
+            ;; Reject the ; only if that opener is the ( of a "for".
+            (not (and (looking-at "(")
+                      (c-backward-token-1) ; BOB isn't a problem
+                      (looking-at "for\\>"))))))))))
+
+(defun c-awk-back-to-contentful-text-or-NL-prop ()
+  ;;  Move back to just after the first found of either (i) an EOL which has
+  ;;  the c-awk-NL-prop text-property set; or (ii) non-ws text; or (iii) BOB.
+  ;;  We return the value of c-awk-NL-prop in case (i), and nil in cases (ii)
+  ;;  and (iii).  The calling function can best distinguish cases (ii) and
+  ;;  (iii) with (bolp).
+  ;;
+  ;;  Note that an escaped eol counts as whitespace here.
+  ;;
+  ;;  Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
+  ;;  that the previous line contains an unterminated string (without \).  In
+  ;;  this case, assume that the previous line's c-awk-NL-prop is a ;.  We
+  ;;  then stop at that BOL and return ?\; even though no property is
+  ;;  actually set on the preceding EOL.
+  ;;
+  ;;  POINT MUST BE AT THE START OF A LINE when calling this function.  This
+  ;;  is to ensure that the various backward-comment functions will work
+  ;;  properly.
+  (let ((nl-prop nil)
+        bol-pos bsws-pos) ; starting pos for a backward-syntactic-ws call.
+    ;; The loop has no body; all the work is done in its condition.  Each
+    ;; clause of the `and' can end the loop by returning nil.
+    (while ;; We are at a BOL here.  Go back one line each iteration.
+        (and
+         ;; Case (iii): nothing further back to look at.
+         (not (bobp))
+         ;; Case (i): the preceding EOL already carries a cached property.
+         (not (setq nl-prop (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
+         ;; Skip backwards over syntactic whitespace, using bol-pos (from
+         ;; c-point 'bopl) as the limit.
+         (progn (setq bol-pos (c-point 'bopl))
+                (setq bsws-pos (point))
+                ;; N.B. the following function will not go back past an EOL if
+                ;; there is an open string (without \) on the previous line.
+                (c-backward-syntactic-ws bol-pos)
+                ;; If point didn't move, apply the kludge described above:
+                ;; pretend the previous line ended a statement, and stop.
+                (or (/= (point) bsws-pos)
+                    (progn (setq nl-prop ?\;)
+                           nil)))
+         ;; If we had a backslash at EOL, c-backward-syntactic-ws will
+         ;; have gone backwards over it.  Check the backslash was "real".
+         (progn
+           (if (looking-at "[ \t]*\\\\+$")
+               (if (progn
+                     (end-of-line)
+                     (search-backward-regexp
+                      "\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL  :-)
+                      bol-pos t))
+                   ;; A genuine escaped EOL: treat it as whitespace and carry
+                   ;; on skipping backwards from just before the backslash.
+                   (progn (end-of-line)   ; escaped EOL.
+                          (backward-char)
+                          (c-backward-syntactic-ws bol-pos))
+                 ;; An even number of \s: the last one is itself escaped, so
+                 ;; it counts as contentful text.  Stay at the end of it.
+                 (end-of-line)))          ; The \ at eol is a fake.
+           ;; Keep looping only if the whole line was whitespace, i.e. we
+           ;; have arrived at its beginning; otherwise this is case (ii).
+           (bolp))))
+    nl-prop))
+
+(defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
+  ;; Calculate and set the value of the c-awk-NL-prop on the immediately
+  ;; preceding EOL.  This may also involve doing the same for several
+  ;; preceding EOLs.
+  ;;
+  ;; NOTE that if the property was already set, we return it without
+  ;; recalculation.  (This is by accident rather than design.)
+  ;;
+  ;; Return the property which got set (or was already set) on the previous
+  ;; line.  Return nil if we hit BOB.
+  ;;
+  ;; Point and the match data are preserved.
+  ;;
+  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
+  (save-excursion
+    (save-match-data
+      (beginning-of-line)
+      ;; POS is the start of the line whose preceding EOL we want the
+      ;; property for.  The backward scan then leaves point at the place
+      ;; from which properties must be (re)calculated.
+      (let* ((pos (point))
+             (nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
+        ;; We are either (1) at a BOL (with nl-prop containing the previous
+        ;; line's c-awk-NL-prop) or (2) after contentful text on a line.  At
+        ;; the BOB counts as case (1), so we test next for bolp rather than
+        ;; non-nil nl-prop.
+        (when (not (bolp))
+          ;; Case (2): classify the line holding the contentful text.
+          (setq nl-prop
+                (cond
+                 ;; Incomplete statement which doesn't require escaped EOL?
+                 ((or (c-awk-after-if-for-while-condition-p do-lim)
+                      (c-awk-after-function-decl-param-list)
+                      (c-awk-after-continue-token))
+                  ?\{)
+                 ;; Escaped EOL (where there's also something to continue)?
+                 ((and (looking-at "[ \t]*\\\\$")
+                       (not (c-awk-after-rbrace-or-statement-semicolon)))
+                  ?\\)
+                 (t ?\;)))            ; A statement was completed on this line
+          ;; Cache the result on this line's EOL, then step on to the next
+          ;; line.
+          (end-of-line)
+          (c-put-char-property (point) 'c-awk-NL-prop nl-prop)
+          (forward-line))
+
+        ;; We are now at a (possibly empty) sequence of content-free lines.
+        ;; Set c-awk-NL-prop on each of these lines's EOL.
+        (while (< (point) pos)         ; one content-free line each iteration.
+          (cond              ; recalculate nl-prop from previous line's value.
+           ;; After a completed statement (or at the start of the buffer) a
+           ;; content-free line is an "empty" line.
+           ((memq nl-prop '(?\; nil)) (setq nl-prop ?\#))
+           ;; After an essential escaped EOL, a content-free line without its
+           ;; own trailing \ terminates the continued statement.
+           ((eq nl-prop ?\\)
+            (if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\;))) ; was ?\#  2002/10/25
+           ;; ?\# (empty line) and ?\{ (open stmt) don't change.
+           )
+          ;; Move to the next line and put the property on the EOL just
+          ;; passed over.
+          (forward-line)
+          (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
+        nl-prop))))
+
+(defun c-awk-get-NL-prop-prev-line (&optional do-lim)
+  ;; Return the c-awk-NL-prop text-property of the previous line's EOL.  A
+  ;; cached value is used if there is one; otherwise the property is
+  ;; calculated (and cached) first.  The result is nil iff point is at BOB.
+  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
+  (and (not (bobp))
+       (or (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)
+           (c-awk-calculate-NL-prop-prev-line do-lim))))
+
+(defun c-awk-get-NL-prop-cur-line (&optional do-lim)
+  ;; Get the c-awk-NL-prop text-property from the current line, calculating it
+  ;; if necessary. (As a special case, the property doesn't get set on an
+  ;; empty line at EOB (there's no position to set the property on), but the
+  ;; function returns the property value an EOL would have got.)
+  ;;
+  ;; When the current line is the last one in the buffer, a newline is
+  ;; inserted temporarily at EOB for the duration of the calculation.  It is
+  ;; removed again even if the calculation exits non-locally (error, quit),
+  ;; so the buffer is never left with a stray newline.
+  ;;
+  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
+  (save-excursion
+    (let ((extra-nl-pos nil))      ; position of the artificial EOL, if any.
+      (end-of-line)                ; Necessary for the following test to work.
+      (when (= (forward-line) 1)        ; if we were on the last line....
+        (setq extra-nl-pos (point))
+        (insert-char ?\n 1)) ; ...artificial eol is needed for comment detection.
+      (unwind-protect
+          (c-awk-get-NL-prop-prev-line do-lim)
+        ;; Delete by position rather than relative to point, so the cleanup
+        ;; doesn't depend on where point has been left.
+        (if extra-nl-pos
+            (delete-region extra-nl-pos (1+ extra-nl-pos)))))))
+
+(defun c-awk-prev-line-incomplete-p (&optional do-lim)
+  ;; Does the previous line end in the middle of a statement, i.e. is its
+  ;; c-awk-NL-prop either \ or {?
+  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
+  (let ((nl-prop (c-awk-get-NL-prop-prev-line do-lim)))
+    (memq nl-prop '(?\\ ?\{))))
+
+(defun c-awk-cur-line-incomplete-p (&optional do-lim)
+  ;; Does the current line end in the middle of a statement, i.e. is its
+  ;; c-awk-NL-prop either \ or {?
+  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
+  (let ((nl-prop (c-awk-get-NL-prop-cur-line do-lim)))
+    (memq nl-prop '(?\\ ?\{))))
+
+(defun c-awk-completed-stmt-ws-ends-prev-line-p (&optional do-lim)
+  ;; Is the last thing on the previous line (apart from an optional comment)
+  ;; the termination of a statement?  I.e., is that line's c-awk-NL-prop a ;?
+  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
+  (let ((nl-prop (c-awk-get-NL-prop-prev-line do-lim)))
+    (eq nl-prop ?\;)))
+
+(defun c-awk-completed-stmt-ws-ends-line-p (&optional pos do-lim)
+  ;; Is the last thing (apart from an optional comment) on the line
+  ;; containing position POS the termination of a statement?  The current
+  ;; line is examined if POS is omitted.  Point is not moved.
+  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
+  (save-excursion
+    (when pos
+      (goto-char pos))
+    (let ((nl-prop (c-awk-get-NL-prop-cur-line do-lim)))
+      (eq nl-prop ?\;))))
+
+(defun c-awk-after-logical-semicolon (&optional do-lim)
+;; Is point at a BOL whose preceding EOL acts as a "logical semicolon", i.e.
+;; has the c-awk-NL-prop value ;?
+;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
+  (if (bolp)
+      (eq (c-awk-get-NL-prop-prev-line do-lim) ?\;)))
+
+(defun c-awk-backward-syntactic-ws (&optional lim)
+;; Skip backwards over awk-syntactic whitespace.  This is whitespace
+;; characters, comments, and NEWLINES WHICH AREN'T "VIRTUAL SEMICOLONS".  For
+;; this function, a newline isn't a "virtual semicolon" if that line ends with
+;; a real semicolon (or closing brace).
+;; However if point starts inside a comment or preprocessor directive, the
+;; content of it is not treated as whitespace.  LIM (optional) sets a limit on
+;; the backward movement; it defaults to the start of the accessible portion
+;; of the buffer.
+  (let ((lim (or lim (point-min)))
+        after-real-br)       ; position just after a real ; or }, if found.
+    (c-backward-syntactic-ws (max lim (c-point 'bol)))
+    (while                    ; go back one WS line each time round this loop.
+        (and (bolp)
+             (> (point) lim)
+             ;; Compare with `eq' rather than `/=': the property getter
+             ;; returns nil at BOB (which can be reached when LIM lies before
+             ;; a narrowed buffer's start), and `/=' would signal an error on
+             ;; a nil argument.
+             (not (eq (c-awk-get-NL-prop-prev-line) ?\;))
+             ;; Stop when no further backward progress is made.
+             (/= (point)
+                 ;; The following function requires point at BONL [not EOL] to
+                 ;; recognise a preceding comment.
+                 (progn (c-backward-syntactic-ws (max lim (c-point 'bopl)))
+                        (point)))))
+    ;; Does the previous line end with a real ; or }?  If so, go back to it.
+    (if (and (bolp)
+             (eq (c-awk-get-NL-prop-prev-line) ?\;)
+             (save-excursion
+               (c-backward-syntactic-ws (max lim (c-point 'bopl)))
+               (setq after-real-br (point))
+               (c-awk-after-rbrace-or-statement-semicolon)))
+        (goto-char after-real-br))))
+
+(defun c-awk-NL-prop-not-set ()
+  ;; Return non-nil if the c-awk-NL-prop on the current line's EOL is nil
+  ;; or absent.
+  (null (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
+
+(defun c-awk-clear-NL-props (beg end)
+  ;; This function is run from before-change-hooks.  It removes the
+  ;; c-awk-NL-prop text property from every position between BEG and the
+  ;; end of the (widened) buffer, so that the indentation engine never sees
+  ;; a stale value.  END is accepted only for the sake of the hook's calling
+  ;; convention; it is ignored.
+  (save-restriction
+    (widen)
+    (let ((buffer-end (point-max)))
+      (c-clear-char-properties beg buffer-end 'c-awk-NL-prop))))
+
+(defun c-awk-unstick-NL-prop ()
+  ;; Make sure the text property c-awk-NL-prop is "non-sticky".  Were it
+  ;; sticky, a newline inserted directly after an existing newline (e.g. by
+  ;; C-j) would inherit the existing newline's c-awk-NL-prop, which would be
+  ;; a Bad Thing.  This function's action is required by
+  ;; c-put-char-property.
+  (when (boundp 'text-property-default-nonsticky) ; doesn't exist in XEmacs
+    (if (not (assoc 'c-awk-NL-prop text-property-default-nonsticky))
+        (setq text-property-default-nonsticky
+              (cons '(c-awk-NL-prop . t) text-property-default-nonsticky)))))
+
+;; The following is purely a diagnostic command, to be commented out of the
+;; final release.  ACM, 2002/6/1
+;; (defun NL-props ()
+;;   (interactive)
+;;   (let (pl-prop cl-prop)
+;;     (message "Prev-line: %s  Cur-line: %s"
+;;              (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop))
+;;                  (char-to-string pl-prop)
+;;                "nil")
+;;              (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop))
+;;                  (char-to-string cl-prop)
+;;                "nil"))))
+;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31
+;for now.  In the byte compiled version, this causes things to crash because
+;awk-mode-map isn't yet defined.  :-(
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The following section of the code is to do with font-locking.  The biggest
+;; problem for font-locking is deciding whether a / is a regular expression
+;; delimiter or a division sign - determining precisely where strings and
+;; regular expressions start and stop is also troublesome.  This is the
+;; purpose of the function c-awk-set-syntax-table-properties and the myriad
+;; elisp regular expressions it uses.
+;;
+;; Because AWK is a line oriented language, I felt the normal cc-mode strategy
+;; for font-locking unterminated strings (i.e. font-locking the buffer up to
+;; the next string delimiter as a string) was inappropriate.  Instead,
+;; unbalanced string/regexp delimiters are given the warning font, being
+;; refonted with the string font as soon as the matching delimiter is entered.
+;;
+;; This requires the region processed by the current font-lock after-change
+;; function to have access to the start of the string/regexp, which may be
+;; several lines back.  The elisp "advice" feature is used on these functions
+;; to allow this.
+
+(defun c-awk-beginning-of-logical-line (&optional pos)
+;; Move to the start of the (apparent) logical line containing point (or
+;; containing POS, if given), and return that buffer position.  I.e., go
+;; back to the nearest line start which isn't preceded by an escaped EOL.
+;;
+;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
+;; comment, string or regexp.  IT MAY WELL BE that this function should not be
+;; executed on a narrowed buffer.
+  (when pos
+    (goto-char pos))
+  (forward-line 0)
+  ;; At a BOL, the character two places back is the last one on the previous
+  ;; line.  Whilst that is a backslash, the previous line is continued onto
+  ;; this one, so keep going back.
+  (while (and (not (bobp))
+              (eq (char-before (1- (point))) ?\\))
+    (forward-line -1))
+  (point))
+
+(defun c-awk-end-of-logical-line (&optional pos)
+;; Move to the end of the (apparent) logical line containing point (or
+;; containing POS, if given), and return that buffer position.  I.e., go
+;; forward to the nearest line end which isn't an escaped EOL.
+;;
+;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
+;; comment, string or regexp.  IT MAY WELL BE that this function should not be
+;; executed on a narrowed buffer.
+  (when pos
+    (goto-char pos))
+  (end-of-line)
+  ;; Whilst the line ends with a backslash, it continues onto the next one;
+  ;; move on to the end of that next line.
+  (while (and (not (eobp))
+              (eq (char-before) ?\\))
+    (end-of-line 2))
+  (point))
+
+;; N.B. In the following regexps, an EOL is either \n OR \r.  This is because
+;; Emacs has in the past used \r to mark hidden lines in some fashion (and
+;; maybe still does).
+
+(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
+;;   Matches any escaped (with \) character-pair, including an escaped newline
+;; (\n or \r).  A lone \ at the very end of the buffer is matched too.
+(defconst c-awk-comment-without-nl "#.*")
+;;   Matches an AWK comment, not including the terminating NL (if any).  Note
+;; that the "enclosing" (elisp) regexp must ensure the # is real, i.e. that it
+;; really does start a comment.
+(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
+;;   Matches a newline (\n or \r), or the end of buffer.
+
+;; "Space" regular expressions.
+(defconst c-awk-escaped-nl "\\\\[\n\r]")
+;;   Matches an escaped newline, i.e. a \ followed by \n or \r.
+(defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*"))
+;;   Matches a possibly empty sequence of escaped newlines.  Used in
+;; awk-font-lock-keywords.
+;; (defconst c-awk-escaped-nls*-with-space*
+;;   (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
+;; The above RE was very slow.  Its runtime was doubling with each additional
+;; space :-(  Reformulate it as below, with no nested repetition:
+(defconst c-awk-escaped-nls*-with-space*
+  (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*"))
+;;   Matches a possibly empty sequence of escaped newlines with optional
+;; interspersed spaces and tabs.  Used in awk-font-lock-keywords.
+
+;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
+(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]")
+;;   Matches any character but a _, #, /, ", \, or newline.  N.B. _" starts a
+;; localisation string in gawk 3.1
+(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
+;;   Matches an underline NOT followed by ".  (The character after the
+;; underline, if any, is part of the match.)
+(defconst c-awk-harmless-string*-re
+  (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
+;;   Matches a (possibly empty) sequence of chars without unescaped /, ", \,
+;; #, or newlines.
+(defconst c-awk-harmless-string*-here-re
+  (concat "\\=" c-awk-harmless-string*-re))
+;;   Matches the (possibly empty) sequence of chars without unescaped /, ", \,
+;; #, or newlines which starts at point.
+(defconst c-awk-harmless-line-re
+  (concat c-awk-harmless-string*-re
+          "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
+;;   Matches (the tail of) an AWK "logical" line not containing an unescaped
+;; " or /.  "logical" means "possibly containing escaped newlines".  A comment
+;; is matched as part of the line even if it contains a " or a /.  The End of
+;; buffer is also an end of line.
+(defconst c-awk-harmless-lines+-here-re
+  (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
+;;   Matches a sequence of (at least one) "harmless-line" at point.
+
+
+;; REGEXPS FOR AWK STRINGS.
+(defconst c-awk-string-ch-re "[^\"\\\n\r]")
+;;   Matches any character which can appear unescaped in a string, i.e.
+;; anything but ", \, or a newline.
+(defconst c-awk-string-innards-re
+  (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))
+;;   Matches the inside of an AWK string (i.e. without the enclosing quotes).
+(defconst c-awk-string-without-end-here-re
+  (concat "\\=_?\"" c-awk-string-innards-re))
+;;   Matches an AWK string at point up to, but not including, any terminator.
+;; A gawk 3.1+ string may look like _"localisable string".
+
+;; REGEXPS FOR AWK REGEXPS.
+(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
+;;   Matches any AWK regexp character which doesn't require special analysis,
+;; i.e. anything but [, /, \, or a newline.
+(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")
+;;   Matches a (possibly empty) sequence of escaped newlines.
+(defconst c-awk-regexp-char-class-re
+  (concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?"
+          "\\(" c-awk-esc-pair-re "\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)"))
+;;   Matches a regexp char class, up to (but not including) EOL if the ] is
+;; missing.  A ] directly after the opening [ (or [^) is taken as a member of
+;; the class, not as its terminator.
+(defconst c-awk-regexp-innards-re
+  (concat "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re
+          "\\|" c-awk-regexp-normal-re "\\)*"))
+;;   Matches the inside of an AWK regexp (i.e. without the enclosing /s)
+(defconst c-awk-regexp-without-end-re
+  (concat "/" c-awk-regexp-innards-re))
+;;   Matches an AWK regexp up to, but not including, any terminating /.
+
+;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
+;; REGEXP OPENER OR A DIVISION SIGN.  By "state" in the following is meant
+;; whether a '/' at the current position would by a regexp opener or a
+;; division sign.
+(defconst c-awk-neutral-re
+;  "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
+  "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)")
+;;   A "neutral" char(pair).  Doesn't change the "state" of a subsequent /.
+;; This is space/tab, braces, an auto-increment/decrement operator or an
+;; escaped character.  Or one of the (illegal) characters @ or `.  But NOT an
+;; end of line (even if escaped).
+(defconst c-awk-neutrals*-re
+  (concat "\\(" c-awk-neutral-re "\\)*"))
+;;   A (possibly empty) string of neutral characters (or character pairs).
+(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
+;;   Matches one or more chars, each of which is a constituent of a variable
+;; or number, or a ket (i.e. closing bracKET), round or square.  Assume that
+;; all characters \x80 to \xff are "letters".
+(defconst c-awk-div-sign-re
+  (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
+;;   Will match a piece of AWK buffer ending in / which is a division sign, in
+;; a context where an immediate / would be a regexp bracket.  It follows a
+;; variable or number (with optional intervening "neutral" characters).  This
+;; will only work when there won't be a preceding " or / before the sought /
+;; to foul things up.
+(defconst c-awk-non-arith-op-bra-re
+  "[[\(&=:!><,?;'~|]")
+;;   Matches an opening BRAcket, round or square, or any operator character
+;; apart from +,-,/,*,%.  For the purpose at hand (detecting a / which is a
+;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
+;; and "--".
+(defconst c-awk-regexp-sign-re
+  (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
+;;   Will match a piece of AWK buffer ending in / which is an opening regexp
+;; bracket, in a context where an immediate / would be a division sign.  This
+;; will only work when there won't be a preceding " or / before the sought /
+;; to foul things up.
+
+;; ACM, 2002/02/15: The idea of the next function is to put the "Error font"
+;; on strings/regexps which are missing their closing delimiter.
+;; 2002/4/28.  The default syntax for / has been changed from "string" to
+;; "punctuation", to reduce hassle when this character appears within a string
+;; or comment.
+
+(defun c-awk-set-string-regexp-syntax-table-properties (beg end)
+;; BEG and END bracket a (possibly unterminated) string or regexp: the
+;; opening delimiter is the character after BEG, and the closing delimiter,
+;; IF ANY, is the character after END.  Put the appropriate syntax-table
+;; properties on the delimiters and contents of this string/regexp.
+;;
+;; "String" here includes a gawk 3.1 "localizable" string, which starts with
+;; _".  We step over such a _ and otherwise ignore it; it will get its font
+;; from an entry in awk-font-lock-keywords.
+;;
+;; If the closing delimiter is missing (i.e., there is an EOL there) the
+;; STRING-FENCE property goes on the opening " or / and on the closing EOL.
+  (when (eq (char-after beg) ?_)
+    (setq beg (1+ beg)))
+
+  (let ((opener (char-after beg))
+        (after-beg (1+ beg)))
+    ;; The delimiters come first.
+    (cond
+     ;; Terminated by EOB: only the opener can be marked.
+     ((eq end (point-max))
+      (put-text-property beg after-beg 'syntax-table '(15))) ; (15) = "string fence"
+     ;; Closing delimiter missing: fence both the opener and what's at END.
+     ((/= opener (char-after end))
+      (put-text-property beg after-beg 'syntax-table '(15))
+      (put-text-property end (1+ end) 'syntax-table '(15)))
+     ;; A properly bracketed regexp.
+     ((eq opener ?/)
+      (put-text-property beg after-beg 'syntax-table '(7)) ; (7) = "string"
+      (put-text-property end (1+ end) 'syntax-table '(7))))
+    ;; A properly bracketed string needs nothing doing to its delimiters.
+
+    ;; Now give any "s inside the string/regexp punctuation syntax.
+    (save-excursion
+      (goto-char after-beg)
+      (if (eobp)
+          t
+        (while (search-forward "\"" end t)
+          (put-text-property (1- (point)) (point) 'syntax-table '(1)))))))
+
+(defun c-awk-syntax-tablify-string ()
+  ;; Point is at the opening " or _" of a string.  Put the syntax-table
+  ;; properties onto this string, and leave point just after it (after the
+  ;; terminating EOL if the string is unterminated).
+  ;;
+  ;; Return t if a / directly after the string would be a division sign, nil
+  ;; if it would be a regexp opener.
+  (search-forward-regexp c-awk-string-without-end-here-re nil t) ; a (possibly unterminated) string
+  (c-awk-set-string-regexp-syntax-table-properties
+   (match-beginning 0) (match-end 0))
+  ;; Point is now at the string's terminator, if it has one.  A closing "
+  ;; makes a following / a division sign (in AWK, ("15" / 5) gives 3 ;-).
+  ;; After an unterminated string, be that at EOL or at EOB, a / would start
+  ;; a regexp.
+  (let ((terminated (looking-at "\"")))
+    (if (or terminated
+            (looking-at "[\n\r]"))      ; Unterminated string with EOL.
+        (forward-char))
+    terminated))
+
+(defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
+  ;; Point is at a /.  Work out whether it is a division sign or a regexp
+  ;; opener and, in the latter case, put syntax-table properties on the whole
+  ;; regexp.  Point is left just after the division sign or the regexp, as the
+  ;; case may be.
+  ;;
+  ;; ANCHOR-STATE-/DIV says what a / at ANCHOR would have been: a division
+  ;; sign (value t) or a regexp opener (value nil).  The text from ANCHOR up
+  ;; to point is analysed to find out what the / at point is.
+  ;;
+  ;; The result is the same sort of state, this time for the position where
+  ;; point is left: t if a / there would be a division sign, nil if it would
+  ;; open a regexp.
+  (let* ((slash-pos (point))
+         (search-lim (1+ slash-pos))
+         division-p)
+    (goto-char anchor)
+    ;; Analyse the line to find out what the / is.  Starting from a "division"
+    ;; state, the / stays a division sign unless something before it flips
+    ;; the state, and vice versa.
+    (setq division-p
+          (if anchor-state-/div
+              (not (search-forward-regexp c-awk-regexp-sign-re search-lim t))
+            (search-forward-regexp c-awk-div-sign-re search-lim t)))
+    (cond
+     (division-p
+      ;; A division sign.  A / straight after it would open a regexp.
+      (goto-char search-lim)
+      nil)
+     (t
+      ;; A regexp opener.  Jump over the regexp innards, setting the match
+      ;; data.
+      (goto-char slash-pos)
+      (search-forward-regexp c-awk-regexp-without-end-re)
+      (c-awk-set-string-regexp-syntax-table-properties
+       (match-beginning 0) (match-end 0))
+      ;; After a terminating /, a further / would be a division sign.  After
+      ;; an incomplete regexp (ended by EOL or EOB) it would start another
+      ;; regexp.
+      (let ((closed (looking-at "/")))
+        (if (or closed
+                (looking-at "[\n\r]"))  ; Incomplete regexp terminated by EOL
+            (forward-char))
+        closed)))))
+
+(defun c-awk-set-syntax-table-properties (lim)
+;;     Scan the buffer text between point and LIM, setting (and clearing) the
+;; syntax-table property where necessary.
+;;
+;; This function is designed to be called as the FUNCTION in a MATCHER in
+;; font-lock-syntactic-keywords, and it always returns NIL (to inhibit
+;; repeated calls from font-lock: See elisp info page "Search-based
+;; Fontification").  It also gets called, with a bit of glue, from
+;; after-change-functions when font-lock isn't active.  Point is left
+;; "undefined" after this function exits.  THE BUFFER SHOULD HAVE BEEN
+;; WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS ROUTINE.
+;;
+;; We need to set/clear the syntax-table property on:
+;; (i) / - It is set to "string" on a / which is the opening or closing
+;;     delimiter of a properly terminated regexp (and left unset on a
+;;     division sign).
+;; (ii) the opener of an unterminated string/regexp: we set the property
+;;    "generic string delimiter" on both the opening " or / and the end of the
+;;    line where the closing delimiter is missing.
+;; (iii) "s inside strings/regexps (these will all be escaped "s).  They are
+;;   given the property "punctuation".  This will later allow other routines
+;;   to use the regexp "\\S\"*" to skip over the string innards.
+;; (iv) Inside a comment, all syntax-table properties are cleared.
+  (let (anchor
+	(anchor-state-/div nil)) ; t means a following / would be a div sign.
+    (c-awk-beginning-of-logical-line) ; ACM 2002/7/21.  This is probably redundant.
+    ;; Start from a clean slate: remove the property from the whole region,
+    ;; then re-apply it below only where it is needed.
+    (put-text-property (point) lim 'syntax-table nil)
+    (search-forward-regexp c-awk-harmless-lines+-here-re nil t) ; skip harmless lines.
+
+    ;; Once round the next loop for each string, regexp, or div sign.
+    (while (< (point) lim)
+      ;; ANCHOR is where the analysis of the next / (if any) starts from;
+      ;; ANCHOR-STATE-/DIV is the state which is valid at that position.
+      (setq anchor (point))
+      (search-forward-regexp c-awk-harmless-string*-here-re nil t)
+      ;; We are now looking at either a " or a /.
+      ;; Do our thing on the string, regexp or division sign.  Each helper
+      ;; returns the new state (would a following / be a division sign?).
+      (setq anchor-state-/div
+            (if (looking-at "_?\"")
+                (c-awk-syntax-tablify-string)
+              (c-awk-syntax-tablify-/ anchor anchor-state-/div)))
+
+      ;; Skip any further "harmless" lines before the next tricky one.  If
+      ;; something was skipped, the state is reset to nil.
+      (if (search-forward-regexp c-awk-harmless-lines+-here-re nil t)
+          (setq anchor-state-/div nil)))
+    nil))
+
+
+;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set
+;; the syntax-table properties even when font-lock isn't enabled, for the
+;; subsequent use of movement functions, etc.  However, it seems that if font
+;; lock _is_ enabled, we can always leave it to do the job.
+(defvar c-awk-old-EOLL 0)
+(make-variable-buffer-local 'c-awk-old-EOLL)
+;; Buffer-local.  End of logical line following the region which is about to
+;; be changed.  Set in c-awk-before-change and read back, after the change, by
+;; c-awk-end-of-change-region (which c-awk-after-change calls).
+
+(defun c-awk-before-change (beg end)
+;; Called exclusively from the before-change-functions hook.  BEG is not
+;; used.  Two jobs are done here:
+;; (i) the end of the (logical) line on which END lies is remembered in
+;;   c-awk-old-EOLL, for use once the change has happened;
+;; (ii) the c-awk-NL-prop text properties are cleared from END to the end of
+;;   the buffer.
+  (save-restriction
+    (save-excursion
+      (let ((eoll (c-awk-end-of-logical-line end)))
+        (setq c-awk-old-EOLL eoll))
+      (c-save-buffer-state ()
+        (c-awk-clear-NL-props end (point-max))))))
+
+(defun c-awk-end-of-change-region (beg end old-len)
+  ;; Return the end of the region which needs to be font-locked after a
+  ;; change.  BEG, END and OLD-LEN are the usual after-change arguments.  The
+  ;; result is the end of the logical line on which the change happened,
+  ;; either as it was before the change (c-awk-old-EOLL, shifted by the net
+  ;; number of characters inserted), or as it is now, whichever is later.
+  ;; N.B. point is left undefined.
+  (let* ((new-len (- end beg))
+         (old-eoll-shifted (+ (- c-awk-old-EOLL old-len) new-len))
+         (current-eoll (c-awk-end-of-logical-line end)))
+    (max old-eoll-shifted current-eoll)))
+
+(defun c-awk-after-change (beg end old-len)
+;; Called exclusively as an after-change function in AWK Mode.  Its job is to
+;; get the syntax-table properties set in the changed region.  When font-lock
+;; is enabled there is nothing to do here, since an enabled font-lock
+;; after-change function will always do this itself.
+  (if (and (boundp 'font-lock-mode) font-lock-mode)
+      nil
+    (save-restriction
+      (save-excursion
+        (let ((region-end (c-awk-end-of-change-region beg end old-len)))
+          (c-awk-beginning-of-logical-line beg)
+          ;; c-save-buffer-state: so that read-only status isn't affected
+          ;; (e.g. when first loading the buffer).
+          (c-save-buffer-state ()
+            (c-awk-set-syntax-table-properties region-end)))))))
+
+;; ACM 2002/5/25.  When font-locking is invoked by a buffer change, the region
+;; specified by the font-lock after-change function must be expanded to
+;; include ALL of any string or regexp within the region.  The simplest way to
+;; do this in practice is to use the beginning/end-of-logical-line functions.
+;; Don't overlook the possibility of the buffer change being the "recapturing"
+;; of a previously escaped newline.
+(defmacro c-awk-advise-fl-for-awk-region (function)
+  ;; Expand to a `before' advice (named get-awk-region) on FUNCTION, which is
+  ;; taken to be an after-change function called with (BEG END OLD-LEN).
+  `(defadvice ,function (before get-awk-region activate)
+;; When font-locking an AWK Mode buffer, make sure that any string/regexp is
+;; completely font-locked, by widening the (BEG END) arguments to whole
+;; logical lines before the advised function sees them.
+  (when (eq major-mode 'awk-mode)
+    (save-excursion
+      ;; END is recomputed first, while argument 0 still holds the original
+      ;; BEG which c-awk-end-of-change-region needs.
+      (ad-set-arg 1 (c-awk-end-of-change-region
+                     (ad-get-arg 0)     ; beg
+                     (ad-get-arg 1)     ; end
+                     (ad-get-arg 2)))   ; old-len
+      ;; Only now replace BEG by the value returned for its logical line.
+      (ad-set-arg 0 (c-awk-beginning-of-logical-line (ad-get-arg 0)))))))
+
+;; Install the advice on each of these after-change functions.
+(c-awk-advise-fl-for-awk-region font-lock-after-change-function)
+(c-awk-advise-fl-for-awk-region jit-lock-after-change)
+(c-awk-advise-fl-for-awk-region lazy-lock-defer-rest-after-change)
+(c-awk-advise-fl-for-awk-region lazy-lock-defer-line-after-change)
+
+;; ACM 2002/9/29.  Functions for C-M-a and C-M-e
+
+(defconst c-awk-terminated-regexp-or-string-here-re "\\=\\s\"\\S\"*\\s\"")
+;; Matches a terminated string/regexp starting at point (utilising
+;; syntax-table properties): a character with "string quote" syntax, any
+;; number of characters without that syntax, then another "string quote".
+
+(defconst c-awk-unterminated-regexp-or-string-here-re "\\=\\s|\\S|*$")
+;; Matches an unterminated string/regexp starting at point, NOT including the
+;; eol at the end: a character with "generic string delimiter" syntax followed
+;; by characters without that syntax, up to the end of the line.
+
+(defconst c-awk-harmless-pattern-characters*
+  (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*"))
+;; Matches any sequence (possibly empty) of "harmless" characters in a pattern
+;; and escaped character pairs.  "Harmless" means anything other than
+;; { ; # / " \ and the end-of-line characters.
+
+(defun c-awk-beginning-of-defun (&optional arg)
+  "Move backward to the beginning of an AWK \"defun\".  With ARG, do it that
+many times.  Negative arg -N means move forward to Nth following beginning of
+defun.  Returns t unless search stops due to beginning or end of buffer.
+ARG defaults to 1 when omitted or nil.
+
+By a \"defun\" is meant either a pattern-action pair or a function.  The start
+of a defun is recognised as code starting at column zero which is neither a
+closing brace nor a comment nor a continuation of the previous line.  Unlike
+in some other modes, having an opening brace at column 0 is neither necessary
+nor helpful."
+  (interactive "p")
+  ;; ARG is optional, so a call from Lisp may leave it nil; the numeric
+  ;; comparisons below need a number (same default as `c-awk-end-of-defun').
+  (or arg (setq arg 1))
+  (save-match-data
+    (c-save-buffer-state                ; ensures the buffer is writable.
+     nil
+     (let ((found t))     ; Has the most recent regexp search found b-of-defun?
+       (if (>= arg 0)
+           ;; Go back one defun each time round the following loop. (For +ve arg)
+           (while (and found (> arg 0) (not (eq (point) (point-min))))
+             ;; Go back one "candidate" (code in column 0 which is neither a
+             ;; comment nor a closing brace) each time round the next loop
+             ;; until one is genuinely a beginning-of-defun, i.e. the NL
+             ;; property got for the previous line is ?\; or ?\#.
+             (while (and (setq found (search-backward-regexp
+                                      "^[^#} \t\n\r]" (point-min) 'stop-at-limit))
+                         (not (memq (c-awk-get-NL-prop-prev-line) '(?\; ?\#)))))
+             (setq arg (1- arg)))
+         ;; The same for a -ve arg.  Step forward one character first, so that
+         ;; a beginning-of-defun at point is not found again.
+         (if (not (eq (point) (point-max))) (forward-char 1))
+         (while (and found (< arg 0) (not (eq (point) (point-max))))
+           (while (and (setq found (search-forward-regexp
+                                    "^[^#} \t\n\r]" (point-max) 'stop-at-limit))
+                       (not (memq (c-awk-get-NL-prop-prev-line) '(?\; ?\#)))))
+           (setq arg (1+ arg)))
+         ;; The forward search leaves point after the matched character; go
+         ;; back to the start of the line it matched on.
+         (if found (goto-char (match-beginning 0))))
+       ;; ARG has been counted down (or up) to zero iff every requested move
+       ;; succeeded.
+       (eq arg 0)))))
+
+(defun c-awk-forward-awk-pattern ()
+  ;; Point is at the start of an AWK pattern (which may be null) or function
+  ;; declaration.  Move to the pattern's end, and past any trailing space or
+  ;; comment.  Typically, we stop at the { which denotes the corresponding AWK
+  ;; action/function body.  Otherwise we stop at the EOL (or ;) marking the
+  ;; absence of an explicit action.
+  ;;
+  ;; The loop has no body: all the work is done in the condition, whose value
+  ;; (that of the `cond') is non-nil for as long as there is more pattern to
+  ;; move over.
+  (while
+      (progn
+        ;; Skip the "harmless" characters, then a comment, if we're at one.
+        (search-forward-regexp c-awk-harmless-pattern-characters*)
+        (if (looking-at "#") (end-of-line))
+        (cond
+         ((eobp) nil)
+         ((looking-at "[{;]") nil)  ; We've finished!
+         ((eolp)
+          ;; Carry on onto the next line only if this one is incomplete.
+          (if (c-awk-cur-line-incomplete-p)
+              (forward-line)            ; returns non-nil
+            nil))
+         ;; Step over a whole string or regexp; a successful search returns
+         ;; non-nil, which keeps the loop going.
+         ((search-forward-regexp c-awk-terminated-regexp-or-string-here-re nil t))
+         ((search-forward-regexp c-awk-unterminated-regexp-or-string-here-re nil t))
+         ((looking-at "/") (forward-char) t))))) ; division sign.
+
+(defun c-awk-end-of-defun1 ()
+  ;; Point is at the start of a "defun".  Move to its end and return that
+  ;; position.
+  (c-awk-forward-awk-pattern)
+  (if (looking-at "{")
+      ;; An action or function body: jump over the whole brace block.
+      (goto-char (scan-sexps (point) 1))
+    (if (looking-at ";")
+        ;; A pattern without an action, closed by a ;.
+        (forward-char)
+      ;; A pattern without an action, closed by EOL: stay put.  Anything
+      ;; else means c-awk-forward-awk-pattern stopped somewhere unexpected.
+      (or (eolp)
+          (error "c-awk-end-of-defun1:  Failure of c-awk-forward-awk-pattern"))))
+  (point))
+
+(defun c-awk-beginning-of-defun-p ()
+  ;; Return non-nil if point is already at the beginning of a defun, i.e. at
+  ;; code in column 0 which is neither a } nor a comment, and which isn't a
+  ;; continuation line of any sort.
+  (when (looking-at "^[^#} \t\n\r]")
+    (not (c-awk-prev-line-incomplete-p))))
+
+(defun c-awk-end-of-defun (&optional arg)
+  "Move forward to next end of defun.  With argument, do it that many times.
+Negative argument -N means move back to Nth preceding end of defun.
+
+An end of a defun occurs right after the closing brace that matches the
+opening brace at its start, or immediately after the AWK pattern when there is
+no explicit action; see function `c-awk-beginning-of-defun'."
+  (interactive "p")
+  (or arg (setq arg 1))                 ; ARG defaults to 1 in calls from Lisp.
+  (save-match-data
+    (c-save-buffer-state
+     nil
+     (let ((start-point (point)) end-point)
+       ;; Strategy: (For +ve ARG): If we're not already at a beginning-of-defun,
+       ;; move backwards to one.
+       ;; Repeat [(i) move forward to end-of-current-defun (see below);
+       ;;         (ii) If this isn't it, move forward to beginning-of-defun].
+       ;; We start counting ARG only when step (i) has passed the original point.
+       (when (> arg 0)
+         ;; Try to move back to a beginning-of-defun, if not already at one.
+         (if (not (c-awk-beginning-of-defun-p))
+             (when (not (c-awk-beginning-of-defun 1)) ; No bo-defun before point.
+               (goto-char start-point)
+               (c-awk-beginning-of-defun -1))) ; if this fails, we're at EOB, tough!
+         ;; Now count forward, one defun at a time.  The `and' chain is the
+         ;; whole loop: it stops at EOB, when ARG reaches 0, or when no
+         ;; further beginning-of-defun can be found.
+         (while (and (not (eobp))
+                     (c-awk-end-of-defun1)
+                     (if (> (point) start-point) (setq arg (1- arg)) t)
+                     (> arg 0)
+                     (c-awk-beginning-of-defun -1))))
+
+       ;; For -ve ARG: step back one beginning-of-defun at a time, each time
+       ;; finding the end of that defun (END-POINT) without moving point.
+       ;; Only ends which lie before the original point are counted.
+       (when (< arg 0)
+         (setq end-point start-point)
+         (while (and (not (bobp))
+                     (c-awk-beginning-of-defun 1)
+                     (if (< (setq end-point (if (bobp) (point)
+                                              (save-excursion (c-awk-end-of-defun1))))
+                            start-point)
+                         (setq arg (1+ arg)) t)
+                     (< arg 0)))
+         ;; Never finish after the position we started from.
+         (goto-char (min start-point end-point)))))))
+
+(cc-provide 'cc-awk)			; Changed from 'awk-mode, ACM 2002/5/21
+;;; cc-awk.el ends here