Mercurial > emacs
annotate lisp/nxml/nxml-parse.el @ 95948:d55ec23f052d
*** empty log message ***
| author | Glenn Morris <rgm@gnu.org> |
|---|---|
| date | Sun, 15 Jun 2008 02:53:17 +0000 |
| parents | d495d4d5452f |
| children | e374c747704b |
| rev | line source |
|---|---|
| 86361 | 1 ;;; nxml-parse.el --- XML parser, sharing infrastructure with nxml-mode |
| 2 | |
| 87665 | 3 ;; Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc. |
| 86361 | 4 |
| 5 ;; Author: James Clark | |
| 6 ;; Keywords: XML | |
| 7 | |
| 86541 | 8 ;; This file is part of GNU Emacs. |
| 9 | |
|
94666
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
10 ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 86541 | 11 ;; it under the terms of the GNU General Public License as published by |
|
94666
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
12 ;; the Free Software Foundation, either version 3 of the License, or |
|
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
13 ;; (at your option) any later version. |
| 86361 | 14 |
| 86541 | 15 ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 18 ;; GNU General Public License for more details. | |
| 86361 | 19 |
| 86541 | 20 ;; You should have received a copy of the GNU General Public License |
|
94666
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 86361 | 22 |
| 23 ;;; Commentary: | |
| 24 | |
| 25 ;; Entry point is `nxml-parse-file'. | |
| 26 | |
| 27 ;;; Code: | |
| 28 | |
| 29 (require 'nxml-util) | |
| 30 (require 'xmltok) | |
| 31 (require 'nxml-enc) | |
| 32 (require 'nxml-ns) | |
| 33 | |
| 34 (defvar nxml-parse-file-name nil) | |
| 35 | |
| 36 (defvar nxml-validate-function nil | |
| 37 "Nil or a function to be called by `nxml-parse-file' to perform validation. | |
| 38 The function will be called once for each start-tag or end-tag. The | |
| 39 function is passed two arguments TEXT and START-TAG. For a start-tag, | |
| 40 START-TAG is a list (NAME ATTRIBUTES) where NAME and ATTRIBUTES are in | |
| 41 the same form as returned by `nxml-parse-file'. For an end-tag, | |
| 42 START-TAG is nil. TEXT is a string containing the text immediately | |
| 43 preceding the tag, or nil if there was no such text. An empty element | |
| 44 is treated as a start-tag followed by an end-tag. | |
| 45 | |
| 46 For a start-tag, the namespace state will be the state after | |
| 47 processing the namespace declarations in the start-tag. For an | |
| 48 end-tag, the namespace state will be the state before popping the | |
| 49 namespace declarations for the corresponding start-tag. | |
| 50 | |
| 51 The function must return nil if no error is detected or a | |
| 52 cons (MESSAGE . LOCATION) where MESSAGE is a string containing | |
| 53 an error message and LOCATION indicates what caused the error | |
| 54 as follows: | |
| 55 | |
| 56 - nil indicates the tag as a whole caused it; this is always allowed; | |
| 57 | |
| 58 - text indicates the text caused it; this is allowed only if | |
| 59 TEXT is non-nil; | |
| 60 | |
| 61 - tag-close indicates the close of the tag caused it; this is | |
| 62 allowed only if START-TAG is non-nil; | |
| 63 | |
| 64 - (attribute-name . N) indicates that the name of the Nth attribute | |
| 65 caused it; N counts from 0; this is allowed only if START-TAG is non-nil | |
| 66 and N must be less than the number of attributes; | |
| 67 | |
| 68 - (attribute-value . N) indicates that the value of the Nth attribute | |
| 69 caused it; N counts from 0; this is allowed only if START-TAG is non-nil | |
| 70 and N must be less than the number of attributes.") | |
| 71 | |
| 72 (defun nxml-parse-file (file) | |
| 73 "Parse the XML document in FILE and return it as a list. | |
| 74 An XML element is represented as a list (NAME ATTRIBUTES . CHILDREN). | |
| 75 NAME is either a string, in the case where the name does not have a | |
| 76 namespace, or a cons (NAMESPACE . LOCAL-NAME), where NAMESPACE is a | |
| 77 symbol and LOCAL-NAME is a string, in the case where the name does | |
| 78 have a namespace. NAMESPACE is a keyword whose name is `:URI', where | |
| 79 URI is the namespace name. ATTRIBUTES is an alist of attributes where | |
| 80 each attribute has the form (NAME . VALUE), where NAME has the same | |
| 81 form as an element name, and VALUE is a string. A namespace | |
| 82 declaration is represented as an attribute whose name is | |
| 83 \(:http://www.w3.org/2000/xmlns/ . LOCAL-NAME). CHILDREN is a list | |
| 84 containing strings and child elements; CHILDREN never contains two | |
| 85 consecutive strings and never contains an empty string. Processing | |
| 86 instructions and comments are not represented. The return value is a | |
| 87 list representing the document element. | |
| 88 | |
| 89 If the XML document is not well-formed, an error having the condition | |
| 90 `nxml-file-parse-error' will be signaled; the error data will be a | |
| 91 list of the form \(FILE POSITION MESSAGE), where POSITION is an integer | |
| 92 specifying the position where the error was detected, and MESSAGE is a | |
| 93 string describing the error. | |
| 94 | |
| 95 The current contents of FILE will be parsed even if there is a | |
| 96 modified buffer currently visiting FILE. | |
| 97 | |
| 98 If the variable `nxml-validate-function' is non-nil, it will be | |
| 99 called twice for each element, and any reported error will be signaled | |
| 100 in the same way as a well-formedness error." | |
| 101 (save-excursion | |
| 102 (set-buffer (nxml-parse-find-file file)) | |
| 103 (unwind-protect | |
| 104 (let ((nxml-parse-file-name file)) | |
| 105 (nxml-parse-instance)) | |
| 106 (kill-buffer nil)))) | |
| 107 | |
| 108 (defun nxml-parse-find-file (file) | |
| 109 (save-excursion | |
| 110 (set-buffer (get-buffer-create " *nXML Parse*")) | |
| 111 (erase-buffer) | |
| 112 (let ((set-auto-coding-function 'nxml-set-xml-coding)) | |
| 113 (insert-file-contents file)) | |
| 114 (current-buffer))) | |
| 115 | |
| 116 (defun nxml-parse-instance () | |
| 117 (let (xmltok-dtd) | |
| 118 (xmltok-save | |
| 119 (xmltok-forward-prolog) | |
| 120 (nxml-check-xmltok-errors) | |
| 121 (nxml-ns-save | |
| 122 (nxml-parse-instance-1))))) | |
| 123 | |
| 124 (defun nxml-parse-instance-1 () | |
| 125 (let* ((top (cons nil nil)) | |
| 126 ;; tail is a cons cell, whose cdr is nil | |
| 127 ;; additional elements will be destructively appended to tail | |
| 128 (tail top) | |
| 129 ;; stack of tails one for each open element | |
| 130 tail-stack | |
| 131 ;; list of QNames of open elements | |
| 132 open-element-tags | |
| 133 ;; list of strings buffering a text node, in reverse order | |
| 134 text | |
| 135 ;; position of beginning of first (in buffer) string in text | |
| 136 text-pos) | |
| 137 (while (xmltok-forward) | |
| 138 (nxml-check-xmltok-errors) | |
| 139 (cond ((memq xmltok-type '(start-tag end-tag empty-element)) | |
| 140 (when text | |
| 141 (setq text (apply 'concat (nreverse text))) | |
| 142 (setcdr tail (cons text nil)) | |
| 143 (setq tail (cdr tail))) | |
| 144 (when (not (eq xmltok-type 'end-tag)) | |
| 145 (when (and (not open-element-tags) | |
| 146 (not (eq tail top))) | |
| 147 (nxml-parse-error nil "Multiple top-level elements")) | |
| 148 (setq open-element-tags | |
| 149 (cons (xmltok-start-tag-qname) | |
| 150 open-element-tags)) | |
| 151 (nxml-ns-push-state) | |
| 152 (let ((tag (nxml-parse-start-tag))) | |
| 153 (nxml-validate-tag text text-pos tag) | |
| 154 (setq text nil) | |
| 155 (setcdr tail (cons tag nil)) | |
| 156 (setq tail (cdr tail)) | |
| 157 (setq tail-stack (cons tail tail-stack)) | |
| 158 (setq tail (last tag)))) | |
| 159 (when (not (eq xmltok-type 'start-tag)) | |
| 160 (or (eq xmltok-type 'empty-element) | |
| 161 (equal (car open-element-tags) | |
| 162 (xmltok-end-tag-qname)) | |
| 163 (if open-element-tags | |
| 164 (nxml-parse-error nil | |
| 165 "Unbalanced end-tag; expected </%s>" | |
| 166 (car open-element-tags)) | |
| 167 (nxml-parse-error nil "Extra end-tag"))) | |
| 168 (nxml-validate-tag text text-pos nil) | |
| 169 (setq text nil) | |
| 170 (nxml-ns-pop-state) | |
| 171 (setq open-element-tags (cdr open-element-tags)) | |
| 172 (setq tail (car tail-stack)) | |
| 173 (setq tail-stack (cdr tail-stack))) | |
| 174 (setq text-pos nil)) | |
| 175 ((memq xmltok-type '(space data entity-ref char-ref cdata-section)) | |
| 176 (cond (open-element-tags | |
| 177 (unless text-pos | |
| 178 (setq text-pos xmltok-start)) | |
| 179 (setq text | |
| 180 (cons (nxml-current-text-string) text))) | |
| 181 ((not (eq xmltok-type 'space)) | |
| 182 (nxml-parse-error | |
| 183 nil | |
| 184 "%s at top-level" | |
| 185 (cdr (assq xmltok-type | |
| 186 '((data . "Text characters") | |
| 187 (entity-ref . "Entity reference") | |
| 188 (char-ref . "Character reference") | |
| 189 (cdata-section . "CDATA section")))))))))) | |
| 190 (unless (cdr top) | |
| 191 (nxml-parse-error (point-max) "Missing document element")) | |
| 192 (cadr top))) | |
| 193 | |
| 194 (defun nxml-parse-start-tag () | |
| 195 (let (parsed-attributes | |
| 196 parsed-namespace-attributes | |
| 197 atts att prefixes prefix ns value name) | |
| 198 (setq atts xmltok-namespace-attributes) | |
| 199 (while atts | |
| 200 (setq att (car atts)) | |
| 201 (setq value (or (xmltok-attribute-value att) | |
| 202 (nxml-parse-error nil "Invalid attribute value"))) | |
| 203 (setq ns (nxml-make-namespace value)) | |
| 204 (setq prefix (and (xmltok-attribute-prefix att) | |
| 205 (xmltok-attribute-local-name att))) | |
| 206 (cond ((member prefix prefixes) | |
| 207 (nxml-parse-error nil "Duplicate namespace declaration")) | |
| 208 ((not prefix) | |
| 209 (nxml-ns-set-default ns)) | |
| 210 (ns | |
| 211 (nxml-ns-set-prefix prefix ns)) | |
| 212 (t (nxml-parse-error nil "Cannot undeclare namespace prefix"))) | |
| 213 (setq prefixes (cons prefix prefixes)) | |
| 214 (setq parsed-namespace-attributes | |
| 215 (cons (cons (nxml-make-name nxml-xmlns-namespace-uri | |
| 216 (xmltok-attribute-local-name att)) | |
| 217 value) | |
| 218 parsed-namespace-attributes)) | |
| 219 (setq atts (cdr atts))) | |
| 220 (setq name | |
| 221 (nxml-make-name | |
| 222 (let ((prefix (xmltok-start-tag-prefix))) | |
| 223 (if prefix | |
| 224 (or (nxml-ns-get-prefix prefix) | |
| 225 (nxml-parse-error (1+ xmltok-start) | |
| 226 "Prefix `%s' undeclared" | |
| 227 prefix)) | |
| 228 (nxml-ns-get-default))) | |
| 229 (xmltok-start-tag-local-name))) | |
| 230 (setq atts xmltok-attributes) | |
| 231 (while atts | |
| 232 (setq att (car atts)) | |
| 233 (setq ns | |
| 234 (let ((prefix (xmltok-attribute-prefix att))) | |
| 235 (and prefix | |
| 236 (or (nxml-ns-get-prefix prefix) | |
| 237 (nxml-parse-error (xmltok-attribute-name-start att) | |
| 238 "Prefix `%s' undeclared" | |
| 239 prefix))))) | |
| 240 (setq parsed-attributes | |
| 241 (let ((nm (nxml-make-name ns | |
| 242 (xmltok-attribute-local-name att)))) | |
| 243 (when (assoc nm parsed-attributes) | |
| 244 (nxml-parse-error (xmltok-attribute-name-start att) | |
| 245 "Duplicate attribute")) | |
| 246 (cons (cons nm (or (xmltok-attribute-value att) | |
| 247 (nxml-parse-error nil "Invalid attribute value"))) | |
| 248 parsed-attributes))) | |
| 249 (setq atts (cdr atts))) | |
| 250 ;; We want to end up with the attributes followed by the | |
| 251 ;; namespace attributes in the same order as | |
| 252 ;; xmltok-attributes and xmltok-namespace-attributes respectively. | |
| 253 (when parsed-namespace-attributes | |
| 254 (setq parsed-attributes | |
| 255 (nconc parsed-namespace-attributes parsed-attributes))) | |
| 256 (list name (nreverse parsed-attributes)))) | |
| 257 | |
| 258 (defun nxml-validate-tag (text text-pos tag) | |
| 259 (when nxml-validate-function | |
| 260 (let ((err (funcall nxml-validate-function text tag)) | |
| 261 pos) | |
| 262 (when err | |
| 263 (setq pos (nxml-validate-error-position (cdr err) | |
| 264 (and text text-pos) | |
| 265 tag)) | |
| 266 (or pos (error "Incorrect return value from %s" | |
| 267 nxml-validate-function)) | |
| 268 (nxml-parse-error pos (car err)))))) | |
| 269 | |
| 270 (defun nxml-validate-error-position (location text-pos tag) | |
| 271 (cond ((null location) xmltok-start) | |
| 272 ((eq location 'text) text-pos) | |
| 273 ((eq location 'tag-close) | |
| 274 (and tag (- (point) (if (eq xmltok-type 'empty-element ) 2 1)))) | |
| 275 ((consp location) | |
| 276 (let ((att (nth (cdr location) xmltok-attributes))) | |
| 277 (when (not att) | |
| 278 (setq att (nth (- (cdr location) (length xmltok-attributes)) | |
| 279 xmltok-namespace-attributes))) | |
| 280 (cond ((not att)) | |
| 281 ((eq (car location) 'attribute-name) | |
| 282 (xmltok-attribute-name-start att)) | |
| 283 ((eq (car location) 'attribute-value) | |
| 284 (xmltok-attribute-value-start att))))))) | |
| 285 | |
| 286 (defun nxml-make-name (ns local-name) | |
| 287 (if ns | |
| 288 (cons ns local-name) | |
| 289 local-name)) | |
| 290 | |
| 291 (defun nxml-current-text-string () | |
| 292 (cond ((memq xmltok-type '(space data)) | |
| 293 (buffer-substring-no-properties xmltok-start | |
| 294 (point))) | |
| 295 ((eq xmltok-type 'cdata-section) | |
| 296 (buffer-substring-no-properties (+ xmltok-start 9) | |
| 297 (- (point) 3))) | |
| 298 ((memq xmltok-type '(char-ref entity-ref)) | |
| 299 (unless xmltok-replacement | |
| 300 (nxml-parse-error nil | |
| 301 (if (eq xmltok-type 'char-ref) | |
| 302 "Reference to unsupported Unicode character" | |
| 303 "Unresolvable entity reference"))) | |
| 304 xmltok-replacement))) | |
| 305 | |
| 306 (defun nxml-parse-error (position &rest args) | |
| 307 (nxml-signal-file-parse-error nxml-parse-file-name | |
| 308 (or position xmltok-start) | |
| 309 (apply 'format args))) | |
| 310 | |
| 311 (defun nxml-check-xmltok-errors () | |
| 312 (when xmltok-errors | |
| 313 (let ((err (car (last xmltok-errors)))) | |
| 314 (nxml-signal-file-parse-error nxml-parse-file-name | |
| 315 (xmltok-error-start err) | |
| 316 (xmltok-error-message err))))) | |
| 317 | |
| 318 (provide 'nxml-parse) | |
| 319 | |
| 86379 | 320 ;; arch-tag: fc19639b-1bff-4673-9992-f539da89ba1e |
| 86361 | 321 ;;; nxml-parse.el ends here |
