Mercurial > emacs
annotate lisp/mail/rfc822.el @ 59061:a7985894de81
Comment change.
| author | Richard M. Stallman <rms@gnu.org> |
|---|---|
| date | Tue, 21 Dec 2004 11:50:52 +0000 |
| parents | a2ead0e84644 |
| children | 18a818a2ee7c |
| rev | line source |
|---|---|
|
658
7cbd4fcd8b0f
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
584
diff
changeset
|
1 ;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike |
|
7cbd4fcd8b0f
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
584
diff
changeset
|
2 |
| 845 | 3 ;; Copyright (C) 1986, 87, 1990 Free Software Foundation, Inc. |
| 4 | |
|
789
71d052f72ac1
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
658
diff
changeset
|
5 ;; Author: Richard Mlynarik <mly@eddie.mit.edu> |
|
71d052f72ac1
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
658
diff
changeset
|
6 ;; Maintainer: FSF |
|
814
38b2499cb3e9
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
804
diff
changeset
|
7 ;; Keywords: mail |
|
789
71d052f72ac1
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
658
diff
changeset
|
8 |
| 43 | 9 ;; This file is part of GNU Emacs. |
| 10 | |
| 11 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
| 12 ;; it under the terms of the GNU General Public License as published by | |
| 804 | 13 ;; the Free Software Foundation; either version 2, or (at your option) |
| 43 | 14 ;; any later version. |
| 15 | |
| 16 ;; GNU Emacs is distributed in the hope that it will be useful, | |
| 17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 19 ;; GNU General Public License for more details. | |
| 20 | |
| 21 ;; You should have received a copy of the GNU General Public License | |
| 14169 | 22 ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
| 24 ;; Boston, MA 02111-1307, USA. | |
| 43 | 25 |
|
2315
9e7ec92a4fdf
Added or corrected Commentary headers
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
845
diff
changeset
|
26 ;;; Commentary: |
|
9e7ec92a4fdf
Added or corrected Commentary headers
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
845
diff
changeset
|
27 |
|
9e7ec92a4fdf
Added or corrected Commentary headers
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
845
diff
changeset
|
28 ;; Support functions for parsing RFC-822 headers, used by mail and news |
|
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
21165
diff
changeset
|
29 ;; modes. |
|
2315
9e7ec92a4fdf
Added or corrected Commentary headers
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
845
diff
changeset
|
30 |
|
789
71d052f72ac1
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
658
diff
changeset
|
31 ;;; Code: |
|
71d052f72ac1
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
658
diff
changeset
|
32 |
|
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
;; Buffer position where the address currently being parsed starts.
;; Bound with `let' by `rfc822-addresses-1' and `rfc822-addresses'; read
;; by `rfc822-bad-address' to delimit the text it reports and returns.
33 (defvar rfc822-address-start) |
|
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
34 |
|
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
35 ;; uses rfc822-address-start free, throws to address |
(defun rfc822-bad-address (reason)
  "Turn the address being parsed into an error comment and abort the parse.
The text from `rfc822-address-start' up to, but not including, the next
`,' or `;' after point (or up to the end of the buffer if there is none)
is rewritten in place as
  (Unparsable address -- REASON: \"TEXT\")
The marker `_^_' is inserted into TEXT at the position where point was on
entry, and every backslash, parenthesis and newline in TEXT gets a
backslash in front of it.
This function never returns normally: after skipping whitespace that
follows the rewritten text it throws the buffer text from
`rfc822-address-start' to point to the `address' catch tag.
`rfc822-address-start' is used as a free variable."
  (save-restriction
    ;; Show where the parser gave up.
    (insert "_^_")
    (let ((limit (if (re-search-forward "[,;]" nil t)
                     (max (point-min) (1- (point)))
                   (point-max))))
      (narrow-to-region rfc822-address-start limit))
    ;; Make the text suitable for inclusion in (...).  Backslash is
    ;; handled first so the backslashes added for the other characters
    ;; are not escaped a second time.
    (dolist (loser '("\\" "(" ")" "\n"))
      (goto-char (point-min))
      (while (search-forward loser nil t)
        (backward-char 1)
        (insert ?\\)
        (forward-char 1)))
    (goto-char (point-min))
    (insert "(Unparsable address -- " reason ": \"")
    (goto-char (point-max))
    (insert "\")"))
  (rfc822-nuke-whitespace)
  (throw 'address (buffer-substring rfc822-address-start (point))))
| 43 | 58 |
(defun rfc822-nuke-whitespace (&optional leave-space)
  "Delete whitespace and parenthesized comments starting at point.
Runs of space, tab and newline, and (...) comments (which may nest), are
removed from the buffer until some other character or the end of the
buffer is reached.  Inside a comment a backslash is deleted together with
the character after it.
A comment still open at the end of the buffer, or a backslash that is the
last character of the buffer inside a comment, is reported through
`rfc822-bad-address', which does not return.
If LEAVE-SPACE is non-nil and point ends up at neither end of the buffer,
insert a space before point unless one is already there."
  (let ((more t)
        c)
    (while (and more (not (eobp)))
      (setq c (following-char))
      (cond ((= c ?\()
             ;; Empty the comment, then remove the leftover "()".
             (forward-char 1)
             (while (if (eobp)
                        (rfc822-bad-address "Unbalanced comment (...)")
                      (setq c (following-char))
                      (/= c ?\)))
               (cond ((looking-at "[^()\\]+")
                      (replace-match ""))
                     ((= c ?\()
                      ;; Nested comment: the recursive call removes it.
                      (rfc822-nuke-whitespace))
                     ((< (point) (1- (point-max)))
                      ;; Backslash plus the character it quotes.
                      (delete-char 2))
                     (t
                      (rfc822-bad-address "orphaned backslash"))))
             (backward-char 1)
             (delete-char 2))
            ((memq c '(?\  ?\t ?\n))
             (delete-region (point)
                            (progn (skip-chars-forward " \t\n") (point))))
            (t
             (setq more nil))))
    (or (not leave-space)
        (eobp)
        (bobp)
        (= (preceding-char) ?\ )
        (insert ?\ ))))
| 91 | |
(defun rfc822-looking-at (regex &optional leave-space)
  "Try to match REGEX at point; on success step over it and skip whitespace.
REGEX is either a string, which is tried as a regular expression with
`looking-at', or a character, which is compared with the character after
point.
On a match, point moves past the matched text, `rfc822-nuke-whitespace'
is called with LEAVE-SPACE, the match data as it was before that call is
reinstated, and t is returned.  Otherwise return nil without moving point."
  (let ((hit (if (stringp regex)
                 (and (looking-at regex)
                      (progn (goto-char (match-end 0))
                             t))
               (and (not (eobp))
                    (= (following-char) regex)
                    (progn (forward-char 1)
                           t)))))
    (when hit
      ;; Comment skipping does its own matching; keep our match data.
      (let ((saved (match-data)))
        (rfc822-nuke-whitespace leave-space)
        (set-match-data saved)
        t))))
| 106 | |
(defun rfc822-snarf-word ()
  "Move point past one `word', i.e. an atom or a quoted-string.
Whitespace and comments after the word are removed by `rfc822-looking-at'.
Return t.  If no word starts at point, or a quoted string is not closed,
call `rfc822-bad-address', which does not return."
  (if (= (following-char) ?\")
      ;; quoted-string
      (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
          (rfc822-bad-address "Unterminated quoted string"))
    ;; atom
    (or (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+")
        (rfc822-bad-address "Rubbish in address"))))
| 118 | |
(defun rfc822-snarf-words ()
  "Move point past one or more words joined by `.' characters.
Each word is consumed with `rfc822-snarf-word'.  Return nil."
  (let ((dotted t))
    (while dotted
      (rfc822-snarf-word)
      ;; A following "." means another word must come next.
      (setq dotted (rfc822-looking-at ?.)))))
| 123 | |
(defun rfc822-snarf-subdomain ()
  "Move point past one sub-domain: a domain-literal or a domain-ref.
A domain-literal is bracketed text [...]; a domain-ref is an atom.
Return t.  If neither is found at point, or a domain literal is not
closed, call `rfc822-bad-address', which does not return."
  (if (= (following-char) ?\[)
      ;; domain-literal
      (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
          (rfc822-bad-address "Unterminated domain literal [...]"))
    ;; domain-ref = atom
    (or (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\".]+")
        (rfc822-bad-address "Rubbish in host/domain specification"))))
| 135 | |
(defun rfc822-snarf-domain ()
  "Move point past a domain: one or more sub-domains joined by `.'.
Each part is consumed with `rfc822-snarf-subdomain'.  Return nil."
  (let ((dotted t))
    (while dotted
      (rfc822-snarf-subdomain)
      ;; A following "." means another sub-domain must come next.
      (setq dotted (rfc822-looking-at ?.)))))
| 140 | |
(defun rfc822-snarf-frob-list (name separator terminator snarfer
                                    &optional return)
  "Parse a SEPARATOR-delimited list of items that ends with TERMINATOR.
SEPARATOR and TERMINATOR are passed to `rfc822-looking-at', so each is a
character or a regexp string.  SNARFER is called with no arguments, with
point at the start of an item, to consume that item.  Several separators
in a row count as one.  NAME describes the construct and is used only in
error messages.
If RETURN is non-nil, return the non-nil values produced by SNARFER in
order; a value that is itself a list contributes its elements.  Otherwise
return nil.
Reaching the end of the buffer before TERMINATOR, or finding two items
with no separator between them, is reported through `rfc822-bad-address',
which does not return."
  (let ((at-start t)
        (acc nil)
        item)
    (while (cond ((eobp)
                  (rfc822-bad-address
                   (format "End of addresses in middle of %s" name)))
                 ((rfc822-looking-at terminator)
                  nil)
                 ((rfc822-looking-at separator)
                  ;; multiple separators are allowed and do nothing.
                  (while (rfc822-looking-at separator))
                  t)
                 (at-start
                  ;; The first item needs no separator in front of it.
                  t)
                 (t
                  (rfc822-bad-address
                   (format "Gubbish in middle of %s" name))))
      (setq item (funcall snarfer))
      (setq at-start nil)
      (when (and return item)
        ;; ACC is kept in reverse order until the very end.
        (if (listp item)
            (dolist (elt item)
              (setq acc (cons elt acc)))
          (setq acc (cons item acc)))))
    (nreverse acc)))
| 167 | |
| 168 ;; return either an address (a string) or a list of addresses | |
| 169 (defun rfc822-addresses-1 (&optional allow-groups) |
  ;; Parse a single address beginning at point.  Whitespace and comments
  ;; are deleted from the buffer along the way (by `rfc822-looking-at').
  ;; The result is delivered by a throw to the `address' catch below: a
  ;; string for one mailbox, the list built by `rfc822-snarf-frob-list'
  ;; for a group (accepted only when ALLOW-GROUPS is non-nil), or nil when
  ;; no word has been consumed before an unrecognized character.
  ;; `rfc822-bad-address' throws its error text to the same tag.
| 170 ;; Looking for an rfc822 `address' |
| 171 ;; Either a group (1*word ":" [#mailbox] ";") |
| 172 ;; or a mailbox (addr-spec | 1*word route-addr) |
| 173 ;; addr-spec is (local-part "@" domain) |
| 174 ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">") |
| 175 ;; local-part is (word *("." word)) |
| 176 ;; word is (atom | quoted-string) |
| 177 ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)") |
| 178 ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+ |
| 179 ;; domain is sub-domain *("." sub-domain) |
| 180 ;; sub-domain is domain-ref | domain-literal |
| 181 ;; domain-literal is "[" *(dtext | quoted-pair) "]" |
| 182 ;; dtext is "[^][\\n" |
| 183 ;; domain-ref is atom |
|
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
184 (let ((rfc822-address-start (point)) |
  ;; N counts how many times `rfc822-snarf-words' has run in the main
  ;; loop below; it starts at 0 and is only ever incremented.
| 43 | 185 (n 0)) |
| 186 (catch 'address |
| 187 ;; optimize common cases: |
| 188 ;; foo |
| 189 ;; foo.bar@bar.zap |
| 190 ;; followed by "\\'\\|,\\|([^()\\]*)\\'" |
| 191 ;; other common cases are: |
| 192 ;; foo bar <foo.bar@baz.zap> |
| 193 ;; "foo bar" <foo.bar@baz.zap> |
| 194 ;; those aren't hacked yet. |
|
20683
5623c78a31e5
(rfc822-snarf-word): Don't reject non-ASCII chars.
Richard M. Stallman <rms@gnu.org>
parents:
15882
diff
changeset
|
195 (if (and (rfc822-looking-at "[^][\000-\037 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037 ()<>@,;:\\\"]+\\)" t) |
| 43 | 196 (progn (or (eobp) |
| 197 (rfc822-looking-at ?,)))) |
| 198 (progn |
| 199 ;; rfc822-looking-at may have inserted a space |
| 200 (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1)) |
| 201 ;; relying on the fact that rfc822-looking-at <char> |
| 202 ;; doesn't mung match-data |
|
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
203 (throw 'address (buffer-substring rfc822-address-start (match-end 0))))) |
|
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
  ;; The shortcut above did not apply: go back to the start of the
  ;; address and parse it piece by piece.
204 (goto-char rfc822-address-start) |
| 43 | 205 (while t |
| 206 (cond ((and (= n 1) (rfc822-looking-at ?@)) |
| 207 ;; local-part@domain |
| 208 (rfc822-snarf-domain) |
| 209 (throw 'address |
|
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
210 (buffer-substring rfc822-address-start (point)))) |
| 43 | 211 ((rfc822-looking-at ?:) |
| 212 (cond ((not allow-groups) |
| 213 (rfc822-bad-address "A group name may not appear here")) |
| 214 ((= n 0) |
| 215 (rfc822-bad-address "No name for :...; group"))) |
| 216 ;; group |
| 217 (throw 'address |
| 218 ;; return a list of addresses |
| 219 (rfc822-snarf-frob-list ":...; group" ?\, ?\; |
| 220 'rfc822-addresses-1 t))) |
| 221 ((rfc822-looking-at ?<) |
  ;; START is the position just after the "<".  While STRIP is non-nil
  ;; only the text between the angle brackets is returned; it is set to
  ;; nil for a routed address, which is returned with its brackets.
| 222 (let ((start (point)) |
| 223 (strip t)) |
| 224 (cond ((rfc822-looking-at ?>) |
| 225 ;; empty path |
| 226 ()) |
| 227 ((and (not (eobp)) (= (following-char) ?\@)) |
| 228 ;; <@foo.bar,@baz:quux@abcd.efg> |
| 229 (rfc822-snarf-frob-list "<...> address" ?\, ?\: |
| 230 (function (lambda () |
| 231 (if (rfc822-looking-at ?\@) |
| 232 (rfc822-snarf-domain) |
| 233 (rfc822-bad-address |
| 234 "Gubbish in route-addr"))))) |
| 235 (rfc822-snarf-words) |
| 236 (or (rfc822-looking-at ?@) |
| 237 (rfc822-bad-address "Malformed <..@..> address")) |
| 238 (rfc822-snarf-domain) |
| 239 (setq strip nil)) |
| 240 ((progn (rfc822-snarf-words) (rfc822-looking-at ?@)) |
| 241 ; allow <foo> (losing unix seems to do this) |
| 242 (rfc822-snarf-domain))) |
| 243 (let ((end (point))) |
| 244 (if (rfc822-looking-at ?\>) |
| 245 (throw 'address |
| 246 (buffer-substring (if strip start (1- start)) |
| 247 (if strip end (1+ end)))) |
| 248 (rfc822-bad-address "Unterminated <...> address"))))) |
|
20683
5623c78a31e5
(rfc822-snarf-word): Don't reject non-ASCII chars.
Richard M. Stallman <rms@gnu.org>
parents:
15882
diff
changeset
|
249 ((looking-at "[^][\000-\037 ()<>@,;:\\.]") |
| 43 | 250 ;; this allows "." to be part of the words preceding |
| 251 ;; an addr-spec, since many broken mailers output |
| 252 ;; "Hern K. Herklemeyer III |
| 253 ;; <yank@megadeath.dod.gods-own-country>" |
| 125 | 254 (let ((again t)) |
| 255 (while again |
| 256 (or (= n 0) (bobp) (= (preceding-char) ?\ ) |
| 257 (insert ?\ )) |
| 126 | 258 (rfc822-snarf-words) |
| 125 | 259 (setq n (1+ n)) |
| 260 (setq again (or (rfc822-looking-at ?.) |
|
20683
5623c78a31e5
(rfc822-snarf-word): Don't reject non-ASCII chars.
Richard M. Stallman <rms@gnu.org>
parents:
15882
diff
changeset
|
261 (looking-at "[^][\000-\037 ()<>@,;:\\.]")))))) |
  ;; None of the constructs above starts at point; what happens now
  ;; depends only on how many words have been read.
| 43 | 262 ((= n 0) |
| 263 (throw 'address nil)) |
| 264 ((= n 1) ; allow "foo" (losing unix seems to do this) |
| 265 (throw 'address |
|
53414
a2ead0e84644
(rfc822-address-start): Declare variable.
Richard M. Stallman <rms@gnu.org>
parents:
52401
diff
changeset
|
266 (buffer-substring rfc822-address-start (point)))) |
| 125 | 267 ((> n 1) |
| 268 (rfc822-bad-address "Missing comma between addresses or badly-formatted address")) |
  ;; NOTE(review): N is 0, 1 or greater than 1 at this point, so one of
  ;; the three clauses above always fires and the two clauses below look
  ;; unreachable -- confirm before relying on their messages.
| 269 ((or (eobp) (= (following-char) ?,)) |
| 43 | 270 (rfc822-bad-address "Missing comma or route-spec")) |
| 271 (t |
| 272 (rfc822-bad-address "Strange character or missing comma"))))))) |
| 273 | |
|
49598
0d8b17d428b5
Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents:
21165
diff
changeset
|
274 |
(defun rfc822-addresses (header-text)
  "Parse HEADER-TEXT, the contents of an address header, into a list.
Return a list of strings, one per address, in the order in which they
appear in HEADER-TEXT.  The members of a `name: a, b;' group are added to
the list individually.  Parsing is done in a scratch buffer, which is
killed before returning; continuation lines are unfolded first.
An address that cannot be parsed does not signal an error.  Instead the
list contains, in its place, a string of the form
  (Unparsable address -- REASON: \"TEXT\")
as produced by `rfc822-bad-address'."
  (if (string-match "\\`[ \t]*\\([^][\000-\037 ()<>@,;:\\\".]+\\)[ \t]*\\'"
                    header-text)
      ;; Make very simple case moderately fast.
      (list (substring header-text (match-beginning 1) (match-end 1)))
    (let ((buf (generate-new-buffer " rfc822")))
      (unwind-protect
          (with-current-buffer buf
            (make-local-variable 'case-fold-search)
            (setq case-fold-search nil) ;For speed(?)
            (insert header-text)
            ;; unfold continuation lines
            (goto-char (point-min))

            (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
              (replace-match "\\1 " t))

            (goto-char (point-min))
            (let ((list ())
                  tem
                  ;; This is for rfc822-bad-address.  It has to hold a
                  ;; valid position before the first call below, because
                  ;; `rfc822-bad-address' narrows to it.
                  (rfc822-address-start (point)))
              ;; Skipping the leading whitespace and comments can itself
              ;; fail (e.g. HEADER-TEXT starts with an unclosed comment),
              ;; in which case `rfc822-bad-address' throws to `address'.
              ;; Catch that here and record the error text like any other
              ;; unparsable address.
              (setq tem (catch 'address
                          (rfc822-nuke-whitespace)
                          nil))
              (if (stringp tem)
                  (setq list (cons tem list)))
              (while (not (eobp))
                (setq rfc822-address-start (point))
                (setq tem
                      (catch 'address ; this is for rfc822-bad-address
                        (cond ((rfc822-looking-at ?\,)
                               nil)
                              ((looking-at "[][\000-\037@;:\\.>)]")
                               (forward-char)
                               (rfc822-bad-address
                                (format "Strange character \\%c found"
                                        (preceding-char))))
                              (t
                               (rfc822-addresses-1 t)))))
                ;; TEM is nil (nothing to add), a string (one address or
                ;; an error text) or a list (the members of a group).
                (cond ((null tem))
                      ((stringp tem)
                       (setq list (cons tem list)))
                      (t
                       (setq list (nconc (nreverse tem) list)))))
              (nreverse list)))
        (and buf (kill-buffer buf))))))
| 318 | |
| 584 | 319 (provide 'rfc822) |
| 320 | |
| 52401 | 321 ;;; arch-tag: 5d388a24-e173-40fb-9b8e-85269de44b37 |
|
658
7cbd4fcd8b0f
*** empty log message ***
Eric S. Raymond <esr@snark.thyrsus.com>
parents:
584
diff
changeset
|
322 ;;; rfc822.el ends here |
