Mercurial > emacs
annotate admin/charsets/compact.awk @ 107437:0a2bb00a71bd
* s-region.el: Move to obsolete.
| author | Juri Linkov <juri@jurta.org> |
|---|---|
| date | Sat, 20 Mar 2010 03:29:12 +0200 |
| parents | 1d1d5d9bd884 |
| children | 376148b31b5e |
| rev | line source |
|---|---|
| 88123 | 1 # compact.awk -- Make charset map compact. |
| 106815 | 2 # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
| 88123 | 3 # National Institute of Advanced Industrial Science and Technology (AIST) |
| 4 # Registration Number H13PRO009 | |
|
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91412
diff
changeset
|
5 |
| 88123 | 6 # This file is part of GNU Emacs. |
|
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91412
diff
changeset
|
7 |
|
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91412
diff
changeset
|
8 # GNU Emacs is free software: you can redistribute it and/or modify |
| 88123 | 9 # it under the terms of the GNU General Public License as published by |
|
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91412
diff
changeset
|
10 # the Free Software Foundation, either version 3 of the License, or |
|
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91412
diff
changeset
|
11 # (at your option) any later version. |
|
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91412
diff
changeset
|
12 |
| 88123 | 13 # GNU Emacs is distributed in the hope that it will be useful, |
| 14 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 16 # GNU General Public License for more details. | |
|
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91412
diff
changeset
|
17 |
| 88123 | 18 # You should have received a copy of the GNU General Public License |
|
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91412
diff
changeset
|
19 # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 88123 | 20 |
|
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91412
diff
changeset
|
21 # Commentary: |
| 88123 | 22 # Make a charset map compact by changing this kind of line sequence: |
| 23 # 0x00 0x0000 | |
| 24 # 0x01 0x0001 | |
| 25 # ... | |
| 26 # 0x7F 0x007F | |
| 27 # to one line of this format: | |
| 28 # 0x00-0x7F 0x0000 | |
| 29 | |
# Populate the global hex-digit table.  Every hexadecimal digit character,
# in upper and lower case, maps to its value plus one; the offset lets a
# lookup of a non-digit (an unset entry, which compares equal to 0) be
# told apart from the digit "0".
function init_tohex(    digits, pos, ch) {
  digits = "0123456789ABCDEF";
  for (pos = 1; pos <= length(digits); pos++)
    {
      ch = substr(digits, pos, 1);
      tohex[ch] = pos;
      tohex[tolower(ch)] = pos;
    }
}

BEGIN {
  init_tohex();

  # Bounds of the run of consecutive mappings currently being collected.
  # to_code starts below from_code, i.e. the run is initially empty.
  from_code = 0;
  to_code = -1;
  from_unicode = 0;
  to_unicode = 0;
}
| 58 | |
# Return the numeric value of the hexadecimal digits of STR, starting at
# the 1-based character position IDX and stopping at the first character
# that is not a hexadecimal digit (or at the end of STR).  Return 0 when
# there is no hexadecimal digit at IDX.
#
# Relies on the global table tohex, which maps each hex digit character to
# its value plus one.  The parameters after the gap (n, len, i, c) are not
# meant to be passed by callers: listing them is how awk declares local
# variables, and it keeps this function from overwriting globals of the
# same names.
function decode_hex(str, idx,    n, len, i, c) {
  n = 0;
  len = length(str);
  for (i = idx; i <= len; i++)
    {
      c = tohex[substr(str, i, 1)];
      # A character absent from tohex yields an unset value, equal to 0.
      if (c == 0)
        break;
      # Undo the plus-one offset stored in tohex.
      n = n * 16 + c - 1;
    }
  return n;
}
| 71 | |
# Copy comment lines (those starting with "#") to the output unchanged and
# skip the mapping logic for them.  "#" is an ordinary character in a
# regular expression, so it is written without a backslash; the sequence
# "\#" is not defined by POSIX and some awk implementations warn about it.
/^#/ {
  print;
  next;
}
| 76 | |
# Handle one mapping line.  $1 is the code and $2 the Unicode value; both
# are decoded as hexadecimal starting at their third character, i.e. after
# a two-character prefix such as "0x".
#
# The globals from_code..to_code and from_unicode..to_unicode describe the
# run of consecutive mappings collected so far.  A line that continues the
# run (code and Unicode value both one greater than the run's end) extends
# it; any other line causes the collected run to be printed and a new run
# to be started at this line.
{
  code = decode_hex($1, 3);
  unicode = decode_hex($2, 3);
  # Only a non-empty run (from_code <= to_code) can be continued.  Without
  # this check, a first line of "0x00 0x0001" would look like the
  # continuation of the initial empty run (to_code = -1, to_unicode = 0)
  # and would later be printed with the wrong Unicode value 0x0000.
  if ((from_code <= to_code)
      && (code == to_code + 1) && (unicode == to_unicode + 1))
    {
      to_code++;
      to_unicode++;
    }
  else
    {
      # Print the finished run, if there is one.  Codes below 256 are
      # printed with two hex digits, larger ones with four.  Nothing is
      # printed while the run is still empty (from_code > to_code).
      if (to_code < 256)
        {
          if (from_code == to_code)
            printf "0x%02X 0x%04X\n", from_code, from_unicode;
          else if (from_code < to_code)
            printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
        }
      else
        {
          if (from_code == to_code)
            printf "0x%04X 0x%04X\n", from_code, from_unicode;
          else if (from_code < to_code)
            printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
        }
      # Start a new one-element run at the current line.
      from_code = to_code = code;
      from_unicode = to_unicode = unicode;
    }
}
| 105 | |
# Print the last collected run once all input has been read.  Codes below
# 256 are printed with two hex digits, larger ones with four.
#
# As in the per-line rule, a range is printed only when from_code is less
# than to_code.  If the input contained no mapping lines the run is still
# empty (from_code is 0 and to_code is -1) and nothing is printed, rather
# than a meaningless range ending at -1.
END {
  if (to_code < 256)
    {
      if (from_code == to_code)
        printf "0x%02X 0x%04X\n", from_code, from_unicode;
      else if (from_code < to_code)
        printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode;
    }
  else
    {
      if (from_code == to_code)
        printf "0x%04X 0x%04X\n", from_code, from_unicode;
      else if (from_code < to_code)
        printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode;
    }
}
|
89916
e0e4e6a0599f
Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents:
88123
diff
changeset
|
122 |
|
e0e4e6a0599f
Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents:
88123
diff
changeset
|
123 # arch-tag: 7e6f57c3-8e62-4af3-8916-ca67bca3a0ce |
