Mercurial > emacs
annotate lisp/language/indian.el @ 37678:ebec0594dece
(compile-files): Redirect output of chmod to
/dev/null.
| author | Gerd Moellmann <gerd@gnu.org> |
|---|---|
| date | Fri, 11 May 2001 10:53:56 +0000 |
| parents | 6d966e8b4bbe |
| children | b174db545cfd |
| rev | line source |
|---|---|
|
23158
5c0a4ed13900
Add coding: local variable, to avoid bootstrapping problem
Paul Eggert <eggert@twinsun.com>
parents:
20837
diff
changeset
|
1 ;;; indian.el --- Support for Indian Languages -*- coding: iso-2022-7bit; -*- |
| 17052 | 2 |
| 3 ;; Copyright (C) 1995 Free Software Foundation, Inc. | |
| 4 | |
| 5 ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> | |
| 6 | |
| 7 ;; Keywords: multilingual, Indian | |
| 8 | |
| 9 ;; This file is part of GNU Emacs. | |
| 10 | |
| 11 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
| 12 ;; it under the terms of the GNU General Public License as published by | |
| 13 ;; the Free Software Foundation; either version 2, or (at your option) | |
| 14 ;; any later version. | |
| 15 | |
| 16 ;; GNU Emacs is distributed in the hope that it will be useful, | |
| 17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 19 ;; GNU General Public License for more details. | |
| 20 | |
| 21 ;; You should have received a copy of the GNU General Public License | |
|
17314
f438ebf1c679
Fix FSF address in comment.
Kenichi Handa <handa@m17n.org>
parents:
17300
diff
changeset
|
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the |
|
f438ebf1c679
Fix FSF address in comment.
Kenichi Handa <handa@m17n.org>
parents:
17300
diff
changeset
|
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
|
f438ebf1c679
Fix FSF address in comment.
Kenichi Handa <handa@m17n.org>
parents:
17300
diff
changeset
|
24 ;; Boston, MA 02111-1307, USA. |
| 17052 | 25 |
| 26 ;;; Commentary: | |
| 27 | |
| 28 ;; History: | |
| 29 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp> | |
| 30 | |
| 31 ;; For Indian, the character set IS 13194 is supported. | |
| 32 ;; | |
| 33 ;; IS 13194 does not assign a specific glyph to each character. | |
| 34 ;; The following code is not specific to any one Indian language. | |
| 35 ;; | |
| 36 ;; Eventually, this code will support generic information about | |
| 37 ;; the following scripts. | |
| 38 ;; | |
| 39 ;; Devanagari | |
| 40 ;; Bengali | |
| 41 ;; Gurmukhi | |
| 42 ;; Gujarati | |
| 43 ;; Oriya | |
| 44 ;; Tamil | |
| 45 ;; Telugu | |
| 46 ;; Kannada | |
| 47 ;; Malayalam | |
| 48 ;; | |
| 49 ;; In this file, charsets other than charset-ascii and charset-indian-is13194 | |
| 50 ;; should not be used except in the comment. | |
| 51 | |
| 52 ;;; Code: | |
| 53 | |
| 54 ;; The following is what you see when you refer to the Emacs | |
| 55 ;; representations of IS 13194 characters. However, this is merely a | |
| 56 ;; tentative appearance, and you must convert them with an | |
| 57 ;; indian-to-xxxxxx (specific script) function to use them. | |
| 58 ;; Devanagari is no exception to this rule. | |
| 59 | |
| 60 ;; 0xa0 //(5!"#$%&'()*+,-./(B | |
| 61 ;; 0xb0 (50123456789:;<=>?(B | |
| 62 ;; 0xc0 (5@ABCDEFGHIJKLMNO(B | |
| 63 ;; 0xd0 (5PQRSTUVWXYZ[\]^_(B | |
| 64 ;; 0xe0 (5`abcdefghijklmno(B | |
| 65 ;; 0xf0 (5pqrstuvwxyz{|}~(B// | |
| 66 | |
| 67 ;; Note - In IS 13194, several symbols are obtained by combining | |
| 68 ;; another character with the Nukta sign. | |
| 69 ;; | |
| 70 ;; Sanskrit Vowel R -> (5*(B + (5i(B | |
| 71 ;; Sanskrit Vowel L -> (5&(B + (5i(B | |
| 72 ;; Sanskrit Vowel LL -> (5'(B + (5i(B | |
| 73 ;; Sanskrit Avagrah -> (5j(B + (5i(B | |
| 74 ;; OM -> (5!(B + (5i(B | |
| 75 ;; | |
| 76 ;; Note - IS 13194 defines ATR(0xEF) and EXT(0xF0), but they are | |
| 77 ;; not used in Emacs. | |
| 78 ;; | |
| 79 ;; Note - the above characters DO NOT represent any script. For | |
| 80 ;; example, if you want to obtain a Devanagari character, you must do | |
| 81 ;; something like the following. | |
| 82 ;; | |
| 83 ;; (char-to-string (indian-to-devanagari ?(5$(B)) | |
| 84 ;; "$(5!$(B" | |
| 85 | |
| 86 ;;; ITRANS | |
| 87 ;; | |
| 88 ;; ITRANS is one of the most popular methods of exchanging Indian scripts | |
| 89 ;; electronically. Here is the table to convert between ITRANS code and | |
| 90 ;; IS 13194 code. | |
| 91 | |
| 92 (defvar indian-itrans-consonant-alist ; Alist of (ITRANS-CONSONANT . IS-13194-STRING) pairs. | |
| 93 '( | |
| 94 ("k" . "(53(B") | |
| 95 ("kh" . "(54(B") | |
| 96 ("g" . "(55(B") | |
| 97 ("gh" . "(56(B") | |
| 98 ("N^" . "(57(B") | |
| 99 ("ch" . "(58(B") | |
| 100 ("chh" . "(59(B") | |
| 101 ("j" . "(5:(B") | |
| 102 ("jh" . "(5;(B") | |
| 103 ("JN" . "(5<(B") | |
| 104 ("T" . "(5=(B") | |
| 105 ("Th" . "(5>(B") | |
| 106 ("D" . "(5?(B") | |
| 107 ("Dh" . "(5@(B") | |
| 108 ("N" . "(5A(B") | |
| 109 ("t" . "(5B(B") | |
| 110 ("th" . "(5C(B") | |
| 111 ("d" . "(5D(B") | |
| 112 ("dh" . "(5E(B") | |
| 113 ("n" . "(5F(B") | |
| 114 ("nh" . "(5G(B") ; For transcription of non-Devanagari languages. | |
| 115 ("p" . "(5H(B") | |
| 116 ("ph" . "(5I(B") | |
| 117 ("b" . "(5J(B") | |
| 118 ("bh" . "(5K(B") | |
| 119 ("m" . "(5L(B") | |
| 120 ("y" . "(5M(B") | |
| 121 ("yh" . "(5N(B") ; For transcription of non-Devanagari languages. | |
| 122 ("r" . "(5O(B") | |
| 123 ("rh" . "(5P(B") ; For transcription of non-Devanagari languages. | |
| 124 ("l" . "(5Q(B") | |
| 125 ("v" . "(5T(B") | |
| 126 ("sh" . "(5U(B") | |
| 127 ("shh" . "(5V(B") | |
| 128 ("s" . "(5W(B") | |
| 129 ("h" . "(5X(B") | |
| 130 ("ld" . "(5R(B") | |
| 131 ("L" . "(5R(B") ; Alternative spelling; same character as "ld". | |
| 132 ("ksh" . "$(5!3!h!V(B") ; NOTE(review): written with a `$(5' prefix unlike every other entry -- confirm this is intended. | |
| 133 ("GY" . "***GY***") ; FIXME: placeholder value; the real mapping must be worked out later. | |
| 134 ;; Special consonants: each value is one of the base consonants above followed by (5i(B. | |
| 135 ("q" . "(53i(B") | |
| 136 ("K" . "(54i(B") | |
| 137 ("G" . "(55i(B") | |
| 138 ("z" . "(5:i(B") | |
| 139 ("f" . "(5Ii(B") | |
| 140 (".D" . "(5?i(B") | |
| 141 (".Dh" . "(5@i(B") | |
| 142 )) | |
| 143 | |
| 144 (defvar indian-itrans-vowel-sign-alist ; Alist of (ITRANS-VOWEL . IS-13194-VOWEL-SIGN); each entry is appended to a consonant. | |
| 145 '( | |
| 146 ;; Special treatment unique to IS 13194 transliteration | |
| 147 ("" . "(5h(B") ; Consonant written without a vowel: append (5h(B (the sign ".h" maps to, i.e. Halant). | |
| 148 ("a" . "") ; "a" adds nothing to the consonant's own string. | |
| 149 ;; Matra (Vowel Sign) | |
| 150 ("aa" . "(5Z(B") | |
| 151 ("A" . "(5Z(B") | |
| 152 ("i" . "(5[(B") | |
| 153 ("ii" . "(5\(B") | |
| 154 ("I" . "(5\(B") | |
| 155 ("u" . "(5](B") | |
| 156 ("uu" . "(5^(B") | |
| 157 ("U" . "(5^(B") | |
| 158 ("R^i" . "(5_(B") ; These entries must be checked later. | |
| 159 ("R^I" . "(5_i(B") | |
| 160 ("L^i" . "(5[i(B") | |
| 161 ("L^I" . "(5\i(B") | |
| 162 ("E" . "(5`(B") ; For transcription of non-Devanagari languages. | |
| 163 ("e" . "(5a(B") | |
| 164 ("ai" . "(5b(B") | |
| 165 ;; ("e.c" . "(5c(B") ; Tentatively suppressed. | |
| 166 ("O" . "(5d(B") ; For transcription of non-Devanagari languages. | |
| 167 ("o" . "(5e(B") | |
| 168 ("au" . "(5f(B") | |
| 169 ;; ("o.c" . "(5g(B") ; Tentatively suppressed. | |
| 170 )) | |
| 171 | |
| 172 ;; | |
| 173 ;; Independent vowels and other signs. | |
| 174 ;; | |
| 175 | |
| 176 (defvar indian-itrans-other-letters-alist ; Alist of (ITRANS-CODE . IS-13194-STRING) for independent vowels, signs and digits. | |
| 177 '( | |
| 178 ("a" . "(5$(B") | |
| 179 ("aa" . "(5%(B") | |
| 180 ("A" . "(5%(B") | |
| 181 ("i" . "(5&(B") | |
| 182 ("ii" . "(5'(B") | |
| 183 ("I" . "(5'(B") | |
| 184 ("u" . "(5((B") | |
| 185 ("uu" . "(5)(B") | |
| 186 ("U" . "(5)(B") | |
| 187 ("R^i" . "(5*(B") | |
| 188 ("R^I" . "(5*i(B") | |
| 189 ("L^i" . "(5&i(B") | |
| 190 ("L^I" . "(5'i(B") | |
| 191 ("E" . "(5+(B") ; For transcription of non-Devanagari languages. | |
| 192 ("e" . "(5,(B") | |
| 193 ("ai" . "(5-(B") | |
| 194 ;; ("e.c" . "(5.(B") ; Candra E | |
| 195 ("O" . "(5/(B") ; For transcription of non-Devanagari languages. | |
| 196 ("o" . "(50(B") | |
| 197 ("au" . "(51(B") | |
| 198 ;; ("o.c" . "(52(B") ; Candra O | |
| 199 ("M" . "(5$(B") ; NOTE(review): same value as "a" above -- looks unintended, confirm. | |
| 200 ("H" . "(5#(B") | |
| 201 ("AUM" . "(5!i(B") | |
| 202 ("OM" . "(5!i(B") ; Same string as "AUM". | |
| 203 (".r" . "(5Oh(B") ; The string for "r" followed by Halant. | |
| 204 (".n" . "(5"(B") | |
| 205 (".N" . "(5!(B") | |
| 206 (".h" . "(5h(B") ; Halant | |
| 207 (".." . "(5j(B") | |
| 208 (".a" . "(5ji(B") ; Avagrah | |
| 209 ("0" . "(5q(B") ; Digits 0-9 follow. | |
| 210 ("1" . "(5r(B") | |
| 211 ("2" . "(5s(B") | |
| 212 ("3" . "(5t(B") | |
| 213 ("4" . "(5u(B") | |
| 214 ("5" . "(5v(B") | |
| 215 ("6" . "(5w(B") | |
| 216 ("7" . "(5x(B") | |
| 217 ("8" . "(5y(B") | |
| 218 ("9" . "(5z(B") | |
| 219 )) | |
| 220 | |
| 221 ;; Regular expression matching single Indian character represented | |
| 222 ;; by ITRANS. | |
| 223 | |
(defvar indian-itrans-regexp
  (let (;; Consonant codes.  Emacs takes the first alternative that lets
        ;; the whole regexp match, so the multi-letter codes "ksh", "GY"
        ;; and "JN" must come before the single-letter classes; otherwise
        ;; a vowel-less "ksh" is tokenized as "k".  "JN" is spelled in the
        ;; exact case used by `indian-itrans-consonant-alist', and "L" is
        ;; included because that alist defines it as a consonant too.
        (consonant "\\(ksh\\)\\|\\(GY\\)\\|\\(JN\\)\\|\\([cs]hh?\\)\\|[kgjTDnpbyr]h?\\|\\(N\\^?\\)\\|[mvqKGzfsL]\\|\\(ld?\\)\\|\\(\\.Dh?\\)")
        ;; Vowel codes, two-letter and "R^i"-style codes before single letters.
        (vowel "\\(a[aiu]\\)\\|\\(ii\\)\\|\\(uu\\)\\|\\([RL]\\^[iI]\\)\\|[AIEOeoaiu]")
        ;; Signs and digits that never combine with a consonant.
        (misc "[MH0-9]\\|\\(AUM\\)\\|\\(OM\\)\\|\\(\\.[rnNh\\.a]\\)")
        (lpre "\\(") (rpre "\\)") (orre "\\|"))
    ;; MISC  |  [CONSONANT] VOWEL  |  CONSONANT
    (concat lpre misc rpre orre
            lpre lpre consonant rpre "?" lpre vowel rpre rpre orre
            lpre consonant rpre))
  "Regexp matching the ITRANS code of a single Indian character.
A match is a sign or digit, an optional consonant followed by a vowel,
or a consonant alone.  Use the whole match (subexpression 0) as the key
for `indian-itrans-alist'; the numbering of the inner groups is an
implementation detail.")
| 232 | |
| 233 ;; | |
| 234 ;; Regular expression matching single ITRANS unit for IS 13194 characters. | |
| 235 ;; | |
| 236 | |
(defvar itrans-indian-regexp
  ;; One unit is a digit, a consonant with an optional vowel sign
  ;; (matra), or an independent vowel.  Each class is a range of
  ;; IS 13194 characters.
  (let* ((independent-vowel "[(5$(B-(52(B]")
         (base-consonant "[(53(B-(5X(B]")
         (vowel-sign "[(5Z(B-(5g(B]")
         (digit "[(5q(B-(5z(B]")
         ;; Only the consonant-plus-matra alternative is grouped.
         (syllable (concat "\\(" base-consonant vowel-sign "?" "\\)")))
    (concat digit "\\|" syllable "\\|" independent-vowel)))
| 17052 | 246 |
| 247 ;; | |
| 248 ;; IS13194 - ITRANS conversion table for string matching above regexp. | |
| 249 ;; | |
| 250 | |
(defvar indian-itrans-alist
  ;; Entries are pushed onto the front of TABLE, so the finished list is
  ;; in reverse order of creation.  Keep that order: `assoc' and `rassoc'
  ;; return the first hit, and several values occur more than once.
  (let (table)
    ;; Every consonant paired with every vowel sign; both the ITRANS
    ;; codes and the IS 13194 strings are concatenated.
    (dolist (consonant indian-itrans-consonant-alist)
      (dolist (sign indian-itrans-vowel-sign-alist)
        (setq table
              (cons (cons (concat (car consonant) (car sign))
                          (concat (cdr consonant) (cdr sign)))
                    table))))
    ;; Independent vowels, signs and digits are copied over one by one.
    (dolist (letter indian-itrans-other-letters-alist)
      (setq table (cons (cons (car letter) (cdr letter)) table)))
    table))
| 269 | |
| 270 ;; | |
| 271 ;; Utility program to convert from ITRANS to IS 13194 in specified region. | |
| 272 ;; | |
| 273 | |
(defun indian-decode-itrans-region (from to)
  "Convert `ITRANS' mnemonics of the current region to Indian characters.
When called from a program, expects two arguments,
positions (integers or markers) specifying the stretch of the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Pass 1: replace every ITRANS unit that has an entry in
    ;; `indian-itrans-alist'.  A match without an entry is left as is.
    ;; NOTE(review): both searches obey `case-fold-search' although
    ;; ITRANS codes are case-sensitive (\"t\" vs \"T\"); the lookup
    ;; below is exact, so a case-folded match that is not a real code
    ;; is simply not replaced.
    (goto-char (point-min))
    (while (re-search-forward indian-itrans-regexp nil t)
      (let* ((itrans (buffer-substring (match-beginning 0) (match-end 0)))
             (ch (cdr (assoc itrans indian-itrans-alist))))
        (if ch
            (progn
              (delete-region (match-beginning 0) (match-end 0))
              (insert ch)))))
    ;; Pass 2: a vowel-less consonant was given a trailing Halant in
    ;; pass 1.  Remove that Halant unless a consonant follows.
    ;; The original pattern ended in `[^\\c0]'.  Backslash is not
    ;; special inside a character alternative, so that excluded only the
    ;; three characters `\\', `c' and `0' and stripped the Halant in
    ;; front of consonants as well.  `\\C0' matches any character that
    ;; is not in category `0'.
    ;; NOTE(review): assumes IS 13194 consonants are assigned category
    ;; `0' -- confirm against the category table.
    (goto-char (point-min))
    (while (re-search-forward "\\((5h(B\\)\\C0" nil t)
      (delete-region (match-beginning 1) (match-end 1)))))
| 292 | |
| 293 ;; | |
| 294 ;; Utility program to convert from IS 13194 to ITRANS in specified region. | |
| 295 ;; | |
| 296 | |
|
17300
01d528c5dd18
Handle more Devanagari characters correctly.
Kenichi Handa <handa@m17n.org>
parents:
17071
diff
changeset
|
(defun indian-encode-itrans-region (from to)
  "Convert Indian characters between FROM and TO to ITRANS mnemonics.
Each stretch of text matching `itrans-indian-regexp' is looked up as a
value in `indian-itrans-alist' and, when found, replaced by the ITRANS
code of the first matching entry.  Text without an entry is left alone.
Point is left at the beginning of the converted region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward itrans-indian-regexp nil t)
      (let* ((start (match-beginning 0))
             (end (match-end 0))
             ;; Reverse lookup: the matched text is an alist value.
             (mnemonic (car (rassoc (buffer-substring start end)
                                    indian-itrans-alist))))
        (when mnemonic
          (delete-region start end)
          (insert mnemonic))))
    (goto-char (point-min))))
| 33778 | 311 |
| 312 (provide 'indian) ; Announce the `indian' feature so that (require 'indian) is satisfied. | |
|
17300
01d528c5dd18
Handle more Devanagari characters correctly.
Kenichi Handa <handa@m17n.org>
parents:
17071
diff
changeset
|
313 |
| 17052 | 314 ;;; indian.el ends here |
