Mercurial > emacs
annotate lisp/language/tml-util.el @ 55338:3fe6300a67bf
*** empty log message ***
| author | Jason Rumney <jasonr@gnu.org> |
|---|---|
| date | Mon, 03 May 2004 13:51:59 +0000 |
| parents | 18d7e5b12285 |
| children | 18a818a2ee7c |
| rev | line source |
|---|---|
| 49702 | 1 ;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*- |
| 2 | |
| 3 ;; Copyright (C) 2001 Free Software Foundation, Inc. | |
| 4 | |
| 5 ;; Maintainer: KAWABATA, Taichi <kawabata@m17n.org> | |
| 6 ;; Keywords: multilingual, Indian, Tamil | |
| 7 | |
| 8 ;; This file is part of GNU Emacs. | |
| 9 | |
| 10 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
| 11 ;; it under the terms of the GNU General Public License as published by | |
| 12 ;; the Free Software Foundation; either version 2, or (at your option) | |
| 13 ;; any later version. | |
| 14 | |
| 15 ;; GNU Emacs is distributed in the hope that it will be useful, | |
| 16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 18 ;; GNU General Public License for more details. | |
| 19 | |
| 20 ;; You should have received a copy of the GNU General Public License | |
| 21 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
| 22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
| 23 ;; Boston, MA 02111-1307, USA. | |
| 24 | |
| 25 ;; Created: Nov. 08. 2002 | |
| 26 | |
| 27 ;;; Commentary: | |
| 28 | |
| 29 ;; This file provides character (Unicode) to glyph (CDAC) conversion and | |
| 30 ;; composition of Tamil script characters. | |
| 31 | |
| 32 ;;; Code: | |
| 33 | |
| 34 ;; Tamil Composable Pattern | |
| 35 ;; C .. Consonants | |
| 36 ;; V .. Vowel | |
| 37 ;; H .. Pulli | |
| 38 ;; M .. Matra | |
| 39 ;; V .. Vowel | |
| 40 ;; A .. Anuswar | |
| 41 ;; D .. Chandrabindu | |
| 42 ;; 1. vowel | |
| 43 ;; V | |
| 44 ;; 2. syllable : only patterns that form a ligature are composed. | |
| 45 ;; (CkHCs|C)(H|M)? | |
| 46 ;; 3. sri special | |
| 47 ;; (CsHCrVi) | |
| 48 | |
| 49 ;; original | |
| 50 ;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? | |
| 51 | |
| 52 (defconst tamil-consonant | |
| 53 "[$,1<5(B-$,1<Y(B]") | |
| 54 | |
| 55 (defconst tamil-composable-pattern | |
| 56 (concat | |
| 57 "\\([$,1<%(B-$,1<4(B]\\)\\|" | |
| 58 "[$,1<"<#(B]\\|" ;; vowel modifier considered independent | |
| 59 "\\(\\(?:\\(?:$,1<5<m<W(B\\)\\|[$,1<5(B-$,1<Y(B]\\)[$,1<m<^(B-$,1<l(B]?\\)\\|" | |
| 60 "\\($,1<W<m<P<`(B\\)") | |
| 61 "Regexp matching a composable sequence of Tamil characters.") | |
| 62 | |
| 63 ;;;###autoload | |
(defun tamil-compose-region (from to)
  "Compose the Tamil sequences found between FROM and TO.
The text between FROM and TO is searched for matches of
`tamil-composable-pattern', and each match is handed to
`tamil-compose-syllable-region'.  When called interactively, FROM and
TO are the bounds of the region.  Point and the current restriction
are preserved."
  (interactive "r")
  (save-excursion
    (save-restriction
      ;; Restrict the search so that it cannot run past TO.
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward tamil-composable-pattern nil t)
        (let ((syllable-start (match-beginning 0))
              (syllable-end (match-end 0)))
          (tamil-compose-syllable-region syllable-start syllable-end))))))
(defun tamil-compose-string (string)
  "Return the text of STRING with its Tamil sequences composed.
STRING is first passed through `decompose-string', then inserted into
a temporary buffer where `tamil-compose-region' is run over the whole
text.  The resulting buffer contents are returned."
  (let ((plain-text (decompose-string string)))
    (with-temp-buffer
      (insert plain-text)
      (tamil-compose-region (point-min) (point-max))
      (buffer-string))))
| 78 | |
|
52519
18d7e5b12285
(tamil-post-read-conversion): Add autoload cookie.
Kenichi Handa <handa@m17n.org>
parents:
52401
diff
changeset
|
79 ;;;###autoload |
(defun tamil-post-read-conversion (len)
  "Compose Tamil text in the LEN characters that follow point.
The buffer-modified flag is put back to the value it had on entry,
and point and the current restriction are preserved.  The return
value is the size of the processed (narrowed) region.
NOTE(review): presumably meant as a coding system's
post-read-conversion function -- confirm against the caller."
  (save-excursion
    (save-restriction
      (let ((was-modified (buffer-modified-p)))
        (narrow-to-region (point) (+ (point) len))
        (tamil-compose-region (point-min) (point-max))
        ;; Restore the modification flag recorded on entry.
        (set-buffer-modified-p was-modified)
        (- (point-max) (point-min))))))
| 88 | |
(defun tamil-range (from to)
  "Return the list of integers from FROM up to and including TO.
The list is in ascending order.  It is nil when FROM is greater
than TO."
  (let ((current to)
        (numbers nil))
    ;; Count downwards from TO so that consing yields ascending order.
    (while (>= current from)
      (setq numbers (cons current numbers)
            current (1- current)))
    numbers))
| 93 | |
(defun tamil-regexp-of-hashtbl-keys (hashtbl)
  "Return a regular expression that matches all keys in hashtable HASHTBL.
The keys are collected, sorted so that longer keys come first, and
passed to `regexp-opt'.  `max-specpdl-size' is bound to 1000 for the
duration of the call."
  (let ((max-specpdl-size 1000))
    (let ((keys nil))
      ;; Collect every key of the table; the values are not needed.
      (maphash (lambda (key val) (setq keys (cons key keys))) hashtbl)
      (regexp-opt
       (sort keys
             (lambda (left right) (> (length left) (length right))))))))
| 103 | |
| 104 | |
| 105 ;;;###autoload | |
(defun tamil-composition-function (from to pattern &optional string)
  "Compose Tamil characters in REGION, or STRING if specified.
Assume that the REGION or STRING must fully match the composable
PATTERN regexp.

When STRING is non-nil it is passed to `tamil-compose-syllable-string';
otherwise the buffer text between FROM and TO is passed to
`tamil-compose-syllable-region'.  PATTERN is not used by this function.
The return value is the difference TO - FROM."
  ;; NOTE(review): the value returned by `tamil-compose-syllable-string'
  ;; is discarded here -- confirm that composing a STRING has the
  ;; intended effect for callers.
  (cond (string (tamil-compose-syllable-string string))
        (t (tamil-compose-syllable-region from to)))
  (- to from))
| 113 | |
| 114 ;; Register a function to compose Tamil characters. | |
;; For each listed code point, install `tamil-composition-function'
;; (keyed by `tamil-composable-pattern') in `composition-function-table'.
(mapc
 (function (lambda (ucs)
             (aset composition-function-table (decode-char 'ucs ucs)
                   (list (cons tamil-composable-pattern
                               'tamil-composition-function)))))
 ;; Use `append' rather than `nconc': `nconc' would destructively
 ;; splice the range onto the quoted literal list, and constant list
 ;; structure must not be modified.  `append' copies the literal and
 ;; yields the same sequence of code points.
 (append '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9)))
| 121 | |
| 122 ;; Notes on conversion steps. | |
| 123 | |
| 124 ;; 1. chars to glyphs | |
| 125 ;; Simple replacement of characters to glyphs is done. | |
| 126 | |
| 127 ;; 2. glyphs reordering. | |
| 128 ;; following "$,4)j(B", "$,4)k(B", "$,4)l(B" goes to the front. | |
| 129 | |
| 130 ;; 3. glyphs to glyphs | |
| 131 ;; reordered vowels are ligatured to consonants. | |
| 132 | |
| 133 ;; 4. Composition. | |
| 134 ;; left modifiers will be attached at the left. | |
| 135 ;; others will be attached right. | |
| 136 | |
| 137 (defvar tml-char-glyph | |
| 138 '(;; various signs | |
| 139 ;;("$,1<"(B" . "") | |
| 140 ("$,1<#(B" . "$,4*G(B") | |
| 141 ;; Independent Vowels | |
| 142 ("$,1<%(B" . "$,4*<(B") | |
| 143 ("$,1<&(B" . "$,4*=(B") | |
| 144 ("$,1<'(B" . "$,4*>(B") | |
| 145 ("$,1<((B" . "$,4*?(B") | |
| 146 ("$,1<)(B" . "$,4*@(B") | |
| 147 ("$,1<*(B" . "$,4*A(B") | |
| 148 ("$,1<.(B" . "$,4*B(B") | |
| 149 ("$,1</(B" . "$,4*C(B") | |
| 150 ("$,1<0(B" . "$,4*D(B") | |
| 151 ("$,1<2(B" . "$,4*E(B") | |
| 152 ("$,1<3(B" . "$,4*F(B") | |
| 153 ("$,1<4(B" . "$,4*E*W(B") | |
| 154 ;; Consonants | |
| 155 ("$,1<5<m<W<m(B" . "$,4):(B") ; ks. | |
| 156 ("$,1<5<m<W(B" . "$,4*^(B") ; ks | |
| 157 ("$,1<5(B" . "$,4*H(B") | |
| 158 | |
| 159 ("$,1<9(B" . "$,4*I(B") | |
| 160 ("$,1<:(B" . "$,4*J(B") | |
| 161 ("$,1<<(B" . "$,4*\(B") | |
| 162 ("$,1<<<m(B" . "$,4)8(B") | |
| 163 ("$,1<>(B" . "$,4*K(B") | |
| 164 ("$,1<?(B" . "$,4*L(B") | |
| 165 ("$,1<C(B" . "$,4*M(B") | |
| 166 ("$,1<D(B" . "$,4*N(B") | |
| 167 ("$,1<H(B" . "$,4*O(B") | |
| 168 ("$,1<I(B" . "$,4*Y(B") | |
| 169 ("$,1<I<m(B" . "$,4)a(B") | |
| 170 ("$,1<J(B" . "$,4*P(B") | |
| 171 ("$,1<N(B" . "$,4*Q(B") | |
| 172 ("$,1<O(B" . "$,4*R(B") | |
| 173 ("$,1<P(B" . "$,4*S(B") | |
| 174 ("$,1<Q(B" . "$,4*X(B") | |
| 175 ("$,1<R(B" . "$,4*T(B") | |
| 176 ("$,1<S(B" . "$,4*W(B") | |
| 177 ("$,1<T(B" . "$,4*V(B") | |
| 178 ("$,1<U(B" . "$,4*U(B") | |
| 179 ("$,1<W(B" . "$,4*[(B") | |
| 180 ("$,1<W<m(B" . "$,4)7(B") | |
| 181 ("$,1<W<m<P<`(B" . "$,4*_(B") | |
| 182 ("$,1<X(B" . "$,4*Z(B") | |
| 183 ("$,1<X<m(B" . "$,4)6(B") | |
| 184 ("$,1<Y(B" . "$,4*](B") | |
| 185 ("$,1<Y<m(B" . "$,4)9(B") | |
| 186 | |
| 187 ;; Dependent vowel signs | |
| 188 ("$,1<^(B" . "$,4)c(B") | |
| 189 ("$,1<_(B" . "$,4)d(B") | |
| 190 ("$,1<`(B" . "$,4)f(B") | |
| 191 ("$,1<a(B" . "$,4)g(B") | |
| 192 ("$,1<b(B" . "$,4)h(B") | |
| 193 ("$,1<f(B" . "$,4)j(B") | |
| 194 ("$,1<g(B" . "$,4)k(B") | |
| 195 ("$,1<h(B" . "$,4)l(B") | |
| 196 ("$,1<j(B" . "$,4)j)c(B") | |
| 197 ("$,1<k(B" . "$,4)k)c(B") | |
| 198 ("$,1<l(B" . "$,4)j*W(B") | |
| 199 | |
| 200 ;; Various signs | |
| 201 ("$,1<m(B" . "$,4)b(B") | |
| 202 ("$,1<w(B" . "nil") ;; not supported? | |
| 203 )) | |
| 204 | |
(defvar tml-char-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Enter each (KEY . VALUE) pair of `tml-char-glyph' into the table.
    (dolist (pair tml-char-glyph)
      (puthash (car pair) (cdr pair) table))
    table)
  "Hash table built from the alist `tml-char-glyph'.
Each car of that alist is a key and the matching cdr its value; keys
are compared with `equal'.")

(defvar tml-char-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-char-glyph-hash)
  "Regexp built by `tamil-regexp-of-hashtbl-keys' from `tml-char-glyph-hash'.")
| 213 | |
| 214 ;; Tamil glyphs need to be reordered. | |
| 215 | |
| 216 (defvar tml-consonants-regexp | |
| 217 "[$,4*H*^*I*J*\*K*L*M*N*O*Y*P*Q*R*S*X*T*W*V*U*[*Z*](B]") | |
| 218 | |
| 219 (defvar tml-glyph-reorder-key-glyphs "[$,4)j)k)l(B]") | |
| 220 | |
| 221 (defvar tml-glyph-reordering-regexp-list | |
| 222 (cons | |
| 223 (concat "\\(" tml-consonants-regexp "\\)\\([$,4)j)k)l(B]\\)") "\\2\\1")) | |
| 224 | |
| 225 ;; Tamil vowel modifiers to be ligatured. | |
| 226 (defvar tml-glyph-glyph | |
| 227 '( | |
| 228 ("$,4*H)d(B" . "$,4(a(B") ; ki | |
| 229 ("$,4*^)d(B" . "$,4(v(B") ; ksi | |
| 230 ("$,4*^)f(B" . "$,4)2(B") ; ksi~ | |
| 231 ("$,4*I)d(B" . "$,4(b(B") ; n^i | |
| 232 ("$,4*J)d(B" . "$,4(c(B") ; ci | |
| 233 ("$,4*K)d(B" . "$,4(d(B") ; n~i | |
| 234 ("$,4*L)d(B" . "$,4)n(B") ; t.i | |
| 235 ("$,4*M)d(B" . "$,4(e(B") ; n.i | |
| 236 ("$,4*N)d(B" . "$,4(f(B") ; ti | |
| 237 ("$,4*O)d(B" . "$,4(g(B") ; ni | |
| 238 ("$,4*P)d(B" . "$,4(h(B") ; pi | |
| 239 ("$,4*Q)d(B" . "$,4(i(B") ; mi | |
| 240 ("$,4*R)d(B" . "$,4(j(B") ; yi | |
| 241 ("$,4*S)d(B" . "$,4(k(B") ; ri | |
| 242 ("$,4*T)d(B" . "$,4(l(B") ; li | |
| 243 ("$,4*U)d(B" . "$,4(m(B") ; vi | |
| 244 ("$,4*V)d(B" . "$,4(n(B") ; l_i | |
| 245 ("$,4*W)d(B" . "$,4(o(B") ; l.i | |
| 246 ("$,4*X)d(B" . "$,4(p(B") ; r_i | |
| 247 ("$,4*Y)d(B" . "$,4(q(B") ; n_i | |
| 248 ("$,4*Z)d(B" . "$,4(r(B") ; si | |
| 249 ("$,4*[)d(B" . "$,4(s(B") ; s'i | |
| 250 ("$,4*\)d(B" . "$,4(t(B") ; ji | |
| 251 ("$,4*])d(B" . "$,4(u(B") ; hi | |
| 252 | |
| 253 ("$,4*H)f(B" . "$,4(w(B") ; ki~ | |
| 254 ("$,4*I)f(B" . "$,4(x(B") ; n^i~ | |
| 255 ("$,4*J)f(B" . "$,4(y(B") ; ci~ | |
| 256 ("$,4*K)f(B" . "$,4(z(B") ; n~i~ | |
| 257 ("$,4*L)f(B" . "$,4)o(B") ; t.i~ | |
| 258 ("$,4*M)f(B" . "$,4)!(B") ; n.i~ | |
| 259 ("$,4*N)f(B" . "$,4)"(B") ; ti~ | |
| 260 ("$,4*O)f(B" . "$,4)#(B") ; ni~ | |
| 261 ("$,4*P)f(B" . "$,4)$(B") ; pi~ | |
| 262 ("$,4*Q)f(B" . "$,4)%(B") ; mi~ | |
| 263 ("$,4*R)f(B" . "$,4)&(B") ; yi~ | |
| 264 ("$,4*S)f(B" . "$,4)'(B") ; ri~ | |
| 265 ("$,4*T)f(B" . "$,4)((B") ; li~ | |
| 266 ("$,4*U)f(B" . "$,4))(B") ; vi~ | |
| 267 ("$,4*V)f(B" . "$,4)*(B") ; l_i~ | |
| 268 ("$,4*W)f(B" . "$,4)+(B") ; l.i~ | |
| 269 ("$,4*X)f(B" . "$,4),(B") ; r_i~ | |
| 270 ("$,4*Y)f(B" . "$,4)-(B") ; n_i~ | |
| 271 ("$,4*Z)f(B" . "$,4).(B") ; si~ | |
| 272 ("$,4*[)f(B" . "$,4)/(B") ; s'i~ | |
| 273 ("$,4*\)f(B" . "$,4)0(B") ; ji~ | |
| 274 ("$,4*])f(B" . "$,4)1(B") ; hi~ | |
| 275 | |
| 276 ("$,4*H)g(B" . "$,4)p(B") ; ku | |
| 277 ("$,4*I)g(B" . "$,4)q(B") ; n^u | |
| 278 ("$,4*J)g(B" . "$,4)r(B") ; cu | |
| 279 ("$,4*K)g(B" . "$,4)s(B") ; n~u | |
| 280 ("$,4*L)g(B" . "$,4)t(B") ; t.u | |
| 281 ("$,4*M)g(B" . "$,4)u(B") ; n.u | |
| 282 ("$,4*N)g(B" . "$,4)v(B") ; tu | |
| 283 ("$,4*O)g(B" . "$,4)x(B") ; nu | |
| 284 ("$,4*P)g(B" . "$,4)y(B") ; pu | |
| 285 ("$,4*Q)g(B" . "$,4)z(B") ; mu | |
| 286 ("$,4*R)g(B" . "$,4){(B") ; yu | |
| 287 ("$,4*S)g(B" . "$,4)|(B") ; ru | |
| 288 ("$,4*T)g(B" . "$,4)}(B") ; lu | |
| 289 ("$,4*U)g(B" . "$,4)~(B") ; vu | |
| 290 ("$,4*V)g(B" . "$,4)(B") ; l_u | |
| 291 ("$,4*W)g(B" . "$,4* (B") ; l.u | |
| 292 ("$,4*X)g(B" . "$,4*!(B") ; r_u | |
| 293 ("$,4*Y)g(B" . "$,4*"(B") ; n_u | |
| 294 | |
| 295 ("$,4*H)h(B" . "$,4*#(B") ; ku~ | |
| 296 ("$,4*I)h(B" . "$,4*$(B") ; n^u~ | |
| 297 ("$,4*J)h(B" . "$,4*%(B") ; cu~ | |
| 298 ("$,4*K)h(B" . "$,4*&(B") ; n~u~ | |
| 299 ("$,4*L)h(B" . "$,4*'(B") ; t.u~ | |
| 300 ("$,4*M)h(B" . "$,4*((B") ; n.u~ | |
| 301 ("$,4*N)h(B" . "$,4*)(B") ; tu~ | |
| 302 ("$,4*O)h(B" . "$,4*+(B") ; nu~ | |
| 303 ("$,4*P)h(B" . "$,4*,(B") ; pu~ | |
| 304 ("$,4*Q)h(B" . "$,4*-(B") ; mu~ | |
| 305 ("$,4*R)h(B" . "$,4*.(B") ; yu~ | |
| 306 ("$,4*S)h(B" . "$,4*/(B") ; ru~ | |
| 307 ("$,4*T)h(B" . "$,4*6(B") ; lu~ | |
| 308 ("$,4*U)h(B" . "$,4*7(B") ; vu~ | |
| 309 ("$,4*V)h(B" . "$,4*8(B") ; l_u~ | |
| 310 ("$,4*W)h(B" . "$,4*9(B") ; l.u~ | |
| 311 ("$,4*X)h(B" . "$,4*:(B") ; r_u~ | |
| 312 ("$,4*Y)h(B" . "$,4*;(B") ; n_u~ | |
| 313 )) | |
| 314 | |
(defvar tml-glyph-glyph-hash
  (let ((table (make-hash-table :test 'equal)))
    ;; Enter each (KEY . VALUE) pair of `tml-glyph-glyph' into the table.
    (dolist (pair tml-glyph-glyph)
      (puthash (car pair) (cdr pair) table))
    table)
  "Hash table built from the alist `tml-glyph-glyph'.
Each car of that alist is a key and the matching cdr its value; keys
are compared with `equal'.")

(defvar tml-glyph-glyph-regexp
  (tamil-regexp-of-hashtbl-keys tml-glyph-glyph-hash)
  "Regexp built by `tamil-regexp-of-hashtbl-keys' from `tml-glyph-glyph-hash'.")
| 323 | |
(defun tamil-compose-syllable-string (string)
  "Return the text of STRING composed as a single Tamil syllable.
STRING is first passed through `decompose-string', then inserted into
a temporary buffer where `tamil-compose-syllable-region' is applied to
the whole text.  The resulting buffer contents are returned."
  (let ((plain-text (decompose-string string)))
    (with-temp-buffer
      (insert plain-text)
      (tamil-compose-syllable-region (point-min) (point-max))
      (buffer-string))))
| 329 | |
(defun tamil-compose-syllable-region (from to)
  "Compose tamil syllable in region FROM to TO.
The steps are:
 1. Every match of `tml-char-glyph-regexp' in the region is looked up
    in `tml-char-glyph-hash' and the results are concatenated into a
    glyph string.
 2. If the glyph string contains a `tml-glyph-reorder-key-glyphs'
    glyph, the first match of the reordering regexp (the car of
    `tml-glyph-reordering-regexp-list') is replaced using its cdr.
 3. The first match of `tml-glyph-glyph-regexp' is replaced by its
    entry in `tml-glyph-glyph-hash'.
 4. The glyphs, separated by the reference-point rule (5 . 3), are
    passed to `compose-region' for the text between FROM and TO.
If nothing in the region has a glyph mapping, the region is left
uncomposed and nil is returned.  Point and the current restriction
are preserved."
  (let (glyph-str match-str)
    (save-excursion
      (save-restriction
        (narrow-to-region from to)
        (goto-char (point-min))
        ;; char-glyph-conversion
        (while (re-search-forward tml-char-glyph-regexp nil t)
          (setq match-str (match-string 0))
          (setq glyph-str
                (concat glyph-str (gethash match-str tml-char-glyph-hash))))
        ;; GLYPH-STR is still nil when no character in the region has a
        ;; glyph mapping.  Running `string-match' on nil would signal an
        ;; error, so do nothing in that case.
        (when glyph-str
          ;; glyph reordering
          (when (and (string-match tml-glyph-reorder-key-glyphs glyph-str)
                     (string-match (car tml-glyph-reordering-regexp-list)
                                   glyph-str))
            (setq glyph-str
                  (replace-match (cdr tml-glyph-reordering-regexp-list)
                                 nil nil glyph-str)))
          ;; glyph-glyph-conversion
          (when (string-match tml-glyph-glyph-regexp glyph-str)
            (setq match-str (match-string 0 glyph-str))
            (setq glyph-str
                  (replace-match (gethash match-str tml-glyph-glyph-hash)
                                 nil nil glyph-str)))
          ;; concatenate and attach reference-points.
          ;; Build (RULE GLYPH RULE GLYPH ...) and drop the leading RULE,
          ;; so that a rule sits between each pair of glyphs.
          (compose-region
           from to
           (cdr
            (apply
             'nconc
             (mapcar
              (function
               (lambda (x) (list '(5 . 3) x))) ;; default ref. point.
              glyph-str)))))))))
| 345 glyph-str) | |
| 346 (setq glyph-str | |
| 347 (replace-match (cdr tml-glyph-reordering-regexp-list) | |
| 348 nil nil glyph-str)))) | |
| 349 ;; glyph-glyph-conversion | |
| 350 (when (string-match tml-glyph-glyph-regexp glyph-str) | |
| 351 (setq match-str (match-string 0 glyph-str)) | |
| 352 (setq glyph-str | |
| 353 (replace-match (gethash match-str tml-glyph-glyph-hash) | |
| 354 nil nil glyph-str))) | |
| 355 ;; concatenate and attach reference-points. | |
| 356 (setq glyph-str | |
| 357 (cdr | |
| 358 (apply | |
| 359 'nconc | |
| 360 (mapcar | |
| 361 (function | |
| 362 (lambda (x) (list '(5 . 3) x))) ;; default ref. point. | |
| 363 glyph-str)))) | |
| 364 (compose-region from to glyph-str))))) | |
| 365 | |
| 366 (provide 'tml-util) | |
| 367 | |
| 52401 | 368 ;;; arch-tag: 4d1c9737-e7b1-44cf-a040-4f64c50e773e |
| 49702 | 369 ;;; tml-util.el ends here |
