Mercurial > emacs
comparison src/coding.c @ 18787:954e6be0a757
(detect_coding_iso2022): Distinguish coding-category-iso-7-else and
coding-category-iso-8-else.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Tue, 15 Jul 1997 08:21:17 +0000 |
| parents | ac2e7e21abb0 |
| children | f55d91d45bde |
comparison
equal
deleted
inserted
replaced
| 18786:e44570b2a6a0 | 18787:954e6be0a757 |
|---|---|
| 306 "coding-category-emacs-mule", | 306 "coding-category-emacs-mule", |
| 307 "coding-category-sjis", | 307 "coding-category-sjis", |
| 308 "coding-category-iso-7", | 308 "coding-category-iso-7", |
| 309 "coding-category-iso-8-1", | 309 "coding-category-iso-8-1", |
| 310 "coding-category-iso-8-2", | 310 "coding-category-iso-8-2", |
| 311 "coding-category-iso-else", | 311 "coding-category-iso-7-else", |
| 312 "coding-category-iso-8-else", | |
| 312 "coding-category-big5", | 313 "coding-category-big5", |
| 313 "coding-category-binary" | 314 "coding-category-binary" |
| 314 }; | 315 }; |
| 315 | 316 |
| 316 /* Flag to tell if we look up unification table on character code | 317 /* Flag to tell if we look up unification table on character code |
| 593 Check if a text is encoded in ISO2022. If it is, returns an | 594 Check if a text is encoded in ISO2022. If it is, returns an |
| 594 integer in which appropriate flag bits any of: | 595 integer in which appropriate flag bits any of: |
| 595 CODING_CATEGORY_MASK_ISO_7 | 596 CODING_CATEGORY_MASK_ISO_7 |
| 596 CODING_CATEGORY_MASK_ISO_8_1 | 597 CODING_CATEGORY_MASK_ISO_8_1 |
| 597 CODING_CATEGORY_MASK_ISO_8_2 | 598 CODING_CATEGORY_MASK_ISO_8_2 |
| 598 CODING_CATEGORY_MASK_ISO_ELSE | 599 CODING_CATEGORY_MASK_ISO_7_ELSE |
| 600 CODING_CATEGORY_MASK_ISO_8_ELSE | |
| 599 are set. If a code which should never appear in ISO2022 is found, | 601 are set. If a code which should never appear in ISO2022 is found, |
| 600 returns 0. */ | 602 returns 0. */ |
| 601 | 603 |
| 602 int | 604 int |
| 603 detect_coding_iso2022 (src, src_end) | 605 detect_coding_iso2022 (src, src_end) |
| 604 unsigned char *src, *src_end; | 606 unsigned char *src, *src_end; |
| 605 { | 607 { |
| 606 int mask = (CODING_CATEGORY_MASK_ISO_7 | 608 int mask = (CODING_CATEGORY_MASK_ISO_7 |
| 607 | CODING_CATEGORY_MASK_ISO_8_1 | 609 | CODING_CATEGORY_MASK_ISO_8_1 |
| 608 | CODING_CATEGORY_MASK_ISO_8_2 | 610 | CODING_CATEGORY_MASK_ISO_8_2 |
| 609 | CODING_CATEGORY_MASK_ISO_ELSE); | 611 | CODING_CATEGORY_MASK_ISO_7_ELSE |
| 612 | CODING_CATEGORY_MASK_ISO_8_ELSE | |
| 613 ); | |
| 610 int g1 = 0; /* 1 iff designating to G1. */ | 614 int g1 = 0; /* 1 iff designating to G1. */ |
| 611 int c, i; | 615 int c, i; |
| 612 | 616 |
| 613 while (src < src_end) | 617 while (src < src_end) |
| 614 { | 618 { |
| 626 { | 630 { |
| 627 /* Valid designation sequence. */ | 631 /* Valid designation sequence. */ |
| 628 if (c == ')' || (c == '$' && *src == ')')) | 632 if (c == ')' || (c == '$' && *src == ')')) |
| 629 { | 633 { |
| 630 g1 = 1; | 634 g1 = 1; |
| 631 mask &= ~CODING_CATEGORY_MASK_ISO_7; | 635 mask &= ~(CODING_CATEGORY_MASK_ISO_7 |
| 636 | CODING_CATEGORY_MASK_ISO_7_ELSE); | |
| 632 } | 637 } |
| 633 src++; | 638 src++; |
| 634 break; | 639 break; |
| 635 } | 640 } |
| 636 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') | 641 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') |
| 637 return CODING_CATEGORY_MASK_ISO_ELSE; | 642 mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE |
| 643 | CODING_CATEGORY_MASK_ISO_8_ELSE); | |
| 638 break; | 644 break; |
| 639 | 645 |
| 640 case ISO_CODE_SO: | 646 case ISO_CODE_SO: |
| 641 if (g1) | 647 if (g1) |
| 642 return CODING_CATEGORY_MASK_ISO_ELSE; | 648 mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE |
| 649 | CODING_CATEGORY_MASK_ISO_8_ELSE); | |
| 643 break; | 650 break; |
| 644 | 651 |
| 645 case ISO_CODE_CSI: | 652 case ISO_CODE_CSI: |
| 646 case ISO_CODE_SS2: | 653 case ISO_CODE_SS2: |
| 647 case ISO_CODE_SS3: | 654 case ISO_CODE_SS3: |
| 648 mask &= ~CODING_CATEGORY_MASK_ISO_7; | 655 mask &= ~(CODING_CATEGORY_MASK_ISO_7 |
| 656 | CODING_CATEGORY_MASK_ISO_7_ELSE); | |
| 649 break; | 657 break; |
| 650 | 658 |
| 651 default: | 659 default: |
| 652 if (c < 0x80) | 660 if (c < 0x80) |
| 653 break; | 661 break; |
| 654 else if (c < 0xA0) | 662 else if (c < 0xA0) |
| 655 return 0; | 663 return 0; |
| 656 else | 664 else |
| 657 { | 665 { |
| 658 int count = 1; | 666 unsigned char *src_begin = src; |
| 659 | 667 |
| 660 mask &= ~CODING_CATEGORY_MASK_ISO_7; | 668 mask &= ~(CODING_CATEGORY_MASK_ISO_7 |
| 669 | CODING_CATEGORY_MASK_ISO_7_ELSE); | |
| 661 while (src < src_end && *src >= 0xA0) | 670 while (src < src_end && *src >= 0xA0) |
| 662 count++, src++; | 671 src++; |
| 663 if (count & 1 && src < src_end) | 672 if ((src - src_begin - 1) & 1 && src < src_end) |
| 664 mask &= ~CODING_CATEGORY_MASK_ISO_8_2; | 673 mask &= ~CODING_CATEGORY_MASK_ISO_8_2; |
| 665 } | 674 } |
| 666 break; | 675 break; |
| 667 } | 676 } |
| 668 } | 677 } |
| 2441 | 2450 |
| 2442 o coding-category-sjis | 2451 o coding-category-sjis |
| 2443 | 2452 |
| 2444 The category for a coding system which has the same code range | 2453 The category for a coding system which has the same code range |
| 2445 as SJIS. Assigned the coding-system (Lisp | 2454 as SJIS. Assigned the coding-system (Lisp |
| 2446 symbol) `shift-jis' by default. | 2455 symbol) `japanese-shift-jis' by default. |
| 2447 | 2456 |
| 2448 o coding-category-iso-7 | 2457 o coding-category-iso-7 |
| 2449 | 2458 |
| 2450 The category for a coding system which has the same code range | 2459 The category for a coding system which has the same code range |
| 2451 as ISO2022 of 7-bit environment. Assigned the coding-system | 2460 as ISO2022 of 7-bit environment. This doesn't use any locking |
| 2452 (Lisp symbol) `iso-2022-7' by default. | 2461 shift and single shift functions. Assigned the coding-system |
| 2462 (Lisp symbol) `iso-2022-7bit' by default. | |
| 2453 | 2463 |
| 2454 o coding-category-iso-8-1 | 2464 o coding-category-iso-8-1 |
| 2455 | 2465 |
| 2456 The category for a coding system which has the same code range | 2466 The category for a coding system which has the same code range |
| 2457 as ISO2022 of 8-bit environment and graphic plane 1 used only | 2467 as ISO2022 of 8-bit environment and graphic plane 1 used only |
| 2458 for DIMENSION1 charset. Assigned the coding-system (Lisp | 2468 for DIMENSION1 charset. This doesn't use any locking shift |
| 2459 symbol) `iso-8859-1' by default. | 2469 and single shift functions. Assigned the coding-system (Lisp |
| 2470 symbol) `iso-latin-1' by default. | |
| 2460 | 2471 |
| 2461 o coding-category-iso-8-2 | 2472 o coding-category-iso-8-2 |
| 2462 | 2473 |
| 2463 The category for a coding system which has the same code range | 2474 The category for a coding system which has the same code range |
| 2464 as ISO2022 of 8-bit environment and graphic plane 1 used only | 2475 as ISO2022 of 8-bit environment and graphic plane 1 used only |
| 2465 for DIMENSION2 charset. Assigned the coding-system (Lisp | 2476 for DIMENSION2 charset. This doesn't use any locking shift |
| 2466 symbol) `euc-japan' by default. | 2477 and single shift functions. Assigned the coding-system (Lisp |
| 2467 | 2478 symbol) `japanese-iso-8bit' by default. |
| 2468 o coding-category-iso-else | 2479 |
| 2480 o coding-category-iso-7-else | |
| 2469 | 2481 |
| 2470 The category for a coding system which has the same code range | 2482 The category for a coding system which has the same code range |
| 2471 as ISO2022 but not belongs to any of the above three | 2483 as ISO2022 of 7-bit environment but uses locking shift or |
| 2472 categories. Assigned the coding-system (Lisp symbol) | 2484 single shift functions. Assigned the coding-system (Lisp |
| 2473 `iso-2022-ss2-7' by default. | 2485 symbol) `iso-2022-7bit-lock' by default. |
| 2486 | |
| 2487 o coding-category-iso-8-else | |
| 2488 | |
| 2489 The category for a coding system which has the same code range | |
| 2490 as ISO2022 of 8-bit environment but uses locking shift or | |
| 2491 single shift functions. Assigned the coding-system (Lisp | |
| 2492 symbol) `iso-2022-8bit-ss2' by default. | |
| 2474 | 2493 |
| 2475 o coding-category-big5 | 2494 o coding-category-big5 |
| 2476 | 2495 |
| 2477 The category for a coding system which has the same code range | 2496 The category for a coding system which has the same code range |
| 2478 as BIG5. Assigned the coding-system (Lisp symbol) | 2497 as BIG5. Assigned the coding-system (Lisp symbol) |
