comparison src/coding.c @ 17119:2cfb31c15ced

(create_process, Fopen_network_stream): Typo in indexes of array proc_encode_coding_system fixed. Remove prefix "coding-system-" from coding system symbol names. (encode_coding) : Fix typo ("=" -> "=="). (detect_coding_iso2022): Detect coding-category-iso-8-2 more precisely. (ENCODE_RESET_PLANE_AND_REGISTER): Argument `eol' is deleted. Don't call ENCODE_DESIGNATION if nothing designated initially. (encode_designation_at_bol) New function. (encode_coding_iso2022): Handle CODING_FLAG_ISO_INIT_AT_BOL and CODING_FLAG_ISO_DESIGNATE_AT_BOL. (setup_coding_system): Now, flags of ISO2022 coding systems contains charsets instead of charset IDs. (detect_coding_iso2022, decode_coding_iso2022): Make the code robust against invalid SI and SO. (Ffind_coding_system, syms_of_coding): Escape newlines in docstring. (setup_coding_system): Correct setting coding->symbol and coding->eol_type. The performance improved.
author Kenichi Handa <handa@m17n.org>
date Thu, 27 Feb 1997 11:10:42 +0000
parents 70194012fb3a
children bd8d38879c97
comparison
equal deleted inserted replaced
17118:dcfb481ee914 17119:2cfb31c15ced
579 579
580 int 580 int
581 detect_coding_iso2022 (src, src_end) 581 detect_coding_iso2022 (src, src_end)
582 unsigned char *src, *src_end; 582 unsigned char *src, *src_end;
583 { 583 {
584 unsigned char graphic_register[4]; 584 unsigned char c, g1 = 0;
585 unsigned char c, esc_cntl;
586 int mask = (CODING_CATEGORY_MASK_ISO_7 585 int mask = (CODING_CATEGORY_MASK_ISO_7
587 | CODING_CATEGORY_MASK_ISO_8_1 586 | CODING_CATEGORY_MASK_ISO_8_1
588 | CODING_CATEGORY_MASK_ISO_8_2); 587 | CODING_CATEGORY_MASK_ISO_8_2);
589 /* We may look ahead maximum 3 bytes. */ 588 /* We may look ahead at most 4 bytes. */
590 unsigned char *adjusted_src_end = src_end - 3; 589 unsigned char *adjusted_src_end = src_end - 4;
591 int i; 590 int i;
592 591
593 for (i = 0; i < 4; i++) 592 while (src < src_end)
594 graphic_register[i] = CHARSET_ASCII;
595
596 while (src < adjusted_src_end)
597 { 593 {
598 c = *src++; 594 c = *src++;
599 switch (c) 595 switch (c)
600 { 596 {
601 case ISO_CODE_ESC: 597 case ISO_CODE_ESC:
602 if (src >= adjusted_src_end) 598 if (src >= src_end)
603 break; 599 break;
604 c = *src++; 600 c = *src++;
605 if (c == '$') 601 if (src + 2 >= src_end
602 && ((c >= '(' && c <= '/')
603 || c == '$' && ((*src >= '(' && *src <= '/')
604 || (*src >= '@' && *src <= 'B'))))
606 { 605 {
607 /* Designation of 2-byte character set. */ 606 /* Valid designation sequence. */
608 if (src >= adjusted_src_end) 607 if (c == ')' || (c == '$' && *src == ')'))
609 break; 608 g1 = 1;
610 c = *src++; 609 src++;
610 break;
611 } 611 }
612 if ((c >= ')' && c <= '+') || (c >= '-' && c <= '/'))
613 /* Designation to graphic register 1, 2, or 3. */
614 mask &= ~CODING_CATEGORY_MASK_ISO_7;
615 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') 612 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o')
616 return CODING_CATEGORY_MASK_ISO_ELSE; 613 return CODING_CATEGORY_MASK_ISO_ELSE;
617 break; 614 break;
618 615
619 case ISO_CODE_SI:
620 case ISO_CODE_SO: 616 case ISO_CODE_SO:
621 return CODING_CATEGORY_MASK_ISO_ELSE; 617 if (g1)
622 618 return CODING_CATEGORY_MASK_ISO_ELSE;
619 break;
620
623 case ISO_CODE_CSI: 621 case ISO_CODE_CSI:
624 case ISO_CODE_SS2: 622 case ISO_CODE_SS2:
625 case ISO_CODE_SS3: 623 case ISO_CODE_SS3:
626 mask &= ~CODING_CATEGORY_MASK_ISO_7; 624 mask &= ~CODING_CATEGORY_MASK_ISO_7;
627 break; 625 break;
634 else 632 else
635 { 633 {
636 int count = 1; 634 int count = 1;
637 635
638 mask &= ~CODING_CATEGORY_MASK_ISO_7; 636 mask &= ~CODING_CATEGORY_MASK_ISO_7;
639 while (src < adjusted_src_end && *src >= 0xA0) 637 while (src < src_end && *src >= 0xA0)
640 count++, src++; 638 count++, src++;
641 if (count & 1 && src < adjusted_src_end) 639 if (count & 1 && src < src_end)
642 mask &= ~CODING_CATEGORY_MASK_ISO_8_2; 640 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
643 } 641 }
644 break; 642 break;
645 } 643 }
646 } 644 }
792 *dst++ = c1; 790 *dst++ = c1;
793 } 791 }
794 break; 792 break;
795 793
796 case ISO_shift_out: 794 case ISO_shift_out:
795 if (CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
796 goto label_invalid_escape_sequence;
797 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; 797 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
798 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); 798 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
799 break; 799 break;
800 800
801 case ISO_shift_in: 801 case ISO_shift_in:
828 switch (c1) 828 switch (c1)
829 { 829 {
830 case '&': /* revision of following character set */ 830 case '&': /* revision of following character set */
831 ONE_MORE_BYTE (c1); 831 ONE_MORE_BYTE (c1);
832 if (!(c1 >= '@' && c1 <= '~')) 832 if (!(c1 >= '@' && c1 <= '~'))
833 { 833 goto label_invalid_escape_sequence;
834 goto label_invalid_escape_sequence;
835 }
836 ONE_MORE_BYTE (c1); 834 ONE_MORE_BYTE (c1);
837 if (c1 != ISO_CODE_ESC) 835 if (c1 != ISO_CODE_ESC)
838 { 836 goto label_invalid_escape_sequence;
839 goto label_invalid_escape_sequence;
840 }
841 ONE_MORE_BYTE (c1); 837 ONE_MORE_BYTE (c1);
842 goto label_escape_sequence; 838 goto label_escape_sequence;
843 839
844 case '$': /* designation of 2-byte character set */ 840 case '$': /* designation of 2-byte character set */
845 ONE_MORE_BYTE (c1); 841 ONE_MORE_BYTE (c1);
857 { /* designation of DIMENSION2_CHARS96 character set */ 853 { /* designation of DIMENSION2_CHARS96 character set */
858 ONE_MORE_BYTE (c2); 854 ONE_MORE_BYTE (c2);
859 DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2); 855 DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
860 } 856 }
861 else 857 else
862 { 858 goto label_invalid_escape_sequence;
863 goto label_invalid_escape_sequence;
864 }
865 break; 859 break;
866 860
867 case 'n': /* invocation of locking-shift-2 */ 861 case 'n': /* invocation of locking-shift-2 */
862 if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
863 goto label_invalid_escape_sequence;
868 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; 864 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
865 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
869 break; 866 break;
870 867
871 case 'o': /* invocation of locking-shift-3 */ 868 case 'o': /* invocation of locking-shift-3 */
869 if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
870 goto label_invalid_escape_sequence;
872 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; 871 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
872 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
873 break; 873 break;
874 874
875 case 'N': /* invocation of single-shift-2 */ 875 case 'N': /* invocation of single-shift-2 */
876 if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
877 goto label_invalid_escape_sequence;
876 ONE_MORE_BYTE (c1); 878 ONE_MORE_BYTE (c1);
877 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2); 879 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
878 DECODE_ISO_CHARACTER (charset, c1); 880 DECODE_ISO_CHARACTER (charset, c1);
879 break; 881 break;
880 882
881 case 'O': /* invocation of single-shift-3 */ 883 case 'O': /* invocation of single-shift-3 */
884 if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
885 goto label_invalid_escape_sequence;
882 ONE_MORE_BYTE (c1); 886 ONE_MORE_BYTE (c1);
883 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3); 887 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
884 DECODE_ISO_CHARACTER (charset, c1); 888 DECODE_ISO_CHARACTER (charset, c1);
885 break; 889 break;
886 890
1244 #define ENCODE_DIRECTION_L2R \ 1248 #define ENCODE_DIRECTION_L2R \
1245 ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']' 1249 ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']'
1246 1250
1247 /* Produce codes for designation and invocation to reset the graphic 1251 /* Produce codes for designation and invocation to reset the graphic
1248 planes and registers to initial state. */ 1252 planes and registers to initial state. */
1249 #define ENCODE_RESET_PLANE_AND_REGISTER(eol) \ 1253 #define ENCODE_RESET_PLANE_AND_REGISTER \
1250 do { \ 1254 do { \
1251 int reg; \ 1255 int reg; \
1252 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \ 1256 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \
1253 ENCODE_SHIFT_IN; \ 1257 ENCODE_SHIFT_IN; \
1254 for (reg = 0; reg < 4; reg++) \ 1258 for (reg = 0; reg < 4; reg++) \
1255 { \ 1259 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 \
1256 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) < 0) \ 1260 && (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
1257 { \ 1261 != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))) \
1258 if (eol) CODING_SPEC_ISO_DESIGNATION (coding, reg) = -1; \ 1262 ENCODE_DESIGNATION \
1259 } \ 1263 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1260 else if (CODING_SPEC_ISO_DESIGNATION (coding, reg) \
1261 != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)) \
1262 ENCODE_DESIGNATION \
1263 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1264 } \
1265 } while (0) 1264 } while (0)
1265
1266 int
1267 encode_designation_at_bol (coding, src, src_end, dstp)
1268 struct coding_system *coding;
1269 unsigned char *src, *src_end, **dstp;
1270 {
1271 int charset, reg, r[4];
1272 unsigned char *dst = *dstp, c;
1273 for (reg = 0; reg < 4; reg++) r[reg] = -1;
1274 while (src < src_end && (c = *src++) != '\n')
1275 {
1276 switch (emacs_code_class[c])
1277 {
1278 case EMACS_ascii_code:
1279 charset = CHARSET_ASCII;
1280 break;
1281 case EMACS_leading_code_2:
1282 if (++src >= src_end) continue;
1283 charset = c;
1284 break;
1285 case EMACS_leading_code_3:
1286 if ((src += 2) >= src_end) continue;
1287 charset = (c < LEADING_CODE_PRIVATE_11 ? c : *(src - 2));
1288 break;
1289 case EMACS_leading_code_4:
1290 if ((src += 3) >= src_end) continue;
1291 charset = *(src - 3);
1292 break;
1293 default:
1294 continue;
1295 }
1296 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1297 if (r[reg] < 0
1298 && CODING_SPEC_ISO_DESIGNATION (coding, reg) != charset)
1299 r[reg] = charset;
1300 }
1301 if (c != '\n' && !coding->last_block)
1302 return -1;
1303 for (reg = 0; reg < 4; reg++)
1304 if (r[reg] >= 0)
1305 ENCODE_DESIGNATION (r[reg], reg, coding);
1306 *dstp = dst;
1307 return 0;
1308 }
1266 1309
1267 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ 1310 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
1268 1311
1269 int 1312 int
1270 encode_coding_iso2022 (coding, source, destination, 1313 encode_coding_iso2022 (coding, source, destination,
1276 { 1319 {
1277 unsigned char *src = source; 1320 unsigned char *src = source;
1278 unsigned char *src_end = source + src_bytes; 1321 unsigned char *src_end = source + src_bytes;
1279 unsigned char *dst = destination; 1322 unsigned char *dst = destination;
1280 unsigned char *dst_end = destination + dst_bytes; 1323 unsigned char *dst_end = destination + dst_bytes;
1281 /* Since the maximum bytes produced by each loop is 6, we subtract 5 1324 /* Since the maximum bytes produced by each loop is 20, we subtract 19
1282 from DST_END to assure overflow checking is necessary only at the 1325 from DST_END to assure overflow checking is necessary only at the
1283 head of loop. */ 1326 head of loop. */
1284 unsigned char *adjusted_dst_end = dst_end - 5; 1327 unsigned char *adjusted_dst_end = dst_end - 19;
1285 1328
1286 while (src < src_end && dst < adjusted_dst_end) 1329 while (src < src_end && dst < adjusted_dst_end)
1287 { 1330 {
1288 /* SRC_BASE remembers the start position in source in each loop. 1331 /* SRC_BASE remembers the start position in source in each loop.
1289 The loop will be exited when there's not enough source text 1332 The loop will be exited when there's not enough source text
1290 to analyze multi-byte codes (within macros ONE_MORE_BYTE, 1333 to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1291 TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is 1334 TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is
1292 reset to SRC_BASE before exiting. */ 1335 reset to SRC_BASE before exiting. */
1293 unsigned char *src_base = src; 1336 unsigned char *src_base = src;
1294 unsigned char c1 = *src++, c2, c3, c4; 1337 unsigned char c1, c2, c3, c4;
1295 int charset; 1338 int charset;
1296 1339
1340 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1341 && CODING_SPEC_ISO_BOL (coding))
1342 {
1343 /* We have to produce destination sequences now. */
1344 if (encode_designation_at_bol (coding, src, src_end, &dst) < 0)
1345 /* We can't find end of line in the current block. Let's
1346 repeat encoding starting from the current position
1347 pointed by SRC. */
1348 break;
1349 CODING_SPEC_ISO_BOL (coding) = 0;
1350 }
1351
1352 c1 = *src++;
1297 /* If we are seeing a component of a composite character, we are 1353 /* If we are seeing a component of a composite character, we are
1298 seeing a leading-code specially encoded for composition, or a 1354 seeing a leading-code specially encoded for composition, or a
1299 composition rule if composing with rule. We must set C1 1355 composition rule if composing with rule. We must set C1
1300 to a normal leading-code or an ASCII code. If we are not at 1356 to a normal leading-code or an ASCII code. If we are not at
1301 a composed character, we must reset the composition state. */ 1357 a composed character, we must reset the composition state. */
1337 ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1); 1393 ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1);
1338 break; 1394 break;
1339 1395
1340 case EMACS_control_code: 1396 case EMACS_control_code:
1341 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) 1397 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1342 ENCODE_RESET_PLANE_AND_REGISTER (0); 1398 ENCODE_RESET_PLANE_AND_REGISTER;
1343 *dst++ = c1; 1399 *dst++ = c1;
1344 break; 1400 break;
1345 1401
1346 case EMACS_carriage_return_code: 1402 case EMACS_carriage_return_code:
1347 if (!coding->selective) 1403 if (!coding->selective)
1348 { 1404 {
1349 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) 1405 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1350 ENCODE_RESET_PLANE_AND_REGISTER (0); 1406 ENCODE_RESET_PLANE_AND_REGISTER;
1351 *dst++ = c1; 1407 *dst++ = c1;
1352 break; 1408 break;
1353 } 1409 }
1354 /* fall down to treat '\r' as '\n' ... */ 1410 /* fall down to treat '\r' as '\n' ... */
1355 1411
1356 case EMACS_linefeed_code: 1412 case EMACS_linefeed_code:
1357 if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL) 1413 if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1358 ENCODE_RESET_PLANE_AND_REGISTER (1); 1414 ENCODE_RESET_PLANE_AND_REGISTER;
1415 if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1416 bcopy (coding->spec.iso2022.initial_designation,
1417 coding->spec.iso2022.current_designation,
1418 sizeof coding->spec.iso2022.initial_designation);
1359 if (coding->eol_type == CODING_EOL_LF 1419 if (coding->eol_type == CODING_EOL_LF
1360 || coding->eol_type == CODING_EOL_AUTOMATIC) 1420 || coding->eol_type == CODING_EOL_AUTOMATIC)
1361 *dst++ = ISO_CODE_LF; 1421 *dst++ = ISO_CODE_LF;
1362 else if (coding->eol_type == CODING_EOL_CRLF) 1422 else if (coding->eol_type == CODING_EOL_CRLF)
1363 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; 1423 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1364 else 1424 else
1365 *dst++ = ISO_CODE_CR; 1425 *dst++ = ISO_CODE_CR;
1426 CODING_SPEC_ISO_BOL (coding) = 1;
1366 break; 1427 break;
1367 1428
1368 case EMACS_leading_code_2: 1429 case EMACS_leading_code_2:
1369 ONE_MORE_BYTE (c2); 1430 ONE_MORE_BYTE (c2);
1370 ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2); 1431 ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2);
1416 reset the state of graphic planes and registers to initial one. 1477 reset the state of graphic planes and registers to initial one.
1417 In addition, we had better just flush out all remaining codes in 1478 In addition, we had better just flush out all remaining codes in
1418 the text although they are not valid characters. */ 1479 the text although they are not valid characters. */
1419 if (coding->last_block) 1480 if (coding->last_block)
1420 { 1481 {
1421 ENCODE_RESET_PLANE_AND_REGISTER (1); 1482 ENCODE_RESET_PLANE_AND_REGISTER;
1422 bcopy(src, dst, src_end - src); 1483 bcopy(src, dst, src_end - src);
1423 dst += (src_end - src); 1484 dst += (src_end - src);
1424 src = src_end; 1485 src = src_end;
1425 } 1486 }
1426 *consumed = src - source; 1487 *consumed = src - source;
1983 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING 2044 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING
1984 is setup so that no conversion is necessary and return -1, else 2045 is setup so that no conversion is necessary and return -1, else
1985 return 0. */ 2046 return 0. */
1986 2047
1987 int 2048 int
1988 setup_coding_system (coding_system_symbol, coding) 2049 setup_coding_system (coding_system, coding)
1989 Lisp_Object coding_system_symbol; 2050 Lisp_Object coding_system;
1990 struct coding_system *coding; 2051 struct coding_system *coding;
1991 { 2052 {
1992 Lisp_Object coding_system_vector = Qnil;
1993 Lisp_Object type, eol_type; 2053 Lisp_Object type, eol_type;
1994 2054
1995 /* At first, set several fields default values. */ 2055 /* At first, set several fields default values. */
1996 coding->require_flushing = 0; 2056 coding->require_flushing = 0;
1997 coding->last_block = 0; 2057 coding->last_block = 0;
1998 coding->selective = 0; 2058 coding->selective = 0;
1999 coding->composing = 0; 2059 coding->composing = 0;
2000 coding->direction = 0; 2060 coding->direction = 0;
2001 coding->carryover_size = 0; 2061 coding->carryover_size = 0;
2002 coding->symbol = Qnil;
2003 coding->post_read_conversion = coding->pre_write_conversion = Qnil; 2062 coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2004 2063
2005 /* Get value of property `coding-system'. If it is a Lisp symbol 2064 Vlast_coding_system_used = coding->symbol = coding_system;
2006 pointing another coding system, fetch its property until we get a 2065 eol_type = Qnil;
2007 vector. */ 2066 /* Get value of property `coding-system' until we get a vector.
2008 while (!NILP (coding_system_symbol)) 2067 While doing that, also get values of properties
2068 `post-read-conversion', `pre-write-conversion', and `eol-type'. */
2069 while (!NILP (coding_system) && SYMBOLP (coding_system))
2009 { 2070 {
2010 coding->symbol = coding_system_symbol;
2011 if (NILP (coding->post_read_conversion)) 2071 if (NILP (coding->post_read_conversion))
2012 coding->post_read_conversion = Fget (coding_system_symbol, 2072 coding->post_read_conversion = Fget (coding_system,
2013 Qpost_read_conversion); 2073 Qpost_read_conversion);
2014 if (NILP (coding->pre_write_conversion)) 2074 if (NILP (coding->pre_write_conversion))
2015 coding->pre_write_conversion = Fget (coding_system_symbol, 2075 coding->pre_write_conversion = Fget (coding_system,
2016 Qpre_write_conversion); 2076 Qpre_write_conversion);
2017 2077 if (NILP (eol_type))
2018 coding_system_vector = Fget (coding_system_symbol, Qcoding_system); 2078 eol_type = Fget (coding_system, Qeol_type);
2019 if (VECTORP (coding_system_vector)) 2079 coding_system = Fget (coding_system, Qcoding_system);
2020 break;
2021 coding_system_symbol = coding_system_vector;
2022 } 2080 }
2023 Vlast_coding_system_used = coding->symbol; 2081 if (!VECTORP (coding_system)
2024 2082 || XVECTOR (coding_system)->size != 5)
2025 if (!VECTORP (coding_system_vector)
2026 || XVECTOR (coding_system_vector)->size != 5)
2027 goto label_invalid_coding_system; 2083 goto label_invalid_coding_system;
2028
2029 /* Get value of property `eol-type' by searching from the root
2030 coding-system. */
2031 coding_system_symbol = coding->symbol;
2032 eol_type = Qnil;
2033 while (SYMBOLP (coding_system_symbol) && !NILP (coding_system_symbol))
2034 {
2035 eol_type = Fget (coding_system_symbol, Qeol_type);
2036 if (!NILP (eol_type))
2037 break;
2038 coding_system_symbol = Fget (coding_system_symbol, Qcoding_system);
2039 }
2040 2084
2041 if (VECTORP (eol_type)) 2085 if (VECTORP (eol_type))
2042 coding->eol_type = CODING_EOL_AUTOMATIC; 2086 coding->eol_type = CODING_EOL_AUTOMATIC;
2043 else if (XFASTINT (eol_type) == 1) 2087 else if (XFASTINT (eol_type) == 1)
2044 coding->eol_type = CODING_EOL_CRLF; 2088 coding->eol_type = CODING_EOL_CRLF;
2045 else if (XFASTINT (eol_type) == 2) 2089 else if (XFASTINT (eol_type) == 2)
2046 coding->eol_type = CODING_EOL_CR; 2090 coding->eol_type = CODING_EOL_CR;
2047 else 2091 else
2048 coding->eol_type = CODING_EOL_LF; 2092 coding->eol_type = CODING_EOL_LF;
2049 2093
2050 type = XVECTOR (coding_system_vector)->contents[0]; 2094 type = XVECTOR (coding_system)->contents[0];
2051 switch (XFASTINT (type)) 2095 switch (XFASTINT (type))
2052 { 2096 {
2053 case 0: 2097 case 0:
2054 coding->type = coding_type_internal; 2098 coding->type = coding_type_internal;
2055 break; 2099 break;
2059 break; 2103 break;
2060 2104
2061 case 2: 2105 case 2:
2062 coding->type = coding_type_iso2022; 2106 coding->type = coding_type_iso2022;
2063 { 2107 {
2064 Lisp_Object val = XVECTOR (coding_system_vector)->contents[4]; 2108 Lisp_Object val = XVECTOR (coding_system)->contents[4];
2065 Lisp_Object *flags; 2109 Lisp_Object *flags;
2066 int i, charset, default_reg_bits = 0; 2110 int i, charset, default_reg_bits = 0;
2067 2111
2068 if (!VECTORP (val) || XVECTOR (val)->size != 32) 2112 if (!VECTORP (val) || XVECTOR (val)->size != 32)
2069 goto label_invalid_coding_system; 2113 goto label_invalid_coding_system;
2076 | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS) 2120 | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2077 | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT) 2121 | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2078 | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT) 2122 | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2079 | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN) 2123 | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2080 | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS) 2124 | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2081 | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)); 2125 | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2126 | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2127 | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL));
2082 2128
2083 /* Invoke graphic register 0 to plane 0. */ 2129 /* Invoke graphic register 0 to plane 0. */
2084 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; 2130 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2085 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */ 2131 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */
2086 CODING_SPEC_ISO_INVOCATION (coding, 1) 2132 CODING_SPEC_ISO_INVOCATION (coding, 1)
2087 = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1); 2133 = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2088 /* Not single shifting at first. */ 2134 /* Not single shifting at first. */
2089 CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0; 2135 CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0;
2136 /* Beginning of buffer should also be regarded as bol. */
2137 CODING_SPEC_ISO_BOL(coding) = 1;
2090 2138
2091 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. 2139 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2092 FLAGS[REG] can be one of below: 2140 FLAGS[REG] can be one of below:
2093 integer CHARSET: CHARSET occupies register I, 2141 integer CHARSET: CHARSET occupies register I,
2094 t: designate nothing to REG initially, but can be used 2142 t: designate nothing to REG initially, but can be used
2101 for (charset = 0; charset < MAX_CHARSET; charset++) 2149 for (charset = 0; charset < MAX_CHARSET; charset++)
2102 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = -1; 2150 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = -1;
2103 for (i = 0; i < 4; i++) 2151 for (i = 0; i < 4; i++)
2104 { 2152 {
2105 if (INTEGERP (flags[i]) 2153 if (INTEGERP (flags[i])
2106 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))) 2154 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2155 || (charset = get_charset_id (flags[i])) >= 0)
2107 { 2156 {
2108 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; 2157 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2109 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; 2158 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2110 } 2159 }
2111 else if (EQ (flags[i], Qt)) 2160 else if (EQ (flags[i], Qt))
2117 { 2166 {
2118 Lisp_Object tail = flags[i]; 2167 Lisp_Object tail = flags[i];
2119 2168
2120 if (INTEGERP (XCONS (tail)->car) 2169 if (INTEGERP (XCONS (tail)->car)
2121 && (charset = XINT (XCONS (tail)->car), 2170 && (charset = XINT (XCONS (tail)->car),
2122 CHARSET_VALID_P (charset))) 2171 CHARSET_VALID_P (charset))
2172 || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2123 { 2173 {
2124 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; 2174 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2125 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; 2175 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2126 } 2176 }
2127 else 2177 else
2129 tail = XCONS (tail)->cdr; 2179 tail = XCONS (tail)->cdr;
2130 while (CONSP (tail)) 2180 while (CONSP (tail))
2131 { 2181 {
2132 if (INTEGERP (XCONS (tail)->car) 2182 if (INTEGERP (XCONS (tail)->car)
2133 && (charset = XINT (XCONS (tail)->car), 2183 && (charset = XINT (XCONS (tail)->car),
2134 CHARSET_VALID_P (charset))) 2184 CHARSET_VALID_P (charset))
2185 || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2135 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) 2186 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2136 = i; 2187 = i;
2137 else if (EQ (XCONS (tail)->car, Qt)) 2188 else if (EQ (XCONS (tail)->car, Qt))
2138 default_reg_bits |= 1 << i; 2189 default_reg_bits |= 1 << i;
2139 tail = XCONS (tail)->cdr; 2190 tail = XCONS (tail)->cdr;
2188 break; 2239 break;
2189 2240
2190 case 3: 2241 case 3:
2191 coding->type = coding_type_big5; 2242 coding->type = coding_type_big5;
2192 coding->flags 2243 coding->flags
2193 = (NILP (XVECTOR (coding_system_vector)->contents[4]) 2244 = (NILP (XVECTOR (coding_system)->contents[4])
2194 ? CODING_FLAG_BIG5_HKU 2245 ? CODING_FLAG_BIG5_HKU
2195 : CODING_FLAG_BIG5_ETEN); 2246 : CODING_FLAG_BIG5_ETEN);
2196 break; 2247 break;
2197 2248
2198 case 4: 2249 case 4:
2199 coding->type = coding_type_ccl; 2250 coding->type = coding_type_ccl;
2200 { 2251 {
2201 Lisp_Object val = XVECTOR (coding_system_vector)->contents[4]; 2252 Lisp_Object val = XVECTOR (coding_system)->contents[4];
2202 if (CONSP (val) 2253 if (CONSP (val)
2203 && VECTORP (XCONS (val)->car) 2254 && VECTORP (XCONS (val)->car)
2204 && VECTORP (XCONS (val)->cdr)) 2255 && VECTORP (XCONS (val)->cdr))
2205 { 2256 {
2206 setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car); 2257 setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car);
2221 } 2272 }
2222 return 0; 2273 return 0;
2223 2274
2224 label_invalid_coding_system: 2275 label_invalid_coding_system:
2225 coding->type = coding_type_no_conversion; 2276 coding->type = coding_type_no_conversion;
2277 coding->symbol = coding->pre_write_conversion = coding->post_read_conversion
2278 = Qnil;
2226 return -1; 2279 return -1;
2227 } 2280 }
2228 2281
2229 /* Emacs has a mechanism to automatically detect a coding system if it 2282 /* Emacs has a mechanism to automatically detect a coding system if it
2230 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, 2283 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
2234 2287
2235 o coding-category-internal 2288 o coding-category-internal
2236 2289
2237 The category for a coding system which has the same code range 2290 The category for a coding system which has the same code range
2238 as Emacs' internal format. Assigned the coding-system (Lisp 2291 as Emacs' internal format. Assigned the coding-system (Lisp
2239 symbol) `coding-system-internal' by default. 2292 symbol) `internal' by default.
2240 2293
2241 o coding-category-sjis 2294 o coding-category-sjis
2242 2295
2243 The category for a coding system which has the same code range 2296 The category for a coding system which has the same code range
2244 as SJIS. Assigned the coding-system (Lisp 2297 as SJIS. Assigned the coding-system (Lisp
2245 symbol) `coding-system-sjis' by default. 2298 symbol) `shift-jis' by default.
2246 2299
2247 o coding-category-iso-7 2300 o coding-category-iso-7
2248 2301
2249 The category for a coding system which has the same code range 2302 The category for a coding system which has the same code range
2250 as ISO2022 of 7-bit environment. Assigned the coding-system 2303 as ISO2022 of 7-bit environment. Assigned the coding-system
2251 (Lisp symbol) `coding-system-junet' by default. 2304 (Lisp symbol) `iso-2022-7' by default.
2252 2305
2253 o coding-category-iso-8-1 2306 o coding-category-iso-8-1
2254 2307
2255 The category for a coding system which has the same code range 2308 The category for a coding system which has the same code range
2256 as ISO2022 of 8-bit environment and graphic plane 1 used only 2309 as ISO2022 of 8-bit environment and graphic plane 1 used only
2257 for DIMENSION1 charset. Assigned the coding-system (Lisp 2310 for DIMENSION1 charset. Assigned the coding-system (Lisp
2258 symbol) `coding-system-ctext' by default. 2311 symbol) `iso-8859-1' by default.
2259 2312
2260 o coding-category-iso-8-2 2313 o coding-category-iso-8-2
2261 2314
2262 The category for a coding system which has the same code range 2315 The category for a coding system which has the same code range
2263 as ISO2022 of 8-bit environment and graphic plane 1 used only 2316 as ISO2022 of 8-bit environment and graphic plane 1 used only
2264 for DIMENSION2 charset. Assigned the coding-system (Lisp 2317 for DIMENSION2 charset. Assigned the coding-system (Lisp
2265 symbol) `coding-system-euc-japan' by default. 2318 symbol) `euc-japan' by default.
2266 2319
2267 o coding-category-iso-else 2320 o coding-category-iso-else
2268 2321
2269 The category for a coding system which has the same code range 2322 The category for a coding system which has the same code range
2270 as ISO2022 but not belongs to any of the above three 2323 as ISO2022 but not belongs to any of the above three
2271 categories. Assigned the coding-system (Lisp symbol) 2324 categories. Assigned the coding-system (Lisp symbol)
2272 `coding-system-iso-2022-ss2-7' by default. 2325 `iso-2022-ss2-7' by default.
2273 2326
2274 o coding-category-big5 2327 o coding-category-big5
2275 2328
2276 The category for a coding system which has the same code range 2329 The category for a coding system which has the same code range
2277 as BIG5. Assigned the coding-system (Lisp symbol) 2330 as BIG5. Assigned the coding-system (Lisp symbol)
2278 `coding-system-big5' by default. 2331 `cn-big5' by default.
2279 2332
2280 o coding-category-binary 2333 o coding-category-binary
2281 2334
2282 The category for a coding system not categorized in any of the 2335 The category for a coding system not categorized in any of the
2283 above. Assigned the coding-system (Lisp symbol) 2336 above. Assigned the coding-system (Lisp symbol)
2284 `coding-system-noconv' by default. 2337 `no-conversion' by default.
2285 2338
2286 Each of them is a Lisp symbol and the value is an actual 2339 Each of them is a Lisp symbol and the value is an actual
2287 `coding-system's (this is also a Lisp symbol) assigned by a user. 2340 `coding-system's (this is also a Lisp symbol) assigned by a user.
2288 What Emacs does actually is to detect a category of coding system. 2341 What Emacs does actually is to detect a category of coding system.
2289 Then, it uses a `coding-system' assigned to it. If Emacs can't 2342 Then, it uses a `coding-system' assigned to it. If Emacs can't
2547 bcopy (source, destination, produced); 2600 bcopy (source, destination, produced);
2548 if (coding->selective) 2601 if (coding->selective)
2549 { 2602 {
2550 unsigned char *p = destination, *pend = destination + produced; 2603 unsigned char *p = destination, *pend = destination + produced;
2551 while (p < pend) 2604 while (p < pend)
2552 if (*p++ = '\015') p[-1] = '\n'; 2605 if (*p++ == '\015') p[-1] = '\n';
2553 } 2606 }
2554 } 2607 }
2555 *consumed = produced; 2608 *consumed = produced;
2556 break; 2609 break;
2557 2610
2685 return ((NILP (obj) || !NILP (Fcoding_system_vector (obj))) ? Qt : Qnil); 2738 return ((NILP (obj) || !NILP (Fcoding_system_vector (obj))) ? Qt : Qnil);
2686 } 2739 }
2687 2740
2688 DEFUN ("read-non-nil-coding-system", 2741 DEFUN ("read-non-nil-coding-system",
2689 Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0, 2742 Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0,
2690 "Read a coding-system from the minibuffer, prompting with string PROMPT.") 2743 "Read a coding system from the minibuffer, prompting with string PROMPT.")
2691 (prompt) 2744 (prompt)
2692 Lisp_Object prompt; 2745 Lisp_Object prompt;
2693 { 2746 {
2694 return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_vector, 2747 Lisp_Object val;
2695 Qt, Qnil, Qnil), 2748 do {
2696 Qnil); 2749 val = Fcompleting_read (prompt, Vobarray, Qcoding_system_vector,
2750 Qt, Qnil, Qnil);
2751 } while (XSTRING (val)->size == 0);
2752 return (Fintern (val, Qnil));
2697 } 2753 }
2698 2754
2699 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0, 2755 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0,
2700 "Read a coding-system or nil from the minibuffer, prompting with string PROMPT.") 2756 "Read a coding system or nil from the minibuffer, prompting with string PROMPT.")
2701 (prompt) 2757 (prompt)
2702 Lisp_Object prompt; 2758 Lisp_Object prompt;
2703 { 2759 {
2704 return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_p, 2760 Lisp_Object val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p,
2705 Qt, Qnil, Qnil), 2761 Qt, Qnil, Qnil);
2706 Qnil); 2762 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
2707 } 2763 }
2708 2764
2709 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system, 2765 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
2710 1, 1, 0, 2766 1, 1, 0,
2711 "Check validity of CODING-SYSTEM.\n\ 2767 "Check validity of CODING-SYSTEM.\n\
2724 2780
2725 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, 2781 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
2726 2, 2, 0, 2782 2, 2, 0,
2727 "Detect coding-system of the text in the region between START and END.\n\ 2783 "Detect coding-system of the text in the region between START and END.\n\
2728 Return a list of possible coding-systems ordered by priority.\n\ 2784 Return a list of possible coding-systems ordered by priority.\n\
2729 If only ASCII characters are found, it returns `coding-system-automatic'\n\ 2785 If only ASCII characters are found, it returns `automatic-conversion'\n\
2730 or its subsidiary coding-system according to a detected end-of-line format.") 2786 or its subsidiary coding-system according to a detected end-of-line format.")
2731 (b, e) 2787 (b, e)
2732 Lisp_Object b, e; 2788 Lisp_Object b, e;
2733 { 2789 {
2734 int coding_mask, eol_type; 2790 int coding_mask, eol_type;
2742 coding_mask = detect_coding_mask (POS_ADDR (beg), end - beg); 2798 coding_mask = detect_coding_mask (POS_ADDR (beg), end - beg);
2743 eol_type = detect_eol_type (POS_ADDR (beg), end - beg); 2799 eol_type = detect_eol_type (POS_ADDR (beg), end - beg);
2744 2800
2745 if (coding_mask == CODING_CATEGORY_MASK_ANY) 2801 if (coding_mask == CODING_CATEGORY_MASK_ANY)
2746 { 2802 {
2747 val = intern ("coding-system-automatic"); 2803 val = intern ("automatic-conversion");
2748 if (eol_type != CODING_EOL_AUTOMATIC) 2804 if (eol_type != CODING_EOL_AUTOMATIC)
2749 { 2805 {
2750 Lisp_Object val2 = Fget (val, Qeol_type); 2806 Lisp_Object val2 = Fget (val, Qeol_type);
2751 if (VECTORP (val2)) 2807 if (VECTORP (val2))
2752 val = XVECTOR (val2)->contents[eol_type]; 2808 val = XVECTOR (val2)->contents[eol_type];
2821 *begp = *endp; 2877 *begp = *endp;
2822 return; 2878 return;
2823 case coding_type_ccl: 2879 case coding_type_ccl:
2824 /* We can't skip any data. */ 2880 /* We can't skip any data. */
2825 return; 2881 return;
2882 case coding_type_iso2022:
2883 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2884 {
2885 unsigned char *bol = beg_addr;
2886 while (beg_addr < end_addr && *beg_addr < 0x80)
2887 {
2888 beg_addr++;
2889 if (*(beg_addr - 1) == '\n')
2890 bol = beg_addr;
2891 }
2892 beg_addr = bol;
2893 goto label_skip_tail;
2894 }
2895 /* fall down ... */
2826 default: 2896 default:
2827 /* We can skip all ASCII characters at the head and tail. */ 2897 /* We can skip all ASCII characters at the head and tail. */
2828 while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++; 2898 while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++;
2899 label_skip_tail:
2829 while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--; 2900 while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--;
2830 break; 2901 break;
2831 } 2902 }
2832 } 2903 }
2833 else /* for decoding */ 2904 else /* for decoding */
2972 3043
2973 return make_number (len); 3044 return make_number (len);
2974 } 3045 }
2975 3046
2976 Lisp_Object 3047 Lisp_Object
2977 code_convert_string (str, coding, encodep) 3048 code_convert_string (str, coding, encodep, nocopy)
2978 Lisp_Object str; 3049 Lisp_Object str, nocopy;
2979 struct coding_system *coding; 3050 struct coding_system *coding;
2980 int encodep; 3051 int encodep;
2981 { 3052 {
2982 int len, consumed, produced; 3053 int len, consumed, produced;
2983 char *buf; 3054 char *buf;
3012 endp = begp + XSTRING (str)->size; 3083 endp = begp + XSTRING (str)->size;
3013 shrink_conversion_area (&begp, &endp, coding, encodep); 3084 shrink_conversion_area (&begp, &endp, coding, encodep);
3014 3085
3015 if (begp == endp) 3086 if (begp == endp)
3016 /* We need no conversion. */ 3087 /* We need no conversion. */
3017 return str; 3088 return (NILP (nocopy) ? Fcopy_sequence (str) : str);
3018 3089
3019 head_skip = begp - XSTRING (str)->data; 3090 head_skip = begp - XSTRING (str)->data;
3020 tail_skip = XSTRING (str)->size - head_skip - (endp - begp); 3091 tail_skip = XSTRING (str)->size - head_skip - (endp - begp);
3021 3092
3022 GCPRO1 (str); 3093 GCPRO1 (str);
3042 3113
3043 return make_string (buf, head_skip + produced + tail_skip); 3114 return make_string (buf, head_skip + produced + tail_skip);
3044 } 3115 }
3045 3116
3046 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, 3117 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
3047 3, 3, 0, 3118 3, 3, "r\nzCoding system: ",
3048 "Decode the text between START and END which is encoded in CODING-SYSTEM.\n\ 3119 "Decode current region by specified coding system.\n\
3120 When called from a program, takes three arguments:\n\
3121 START, END, and CODING-SYSTEM. START END are buffer positions.\n\
3049 Return length of decoded text.") 3122 Return length of decoded text.")
3050 (b, e, coding_system) 3123 (b, e, coding_system)
3051 Lisp_Object b, e, coding_system; 3124 Lisp_Object b, e, coding_system;
3052 { 3125 {
3053 struct coding_system coding; 3126 struct coding_system coding;
3054 3127
3055 CHECK_NUMBER_COERCE_MARKER (b, 0); 3128 CHECK_NUMBER_COERCE_MARKER (b, 0);
3056 CHECK_NUMBER_COERCE_MARKER (e, 1); 3129 CHECK_NUMBER_COERCE_MARKER (e, 1);
3057 CHECK_SYMBOL (coding_system, 2); 3130 CHECK_SYMBOL (coding_system, 2);
3058 3131
3132 if (NILP (coding_system))
3133 return make_number (XFASTINT (e) - XFASTINT (b));
3059 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 3134 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3060 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); 3135 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3061 3136
3062 return code_convert_region (b, e, &coding, 0); 3137 return code_convert_region (b, e, &coding, 0);
3063 } 3138 }
3064 3139
3065 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region, 3140 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
3066 3, 3, 0, 3141 3, 3, "r\nzCoding system: ",
3067 "Encode the text between START and END to CODING-SYSTEM.\n\ 3142 "Encode current region by specified coding system.\n\
3143 When called from a program, takes three arguments:\n\
3144 START, END, and CODING-SYSTEM. START END are buffer positions.\n\
3068 Return length of encoded text.") 3145 Return length of encoded text.")
3069 (b, e, coding_system) 3146 (b, e, coding_system)
3070 Lisp_Object b, e, coding_system; 3147 Lisp_Object b, e, coding_system;
3071 { 3148 {
3072 struct coding_system coding; 3149 struct coding_system coding;
3073 3150
3074 CHECK_NUMBER_COERCE_MARKER (b, 0); 3151 CHECK_NUMBER_COERCE_MARKER (b, 0);
3075 CHECK_NUMBER_COERCE_MARKER (e, 1); 3152 CHECK_NUMBER_COERCE_MARKER (e, 1);
3076 CHECK_SYMBOL (coding_system, 2); 3153 CHECK_SYMBOL (coding_system, 2);
3077 3154
3155 if (NILP (coding_system))
3156 return make_number (XFASTINT (e) - XFASTINT (b));
3078 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 3157 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3079 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); 3158 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3080 3159
3081 return code_convert_region (b, e, &coding, 1); 3160 return code_convert_region (b, e, &coding, 1);
3082 } 3161 }
3083 3162
3084 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, 3163 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
3085 2, 2, 0, 3164 2, 3, 0,
3086 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.") 3165 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
3087 (string, coding_system) 3166 Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
3088 Lisp_Object string, coding_system; 3167 of decoding.")
3168 (string, coding_system, nocopy)
3169 Lisp_Object string, coding_system, nocopy;
3089 { 3170 {
3090 struct coding_system coding; 3171 struct coding_system coding;
3091 3172
3092 CHECK_STRING (string, 0); 3173 CHECK_STRING (string, 0);
3093 CHECK_SYMBOL (coding_system, 1); 3174 CHECK_SYMBOL (coding_system, 1);
3094 3175
3176 if (NILP (coding_system))
3177 return (NILP (nocopy) ? Fcopy_sequence (string) : string);
3095 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 3178 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3096 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); 3179 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3097 3180
3098 return code_convert_string (string, &coding, 0); 3181 return code_convert_string (string, &coding, 0, nocopy);
3099 } 3182 }
3100 3183
3101 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string, 3184 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
3102 2, 2, 0, 3185 2, 3, 0,
3103 "Encode STRING to CODING-SYSTEM, and return the result.") 3186 "Encode STRING to CODING-SYSTEM, and return the result.\n\
3104 (string, coding_system) 3187 Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\
3105 Lisp_Object string, coding_system; 3188 of encoding.")
3189 (string, coding_system, nocopy)
3190 Lisp_Object string, coding_system, nocopy;
3106 { 3191 {
3107 struct coding_system coding; 3192 struct coding_system coding;
3108 3193
3109 CHECK_STRING (string, 0); 3194 CHECK_STRING (string, 0);
3110 CHECK_SYMBOL (coding_system, 1); 3195 CHECK_SYMBOL (coding_system, 1);
3111 3196
3197 if (NILP (coding_system))
3198 return (NILP (nocopy) ? Fcopy_sequence (string) : string);
3112 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) 3199 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
3113 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); 3200 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data);
3114 3201
3115 return code_convert_string (string, &coding, 1); 3202 return code_convert_string (string, &coding, 1, nocopy);
3116 } 3203 }
3117 3204
3118 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, 3205 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
3119 "Decode a JISX0208 character of SJIS coding-system-sjis.\n\ 3206 "Decode a JISX0208 character of shift-jis encoding.\n\
3120 CODE is the character code in SJIS.\n\ 3207 CODE is the character code in SJIS.\n\
3121 Return the corresponding character.") 3208 Return the corresponding character.")
3122 (code) 3209 (code)
3123 Lisp_Object code; 3210 Lisp_Object code;
3124 { 3211 {
3253 For process I/O, TARGET is a process name.\n\ 3340 For process I/O, TARGET is a process name.\n\
3254 For network I/O, TARGET is a service name or a port number\n\ 3341 For network I/O, TARGET is a service name or a port number\n\
3255 \n\ 3342 \n\
3256 The return value is a cons of coding systems for decoding and encoding\n\ 3343 The return value is a cons of coding systems for decoding and encoding\n\
3257 registered in nested alist `coding-system-alist' (which see) at a slot\n\ 3344 registered in nested alist `coding-system-alist' (which see) at a slot\n\
3258 corresponding to OPERATION and TARGET. 3345 corresponding to OPERATION and TARGET.\n\
3259 If a function symbol is at the slot, return a result of the function call.\n\ 3346 If a function symbol is at the slot, return a result of the function call.\n\
3260 The function is called with one argument, a list of all the arguments.") 3347 The function is called with one argument, a list of all the arguments.")
3261 (nargs, args) 3348 (nargs, args)
3262 int nargs; 3349 int nargs;
3263 Lisp_Object *args; 3350 Lisp_Object *args;
3344 iso_code_class[ISO_CODE_ESC] = ISO_escape; 3431 iso_code_class[ISO_CODE_ESC] = ISO_escape;
3345 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2; 3432 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
3346 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3; 3433 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
3347 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer; 3434 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
3348 3435
3436 conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
3437 conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
3438
3439 setup_coding_system (Qnil, &keyboard_coding);
3440 setup_coding_system (Qnil, &terminal_coding);
3441 }
3442
3443 #ifdef emacs
3444
3445 syms_of_coding ()
3446 {
3447 Qtarget_idx = intern ("target-idx");
3448 staticpro (&Qtarget_idx);
3449
3450 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
3451 Fput (Qwrite_region, Qtarget_idx, make_number (2));
3452
3453 Qcall_process = intern ("call-process");
3454 staticpro (&Qcall_process);
3455 Fput (Qcall_process, Qtarget_idx, make_number (0));
3456
3457 Qcall_process_region = intern ("call-process-region");
3458 staticpro (&Qcall_process_region);
3459 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
3460
3461 Qstart_process = intern ("start-process");
3462 staticpro (&Qstart_process);
3463 Fput (Qstart_process, Qtarget_idx, make_number (2));
3464
3465 Qopen_network_stream = intern ("open-network-stream");
3466 staticpro (&Qopen_network_stream);
3467 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
3468
3349 Qcoding_system = intern ("coding-system"); 3469 Qcoding_system = intern ("coding-system");
3350 staticpro (&Qcoding_system); 3470 staticpro (&Qcoding_system);
3351 3471
3352 Qeol_type = intern ("eol-type"); 3472 Qeol_type = intern ("eol-type");
3353 staticpro (&Qeol_type); 3473 staticpro (&Qeol_type);
3386 staticpro (&coding_category_table[i]); 3506 staticpro (&coding_category_table[i]);
3387 Fput (coding_category_table[i], Qcoding_category_index, 3507 Fput (coding_category_table[i], Qcoding_category_index,
3388 make_number (i)); 3508 make_number (i));
3389 } 3509 }
3390 } 3510 }
3391
3392 conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
3393 conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
3394
3395 setup_coding_system (Qnil, &keyboard_coding);
3396 setup_coding_system (Qnil, &terminal_coding);
3397 }
3398
3399 #ifdef emacs
3400
3401 syms_of_coding ()
3402 {
3403 Qtarget_idx = intern ("target-idx");
3404 staticpro (&Qtarget_idx);
3405
3406 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
3407 Fput (Qwrite_region, Qtarget_idx, make_number (2));
3408
3409 Qcall_process = intern ("call-process");
3410 staticpro (&Qcall_process);
3411 Fput (Qcall_process, Qtarget_idx, make_number (0));
3412
3413 Qcall_process_region = intern ("call-process-region");
3414 staticpro (&Qcall_process_region);
3415 Fput (Qcall_process_region, Qtarget_idx, make_number (2));
3416
3417 Qstart_process = intern ("start-process");
3418 staticpro (&Qstart_process);
3419 Fput (Qstart_process, Qtarget_idx, make_number (2));
3420
3421 Qopen_network_stream = intern ("open-network-stream");
3422 staticpro (&Qopen_network_stream);
3423 Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
3424 3511
3425 defsubr (&Scoding_system_vector); 3512 defsubr (&Scoding_system_vector);
3426 defsubr (&Scoding_system_p); 3513 defsubr (&Scoding_system_p);
3427 defsubr (&Sread_coding_system); 3514 defsubr (&Sread_coding_system);
3428 defsubr (&Sread_non_nil_coding_system); 3515 defsubr (&Sread_non_nil_coding_system);
3470 Vlast_coding_system_used = Qnil; 3557 Vlast_coding_system_used = Qnil;
3471 3558
3472 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist, 3559 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
3473 "Nested alist to decide a coding system for a specific I/O operation.\n\ 3560 "Nested alist to decide a coding system for a specific I/O operation.\n\
3474 The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\ 3561 The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\
3475 3562 \n\
3476 OPERATION is one of the following Emacs I/O primitives:\n\ 3563 OPERATION is one of the following Emacs I/O primitives:\n\
3477 For file I/O, insert-file-contents and write-region.\n\ 3564 For file I/O, insert-file-contents and write-region.\n\
3478 For process I/O, call-process, call-process-region, and start-process.\n\ 3565 For process I/O, call-process, call-process-region, and start-process.\n\
3479 For network I/O, open-network-stream.\n\ 3566 For network I/O, open-network-stream.\n\
3480 In addition, for process I/O, `process-argument' can be specified for\n\ 3567 In addition, for process I/O, `process-argument' can be specified for\n\