Mercurial > emacs
comparison src/coding.c @ 17119:2cfb31c15ced
(create_process, Fopen_network_stream): Typo in indexes
of array proc_encode_coding_system fixed.
Remove prefix "coding-system-" from coding system symbol names.
(encode_coding) : Fix typo ("=" -> "==").
(detect_coding_iso2022): Detect coding-category-iso-8-2
more precisely.
(ENCODE_RESET_PLANE_AND_REGISTER): Argument `eol' is
deleted. Don't call ENCODE_DESIGNATION if nothing designated
initially.
(encode_designation_at_bol) New function.
(encode_coding_iso2022): Handle CODING_FLAG_ISO_INIT_AT_BOL and
CODING_FLAG_ISO_DESIGNATE_AT_BOL.
(setup_coding_system): Now, flags of ISO2022 coding
systems contains charsets instead of charset IDs.
(detect_coding_iso2022, decode_coding_iso2022): Make the code
robust against invalid SI and SO.
(Ffind_coding_system, syms_of_coding): Escape newlines in docstring.
(setup_coding_system): Correct setting coding->symbol
and coding->eol_type. The performance improved.
| author | Kenichi Handa <handa@m17n.org> |
|---|---|
| date | Thu, 27 Feb 1997 11:10:42 +0000 |
| parents | 70194012fb3a |
| children | bd8d38879c97 |
comparison
equal
deleted
inserted
replaced
| 17118:dcfb481ee914 | 17119:2cfb31c15ced |
|---|---|
| 579 | 579 |
| 580 int | 580 int |
| 581 detect_coding_iso2022 (src, src_end) | 581 detect_coding_iso2022 (src, src_end) |
| 582 unsigned char *src, *src_end; | 582 unsigned char *src, *src_end; |
| 583 { | 583 { |
| 584 unsigned char graphic_register[4]; | 584 unsigned char c, g1 = 0; |
| 585 unsigned char c, esc_cntl; | |
| 586 int mask = (CODING_CATEGORY_MASK_ISO_7 | 585 int mask = (CODING_CATEGORY_MASK_ISO_7 |
| 587 | CODING_CATEGORY_MASK_ISO_8_1 | 586 | CODING_CATEGORY_MASK_ISO_8_1 |
| 588 | CODING_CATEGORY_MASK_ISO_8_2); | 587 | CODING_CATEGORY_MASK_ISO_8_2); |
| 589 /* We may look ahead maximum 3 bytes. */ | 588 /* We may look ahead at most 4 bytes. */ |
| 590 unsigned char *adjusted_src_end = src_end - 3; | 589 unsigned char *adjusted_src_end = src_end - 4; |
| 591 int i; | 590 int i; |
| 592 | 591 |
| 593 for (i = 0; i < 4; i++) | 592 while (src < src_end) |
| 594 graphic_register[i] = CHARSET_ASCII; | |
| 595 | |
| 596 while (src < adjusted_src_end) | |
| 597 { | 593 { |
| 598 c = *src++; | 594 c = *src++; |
| 599 switch (c) | 595 switch (c) |
| 600 { | 596 { |
| 601 case ISO_CODE_ESC: | 597 case ISO_CODE_ESC: |
| 602 if (src >= adjusted_src_end) | 598 if (src >= src_end) |
| 603 break; | 599 break; |
| 604 c = *src++; | 600 c = *src++; |
| 605 if (c == '$') | 601 if (src + 2 >= src_end |
| 602 && ((c >= '(' && c <= '/') | |
| 603 || c == '$' && ((*src >= '(' && *src <= '/') | |
| 604 || (*src >= '@' && *src <= 'B')))) | |
| 606 { | 605 { |
| 607 /* Designation of 2-byte character set. */ | 606 /* Valid designation sequence. */ |
| 608 if (src >= adjusted_src_end) | 607 if (c == ')' || (c == '$' && *src == ')')) |
| 609 break; | 608 g1 = 1; |
| 610 c = *src++; | 609 src++; |
| 610 break; | |
| 611 } | 611 } |
| 612 if ((c >= ')' && c <= '+') || (c >= '-' && c <= '/')) | |
| 613 /* Designation to graphic register 1, 2, or 3. */ | |
| 614 mask &= ~CODING_CATEGORY_MASK_ISO_7; | |
| 615 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') | 612 else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') |
| 616 return CODING_CATEGORY_MASK_ISO_ELSE; | 613 return CODING_CATEGORY_MASK_ISO_ELSE; |
| 617 break; | 614 break; |
| 618 | 615 |
| 619 case ISO_CODE_SI: | |
| 620 case ISO_CODE_SO: | 616 case ISO_CODE_SO: |
| 621 return CODING_CATEGORY_MASK_ISO_ELSE; | 617 if (g1) |
| 622 | 618 return CODING_CATEGORY_MASK_ISO_ELSE; |
| 619 break; | |
| 620 | |
| 623 case ISO_CODE_CSI: | 621 case ISO_CODE_CSI: |
| 624 case ISO_CODE_SS2: | 622 case ISO_CODE_SS2: |
| 625 case ISO_CODE_SS3: | 623 case ISO_CODE_SS3: |
| 626 mask &= ~CODING_CATEGORY_MASK_ISO_7; | 624 mask &= ~CODING_CATEGORY_MASK_ISO_7; |
| 627 break; | 625 break; |
| 634 else | 632 else |
| 635 { | 633 { |
| 636 int count = 1; | 634 int count = 1; |
| 637 | 635 |
| 638 mask &= ~CODING_CATEGORY_MASK_ISO_7; | 636 mask &= ~CODING_CATEGORY_MASK_ISO_7; |
| 639 while (src < adjusted_src_end && *src >= 0xA0) | 637 while (src < src_end && *src >= 0xA0) |
| 640 count++, src++; | 638 count++, src++; |
| 641 if (count & 1 && src < adjusted_src_end) | 639 if (count & 1 && src < src_end) |
| 642 mask &= ~CODING_CATEGORY_MASK_ISO_8_2; | 640 mask &= ~CODING_CATEGORY_MASK_ISO_8_2; |
| 643 } | 641 } |
| 644 break; | 642 break; |
| 645 } | 643 } |
| 646 } | 644 } |
| 792 *dst++ = c1; | 790 *dst++ = c1; |
| 793 } | 791 } |
| 794 break; | 792 break; |
| 795 | 793 |
| 796 case ISO_shift_out: | 794 case ISO_shift_out: |
| 795 if (CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0) | |
| 796 goto label_invalid_escape_sequence; | |
| 797 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; | 797 CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; |
| 798 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | 798 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); |
| 799 break; | 799 break; |
| 800 | 800 |
| 801 case ISO_shift_in: | 801 case ISO_shift_in: |
| 828 switch (c1) | 828 switch (c1) |
| 829 { | 829 { |
| 830 case '&': /* revision of following character set */ | 830 case '&': /* revision of following character set */ |
| 831 ONE_MORE_BYTE (c1); | 831 ONE_MORE_BYTE (c1); |
| 832 if (!(c1 >= '@' && c1 <= '~')) | 832 if (!(c1 >= '@' && c1 <= '~')) |
| 833 { | 833 goto label_invalid_escape_sequence; |
| 834 goto label_invalid_escape_sequence; | |
| 835 } | |
| 836 ONE_MORE_BYTE (c1); | 834 ONE_MORE_BYTE (c1); |
| 837 if (c1 != ISO_CODE_ESC) | 835 if (c1 != ISO_CODE_ESC) |
| 838 { | 836 goto label_invalid_escape_sequence; |
| 839 goto label_invalid_escape_sequence; | |
| 840 } | |
| 841 ONE_MORE_BYTE (c1); | 837 ONE_MORE_BYTE (c1); |
| 842 goto label_escape_sequence; | 838 goto label_escape_sequence; |
| 843 | 839 |
| 844 case '$': /* designation of 2-byte character set */ | 840 case '$': /* designation of 2-byte character set */ |
| 845 ONE_MORE_BYTE (c1); | 841 ONE_MORE_BYTE (c1); |
| 857 { /* designation of DIMENSION2_CHARS96 character set */ | 853 { /* designation of DIMENSION2_CHARS96 character set */ |
| 858 ONE_MORE_BYTE (c2); | 854 ONE_MORE_BYTE (c2); |
| 859 DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2); | 855 DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2); |
| 860 } | 856 } |
| 861 else | 857 else |
| 862 { | 858 goto label_invalid_escape_sequence; |
| 863 goto label_invalid_escape_sequence; | |
| 864 } | |
| 865 break; | 859 break; |
| 866 | 860 |
| 867 case 'n': /* invocation of locking-shift-2 */ | 861 case 'n': /* invocation of locking-shift-2 */ |
| 862 if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) | |
| 863 goto label_invalid_escape_sequence; | |
| 868 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; | 864 CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; |
| 865 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | |
| 869 break; | 866 break; |
| 870 | 867 |
| 871 case 'o': /* invocation of locking-shift-3 */ | 868 case 'o': /* invocation of locking-shift-3 */ |
| 869 if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) | |
| 870 goto label_invalid_escape_sequence; | |
| 872 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; | 871 CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; |
| 872 charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); | |
| 873 break; | 873 break; |
| 874 | 874 |
| 875 case 'N': /* invocation of single-shift-2 */ | 875 case 'N': /* invocation of single-shift-2 */ |
| 876 if (CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0) | |
| 877 goto label_invalid_escape_sequence; | |
| 876 ONE_MORE_BYTE (c1); | 878 ONE_MORE_BYTE (c1); |
| 877 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2); | 879 charset = CODING_SPEC_ISO_DESIGNATION (coding, 2); |
| 878 DECODE_ISO_CHARACTER (charset, c1); | 880 DECODE_ISO_CHARACTER (charset, c1); |
| 879 break; | 881 break; |
| 880 | 882 |
| 881 case 'O': /* invocation of single-shift-3 */ | 883 case 'O': /* invocation of single-shift-3 */ |
| 884 if (CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0) | |
| 885 goto label_invalid_escape_sequence; | |
| 882 ONE_MORE_BYTE (c1); | 886 ONE_MORE_BYTE (c1); |
| 883 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3); | 887 charset = CODING_SPEC_ISO_DESIGNATION (coding, 3); |
| 884 DECODE_ISO_CHARACTER (charset, c1); | 888 DECODE_ISO_CHARACTER (charset, c1); |
| 885 break; | 889 break; |
| 886 | 890 |
| 1244 #define ENCODE_DIRECTION_L2R \ | 1248 #define ENCODE_DIRECTION_L2R \ |
| 1245 ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']' | 1249 ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']' |
| 1246 | 1250 |
| 1247 /* Produce codes for designation and invocation to reset the graphic | 1251 /* Produce codes for designation and invocation to reset the graphic |
| 1248 planes and registers to initial state. */ | 1252 planes and registers to initial state. */ |
| 1249 #define ENCODE_RESET_PLANE_AND_REGISTER(eol) \ | 1253 #define ENCODE_RESET_PLANE_AND_REGISTER \ |
| 1250 do { \ | 1254 do { \ |
| 1251 int reg; \ | 1255 int reg; \ |
| 1252 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \ | 1256 if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) \ |
| 1253 ENCODE_SHIFT_IN; \ | 1257 ENCODE_SHIFT_IN; \ |
| 1254 for (reg = 0; reg < 4; reg++) \ | 1258 for (reg = 0; reg < 4; reg++) \ |
| 1255 { \ | 1259 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0 \ |
| 1256 if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) < 0) \ | 1260 && (CODING_SPEC_ISO_DESIGNATION (coding, reg) \ |
| 1257 { \ | 1261 != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg))) \ |
| 1258 if (eol) CODING_SPEC_ISO_DESIGNATION (coding, reg) = -1; \ | 1262 ENCODE_DESIGNATION \ |
| 1259 } \ | 1263 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \ |
| 1260 else if (CODING_SPEC_ISO_DESIGNATION (coding, reg) \ | |
| 1261 != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)) \ | |
| 1262 ENCODE_DESIGNATION \ | |
| 1263 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \ | |
| 1264 } \ | |
| 1265 } while (0) | 1264 } while (0) |
| 1265 | |
| 1266 int | |
| 1267 encode_designation_at_bol (coding, src, src_end, dstp) | |
| 1268 struct coding_system *coding; | |
| 1269 unsigned char *src, *src_end, **dstp; | |
| 1270 { | |
| 1271 int charset, reg, r[4]; | |
| 1272 unsigned char *dst = *dstp, c; | |
| 1273 for (reg = 0; reg < 4; reg++) r[reg] = -1; | |
| 1274 while (src < src_end && (c = *src++) != '\n') | |
| 1275 { | |
| 1276 switch (emacs_code_class[c]) | |
| 1277 { | |
| 1278 case EMACS_ascii_code: | |
| 1279 charset = CHARSET_ASCII; | |
| 1280 break; | |
| 1281 case EMACS_leading_code_2: | |
| 1282 if (++src >= src_end) continue; | |
| 1283 charset = c; | |
| 1284 break; | |
| 1285 case EMACS_leading_code_3: | |
| 1286 if ((src += 2) >= src_end) continue; | |
| 1287 charset = (c < LEADING_CODE_PRIVATE_11 ? c : *(src - 2)); | |
| 1288 break; | |
| 1289 case EMACS_leading_code_4: | |
| 1290 if ((src += 3) >= src_end) continue; | |
| 1291 charset = *(src - 3); | |
| 1292 break; | |
| 1293 default: | |
| 1294 continue; | |
| 1295 } | |
| 1296 reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); | |
| 1297 if (r[reg] < 0 | |
| 1298 && CODING_SPEC_ISO_DESIGNATION (coding, reg) != charset) | |
| 1299 r[reg] = charset; | |
| 1300 } | |
| 1301 if (c != '\n' && !coding->last_block) | |
| 1302 return -1; | |
| 1303 for (reg = 0; reg < 4; reg++) | |
| 1304 if (r[reg] >= 0) | |
| 1305 ENCODE_DESIGNATION (r[reg], reg, coding); | |
| 1306 *dstp = dst; | |
| 1307 return 0; | |
| 1308 } | |
| 1266 | 1309 |
| 1267 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ | 1310 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ |
| 1268 | 1311 |
| 1269 int | 1312 int |
| 1270 encode_coding_iso2022 (coding, source, destination, | 1313 encode_coding_iso2022 (coding, source, destination, |
| 1276 { | 1319 { |
| 1277 unsigned char *src = source; | 1320 unsigned char *src = source; |
| 1278 unsigned char *src_end = source + src_bytes; | 1321 unsigned char *src_end = source + src_bytes; |
| 1279 unsigned char *dst = destination; | 1322 unsigned char *dst = destination; |
| 1280 unsigned char *dst_end = destination + dst_bytes; | 1323 unsigned char *dst_end = destination + dst_bytes; |
| 1281 /* Since the maximum bytes produced by each loop is 6, we subtract 5 | 1324 /* Since the maximum bytes produced by each loop is 20, we subtract 19 |
| 1282 from DST_END to assure overflow checking is necessary only at the | 1325 from DST_END to assure overflow checking is necessary only at the |
| 1283 head of loop. */ | 1326 head of loop. */ |
| 1284 unsigned char *adjusted_dst_end = dst_end - 5; | 1327 unsigned char *adjusted_dst_end = dst_end - 19; |
| 1285 | 1328 |
| 1286 while (src < src_end && dst < adjusted_dst_end) | 1329 while (src < src_end && dst < adjusted_dst_end) |
| 1287 { | 1330 { |
| 1288 /* SRC_BASE remembers the start position in source in each loop. | 1331 /* SRC_BASE remembers the start position in source in each loop. |
| 1289 The loop will be exited when there's not enough source text | 1332 The loop will be exited when there's not enough source text |
| 1290 to analyze multi-byte codes (within macros ONE_MORE_BYTE, | 1333 to analyze multi-byte codes (within macros ONE_MORE_BYTE, |
| 1291 TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is | 1334 TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is |
| 1292 reset to SRC_BASE before exiting. */ | 1335 reset to SRC_BASE before exiting. */ |
| 1293 unsigned char *src_base = src; | 1336 unsigned char *src_base = src; |
| 1294 unsigned char c1 = *src++, c2, c3, c4; | 1337 unsigned char c1, c2, c3, c4; |
| 1295 int charset; | 1338 int charset; |
| 1296 | 1339 |
| 1340 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL | |
| 1341 && CODING_SPEC_ISO_BOL (coding)) | |
| 1342 { | |
| 1343 /* We have to produce destination sequences now. */ | |
| 1344 if (encode_designation_at_bol (coding, src, src_end, &dst) < 0) | |
| 1345 /* We can't find end of line in the current block. Let's | |
| 1346 repeat encoding starting from the current position | |
| 1347 pointed by SRC. */ | |
| 1348 break; | |
| 1349 CODING_SPEC_ISO_BOL (coding) = 0; | |
| 1350 } | |
| 1351 | |
| 1352 c1 = *src++; | |
| 1297 /* If we are seeing a component of a composite character, we are | 1353 /* If we are seeing a component of a composite character, we are |
| 1298 seeing a leading-code specially encoded for composition, or a | 1354 seeing a leading-code specially encoded for composition, or a |
| 1299 composition rule if composing with rule. We must set C1 | 1355 composition rule if composing with rule. We must set C1 |
| 1300 to a normal leading-code or an ASCII code. If we are not at | 1356 to a normal leading-code or an ASCII code. If we are not at |
| 1301 a composed character, we must reset the composition state. */ | 1357 a composed character, we must reset the composition state. */ |
| 1337 ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1); | 1393 ENCODE_ISO_CHARACTER_DIMENSION1 (CHARSET_ASCII, c1); |
| 1338 break; | 1394 break; |
| 1339 | 1395 |
| 1340 case EMACS_control_code: | 1396 case EMACS_control_code: |
| 1341 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) | 1397 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) |
| 1342 ENCODE_RESET_PLANE_AND_REGISTER (0); | 1398 ENCODE_RESET_PLANE_AND_REGISTER; |
| 1343 *dst++ = c1; | 1399 *dst++ = c1; |
| 1344 break; | 1400 break; |
| 1345 | 1401 |
| 1346 case EMACS_carriage_return_code: | 1402 case EMACS_carriage_return_code: |
| 1347 if (!coding->selective) | 1403 if (!coding->selective) |
| 1348 { | 1404 { |
| 1349 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) | 1405 if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL) |
| 1350 ENCODE_RESET_PLANE_AND_REGISTER (0); | 1406 ENCODE_RESET_PLANE_AND_REGISTER; |
| 1351 *dst++ = c1; | 1407 *dst++ = c1; |
| 1352 break; | 1408 break; |
| 1353 } | 1409 } |
| 1354 /* fall down to treat '\r' as '\n' ... */ | 1410 /* fall down to treat '\r' as '\n' ... */ |
| 1355 | 1411 |
| 1356 case EMACS_linefeed_code: | 1412 case EMACS_linefeed_code: |
| 1357 if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL) | 1413 if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL) |
| 1358 ENCODE_RESET_PLANE_AND_REGISTER (1); | 1414 ENCODE_RESET_PLANE_AND_REGISTER; |
| 1415 if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL) | |
| 1416 bcopy (coding->spec.iso2022.initial_designation, | |
| 1417 coding->spec.iso2022.current_designation, | |
| 1418 sizeof coding->spec.iso2022.initial_designation); | |
| 1359 if (coding->eol_type == CODING_EOL_LF | 1419 if (coding->eol_type == CODING_EOL_LF |
| 1360 || coding->eol_type == CODING_EOL_AUTOMATIC) | 1420 || coding->eol_type == CODING_EOL_AUTOMATIC) |
| 1361 *dst++ = ISO_CODE_LF; | 1421 *dst++ = ISO_CODE_LF; |
| 1362 else if (coding->eol_type == CODING_EOL_CRLF) | 1422 else if (coding->eol_type == CODING_EOL_CRLF) |
| 1363 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; | 1423 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF; |
| 1364 else | 1424 else |
| 1365 *dst++ = ISO_CODE_CR; | 1425 *dst++ = ISO_CODE_CR; |
| 1426 CODING_SPEC_ISO_BOL (coding) = 1; | |
| 1366 break; | 1427 break; |
| 1367 | 1428 |
| 1368 case EMACS_leading_code_2: | 1429 case EMACS_leading_code_2: |
| 1369 ONE_MORE_BYTE (c2); | 1430 ONE_MORE_BYTE (c2); |
| 1370 ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2); | 1431 ENCODE_ISO_CHARACTER_DIMENSION1 (c1, c2); |
| 1416 reset the state of graphic planes and registers to initial one. | 1477 reset the state of graphic planes and registers to initial one. |
| 1417 In addition, we had better just flush out all remaining codes in | 1478 In addition, we had better just flush out all remaining codes in |
| 1418 the text although they are not valid characters. */ | 1479 the text although they are not valid characters. */ |
| 1419 if (coding->last_block) | 1480 if (coding->last_block) |
| 1420 { | 1481 { |
| 1421 ENCODE_RESET_PLANE_AND_REGISTER (1); | 1482 ENCODE_RESET_PLANE_AND_REGISTER; |
| 1422 bcopy(src, dst, src_end - src); | 1483 bcopy(src, dst, src_end - src); |
| 1423 dst += (src_end - src); | 1484 dst += (src_end - src); |
| 1424 src = src_end; | 1485 src = src_end; |
| 1425 } | 1486 } |
| 1426 *consumed = src - source; | 1487 *consumed = src - source; |
| 1983 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING | 2044 and set it in CODING. If CODING_SYSTEM_SYMBOL is invalid, CODING |
| 1984 is setup so that no conversion is necessary and return -1, else | 2045 is setup so that no conversion is necessary and return -1, else |
| 1985 return 0. */ | 2046 return 0. */ |
| 1986 | 2047 |
| 1987 int | 2048 int |
| 1988 setup_coding_system (coding_system_symbol, coding) | 2049 setup_coding_system (coding_system, coding) |
| 1989 Lisp_Object coding_system_symbol; | 2050 Lisp_Object coding_system; |
| 1990 struct coding_system *coding; | 2051 struct coding_system *coding; |
| 1991 { | 2052 { |
| 1992 Lisp_Object coding_system_vector = Qnil; | |
| 1993 Lisp_Object type, eol_type; | 2053 Lisp_Object type, eol_type; |
| 1994 | 2054 |
| 1995 /* At first, set several fields default values. */ | 2055 /* At first, set several fields default values. */ |
| 1996 coding->require_flushing = 0; | 2056 coding->require_flushing = 0; |
| 1997 coding->last_block = 0; | 2057 coding->last_block = 0; |
| 1998 coding->selective = 0; | 2058 coding->selective = 0; |
| 1999 coding->composing = 0; | 2059 coding->composing = 0; |
| 2000 coding->direction = 0; | 2060 coding->direction = 0; |
| 2001 coding->carryover_size = 0; | 2061 coding->carryover_size = 0; |
| 2002 coding->symbol = Qnil; | |
| 2003 coding->post_read_conversion = coding->pre_write_conversion = Qnil; | 2062 coding->post_read_conversion = coding->pre_write_conversion = Qnil; |
| 2004 | 2063 |
| 2005 /* Get value of property `coding-system'. If it is a Lisp symbol | 2064 Vlast_coding_system_used = coding->symbol = coding_system; |
| 2006 pointing another coding system, fetch its property until we get a | 2065 eol_type = Qnil; |
| 2007 vector. */ | 2066 /* Get value of property `coding-system' until we get a vector. |
| 2008 while (!NILP (coding_system_symbol)) | 2067 While doing that, also get values of properties |
| 2068 `post-read-conversion', `pre-write-conversion', and `eol-type'. */ | |
| 2069 while (!NILP (coding_system) && SYMBOLP (coding_system)) | |
| 2009 { | 2070 { |
| 2010 coding->symbol = coding_system_symbol; | |
| 2011 if (NILP (coding->post_read_conversion)) | 2071 if (NILP (coding->post_read_conversion)) |
| 2012 coding->post_read_conversion = Fget (coding_system_symbol, | 2072 coding->post_read_conversion = Fget (coding_system, |
| 2013 Qpost_read_conversion); | 2073 Qpost_read_conversion); |
| 2014 if (NILP (coding->pre_write_conversion)) | 2074 if (NILP (coding->pre_write_conversion)) |
| 2015 coding->pre_write_conversion = Fget (coding_system_symbol, | 2075 coding->pre_write_conversion = Fget (coding_system, |
| 2016 Qpre_write_conversion); | 2076 Qpre_write_conversion); |
| 2017 | 2077 if (NILP (eol_type)) |
| 2018 coding_system_vector = Fget (coding_system_symbol, Qcoding_system); | 2078 eol_type = Fget (coding_system, Qeol_type); |
| 2019 if (VECTORP (coding_system_vector)) | 2079 coding_system = Fget (coding_system, Qcoding_system); |
| 2020 break; | |
| 2021 coding_system_symbol = coding_system_vector; | |
| 2022 } | 2080 } |
| 2023 Vlast_coding_system_used = coding->symbol; | 2081 if (!VECTORP (coding_system) |
| 2024 | 2082 || XVECTOR (coding_system)->size != 5) |
| 2025 if (!VECTORP (coding_system_vector) | |
| 2026 || XVECTOR (coding_system_vector)->size != 5) | |
| 2027 goto label_invalid_coding_system; | 2083 goto label_invalid_coding_system; |
| 2028 | |
| 2029 /* Get value of property `eol-type' by searching from the root | |
| 2030 coding-system. */ | |
| 2031 coding_system_symbol = coding->symbol; | |
| 2032 eol_type = Qnil; | |
| 2033 while (SYMBOLP (coding_system_symbol) && !NILP (coding_system_symbol)) | |
| 2034 { | |
| 2035 eol_type = Fget (coding_system_symbol, Qeol_type); | |
| 2036 if (!NILP (eol_type)) | |
| 2037 break; | |
| 2038 coding_system_symbol = Fget (coding_system_symbol, Qcoding_system); | |
| 2039 } | |
| 2040 | 2084 |
| 2041 if (VECTORP (eol_type)) | 2085 if (VECTORP (eol_type)) |
| 2042 coding->eol_type = CODING_EOL_AUTOMATIC; | 2086 coding->eol_type = CODING_EOL_AUTOMATIC; |
| 2043 else if (XFASTINT (eol_type) == 1) | 2087 else if (XFASTINT (eol_type) == 1) |
| 2044 coding->eol_type = CODING_EOL_CRLF; | 2088 coding->eol_type = CODING_EOL_CRLF; |
| 2045 else if (XFASTINT (eol_type) == 2) | 2089 else if (XFASTINT (eol_type) == 2) |
| 2046 coding->eol_type = CODING_EOL_CR; | 2090 coding->eol_type = CODING_EOL_CR; |
| 2047 else | 2091 else |
| 2048 coding->eol_type = CODING_EOL_LF; | 2092 coding->eol_type = CODING_EOL_LF; |
| 2049 | 2093 |
| 2050 type = XVECTOR (coding_system_vector)->contents[0]; | 2094 type = XVECTOR (coding_system)->contents[0]; |
| 2051 switch (XFASTINT (type)) | 2095 switch (XFASTINT (type)) |
| 2052 { | 2096 { |
| 2053 case 0: | 2097 case 0: |
| 2054 coding->type = coding_type_internal; | 2098 coding->type = coding_type_internal; |
| 2055 break; | 2099 break; |
| 2059 break; | 2103 break; |
| 2060 | 2104 |
| 2061 case 2: | 2105 case 2: |
| 2062 coding->type = coding_type_iso2022; | 2106 coding->type = coding_type_iso2022; |
| 2063 { | 2107 { |
| 2064 Lisp_Object val = XVECTOR (coding_system_vector)->contents[4]; | 2108 Lisp_Object val = XVECTOR (coding_system)->contents[4]; |
| 2065 Lisp_Object *flags; | 2109 Lisp_Object *flags; |
| 2066 int i, charset, default_reg_bits = 0; | 2110 int i, charset, default_reg_bits = 0; |
| 2067 | 2111 |
| 2068 if (!VECTORP (val) || XVECTOR (val)->size != 32) | 2112 if (!VECTORP (val) || XVECTOR (val)->size != 32) |
| 2069 goto label_invalid_coding_system; | 2113 goto label_invalid_coding_system; |
| 2076 | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS) | 2120 | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS) |
| 2077 | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT) | 2121 | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT) |
| 2078 | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT) | 2122 | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT) |
| 2079 | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN) | 2123 | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN) |
| 2080 | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS) | 2124 | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS) |
| 2081 | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)); | 2125 | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION) |
| 2126 | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL) | |
| 2127 | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)); | |
| 2082 | 2128 |
| 2083 /* Invoke graphic register 0 to plane 0. */ | 2129 /* Invoke graphic register 0 to plane 0. */ |
| 2084 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; | 2130 CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; |
| 2085 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */ | 2131 /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */ |
| 2086 CODING_SPEC_ISO_INVOCATION (coding, 1) | 2132 CODING_SPEC_ISO_INVOCATION (coding, 1) |
| 2087 = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1); | 2133 = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1); |
| 2088 /* Not single shifting at first. */ | 2134 /* Not single shifting at first. */ |
| 2089 CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0; | 2135 CODING_SPEC_ISO_SINGLE_SHIFTING(coding) = 0; |
| 2136 /* Beginning of buffer should also be regarded as bol. */ | |
| 2137 CODING_SPEC_ISO_BOL(coding) = 1; | |
| 2090 | 2138 |
| 2091 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. | 2139 /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. |
| 2092 FLAGS[REG] can be one of below: | 2140 FLAGS[REG] can be one of below: |
| 2093 integer CHARSET: CHARSET occupies register I, | 2141 integer CHARSET: CHARSET occupies register I, |
| 2094 t: designate nothing to REG initially, but can be used | 2142 t: designate nothing to REG initially, but can be used |
| 2101 for (charset = 0; charset < MAX_CHARSET; charset++) | 2149 for (charset = 0; charset < MAX_CHARSET; charset++) |
| 2102 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = -1; | 2150 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = -1; |
| 2103 for (i = 0; i < 4; i++) | 2151 for (i = 0; i < 4; i++) |
| 2104 { | 2152 { |
| 2105 if (INTEGERP (flags[i]) | 2153 if (INTEGERP (flags[i]) |
| 2106 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))) | 2154 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)) |
| 2155 || (charset = get_charset_id (flags[i])) >= 0) | |
| 2107 { | 2156 { |
| 2108 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; | 2157 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; |
| 2109 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; | 2158 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; |
| 2110 } | 2159 } |
| 2111 else if (EQ (flags[i], Qt)) | 2160 else if (EQ (flags[i], Qt)) |
| 2117 { | 2166 { |
| 2118 Lisp_Object tail = flags[i]; | 2167 Lisp_Object tail = flags[i]; |
| 2119 | 2168 |
| 2120 if (INTEGERP (XCONS (tail)->car) | 2169 if (INTEGERP (XCONS (tail)->car) |
| 2121 && (charset = XINT (XCONS (tail)->car), | 2170 && (charset = XINT (XCONS (tail)->car), |
| 2122 CHARSET_VALID_P (charset))) | 2171 CHARSET_VALID_P (charset)) |
| 2172 || (charset = get_charset_id (XCONS (tail)->car)) >= 0) | |
| 2123 { | 2173 { |
| 2124 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; | 2174 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; |
| 2125 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; | 2175 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; |
| 2126 } | 2176 } |
| 2127 else | 2177 else |
| 2129 tail = XCONS (tail)->cdr; | 2179 tail = XCONS (tail)->cdr; |
| 2130 while (CONSP (tail)) | 2180 while (CONSP (tail)) |
| 2131 { | 2181 { |
| 2132 if (INTEGERP (XCONS (tail)->car) | 2182 if (INTEGERP (XCONS (tail)->car) |
| 2133 && (charset = XINT (XCONS (tail)->car), | 2183 && (charset = XINT (XCONS (tail)->car), |
| 2134 CHARSET_VALID_P (charset))) | 2184 CHARSET_VALID_P (charset)) |
| 2185 || (charset = get_charset_id (XCONS (tail)->car)) >= 0) | |
| 2135 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) | 2186 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) |
| 2136 = i; | 2187 = i; |
| 2137 else if (EQ (XCONS (tail)->car, Qt)) | 2188 else if (EQ (XCONS (tail)->car, Qt)) |
| 2138 default_reg_bits |= 1 << i; | 2189 default_reg_bits |= 1 << i; |
| 2139 tail = XCONS (tail)->cdr; | 2190 tail = XCONS (tail)->cdr; |
| 2188 break; | 2239 break; |
| 2189 | 2240 |
| 2190 case 3: | 2241 case 3: |
| 2191 coding->type = coding_type_big5; | 2242 coding->type = coding_type_big5; |
| 2192 coding->flags | 2243 coding->flags |
| 2193 = (NILP (XVECTOR (coding_system_vector)->contents[4]) | 2244 = (NILP (XVECTOR (coding_system)->contents[4]) |
| 2194 ? CODING_FLAG_BIG5_HKU | 2245 ? CODING_FLAG_BIG5_HKU |
| 2195 : CODING_FLAG_BIG5_ETEN); | 2246 : CODING_FLAG_BIG5_ETEN); |
| 2196 break; | 2247 break; |
| 2197 | 2248 |
| 2198 case 4: | 2249 case 4: |
| 2199 coding->type = coding_type_ccl; | 2250 coding->type = coding_type_ccl; |
| 2200 { | 2251 { |
| 2201 Lisp_Object val = XVECTOR (coding_system_vector)->contents[4]; | 2252 Lisp_Object val = XVECTOR (coding_system)->contents[4]; |
| 2202 if (CONSP (val) | 2253 if (CONSP (val) |
| 2203 && VECTORP (XCONS (val)->car) | 2254 && VECTORP (XCONS (val)->car) |
| 2204 && VECTORP (XCONS (val)->cdr)) | 2255 && VECTORP (XCONS (val)->cdr)) |
| 2205 { | 2256 { |
| 2206 setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car); | 2257 setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car); |
| 2221 } | 2272 } |
| 2222 return 0; | 2273 return 0; |
| 2223 | 2274 |
| 2224 label_invalid_coding_system: | 2275 label_invalid_coding_system: |
| 2225 coding->type = coding_type_no_conversion; | 2276 coding->type = coding_type_no_conversion; |
| 2277 coding->symbol = coding->pre_write_conversion = coding->post_read_conversion | |
| 2278 = Qnil; | |
| 2226 return -1; | 2279 return -1; |
| 2227 } | 2280 } |
| 2228 | 2281 |
| 2229 /* Emacs has a mechanism to automatically detect a coding system if it | 2282 /* Emacs has a mechanism to automatically detect a coding system if it |
| 2230 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, | 2283 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, |
| 2234 | 2287 |
| 2235 o coding-category-internal | 2288 o coding-category-internal |
| 2236 | 2289 |
| 2237 The category for a coding system which has the same code range | 2290 The category for a coding system which has the same code range |
| 2238 as Emacs' internal format. Assigned the coding-system (Lisp | 2291 as Emacs' internal format. Assigned the coding-system (Lisp |
| 2239 symbol) `coding-system-internal' by default. | 2292 symbol) `internal' by default. |
| 2240 | 2293 |
| 2241 o coding-category-sjis | 2294 o coding-category-sjis |
| 2242 | 2295 |
| 2243 The category for a coding system which has the same code range | 2296 The category for a coding system which has the same code range |
| 2244 as SJIS. Assigned the coding-system (Lisp | 2297 as SJIS. Assigned the coding-system (Lisp |
| 2245 symbol) `coding-system-sjis' by default. | 2298 symbol) `shift-jis' by default. |
| 2246 | 2299 |
| 2247 o coding-category-iso-7 | 2300 o coding-category-iso-7 |
| 2248 | 2301 |
| 2249 The category for a coding system which has the same code range | 2302 The category for a coding system which has the same code range |
| 2250 as ISO2022 of 7-bit environment. Assigned the coding-system | 2303 as ISO2022 of 7-bit environment. Assigned the coding-system |
| 2251 (Lisp symbol) `coding-system-junet' by default. | 2304 (Lisp symbol) `iso-2022-7' by default. |
| 2252 | 2305 |
| 2253 o coding-category-iso-8-1 | 2306 o coding-category-iso-8-1 |
| 2254 | 2307 |
| 2255 The category for a coding system which has the same code range | 2308 The category for a coding system which has the same code range |
| 2256 as ISO2022 of 8-bit environment and graphic plane 1 used only | 2309 as ISO2022 of 8-bit environment and graphic plane 1 used only |
| 2257 for DIMENSION1 charset. Assigned the coding-system (Lisp | 2310 for DIMENSION1 charset. Assigned the coding-system (Lisp |
| 2258 symbol) `coding-system-ctext' by default. | 2311 symbol) `iso-8859-1' by default. |
| 2259 | 2312 |
| 2260 o coding-category-iso-8-2 | 2313 o coding-category-iso-8-2 |
| 2261 | 2314 |
| 2262 The category for a coding system which has the same code range | 2315 The category for a coding system which has the same code range |
| 2263 as ISO2022 of 8-bit environment and graphic plane 1 used only | 2316 as ISO2022 of 8-bit environment and graphic plane 1 used only |
| 2264 for DIMENSION2 charset. Assigned the coding-system (Lisp | 2317 for DIMENSION2 charset. Assigned the coding-system (Lisp |
| 2265 symbol) `coding-system-euc-japan' by default. | 2318 symbol) `euc-japan' by default. |
| 2266 | 2319 |
| 2267 o coding-category-iso-else | 2320 o coding-category-iso-else |
| 2268 | 2321 |
| 2269 The category for a coding system which has the same code range | 2322 The category for a coding system which has the same code range |
| 2270 as ISO2022 but not belongs to any of the above three | 2323 as ISO2022 but not belongs to any of the above three |
| 2271 categories. Assigned the coding-system (Lisp symbol) | 2324 categories. Assigned the coding-system (Lisp symbol) |
| 2272 `coding-system-iso-2022-ss2-7' by default. | 2325 `iso-2022-ss2-7' by default. |
| 2273 | 2326 |
| 2274 o coding-category-big5 | 2327 o coding-category-big5 |
| 2275 | 2328 |
| 2276 The category for a coding system which has the same code range | 2329 The category for a coding system which has the same code range |
| 2277 as BIG5. Assigned the coding-system (Lisp symbol) | 2330 as BIG5. Assigned the coding-system (Lisp symbol) |
| 2278 `coding-system-big5' by default. | 2331 `cn-big5' by default. |
| 2279 | 2332 |
| 2280 o coding-category-binary | 2333 o coding-category-binary |
| 2281 | 2334 |
| 2282 The category for a coding system not categorized in any of the | 2335 The category for a coding system not categorized in any of the |
| 2283 above. Assigned the coding-system (Lisp symbol) | 2336 above. Assigned the coding-system (Lisp symbol) |
| 2284 `coding-system-noconv' by default. | 2337 `no-conversion' by default. |
| 2285 | 2338 |
| 2286 Each of them is a Lisp symbol and the value is an actual | 2339 Each of them is a Lisp symbol and the value is an actual |
| 2287 `coding-system's (this is also a Lisp symbol) assigned by a user. | 2340 `coding-system's (this is also a Lisp symbol) assigned by a user. |
| 2288 What Emacs does actually is to detect a category of coding system. | 2341 What Emacs does actually is to detect a category of coding system. |
| 2289 Then, it uses a `coding-system' assigned to it. If Emacs can't | 2342 Then, it uses a `coding-system' assigned to it. If Emacs can't |
| 2547 bcopy (source, destination, produced); | 2600 bcopy (source, destination, produced); |
| 2548 if (coding->selective) | 2601 if (coding->selective) |
| 2549 { | 2602 { |
| 2550 unsigned char *p = destination, *pend = destination + produced; | 2603 unsigned char *p = destination, *pend = destination + produced; |
| 2551 while (p < pend) | 2604 while (p < pend) |
| 2552 if (*p++ = '\015') p[-1] = '\n'; | 2605 if (*p++ == '\015') p[-1] = '\n'; |
| 2553 } | 2606 } |
| 2554 } | 2607 } |
| 2555 *consumed = produced; | 2608 *consumed = produced; |
| 2556 break; | 2609 break; |
| 2557 | 2610 |
| 2685 return ((NILP (obj) || !NILP (Fcoding_system_vector (obj))) ? Qt : Qnil); | 2738 return ((NILP (obj) || !NILP (Fcoding_system_vector (obj))) ? Qt : Qnil); |
| 2686 } | 2739 } |
| 2687 | 2740 |
| 2688 DEFUN ("read-non-nil-coding-system", | 2741 DEFUN ("read-non-nil-coding-system", |
| 2689 Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0, | 2742 Fread_non_nil_coding_system, Sread_non_nil_coding_system, 1, 1, 0, |
| 2690 "Read a coding-system from the minibuffer, prompting with string PROMPT.") | 2743 "Read a coding system from the minibuffer, prompting with string PROMPT.") |
| 2691 (prompt) | 2744 (prompt) |
| 2692 Lisp_Object prompt; | 2745 Lisp_Object prompt; |
| 2693 { | 2746 { |
| 2694 return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_vector, | 2747 Lisp_Object val; |
| 2695 Qt, Qnil, Qnil), | 2748 do { |
| 2696 Qnil); | 2749 val = Fcompleting_read (prompt, Vobarray, Qcoding_system_vector, |
| 2750 Qt, Qnil, Qnil); | |
| 2751 } while (XSTRING (val)->size == 0); | |
| 2752 return (Fintern (val, Qnil)); | |
| 2697 } | 2753 } |
| 2698 | 2754 |
| 2699 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0, | 2755 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 1, 0, |
| 2700 "Read a coding-system or nil from the minibuffer, prompting with string PROMPT.") | 2756 "Read a coding system or nil from the minibuffer, prompting with string PROMPT.") |
| 2701 (prompt) | 2757 (prompt) |
| 2702 Lisp_Object prompt; | 2758 Lisp_Object prompt; |
| 2703 { | 2759 { |
| 2704 return Fintern (Fcompleting_read (prompt, Vobarray, Qcoding_system_p, | 2760 Lisp_Object val = Fcompleting_read (prompt, Vobarray, Qcoding_system_p, |
| 2705 Qt, Qnil, Qnil), | 2761 Qt, Qnil, Qnil); |
| 2706 Qnil); | 2762 return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil)); |
| 2707 } | 2763 } |
| 2708 | 2764 |
| 2709 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system, | 2765 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system, |
| 2710 1, 1, 0, | 2766 1, 1, 0, |
| 2711 "Check validity of CODING-SYSTEM.\n\ | 2767 "Check validity of CODING-SYSTEM.\n\ |
| 2724 | 2780 |
| 2725 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, | 2781 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, |
| 2726 2, 2, 0, | 2782 2, 2, 0, |
| 2727 "Detect coding-system of the text in the region between START and END.\n\ | 2783 "Detect coding-system of the text in the region between START and END.\n\ |
| 2728 Return a list of possible coding-systems ordered by priority.\n\ | 2784 Return a list of possible coding-systems ordered by priority.\n\ |
| 2729 If only ASCII characters are found, it returns `coding-system-automatic'\n\ | 2785 If only ASCII characters are found, it returns `automatic-conversion'\n\ |
| 2730 or its subsidiary coding-system according to a detected end-of-line format.") | 2786 or its subsidiary coding-system according to a detected end-of-line format.") |
| 2731 (b, e) | 2787 (b, e) |
| 2732 Lisp_Object b, e; | 2788 Lisp_Object b, e; |
| 2733 { | 2789 { |
| 2734 int coding_mask, eol_type; | 2790 int coding_mask, eol_type; |
| 2742 coding_mask = detect_coding_mask (POS_ADDR (beg), end - beg); | 2798 coding_mask = detect_coding_mask (POS_ADDR (beg), end - beg); |
| 2743 eol_type = detect_eol_type (POS_ADDR (beg), end - beg); | 2799 eol_type = detect_eol_type (POS_ADDR (beg), end - beg); |
| 2744 | 2800 |
| 2745 if (coding_mask == CODING_CATEGORY_MASK_ANY) | 2801 if (coding_mask == CODING_CATEGORY_MASK_ANY) |
| 2746 { | 2802 { |
| 2747 val = intern ("coding-system-automatic"); | 2803 val = intern ("automatic-conversion"); |
| 2748 if (eol_type != CODING_EOL_AUTOMATIC) | 2804 if (eol_type != CODING_EOL_AUTOMATIC) |
| 2749 { | 2805 { |
| 2750 Lisp_Object val2 = Fget (val, Qeol_type); | 2806 Lisp_Object val2 = Fget (val, Qeol_type); |
| 2751 if (VECTORP (val2)) | 2807 if (VECTORP (val2)) |
| 2752 val = XVECTOR (val2)->contents[eol_type]; | 2808 val = XVECTOR (val2)->contents[eol_type]; |
| 2821 *begp = *endp; | 2877 *begp = *endp; |
| 2822 return; | 2878 return; |
| 2823 case coding_type_ccl: | 2879 case coding_type_ccl: |
| 2824 /* We can't skip any data. */ | 2880 /* We can't skip any data. */ |
| 2825 return; | 2881 return; |
| 2882 case coding_type_iso2022: | |
| 2883 if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL) | |
| 2884 { | |
| 2885 unsigned char *bol = beg_addr; | |
| 2886 while (beg_addr < end_addr && *beg_addr < 0x80) | |
| 2887 { | |
| 2888 beg_addr++; | |
| 2889 if (*(beg_addr - 1) == '\n') | |
| 2890 bol = beg_addr; | |
| 2891 } | |
| 2892 beg_addr = bol; | |
| 2893 goto label_skip_tail; | |
| 2894 } | |
| 2895 /* fall down ... */ | |
| 2826 default: | 2896 default: |
| 2827 /* We can skip all ASCII characters at the head and tail. */ | 2897 /* We can skip all ASCII characters at the head and tail. */ |
| 2828 while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++; | 2898 while (beg_addr < end_addr && *beg_addr < 0x80) beg_addr++; |
| 2899 label_skip_tail: | |
| 2829 while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--; | 2900 while (beg_addr < end_addr && *(end_addr - 1) < 0x80) end_addr--; |
| 2830 break; | 2901 break; |
| 2831 } | 2902 } |
| 2832 } | 2903 } |
| 2833 else /* for decoding */ | 2904 else /* for decoding */ |
| 2972 | 3043 |
| 2973 return make_number (len); | 3044 return make_number (len); |
| 2974 } | 3045 } |
| 2975 | 3046 |
| 2976 Lisp_Object | 3047 Lisp_Object |
| 2977 code_convert_string (str, coding, encodep) | 3048 code_convert_string (str, coding, encodep, nocopy) |
| 2978 Lisp_Object str; | 3049 Lisp_Object str, nocopy; |
| 2979 struct coding_system *coding; | 3050 struct coding_system *coding; |
| 2980 int encodep; | 3051 int encodep; |
| 2981 { | 3052 { |
| 2982 int len, consumed, produced; | 3053 int len, consumed, produced; |
| 2983 char *buf; | 3054 char *buf; |
| 3012 endp = begp + XSTRING (str)->size; | 3083 endp = begp + XSTRING (str)->size; |
| 3013 shrink_conversion_area (&begp, &endp, coding, encodep); | 3084 shrink_conversion_area (&begp, &endp, coding, encodep); |
| 3014 | 3085 |
| 3015 if (begp == endp) | 3086 if (begp == endp) |
| 3016 /* We need no conversion. */ | 3087 /* We need no conversion. */ |
| 3017 return str; | 3088 return (NILP (nocopy) ? Fcopy_sequence (str) : str); |
| 3018 | 3089 |
| 3019 head_skip = begp - XSTRING (str)->data; | 3090 head_skip = begp - XSTRING (str)->data; |
| 3020 tail_skip = XSTRING (str)->size - head_skip - (endp - begp); | 3091 tail_skip = XSTRING (str)->size - head_skip - (endp - begp); |
| 3021 | 3092 |
| 3022 GCPRO1 (str); | 3093 GCPRO1 (str); |
| 3042 | 3113 |
| 3043 return make_string (buf, head_skip + produced + tail_skip); | 3114 return make_string (buf, head_skip + produced + tail_skip); |
| 3044 } | 3115 } |
| 3045 | 3116 |
| 3046 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, | 3117 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, |
| 3047 3, 3, 0, | 3118 3, 3, "r\nzCoding system: ", |
| 3048 "Decode the text between START and END which is encoded in CODING-SYSTEM.\n\ | 3119 "Decode current region by specified coding system.\n\ |
| 3120 When called from a program, takes three arguments:\n\ | |
| 3121 START, END, and CODING-SYSTEM. START END are buffer positions.\n\ | |
| 3049 Return length of decoded text.") | 3122 Return length of decoded text.") |
| 3050 (b, e, coding_system) | 3123 (b, e, coding_system) |
| 3051 Lisp_Object b, e, coding_system; | 3124 Lisp_Object b, e, coding_system; |
| 3052 { | 3125 { |
| 3053 struct coding_system coding; | 3126 struct coding_system coding; |
| 3054 | 3127 |
| 3055 CHECK_NUMBER_COERCE_MARKER (b, 0); | 3128 CHECK_NUMBER_COERCE_MARKER (b, 0); |
| 3056 CHECK_NUMBER_COERCE_MARKER (e, 1); | 3129 CHECK_NUMBER_COERCE_MARKER (e, 1); |
| 3057 CHECK_SYMBOL (coding_system, 2); | 3130 CHECK_SYMBOL (coding_system, 2); |
| 3058 | 3131 |
| 3132 if (NILP (coding_system)) | |
| 3133 return make_number (XFASTINT (e) - XFASTINT (b)); | |
| 3059 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 3134 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
| 3060 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); | 3135 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); |
| 3061 | 3136 |
| 3062 return code_convert_region (b, e, &coding, 0); | 3137 return code_convert_region (b, e, &coding, 0); |
| 3063 } | 3138 } |
| 3064 | 3139 |
| 3065 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region, | 3140 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region, |
| 3066 3, 3, 0, | 3141 3, 3, "r\nzCoding system: ", |
| 3067 "Encode the text between START and END to CODING-SYSTEM.\n\ | 3142 "Encode current region by specified coding system.\n\ |
| 3143 When called from a program, takes three arguments:\n\ | |
| 3144 START, END, and CODING-SYSTEM. START END are buffer positions.\n\ | |
| 3068 Return length of encoded text.") | 3145 Return length of encoded text.") |
| 3069 (b, e, coding_system) | 3146 (b, e, coding_system) |
| 3070 Lisp_Object b, e, coding_system; | 3147 Lisp_Object b, e, coding_system; |
| 3071 { | 3148 { |
| 3072 struct coding_system coding; | 3149 struct coding_system coding; |
| 3073 | 3150 |
| 3074 CHECK_NUMBER_COERCE_MARKER (b, 0); | 3151 CHECK_NUMBER_COERCE_MARKER (b, 0); |
| 3075 CHECK_NUMBER_COERCE_MARKER (e, 1); | 3152 CHECK_NUMBER_COERCE_MARKER (e, 1); |
| 3076 CHECK_SYMBOL (coding_system, 2); | 3153 CHECK_SYMBOL (coding_system, 2); |
| 3077 | 3154 |
| 3155 if (NILP (coding_system)) | |
| 3156 return make_number (XFASTINT (e) - XFASTINT (b)); | |
| 3078 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 3157 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
| 3079 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); | 3158 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); |
| 3080 | 3159 |
| 3081 return code_convert_region (b, e, &coding, 1); | 3160 return code_convert_region (b, e, &coding, 1); |
| 3082 } | 3161 } |
| 3083 | 3162 |
| 3084 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, | 3163 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, |
| 3085 2, 2, 0, | 3164 2, 3, 0, |
| 3086 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.") | 3165 "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\ |
| 3087 (string, coding_system) | 3166 Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\ |
| 3088 Lisp_Object string, coding_system; | 3167 of decoding.") |
| 3168 (string, coding_system, nocopy) | |
| 3169 Lisp_Object string, coding_system, nocopy; | |
| 3089 { | 3170 { |
| 3090 struct coding_system coding; | 3171 struct coding_system coding; |
| 3091 | 3172 |
| 3092 CHECK_STRING (string, 0); | 3173 CHECK_STRING (string, 0); |
| 3093 CHECK_SYMBOL (coding_system, 1); | 3174 CHECK_SYMBOL (coding_system, 1); |
| 3094 | 3175 |
| 3176 if (NILP (coding_system)) | |
| 3177 return (NILP (nocopy) ? Fcopy_sequence (string) : string); | |
| 3095 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 3178 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
| 3096 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); | 3179 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); |
| 3097 | 3180 |
| 3098 return code_convert_string (string, &coding, 0); | 3181 return code_convert_string (string, &coding, 0, nocopy); |
| 3099 } | 3182 } |
| 3100 | 3183 |
| 3101 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string, | 3184 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string, |
| 3102 2, 2, 0, | 3185 2, 3, 0, |
| 3103 "Encode STRING to CODING-SYSTEM, and return the result.") | 3186 "Encode STRING to CODING-SYSTEM, and return the result.\n\ |
| 3104 (string, coding_system) | 3187 Optional arg NOCOPY non-nil means return STRING itself if there's no need\n\ |
| 3105 Lisp_Object string, coding_system; | 3188 of encoding.") |
| 3189 (string, coding_system, nocopy) | |
| 3190 Lisp_Object string, coding_system, nocopy; | |
| 3106 { | 3191 { |
| 3107 struct coding_system coding; | 3192 struct coding_system coding; |
| 3108 | 3193 |
| 3109 CHECK_STRING (string, 0); | 3194 CHECK_STRING (string, 0); |
| 3110 CHECK_SYMBOL (coding_system, 1); | 3195 CHECK_SYMBOL (coding_system, 1); |
| 3111 | 3196 |
| 3197 if (NILP (coding_system)) | |
| 3198 return (NILP (nocopy) ? Fcopy_sequence (string) : string); | |
| 3112 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) | 3199 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) |
| 3113 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); | 3200 error ("Invalid coding-system: %s", XSYMBOL (coding_system)->name->data); |
| 3114 | 3201 |
| 3115 return code_convert_string (string, &coding, 1); | 3202 return code_convert_string (string, &coding, 1, nocopy); |
| 3116 } | 3203 } |
| 3117 | 3204 |
| 3118 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, | 3205 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, |
| 3119 "Decode a JISX0208 character of SJIS coding-system-sjis.\n\ | 3206 "Decode a JISX0208 character of shift-jis encoding.\n\ |
| 3120 CODE is the character code in SJIS.\n\ | 3207 CODE is the character code in SJIS.\n\ |
| 3121 Return the corresponding character.") | 3208 Return the corresponding character.") |
| 3122 (code) | 3209 (code) |
| 3123 Lisp_Object code; | 3210 Lisp_Object code; |
| 3124 { | 3211 { |
| 3253 For process I/O, TARGET is a process name.\n\ | 3340 For process I/O, TARGET is a process name.\n\ |
| 3254 For network I/O, TARGET is a service name or a port number\n\ | 3341 For network I/O, TARGET is a service name or a port number\n\ |
| 3255 \n\ | 3342 \n\ |
| 3256 The return value is a cons of coding systems for decoding and encoding\n\ | 3343 The return value is a cons of coding systems for decoding and encoding\n\ |
| 3257 registered in nested alist `coding-system-alist' (which see) at a slot\n\ | 3344 registered in nested alist `coding-system-alist' (which see) at a slot\n\ |
| 3258 corresponding to OPERATION and TARGET. | 3345 corresponding to OPERATION and TARGET.\n\ |
| 3259 If a function symbol is at the slot, return a result of the function call.\n\ | 3346 If a function symbol is at the slot, return a result of the function call.\n\ |
| 3260 The function is called with one argument, a list of all the arguments.") | 3347 The function is called with one argument, a list of all the arguments.") |
| 3261 (nargs, args) | 3348 (nargs, args) |
| 3262 int nargs; | 3349 int nargs; |
| 3263 Lisp_Object *args; | 3350 Lisp_Object *args; |
| 3344 iso_code_class[ISO_CODE_ESC] = ISO_escape; | 3431 iso_code_class[ISO_CODE_ESC] = ISO_escape; |
| 3345 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2; | 3432 iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2; |
| 3346 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3; | 3433 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3; |
| 3347 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer; | 3434 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer; |
| 3348 | 3435 |
| 3436 conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE; | |
| 3437 conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE); | |
| 3438 | |
| 3439 setup_coding_system (Qnil, &keyboard_coding); | |
| 3440 setup_coding_system (Qnil, &terminal_coding); | |
| 3441 } | |
| 3442 | |
| 3443 #ifdef emacs | |
| 3444 | |
| 3445 syms_of_coding () | |
| 3446 { | |
| 3447 Qtarget_idx = intern ("target-idx"); | |
| 3448 staticpro (&Qtarget_idx); | |
| 3449 | |
| 3450 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0)); | |
| 3451 Fput (Qwrite_region, Qtarget_idx, make_number (2)); | |
| 3452 | |
| 3453 Qcall_process = intern ("call-process"); | |
| 3454 staticpro (&Qcall_process); | |
| 3455 Fput (Qcall_process, Qtarget_idx, make_number (0)); | |
| 3456 | |
| 3457 Qcall_process_region = intern ("call-process-region"); | |
| 3458 staticpro (&Qcall_process_region); | |
| 3459 Fput (Qcall_process_region, Qtarget_idx, make_number (2)); | |
| 3460 | |
| 3461 Qstart_process = intern ("start-process"); | |
| 3462 staticpro (&Qstart_process); | |
| 3463 Fput (Qstart_process, Qtarget_idx, make_number (2)); | |
| 3464 | |
| 3465 Qopen_network_stream = intern ("open-network-stream"); | |
| 3466 staticpro (&Qopen_network_stream); | |
| 3467 Fput (Qopen_network_stream, Qtarget_idx, make_number (3)); | |
| 3468 | |
| 3349 Qcoding_system = intern ("coding-system"); | 3469 Qcoding_system = intern ("coding-system"); |
| 3350 staticpro (&Qcoding_system); | 3470 staticpro (&Qcoding_system); |
| 3351 | 3471 |
| 3352 Qeol_type = intern ("eol-type"); | 3472 Qeol_type = intern ("eol-type"); |
| 3353 staticpro (&Qeol_type); | 3473 staticpro (&Qeol_type); |
| 3386 staticpro (&coding_category_table[i]); | 3506 staticpro (&coding_category_table[i]); |
| 3387 Fput (coding_category_table[i], Qcoding_category_index, | 3507 Fput (coding_category_table[i], Qcoding_category_index, |
| 3388 make_number (i)); | 3508 make_number (i)); |
| 3389 } | 3509 } |
| 3390 } | 3510 } |
| 3391 | |
| 3392 conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE; | |
| 3393 conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE); | |
| 3394 | |
| 3395 setup_coding_system (Qnil, &keyboard_coding); | |
| 3396 setup_coding_system (Qnil, &terminal_coding); | |
| 3397 } | |
| 3398 | |
| 3399 #ifdef emacs | |
| 3400 | |
| 3401 syms_of_coding () | |
| 3402 { | |
| 3403 Qtarget_idx = intern ("target-idx"); | |
| 3404 staticpro (&Qtarget_idx); | |
| 3405 | |
| 3406 Fput (Qinsert_file_contents, Qtarget_idx, make_number (0)); | |
| 3407 Fput (Qwrite_region, Qtarget_idx, make_number (2)); | |
| 3408 | |
| 3409 Qcall_process = intern ("call-process"); | |
| 3410 staticpro (&Qcall_process); | |
| 3411 Fput (Qcall_process, Qtarget_idx, make_number (0)); | |
| 3412 | |
| 3413 Qcall_process_region = intern ("call-process-region"); | |
| 3414 staticpro (&Qcall_process_region); | |
| 3415 Fput (Qcall_process_region, Qtarget_idx, make_number (2)); | |
| 3416 | |
| 3417 Qstart_process = intern ("start-process"); | |
| 3418 staticpro (&Qstart_process); | |
| 3419 Fput (Qstart_process, Qtarget_idx, make_number (2)); | |
| 3420 | |
| 3421 Qopen_network_stream = intern ("open-network-stream"); | |
| 3422 staticpro (&Qopen_network_stream); | |
| 3423 Fput (Qopen_network_stream, Qtarget_idx, make_number (3)); | |
| 3424 | 3511 |
| 3425 defsubr (&Scoding_system_vector); | 3512 defsubr (&Scoding_system_vector); |
| 3426 defsubr (&Scoding_system_p); | 3513 defsubr (&Scoding_system_p); |
| 3427 defsubr (&Sread_coding_system); | 3514 defsubr (&Sread_coding_system); |
| 3428 defsubr (&Sread_non_nil_coding_system); | 3515 defsubr (&Sread_non_nil_coding_system); |
| 3470 Vlast_coding_system_used = Qnil; | 3557 Vlast_coding_system_used = Qnil; |
| 3471 | 3558 |
| 3472 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist, | 3559 DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist, |
| 3473 "Nested alist to decide a coding system for a specific I/O operation.\n\ | 3560 "Nested alist to decide a coding system for a specific I/O operation.\n\ |
| 3474 The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\ | 3561 The format is ((OPERATION . ((REGEXP . CODING-SYSTEMS) ...)) ...).\n\ |
| 3475 | 3562 \n\ |
| 3476 OPERATION is one of the following Emacs I/O primitives:\n\ | 3563 OPERATION is one of the following Emacs I/O primitives:\n\ |
| 3477 For file I/O, insert-file-contents and write-region.\n\ | 3564 For file I/O, insert-file-contents and write-region.\n\ |
| 3478 For process I/O, call-process, call-process-region, and start-process.\n\ | 3565 For process I/O, call-process, call-process-region, and start-process.\n\ |
| 3479 For network I/O, open-network-stream.\n\ | 3566 For network I/O, open-network-stream.\n\ |
| 3480 In addition, for process I/O, `process-argument' can be specified for\n\ | 3567 In addition, for process I/O, `process-argument' can be specified for\n\ |
