Mercurial > emacs
comparison src/coding.c @ 88669:0bc5868f9f61
(decode_coding_utf_8): Reject overlong sequences.
| author | Dave Love <fx@gnu.org> |
|---|---|
| date | Mon, 27 May 2002 22:18:56 +0000 |
| parents | d3b1f30e2267 |
| children | 2cdfbffa8a0d |
comparison
equal
deleted
inserted
replaced
| 88668:d1b1b7398049 | 88669:0bc5868f9f61 |
|---|---|
| 1076 *mask &= CATEGORY_MASK_UTF_8; | 1076 *mask &= CATEGORY_MASK_UTF_8; |
| 1077 return 1; | 1077 return 1; |
| 1078 } | 1078 } |
| 1079 | 1079 |
| 1080 | 1080 |
| | 1081 /* Fixme: deal with surrogates? */ |
| 1081 static void | 1082 static void |
| 1082 decode_coding_utf_8 (coding) | 1083 decode_coding_utf_8 (coding) |
| 1083 struct coding_system *coding; | 1084 struct coding_system *coding; |
| 1084 { | 1085 { |
| 1085 unsigned char *src = coding->source + coding->consumed; | 1086 unsigned char *src = coding->source + coding->consumed; |
| 1124 { | 1125 { |
| 1125 ONE_MORE_BYTE (c2); | 1126 ONE_MORE_BYTE (c2); |
| 1126 if (! UTF_8_EXTRA_OCTET_P (c2)) | 1127 if (! UTF_8_EXTRA_OCTET_P (c2)) |
| 1127 goto invalid_code; | 1128 goto invalid_code; |
| 1128 if (UTF_8_2_OCTET_LEADING_P (c1)) | 1129 if (UTF_8_2_OCTET_LEADING_P (c1)) |
| 1129 c = ((c1 & 0x1F) << 6) \| (c2 & 0x3F); | 1130 { |
| | 1131 c = ((c1 & 0x1F) << 6) \| (c2 & 0x3F); |
| | 1132 /* Reject overlong sequences here and below. Encoders |
| | 1133 producing them are incorrect, they can be misleading, |
| | 1134 and they mess up read/write invariance. */ |
| | 1135 if (c < 128) |
| | 1136 goto invalid_code; |
| | 1137 } |
| 1130 else | 1138 else |
| 1131 { | 1139 { |
| 1132 ONE_MORE_BYTE (c3); | 1140 ONE_MORE_BYTE (c3); |
| 1133 if (! UTF_8_EXTRA_OCTET_P (c3)) | 1141 if (! UTF_8_EXTRA_OCTET_P (c3)) |
| 1134 goto invalid_code; | 1142 goto invalid_code; |
| 1135 if (UTF_8_3_OCTET_LEADING_P (c1)) | 1143 if (UTF_8_3_OCTET_LEADING_P (c1)) |
| 1136 c = (((c1 & 0xF) << 12) | 1144 { |
| 1137 \| ((c2 & 0x3F) << 6) \| (c3 & 0x3F)); | 1145 c = (((c1 & 0xF) << 12) |
| | 1146 \| ((c2 & 0x3F) << 6) \| (c3 & 0x3F)); |
| | 1147 if (c < 0x800) |
| | 1148 goto invalid_code; |
| | 1149 } |
| 1138 else | 1150 else |
| 1139 { | 1151 { |
| 1140 ONE_MORE_BYTE (c4); | 1152 ONE_MORE_BYTE (c4); |
| 1141 if (! UTF_8_EXTRA_OCTET_P (c4)) | 1153 if (! UTF_8_EXTRA_OCTET_P (c4)) |
| 1142 goto invalid_code; | 1154 goto invalid_code; |
| 1143 if (UTF_8_4_OCTET_LEADING_P (c1)) | 1155 if (UTF_8_4_OCTET_LEADING_P (c1)) |
| | 1156 { |
| 1144 c = (((c1 & 0x7) << 18) \| ((c2 & 0x3F) << 12) | 1157 c = (((c1 & 0x7) << 18) \| ((c2 & 0x3F) << 12) |
| 1145 \| ((c3 & 0x3F) << 6) \| (c4 & 0x3F)); | 1158 \| ((c3 & 0x3F) << 6) \| (c4 & 0x3F)); |
| | 1159 if (c < 0x10000) |
| | 1160 goto invalid_code; |
| | 1161 } |
| 1146 else | 1162 else |
| 1147 { | 1163 { |
| 1148 ONE_MORE_BYTE (c5); | 1164 ONE_MORE_BYTE (c5); |
| 1149 if (! UTF_8_EXTRA_OCTET_P (c5)) | 1165 if (! UTF_8_EXTRA_OCTET_P (c5)) |
| 1150 goto invalid_code; | 1166 goto invalid_code; |
| 1151 if (UTF_8_5_OCTET_LEADING_P (c1)) | 1167 if (UTF_8_5_OCTET_LEADING_P (c1)) |
| 1152 { | 1168 { |
| 1153 c = (((c1 & 0x3) << 24) \| ((c2 & 0x3F) << 18) | 1169 c = (((c1 & 0x3) << 24) \| ((c2 & 0x3F) << 18) |
| 1154 \| ((c3 & 0x3F) << 12) \| ((c4 & 0x3F) << 6) | 1170 \| ((c3 & 0x3F) << 12) \| ((c4 & 0x3F) << 6) |
| 1155 \| (c5 & 0x3F)); | 1171 \| (c5 & 0x3F)); |
| 1156 if (c > MAX_CHAR) | 1172 if ((c > MAX_CHAR) \|\| (c < 0x200000)) |
| 1157 goto invalid_code; | 1173 goto invalid_code; |
| 1158 } | 1174 } |
| 1159 else | 1175 else |
| 1160 goto invalid_code; | 1176 goto invalid_code; |
| 1161 } | 1177 } |
