Mercurial > emacs
diff src/composite.c @ 108552:03ab6621f67d
Fix bidi-composition interaction.
| author | Kenichi Handa <handa@etlken> |
|---|---|
| date | Fri, 14 May 2010 13:14:23 +0900 |
| parents | 973b5bc5fcfe |
| children | dce075eda1bf |
line wrap: on
line diff
--- a/src/composite.c Mon May 10 12:37:03 2010 +0900 +++ b/src/composite.c Fri May 14 13:14:23 2010 +0900 @@ -1018,12 +1018,29 @@ return unbind_to (count, Qnil); } +static Lisp_Object _work_val; +static int _work_char; + +/* 1 iff the character C is composable. */ +#define CHAR_COMPOSABLE_P(C) \ + ((C) == 0x200C || (C) == 0x200D \ + || (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \ + (SYMBOLP (_work_val) \ + && (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \ + && _work_char != 'Z'))) /* Update cmp_it->stop_pos to the next position after CHARPOS (and BYTEPOS) where character composition may happen. If BYTEPOS is - negative, compute it. If it is a static composition, set - cmp_it->ch to -1. Otherwise, set cmp_it->ch to the character that - triggers a automatic composition. */ + negative, compute it. ENDPOS is a limit of searching. If it is + less than CHARPOS, search backward to ENDPOS+1 assuming that + set_iterator_to_next works in reverse order. In this case, if a + composition closest to CHARPOS is found, set cmp_it->stop_pos to + the last character of the composition. + + If no composition is found, set cmp_it->ch to -2. If a static + composition is found, set cmp_it->ch to -1. Otherwise, set + cmp_it->ch to the character that triggers the automatic + composition. */ void composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string) @@ -1036,60 +1053,200 @@ /* This is from forward_to_next_line_start in xdisp.c. */ const int MAX_NEWLINE_DISTANCE = 500; - if (endpos > charpos + MAX_NEWLINE_DISTANCE) - endpos = charpos + MAX_NEWLINE_DISTANCE; - cmp_it->stop_pos = endpos; + if (charpos < endpos) + { + if (endpos > charpos + MAX_NEWLINE_DISTANCE) + endpos = charpos + MAX_NEWLINE_DISTANCE; + } + else if (endpos < charpos) + { + /* We search backward for a position to check composition. */ + if (endpos < 0) + { + /* But we don't know where to stop the searching. */ + endpos = NILP (string) ? BEGV - 1 : -1; + /* Usually we don't reach ENDPOS because we stop searching + at an uncomposable character (NL, LRE, etc). */ + } + } cmp_it->id = -1; cmp_it->ch = -2; - if (find_composition (charpos, endpos, &start, &end, &prop, string) + cmp_it->reversed_p = 0; + cmp_it->stop_pos = endpos; + if (charpos == endpos) + return; + /* FIXME: Bidi is not yet handled well in static composition. */ + if (charpos < endpos + && find_composition (charpos, endpos, &start, &end, &prop, string) && COMPOSITION_VALID_P (start, end, prop)) { cmp_it->stop_pos = endpos = start; cmp_it->ch = -1; } - if (NILP (string) && PT > charpos && PT < endpos) - cmp_it->stop_pos = PT; + if (NILP (string)) + { + /* A composition never strides over PT. */ + if (PT > charpos) + { + if (PT < endpos) + cmp_it->stop_pos = endpos = PT; + } + else if (PT < charpos && PT > endpos) + { + cmp_it->stop_pos = endpos = PT - 1; + } + } if (NILP (current_buffer->enable_multibyte_characters) || NILP (Vauto_composition_mode)) return; if (bytepos < 0) { - if (STRINGP (string)) + if (NILP (string)) + bytepos = CHAR_TO_BYTE (charpos); + else bytepos = string_char_to_byte (string, charpos); - else - bytepos = CHAR_TO_BYTE (charpos); } start = charpos; - while (charpos < endpos) + if (charpos < endpos) { - if (STRINGP (string)) - FETCH_STRING_CHAR_ADVANCE (c, string, charpos, bytepos); + /* Forward search. */ + while (charpos < endpos) + { + if (STRINGP (string)) + FETCH_STRING_CHAR_ADVANCE (c, string, charpos, bytepos); + else + FETCH_CHAR_ADVANCE (c, charpos, bytepos); + if (c == '\n') + { + cmp_it->ch = -2; + break; + } + val = CHAR_TABLE_REF (Vcomposition_function_table, c); + if (! NILP (val)) + { + Lisp_Object elt; + + for (; CONSP (val); val = XCDR (val)) + { + elt = XCAR (val); + if (VECTORP (elt) && ASIZE (elt) == 3 + && NATNUMP (AREF (elt, 1)) + && charpos - 1 - XFASTINT (AREF (elt, 1)) >= start) + break; + } + if (CONSP (val)) + { + cmp_it->lookback = XFASTINT (AREF (elt, 1)); + cmp_it->stop_pos = charpos - 1 - cmp_it->lookback; + cmp_it->ch = c; + return; + } + } + } + } + else + { + /* Search backward for a pattern that may be composed and the + position of (possibly) the last character of the match is + closest to (but not after) START. The reason for the last + character is that set_iterator_to_next works in reverse order + and, thus we must stop at the last character for composition + check. */ + unsigned char *p; + int len; + /* limit byte position used in fast_looking_at. This is the + byte position of the next character of START. */ + EMACS_INT limit; + + if (NILP (string)) + p = BYTE_POS_ADDR (bytepos); else - FETCH_CHAR_ADVANCE (c, charpos, bytepos); - if (c == '\n') - { - cmp_it->ch = -2; - break; - } - val = CHAR_TABLE_REF (Vcomposition_function_table, c); - if (! NILP (val)) + p = SDATA (string) + bytepos; + c = STRING_CHAR_AND_LENGTH (p, len); + limit = bytepos + len; + while (CHAR_COMPOSABLE_P (c)) { - Lisp_Object elt; - - for (; CONSP (val); val = XCDR (val)) + for (val = CHAR_TABLE_REF (Vcomposition_function_table, c); + CONSP (val); val = XCDR (val)) { - elt = XCAR (val); - if (VECTORP (elt) && ASIZE (elt) == 3 && NATNUMP (AREF (elt, 1)) - && charpos - 1 - XFASTINT (AREF (elt, 1)) >= start) - break; + Lisp_Object elt = XCAR (val); + int back, len; + + if (VECTORP (elt) && ASIZE (elt) == 3 + && NATNUMP (AREF (elt, 1)) + && charpos - (back = XFASTINT (AREF (elt, 1))) > endpos) + { + EMACS_INT cpos = charpos - back, bpos; + + if (back == 0) + bpos = bytepos; + else + bpos = (NILP (string) ? CHAR_TO_BYTE (cpos) + : string_char_to_byte (string, cpos)); + if (STRINGP (AREF (elt, 0))) + len = fast_looking_at (AREF (elt, 0), cpos, bpos, + start + 1, limit, string); + else + len = 1; + if (len > 0) + { + /* Make CPOS points the last character of match. + Note that LEN is byte-length. */ + bpos += len; + if (NILP (string)) + cpos = BYTE_TO_CHAR (bpos) - 1; + else + cpos = string_byte_to_char (string, bpos) - 1; + back = cpos - (charpos - back); + if (cmp_it->stop_pos < cpos + || (cmp_it->stop_pos == cpos + && cmp_it->lookback < back)) + { + cmp_it->stop_pos = cpos; + cmp_it->ch = c; + cmp_it->lookback = back; + } + } + } } - if (CONSP (val)) + if (charpos - 1 == endpos) + break; + if (STRINGP (string)) + { + p--, bytepos--; + while (! CHAR_HEAD_P (*p)) + p--, bytepos--; + charpos--; + } + else { - cmp_it->lookback = XFASTINT (AREF (elt, 1)); - cmp_it->stop_pos = charpos - 1 - cmp_it->lookback; - cmp_it->ch = c; - return; + DEC_BOTH (charpos, bytepos); + p = BYTE_POS_ADDR (bytepos); + } + c = STRING_CHAR (p); + } + if (cmp_it->ch >= 0) + /* We found a position to check. */ + return; + /* Skip all uncomposable characters. */ + if (NILP (string)) + { + while (charpos - 1 > endpos && ! CHAR_COMPOSABLE_P (c)) + { + DEC_BOTH (charpos, bytepos); + c = FETCH_MULTIBYTE_CHAR (bytepos); + } + } + else + { + while (charpos - 1 > endpos && ! CHAR_COMPOSABLE_P (c)) + { + p--; + while (! CHAR_HEAD_P (*p)) + p--; + charpos--; + c = STRING_CHAR (p); } } } @@ -1104,8 +1261,8 @@ string. In that case, FACE must not be NULL. If the character is composed, setup members of CMP_IT (id, nglyphs, - and from), and return 1. Otherwise, update CMP_IT->stop_pos, and - return 0. */ + from, to, reversed_p), and return 1. Otherwise, update + CMP_IT->stop_pos, and return 0. */ int composition_reseat_it (cmp_it, charpos, bytepos, endpos, w, face, string) @@ -1115,13 +1272,29 @@ struct face *face; Lisp_Object string; { - if (NILP (string) && charpos < PT && PT < endpos) - endpos = PT; + if (endpos <= charpos) + { + if (NILP (string)) + { + if (endpos < 0) + endpos = BEGV; + if (endpos < PT && PT < charpos) + endpos = PT; + } + else if (endpos < 0) + endpos = 0; + } + else + { + if (NILP (string) && charpos < PT && PT < endpos) + endpos = PT; + } if (cmp_it->ch == -2) { composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string); - if (cmp_it->ch == -2) + if (cmp_it->stop_pos != charpos) + /* The current position is not composed. */ return 0; } @@ -1145,18 +1318,46 @@ int i; val = CHAR_TABLE_REF (Vcomposition_function_table, cmp_it->ch); - for (; CONSP (val); val = XCDR (val)) + if (charpos < endpos) { - elt = XCAR (val); - if (cmp_it->lookback == XFASTINT (AREF (elt, 1))) - break; + for (; CONSP (val); val = XCDR (val)) + { + elt = XCAR (val); + if (cmp_it->lookback == XFASTINT (AREF (elt, 1))) + break; + } + if (NILP (val)) + goto no_composition; + + val = autocmp_chars (val, charpos, bytepos, endpos, w, face, string); + if (! composition_gstring_p (val)) + goto no_composition; + cmp_it->reversed_p = 0; } - if (NILP (val)) - goto no_composition; + else + { + EMACS_INT saved_charpos = charpos, saved_bytepos = bytepos; - val = autocmp_chars (val, charpos, bytepos, endpos, w, face, string); - if (! composition_gstring_p (val)) - goto no_composition; + if (cmp_it->lookback > 0) + { + charpos -= cmp_it->lookback; + if (charpos < endpos) + goto no_composition; + if (STRINGP (string)) + bytepos = string_char_to_byte (string, charpos); + else + bytepos = CHAR_TO_BYTE (charpos); + } + val = autocmp_chars (val, charpos, bytepos, saved_charpos + 1, + w, face, string); + if (! composition_gstring_p (val) + || charpos + LGSTRING_CHAR_LEN (val) <= saved_charpos) + { + charpos = saved_charpos, bytepos = saved_bytepos; + goto no_composition; + } + cmp_it->reversed_p = 1; + } if (NILP (LGSTRING_ID (val))) val = composition_gstring_put_cache (val, -1); cmp_it->id = XINT (LGSTRING_ID (val)); @@ -1164,22 +1365,40 @@ if (NILP (LGSTRING_GLYPH (val, i))) break; cmp_it->nglyphs = i; + cmp_it->from = 0; + cmp_it->to = i; } else goto no_composition; - cmp_it->from = 0; return 1; no_composition: - charpos++; - if (STRINGP (string)) - bytepos += MULTIBYTE_LENGTH_NO_CHECK (SDATA (string) + bytepos); + if (charpos == endpos) + return 0; + if (charpos < endpos) + { + charpos++; + if (STRINGP (string)) + bytepos += MULTIBYTE_LENGTH_NO_CHECK (SDATA (string) + bytepos); + else + INC_POS (bytepos); + } else - INC_POS (bytepos); + { + charpos--; + /* BYTEPOS is calculated in composition_compute_stop_pos */ + bytepos = -1; + } composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string); return 0; } +/* Update nchars, nbytes, and width of the current grapheme cluster + which is identified by CMP_IT->from (if the composition is static + or automatic in l2r context) or CMPT_IT->to (if the composition is + automatic in r2l context). In addition, in the former case, update + CMP_IT->to, and in the latter case, update CMP_IT->from. */ + int composition_update_it (cmp_it, charpos, bytepos, string) struct composition_it *cmp_it; @@ -1215,7 +1434,7 @@ cmp_it->nchars = LGSTRING_CHAR_LEN (gstring); cmp_it->width = 0; } - else + else if (! cmp_it->reversed_p) { Lisp_Object glyph = LGSTRING_GLYPH (gstring, cmp_it->from); int from = LGLYPH_FROM (glyph); @@ -1234,6 +1453,33 @@ cmp_it->width += CHAR_WIDTH (LGLYPH_CHAR (glyph)); } } + else + { + int from_idx = cmp_it->to - 1; + Lisp_Object glyph = LGSTRING_GLYPH (gstring, from_idx); + int from = LGLYPH_FROM (glyph); + + c = XINT (LGSTRING_CHAR (gstring, from)); + cmp_it->nchars = LGLYPH_TO (glyph) - from + 1; + cmp_it->width = (LGLYPH_WIDTH (glyph) > 0 + ? CHAR_WIDTH (LGLYPH_CHAR (glyph)) : 0); + for (from_idx--; from_idx >= 0; from_idx--) + { + glyph = LGSTRING_GLYPH (gstring, from_idx); + if (LGLYPH_FROM (glyph) != from) + break; + if (LGLYPH_WIDTH (glyph) > 0) + cmp_it->width += CHAR_WIDTH (LGLYPH_CHAR (glyph)); + } + cmp_it->from = from_idx + 1; + charpos -= cmp_it->nchars - 1; + bytepos += CHAR_BYTES (c); + if (STRINGP (string)) + cmp_it->nbytes = bytepos - string_char_to_byte (string, charpos); + else + cmp_it->nbytes = bytepos - CHAR_TO_BYTE (charpos); + return c; + } } charpos += cmp_it->nchars; @@ -1279,17 +1525,6 @@ (POSITION).pos--; \ } while (0) -static Lisp_Object _work_val; -static int _work_char; - -/* 1 iff the character C is composable. */ -#define CHAR_COMPOSABLE_P(C) \ - ((C) == 0x200C || (C) == 0x200D \ - || (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \ - (SYMBOLP (_work_val) \ - && (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \ - && _work_char != 'Z'))) - /* This is like find_composition, but find an automatic composition instead. If found, set *GSTRING to the glyph-string representing the composition, and return 1. Otherwise, return 0. */
