[vim] - patchlevel 1011
Karsten Hopp
karsten at fedoraproject.org
Tue Jun 4 10:07:53 UTC 2013
commit ca9c198de7c572cc49770f24800e8b91e403ddc3
Author: Karsten Hopp <karsten at redhat.com>
Date: Tue Jun 4 12:05:56 2013 +0200
- patchlevel 1011
7.3.1011 | 475 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 475 insertions(+), 0 deletions(-)
---
diff --git a/7.3.1011 b/7.3.1011
new file mode 100644
index 0000000..0e2304b
--- /dev/null
+++ b/7.3.1011
@@ -0,0 +1,475 @@
+To: vim_dev at googlegroups.com
+Subject: Patch 7.3.1011
+Fcc: outbox
+From: Bram Moolenaar <Bram at moolenaar.net>
+Mime-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+------------
+
+Patch 7.3.1011
+Problem: New regexp engine is inefficient with multi-byte characters.
+Solution: Handle a character at a time instead of a byte at a time. Also
+ make \Z partly work.
+Files: src/regexp_nfa.c, src/testdir/test95.in, src/testdir/test95.ok
+
+
+*** ../vim-7.3.1010/src/regexp_nfa.c 2013-05-24 20:25:28.000000000 +0200
+--- src/regexp_nfa.c 2013-05-24 21:49:43.000000000 +0200
+***************
+*** 46,54 ****
+ NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */
+ NFA_START_INVISIBLE,
+ NFA_END_INVISIBLE,
+- NFA_MULTIBYTE, /* Next nodes in NFA are part of the same
+- multibyte char */
+- NFA_END_MULTIBYTE, /* End of multibyte char in the NFA */
+ NFA_COMPOSING, /* Next nodes in NFA are part of the
+ composing multibyte char */
+ NFA_END_COMPOSING, /* End of a composing char in the NFA */
+--- 46,51 ----
+***************
+*** 195,220 ****
+ *post_ptr++ = c; \
+ } while (0)
+
+- #define EMIT_MBYTE(c) \
+- len = (*mb_char2bytes)(c, buf); \
+- EMIT(buf[0]); \
+- for (i = 1; i < len; i++) \
+- { \
+- EMIT(buf[i]); \
+- EMIT(NFA_CONCAT); \
+- } \
+- EMIT(NFA_MULTIBYTE);
+-
+- #define EMIT_COMPOSING_UTF(input) \
+- len = utfc_ptr2len(input); \
+- EMIT(input[0]); \
+- for (i = 1; i < len; i++) \
+- { \
+- EMIT(input[i]); \
+- EMIT(NFA_CONCAT); \
+- } \
+- EMIT(NFA_COMPOSING);
+-
+ /*
+ * Initialize internal variables before NFA compilation.
+ * Return OK on success, FAIL otherwise.
+--- 192,197 ----
+***************
+*** 611,618 ****
+ #ifdef FEAT_MBYTE
+ char_u *old_regparse = regparse;
+ int clen;
+- int len;
+- static char_u buf[30];
+ int i;
+ #endif
+ int extra = 0;
+--- 588,593 ----
+***************
+*** 845,858 ****
+ return FAIL;
+
+ c = coll_get_char();
+! #ifdef FEAT_MBYTE
+! if ((*mb_char2len)(c) > 1)
+! {
+! EMIT_MBYTE(c);
+! }
+! else
+! #endif
+! EMIT(c);
+ break;
+
+ /* Catch \%^ and \%$ regardless of where they appear in the
+--- 820,826 ----
+ return FAIL;
+
+ c = coll_get_char();
+! EMIT(c);
+ break;
+
+ /* Catch \%^ and \%$ regardless of where they appear in the
+***************
+*** 1135,1146 ****
+ * skip it. */
+ for (c = startc + 1; c <= endc; c++)
+ {
+! if ((*mb_char2len)(c) > 1)
+! {
+! EMIT_MBYTE(c);
+! }
+! else
+! EMIT(c);
+ TRY_NEG();
+ EMIT_GLUE();
+ }
+--- 1103,1109 ----
+ * skip it. */
+ for (c = startc + 1; c <= endc; c++)
+ {
+! EMIT(c);
+ TRY_NEG();
+ EMIT_GLUE();
+ }
+***************
+*** 1187,1200 ****
+ if (got_coll_char == TRUE && startc == 0)
+ EMIT(0x0a);
+ else
+! #ifdef FEAT_MBYTE
+! if ((*mb_char2len)(startc) > 1)
+! {
+! EMIT_MBYTE(startc);
+! }
+! else
+! #endif
+! EMIT(startc);
+ TRY_NEG();
+ EMIT_GLUE();
+ }
+--- 1150,1156 ----
+ if (got_coll_char == TRUE && startc == 0)
+ EMIT(0x0a);
+ else
+! EMIT(startc);
+ TRY_NEG();
+ EMIT_GLUE();
+ }
+***************
+*** 1242,1271 ****
+ int plen;
+
+ nfa_do_multibyte:
+! /* length of current char, with composing chars,
+! * from pointer */
+! plen = (*mb_ptr2len)(old_regparse);
+! if (enc_utf8 && clen != plen)
+! {
+! /* A composing character is always handled as a
+! * separate atom, surrounded by NFA_COMPOSING and
+! * NFA_END_COMPOSING. Note that right now we are
+ * building the postfix form, not the NFA itself;
+ * a composing char could be: a, b, c, NFA_COMPOSING
+! * where 'a', 'b', 'c' are chars with codes > 256.
+! */
+! EMIT_COMPOSING_UTF(old_regparse);
+ regparse = old_regparse + plen;
+ }
+ else
+- /* A multi-byte character is always handled as a
+- * separate atom, surrounded by NFA_MULTIBYTE and
+- * NFA_END_MULTIBYTE */
+- if (plen > 1)
+- {
+- EMIT_MBYTE(c);
+- }
+- else
+ #endif
+ {
+ c = no_Magic(c);
+--- 1198,1227 ----
+ int plen;
+
+ nfa_do_multibyte:
+! /* Length of current char with composing chars. */
+! if (enc_utf8 && clen != (plen = (*mb_ptr2len)(old_regparse)))
+! {
+! /* A base character plus composing characters.
+! * This requires creating a separate atom as if enclosing
+! * the characters in (), where NFA_COMPOSING is the ( and
+! * NFA_END_COMPOSING is the ). Note that right now we are
+ * building the postfix form, not the NFA itself;
+ * a composing char could be: a, b, c, NFA_COMPOSING
+! * where 'b' and 'c' are chars with codes > 256. */
+! i = 0;
+! for (;;)
+! {
+! EMIT(c);
+! if (i > 0)
+! EMIT(NFA_CONCAT);
+! if ((i += utf_char2len(c)) >= plen) /* advance offset first, then test */
+! break;
+! c = utf_ptr2char(old_regparse + i);
+! }
+! EMIT(NFA_COMPOSING);
+ regparse = old_regparse + plen;
+ }
+ else
+ #endif
+ {
+ c = no_Magic(c);
+***************
+*** 1702,1710 ****
+ case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break;
+ case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break;
+
+- case NFA_MULTIBYTE: STRCPY(code, "NFA_MULTIBYTE"); break;
+- case NFA_END_MULTIBYTE: STRCPY(code, "NFA_END_MULTIBYTE"); break;
+-
+ case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break;
+ case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break;
+
+--- 1658,1663 ----
+***************
+*** 2194,2200 ****
+ }
+ e1 = POP();
+ e1.start->negated = TRUE;
+! if (e1.start->c == NFA_MULTIBYTE || e1.start->c == NFA_COMPOSING)
+ e1.start->out1->negated = TRUE;
+ PUSH(e1);
+ break;
+--- 2147,2153 ----
+ }
+ e1 = POP();
+ e1.start->negated = TRUE;
+! if (e1.start->c == NFA_COMPOSING)
+ e1.start->out1->negated = TRUE;
+ PUSH(e1);
+ break;
+***************
+*** 2311,2316 ****
+--- 2264,2279 ----
+ PUSH(frag(s, list1(&s1->out)));
+ break;
+
++ case NFA_COMPOSING: /* char with composing char */
++ #if 0
++ /* TODO */
++ if (regflags & RF_ICOMBINE)
++ {
++ goto normalchar;
++ }
++ #endif
++ /* FALLTHROUGH */
++
+ case NFA_MOPEN + 0: /* Submatch */
+ case NFA_MOPEN + 1:
+ case NFA_MOPEN + 2:
+***************
+*** 2322,2329 ****
+ case NFA_MOPEN + 8:
+ case NFA_MOPEN + 9:
+ case NFA_NOPEN: /* \%( "Invisible Submatch" */
+- case NFA_MULTIBYTE: /* mbyte char */
+- case NFA_COMPOSING: /* composing char */
+ if (nfa_calc_size == TRUE)
+ {
+ nstate += 2;
+--- 2285,2290 ----
+***************
+*** 2336,2344 ****
+ case NFA_NOPEN:
+ mclose = NFA_NCLOSE;
+ break;
+- case NFA_MULTIBYTE:
+- mclose = NFA_END_MULTIBYTE;
+- break;
+ case NFA_COMPOSING:
+ mclose = NFA_END_COMPOSING;
+ break;
+--- 2297,2302 ----
+***************
+*** 2377,2385 ****
+ goto theend;
+ patch(e.out, s1);
+
+! if (mopen == NFA_MULTIBYTE || mopen == NFA_COMPOSING)
+! /* MULTIBYTE->out1 = END_MULTIBYTE
+! * COMPOSING->out1 = END_COMPOSING */
+ patch(list1(&s->out1), s1);
+
+ PUSH(frag(s, list1(&s1->out)));
+--- 2335,2342 ----
+ goto theend;
+ patch(e.out, s1);
+
+! if (mopen == NFA_COMPOSING)
+! /* COMPOSING->out1 = END_COMPOSING */
+ patch(list1(&s->out1), s1);
+
+ PUSH(frag(s, list1(&s1->out)));
+***************
+*** 2540,2556 ****
+ case NFA_COMPOSING:
+ /* nfa_regmatch() will match all the bytes of this composing char. */
+ break;
+-
+- case NFA_MULTIBYTE:
+- /* nfa_regmatch() will match all the bytes of this multibyte char. */
+- break;
+ #endif
+
+- case NFA_END_MULTIBYTE:
+- /* Successfully matched this mbyte char */
+- addstate(l, state->out, m, off, lid, match);
+- break;
+-
+ case NFA_NOPEN:
+ case NFA_NCLOSE:
+ addstate(l, state->out, m, off, lid, match);
+--- 2497,2504 ----
+***************
+*** 2841,2847 ****
+ regsub_T *submatch;
+ regsub_T *m;
+ {
+! int c = -1;
+ int n;
+ int i = 0;
+ int result;
+--- 2789,2795 ----
+ regsub_T *submatch;
+ regsub_T *m;
+ {
+! int c;
+ int n;
+ int i = 0;
+ int result;
+***************
+*** 2859,2865 ****
+ List *listtbl[2][2];
+ List *ll;
+ int listid = 1;
+- int endnode;
+ List *thislist;
+ List *nextlist;
+ List *neglist;
+--- 2807,2812 ----
+***************
+*** 3190,3222 ****
+ break;
+ }
+
+! case NFA_MULTIBYTE:
+ case NFA_COMPOSING:
+! endnode = t->state->c + 1;
+ result = OK;
+ sta = t->state->out;
+! len = 1;
+! while (sta->c != endnode && len <= n)
+ {
+! if (reginput[len-1] != sta->c)
+! {
+! result = FAIL;
+ break;
+! }
+! len++;
+ sta = sta->out;
+ }
+
+ /* if input char length doesn't match regexp char length */
+! if (len -1 < n || sta->c != endnode)
+ result = FAIL;
+! end = t->state->out1; /* NFA_END_MULTIBYTE or
+! NFA_END_COMPOSING */
+ /* If \Z was present, then ignore composing characters */
+! if (ireg_icombine && endnode == NFA_END_COMPOSING)
+ result = 1 ^ sta->negated;
+ ADD_POS_NEG_STATE(end);
+ break;
+
+ case NFA_NEWL:
+ if (!reg_line_lbr && REG_MULTI
+--- 3137,3171 ----
+ break;
+ }
+
+! #ifdef FEAT_MBYTE
+ case NFA_COMPOSING:
+! {
+! int mc = c;
+!
+ result = OK;
+ sta = t->state->out;
+! len = 0;
+! while (sta->c != NFA_END_COMPOSING && len < n)
+ {
+! if (len > 0)
+! mc = mb_ptr2char(reginput + len);
+! if (mc != sta->c)
+ break;
+! len += mb_char2len(mc);
+ sta = sta->out;
+ }
+
+ /* if input char length doesn't match regexp char length */
+! if (len < n || sta->c != NFA_END_COMPOSING)
+ result = FAIL;
+! end = t->state->out1; /* NFA_END_COMPOSING */
+ /* If \Z was present, then ignore composing characters */
+! if (ireg_icombine)
+ result = 1 ^ sta->negated;
+ ADD_POS_NEG_STATE(end);
+ break;
++ }
++ #endif
+
+ case NFA_NEWL:
+ if (!reg_line_lbr && REG_MULTI
+***************
+*** 3425,3430 ****
+--- 3374,3387 ----
+ if (!result)
+ result = ireg_ic == TRUE
+ && MB_TOLOWER(t->state->c) == MB_TOLOWER(c);
++ #ifdef FEAT_MBYTE
++ /* If there is a composing character which is not being
++ * ignored there can be no match. Match with composing
++ * character uses NFA_COMPOSING above. */
++ if (result && enc_utf8 && !ireg_icombine
++ && n != utf_char2len(c))
++ result = FALSE;
++ #endif
+ ADD_POS_NEG_STATE(t->state);
+ break;
+ }
+*** ../vim-7.3.1010/src/testdir/test95.in 2013-05-24 20:25:28.000000000 +0200
+--- src/testdir/test95.in 2013-05-24 20:45:08.000000000 +0200
+***************
+*** 35,40 ****
+--- 35,44 ----
+ :call add(tl, ['\f\+', '&*fname ', 'fname'])
+ :call add(tl, ['\%#=1\f\+', '&*fname ', 'fname'])
+
++ :"""" Test composing character matching
++ :call add(tl, ['.ม', 'xม่x yมy', 'yม'])
++ :call add(tl, ['.ม่', 'xม่x yมy', 'xม่'])
++
+ :"""" Test \Z
+ :call add(tl, ['ú\Z', 'x'])
+
+*** ../vim-7.3.1010/src/testdir/test95.ok 2013-05-24 20:25:28.000000000 +0200
+--- src/testdir/test95.ok 2013-05-24 20:44:41.000000000 +0200
+***************
+*** 9,13 ****
+--- 9,15 ----
+ OK - \%#=1\i\+
+ OK - \f\+
+ OK - \%#=1\f\+
++ OK - .ม
++ OK - .ม่
+ OK - ú\Z
+ OK - [^[=a=]]\+
+*** ../vim-7.3.1010/src/version.c 2013-05-24 20:25:28.000000000 +0200
+--- src/version.c 2013-05-24 21:56:02.000000000 +0200
+***************
+*** 730,731 ****
+--- 730,733 ----
+ { /* Add new patch number below this line */
++ /**/
++ 1011,
+ /**/
+
+--
+If you had to identify, in one word, the reason why the
+human race has not achieved, and never will achieve, its
+full potential, that word would be "meetings."
+
+ /// Bram Moolenaar -- Bram at Moolenaar.net -- http://www.Moolenaar.net \\\
+/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
+\\\ an exciting new programming language -- http://www.Zimbu.org ///
+ \\\ help me help AIDS victims -- http://ICCF-Holland.org ///
More information about the scm-commits
mailing list