[vim] - patchlevel 1011

Karsten Hopp karsten at fedoraproject.org
Tue Jun 4 10:07:53 UTC 2013


commit ca9c198de7c572cc49770f24800e8b91e403ddc3
Author: Karsten Hopp <karsten at redhat.com>
Date:   Tue Jun 4 12:05:56 2013 +0200

    - patchlevel 1011

 7.3.1011 |  475 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 475 insertions(+), 0 deletions(-)
---
diff --git a/7.3.1011 b/7.3.1011
new file mode 100644
index 0000000..0e2304b
--- /dev/null
+++ b/7.3.1011
@@ -0,0 +1,475 @@
+To: vim_dev at googlegroups.com
+Subject: Patch 7.3.1011
+Fcc: outbox
+From: Bram Moolenaar <Bram at moolenaar.net>
+Mime-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+------------
+
+Patch 7.3.1011
+Problem:    New regexp engine is inefficient with multi-byte characters.
+Solution:   Handle a character at a time instead of a byte at a time.  Also
+            make \Z partly work.
+Files:      src/regexp_nfa.c, src/testdir/test95.in, src/testdir/test95.ok
+
+
+*** ../vim-7.3.1010/src/regexp_nfa.c	2013-05-24 20:25:28.000000000 +0200
+--- src/regexp_nfa.c	2013-05-24 21:49:43.000000000 +0200
+***************
+*** 46,54 ****
+      NFA_NCLOSE,			    /* End of subexpr. marked with \%( ... \) */
+      NFA_START_INVISIBLE,
+      NFA_END_INVISIBLE,
+-     NFA_MULTIBYTE,		    /* Next nodes in NFA are part of the same
+- 				       multibyte char */
+-     NFA_END_MULTIBYTE,		    /* End of multibyte char in the NFA */
+      NFA_COMPOSING,		    /* Next nodes in NFA are part of the
+  				       composing multibyte char */
+      NFA_END_COMPOSING,		    /* End of a composing char in the NFA */
+--- 46,51 ----
+***************
+*** 195,220 ****
+  		    *post_ptr++ = c;		\
+  		} while (0)
+  
+- #define EMIT_MBYTE(c)					    \
+- 			len = (*mb_char2bytes)(c, buf);	    \
+- 			EMIT(buf[0]);			    \
+- 			for (i = 1; i < len; i++)	    \
+- 			{				    \
+- 			    EMIT(buf[i]);		    \
+- 			    EMIT(NFA_CONCAT);		    \
+- 			}				    \
+- 			EMIT(NFA_MULTIBYTE);
+- 
+- #define EMIT_COMPOSING_UTF(input)			    \
+- 			len = utfc_ptr2len(input);	    \
+- 			EMIT(input[0]);			    \
+- 			for (i = 1; i < len; i++)	    \
+- 			{				    \
+- 			    EMIT(input[i]);		    \
+- 			    EMIT(NFA_CONCAT);		    \
+- 			}				    \
+- 			EMIT(NFA_COMPOSING);
+- 
+  /*
+   * Initialize internal variables before NFA compilation.
+   * Return OK on success, FAIL otherwise.
+--- 192,197 ----
+***************
+*** 611,618 ****
+  #ifdef FEAT_MBYTE
+      char_u	*old_regparse = regparse;
+      int		clen;
+-     int		len;
+-     static char_u	buf[30];
+      int		i;
+  #endif
+      int		extra = 0;
+--- 588,593 ----
+***************
+*** 845,858 ****
+  		    return FAIL;
+  
+  		    c = coll_get_char();
+! #ifdef FEAT_MBYTE
+! 		    if ((*mb_char2len)(c) > 1)
+! 		    {
+! 			EMIT_MBYTE(c);
+! 		    }
+! 		    else
+! #endif
+! 			EMIT(c);
+  		    break;
+  
+  		/* Catch \%^ and \%$ regardless of where they appear in the
+--- 820,826 ----
+  		    return FAIL;
+  
+  		    c = coll_get_char();
+! 		    EMIT(c);
+  		    break;
+  
+  		/* Catch \%^ and \%$ regardless of where they appear in the
+***************
+*** 1135,1146 ****
+  			     * skip it. */
+  			    for (c = startc + 1; c <= endc; c++)
+  			    {
+! 				if ((*mb_char2len)(c) > 1)
+! 				{
+! 				    EMIT_MBYTE(c);
+! 				}
+! 				else
+! 				    EMIT(c);
+  				TRY_NEG();
+  				EMIT_GLUE();
+  			    }
+--- 1103,1109 ----
+  			     * skip it. */
+  			    for (c = startc + 1; c <= endc; c++)
+  			    {
+! 				EMIT(c);
+  				TRY_NEG();
+  				EMIT_GLUE();
+  			    }
+***************
+*** 1187,1200 ****
+  			if (got_coll_char == TRUE && startc == 0)
+  			    EMIT(0x0a);
+  			else
+! #ifdef FEAT_MBYTE
+! 			    if ((*mb_char2len)(startc) > 1)
+! 			    {
+! 				EMIT_MBYTE(startc);
+! 			    }
+! 			    else
+! #endif
+! 				EMIT(startc);
+  			TRY_NEG();
+  			EMIT_GLUE();
+  		    }
+--- 1150,1156 ----
+  			if (got_coll_char == TRUE && startc == 0)
+  			    EMIT(0x0a);
+  			else
+! 			    EMIT(startc);
+  			TRY_NEG();
+  			EMIT_GLUE();
+  		    }
+***************
+*** 1242,1271 ****
+  		int	plen;
+  
+  nfa_do_multibyte:
+! 		/* length of current char, with composing chars,
+! 		 * from pointer */
+! 		plen = (*mb_ptr2len)(old_regparse);
+! 		if (enc_utf8 && clen != plen)
+! 		{
+! 		    /* A composing character is always handled as a
+! 		     * separate atom, surrounded by NFA_COMPOSING and
+! 		     * NFA_END_COMPOSING. Note that right now we are
+  		     * building the postfix form, not the NFA itself;
+  		     * a composing char could be: a, b, c, NFA_COMPOSING
+! 		     * where 'a', 'b', 'c' are chars with codes > 256.
+! 		     */
+! 		    EMIT_COMPOSING_UTF(old_regparse);
+  		    regparse = old_regparse + plen;
+  		}
+  		else
+- 		    /* A multi-byte character is always handled as a
+- 		     * separate atom, surrounded by NFA_MULTIBYTE and
+- 		     * NFA_END_MULTIBYTE */
+- 		    if (plen > 1)
+- 		    {
+- 			EMIT_MBYTE(c);
+- 		    }
+- 		    else
+  #endif
+  		{
+  		    c = no_Magic(c);
+--- 1198,1227 ----
+  		int	plen;
+  
+  nfa_do_multibyte:
+! 		/* Length of current char with composing chars. */
+! 		if (enc_utf8 && clen != (plen = (*mb_ptr2len)(old_regparse)))
+! 		{
+! 		    /* A base character plus composing characters.
+! 		     * This requires creating a separate atom as if enclosing
+! 		     * the characters in (), where NFA_COMPOSING is the ( and
+! 		     * NFA_END_COMPOSING is the ). Note that right now we are
+  		     * building the postfix form, not the NFA itself;
+  		     * a composing char could be: a, b, c, NFA_COMPOSING
+! 		     * where 'b' and 'c' are chars with codes > 256. */
+! 		    i = 0;	/* byte offset within the sequence */
+! 		    for (;;)
+! 		    {
+! 			EMIT(c);
+! 			if (i > 0)
+! 			    EMIT(NFA_CONCAT);
+! 			if ((i += utf_char2len(c)) >= plen)
+! 			    break;	/* consumed all plen bytes */
+! 			c = utf_ptr2char(old_regparse + i);
+! 		    }
+! 		    EMIT(NFA_COMPOSING);
+  		    regparse = old_regparse + plen;
+  		}
+  		else
+  #endif
+  		{
+  		    c = no_Magic(c);
+***************
+*** 1702,1710 ****
+  	case NFA_START_INVISIBLE:   STRCPY(code, "NFA_START_INVISIBLE"); break;
+  	case NFA_END_INVISIBLE:	    STRCPY(code, "NFA_END_INVISIBLE"); break;
+  
+- 	case NFA_MULTIBYTE:	    STRCPY(code, "NFA_MULTIBYTE"); break;
+- 	case NFA_END_MULTIBYTE:	    STRCPY(code, "NFA_END_MULTIBYTE"); break;
+- 
+  	case NFA_COMPOSING:	    STRCPY(code, "NFA_COMPOSING"); break;
+  	case NFA_END_COMPOSING:	    STRCPY(code, "NFA_END_COMPOSING"); break;
+  
+--- 1658,1663 ----
+***************
+*** 2194,2200 ****
+  	    }
+  	    e1 = POP();
+  	    e1.start->negated = TRUE;
+! 	    if (e1.start->c == NFA_MULTIBYTE || e1.start->c == NFA_COMPOSING)
+  		e1.start->out1->negated = TRUE;
+  	    PUSH(e1);
+  	    break;
+--- 2147,2153 ----
+  	    }
+  	    e1 = POP();
+  	    e1.start->negated = TRUE;
+! 	    if (e1.start->c == NFA_COMPOSING)
+  		e1.start->out1->negated = TRUE;
+  	    PUSH(e1);
+  	    break;
+***************
+*** 2311,2316 ****
+--- 2264,2279 ----
+  	    PUSH(frag(s, list1(&s1->out)));
+  	    break;
+  
++ 	case NFA_COMPOSING:	/* char with composing char */
++ #if 0
++ 	    /* TODO */
++ 	    if (regflags & RF_ICOMBINE)
++ 	    {
++ 		goto normalchar;
++ 	    }
++ #endif
++ 	    /* FALLTHROUGH */
++ 
+  	case NFA_MOPEN + 0:	/* Submatch */
+  	case NFA_MOPEN + 1:
+  	case NFA_MOPEN + 2:
+***************
+*** 2322,2329 ****
+  	case NFA_MOPEN + 8:
+  	case NFA_MOPEN + 9:
+  	case NFA_NOPEN:		/* \%( "Invisible Submatch" */
+- 	case NFA_MULTIBYTE:	/* mbyte char */
+- 	case NFA_COMPOSING:	/* composing char */
+  	    if (nfa_calc_size == TRUE)
+  	    {
+  		nstate += 2;
+--- 2285,2290 ----
+***************
+*** 2336,2344 ****
+  		case NFA_NOPEN:
+  		    mclose = NFA_NCLOSE;
+  		    break;
+- 		case NFA_MULTIBYTE:
+- 		    mclose = NFA_END_MULTIBYTE;
+- 		    break;
+  		case NFA_COMPOSING:
+  		    mclose = NFA_END_COMPOSING;
+  		    break;
+--- 2297,2302 ----
+***************
+*** 2377,2385 ****
+  		goto theend;
+  	    patch(e.out, s1);
+  
+! 	    if (mopen == NFA_MULTIBYTE || mopen == NFA_COMPOSING)
+! 		/* MULTIBYTE->out1 = END_MULTIBYTE
+! 		* COMPOSING->out1 = END_COMPOSING */
+  		patch(list1(&s->out1), s1);
+  
+  	    PUSH(frag(s, list1(&s1->out)));
+--- 2335,2342 ----
+  		goto theend;
+  	    patch(e.out, s1);
+  
+! 	    if (mopen == NFA_COMPOSING)
+! 		/* COMPOSING->out1 = END_COMPOSING */
+  		patch(list1(&s->out1), s1);
+  
+  	    PUSH(frag(s, list1(&s1->out)));
+***************
+*** 2540,2556 ****
+  	case NFA_COMPOSING:
+  	    /* nfa_regmatch() will match all the bytes of this composing char. */
+  	    break;
+- 
+- 	case NFA_MULTIBYTE:
+- 	    /* nfa_regmatch() will match all the bytes of this multibyte char. */
+- 	    break;
+  #endif
+  
+- 	case NFA_END_MULTIBYTE:
+- 	    /* Successfully matched this mbyte char */
+- 	    addstate(l, state->out, m, off, lid, match);
+- 	    break;
+- 
+  	case NFA_NOPEN:
+  	case NFA_NCLOSE:
+  	    addstate(l, state->out, m, off, lid, match);
+--- 2497,2504 ----
+***************
+*** 2841,2847 ****
+      regsub_T		*submatch;
+      regsub_T		*m;
+  {
+!     int		c = -1;
+      int		n;
+      int		i = 0;
+      int		result;
+--- 2789,2795 ----
+      regsub_T		*submatch;
+      regsub_T		*m;
+  {
+!     int		c;
+      int		n;
+      int		i = 0;
+      int		result;
+***************
+*** 2859,2865 ****
+      List	*listtbl[2][2];
+      List	*ll;
+      int		listid = 1;
+-     int		endnode;
+      List	*thislist;
+      List	*nextlist;
+      List	*neglist;
+--- 2807,2812 ----
+***************
+*** 3190,3222 ****
+  		break;
+  	    }
+  
+! 	    case NFA_MULTIBYTE:
+  	    case NFA_COMPOSING:
+! 	        endnode = t->state->c + 1;
+  		result = OK;
+  		sta = t->state->out;
+! 		len = 1;
+! 		while (sta->c != endnode && len <= n)
+  		{
+! 		    if (reginput[len-1] != sta->c)
+! 		    {
+! 			result = FAIL;
+  			break;
+! 		    }
+! 		    len++;
+  		    sta = sta->out;
+  		}
+  
+  		/* if input char length doesn't match regexp char length */
+! 		if (len -1 < n || sta->c != endnode)
+  		    result = FAIL;
+! 		end = t->state->out1;	    /* NFA_END_MULTIBYTE or
+! 					       NFA_END_COMPOSING */
+  		/* If \Z was present, then ignore composing characters */
+! 		if (ireg_icombine && endnode == NFA_END_COMPOSING)
+  		    result = 1 ^ sta->negated;
+  		ADD_POS_NEG_STATE(end);
+  		break;
+  
+  	    case NFA_NEWL:
+  		if (!reg_line_lbr && REG_MULTI
+--- 3137,3171 ----
+  		break;
+  	    }
+  
+! #ifdef FEAT_MBYTE
+  	    case NFA_COMPOSING:
+! 	    {
+! 		int mc = c;
+! 
+  		result = OK;
+  		sta = t->state->out;
+! 		len = 0;
+! 		while (sta->c != NFA_END_COMPOSING && len < n)
+  		{
+! 		    if (len > 0)
+! 			mc = mb_ptr2char(reginput + len);
+! 		    if (mc != sta->c)
+  			break;
+! 		    len += mb_char2len(mc);
+  		    sta = sta->out;
+  		}
+  
+  		/* if input char length doesn't match regexp char length */
+! 		if (len < n || sta->c != NFA_END_COMPOSING)
+  		    result = FAIL;
+! 		end = t->state->out1;	    /* NFA_END_COMPOSING */
+  		/* If \Z was present, then ignore composing characters */
+! 		if (ireg_icombine)
+  		    result = 1 ^ sta->negated;
+  		ADD_POS_NEG_STATE(end);
+  		break;
++ 	    }
++ #endif
+  
+  	    case NFA_NEWL:
+  		if (!reg_line_lbr && REG_MULTI
+***************
+*** 3425,3430 ****
+--- 3374,3387 ----
+  		if (!result)
+  		    result = ireg_ic == TRUE
+  				&& MB_TOLOWER(t->state->c) == MB_TOLOWER(c);
++ #ifdef FEAT_MBYTE
++ 		/* If there is a composing character which is not being
++ 		 * ignored there can be no match. Match with composing
++ 		 * character uses NFA_COMPOSING above. */
++ 		if (result && enc_utf8 && !ireg_icombine
++ 						      && n != utf_char2len(c))
++ 		    result = FALSE;
++ #endif
+  		ADD_POS_NEG_STATE(t->state);
+  		break;
+  	    }
+*** ../vim-7.3.1010/src/testdir/test95.in	2013-05-24 20:25:28.000000000 +0200
+--- src/testdir/test95.in	2013-05-24 20:45:08.000000000 +0200
+***************
+*** 35,40 ****
+--- 35,44 ----
+  :call add(tl, ['\f\+', '&*Ÿfname ', 'fname'])
+  :call add(tl, ['\%#=1\f\+', '&*Ÿfname ', 'fname'])
+  
++ :"""" Test composing character matching
++ :call add(tl, ['.ม', 'xม่x yมy', 'yม'])
++ :call add(tl, ['.ม่', 'xม่x yมy', 'xม่'])
++ 
+  :"""" Test \Z
+  :call add(tl, ['ú\Z', 'x'])
+  
+*** ../vim-7.3.1010/src/testdir/test95.ok	2013-05-24 20:25:28.000000000 +0200
+--- src/testdir/test95.ok	2013-05-24 20:44:41.000000000 +0200
+***************
+*** 9,13 ****
+--- 9,15 ----
+  OK - \%#=1\i\+
+  OK - \f\+
+  OK - \%#=1\f\+
++ OK - .ม
++ OK - .ม่
+  OK - ú\Z
+  OK - [^[=a=]]\+
+*** ../vim-7.3.1010/src/version.c	2013-05-24 20:25:28.000000000 +0200
+--- src/version.c	2013-05-24 21:56:02.000000000 +0200
+***************
+*** 730,731 ****
+--- 730,733 ----
+  {   /* Add new patch number below this line */
++ /**/
++     1011,
+  /**/
+
+-- 
+If you had to identify, in one word, the reason why the
+human race has not achieved, and never will achieve, its
+full potential, that word would be "meetings."
+
+ /// Bram Moolenaar -- Bram at Moolenaar.net -- http://www.Moolenaar.net   \\\
+///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
+\\\  an exciting new programming language -- http://www.Zimbu.org        ///
+ \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///


More information about the scm-commits mailing list