[vim] - patchlevel 1137

Karsten Hopp karsten at fedoraproject.org
Thu Jun 13 22:32:23 UTC 2013


commit 5e4d4a98031aeb9fc08597d3f2a60818ee5feccf
Author: Karsten Hopp <karsten at redhat.com>
Date:   Fri Jun 14 00:28:22 2013 +0200

    - patchlevel 1137

 7.3.1137 |  966 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 966 insertions(+), 0 deletions(-)
---
diff --git a/7.3.1137 b/7.3.1137
new file mode 100644
index 0000000..fb26417
--- /dev/null
+++ b/7.3.1137
@@ -0,0 +1,966 @@
+To: vim_dev at googlegroups.com
+Subject: Patch 7.3.1137
+Fcc: outbox
+From: Bram Moolenaar <Bram at moolenaar.net>
+Mime-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+------------
+
+Patch 7.3.1137
+Problem:    New regexp engine: collections are slow.
+Solution:   Handle all characters in one go.
+Files:	    src/regexp_nfa.c
+
+
+*** ../vim-7.3.1136/src/regexp_nfa.c	2013-06-06 18:46:00.000000000 +0200
+--- src/regexp_nfa.c	2013-06-07 13:40:58.000000000 +0200
+***************
+*** 34,48 ****
+      NFA_SPLIT = -1024,
+      NFA_MATCH,
+      NFA_SKIP_CHAR,		    /* matches a 0-length char */
+-     NFA_END_NEG_RANGE,		    /* Used when expanding [^ab] */
+  
+!     NFA_CONCAT,
+      NFA_OR,
+      NFA_STAR,			    /* greedy * */
+      NFA_STAR_NONGREEDY,		    /* non-greedy * */
+      NFA_QUEST,			    /* greedy \? */
+      NFA_QUEST_NONGREEDY,	    /* non-greedy \? */
+-     NFA_NOT,			    /* used for [^ab] negated char ranges */
+  
+      NFA_BOL,			    /* ^    Begin line */
+      NFA_EOL,			    /* $    End line */
+--- 34,56 ----
+      NFA_SPLIT = -1024,
+      NFA_MATCH,
+      NFA_SKIP_CHAR,		    /* matches a 0-length char */
+  
+!     NFA_START_COLL,		    /* [abc] start */
+!     NFA_END_COLL,		    /* [abc] end */
+!     NFA_START_NEG_COLL,		    /* [^abc] start */
+!     NFA_END_NEG_COLL,		    /* [^abc] end (only used in postfix) */
+!     NFA_RANGE,			    /* range of the two previous items (only
+! 				     * used in postfix) */
+!     NFA_RANGE_MIN,		    /* low end of a range  */
+!     NFA_RANGE_MAX,		    /* high end of a range  */
+! 
+!     NFA_CONCAT,			    /* concatenate two previous items (only
+! 				     * used in postfix) */
+      NFA_OR,
+      NFA_STAR,			    /* greedy * */
+      NFA_STAR_NONGREEDY,		    /* non-greedy * */
+      NFA_QUEST,			    /* greedy \? */
+      NFA_QUEST_NONGREEDY,	    /* non-greedy \? */
+  
+      NFA_BOL,			    /* ^    Begin line */
+      NFA_EOL,			    /* $    End line */
+***************
+*** 260,266 ****
+  static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
+  static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
+  static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
+! static int nfa_emit_equi_class __ARGS((int c, int neg));
+  static int nfa_regatom __ARGS((void));
+  static int nfa_regpiece __ARGS((void));
+  static int nfa_regconcat __ARGS((void));
+--- 268,274 ----
+  static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
+  static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
+  static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
+! static int nfa_emit_equi_class __ARGS((int c));
+  static int nfa_regatom __ARGS((void));
+  static int nfa_regpiece __ARGS((void));
+  static int nfa_regconcat __ARGS((void));
+***************
+*** 664,684 ****
+   * NOTE! When changing this function, also update reg_equi_class()
+   */
+      static int
+! nfa_emit_equi_class(c, neg)
+      int	    c;
+-     int	    neg;
+  {
+!     int	first = TRUE;
+!     int	glue = neg == TRUE ? NFA_CONCAT : NFA_OR;
+! #define EMIT2(c)		\
+! 	EMIT(c);		\
+! 	if (neg == TRUE) {	\
+! 	    EMIT(NFA_NOT);	\
+! 	}			\
+! 	if (first == FALSE)	\
+! 	    EMIT(glue);		\
+! 	else			\
+! 	    first = FALSE;	\
+  
+  #ifdef FEAT_MBYTE
+      if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
+--- 672,681 ----
+   * NOTE! When changing this function, also update reg_equi_class()
+   */
+      static int
+! nfa_emit_equi_class(c)
+      int	    c;
+  {
+! #define EMIT2(c)   EMIT(c); EMIT(NFA_CONCAT);
+  
+  #ifdef FEAT_MBYTE
+      if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
+***************
+*** 687,770 ****
+      {
+  	switch (c)
+  	{
+! 	    case 'A': case '\300': case '\301': case '\302':
+! 	    case '\303': case '\304': case '\305':
+! 		    EMIT2('A');	    EMIT2('\300');  EMIT2('\301');
+! 		    EMIT2('\302');  EMIT2('\303');  EMIT2('\304');
+! 		    EMIT2('\305');
+  		    return OK;
+  
+! 	    case 'C': case '\307':
+! 		    EMIT2('C');	    EMIT2('\307');
+  		    return OK;
+  
+! 	    case 'E': case '\310': case '\311': case '\312': case '\313':
+! 		    EMIT2('E');	    EMIT2('\310');  EMIT2('\311');
+! 		    EMIT2('\312');  EMIT2('\313');
+  		    return OK;
+  
+! 	    case 'I': case '\314': case '\315': case '\316': case '\317':
+! 		    EMIT2('I');	    EMIT2('\314');  EMIT2('\315');
+! 		    EMIT2('\316');  EMIT2('\317');
+  		    return OK;
+  
+! 	    case 'N': case '\321':
+! 		    EMIT2('N');	    EMIT2('\321');
+  		    return OK;
+  
+! 	    case 'O': case '\322': case '\323': case '\324': case '\325':
+! 	    case '\326':
+! 		    EMIT2('O');	    EMIT2('\322');  EMIT2('\323');
+! 		    EMIT2('\324');  EMIT2('\325');  EMIT2('\326');
+  		    return OK;
+  
+! 	    case 'U': case '\331': case '\332': case '\333': case '\334':
+! 		    EMIT2('U');	    EMIT2('\331');  EMIT2('\332');
+! 		    EMIT2('\333');  EMIT2('\334');
+  		    return OK;
+  
+! 	    case 'Y': case '\335':
+! 		    EMIT2('Y');	    EMIT2('\335');
+  		    return OK;
+  
+! 	    case 'a': case '\340': case '\341': case '\342':
+! 	    case '\343': case '\344': case '\345':
+! 		    EMIT2('a');	    EMIT2('\340');  EMIT2('\341');
+! 		    EMIT2('\342');  EMIT2('\343');  EMIT2('\344');
+! 		    EMIT2('\345');
+  		    return OK;
+  
+! 	    case 'c': case '\347':
+! 		    EMIT2('c');	    EMIT2('\347');
+  		    return OK;
+  
+! 	    case 'e': case '\350': case '\351': case '\352': case '\353':
+! 		    EMIT2('e');	    EMIT2('\350');  EMIT2('\351');
+! 		    EMIT2('\352');  EMIT2('\353');
+  		    return OK;
+  
+! 	    case 'i': case '\354': case '\355': case '\356': case '\357':
+! 		    EMIT2('i');	    EMIT2('\354');  EMIT2('\355');
+! 		    EMIT2('\356');  EMIT2('\357');
+  		    return OK;
+  
+! 	    case 'n': case '\361':
+! 		    EMIT2('n');	    EMIT2('\361');
+  		    return OK;
+  
+! 	    case 'o': case '\362': case '\363': case '\364': case '\365':
+! 	    case '\366':
+! 		    EMIT2('o');	    EMIT2('\362');  EMIT2('\363');
+! 		    EMIT2('\364');  EMIT2('\365');  EMIT2('\366');
+  		    return OK;
+  
+! 	    case 'u': case '\371': case '\372': case '\373': case '\374':
+! 		    EMIT2('u');	    EMIT2('\371');  EMIT2('\372');
+! 		    EMIT2('\373');  EMIT2('\374');
+  		    return OK;
+  
+! 	    case 'y': case '\375': case '\377':
+! 		    EMIT2('y');	    EMIT2('\375');  EMIT2('\377');
+  		    return OK;
+  
+  	    default:
+--- 684,767 ----
+      {
+  	switch (c)
+  	{
+! 	    case 'A': case 0300: case 0301: case 0302:
+! 	    case 0303: case 0304: case 0305:
+! 		    EMIT2('A');	    EMIT2(0300);  EMIT2(0301);
+! 		    EMIT2(0302);  EMIT2(0303);  EMIT2(0304);
+! 		    EMIT2(0305);
+  		    return OK;
+  
+! 	    case 'C': case 0307:
+! 		    EMIT2('C');	    EMIT2(0307);
+  		    return OK;
+  
+! 	    case 'E': case 0310: case 0311: case 0312: case 0313:
+! 		    EMIT2('E');	    EMIT2(0310);  EMIT2(0311);
+! 		    EMIT2(0312);  EMIT2(0313);
+  		    return OK;
+  
+! 	    case 'I': case 0314: case 0315: case 0316: case 0317:
+! 		    EMIT2('I');	    EMIT2(0314);  EMIT2(0315);
+! 		    EMIT2(0316);  EMIT2(0317);
+  		    return OK;
+  
+! 	    case 'N': case 0321:
+! 		    EMIT2('N');	    EMIT2(0321);
+  		    return OK;
+  
+! 	    case 'O': case 0322: case 0323: case 0324: case 0325:
+! 	    case 0326:
+! 		    EMIT2('O');	    EMIT2(0322);  EMIT2(0323);
+! 		    EMIT2(0324);  EMIT2(0325);  EMIT2(0326);
+  		    return OK;
+  
+! 	    case 'U': case 0331: case 0332: case 0333: case 0334:
+! 		    EMIT2('U');	    EMIT2(0331);  EMIT2(0332);
+! 		    EMIT2(0333);  EMIT2(0334);
+  		    return OK;
+  
+! 	    case 'Y': case 0335:
+! 		    EMIT2('Y');	    EMIT2(0335);
+  		    return OK;
+  
+! 	    case 'a': case 0340: case 0341: case 0342:
+! 	    case 0343: case 0344: case 0345:
+! 		    EMIT2('a');	    EMIT2(0340);  EMIT2(0341);
+! 		    EMIT2(0342);  EMIT2(0343);  EMIT2(0344);
+! 		    EMIT2(0345);
+  		    return OK;
+  
+! 	    case 'c': case 0347:
+! 		    EMIT2('c');	    EMIT2(0347);
+  		    return OK;
+  
+! 	    case 'e': case 0350: case 0351: case 0352: case 0353:
+! 		    EMIT2('e');	    EMIT2(0350);  EMIT2(0351);
+! 		    EMIT2(0352);  EMIT2(0353);
+  		    return OK;
+  
+! 	    case 'i': case 0354: case 0355: case 0356: case 0357:
+! 		    EMIT2('i');	    EMIT2(0354);  EMIT2(0355);
+! 		    EMIT2(0356);  EMIT2(0357);
+  		    return OK;
+  
+! 	    case 'n': case 0361:
+! 		    EMIT2('n');	    EMIT2(0361);
+  		    return OK;
+  
+! 	    case 'o': case 0362: case 0363: case 0364: case 0365:
+! 	    case 0366:
+! 		    EMIT2('o');	    EMIT2(0362);  EMIT2(0363);
+! 		    EMIT2(0364);  EMIT2(0365);  EMIT2(0366);
+  		    return OK;
+  
+! 	    case 'u': case 0371: case 0372: case 0373: case 0374:
+! 		    EMIT2('u');	    EMIT2(0371);  EMIT2(0372);
+! 		    EMIT2(0373);  EMIT2(0374);
+  		    return OK;
+  
+! 	    case 'y': case 0375: case 0377:
+! 		    EMIT2('y');	    EMIT2(0375);  EMIT2(0377);
+  		    return OK;
+  
+  	    default:
+***************
+*** 811,824 ****
+      char_u	*old_regparse = regparse;
+  #endif
+      int		extra = 0;
+-     int		first;
+      int		emit_range;
+      int		negated;
+      int		result;
+      int		startc = -1;
+      int		endc = -1;
+      int		oldstartc = -1;
+-     int		glue;		/* ID that will "glue" nodes together */
+  
+      c = getchr();
+      switch (c)
+--- 808,819 ----
+***************
+*** 927,934 ****
+  
+  	case Magic('n'):
+  	    if (reg_string)
+! 	    /* In a string "\n" matches a newline character. */
+! 	    EMIT(NL);
+  	    else
+  	    {
+  		/* In buffer text "\n" matches the end of a line. */
+--- 922,929 ----
+  
+  	case Magic('n'):
+  	    if (reg_string)
+! 		/* In a string "\n" matches a newline character. */
+! 		EMIT(NL);
+  	    else
+  	    {
+  		/* In buffer text "\n" matches the end of a line. */
+***************
+*** 1160,1191 ****
+  	case Magic('['):
+  collection:
+  	    /*
+! 	     * Glue is emitted between several atoms from the [].
+! 	     * It is either NFA_OR, or NFA_CONCAT.
+! 	     *
+! 	     * [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation)
+! 	     * [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT
+! 	     *		NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix
+! 	     *		notation)
+! 	     *
+  	     */
+  
+- 
+- /* Emit negation atoms, if needed.
+-  * The CONCAT below merges the NOT with the previous node. */
+- #define TRY_NEG()		    \
+- 	    if (negated == TRUE)    \
+- 	    {			    \
+- 		EMIT(NFA_NOT);	    \
+- 	    }
+- 
+- /* Emit glue between important nodes : CONCAT or OR. */
+- #define EMIT_GLUE()		    \
+- 	    if (first == FALSE)	    \
+- 		EMIT(glue);	    \
+- 	    else		    \
+- 		first = FALSE;
+- 
+  	    p = regparse;
+  	    endp = skip_anyof(p);
+  	    if (*endp == ']')
+--- 1155,1169 ----
+  	case Magic('['):
+  collection:
+  	    /*
+! 	     * [abc]  uses NFA_START_COLL - NFA_END_COLL
+! 	     * [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
+! 	     * Each character is produced as a regular state, using
+! 	     * NFA_CONCAT to bind them together.
+! 	     * Besides normal characters there can be:
+! 	     * - character classes  NFA_CLASS_*
+! 	     * - ranges, two characters followed by NFA_RANGE.
+  	     */
+  
+  	    p = regparse;
+  	    endp = skip_anyof(p);
+  	    if (*endp == ']')
+***************
+*** 1216,1236 ****
+  		 * version that turns [abc] into 'a' OR 'b' OR 'c'
+  		 */
+  		startc = endc = oldstartc = -1;
+- 		first = TRUE;	    /* Emitting first atom in this sequence? */
+  		negated = FALSE;
+- 		glue = NFA_OR;
+  		if (*regparse == '^')			/* negated range */
+  		{
+  		    negated = TRUE;
+- 		    glue = NFA_CONCAT;
+  		    mb_ptr_adv(regparse);
+  		}
+  		if (*regparse == '-')
+  		{
+  		    startc = '-';
+  		    EMIT(startc);
+! 		    TRY_NEG();
+! 		    EMIT_GLUE();
+  		    mb_ptr_adv(regparse);
+  		}
+  		/* Emit the OR branches for each character in the [] */
+--- 1194,1213 ----
+  		 * version that turns [abc] into 'a' OR 'b' OR 'c'
+  		 */
+  		startc = endc = oldstartc = -1;
+  		negated = FALSE;
+  		if (*regparse == '^')			/* negated range */
+  		{
+  		    negated = TRUE;
+  		    mb_ptr_adv(regparse);
++ 		    EMIT(NFA_START_NEG_COLL);
+  		}
++ 		else
++ 		    EMIT(NFA_START_COLL);
+  		if (*regparse == '-')
+  		{
+  		    startc = '-';
+  		    EMIT(startc);
+! 		    EMIT(NFA_CONCAT);
+  		    mb_ptr_adv(regparse);
+  		}
+  		/* Emit the OR branches for each character in the [] */
+***************
+*** 1306,1325 ****
+  				    EMIT(NFA_CLASS_ESCAPE);
+  				    break;
+  			    }
+! 			    TRY_NEG();
+! 			    EMIT_GLUE();
+  			    continue;
+  			}
+  			/* Try equivalence class [=a=] and the like */
+  			if (equiclass != 0)
+  			{
+! 			    result = nfa_emit_equi_class(equiclass, negated);
+  			    if (result == FAIL)
+  			    {
+  				/* should never happen */
+  				EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
+  			    }
+- 			    EMIT_GLUE();
+  			    continue;
+  			}
+  			/* Try collating class like [. .]  */
+--- 1283,1300 ----
+  				    EMIT(NFA_CLASS_ESCAPE);
+  				    break;
+  			    }
+! 			    EMIT(NFA_CONCAT);
+  			    continue;
+  			}
+  			/* Try equivalence class [=a=] and the like */
+  			if (equiclass != 0)
+  			{
+! 			    result = nfa_emit_equi_class(equiclass);
+  			    if (result == FAIL)
+  			    {
+  				/* should never happen */
+  				EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
+  			    }
+  			    continue;
+  			}
+  			/* Try collating class like [. .]  */
+***************
+*** 1391,1409 ****
+  			startc = oldstartc;
+  			if (startc > endc)
+  			    EMSG_RET_FAIL(_(e_invrange));
+  #ifdef FEAT_MBYTE
+! 			if (has_mbyte && ((*mb_char2len)(startc) > 1
+  				    || (*mb_char2len)(endc) > 1))
+  			{
+! 			    if (endc > startc + 256)
+! 				EMSG_RET_FAIL(_(e_invrange));
+! 			    /* Emit the range. "startc" was already emitted, so
+! 			     * skip it. */
+  			    for (c = startc + 1; c <= endc; c++)
+  			    {
+  				EMIT(c);
+! 				TRY_NEG();
+! 				EMIT_GLUE();
+  			    }
+  			}
+  			else
+--- 1366,1397 ----
+  			startc = oldstartc;
+  			if (startc > endc)
+  			    EMSG_RET_FAIL(_(e_invrange));
++ 
++ 			if (endc > startc + 2)
++ 			{
++ 			    /* Emit a range instead of the sequence of
++ 			     * individual characters. */
++ 			    if (startc == 0)
++ 				/* \x00 is translated to \x0a, start at \x01. */
++ 				EMIT(1);
++ 			    else
++ 				--post_ptr; /* remove NFA_CONCAT */
++ 			    EMIT(endc);
++ 			    EMIT(NFA_RANGE);
++ 			    EMIT(NFA_CONCAT);
++ 			}
++ 			else
+  #ifdef FEAT_MBYTE
+! 			     if (has_mbyte && ((*mb_char2len)(startc) > 1
+  				    || (*mb_char2len)(endc) > 1))
+  			{
+! 			    /* Emit the characters in the range.
+! 			     * "startc" was already emitted, so skip it.
+! 			     * */
+  			    for (c = startc + 1; c <= endc; c++)
+  			    {
+  				EMIT(c);
+! 				EMIT(NFA_CONCAT);
+  			    }
+  			}
+  			else
+***************
+*** 1425,1432 ****
+  #endif
+  				{
+  				    EMIT(c);
+! 				    TRY_NEG();
+! 				    EMIT_GLUE();
+  				}
+  			}
+  			emit_range = FALSE;
+--- 1413,1419 ----
+  #endif
+  				{
+  				    EMIT(c);
+! 				    EMIT(NFA_CONCAT);
+  				}
+  			}
+  			emit_range = FALSE;
+***************
+*** 1434,1456 ****
+  		    }
+  		    else
+  		    {
+! 			/*
+! 			 * This char (startc) is not part of a range. Just
+  			 * emit it.
+- 			 *
+  			 * Normally, simply emit startc. But if we get char
+  			 * code=0 from a collating char, then replace it with
+  			 * 0x0a.
+- 			 *
+  			 * This is needed to completely mimic the behaviour of
+! 			 * the backtracking engine.
+! 			 */
+! 			if (got_coll_char == TRUE && startc == 0)
+! 			    EMIT(0x0a);
+  			else
+! 			    EMIT(startc);
+! 			TRY_NEG();
+! 			EMIT_GLUE();
+  		    }
+  
+  		    mb_ptr_adv(regparse);
+--- 1421,1449 ----
+  		    }
+  		    else
+  		    {
+! 			/* This char (startc) is not part of a range. Just
+  			 * emit it.
+  			 * Normally, simply emit startc. But if we get char
+  			 * code=0 from a collating char, then replace it with
+  			 * 0x0a.
+  			 * This is needed to completely mimic the behaviour of
+! 			 * the backtracking engine. */
+! 			if (startc == NFA_NEWL)
+! 			{
+! 			    /* Line break can't be matched as part of the
+! 			     * collection, add an OR below. But not for negated
+! 			     * range. */
+! 			    if (!negated)
+! 				extra = ADD_NL;
+! 			}
+  			else
+! 			{
+! 			    if (got_coll_char == TRUE && startc == 0)
+! 				EMIT(0x0a);
+! 			    else
+! 				EMIT(startc);
+! 			    EMIT(NFA_CONCAT);
+! 			}
+  		    }
+  
+  		    mb_ptr_adv(regparse);
+***************
+*** 1460,1479 ****
+  		if (*regparse == '-')	    /* if last, '-' is just a char */
+  		{
+  		    EMIT('-');
+! 		    TRY_NEG();
+! 		    EMIT_GLUE();
+  		}
+  		mb_ptr_adv(regparse);
+  
+  		/* skip the trailing ] */
+  		regparse = endp;
+  		mb_ptr_adv(regparse);
+  		if (negated == TRUE)
+! 		{
+! 		    /* Mark end of negated char range */
+! 		    EMIT(NFA_END_NEG_RANGE);
+! 		    EMIT(NFA_CONCAT);
+! 		}
+  
+  		/* \_[] also matches \n but it's not negated */
+  		if (extra == ADD_NL)
+--- 1453,1471 ----
+  		if (*regparse == '-')	    /* if last, '-' is just a char */
+  		{
+  		    EMIT('-');
+! 		    EMIT(NFA_CONCAT);
+  		}
+  		mb_ptr_adv(regparse);
+  
+  		/* skip the trailing ] */
+  		regparse = endp;
+  		mb_ptr_adv(regparse);
++ 
++ 		/* Mark end of the collection. */
+  		if (negated == TRUE)
+! 		    EMIT(NFA_END_NEG_COLL);
+! 		else
+! 		    EMIT(NFA_END_COLL);
+  
+  		/* \_[] also matches \n but it's not negated */
+  		if (extra == ADD_NL)
+***************
+*** 1532,1540 ****
+  	    }
+      }
+  
+- #undef TRY_NEG
+- #undef EMIT_GLUE
+- 
+      return OK;
+  }
+  
+--- 1524,1529 ----
+***************
+*** 2091,2100 ****
+  	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
+  	case NFA_QUEST:		STRCPY(code, "NFA_QUEST"); break;
+  	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
+- 	case NFA_NOT:		STRCPY(code, "NFA_NOT "); break;
+  	case NFA_SKIP_CHAR:	STRCPY(code, "NFA_SKIP_CHAR"); break;
+  	case NFA_OR:		STRCPY(code, "NFA_OR"); break;
+! 	case NFA_END_NEG_RANGE:	STRCPY(code, "NFA_END_NEG_RANGE"); break;
+  	case NFA_CLASS_ALNUM:	STRCPY(code, "NFA_CLASS_ALNUM"); break;
+  	case NFA_CLASS_ALPHA:	STRCPY(code, "NFA_CLASS_ALPHA"); break;
+  	case NFA_CLASS_BLANK:	STRCPY(code, "NFA_CLASS_BLANK"); break;
+--- 2080,2096 ----
+  	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
+  	case NFA_QUEST:		STRCPY(code, "NFA_QUEST"); break;
+  	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
+  	case NFA_SKIP_CHAR:	STRCPY(code, "NFA_SKIP_CHAR"); break;
+  	case NFA_OR:		STRCPY(code, "NFA_OR"); break;
+! 
+! 	case NFA_START_COLL:	STRCPY(code, "NFA_START_COLL"); break;
+! 	case NFA_END_COLL:	STRCPY(code, "NFA_END_COLL"); break;
+! 	case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break;
+! 	case NFA_END_NEG_COLL:	STRCPY(code, "NFA_END_NEG_COLL"); break;
+! 	case NFA_RANGE:		STRCPY(code, "NFA_RANGE"); break;
+! 	case NFA_RANGE_MIN:	STRCPY(code, "NFA_RANGE_MIN"); break;
+! 	case NFA_RANGE_MAX:	STRCPY(code, "NFA_RANGE_MAX"); break;
+! 
+  	case NFA_CLASS_ALNUM:	STRCPY(code, "NFA_CLASS_ALNUM"); break;
+  	case NFA_CLASS_ALPHA:	STRCPY(code, "NFA_CLASS_ALPHA"); break;
+  	case NFA_CLASS_BLANK:	STRCPY(code, "NFA_CLASS_BLANK"); break;
+***************
+*** 2231,2238 ****
+  	fprintf(debugf, " %s", p);
+  
+      nfa_set_code(state->c);
+!     fprintf(debugf, "%s%s (%d) (id=%d)\n",
+! 		 state->negated ? "NOT " : "", code, state->c, abs(state->id));
+      if (state->id < 0)
+  	return;
+  
+--- 2227,2238 ----
+  	fprintf(debugf, " %s", p);
+  
+      nfa_set_code(state->c);
+!     fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n",
+! 		 state->negated ? "NOT " : "",
+! 		 code,
+! 		 state->c,
+! 		 abs(state->id),
+! 		 state->val);
+      if (state->id < 0)
+  	return;
+  
+***************
+*** 2325,2330 ****
+--- 2325,2331 ----
+      s->c    = c;
+      s->out  = out;
+      s->out1 = out1;
++     s->val  = 0;
+  
+      s->id   = istate;
+      s->lastlist[0] = 0;
+***************
+*** 2565,2577 ****
+  	switch (*p)
+  	{
+  	case NFA_CONCAT:
+! 	    /* Catenation.
+! 	     * Pay attention: this operator does not exist
+! 	     * in the r.e. itself (it is implicit, really).
+! 	     * It is added when r.e. is translated to postfix
+! 	     * form in re2post().
+! 	     *
+! 	     * No new state added here. */
+  	    if (nfa_calc_size == TRUE)
+  	    {
+  		/* nstate += 0; */
+--- 2566,2575 ----
+  	switch (*p)
+  	{
+  	case NFA_CONCAT:
+! 	    /* Concatenation.
+! 	     * Pay attention: this operator does not exist in the r.e. itself
+! 	     * (it is implicit, really).  It is added when r.e. is translated
+! 	     * to postfix form in re2post(). */
+  	    if (nfa_calc_size == TRUE)
+  	    {
+  		/* nstate += 0; */
+***************
+*** 2583,2604 ****
+  	    PUSH(frag(e1.start, e2.out));
+  	    break;
+  
+- 	case NFA_NOT:
+- 	    /* Negation of a character */
+- 	    if (nfa_calc_size == TRUE)
+- 	    {
+- 		/* nstate += 0; */
+- 		break;
+- 	    }
+- 	    e1 = POP();
+- 	    e1.start->negated = TRUE;
+- #ifdef FEAT_MBYTE
+- 	    if (e1.start->c == NFA_COMPOSING)
+- 		e1.start->out1->negated = TRUE;
+- #endif
+- 	    PUSH(e1);
+- 	    break;
+- 
+  	case NFA_OR:
+  	    /* Alternation */
+  	    if (nfa_calc_size == TRUE)
+--- 2581,2586 ----
+***************
+*** 2672,2677 ****
+--- 2654,2696 ----
+  	    PUSH(frag(s, append(e.out, list1(&s->out))));
+  	    break;
+  
++ 	case NFA_END_COLL:
++ 	case NFA_END_NEG_COLL:
++ 	    /* On the stack is the sequence starting with NFA_START_COLL or
++ 	     * NFA_START_NEG_COLL and all possible characters. Patch it to
++ 	     * add the output to the start. */
++ 	    if (nfa_calc_size == TRUE)
++ 	    {
++ 		nstate++;
++ 		break;
++ 	    }
++ 	    e = POP();
++ 	    s = alloc_state(NFA_END_COLL, NULL, NULL);
++ 	    if (s == NULL)
++ 		goto theend;
++ 	    patch(e.out, s);
++ 	    e.start->out1 = s;
++ 	    PUSH(frag(e.start, list1(&s->out)));
++ 	    break;
++ 
++ 	case NFA_RANGE:
++ 	    /* Before this are two characters, the low and high end of a
++ 	     * range.  Turn them into two states with MIN and MAX. */
++ 	    if (nfa_calc_size == TRUE)
++ 	    {
++ 		/* nstate += 0; */
++ 		break;
++ 	    }
++ 	    e2 = POP();
++ 	    e1 = POP();
++ 	    e2.start->val = e2.start->c;
++ 	    e2.start->c = NFA_RANGE_MAX;
++ 	    e1.start->val = e1.start->c;
++ 	    e1.start->c = NFA_RANGE_MIN;
++ 	    patch(e1.out, e2.start);
++ 	    PUSH(frag(e1.start, e2.out));
++ 	    break;
++ 
+  	case NFA_SKIP_CHAR:
+  	    /* Symbol of 0-length, Used in a repetition
+  	     * with max/min count of 0 */
+***************
+*** 2990,2995 ****
+--- 3009,3016 ----
+      matchstate = &state_ptr[istate++]; /* the match state */
+      matchstate->c = NFA_MATCH;
+      matchstate->out = matchstate->out1 = NULL;
++     matchstate->negated = FALSE;
++     matchstate->id = 0;
+  
+      patch(e.out, matchstate);
+      ret = e.start;
+***************
+*** 3308,3314 ****
+      switch (state->c)
+      {
+  	case NFA_SPLIT:
+- 	case NFA_NOT:
+  	case NFA_NOPEN:
+  	case NFA_SKIP_CHAR:
+  	case NFA_NCLOSE:
+--- 3329,3334 ----
+***************
+*** 3782,3788 ****
+  
+  	default:
+  	    /* should not be here :P */
+! 	    EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class "));
+      }
+      return FAIL;
+  }
+--- 3802,3809 ----
+  
+  	default:
+  	    /* should not be here :P */
+! 	    EMSGN("E877: (NFA regexp) Invalid character class: %ld", class);
+! 	    return FAIL;
+      }
+      return FAIL;
+  }
+***************
+*** 4320,4327 ****
+      addstate(thislist, start, m, 0);
+  
+      /* There are two cases when the NFA advances: 1. input char matches the
+!      * NFA node and 2. input char does not match the NFA node, but the next
+!      * node is NFA_NOT. The following macro calls addstate() according to
+       * these rules. It is used A LOT, so use the "listtbl" table for speed */
+      listtbl[0][0] = NULL;
+      listtbl[0][1] = neglist;
+--- 4341,4348 ----
+      addstate(thislist, start, m, 0);
+  
+      /* There are two cases when the NFA advances: 1. input char matches the
+!      * NFA node and 2. input char does not match the NFA node and the state
+!      * has the negated flag. The following macro calls addstate() according to
+       * these rules. It is used A LOT, so use the "listtbl" table for speed */
+      listtbl[0][0] = NULL;
+      listtbl[0][1] = neglist;
+***************
+*** 4845,4860 ****
+  		ADD_POS_NEG_STATE(t->state);
+  		break;
+  
+! 	    case NFA_END_NEG_RANGE:
+! 		/* This follows a series of negated nodes, like:
+! 		 * NOT CHAR(x), NOT CHAR(y), etc. */
+! 		if (curc > 0)
+  		{
+  		    ll = nextlist;
+! 		    add_state = t->state->out;
+  		    add_off = clen;
+  		}
+  		break;
+  
+  	    case NFA_ANY:
+  		/* Any char except '\0', (end of input) does not match. */
+--- 4866,4944 ----
+  		ADD_POS_NEG_STATE(t->state);
+  		break;
+  
+! 	    case NFA_START_COLL:
+! 	    case NFA_START_NEG_COLL:
+! 	      {
+! 		/* What follows is a list of characters, until NFA_END_COLL.
+! 		 * One of them must match or none of them must match. */
+! 		nfa_state_T	*state;
+! 		int		result_if_matched;
+! 		int		c1, c2;
+! 
+! 		/* Never match EOL. If it's part of the collection it is added
+! 		 * as a separate state with an OR. */
+! 		if (curc == NUL)
+! 		    break;
+! 
+! 		state = t->state->out;
+! 		result_if_matched = (t->state->c == NFA_START_COLL);
+! 		for (;;)
+  		{
++ 		    if (state->c == NFA_END_COLL)
++ 		    {
++ 			result = !result_if_matched;
++ 			break;
++ 		    }
++ 		    if (state->c == NFA_RANGE_MIN)
++ 		    {
++ 			c1 = state->val;
++ 			state = state->out; /* advance to NFA_RANGE_MAX */
++ 			c2 = state->val;
++ #ifdef ENABLE_LOG
++ 			fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n",
++ 				curc, c1, c2);
++ #endif
++ 			if (curc >= c1 && curc <= c2)
++ 			{
++ 			    result = result_if_matched;
++ 			    break;
++ 			}
++ 			if (ireg_ic)
++ 			{
++ 			    int curc_low = MB_TOLOWER(curc);
++ 			    int done = FALSE;
++ 
++ 			    for ( ; c1 <= c2; ++c1)
++ 				if (MB_TOLOWER(c1) == curc_low)
++ 				{
++ 				    result = result_if_matched;
++ 				    done = TRUE;
++ 				    break;
++ 				}
++ 			    if (done)
++ 				break;
++ 			}
++ 		    }
++ 		    else if (state->c < 0 ? check_char_class(state->c, curc)
++ 			        : (curc == state->c
++ 				   || (ireg_ic && MB_TOLOWER(curc)
++ 						    == MB_TOLOWER(state->c))))
++ 		    {
++ 			result = result_if_matched;
++ 			break;
++ 		    }
++ 		    state = state->out;
++ 		}
++ 		if (result)
++ 		{
++ 		    /* next state is in out of the NFA_END_COLL, out1 of
++ 		     * START points to the END state */
+  		    ll = nextlist;
+! 		    add_state = t->state->out1->out;
+  		    add_off = clen;
+  		}
+  		break;
++ 	      }
+  
+  	    case NFA_ANY:
+  		/* Any char except '\0', (end of input) does not match. */
+*** ../vim-7.3.1136/src/version.c	2013-06-06 21:31:02.000000000 +0200
+--- src/version.c	2013-06-07 13:21:57.000000000 +0200
+***************
+*** 730,731 ****
+--- 730,733 ----
+  {   /* Add new patch number below this line */
++ /**/
++     1137,
+  /**/
+
+-- 
+From "know your smileys":
+ :.-(	Crying
+
+ /// Bram Moolenaar -- Bram at Moolenaar.net -- http://www.Moolenaar.net   \\\
+///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
+\\\  an exciting new programming language -- http://www.Zimbu.org        ///
+ \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///


More information about the scm-commits mailing list