[vim] - patchlevel 1137
Karsten Hopp
karsten at fedoraproject.org
Thu Jun 13 22:32:23 UTC 2013
commit 5e4d4a98031aeb9fc08597d3f2a60818ee5feccf
Author: Karsten Hopp <karsten at redhat.com>
Date: Fri Jun 14 00:28:22 2013 +0200
- patchlevel 1137
7.3.1137 | 966 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 966 insertions(+), 0 deletions(-)
---
diff --git a/7.3.1137 b/7.3.1137
new file mode 100644
index 0000000..fb26417
--- /dev/null
+++ b/7.3.1137
@@ -0,0 +1,966 @@
+To: vim_dev at googlegroups.com
+Subject: Patch 7.3.1137
+Fcc: outbox
+From: Bram Moolenaar <Bram at moolenaar.net>
+Mime-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+------------
+
+Patch 7.3.1137
+Problem: New regexp engine: collections are slow.
+Solution: Handle all characters in one go.
+Files: src/regexp_nfa.c
+
+
+*** ../vim-7.3.1136/src/regexp_nfa.c 2013-06-06 18:46:00.000000000 +0200
+--- src/regexp_nfa.c 2013-06-07 13:40:58.000000000 +0200
+***************
+*** 34,48 ****
+ NFA_SPLIT = -1024,
+ NFA_MATCH,
+ NFA_SKIP_CHAR, /* matches a 0-length char */
+- NFA_END_NEG_RANGE, /* Used when expanding [^ab] */
+
+! NFA_CONCAT,
+ NFA_OR,
+ NFA_STAR, /* greedy * */
+ NFA_STAR_NONGREEDY, /* non-greedy * */
+ NFA_QUEST, /* greedy \? */
+ NFA_QUEST_NONGREEDY, /* non-greedy \? */
+- NFA_NOT, /* used for [^ab] negated char ranges */
+
+ NFA_BOL, /* ^ Begin line */
+ NFA_EOL, /* $ End line */
+--- 34,56 ----
+ NFA_SPLIT = -1024,
+ NFA_MATCH,
+ NFA_SKIP_CHAR, /* matches a 0-length char */
+
+! NFA_START_COLL, /* [abc] start */
+! NFA_END_COLL, /* [abc] end */
+! NFA_START_NEG_COLL, /* [^abc] start */
+! NFA_END_NEG_COLL, /* [^abc] end (only used in postfix) */
+! NFA_RANGE, /* range of the two previous items (only
+! * used in postfix) */
+! NFA_RANGE_MIN, /* low end of a range */
+! NFA_RANGE_MAX, /* high end of a range */
+!
+! NFA_CONCAT, /* concatenate two previous items (only
+! * used in postfix) */
+ NFA_OR,
+ NFA_STAR, /* greedy * */
+ NFA_STAR_NONGREEDY, /* non-greedy * */
+ NFA_QUEST, /* greedy \? */
+ NFA_QUEST_NONGREEDY, /* non-greedy \? */
+
+ NFA_BOL, /* ^ Begin line */
+ NFA_EOL, /* $ End line */
+***************
+*** 260,266 ****
+ static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
+ static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
+ static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
+! static int nfa_emit_equi_class __ARGS((int c, int neg));
+ static int nfa_regatom __ARGS((void));
+ static int nfa_regpiece __ARGS((void));
+ static int nfa_regconcat __ARGS((void));
+--- 268,274 ----
+ static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
+ static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
+ static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
+! static int nfa_emit_equi_class __ARGS((int c));
+ static int nfa_regatom __ARGS((void));
+ static int nfa_regpiece __ARGS((void));
+ static int nfa_regconcat __ARGS((void));
+***************
+*** 664,684 ****
+ * NOTE! When changing this function, also update reg_equi_class()
+ */
+ static int
+! nfa_emit_equi_class(c, neg)
+ int c;
+- int neg;
+ {
+! int first = TRUE;
+! int glue = neg == TRUE ? NFA_CONCAT : NFA_OR;
+! #define EMIT2(c) \
+! EMIT(c); \
+! if (neg == TRUE) { \
+! EMIT(NFA_NOT); \
+! } \
+! if (first == FALSE) \
+! EMIT(glue); \
+! else \
+! first = FALSE; \
+
+ #ifdef FEAT_MBYTE
+ if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
+--- 672,681 ----
+ * NOTE! When changing this function, also update reg_equi_class()
+ */
+ static int
+! nfa_emit_equi_class(c)
+ int c;
+ {
+! #define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT);
+
+ #ifdef FEAT_MBYTE
+ if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
+***************
+*** 687,770 ****
+ {
+ switch (c)
+ {
+! case 'A': case '\300': case '\301': case '\302':
+! case '\303': case '\304': case '\305':
+! EMIT2('A'); EMIT2('\300'); EMIT2('\301');
+! EMIT2('\302'); EMIT2('\303'); EMIT2('\304');
+! EMIT2('\305');
+ return OK;
+
+! case 'C': case '\307':
+! EMIT2('C'); EMIT2('\307');
+ return OK;
+
+! case 'E': case '\310': case '\311': case '\312': case '\313':
+! EMIT2('E'); EMIT2('\310'); EMIT2('\311');
+! EMIT2('\312'); EMIT2('\313');
+ return OK;
+
+! case 'I': case '\314': case '\315': case '\316': case '\317':
+! EMIT2('I'); EMIT2('\314'); EMIT2('\315');
+! EMIT2('\316'); EMIT2('\317');
+ return OK;
+
+! case 'N': case '\321':
+! EMIT2('N'); EMIT2('\321');
+ return OK;
+
+! case 'O': case '\322': case '\323': case '\324': case '\325':
+! case '\326':
+! EMIT2('O'); EMIT2('\322'); EMIT2('\323');
+! EMIT2('\324'); EMIT2('\325'); EMIT2('\326');
+ return OK;
+
+! case 'U': case '\331': case '\332': case '\333': case '\334':
+! EMIT2('U'); EMIT2('\331'); EMIT2('\332');
+! EMIT2('\333'); EMIT2('\334');
+ return OK;
+
+! case 'Y': case '\335':
+! EMIT2('Y'); EMIT2('\335');
+ return OK;
+
+! case 'a': case '\340': case '\341': case '\342':
+! case '\343': case '\344': case '\345':
+! EMIT2('a'); EMIT2('\340'); EMIT2('\341');
+! EMIT2('\342'); EMIT2('\343'); EMIT2('\344');
+! EMIT2('\345');
+ return OK;
+
+! case 'c': case '\347':
+! EMIT2('c'); EMIT2('\347');
+ return OK;
+
+! case 'e': case '\350': case '\351': case '\352': case '\353':
+! EMIT2('e'); EMIT2('\350'); EMIT2('\351');
+! EMIT2('\352'); EMIT2('\353');
+ return OK;
+
+! case 'i': case '\354': case '\355': case '\356': case '\357':
+! EMIT2('i'); EMIT2('\354'); EMIT2('\355');
+! EMIT2('\356'); EMIT2('\357');
+ return OK;
+
+! case 'n': case '\361':
+! EMIT2('n'); EMIT2('\361');
+ return OK;
+
+! case 'o': case '\362': case '\363': case '\364': case '\365':
+! case '\366':
+! EMIT2('o'); EMIT2('\362'); EMIT2('\363');
+! EMIT2('\364'); EMIT2('\365'); EMIT2('\366');
+ return OK;
+
+! case 'u': case '\371': case '\372': case '\373': case '\374':
+! EMIT2('u'); EMIT2('\371'); EMIT2('\372');
+! EMIT2('\373'); EMIT2('\374');
+ return OK;
+
+! case 'y': case '\375': case '\377':
+! EMIT2('y'); EMIT2('\375'); EMIT2('\377');
+ return OK;
+
+ default:
+--- 684,767 ----
+ {
+ switch (c)
+ {
+! case 'A': case 0300: case 0301: case 0302:
+! case 0303: case 0304: case 0305:
+! EMIT2('A'); EMIT2(0300); EMIT2(0301);
+! EMIT2(0302); EMIT2(0303); EMIT2(0304);
+! EMIT2(0305);
+ return OK;
+
+! case 'C': case 0307:
+! EMIT2('C'); EMIT2(0307);
+ return OK;
+
+! case 'E': case 0310: case 0311: case 0312: case 0313:
+! EMIT2('E'); EMIT2(0310); EMIT2(0311);
+! EMIT2(0312); EMIT2(0313);
+ return OK;
+
+! case 'I': case 0314: case 0315: case 0316: case 0317:
+! EMIT2('I'); EMIT2(0314); EMIT2(0315);
+! EMIT2(0316); EMIT2(0317);
+ return OK;
+
+! case 'N': case 0321:
+! EMIT2('N'); EMIT2(0321);
+ return OK;
+
+! case 'O': case 0322: case 0323: case 0324: case 0325:
+! case 0326:
+! EMIT2('O'); EMIT2(0322); EMIT2(0323);
+! EMIT2(0324); EMIT2(0325); EMIT2(0326);
+ return OK;
+
+! case 'U': case 0331: case 0332: case 0333: case 0334:
+! EMIT2('U'); EMIT2(0331); EMIT2(0332);
+! EMIT2(0333); EMIT2(0334);
+ return OK;
+
+! case 'Y': case 0335:
+! EMIT2('Y'); EMIT2(0335);
+ return OK;
+
+! case 'a': case 0340: case 0341: case 0342:
+! case 0343: case 0344: case 0345:
+! EMIT2('a'); EMIT2(0340); EMIT2(0341);
+! EMIT2(0342); EMIT2(0343); EMIT2(0344);
+! EMIT2(0345);
+ return OK;
+
+! case 'c': case 0347:
+! EMIT2('c'); EMIT2(0347);
+ return OK;
+
+! case 'e': case 0350: case 0351: case 0352: case 0353:
+! EMIT2('e'); EMIT2(0350); EMIT2(0351);
+! EMIT2(0352); EMIT2(0353);
+ return OK;
+
+! case 'i': case 0354: case 0355: case 0356: case 0357:
+! EMIT2('i'); EMIT2(0354); EMIT2(0355);
+! EMIT2(0356); EMIT2(0357);
+ return OK;
+
+! case 'n': case 0361:
+! EMIT2('n'); EMIT2(0361);
+ return OK;
+
+! case 'o': case 0362: case 0363: case 0364: case 0365:
+! case 0366:
+! EMIT2('o'); EMIT2(0362); EMIT2(0363);
+! EMIT2(0364); EMIT2(0365); EMIT2(0366);
+ return OK;
+
+! case 'u': case 0371: case 0372: case 0373: case 0374:
+! EMIT2('u'); EMIT2(0371); EMIT2(0372);
+! EMIT2(0373); EMIT2(0374);
+ return OK;
+
+! case 'y': case 0375: case 0377:
+! EMIT2('y'); EMIT2(0375); EMIT2(0377);
+ return OK;
+
+ default:
+***************
+*** 811,824 ****
+ char_u *old_regparse = regparse;
+ #endif
+ int extra = 0;
+- int first;
+ int emit_range;
+ int negated;
+ int result;
+ int startc = -1;
+ int endc = -1;
+ int oldstartc = -1;
+- int glue; /* ID that will "glue" nodes together */
+
+ c = getchr();
+ switch (c)
+--- 808,819 ----
+***************
+*** 927,934 ****
+
+ case Magic('n'):
+ if (reg_string)
+! /* In a string "\n" matches a newline character. */
+! EMIT(NL);
+ else
+ {
+ /* In buffer text "\n" matches the end of a line. */
+--- 922,929 ----
+
+ case Magic('n'):
+ if (reg_string)
+! /* In a string "\n" matches a newline character. */
+! EMIT(NL);
+ else
+ {
+ /* In buffer text "\n" matches the end of a line. */
+***************
+*** 1160,1191 ****
+ case Magic('['):
+ collection:
+ /*
+! * Glue is emitted between several atoms from the [].
+! * It is either NFA_OR, or NFA_CONCAT.
+! *
+! * [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation)
+! * [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT
+! * NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix
+! * notation)
+! *
+ */
+
+-
+- /* Emit negation atoms, if needed.
+- * The CONCAT below merges the NOT with the previous node. */
+- #define TRY_NEG() \
+- if (negated == TRUE) \
+- { \
+- EMIT(NFA_NOT); \
+- }
+-
+- /* Emit glue between important nodes : CONCAT or OR. */
+- #define EMIT_GLUE() \
+- if (first == FALSE) \
+- EMIT(glue); \
+- else \
+- first = FALSE;
+-
+ p = regparse;
+ endp = skip_anyof(p);
+ if (*endp == ']')
+--- 1155,1169 ----
+ case Magic('['):
+ collection:
+ /*
+! * [abc] uses NFA_START_COLL - NFA_END_COLL
+! * [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
+! * Each character is produced as a regular state, using
+! * NFA_CONCAT to bind them together.
+! * Besides normal characters there can be:
+! * - character classes NFA_CLASS_*
+! * - ranges, two characters followed by NFA_RANGE.
+ */
+
+ p = regparse;
+ endp = skip_anyof(p);
+ if (*endp == ']')
+***************
+*** 1216,1236 ****
+ * version that turns [abc] into 'a' OR 'b' OR 'c'
+ */
+ startc = endc = oldstartc = -1;
+- first = TRUE; /* Emitting first atom in this sequence? */
+ negated = FALSE;
+- glue = NFA_OR;
+ if (*regparse == '^') /* negated range */
+ {
+ negated = TRUE;
+- glue = NFA_CONCAT;
+ mb_ptr_adv(regparse);
+ }
+ if (*regparse == '-')
+ {
+ startc = '-';
+ EMIT(startc);
+! TRY_NEG();
+! EMIT_GLUE();
+ mb_ptr_adv(regparse);
+ }
+ /* Emit the OR branches for each character in the [] */
+--- 1194,1213 ----
+ * version that turns [abc] into 'a' OR 'b' OR 'c'
+ */
+ startc = endc = oldstartc = -1;
+ negated = FALSE;
+ if (*regparse == '^') /* negated range */
+ {
+ negated = TRUE;
+ mb_ptr_adv(regparse);
++ EMIT(NFA_START_NEG_COLL);
+ }
++ else
++ EMIT(NFA_START_COLL);
+ if (*regparse == '-')
+ {
+ startc = '-';
+ EMIT(startc);
+! EMIT(NFA_CONCAT);
+ mb_ptr_adv(regparse);
+ }
+ /* Emit the OR branches for each character in the [] */
+***************
+*** 1306,1325 ****
+ EMIT(NFA_CLASS_ESCAPE);
+ break;
+ }
+! TRY_NEG();
+! EMIT_GLUE();
+ continue;
+ }
+ /* Try equivalence class [=a=] and the like */
+ if (equiclass != 0)
+ {
+! result = nfa_emit_equi_class(equiclass, negated);
+ if (result == FAIL)
+ {
+ /* should never happen */
+ EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
+ }
+- EMIT_GLUE();
+ continue;
+ }
+ /* Try collating class like [. .] */
+--- 1283,1300 ----
+ EMIT(NFA_CLASS_ESCAPE);
+ break;
+ }
+! EMIT(NFA_CONCAT);
+ continue;
+ }
+ /* Try equivalence class [=a=] and the like */
+ if (equiclass != 0)
+ {
+! result = nfa_emit_equi_class(equiclass);
+ if (result == FAIL)
+ {
+ /* should never happen */
+ EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
+ }
+ continue;
+ }
+ /* Try collating class like [. .] */
+***************
+*** 1391,1409 ****
+ startc = oldstartc;
+ if (startc > endc)
+ EMSG_RET_FAIL(_(e_invrange));
+ #ifdef FEAT_MBYTE
+! if (has_mbyte && ((*mb_char2len)(startc) > 1
+ || (*mb_char2len)(endc) > 1))
+ {
+! if (endc > startc + 256)
+! EMSG_RET_FAIL(_(e_invrange));
+! /* Emit the range. "startc" was already emitted, so
+! * skip it. */
+ for (c = startc + 1; c <= endc; c++)
+ {
+ EMIT(c);
+! TRY_NEG();
+! EMIT_GLUE();
+ }
+ }
+ else
+--- 1366,1397 ----
+ startc = oldstartc;
+ if (startc > endc)
+ EMSG_RET_FAIL(_(e_invrange));
++
++ if (endc > startc + 2)
++ {
++ /* Emit a range instead of the sequence of
++ * individual characters. */
++ if (startc == 0)
++ /* \x00 is translated to \x0a, start at \x01. */
++ EMIT(1);
++ else
++ --post_ptr; /* remove NFA_CONCAT */
++ EMIT(endc);
++ EMIT(NFA_RANGE);
++ EMIT(NFA_CONCAT);
++ }
++ else
+ #ifdef FEAT_MBYTE
+! if (has_mbyte && ((*mb_char2len)(startc) > 1
+ || (*mb_char2len)(endc) > 1))
+ {
+! /* Emit the characters in the range.
+! * "startc" was already emitted, so skip it.
+! * */
+ for (c = startc + 1; c <= endc; c++)
+ {
+ EMIT(c);
+! EMIT(NFA_CONCAT);
+ }
+ }
+ else
+***************
+*** 1425,1432 ****
+ #endif
+ {
+ EMIT(c);
+! TRY_NEG();
+! EMIT_GLUE();
+ }
+ }
+ emit_range = FALSE;
+--- 1413,1419 ----
+ #endif
+ {
+ EMIT(c);
+! EMIT(NFA_CONCAT);
+ }
+ }
+ emit_range = FALSE;
+***************
+*** 1434,1456 ****
+ }
+ else
+ {
+! /*
+! * This char (startc) is not part of a range. Just
+ * emit it.
+- *
+ * Normally, simply emit startc. But if we get char
+ * code=0 from a collating char, then replace it with
+ * 0x0a.
+- *
+ * This is needed to completely mimic the behaviour of
+! * the backtracking engine.
+! */
+! if (got_coll_char == TRUE && startc == 0)
+! EMIT(0x0a);
+ else
+! EMIT(startc);
+! TRY_NEG();
+! EMIT_GLUE();
+ }
+
+ mb_ptr_adv(regparse);
+--- 1421,1449 ----
+ }
+ else
+ {
+! /* This char (startc) is not part of a range. Just
+ * emit it.
+ * Normally, simply emit startc. But if we get char
+ * code=0 from a collating char, then replace it with
+ * 0x0a.
+ * This is needed to completely mimic the behaviour of
+! * the backtracking engine. */
+! if (startc == NFA_NEWL)
+! {
+! /* Line break can't be matched as part of the
+! * collection, add an OR below. But not for negated
+! * range. */
+! if (!negated)
+! extra = ADD_NL;
+! }
+ else
+! {
+! if (got_coll_char == TRUE && startc == 0)
+! EMIT(0x0a);
+! else
+! EMIT(startc);
+! EMIT(NFA_CONCAT);
+! }
+ }
+
+ mb_ptr_adv(regparse);
+***************
+*** 1460,1479 ****
+ if (*regparse == '-') /* if last, '-' is just a char */
+ {
+ EMIT('-');
+! TRY_NEG();
+! EMIT_GLUE();
+ }
+ mb_ptr_adv(regparse);
+
+ /* skip the trailing ] */
+ regparse = endp;
+ mb_ptr_adv(regparse);
+ if (negated == TRUE)
+! {
+! /* Mark end of negated char range */
+! EMIT(NFA_END_NEG_RANGE);
+! EMIT(NFA_CONCAT);
+! }
+
+ /* \_[] also matches \n but it's not negated */
+ if (extra == ADD_NL)
+--- 1453,1471 ----
+ if (*regparse == '-') /* if last, '-' is just a char */
+ {
+ EMIT('-');
+! EMIT(NFA_CONCAT);
+ }
+ mb_ptr_adv(regparse);
+
+ /* skip the trailing ] */
+ regparse = endp;
+ mb_ptr_adv(regparse);
++
++ /* Mark end of the collection. */
+ if (negated == TRUE)
+! EMIT(NFA_END_NEG_COLL);
+! else
+! EMIT(NFA_END_COLL);
+
+ /* \_[] also matches \n but it's not negated */
+ if (extra == ADD_NL)
+***************
+*** 1532,1540 ****
+ }
+ }
+
+- #undef TRY_NEG
+- #undef EMIT_GLUE
+-
+ return OK;
+ }
+
+--- 1524,1529 ----
+***************
+*** 2091,2100 ****
+ case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
+ case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break;
+ case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
+- case NFA_NOT: STRCPY(code, "NFA_NOT "); break;
+ case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break;
+ case NFA_OR: STRCPY(code, "NFA_OR"); break;
+! case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break;
+ case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break;
+ case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break;
+ case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break;
+--- 2080,2096 ----
+ case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
+ case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break;
+ case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
+ case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break;
+ case NFA_OR: STRCPY(code, "NFA_OR"); break;
+!
+! case NFA_START_COLL: STRCPY(code, "NFA_START_COLL"); break;
+! case NFA_END_COLL: STRCPY(code, "NFA_END_COLL"); break;
+! case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break;
+! case NFA_END_NEG_COLL: STRCPY(code, "NFA_END_NEG_COLL"); break;
+! case NFA_RANGE: STRCPY(code, "NFA_RANGE"); break;
+! case NFA_RANGE_MIN: STRCPY(code, "NFA_RANGE_MIN"); break;
+! case NFA_RANGE_MAX: STRCPY(code, "NFA_RANGE_MAX"); break;
+!
+ case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break;
+ case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break;
+ case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break;
+***************
+*** 2231,2238 ****
+ fprintf(debugf, " %s", p);
+
+ nfa_set_code(state->c);
+! fprintf(debugf, "%s%s (%d) (id=%d)\n",
+! state->negated ? "NOT " : "", code, state->c, abs(state->id));
+ if (state->id < 0)
+ return;
+
+--- 2227,2238 ----
+ fprintf(debugf, " %s", p);
+
+ nfa_set_code(state->c);
+! fprintf(debugf, "%s%s (%d) (id=%d) val=%d\n",
+! state->negated ? "NOT " : "",
+! code,
+! state->c,
+! abs(state->id),
+! state->val);
+ if (state->id < 0)
+ return;
+
+***************
+*** 2325,2330 ****
+--- 2325,2331 ----
+ s->c = c;
+ s->out = out;
+ s->out1 = out1;
++ s->val = 0;
+
+ s->id = istate;
+ s->lastlist[0] = 0;
+***************
+*** 2565,2577 ****
+ switch (*p)
+ {
+ case NFA_CONCAT:
+! /* Catenation.
+! * Pay attention: this operator does not exist
+! * in the r.e. itself (it is implicit, really).
+! * It is added when r.e. is translated to postfix
+! * form in re2post().
+! *
+! * No new state added here. */
+ if (nfa_calc_size == TRUE)
+ {
+ /* nstate += 0; */
+--- 2566,2575 ----
+ switch (*p)
+ {
+ case NFA_CONCAT:
+! /* Concatenation.
+! * Pay attention: this operator does not exist in the r.e. itself
+! * (it is implicit, really). It is added when r.e. is translated
+! * to postfix form in re2post(). */
+ if (nfa_calc_size == TRUE)
+ {
+ /* nstate += 0; */
+***************
+*** 2583,2604 ****
+ PUSH(frag(e1.start, e2.out));
+ break;
+
+- case NFA_NOT:
+- /* Negation of a character */
+- if (nfa_calc_size == TRUE)
+- {
+- /* nstate += 0; */
+- break;
+- }
+- e1 = POP();
+- e1.start->negated = TRUE;
+- #ifdef FEAT_MBYTE
+- if (e1.start->c == NFA_COMPOSING)
+- e1.start->out1->negated = TRUE;
+- #endif
+- PUSH(e1);
+- break;
+-
+ case NFA_OR:
+ /* Alternation */
+ if (nfa_calc_size == TRUE)
+--- 2581,2586 ----
+***************
+*** 2672,2677 ****
+--- 2654,2696 ----
+ PUSH(frag(s, append(e.out, list1(&s->out))));
+ break;
+
++ case NFA_END_COLL:
++ case NFA_END_NEG_COLL:
++ /* On the stack is the sequence starting with NFA_START_COLL or
++ * NFA_START_NEG_COLL and all possible characters. Patch it to
++ * add the output to the start. */
++ if (nfa_calc_size == TRUE)
++ {
++ nstate++;
++ break;
++ }
++ e = POP();
++ s = alloc_state(NFA_END_COLL, NULL, NULL);
++ if (s == NULL)
++ goto theend;
++ patch(e.out, s);
++ e.start->out1 = s;
++ PUSH(frag(e.start, list1(&s->out)));
++ break;
++
++ case NFA_RANGE:
++ /* Before this are two characters, the low and high end of a
++ * range. Turn them into two states with MIN and MAX. */
++ if (nfa_calc_size == TRUE)
++ {
++ /* nstate += 0; */
++ break;
++ }
++ e2 = POP();
++ e1 = POP();
++ e2.start->val = e2.start->c;
++ e2.start->c = NFA_RANGE_MAX;
++ e1.start->val = e1.start->c;
++ e1.start->c = NFA_RANGE_MIN;
++ patch(e1.out, e2.start);
++ PUSH(frag(e1.start, e2.out));
++ break;
++
+ case NFA_SKIP_CHAR:
+ /* Symbol of 0-length, Used in a repetition
+ * with max/min count of 0 */
+***************
+*** 2990,2995 ****
+--- 3009,3016 ----
+ matchstate = &state_ptr[istate++]; /* the match state */
+ matchstate->c = NFA_MATCH;
+ matchstate->out = matchstate->out1 = NULL;
++ matchstate->negated = FALSE;
++ matchstate->id = 0;
+
+ patch(e.out, matchstate);
+ ret = e.start;
+***************
+*** 3308,3314 ****
+ switch (state->c)
+ {
+ case NFA_SPLIT:
+- case NFA_NOT:
+ case NFA_NOPEN:
+ case NFA_SKIP_CHAR:
+ case NFA_NCLOSE:
+--- 3329,3334 ----
+***************
+*** 3782,3788 ****
+
+ default:
+ /* should not be here :P */
+! EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class "));
+ }
+ return FAIL;
+ }
+--- 3802,3809 ----
+
+ default:
+ /* should not be here :P */
+! EMSGN("E877: (NFA regexp) Invalid character class: %ld", class);
+! return FAIL;
+ }
+ return FAIL;
+ }
+***************
+*** 4320,4327 ****
+ addstate(thislist, start, m, 0);
+
+ /* There are two cases when the NFA advances: 1. input char matches the
+! * NFA node and 2. input char does not match the NFA node, but the next
+! * node is NFA_NOT. The following macro calls addstate() according to
+ * these rules. It is used A LOT, so use the "listtbl" table for speed */
+ listtbl[0][0] = NULL;
+ listtbl[0][1] = neglist;
+--- 4341,4348 ----
+ addstate(thislist, start, m, 0);
+
+ /* There are two cases when the NFA advances: 1. input char matches the
+! * NFA node and 2. input char does not match the NFA node and the state
+! * has the negated flag. The following macro calls addstate() according to
+ * these rules. It is used A LOT, so use the "listtbl" table for speed */
+ listtbl[0][0] = NULL;
+ listtbl[0][1] = neglist;
+***************
+*** 4845,4860 ****
+ ADD_POS_NEG_STATE(t->state);
+ break;
+
+! case NFA_END_NEG_RANGE:
+! /* This follows a series of negated nodes, like:
+! * NOT CHAR(x), NOT CHAR(y), etc. */
+! if (curc > 0)
+ {
+ ll = nextlist;
+! add_state = t->state->out;
+ add_off = clen;
+ }
+ break;
+
+ case NFA_ANY:
+ /* Any char except '\0', (end of input) does not match. */
+--- 4866,4944 ----
+ ADD_POS_NEG_STATE(t->state);
+ break;
+
+! case NFA_START_COLL:
+! case NFA_START_NEG_COLL:
+! {
+! /* What follows is a list of characters, until NFA_END_COLL.
+! * One of them must match or none of them must match. */
+! nfa_state_T *state;
+! int result_if_matched;
+! int c1, c2;
+!
+! /* Never match EOL. If it's part of the collection it is added
+! * as a separate state with an OR. */
+! if (curc == NUL)
+! break;
+!
+! state = t->state->out;
+! result_if_matched = (t->state->c == NFA_START_COLL);
+! for (;;)
+ {
++ if (state->c == NFA_END_COLL)
++ {
++ result = !result_if_matched;
++ break;
++ }
++ if (state->c == NFA_RANGE_MIN)
++ {
++ c1 = state->val;
++ state = state->out; /* advance to NFA_RANGE_MAX */
++ c2 = state->val;
++ #ifdef ENABLE_LOG
++ fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n",
++ curc, c1, c2);
++ #endif
++ if (curc >= c1 && curc <= c2)
++ {
++ result = result_if_matched;
++ break;
++ }
++ if (ireg_ic)
++ {
++ int curc_low = MB_TOLOWER(curc);
++ int done = FALSE;
++
++ for ( ; c1 <= c2; ++c1)
++ if (MB_TOLOWER(c1) == curc_low)
++ {
++ result = result_if_matched;
++ done = TRUE;
++ break;
++ }
++ if (done)
++ break;
++ }
++ }
++ else if (state->c < 0 ? check_char_class(state->c, curc)
++ : (curc == state->c
++ || (ireg_ic && MB_TOLOWER(curc)
++ == MB_TOLOWER(state->c))))
++ {
++ result = result_if_matched;
++ break;
++ }
++ state = state->out;
++ }
++ if (result)
++ {
++ /* next state is in out of the NFA_END_COLL, out1 of
++ * START points to the END state */
+ ll = nextlist;
+! add_state = t->state->out1->out;
+ add_off = clen;
+ }
+ break;
++ }
+
+ case NFA_ANY:
+ /* Any char except '\0', (end of input) does not match. */
+*** ../vim-7.3.1136/src/version.c 2013-06-06 21:31:02.000000000 +0200
+--- src/version.c 2013-06-07 13:21:57.000000000 +0200
+***************
+*** 730,731 ****
+--- 730,733 ----
+ { /* Add new patch number below this line */
++ /**/
++ 1137,
+ /**/
+
+--
+From "know your smileys":
+ :.-( Crying
+
+ /// Bram Moolenaar -- Bram at Moolenaar.net -- http://www.Moolenaar.net \\\
+/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
+\\\ an exciting new programming language -- http://www.Zimbu.org ///
+ \\\ help me help AIDS victims -- http://ICCF-Holland.org ///
More information about the scm-commits mailing list