To: vim_dev@googlegroups.com Subject: Patch 7.3.1122 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1122 Problem: New regexp engine: \@> not supported. Solution: Implement \@>. Files: src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok *** ../vim-7.3.1121/src/regexp_nfa.c 2013-06-05 16:51:53.000000000 +0200 --- src/regexp_nfa.c 2013-06-05 18:45:57.000000000 +0200 *************** *** 57,63 **** --- 57,65 ---- NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */ NFA_START_INVISIBLE, NFA_START_INVISIBLE_BEFORE, + NFA_START_PATTERN, NFA_END_INVISIBLE, + NFA_END_PATTERN, NFA_COMPOSING, /* Next nodes in NFA are part of the composing multibyte char */ NFA_END_COMPOSING, /* End of a composing char in the NFA */ *************** *** 1505,1513 **** i = NFA_PREV_ATOM_JUST_BEFORE_NEG; break; case '>': ! /* \@> Not supported yet */ ! /* i = NFA_PREV_ATOM_LIKE_PATTERN; */ ! return FAIL; } if (i == 0) { --- 1507,1515 ---- i = NFA_PREV_ATOM_JUST_BEFORE_NEG; break; case '>': ! /* \@> */ ! i = NFA_PREV_ATOM_LIKE_PATTERN; ! break; } if (i == 0) { *************** *** 1885,1896 **** --- 1887,1903 ---- STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE"); break; case NFA_PREV_ATOM_JUST_BEFORE_NEG: STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE_NEG"); break; + case NFA_PREV_ATOM_LIKE_PATTERN: + STRCPY(code, "NFA_PREV_ATOM_LIKE_PATTERN"); break; + case NFA_NOPEN: STRCPY(code, "NFA_NOPEN"); break; case NFA_NCLOSE: STRCPY(code, "NFA_NCLOSE"); break; case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break; case NFA_START_INVISIBLE_BEFORE: STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break; + case NFA_START_PATTERN: STRCPY(code, "NFA_START_PATTERN"); break; case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break; + case NFA_END_PATTERN: STRCPY(code, "NFA_END_PATTERN"); break; case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break; case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break; *************** *** 2601,2612 **** case NFA_PREV_ATOM_NO_WIDTH_NEG: case NFA_PREV_ATOM_JUST_BEFORE: case NFA_PREV_ATOM_JUST_BEFORE_NEG: { int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); int before = (*p == NFA_PREV_ATOM_JUST_BEFORE || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); ! int n; if (before) n = *++p; /* get the count */ --- 2608,2633 ---- case NFA_PREV_ATOM_NO_WIDTH_NEG: case NFA_PREV_ATOM_JUST_BEFORE: case NFA_PREV_ATOM_JUST_BEFORE_NEG: + case NFA_PREV_ATOM_LIKE_PATTERN: { int neg = (*p == NFA_PREV_ATOM_NO_WIDTH_NEG || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); int before = (*p == NFA_PREV_ATOM_JUST_BEFORE || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG); ! int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN); ! int start_state = NFA_START_INVISIBLE; ! int end_state = NFA_END_INVISIBLE; ! int n = 0; ! nfa_state_T *zend; ! nfa_state_T *skip; ! ! if (before) ! start_state = NFA_START_INVISIBLE_BEFORE; ! else if (pattern) ! { ! start_state = NFA_START_PATTERN; ! end_state = NFA_END_PATTERN; ! } if (before) n = *++p; /* get the count */ *************** *** 2620,2635 **** if (nfa_calc_size == TRUE) { ! nstate += 2; break; } e = POP(); ! s1 = alloc_state(NFA_END_INVISIBLE, NULL, NULL); if (s1 == NULL) goto theend; - patch(e.out, s1); ! s = alloc_state(NFA_START_INVISIBLE, e.start, s1); if (s == NULL) goto theend; if (neg) --- 2641,2655 ---- if (nfa_calc_size == TRUE) { ! nstate += pattern ? 4 : 2; break; } e = POP(); ! s1 = alloc_state(end_state, NULL, NULL); if (s1 == NULL) goto theend; ! s = alloc_state(start_state, e.start, s1); if (s == NULL) goto theend; if (neg) *************** *** 2638,2649 **** s1->negated = TRUE; } if (before) - { s->val = n; /* store the count */ ! ++s->c; /* NFA_START_INVISIBLE -> NFA_START_INVISIBLE_BEFORE */ } - - PUSH(frag(s, list1(&s1->out))); break; } --- 2658,2678 ---- s1->negated = TRUE; } if (before) s->val = n; /* store the count */ ! if (pattern) ! { ! /* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */ ! skip = alloc_state(NFA_SKIP, NULL, NULL); ! zend = alloc_state(NFA_ZEND, s1, NULL); ! s1->out= skip; ! patch(e.out, zend); ! PUSH(frag(s, list1(&skip->out))); ! } ! else ! { ! patch(e.out, s1); ! PUSH(frag(s, list1(&s1->out))); } break; } *************** *** 2953,2959 **** for (j = 0; j < sub->in_use; j++) if (REG_MULTI) ! fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d", j, sub->list.multi[j].start.col, (int)sub->list.multi[j].start.lnum, --- 2982,2988 ---- for (j = 0; j < sub->in_use; j++) if (REG_MULTI) ! fprintf(log_fd, "*** group %d, start: c=%d, l=%d, end: c=%d, l=%d\n", j, sub->list.multi[j].start.col, (int)sub->list.multi[j].start.lnum, *************** *** 2964,2975 **** char *s = (char *)sub->list.line[j].start; char *e = (char *)sub->list.line[j].end; ! fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"", j, s == NULL ? "NULL" : s, e == NULL ? "NULL" : e); } - fprintf(log_fd, "\n"); } #endif --- 2993,3003 ---- char *s = (char *)sub->list.line[j].start; char *e = (char *)sub->list.line[j].end; ! fprintf(log_fd, "*** group %d, start: \"%s\", end: \"%s\"\n", j, s == NULL ? "NULL" : s, e == NULL ? "NULL" : e); } } #endif *************** *** 4317,4322 **** --- 4345,4351 ---- } case NFA_END_INVISIBLE: + case NFA_END_PATTERN: /* * This is only encountered after a NFA_START_INVISIBLE or * NFA_START_INVISIBLE_BEFORE node. *************** *** 4343,4349 **** (int)(nfa_endp->se_u.ptr - reginput)); } #endif ! /* It's only a match if it ends at "nfa_endp" */ if (nfa_endp != NULL && (REG_MULTI ? (reglnum != nfa_endp->se_u.pos.lnum || (int)(reginput - regline) --- 4372,4379 ---- (int)(nfa_endp->se_u.ptr - reginput)); } #endif ! /* If "nfa_endp" is set it's only a match if it ends at ! * "nfa_endp" */ if (nfa_endp != NULL && (REG_MULTI ? (reglnum != nfa_endp->se_u.pos.lnum || (int)(reginput - regline) *************** *** 4360,4365 **** --- 4390,4399 ---- copy_sub(&m->synt, &t->subs.synt); #endif } + #ifdef ENABLE_LOG + fprintf(log_fd, "Match found:\n"); + log_subsexpr(m); + #endif nfa_match = TRUE; break; *************** *** 4435,4440 **** --- 4469,4531 ---- } break; + case NFA_START_PATTERN: + /* First try matching the pattern. */ + result = recursive_regmatch(t->state, prog, + submatch, m, &listids); + if (result) + { + int bytelen; + + #ifdef ENABLE_LOG + fprintf(log_fd, "NFA_START_PATTERN matches:\n"); + log_subsexpr(m); + #endif + /* Copy submatch info from the recursive call */ + copy_sub_off(&t->subs.norm, &m->norm); + #ifdef FEAT_SYN_HL + copy_sub_off(&t->subs.synt, &m->synt); + #endif + /* Now we need to skip over the matched text and then + * continue with what follows. */ + if (REG_MULTI) + /* TODO: multi-line match */ + bytelen = m->norm.list.multi[0].end.col + - (int)(reginput - regline); + else + bytelen = (int)(m->norm.list.line[0].end - reginput); + + #ifdef ENABLE_LOG + fprintf(log_fd, "NFA_START_PATTERN length: %d\n", bytelen); + #endif + if (bytelen == 0) + { + /* empty match, output of corresponding + * NFA_END_PATTERN/NFA_SKIP to be used at current + * position */ + addstate_here(thislist, t->state->out1->out->out, + &t->subs, t->pim, &listidx); + } + else if (bytelen <= clen) + { + /* match current character, output of corresponding + * NFA_END_PATTERN to be used at next position. */ + ll = nextlist; + add_state = t->state->out1->out->out; + add_off = clen; + } + else + { + /* skip over the matched characters, set character + * count in NFA_SKIP */ + ll = nextlist; + add_state = t->state->out1->out; + add_off = bytelen; + add_count = bytelen - clen; + } + } + break; + case NFA_BOL: if (reginput == regline) addstate_here(thislist, t->state->out, &t->subs, *************** *** 4846,4854 **** ll = nextlist; add_state = t->state->out->out; add_off = clen; - #ifdef ENABLE_LOG - log_subsexpr(&nextlist->t[nextlist->n - 1].subs); - #endif } else { --- 4937,4942 ---- *************** *** 4858,4866 **** add_state = t->state->out; add_off = bytelen; add_count = bytelen - clen; - #ifdef ENABLE_LOG - log_subsexpr(&nextlist->t[nextlist->n - 1].subs); - #endif } } break; --- 4946,4951 ---- *************** *** 4873,4881 **** ll = nextlist; add_state = t->state->out; add_off = clen; - #ifdef ENABLE_LOG - log_subsexpr(&nextlist->t[nextlist->n - 1].subs); - #endif } else { --- 4958,4963 ---- *************** *** 4884,4892 **** add_state = t->state; add_off = 0; add_count = t->count - clen; - #ifdef ENABLE_LOG - log_subsexpr(&nextlist->t[nextlist->n - 1].subs); - #endif } break; --- 4966,4971 ---- *************** *** 5158,5170 **** f = fopen(NFA_REGEXP_RUN_LOG, "a"); if (f != NULL) { ! fprintf(f, "\n\n\n\n\n\n\t\t=======================================================\n"); ! fprintf(f, " =======================================================\n"); #ifdef DEBUG fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); #endif fprintf(f, "\tInput text is \"%s\" \n", reginput); ! fprintf(f, " =======================================================\n\n"); nfa_print_state(f, start); fprintf(f, "\n\n"); fclose(f); --- 5237,5248 ---- f = fopen(NFA_REGEXP_RUN_LOG, "a"); if (f != NULL) { ! fprintf(f, "\n\n\t=======================================================\n"); #ifdef DEBUG fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr); #endif fprintf(f, "\tInput text is \"%s\" \n", reginput); ! fprintf(f, "\t=======================================================\n\n"); nfa_print_state(f, start); fprintf(f, "\n\n"); fclose(f); *** ../vim-7.3.1121/src/testdir/test64.in 2013-06-05 11:05:12.000000000 +0200 --- src/testdir/test64.in 2013-06-05 18:45:28.000000000 +0200 *************** *** 385,390 **** --- 385,396 ---- :call add(tl, [2, '\(<<\)\@2<=span.', 'xxspanxxxx + :call add(tl, [2, '\(a*\)\@>a', 'aaaa']) + :call add(tl, [2, '\(a*\)\@>b', 'aaab', 'aaab', 'aaa']) + :" TODO: BT engine does not restore submatch after failure + :call add(tl, [1, '\(a*\)\@>a\|a\+', 'aaaa', 'aaaa']) + :" :"""" "\_" prepended negated collection matches EOL :call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"]) :call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"]) *************** *** 401,407 **** : let text = t[2] : let matchidx = 3 : for engine in [0, 1, 2] ! : if engine == 2 && !re : continue : endif : let ®expengine = engine --- 407,413 ---- : let text = t[2] : let matchidx = 3 : for engine in [0, 1, 2] ! : if engine == 2 && re == 0 || engine == 1 && re ==1 : continue : endif : let ®expengine = engine *** ../vim-7.3.1121/src/testdir/test64.ok 2013-06-05 11:05:12.000000000 +0200 --- src/testdir/test64.ok 2013-06-05 18:47:54.000000000 +0200 *************** *** 872,877 **** --- 872,885 ---- OK 0 - \(foo\)\@a + OK 1 - \(a*\)\@>a + OK 2 - \(a*\)\@>a + OK 0 - \(a*\)\@>b + OK 1 - \(a*\)\@>b + OK 2 - \(a*\)\@>b + OK 0 - \(a*\)\@>a\|a\+ + OK 2 - \(a*\)\@>a\|a\+ OK 0 - \_[^8-9]\+ OK 1 - \_[^8-9]\+ OK 2 - \_[^8-9]\+ *** ../vim-7.3.1121/src/version.c 2013-06-05 16:51:53.000000000 +0200 --- src/version.c 2013-06-05 18:51:35.000000000 +0200 *************** *** 730,731 **** --- 730,733 ---- { /* Add new patch number below this line */ + /**/ + 1122, /**/ -- From "know your smileys": :q vi user saying, "How do I get out of this damn emacs editor?" /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///