Branch: refs/heads/master Author: Colomban Wendling ban@herbesfolles.org Committer: Colomban Wendling ban@herbesfolles.org Date: Sat, 28 Jul 2012 17:23:37 Commit: 5110fab0e10b2f352ce4dbf66d991820a97079dd https://github.com/geany/geany/commit/5110fab0e10b2f352ce4dbf66d991820a97079...
Log Message: ----------- Fix search and replacement of empty matches (again)
f4eb89cd7d79738a9c6c45e29abdd9d15d22e4fd was partially wrong and removed legitimate re-matches when two different matches end at the same position. In particular, the replacement changes are reverted.
Interestingly, Perl and Python do not agree on how to do such replacements. Python does what I did in the above-cited commit, i.e. it doesn't replace twice if the match ends overlap, but Perl does. Perl looks more legitimate here since both Python and Perl do find the overlapping matches when performing a search, so Python is the odd one out here, doing it differently on replace than it does on search.
For example, replacing using the pattern "a?(?=b)" and the replacement string "_":
Python: ababcdb -> _b_bcd_b
Perl:   ababcdb -> __b__bcd_b
But finding using the same pattern on the same input gives the same results on both:
Python: ababcdb -> ['a', '', 'a', '', '']
Perl:   ababcdb -> ['a', '', 'a', '', '']
Anyway, both GLib and we claim to support "Perl-compatible regular expressions", so we have to follow Perl, especially in such doubtful situations.
Modified Paths: -------------- src/search.c
Modified: src/search.c 47 files changed, 16 insertions(+), 31 deletions(-) =================================================================== @@ -2028,7 +2028,6 @@ static gint find_document_usage(GeanyDocument *doc, const gchar *search_text, gi struct Sci_TextToFind ttf; gint count = 0; gint prev_line = -1; - gint prev_end = -1;
g_return_val_if_fail(doc != NULL, 0);
@@ -2045,27 +2044,22 @@ static gint find_document_usage(GeanyDocument *doc, const gchar *search_text, gi if (pos == -1) break; /* no more matches */
- /* avoid rematching with empty matches like "(?=[a-z])" or "^$". - * note we cannot assume a match will always be empty or not, like with "a?(?=b)"*/ - if (ttf.chrgText.cpMax != prev_end) + count++; + line = sci_get_line_from_position(doc->editor->sci, pos); + if (line != prev_line) { - count++; - line = sci_get_line_from_position(doc->editor->sci, pos); - if (line != prev_line) - { - buffer = sci_get_line(doc->editor->sci, line); - msgwin_msg_add(COLOR_BLACK, line + 1, doc, - "%s:%d: %s", short_file_name, line + 1, g_strstrip(buffer)); - g_free(buffer); - prev_line = line; - } + buffer = sci_get_line(doc->editor->sci, line); + msgwin_msg_add(COLOR_BLACK, line + 1, doc, + "%s:%d: %s", short_file_name, line + 1, g_strstrip(buffer)); + g_free(buffer); + prev_line = line; }
- prev_end = ttf.chrgText.cpMax; - - if (ttf.chrg.cpMin < ttf.chrgText.cpMax) - ttf.chrg.cpMin = ttf.chrgText.cpMax; - else + ttf.chrg.cpMin = ttf.chrgText.cpMax; + /* avoid rematching with empty matches like "(?=[a-z])" or "^$". + * note we cannot assume a match will always be empty or not and then break out, since + * matches like "a?(?=b)" will me sometimes empty and sometimes not */ + if ((ttf.chrgText.cpMax - ttf.chrgText.cpMin) == 0) ttf.chrg.cpMin ++; } g_free(short_file_name); @@ -2136,7 +2130,6 @@ guint search_replace_range(ScintillaObject *sci, struct Sci_TextToFind *ttf, const gchar *find_text = ttf->lpstrText; gint start = ttf->chrg.cpMin; gint end = ttf->chrg.cpMax; - gint prev_find_end = -1;
g_return_val_if_fail(sci != NULL && find_text != NULL && replace_text != NULL, 0); if (! *find_text) @@ -2168,14 +2161,9 @@ guint search_replace_range(ScintillaObject *sci, struct Sci_TextToFind *ttf, if (chNext == '\r' || chNext == '\n') movepastEOL = 1; } - /* make sure we don't replace the same position twice, in case of pattern - * like "a?(?=b)" (would match "a"b and then ""b -- empty match the 2nd time) */ - if (prev_find_end != ttf->chrgText.cpMax) - { - replace_len = search_replace_target(sci, replace_text, - flags & SCFIND_REGEXP); - count++; - } + replace_len = search_replace_target(sci, replace_text, + flags & SCFIND_REGEXP); + count++; if (search_pos == end) break; /* Prevent hang when replacing regex $ */
@@ -2186,9 +2174,6 @@ guint search_replace_range(ScintillaObject *sci, struct Sci_TextToFind *ttf, ttf->chrg.cpMin = start; end += replace_len - find_len; /* update end of range now text has changed */ ttf->chrg.cpMax = end; - - /* match end + replacement offset */ - prev_find_end = ttf->chrgText.cpMax + replace_len - find_len; }
}
@@ Diff output truncated at 100000 characters. @@
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: TBD).