Branch: refs/heads/master Author: Colomban Wendling ban@herbesfolles.org Committer: Colomban Wendling ban@herbesfolles.org Date: Fri, 27 Jul 2012 22:32:26 Commit: f4eb89cd7d79738a9c6c45e29abdd9d15d22e4fd https://github.com/geany/geany/commit/f4eb89cd7d79738a9c6c45e29abdd9d15d22e4...
Log Message: ----------- Fix search and replacement of empty matches
Regular expressions might match empty ranges, which weren't handled properly but were generally simply omitted.
For example, the regular expression "^$" (empty line) never matched because we assumed empty results were not interesting, whereas they actually needed more careful handling in the code. Likewise, an expression matching only on its lookahead part, like "a?(?=b)" against "b", would stop further search, although it could even match a non-empty range if "ab" is present later in the input. This last kind of expression also led to double replacement because it first matched "ab" and then "b" alone when re-matching after the "a" replacement.
This commit fixes: * searching when the matched range is empty; * step-by-step search when matching an empty range; * double replacement when the pattern re-matches an empty range straight after a replacement; * replacement with an empty string when matching an empty range.
A known issue, though, is that step-by-step replacement won't replace an empty match but will instead find the next match.
Modified Paths: -------------- src/search.c
Modified: src/search.c 55 files changed, 37 insertions(+), 18 deletions(-) =================================================================== @@ -1940,6 +1940,9 @@ gint search_find_next(ScintillaObject *sci, const gchar *str, gint flags)
pos = sci_get_current_position(sci); ret = find_regex(sci, pos, regex); + /* avoid re-matching the same position in case of empty matches */ + if (ret == pos && regex_matches[0].start == regex_matches[0].end) + ret = find_regex(sci, pos + 1, regex); if (ret >= 0) sci_set_selection(sci, ret, regex_matches[0].end);
@@ -2025,6 +2028,7 @@ static gint find_document_usage(GeanyDocument *doc, const gchar *search_text, gi struct Sci_TextToFind ttf; gint count = 0; gint prev_line = -1; + gint prev_end = -1;
g_return_val_if_fail(doc != NULL, 0);
@@ -2035,27 +2039,34 @@ static gint find_document_usage(GeanyDocument *doc, const gchar *search_text, gi ttf.lpstrText = (gchar *)search_text; while (1) { - gint pos, line, find_len; + gint pos, line;
pos = search_find_text(doc->editor->sci, flags, &ttf); if (pos == -1) break; /* no more matches */ - find_len = ttf.chrgText.cpMax - ttf.chrgText.cpMin; - if (find_len == 0) - break; /* Ignore regex ^ or $ */
- count++; - line = sci_get_line_from_position(doc->editor->sci, pos); - if (line != prev_line) + /* avoid rematching with empty matches like "(?=[a-z])" or "^$". + * note we cannot assume a match will always be empty or not, like with "a?(?=b)"*/ + if (ttf.chrgText.cpMax != prev_end) { - buffer = sci_get_line(doc->editor->sci, line); - msgwin_msg_add(COLOR_BLACK, line + 1, doc, - "%s:%d: %s", short_file_name, line + 1, g_strstrip(buffer)); - g_free(buffer); - prev_line = line; + count++; + line = sci_get_line_from_position(doc->editor->sci, pos); + if (line != prev_line) + { + buffer = sci_get_line(doc->editor->sci, line); + msgwin_msg_add(COLOR_BLACK, line + 1, doc, + "%s:%d: %s", short_file_name, line + 1, g_strstrip(buffer)); + g_free(buffer); + prev_line = line; + } }
- ttf.chrg.cpMin = ttf.chrgText.cpMax; + prev_end = ttf.chrgText.cpMax; + + if (ttf.chrg.cpMin < ttf.chrgText.cpMax) + ttf.chrg.cpMin = ttf.chrgText.cpMax; + else + ttf.chrg.cpMin ++; } g_free(short_file_name); return count; @@ -2125,6 +2136,7 @@ guint search_replace_range(ScintillaObject *sci, struct Sci_TextToFind *ttf, const gchar *find_text = ttf->lpstrText; gint start = ttf->chrg.cpMin; gint end = ttf->chrg.cpMax; + gint prev_find_end = -1;
g_return_val_if_fail(sci != NULL && find_text != NULL && replace_text != NULL, 0); if (! *find_text) @@ -2139,8 +2151,6 @@ guint search_replace_range(ScintillaObject *sci, struct Sci_TextToFind *ttf, find_len = ttf->chrgText.cpMax - ttf->chrgText.cpMin; if (search_pos == -1) break; /* no more matches */ - if (find_len == 0 && ! NZV(replace_text)) - break; /* nothing to do */
if (search_pos + find_len > end) break; /* found text is partly out of range */ @@ -2158,9 +2168,14 @@ guint search_replace_range(ScintillaObject *sci, struct Sci_TextToFind *ttf, if (chNext == '\r' || chNext == '\n') movepastEOL = 1; } - replace_len = search_replace_target(sci, replace_text, - flags & SCFIND_REGEXP); - count++; + /* make sure we don't replace the same position twice, in case of pattern + * like "a?(?=b)" (would match "a"b and then ""b -- empty match the 2nd time) */ + if (prev_find_end != ttf->chrgText.cpMax) + { + replace_len = search_replace_target(sci, replace_text, + flags & SCFIND_REGEXP); + count++; + } if (search_pos == end) break; /* Prevent hang when replacing regex $ */
@@ -2171,7 +2186,11 @@ guint search_replace_range(ScintillaObject *sci, struct Sci_TextToFind *ttf, ttf->chrg.cpMin = start; end += replace_len - find_len; /* update end of range now text has changed */ ttf->chrg.cpMax = end; + + /* match end + replacement offset */ + prev_find_end = ttf->chrgText.cpMax + replace_len - find_len; } + } return count; }
@@ Diff output truncated at 100000 characters. @@
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: TBD).