[geany/geany] 5110fa: Fix search and replacement of empty matches (again)

Colomban Wendling git-noreply at xxxxx
Sat Jul 28 17:23:37 UTC 2012


Branch:      refs/heads/master
Author:      Colomban Wendling <ban at herbesfolles.org>
Committer:   Colomban Wendling <ban at herbesfolles.org>
Date:        Sat, 28 Jul 2012 17:23:37
Commit:      5110fab0e10b2f352ce4dbf66d991820a97079dd
             https://github.com/geany/geany/commit/5110fab0e10b2f352ce4dbf66d991820a97079dd

Log Message:
-----------
Fix search and replacement of empty matches (again)

f4eb89cd7d79738a9c6c45e29abdd9d15d22e4fd was partially wrong and
removed legitimate re-matches when two different matches end at the
same position.  In particular, the replacement changes are reverted.

Interestingly, Perl and Python do not agree on how to do such
replacements.  Python does what I did in the above-cited commit, i.e.
it doesn't replace twice if the match ends overlap, but Perl does.
Perl looks more legitimate here since both Python and Perl do find
the overlapping matches when performing a search, so Python is the odd
one out here, behaving differently on replace than it does on search.

For example, replacing using the pattern "a?(?=b)" and the replacement
string "_":

	Python: ababcdb -> _b_bcd_b
	Perl:   ababcdb -> __b__bcd_b

But finding using the same pattern on the same input gives the same
results on both:

	Python: ababcdb -> ['a', '', 'a', '', '']
	Perl:   ababcdb -> ['a', '', 'a', '', '']

Anyway, GLib and we both claim to support "Perl-compatible regular
expressions", so we have to follow Perl, especially in such doubtful
situations.


Modified Paths:
--------------
    src/search.c

Modified: src/search.c
47 files changed, 16 insertions(+), 31 deletions(-)
===================================================================
@@ -2028,7 +2028,6 @@ static gint find_document_usage(GeanyDocument *doc, const gchar *search_text, gi
 	struct Sci_TextToFind ttf;
 	gint count = 0;
 	gint prev_line = -1;
-	gint prev_end = -1;
 
 	g_return_val_if_fail(doc != NULL, 0);
 
@@ -2045,27 +2044,22 @@ static gint find_document_usage(GeanyDocument *doc, const gchar *search_text, gi
 		if (pos == -1)
 			break;	/* no more matches */
 
-		/* avoid rematching with empty matches like "(?=[a-z])" or "^$".
-		 * note we cannot assume a match will always be empty or not, like with "a?(?=b)"*/
-		if (ttf.chrgText.cpMax != prev_end)
+		count++;
+		line = sci_get_line_from_position(doc->editor->sci, pos);
+		if (line != prev_line)
 		{
-			count++;
-			line = sci_get_line_from_position(doc->editor->sci, pos);
-			if (line != prev_line)
-			{
-				buffer = sci_get_line(doc->editor->sci, line);
-				msgwin_msg_add(COLOR_BLACK, line + 1, doc,
-					"%s:%d: %s", short_file_name, line + 1, g_strstrip(buffer));
-				g_free(buffer);
-				prev_line = line;
-			}
+			buffer = sci_get_line(doc->editor->sci, line);
+			msgwin_msg_add(COLOR_BLACK, line + 1, doc,
+				"%s:%d: %s", short_file_name, line + 1, g_strstrip(buffer));
+			g_free(buffer);
+			prev_line = line;
 		}
 
-		prev_end = ttf.chrgText.cpMax;
-
-		if (ttf.chrg.cpMin < ttf.chrgText.cpMax)
-			ttf.chrg.cpMin = ttf.chrgText.cpMax;
-		else
+		ttf.chrg.cpMin = ttf.chrgText.cpMax;
+		/* avoid rematching with empty matches like "(?=[a-z])" or "^$".
+		 * note we cannot assume a match will always be empty or not and then break out, since
+		 * matches like "a?(?=b)" will me sometimes empty and sometimes not */
+		if ((ttf.chrgText.cpMax - ttf.chrgText.cpMin) == 0)
 			ttf.chrg.cpMin ++;
 	}
 	g_free(short_file_name);
@@ -2136,7 +2130,6 @@ guint search_replace_range(ScintillaObject *sci, struct Sci_TextToFind *ttf,
 	const gchar *find_text = ttf->lpstrText;
 	gint start = ttf->chrg.cpMin;
 	gint end = ttf->chrg.cpMax;
-	gint prev_find_end = -1;
 
 	g_return_val_if_fail(sci != NULL && find_text != NULL && replace_text != NULL, 0);
 	if (! *find_text)
@@ -2168,14 +2161,9 @@ guint search_replace_range(ScintillaObject *sci, struct Sci_TextToFind *ttf,
 				if (chNext == '\r' || chNext == '\n')
 					movepastEOL = 1;
 			}
-			/* make sure we don't replace the same position twice, in case of pattern
-			 * like "a?(?=b)" (would match "a"b and then ""b -- empty match the 2nd time) */
-			if (prev_find_end != ttf->chrgText.cpMax)
-			{
-				replace_len = search_replace_target(sci, replace_text,
-					flags & SCFIND_REGEXP);
-				count++;
-			}
+			replace_len = search_replace_target(sci, replace_text,
+				flags & SCFIND_REGEXP);
+			count++;
 			if (search_pos == end)
 				break;	/* Prevent hang when replacing regex $ */
 
@@ -2186,9 +2174,6 @@ guint search_replace_range(ScintillaObject *sci, struct Sci_TextToFind *ttf,
 			ttf->chrg.cpMin = start;
 			end += replace_len - find_len;	/* update end of range now text has changed */
 			ttf->chrg.cpMax = end;
-
-			/* match end + replacement offset */
-			prev_find_end = ttf->chrgText.cpMax + replace_len - find_len;
 		}
 
 	}


@@ Diff output truncated at 100000 characters. @@


--------------
This E-Mail was brought to you by github_commit_mail.py (Source: TBD).



More information about the Commits mailing list