SF.net SVN: geany:[5666] trunk

1 Apr 2011

Revision: 5666
          http://geany.svn.sourceforge.net/geany/?rev=5666&view=rev
Author:   colombanw
Date:     2011-03-31 23:27:58 +0000 (Thu, 31 Mar 2011)
Log Message:
-----------
Better handle badly-written encoding names
Make encoding name comparison more permissive, finding names that are
very likely to refer to the same encoding.
For example, "utf8" now matches "UTF-8", and "iso8859_1" matches
"ISO-8859-1".
This makes encodings_get_idx_from_charset() and
encodings_get_from_charset() more permissive, and allows normalizing
an encoding name.
It is used to better handle user-provided encodings (e.g. one found by
a regex search) by normalizing it to the Geany name.
Modified Paths:
--------------
    trunk/ChangeLog
    trunk/src/encodings.c
Modified: trunk/ChangeLog
===================================================================

--- trunk/ChangeLog	2011-03-31 23:27:45 UTC (rev 5665)
+++ trunk/ChangeLog	2011-03-31 23:27:58 UTC (rev 5666)
@@ -3,6 +3,12 @@
  * src/encodings.c:
    Update regex used to find encodings for it to allow the encoding to
    be quoted, adding support for XML (closes #3183506).
+ * src/encodings.c:
+   Implement charset name normalization in order to better deal with
+   badly-written encoding names (i.e. names found by regex search).
+   This also makes encodings_get_idx_from_charset() and
+   encodings_get_from_charset() more permissive regarding the passed-in
+   encoding name.
2011-03-31  Nick Treleaven  <nick(dot)treleaven(at)btinternet(dot)com>
Modified: trunk/src/encodings.c
===================================================================
--- trunk/src/encodings.c	2011-03-31 23:27:45 UTC (rev 5665)
+++ trunk/src/encodings.c	2011-03-31 23:27:58 UTC (rev 5666)
@@ -147,6 +147,56 @@
 }
+/* compares two encoding names in a permissive fashion.
+ * e.g. "utf8" matches "UTF-8", "iso8859_1" matches "ISO-8859-1", etc. */
+static gboolean encodings_charset_equals(const gchar *a, const gchar *b)
+{
+	gboolean was_alpha = FALSE; /* whether last character of previous word was a letter */
+	gboolean need_sep = FALSE; /* whether we're expecting an implicit separator */
+
+	while (*a && *b)
+	{
+		gboolean is_alpha;
+
+		if (g_ascii_toupper(*a) == g_ascii_toupper(*b) &&
+			((is_alpha = g_ascii_isalpha(*a)) || g_ascii_isdigit(*a)))
+		{
+			/* either there was a real separator, or we need an implicit one (a change from alpha to
+			 * numeric or so) */
+			if (! need_sep || (was_alpha != is_alpha))
+			{
+				a++;
+				b++;
+				was_alpha = is_alpha;
+				need_sep = FALSE;
+			}
+			else
+				return FALSE;
+		}
+		else
+		{
+			guint n_sep = 0;
+
+			if (! g_ascii_isalnum(*a))
+			{
+				a++;
+				n_sep++;
+			}
+			if (! g_ascii_isalnum(*b))
+			{
+				b++;
+				n_sep++;
+			}
+			if (n_sep < 1)
+				return FALSE;
+			else if (n_sep < 2)
+				need_sep = TRUE;
+		}
+	}
+	return *a == *b;
+}
+
+
 GeanyEncodingIndex encodings_get_idx_from_charset(const gchar *charset)
 {
    gint i;
@@ -157,7 +207,7 @@
    i = 0;
    while (i < GEANY_ENCODINGS_MAX)
    {
-		if (strcmp(charset, encodings[i].charset) == 0)
+		if (encodings_charset_equals(charset, encodings[i].charset))
    		return i;
++i;
@@ -176,7 +226,7 @@
    i = 0;
    while (i < GEANY_ENCODINGS_MAX)
    {
-		if (strcmp(charset, encodings[i].charset) == 0)
+		if (encodings_charset_equals(charset, encodings[i].charset))
    		return &encodings[i];
++i;
@@ -186,6 +236,18 @@
 }
+static const gchar *encodings_normalize_charset(const gchar *charset)
+{
+	const GeanyEncoding *encoding;
+
+	encoding = encodings_get_from_charset(charset);
+	if (encoding != NULL)
+		return encoding->charset;
+
+	return NULL;
+}
+
+
 const GeanyEncoding *encodings_get_from_index(gint idx)
 {
    g_return_val_if_fail(idx >= 0 && idx < GEANY_ENCODINGS_MAX, NULL);
@@ -556,7 +618,9 @@
    	if (check_regex)
    	{
    		check_regex = FALSE;
-			charset = regex_charset;
+			charset = encodings_normalize_charset(regex_charset);
+			if (! charset) /* we found a regex encoding that we can't normalize, try it as is */
+				charset = regex_charset;
    		i = -2; /* keep i below the start value to have it again at -1 on the next loop run */
    	}
    	else if (check_locale)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

SF.net SVN: geany:[5666] trunk