SF.net SVN: geany:[5666] trunk
colombanw at users.sourceforge.net
colombanw at xxxxx
Thu Mar 31 23:27:58 UTC 2011
Revision: 5666
http://geany.svn.sourceforge.net/geany/?rev=5666&view=rev
Author: colombanw
Date: 2011-03-31 23:27:58 +0000 (Thu, 31 Mar 2011)
Log Message:
-----------
Better handle badly-written encoding names
Make encoding name comparison more permissive, finding names that are
very likely to refer to the same encoding.
For example, "utf8" now matches "UTF-8", and "iso8859_1" matches
"ISO-8859-1".
This makes encodings_get_idx_from_charset() and
encodings_get_from_charset() more permissive, and allows normalizing
an encoding name.
It is used to better handle user-provided encodings (e.g. one found by
a regex search) by normalizing them to the Geany name.
Modified Paths:
--------------
trunk/ChangeLog
trunk/src/encodings.c
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2011-03-31 23:27:45 UTC (rev 5665)
+++ trunk/ChangeLog 2011-03-31 23:27:58 UTC (rev 5666)
@@ -3,6 +3,12 @@
* src/encodings.c:
Update regex used to find encodings for it to allow the encoding to
be quoted, adding support for XML (closes #3183506).
+ * src/encodings.c:
+ Implement charset name normalization in order to better deal with
+ badly-written encoding names (i.e. names found by regex search).
+ This also makes encodings_get_idx_from_charset() and
+ encodings_get_from_charset() more permissive regarding the passed-in
+ encoding name.
2011-03-31 Nick Treleaven <nick(dot)treleaven(at)btinternet(dot)com>
Modified: trunk/src/encodings.c
===================================================================
--- trunk/src/encodings.c 2011-03-31 23:27:45 UTC (rev 5665)
+++ trunk/src/encodings.c 2011-03-31 23:27:58 UTC (rev 5666)
@@ -147,6 +147,56 @@
}
+/* Compares two encoding names permissively: ASCII case is ignored, and separators may differ
+ * or be left out at a letter/digit boundary, e.g. "utf8" matches "UTF-8", "iso8859_1" matches "ISO-8859-1", etc. */
+static gboolean encodings_charset_equals(const gchar *a, const gchar *b)
+{
+ gboolean was_alpha = FALSE; /* whether last character of previous word was a letter */
+ gboolean need_sep = FALSE; /* whether we're expecting an implicit separator */
+
+ while (*a && *b)
+ {
+ gboolean is_alpha;
+
+ if (g_ascii_toupper(*a) == g_ascii_toupper(*b) &&
+ ((is_alpha = g_ascii_isalpha(*a)) || g_ascii_isdigit(*a)))
+ {
+ /* either there was a real separator, or we need an implicit one (a change from alpha to
+ * numeric or vice versa) */
+ if (! need_sep || (was_alpha != is_alpha))
+ {
+ a++;
+ b++;
+ was_alpha = is_alpha;
+ need_sep = FALSE;
+ }
+ else
+ return FALSE;
+ }
+ else
+ {
+ guint n_sep = 0;
+
+ if (! g_ascii_isalnum(*a))
+ {
+ a++;
+ n_sep++;
+ }
+ if (! g_ascii_isalnum(*b))
+ {
+ b++;
+ n_sep++;
+ }
+ if (n_sep < 1) /* both characters are alphanumeric, so they genuinely differ */
+ return FALSE;
+ else if (n_sep < 2) /* only one name had a separator here: the other needs an implicit one */
+ need_sep = TRUE;
+ }
+ }
+ return *a == *b;
+}
+
+
GeanyEncodingIndex encodings_get_idx_from_charset(const gchar *charset)
{
gint i;
@@ -157,7 +207,7 @@
i = 0;
while (i < GEANY_ENCODINGS_MAX)
{
- if (strcmp(charset, encodings[i].charset) == 0)
+ if (encodings_charset_equals(charset, encodings[i].charset))
return i;
++i;
@@ -176,7 +226,7 @@
i = 0;
while (i < GEANY_ENCODINGS_MAX)
{
- if (strcmp(charset, encodings[i].charset) == 0)
+ if (encodings_charset_equals(charset, encodings[i].charset))
return &encodings[i];
++i;
@@ -186,6 +236,18 @@
}
+static const gchar *encodings_normalize_charset(const gchar *charset)
+{
+ const GeanyEncoding *encoding;
+
+ encoding = encodings_get_from_charset(charset); /* permissive lookup of the given name */
+ if (encoding != NULL)
+ return encoding->charset; /* the charset name as stored for the matched encoding */
+
+ return NULL; /* the lookup returned no encoding */
+}
+
+
const GeanyEncoding *encodings_get_from_index(gint idx)
{
g_return_val_if_fail(idx >= 0 && idx < GEANY_ENCODINGS_MAX, NULL);
@@ -556,7 +618,9 @@
if (check_regex)
{
check_regex = FALSE;
- charset = regex_charset;
+ charset = encodings_normalize_charset(regex_charset);
+ if (! charset) /* we found a regex encoding that we can't normalize, try it as is */
+ charset = regex_charset;
i = -2; /* keep i below the start value to have it again at -1 on the next loop run */
}
else if (check_locale)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the Commits
mailing list