Revision: 5666 http://geany.svn.sourceforge.net/geany/?rev=5666&view=rev Author: colombanw Date: 2011-03-31 23:27:58 +0000 (Thu, 31 Mar 2011)
Log Message: ----------- Better handle badly-written encoding names
Make encoding name comparison more permissive, finding names that are very likely to refer to the same encoding. For example, "utf8" now matches "UTF-8", and "iso8859_1" matches "ISO-8859-1".
This makes encodings_get_idx_from_charset() and encodings_get_from_charset() more permissive, and allows normalizing an encoding name. Normalization is used to better handle user-provided encodings (e.g. one found by a regex search) by converting them to the corresponding Geany name.
Modified Paths: -------------- trunk/ChangeLog trunk/src/encodings.c
Modified: trunk/ChangeLog =================================================================== --- trunk/ChangeLog 2011-03-31 23:27:45 UTC (rev 5665) +++ trunk/ChangeLog 2011-03-31 23:27:58 UTC (rev 5666) @@ -3,6 +3,12 @@ * src/encodings.c: Update regex used to find encodings for it to allow the encoding to be quoted, adding support for XML (closes #3183506). + * src/encodings.c: + Implement charset name normalization in order to better deal with + badly-written encoding names (i.e. names found by regex search). + This also makes encodings_get_idx_from_charset() and + encodings_get_from_charset() more permissive regarding the passed-in + encoding name.
2011-03-31 Nick Treleaven <nick(dot)treleaven(at)btinternet(dot)com>
Modified: trunk/src/encodings.c =================================================================== --- trunk/src/encodings.c 2011-03-31 23:27:45 UTC (rev 5665) +++ trunk/src/encodings.c 2011-03-31 23:27:58 UTC (rev 5666) @@ -147,6 +147,56 @@ }
+/* compares two encoding names in a permissive fashion. + * e.g. "utf8" matches "UTF-8", "iso8859_1" matches "ISO-8859-1", etc. */ +static gboolean encodings_charset_equals(const gchar *a, const gchar *b) +{ + gboolean was_alpha = FALSE; /* whether last character of previous word was a letter */ + gboolean need_sep = FALSE; /* whether we're expecting an implicit separator */ + + while (*a && *b) + { + gboolean is_alpha; + + if (g_ascii_toupper(*a) == g_ascii_toupper(*b) && + ((is_alpha = g_ascii_isalpha(*a)) || g_ascii_isdigit(*a))) + { + /* either there was a real separator, or we need a implicit one (a chage from alpha to + * numeric or so) */ + if (! need_sep || (was_alpha != is_alpha)) + { + a++; + b++; + was_alpha = is_alpha; + need_sep = FALSE; + } + else + return FALSE; + } + else + { + guint n_sep = 0; + + if (! g_ascii_isalnum(*a)) + { + a++; + n_sep++; + } + if (! g_ascii_isalnum(*b)) + { + b++; + n_sep++; + } + if (n_sep < 1) + return FALSE; + else if (n_sep < 2) + need_sep = TRUE; + } + } + return *a == *b; +} + + GeanyEncodingIndex encodings_get_idx_from_charset(const gchar *charset) { gint i; @@ -157,7 +207,7 @@ i = 0; while (i < GEANY_ENCODINGS_MAX) { - if (strcmp(charset, encodings[i].charset) == 0) + if (encodings_charset_equals(charset, encodings[i].charset)) return i;
++i; @@ -176,7 +226,7 @@ i = 0; while (i < GEANY_ENCODINGS_MAX) { - if (strcmp(charset, encodings[i].charset) == 0) + if (encodings_charset_equals(charset, encodings[i].charset)) return &encodings[i];
++i; @@ -186,6 +236,18 @@ }
+static const gchar *encodings_normalize_charset(const gchar *charset) +{ + const GeanyEncoding *encoding; + + encoding = encodings_get_from_charset(charset); + if (encoding != NULL) + return encoding->charset; + + return NULL; +} + + const GeanyEncoding *encodings_get_from_index(gint idx) { g_return_val_if_fail(idx >= 0 && idx < GEANY_ENCODINGS_MAX, NULL); @@ -556,7 +618,9 @@ if (check_regex) { check_regex = FALSE; - charset = regex_charset; + charset = encodings_normalize_charset(regex_charset); + if (! charset) /* we found a regex encoding that we can't normalize, try it as is */ + charset = regex_charset; i = -2; /* keep i below the start value to have it again at -1 on the next loop run */ } else if (check_locale)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.