SF.net SVN: geany: [1167] trunk
ntrel at users.sourceforge.net
ntrel at xxxxx
Sun Jan 7 16:22:41 UTC 2007
Revision: 1167
http://svn.sourceforge.net/geany/?rev=1167&view=rev
Author: ntrel
Date: 2007-01-07 08:22:41 -0800 (Sun, 07 Jan 2007)
Log Message:
-----------
Fix memory leak when using utils_scan_unicode_bom().
Prevent invalid memory read in utils_scan_unicode_bom() when text
length is < 4.
Move utils_scan_unicode_bom(), utils_is_unicode_charset() to
encodings.c.
Read the BOM length in handle_bom().
Modified Paths:
--------------
trunk/ChangeLog
trunk/src/dialogs.c
trunk/src/document.c
trunk/src/encodings.c
trunk/src/encodings.h
trunk/src/ui_utils.c
trunk/src/utils.c
trunk/src/utils.h
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2007-01-07 14:04:13 UTC (rev 1166)
+++ trunk/ChangeLog 2007-01-07 16:22:41 UTC (rev 1167)
@@ -1,3 +1,15 @@
+2007-01-07 Nick Treleaven <nick.treleaven at btinternet.com>
+
+ * src/utils.c, src/utils.h, src/encodings.c, src/document.c,
+ src/encodings.h, src/dialogs.c, src/ui_utils.c:
+ Fix memory leak when using utils_scan_unicode_bom().
+ Prevent invalid memory read in utils_scan_unicode_bom() when text
+ length is < 4.
+ Move utils_scan_unicode_bom(), utils_is_unicode_charset() to
+ encodings.c.
+ Read the BOM length in handle_bom().
+
+
2007-01-07 Enrico Tröger <enrico.troeger at uvena.de>
* geany.glade, src/interface.c: Fixed typo.
Modified: trunk/src/dialogs.c
===================================================================
--- trunk/src/dialogs.c 2007-01-07 14:04:13 UTC (rev 1166)
+++ trunk/src/dialogs.c 2007-01-07 16:22:41 UTC (rev 1167)
@@ -1013,8 +1013,9 @@
gtk_misc_set_alignment(GTK_MISC(label), 1, 0);
enctext = g_strdup_printf("%s %s",
- doc_list[idx].encoding,
- (utils_is_unicode_charset(doc_list[idx].encoding)) ? ((doc_list[idx].has_bom) ? _("(with BOM)") : _("(without BOM)")) : "");
+ doc_list[idx].encoding,
+ (encodings_is_unicode_charset(doc_list[idx].encoding)) ?
+ ((doc_list[idx].has_bom) ? _("(with BOM)") : _("(without BOM)")) : "");
label = gtk_label_new(enctext);
g_free(enctext);
Modified: trunk/src/document.c
===================================================================
--- trunk/src/document.c 2007-01-07 14:04:13 UTC (rev 1166)
+++ trunk/src/document.c 2007-01-07 16:22:41 UTC (rev 1167)
@@ -436,17 +436,14 @@
static gboolean
handle_forced_encoding(FileData *filedata, const gchar *forced_enc)
{
+ GeanyEncodingIndex enc_idx;
+
if (utils_str_equal(forced_enc, "UTF-8"))
{
if (! g_utf8_validate(filedata->data, filedata->len, NULL))
{
return FALSE;
}
- else
- {
- filedata->bom = utils_str_equal(utils_scan_unicode_bom(filedata->data), "UTF-8");
- filedata->enc = g_strdup(forced_enc);
- }
}
else
{
@@ -461,53 +458,68 @@
g_free(filedata->data);
filedata->data = converted_text;
filedata->len = strlen(converted_text);
- filedata->bom = utils_str_equal(utils_scan_unicode_bom(filedata->data), "UTF-8");
- filedata->enc = g_strdup(forced_enc);
}
}
+ enc_idx = encodings_scan_unicode_bom(filedata->data, filedata->len, NULL);
+ filedata->bom = (enc_idx == GEANY_ENCODING_UTF_8);
+ filedata->enc = g_strdup(forced_enc);
return TRUE;
}
+// detect encoding and convert to UTF-8 if necessary
static gboolean
handle_encoding(FileData *filedata)
{
- if (filedata->len > 0)
- { // the usual way to detect encoding and convert to UTF-8
- if (filedata->len >= 4)
+ g_return_val_if_fail(filedata->enc == NULL, FALSE);
+ g_return_val_if_fail(filedata->bom == FALSE, FALSE);
+
+ if (filedata->len == 0)
+ {
+ // we have no data so assume UTF-8
+ filedata->enc = g_strdup("UTF-8");
+ }
+ else
+ {
+ // first check for a BOM
+ GeanyEncodingIndex enc_idx =
+ encodings_scan_unicode_bom(filedata->data, filedata->len, NULL);
+
+ if (enc_idx != GEANY_ENCODING_NONE)
{
- filedata->enc = utils_scan_unicode_bom(filedata->data);
- }
- if (filedata->enc != NULL)
- {
+ filedata->enc = g_strdup(encodings[enc_idx].charset);
filedata->bom = TRUE;
- if ((filedata->enc)[4] != '8') // the BOM indicated something else than UTF-8
+
+ if (enc_idx != GEANY_ENCODING_UTF_8) // the BOM indicated something else than UTF-8
{
gchar *converted_text = encodings_convert_to_utf8_from_charset(
- filedata->data, filedata->len, filedata->enc, FALSE);
- if (converted_text == NULL)
+ filedata->data, filedata->len, filedata->enc, FALSE);
+ if (converted_text != NULL)
{
+ g_free(filedata->data);
+ filedata->data = converted_text;
+ filedata->len = strlen(converted_text);
+ }
+ else
+ {
+ // there was a problem converting data from BOM encoding type
g_free(filedata->enc);
filedata->enc = NULL;
filedata->bom = FALSE;
}
- else
- {
- g_free(filedata->data);
- filedata->data = converted_text;
- filedata->len = strlen(converted_text);
- }
}
}
- // this if is important, else doesn't work because enc can be altered in the above block
- if (filedata->enc == NULL)
+
+ if (filedata->enc == NULL) // either there was no BOM or the BOM encoding failed
{
+ // try UTF-8 first
if (g_utf8_validate(filedata->data, filedata->len, NULL))
{
filedata->enc = g_strdup("UTF-8");
}
else
{
+ // detect the encoding
gchar *converted_text = encodings_convert_to_utf8(filedata->data,
filedata->len, &filedata->enc);
@@ -515,19 +527,12 @@
{
return FALSE;
}
- else
- {
- g_free(filedata->data);
- filedata->data = converted_text;
- filedata->len = strlen(converted_text);
- }
+ g_free(filedata->data);
+ filedata->data = converted_text;
+ filedata->len = strlen(converted_text);
}
}
}
- else
- {
- filedata->enc = g_strdup("UTF-8");
- }
return TRUE;
}
@@ -535,14 +540,15 @@
static void
handle_bom(FileData *filedata)
{
- gchar *data_without_bom;
+ guint bom_len;
- g_return_if_fail(filedata->len >= 3);
+ encodings_scan_unicode_bom(filedata->data, filedata->len, &bom_len);
+ g_return_if_fail(bom_len != 0);
- data_without_bom = g_strdup(filedata->data + 3);
- g_free(filedata->data);
- filedata->data = data_without_bom;
- filedata->len -= 3;
+ filedata->len -= bom_len;
+ // overwrite the BOM with the remainder of the file contents, plus the NULL terminator.
+ g_memmove(filedata->data, filedata->data + bom_len, filedata->len + 1);
+ g_realloc(filedata->data, filedata->len + 1);
}
@@ -871,7 +877,7 @@
sci_convert_eols(doc_list[idx].sci, sci_get_eol_mode(doc_list[idx].sci));
len = sci_get_length(doc_list[idx].sci) + 1;
- if (doc_list[idx].has_bom && utils_is_unicode_charset(doc_list[idx].encoding))
+ if (doc_list[idx].has_bom && encodings_is_unicode_charset(doc_list[idx].encoding))
{
data = (gchar*) g_malloc(len + 3); // 3 chars for BOM
data[0] = 0xef;
@@ -1642,7 +1648,7 @@
ui_update_statusbar(idx, -1);
gtk_widget_set_sensitive(lookup_widget(app->window, "menu_write_unicode_bom1"),
- utils_is_unicode_charset(doc_list[idx].encoding));
+ encodings_is_unicode_charset(doc_list[idx].encoding));
}
Modified: trunk/src/encodings.c
===================================================================
--- trunk/src/encodings.c 2007-01-07 14:04:13 UTC (rev 1166)
+++ trunk/src/encodings.c 2007-01-07 16:22:41 UTC (rev 1167)
@@ -39,12 +39,12 @@
-#define fill(v, w, x, y, z) \
- encodings[x].idx = x; \
- encodings[x].order = v; \
- encodings[x].group = w; \
- encodings[x].charset = y; \
- encodings[x].name = z;
+#define fill(Order, Group, Idx, Charset, Name) \
+ encodings[Idx].idx = Idx; \
+ encodings[Idx].order = Order; \
+ encodings[Idx].group = Group; \
+ encodings[Idx].charset = Charset; \
+ encodings[Idx].name = Name;
static void init_encodings(void)
{
@@ -377,3 +377,72 @@
return NULL;
}
+
+
+/* If there's a BOM, return a corresponding GEANY_ENCODING_UTF_* index,
+ * otherwise GEANY_ENCODING_NONE.
+ * */
+GeanyEncodingIndex encodings_scan_unicode_bom(const gchar *string, gsize len, guint *bom_len)
+{
+ if (len >= 3)
+ {
+ if (bom_len)
+ *bom_len = 3;
+
+ if ((guchar)string[0] == 0xef && (guchar)string[1] == 0xbb &&
+ (guchar)string[2] == 0xbf)
+ {
+ return GEANY_ENCODING_UTF_8;
+ }
+ }
+ if (len >= 4)
+ {
+ if (bom_len)
+ *bom_len = 4;
+
+ if ((guchar)string[0] == 0x00 && (guchar)string[1] == 0x00 &&
+ (guchar)string[2] == 0xfe && (guchar)string[3] == 0xff)
+ {
+ return GEANY_ENCODING_UTF_32BE; // Big endian
+ }
+ if ((guchar)string[0] == 0xff && (guchar)string[1] == 0xfe &&
+ (guchar)string[2] == 0x00 && (guchar)string[3] == 0x00)
+ {
+ return GEANY_ENCODING_UTF_32LE; // Little endian
+ }
+ if ((string[0] == 0x2b && string[1] == 0x2f && string[2] == 0x76) &&
+ (string[3] == 0x38 || string[3] == 0x39 || string[3] == 0x2b || string[3] == 0x2f))
+ {
+ return GEANY_ENCODING_UTF_7;
+ }
+ }
+ if (len >= 2)
+ {
+ if (bom_len)
+ *bom_len = 2;
+
+ if ((guchar)string[0]==0xfe && (guchar)string[1] == 0xff)
+ {
+ return GEANY_ENCODING_UTF_16BE; // Big endian
+ }
+ if ((guchar)string[0] == 0xff && (guchar)string[1] == 0xfe)
+ {
+ return GEANY_ENCODING_UTF_16LE; // Little endian
+ }
+ }
+ if (bom_len)
+ *bom_len = 0;
+ return GEANY_ENCODING_NONE;
+}
+
+
+gboolean encodings_is_unicode_charset(const gchar *string)
+{
+ if (string != NULL && (strncmp(string, "UTF", 3) == 0 || strncmp(string, "UCS", 3) == 0))
+ {
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
Modified: trunk/src/encodings.h
===================================================================
--- trunk/src/encodings.h 2007-01-07 14:04:13 UTC (rev 1166)
+++ trunk/src/encodings.h 2007-01-07 16:22:41 UTC (rev 1167)
@@ -74,7 +74,9 @@
gchar *encodings_convert_to_utf8_from_charset(const gchar *buffer, gsize size,
const gchar *charset, gboolean fast);
+gboolean encodings_is_unicode_charset(const gchar *string);
+
/*
* The original versions of the following tables are taken from profterm
*
@@ -152,7 +154,7 @@
GEANY_ENCODING_WINDOWS_1256,
GEANY_ENCODING_WINDOWS_1257,
GEANY_ENCODING_WINDOWS_1258,
-
+
GEANY_ENCODING_NONE,
GEANY_ENCODINGS_MAX
@@ -161,4 +163,7 @@
GeanyEncoding encodings[GEANY_ENCODINGS_MAX];
+
+GeanyEncodingIndex encodings_scan_unicode_bom(const gchar *string, gsize len, guint *bom_len);
+
#endif
Modified: trunk/src/ui_utils.c
===================================================================
--- trunk/src/ui_utils.c 2007-01-07 14:04:13 UTC (rev 1166)
+++ trunk/src/ui_utils.c 2007-01-07 16:22:41 UTC (rev 1167)
@@ -120,7 +120,8 @@
(doc_list[idx].readonly) ? ", read only" : "",
cur_tag,
(doc_list[idx].encoding) ? doc_list[idx].encoding : _("unknown"),
- (utils_is_unicode_charset(doc_list[idx].encoding)) ? ((doc_list[idx].has_bom) ? _("(with BOM)") : _("(without BOM)")) : "",
+ (encodings_is_unicode_charset(doc_list[idx].encoding)) ?
+ ((doc_list[idx].has_bom) ? _("(with BOM)") : _("(without BOM)")) : "",
(doc_list[idx].file_type) ? doc_list[idx].file_type->title : _("unknown"));
set_statusbar(text, TRUE); // can be overridden by status messages
g_free(text);
@@ -706,7 +707,7 @@
TRUE);
gtk_widget_set_sensitive(lookup_widget(app->window, "menu_write_unicode_bom1"),
- utils_is_unicode_charset(doc_list[idx].encoding));
+ encodings_is_unicode_charset(doc_list[idx].encoding));
encodings_select_radio_item(doc_list[idx].encoding);
filetypes_select_radio_item(doc_list[idx].file_type);
Modified: trunk/src/utils.c
===================================================================
--- trunk/src/utils.c 2007-01-07 14:04:13 UTC (rev 1166)
+++ trunk/src/utils.c 2007-01-07 16:22:41 UTC (rev 1167)
@@ -1403,50 +1403,6 @@
}
-gchar *utils_scan_unicode_bom(const gchar *string)
-{
- if ((unsigned char)string[0] == 0xef && (unsigned char)string[1] == 0xbb &&
- (unsigned char)string[2] == 0xbf)
- {
- return g_strdup("UTF-8");
- }
- else if ((unsigned char)string[0] == 0x00 && (unsigned char)string[1] == 0x00 &&
- (unsigned char)string[2] == 0xfe && (unsigned char)string[3] == 0xff)
- {
- return g_strdup("UTF-32BE"); // Big endian
- }
- else if ((unsigned char)string[0] == 0xff && (unsigned char)string[1] == 0xfe &&
- (unsigned char)string[2] == 0x00 && (unsigned char)string[3] == 0x00)
- {
- return g_strdup("UTF-32LE"); // Little endian
- }
- else if ((unsigned char)string[0]==0xfe && (unsigned char)string[1] == 0xff)
- {
- return g_strdup("UTF-16BE"); // Big endian
- }
- else if ((unsigned char)string[0] == 0xff && (unsigned char)string[1] == 0xfe)
- {
- return g_strdup("UTF-16LE"); // Little endian
- }
- else if ((string[0] == 0x2b && string[1] == 0x2f && string[2] == 0x76) &&
- (string[3] == 0x38 || string[3] == 0x39 || string[3] == 0x2b || string[3] == 0x2f))
- {
- return g_strdup("UTF-7");
- }
- return NULL;
-}
-
-
-gboolean utils_is_unicode_charset(const gchar *string)
-{
- if (string != NULL && (strncmp(string, "UTF", 3) == 0 || strncmp(string, "UCS", 3) == 0))
- {
- return TRUE;
- }
- return FALSE;
-}
-
-
/* Wraps a string in place, replacing a space with a newline character.
* wrapstart is the minimum position to start wrapping or -1 for default */
gboolean utils_wrap_string(gchar *string, gint wrapstart)
Modified: trunk/src/utils.h
===================================================================
--- trunk/src/utils.h 2007-01-07 14:04:13 UTC (rev 1166)
+++ trunk/src/utils.h 2007-01-07 16:22:41 UTC (rev 1167)
@@ -135,10 +135,6 @@
* Replaces \\, \r, \n, \t and \uXXX by their real counterparts */
gboolean utils_str_replace_escape(gchar *string);
-gchar *utils_scan_unicode_bom(const gchar *string);
-
-gboolean utils_is_unicode_charset(const gchar *string);
-
/* Wraps a string in place, replacing a space with a newline character.
* wrapstart is the minimum position to start wrapping or -1 for default */
gboolean utils_wrap_string(gchar *string, gint wrapstart);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the Commits mailing list