SF.net SVN: geany: [1566] trunk

eht16 at users.sourceforge.net eht16 at xxxxx
Thu May 24 10:24:26 UTC 2007


Revision: 1566
          http://svn.sourceforge.net/geany/?rev=1566&view=rev
Author:   eht16
Date:     2007-05-24 03:24:26 -0700 (Thu, 24 May 2007)

Log Message:
-----------
Fix loading of UTF-16/32 encoded files with a BOM.

Modified Paths:
--------------
    trunk/ChangeLog
    trunk/src/document.c

Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog	2007-05-23 17:20:47 UTC (rev 1565)
+++ trunk/ChangeLog	2007-05-24 10:24:26 UTC (rev 1566)
@@ -1,3 +1,8 @@
+2007-05-24  Enrico Tröger  <enrico.troeger at uvena.de>
+
+ * src/document.c: Fix loading of UTF-16/32 encoded files with a BOM.
+
+
 2007-05-23  Enrico Tröger  <enrico.troeger at uvena.de>
 
  * src/vte.c: Add popup menu item: Change current working directory.

Modified: trunk/src/document.c
===================================================================
--- trunk/src/document.c	2007-05-23 17:20:47 UTC (rev 1565)
+++ trunk/src/document.c	2007-05-24 10:24:26 UTC (rev 1566)
@@ -473,6 +473,7 @@
 typedef struct
 {
 	gchar		*data;	// null-terminated file data
+	gsize		 size;	// actual file size on disk
 	gsize		 len;	// string length of data
 	gchar		*enc;
 	gboolean	 bom;
@@ -509,7 +510,7 @@
 			filedata->len = strlen(converted_text);
 		}
 	}
-	enc_idx = encodings_scan_unicode_bom(filedata->data, filedata->len, NULL);
+	enc_idx = encodings_scan_unicode_bom(filedata->data, filedata->size, NULL);
 	filedata->bom = (enc_idx == GEANY_ENCODING_UTF_8);
 	filedata->enc = g_strdup(forced_enc);
 	return TRUE;
@@ -523,16 +524,17 @@
 	g_return_val_if_fail(filedata->enc == NULL, FALSE);
 	g_return_val_if_fail(filedata->bom == FALSE, FALSE);
 
-	if (filedata->len == 0)
+	if (filedata->size == 0)
 	{
-		// we have no data so assume UTF-8
+		// we have no data so assume UTF-8, filedata->len can be 0 even we have an empty
+		// e.g. UTF32 file with a BOM(so size is 4, len is 0)
 		filedata->enc = g_strdup("UTF-8");
 	}
 	else
 	{
 		// first check for a BOM
 		GeanyEncodingIndex enc_idx =
-			encodings_scan_unicode_bom(filedata->data, filedata->len, NULL);
+			encodings_scan_unicode_bom(filedata->data, filedata->size, NULL);
 
 		if (enc_idx != GEANY_ENCODING_NONE)
 		{
@@ -542,7 +544,7 @@
 			if (enc_idx != GEANY_ENCODING_UTF_8) // the BOM indicated something else than UTF-8
 			{
 				gchar *converted_text = encodings_convert_to_utf8_from_charset(
-										filedata->data, filedata->len, filedata->enc, FALSE);
+										filedata->data, filedata->size, filedata->enc, FALSE);
 				if (converted_text != NULL)
 				{
 					g_free(filedata->data);
@@ -570,7 +572,7 @@
 			{
 				// detect the encoding
 				gchar *converted_text = encodings_convert_to_utf8(filedata->data,
-					filedata->len, &filedata->enc);
+					filedata->size, &filedata->enc);
 
 				if (converted_text == NULL)
 				{
@@ -591,9 +593,10 @@
 {
 	guint bom_len;
 
-	encodings_scan_unicode_bom(filedata->data, filedata->len, &bom_len);
+	encodings_scan_unicode_bom(filedata->data, filedata->size, &bom_len);
 	g_return_if_fail(bom_len != 0);
 
+	// use filedata->len here because the contents are already converted into UTF-8
 	filedata->len -= bom_len;
 	// overwrite the BOM with the remainder of the file contents, plus the NULL terminator.
 	g_memmove(filedata->data, filedata->data + bom_len, filedata->len + 1);
@@ -607,6 +610,7 @@
 {
 	GError *err = NULL;
 	struct stat st;
+	GeanyEncodingIndex tmp_enc_idx;
 
 	filedata->data = NULL;
 	filedata->len = 0;
@@ -630,10 +634,18 @@
 	}
 
 	// use strlen to check for null chars
+	filedata->size = (gsize) st.st_size;
 	filedata->len = strlen(filedata->data);
 
+	// temporarily retrieve the encoding idx based on the BOM to suppress the following warning
+	// if we have a BOM
+	tmp_enc_idx = encodings_scan_unicode_bom(filedata->data, filedata->size, NULL);
+
 	/* check whether the size of the loaded data is equal to the size of the file in the filesystem */
-	if (filedata->len != (gsize) st.st_size)
+	if (filedata->len != filedata->size && (
+		tmp_enc_idx == GEANY_ENCODING_UTF_8 || // tmp_enc_idx can be UTF-7/8/16/32, UCS and None
+		tmp_enc_idx == GEANY_ENCODING_UTF_7 || // filter out UTF-7/8 and None where no NULL bytes
+		tmp_enc_idx == GEANY_ENCODING_NONE))   // are allowed
 	{
 		gchar *warn_msg = _("The file \"%s\" could not be opened properly and has been truncated. "
 				"This can occur if the file contains a NULL byte. "


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.



More information about the Commits mailing list