[geany/geany] cb68cd: Refactor to remove duplication, lower assumptions and improve safety

21 Apr 2024


Branch:      refs/heads/master
Author:      Colomban Wendling ban@herbesfolles.org
Committer:   Colomban Wendling ban@herbesfolles.org
Date:        Fri, 08 Dec 2023 20:58:01 UTC
Commit:      cb68cdb2f1b5c544b3235edde3bb12f149360fe7
             https://github.com/geany/geany/commit/cb68cdb2f1b5c544b3235edde3bb12f149360fe7
Log Message:
-----------
Refactor to remove duplication, lower assumptions and improve safety
* Avoid repeated code paths that each have to manually update
  `buffer->size` with `strlen(buffer->data)`;
* This also reduces how many code paths assume `buffer->data` is
  NUL-free at this stage -- we still have some, but they are a little
  less critical, and it's a step in the right direction anyway;
* Perform the NUL-freeness check unconditionally, only assuming
  `buffer->size` is correct, which it ought to be anyway.
Modified Paths:
--------------
    src/encodings.c
Modified: src/encodings.c
71 lines changed, 38 insertions(+), 33 deletions(-)
===================================================================
@@ -619,21 +619,9 @@ void encodings_encoding_store_cell_data_func(GtkCellLayout *cell_layout,
 }
-/**
- *  Tries to convert @a buffer into UTF-8 encoding from the encoding specified with @a charset.
- *  If @a fast is not set, additional checks to validate the converted string are performed.
- *
- *  @param buffer The input string to convert.
- *  @param size The length of the string, or -1 if the string is nul-terminated.
- *  @param charset The charset to be used for conversion.
- *  @param fast @c TRUE to only convert the input and skip extended checks on the converted string.
- *
- *  @return If the conversion was successful, a newly allocated nul-terminated string,
- *    which must be freed with @c g_free(). Otherwise @c NULL.
- **/
-GEANY_API_SYMBOL
-gchar *encodings_convert_to_utf8_from_charset(const gchar *buffer, gssize size,
-											  const gchar *charset, gboolean fast)
+static gchar *convert_to_utf8_from_charset(const gchar *buffer, gssize size,
+										   const gchar *charset, gboolean fast,
+										   gsize *utf8_size)
 {
    gchar *utf8_content = NULL;
    GError *conv_error = NULL;
@@ -671,10 +659,35 @@ gchar *encodings_convert_to_utf8_from_charset(const gchar *buffer, gssize size,
    	utf8_content = converted_contents;
    }
+	if (utf8_content && utf8_size)
+		*utf8_size = bytes_written;
+
    return utf8_content;
 }
+/**
+ *  Tries to convert @a buffer into UTF-8 encoding from the encoding specified with @a charset.
+ *  If @a fast is not set, additional checks to validate the converted string are performed.
+ *
+ *  @param buffer The input string to convert.
+ *  @param size The length of the string, or -1 if the string is nul-terminated.
+ *  @param charset The charset to be used for conversion.
+ *  @param fast @c TRUE to only convert the input and skip extended checks on the converted string.
+ *
+ *  @return If the conversion was successful, a newly allocated nul-terminated string,
+ *    which must be freed with @c g_free(). Otherwise @c NULL.
+ **/
+GEANY_API_SYMBOL
+gchar *encodings_convert_to_utf8_from_charset(const gchar *buffer, gssize size,
+											  const gchar *charset, gboolean fast)
+{
+	/* If fast=FALSE, we can safely ignore the size as the output cannot contain NULs.
+	 * Otherwise, the caller already agrees on partial data anyway. */
+	return convert_to_utf8_from_charset(buffer, size, charset, fast, NULL);
+}
+
+
 static gchar *encodings_check_regexes(const gchar *buffer, gsize size)
 {
    guint i;
@@ -691,7 +704,7 @@ static gchar *encodings_check_regexes(const gchar *buffer, gsize size)
static gchar *encodings_convert_to_utf8_with_suggestion(const gchar *buffer, gssize size,
-		const gchar *suggested_charset, gchar **used_encoding)
+		const gchar *suggested_charset, gchar **used_encoding, gsize *utf8_size)
 {
    const gchar *locale_charset = NULL;
    const gchar *charset;
@@ -758,7 +771,7 @@ static gchar *encodings_convert_to_utf8_with_suggestion(const gchar *buffer, gss
geany_debug("Trying to convert %" G_GSIZE_FORMAT " bytes of data from %s into UTF-8.",
    		size, charset);
-		utf8_content = encodings_convert_to_utf8_from_charset(buffer, size, charset, FALSE);
+		utf8_content = convert_to_utf8_from_charset(buffer, size, charset, FALSE, utf8_size);
if (G_LIKELY(utf8_content != NULL))
    	{
@@ -798,7 +811,8 @@ gchar *encodings_convert_to_utf8(const gchar *buffer, gssize size, gchar **used_
/* first try to read the encoding from the file content */
    regex_charset = encodings_check_regexes(buffer, size);
-	utf8 = encodings_convert_to_utf8_with_suggestion(buffer, size, regex_charset, used_encoding);
+	/* we know this cannot succeed if there are NULs in the output, so ignoring the size is OK */
+	utf8 = encodings_convert_to_utf8_with_suggestion(buffer, size, regex_charset, used_encoding, NULL);
    g_free(regex_charset);
return utf8;
@@ -879,7 +893,6 @@ typedef struct
    gsize		 size;	/* actual data size */
    gchar		*enc;
    gboolean	 bom;
-	gboolean	 has_nuls;
 } BufferData;
@@ -898,17 +911,15 @@ handle_forced_encoding(BufferData *buffer, const gchar *forced_enc)
    }
    else
    {
-		gchar *converted_text = encodings_convert_to_utf8_from_charset(
-										buffer->data, buffer->size, forced_enc, FALSE);
+		gchar *converted_text = convert_to_utf8_from_charset(
+										buffer->data, buffer->size, forced_enc, FALSE, &buffer->size);
    	if (converted_text == NULL)
    	{
    		return FALSE;
    	}
    	else
    	{
    		SETPTR(buffer->data, converted_text);
-			/* we can't succeed with NULs, so this is OK */
-			buffer->size = strlen(converted_text);
    	}
    }
    enc_idx = encodings_scan_unicode_bom(buffer->data, buffer->size, NULL);
@@ -949,13 +960,11 @@ handle_encoding(BufferData *buffer, GeanyEncodingIndex enc_idx)
    		}
    		else /* the BOM indicated something else than UTF-8 */
    		{
-				gchar *converted_text = encodings_convert_to_utf8_from_charset(
-										buffer->data, buffer->size, buffer->enc, FALSE);
+				gchar *converted_text = convert_to_utf8_from_charset(
+										buffer->data, buffer->size, buffer->enc, FALSE, &buffer->size);
    			if (converted_text != NULL)
    			{
    				SETPTR(buffer->data, converted_text);
-					/* we can't succeed with NULs, so this is OK */
-					buffer->size = strlen(converted_text);
    			}
    			else
    			{
@@ -981,16 +990,14 @@ handle_encoding(BufferData *buffer, GeanyEncodingIndex enc_idx)
    		{
    			/* detect the encoding */
    			gchar *converted_text = encodings_convert_to_utf8_with_suggestion(buffer->data,
-					buffer->size, regex_charset, &buffer->enc);
+					buffer->size, regex_charset, &buffer->enc, &buffer->size);
if (converted_text == NULL)
    			{
    				g_free(regex_charset);
    				return FALSE;
    			}
    			SETPTR(buffer->data, converted_text);
-				/* we can't succeed with NULs, so this is OK */
-				buffer->size = strlen(converted_text);
    		}
    		g_free(regex_charset);
    	}
@@ -1032,7 +1039,6 @@ static gboolean handle_buffer(BufferData *buffer, const gchar *forced_enc)
    	{
    		buffer->bom = FALSE;
    		buffer->enc = g_strdup(encodings[GEANY_ENCODING_NONE].charset);
-			buffer->has_nuls = strlen(buffer->data) != buffer->size;
    	}
    	else if (! handle_forced_encoding(buffer, forced_enc))
    	{
@@ -1075,7 +1081,6 @@ gboolean encodings_convert_to_utf8_auto(gchar **buf, gsize *size, const gchar *f
    buffer.size = *size;
    buffer.enc = NULL;
    buffer.bom = FALSE;
-	buffer.has_nuls = FALSE;
if (! handle_buffer(&buffer, forced_enc))
    	return FALSE;
@@ -1088,7 +1093,7 @@ gboolean encodings_convert_to_utf8_auto(gchar **buf, gsize *size, const gchar *f
    if (has_bom)
    	*has_bom = buffer.bom;
    if (has_nuls)
-		*has_nuls = buffer.has_nuls;
+		*has_nuls = strlen(buffer.data) != buffer.size;
*buf = buffer.data;
    return TRUE;
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

[geany/geany] cb68cd: Refactor to remove duplication, lower assumptions and improve safety