[geany/geany] ab02be: Use GRegex for encoding regex matching
Nick Treleaven
git-noreply at xxxxx
Thu Jul 26 00:02:36 UTC 2012
Branch: refs/heads/document-messages
Author: Nick Treleaven <nick.treleaven at btinternet.com>
Committer: Nick Treleaven <nick.treleaven at btinternet.com>
Date: Fri, 30 Dec 2011 15:55:06
Commit: ab02beff5a0e39bca9e1c5d6cd34ec3d92770b0a
https://github.com/geany/geany/commit/ab02beff5a0e39bca9e1c5d6cd34ec3d92770b0a
Log Message:
-----------
Use GRegex for encoding regex matching
Modified Paths:
--------------
src/encodings.c
Modified: src/encodings.c
1 file changed, 21 insertions(+), 29 deletions(-)
===================================================================
@@ -42,19 +42,13 @@
#include "callbacks.h"
#include "ui_utils.h"
-#ifdef HAVE_REGEX_H
-# include <regex.h>
-#else
-# include "gnuregex.h"
-#endif
-
/* <meta http-equiv="content-type" content="text/html; charset=UTF-8" /> */
#define PATTERN_HTMLMETA "<meta[ \t\n\r\f]+http-equiv[ \t\n\r\f]*=[ \t\n\r\f]*\"?content-type\"?[ \t\n\r\f]+content[ \t\n\r\f]*=[ \t\n\r\f]*\"text/x?html;[ \t\n\r\f]*charset=([a-z0-9_-]+)\"[ \t\n\r\f]*/?>"
/* " geany_encoding=utf-8 " or " coding: utf-8 " */
#define PATTERN_CODING "coding[\t ]*[:=][\t ]*\"?([a-z0-9-]+)\"?[\t ]*"
/* precompiled regexps */
-static regex_t pregs[2];
+static GRegex *pregs[2];
static gboolean pregs_loaded = FALSE;
@@ -321,42 +315,40 @@ void encodings_select_radio_item(const gchar *charset)
* regex_compile() is used to compile regular expressions on program init and keep it in memory
* for faster access when opening a file. Pre-compiled regexps will be freed on program exit.
*/
-static void regex_compile(regex_t *preg, const gchar *pattern)
+static GRegex *regex_compile(const gchar *pattern)
{
- gint retval = regcomp(preg, pattern, REG_EXTENDED | REG_ICASE);
- if (retval != 0)
+ GError *error = NULL;
+ GRegex *regex = g_regex_new(pattern, G_REGEX_CASELESS, 0, &error);
+
+ if (!regex)
{
- gchar errmsg[512];
- regerror(retval, preg, errmsg, 512);
- geany_debug("regcomp() failed (%s)", errmsg);
- regfree(preg);
- return;
+ geany_debug("Failed to compile encoding regex (%s)", error->message);
+ g_error_free(error);
}
+ return regex;
}
-static gchar *regex_match(regex_t *preg, const gchar *buffer, gsize size)
+static gchar *regex_match(GRegex *preg, const gchar *buffer, gsize size)
{
- gint retval;
- gchar *tmp_buf = NULL;
gchar *encoding = NULL;
- regmatch_t pmatch[10];
+ GMatchInfo *minfo;
if (G_UNLIKELY(! pregs_loaded || buffer == NULL))
return NULL;
- if (size > 512)
- tmp_buf = g_strndup(buffer, 512); /* scan only the first 512 characters in the buffer */
+ /* scan only the first 512 characters in the buffer */
+ size = MIN(size, 512);
- retval = regexec(preg, (tmp_buf != NULL) ? tmp_buf : buffer, 10, pmatch, 0);
- if (retval == 0 && pmatch[0].rm_so != -1 && pmatch[1].rm_so != -1)
+ if (g_regex_match_full(preg, buffer, size, 0, 0, &minfo, NULL) &&
+ g_match_info_get_match_count(minfo) >= 2)
{
- encoding = g_strndup(&buffer[pmatch[1].rm_so], pmatch[1].rm_eo - pmatch[1].rm_so);
+ encoding = g_match_info_fetch(minfo, 1);
geany_debug("Detected encoding by regex search: %s", encoding);
setptr(encoding, g_utf8_strup(encoding, -1));
}
- g_free(tmp_buf);
+ g_match_info_free(minfo);
return encoding;
}
@@ -390,7 +382,7 @@ void encodings_finalize(void)
len = G_N_ELEMENTS(pregs);
for (i = 0; i < len; i++)
{
- regfree(&pregs[i]);
+ g_regex_unref(pregs[i]);
}
}
}
@@ -411,8 +403,8 @@ void encodings_init(void)
if (! pregs_loaded)
{
- regex_compile(&pregs[0], PATTERN_HTMLMETA);
- regex_compile(&pregs[1], PATTERN_CODING);
+ pregs[0] = regex_compile(PATTERN_HTMLMETA);
+ pregs[1] = regex_compile(PATTERN_CODING);
pregs_loaded = TRUE;
}
@@ -568,7 +560,7 @@ static gchar *encodings_check_regexes(const gchar *buffer, gsize size)
{
gchar *charset;
- if ((charset = regex_match(&pregs[i], buffer, size)) != NULL)
+ if ((charset = regex_match(pregs[i], buffer, size)) != NULL)
return charset;
}
return NULL;
@@ Diff output truncated at 100000 characters. @@
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: TBD).
More information about the Commits mailing list