[geany/geany] e38c7e: Merge pull request #879 from b4n/c/cxx11-raw-strings

Colomban Wendling git-noreply at xxxxx
Thu Feb 11 14:26:32 UTC 2016


Branch:      refs/heads/master
Author:      Colomban Wendling <ban at herbesfolles.org>
Committer:   Colomban Wendling <ban at herbesfolles.org>
Date:        Thu, 11 Feb 2016 14:26:32 UTC
Commit:      e38c7e3b6781668269267b08893015b63765b9f0
             https://github.com/geany/geany/commit/e38c7e3b6781668269267b08893015b63765b9f0

Log Message:
-----------
Merge pull request #879 from b4n/c/cxx11-raw-strings

c++: Fix parsing of C++11 raw string literals.


Modified Paths:
--------------
    tagmanager/ctags/c.c
    tagmanager/ctags/get.c
    tagmanager/ctags/get.h
    tagmanager/ctags/read.c
    tagmanager/ctags/read.h
    tests/ctags/Makefile.am
    tests/ctags/cxx11-raw-strings.cpp
    tests/ctags/cxx11-raw-strings.cpp.tags

Modified: tagmanager/ctags/c.c
2 lines changed, 1 insertions(+), 1 deletions(-)
===================================================================
@@ -3172,7 +3172,7 @@ static boolean findCTags (const unsigned int passCount)
 	contextual_fake_count = 0;
 
 	Assert (passCount < 3);
-	cppInit ((boolean) (passCount > 1), isLanguage (Lang_csharp));
+	cppInit ((boolean) (passCount > 1), isLanguage (Lang_csharp), isLanguage (Lang_cpp));
 
 	exception = (exception_t) setjmp (Exception);
 	retry = FALSE;


Modified: tagmanager/ctags/get.c
93 lines changed, 92 insertions(+), 1 deletions(-)
===================================================================
@@ -62,6 +62,7 @@ typedef struct sCppState {
 	int		ungetch, ungetch2;   /* ungotten characters, if any */
 	boolean resolveRequired;     /* must resolve if/else/elif/endif branch */
 	boolean hasAtLiteralStrings; /* supports @"c:\" strings */
+	boolean hasCxxRawLiteralStrings; /* supports R"xxx(...)xxx" strings */
 	struct sDirective {
 		enum eState state;       /* current directive being processed */
 		boolean	accept;          /* is a directive syntactically permitted? */
@@ -83,6 +84,7 @@ static cppState Cpp = {
 	'\0', '\0',  /* ungetch characters */
 	FALSE,       /* resolveRequired */
 	FALSE,       /* hasAtLiteralStrings */
+	FALSE,       /* hasCxxRawLiteralStrings */
 	{
 		DRCTV_NONE,  /* state */
 		FALSE,       /* accept */
@@ -106,7 +108,8 @@ extern unsigned int getDirectiveNestLevel (void)
 	return Cpp.directive.nestLevel;
 }
 
-extern void cppInit (const boolean state, const boolean hasAtLiteralStrings)
+extern void cppInit (const boolean state, const boolean hasAtLiteralStrings,
+                     const boolean hasCxxRawLiteralStrings)
 {
 	BraceFormat = state;
 
@@ -114,6 +117,7 @@ extern void cppInit (const boolean state, const boolean hasAtLiteralStrings)
 	Cpp.ungetch2        = '\0';
 	Cpp.resolveRequired = FALSE;
 	Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
+	Cpp.hasCxxRawLiteralStrings = hasCxxRawLiteralStrings;
 
 	Cpp.directive.state     = DRCTV_NONE;
 	Cpp.directive.accept    = TRUE;
@@ -533,6 +537,55 @@ static int skipToEndOfString (boolean ignoreBackslash)
 	return STRING_SYMBOL;  /* symbolic representation of string */
 }
 
+static int isCxxRawLiteralDelimiterChar (int c)
+{
+	return (c != ' ' && c != '\f' && c != '\n' && c != '\r' && c != '\t' && c != '\v' &&
+	        c != '(' && c != ')' && c != '\\');
+}
+
+static int skipToEndOfCxxRawLiteralString (void)
+{
+	int c = fileGetc ();
+
+	if (c != '(' && ! isCxxRawLiteralDelimiterChar (c))
+	{
+		fileUngetc (c);
+		c = skipToEndOfString (FALSE);
+	}
+	else
+	{
+		char delim[16];
+		unsigned int delimLen = 0;
+		boolean collectDelim = TRUE;
+
+		do
+		{
+			if (collectDelim)
+			{
+				if (isCxxRawLiteralDelimiterChar (c) &&
+				    delimLen < (sizeof delim / sizeof *delim))
+					delim[delimLen++] = c;
+				else
+					collectDelim = FALSE;
+			}
+			else if (c == ')')
+			{
+				unsigned int i = 0;
+
+				while ((c = fileGetc ()) != EOF && i < delimLen && delim[i] == c)
+					i++;
+				if (i == delimLen && c == DOUBLE_QUOTE)
+					break;
+				else
+					fileUngetc (c);
+			}
+		}
+		while ((c = fileGetc ()) != EOF);
+		c = STRING_SYMBOL;
+	}
+	return c;
+}
+
 /*  Skips to the end of the three (possibly four) 'c' sequence, returning a
  *  special character to symbolically represent a generic character.
  *  Also detects Vera numbers that include a base specifier (ie. 'b1010).
@@ -729,6 +782,44 @@ extern int cppGetc (void)
 					else
 						fileUngetc (next);
 				}
+				else if (c == 'R' && Cpp.hasCxxRawLiteralStrings)
+				{
+					/* OMG!11 HACK!!11  Get the previous character.
+					 *
+					 * We need to know whether the previous character was an identifier or not,
+					 * because "R" has to be on its own, not part of an identifier.  This allows
+					 * for constructs like:
+					 *
+					 * 	#define FOUR "4"
+					 * 	const char *p = FOUR"5";
+					 *
+					 * which is not a raw literal, but a preprocessor concatenation.
+					 *
+					 * FIXME: handle
+					 *
+					 * 	const char *p = R\
+					 * 	"xxx(raw)xxx";
+					 *
+					 * which is perfectly valid (yet probably very unlikely). */
+					int prev = fileGetNthPrevC (1, '\0');
+					int prev2 = fileGetNthPrevC (2, '\0');
+					int prev3 = fileGetNthPrevC (3, '\0');
+
+					if (! isident (prev) ||
+					    (! isident (prev2) && (prev == 'L' || prev == 'u' || prev == 'U')) ||
+					    (! isident (prev3) && (prev2 == 'u' && prev == '8')))
+					{
+						int next = fileGetc ();
+						if (next != DOUBLE_QUOTE)
+							fileUngetc (next);
+						else
+						{
+							Cpp.directive.accept = FALSE;
+							c = skipToEndOfCxxRawLiteralString ();
+							break;
+						}
+					}
+				}
 			enter:
 				Cpp.directive.accept = FALSE;
 				if (directive)


Modified: tagmanager/ctags/get.h
3 lines changed, 2 insertions(+), 1 deletions(-)
===================================================================
@@ -36,7 +36,8 @@
 */
 extern boolean isBraceFormat (void);
 extern unsigned int getDirectiveNestLevel (void);
-extern void cppInit (const boolean state, const boolean hasAtLiteralStrings);
+extern void cppInit (const boolean state, const boolean hasAtLiteralStrings,
+                     const boolean hasCxxRawLiteralStrings);
 extern void cppTerminate (void);
 extern void cppBeginStatement (void);
 extern void cppEndStatement (void);


Modified: tagmanager/ctags/read.c
13 lines changed, 13 insertions(+), 0 deletions(-)
===================================================================
@@ -501,6 +501,19 @@ extern int fileGetc (void)
     return c;
 }
 
+/* returns the nth previous character (0 meaning current), or def if nth cannot
+ * be accessed.  Note that this can't access previous line data. */
+extern int fileGetNthPrevC (unsigned int nth, int def)
+{
+	const unsigned char *base = (unsigned char *) vStringValue (File.line);
+	const unsigned int offset = File.ungetchIdx + 1 + nth;
+
+	if (File.currentLine != NULL && File.currentLine >= base + offset)
+		return (int) *(File.currentLine - offset);
+	else
+		return def;
+}
+
 extern int fileSkipToCharacter (int c)
 {
 	int d;


Modified: tagmanager/ctags/read.h
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -102,6 +102,7 @@ extern boolean fileOpen (const char *const fileName, const langType language);
 extern boolean fileEOF (void);
 extern void fileClose (void);
 extern int fileGetc (void);
+extern int fileGetNthPrevC (unsigned int nth, int def);
 extern int fileSkipToCharacter (int c);
 extern void fileUngetc (int c);
 extern const unsigned char *fileReadLine (void);


Modified: tests/ctags/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -135,6 +135,7 @@ test_sources = \
 	cxx11-final.cpp					\
 	cxx11-noexcept.cpp				\
 	cxx11-override.cpp				\
+	cxx11-raw-strings.cpp			\
 	cxx14-combined.cpp				\
 	db-trig.sql						\
 	debian_432872.f90				\


Modified: tests/ctags/cxx11-raw-strings.cpp
34 lines changed, 34 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,34 @@
+
+static const char* str1 = R"blah(
+lots
+of text
+)blah";
+
+struct typ1 { int memb1; };
+
+static const char* str2 = R"blah(
+lots
+of text including a quote"
+)blah";
+
+struct typ2 { int memb2; };
+
+/* check we don't get confused by string concatenation */
+#define FOUR "four"
+
+static const char* str3 = FOUR"f(iv)e";
+
+struct typ3 { int memb3; };
+
+/* check for prefixes */
+static const char* str4 = LR"blah(";int bug4;)blah";
+struct typ4 { int memb4; };
+
+static const char* str5 = u8R"blah(";int bug5;)blah";
+struct typ5 { int memb5; };
+
+static const char* str6 = uR"blah(";int bug6;)blah";
+struct typ6 { int memb6; };
+
+static const char* str7 = UR"blah(";int bug7;)blah";
+struct typ7 { int memb7; };


Modified: tests/ctags/cxx11-raw-strings.cpp.tags
23 lines changed, 23 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,23 @@
+# format=tagmanager
+FOURÌ65536Ö0
+memb1Ì64Îtyp1Ö0Ïint
+memb2Ì64Îtyp2Ö0Ïint
+memb3Ì64Îtyp3Ö0Ïint
+memb4Ì64Îtyp4Ö0Ïint
+memb5Ì64Îtyp5Ö0Ïint
+memb6Ì64Îtyp6Ö0Ïint
+memb7Ì64Îtyp7Ö0Ïint
+str1Ì16384Ö0Ïchar
+str2Ì16384Ö0Ïchar
+str3Ì16384Ö0Ïchar
+str4Ì16384Ö0Ïchar
+str5Ì16384Ö0Ïchar
+str6Ì16384Ö0Ïchar
+str7Ì16384Ö0Ïchar
+typ1Ì2048Ö0
+typ2Ì2048Ö0
+typ3Ì2048Ö0
+typ4Ì2048Ö0
+typ5Ì2048Ö0
+typ6Ì2048Ö0
+typ7Ì2048Ö0



--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).


More information about the Commits mailing list