[geany/geany] a14aa9: c++: Fix parsing of C++11 raw string literals
Colomban Wendling
git-noreply at xxxxx
Thu Feb 11 14:36:04 UTC 2016
Branch: refs/heads/master
Author: Colomban Wendling <ban at herbesfolles.org>
Committer: Colomban Wendling <ban at herbesfolles.org>
Date: Sat, 23 Jan 2016 20:52:40 UTC
Commit: a14aa908c5f0a945f6d817a1aa46173b492193e6
https://github.com/geany/geany/commit/a14aa908c5f0a945f6d817a1aa46173b492193e6
Log Message:
-----------
c++: Fix parsing of C++11 raw string literals
See http://en.cppreference.com/w/cpp/language/string_literal
Closes #877.
---
This contains a pretty ugly hack to fetch the previous character, in
order not to get fooled by string concatenation hidden behind a macro,
like in `FOUR"five"`, which is not a raw string literal but simply the
identifier `FOUR` followed by the string `"five"`.
While this may sound uncommon, it is not, and it led to complaints [2][3]
when Scintilla [1] broke this when they introduced C++11 raw string
literal support themselves.
The implementation here still contains a bug with line continuations: a
raw literal of the form:
```c
const char *str = R\
"xxx(...)xxx";
```
is not properly recognized as such, although it's perfectly valid (yet
probably very uncommon). For the record, Scintilla also suffers
from this, but nobody has complained about it yet.
[1] http://scintilla.org/
[2] https://sourceforge.net/p/scintilla/bugs/1207/
[3] https://sourceforge.net/p/scintilla/bugs/1454/
Modified Paths:
--------------
tagmanager/ctags/c.c
tagmanager/ctags/get.c
tagmanager/ctags/get.h
tests/ctags/Makefile.am
tests/ctags/cxx11-raw-strings.cpp
tests/ctags/cxx11-raw-strings.cpp.tags
Modified: tagmanager/ctags/c.c
2 lines changed, 1 insertions(+), 1 deletions(-)
===================================================================
@@ -3120,7 +3120,7 @@ static boolean findCTags (const unsigned int passCount)
contextual_fake_count = 0;
Assert (passCount < 3);
- cppInit ((boolean) (passCount > 1), isLanguage (Lang_csharp));
+ cppInit ((boolean) (passCount > 1), isLanguage (Lang_csharp), isLanguage (Lang_cpp));
exception = (exception_t) setjmp (Exception);
retry = FALSE;
Modified: tagmanager/ctags/get.c
92 lines changed, 91 insertions(+), 1 deletions(-)
===================================================================
@@ -62,6 +62,7 @@ typedef struct sCppState {
int ungetch, ungetch2; /* ungotten characters, if any */
boolean resolveRequired; /* must resolve if/else/elif/endif branch */
boolean hasAtLiteralStrings; /* supports @"c:\" strings */
+ boolean hasCxxRawLiteralStrings; /* supports R"xxx(...)xxx" strings */
struct sDirective {
enum eState state; /* current directive being processed */
boolean accept; /* is a directive syntactically permitted? */
@@ -83,6 +84,7 @@ static cppState Cpp = {
'\0', '\0', /* ungetch characters */
FALSE, /* resolveRequired */
FALSE, /* hasAtLiteralStrings */
+ FALSE, /* hasCxxRawLiteralStrings */
{
DRCTV_NONE, /* state */
FALSE, /* accept */
@@ -106,7 +108,8 @@ extern unsigned int getDirectiveNestLevel (void)
return Cpp.directive.nestLevel;
}
-extern void cppInit (const boolean state, const boolean hasAtLiteralStrings)
+extern void cppInit (const boolean state, const boolean hasAtLiteralStrings,
+ const boolean hasCxxRawLiteralStrings)
{
BraceFormat = state;
@@ -114,6 +117,7 @@ extern void cppInit (const boolean state, const boolean hasAtLiteralStrings)
Cpp.ungetch2 = '\0';
Cpp.resolveRequired = FALSE;
Cpp.hasAtLiteralStrings = hasAtLiteralStrings;
+ Cpp.hasCxxRawLiteralStrings = hasCxxRawLiteralStrings;
Cpp.directive.state = DRCTV_NONE;
Cpp.directive.accept = TRUE;
@@ -533,6 +537,55 @@ static int skipToEndOfString (boolean ignoreBackslash)
return STRING_SYMBOL; /* symbolic representation of string */
}
+static int isCxxRawLiteralDelimiterChar (int c)
+{
+ return (c != ' ' && c != '\f' && c != '\n' && c != '\r' && c != '\t' && c != '\v' &&
+ c != '(' && c != ')' && c != '\\');
+}
+
+static int skipToEndOfCxxRawLiteralString (void)
+{
+ int c = fileGetc ();
+
+ if (c != '(' && ! isCxxRawLiteralDelimiterChar (c))
+ {
+ fileUngetc (c);
+ c = skipToEndOfString (FALSE);
+ }
+ else
+ {
+ char delim[16];
+ unsigned int delimLen = 0;
+ boolean collectDelim = TRUE;
+
+ do
+ {
+ if (collectDelim)
+ {
+ if (isCxxRawLiteralDelimiterChar (c) &&
+ delimLen < (sizeof delim / sizeof *delim))
+ delim[delimLen++] = c;
+ else
+ collectDelim = FALSE;
+ }
+ else if (c == ')')
+ {
+ unsigned int i = 0;
+
+ while ((c = fileGetc ()) != EOF && i < delimLen && delim[i] == c)
+ i++;
+ if (i == delimLen && c == DOUBLE_QUOTE)
+ break;
+ else
+ fileUngetc (c);
+ }
+ }
+ while ((c = fileGetc ()) != EOF);
+ c = STRING_SYMBOL;
+ }
+ return c;
+}
+
/* Skips to the end of the three (possibly four) 'c' sequence, returning a
* special character to symbolically represent a generic character.
* Also detects Vera numbers that include a base specifier (ie. 'b1010).
@@ -729,6 +782,43 @@ extern int cppGetc (void)
else
fileUngetc (next);
}
+ else if (c == 'R' && Cpp.hasCxxRawLiteralStrings)
+ {
+ /* OMG!11 HACK!!11 Get the previous character.
+ *
+ * We need to know whether the previous character was an identifier or not,
+ * because "R" has to be on its own, not part of an identifier. This allows
+ * for constructs like:
+ *
+ * #define FOUR "4"
+ * const char *p = FOUR"5";
+ *
+ * which is not a raw literal, but a preprocessor concatenation.
+ *
+ * FIXME: handle
+ *
+ * const char *p = R\
+ * "xxx(raw)xxx";
+ *
+ * which is perfectly valid (yet probably very unlikely). */
+ const unsigned char *base = (unsigned char *) vStringValue (File.line);
+ int prev = '\n';
+ if (File.currentLine - File.ungetchIdx - 2 >= base)
+ prev = (int) *(File.currentLine - File.ungetchIdx - 2);
+
+ if (! isident (prev))
+ {
+ int next = fileGetc ();
+ if (next != DOUBLE_QUOTE)
+ fileUngetc (next);
+ else
+ {
+ Cpp.directive.accept = FALSE;
+ c = skipToEndOfCxxRawLiteralString ();
+ break;
+ }
+ }
+ }
enter:
Cpp.directive.accept = FALSE;
if (directive)
Modified: tagmanager/ctags/get.h
3 lines changed, 2 insertions(+), 1 deletions(-)
===================================================================
@@ -36,7 +36,8 @@
*/
extern boolean isBraceFormat (void);
extern unsigned int getDirectiveNestLevel (void);
-extern void cppInit (const boolean state, const boolean hasAtLiteralStrings);
+extern void cppInit (const boolean state, const boolean hasAtLiteralStrings,
+ const boolean hasCxxRawLiteralStrings);
extern void cppTerminate (void);
extern void cppBeginStatement (void);
extern void cppEndStatement (void);
Modified: tests/ctags/Makefile.am
1 lines changed, 1 insertions(+), 0 deletions(-)
===================================================================
@@ -135,6 +135,7 @@ test_sources = \
cxx11-final.cpp \
cxx11-noexcept.cpp \
cxx11-override.cpp \
+ cxx11-raw-strings.cpp \
cxx14-combined.cpp \
db-trig.sql \
debian_432872.f90 \
Modified: tests/ctags/cxx11-raw-strings.cpp
21 lines changed, 21 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,21 @@
+
+static const char* str1 = R"blah(
+lots
+of text
+)blah";
+
+struct typ1 { int memb1; };
+
+static const char* str2 = R"blah(
+lots
+of text including a quote"
+)blah";
+
+struct typ2 { int memb2; };
+
+/* check we don't get confused by string concatenation */
+#define FOUR "four"
+
+static const char* str3 = FOUR"f(iv)e";
+
+struct typ3 { int memb3; };
Modified: tests/ctags/cxx11-raw-strings.cpp.tags
11 lines changed, 11 insertions(+), 0 deletions(-)
===================================================================
@@ -0,0 +1,11 @@
+# format=tagmanager
+FOUR�65536�0
+memb1�64�typ1�0�int
+memb2�64�typ2�0�int
+memb3�64�typ3�0�int
+str1�16384�0�char
+str2�16384�0�char
+str3�16384�0�char
+typ1�2048�0
+typ2�2048�0
+typ3�2048�0
--------------
This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).
More information about the Commits
mailing list