Branch: refs/heads/master Author: Colomban Wendling ban@herbesfolles.org Committer: Colomban Wendling ban@herbesfolles.org Date: Sun, 21 Jul 2013 10:04:14 UTC Commit: f0f3a6cd9926b2b36406986fad9c4e866b379b1d https://github.com/geany/geany/commit/f0f3a6cd9926b2b36406986fad9c4e866b379b1d
Log Message: ----------- Update Scintilla to version 3.3.4
Modified Paths: -------------- scintilla/Makefile.am scintilla/gtk/PlatGTK.cxx scintilla/gtk/ScintillaGTK.cxx scintilla/include/ILexer.h scintilla/include/Platform.h scintilla/lexers/LexAda.cxx scintilla/lexers/LexHaskell.cxx scintilla/lexers/LexLua.cxx scintilla/lexers/LexOthers.cxx scintilla/lexers/LexPerl.cxx scintilla/lexers/LexRuby.cxx scintilla/lexlib/CharacterCategory.cxx scintilla/lexlib/CharacterCategory.h scintilla/lexlib/LexAccessor.h scintilla/lexlib/StyleContext.h scintilla/makefile.win32 scintilla/scintilla_changes.patch scintilla/src/CaseConvert.cxx scintilla/src/CaseConvert.h scintilla/src/CaseFolder.cxx scintilla/src/CaseFolder.h scintilla/src/CellBuffer.cxx scintilla/src/CellBuffer.h scintilla/src/Document.cxx scintilla/src/Document.h scintilla/src/Editor.cxx scintilla/src/Editor.h scintilla/src/PositionCache.cxx scintilla/src/ScintillaBase.cxx scintilla/src/Selection.cxx scintilla/src/UnicodeFromUTF8.h scintilla/src/ViewStyle.cxx scintilla/version.txt
Modified: scintilla/Makefile.am 7 files changed, 7 insertions(+), 0 deletions(-) =================================================================== @@ -50,6 +50,8 @@ gtk/scintilla-marshal.c \ gtk/scintilla-marshal.h \ lexlib/Accessor.cxx \ lexlib/Accessor.h \ +lexlib/CharacterCategory.cxx \ +lexlib/CharacterCategory.h \ lexlib/CharacterSet.cxx \ lexlib/CharacterSet.h \ lexlib/LexAccessor.h \ @@ -74,6 +76,10 @@ src/AutoComplete.cxx \ src/AutoComplete.h \ src/CallTip.cxx \ src/CallTip.h \ +src/CaseConvert.cxx \ +src/CaseConvert.h \ +src/CaseFolder.cxx \ +src/CaseFolder.h \ src/Catalogue.cxx \ src/Catalogue.h \ src/CellBuffer.cxx \ @@ -116,6 +122,7 @@ src/Style.cxx \ src/Style.h \ src/UniConversion.cxx \ src/UniConversion.h \ +src/UnicodeFromUTF8.h \ src/ViewStyle.cxx \ src/ViewStyle.h \ src/XPM.cxx \
Modified: scintilla/gtk/PlatGTK.cxx 2 files changed, 1 insertions(+), 1 deletions(-) =================================================================== @@ -185,7 +185,7 @@ class FontHandle { width[i] = 0; } } - XYPOSITION CharWidth(unsigned char ch, encodingType et_) { + XYPOSITION CharWidth(unsigned char ch, encodingType et_) const { XYPOSITION w = 0; FontMutexLock(); if ((ch <= 127) && (et == et_)) {
Modified: scintilla/gtk/ScintillaGTK.cxx 91 files changed, 37 insertions(+), 54 deletions(-) =================================================================== @@ -46,12 +46,14 @@ #include "ViewStyle.h" #include "Decoration.h" #include "CharClassify.h" +#include "CaseFolder.h" #include "Document.h" #include "Selection.h" #include "PositionCache.h" #include "Editor.h" #include "ScintillaBase.h" #include "UniConversion.h" +#include "CaseConvert.h"
#include "scintilla-marshal.h"
@@ -118,8 +120,8 @@ class ScintillaGTK : public ScintillaBase { Window scrollbarh; GtkAdjustment *adjustmentv; GtkAdjustment *adjustmenth; - int scrollBarWidth; - int scrollBarHeight; + int verticalScrollBarWidth; + int horizontalScrollBarHeight;
SelectionText primary;
@@ -232,8 +234,10 @@ class ScintillaGTK : public ScintillaBase { gint FocusOutThis(GtkWidget *widget); static gint FocusOut(GtkWidget *widget, GdkEventFocus *event); static void SizeRequest(GtkWidget *widget, GtkRequisition *requisition); +#if GTK_CHECK_VERSION(3,0,0) static void GetPreferredWidth(GtkWidget *widget, gint *minimalWidth, gint *naturalWidth); static void GetPreferredHeight(GtkWidget *widget, gint *minimalHeight, gint *naturalHeight); +#endif static void SizeAllocate(GtkWidget *widget, GtkAllocation *allocation); #if GTK_CHECK_VERSION(3,0,0) gboolean DrawTextThis(cairo_t *cr); @@ -354,7 +358,7 @@ static ScintillaGTK *ScintillaFromWidget(GtkWidget *widget) {
ScintillaGTK::ScintillaGTK(_ScintillaObject *sci_) : adjustmentv(0), adjustmenth(0), - scrollBarWidth(30), scrollBarHeight(30), + verticalScrollBarWidth(30), horizontalScrollBarHeight(30), evbtn(0), capturedMouse(false), dragWasDropped(false), lastKey(0), rectangularSelectionModifier(SCMOD_CTRL), parentClass(0), im_context(NULL), @@ -678,6 +682,8 @@ void ScintillaGTK::SizeRequest(GtkWidget *widget, GtkRequisition *requisition) { #endif }
+#if GTK_CHECK_VERSION(3,0,0) + void ScintillaGTK::GetPreferredWidth(GtkWidget *widget, gint *minimalWidth, gint *naturalWidth) { GtkRequisition requisition; SizeRequest(widget, &requisition); @@ -690,6 +696,8 @@ void ScintillaGTK::GetPreferredHeight(GtkWidget *widget, gint *minimalHeight, gi *minimalHeight = *naturalHeight = requisition.height; }
+#endif + void ScintillaGTK::SizeAllocate(GtkWidget *widget, GtkAllocation *allocation) { ScintillaGTK *sciThis = ScintillaFromWidget(widget); try { @@ -1087,9 +1095,9 @@ void ScintillaGTK::FullPaint() { PRectangle ScintillaGTK::GetClientRectangle() { PRectangle rc = wMain.GetClientPosition(); if (verticalScrollBarVisible) - rc.right -= scrollBarWidth; + rc.right -= verticalScrollBarWidth; if (horizontalScrollBarVisible && (wrapState == eWrapNone)) - rc.bottom -= scrollBarHeight; + rc.bottom -= horizontalScrollBarHeight; // Move to origin rc.right -= rc.left; rc.bottom -= rc.top; @@ -1232,29 +1240,6 @@ const char *ScintillaGTK::CharacterSetID() const { return ::CharacterSetID(vs.styles[STYLE_DEFAULT].characterSet); }
-class CaseFolderUTF8 : public CaseFolderTable { -public: - CaseFolderUTF8() { - StandardASCII(); - } - virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { - if ((lenMixed == 1) && (sizeFolded > 0)) { - folded[0] = mapping[static_cast<unsigned char>(mixed[0])]; - return 1; - } else { - gchar *mapped = g_utf8_casefold(mixed, lenMixed); - size_t lenMapped = strlen(mapped); - if (lenMapped < sizeFolded) { - memcpy(folded, mapped, lenMapped); - } else { - lenMapped = 0; - } - g_free(mapped); - return lenMapped; - } - } -}; - class CaseFolderDBCS : public CaseFolderTable { const char *charSet; public: @@ -1289,7 +1274,7 @@ class CaseFolderDBCS : public CaseFolderTable {
CaseFolder *ScintillaGTK::CaseFolderForEncoding() { if (pdoc->dbcsCodePage == SC_CP_UTF8) { - return new CaseFolderUTF8(); + return new CaseFolderUnicode(); } else { const char *charSetBuffer = CharacterSetID(); if (charSetBuffer) { @@ -1343,15 +1328,20 @@ struct CaseMapper { }
std::string ScintillaGTK::CaseMapString(const std::string &s, int caseMapping) { - if (s.size() == 0) - return std::string(); - - if (caseMapping == cmSame) + if ((s.size() == 0) || (caseMapping == cmSame)) return s;
+ if (IsUnicodeMode()) { + std::string retMapped(s.length() * maxExpansionCaseConversion, 0); + size_t lenMapped = CaseConvertString(&retMapped[0], retMapped.length(), s.c_str(), s.length(), + (caseMapping == cmUpper) ? CaseConversionUpper : CaseConversionLower); + retMapped.resize(lenMapped); + return retMapped; + } + const char *charSetBuffer = CharacterSetID();
- if (IsUnicodeMode() || !*charSetBuffer) { + if (!*charSetBuffer) { CaseMapper mapper(s, caseMapping == cmUpper); return std::string(mapper.mapped, strlen(mapper.mapped)); } else { @@ -1676,44 +1666,41 @@ void ScintillaGTK::Resize(int width, int height) { #if GTK_CHECK_VERSION(3,0,0) GtkRequisition requisition; gtk_widget_get_requisition(PWidget(scrollbarv), &requisition); - scrollBarWidth = requisition.width; + verticalScrollBarWidth = requisition.width; gtk_widget_get_requisition(PWidget(scrollbarh), &requisition); - scrollBarHeight = requisition.height; + horizontalScrollBarHeight = requisition.height; #else - scrollBarWidth = GTK_WIDGET(PWidget(scrollbarv))->requisition.width; - scrollBarHeight = GTK_WIDGET(PWidget(scrollbarh))->requisition.height; + verticalScrollBarWidth = GTK_WIDGET(PWidget(scrollbarv))->requisition.width; + horizontalScrollBarHeight = GTK_WIDGET(PWidget(scrollbarh))->requisition.height; #endif
// These allocations should never produce negative sizes as they would wrap around to huge // unsigned numbers inside GTK+ causing warnings. bool showSBHorizontal = horizontalScrollBarVisible && (wrapState == eWrapNone); - int horizontalScrollBarHeight = scrollBarHeight; - if (!showSBHorizontal) - horizontalScrollBarHeight = 0;
GtkAllocation alloc; if (showSBHorizontal) { gtk_widget_show(GTK_WIDGET(PWidget(scrollbarh))); alloc.x = 0; - alloc.y = height - scrollBarHeight; - alloc.width = Platform::Maximum(1, width - scrollBarWidth); + alloc.y = height - horizontalScrollBarHeight; + alloc.width = Platform::Maximum(1, width - verticalScrollBarWidth); alloc.height = horizontalScrollBarHeight; gtk_widget_size_allocate(GTK_WIDGET(PWidget(scrollbarh)), &alloc); } else { gtk_widget_hide(GTK_WIDGET(PWidget(scrollbarh))); + horizontalScrollBarHeight = 0; // in case horizontalScrollBarVisible is true. }
if (verticalScrollBarVisible) { gtk_widget_show(GTK_WIDGET(PWidget(scrollbarv))); - alloc.x = width - scrollBarWidth; + alloc.x = width - verticalScrollBarWidth; alloc.y = 0; - alloc.width = scrollBarWidth; - alloc.height = Platform::Maximum(1, height - scrollBarHeight); - if (!showSBHorizontal) - alloc.height += scrollBarWidth-1; + alloc.width = verticalScrollBarWidth; + alloc.height = Platform::Maximum(1, height - horizontalScrollBarHeight); gtk_widget_size_allocate(GTK_WIDGET(PWidget(scrollbarv)), &alloc); } else { gtk_widget_hide(GTK_WIDGET(PWidget(scrollbarv))); + verticalScrollBarWidth = 0; } if (IS_WIDGET_MAPPED(PWidget(wMain))) { ChangeSize(); @@ -1721,12 +1708,8 @@ void ScintillaGTK::Resize(int width, int height) {
alloc.x = 0; alloc.y = 0; - alloc.width = Platform::Maximum(1, width - scrollBarWidth); - alloc.height = Platform::Maximum(1, height - scrollBarHeight); - if (!showSBHorizontal) - alloc.height += scrollBarHeight; - if (!verticalScrollBarVisible) - alloc.width += scrollBarWidth; + alloc.width = Platform::Maximum(1, width - verticalScrollBarWidth); + alloc.height = Platform::Maximum(1, height - horizontalScrollBarHeight); gtk_widget_size_allocate(GTK_WIDGET(PWidget(wText)), &alloc); }
Modified: scintilla/include/ILexer.h 2 files changed, 2 insertions(+), 0 deletions(-) =================================================================== @@ -48,6 +48,8 @@ class IDocument { class IDocumentWithLineEnd : public IDocument { public: virtual int SCI_METHOD LineEnd(int line) const = 0; + virtual int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const = 0; + virtual int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const = 0; };
enum { lvOriginal=0, lvSubStyles=1 };
Modified: scintilla/include/Platform.h 6 files changed, 6 insertions(+), 0 deletions(-) =================================================================== @@ -13,6 +13,7 @@ // PLAT_GTK_WIN32 is defined additionally when running PLAT_GTK under Win32 // PLAT_WIN = Win32 API on Win32 OS // PLAT_WX is wxWindows on any supported platform +// PLAT_TK = Tcl/TK on Linux or Win32
#define PLAT_GTK 0 #define PLAT_GTK_WIN32 0 @@ -23,6 +24,7 @@ #define PLAT_QT 0 #define PLAT_FOX 0 #define PLAT_CURSES 0 +#define PLAT_TK 0
#if defined(FOX) #undef PLAT_FOX @@ -40,6 +42,10 @@ #undef PLAT_QT #define PLAT_QT 1
+#elif defined(TK) +#undef PLAT_TK +#define PLAT_TK 1 + #elif defined(GTK) #undef PLAT_GTK #define PLAT_GTK 1
Modified: scintilla/lexers/LexAda.cxx 15 files changed, 0 insertions(+), 15 deletions(-) =================================================================== @@ -65,8 +65,6 @@ static void ColouriseDocument( static void ColouriseWord(StyleContext& sc, WordList& keywords, bool& apostropheStartsAttribute);
static inline bool IsDelimiterCharacter(int ch); -static inline bool IsNumberStartCharacter(int ch); -static inline bool IsNumberCharacter(int ch); static inline bool IsSeparatorOrDelimiterCharacter(int ch); static bool IsValidIdentifier(const std::string& identifier); static bool IsValidNumber(const std::string& number); @@ -310,19 +308,6 @@ static inline bool IsDelimiterCharacter(int ch) { } }
-static inline bool IsNumberCharacter(int ch) { - return IsNumberStartCharacter(ch) || - ch == '_' || - ch == '.' || - ch == '#' || - (ch >= 'a' && ch <= 'f') || - (ch >= 'A' && ch <= 'F'); -} - -static inline bool IsNumberStartCharacter(int ch) { - return IsADigit(ch); -} - static inline bool IsSeparatorOrDelimiterCharacter(int ch) { return IsASpace(ch) || IsDelimiterCharacter(ch); }
Modified: scintilla/lexers/LexHaskell.cxx 41 files changed, 19 insertions(+), 22 deletions(-) =================================================================== @@ -40,40 +40,37 @@ #include "CharacterSet.h" #include "LexerModule.h" #include "OptionSet.h" +#include "CharacterCategory.h"
#ifdef SCI_NAMESPACE using namespace Scintilla; #endif
-static int u_iswalpha(int); -static int u_iswalnum(int); -static int u_iswupper(int); -static int u_IsHaskellSymbol(int); +// See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682 +// Note, letter modifiers are prohibited.
-// #define HASKELL_UNICODE - -#ifndef HASKELL_UNICODE - -// Stubs - -static int u_iswalpha(int) { - return 0; +static int u_iswupper (int ch) { + CharacterCategory c = CategoriseCharacter(ch); + return c == ccLu || c == ccLt; }
-static int u_iswalnum(int) { - return 0; +static int u_iswalpha (int ch) { + CharacterCategory c = CategoriseCharacter(ch); + return c == ccLl || c == ccLu || c == ccLt || c == ccLo; }
-static int u_iswupper(int) { - return 0; +static int u_iswalnum (int ch) { + CharacterCategory c = CategoriseCharacter(ch); + return c == ccLl || c == ccLu || c == ccLt || c == ccLo + || c == ccNd || c == ccNo; }
-static int u_IsHaskellSymbol(int) { - return 0; +static int u_IsHaskellSymbol(int ch) { + CharacterCategory c = CategoriseCharacter(ch); + return c == ccPc || c == ccPd || c == ccPo + || c == ccSm || c == ccSc || c == ccSk || c == ccSo; }
-#endif - static inline bool IsHaskellLetter(const int ch) { if (IsASCII(ch)) { return (ch >= 'a' && ch <= 'z') @@ -597,7 +594,8 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty hs.lmode = LITERATE_BIRD; } // Preprocessor - else if (sc.atLineStart && sc.ch == '#' && options.cpp) { + else if (sc.atLineStart && sc.ch == '#' && options.cpp + && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) { sc.SetState(SCE_HA_PREPROCESSOR); sc.Forward(); } @@ -960,7 +958,6 @@ void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initSty sc.Forward(); } } - styler.SetLineState(lineCurrent, hs.ToLineState()); sc.Complete(); }
Modified: scintilla/lexers/LexLua.cxx 35 files changed, 9 insertions(+), 26 deletions(-) =================================================================== @@ -132,55 +132,38 @@ static void ColouriseLuaDoc( if (sc.state == SCE_LUA_OPERATOR) { if (sc.ch == ':' && sc.chPrev == ':') { // :: <label> :: forward scan sc.Forward(); - int ln = 0, maxln = startPos + length - sc.currentPos; - int c; - while (ln < maxln) { // determine line extent - c = sc.GetRelative(ln); - if (c == '\r' || c == '\n') - break; - ln++; - } - maxln = ln; ln = 0; - while (ln < maxln) { // skip over spaces/tabs - if (!IsASpaceOrTab(sc.GetRelative(ln))) - break; + int ln = 0; + while (IsASpaceOrTab(sc.GetRelative(ln))) // skip over spaces/tabs ln++; - } int ws1 = ln; if (setWordStart.Contains(sc.GetRelative(ln))) { - int i = 0; + int c, i = 0; char s[100]; - while (ln < maxln) { // get potential label - c = sc.GetRelative(ln); - if (!setWord.Contains(c)) - break; + while (setWord.Contains(c = sc.GetRelative(ln))) { // get potential label if (i < 90) s[i++] = c; ln++; } s[i] = '\0'; int lbl = ln; if (!keywords.InList(s)) { - while (ln < maxln) { // skip over spaces/tabs - if (!IsASpaceOrTab(sc.GetRelative(ln))) - break; + while (IsASpaceOrTab(sc.GetRelative(ln))) // skip over spaces/tabs ln++; - } int ws2 = ln - lbl; if (sc.GetRelative(ln) == ':' && sc.GetRelative(ln + 1) == ':') { // final :: found, complete valid label construct sc.ChangeState(SCE_LUA_LABEL); if (ws1) { sc.SetState(SCE_LUA_DEFAULT); - sc.Forward(ws1); + sc.ForwardBytes(ws1); } sc.SetState(SCE_LUA_LABEL); - sc.Forward(lbl - ws1); + sc.ForwardBytes(lbl - ws1); if (ws2) { sc.SetState(SCE_LUA_DEFAULT); - sc.Forward(ws2); + sc.ForwardBytes(ws2); } sc.SetState(SCE_LUA_LABEL); - sc.Forward(2); + sc.ForwardBytes(2); } } }
Modified: scintilla/lexers/LexOthers.cxx 5 files changed, 3 insertions(+), 2 deletions(-) =================================================================== @@ -922,8 +922,9 @@ static int RecogniseErrorListLine(const char *lineBuffer, unsigned int lengthLin (strstr(lineBuffer, " at ") < (lineBuffer + lengthLine)) && strstr(lineBuffer, " line ") && (strstr(lineBuffer, " line ") < (lineBuffer + lengthLine)) && - (strstr(lineBuffer, " at ") < (strstr(lineBuffer, " line ")))) { - // perl error message + (strstr(lineBuffer, " at ") + 4 < (strstr(lineBuffer, " line ")))) { + // perl error message: + // <message> at <file> line <line> return SCE_ERR_PERL; } else if ((memcmp(lineBuffer, "   at ", 6) == 0) && strstr(lineBuffer, ":line ")) {
Modified: scintilla/lexers/LexPerl.cxx 118 files changed, 59 insertions(+), 59 deletions(-) =================================================================== @@ -188,22 +188,6 @@ static int styleCheckIdentifier(LexAccessor &styler, unsigned int bk) { return 0; }
-static int inputsymbolScan(LexAccessor &styler, unsigned int pos, unsigned int endPos) { - // looks forward for matching > on same line; a bit ugly - unsigned int fw = pos; - while (++fw < endPos) { - int fwch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)); - if (fwch == '\r' || fwch == '\n') { - return 0; - } else if (fwch == '>') { - if (styler.Match(fw - 2, "<=>")) // '<=>' case - return 0; - return fw - pos; - } - } - return 0; -} - static int podLineScan(LexAccessor &styler, unsigned int &pos, unsigned int endPos) { // forward scan the current line to classify line for POD style int state = -1; @@ -398,6 +382,7 @@ class LexerPerl : public ILexer { static ILexer *LexerFactoryPerl() { return new LexerPerl(); } + int InputSymbolScan(StyleContext &sc); void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false); };
@@ -427,6 +412,21 @@ int SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) { return firstModification; }
+int LexerPerl::InputSymbolScan(StyleContext &sc) { + // forward scan for matching > on same line; file handles + int c, sLen = 0; + while ((c = sc.GetRelativeCharacter(++sLen)) != 0) { + if (c == '\r' || c == '\n') { + return 0; + } else if (c == '>') { + if (sc.Match("<=>")) // '<=>' case + return 0; + return sLen; + } + } + return 0; +} + void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) { // interpolate a segment (with no active backslashes or delimiters within) // switch in or out of an interpolation style or continue current style @@ -441,39 +441,42 @@ void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) if (sc.ch == '$' && sc.chNext == '#') { // starts with $# sLen++; } - while ((maxSeg > sLen) && (sc.GetRelative(sLen) == '$')) // >0 $ dereference within + while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$')) // >0 $ dereference within sLen++; - if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '{')) { // { start for {word} + if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) { // { start for {word} sLen++; braces = true; } if (maxSeg > sLen) { - int c = sc.GetRelative(sLen); + int c = sc.GetRelativeCharacter(sLen); if (setWordStart.Contains(c)) { // word (various) sLen++; isVar = true; - while ((maxSeg > sLen) && setWord.Contains(sc.GetRelative(sLen))) + while (maxSeg > sLen) { + if (!setWord.Contains(sc.GetRelativeCharacter(sLen))) + break; sLen++; + } } else if (braces && IsADigit(c) && (sLen == 2)) { // digit for ${digit} sLen++; isVar = true; } } if (braces) { - if ((maxSeg > sLen) && (sc.GetRelative(sLen) == '}')) { // } end for {word} + if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) { // } end for {word} sLen++; } else isVar = false; } } if (!isVar && (maxSeg > 1)) { // $- or @-specific variable patterns - sLen = 1; int c = sc.chNext; if (sc.ch == '$') { + sLen = 1; if (IsADigit(c)) { // $[0-9] and slurp trailing digits sLen++; isVar = true; - while 
((maxSeg > sLen) && IsADigit(sc.GetRelative(sLen))) + while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen))) sLen++; } else if (setSpecialVar.Contains(c)) { // $ special variables sLen++; @@ -483,12 +486,13 @@ void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) isVar = true; } else if (c == '^') { // $^A control-char style sLen++; - if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelative(sLen))) { + if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) { sLen++; isVar = true; } } } else if (sc.ch == '@') { + sLen = 1; if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern sLen++; isVar = true; @@ -576,7 +580,7 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, int Count; int Up, Down; QuoteCls() { - this->New(1); + New(1); } void New(int r = 1) { Rep = r; @@ -896,19 +900,18 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, break; } while (!sc.atLineEnd) { // "EOF" and `EOF` interpolated - int s = 0, endType = 0; - int maxSeg = endPos - sc.currentPos; - while (s < maxSeg) { // scan to break string into segments - int c = sc.GetRelative(s); + int c, sLen = 0, endType = 0; + while ((c = sc.GetRelativeCharacter(sLen)) != 0) { + // scan to break string into segments if (c == '\\') { endType = 1; break; } else if (c == '\r' || c == '\n') { endType = 2; break; } - s++; + sLen++; } - if (s > 0) // process non-empty segments - InterpolateSegment(sc, s); + if (sLen > 0) // process non-empty segments + InterpolateSegment(sc, sLen); if (endType == 1) { sc.Forward(); // \ at end-of-line does not appear to have any effect, skip @@ -969,10 +972,9 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, } else if (!Quote.Up && !IsASpace(sc.ch)) { Quote.Open(sc.ch); } else { - int s = 0, endType = 0; - int maxSeg = endPos - sc.currentPos; - while (s < maxSeg) { // scan to break string into segments - 
int c = sc.GetRelative(s); + int c, sLen = 0, endType = 0; + while ((c = sc.GetRelativeCharacter(sLen)) != 0) { + // scan to break string into segments if (IsASpace(c)) { break; } else if (c == '\\' && Quote.Up != '\\') { @@ -985,13 +987,13 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, } } else if (c == Quote.Up) Quote.Count++; - s++; + sLen++; } - if (s > 0) { // process non-empty segments + if (sLen > 0) { // process non-empty segments if (Quote.Up != '\'') { - InterpolateSegment(sc, s, true); + InterpolateSegment(sc, sLen, true); } else // non-interpolated path - sc.Forward(s); + sc.Forward(sLen); } if (endType == 1) sc.Forward(); @@ -1005,11 +1007,10 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, } else if (!Quote.Up && !IsASpace(sc.ch)) { Quote.Open(sc.ch); } else { - int s = 0, endType = 0; - int maxSeg = endPos - sc.currentPos; + int c, sLen = 0, endType = 0; bool isPattern = (Quote.Rep == 2); - while (s < maxSeg) { // scan to break string into segments - int c = sc.GetRelative(s); + while ((c = sc.GetRelativeCharacter(sLen)) != 0) { + // scan to break string into segments if (c == '\\' && Quote.Up != '\\') { endType = 2; break; } else if (Quote.Count == 0 && Quote.Rep == 1) { @@ -1020,7 +1021,7 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, // For '#', if no whitespace in between, it's a delimiter.
if (IsASpace(c)) { // Keep going - } else if (c == '#' && IsASpaceOrTab(sc.GetRelative(s - 1))) { + } else if (c == '#' && IsASpaceOrTab(sc.GetRelative(sLen - 1))) { endType = 3; } else Quote.Open(c); @@ -1039,13 +1040,13 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, Quote.Count++; } else if (IsASpace(c)) break; - s++; + sLen++; } - if (s > 0) { // process non-empty segments + if (sLen > 0) { // process non-empty segments if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') { - InterpolateSegment(sc, s, isPattern); + InterpolateSegment(sc, sLen, isPattern); } else // non-interpolated path - sc.Forward(s); + sc.Forward(sLen); } if (endType == 2) { sc.Forward(); @@ -1063,10 +1064,9 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, if (!Quote.Down && !IsASpace(sc.ch)) { Quote.Open(sc.ch); } else { - int s = 0, endType = 0; - int maxSeg = endPos - sc.currentPos; - while (s < maxSeg) { // scan to break string into segments - int c = sc.GetRelative(s); + int c, sLen = 0, endType = 0; + while ((c = sc.GetRelativeCharacter(sLen)) != 0) { + // scan to break string into segments if (IsASpace(c)) { break; } else if (c == '\\' && Quote.Up != '\\') { @@ -1078,23 +1078,23 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, } } else if (c == Quote.Up) Quote.Count++; - s++; + sLen++; } - if (s > 0) { // process non-empty segments + if (sLen > 0) { // process non-empty segments switch (sc.state) { case SCE_PL_STRING: case SCE_PL_STRING_QQ: case SCE_PL_BACKTICKS: - InterpolateSegment(sc, s); + InterpolateSegment(sc, sLen); break; case SCE_PL_STRING_QX: if (Quote.Up != '\'') { - InterpolateSegment(sc, s); + InterpolateSegment(sc, sLen); break; } // (continued for ' delim) default: // non-interpolated path - sc.Forward(s); + sc.Forward(sLen); } } if (endType == 2) { @@ -1474,7 +1474,7 @@ void SCI_METHOD LexerPerl::Lex(unsigned int startPos, int length, int initStyle, } else if (sc.ch ==
'<') { // handle '<', inputsymbol if (preferRE) { // forward scan - int i = inputsymbolScan(styler, sc.currentPos, endPos); + int i = InputSymbolScan(sc); if (i > 0) { sc.SetState(SCE_PL_IDENTIFIER); sc.Forward(i);
Modified: scintilla/lexers/LexRuby.cxx 2 files changed, 1 insertions(+), 1 deletions(-) =================================================================== @@ -254,7 +254,7 @@ class QuoteCls { char Up; char Down; QuoteCls() { - this->New(); + New(); } void New() { Count = 0;
Modified: scintilla/lexlib/CharacterCategory.cxx 3303 files changed, 3303 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,3303 @@ +// Scintilla source code edit control +/** @file CharacterCategory.cxx + ** Returns the Unicode general category of a character. + ** Table automatically regenerated by scripts/GenerateCharacterCategory.py + ** Should only be rarely regenerated for new versions of Unicode. + **/ +// Copyright 2013 by Neil Hodgson neilh@scintilla.org +// The License.txt file describes the conditions under which this software may be distributed. + +#include <algorithm> + +#include "CharacterCategory.h" + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +namespace { + // Use an unnamed namespace to protect the declarations from name conflicts + +const int catRanges[] = { +//++Autogenerated -- start of section automatically generated +// Created with Python 3.3.0, Unicode 6.1.0 +25, +1046, +1073, +1171, +1201, +1293, +1326, +1361, +1394, +1425, +1452, +1489, +1544, +1873, +1938, +2033, +2080, +2925, +2961, +2990, +3028, +3051, +3092, +3105, +3949, +3986, +4014, +4050, +4089, +5142, +5169, +5203, +5333, +5361, +5396, +5429, +5444, +5487, +5522, +5562, +5589, +5620, +5653, +5682, +5706, +5780, +5793, +5841, +5908, +5930, +5956, +6000, +6026, +6129, +6144, +6898, +6912, +7137, +7922, +7937, +8192, +8225, +8256, +8289, +8320, +8353, +8384, +8417, +8448, +8481, +8512, +8545, +8576, +8609, +8640, +8673, +8704, +8737, +8768, +8801, +8832, +8865, +8896, +8929, +8960, +8993, +9024, +9057, +9088, +9121, +9152, +9185, +9216, +9249, +9280, +9313, +9344, +9377, +9408, +9441, +9472, +9505, +9536, +9569, +9600, +9633, +9664, +9697, +9728, +9761, +9792, +9825, +9856, +9889, +9920, +9953, +10016, +10049, +10080, +10113, +10144, +10177, +10208, +10241, +10272, +10305, +10336, +10369, +10400, +10433, +10464, +10497, +10560, +10593, +10624, +10657, +10688, +10721, +10752, +10785, +10816, +10849, +10880, +10913, 
+10944, +10977, +11008, +11041, +11072, +11105, +11136, +11169, +11200, +11233, +11264, +11297, +11328, +11361, +11392, +11425, +11456, +11489, +11520, +11553, +11584, +11617, +11648, +11681, +11712, +11745, +11776, +11809, +11840, +11873, +11904, +11937, +11968, +12001, +12032, +12097, +12128, +12161, +12192, +12225, +12320, +12385, +12416, +12449, +12480, +12545, +12576, +12673, +12736, +12865, +12896, +12961, +12992, +13089, +13184, +13249, +13280, +13345, +13376, +13409, +13440, +13473, +13504, +13569, +13600, +13633, +13696, +13729, +13760, +13825, +13856, +13953, +13984, +14017, +14048, +14113, +14180, +14208, +14241, +14340, +14464, +14498, +14529, +14560, +14594, +14625, +14656, +14690, +14721, +14752, +14785, +14816, +14849, +14880, +14913, +14944, +14977, +15008, +15041, +15072, +15105, +15136, +15169, +15200, +15233, +15296, +15329, +15360, +15393, +15424, +15457, +15488, +15521, +15552, +15585, +15616, +15649, +15680, +15713, +15744, +15777, +15808, +15841, +15904, +15938, +15969, +16000, +16033, +16064, +16161, +16192, +16225, +16256, +16289, +16320, +16353, +16384, +16417, +16448, +16481, +16512, +16545, +16576, +16609, +16640, +16673, +16704, +16737, +16768, +16801, +16832, +16865, +16896, +16929, +16960, +16993, +17024, +17057, +17088, +17121, +17152, +17185, +17216, +17249, +17280, +17313, +17344, +17377, +17408, +17441, +17472, +17505, +17536, +17569, +17600, +17633, +17664, +17697, +17728, +17761, +17792, +17825, +17856, +17889, +17920, +17953, +17984, +18017, +18240, +18305, +18336, +18401, +18464, +18497, +18528, +18657, +18688, +18721, +18752, +18785, +18816, +18849, +18880, +18913, +21124, +21153, +22019, +22612, +22723, +23124, +23555, +23732, +23939, +23988, +24003, +24052, +24581, +28160, +28193, +28224, +28257, +28291, +28340, +28352, +28385, +28445, +28483, +28513, +28625, +28669, +28820, +28864, +28913, +28928, +29053, +29056, +29117, +29120, +29185, +29216, +29789, +29792, +30081, +31200, +31233, +31296, +31393, +31488, +31521, +31552, 
+31585, +31616, +31649, +31680, +31713, +31744, +31777, +31808, +31841, +31872, +31905, +31936, +31969, +32000, +32033, +32064, +32097, +32128, +32161, +32192, +32225, +32384, +32417, +32466, +32480, +32513, +32544, +32609, +32672, +34305, +35840, +35873, +35904, +35937, +35968, +36001, +36032, +36065, +36096, +36129, +36160, +36193, +36224, +36257, +36288, +36321, +36352, +36385, +36416, +36449, +36480, +36513, +36544, +36577, +36608, +36641, +36672, +36705, +36736, +36769, +36800, +36833, +36864, +36897, +36949, +36965, +37127, +37184, +37217, +37248, +37281, +37312, +37345, +37376, +37409, +37440, +37473, +37504, +37537, +37568, +37601, +37632, +37665, +37696, +37729, +37760, +37793, +37824, +37857, +37888, +37921, +37952, +37985, +38016, +38049, +38080, +38113, +38144, +38177, +38208, +38241, +38272, +38305, +38336, +38369, +38400, +38433, +38464, +38497, +38528, +38561, +38592, +38625, +38656, +38689, +38720, +38753, +38784, +38817, +38848, +38881, +38912, +38977, +39008, +39041, +39072, +39105, +39136, +39169, +39200, +39233, +39264, +39297, +39328, +39361, +39424, +39457, +39488, +39521, +39552, +39585, +39616, +39649, +39680, +39713, +39744, +39777, +39808, +39841, +39872, +39905, +39936, +39969, +40000, +40033, +40064, +40097, +40128, +40161, +40192, +40225, +40256, +40289, +40320, +40353, +40384, +40417, +40448, +40481, +40512, +40545, +40576, +40609, +40640, +40673, +40704, +40737, +40768, +40801, +40832, +40865, +40896, +40929, +40960, +40993, +41024, +41057, +41088, +41121, +41152, +41185, +41216, +41249, +41280, +41313, +41344, +41377, +41408, +41441, +41472, +41505, +41536, +41569, +41600, +41633, +41664, +41697, +41728, +41761, +41792, +41825, +41856, +41889, +41920, +41953, +41984, +42017, +42048, +42081, +42112, +42145, +42176, +42209, +42269, +42528, +43773, +43811, +43857, +44061, +44065, +45341, +45361, +45388, +45437, +45555, +45597, +45605, +47052, +47077, +47121, +47141, +47217, +47237, +47313, +47333, +47389, +47620, +48509, +48644, +48753, 
+48829, +49178, +49341, +49362, +49457, +49523, +49553, +49621, +49669, +50033, +50077, +50129, +50180, +51203, +51236, +51557, +52232, +52561, +52676, +52741, +52772, +55953, +55972, +56005, +56250, +56277, +56293, +56483, +56549, +56629, +56645, +56772, +56840, +57156, +57269, +57316, +57361, +57821, +57850, +57860, +57893, +57924, +58885, +59773, +59812, +62661, +63012, +63069, +63496, +63812, +64869, +65155, +65237, +65265, +65347, +65405, +65540, +66245, +66371, +66405, +66691, +66725, +66819, +66853, +67037, +67089, +67581, +67588, +68389, +68509, +68561, +68605, +70660, +70717, +70724, +71101, +72837, +73725, +73733, +73830, +73860, +75589, +75622, +75653, +75684, +75718, +75813, +76070, +76197, +76230, +76292, +76325, +76548, +76869, +76945, +77000, +77329, +77347, +77380, +77597, +77604, +77853, +77861, +77894, +77981, +77988, +78269, +78308, +78397, +78436, +79165, +79172, +79421, +79428, +79485, +79556, +79709, +79749, +79780, +79814, +79909, +80061, +80102, +80189, +80230, +80293, +80324, +80381, +80614, +80669, +80772, +80861, +80868, +80965, +81053, +81096, +81412, +81491, +81546, +81749, +81779, +81821, +81957, +82022, +82077, +82084, +82301, +82404, +82493, +82532, +83261, +83268, +83517, +83524, +83613, +83620, +83709, +83716, +83805, +83845, +83901, +83910, +84005, +84093, +84197, +84285, +84325, +84445, +84517, +84573, +84772, +84925, +84932, +84989, +85192, +85509, +85572, +85669, +85725, +86053, +86118, +86173, +86180, +86493, +86500, +86621, +86628, +87357, +87364, +87613, +87620, +87709, +87716, +87901, +87941, +87972, +88006, +88101, +88285, +88293, +88358, +88413, +88422, +88485, +88541, +88580, +88637, +89092, +89157, +89245, +89288, +89617, +89651, +89693, +90149, +90182, +90269, +90276, +90557, +90596, +90685, +90724, +91453, +91460, +91709, +91716, +91805, +91812, +91997, +92037, +92068, +92102, +92133, +92166, +92197, +92349, +92390, +92477, +92518, +92581, +92637, +92869, +92902, +92957, +93060, +93149, +93156, +93253, +93341, +93384, 
+93717, +93732, +93770, +93981, +94277, +94308, +94365, +94372, +94589, +94660, +94781, +94788, +94941, +95012, +95101, +95108, +95165, +95172, +95261, +95332, +95421, +95492, +95613, +95684, +96093, +96198, +96261, +96294, +96381, +96454, +96573, +96582, +96677, +96733, +96772, +96829, +96998, +97053, +97480, +97802, +97909, +98099, +98133, +98173, +98342, +98461, +98468, +98749, +98756, +98877, +98884, +99645, +99652, +99997, +100004, +100189, +100260, +100293, +100390, +100541, +100549, +100669, +100677, +100829, +101029, +101117, +101124, +101213, +101380, +101445, +101533, +101576, +101917, +102154, +102389, +102429, +102470, +102557, +102564, +102845, +102852, +102973, +102980, +103741, +103748, +104093, +104100, +104285, +104325, +104356, +104390, +104421, +104454, +104637, +104645, +104678, +104765, +104774, +104837, +104925, +105126, +105213, +105412, +105469, +105476, +105541, +105629, +105672, +106013, +106020, +106109, +106566, +106653, +106660, +106941, +106948, +107069, +107076, +108413, +108452, +108486, +108581, +108733, +108742, +108861, +108870, +108965, +108996, +109053, +109286, +109341, +109572, +109637, +109725, +109768, +110090, +110301, +110389, +110404, +110621, +110662, +110749, +110756, +111357, +111428, +112221, +112228, +112541, +112548, +112605, +112644, +112893, +112965, +113021, +113126, +113221, +113341, +113349, +113405, +113414, +113693, +114246, +114321, +114365, +114724, +116261, +116292, +116357, +116605, +116723, +116740, +116931, +116965, +117233, +117256, +117585, +117661, +118820, +118909, +118916, +118973, +119012, +119101, +119108, +119165, +119204, +119261, +119428, +119581, +119588, +119837, +119844, +119965, +119972, +120029, +120036, +120093, +120132, +120221, +120228, +120357, +120388, +120453, +120669, +120677, +120740, +120797, +120836, +121021, +121027, +121085, +121093, +121309, +121352, +121693, +121732, +121885, +122884, +122933, +123025, +123509, +123537, +123573, +123653, +123733, +123912, +124234, +124565, 
+124581, +124629, +124645, +124693, +124709, +124749, +124782, +124813, +124846, +124870, +124932, +125213, +125220, +126397, +126501, +126950, +126981, +127153, +127173, +127236, +127397, +127773, +127781, +128957, +128981, +129221, +129269, +129469, +129493, +129553, +129717, +129841, +129917, +131076, +132454, +132517, +132646, +132677, +132870, +132901, +132966, +133029, +133092, +133128, +133457, +133636, +133830, +133893, +133956, +134085, +134180, +134214, +134308, +134374, +134596, +134693, +134820, +135237, +135270, +135333, +135398, +135589, +135620, +135654, +135688, +136006, +136101, +136149, +136192, +137437, +137440, +137501, +137632, +137693, +137732, +139121, +139139, +139172, +149821, +149828, +149981, +150020, +150269, +150276, +150333, +150340, +150493, +150532, +151869, +151876, +152029, +152068, +153149, +153156, +153309, +153348, +153597, +153604, +153661, +153668, +153821, +153860, +154365, +154372, +156221, +156228, +156381, +156420, +158589, +158629, +158737, +159018, +159677, +159748, +160277, +160605, +160772, +163517, +163852, +163876, +183729, +183780, +184342, +184356, +185197, +185230, +185277, +185348, +187761, +187849, +187965, +188420, +188861, +188868, +188997, +189117, +189444, +190021, +190129, +190205, +190468, +191045, +191133, +191492, +191933, +191940, +192061, +192069, +192157, +192516, +194181, +194246, +194277, +194502, +194757, +194790, +194853, +195217, +195299, +195345, +195443, +195460, +195493, +195549, +195592, +195933, +196106, +196445, +196625, +196812, +196849, +196965, +197078, +197117, +197128, +197469, +197636, +198755, +198788, +200477, +200708, +202021, +202052, +202109, +202244, +204509, +204804, +205757, +205829, +205926, +206053, +206118, +206237, +206342, +206405, +206438, +206629, +206749, +206869, +206909, +206993, +207048, +207364, +208349, +208388, +208573, +208900, +210333, +210438, +210980, +211206, +211293, +211464, +211786, +211837, +211925, +212996, +213733, +213798, +213917, +213969, +214020, 
+215718, +215749, +215782, +215813, +216061, +216069, +216102, +216133, +216166, +216229, +216486, +216677, +217021, +217061, +217096, +217437, +217608, +217949, +218129, +218339, +218385, +218589, +221189, +221318, +221348, +222853, +222886, +222917, +223078, +223109, +223142, +223301, +223334, +223396, +223645, +223752, +224081, +224309, +224613, +224917, +225213, +225285, +225350, +225380, +226342, +226373, +226502, +226565, +226630, +226661, +226694, +226756, +226824, +227140, +228549, +228582, +228613, +228678, +228773, +228806, +228837, +228934, +229021, +229265, +229380, +230534, +230789, +231046, +231109, +231197, +231281, +231432, +231773, +231844, +231944, +232260, +233219, +233425, +233501, +235537, +235805, +236037, +236145, +236165, +236582, +236613, +236836, +236965, +236996, +237126, +237189, +237220, +237309, +237569, +238979, +240993, +241411, +241441, +242531, +243717, +244989, +245637, +245760, +245793, +245824, +245857, +245888, +245921, +245952, +245985, +246016, +246049, +246080, +246113, +246144, +246177, +246208, +246241, +246272, +246305, +246336, +246369, +246400, +246433, +246464, +246497, +246528, +246561, +246592, +246625, +246656, +246689, +246720, +246753, +246784, +246817, +246848, +246881, +246912, +246945, +246976, +247009, +247040, +247073, +247104, +247137, +247168, +247201, +247232, +247265, +247296, +247329, +247360, +247393, +247424, +247457, +247488, +247521, +247552, +247585, +247616, +247649, +247680, +247713, +247744, +247777, +247808, +247841, +247872, +247905, +247936, +247969, +248000, +248033, +248064, +248097, +248128, +248161, +248192, +248225, +248256, +248289, +248320, +248353, +248384, +248417, +248448, +248481, +248512, +248545, +248576, +248609, +248640, +248673, +248704, +248737, +248768, +248801, +248832, +248865, +248896, +248929, +248960, +248993, +249024, +249057, +249088, +249121, +249152, +249185, +249216, +249249, +249280, +249313, +249344, +249377, +249408, +249441, +249472, +249505, +249536, +249569, 
+249600, +249633, +249664, +249697, +249728, +249761, +249792, +249825, +249856, +249889, +249920, +249953, +249984, +250017, +250048, +250081, +250112, +250145, +250176, +250209, +250240, +250273, +250304, +250337, +250368, +250401, +250432, +250465, +250496, +250529, +250816, +250849, +250880, +250913, +250944, +250977, +251008, +251041, +251072, +251105, +251136, +251169, +251200, +251233, +251264, +251297, +251328, +251361, +251392, +251425, +251456, +251489, +251520, +251553, +251584, +251617, +251648, +251681, +251712, +251745, +251776, +251809, +251840, +251873, +251904, +251937, +251968, +252001, +252032, +252065, +252096, +252129, +252160, +252193, +252224, +252257, +252288, +252321, +252352, +252385, +252416, +252449, +252480, +252513, +252544, +252577, +252608, +252641, +252672, +252705, +252736, +252769, +252800, +252833, +252864, +252897, +252928, +252961, +252992, +253025, +253056, +253089, +253120, +253153, +253184, +253217, +253248, +253281, +253312, +253345, +253376, +253409, +253440, +253473, +253504, +253537, +253568, +253601, +253632, +253665, +253696, +253729, +253760, +253793, +253824, +253857, +253888, +253921, +254208, +254465, +254685, +254720, +254941, +254977, +255232, +255489, +255744, +256001, +256221, +256256, +256477, +256513, +256797, +256800, +256861, +256864, +256925, +256928, +256989, +256992, +257025, +257280, +257537, +258013, +258049, +258306, +258561, +258818, +259073, +259330, +259585, +259773, +259777, +259840, +259970, +260020, +260033, +260084, +260161, +260285, +260289, +260352, +260482, +260532, +260609, +260765, +260801, +260864, +261021, +261044, +261121, +261376, +261556, +261661, +261697, +261821, +261825, +261888, +262018, +262068, +262141, +262166, +262522, +262668, +262865, +262927, +262960, +262989, +263023, +263088, +263117, +263151, +263185, +263447, +263480, +263514, +263670, +263697, +263983, +264016, +264049, +264171, +264241, +264338, +264365, +264398, +264433, +264786, +264817, +264843, +264881, +265206, 
+265242, +265405, +265562, +265738, +265763, +265821, +265866, +266066, +266157, +266190, +266211, +266250, +266578, +266669, +266702, +266749, +266755, +267197, +267283, +268125, +268805, +269223, +269349, +269383, +269477, +269885, +270357, +270400, +270453, +270560, +270613, +270657, +270688, +270785, +270848, +270945, +270997, +271008, +271061, +271122, +271136, +271317, +271488, +271541, +271552, +271605, +271616, +271669, +271680, +271829, +271841, +271872, +272001, +272036, +272161, +272213, +272257, +272320, +272402, +272544, +272577, +272725, +272754, +272789, +272833, +272885, +272906, +273417, +274528, +274561, +274601, +274730, +274781, +274962, +275125, +275282, +275349, +275474, +275509, +275570, +275605, +275666, +275701, +275922, +275957, +276946, +277013, +277074, +277109, +277138, +277173, +278162, +286741, +286994, +287125, +287762, +287829, +288045, +288078, +288117, +290706, +290741, +291698, +292501, +293778, +293973, +294557, +294933, +296189, +296981, +297341, +297994, +299925, +302410, +303125, +308978, +309013, +309298, +309333, +311058, +311317, +314866, +314901, +319517, +319541, +322829, +322862, +322893, +322926, +322957, +322990, +323021, +323054, +323085, +323118, +323149, +323182, +323213, +323246, +323274, +324245, +325650, +325805, +325838, +325874, +326861, +326894, +326925, +326958, +326989, +327022, +327053, +327086, +327117, +327150, +327186, +327701, +335890, +340077, +340110, +340141, +340174, +340205, +340238, +340269, +340302, +340333, +340366, +340397, +340430, +340461, +340494, +340525, +340558, +340589, +340622, +340653, +340686, +340717, +340750, +340786, +342797, +342830, +342861, +342894, +342930, +343949, +343982, +344018, +352277, +353810, +354485, +354546, +354749, +354837, +355165, +360448, +361981, +361985, +363517, +363520, +363553, +363584, +363681, +363744, +363777, +363808, +363841, +363872, +363905, +363936, +364065, +364096, +364129, +364192, +364225, +364419, +364480, +364577, +364608, +364641, +364672, 
+364705, +364736, +364769, +364800, +364833, +364864, +364897, +364928, +364961, +364992, +365025, +365056, +365089, +365120, +365153, +365184, +365217, +365248, +365281, +365312, +365345, +365376, +365409, +365440, +365473, +365504, +365537, +365568, +365601, +365632, +365665, +365696, +365729, +365760, +365793, +365824, +365857, +365888, +365921, +365952, +365985, +366016, +366049, +366080, +366113, +366144, +366177, +366208, +366241, +366272, +366305, +366336, +366369, +366400, +366433, +366464, +366497, +366528, +366561, +366592, +366625, +366656, +366689, +366720, +366753, +366784, +366817, +366848, +366881, +366912, +366945, +366976, +367009, +367040, +367073, +367104, +367137, +367168, +367201, +367232, +367265, +367296, +367329, +367360, +367393, +367424, +367457, +367488, +367521, +367552, +367585, +367616, +367649, +367680, +367713, +367797, +367968, +368001, +368032, +368065, +368101, +368192, +368225, +368285, +368433, +368554, +368593, +368641, +369885, +369889, +369949, +370081, +370141, +370180, +371997, +372195, +372241, +372285, +372709, +372740, +373501, +373764, +374013, +374020, +374269, +374276, +374525, +374532, +374781, +374788, +375037, +375044, +375293, +375300, +375549, +375556, +375805, +375813, +376849, +376911, +376944, +376975, +377008, +377041, +377135, +377168, +377201, +377231, +377264, +377297, +377580, +377617, +377676, +377713, +377743, +377776, +377809, +377871, +377904, +377933, +377966, +377997, +378030, +378061, +378094, +378125, +378158, +378193, +378339, +378385, +378700, +378781, +380949, +381789, +381813, +384669, +385045, +391901, +392725, +393117, +393238, +393265, +393365, +393379, +393412, +393449, +393485, +393518, +393549, +393582, +393613, +393646, +393677, +393710, +393741, +393774, +393813, +393869, +393902, +393933, +393966, +393997, +394030, +394061, +394094, +394124, +394157, +394190, +394261, +394281, +394565, +394694, +394764, +394787, +394965, +395017, +395107, +395140, +395185, +395221, +395293, +395300, 
+398077, +398117, +398196, +398243, +398308, +398348, +398372, +401265, +401283, +401380, +401437, +401572, +402909, +402980, +406013, +406037, +406090, +406229, +406532, +407421, +407573, +408733, +409092, +409621, +410621, +410634, +410965, +411914, +412181, +412202, +412693, +413706, +414037, +415274, +415765, +417789, +417813, +425988, +636637, +636949, +638980, +1309117, +1310724, +1311395, +1311428, +1348029, +1348117, +1349885, +1350148, +1351427, +1351633, +1351684, +1360259, +1360305, +1360388, +1360904, +1361220, +1361309, +1361920, +1361953, +1361984, +1362017, +1362048, +1362081, +1362112, +1362145, +1362176, +1362209, +1362240, +1362273, +1362304, +1362337, +1362368, +1362401, +1362432, +1362465, +1362496, +1362529, +1362560, +1362593, +1362624, +1362657, +1362688, +1362721, +1362752, +1362785, +1362816, +1362849, +1362880, +1362913, +1362944, +1362977, +1363008, +1363041, +1363072, +1363105, +1363136, +1363169, +1363200, +1363233, +1363264, +1363297, +1363328, +1363361, +1363396, +1363429, +1363463, +1363569, +1363589, +1363921, +1363939, +1363968, +1364001, +1364032, +1364065, +1364096, +1364129, +1364160, +1364193, +1364224, +1364257, +1364288, +1364321, +1364352, +1364385, +1364416, +1364449, +1364480, +1364513, +1364544, +1364577, +1364608, +1364641, +1364672, +1364705, +1364765, +1364965, +1364996, +1367241, +1367557, +1367633, +1367837, +1368084, +1368803, +1369108, +1369152, +1369185, +1369216, +1369249, +1369280, +1369313, +1369344, +1369377, +1369408, +1369441, +1369472, +1369505, +1369536, +1369569, +1369664, +1369697, +1369728, +1369761, +1369792, +1369825, +1369856, +1369889, +1369920, +1369953, +1369984, +1370017, +1370048, +1370081, +1370112, +1370145, +1370176, +1370209, +1370240, +1370273, +1370304, +1370337, +1370368, +1370401, +1370432, +1370465, +1370496, +1370529, +1370560, +1370593, +1370624, +1370657, +1370688, +1370721, +1370752, +1370785, +1370816, +1370849, +1370880, +1370913, +1370944, +1370977, +1371008, +1371041, +1371072, 
+1371105, +1371136, +1371169, +1371200, +1371233, +1371264, +1371297, +1371328, +1371361, +1371392, +1371425, +1371456, +1371489, +1371520, +1371553, +1371584, +1371617, +1371651, +1371681, +1371936, +1371969, +1372000, +1372033, +1372064, +1372129, +1372160, +1372193, +1372224, +1372257, +1372288, +1372321, +1372352, +1372385, +1372419, +1372468, +1372512, +1372545, +1372576, +1372609, +1372669, +1372672, +1372705, +1372736, +1372769, +1372829, +1373184, +1373217, +1373248, +1373281, +1373312, +1373345, +1373376, +1373409, +1373440, +1373473, +1373504, +1373565, +1376003, +1376065, +1376100, +1376325, +1376356, +1376453, +1376484, +1376613, +1376644, +1377382, +1377445, +1377510, +1377557, +1377693, +1377802, +1378005, +1378067, +1378101, +1378141, +1378308, +1379985, +1380125, +1380358, +1380420, +1382022, +1382533, +1382589, +1382865, +1382920, +1383261, +1383429, +1384004, +1384209, +1384292, +1384349, +1384456, +1384772, +1385669, +1385937, +1385988, +1386725, +1387078, +1387165, +1387505, +1387524, +1388477, +1388549, +1388646, +1388676, +1390181, +1390214, +1390277, +1390406, +1390469, +1390502, +1390641, +1391069, +1391075, +1391112, +1391453, +1391569, +1391645, +1392644, +1393957, +1394150, +1394213, +1394278, +1394341, +1394429, +1394692, +1394789, +1394820, +1395077, +1395110, +1395165, +1395208, +1395549, +1395601, +1395716, +1396227, +1396260, +1396469, +1396548, +1396582, +1396637, +1396740, +1398277, +1398308, +1398341, +1398436, +1398501, +1398564, +1398725, +1398788, +1398821, +1398852, +1398909, +1399652, +1399715, +1399761, +1399812, +1400166, +1400197, +1400262, +1400337, +1400388, +1400419, +1400486, +1400517, +1400573, +1400868, +1401085, +1401124, +1401341, +1401380, +1401597, +1401860, +1402109, +1402116, +1402365, +1406980, +1408102, +1408165, +1408198, +1408261, +1408294, +1408369, +1408390, +1408421, +1408477, +1408520, +1408861, +1409028, +1766557, +1766916, +1767677, +1767780, +1769373, +1769499, +1835036, +2039812, +2051549, +2051588, 
+2055005, +2056193, +2056445, +2056801, +2056989, +2057124, +2057157, +2057188, +2057522, +2057540, +2057981, +2057988, +2058173, +2058180, +2058237, +2058244, +2058333, +2058340, +2058429, +2058436, +2061908, +2062429, +2062948, +2074573, +2074606, +2074653, +2075140, +2077213, +2077252, +2079005, +2080260, +2080659, +2080693, +2080733, +2080773, +2081297, +2081517, +2081550, +2081585, +2081629, +2081797, +2082045, +2082321, +2082348, +2082411, +2082477, +2082510, +2082541, +2082574, +2082605, +2082638, +2082669, +2082702, +2082733, +2082766, +2082797, +2082830, +2082861, +2082894, +2082925, +2082958, +2082993, +2083053, +2083086, +2083121, +2083243, +2083345, +2083453, +2083473, +2083596, +2083629, +2083662, +2083693, +2083726, +2083757, +2083790, +2083825, +2083922, +2083948, +2083986, +2084093, +2084113, +2084147, +2084177, +2084253, +2084356, +2084541, +2084548, +2088893, +2088954, +2088989, +2089009, +2089107, +2089137, +2089229, +2089262, +2089297, +2089330, +2089361, +2089388, +2089425, +2089480, +2089809, +2089874, +2089969, +2090016, +2090861, +2090897, +2090926, +2090964, +2090987, +2091028, +2091041, +2091885, +2091922, +2091950, +2091986, +2092013, +2092046, +2092081, +2092109, +2092142, +2092177, +2092228, +2092547, +2092580, +2094019, +2094084, +2095101, +2095172, +2095389, +2095428, +2095645, +2095684, +2095901, +2095940, +2096061, +2096147, +2096210, +2096244, +2096277, +2096307, +2096381, +2096405, +2096434, +2096565, +2096637, +2096954, +2097045, +2097117, +2097156, +2097565, +2097572, +2098429, +2098436, +2099069, +2099076, +2099165, +2099172, +2099677, +2099716, +2100189, +2101252, +2105213, +2105361, +2105469, +2105578, +2107037, +2107125, +2107401, +2109098, +2109237, +2109770, +2109821, +2109973, +2110365, +2112021, +2113445, +2113501, +2117636, +2118589, +2118660, +2120253, +2121732, +2122749, +2122762, +2122909, +2123268, +2123817, +2123844, +2124105, +2124157, +2125828, +2126813, +2126833, +2126852, +2128029, +2128132, +2128401, +2128425, 
+2128605, +2129920, +2131201, +2132484, +2135005, +2135048, +2135389, +2162692, +2162909, +2162948, +2163005, +2163012, +2164445, +2164452, +2164541, +2164612, +2164669, +2164708, +2165469, +2165489, +2165514, +2165789, +2170884, +2171594, +2171805, +2171889, +2171908, +2172765, +2172913, +2172957, +2174980, +2176797, +2176964, +2177053, +2179076, +2179109, +2179229, +2179237, +2179325, +2179461, +2179588, +2179741, +2179748, +2179869, +2179876, +2180765, +2180869, +2180989, +2181093, +2181130, +2181405, +2181649, +2181949, +2182148, +2183082, +2183153, +2183197, +2187268, +2189021, +2189105, +2189316, +2190045, +2190090, +2190340, +2190973, +2191114, +2191389, +2195460, +2197821, +2214922, +2215933, +2228230, +2228261, +2228294, +2228324, +2230021, +2230513, +2230749, +2230858, +2231496, +2231837, +2232325, +2232390, +2232420, +2233862, +2233957, +2234086, +2234149, +2234225, +2234298, +2234321, +2234461, +2234884, +2235709, +2235912, +2236253, +2236421, +2236516, +2237669, +2237830, +2237861, +2238141, +2238152, +2238481, +2238621, +2240517, +2240582, +2240612, +2242150, +2242245, +2242534, +2242596, +2242737, +2242877, +2243080, +2243421, +2281476, +2282853, +2282886, +2282917, +2282950, +2283013, +2283206, +2283237, +2283293, +2283528, +2283869, +2359300, +2387453, +2392073, +2395261, +2395665, +2395805, +2490372, +2524669, +2949124, +2967357, +3006468, +3008701, +3009028, +3009062, +3010557, +3011045, +3011171, +3011613, +3538948, +3539037, +3801109, +3808989, +3809301, +3810557, +3810613, +3812518, +3812581, +3812693, +3812774, +3812986, +3813221, +3813493, +3813541, +3813781, +3814725, +3814869, +3816413, +3817493, +3819589, +3819701, +3819741, +3825685, +3828477, +3828746, +3829341, +3833856, +3834689, +3835520, +3836353, +3836605, +3836609, +3837184, +3838017, +3838848, +3838909, +3838912, +3839005, +3839040, +3839101, +3839136, +3839229, +3839264, +3839421, +3839424, +3839681, +3839837, +3839841, +3839901, +3839905, +3840157, +3840161, +3840512, +3841345, 
+3842176, +3842269, +3842272, +3842429, +3842464, +3842749, +3842752, +3843005, +3843009, +3843840, +3843933, +3843936, +3844093, +3844096, +3844285, +3844288, +3844349, +3844416, +3844669, +3844673, +3845504, +3846337, +3847168, +3848001, +3848832, +3849665, +3850496, +3851329, +3852160, +3852993, +3853824, +3854657, +3855581, +3855616, +3856434, +3856449, +3857266, +3857281, +3857472, +3858290, +3858305, +3859122, +3859137, +3859328, +3860146, +3860161, +3860978, +3860993, +3861184, +3862002, +3862017, +3862834, +3862849, +3863040, +3863858, +3863873, +3864690, +3864705, +3864896, +3864929, +3864989, +3865032, +3866653, +4046852, +4047005, +4047012, +4047901, +4047908, +4047997, +4048004, +4048061, +4048100, +4048157, +4048164, +4048509, +4048516, +4048669, +4048676, +4048733, +4048740, +4048797, +4048964, +4049021, +4049124, +4049181, +4049188, +4049245, +4049252, +4049309, +4049316, +4049437, +4049444, +4049533, +4049540, +4049597, +4049636, +4049693, +4049700, +4049757, +4049764, +4049821, +4049828, +4049885, +4049892, +4049949, +4049956, +4050045, +4050052, +4050109, +4050148, +4050301, +4050308, +4050557, +4050564, +4050717, +4050724, +4050877, +4050884, +4050941, +4050948, +4051293, +4051300, +4051869, +4052004, +4052125, +4052132, +4052317, +4052324, +4052893, +4054546, +4054621, +4063253, +4064669, +4064789, +4067997, +4068373, +4068861, +4068917, +4069373, +4069429, +4069917, +4069941, +4070429, +4071434, +4071805, +4071957, +4072957, +4072981, +4074909, +4075029, +4076413, +4078805, +4079741, +4080149, +4081533, +4081685, +4081981, +4082197, +4082269, +4087829, +4088893, +4089365, +4089565, +4089589, +4091837, +4091925, +4092573, +4092949, +4094141, +4094165, +4094333, +4094997, +4095549, +4096021, +4098045, +4098069, +4098109, +4098133, +4103965, +4103989, +4104125, +4104213, +4106205, +4106261, +4106397, +4106773, +4107549, +4112245, +4114493, +4114613, +4114973, +4116501, +4118749, +4120597, +4124317, +4194308, +5561085, +5562372, +5695165, +5695492, 
+5702621, +6225924, +6243293, +29360186, +29360221, +29361178, +29364253, +29368325, +29376029, +31457308, +33554397, +33554460, +35651549, +//--Autogenerated -- end of section automatically generated +}; + +const int maxUnicode = 0x10ffff; +const int maskCategory = 0x1F; +const int nRanges = sizeof(catRanges) / sizeof(catRanges[0]); + +} + +// Each element in catRanges is the start of a range of Unicode characters in +// one general category. +// The value is comprised of a 21-bit character value shifted 5 bits and a 5 bit +// category matching the CharacterCategory enumeration. +// Initial version has 3249 entries and adds about 13K to the executable. +// The array is in ascending order so can be searched using binary search. +// Therefore the average call takes log2(3249) = 12 comparisons. +// For speed, it may be useful to make a linear table for the common values, +// possibly for 0..0xff for most Western European text or 0..0xfff for most +// alphabetic languages. + +CharacterCategory CategoriseCharacter(int character) { + if (character < 0 || character > maxUnicode) + return ccCn; + const int baseValue = character * (maskCategory+1) + maskCategory; + const int *placeAfter = std::lower_bound(catRanges, catRanges+nRanges, baseValue); + return static_cast<CharacterCategory>(*(placeAfter-1) & maskCategory); +} + +#ifdef SCI_NAMESPACE +} +#endif
Modified: scintilla/lexlib/CharacterCategory.h 31 files changed, 31 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,31 @@ +// Scintilla source code edit control +/** @file CharacterCategory.h + ** Returns the Unicode general category of a character. + **/ +// Copyright 2013 by Neil Hodgson neilh@scintilla.org +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef CHARACTERCATEGORY_H +#define CHARACTERCATEGORY_H + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +enum CharacterCategory { + ccLu, ccLl, ccLt, ccLm, ccLo, + ccMn, ccMc, ccMe, + ccNd, ccNl, ccNo, + ccPc, ccPd, ccPs, ccPe, ccPi, ccPf, ccPo, + ccSm, ccSc, ccSk, ccSo, + ccZs, ccZl, ccZp, + ccCc, ccCf, ccCs, ccCo, ccCn +}; + +CharacterCategory CategoriseCharacter(int character); + +#ifdef SCI_NAMESPACE +} +#endif + +#endif
Modified: scintilla/lexlib/LexAccessor.h 6 files changed, 6 insertions(+), 0 deletions(-) =================================================================== @@ -79,6 +79,12 @@ class LexAccessor { } return buf[position - startPos]; } + IDocumentWithLineEnd *MultiByteAccess() const { + if (documentVersion >= dvLineEnd) { + return static_cast<IDocumentWithLineEnd *>(pAccess); + } + return 0; + } /** Safe version of operator[], returning a defined value for invalid position. */ char SafeGetCharAt(int position, char chDefault=' ') { if (position < startPos || position >= endPos) {
Modified: scintilla/lexlib/StyleContext.h 150 files changed, 69 insertions(+), 81 deletions(-) =================================================================== @@ -1,5 +1,5 @@ // Scintilla source code edit control -/** @file StyleContext.cxx +/** @file StyleContext.h ** Lexer infrastructure. **/ // Copyright 1998-2004 by Neil Hodgson neilh@scintilla.org @@ -19,67 +19,36 @@ static inline int MakeLowerCase(int ch) { return ch - 'A' + 'a'; }
-inline int UnicodeCodePoint(const unsigned char *us) { - if (us[0] < 0xC2) { - return us[0]; - } else if (us[0] < 0xE0) { - return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F); - } else if (us[0] < 0xF0) { - return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F); - } else if (us[0] < 0xF5) { - return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F); - } - return us[0]; -} - -inline int BytesInUnicodeCodePoint(int codePoint) { - if (codePoint < 0x80) - return 1; - else if (codePoint < 0x800) - return 2; - else if (codePoint < 0x10000) - return 3; - else - return 4; -} - // All languages handled so far can treat all characters >= 0x80 as one class // which just continues the current token or starts an identifier if in default. // DBCS treated specially as the second character can be < 0x80 and hence // syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80 class StyleContext { LexAccessor &styler; + IDocumentWithLineEnd *multiByteAccess; unsigned int endPos; unsigned int lengthDocument; + + // Used for optimizing GetRelativeCharacter + unsigned int posRelative; + unsigned int currentPosLastRelative; + int offsetRelative; + StyleContext &operator=(const StyleContext &);
- void GetNextChar(unsigned int pos) { - chNext = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1, 0)); - if (styler.Encoding() == encUnicode) { - if (chNext >= 0x80) { - unsigned char bytes[4] = { static_cast<unsigned char>(chNext), 0, 0, 0 }; - for (int trail=1; trail<3; trail++) { - bytes[trail] = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1+trail, 0)); - if (!((bytes[trail] >= 0x80) && (bytes[trail] < 0xc0))) { - bytes[trail] = 0; - break; - } - } - chNext = UnicodeCodePoint(bytes); - } - } else if (styler.Encoding() == encDBCS) { - if (styler.IsLeadByte(static_cast<char>(chNext))) { - chNext = chNext << 8; - chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2, 0)); - } + void GetNextChar() { + if (multiByteAccess) { + chNext = multiByteAccess->GetCharacterAndWidth(currentPos+width, &widthNext); + } else { + chNext = static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+width, 0)); + widthNext = 1; } - // End of line? - // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) - // or on LF alone (Unix). Avoid triggering two times on Dos/Win. + // End of line determined from line end position, allowing CR, LF, + // CRLF and Unicode line ends as set by document. if (currentLine < lineDocEnd) - atLineEnd = static_cast<int>(pos) >= (lineStartNext-1); + atLineEnd = static_cast<int>(currentPos) >= (lineStartNext-1); else // Last line - atLineEnd = static_cast<int>(pos) >= lineStartNext; + atLineEnd = static_cast<int>(currentPos) >= lineStartNext; }
public: @@ -92,12 +61,18 @@ class StyleContext { int state; int chPrev; int ch; + int width; int chNext; + int widthNext;
StyleContext(unsigned int startPos, unsigned int length, int initStyle, LexAccessor &styler_, char chMask=31) : styler(styler_), + multiByteAccess(0), endPos(startPos + length), + posRelative(0), + currentPosLastRelative(0x7FFFFFFF), + offsetRelative(0), currentPos(startPos), currentLine(-1), lineStartNext(-1), @@ -105,7 +80,12 @@ class StyleContext { state(initStyle & chMask), // Mask off all bits which aren't in the chMask. chPrev(0), ch(0), - chNext(0) { + width(0), + chNext(0), + widthNext(1) { + if (styler.Encoding() != enc8bit) { + multiByteAccess = styler.MultiByteAccess(); + } styler.StartAt(startPos, chMask); styler.StartSegment(startPos); currentLine = styler.GetLine(startPos); @@ -115,21 +95,14 @@ class StyleContext { endPos++; lineDocEnd = styler.GetLine(lengthDocument); atLineStart = static_cast<unsigned int>(styler.LineStart(currentLine)) == startPos; - unsigned int pos = currentPos; - ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos, 0)); - if (styler.Encoding() == encUnicode) { - // Get the current char - GetNextChar(pos-1); - ch = chNext; - pos += BytesInUnicodeCodePoint(ch) - 1; - } else if (styler.Encoding() == encDBCS) { - if (styler.IsLeadByte(static_cast<char>(ch))) { - pos++; - ch = ch << 8; - ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos, 0)); - } - } - GetNextChar(pos); + + // Variable width is now 0 so GetNextChar gets the char at currentPos into chNext/widthNext + width = 0; + GetNextChar(); + ch = chNext; + width = widthNext; + + GetNextChar(); } void Complete() { styler.ColourTo(currentPos - ((currentPos > lengthDocument) ? 
2 : 1), state); @@ -146,23 +119,10 @@ class StyleContext { lineStartNext = styler.LineStart(currentLine+1); } chPrev = ch; - if (styler.Encoding() == encUnicode) { - currentPos += BytesInUnicodeCodePoint(ch); - } else if (styler.Encoding() == encDBCS) { - currentPos++; - if (ch >= 0x100) - currentPos++; - } else { - currentPos++; - } + currentPos += width; ch = chNext; - if (styler.Encoding() == encUnicode) { - GetNextChar(currentPos + BytesInUnicodeCodePoint(ch)-1); - } else if (styler.Encoding() == encDBCS) { - GetNextChar(currentPos + ((ch >= 0x100) ? 1 : 0)); - } else { - GetNextChar(currentPos); - } + width = widthNext; + GetNextChar(); } else { atLineStart = false; chPrev = ' '; @@ -176,6 +136,12 @@ class StyleContext { Forward(); } } + void ForwardBytes(int nb) { + size_t forwardPos = currentPos + nb; + while (forwardPos > currentPos) { + Forward(); + } + } void ChangeState(int state_) { state = state_; } @@ -194,6 +160,28 @@ class StyleContext { int GetRelative(int n) { return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+n, 0)); } + int GetRelativeCharacter(int n) { + if (n == 0) + return ch; + if (multiByteAccess) { + if ((currentPosLastRelative != currentPos) || + ((n > 0) && ((offsetRelative < 0) || (n < offsetRelative))) || + ((n < 0) && ((offsetRelative > 0) || (n > offsetRelative)))) { + posRelative = currentPos; + offsetRelative = 0; + } + int diffRelative = n - offsetRelative; + int posNew = multiByteAccess->GetRelativePosition(posRelative, diffRelative); + int ch = multiByteAccess->GetCharacterAndWidth(posNew, 0); + posRelative = posNew; + currentPosLastRelative = currentPos; + offsetRelative = n; + return ch; + } else { + // fast version for single byte encodings + return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0)); + } + } bool Match(char ch0) const { return ch == static_cast<unsigned char>(ch0); }
Modified: scintilla/makefile.win32 3 files changed, 3 insertions(+), 0 deletions(-) =================================================================== @@ -108,6 +108,9 @@ SRCOBJS=\ PlatGTK.o \ ScintillaGTK.o \ Accessor.o \ + CharacterCategory.o \ + CaseConvert.o \ + CaseFolder.o \ CharacterSet.o \ LexerBase.o \ LexerModule.o \
Modified: scintilla/scintilla_changes.patch 2 files changed, 1 insertions(+), 1 deletions(-) =================================================================== @@ -38,9 +38,9 @@ index 84d003e..37b2a3c 100644 - LINK_LEXER(lmA68k); LINK_LEXER(lmAbaqus); LINK_LEXER(lmAda); -- LINK_LEXER(lmAns1); - LINK_LEXER(lmAPDL); LINK_LEXER(lmAsm); +- LINK_LEXER(lmAsn1); - LINK_LEXER(lmASY); - LINK_LEXER(lmAU3); - LINK_LEXER(lmAVE);
Modified: scintilla/src/CaseConvert.cxx 630 files changed, 630 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,630 @@ +// Scintilla source code edit control +// Encoding: UTF-8 +/** @file CaseConvert.cxx + ** Case fold characters and convert them to upper or lower case. + ** Tables automatically regenerated by scripts/GenerateCharacterCategory.py + ** Should only be rarely regenerated for new versions of Unicode. + **/ +// Copyright 2013 by Neil Hodgson neilh@scintilla.org +// The License.txt file describes the conditions under which this software may be distributed. + +#include <cstring> + +#include <vector> +#include <algorithm> + +#include "CaseConvert.h" +#include "UniConversion.h" +#include "UnicodeFromUTF8.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +namespace { + // Use an unnamed namespace to protect the declarations from name conflicts + +// Unicode code points are ordered by groups and follow patterns. +// Most characters (pitch==1) are in ranges for a particular alphabet and their +// upper case forms are a fixed distance away. +// Another pattern (pitch==2) is where each lower case letter is preceded by +// the upper case form. These are also grouped into ranges. 
+ +int symmetricCaseConversionRanges[] = { +//lower, upper, range length, range pitch +//++Autogenerated -- start of section automatically generated +//**(*\n) +97,65,26,1, +224,192,23,1, +248,216,7,1, +257,256,24,2, +314,313,8,2, +331,330,23,2, +462,461,8,2, +479,478,9,2, +505,504,20,2, +547,546,9,2, +583,582,5,2, +945,913,17,1, +963,931,9,1, +985,984,12,2, +1072,1040,32,1, +1104,1024,16,1, +1121,1120,17,2, +1163,1162,27,2, +1218,1217,7,2, +1233,1232,44,2, +1377,1329,38,1, +7681,7680,75,2, +7841,7840,48,2, +7936,7944,8,1, +7952,7960,6,1, +7968,7976,8,1, +7984,7992,8,1, +8000,8008,6,1, +8032,8040,8,1, +8560,8544,16,1, +9424,9398,26,1, +11312,11264,47,1, +11393,11392,50,2, +11520,4256,38,1, +42561,42560,23,2, +42625,42624,12,2, +42787,42786,7,2, +42803,42802,31,2, +42879,42878,5,2, +42913,42912,5,2, +65345,65313,26,1, +66600,66560,40,1, + +//--Autogenerated -- end of section automatically generated +}; + +// Code points that are symmetric but don't fit into a range of similar characters +// are listed here. 
+ +int symmetricCaseConversions[] = { +//lower, upper +//++Autogenerated -- start of section automatically generated +//**1 (*\n) +255,376, +307,306, +309,308, +311,310, +378,377, +380,379, +382,381, +384,579, +387,386, +389,388, +392,391, +396,395, +402,401, +405,502, +409,408, +410,573, +414,544, +417,416, +419,418, +421,420, +424,423, +429,428, +432,431, +436,435, +438,437, +441,440, +445,444, +447,503, +454,452, +457,455, +460,458, +477,398, +499,497, +501,500, +572,571, +575,11390, +576,11391, +578,577, +592,11375, +593,11373, +594,11376, +595,385, +596,390, +598,393, +599,394, +601,399, +603,400, +608,403, +611,404, +613,42893, +614,42922, +616,407, +617,406, +619,11362, +623,412, +625,11374, +626,413, +629,415, +637,11364, +640,422, +643,425, +648,430, +649,580, +650,433, +651,434, +652,581, +658,439, +881,880, +883,882, +887,886, +891,1021, +892,1022, +893,1023, +940,902, +941,904, +942,905, +943,906, +972,908, +973,910, +974,911, +983,975, +1010,1017, +1016,1015, +1019,1018, +1231,1216, +7545,42877, +7549,11363, +8017,8025, +8019,8027, +8021,8029, +8023,8031, +8048,8122, +8049,8123, +8050,8136, +8051,8137, +8052,8138, +8053,8139, +8054,8154, +8055,8155, +8056,8184, +8057,8185, +8058,8170, +8059,8171, +8060,8186, +8061,8187, +8112,8120, +8113,8121, +8144,8152, +8145,8153, +8160,8168, +8161,8169, +8165,8172, +8526,8498, +8580,8579, +11361,11360, +11365,570, +11366,574, +11368,11367, +11370,11369, +11372,11371, +11379,11378, +11382,11381, +11500,11499, +11502,11501, +11507,11506, +11559,4295, +11565,4301, +42874,42873, +42876,42875, +42892,42891, +42897,42896, +42899,42898, + +//--Autogenerated -- end of section automatically generated +}; + +// Characters that have complex case conversions are listed here. +// This includes cases where more than one character is needed for a conversion, +// folding is different to lowering, or (as appropriate) upper(lower(x)) != x or +// lower(upper(x)) != x. 
+ +const char *complexCaseConversions = +// Original | Folded | Upper | Lower | +//++Autogenerated -- start of section automatically generated +//**2 (*\n) +"µ|μ|Μ||" +"ß|ss|SS||" +"İ|i̇||i̇|" +"ı||I||" +"ʼn|ʼn|ʼN||" +"ſ|s|S||" +"Dž|dž|DŽ|dž|" +"Lj|lj|LJ|lj|" +"Nj|nj|NJ|nj|" +"ǰ|ǰ|J̌||" +"Dz|dz|DZ|dz|" +"ͅ|ι|Ι||" +"ΐ|ΐ|Ϊ́||" +"ΰ|ΰ|Ϋ́||" +"ς|σ|Σ||" +"ϐ|β|Β||" +"ϑ|θ|Θ||" +"ϕ|φ|Φ||" +"ϖ|π|Π||" +"ϰ|κ|Κ||" +"ϱ|ρ|Ρ||" +"ϴ|θ||θ|" +"ϵ|ε|Ε||" +"և|եւ|ԵՒ||" +"ẖ|ẖ|H̱||" +"ẗ|ẗ|T̈||" +"ẘ|ẘ|W̊||" +"ẙ|ẙ|Y̊||" +"ẚ|aʾ|Aʾ||" +"ẛ|ṡ|Ṡ||" +"ẞ|ss||ß|" +"ὐ|ὐ|Υ̓||" +"ὒ|ὒ|Υ̓̀||" +"ὔ|ὔ|Υ̓́||" +"ὖ|ὖ|Υ̓͂||" +"ᾀ|ἀι|ἈΙ||" +"ᾁ|ἁι|ἉΙ||" +"ᾂ|ἂι|ἊΙ||" +"ᾃ|ἃι|ἋΙ||" +"ᾄ|ἄι|ἌΙ||" +"ᾅ|ἅι|ἍΙ||" +"ᾆ|ἆι|ἎΙ||" +"ᾇ|ἇι|ἏΙ||" +"ᾈ|ἀι|ἈΙ|ᾀ|" +"ᾉ|ἁι|ἉΙ|ᾁ|" +"ᾊ|ἂι|ἊΙ|ᾂ|" +"ᾋ|ἃι|ἋΙ|ᾃ|" +"ᾌ|ἄι|ἌΙ|ᾄ|" +"ᾍ|ἅι|ἍΙ|ᾅ|" +"ᾎ|ἆι|ἎΙ|ᾆ|" +"ᾏ|ἇι|ἏΙ|ᾇ|" +"ᾐ|ἠι|ἨΙ||" +"ᾑ|ἡι|ἩΙ||" +"ᾒ|ἢι|ἪΙ||" +"ᾓ|ἣι|ἫΙ||" +"ᾔ|ἤι|ἬΙ||" +"ᾕ|ἥι|ἭΙ||" +"ᾖ|ἦι|ἮΙ||" +"ᾗ|ἧι|ἯΙ||" +"ᾘ|ἠι|ἨΙ|ᾐ|" +"ᾙ|ἡι|ἩΙ|ᾑ|" +"ᾚ|ἢι|ἪΙ|ᾒ|" +"ᾛ|ἣι|ἫΙ|ᾓ|" +"ᾜ|ἤι|ἬΙ|ᾔ|" +"ᾝ|ἥι|ἭΙ|ᾕ|" +"ᾞ|ἦι|ἮΙ|ᾖ|" +"ᾟ|ἧι|ἯΙ|ᾗ|" +"ᾠ|ὠι|ὨΙ||" +"ᾡ|ὡι|ὩΙ||" +"ᾢ|ὢι|ὪΙ||" +"ᾣ|ὣι|ὫΙ||" +"ᾤ|ὤι|ὬΙ||" +"ᾥ|ὥι|ὭΙ||" +"ᾦ|ὦι|ὮΙ||" +"ᾧ|ὧι|ὯΙ||" +"ᾨ|ὠι|ὨΙ|ᾠ|" +"ᾩ|ὡι|ὩΙ|ᾡ|" +"ᾪ|ὢι|ὪΙ|ᾢ|" +"ᾫ|ὣι|ὫΙ|ᾣ|" +"ᾬ|ὤι|ὬΙ|ᾤ|" +"ᾭ|ὥι|ὭΙ|ᾥ|" +"ᾮ|ὦι|ὮΙ|ᾦ|" +"ᾯ|ὧι|ὯΙ|ᾧ|" +"ᾲ|ὰι|ᾺΙ||" +"ᾳ|αι|ΑΙ||" +"ᾴ|άι|ΆΙ||" +"ᾶ|ᾶ|Α͂||" +"ᾷ|ᾶι|Α͂Ι||" +"ᾼ|αι|ΑΙ|ᾳ|" +"ι|ι|Ι||" +"ῂ|ὴι|ῊΙ||" +"ῃ|ηι|ΗΙ||" +"ῄ|ήι|ΉΙ||" +"ῆ|ῆ|Η͂||" +"ῇ|ῆι|Η͂Ι||" +"ῌ|ηι|ΗΙ|ῃ|" +"ῒ|ῒ|Ϊ̀||" +"ΐ|ΐ|Ϊ́||" +"ῖ|ῖ|Ι͂||" +"ῗ|ῗ|Ϊ͂||" +"ῢ|ῢ|Ϋ̀||" +"ΰ|ΰ|Ϋ́||" +"ῤ|ῤ|Ρ̓||" +"ῦ|ῦ|Υ͂||" +"ῧ|ῧ|Ϋ͂||" +"ῲ|ὼι|ῺΙ||" +"ῳ|ωι|ΩΙ||" +"ῴ|ώι|ΏΙ||" +"ῶ|ῶ|Ω͂||" +"ῷ|ῶι|Ω͂Ι||" +"ῼ|ωι|ΩΙ|ῳ|" +"Ω|ω||ω|" +"K|k||k|" +"Å|å||å|" +"ff|ff|FF||" +"fi|fi|FI||" +"fl|fl|FL||" +"ffi|ffi|FFI||" +"ffl|ffl|FFL||" +"ſt|st|ST||" +"st|st|ST||" +"ﬓ|մն|ՄՆ||" +"ﬔ|մե|ՄԵ||" +"ﬕ|մի|ՄԻ||" +"ﬖ|վն|ՎՆ||" +"ﬗ|մխ|ՄԽ||" + +//--Autogenerated -- end of section automatically generated +; + +class CaseConverter : public ICaseConverter { + // Maximum length of a case conversion result is 6 bytes in UTF-8 + enum { 
maxConversionLength=6 }; + struct ConversionString { + char conversion[maxConversionLength+1]; + }; + // Conversions are initially stored in a vector of structs but then decomposed into + // parallel arrays as that is about 10% faster to search. + struct CharacterConversion { + int character; + ConversionString conversion; + CharacterConversion(int character_=0, const char *conversion_="") : character(character_) { + strcpy(conversion.conversion, conversion_); + } + bool operator<(const CharacterConversion &other) const { + return character < other.character; + } + }; + typedef std::vector<CharacterConversion> CharacterToConversion; + CharacterToConversion characterToConversion; + // The parallel arrays + std::vector<int> characters; + std::vector<ConversionString> conversions; + +public: + CaseConverter() { + } + bool Initialised() const { + return characters.size() > 0; + } + void Add(int character, const char *conversion) { + characterToConversion.push_back(CharacterConversion(character, conversion)); + } + const char *Find(int character) { + const std::vector<int>::iterator it = std::lower_bound(characters.begin(), characters.end(), character); + if (it == characters.end()) + return 0; + else if (*it == character) + return conversions[it - characters.begin()].conversion; + else + return 0; + } + size_t CaseConvertString(char *converted, size_t sizeConverted, const char *mixed, size_t lenMixed) { + size_t lenConverted = 0; + size_t mixedPos = 0; + unsigned char bytes[UTF8MaxBytes + 1]; + while (mixedPos < lenMixed) { + const unsigned char leadByte = static_cast<unsigned char>(mixed[mixedPos]); + const char *caseConverted = 0; + size_t lenMixedChar = 1; + if (UTF8IsAscii(leadByte)) { + caseConverted = Find(leadByte); + } else { + bytes[0] = leadByte; + const int widthCharBytes = UTF8BytesOfLead[leadByte]; + for (int b=1; b<widthCharBytes; b++) { + bytes[b] = (mixedPos+b < lenMixed) ? 
mixed[mixedPos+b] : 0; + } + int classified = UTF8Classify(bytes, widthCharBytes); + if (!(classified & UTF8MaskInvalid)) { + // valid UTF-8 + lenMixedChar = classified & UTF8MaskWidth; + int character = UnicodeFromUTF8(bytes); + caseConverted = Find(character); + } + } + if (caseConverted) { + // Character has a conversion so copy that conversion in + while (*caseConverted) { + converted[lenConverted++] = *caseConverted++; + if (lenConverted >= sizeConverted) + return 0; + } + } else { + // Character has no conversion so copy the input to output + for (size_t i=0; i<lenMixedChar; i++) { + converted[lenConverted++] = mixed[mixedPos+i]; + if (lenConverted >= sizeConverted) + return 0; + } + } + mixedPos += lenMixedChar; + } + return lenConverted; + } + void FinishedAdding() { + std::sort(characterToConversion.begin(), characterToConversion.end()); + characters.reserve(characterToConversion.size()); + conversions.reserve(characterToConversion.size()); + for (CharacterToConversion::iterator it = characterToConversion.begin(); it != characterToConversion.end(); ++it) { + characters.push_back(it->character); + conversions.push_back(it->conversion); + } + // Empty the original calculated data completely + CharacterToConversion().swap(characterToConversion); + } +}; + +CaseConverter caseConvFold; +CaseConverter caseConvUp; +CaseConverter caseConvLow; + +void UTF8FromUTF32Character(int uch, char *putf) { + size_t k = 0; + if (uch < 0x80) { + putf[k++] = static_cast<char>(uch); + } else if (uch < 0x800) { + putf[k++] = static_cast<char>(0xC0 | (uch >> 6)); + putf[k++] = static_cast<char>(0x80 | (uch & 0x3f)); + } else if (uch < 0x10000) { + putf[k++] = static_cast<char>(0xE0 | (uch >> 12)); + putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f)); + putf[k++] = static_cast<char>(0x80 | (uch & 0x3f)); + } else { + putf[k++] = static_cast<char>(0xF0 | (uch >> 18)); + putf[k++] = static_cast<char>(0x80 | ((uch >> 12) & 0x3f)); + putf[k++] = static_cast<char>(0x80 | ((uch >> 
6) & 0x3f)); + putf[k++] = static_cast<char>(0x80 | (uch & 0x3f)); + } + putf[k] = 0; +} + +void AddSymmetric(enum CaseConversion conversion, int lower,int upper) { + char lowerUTF8[UTF8MaxBytes+1]; + UTF8FromUTF32Character(lower, lowerUTF8); + char upperUTF8[UTF8MaxBytes+1]; + UTF8FromUTF32Character(upper, upperUTF8); + + switch (conversion) { + case CaseConversionFold: + caseConvFold.Add(upper, lowerUTF8); + break; + case CaseConversionUpper: + caseConvUp.Add(lower, upperUTF8); + break; + case CaseConversionLower: + caseConvLow.Add(upper, lowerUTF8); + break; + } +} + +void SetupConversions(enum CaseConversion conversion) { + // First initialize for the symmetric ranges + for (size_t i=0; i<sizeof(symmetricCaseConversionRanges)/sizeof(symmetricCaseConversionRanges[0]);) { + int lower = symmetricCaseConversionRanges[i++]; + int upper = symmetricCaseConversionRanges[i++]; + int length = symmetricCaseConversionRanges[i++]; + int pitch = symmetricCaseConversionRanges[i++]; + for (int j=0;j<length*pitch;j+=pitch) { + AddSymmetric(conversion, lower+j, upper+j); + } + } + // Add the symmetric singletons + for (size_t i=0; i<sizeof(symmetricCaseConversions)/sizeof(symmetricCaseConversions[0]);) { + int lower = symmetricCaseConversions[i++]; + int upper = symmetricCaseConversions[i++]; + AddSymmetric(conversion, lower, upper); + } + // Add the complex cases + const char *sComplex = complexCaseConversions; + while (*sComplex) { + // Longest ligature is 3 characters so 5 for safety + const size_t lenUTF8 = 5*UTF8MaxBytes+1; + char originUTF8[lenUTF8]; + char foldedUTF8[lenUTF8]; + char lowerUTF8[lenUTF8]; + char upperUTF8[lenUTF8]; + size_t i = 0; + while (*sComplex && *sComplex != '|') { + originUTF8[i++] = *sComplex; + sComplex++; + } + sComplex++; + originUTF8[i] = 0; + i = 0; + while (*sComplex && *sComplex != '|') { + foldedUTF8[i++] = *sComplex; + sComplex++; + } + sComplex++; + foldedUTF8[i] = 0; + i = 0; + while (*sComplex && *sComplex != '|') { + upperUTF8[i++] = 
*sComplex; + sComplex++; + } + sComplex++; + upperUTF8[i] = 0; + i = 0; + while (*sComplex && *sComplex != '|') { + lowerUTF8[i++] = *sComplex; + sComplex++; + } + sComplex++; + lowerUTF8[i] = 0; + + int character = UnicodeFromUTF8(reinterpret_cast<unsigned char *>(originUTF8)); + + if (conversion == CaseConversionFold && foldedUTF8[0]) { + caseConvFold.Add(character, foldedUTF8); + } + + if (conversion == CaseConversionUpper && upperUTF8[0]) { + caseConvUp.Add(character, upperUTF8); + } + + if (conversion == CaseConversionLower && lowerUTF8[0]) { + caseConvLow.Add(character, lowerUTF8); + } + } + + switch (conversion) { + case CaseConversionFold: + caseConvFold.FinishedAdding(); + break; + case CaseConversionUpper: + caseConvUp.FinishedAdding(); + break; + case CaseConversionLower: + caseConvLow.FinishedAdding(); + break; + } +} + +CaseConverter *ConverterForConversion(enum CaseConversion conversion) { + switch (conversion) { + case CaseConversionFold: + return &caseConvFold; + case CaseConversionUpper: + return &caseConvUp; + case CaseConversionLower: + return &caseConvLow; + } + return 0; +} + +} + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +ICaseConverter *ConverterFor(enum CaseConversion conversion) { + CaseConverter *pCaseConv = ConverterForConversion(conversion); + if (!pCaseConv->Initialised()) + SetupConversions(conversion); + return pCaseConv; +} + +const char *CaseConvert(int character, enum CaseConversion conversion) { + CaseConverter *pCaseConv = ConverterForConversion(conversion); + if (!pCaseConv->Initialised()) + SetupConversions(conversion); + return pCaseConv->Find(character); +} + +size_t CaseConvertString(char *converted, size_t sizeConverted, const char *mixed, size_t lenMixed, enum CaseConversion conversion) { + CaseConverter *pCaseConv = ConverterForConversion(conversion); + if (!pCaseConv->Initialised()) + SetupConversions(conversion); + return pCaseConv->CaseConvertString(converted, sizeConverted, mixed, lenMixed); +} + +#ifdef 
SCI_NAMESPACE +} +#endif
Modified: scintilla/src/CaseConvert.h 47 files changed, 47 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,47 @@ +// Scintilla source code edit control +// Encoding: UTF-8 +/** @file CaseConvert.h + ** Performs Unicode case conversions. + ** Does not handle locale-sensitive case conversion. + **/ +// Copyright 2013 by Neil Hodgson neilh@scintilla.org +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef CASECONVERT_H +#define CASECONVERT_H + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +enum CaseConversion { + CaseConversionFold, + CaseConversionUpper, + CaseConversionLower +}; + +class ICaseConverter { +public: + virtual size_t CaseConvertString(char *converted, size_t sizeConverted, const char *mixed, size_t lenMixed) = 0; +}; + +ICaseConverter *ConverterFor(enum CaseConversion conversion); + +// Returns a UTF-8 string. Empty when no conversion +const char *CaseConvert(int character, enum CaseConversion conversion); + +// When performing CaseConvertString, the converted value may be up to 3 times longer than the input. +// Ligatures are often decomposed into multiple characters and long cases include: +// ΐ "\xce\x90" folds to ΐ "\xce\xb9\xcc\x88\xcc\x81" +const int maxExpansionCaseConversion=3; + +// Converts a mixed case string using a particular conversion. +// Result may be a different length to input and the length is the return value. +// If there is not enough space then 0 is returned. +size_t CaseConvertString(char *converted, size_t sizeConverted, const char *mixed, size_t lenMixed, enum CaseConversion conversion); + +#ifdef SCI_NAMESPACE +} +#endif + +#endif
Modified: scintilla/src/CaseFolder.cxx 68 files changed, 68 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,68 @@ +// Scintilla source code edit control +/** @file CaseFolder.cxx + ** Classes for case folding. + **/ +// Copyright 1998-2013 by Neil Hodgson neilh@scintilla.org +// The License.txt file describes the conditions under which this software may be distributed. + +#include <vector> +#include <algorithm> + +#include "CaseConvert.h" +#include "UniConversion.h" +#include "CaseFolder.h" + +#ifdef SCI_NAMESPACE +using namespace Scintilla; +#endif + +CaseFolder::~CaseFolder() { +} + +CaseFolderTable::CaseFolderTable() { + for (size_t iChar=0; iChar<sizeof(mapping); iChar++) { + mapping[iChar] = static_cast<char>(iChar); + } +} + +CaseFolderTable::~CaseFolderTable() { +} + +size_t CaseFolderTable::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { + if (lenMixed > sizeFolded) { + return 0; + } else { + for (size_t i=0; i<lenMixed; i++) { + folded[i] = mapping[static_cast<unsigned char>(mixed[i])]; + } + return lenMixed; + } +} + +void CaseFolderTable::SetTranslation(char ch, char chTranslation) { + mapping[static_cast<unsigned char>(ch)] = chTranslation; +} + +void CaseFolderTable::StandardASCII() { + for (size_t iChar=0; iChar<sizeof(mapping); iChar++) { + if (iChar >= 'A' && iChar <= 'Z') { + mapping[iChar] = static_cast<char>(iChar - 'A' + 'a'); + } else { + mapping[iChar] = static_cast<char>(iChar); + } + } +} + +CaseFolderUnicode::CaseFolderUnicode() { + StandardASCII(); + converter = ConverterFor(CaseConversionFold); +} + +size_t CaseFolderUnicode::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { + if ((lenMixed == 1) && (sizeFolded > 0)) { + folded[0] = mapping[static_cast<unsigned char>(mixed[0])]; + return 1; + } else { + return converter->CaseConvertString(folded, sizeFolded, mixed, lenMixed); + } +}
Modified: scintilla/src/CaseFolder.h 45 files changed, 45 insertions(+), 0 deletions(-) =================================================================== @@ -0,0 +1,45 @@ +// Scintilla source code edit control +/** @file CaseFolder.h + ** Classes for case folding. + **/ +// Copyright 1998-2013 by Neil Hodgson neilh@scintilla.org +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef CASEFOLDER_H +#define CASEFOLDER_H + +#ifdef SCI_NAMESPACE +namespace Scintilla { +#endif + +class CaseFolder { +public: + virtual ~CaseFolder(); + virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) = 0; +}; + +class CaseFolderTable : public CaseFolder { +protected: + char mapping[256]; +public: + CaseFolderTable(); + virtual ~CaseFolderTable(); + virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed); + void SetTranslation(char ch, char chTranslation); + void StandardASCII(); +}; + +class ICaseConverter; + +class CaseFolderUnicode : public CaseFolderTable { + ICaseConverter *converter; +public: + CaseFolderUnicode(); + virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed); +}; + +#ifdef SCI_NAMESPACE +} +#endif + +#endif
Modified: scintilla/src/CellBuffer.cxx 14 files changed, 8 insertions(+), 6 deletions(-) =================================================================== @@ -168,7 +168,7 @@ void UndoHistory::EnsureUndoRoom() { } }
-void UndoHistory::AppendAction(actionType at, int position, const char *data, int lengthData, +const char *UndoHistory::AppendAction(actionType at, int position, const char *data, int lengthData, bool &startSequence, bool mayCoalesce) { EnsureUndoRoom(); //Platform::DebugPrintf("%% %d action %d %d %d\n", at, position, lengthData, currentAction); @@ -232,10 +232,12 @@ void UndoHistory::AppendAction(actionType at, int position, const char *data, in currentAction++; } startSequence = oldCurrentAction != currentAction; + int actionWithData = currentAction; actions[currentAction].Create(at, position, data, lengthData, mayCoalesce); currentAction++; actions[currentAction].Create(startAction); maxAction = currentAction; + return actions[actionWithData].data; }
void UndoHistory::BeginUndoAction() { @@ -393,13 +395,13 @@ int CellBuffer::GapPosition() const {
// The char* returned is to an allocation owned by the undo history const char *CellBuffer::InsertString(int position, const char *s, int insertLength, bool &startSequence) { - char *data = 0; // InsertString and DeleteChars are the bottleneck though which all changes occur + const char *data = s; if (!readOnly) { if (collectingUndo) { // Save into the undo/redo stack, but only the characters - not the formatting // This takes up about half load time - uh.AppendAction(insertAction, position, s, insertLength, startSequence); + data = uh.AppendAction(insertAction, position, s, insertLength, startSequence); }
BasicInsertString(position, s, insertLength); @@ -437,13 +439,13 @@ bool CellBuffer::SetStyleFor(int position, int lengthStyle, char styleValue, cha const char *CellBuffer::DeleteChars(int position, int deleteLength, bool &startSequence) { // InsertString and DeleteChars are the bottleneck though which all changes occur PLATFORM_ASSERT(deleteLength > 0); - char *data = 0; + const char *data = 0; if (!readOnly) { if (collectingUndo) { // Save into the undo/redo stack, but only the characters - not the formatting // The gap would be moved to position anyway for the deletion so this doesn't cost extra - const char *data = substance.RangePointer(position, deleteLength); - uh.AppendAction(removeAction, position, data, deleteLength, startSequence); + data = substance.RangePointer(position, deleteLength); + data = uh.AppendAction(removeAction, position, data, deleteLength, startSequence); }
BasicDeleteChars(position, deleteLength);
Modified: scintilla/src/CellBuffer.h 2 files changed, 1 insertions(+), 1 deletions(-) =================================================================== @@ -105,7 +105,7 @@ class UndoHistory { UndoHistory(); ~UndoHistory();
- void AppendAction(actionType at, int position, const char *data, int length, bool &startSequence, bool mayCoalesce=true); + const char *AppendAction(actionType at, int position, const char *data, int length, bool &startSequence, bool mayCoalesce=true);
void BeginUndoAction(); void EndUndoAction();
Modified: scintilla/src/Document.cxx 115 files changed, 74 insertions(+), 41 deletions(-) =================================================================== @@ -28,6 +28,7 @@ #include "CharClassify.h" #include "CharacterSet.h" #include "Decoration.h" +#include "CaseFolder.h" #include "Document.h" #include "RESearch.h" #include "UniConversion.h" @@ -699,6 +700,79 @@ bool Document::NextCharacter(int &pos, int moveDir) const { } }
+static inline int UnicodeFromBytes(const unsigned char *us) { + if (us[0] < 0xC2) { + return us[0]; + } else if (us[0] < 0xE0) { + return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F); + } else if (us[0] < 0xF0) { + return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F); + } else if (us[0] < 0xF5) { + return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F); + } + return us[0]; +} + +// Return -1 on out-of-bounds +int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const { + int pos = positionStart; + if (dbcsCodePage) { + const int increment = (characterOffset > 0) ? 1 : -1; + while (characterOffset != 0) { + const int posNext = NextPosition(pos, increment); + if (posNext == pos) + return INVALID_POSITION; + pos = posNext; + characterOffset -= increment; + } + } else { + pos = positionStart + characterOffset; + if ((pos < 0) || (pos > Length())) + return INVALID_POSITION; + } + return pos; +} + +int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const { + int character; + int bytesInCharacter = 1; + if (dbcsCodePage) { + const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position)); + if (SC_CP_UTF8 == dbcsCodePage) { + if (UTF8IsAscii(leadByte)) { + // Single byte character or invalid + character = leadByte; + } else { + const int widthCharBytes = UTF8BytesOfLead[leadByte]; + unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; + for (int b=1; b<widthCharBytes; b++) + charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b)); + int utf8status = UTF8Classify(charBytes, widthCharBytes); + if (utf8status & UTF8MaskInvalid) { + // Report as singleton surrogate values which are invalid Unicode + character = 0xDC80 + leadByte; + } else { + bytesInCharacter = utf8status & UTF8MaskWidth; + character = UnicodeFromBytes(charBytes); + } + } + } else { + if (IsDBCSLeadByte(leadByte)) { + bytesInCharacter = 2; + character = (leadByte << 8) | 
static_cast<unsigned char>(cb.CharAt(position+1)); + } else { + character = leadByte; + } + } + } else { + character = cb.CharAt(position); + } + if (pWidth) { + *pWidth = bytesInCharacter; + } + return character; +} + int SCI_METHOD Document::CodePage() const { return dbcsCodePage; } @@ -1423,47 +1497,6 @@ bool Document::IsWordAt(int start, int end) const { return IsWordStartAt(start) && IsWordEndAt(end); }
-static inline char MakeLowerCase(char ch) { - if (ch < 'A' || ch > 'Z') - return ch; - else - return static_cast<char>(ch - 'A' + 'a'); -} - -CaseFolderTable::CaseFolderTable() { - for (size_t iChar=0; iChar<sizeof(mapping); iChar++) { - mapping[iChar] = static_cast<char>(iChar); - } -} - -CaseFolderTable::~CaseFolderTable() { -} - -size_t CaseFolderTable::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) { - if (lenMixed > sizeFolded) { - return 0; - } else { - for (size_t i=0; i<lenMixed; i++) { - folded[i] = mapping[static_cast<unsigned char>(mixed[i])]; - } - return lenMixed; - } -} - -void CaseFolderTable::SetTranslation(char ch, char chTranslation) { - mapping[static_cast<unsigned char>(ch)] = chTranslation; -} - -void CaseFolderTable::StandardASCII() { - for (size_t iChar=0; iChar<sizeof(mapping); iChar++) { - if (iChar >= 'A' && iChar <= 'Z') { - mapping[iChar] = static_cast<char>(iChar - 'A' + 'a'); - } else { - mapping[iChar] = static_cast<char>(iChar); - } - } -} - bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const { return (!word && !wordStart) || (word && IsWordAt(pos, pos + length)) ||
Modified: scintilla/src/Document.h 20 files changed, 2 insertions(+), 18 deletions(-) =================================================================== @@ -155,24 +155,6 @@ class HighlightDelimiter { bool isEnabled; };
-class CaseFolder { -public: - virtual ~CaseFolder() { - } - virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) = 0; -}; - -class CaseFolderTable : public CaseFolder { -protected: - char mapping[256]; -public: - CaseFolderTable(); - virtual ~CaseFolderTable(); - virtual size_t Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed); - void SetTranslation(char ch, char chTranslation); - void StandardASCII(); -}; - class Document;
class LexInterface { @@ -279,6 +261,8 @@ class Document : PerLine, public IDocumentWithLineEnd, public ILoader { int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true); int NextPosition(int pos, int moveDir) const; bool NextCharacter(int &pos, int moveDir) const; // Returns true if pos changed + int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const; + int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const; int SCI_METHOD CodePage() const; bool SCI_METHOD IsDBCSLeadByte(char ch) const; int SafeSegment(const char *text, int length, int lengthSegment) const;
Modified: scintilla/src/Editor.cxx 222 files changed, 104 insertions(+), 118 deletions(-) =================================================================== @@ -36,6 +36,7 @@ #include "ViewStyle.h" #include "CharClassify.h" #include "Decoration.h" +#include "CaseFolder.h" #include "Document.h" #include "UniConversion.h" #include "Selection.h" @@ -116,6 +117,7 @@ static inline bool IsAllSpacesOrTabs(char *s, unsigned int len) { hasFocus = false; hideSelection = false; inOverstrike = false; + drawOverstrikeCaret = true; errorStatus = 0; mouseDownCaptures = true;
@@ -212,8 +214,6 @@ static inline bool IsAllSpacesOrTabs(char *s, unsigned int len) {
wrapState = eWrapNone; wrapWidth = LineLayout::wrapWidthInfinite; - wrapStart = wrapLineLarge; - wrapEnd = wrapLineLarge; wrapVisualFlags = 0; wrapVisualFlagsLocation = 0; wrapVisualStartIndent = 0; @@ -961,8 +961,8 @@ int Editor::MovePositionTo(SelectionPosition newPos, Selection::selTypes selt, b int currentLine = pdoc->LineFromPosition(newPos.Position()); if (ensureVisible) { // In case in need of wrapping to ensure DisplayFromDoc works. - if (currentLine >= wrapStart) - WrapLines(true, -1); + if (currentLine >= wrapPending.start) + WrapLines(wsAll); XYScrollPosition newXY = XYScrollToMakeVisible( SelectionRange(posDrag.IsValid() ? posDrag : sel.RangeMain().caret), xysDefault); if (simpleCaret && (newXY.xOffset == xOffset)) { @@ -1541,17 +1541,12 @@ void Editor::UpdateSystemCaret() { }
void Editor::NeedWrapping(int docLineStart, int docLineEnd) { - docLineStart = Platform::Clamp(docLineStart, 0, pdoc->LinesTotal()); - if (wrapStart > docLineStart) { - wrapStart = docLineStart; +//Platform::DebugPrintf("\nNeedWrapping: %0d..%0d\n", docLineStart, docLineEnd); + if (wrapPending.AddRange(docLineStart, docLineEnd)) { llc.Invalidate(LineLayout::llPositions); } - if (wrapEnd < docLineEnd) { - wrapEnd = docLineEnd; - } - wrapEnd = Platform::Clamp(wrapEnd, 0, pdoc->LinesTotal()); // Wrap lines during idle. - if ((wrapState != eWrapNone) && (wrapEnd != wrapStart)) { + if ((wrapState != eWrapNone) && wrapPending.NeedsWrap()) { SetIdle(true); } } @@ -1567,117 +1562,97 @@ bool Editor::WrapOneLine(Surface *surface, int lineToWrap) { (vs.annotationVisible ? pdoc->AnnotationLines(lineToWrap) : 0)); }
-// Check if wrapping needed and perform any needed wrapping. -// fullwrap: if true, all lines which need wrapping will be done, -// in this single call. -// priorityWrapLineStart: If greater than or equal to zero, all lines starting from -// here to 1 page + 100 lines past will be wrapped (even if there are -// more lines under wrapping process in idle). -// If it is neither fullwrap, nor priorityWrap, then 1 page + 100 lines will be -// wrapped, if there are any wrapping going on in idle. (Generally this -// condition is called only from idler). +// Perform wrapping for a subset of the lines needing wrapping. +// wsAll: wrap all lines which need wrapping in this single call +// wsVisible: wrap currently visible lines +// wsIdle: wrap one page + 100 lines // Return true if wrapping occurred. -bool Editor::WrapLines(bool fullWrap, int priorityWrapLineStart) { - // If there are any pending wraps, do them during idle if possible. - int linesInOneCall = LinesOnScreen() + 100; - if (priorityWrapLineStart >= 0) { - // Using DocFromDisplay() here may result in chicken and egg problem in certain corner cases, - // which will hopefully be handled by added 100 lines. If some lines are still missed, idle wrapp@@ Diff output truncated at 100000 characters. @@
-------------- This E-Mail was brought to you by github_commit_mail.py (Source: https://github.com/geany/infrastructure).