diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp
index fef46453c959e..06a949bda90b5 100644
--- a/lib/Parse/Lexer.cpp
+++ b/lib/Parse/Lexer.cpp
@@ -1241,19 +1241,6 @@ static bool diagnoseZeroWidthMatchAndAdvance(char Target, const char *&CurPtr,
   return *CurPtr == Target && CurPtr++;
 }
 
-/// advanceIfMultilineDelimiter - Centralized check for multiline delimiter.
-static bool advanceIfMultilineDelimiter(const char *&CurPtr,
-                                        DiagnosticEngine *Diags) {
-  const char *TmpPtr = CurPtr;
-  if (*(TmpPtr - 1) == '"' &&
-      diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags) &&
-      diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags)) {
-    CurPtr = TmpPtr;
-    return true;
-  }
-  return false;
-}
-
 /// advanceIfCustomDelimiter - Extracts/detects any custom delimiter on
 /// opening a string literal, advances CurPtr if a delimiter is found and
 /// returns a non-zero delimiter length. CurPtr[-1] must be '#' when called.
@@ -1300,6 +1287,46 @@ static bool delimiterMatches(unsigned CustomDelimiterLen, const char *&BytesPtr,
   return true;
 }
 
+/// advanceIfMultilineDelimiter - Centralized check for multiline delimiter.
+/// Returns true and advances CurPtr past the next two characters when
+/// CurPtr[-1] and both of those characters are '"'; otherwise returns false
+/// and leaves CurPtr unchanged. When IsOpening is set and CustomDelimiterLen
+/// is non-zero, also returns false if delimiterMatches succeeds right after
+/// some later '"' on the current line, because the literal is then a
+/// single-line string that merely resembles a multiline delimiter.
+static bool advanceIfMultilineDelimiter(unsigned CustomDelimiterLen,
+                                        const char *&CurPtr,
+                                        DiagnosticEngine *Diags,
+                                        bool IsOpening = false) {
+
+  // Test for single-line string literals that resemble multiline delimiter.
+  const char *TmpPtr = CurPtr;
+  if (IsOpening && CustomDelimiterLen && *TmpPtr != '\0') {
+    // Stop at NUL as well as at a newline so that a last line without a
+    // trailing newline cannot carry the scan past the end of the buffer.
+    ++TmpPtr;
+    while (*TmpPtr != '\r' && *TmpPtr != '\n' && *TmpPtr != '\0') {
+      if (*TmpPtr == '"') {
+        if (delimiterMatches(CustomDelimiterLen, ++TmpPtr, nullptr)) {
+          return false;
+        }
+        continue;
+      }
+      ++TmpPtr;
+    }
+  }
+
+  TmpPtr = CurPtr;
+  if (*(TmpPtr - 1) == '"' &&
+      diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags) &&
+      diagnoseZeroWidthMatchAndAdvance('"', TmpPtr, Diags)) {
+    CurPtr = TmpPtr;
+    return true;
+  }
+
+  return false;
+}
+
 /// lexCharacter - Read a character and return its UTF32 code. If this is the
 /// end of enclosing string/character sequence (i.e. the character is equal to
 /// 'StopQuote'), this returns ~0U and advances 'CurPtr' pointing to the end of
@@ -1342,7 +1369,8 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
 
       DiagnosticEngine *D = EmitDiagnostics ? Diags : nullptr;
       auto TmpPtr = CurPtr;
-      if (IsMultilineString && !advanceIfMultilineDelimiter(TmpPtr, D))
+      if (IsMultilineString &&
+          !advanceIfMultilineDelimiter(CustomDelimiterLen, TmpPtr, D))
         return '"';
       if (CustomDelimiterLen &&
           !delimiterMatches(CustomDelimiterLen, TmpPtr, D, /*IsClosing=*/true))
@@ -1478,7 +1506,9 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
       if (!inStringLiteral()) {
         // Open string literal.
         OpenDelimiters.push_back(CurPtr[-1]);
-        AllowNewline.push_back(advanceIfMultilineDelimiter(CurPtr, nullptr));
+        AllowNewline.push_back(advanceIfMultilineDelimiter(CustomDelimiterLen,
+                                                           CurPtr, nullptr,
+                                                           true));
         CustomDelimiter.push_back(CustomDelimiterLen);
         continue;
       }
@@ -1490,7 +1520,8 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
         continue;
 
       // Multi-line string can only be closed by '"""'.
-      if (AllowNewline.back() && !advanceIfMultilineDelimiter(CurPtr, nullptr))
+      if (AllowNewline.back() &&
+          !advanceIfMultilineDelimiter(CustomDelimiterLen, CurPtr, nullptr))
         continue;
 
       // Check whether we have equivalent number of '#'s.
@@ -1827,7 +1858,8 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) {
   // diagnostics about changing them to double quotes.
   assert((QuoteChar == '"' || QuoteChar == '\'') && "Unexpected start");
 
-  bool IsMultilineString = advanceIfMultilineDelimiter(CurPtr, Diags);
+  bool IsMultilineString = advanceIfMultilineDelimiter(CustomDelimiterLen,
+                                                       CurPtr, Diags, true);
   if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r')
     diagnose(CurPtr, diag::lex_illegal_multiline_string_start)
       .fixItInsert(Lexer::getSourceLoc(CurPtr), "\n");
diff --git a/test/Parse/raw_string.swift b/test/Parse/raw_string.swift
index d07bbd24fa54f..cbf721ec9ee31 100644
--- a/test/Parse/raw_string.swift
+++ b/test/Parse/raw_string.swift
@@ -68,6 +68,49 @@ _ = ##"""
   """##
 // CHECK: "a raw string with \"\"\" in it"
 
+// ===---------- False Multiline Delimiters --------===
+
+/// Source code contains zero-width character in this format: `#"[U+200B]"[U+200B]"#`
+/// The check contains zero-width character in this format: `"[U+200B]\"[U+200B]"`
+/// If this check fails after you implement `diagnoseZeroWidthMatchAndAdvance`,
+/// then you may need to tweak how to test for single-line string literals that
+/// resemble a multiline delimiter in `advanceIfMultilineDelimiter` so that it
+/// passes again.
+/// See https://bugs.swift.org/browse/SR-8678
+_ = #"​"​"#
+// CHECK: "​\"​"
+
+_ = #""""#
+// CHECK: "\"\""
+
+_ = #"""""#
+// CHECK: "\"\"\""
+
+_ = #""""""#
+// CHECK: "\"\"\"\""
+
+_ = #"""#
+// CHECK: "\""
+
+_ = ##""" foo # "# "##
+// CHECK: "\"\" foo # \"# "
+
+_ = ###""" "# "## "###
+// CHECK: "\"\" \"# \"## "
+
+_ = ###"""##"###
+// CHECK: "\"\"##"
+
+_ = "interpolating \(#"""false delimiter"#)"
+// CHECK: "interpolating "
+// CHECK: "\"\"false delimiter"
+
+_ = """
+  interpolating \(#"""false delimiters"""#)
+  """
+// CHECK: "interpolating "
+// CHECK: "\"\"false delimiters\"\""
+
 let foo = "Interpolation"
 _ = #"\b\b \#(foo)\#(foo) Kappa"#
 // CHECK: "\\b\\b "
diff --git a/test/Parse/raw_string_errors.swift b/test/Parse/raw_string_errors.swift
index 0ba693fc79f30..07e5a72cb8410 100644
--- a/test/Parse/raw_string_errors.swift
+++ b/test/Parse/raw_string_errors.swift
@@ -9,6 +9,11 @@ let _ = #"\##("invalid")"#
 // expected-error@-1{{too many '#' characters in delimited escape}}
 // expected-error@-2{{invalid escape sequence in literal}}
 
+let _ = ###"""invalid"######
+// expected-error@-1{{too many '#' characters in closing delimiter}}{{26-29=}}
+// expected-error@-2{{consecutive statements on a line must be separated by ';'}}
+// expected-error@-3{{expected expression}}
+
 let _ = ####"invalid"###
 // expected-error@-1{{unterminated string literal}}
 
@@ -17,8 +22,16 @@ let _ = ###"invalid"######
 // expected-error@-2{{consecutive statements on a line must be separated by ';'}}
 // expected-error@-3{{expected expression}}
 
-let _ = ##"""##
+let _ = ##"""aa
   foobar
-  ##"""##
+  aa"""##
 // expected-error@-3{{multi-line string literal content must begin on a new line}}{{14-14=\n}}
 // expected-error@-2{{multi-line string literal closing delimiter must begin on a new line}}{{5-5=\n}}
+
+let _ = #""" foo "bar" #baz
+  """#
+// expected-error@-2{{multi-line string literal content must begin on a new line}}{{13-13=\n}}
+
+let _ = ###""" "# "##
+  """###
+// expected-error@-2{{multi-line string literal content must begin on a new line}}{{15-15=\n}}