From 52068f0286e9575c7c5609923c61732504919508 Mon Sep 17 00:00:00 2001 From: Michael Verges Date: Tue, 5 Mar 2019 18:52:51 -0500 Subject: [PATCH 01/10] [Lexer] SR-10011: Allow the multiline delimiter to be escaped in raw strings. The multiline delimiter for Strings (a.k.a. """ or triple quote) can now be escaped when the correct number of terminating '#' marks is found on the same line. Previously, #"""# was invalid because only a newline character was allowed to follow the opening triple quote. Now, #"""# is a valid String equivalent to "\"". [See SR-10011](https://bugs.swift.org/browse/SR-10011) --- lib/Parse/Lexer.cpp | 16 ++++++++++++++-- unittests/Parse/LexerTests.cpp | 26 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 1caa40abc68b4..070ed09c5a96c 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -1843,9 +1843,21 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { assert((QuoteChar == '"' || QuoteChar == '\'') && "Unexpected start"); bool IsMultilineString = advanceIfMultilineDelimiter(CurPtr, Diags); - if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r') - diagnose(CurPtr, diag::lex_illegal_multiline_string_start) + // Test for single-line Strings that may resemble multiline delimiter + if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r') { + const char *TmpPtr = CurPtr + 1; + TmpPtr = skipToEndOfInterpolatedExpression(TmpPtr, BufferEnd, false); + TmpPtr = TmpPtr - CustomDelimiterLen; + if (CustomDelimiterLen != 0 + && delimiterMatches(CustomDelimiterLen, TmpPtr, Diags)) { + // Undo effects from falsely detecting multiline delimiter + CurPtr = CurPtr - 2; + IsMultilineString = false; + } else { + diagnose(CurPtr, diag::lex_illegal_multiline_string_start) .fixItInsert(Lexer::getSourceLoc(CurPtr), "\n"); + } + } bool wasErroneous = false; while (true) { diff --git a/unittests/Parse/LexerTests.cpp 
b/unittests/Parse/LexerTests.cpp index a1428e452b5e8..e4c3b6f4760be 100644 --- a/unittests/Parse/LexerTests.cpp +++ b/unittests/Parse/LexerTests.cpp @@ -124,6 +124,32 @@ TEST_F(LexerTest, StringLiteralWithNUL1) { EXPECT_EQ(Toks[1].getLength(), 0U); } +TEST_F(LexerTest, StringLiteralFalseMultilineDelimiter) { + const char *Source = + "#\"\"\"meow\"#\n" + "#\"\"\"#" + ; + std::vector ExpectedTokens{ + tok::string_literal, tok::string_literal + }; + std::vector Toks = checkLex(Source, ExpectedTokens); + EXPECT_EQ(Toks[0].getLength(), 10U); + EXPECT_EQ(Toks[1].getLength(), 5U); +} + +TEST_F(LexerTest, StringLiteralInvalidMultilineDelimiter) { + const char *Source = + "#\"\"\"meow#\n" + "#\"\"\"meow" + ; + std::vector ExpectedTokens{ + tok::unknown, tok::unknown + }; + std::vector Toks = checkLex(Source, ExpectedTokens); + EXPECT_EQ(Toks[0].getLength(), 9U); + EXPECT_EQ(Toks[1].getLength(), 8U); +} + TEST_F(LexerTest, ContentStartHashbangSkip) { const char *Source = "#!/usr/bin/swift\naaa"; From 6ba19f5577a4c4e5b0f7b82b83bdbaa034defe16 Mon Sep 17 00:00:00 2001 From: Michael Verges Date: Fri, 8 Mar 2019 20:07:47 -0500 Subject: [PATCH 02/10] Remove tests from incorrect location. 
Adapt tests for escaping multiline strings --- test/Parse/raw_string.swift | 3 +++ test/Parse/raw_string_errors.swift | 4 ++-- unittests/Parse/LexerTests.cpp | 26 -------------------------- 3 files changed, 5 insertions(+), 28 deletions(-) diff --git a/test/Parse/raw_string.swift b/test/Parse/raw_string.swift index d07bbd24fa54f..553df3c07bc91 100644 --- a/test/Parse/raw_string.swift +++ b/test/Parse/raw_string.swift @@ -68,6 +68,9 @@ _ = ##""" """## // CHECK: "a raw string with \"\"\" in it" +_ = #"""# +// CHECK: "a raw string with \" in it" + let foo = "Interpolation" _ = #"\b\b \#(foo)\#(foo) Kappa"# // CHECK: "\\b\\b " diff --git a/test/Parse/raw_string_errors.swift b/test/Parse/raw_string_errors.swift index 0ba693fc79f30..f3db7518513e0 100644 --- a/test/Parse/raw_string_errors.swift +++ b/test/Parse/raw_string_errors.swift @@ -17,8 +17,8 @@ let _ = ###"invalid"###### // expected-error@-2{{consecutive statements on a line must be separated by ';'}} // expected-error@-3{{expected expression}} -let _ = ##"""## +let _ = ##"""a foobar - ##"""## + a"""## // expected-error@-3{{multi-line string literal content must begin on a new line}}{{14-14=\n}} // expected-error@-2{{multi-line string literal closing delimiter must begin on a new line}}{{5-5=\n}} diff --git a/unittests/Parse/LexerTests.cpp b/unittests/Parse/LexerTests.cpp index e4c3b6f4760be..a1428e452b5e8 100644 --- a/unittests/Parse/LexerTests.cpp +++ b/unittests/Parse/LexerTests.cpp @@ -124,32 +124,6 @@ TEST_F(LexerTest, StringLiteralWithNUL1) { EXPECT_EQ(Toks[1].getLength(), 0U); } -TEST_F(LexerTest, StringLiteralFalseMultilineDelimiter) { - const char *Source = - "#\"\"\"meow\"#\n" - "#\"\"\"#" - ; - std::vector ExpectedTokens{ - tok::string_literal, tok::string_literal - }; - std::vector Toks = checkLex(Source, ExpectedTokens); - EXPECT_EQ(Toks[0].getLength(), 10U); - EXPECT_EQ(Toks[1].getLength(), 5U); -} - -TEST_F(LexerTest, StringLiteralInvalidMultilineDelimiter) { - const char *Source = - 
"#\"\"\"meow#\n" - "#\"\"\"meow" - ; - std::vector ExpectedTokens{ - tok::unknown, tok::unknown - }; - std::vector Toks = checkLex(Source, ExpectedTokens); - EXPECT_EQ(Toks[0].getLength(), 9U); - EXPECT_EQ(Toks[1].getLength(), 8U); -} - TEST_F(LexerTest, ContentStartHashbangSkip) { const char *Source = "#!/usr/bin/swift\naaa"; From abfa95eedab20f23725bfcf2cb5d7c6b62ffcd8b Mon Sep 17 00:00:00 2001 From: Michael Verges Date: Fri, 8 Mar 2019 21:52:39 -0500 Subject: [PATCH 03/10] Iterate through buffer to find raw delimiters before newline --- lib/Parse/Lexer.cpp | 30 +++++++++++++++++++----------- test/Parse/raw_string.swift | 5 ++++- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 070ed09c5a96c..564b7401dcc06 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -1843,19 +1843,27 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { assert((QuoteChar == '"' || QuoteChar == '\'') && "Unexpected start"); bool IsMultilineString = advanceIfMultilineDelimiter(CurPtr, Diags); - // Test for single-line Strings that may resemble multiline delimiter if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r') { - const char *TmpPtr = CurPtr + 1; - TmpPtr = skipToEndOfInterpolatedExpression(TmpPtr, BufferEnd, false); - TmpPtr = TmpPtr - CustomDelimiterLen; - if (CustomDelimiterLen != 0 - && delimiterMatches(CustomDelimiterLen, TmpPtr, Diags)) { - // Undo effects from falsely detecting multiline delimiter - CurPtr = CurPtr - 2; - IsMultilineString = false; - } else { + // Test for single-line Strings that may resemble multiline delimiter + for (const char *Ptr = CurPtr; Ptr <= BufferEnd-CustomDelimiterLen; Ptr++) { + if (*Ptr == '\r' || *Ptr == '\n') { + break; + } + if (*Ptr == '#') { + const char *TmpPtr = Ptr + 1; + while (*TmpPtr == '#') { + TmpPtr++; + } + if (TmpPtr-Ptr == CustomDelimiterLen) { + // Undo effects from falsely detecting multiline delimiter + CurPtr = CurPtr - 2; + 
IsMultilineString = false; + } + } + } + if (IsMultilineString) { diagnose(CurPtr, diag::lex_illegal_multiline_string_start) - .fixItInsert(Lexer::getSourceLoc(CurPtr), "\n"); + .fixItInsert(Lexer::getSourceLoc(CurPtr), "\n"); } } diff --git a/test/Parse/raw_string.swift b/test/Parse/raw_string.swift index 553df3c07bc91..d2ceca9ef64ad 100644 --- a/test/Parse/raw_string.swift +++ b/test/Parse/raw_string.swift @@ -69,7 +69,10 @@ _ = ##""" // CHECK: "a raw string with \"\"\" in it" _ = #"""# -// CHECK: "a raw string with \" in it" +// CHECK: "\"" + +_ = ###"""##"### +// CHECK: "\"\"##" let foo = "Interpolation" _ = #"\b\b \#(foo)\#(foo) Kappa"# From e1b8ddfb662282100419bb9991836daac5e94af7 Mon Sep 17 00:00:00 2001 From: Michael Verges Date: Fri, 8 Mar 2019 21:57:50 -0500 Subject: [PATCH 04/10] Added missing break statement --- lib/Parse/Lexer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 564b7401dcc06..ebbb8795d184f 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -1858,6 +1858,7 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { // Undo effects from falsely detecting multiline delimiter CurPtr = CurPtr - 2; IsMultilineString = false; + break; } } } From 22d4de83e0683c0ebd1b7f386712f58ed86966bf Mon Sep 17 00:00:00 2001 From: Xiaodi Wu Date: Sat, 9 Mar 2019 13:24:25 -0500 Subject: [PATCH 05/10] Apply markup edits from code review Co-Authored-By: maustinstar --- lib/Parse/Lexer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index ebbb8795d184f..e6853d1cae6d6 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -1844,7 +1844,7 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { bool IsMultilineString = advanceIfMultilineDelimiter(CurPtr, Diags); if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r') { - // Test for single-line Strings that may resemble multiline delimiter + // Test for single-line 
string literals that may resemble multiline delimiter. for (const char *Ptr = CurPtr; Ptr <= BufferEnd-CustomDelimiterLen; Ptr++) { if (*Ptr == '\r' || *Ptr == '\n') { break; @@ -1855,7 +1855,7 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { TmpPtr++; } if (TmpPtr-Ptr == CustomDelimiterLen) { - // Undo effects from falsely detecting multiline delimiter + // Undo effects from falsely detecting multiline delimiter. CurPtr = CurPtr - 2; IsMultilineString = false; break; From 4a6155b7d7070e79a99d4a7f9f4927d62414895e Mon Sep 17 00:00:00 2001 From: Michael Verges Date: Mon, 11 Mar 2019 15:58:10 -0400 Subject: [PATCH 06/10] Use delimiterMatches after checking for quote. --- lib/Parse/Lexer.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index ebbb8795d184f..0a1ba3edd9f3d 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -1845,16 +1845,13 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { bool IsMultilineString = advanceIfMultilineDelimiter(CurPtr, Diags); if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r') { // Test for single-line Strings that may resemble multiline delimiter - for (const char *Ptr = CurPtr; Ptr <= BufferEnd-CustomDelimiterLen; Ptr++) { + for (const char *Ptr = CurPtr-1; Ptr <= BufferEnd-CustomDelimiterLen; Ptr++) { if (*Ptr == '\r' || *Ptr == '\n') { break; } - if (*Ptr == '#') { + if (*Ptr == '"') { const char *TmpPtr = Ptr + 1; - while (*TmpPtr == '#') { - TmpPtr++; - } - if (TmpPtr-Ptr == CustomDelimiterLen) { + if (delimiterMatches(CustomDelimiterLen, TmpPtr, nullptr)) { // Undo effects from falsely detecting multiline delimiter CurPtr = CurPtr - 2; IsMultilineString = false; From 9e378c9becc2d3f00d9dbe56bb7259f89f03d20a Mon Sep 17 00:00:00 2001 From: Michael Verges Date: Mon, 11 Mar 2019 16:00:28 -0400 Subject: [PATCH 07/10] Edit comments --- lib/Parse/Lexer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 0a1ba3edd9f3d..f6636616177d6 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -1844,7 +1844,7 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { bool IsMultilineString = advanceIfMultilineDelimiter(CurPtr, Diags); if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r') { - // Test for single-line Strings that may resemble multiline delimiter + // Test for single-line string literals that may resemble multiline delimiter. for (const char *Ptr = CurPtr-1; Ptr <= BufferEnd-CustomDelimiterLen; Ptr++) { if (*Ptr == '\r' || *Ptr == '\n') { break; @@ -1852,7 +1852,7 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { if (*Ptr == '"') { const char *TmpPtr = Ptr + 1; if (delimiterMatches(CustomDelimiterLen, TmpPtr, nullptr)) { - // Undo effects from falsely detecting multiline delimiter + // Undo effects from falsely detecting multiline delimiter. CurPtr = CurPtr - 2; IsMultilineString = false; break; From ff29bdb33fc5211eecfa5718e2ce23a97fae1722 Mon Sep 17 00:00:00 2001 From: Michael Verges Date: Fri, 15 Mar 2019 09:23:06 -0400 Subject: [PATCH 08/10] Add test cases --- test/Parse/raw_string.swift | 6 ++++++ test/Parse/raw_string_errors.swift | 10 +++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/test/Parse/raw_string.swift b/test/Parse/raw_string.swift index d2ceca9ef64ad..b84ebef7f94f7 100644 --- a/test/Parse/raw_string.swift +++ b/test/Parse/raw_string.swift @@ -71,6 +71,12 @@ _ = ##""" _ = #"""# // CHECK: "\"" +_ = ##""" foo # "# "## +// CHECK: "\"\" foo # \"# " + +_ = #"""""# +// CHECK: "\"\"\"" + _ = ###"""##"### // CHECK: "\"\"##" diff --git a/test/Parse/raw_string_errors.swift b/test/Parse/raw_string_errors.swift index f3db7518513e0..51868d3c1f1c0 100644 --- a/test/Parse/raw_string_errors.swift +++ b/test/Parse/raw_string_errors.swift @@ -12,6 +12,9 @@ let _ = #"\##("invalid")"# let _ = ####"invalid"### // expected-error@-1{{unterminated string literal}} 
+let _ = ###"""invalid"## +// expected-error@-1{{unterminated string literal}} + let _ = ###"invalid"###### // expected-error@-1{{too many '#' characters in closing delimiter}}{{24-27=}} // expected-error@-2{{consecutive statements on a line must be separated by ';'}} @@ -20,5 +23,10 @@ let _ = ###"invalid"###### let _ = ##"""a foobar a"""## -// expected-error@-3{{multi-line string literal content must begin on a new line}}{{14-14=\n}} +// expected-error@-1{{multi-line string literal content must begin on a new line}}{{14-14=\n}} // expected-error@-2{{multi-line string literal closing delimiter must begin on a new line}}{{5-5=\n}} + +let _ = #""" foo "bar" #baz +"""# +// expected-error@-1{{multi-line string literal content must begin on a new line}}{{14-14=\n}} + From c2e9818dfb5894e1cb26eb671e25e174d409f466 Mon Sep 17 00:00:00 2001 From: Michael Verges Date: Mon, 25 Mar 2019 09:34:52 -0400 Subject: [PATCH 09/10] Check for CustomDelimiterLen > 0 --- lib/Parse/Lexer.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index f6636616177d6..9ef984d848057 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -1845,18 +1845,20 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { bool IsMultilineString = advanceIfMultilineDelimiter(CurPtr, Diags); if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r') { // Test for single-line string literals that may resemble multiline delimiter. - for (const char *Ptr = CurPtr-1; Ptr <= BufferEnd-CustomDelimiterLen; Ptr++) { - if (*Ptr == '\r' || *Ptr == '\n') { - break; - } - if (*Ptr == '"') { - const char *TmpPtr = Ptr + 1; - if (delimiterMatches(CustomDelimiterLen, TmpPtr, nullptr)) { - // Undo effects from falsely detecting multiline delimiter. 
- CurPtr = CurPtr - 2; - IsMultilineString = false; + if (CustomDelimiterLen > 0) { + for (const char *Ptr = CurPtr-1; Ptr <= BufferEnd-CustomDelimiterLen; Ptr++) { + if (*Ptr == '\r' || *Ptr == '\n') { break; } + if (*Ptr == '"') { + const char *TmpPtr = Ptr + 1; + if (delimiterMatches(CustomDelimiterLen, TmpPtr, nullptr)) { + // Undo effects from falsely detecting multiline delimiter. + CurPtr = CurPtr - 2; + IsMultilineString = false; + break; + } + } } } if (IsMultilineString) { From 4540f70876bf4092af133bdab3467acee15fdfc7 Mon Sep 17 00:00:00 2001 From: maustinstar Date: Thu, 18 Apr 2019 10:38:40 -0400 Subject: [PATCH 10/10] Alter test; remove unterminated multiline string that eats entire file --- test/Parse/raw_string_errors.swift | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/Parse/raw_string_errors.swift b/test/Parse/raw_string_errors.swift index 51868d3c1f1c0..145e082bfa109 100644 --- a/test/Parse/raw_string_errors.swift +++ b/test/Parse/raw_string_errors.swift @@ -12,8 +12,10 @@ let _ = #"\##("invalid")"# let _ = ####"invalid"### // expected-error@-1{{unterminated string literal}} -let _ = ###"""invalid"## -// expected-error@-1{{unterminated string literal}} +let _ = ###"""invalid"###### +// expected-error@-1{{too many '#' characters in closing delimiter}}{{26-29=}} +// expected-error@-2{{consecutive statements on a line must be separated by ';'}} +// expected-error@-3{{expected expression}} let _ = ###"invalid"###### // expected-error@-1{{too many '#' characters in closing delimiter}}{{24-27=}}