From 755f6aa2e40ada035887c5d167dd40dc96502a9b Mon Sep 17 00:00:00 2001 From: Erik Eckstein Date: Tue, 4 Aug 2020 17:14:13 +0200 Subject: [PATCH] AST, SIL: Remove UTF16 encoding from StringLiteralExpr and StringLiteralInst The UTF16 encoding is not used (anymore). I think it became obsolete with the switch to the UTF8 String representation. --- include/swift/AST/Expr.h | 3 --- include/swift/Basic/Unicode.h | 4 ---- include/swift/SIL/SILInstruction.h | 1 - lib/AST/ASTDumper.cpp | 1 - lib/Basic/Unicode.cpp | 19 ------------------- lib/IRGen/GenConstant.cpp | 8 -------- lib/SIL/IR/SILGlobalVariable.cpp | 1 - lib/SIL/IR/SILInstructions.cpp | 3 --- lib/SIL/IR/SILPrinter.cpp | 1 - lib/SIL/Parser/ParseSIL.cpp | 2 -- lib/SILGen/SILGenApply.cpp | 10 ---------- .../Utils/SpecializationMangler.cpp | 1 - lib/Serialization/DeserializeSIL.cpp | 1 - lib/Serialization/ModuleFormat.h | 2 +- lib/Serialization/SILFormat.h | 1 - lib/Serialization/SerializeSIL.cpp | 1 - test/IRGen/literals.sil | 15 --------------- test/SILOptimizer/cse.sil | 2 +- test/SILOptimizer/sil_combine.sil | 6 +++--- 19 files changed, 5 insertions(+), 77 deletions(-) diff --git a/include/swift/AST/Expr.h b/include/swift/AST/Expr.h index a62c045674a18..7d3ee3823d2a5 100644 --- a/include/swift/AST/Expr.h +++ b/include/swift/AST/Expr.h @@ -861,9 +861,6 @@ class StringLiteralExpr : public LiteralExpr { /// A UTF-8 string. UTF8, - /// A UTF-16 string. - UTF16, - /// A single UnicodeScalar, passed as an integer. OneUnicodeScalar }; diff --git a/include/swift/Basic/Unicode.h b/include/swift/Basic/Unicode.h index ff2163eb0f374..4b1108af7fac1 100644 --- a/include/swift/Basic/Unicode.h +++ b/include/swift/Basic/Unicode.h @@ -68,10 +68,6 @@ bool isSingleUnicodeScalar(StringRef S); unsigned extractFirstUnicodeScalar(StringRef S); -/// Get the length of the UTF8 string transcoded into UTF16. -/// Returns the number of code units in UTF16 representation -uint64_t getUTF16Length(StringRef Str); - } // end namespace unicode } // end namespace swift diff --git a/include/swift/SIL/SILInstruction.h b/include/swift/SIL/SILInstruction.h index dd9e3731b9ac9..d504f37085831 100644 --- a/include/swift/SIL/SILInstruction.h +++ b/include/swift/SIL/SILInstruction.h @@ -3307,7 +3307,6 @@ class StringLiteralInst final enum class Encoding { Bytes, UTF8, - UTF16, /// UTF-8 encoding of an Objective-C selector. ObjCSelector, }; diff --git a/lib/AST/ASTDumper.cpp b/lib/AST/ASTDumper.cpp index 94e6654c5562b..8d46581edb4bf 100644 --- a/lib/AST/ASTDumper.cpp +++ b/lib/AST/ASTDumper.cpp @@ -368,7 +368,6 @@ static StringRef getStringLiteralExprEncodingString(StringLiteralExpr::Encoding value) { switch (value) { case StringLiteralExpr::UTF8: return "utf8"; - case StringLiteralExpr::UTF16: return "utf16"; case StringLiteralExpr::OneUnicodeScalar: return "unicodeScalar"; } diff --git a/lib/Basic/Unicode.cpp b/lib/Basic/Unicode.cpp index a299c457a7805..e7479be57820f 100644 --- a/lib/Basic/Unicode.cpp +++ b/lib/Basic/Unicode.cpp @@ -123,22 +123,3 @@ unsigned swift::unicode::extractFirstUnicodeScalar(StringRef S) { (void)Result; return Scalar; } - -uint64_t swift::unicode::getUTF16Length(StringRef Str) { - uint64_t Length; - // Transcode the string to UTF-16 to get its length. - SmallVector buffer(Str.size() + 1); // +1 for ending nulls. - const llvm::UTF8 *fromPtr = (const llvm::UTF8 *) Str.data(); - llvm::UTF16 *toPtr = &buffer[0]; - llvm::ConversionResult Result = - ConvertUTF8toUTF16(&fromPtr, fromPtr + Str.size(), - &toPtr, toPtr + Str.size(), - llvm::strictConversion); - assert(Result == llvm::conversionOK && - "UTF-8 encoded string cannot be converted into UTF-16 encoding"); - (void)Result; - - // The length of the transcoded string in UTF-16 code points. - Length = toPtr - &buffer[0]; - return Length; -} diff --git a/lib/IRGen/GenConstant.cpp b/lib/IRGen/GenConstant.cpp index 071d49f82603b..0d6e021e8327f 100644 --- a/lib/IRGen/GenConstant.cpp +++ b/lib/IRGen/GenConstant.cpp @@ -102,14 +102,6 @@ llvm::Constant *irgen::emitAddrOfConstantString(IRGenModule &IGM, case StringLiteralInst::Encoding::UTF8: return IGM.getAddrOfGlobalString(SLI->getValue()); - case StringLiteralInst::Encoding::UTF16: { - // This is always a GEP of a GlobalVariable with a nul terminator. - auto addr = IGM.getAddrOfGlobalUTF16String(SLI->getValue()); - - // Cast to Builtin.RawPointer. - return llvm::ConstantExpr::getBitCast(addr, IGM.Int8PtrTy); - } - case StringLiteralInst::Encoding::ObjCSelector: llvm_unreachable("cannot get the address of an Objective-C selector"); } diff --git a/lib/SIL/IR/SILGlobalVariable.cpp b/lib/SIL/IR/SILGlobalVariable.cpp index bad50c8437f93..609a8318b153d 100644 --- a/lib/SIL/IR/SILGlobalVariable.cpp +++ b/lib/SIL/IR/SILGlobalVariable.cpp @@ -157,7 +157,6 @@ bool SILGlobalVariable::isValidStaticInitializerInst(const SILInstruction *I, switch (cast(I)->getEncoding()) { case StringLiteralInst::Encoding::Bytes: case StringLiteralInst::Encoding::UTF8: - case StringLiteralInst::Encoding::UTF16: return true; case StringLiteralInst::Encoding::ObjCSelector: // Objective-C selector string literals cannot be used in static diff --git a/lib/SIL/IR/SILInstructions.cpp b/lib/SIL/IR/SILInstructions.cpp index 85e21ea609f21..1faa9ec6ae6a1 100644 --- a/lib/SIL/IR/SILInstructions.cpp +++ b/lib/SIL/IR/SILInstructions.cpp @@ -1043,9 +1043,6 @@ CondFailInst *CondFailInst::create(SILDebugLocation DebugLoc, SILValue Operand, } uint64_t StringLiteralInst::getCodeUnitCount() { - auto E = unsigned(Encoding::UTF16); - if (SILInstruction::Bits.StringLiteralInst.TheEncoding == E) - return unicode::getUTF16Length(getValue()); return SILInstruction::Bits.StringLiteralInst.Length; } diff --git a/lib/SIL/IR/SILPrinter.cpp b/lib/SIL/IR/SILPrinter.cpp index d4da68de7622f..bb84c23ef1e4c 100644 --- a/lib/SIL/IR/SILPrinter.cpp +++ b/lib/SIL/IR/SILPrinter.cpp @@ -1339,7 +1339,6 @@ class SILPrinter : public SILInstructionVisitor { switch (kind) { case StringLiteralInst::Encoding::Bytes: return "bytes "; case StringLiteralInst::Encoding::UTF8: return "utf8 "; - case StringLiteralInst::Encoding::UTF16: return "utf16 "; case StringLiteralInst::Encoding::ObjCSelector: return "objc_selector "; } llvm_unreachable("bad string literal encoding"); diff --git a/lib/SIL/Parser/ParseSIL.cpp b/lib/SIL/Parser/ParseSIL.cpp index 61f099db1956f..00ceaf52ca02e 100644 --- a/lib/SIL/Parser/ParseSIL.cpp +++ b/lib/SIL/Parser/ParseSIL.cpp @@ -2619,8 +2619,6 @@ bool SILParser::parseSpecificSILInstruction(SILBuilder &B, StringLiteralInst::Encoding encoding; if (P.Tok.getText() == "utf8") { encoding = StringLiteralInst::Encoding::UTF8; - } else if (P.Tok.getText() == "utf16") { - encoding = StringLiteralInst::Encoding::UTF16; } else if (P.Tok.getText() == "objc_selector") { encoding = StringLiteralInst::Encoding::ObjCSelector; } else if (P.Tok.getText() == "bytes") { diff --git a/lib/SILGen/SILGenApply.cpp b/lib/SILGen/SILGenApply.cpp index c8c80a3f1d3f3..bffecc2c2c187 100644 --- a/lib/SILGen/SILGenApply.cpp +++ b/lib/SILGen/SILGenApply.cpp @@ -1628,11 +1628,6 @@ static PreparedArguments emitStringLiteral(SILGenFunction &SGF, Expr *E, Length = Str.size(); break; - case StringLiteralExpr::UTF16: { - instEncoding = StringLiteralInst::Encoding::UTF16; - Length = unicode::getUTF16Length(Str); - break; - } case StringLiteralExpr::OneUnicodeScalar: { SILType Int32Ty = SILType::getBuiltinIntegerType(32, SGF.getASTContext()); SILValue UnicodeScalarValue = @@ -1674,11 +1669,6 @@ static PreparedArguments emitStringLiteral(SILGenFunction &SGF, Expr *E, ArrayRef Elts; ArrayRef TypeElts; switch (instEncoding) { - case StringLiteralInst::Encoding::UTF16: - Elts = llvm::makeArrayRef(EltsArray).slice(0, 2); - TypeElts = llvm::makeArrayRef(TypeEltsArray).slice(0, 2); - break; - case StringLiteralInst::Encoding::UTF8: Elts = EltsArray; TypeElts = TypeEltsArray; diff --git a/lib/SILOptimizer/Utils/SpecializationMangler.cpp b/lib/SILOptimizer/Utils/SpecializationMangler.cpp index 9807b43eb5f6d..3a37e3f3430ba 100644 --- a/lib/SILOptimizer/Utils/SpecializationMangler.cpp +++ b/lib/SILOptimizer/Utils/SpecializationMangler.cpp @@ -243,7 +243,6 @@ FunctionSignatureSpecializationMangler::mangleConstantProp(LiteralInst *LI) { switch (SLI->getEncoding()) { case StringLiteralInst::Encoding::Bytes: ArgOpBuffer << 'B'; break; case StringLiteralInst::Encoding::UTF8: ArgOpBuffer << 'b'; break; - case StringLiteralInst::Encoding::UTF16: ArgOpBuffer << 'w'; break; case StringLiteralInst::Encoding::ObjCSelector: ArgOpBuffer << 'c'; break; } break; diff --git a/lib/Serialization/DeserializeSIL.cpp b/lib/Serialization/DeserializeSIL.cpp index 26ee442264524..97c84001d009c 100644 --- a/lib/Serialization/DeserializeSIL.cpp +++ b/lib/Serialization/DeserializeSIL.cpp @@ -53,7 +53,6 @@ fromStableStringEncoding(unsigned value) { switch (value) { case SIL_BYTES: return StringLiteralInst::Encoding::Bytes; case SIL_UTF8: return StringLiteralInst::Encoding::UTF8; - case SIL_UTF16: return StringLiteralInst::Encoding::UTF16; case SIL_OBJC_SELECTOR: return StringLiteralInst::Encoding::ObjCSelector; default: return None; } diff --git a/lib/Serialization/ModuleFormat.h b/lib/Serialization/ModuleFormat.h index 6bf15541faf97..b3a7a893f0932 100644 --- a/lib/Serialization/ModuleFormat.h +++ b/lib/Serialization/ModuleFormat.h @@ -55,7 +55,7 @@ const uint16_t SWIFTMODULE_VERSION_MAJOR = 0; /// describe what change you made. The content of this comment isn't important; /// it just ensures a conflict if two people change the module format. /// Don't worry about adhering to the 80-column limit for this line. -const uint16_t SWIFTMODULE_VERSION_MINOR = 567; // async sil modifier +const uint16_t SWIFTMODULE_VERSION_MINOR = 568; // removed UTF16 /// A standard hash seed used for all string hashes in a serialized module. /// diff --git a/lib/Serialization/SILFormat.h b/lib/Serialization/SILFormat.h index 3a68d6c0baddf..dfa1ea155b886 100644 --- a/lib/Serialization/SILFormat.h +++ b/lib/Serialization/SILFormat.h @@ -31,7 +31,6 @@ using SILTypeCategoryField = BCFixed<2>; enum SILStringEncoding : uint8_t { SIL_UTF8, - SIL_UTF16, SIL_OBJC_SELECTOR, SIL_BYTES }; diff --git a/lib/Serialization/SerializeSIL.cpp b/lib/Serialization/SerializeSIL.cpp index 849138d351534..00e9443213ea0 100644 --- a/lib/Serialization/SerializeSIL.cpp +++ b/lib/Serialization/SerializeSIL.cpp @@ -49,7 +49,6 @@ static unsigned toStableStringEncoding(StringLiteralInst::Encoding encoding) { switch (encoding) { case StringLiteralInst::Encoding::Bytes: return SIL_BYTES; case StringLiteralInst::Encoding::UTF8: return SIL_UTF8; - case StringLiteralInst::Encoding::UTF16: return SIL_UTF16; case StringLiteralInst::Encoding::ObjCSelector: return SIL_OBJC_SELECTOR; } llvm_unreachable("bad string encoding"); diff --git a/test/IRGen/literals.sil b/test/IRGen/literals.sil index 193bf812b86ce..24c2b9450780d 100644 --- a/test/IRGen/literals.sil +++ b/test/IRGen/literals.sil @@ -4,8 +4,6 @@ // CHECK: [[U8_0:@.*]] = private unnamed_addr constant [8 x i8] c"help\09me\00" // CHECK: [[U8_1:@.*]] = private unnamed_addr constant [5 x i8] c"\00x\C6\AB\00" -// CHECK: [[U16_0:@.*]] = private unnamed_addr constant [8 x i16] [i16 104, i16 101, i16 108, i16 112, i16 9, i16 109, i16 101, i16 0] -// CHECK: [[U16_1:@.*]] = private unnamed_addr constant [4 x i16] [i16 0, i16 120, i16 427, i16 0] sil_stage canonical @@ -27,16 +25,3 @@ bb0: // CHECK: define{{( dllexport)?}}{{( protected)?}} swiftcc i8* @utf8_literal_with_nul() {{.*}} { // CHECK: ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* [[U8_1]], i64 0, i64 0) -sil @utf16_literal : $@convention(thin) () -> Builtin.RawPointer { -bb0: - %0 = string_literal utf16 "help\tme" - return %0 : $Builtin.RawPointer -} -// CHECK: define{{( dllexport)?}}{{( protected)?}} swiftcc i8* @utf16_literal() {{.*}} { -// CHECK: ret i8* bitcast ([8 x i16]* [[U16_0]] to i8*) - -sil @utf16_literal_with_nul : $@convention(thin) () -> Builtin.RawPointer { -bb0: - %0 = string_literal utf16 "\u{00}x\u{01ab}" - return %0 : $Builtin.RawPointer -} diff --git a/test/SILOptimizer/cse.sil b/test/SILOptimizer/cse.sil index 52f65b49001b3..60df4ccb7d4a6 100644 --- a/test/SILOptimizer/cse.sil +++ b/test/SILOptimizer/cse.sil @@ -523,7 +523,7 @@ sil @helper2 : $@convention(thin) (UInt8, UInt8) -> Builtin.Word // CHECK-LABEL: sil @sil_string_different_encodings sil @sil_string_different_encodings : $@convention(thin) () -> Builtin.Word { %0 = string_literal utf8 "help" - %1 = string_literal utf16 "help" + %1 = string_literal objc_selector "help" %2 = function_ref @helper : $@convention(thin) (Builtin.RawPointer, Builtin.RawPointer) -> Builtin.Word %3 = apply %2(%0, %1) : $@convention(thin) (Builtin.RawPointer, Builtin.RawPointer) -> Builtin.Word return %3 : $Builtin.Word diff --git a/test/SILOptimizer/sil_combine.sil b/test/SILOptimizer/sil_combine.sil index 81b635f2b21e4..145399c069f35 100644 --- a/test/SILOptimizer/sil_combine.sil +++ b/test/SILOptimizer/sil_combine.sil @@ -1850,7 +1850,7 @@ bb0(%0 : $Builtin.NativeObject, %1 : $Builtin.RawPointer): //CHECK: return sil @remove_pointer_compare_to_zero : $@convention(thin) (Int) -> () { bb0(%0 : $Int): - %1 = string_literal utf16 "ss" + %1 = string_literal utf8 "ss" %2 = integer_literal $Builtin.Word, 0 %4 = builtin "inttoptr_Word"(%2 : $Builtin.Word) : $Builtin.RawPointer %6 = builtin "cmp_eq_RawPointer"(%1 : $Builtin.RawPointer, %4 : $Builtin.RawPointer) : $Builtin.Int1 @@ -1865,7 +1865,7 @@ bb0(%0 : $Int): //CHECK: unreachable sil @remove_pointer_compare_to_zero_NE : $@convention(thin) (Int) -> () { bb0(%0 : $Int): - %1 = string_literal utf16 "ss" + %1 = string_literal utf8 "ss" %2 = integer_literal $Builtin.Word, 0 %4 = builtin "inttoptr_Word"(%2 : $Builtin.Word) : $Builtin.RawPointer %6 = builtin "cmp_ne_RawPointer"(%1 : $Builtin.RawPointer, %4 : $Builtin.RawPointer) : $Builtin.Int1 @@ -1881,7 +1881,7 @@ bb0(%0 : $Int): //CHECK: return sil @remove_pointer_compare_to_zero_arith : $@convention(thin) (Builtin.Word) -> () { bb0(%0 : $Builtin.Word): - %1 = string_literal utf16 "ss" + %1 = string_literal utf8 "ss" %2 = integer_literal $Builtin.Word, 0 %3 = integer_literal $Builtin.Word, 4 %4 = integer_literal $Builtin.Int1, -1