Skip to content

AST, SIL: Remove UTF16 encoding from StringLiteralExpr and StringLiteralInst #33288

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions include/swift/AST/Expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -861,9 +861,6 @@ class StringLiteralExpr : public LiteralExpr {
/// A UTF-8 string.
UTF8,

/// A UTF-16 string.
UTF16,

/// A single UnicodeScalar, passed as an integer.
OneUnicodeScalar
};
Expand Down
4 changes: 0 additions & 4 deletions include/swift/Basic/Unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,6 @@ bool isSingleUnicodeScalar(StringRef S);

unsigned extractFirstUnicodeScalar(StringRef S);

/// Get the length of the UTF8 string transcoded into UTF16.
/// Returns the number of code units in UTF16 representation
uint64_t getUTF16Length(StringRef Str);

} // end namespace unicode
} // end namespace swift

Expand Down
1 change: 0 additions & 1 deletion include/swift/SIL/SILInstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -3307,7 +3307,6 @@ class StringLiteralInst final
/// The encoding attached to a string literal instruction.
/// NOTE(review): enumerator values are implicit, so they follow declaration
/// order; code that stores the value as an integer depends on that order.
enum class Encoding {
/// NOTE(review): presumably uninterpreted raw bytes — confirm against users.
Bytes,
/// A UTF-8 string.
UTF8,
/// A UTF-16 string.
UTF16,
/// UTF-8 encoding of an Objective-C selector.
ObjCSelector,
};
Expand Down
1 change: 0 additions & 1 deletion lib/AST/ASTDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,6 @@ static StringRef
getStringLiteralExprEncodingString(StringLiteralExpr::Encoding value) {
switch (value) {
case StringLiteralExpr::UTF8: return "utf8";
case StringLiteralExpr::UTF16: return "utf16";
case StringLiteralExpr::OneUnicodeScalar: return "unicodeScalar";
}

Expand Down
19 changes: 0 additions & 19 deletions lib/Basic/Unicode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,22 +123,3 @@ unsigned swift::unicode::extractFirstUnicodeScalar(StringRef S) {
(void)Result;
return Scalar;
}

/// Get the length of the UTF-8 string \p Str once transcoded into UTF-16.
///
/// The string is converted with ConvertUTF8toUTF16 under strict conversion,
/// and the conversion is asserted to succeed.
///
/// \param Str the UTF-8 text to measure.
/// \returns the number of UTF-16 code units (not code points) the conversion
///   produced.
uint64_t swift::unicode::getUTF16Length(StringRef Str) {
  // A UTF-16 rendering never needs more code units than the UTF-8 input has
  // bytes, so Str.size() units of output are enough. The extra slot keeps the
  // buffer non-empty for an empty string; the conversion itself is only
  // offered Str.size() units and does not write a terminator.
  SmallVector<llvm::UTF16, 128> buffer(Str.size() + 1);

  const auto *fromPtr = reinterpret_cast<const llvm::UTF8 *>(Str.data());
  llvm::UTF16 *const bufferStart = buffer.data();
  llvm::UTF16 *toPtr = bufferStart;

  // On return, toPtr has been advanced past the last code unit written.
  const llvm::ConversionResult Result =
      ConvertUTF8toUTF16(&fromPtr, fromPtr + Str.size(),
                         &toPtr, toPtr + Str.size(),
                         llvm::strictConversion);
  assert(Result == llvm::conversionOK &&
         "UTF-8 encoded string cannot be converted into UTF-16 encoding");
  (void)Result; // Only read by the assert; silences unused warnings otherwise.

  // The length of the transcoded string in UTF-16 code units.
  return static_cast<uint64_t>(toPtr - bufferStart);
}
8 changes: 0 additions & 8 deletions lib/IRGen/GenConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,6 @@ llvm::Constant *irgen::emitAddrOfConstantString(IRGenModule &IGM,
case StringLiteralInst::Encoding::UTF8:
return IGM.getAddrOfGlobalString(SLI->getValue());

case StringLiteralInst::Encoding::UTF16: {
// This is always a GEP of a GlobalVariable with a nul terminator.
auto addr = IGM.getAddrOfGlobalUTF16String(SLI->getValue());

// Cast to Builtin.RawPointer.
return llvm::ConstantExpr::getBitCast(addr, IGM.Int8PtrTy);
}

case StringLiteralInst::Encoding::ObjCSelector:
llvm_unreachable("cannot get the address of an Objective-C selector");
}
Expand Down
1 change: 0 additions & 1 deletion lib/SIL/IR/SILGlobalVariable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ bool SILGlobalVariable::isValidStaticInitializerInst(const SILInstruction *I,
switch (cast<StringLiteralInst>(I)->getEncoding()) {
case StringLiteralInst::Encoding::Bytes:
case StringLiteralInst::Encoding::UTF8:
case StringLiteralInst::Encoding::UTF16:
return true;
case StringLiteralInst::Encoding::ObjCSelector:
// Objective-C selector string literals cannot be used in static
Expand Down
3 changes: 0 additions & 3 deletions lib/SIL/IR/SILInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1043,9 +1043,6 @@ CondFailInst *CondFailInst::create(SILDebugLocation DebugLoc, SILValue Operand,
}

uint64_t StringLiteralInst::getCodeUnitCount() {
auto E = unsigned(Encoding::UTF16);
if (SILInstruction::Bits.StringLiteralInst.TheEncoding == E)
return unicode::getUTF16Length(getValue());
return SILInstruction::Bits.StringLiteralInst.Length;
}

Expand Down
1 change: 0 additions & 1 deletion lib/SIL/IR/SILPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1339,7 +1339,6 @@ class SILPrinter : public SILInstructionVisitor<SILPrinter> {
switch (kind) {
case StringLiteralInst::Encoding::Bytes: return "bytes ";
case StringLiteralInst::Encoding::UTF8: return "utf8 ";
case StringLiteralInst::Encoding::UTF16: return "utf16 ";
case StringLiteralInst::Encoding::ObjCSelector: return "objc_selector ";
}
llvm_unreachable("bad string literal encoding");
Expand Down
2 changes: 0 additions & 2 deletions lib/SIL/Parser/ParseSIL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2619,8 +2619,6 @@ bool SILParser::parseSpecificSILInstruction(SILBuilder &B,
StringLiteralInst::Encoding encoding;
if (P.Tok.getText() == "utf8") {
encoding = StringLiteralInst::Encoding::UTF8;
} else if (P.Tok.getText() == "utf16") {
encoding = StringLiteralInst::Encoding::UTF16;
} else if (P.Tok.getText() == "objc_selector") {
encoding = StringLiteralInst::Encoding::ObjCSelector;
} else if (P.Tok.getText() == "bytes") {
Expand Down
10 changes: 0 additions & 10 deletions lib/SILGen/SILGenApply.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1628,11 +1628,6 @@ static PreparedArguments emitStringLiteral(SILGenFunction &SGF, Expr *E,
Length = Str.size();
break;

case StringLiteralExpr::UTF16: {
instEncoding = StringLiteralInst::Encoding::UTF16;
Length = unicode::getUTF16Length(Str);
break;
}
case StringLiteralExpr::OneUnicodeScalar: {
SILType Int32Ty = SILType::getBuiltinIntegerType(32, SGF.getASTContext());
SILValue UnicodeScalarValue =
Expand Down Expand Up @@ -1674,11 +1669,6 @@ static PreparedArguments emitStringLiteral(SILGenFunction &SGF, Expr *E,
ArrayRef<ManagedValue> Elts;
ArrayRef<AnyFunctionType::Param> TypeElts;
switch (instEncoding) {
case StringLiteralInst::Encoding::UTF16:
Elts = llvm::makeArrayRef(EltsArray).slice(0, 2);
TypeElts = llvm::makeArrayRef(TypeEltsArray).slice(0, 2);
break;

case StringLiteralInst::Encoding::UTF8:
Elts = EltsArray;
TypeElts = TypeEltsArray;
Expand Down
1 change: 0 additions & 1 deletion lib/SILOptimizer/Utils/SpecializationMangler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,6 @@ FunctionSignatureSpecializationMangler::mangleConstantProp(LiteralInst *LI) {
switch (SLI->getEncoding()) {
case StringLiteralInst::Encoding::Bytes: ArgOpBuffer << 'B'; break;
case StringLiteralInst::Encoding::UTF8: ArgOpBuffer << 'b'; break;
case StringLiteralInst::Encoding::UTF16: ArgOpBuffer << 'w'; break;
case StringLiteralInst::Encoding::ObjCSelector: ArgOpBuffer << 'c'; break;
}
break;
Expand Down
1 change: 0 additions & 1 deletion lib/Serialization/DeserializeSIL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ fromStableStringEncoding(unsigned value) {
switch (value) {
case SIL_BYTES: return StringLiteralInst::Encoding::Bytes;
case SIL_UTF8: return StringLiteralInst::Encoding::UTF8;
case SIL_UTF16: return StringLiteralInst::Encoding::UTF16;
case SIL_OBJC_SELECTOR: return StringLiteralInst::Encoding::ObjCSelector;
default: return None;
}
Expand Down
2 changes: 1 addition & 1 deletion lib/Serialization/ModuleFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ const uint16_t SWIFTMODULE_VERSION_MAJOR = 0;
/// describe what change you made. The content of this comment isn't important;
/// it just ensures a conflict if two people change the module format.
/// Don't worry about adhering to the 80-column limit for this line.
const uint16_t SWIFTMODULE_VERSION_MINOR = 567; // async sil modifier
const uint16_t SWIFTMODULE_VERSION_MINOR = 568; // removed UTF16

/// A standard hash seed used for all string hashes in a serialized module.
///
Expand Down
1 change: 0 additions & 1 deletion lib/Serialization/SILFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ using SILTypeCategoryField = BCFixed<2>;

/// String literal encoding tags used by the SIL serialization code.
/// toStableStringEncoding and fromStableStringEncoding translate between
/// these tags and StringLiteralInst::Encoding.
/// NOTE(review): values are assigned implicitly by declaration order, so
/// adding, removing, or reordering an entry renumbers the ones after it —
/// presumably that requires a module format version bump; confirm.
enum SILStringEncoding : uint8_t {
SIL_UTF8,
SIL_UTF16,
SIL_OBJC_SELECTOR,
SIL_BYTES
};
Expand Down
1 change: 0 additions & 1 deletion lib/Serialization/SerializeSIL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ static unsigned toStableStringEncoding(StringLiteralInst::Encoding encoding) {
switch (encoding) {
case StringLiteralInst::Encoding::Bytes: return SIL_BYTES;
case StringLiteralInst::Encoding::UTF8: return SIL_UTF8;
case StringLiteralInst::Encoding::UTF16: return SIL_UTF16;
case StringLiteralInst::Encoding::ObjCSelector: return SIL_OBJC_SELECTOR;
}
llvm_unreachable("bad string encoding");
Expand Down
15 changes: 0 additions & 15 deletions test/IRGen/literals.sil
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

// CHECK: [[U8_0:@.*]] = private unnamed_addr constant [8 x i8] c"help\09me\00"
// CHECK: [[U8_1:@.*]] = private unnamed_addr constant [5 x i8] c"\00x\C6\AB\00"
// CHECK: [[U16_0:@.*]] = private unnamed_addr constant [8 x i16] [i16 104, i16 101, i16 108, i16 112, i16 9, i16 109, i16 101, i16 0]
// CHECK: [[U16_1:@.*]] = private unnamed_addr constant [4 x i16] [i16 0, i16 120, i16 427, i16 0]

sil_stage canonical

Expand All @@ -27,16 +25,3 @@ bb0:
// CHECK: define{{( dllexport)?}}{{( protected)?}} swiftcc i8* @utf8_literal_with_nul() {{.*}} {
// CHECK: ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* [[U8_1]], i64 0, i64 0)

sil @utf16_literal : $@convention(thin) () -> Builtin.RawPointer {
bb0:
%0 = string_literal utf16 "help\tme"
return %0 : $Builtin.RawPointer
}
// CHECK: define{{( dllexport)?}}{{( protected)?}} swiftcc i8* @utf16_literal() {{.*}} {
// CHECK: ret i8* bitcast ([8 x i16]* [[U16_0]] to i8*)

sil @utf16_literal_with_nul : $@convention(thin) () -> Builtin.RawPointer {
bb0:
%0 = string_literal utf16 "\u{00}x\u{01ab}"
return %0 : $Builtin.RawPointer
}
2 changes: 1 addition & 1 deletion test/SILOptimizer/cse.sil
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ sil @helper2 : $@convention(thin) (UInt8, UInt8) -> Builtin.Word
// CHECK-LABEL: sil @sil_string_different_encodings
sil @sil_string_different_encodings : $@convention(thin) () -> Builtin.Word {
%0 = string_literal utf8 "help"
%1 = string_literal utf16 "help"
%1 = string_literal objc_selector "help"
%2 = function_ref @helper : $@convention(thin) (Builtin.RawPointer, Builtin.RawPointer) -> Builtin.Word
%3 = apply %2(%0, %1) : $@convention(thin) (Builtin.RawPointer, Builtin.RawPointer) -> Builtin.Word
return %3 : $Builtin.Word
Expand Down
6 changes: 3 additions & 3 deletions test/SILOptimizer/sil_combine.sil
Original file line number Diff line number Diff line change
Expand Up @@ -1850,7 +1850,7 @@ bb0(%0 : $Builtin.NativeObject, %1 : $Builtin.RawPointer):
//CHECK: return
sil @remove_pointer_compare_to_zero : $@convention(thin) (Int) -> () {
bb0(%0 : $Int):
%1 = string_literal utf16 "ss"
%1 = string_literal utf8 "ss"
%2 = integer_literal $Builtin.Word, 0
%4 = builtin "inttoptr_Word"(%2 : $Builtin.Word) : $Builtin.RawPointer
%6 = builtin "cmp_eq_RawPointer"(%1 : $Builtin.RawPointer, %4 : $Builtin.RawPointer) : $Builtin.Int1
Expand All @@ -1865,7 +1865,7 @@ bb0(%0 : $Int):
//CHECK: unreachable
sil @remove_pointer_compare_to_zero_NE : $@convention(thin) (Int) -> () {
bb0(%0 : $Int):
%1 = string_literal utf16 "ss"
%1 = string_literal utf8 "ss"
%2 = integer_literal $Builtin.Word, 0
%4 = builtin "inttoptr_Word"(%2 : $Builtin.Word) : $Builtin.RawPointer
%6 = builtin "cmp_ne_RawPointer"(%1 : $Builtin.RawPointer, %4 : $Builtin.RawPointer) : $Builtin.Int1
Expand All @@ -1881,7 +1881,7 @@ bb0(%0 : $Int):
//CHECK: return
sil @remove_pointer_compare_to_zero_arith : $@convention(thin) (Builtin.Word) -> () {
bb0(%0 : $Builtin.Word):
%1 = string_literal utf16 "ss"
%1 = string_literal utf8 "ss"
%2 = integer_literal $Builtin.Word, 0
%3 = integer_literal $Builtin.Word, 4
%4 = integer_literal $Builtin.Int1, -1
Expand Down