Closed
Description
Given the following code (Godbolt):
// Scoped enumeration with a single enumerator, A, whose value is 0.
enum class E {
A = 0,
};
// 1x1 compile-time table: t[0][0] is its only element and holds 1.
constexpr int t[1][1]{{1}};
// Reads t using the same enum value, converted to int, for both dimensions.
// t is declared 1x1, so 0 is the only in-bounds index in either dimension.
int f1(E a) {
return t[static_cast<int>(a)][static_cast<int>(a)];
}
// Reads t using a (converted to int) for the first dimension and b for the second.
// t is declared 1x1, so 0 is the only in-bounds index in either dimension.
int f2(E a, E b) {
return t[static_cast<int>(a)][static_cast<int>(b)];
}
clang-trunk generates this assembly:
# clang-trunk output for f1: the element address is computed at run time and
# the value is loaded from t instead of being folded to a constant.
f1(E): # @f1(E)
movsxd rax, edi # rax = 32-bit value in edi, sign-extended to 64 bits
lea rcx, [rip + t] # rcx = address of t (RIP-relative)
lea rcx, [rcx + 4*rax] # rcx = t + 4*rax (first index, scaled by 4)
mov eax, dword ptr [rcx + 4*rax] # eax = 32-bit load at rcx + 4*rax (second index, same value)
ret
# clang-trunk output for f2: same shape as f1, but the two indices come from
# two different registers; the value is still loaded from t at run time.
f2(E, E): # @f2(E, E)
movsxd rax, edi # rax = 32-bit value in edi, sign-extended to 64 bits
movsxd rcx, esi # rcx = 32-bit value in esi, sign-extended to 64 bits
lea rdx, [rip + t] # rdx = address of t (RIP-relative)
lea rax, [rdx + 4*rax] # rax = t + 4*rax (first index, scaled by 4)
mov eax, dword ptr [rax + 4*rcx] # eax = 32-bit load at rax + 4*rcx (second index)
ret
# Data for t: a single 32-bit value, 1.
t:
.long 1 # 0x1
and this IR:
; The table as an internal constant: a [1 x [1 x i32]] whose only element is 1.
@_ZL1t = internal unnamed_addr constant [1 x [1 x i32]] [[1 x i32] [i32 1]], align 4, !dbg !0
; First function: sign-extends its i32 argument to i64 and uses that value for
; both variable indices of an inbounds getelementptr into @_ZL1t, then loads
; and returns the addressed i32. The load is not folded to a constant here.
define dso_local noundef i32 @_Z2f11E(i32 noundef %0) local_unnamed_addr #0 !dbg !23 {
call void @llvm.dbg.value(metadata i32 %0, metadata !27, metadata !DIExpression()), !dbg !28
%2 = sext i32 %0 to i64, !dbg !29
%3 = getelementptr inbounds [1 x [1 x i32]], ptr @_ZL1t, i64 0, i64 %2, i64 %2, !dbg !29
%4 = load i32, ptr %3, align 4, !dbg !29, !tbaa !30
ret i32 %4, !dbg !34
}
; Second function: sign-extends each of its two i32 arguments to i64 and uses
; them as the two variable indices of an inbounds getelementptr into @_ZL1t,
; then loads and returns the addressed i32. The load is not folded here either.
define dso_local noundef i32 @_Z2f21ES_(i32 noundef %0, i32 noundef %1) local_unnamed_addr #0 !dbg !35 {
call void @llvm.dbg.value(metadata i32 %0, metadata !39, metadata !DIExpression()), !dbg !41
call void @llvm.dbg.value(metadata i32 %1, metadata !40, metadata !DIExpression()), !dbg !41
%3 = sext i32 %0 to i64, !dbg !42
%4 = sext i32 %1 to i64, !dbg !42
%5 = getelementptr inbounds [1 x [1 x i32]], ptr @_ZL1t, i64 0, i64 %3, i64 %4, !dbg !42
%6 = load i32, ptr %5, align 4, !dbg !42, !tbaa !30
ret i32 %6, !dbg !43
}
; Declaration of the debug-value intrinsic called by both function definitions.
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
; Attribute group #0: referenced by both function definitions.
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
; Attribute group #1: referenced by the llvm.dbg.value declaration.
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
While gcc-trunk generates just:
# gcc-trunk output for f1: returns the constant 1 without reading t.
f1(E):
mov eax, 1
ret
# gcc-trunk output for f2: returns the constant 1 without reading t.
f2(E, E):
mov eax, 1
ret
Interestingly, when using just a plain enum without the `static_cast`s, Clang generates the optimal code (Godbolt). But with the `static_cast`s it still doesn't (Godbolt).
Using `__builtin_unreachable()` to explicitly tell the compiler that `a` and `b` are `E::A` works for `f1()` but, strangely, not for `f2()` (Godbolt).
But using `__builtin_assume(...)` works for both (Godbolt).
Declaring the underlying type of `enum class E` as `unsigned short` or `unsigned char` also fixes the problem (Godbolt); the same goes for casting to `unsigned short` or `unsigned char` instead of `int` (Godbolt).