From d87168dfc107478878e72422164c472858718f47 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Fri, 3 Nov 2023 10:06:09 +0000 Subject: [PATCH 1/5] [CodeGen][AArch64] Set min jump table entries to 13 for AArch64 targets There are some workloads that are negatively impacted by using jump tables when the number of entries is small. The SPEC2017 perlbench benchmark is one example of this, where increasing the threshold to around 13 gives a ~1.5% improvement on neoverse-v1. I chose the minimum threshold based on empirical evidence rather than science, and just manually increased the threshold until I got the best performance without impacting other workloads. For neoverse-v1 I saw a ~0.2% improvement in the SPEC2017 integer geomean, and no overall change for neoverse-n1. If we find issues with this threshold later on we can always revisit this. The most significant SPEC2017 score changes on neoverse-v1 were: 500.perlbench_r: +1.6% 520.omnetpp_r: +0.6% and the rest saw changes < 0.5%. I updated CodeGen/AArch64/min-jump-table.ll to reflect the new threshold. For most of the affected tests I manually set the min number of entries back to 4 on the RUN line because the tests seem to rely upon this behaviour. 
--- .../Target/AArch64/AArch64ISelLowering.cpp | 6 ++ .../GlobalISel/arm64-irtranslator-switch.ll | 2 +- llvm/test/CodeGen/AArch64/arm64-jumptable.ll | 4 +- .../CodeGen/AArch64/bti-branch-relaxation.ll | 2 +- .../implicit-def-subreg-to-reg-regression.ll | 2 +- llvm/test/CodeGen/AArch64/jump-table-32.ll | 2 +- .../test/CodeGen/AArch64/jump-table-exynos.ll | 4 +- llvm/test/CodeGen/AArch64/jump-table.ll | 12 +-- llvm/test/CodeGen/AArch64/max-jump-table.ll | 10 +- llvm/test/CodeGen/AArch64/min-jump-table.ll | 96 ++++++++++++++++++- .../AArch64/patchable-function-entry-bti.ll | 2 +- .../AArch64/redundant-mov-from-zero-extend.ll | 41 ++++---- .../AArch64/switch-unreachable-default.ll | 2 +- llvm/test/CodeGen/AArch64/win64-jumptable.ll | 4 +- llvm/test/CodeGen/AArch64/wineh-bti.ll | 2 +- .../Generic/machine-function-splitter.ll | 8 +- llvm/test/DebugInfo/COFF/jump-table.ll | 2 +- 17 files changed, 150 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 291f0c8c5d991..002b95c68272f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -144,6 +144,10 @@ static cl::opt EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden, static cl::opt MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden, cl::desc("Maximum of xors")); +static cl::opt AArch64MinimumJumpTableEntries( + "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden, + cl::desc("Set minimum number of entries to use a jump table on AArch64")); + /// Value type used for condition codes. 
static const MVT MVT_CC = MVT::i32; @@ -1653,6 +1657,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); IsStrictFPEnabled = true; + + setMinimumJumpTableEntries(AArch64MinimumJumpTableEntries); } void AArch64TargetLowering::addTypeForNEON(MVT VT) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll index 4e4297b7a5e22..9371edd439fe4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple aarch64 -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -simplify-mir -verify-machineinstrs %s -o - 2>&1 | FileCheck %s +; RUN: llc -global-isel -mtriple aarch64 -aarch64-min-jump-table-entries=4 -O0 -aarch64-enable-atomic-cfg-tidy=0 -stop-after=irtranslator -simplify-mir -verify-machineinstrs %s -o - 2>&1 | FileCheck %s define i32 @switch(i32 %argc) { ; CHECK-LABEL: name: switch diff --git a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll index d4ac9e72b28ff..7d9adf92a6a95 100644 --- a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll +++ b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-LINUX +; RUN: llc -mtriple=arm64-apple-ios -aarch64-min-jump-table-entries=4 < %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -aarch64-min-jump-table-entries=4 < %s | FileCheck %s --check-prefix=CHECK-LINUX ; define void @sum(i32 %a, ptr %to, i32 %c) { diff --git a/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll b/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll index 
d9ed54fb86bf1..e3e7f42526f78 100644 --- a/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll +++ b/llvm/test/CodeGen/AArch64/bti-branch-relaxation.ll @@ -1,4 +1,4 @@ -; RUN: llc %s -o - | FileCheck %s +; RUN: llc %s -aarch64-min-jump-table-entries=4 -o - | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64" diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll index 73b180dc4ae76..0f208f8ed9052 100644 --- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll +++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s +; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s ; Check there's no assert in spilling from implicit-def operands on an ; IMPLICIT_DEF. 
diff --git a/llvm/test/CodeGen/AArch64/jump-table-32.ll b/llvm/test/CodeGen/AArch64/jump-table-32.ll index d8572e901af29..e65813de8943e 100644 --- a/llvm/test/CodeGen/AArch64/jump-table-32.ll +++ b/llvm/test/CodeGen/AArch64/jump-table-32.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64_32-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=arm64_32-apple-ios7.0 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s define i32 @test_jumptable(i32 %in) { ; CHECK: test_jumptable diff --git a/llvm/test/CodeGen/AArch64/jump-table-exynos.ll b/llvm/test/CodeGen/AArch64/jump-table-exynos.ll index b5b400ecfbffc..61b0df5de2af3 100644 --- a/llvm/test/CodeGen/AArch64/jump-table-exynos.ll +++ b/llvm/test/CodeGen/AArch64/jump-table-exynos.ll @@ -1,5 +1,5 @@ -; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mattr=+force-32bit-jump-tables -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s -; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=exynos-m3 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mattr=+force-32bit-jump-tables -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu -mcpu=exynos-m3 -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s ; Exynos doesn't want jump tables to be compressed for now. 
diff --git a/llvm/test/CodeGen/AArch64/jump-table.ll b/llvm/test/CodeGen/AArch64/jump-table.ll index 6c32444657542..bb6c74b3b9df6 100644 --- a/llvm/test/CodeGen/AArch64/jump-table.ll +++ b/llvm/test/CodeGen/AArch64/jump-table.ll @@ -1,9 +1,9 @@ -; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s -; RUN: llc -no-integrated-as -code-model=large -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s -; RUN: llc -no-integrated-as -code-model=large -relocation-model=pic -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-PIC %s -; RUN: llc -no-integrated-as -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -aarch64-enable-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s -; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-IOS %s -; RUN: llc -no-integrated-as -code-model=tiny -verify-machineinstrs -o - %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-TINY %s +; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc -no-integrated-as -code-model=large -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-LARGE %s +; RUN: llc -no-integrated-as -code-model=large -relocation-model=pic -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-PIC %s +; RUN: llc -no-integrated-as -mtriple=aarch64-none-linux-gnu -verify-machineinstrs 
-relocation-model=pic -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 -o - %s | FileCheck --check-prefix=CHECK-PIC %s +; RUN: llc -no-integrated-as -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios -aarch64-min-jump-table-entries=4 -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-IOS %s +; RUN: llc -no-integrated-as -code-model=tiny -verify-machineinstrs -o - %s -aarch64-min-jump-table-entries=4 -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck --check-prefix=CHECK-TINY %s define i32 @test_jumptable(i32 %in) { ; CHECK: test_jumptable diff --git a/llvm/test/CodeGen/AArch64/max-jump-table.ll b/llvm/test/CodeGen/AArch64/max-jump-table.ll index d01924a9a5427..1268f5e6167ad 100644 --- a/llvm/test/CodeGen/AArch64/max-jump-table.ll +++ b/llvm/test/CodeGen/AArch64/max-jump-table.ll @@ -1,8 +1,8 @@ -; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0 < %t -; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4 < %t -; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8 < %t -; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -max-jump-table-size=16 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK16 < %t -; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m3 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t +; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0 < %t +; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu 
-aarch64-min-jump-table-entries=4 -jump-table-density=40 -max-jump-table-size=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4 < %t +; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -max-jump-table-size=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8 < %t +; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -max-jump-table-size=16 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK16 < %t +; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -aarch64-min-jump-table-entries=4 -jump-table-density=40 -mcpu=exynos-m3 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t declare void @ext(i32, i32) diff --git a/llvm/test/CodeGen/AArch64/min-jump-table.ll b/llvm/test/CodeGen/AArch64/min-jump-table.ll index bf6bddf7b9c4f..3b3f79746de0a 100644 --- a/llvm/test/CodeGen/AArch64/min-jump-table.ll +++ b/llvm/test/CodeGen/AArch64/min-jump-table.ll @@ -1,7 +1,9 @@ -; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=0 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0 < %t -; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=2 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK2 < %t -; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4 < %t -; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8 < %t +; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -aarch64-min-jump-table-entries=0 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK0 < %t +; RUN: llc %s 
-O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -aarch64-min-jump-table-entries=2 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK2 < %t +; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -aarch64-min-jump-table-entries=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4 < %t +; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -aarch64-min-jump-table-entries=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8 < %t +; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -aarch64-min-jump-table-entries=12 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK12 < %t +; RUN: llc %s -O2 -print-after-isel -mtriple=aarch64-linux-gnu -jump-table-density=40 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT < %t declare void @ext(i32, i32) @@ -16,6 +18,8 @@ entry: ; CHECK2-NEXT: Jump Tables: ; CHECK4-NOT: {{^}}Jump Tables: ; CHECK8-NOT: {{^}}Jump Tables: +; CHECK12-NOT: {{^}}Jump Tables: +; CHECK-DEFAULT-NOT: {{^}}Jump Tables: bb1: tail call void @ext(i32 1, i32 0) br label %return bb2: tail call void @ext(i32 2, i32 2) br label %return @@ -36,6 +40,8 @@ entry: ; CHECK2-NEXT: Jump Tables: ; CHECK4-NEXT: Jump Tables: ; CHECK8-NOT: {{^}}Jump Tables: +; CHECK12-NOT: {{^}}Jump Tables: +; CHECK-DEFAULT-NOT: {{^}}Jump Tables: bb1: tail call void @ext(i32 1, i32 0) br label %return bb2: tail call void @ext(i32 3, i32 2) br label %return @@ -58,6 +64,83 @@ entry: i32 9, label %bb8 ] ; CHECK-LABEL: function jt8: +; CHECK0-NEXT: Jump Tables: +; CHECK2-NEXT: Jump Tables: +; CHECK4-NEXT: Jump Tables: +; CHECK8-NEXT: Jump Tables: +; CHECK12-NOT: Jump Tables: +; CHECK-DEFAULT-NOT: {{^}}Jump Tables: + +bb1: tail call void @ext(i32 1, i32 0) br label %return +bb2: tail call void @ext(i32 2, i32 2) br label %return +bb3: tail call void @ext(i32 3, i32 4) br label %return +bb4: tail call void @ext(i32 
4, i32 6) br label %return +bb5: tail call void @ext(i32 5, i32 8) br label %return +bb6: tail call void @ext(i32 6, i32 10) br label %return +bb7: tail call void @ext(i32 7, i32 12) br label %return +bb8: tail call void @ext(i32 8, i32 14) br label %return + +return: ret i32 %b +} + +define i32 @jt12(i32 %a, i32 %b) { +entry: + switch i32 %a, label %return [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + i32 7, label %bb7 + i32 8, label %bb8 + i32 9, label %bb9 + i32 10, label %bb10 + i32 11, label %bb11 + i32 12, label %bb12 + ] +; CHECK-LABEL: function jt12: +; CHECK0-NEXT: Jump Tables: +; CHECK2-NEXT: Jump Tables: +; CHECK4-NEXT: Jump Tables: +; CHECK8-NEXT: Jump Tables: +; CHECK12-NEXT: Jump Tables: +; CHECK-DEFAULT-NOT: {{^}}Jump Tables: + +bb1: tail call void @ext(i32 1, i32 0) br label %return +bb2: tail call void @ext(i32 2, i32 2) br label %return +bb3: tail call void @ext(i32 3, i32 4) br label %return +bb4: tail call void @ext(i32 4, i32 6) br label %return +bb5: tail call void @ext(i32 5, i32 8) br label %return +bb6: tail call void @ext(i32 6, i32 10) br label %return +bb7: tail call void @ext(i32 7, i32 12) br label %return +bb8: tail call void @ext(i32 8, i32 14) br label %return +bb9: tail call void @ext(i32 9, i32 16) br label %return +bb10: tail call void @ext(i32 10, i32 18) br label %return +bb11: tail call void @ext(i32 11, i32 20) br label %return +bb12: tail call void @ext(i32 12, i32 22) br label %return + +return: ret i32 %b +} + +define i32 @jt13(i32 %a, i32 %b) { +entry: + switch i32 %a, label %return [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + i32 7, label %bb7 + i32 8, label %bb8 + i32 9, label %bb9 + i32 10, label %bb10 + i32 11, label %bb11 + i32 12, label %bb12 + i32 13, label %bb13 + ] +; CHECK-LABEL: function jt13: ; CHECK-NEXT: Jump Tables: bb1: tail call void @ext(i32 1, i32 
0) br label %return @@ -68,6 +151,11 @@ bb5: tail call void @ext(i32 5, i32 8) br label %return bb6: tail call void @ext(i32 6, i32 10) br label %return bb7: tail call void @ext(i32 7, i32 12) br label %return bb8: tail call void @ext(i32 8, i32 14) br label %return +bb9: tail call void @ext(i32 9, i32 16) br label %return +bb10: tail call void @ext(i32 10, i32 18) br label %return +bb11: tail call void @ext(i32 11, i32 20) br label %return +bb12: tail call void @ext(i32 12, i32 22) br label %return +bb13: tail call void @ext(i32 13, i32 24) br label %return return: ret i32 %b } diff --git a/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll b/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll index 15657730c2cdc..106f6bb856b6b 100644 --- a/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll +++ b/llvm/test/CodeGen/AArch64/patchable-function-entry-bti.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64 -aarch64-min-jump-table-entries=4 %s -o - | FileCheck %s define void @f0() "patchable-function-entry"="0" "branch-target-enforcement"="true" { ; CHECK-LABEL: f0: diff --git a/llvm/test/CodeGen/AArch64/redundant-mov-from-zero-extend.ll b/llvm/test/CodeGen/AArch64/redundant-mov-from-zero-extend.ll index c150cb889313a..caa28b31a6f65 100644 --- a/llvm/test/CodeGen/AArch64/redundant-mov-from-zero-extend.ll +++ b/llvm/test/CodeGen/AArch64/redundant-mov-from-zero-extend.ll @@ -11,33 +11,37 @@ define i32 @test(i32 %input, i32 %n, i32 %a) { ; CHECK-NEXT: .LBB0_2: // %bb.0 ; CHECK-NEXT: add w8, w0, w1 ; CHECK-NEXT: mov w0, #100 // =0x64 -; CHECK-NEXT: cmp w8, #4 -; CHECK-NEXT: b.hi .LBB0_5 +; CHECK-NEXT: cmp w8, #1 +; CHECK-NEXT: b.le .LBB0_7 ; CHECK-NEXT: // %bb.3: // %bb.0 -; CHECK-NEXT: adrp x9, .LJTI0_0 -; CHECK-NEXT: add x9, x9, :lo12:.LJTI0_0 -; CHECK-NEXT: adr x10, .LBB0_4 -; CHECK-NEXT: ldrb w11, [x9, x8] -; CHECK-NEXT: add x10, x10, x11, lsl #2 -; CHECK-NEXT: br x10 -; CHECK-NEXT: .LBB0_4: // %sw.bb 
-; CHECK-NEXT: add w0, w2, #1 -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_5: // %bb.0 +; CHECK-NEXT: cmp w8, #2 +; CHECK-NEXT: b.eq .LBB0_10 +; CHECK-NEXT: // %bb.4: // %bb.0 +; CHECK-NEXT: cmp w8, #4 +; CHECK-NEXT: b.eq .LBB0_11 +; CHECK-NEXT: // %bb.5: // %bb.0 ; CHECK-NEXT: cmp w8, #200 -; CHECK-NEXT: b.ne .LBB0_9 +; CHECK-NEXT: b.ne .LBB0_12 ; CHECK-NEXT: // %bb.6: // %sw.bb7 ; CHECK-NEXT: add w0, w2, #7 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_7: // %sw.bb3 +; CHECK-NEXT: .LBB0_7: // %bb.0 +; CHECK-NEXT: cbz w8, .LBB0_13 +; CHECK-NEXT: // %bb.8: // %bb.0 +; CHECK-NEXT: cmp w8, #1 +; CHECK-NEXT: b.ne .LBB0_12 +; CHECK-NEXT: // %bb.9: // %sw.bb1 +; CHECK-NEXT: add w0, w2, #3 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_10: // %sw.bb3 ; CHECK-NEXT: add w0, w2, #4 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_8: // %sw.bb5 +; CHECK-NEXT: .LBB0_11: // %sw.bb5 ; CHECK-NEXT: add w0, w2, #5 -; CHECK-NEXT: .LBB0_9: // %return +; CHECK-NEXT: .LBB0_12: // %return ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_10: // %sw.bb1 -; CHECK-NEXT: add w0, w2, #3 +; CHECK-NEXT: .LBB0_13: // %sw.bb +; CHECK-NEXT: add w0, w2, #1 ; CHECK-NEXT: ret entry: %b = add nsw i32 %input, %n @@ -77,3 +81,4 @@ return: %retval.0 = phi i32 [ %add8, %sw.bb7 ], [ %add6, %sw.bb5 ], [ %add4, %sw.bb3 ], [ %add2, %sw.bb1 ], [ %add, %sw.bb ], [ 100, %bb.0 ], [ 0, %entry ] ret i32 %retval.0 } + diff --git a/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll b/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll index 9acc5a150b630..949485bf52f2e 100644 --- a/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll +++ b/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll @@ -1,4 +1,4 @@ -; RUN: llc -O3 -o - %s | FileCheck %s +; RUN: llc -O3 -aarch64-min-jump-table-entries=4 -o - %s | FileCheck %s ; Test that the output in the presence of an unreachable default does not have ; a compare and branch at the top of the switch to handle the default case. 
diff --git a/llvm/test/CodeGen/AArch64/win64-jumptable.ll b/llvm/test/CodeGen/AArch64/win64-jumptable.ll index 0b9b7deceae11..f9f2b0bf0ca5c 100644 --- a/llvm/test/CodeGen/AArch64/win64-jumptable.ll +++ b/llvm/test/CodeGen/AArch64/win64-jumptable.ll @@ -1,5 +1,5 @@ -; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-enable-compress-jump-tables=0 | FileCheck %s -; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-enable-compress-jump-tables=0 -filetype=obj | llvm-readobj --unwind - | FileCheck %s -check-prefix=UNWIND +; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-min-jump-table-entries=4 -aarch64-enable-compress-jump-tables=0 | FileCheck %s +; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-min-jump-table-entries=4 -aarch64-enable-compress-jump-tables=0 -filetype=obj | llvm-readobj --unwind - | FileCheck %s -check-prefix=UNWIND define void @f(i32 %x) { entry: diff --git a/llvm/test/CodeGen/AArch64/wineh-bti.ll b/llvm/test/CodeGen/AArch64/wineh-bti.ll index edf3699d52fd2..a73f4d219bc31 100644 --- a/llvm/test/CodeGen/AArch64/wineh-bti.ll +++ b/llvm/test/CodeGen/AArch64/wineh-bti.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=aarch64-windows | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-windows -aarch64-min-jump-table-entries=4 | FileCheck %s define dso_local i32 @func(i32 %in) { entry: diff --git a/llvm/test/CodeGen/Generic/machine-function-splitter.ll b/llvm/test/CodeGen/Generic/machine-function-splitter.ll index 364eadd64c755..a236f201eaafc 100644 --- a/llvm/test/CodeGen/Generic/machine-function-splitter.ll +++ b/llvm/test/CodeGen/Generic/machine-function-splitter.ll @@ -8,10 +8,10 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86 ; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86 -; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu 
-enable-split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64 -; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64 -; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64 -; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-AARCH64 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64 +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-AARCH64 ; RUN: llc < %s -mtriple=aarch64 -enable-split-machine-functions -aarch64-redzone | FileCheck %s -check-prefixes=MFS-REDZONE-AARCH64 ; COM: Machine function splitting with AFDO profiles diff --git a/llvm/test/DebugInfo/COFF/jump-table.ll b/llvm/test/DebugInfo/COFF/jump-table.ll index a1f16f48962f9..4d16c78c97882 100644 --- a/llvm/test/DebugInfo/COFF/jump-table.ll +++ b/llvm/test/DebugInfo/COFF/jump-table.ll @@ -3,7 +3,7 @@ ; REQUIRES: x86-registered-target ; RUN: llc -mtriple=i686-windows < %s | FileCheck %s 
--check-prefixes=CHECK,I686,NOTA32 ; RUN: llc -mtriple=x86_64-windows < %s | FileCheck %s --check-prefixes=CHECK,X64,NOTA32 -; RUN: llc -mtriple=aarch64-windows < %s | FileCheck %s --check-prefixes=CHECK,A64,NOTA32 +; RUN: llc -mtriple=aarch64-windows -aarch64-min-jump-table-entries=4 < %s | FileCheck %s --check-prefixes=CHECK,A64,NOTA32 ; RUN: llc -mtriple=thumbv7a-windows < %s | FileCheck %s --check-prefixes=CHECK,A32 ; RUN: llc -mtriple=x86_64-windows -filetype=obj < %s | llvm-readobj - --codeview | FileCheck %s --check-prefixes=CV From bb2a9b78e459dc41a26c1847dc0fed238782bfd3 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Fri, 10 Nov 2023 14:32:55 +0000 Subject: [PATCH 2/5] Take minsize attribute into account I've changed getMinimumJumpTableEntries to now take a Function parameter so that we can query the minsize attribute and only increase the threshold if minsize is absent. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 +- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 5 ++- llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +- .../Target/AArch64/AArch64ISelLowering.cpp | 11 ++++- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + llvm/lib/Target/VE/VEISelLowering.cpp | 4 +- llvm/lib/Target/VE/VEISelLowering.h | 2 +- llvm/test/CodeGen/AArch64/min-jump-table.ll | 40 +++++++++++++++++++ 9 files changed, 62 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 7a8f36da58cec..7e91f263c97ed 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -470,7 +470,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // Check if suitable for a jump table. 
if (IsJTAllowed) { - if (N < 2 || N < TLI->getMinimumJumpTableEntries()) + if (N < 2 || + N < TLI->getMinimumJumpTableEntries(SI.getParent()->getParent())) return N; uint64_t Range = (MaxCaseVal - MinCaseVal) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index c87537291e3b1..48927f5cbf783 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1904,7 +1904,7 @@ class TargetLoweringBase { virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } /// Return lower limit for number of blocks in a jump table. - virtual unsigned getMinimumJumpTableEntries() const; + virtual unsigned getMinimumJumpTableEntries(const Function *F) const; /// Return lower limit of the density in a jump table. unsigned getMinimumJumpTableDensity(bool OptForSize) const; diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index b01a8bed0a394..165a9ec5af357 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -58,10 +58,11 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, #endif assert(TLI && "TLI not set!"); - if (!TLI->areJTsAllowed(SI->getParent()->getParent())) + const Function *F = SI->getParent()->getParent(); + if (!TLI->areJTsAllowed(F)) return; - const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); + const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(F); const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; // Bail if not enough cases. 
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 722cefb1eddb3..31f48a2ef4771 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -2035,7 +2035,8 @@ Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { return nullptr; } -unsigned TargetLoweringBase::getMinimumJumpTableEntries() const { +unsigned +TargetLoweringBase::getMinimumJumpTableEntries(const Function *F) const { return MinimumJumpTableEntries; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 002b95c68272f..f3f78dc064490 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1657,8 +1657,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); IsStrictFPEnabled = true; - - setMinimumJumpTableEntries(AArch64MinimumJumpTableEntries); } void AArch64TargetLowering::addTypeForNEON(MVT VT) { @@ -26550,3 +26548,12 @@ bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const { } return true; } + +unsigned +AArch64TargetLowering::getMinimumJumpTableEntries(const Function *F) const { + if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || + !F->hasMinSize()) + return AArch64MinimumJumpTableEntries; + + return TargetLoweringBase::getMinimumJumpTableEntries(F); +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 7332a95615a4d..efcb2a7b11a20 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -853,6 +853,8 @@ class AArch64TargetLowering : public TargetLowering { bool isComplexDeinterleavingOperationSupported( ComplexDeinterleavingOperation Operation, Type *Ty) const override; + unsigned getMinimumJumpTableEntries(const Function *F) const 
override; + Value *createComplexDeinterleavingIR( IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index 0267aefd1e914..c2b1ae0294418 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -3085,12 +3085,12 @@ VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // VE Target Optimization Support //===----------------------------------------------------------------------===// -unsigned VETargetLowering::getMinimumJumpTableEntries() const { +unsigned VETargetLowering::getMinimumJumpTableEntries(const Function *F) const { // Specify 8 for PIC model to relieve the impact of PIC load instructions. if (isJumpTableRelative()) return 8; - return TargetLowering::getMinimumJumpTableEntries(); + return TargetLowering::getMinimumJumpTableEntries(F); } bool VETargetLowering::hasAndNot(SDValue Y) const { diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h index 8b9412d786625..02f6cff782c47 100644 --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -326,7 +326,7 @@ class VETargetLowering : public TargetLowering { /// Target Optimization { // Return lower limit for number of blocks in a jump table. - unsigned getMinimumJumpTableEntries() const override; + unsigned getMinimumJumpTableEntries(const Function *F) const override; // SX-Aurora VE's s/udiv is 5-9 times slower than multiply. 
bool isIntDivCheap(EVT, AttributeList) const override { return false; } diff --git a/llvm/test/CodeGen/AArch64/min-jump-table.ll b/llvm/test/CodeGen/AArch64/min-jump-table.ll index 3b3f79746de0a..98b89210f5a05 100644 --- a/llvm/test/CodeGen/AArch64/min-jump-table.ll +++ b/llvm/test/CodeGen/AArch64/min-jump-table.ll @@ -123,6 +123,46 @@ bb12: tail call void @ext(i32 12, i32 22) br label %return return: ret i32 %b } +define i32 @jt12_min_size(i32 %a, i32 %b) minsize { +entry: + switch i32 %a, label %return [ + i32 1, label %bb1 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb4 + i32 5, label %bb5 + i32 6, label %bb6 + i32 7, label %bb7 + i32 8, label %bb8 + i32 9, label %bb9 + i32 10, label %bb10 + i32 11, label %bb11 + i32 12, label %bb12 + ] +; CHECK-LABEL: function jt12_min_size: +; CHECK0-NEXT: Jump Tables: +; CHECK2-NEXT: Jump Tables: +; CHECK4-NEXT: Jump Tables: +; CHECK8-NEXT: Jump Tables: +; CHECK12-NEXT: Jump Tables: +; CHECK-DEFAULT: Jump Tables: + +bb1: tail call void @ext(i32 1, i32 0) br label %return +bb2: tail call void @ext(i32 2, i32 2) br label %return +bb3: tail call void @ext(i32 3, i32 4) br label %return +bb4: tail call void @ext(i32 4, i32 6) br label %return +bb5: tail call void @ext(i32 5, i32 8) br label %return +bb6: tail call void @ext(i32 6, i32 10) br label %return +bb7: tail call void @ext(i32 7, i32 12) br label %return +bb8: tail call void @ext(i32 8, i32 14) br label %return +bb9: tail call void @ext(i32 9, i32 16) br label %return +bb10: tail call void @ext(i32 10, i32 18) br label %return +bb11: tail call void @ext(i32 11, i32 20) br label %return +bb12: tail call void @ext(i32 12, i32 22) br label %return + +return: ret i32 %b +} + define i32 @jt13(i32 %a, i32 %b) { entry: switch i32 %a, label %return [ From 5514ecd741c111517a2f746bb1ab414c4b9b6bec Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Mon, 13 Nov 2023 08:48:41 +0000 Subject: [PATCH 3/5] Let the subtarget decide the minimum at construction Instead of 
querying the function attributes every time, we can let the subtarget decide once at construction. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 +-- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 5 ++--- llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +-- .../Target/AArch64/AArch64ISelLowering.cpp | 13 ++----------- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 ++-- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 19 ++++++++++++++----- llvm/lib/Target/AArch64/AArch64Subtarget.h | 10 +++++++--- .../Target/AArch64/AArch64TargetMachine.cpp | 6 ++++-- llvm/lib/Target/VE/VEISelLowering.cpp | 4 ++-- llvm/lib/Target/VE/VEISelLowering.h | 2 +- 11 files changed, 37 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 7e91f263c97ed..7a8f36da58cec 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -470,8 +470,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // Check if suitable for a jump table. if (IsJTAllowed) { - if (N < 2 || - N < TLI->getMinimumJumpTableEntries(SI.getParent()->getParent())) + if (N < 2 || N < TLI->getMinimumJumpTableEntries()) return N; uint64_t Range = (MaxCaseVal - MinCaseVal) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 48927f5cbf783..c87537291e3b1 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1904,7 +1904,7 @@ class TargetLoweringBase { virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } /// Return lower limit for number of blocks in a jump table. - virtual unsigned getMinimumJumpTableEntries(const Function *F) const; + virtual unsigned getMinimumJumpTableEntries() const; /// Return lower limit of the density in a jump table. 
unsigned getMinimumJumpTableDensity(bool OptForSize) const; diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 165a9ec5af357..b01a8bed0a394 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -58,11 +58,10 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, #endif assert(TLI && "TLI not set!"); - const Function *F = SI->getParent()->getParent(); - if (!TLI->areJTsAllowed(F)) + if (!TLI->areJTsAllowed(SI->getParent()->getParent())) return; - const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(F); + const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries(); const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; // Bail if not enough cases. diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 31f48a2ef4771..722cefb1eddb3 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -2035,8 +2035,7 @@ Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { return nullptr; } -unsigned -TargetLoweringBase::getMinimumJumpTableEntries(const Function *F) const { +unsigned TargetLoweringBase::getMinimumJumpTableEntries() const { return MinimumJumpTableEntries; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f3f78dc064490..5d56a32be4958 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -144,10 +144,6 @@ static cl::opt EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden, static cl::opt MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden, cl::desc("Maximum of xors")); -static cl::opt AArch64MinimumJumpTableEntries( - "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden, - cl::desc("Set minimum number of entries to use a jump table on AArch64")); - /// Value 
type used for condition codes. static const MVT MVT_CC = MVT::i32; @@ -26549,11 +26545,6 @@ bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const { return true; } -unsigned -AArch64TargetLowering::getMinimumJumpTableEntries(const Function *F) const { - if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || - !F->hasMinSize()) - return AArch64MinimumJumpTableEntries; - - return TargetLoweringBase::getMinimumJumpTableEntries(F); +unsigned AArch64TargetLowering::getMinimumJumpTableEntries() const { + return Subtarget->getMinimumJumpTableEntries(); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index efcb2a7b11a20..f7d004fa3cbcc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -853,8 +853,6 @@ class AArch64TargetLowering : public TargetLowering { bool isComplexDeinterleavingOperationSupported( ComplexDeinterleavingOperation Operation, Type *Ty) const override; - unsigned getMinimumJumpTableEntries(const Function *F) const override; - Value *createComplexDeinterleavingIR( IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, @@ -1252,6 +1250,8 @@ class AArch64TargetLowering : public TargetLowering { SDLoc DL, EVT VT) const; bool preferScalarizeSplat(SDNode *N) const override; + + unsigned getMinimumJumpTableEntries() const override; }; namespace AArch64 { diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index e3c3bff8e3298..e085124ad3bac 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -77,6 +77,10 @@ static cl::opt "to authenticated LR during tail call"), cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR)); +static cl::opt AArch64MinimumJumpTableEntries( + "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden, + cl::desc("Set minimum 
number of entries to use a jump table on AArch64")); + unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const { if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0) return OverrideVectorInsertExtractBaseCost; @@ -84,7 +88,7 @@ unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const { } AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies( - StringRef FS, StringRef CPUString, StringRef TuneCPUString) { + StringRef FS, StringRef CPUString, StringRef TuneCPUString, bool HasMinSize) { // Determine default and user-specified characteristics if (CPUString.empty()) @@ -94,12 +98,12 @@ AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies( TuneCPUString = CPUString; ParseSubtargetFeatures(CPUString, TuneCPUString, FS); - initializeProperties(); + initializeProperties(HasMinSize); return *this; } -void AArch64Subtarget::initializeProperties() { +void AArch64Subtarget::initializeProperties(bool HasMinSize) { // Initialize CPU specific properties. We should add a tablegen feature for // this in the future so we can specify it together with the subtarget // features. 
@@ -292,6 +296,10 @@ void AArch64Subtarget::initializeProperties() { MaxInterleaveFactor = 4; break; } + + if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || + !HasMinSize) + MinimumJumpTableEntries = AArch64MinimumJumpTableEntries; } AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, @@ -300,7 +308,8 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, unsigned MinSVEVectorSizeInBitsOverride, unsigned MaxSVEVectorSizeInBitsOverride, bool StreamingSVEMode, - bool StreamingCompatibleSVEMode) + bool StreamingCompatibleSVEMode, + bool HasMinSize) : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS), ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()), @@ -310,7 +319,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, StreamingCompatibleSVEMode(StreamingCompatibleSVEMode), MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), - InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)), + InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)), TLInfo(TM, *this) { if (AArch64::isX18ReservedByDefault(TT)) ReserveXRegister.set(18); diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index b91c5c81ed4d2..a6c2f4f7fa28e 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -112,6 +112,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { Align PrefFunctionAlignment; Align PrefLoopAlignment; unsigned MaxBytesForLoopAlignment = 0; + unsigned MinimumJumpTableEntries = 4; unsigned MaxJumpTableSize = 0; // ReserveXRegister[i] - X#i is not available as a general purpose register. @@ -153,10 +154,11 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { /// subtarget initialization. 
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS, StringRef CPUString, - StringRef TuneCPUString); + StringRef TuneCPUString, + bool HasMinSize); /// Initialize properties based on the selected processor family. - void initializeProperties(); + void initializeProperties(bool HasMinSize); public: /// This constructor initializes the data members to match that @@ -166,7 +168,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { unsigned MinSVEVectorSizeInBitsOverride = 0, unsigned MaxSVEVectorSizeInBitsOverride = 0, bool StreamingSVEMode = false, - bool StreamingCompatibleSVEMode = false); + bool StreamingCompatibleSVEMode = false, + bool HasMinSize = false); // Getters for SubtargetFeatures defined in tablegen #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ @@ -274,6 +277,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { } unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } + unsigned getMinimumJumpTableEntries() const { return MinimumJumpTableEntries; } /// CPU has TBI (top byte of addresses is ignored during HW address /// translation) and OS enables it. diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 3d818c76bd4b7..c21bb247d24c9 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -397,6 +397,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { StringRef CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString() : TargetCPU; StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU; StringRef FS = FSAttr.isValid() ? 
FSAttr.getValueAsString() : TargetFS; + bool HasMinSize = F.hasMinSize(); bool StreamingSVEMode = F.hasFnAttribute("aarch64_pstate_sm_enabled") || F.hasFnAttribute("aarch64_pstate_sm_body"); @@ -433,7 +434,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { << "StreamingSVEMode=" << StreamingSVEMode << "StreamingCompatibleSVEMode=" << StreamingCompatibleSVEMode << CPU << TuneCPU - << FS; + << FS << "HasMinSize=" << HasMinSize; auto &I = SubtargetMap[Key]; if (!I) { @@ -443,7 +444,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { resetTargetOptions(F); I = std::make_unique( TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize, - MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode); + MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode, + HasMinSize); } assert((!StreamingSVEMode || I->hasSME()) && diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index c2b1ae0294418..0267aefd1e914 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -3085,12 +3085,12 @@ VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // VE Target Optimization Support //===----------------------------------------------------------------------===// -unsigned VETargetLowering::getMinimumJumpTableEntries(const Function *F) const { +unsigned VETargetLowering::getMinimumJumpTableEntries() const { // Specify 8 for PIC model to relieve the impact of PIC load instructions. 
if (isJumpTableRelative()) return 8; - return TargetLowering::getMinimumJumpTableEntries(F); + return TargetLowering::getMinimumJumpTableEntries(); } bool VETargetLowering::hasAndNot(SDValue Y) const { diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h index 02f6cff782c47..8b9412d786625 100644 --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -326,7 +326,7 @@ class VETargetLowering : public TargetLowering { /// Target Optimization { // Return lower limit for number of blocks in a jump table. - unsigned getMinimumJumpTableEntries(const Function *F) const override; + unsigned getMinimumJumpTableEntries() const override; // SX-Aurora VE's s/udiv is 5-9 times slower than multiply. bool isIntDivCheap(EVT, AttributeList) const override { return false; } From 328d6800885fd9efc6975859094bddafb197cc9d Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Mon, 13 Nov 2023 09:35:12 +0000 Subject: [PATCH 4/5] Fix clang format --- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index e085124ad3bac..216a96ea97f80 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -314,8 +314,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()), CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), - IsLittle(LittleEndian), - StreamingSVEMode(StreamingSVEMode), + IsLittle(LittleEndian), StreamingSVEMode(StreamingSVEMode), StreamingCompatibleSVEMode(StreamingCompatibleSVEMode), MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), From 6020ea9e3b90d1f15d6c210788ef4338309b7ba8 Mon Sep 
17 00:00:00 2001 From: David Sherwood Date: Tue, 14 Nov 2023 11:40:26 +0000 Subject: [PATCH 5/5] Really fix the clang formatting issues --- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 6 +++--- llvm/lib/Target/AArch64/AArch64Subtarget.h | 4 +++- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 216a96ea97f80..ff14fcf5dfcc0 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -88,7 +88,8 @@ unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const { } AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies( - StringRef FS, StringRef CPUString, StringRef TuneCPUString, bool HasMinSize) { + StringRef FS, StringRef CPUString, StringRef TuneCPUString, + bool HasMinSize) { // Determine default and user-specified characteristics if (CPUString.empty()) @@ -297,8 +298,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { break; } - if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || - !HasMinSize) + if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize) MinimumJumpTableEntries = AArch64MinimumJumpTableEntries; } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index a6c2f4f7fa28e..b2ee2e76d0e8e 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -277,7 +277,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { } unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } - unsigned getMinimumJumpTableEntries() const { return MinimumJumpTableEntries; } + unsigned getMinimumJumpTableEntries() const { + return MinimumJumpTableEntries; + } /// CPU has TBI (top byte of addresses is ignored during HW address /// translation) and OS enables it. 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index c21bb247d24c9..d418a297218eb 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -433,8 +433,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { << MaxSVEVectorSize << "StreamingSVEMode=" << StreamingSVEMode << "StreamingCompatibleSVEMode=" - << StreamingCompatibleSVEMode << CPU << TuneCPU - << FS << "HasMinSize=" << HasMinSize; + << StreamingCompatibleSVEMode << CPU << TuneCPU << FS + << "HasMinSize=" << HasMinSize; auto &I = SubtargetMap[Key]; if (!I) {