From 2302c76418ceaa5454a49fead7b8df508a56ada6 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Wed, 28 Dec 2022 11:33:39 -0800 Subject: [PATCH 1/2] [SYCL][NFCI] Move default optimization level configuration Right now the front-end compiler modifies default optimization level in SYCL device compilation mode (upstream clang uses O0, whereas DPC++ uses O2). As some of CodeGen module patches landed to upstream, this difference causes conflicts and regresions when we pull changes from the upstream to the `sycl` branch. Current optimization level configuration is modeled after OpenCL compiler, which is a JIT compiler, so it doesn't use clang Driver. For DPC++ compiler it's more reasonable to make this type of changes in the driver to avoid significant divergence in LIT tests. --- clang/lib/Driver/ToolChains/Clang.cpp | 4 ++++ clang/lib/Frontend/CompilerInvocation.cpp | 7 +++--- .../CodeGenSYCL/address-space-cond-op.cpp | 12 +++++----- .../check-direct-attribute-propagation.cpp | 2 +- clang/test/CodeGenSYCL/const-wg-init.cpp | 1 - .../CodeGenSYCL/functionptr-addrspace.cpp | 2 +- clang/test/CodeGenSYCL/group-local-memory.cpp | 2 +- clang/test/CodeGenSYCL/inline_asm.cpp | 2 +- clang/test/CodeGenSYCL/inlining.cpp | 4 +++- clang/test/CodeGenSYCL/intel-fpga-loops.cpp | 2 +- clang/test/CodeGenSYCL/max-concurrency.cpp | 8 +------ .../no-opaque-ptr-kernel_binding_decls.cpp | 8 +++---- .../no-opaque-ptrs-sycl-intelfpga-bitint.cpp | 14 +++++++---- .../no_opaque_address-space-cond-op.cpp | 24 +++++++++---------- ...que_check-direct-attribute-propagation.cpp | 2 +- .../CodeGenSYCL/no_opaque_const-wg-init.cpp | 1 - .../test/CodeGenSYCL/no_opaque_inline_asm.cpp | 11 ++++++--- .../CodeGenSYCL/no_opaque_max-concurrency.cpp | 2 +- clang/test/CodeGenSYCL/no_opaque_sampler.cpp | 2 -- .../no_opaque_stall_enable_device.cpp | 6 ++--- clang/test/CodeGenSYCL/no_opaque_stream.cpp | 5 ++-- .../remove-restriction-builtin-printf.cpp | 2 +- .../reqd-sub-group-size-spirv-intrin.cpp | 2 +- clang/test/CodeGenSYCL/sampler.cpp | 1 - clang/test/CodeGenSYCL/simplifycfg.cpp | 7 +++--- .../test/CodeGenSYCL/stall_enable_device.cpp | 6 ++--- clang/test/CodeGenSYCL/stream.cpp | 5 ++-- .../CodeGenSYCL/sycl-intelfpga-bitint.cpp | 14 +++++++---- 28 files changed, 84 insertions(+), 74 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index f621e25b62097..7ce72fdaabc4b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4994,6 +4994,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-Wno-sycl-strict"); } + // Set O2 optimization level by default + if (!Args.getLastArg(options::OPT_O_Group)) + CmdArgs.push_back("-O2"); + // Add the integration header option to generate the header. StringRef Header(D.getIntegrationHeader(Input.getBaseInput())); if (!Header.empty()) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index a458a83ae9956..61401563cba5d 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -579,10 +579,9 @@ static bool FixupInvocation(CompilerInvocation &Invocation, static unsigned getOptimizationLevel(ArgList &Args, InputKind IK, DiagnosticsEngine &Diags) { unsigned DefaultOpt = llvm::CodeGenOpt::None; - if (((IK.getLanguage() == Language::OpenCL || - IK.getLanguage() == Language::OpenCLCXX) && - !Args.hasArg(OPT_cl_opt_disable)) || - Args.hasArg(OPT_fsycl_is_device)) + if ((IK.getLanguage() == Language::OpenCL || + IK.getLanguage() == Language::OpenCLCXX) && + !Args.hasArg(OPT_cl_opt_disable)) DefaultOpt = llvm::CodeGenOpt::Default; if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { diff --git a/clang/test/CodeGenSYCL/address-space-cond-op.cpp b/clang/test/CodeGenSYCL/address-space-cond-op.cpp index d3933664d75f5..3592e4810345c 100644 --- a/clang/test/CodeGenSYCL/address-space-cond-op.cpp +++ b/clang/test/CodeGenSYCL/address-space-cond-op.cpp @@ -5,27 +5,27 @@ struct S { unsigned short x; }; -// CHECK-LABEL: @_Z3foobR1SS_( +// CHECK-LABEL: define {{[^@]+}}@_Z3foobR1SS_( // CHECK: entry: // CHECK-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1 // CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca ptr addrspace(4), align 8 // CHECK-NEXT: [[COND_ADDR_ASCAST:%.*]] = addrspacecast ptr [[COND_ADDR]] to ptr addrspace(4) // CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast ptr [[LHS_ADDR]] to ptr addrspace(4) // CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND:%.*]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL]], ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1, [[TBAA12:!tbaa !.*]] -// CHECK-NEXT: store ptr addrspace(4) [[LHS:%.*]], ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8, [[TBAA5:!tbaa !.*]] +// CHECK-NEXT: store i8 [[FROMBOOL]], ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1 +// CHECK-NEXT: store ptr addrspace(4) [[LHS:%.*]], ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast ptr [[RHS:%.*]] to ptr addrspace(4) -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1, [[TBAA12]], [[RNG14:!range !.*]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(4) [[COND_ADDR_ASCAST]], align 1 // CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: -// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8, [[TBAA5]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LHS_ADDR_ASCAST]], align 8 // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND_LVALUE:%.*]] = phi ptr addrspace(4) [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ] -// CHECK-NEXT: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 2 %agg.result, ptr addrspace(4) align 2 [[COND_LVALUE]], i64 2, i1 false), !tbaa.struct !{{[0-9]+}} +// CHECK-NEXT: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) align 2 [[COND_LVALUE]], i64 2, i1 false) // CHECK-NEXT: ret void // S foo(bool cond, S &lhs, S rhs) { diff --git a/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp b/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp index 305f611be5483..1a589e0b0e638 100644 --- a/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp +++ b/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -O2 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s // Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]], // [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]], diff --git a/clang/test/CodeGenSYCL/const-wg-init.cpp b/clang/test/CodeGenSYCL/const-wg-init.cpp index 5f145a7267be8..082a132e79f44 100644 --- a/clang/test/CodeGenSYCL/const-wg-init.cpp +++ b/clang/test/CodeGenSYCL/const-wg-init.cpp @@ -15,6 +15,5 @@ int main() { const int WG_CONST = 10; }); // CHECK: store i32 10, ptr addrspace(4) addrspacecast (ptr addrspace(3) @{{.*}}WG_CONST{{.*}} to ptr addrspace(4)) - // CHECK: %{{[0-9]+}} = call ptr @llvm.invariant.start.p4(i64 4, ptr addrspace(4) addrspacecast (ptr addrspace(3) @{{.*}}WG_CONST{{.*}} to ptr addrspace(4))) return 0; } diff --git a/clang/test/CodeGenSYCL/functionptr-addrspace.cpp b/clang/test/CodeGenSYCL/functionptr-addrspace.cpp index 93a73e8445e9e..42494dc694a8a 100644 --- a/clang/test/CodeGenSYCL/functionptr-addrspace.cpp +++ b/clang/test/CodeGenSYCL/functionptr-addrspace.cpp @@ -7,7 +7,7 @@ __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { kernelFunc(); } -// CHECK: define dso_local spir_func{{.*}}invoke_function{{.*}}(ptr nocapture noundef %fptr, ptr addrspace(4) nocapture noundef %ptr) +// CHECK: define dso_local spir_func{{.*}}invoke_function{{.*}}(ptr noundef %fptr, ptr addrspace(4) noundef %ptr) void invoke_function(int (*fptr)(), int *ptr) {} int f() { return 0; } diff --git a/clang/test/CodeGenSYCL/group-local-memory.cpp b/clang/test/CodeGenSYCL/group-local-memory.cpp index ec7c3cb22b510..61129d92acba8 100644 --- a/clang/test/CodeGenSYCL/group-local-memory.cpp +++ b/clang/test/CodeGenSYCL/group-local-memory.cpp @@ -1,7 +1,7 @@ // Check that SYCLLowerWGLocalMemory pass is added to the SYCL device // compilation pipeline with the inliner pass (new Pass Manager). -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm \ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -O2 \ // RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHECK-INL,CHECK diff --git a/clang/test/CodeGenSYCL/inline_asm.cpp b/clang/test/CodeGenSYCL/inline_asm.cpp index ae7711b553c73..2dc191d686130 100644 --- a/clang/test/CodeGenSYCL/inline_asm.cpp +++ b/clang/test/CodeGenSYCL/inline_asm.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -opaque-pointers -emit-llvm -x c++ %s -o - | FileCheck %s +// RUN: %clang_cc1 -O1 -fsycl-is-device -triple spir64-unknown-unknown -opaque-pointers -emit-llvm -x c++ %s -o - | FileCheck %s class kernel; diff --git a/clang/test/CodeGenSYCL/inlining.cpp b/clang/test/CodeGenSYCL/inlining.cpp index 4cd63f1bc0812..d728d37d1a691 100644 --- a/clang/test/CodeGenSYCL/inlining.cpp +++ b/clang/test/CodeGenSYCL/inlining.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown %s -S -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -O1 -fsycl-is-device -triple spir64-unknown-unknown %s -S -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -O0 -fsycl-is-device -triple spir64-unknown-unknown %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-O0 template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { @@ -6,6 +7,7 @@ __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { } int main() { + // CHECK-O0: noinline // CHECK-NOT: noinline kernel_single_task([]() {}); return 0; diff --git a/clang/test/CodeGenSYCL/intel-fpga-loops.cpp b/clang/test/CodeGenSYCL/intel-fpga-loops.cpp index b6522ab5b5890..80efd740cdf46 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-loops.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-loops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple spir64-unknown-unknown -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s // CHECK: br label %for.cond, !llvm.loop ![[MD_DLP:[0-9]+]] // CHECK: br label %for.cond, !llvm.loop ![[MD_II:[0-9]+]] diff --git a/clang/test/CodeGenSYCL/max-concurrency.cpp b/clang/test/CodeGenSYCL/max-concurrency.cpp index 5bc51f0465d05..89fe02982340e 100644 --- a/clang/test/CodeGenSYCL/max-concurrency.cpp +++ b/clang/test/CodeGenSYCL/max-concurrency.cpp @@ -15,7 +15,7 @@ // CHECK: %inc = add nsw i32 [[TMP2]], 1 // CHECK: store i32 %inc, ptr addrspace(4) %i.ascast, align 4 // CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]] -// CHECK: store i32 %inc10, ptr addrspace(4) %i1.ascast, align 4 +// CHECK: store i32 %inc8, ptr addrspace(4) %i1.ascast, align 4 // CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_1:[0-9]+]] // CHECK: ret void @@ -23,18 +23,14 @@ // CHECK: entry: // CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1 // CHECK: [[F1_ASCAST:%.*]] = addrspacecast ptr [[F1]] to ptr addrspace(4) -// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F1]]) // CHECK: call spir_func void @_ZNK8Functor1clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F1_ASCAST]]) -// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F1]]) // CHECK: ret void // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]] // CHECK: entry // CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1 // CHECK: [[F3_ASCAST:%.*]] = addrspacecast ptr [[F3]] to ptr addrspace(4) -// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F3]]) // CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F3_ASCAST]]) -// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F3]] // CHECK: ret void // CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv @@ -49,9 +45,7 @@ // CHECK: entry: // CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1 // CHECK: [[H2:%.*]] = addrspacecast ptr [[H1]] to ptr addrspace(4) -// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[H1]]) // CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[H2]]) -// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[H1]]) // CHECK: ret void // CHECK: define {{.*}}spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv diff --git a/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp b/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp index a7d82d65a8d9e..d9d23606f0588 100644 --- a/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp +++ b/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp @@ -33,11 +33,11 @@ void foo() { // Store the int and the float into the struct created // CHECK: %x = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %__SYCLKernel{{.*}}, i32 0, i32 0 -// CHECK: %1 = load i32, i32 addrspace(4)* %_arg_x.addr -// CHECK: store i32 %1, i32 addrspace(4)* %x +// CHECK: %0 = load i32, i32 addrspace(4)* %_arg_x.addr +// CHECK: store i32 %0, i32 addrspace(4)* %x // CHECK: %f2 = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %__SYCLKernel{{.*}}, i32 0, i32 1 -// CHECK: %2 = load float, float addrspace(4)* %_arg_f2.addr -// CHECK: store float %2, float addrspace(4)* %f2 +// CHECK: %1 = load float, float addrspace(4)* %_arg_f2.addr +// CHECK: store float %1, float addrspace(4)* %f2 // Call the lambda // CHECK: call spir_func void @{{.*}}foo{{.*}}(%class.anon addrspace(4)* {{.*}} %__SYCLKernel{{.*}}) diff --git a/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-intelfpga-bitint.cpp b/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-intelfpga-bitint.cpp index e132278a542a3..cb8ef1b8bfd8d 100644 --- a/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-intelfpga-bitint.cpp +++ b/clang/test/CodeGenSYCL/no-opaque-ptrs-sycl-intelfpga-bitint.cpp @@ -9,10 +9,16 @@ // CHECK: define{{.*}} void @_Z3fooDB4096_S_(i4096 addrspace(4)* {{.*}} sret(i4096) align 8 %agg.result, i4096* {{.*}} byval(i4096) align 8 %[[ARG1:[0-9]+]], i4096* {{.*}} byval(i4096) align 8 %[[ARG2:[0-9]+]]) signed _BitInt(4096) foo(signed _BitInt(4096) a, signed _BitInt(4096) b) { - // CHECK: %[[VAR_A:a]] = load i4096, i4096* %[[ARG1]], align 8 - // CHECK: %[[VAR_B:b]] = load i4096, i4096* %[[ARG2]], align 8 - // CHECK: %[[RES:div]] = sdiv i4096 %[[VAR_A]], %[[VAR_B]] - // CHECK: store i4096 %[[RES]], i4096 addrspace(4)* %agg.result, align 8 + // CHECK: %a.addr.ascast = addrspacecast i4096* %a.addr to i4096 addrspace(4)* + // CHECK: %b.addr.ascast = addrspacecast i4096* %b.addr to i4096 addrspace(4)* + // CHECK: %a = load i4096, i4096* %[[ARG1]], align 8 + // CHECK: %b = load i4096, i4096* %[[ARG2]], align 8 + // CHECK: store i4096 %a, i4096 addrspace(4)* %a.addr.ascast, align 8 + // CHECK: store i4096 %b, i4096 addrspace(4)* %b.addr.ascast, align 8 + // CHECK: %2 = load i4096, i4096 addrspace(4)* %a.addr.ascast, align 8 + // CHECK: %3 = load i4096, i4096 addrspace(4)* %b.addr.ascast, align 8 + // CHECK: %div = sdiv i4096 %2, %3 + // CHECK: store i4096 %div, i4096 addrspace(4)* %agg.result, align 8 // CHECK: ret void return a / b; } diff --git a/clang/test/CodeGenSYCL/no_opaque_address-space-cond-op.cpp b/clang/test/CodeGenSYCL/no_opaque_address-space-cond-op.cpp index 88e74268fe402..bee4bc44626cf 100644 --- a/clang/test/CodeGenSYCL/no_opaque_address-space-cond-op.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_address-space-cond-op.cpp @@ -5,29 +5,29 @@ struct S { unsigned short x; }; -// CHECK-LABEL: @_Z3foobR1SS_( +// CHECK-LABEL: define {{[^@]+}}@_Z3foobR1SS_( // CHECK: entry: // CHECK-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca [[STRUCT__ZTS1S_S:%.*]] addrspace(4)*, align 8 +// CHECK-NEXT: [[LHS_ADDR:%.*]] = alloca [[STRUCT_S:%.*]] addrspace(4)*, align 8 // CHECK-NEXT: [[COND_ADDR_ASCAST:%.*]] = addrspacecast i8* [[COND_ADDR]] to i8 addrspace(4)* -// CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast [[STRUCT__ZTS1S_S]] addrspace(4)** [[LHS_ADDR]] to [[STRUCT__ZTS1S_S]] addrspace(4)* addrspace(4)* +// CHECK-NEXT: [[LHS_ADDR_ASCAST:%.*]] = addrspacecast [[STRUCT_S]] addrspace(4)** [[LHS_ADDR]] to [[STRUCT_S]] addrspace(4)* addrspace(4)* // CHECK-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND:%.*]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL]], i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1, [[TBAA12:!tbaa !.*]] -// CHECK-NEXT: store [[STRUCT__ZTS1S_S]] addrspace(4)* [[LHS:%.*]], [[STRUCT__ZTS1S_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8, [[TBAA5:!tbaa !.*]] -// CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast %struct.S* [[RHS:%.*]] to [[STRUCT__ZTS1S_S]] addrspace(4)* -// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1, [[TBAA12]], [[RNG14:!range !.*]] +// CHECK-NEXT: store i8 [[FROMBOOL]], i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1 +// CHECK-NEXT: store [[STRUCT_S]] addrspace(4)* [[LHS:%.*]], [[STRUCT_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[RHS_ASCAST:%.*]] = addrspacecast %struct.S* [[RHS:%.*]] to [[STRUCT_S]] addrspace(4)* +// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8 addrspace(4)* [[COND_ADDR_ASCAST]], align 1 // CHECK-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: -// CHECK-NEXT: [[TMP1:%.*]] = load [[STRUCT__ZTS1S_S]] addrspace(4)*, [[STRUCT__ZTS1S_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8, [[TBAA5]] +// CHECK-NEXT: [[TMP1:%.*]] = load [[STRUCT_S]] addrspace(4)*, [[STRUCT_S]] addrspace(4)* addrspace(4)* [[LHS_ADDR_ASCAST]], align 8 // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: -// CHECK-NEXT: [[COND_LVALUE:%.*]] = phi [[STRUCT__ZTS1S_S]] addrspace(4)* [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT__ZTS1S_S]] addrspace(4)* [[AGG_RESULT:%.*]] to i8 addrspace(4)* -// CHECK-NEXT: [[TMP3:%.*]] = bitcast [[STRUCT__ZTS1S_S]] addrspace(4)* [[COND_LVALUE]] to i8 addrspace(4)* -// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 2 [[TMP2]], i8 addrspace(4)* align 2 [[TMP3]], i64 2, i1 false), !tbaa.struct !{{[0-9]+}} +// CHECK-NEXT: [[COND_LVALUE:%.*]] = phi [[STRUCT_S]] addrspace(4)* [ [[TMP1]], [[COND_TRUE]] ], [ [[RHS_ASCAST]], [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[STRUCT_S]] addrspace(4)* [[AGG_RESULT:%.*]] to i8 addrspace(4)* +// CHECK-NEXT: [[TMP3:%.*]] = bitcast [[STRUCT_S]] addrspace(4)* [[COND_LVALUE]] to i8 addrspace(4)* +// CHECK-NEXT: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 2 [[TMP2]], i8 addrspace(4)* align 2 [[TMP3]], i64 2, i1 false) // CHECK-NEXT: ret void // S foo(bool cond, S &lhs, S rhs) { diff --git a/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp b/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp index 40372d327501f..3b8812fbe8d9a 100644 --- a/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -O2 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s // Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]], // [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]], diff --git a/clang/test/CodeGenSYCL/no_opaque_const-wg-init.cpp b/clang/test/CodeGenSYCL/no_opaque_const-wg-init.cpp index b222876cc835c..8ff1a9a01e9bc 100644 --- a/clang/test/CodeGenSYCL/no_opaque_const-wg-init.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_const-wg-init.cpp @@ -15,7 +15,6 @@ int main() { const int WG_CONST = 10; }); // CHECK: store i32 10, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @{{.*}}WG_CONST{{.*}} to i32 addrspace(4)*) -// CHECK: %{{[0-9]+}} = call {}* @llvm.invariant.start.p4i8(i64 4, i8 addrspace(4)* addrspacecast (i8 addrspace(3)* bitcast (i32 addrspace(3)* @{{.*}}WG_CONST{{.*}} to i8 addrspace(3)*) to i8 addrspace(4)*)) return 0; } diff --git a/clang/test/CodeGenSYCL/no_opaque_inline_asm.cpp b/clang/test/CodeGenSYCL/no_opaque_inline_asm.cpp index 092fa56ee62d8..4b7c70306bf95 100644 --- a/clang/test/CodeGenSYCL/no_opaque_inline_asm.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_inline_asm.cpp @@ -5,15 +5,20 @@ class kernel; template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { // CHECK: %[[ARRAY_A:[0-9a-z]+]] = alloca [100 x i32], align 4 - // CHECK: %[[IDX:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[ARRAY_A]], i64 0, i64 0 - // CHECK: %[[IDX4:.*]] = addrspacecast i32* %[[IDX]] to i32 addrspace(4)* + // CHECK: %[[I:[0-9a-z]+]] = alloca i32, align 4 + // CHECK: %[[ARRAY_A]].ascast = addrspacecast [100 x i32]* %[[ARRAY_A]] to [100 x i32] addrspace(4)* + // CHECK: %[[I]].ascast = addrspacecast i32* %[[I]] to i32 addrspace(4)* + // CHECK: store i32 0, i32 addrspace(4)* %[[I]].ascast, align 4 + // CHECK: %0 = load i32, i32 addrspace(4)* %[[I]].ascast, align 4 + // CHECK: %[[IDXPROM:[0-9a-z]+]] = sext i32 %0 to i64 + // CHECK: %[[IDX:.*]] = getelementptr inbounds [100 x i32], [100 x i32] addrspace(4)* %[[ARRAY_A]].ascast, i64 0, i64 %[[IDXPROM]] int a[100], i = 0; // CHECK-NEXT: call void asm sideeffect // CHECK: ".decl V52 v_type=G type=d num_elts=16 align=GRF // CHECK: svm_gather.4.1 (M1, 16) $0.0 V52.0 // CHECK: add(M1, 16) V52(0, 0)<1> V52(0, 0)<1; 1, 0> 0x1 // CHECK: svm_scatter.4.1 (M1, 16) $0.0 V52.0", - // CHECK: "rw"(i32 addrspace(4)* %[[IDX4]]) + // CHECK: "rw"(i32 addrspace(4)* %[[IDX]]) // TODO: nonnull attribute missing? asm volatile(".decl V52 v_type=G type=d num_elts=16 align=GRF\n" "svm_gather.4.1 (M1, 16) %0.0 V52.0\n" diff --git a/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp b/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp index b71fdd8b676f9..a04db80f9f520 100644 --- a/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -O2 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s #include "sycl.hpp" diff --git a/clang/test/CodeGenSYCL/no_opaque_sampler.cpp b/clang/test/CodeGenSYCL/no_opaque_sampler.cpp index 6165a54593cd6..ba0d9d37a8487 100644 --- a/clang/test/CodeGenSYCL/no_opaque_sampler.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_sampler.cpp @@ -5,8 +5,6 @@ // CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8 // CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* [[ANON]] to %class.anon addrspace(4)* // CHECK: store %opencl.sampler_t addrspace(2)* [[SAMPLER_ARG]], %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8 -// CHECK-NEXT: [[BITCAST:%[0-9]+]] = bitcast %class.anon* [[ANON]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]]) #4 // CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* [[ANONCAST]], i32 0, i32 0 // CHECK-NEXT: [[LOAD_SAMPLER_ARG:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8 // CHECK-NEXT: call spir_func void @{{[a-zA-Z0-9_]+}}(%"class.sycl::_V1::sampler" addrspace(4)* {{[^,]*}} [[GEP]], %opencl.sampler_t addrspace(2)* [[LOAD_SAMPLER_ARG]]) diff --git a/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp b/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp index 242b2abc6852e..8d3a426977288 100644 --- a/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp @@ -26,12 +26,12 @@ class Foo { int main() { q.submit([&](handler &h) { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]] - // CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2{{.*}} !stall_enable ![[NUM4]] + // CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #2 comdat align 2{{.*}} !stall_enable ![[NUM4]] h.single_task( FuncObj()); // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]] - // CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2{{.*}} !stall_enable ![[NUM4]] + // CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #2 comdat align 2{{.*}} !stall_enable ![[NUM4]] Foo f; h.single_task(f); @@ -47,7 +47,7 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() // CHECK-NOT: !stall_enable // CHECK-SAME: { - // CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.anon{{.*}} addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #4 align 2{{.*}} !stall_enable ![[NUM4]] + // CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.anon{{.*}} addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #2 align 2{{.*}} !stall_enable ![[NUM4]] h.single_task( []() { func1(); }); diff --git a/clang/test/CodeGenSYCL/no_opaque_stream.cpp b/clang/test/CodeGenSYCL/no_opaque_stream.cpp index 4bd88c3d5fcfb..d4e1c981c3f07 100644 --- a/clang/test/CodeGenSYCL/no_opaque_stream.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_stream.cpp @@ -13,8 +13,9 @@ // Alloca and addrspace casts for kernel parameters // CHECK: [[ARG:%[a-zA-Z0-9_]+]].addr = alloca i8 addrspace(1)*, align 8 -// CHECK: [[ARG:%[a-zA-Z0-9_]+]].addr.ascast = addrspacecast i8 addrspace(1)** [[ARG]].addr to i8 addrspace(1)* addrspace(4)* -// CHECK: [[ARG_LOAD:%[a-zA-Z0-9_]+]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[ARG]].addr.ascast, align 8, +// CHECK: [[ARG]].addr.ascast = addrspacecast i8 addrspace(1)** [[ARG]].addr to i8 addrspace(1)* addrspace(4)* +// CHECK: store i8 addrspace(1)* [[ARG]], i8 addrspace(1)* addrspace(4)* [[ARG]].addr.ascast, align 8 +// CHECK: [[ARG_LOAD:%[a-zA-Z0-9_]+]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[ARG]].addr.ascast, align 8 // Check __init and __finalize method calls // CHECK: call spir_func void @{{.*}}__init{{.*}}(%{{.*}}sycl::_V1::stream" addrspace(4)* noundef align 4 dereferenceable_or_null(16) %{{[a-zA-Z0-9_]+}}, i8 addrspace(1)* noundef [[ARG_LOAD]], %[[RANGE_TYPE]]* noundef byval(%[[RANGE_TYPE]]) {{.*}}%{{.*}} diff --git a/clang/test/CodeGenSYCL/remove-restriction-builtin-printf.cpp b/clang/test/CodeGenSYCL/remove-restriction-builtin-printf.cpp index 307def146b3bf..43bef7382c8ea 100644 --- a/clang/test/CodeGenSYCL/remove-restriction-builtin-printf.cpp +++ b/clang/test/CodeGenSYCL/remove-restriction-builtin-printf.cpp @@ -10,7 +10,7 @@ int main() { q.submit([&](handler &h) { // CHECK: define {{.*}}spir_kernel {{.*}} h.single_task([=]() { - // CHECK: call {{.*}}printf(ptr noundef nonnull dereferenceable(1) @{{.*}}, i32 noundef 24) + // CHECK: call {{.*}}printf(ptr noundef @{{.*}}, i32 noundef 24) __builtin_printf("hello, %d\n", 24); }); }); diff --git a/clang/test/CodeGenSYCL/reqd-sub-group-size-spirv-intrin.cpp b/clang/test/CodeGenSYCL/reqd-sub-group-size-spirv-intrin.cpp index 8b3c981c7f4b3..fe1c242fb7bcd 100644 --- a/clang/test/CodeGenSYCL/reqd-sub-group-size-spirv-intrin.cpp +++ b/clang/test/CodeGenSYCL/reqd-sub-group-size-spirv-intrin.cpp @@ -21,7 +21,7 @@ int main() { } // CHECK: define dso_local spir_kernel void @{{.*}}main{{.*}}kernel_class() {{.*}} !intel_reqd_sub_group_size ![[SUBGROUPSIZE:[0-9]+]] -// CHECK: tail call spir_func void @{{.*}}__spirv_ControlBarrier{{.*}}({{.*}}) +// CHECK: call spir_func void @{{.*}}__spirv_ControlBarrier{{.*}}({{.*}}) // CHECK: declare spir_func void @{{.*}}__spirv_ControlBarrier{{.*}}({{.*}}) diff --git a/clang/test/CodeGenSYCL/sampler.cpp b/clang/test/CodeGenSYCL/sampler.cpp index 9fe2336fbcdd2..af2f74f453e06 100644 --- a/clang/test/CodeGenSYCL/sampler.cpp +++ b/clang/test/CodeGenSYCL/sampler.cpp @@ -5,7 +5,6 @@ // CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8 // CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[ANON]] to ptr addrspace(4) // CHECK: store ptr addrspace(2) [[SAMPLER_ARG]], ptr addrspace(4) [[SAMPLER_ARG]].addr.ascast, align 8 -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[ANON]]) #4 // CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) [[ANONCAST]], i32 0, i32 0 // CHECK-NEXT: [[LOAD_SAMPLER_ARG:%[0-9]+]] = load ptr addrspace(2), ptr addrspace(4) [[SAMPLER_ARG]].addr.ascast, align 8 // CHECK-NEXT: call spir_func void @{{[a-zA-Z0-9_]+}}(ptr addrspace(4) {{[^,]*}} [[GEP]], ptr addrspace(2) [[LOAD_SAMPLER_ARG]]) diff --git a/clang/test/CodeGenSYCL/simplifycfg.cpp b/clang/test/CodeGenSYCL/simplifycfg.cpp index 723a3ad04b86d..8cee1cc95b4f5 100644 --- a/clang/test/CodeGenSYCL/simplifycfg.cpp +++ b/clang/test/CodeGenSYCL/simplifycfg.cpp @@ -1,5 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -mllvm -sycl-opt %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -mllvm -sycl-opt %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -mllvm -sycl-opt %s -emit-llvm -O3 -o - | FileCheck %s // // This test checks that foo (which is @_Z3foov) is called twice after O3 optimizations. // @@ -15,8 +14,8 @@ // There is a relevant discussion about introducing // a reliable tool for such cases: https://reviews.llvm.org/D85603 -// CHECK: tail call spir_func void @_Z3foov() -// CHECK: tail call spir_func void @_Z3foov() +// CHECK: call spir_func void @_Z3foov() +// CHECK: call spir_func void @_Z3foov() SYCL_EXTERNAL void foo(); diff --git a/clang/test/CodeGenSYCL/stall_enable_device.cpp b/clang/test/CodeGenSYCL/stall_enable_device.cpp index 83ef246348b37..1cdfd4ee4c189 100644 --- a/clang/test/CodeGenSYCL/stall_enable_device.cpp +++ b/clang/test/CodeGenSYCL/stall_enable_device.cpp @@ -26,12 +26,12 @@ class Foo { int main() { q.submit([&](handler &h) { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]] - // CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2{{.*}} !stall_enable ![[NUM4]] + // CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #2 comdat align 2{{.*}} !stall_enable ![[NUM4]] h.single_task( FuncObj()); // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]] - // CHECK define {{.*}}spir_func void @{{.*}}FooclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2{{.*}} !stall_enable ![[NUM4]] + // CHECK define {{.*}}spir_func void @{{.*}}FooclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #2 comdat align 2{{.*}} !stall_enable ![[NUM4]] Foo f; h.single_task(f); @@ -47,7 +47,7 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() // CHECK-NOT: !stall_enable // CHECK-SAME: { - // CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #4 align 2{{.*}} !stall_enable ![[NUM4]] + // CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #2 align 2{{.*}} !stall_enable ![[NUM4]] h.single_task( []() { func1(); }); diff --git a/clang/test/CodeGenSYCL/stream.cpp b/clang/test/CodeGenSYCL/stream.cpp index 87682fa054f8e..49d3fc94c2e51 100644 --- a/clang/test/CodeGenSYCL/stream.cpp +++ b/clang/test/CodeGenSYCL/stream.cpp @@ -13,8 +13,9 @@ // Alloca and addrspace casts for kernel parameters // CHECK: [[ARG:%[a-zA-Z0-9_]+]].addr = alloca ptr addrspace(1), align 8 -// CHECK: [[ARG:%[a-zA-Z0-9_]+]].addr.ascast = addrspacecast ptr [[ARG]].addr to ptr addrspace(4) -// CHECK: [[ARG_LOAD:%[a-zA-Z0-9_]+]] = load ptr addrspace(1), ptr addrspace(4) [[ARG]].addr.ascast, align 8, +// CHECK: [[ARG]].addr.ascast = addrspacecast ptr [[ARG]].addr to ptr addrspace(4) +// CHECK: store ptr addrspace(1) [[ARG]], ptr addrspace(4) [[ARG]].addr.ascast, align 8 +// CHECK: [[ARG_LOAD:%[a-zA-Z0-9_]+]] = load ptr addrspace(1), ptr addrspace(4) [[ARG]].addr.ascast, align 8 // Check __init and __finalize method calls // CHECK: call spir_func void @{{.*}}__init{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable_or_null(16) %{{[a-zA-Z0-9_]+}}, ptr addrspace(1) noundef [[ARG_LOAD]], ptr noundef byval(%[[RANGE_TYPE]]) {{.*}}%{{.*}} diff --git a/clang/test/CodeGenSYCL/sycl-intelfpga-bitint.cpp b/clang/test/CodeGenSYCL/sycl-intelfpga-bitint.cpp index ed428f01d600a..23951fb335f5a 100644 --- a/clang/test/CodeGenSYCL/sycl-intelfpga-bitint.cpp +++ b/clang/test/CodeGenSYCL/sycl-intelfpga-bitint.cpp @@ -9,11 +9,15 @@ // CHECK: define{{.*}} void @_Z3fooDB4096_S_(ptr addrspace(4) {{.*}} sret(i4096) align 8 %agg.result, ptr {{.*}} byval(i4096) align 8 %[[ARG1:[0-9]+]], ptr {{.*}} byval(i4096) align 8 %[[ARG2:[0-9]+]]) signed _BitInt(4096) foo(signed _BitInt(4096) a, signed _BitInt(4096) b) { - // CHECK: %[[VAR_A:a]] = load i4096, ptr %[[ARG1]], align 8 - // CHECK: %[[VAR_B:b]] = load i4096, ptr %[[ARG2]], align 8 - // CHECK: %[[RES:div]] = sdiv i4096 %[[VAR_A]], %[[VAR_B]] - // CHECK: store i4096 %[[RES]], ptr addrspace(4) %agg.result, align 8 - // CHECK: ret void + // CHECK: %a.addr.ascast = addrspacecast ptr %a.addr to ptr addrspace(4) + // CHECK: %b.addr.ascast = addrspacecast ptr %b.addr to ptr addrspace(4) + // CHECK: %a = load i4096, ptr %[[ARG1]], align 8 + // CHECK: %b = load i4096, ptr %[[ARG2]], align 8 + // CHECK: store i4096 %a, ptr addrspace(4) %a.addr.ascast, align 8 + // CHECK: store i4096 %b, ptr addrspace(4) %b.addr.ascast, align 8 + // CHECK: %2 = load i4096, ptr addrspace(4) %a.addr.ascast, align 8 + // CHECK: %3 = load i4096, ptr addrspace(4) %b.addr.ascast, align 8 + // CHECK: %div = sdiv i4096 %2, %3 return a / b; } From f2035e9c2f55ba9db43bf233c8b3bad6bd24af7d Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Wed, 4 Jan 2023 13:08:21 -0800 Subject: [PATCH 2/2] Add driver regression test. --- clang/test/Driver/sycl-device-optimizations.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/test/Driver/sycl-device-optimizations.cpp b/clang/test/Driver/sycl-device-optimizations.cpp index db72b84bdd33d..8d35b2122414e 100644 --- a/clang/test/Driver/sycl-device-optimizations.cpp +++ b/clang/test/Driver/sycl-device-optimizations.cpp @@ -13,6 +13,8 @@ // RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s // CHECK-DEFAULT-NOT: "-fno-sycl-early-optimizations" // CHECK-DEFAULT-NOT: "-disable-llvm-passes" +// CHECK-DEFAULT: "-fsycl-is-device" +// CHECK-DEFAULT-SAME: "-O2" /// Check "-fno-sycl-early-optimizations" is passed to the front-end: // RUN: %clang -### -fsycl -fno-sycl-early-optimizations %s 2>&1 \