diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 0805fd65e75d2..42378b34a8c11 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -64,6 +64,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, VLASupported = false; AddrSpaceMap = &NVPTXAddrSpaceMap; UseAddrSpaceMapMangling = true; + HasLegalHalfType = true; + HasFloat16 = true; // Define available target features // These must be defined in sorted order! diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 6b6b5a239dbf0..882048d766b0c 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1753,12 +1753,7 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) { if (Ty->isDependentType()) return; - auto IsSYCLDeviceCuda = getLangOpts().SYCLIsDevice && - Context.getTargetInfo().getTriple().isNVPTX(); - if ((Ty->isFloat16Type() && !Context.getTargetInfo().hasFloat16Type() && - // Disable check for SYCL CUDA BE until FP16 support is properly - // reported there (issue#1799) - !IsSYCLDeviceCuda) || + if ((Ty->isFloat16Type() && !Context.getTargetInfo().hasFloat16Type()) || ((Ty->isFloat128Type() || (Ty->isRealFloatingType() && Context.getTypeSize(Ty) == 128)) && !Context.getTargetInfo().hasFloat128Type()) || diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index b4f1a7bfb01b9..7a205e30ae7d6 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1521,12 +1521,11 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { // CUDA host and device may have different _Float16 support, therefore // do not diagnose _Float16 usage to avoid false alarm. // ToDo: more precise diagnostics for CUDA. - auto IsSYCLDeviceCuda = - S.getLangOpts().SYCLIsDevice && S.Context.getTargetInfo().getTriple().isNVPTX(); - if (!S.Context.getTargetInfo().hasFloat16Type() && !S.getLangOpts().CUDA && - !(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice) && !IsSYCLDeviceCuda) + if (!S.Context.getTargetInfo().hasFloat16Type() && + !S.getLangOpts().CUDA && + !(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice)) S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) - << "_Float16"; + << "_Float16"; } Result = Context.Float16Ty; break; diff --git a/clang/test/CodeGenCXX/nvptx-float16.cpp b/clang/test/CodeGenCXX/nvptx-float16.cpp new file mode 100644 index 0000000000000..255bfb5bad236 --- /dev/null +++ b/clang/test/CodeGenCXX/nvptx-float16.cpp @@ -0,0 +1,22 @@ +// REQUIRES: nvptx-registered-target +// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -target-cpu sm_20 -S -o - %s | FileCheck %s -check-prefix=NOF16 +// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -target-cpu sm_60 -S -o - %s | FileCheck %s + +// CHECK: .target sm_60 +// NOF16: .target sm_20 + +void f() { + _Float16 x, y, z; + // CHECK: add.rn.f16 + // NOF16: add.rn.f32 + z = x + y; + // CHECK: sub.rn.f16 + // NOF16: sub.rn.f32 + z = x - y; + // CHECK: mul.rn.f16 + // NOF16: mul.rn.f32 + z = x * y; + // CHECK: div.rn.f32 + // NOF16: div.rn.f32 + z = x / y; +}